diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/utils/DataSetReadFileUtils.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/utils/DataSetReadFileUtils.java index e893139ad..b996ce14a 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/utils/DataSetReadFileUtils.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/utils/DataSetReadFileUtils.java @@ -1,5 +1,8 @@ package cn.iocoder.yudao.module.llm.utils; +import cn.hutool.core.text.csv.CsvReader; +import cn.hutool.core.text.csv.CsvUtil; +import cn.hutool.core.util.URLUtil; import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetQuestionDO; import cn.iocoder.yudao.module.llm.utils.vo.CsvDataSetVO; import com.opencsv.CSVParser; @@ -52,87 +55,22 @@ public class DataSetReadFileUtils { */ public static List readParseCsv (String csvUrl) throws IOException, CsvValidationException { - List dataSetVos = new ArrayList<>(); + List dataSetVos; - // 创建CSV读取器 - CSVReader csvReader = null; - try { - URL url = new URL(csvUrl); - BufferedReader reader = new BufferedReader(new InputStreamReader(url.openStream())); + // 创建 URL 对象 + URL url = URLUtil.url(csvUrl); - // 创建 CSV 解析器,分隔符为逗号 - CSVParser parser = new CSVParserBuilder().withSeparator(',').build(); - // 构建 CSV 读取器 - csvReader = new CSVReaderBuilder(reader).withCSVParser(parser).build(); - - // 读取标题行 - String[] headers = csvReader.readNext(); - String[] line; - - while (true) { - try { - // 读取下一行数据 - line = csvReader.readNext(); - if (line == null) { - // 数据集读取完成 - break; - } - } catch (com.opencsv.exceptions.CsvValidationException e) { - // 处理读取行时的异常 - throw new IOException("读取 CSV 行时发生错误", e); - } - - // 动态读取,当行长度与标题行长度相等时 - if (line.length == headers.length) { - // 获取系统列索引 - int systemIndex = getIndex(headers, "system"); - // 获取问题列索引 - int questionIndex = getIndex(headers, "question"); - // 获取答案列索引 - int answerIndex = getIndex(headers, "answer"); - // 存储系统列的值 - String systemValue = systemIndex == -1? "" : line[systemIndex]; - // 存储问题列的值 - String questionValue = questionIndex == -1? "" : line[questionIndex]; - // 存储答案列的值 - String answerValue = answerIndex == -1? "" : line[answerIndex]; - // 根据标题行找到相应列的索引创建 CsvDataSetVO 对象 - CsvDataSetVO dataSetVO = new CsvDataSetVO(systemValue, questionValue, answerValue); - // 将对象添加到列表中 - dataSetVos.add(dataSetVO); - } - } - } finally { - if (csvReader != null) { - try { - // 关闭 CSV 读取器 - csvReader.close(); - } catch (IOException e) { - // 关闭CSV读取器异常 - log.error("关闭CSV读取器时发生错误", e); - } - } + try (BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(url.openStream()))) { + CsvReader reader = CsvUtil.getReader(bufferedReader); + dataSetVos = reader.read(bufferedReader, CsvDataSetVO.class); + } catch (IOException e) { + throw new RuntimeException(e); } // 返回解析后的对象 return dataSetVos; } - /** - * 查找列名在标题行中的索引 - * - * @param headers 标题行 - * @param columnName 列名 - * @return 索引 - */ - private static int getIndex (String[] headers, String columnName) { - for (int i = 0; i < headers.length; i++) { - if (headers[i].equals(columnName)) { - return i; - } - } - return -1; - } public static Workbook readXlsxFromUrl(String filePath) { HttpURLConnection connection = readFile(filePath); diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/utils/vo/CsvDataSetVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/utils/vo/CsvDataSetVO.java index 9dadac17e..8179d8dc7 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/utils/vo/CsvDataSetVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/utils/vo/CsvDataSetVO.java @@ -1,5 +1,6 @@ package cn.iocoder.yudao.module.llm.utils.vo; +import cn.hutool.core.annotation.Alias; import lombok.AllArgsConstructor; import lombok.Data; import lombok.NoArgsConstructor; @@ -13,7 +14,10 @@ import lombok.ToString; @Data @ToString public class CsvDataSetVO { + @Alias("system") private String system; + @Alias("question") private String question; + @Alias("answer") private String answer; }