From 769adc7725bf51df2055f8ba76ac9f6deb2fc9a0 Mon Sep 17 00:00:00 2001 From: limin Date: Sun, 29 Dec 2024 11:20:42 +0800 Subject: [PATCH] =?UTF-8?q?=E6=95=B0=E6=8D=AE=E8=A1=A8=E8=AF=A6=E6=83=85?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E9=97=AE=E9=A2=98=E6=98=BE=E7=A4=BA=E8=AF=A6?= =?UTF-8?q?=E6=83=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../module/llm/enums/ErrorCodeConstants.java | 3 + .../admin/dataset/vo/DatasetRespVO.java | 3 + .../admin/dataset/vo/DatasetSaveReqVO.java | 3 + .../llm/service/dataset/DatasetService.java | 2 +- .../service/dataset/DatasetServiceImpl.java | 127 ++++++++++++++++-- 5 files changed, 127 insertions(+), 11 deletions(-) diff --git a/yudao-module-llm/yudao-module-llm-api/src/main/java/cn/iocoder/yudao/module/llm/enums/ErrorCodeConstants.java b/yudao-module-llm/yudao-module-llm-api/src/main/java/cn/iocoder/yudao/module/llm/enums/ErrorCodeConstants.java index 7b69052ca..cf070ace4 100644 --- a/yudao-module-llm/yudao-module-llm-api/src/main/java/cn/iocoder/yudao/module/llm/enums/ErrorCodeConstants.java +++ b/yudao-module-llm/yudao-module-llm-api/src/main/java/cn/iocoder/yudao/module/llm/enums/ErrorCodeConstants.java @@ -27,5 +27,8 @@ public interface ErrorCodeConstants { ErrorCode MODEL_ASSESS_TASK_STOPLIST_NOT_EXISTS = new ErrorCode(10013, "自动评估维度不存在"); ErrorCode MODEL_ASSESS_STOPLIST_NOT_EXISTS = new ErrorCode(10014, "自动评估维度不存在"); ErrorCode LEARNING_RESOURCES_NOT_EXISTS = new ErrorCode(10015, "学习资源不存在"); + ErrorCode DATASET_FILES_NOT_EXISTS = new ErrorCode(10016, "数据集文件资源不存在"); + ErrorCode DATASET_QUESTION_NOT_EXISTS = new ErrorCode(10017, "数据集标准问题不存在"); + } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetRespVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetRespVO.java index 73f6af215..e2befaf78 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetRespVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetRespVO.java @@ -56,4 +56,7 @@ public class DatasetRespVO { @Schema(description = "标注进度", example = "20") private Integer annotateProgress; + + @Schema(description = "数据集数据文件", example = "[]") + private List datasetQuestionRespVOS; } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetSaveReqVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetSaveReqVO.java index 54149c9ae..e25f32e2a 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetSaveReqVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetSaveReqVO.java @@ -44,4 +44,7 @@ public class DatasetSaveReqVO { @Schema(description = "标注进度", example = "20") private Integer annotateProgress; + @Schema(description = "数据集数据文件", example = "[]") + private List datasetFiles; + } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetService.java index 03945ec77..023b9edf4 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetService.java @@ -42,7 +42,7 @@ public interface DatasetService { * @param id 编号 * @return 数据集 */ - DatasetDO getDataset(Long id); + DatasetRespVO getDataset(Long id); /** * 获得数据集分页 diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetServiceImpl.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetServiceImpl.java index fd314aeef..f665ebb34 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetServiceImpl.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetServiceImpl.java @@ -1,27 +1,35 @@ package cn.iocoder.yudao.module.llm.service.dataset; + import cn.iocoder.yudao.framework.common.pojo.PageResult; import cn.iocoder.yudao.framework.common.util.object.BeanUtils; import cn.iocoder.yudao.module.llm.constant.DataConstants; -import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetPageReqVO; -import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetRespVO; -import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetSaveReqVO; -import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetTreeNode; +import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.*; import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetDO; +import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetFilesDO; +import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetQuestionDO; +import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetFilesMapper; import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetMapper; +import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetQuestionMapper; +import cn.iocoder.yudao.module.llm.utils.DataSetReadFileUtils; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; +import com.baomidou.mybatisplus.core.toolkit.CollectionUtils; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import org.springframework.stereotype.Service; import org.springframework.validation.annotation.Validated; import javax.annotation.Resource; -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; +import java.io.*; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.stream.Collectors; import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception; import static cn.iocoder.yudao.module.llm.enums.ErrorCodeConstants.DATASET_NOT_EXISTS; @@ -37,6 +45,10 @@ public class DatasetServiceImpl implements DatasetService { @Resource private DatasetMapper datasetMapper; + @Resource + private DatasetFilesMapper datasetFilesMapper; + @Resource + private DatasetQuestionMapper datasetQuestionMapper; @Override public Long createDataset(DatasetSaveReqVO createReqVO) { @@ -46,7 +58,34 @@ public class DatasetServiceImpl implements DatasetService { // 插入 DatasetDO dataset = BeanUtils.toBean(createReqVO, DatasetDO.class); datasetMapper.insert(dataset); - // 返回 + List datasetFiles = createReqVO.getDatasetFiles(); + datasetFiles.stream().forEach( + datasetFilesSaveReqVO -> { + datasetFilesSaveReqVO.setDatasetId(dataset.getId()); + } + ); + List insertDatasetFiles = BeanUtils.toBean(datasetFiles, DatasetFilesDO.class); + System.out.println(insertDatasetFiles); + datasetFilesMapper.insertBatch(insertDatasetFiles, 100); + System.out.println(insertDatasetFiles); + // 提取文件 + List jsonFiles = insertDatasetFiles.stream() + .filter(datasetFilesDO -> datasetFilesDO.getDatasetFileUrl().toLowerCase().endsWith(".json")) + .collect(Collectors.toList()); + if (CollectionUtils.isNotEmpty(jsonFiles)){ + readJsonFile(jsonFiles); + } + + List txtFiles = insertDatasetFiles.stream() + .filter(datasetFilesDO -> datasetFilesDO.getDatasetFileUrl().toLowerCase().endsWith(".txt")) + .collect(Collectors.toList()); + if (CollectionUtils.isNotEmpty(txtFiles)){ + readTxtFile(txtFiles); + } + Long count = datasetQuestionMapper.selectCount(new LambdaQueryWrapper() + .eq(DatasetQuestionDO::getDatasetId, dataset.getId())); + dataset.setDataLength(count); + datasetMapper.updateById(dataset); return dataset.getId(); } @@ -86,8 +125,13 @@ public class DatasetServiceImpl implements DatasetService { } @Override - public DatasetDO getDataset(Long id) { - return datasetMapper.selectById(id); + public DatasetRespVO getDataset(Long id) { + DatasetDO datasetDO = datasetMapper.selectById(id); + DatasetRespVO datasetRespVO = BeanUtils.toBean(datasetDO, DatasetRespVO.class); + List datasetQuestionDO = datasetQuestionMapper.selectList(new LambdaQueryWrapper().eq(DatasetQuestionDO::getDatasetId, id)); + List datasetQuestionRespVOS = BeanUtils.toBean(datasetQuestionDO, DatasetQuestionRespVO.class); + datasetRespVO.setDatasetQuestionRespVOS(datasetQuestionRespVOS); + return datasetRespVO; } @Override @@ -125,4 +169,67 @@ public class DatasetServiceImpl implements DatasetService { return root; } + + public void readJsonFile(List jsonFiles){ + jsonFiles.forEach(datasetFilesDO -> { + HttpURLConnection connection = DataSetReadFileUtils.readFile(datasetFilesDO.getDatasetFileUrl()); + if (connection != null){ + try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8"))) { + StringBuilder content = new StringBuilder(); + String line; + while ((line = in.readLine()) != null) { + content.append(line); + } + // 使用Jackson解析 Json 字符串为List对象 + ObjectMapper mapper = new ObjectMapper(); + // 使用 TypeReference 解析 JSON 字符串为 List + List stringList = mapper.readValue(content.toString(), new TypeReference>() {}); + stringList.forEach( + string -> { + DatasetQuestionDO datasetQuestionDO = new DatasetQuestionDO(); + datasetQuestionDO.setDatasetId(datasetFilesDO.getDatasetId()); + datasetQuestionDO.setQuestion(string); + datasetQuestionDO.setDatasetFilesId(datasetFilesDO.getId()); + datasetQuestionMapper.insert(datasetQuestionDO); + } + ); + }catch (Exception e){ + throw new RuntimeException("请正确上传json格式得数据!!!"); + }finally { + connection.disconnect(); + } + } + }); + } + /** + * txt文本数据 + * @param txtFiles + */ + public void readTxtFile(List txtFiles){ + txtFiles.forEach(datasetFilesDO -> { + List newContent = new ArrayList<>(); + + HttpURLConnection connection = DataSetReadFileUtils.readFile(datasetFilesDO.getDatasetFileUrl()); + if (connection != null){ + try (BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream(), "UTF-8"))) { + String inputLine; + while ((inputLine = in.readLine()) != null) { + inputLine = inputLine.trim(); // 去除行首和行尾的空白字符 + if (!inputLine.isEmpty()) { + DatasetQuestionDO datasetQuestionDO = new DatasetQuestionDO();// 检查是否为空行 + datasetQuestionDO.setDatasetId(datasetFilesDO.getDatasetId()); + datasetQuestionDO.setDatasetFilesId(datasetFilesDO.getId()); + datasetQuestionDO.setQuestion(inputLine); + datasetQuestionMapper.insert(datasetQuestionDO); + } + } + }catch (Exception e){ + e.printStackTrace(); + }finally { + connection.disconnect(); + } + } + }); + } + }