diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncDataSetService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncDataSetService.java index 327986bc4..8141b1046 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncDataSetService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncDataSetService.java @@ -1,6 +1,5 @@ package cn.iocoder.yudao.module.llm.service.async; -import cn.hutool.json.JSONObject; import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils; import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetQuestionRespVO; import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetDO; @@ -8,7 +7,6 @@ import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetMapper; import cn.iocoder.yudao.module.llm.service.dataset.vo.AigcDatasetVo; import cn.iocoder.yudao.module.llm.service.http.TrainHttpService; import cn.iocoder.yudao.module.llm.service.http.vo.AigcDatasetFileRespV0; -import cn.iocoder.yudao.module.llm.service.http.vo.ModelCompletionsReqVO; import com.baomidou.mybatisplus.core.toolkit.StringUtils; import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; @@ -33,15 +31,15 @@ public class AsyncDataSetService { private static final Logger log = LoggerFactory.getLogger(AsyncKnowledgeBase.class); @Async - public void JsonFileWrite(DatasetDO datasetDO,List datasetQuestionList) { + public void JsonFileWrite (DatasetDO datasetDO, List datasetQuestionList) { List aigcDatasetVoList = new ArrayList<>(); datasetQuestionList.forEach(dataSource -> { AigcDatasetVo aigcDatasetVo = new AigcDatasetVo(); - aigcDatasetVo.setInstruction(StringUtils.isNotBlank(dataSource.getSystem())?dataSource.getSystem():""); - aigcDatasetVo.setInput(StringUtils.isNotBlank(dataSource.getQuestion())?dataSource.getQuestion():""); - if (!CollectionUtils.isAnyEmpty(dataSource.getDatasetAnswerRespVO())){ - aigcDatasetVo.setOutput(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer())?dataSource.getDatasetAnswerRespVO().get(0).getAnswer():""); - }else { + aigcDatasetVo.setInstruction(StringUtils.isNotBlank(dataSource.getSystem()) ? dataSource.getSystem() : ""); + aigcDatasetVo.setInput(StringUtils.isNotBlank(dataSource.getQuestion()) ? dataSource.getQuestion() : ""); + if (!CollectionUtils.isAnyEmpty(dataSource.getDatasetAnswerRespVO())) { + aigcDatasetVo.setOutput(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer()) ? dataSource.getDatasetAnswerRespVO().get(0).getAnswer() : ""); + } else { aigcDatasetVo.setOutput(""); } aigcDatasetVoList.add(aigcDatasetVo); @@ -54,16 +52,16 @@ public class AsyncDataSetService { sb.append(json).append("\n"); // 每个 JSON 对象后换行 } InputStream inputStream = new ByteArrayInputStream(sb.toString().getBytes()); - AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(),"http://36.103.199.104:5123", inputStream, datasetDO.getDatasetName() + "new"+datasetDO.getId() + ".json"); - if (aigcDatasetFileRespV0 != null){ - datasetMapper.setJobid(datasetDO.getId(),aigcDatasetFileRespV0.getFileId()); + AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(), "http://36.103.199.104:5123", inputStream, datasetDO.getDatasetName() + "new" + datasetDO.getId() + ".json"); + if (aigcDatasetFileRespV0 != null) { + datasetMapper.setJobid(datasetDO.getId(), aigcDatasetFileRespV0.getFileId()); String s3Url = aigcDatasetFileRespV0.getS3Url(); int lastIndex = s3Url.lastIndexOf("/storage"); //todo 1111 String url = s3Url.substring(lastIndex + 1); - datasetMapper.setUrl(datasetDO.getId(),url); + datasetMapper.setUrl(datasetDO.getId(), url); log.info("[JsonFileWrite][写入文件成功]"); } @@ -74,45 +72,72 @@ public class AsyncDataSetService { } - public String JsonFileWriteFine(String hostUrl,DatasetDO datasetDO,List datasetQuestionList) { - List aigcDatasetVoList = new ArrayList<>(); - datasetQuestionList.forEach(dataSource -> { - AigcDatasetVo aigcDatasetVo = new AigcDatasetVo(); - aigcDatasetVo.setInstruction(StringUtils.isNotBlank(dataSource.getSystem())?dataSource.getSystem():""); - aigcDatasetVo.setInput(StringUtils.isNotBlank(dataSource.getQuestion())?dataSource.getQuestion():""); - if (!CollectionUtils.isAnyEmpty(dataSource.getDatasetAnswerRespVO())){ - aigcDatasetVo.setOutput(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer())?dataSource.getDatasetAnswerRespVO().get(0).getAnswer():""); - }else { - aigcDatasetVo.setOutput(""); - } - aigcDatasetVoList.add(aigcDatasetVo); - }); - ObjectMapper mapper = new ObjectMapper(); - StringBuilder sb = new StringBuilder(); + public String JsonFileWriteFine (String hostUrl, DatasetDO datasetDO, List datasetQuestionList) { try { + log.info("开始生成 JSON 文件并上传,数据集ID: {}", datasetDO.getId()); + + // 构建 AigcDatasetVo 列表 + log.debug("正在构建 AigcDatasetVo 列表..."); + List aigcDatasetVoList = new ArrayList<>(); + for (DatasetQuestionRespVO dataSource : datasetQuestionList) { + AigcDatasetVo aigcDatasetVo = new AigcDatasetVo(); + aigcDatasetVo.setInstruction(StringUtils.isNotBlank(dataSource.getSystem()) ? dataSource.getSystem() : ""); + aigcDatasetVo.setInput(StringUtils.isNotBlank(dataSource.getQuestion()) ? dataSource.getQuestion() : ""); + + // 检查答案列表是否为空 + if (!CollectionUtils.isAnyEmpty(dataSource.getDatasetAnswerRespVO())) { + aigcDatasetVo.setOutput(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer()) ? + dataSource.getDatasetAnswerRespVO().get(0).getAnswer() : ""); + } else { + aigcDatasetVo.setOutput(""); + } + aigcDatasetVoList.add(aigcDatasetVo); + } + log.debug("AigcDatasetVo 列表构建完成。记录数量: {}", aigcDatasetVoList.size()); + + // 将 AigcDatasetVo 列表转换为 JSON 字符串 + log.debug("正在将 AigcDatasetVo 列表转换为 JSON 字符串..."); + ObjectMapper mapper = new ObjectMapper(); + StringBuilder sb = new StringBuilder(); for (AigcDatasetVo aigcDatasetVo : aigcDatasetVoList) { String json = mapper.writeValueAsString(aigcDatasetVo); - sb.append(json).append("\n"); // 每个 JSON 对象后换行 + sb.append(json).append("\n"); } - InputStream inputStream = new ByteArrayInputStream(sb.toString().getBytes()); - AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(),hostUrl, inputStream, datasetDO.getDatasetName() + "new"+datasetDO.getId() + ".json"); - if (aigcDatasetFileRespV0 != null){ - datasetMapper.setJobid(datasetDO.getId(),aigcDatasetFileRespV0.getFileId()); + // 将 JSON 字符串转换为输入流 + log.debug("正在将 JSON 字符串转换为输入流..."); + InputStream inputStream = new ByteArrayInputStream(sb.toString().getBytes()); + + // 上传文件 + log.info("正在上传 JSON 文件..."); + String fileName = datasetDO.getDatasetName() + "new" + datasetDO.getId() + ".json"; + AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(), hostUrl, inputStream, fileName); + + if (aigcDatasetFileRespV0 != null) { + log.debug("文件上传成功。文件ID: {}", aigcDatasetFileRespV0.getFileId()); + + // 更新数据集的 Job ID + log.debug("正在更新数据集的 Job ID..."); + datasetMapper.setJobid(datasetDO.getId(), aigcDatasetFileRespV0.getFileId()); + + // 更新数据集的 URL String s3Url = aigcDatasetFileRespV0.getS3Url(); int lastIndex = s3Url.lastIndexOf("/storage"); - - //todo 1111 String url = s3Url.substring(lastIndex + 1); - datasetMapper.setUrl(datasetDO.getId(),url); + datasetMapper.setUrl(datasetDO.getId(), url); + + // 返回结果 String result = url.substring(hostUrl.length()); - log.info("[JsonFileWrite][写入文件成功]"); + log.info("JSON 文件生成并上传成功。返回结果: {}", result); return result; + } else { + log.error("文件上传失败。数据集ID: {}", datasetDO.getId()); + return ""; } + } catch (IOException e) { - // 记录异常信息 - log.error("[JsonFileWrite][写入文件失败] {}", e.getMessage()); + log.error("生成或上传 JSON 文件时发生异常。数据集ID: {}", datasetDO.getId(), e); + return ""; } - return ""; } }