refactor(yudao-module-llm): 重构文件生成和上传逻辑

- 优化了 JsonFileWrite 和 JsonFileWriteFine 方法的代码结构
- 为 JsonFileWriteFine 添加了分步骤的日志记录（info/debug/error），并在异常日志中传入异常对象以保留堆栈信息
This commit is contained in:
Liuyang 2025-02-26 17:41:50 +08:00
parent b4889fd522
commit 1b67ab3bbf

View File

@ -1,6 +1,5 @@
package cn.iocoder.yudao.module.llm.service.async;
import cn.hutool.json.JSONObject;
import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetQuestionRespVO;
import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetDO;
@ -8,7 +7,6 @@ import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetMapper;
import cn.iocoder.yudao.module.llm.service.dataset.vo.AigcDatasetVo;
import cn.iocoder.yudao.module.llm.service.http.TrainHttpService;
import cn.iocoder.yudao.module.llm.service.http.vo.AigcDatasetFileRespV0;
import cn.iocoder.yudao.module.llm.service.http.vo.ModelCompletionsReqVO;
import com.baomidou.mybatisplus.core.toolkit.StringUtils;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
@ -33,15 +31,15 @@ public class AsyncDataSetService {
private static final Logger log = LoggerFactory.getLogger(AsyncKnowledgeBase.class);
@Async
public void JsonFileWrite(DatasetDO datasetDO,List<DatasetQuestionRespVO> datasetQuestionList) {
public void JsonFileWrite (DatasetDO datasetDO, List<DatasetQuestionRespVO> datasetQuestionList) {
List<AigcDatasetVo> aigcDatasetVoList = new ArrayList<>();
datasetQuestionList.forEach(dataSource -> {
AigcDatasetVo aigcDatasetVo = new AigcDatasetVo();
aigcDatasetVo.setInstruction(StringUtils.isNotBlank(dataSource.getSystem())?dataSource.getSystem():"");
aigcDatasetVo.setInput(StringUtils.isNotBlank(dataSource.getQuestion())?dataSource.getQuestion():"");
if (!CollectionUtils.isAnyEmpty(dataSource.getDatasetAnswerRespVO())){
aigcDatasetVo.setOutput(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer())?dataSource.getDatasetAnswerRespVO().get(0).getAnswer():"");
}else {
aigcDatasetVo.setInstruction(StringUtils.isNotBlank(dataSource.getSystem()) ? dataSource.getSystem() : "");
aigcDatasetVo.setInput(StringUtils.isNotBlank(dataSource.getQuestion()) ? dataSource.getQuestion() : "");
if (!CollectionUtils.isAnyEmpty(dataSource.getDatasetAnswerRespVO())) {
aigcDatasetVo.setOutput(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer()) ? dataSource.getDatasetAnswerRespVO().get(0).getAnswer() : "");
} else {
aigcDatasetVo.setOutput("");
}
aigcDatasetVoList.add(aigcDatasetVo);
@ -54,16 +52,16 @@ public class AsyncDataSetService {
sb.append(json).append("\n"); // 每个 JSON 对象后换行
}
InputStream inputStream = new ByteArrayInputStream(sb.toString().getBytes());
AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(),"http://36.103.199.104:5123", inputStream, datasetDO.getDatasetName() + "new"+datasetDO.getId() + ".json");
if (aigcDatasetFileRespV0 != null){
datasetMapper.setJobid(datasetDO.getId(),aigcDatasetFileRespV0.getFileId());
AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(), "http://36.103.199.104:5123", inputStream, datasetDO.getDatasetName() + "new" + datasetDO.getId() + ".json");
if (aigcDatasetFileRespV0 != null) {
datasetMapper.setJobid(datasetDO.getId(), aigcDatasetFileRespV0.getFileId());
String s3Url = aigcDatasetFileRespV0.getS3Url();
int lastIndex = s3Url.lastIndexOf("/storage");
//todo 1111
String url = s3Url.substring(lastIndex + 1);
datasetMapper.setUrl(datasetDO.getId(),url);
datasetMapper.setUrl(datasetDO.getId(), url);
log.info("[JsonFileWrite][写入文件成功]");
}
@ -74,45 +72,72 @@ public class AsyncDataSetService {
}
public String JsonFileWriteFine(String hostUrl,DatasetDO datasetDO,List<DatasetQuestionRespVO> datasetQuestionList) {
List<AigcDatasetVo> aigcDatasetVoList = new ArrayList<>();
datasetQuestionList.forEach(dataSource -> {
AigcDatasetVo aigcDatasetVo = new AigcDatasetVo();
aigcDatasetVo.setInstruction(StringUtils.isNotBlank(dataSource.getSystem())?dataSource.getSystem():"");
aigcDatasetVo.setInput(StringUtils.isNotBlank(dataSource.getQuestion())?dataSource.getQuestion():"");
if (!CollectionUtils.isAnyEmpty(dataSource.getDatasetAnswerRespVO())){
aigcDatasetVo.setOutput(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer())?dataSource.getDatasetAnswerRespVO().get(0).getAnswer():"");
}else {
aigcDatasetVo.setOutput("");
}
aigcDatasetVoList.add(aigcDatasetVo);
});
ObjectMapper mapper = new ObjectMapper();
StringBuilder sb = new StringBuilder();
public String JsonFileWriteFine (String hostUrl, DatasetDO datasetDO, List<DatasetQuestionRespVO> datasetQuestionList) {
try {
log.info("开始生成 JSON 文件并上传数据集ID: {}", datasetDO.getId());
// 构建 AigcDatasetVo 列表
log.debug("正在构建 AigcDatasetVo 列表...");
List<AigcDatasetVo> aigcDatasetVoList = new ArrayList<>();
for (DatasetQuestionRespVO dataSource : datasetQuestionList) {
AigcDatasetVo aigcDatasetVo = new AigcDatasetVo();
aigcDatasetVo.setInstruction(StringUtils.isNotBlank(dataSource.getSystem()) ? dataSource.getSystem() : "");
aigcDatasetVo.setInput(StringUtils.isNotBlank(dataSource.getQuestion()) ? dataSource.getQuestion() : "");
// 检查答案列表是否为空
if (!CollectionUtils.isAnyEmpty(dataSource.getDatasetAnswerRespVO())) {
aigcDatasetVo.setOutput(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer()) ?
dataSource.getDatasetAnswerRespVO().get(0).getAnswer() : "");
} else {
aigcDatasetVo.setOutput("");
}
aigcDatasetVoList.add(aigcDatasetVo);
}
log.debug("AigcDatasetVo 列表构建完成。记录数量: {}", aigcDatasetVoList.size());
// AigcDatasetVo 列表转换为 JSON 字符串
log.debug("正在将 AigcDatasetVo 列表转换为 JSON 字符串...");
ObjectMapper mapper = new ObjectMapper();
StringBuilder sb = new StringBuilder();
for (AigcDatasetVo aigcDatasetVo : aigcDatasetVoList) {
String json = mapper.writeValueAsString(aigcDatasetVo);
sb.append(json).append("\n"); // 每个 JSON 对象后换行
sb.append(json).append("\n");
}
InputStream inputStream = new ByteArrayInputStream(sb.toString().getBytes());
AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(),hostUrl, inputStream, datasetDO.getDatasetName() + "new"+datasetDO.getId() + ".json");
if (aigcDatasetFileRespV0 != null){
datasetMapper.setJobid(datasetDO.getId(),aigcDatasetFileRespV0.getFileId());
// JSON 字符串转换为输入流
log.debug("正在将 JSON 字符串转换为输入流...");
InputStream inputStream = new ByteArrayInputStream(sb.toString().getBytes());
// 上传文件
log.info("正在上传 JSON 文件...");
String fileName = datasetDO.getDatasetName() + "new" + datasetDO.getId() + ".json";
AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(), hostUrl, inputStream, fileName);
if (aigcDatasetFileRespV0 != null) {
log.debug("文件上传成功。文件ID: {}", aigcDatasetFileRespV0.getFileId());
// 更新数据集的 Job ID
log.debug("正在更新数据集的 Job ID...");
datasetMapper.setJobid(datasetDO.getId(), aigcDatasetFileRespV0.getFileId());
// 更新数据集的 URL
String s3Url = aigcDatasetFileRespV0.getS3Url();
int lastIndex = s3Url.lastIndexOf("/storage");
//todo 1111
String url = s3Url.substring(lastIndex + 1);
datasetMapper.setUrl(datasetDO.getId(),url);
datasetMapper.setUrl(datasetDO.getId(), url);
// 返回结果
String result = url.substring(hostUrl.length());
log.info("[JsonFileWrite][写入文件成功]");
log.info("JSON 文件生成并上传成功。返回结果: {}", result);
return result;
} else {
log.error("文件上传失败。数据集ID: {}", datasetDO.getId());
return "";
}
} catch (IOException e) {
// 记录异常信息
log.error("[JsonFileWrite][写入文件失败] {}", e.getMessage());
log.error("生成或上传 JSON 文件时发生异常。数据集ID: {}", datasetDO.getId(), e);
return "";
}
return "";
}
}