refactor(module-llm): optimize dataset processing and the scheduled task interval

- Refactor the JsonFileWrite method in AsyncDataSetService to make dataset file generation more efficient
- Change the scheduled task in DatasetTaskSyncService to run every 15 seconds instead of every 30 seconds
limin 2025-02-14 12:47:03 +08:00
parent b5195e1113
commit f7a501cc7d
2 changed files with 16 additions and 18 deletions


@@ -34,29 +34,27 @@ public class AsyncDataSetService {
     @Async
     public void JsonFileWrite(DatasetDO datasetDO,List<DatasetQuestionRespVO> datasetQuestionList) {
-        StringBuilder sb = new StringBuilder();
+        List<AigcDatasetVo> aigcDatasetVoList = new ArrayList<>();
         datasetQuestionList.forEach(dataSource -> {
-            List<ModelCompletionsReqVO.ModelCompletionsMessage> messages = new ArrayList<>();
-            ModelCompletionsReqVO.ModelCompletionsMessage questionMessage = new ModelCompletionsReqVO.ModelCompletionsMessage();
-            questionMessage.setRole("user");
-            questionMessage.setContent(StringUtils.isNotBlank(dataSource.getQuestion())?dataSource.getQuestion():"");
-            messages.add(questionMessage);
-            ModelCompletionsReqVO.ModelCompletionsMessage answerMessage = new ModelCompletionsReqVO.ModelCompletionsMessage();
-            answerMessage.setRole("assistant");
+            AigcDatasetVo aigcDatasetVo = new AigcDatasetVo();
+            aigcDatasetVo.setInstruction(StringUtils.isNotBlank(dataSource.getSystem())?dataSource.getSystem():"");
+            aigcDatasetVo.setInput(StringUtils.isNotBlank(dataSource.getQuestion())?dataSource.getQuestion():"");
             if (!CollectionUtils.isAnyEmpty(dataSource.getDatasetAnswerRespVO())){
-                answerMessage.setContent(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer())?dataSource.getDatasetAnswerRespVO().get(0).getAnswer():"");
+                aigcDatasetVo.setOutput(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer())?dataSource.getDatasetAnswerRespVO().get(0).getAnswer():"");
             }else {
-                answerMessage.setContent("");
+                aigcDatasetVo.setOutput("");
             }
-            messages.add(answerMessage);
-            JSONObject jsonObject = new JSONObject();
-            jsonObject.put("messages", messages);
-            sb.append(jsonObject.toString()).append("\n");
+            aigcDatasetVoList.add(aigcDatasetVo);
         });
-        try (InputStream inputStream = new ByteArrayInputStream(sb.toString().getBytes())){
-            AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(), inputStream, datasetDO.getDatasetName() + datasetDO.getId() + ".json");
+        ObjectMapper mapper = new ObjectMapper();
+        StringBuilder sb = new StringBuilder();
+        try {
+            for (AigcDatasetVo aigcDatasetVo : aigcDatasetVoList) {
+                String json = mapper.writeValueAsString(aigcDatasetVo);
+                sb.append(json).append("\n"); // newline after each JSON object
+            }
+            InputStream inputStream = new ByteArrayInputStream(sb.toString().getBytes());
+            AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(), inputStream, datasetDO.getDatasetName() + "new"+datasetDO.getId() + ".json");
             if (aigcDatasetFileRespV0 != null){
                 datasetMapper.setJobid(datasetDO.getId(),aigcDatasetFileRespV0.getFileId());
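
The rewritten method drops the hand-assembled JSONObject "messages" payload and instead serializes each AigcDatasetVo (instruction/input/output) with Jackson's ObjectMapper, one JSON object per line (JSON Lines). The following is a minimal, self-contained sketch of that serialization pattern only; the Record class, its field names, and the sample data are illustrative stand-ins, not the project's actual types.

import com.fasterxml.jackson.databind.ObjectMapper;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;

public class JsonlWriteSketch {

    // Stand-in for AigcDatasetVo; Jackson serializes the public fields directly.
    public static class Record {
        public String instruction;
        public String input;
        public String output;

        public Record(String instruction, String input, String output) {
            this.instruction = instruction;
            this.input = input;
            this.output = output;
        }
    }

    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        List<Record> records = List.of(
                new Record("Answer concisely.", "What is JSON Lines?", "One JSON object per line."),
                new Record("", "2 + 2 = ?", "4"));

        // One JSON object per record, each followed by a newline (.jsonl).
        StringBuilder sb = new StringBuilder();
        for (Record record : records) {
            sb.append(mapper.writeValueAsString(record)).append("\n");
        }
        System.out.print(sb);

        // The same bytes could then be wrapped in a stream for an upload client,
        // as JsonFileWrite does before calling trainHttpService.AigcUploadFile.
        InputStream in = new ByteArrayInputStream(sb.toString().getBytes(StandardCharsets.UTF_8));
    }
}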


@@ -23,7 +23,7 @@ public class DatasetTaskSyncService {
     @Resource
     private AsyncDataSetService dataSetService;
-    @Scheduled(cron ="0/30 * * * * ?")
+    @Scheduled(cron ="0/15 * * * * ?")
     public void syncDatasetAigcTask() {
         log.info("[syncDatasetAigcTask][start sync task]");
         // Query the datasets whose annotation has been completed
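
Spring's @Scheduled cron expression has six fields (second, minute, hour, day-of-month, month, day-of-week), so "0/15 * * * * ?" fires at seconds 0, 15, 30 and 45 of every minute, i.e. every 15 seconds instead of the previous 30. Below is a minimal, self-contained sketch of a task on the same cadence; the class and method names are illustrative and not taken from the project.

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.annotation.Scheduled;

@SpringBootApplication
@EnableScheduling
public class ScheduleSketchApplication {

    public static void main(String[] args) {
        SpringApplication.run(ScheduleSketchApplication.class, args);
    }

    // Fields: second minute hour day-of-month month day-of-week.
    // "0/15" in the seconds field fires at :00, :15, :30 and :45 of every minute.
    @Scheduled(cron = "0/15 * * * * ?")
    public void tick() {
        System.out.println("tick at " + java.time.LocalTime.now());
    }
}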