From 3be18ff084ba5e99a2e5e8168c5abbe77b80539a Mon Sep 17 00:00:00 2001 From: limin Date: Wed, 19 Feb 2025 15:28:17 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E6=A8=A1=E5=9E=8B=E8=B0=83?= =?UTF-8?q?=E4=BC=98=20=20=E6=95=B0=E6=8D=AE=E9=9B=86=20deepseek=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../service/async/AsyncDataSetService.java | 45 +++++++++++- .../async/AsyncFineTuningTaskService.java | 22 ++++-- .../llm/service/http/TrainHttpService.java | 4 +- .../src/main/resources/application-dev.yaml | 5 +- .../src/main/resources/application-local.yaml | 71 +++++++++++-------- 5 files changed, 107 insertions(+), 40 deletions(-) diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncDataSetService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncDataSetService.java index 695abfa2a..15a4b1d18 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncDataSetService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncDataSetService.java @@ -54,7 +54,7 @@ public class AsyncDataSetService { sb.append(json).append("\n"); // 每个 JSON 对象后换行 } InputStream inputStream = new ByteArrayInputStream(sb.toString().getBytes()); - AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(), inputStream, datasetDO.getDatasetName() + "new"+datasetDO.getId() + ".json"); + AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(),"http://36.103.199.104:5123", inputStream, datasetDO.getDatasetName() + "new"+datasetDO.getId() + ".json"); if (aigcDatasetFileRespV0 != null){ datasetMapper.setJobid(datasetDO.getId(),aigcDatasetFileRespV0.getFileId()); @@ -72,4 +72,47 @@ public class AsyncDataSetService { log.error("[JsonFileWrite][写入文件失败] {}", e.getMessage()); } } + + + public String JsonFileWriteFine(String hostUrl,DatasetDO datasetDO,List datasetQuestionList) { + List aigcDatasetVoList = new ArrayList<>(); + datasetQuestionList.forEach(dataSource -> { + AigcDatasetVo aigcDatasetVo = new AigcDatasetVo(); + aigcDatasetVo.setInstruction(StringUtils.isNotBlank(dataSource.getSystem())?dataSource.getSystem():""); + aigcDatasetVo.setInput(StringUtils.isNotBlank(dataSource.getQuestion())?dataSource.getQuestion():""); + if (!CollectionUtils.isAnyEmpty(dataSource.getDatasetAnswerRespVO())){ + aigcDatasetVo.setOutput(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer())?dataSource.getDatasetAnswerRespVO().get(0).getAnswer():""); + }else { + aigcDatasetVo.setOutput(""); + } + aigcDatasetVoList.add(aigcDatasetVo); + }); + ObjectMapper mapper = new ObjectMapper(); + StringBuilder sb = new StringBuilder(); + try { + for (AigcDatasetVo aigcDatasetVo : aigcDatasetVoList) { + String json = mapper.writeValueAsString(aigcDatasetVo); + sb.append(json).append("\n"); // 每个 JSON 对象后换行 + } + InputStream inputStream = new ByteArrayInputStream(sb.toString().getBytes()); + AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(),hostUrl, inputStream, datasetDO.getDatasetName() + "new"+datasetDO.getId() + ".json"); + if (aigcDatasetFileRespV0 != null){ + datasetMapper.setJobid(datasetDO.getId(),aigcDatasetFileRespV0.getFileId()); + + String s3Url = aigcDatasetFileRespV0.getS3Url(); + int lastIndex = s3Url.lastIndexOf("/storage"); + + //todo 1111 + String url = s3Url.substring(lastIndex + 1); + datasetMapper.setUrl(datasetDO.getId(),url); + + log.info("[JsonFileWrite][写入文件成功]"); + return url; + } + } catch (IOException e) { + // 记录异常信息 + log.error("[JsonFileWrite][写入文件失败] {}", e.getMessage()); + } + return ""; + } } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncFineTuningTaskService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncFineTuningTaskService.java index 3b5113d3c..ed3f229ea 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncFineTuningTaskService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncFineTuningTaskService.java @@ -1,13 +1,17 @@ package cn.iocoder.yudao.module.llm.service.async; +import cn.iocoder.yudao.framework.common.util.object.BeanUtils; +import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetQuestionRespVO; import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetRespVO; import cn.iocoder.yudao.module.llm.dal.dataobject.basemodel.BaseModelDO; +import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetDO; import cn.iocoder.yudao.module.llm.dal.dataobject.finetuningtask.FineTuningTaskDO; import cn.iocoder.yudao.module.llm.dal.dataobject.servername.ServerNameDO; import cn.iocoder.yudao.module.llm.dal.mysql.basemodel.BaseModelMapper; import cn.iocoder.yudao.module.llm.dal.mysql.finetuningtask.FineTuningTaskMapper; import cn.iocoder.yudao.module.llm.dal.mysql.servername.ServerNameMapper; import cn.iocoder.yudao.module.llm.enums.FinetuningTaskStatusEnum; +import cn.iocoder.yudao.module.llm.service.dataset.DatasetQuestionService; import cn.iocoder.yudao.module.llm.service.dataset.DatasetService; import cn.iocoder.yudao.module.llm.service.http.TrainHttpService; import cn.iocoder.yudao.module.llm.service.http.vo.AigcFineTuningCreateReqVO; @@ -20,6 +24,7 @@ import org.springframework.stereotype.Service; import javax.annotation.Resource; import java.util.HashMap; +import java.util.List; @Service @Slf4j @@ -39,11 +44,17 @@ public class AsyncFineTuningTaskService { @Resource private ServerNameMapper serverNameMapper; + @Resource + private DatasetQuestionService datasetQuestionService; + @Resource + private AsyncDataSetService dataSetService; //大模型平台创建调优任务 @Async public void createTuning(FineTuningTaskDO fineTuningTask) { try { + ServerNameDO serverNameDO = serverNameMapper.selectById(fineTuningTask.getGpuType()); + AigcFineTuningCreateReqVO req = getAigcFineTuningCreateReqVO(fineTuningTask); BaseModelDO baseModelDO = baseModelMapper.selectById(fineTuningTask.getBaseModelId()); Long datasetId = fineTuningTask.getDataset(); @@ -52,23 +63,26 @@ public class AsyncFineTuningTaskService { if (baseModelDO != null){ req.setModel(baseModelDO.getAigcModelName()); } + + List datasetQuestionList = datasetQuestionService.getDatasetQuestionList(dataset.getId()); + DatasetDO datasetDO = BeanUtils.toBean(dataset, DatasetDO.class); + String fileUrl = dataSetService.JsonFileWriteFine(serverNameDO.getHost(), datasetDO, datasetQuestionList); req.setDataset(StringUtils.isNotBlank(dataset.getDatasetFileUrl())?dataset.getDatasetFileUrl():"storage/json/2025/01/_xVKpfDH8.json"); - req.setDataset(dataset.getFileUrl()); + req.setDataset(fileUrl); req.setSuffix("T-"+String.valueOf(fineTuningTask.getId())); FineTuningTaskDO updateObj = new FineTuningTaskDO(); - ServerNameDO serverNameDO = serverNameMapper.selectById(fineTuningTask.getGpuType()); if (serverNameDO == null){ updateObj.setStatus(FinetuningTaskStatusEnum.CANCELLED.getStatus()); } AigcFineTuningCreateRespVO resp = trainHttpService.finetuningCreate(new HashMap<>(),serverNameDO.getHost(), req); updateObj.setId(fineTuningTask.getId()); - if (resp != null) { + if (resp != null && resp.getId()!=0) { updateObj.setJobId(resp.getJobId()); updateObj.setStatus(FinetuningTaskStatusEnum.WAITING.getStatus()); updateObj.setJobModelName(resp.getFineTunedModel()); updateObj.setTrainLog(resp.getTrainLog()); } else { - updateObj.setStatus(FinetuningTaskStatusEnum.CANCELLED.getStatus()); + updateObj.setStatus(FinetuningTaskStatusEnum.FAILED.getStatus()); } fineTuningTaskMapper.updateById(updateObj); } catch (Exception e){ diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/TrainHttpService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/TrainHttpService.java index e4a7b0121..917486d9f 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/TrainHttpService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/TrainHttpService.java @@ -295,13 +295,13 @@ public class TrainHttpService { * @throws UnirestException * @throws IOException */ - public AigcDatasetFileRespV0 AigcUploadFile(Map headers, InputStream inputStream, String fileName) throws UnirestException, IOException { + public AigcDatasetFileRespV0 AigcUploadFile(Map headers, String url,InputStream inputStream, String fileName) throws UnirestException, IOException { String aigcFileUpload = llmBackendProperties.getAigcFileUpload(); login(headers); headers.remove("Content-Type"); try { // 上传文件 - HttpResponse uploadResponse = Unirest.post(aigcFileUpload) + HttpResponse uploadResponse = Unirest.post(url+aigcFileUpload) .headers(headers) .field("file", inputStream, fileName) .field("purpose", "fine-tune") diff --git a/yudao-server/src/main/resources/application-dev.yaml b/yudao-server/src/main/resources/application-dev.yaml index 4f79adf03..57638deb5 100644 --- a/yudao-server/src/main/resources/application-dev.yaml +++ b/yudao-server/src/main/resources/application-dev.yaml @@ -256,10 +256,11 @@ llm: model_create: http://36.103.199.104:9000/api/models # aigc模型推理 aigc_model_completions: http://36.103.199.104:9000/api/channels/chat/completions - # 微调文件上传 - aigc_file_upload: http://36.103.199.104:9000/api/files + #################### 5123: 微调任务、模型部署、文件管理、提示词优化、自动评估、文生图等API。 ################### + # 微调文件上传 + aigc_file_upload: /api/files # 创建微调任务 POST finetuning_create: /llm/finetuning # 日志获取 diff --git a/yudao-server/src/main/resources/application-local.yaml b/yudao-server/src/main/resources/application-local.yaml index 274b72584..5043024f4 100644 --- a/yudao-server/src/main/resources/application-local.yaml +++ b/yudao-server/src/main/resources/application-local.yaml @@ -246,7 +246,8 @@ justauth: --- #################### 大模型训练相关配置 ################### llm: backend: - #### RAG服务 + #################### 8123: RAG服务、训练集和标注相关API。 ################### + ### RAG服务 #RAG健康检查 GET rag_health: http://36.103.199.104:8123/health #上传并向量化 POST @@ -261,8 +262,12 @@ llm: rag_query: http://36.103.199.104:8123/query #支持多个文件id查询向量 GET rag_query_multiple: http://36.103.199.104:8123/query_multiple + # 知识库向量嵌入 + embed: http://36.103.199.104:8123/embed + # 知识库查询 + embed_query: http://36.103.199.104:8123/query - #### LLM train and service api + #### LLM train and service api 训练集、标注相关API # 训练集列表 GET dataset_list: http://localhost:8123/api/mgr/datasets/list # 上传训练集 POST @@ -275,6 +280,8 @@ llm: annotation_task: http://localhost:8123/api/mgr/annotation/task # 保存标注 POST annotation_task_save: http://localhost:8123/api/mgr/annotation/task/task-6025001b-692c-44a1-9bc7-2a34bd7c0efe/segment/das-2eedd7bf-3770-4816-a961-b30c446b7a4f/mark + + #################### 9000: 大模型管理、微调任务、文件上传和模型部署相关API。 ################### # 大模型列表 GET models_list: http://36.103.199.104:9000/api/models # 登录 POST @@ -282,10 +289,6 @@ llm: account: http://36.103.199.104:9000/api/auth/account login_username: admin login_password: admin - # 创建微调任务 POST - finetuning_create: http://36.103.199.104:5123/llm/finetuning - # 日志获取 - finetuning_log: http://36.103.199.104:5123/llm/get_log # 微调任务详情 GET finetuning_detail: http://36.103.199.104:9000/api/finetuning # 微调任务取消 @@ -294,41 +297,47 @@ llm: finetuning_file_list: http://36.103.199.104:9000/api/files?purpose=fine-tune # 模型部署 model_create: http://36.103.199.104:9000/api/models + # aigc模型推理 + aigc_model_completions: http://36.103.199.104:9000/api/channels/chat/completions + + + #################### 5123: 微调任务、模型部署、文件管理、提示词优化、自动评估、文生图等API。 ################### + # 创建微调任务 POST + # 微调文件上传 + aigc_file_upload: /api/files + finetuning_create: /llm/finetuning + # 日志获取 + finetuning_log: /llm/get_log # 开始部署 - model_deploy: http://36.103.199.104:5123/llm/deploy + model_deploy: /llm/deploy # 取消部署 - model_undeploy: http://36.103.199.104:5123/llm/deploy/stop?deploy_id= + model_undeploy: /llm/deploy/stop?deploy_id= + # aigc表数据查询接口 + table_data_query: /table/%s + # 模型文件列表 + model_file_list: /models/?path= + # 模型文件下载 + model_file_download: /models/download/?file_path= + # 提示词优化 + optimize_prompt: /optimize-prompt + # 自动评估 + auto_evaluation: /llm-eval + # 文生图 + text_to_image: /generate-image + # 检查点文件列表 + check_file_list: /llm/finetuning/checkpoints?model_name= + + #################### 30000: 大模型对话相关API。 ################### #### 大模型对话 # 模型列表 GET base_model_list: http://36.103.199.104:30000/model/v1/models # 模型对话 POST model_completions: http://36.103.199.104:30000/v1/chat/completions - # aigc表数据查询接口 - table_data_query: http://36.103.199.104:5123/table/%s - # aigc模型推理 - aigc_model_completions: http://36.103.199.104:9000/api/channels/chat/completions - # 微调文件上传 - aigc_file_upload: http://36.103.199.104:9000/api/files + #################### 48080: 应用和管理服务相关API。 ################### application_api: http://localhost:48080/admin-api/llm/application/api/apiKey/chat - model_service_api : http://localhost:48080/admin-api/llm/model-service/api/apiKey/chat - - model_file_list: http://36.103.199.104:5123/models/?path= - - model_file_download: http://36.103.199.104:5123/models/download/?file_path= - # 提示词优化 - optimize_prompt: http://36.103.199.104:5123/optimize-prompt - - auto_evaluation: http://36.103.199.104:5123/llm-eval - # 文生图 - text_to_image: http://36.103.199.104:5123/generate-image - # 知识库向量嵌入 - embed: http://36.103.199.104:8123/embed - # 知识库查询 - embed_query: http://36.103.199.104:8123/query - - check_file_list: http://36.103.199.104:5123/llm/finetuning/checkpoints?model_name= + model_service_api: http://localhost:48080/admin-api/llm/model-service/api/apiKey/chat --- #################### iot相关配置 TODO 芋艿:再瞅瞅 ####################