修改模型调优 数据集 deepseek模型

This commit is contained in:
limin 2025-02-19 15:28:17 +08:00
parent 956742141b
commit 3be18ff084
5 changed files with 107 additions and 40 deletions

View File

@ -54,7 +54,7 @@ public class AsyncDataSetService {
sb.append(json).append("\n"); // 每个 JSON 对象后换行
}
InputStream inputStream = new ByteArrayInputStream(sb.toString().getBytes());
AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(), inputStream, datasetDO.getDatasetName() + "new"+datasetDO.getId() + ".json");
AigcDatasetFileRespV0 aigcDatasetFileRespV0 = trainHttpService.AigcUploadFile(new HashMap<>(),"http://36.103.199.104:5123", inputStream, datasetDO.getDatasetName() + "new"+datasetDO.getId() + ".json");
if (aigcDatasetFileRespV0 != null){
datasetMapper.setJobid(datasetDO.getId(),aigcDatasetFileRespV0.getFileId());
@ -72,4 +72,47 @@ public class AsyncDataSetService {
log.error("[JsonFileWrite][写入文件失败] {}", e.getMessage());
}
}
/**
 * Serialises the questions of a dataset as JSON Lines (one JSON object per line),
 * uploads the result to the given host and records the upload on the dataset row.
 *
 * <p>Each question becomes an {@code AigcDatasetVo} whose instruction, input and output
 * are taken from the question's system text, question text and first answer respectively;
 * a blank or missing value is written as an empty string.
 *
 * @param hostUrl             base URL passed to {@code trainHttpService.AigcUploadFile}
 * @param datasetDO           dataset whose name and id build the uploaded file name and
 *                            whose id is used for the {@code datasetMapper} updates
 * @param datasetQuestionList questions to serialise
 * @return the stored file URL (the part of the returned S3 URL starting at the last
 *         {@code "storage"} segment), or an empty string when the upload returned no
 *         response, the response carried no S3 URL, or an {@link IOException} occurred
 */
public String JsonFileWriteFine(String hostUrl,DatasetDO datasetDO,List<DatasetQuestionRespVO> datasetQuestionList) {
    ObjectMapper mapper = new ObjectMapper();
    StringBuilder sb = new StringBuilder();
    try {
        for (DatasetQuestionRespVO dataSource : datasetQuestionList) {
            AigcDatasetVo aigcDatasetVo = new AigcDatasetVo();
            aigcDatasetVo.setInstruction(StringUtils.isNotBlank(dataSource.getSystem()) ? dataSource.getSystem() : "");
            aigcDatasetVo.setInput(StringUtils.isNotBlank(dataSource.getQuestion()) ? dataSource.getQuestion() : "");
            if (!CollectionUtils.isAnyEmpty(dataSource.getDatasetAnswerRespVO())) {
                // Only the first answer is exported as the expected output.
                aigcDatasetVo.setOutput(StringUtils.isNotBlank(dataSource.getDatasetAnswerRespVO().get(0).getAnswer())
                        ? dataSource.getDatasetAnswerRespVO().get(0).getAnswer()
                        : "");
            } else {
                aigcDatasetVo.setOutput("");
            }
            // One JSON object per line.
            sb.append(mapper.writeValueAsString(aigcDatasetVo)).append("\n");
        }

        // Encode explicitly as UTF-8: the no-arg getBytes() uses the platform default
        // charset, which can corrupt non-ASCII text on hosts that are not UTF-8.
        InputStream inputStream = new ByteArrayInputStream(
                sb.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
        String fileName = datasetDO.getDatasetName() + "new" + datasetDO.getId() + ".json";
        AigcDatasetFileRespV0 aigcDatasetFileRespV0 =
                trainHttpService.AigcUploadFile(new HashMap<>(), hostUrl, inputStream, fileName);
        if (aigcDatasetFileRespV0 != null) {
            datasetMapper.setJobid(datasetDO.getId(), aigcDatasetFileRespV0.getFileId());
            String s3Url = aigcDatasetFileRespV0.getS3Url();
            if (s3Url == null) {
                // Without an S3 URL there is nothing to store; fail like the other error paths.
                log.error("[JsonFileWrite][写入文件失败] {}", "upload response contains no s3Url");
                return "";
            }
            // Drop everything up to and including the '/' before the last "storage" segment.
            // When "/storage" is absent lastIndexOf returns -1, so the whole URL is kept.
            int lastIndex = s3Url.lastIndexOf("/storage");
            String url = s3Url.substring(lastIndex + 1);
            datasetMapper.setUrl(datasetDO.getId(), url);
            log.info("[JsonFileWrite][写入文件成功]");
            return url;
        }
    } catch (IOException e) {
        // Pass the exception as the last argument so the stack trace is logged too.
        log.error("[JsonFileWrite][写入文件失败] {}", e.getMessage(), e);
    }
    return "";
}
}

View File

@ -1,13 +1,17 @@
package cn.iocoder.yudao.module.llm.service.async;
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetQuestionRespVO;
import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetRespVO;
import cn.iocoder.yudao.module.llm.dal.dataobject.basemodel.BaseModelDO;
import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetDO;
import cn.iocoder.yudao.module.llm.dal.dataobject.finetuningtask.FineTuningTaskDO;
import cn.iocoder.yudao.module.llm.dal.dataobject.servername.ServerNameDO;
import cn.iocoder.yudao.module.llm.dal.mysql.basemodel.BaseModelMapper;
import cn.iocoder.yudao.module.llm.dal.mysql.finetuningtask.FineTuningTaskMapper;
import cn.iocoder.yudao.module.llm.dal.mysql.servername.ServerNameMapper;
import cn.iocoder.yudao.module.llm.enums.FinetuningTaskStatusEnum;
import cn.iocoder.yudao.module.llm.service.dataset.DatasetQuestionService;
import cn.iocoder.yudao.module.llm.service.dataset.DatasetService;
import cn.iocoder.yudao.module.llm.service.http.TrainHttpService;
import cn.iocoder.yudao.module.llm.service.http.vo.AigcFineTuningCreateReqVO;
@ -20,6 +24,7 @@ import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.util.HashMap;
import java.util.List;
@Service
@Slf4j
@ -39,11 +44,17 @@ public class AsyncFineTuningTaskService {
@Resource
private ServerNameMapper serverNameMapper;
@Resource
private DatasetQuestionService datasetQuestionService;
@Resource
private AsyncDataSetService dataSetService;
//大模型平台创建调优任务
@Async
public void createTuning(FineTuningTaskDO fineTuningTask) {
try {
ServerNameDO serverNameDO = serverNameMapper.selectById(fineTuningTask.getGpuType());
AigcFineTuningCreateReqVO req = getAigcFineTuningCreateReqVO(fineTuningTask);
BaseModelDO baseModelDO = baseModelMapper.selectById(fineTuningTask.getBaseModelId());
Long datasetId = fineTuningTask.getDataset();
@ -52,23 +63,26 @@ public class AsyncFineTuningTaskService {
if (baseModelDO != null){
req.setModel(baseModelDO.getAigcModelName());
}
List<DatasetQuestionRespVO> datasetQuestionList = datasetQuestionService.getDatasetQuestionList(dataset.getId());
DatasetDO datasetDO = BeanUtils.toBean(dataset, DatasetDO.class);
String fileUrl = dataSetService.JsonFileWriteFine(serverNameDO.getHost(), datasetDO, datasetQuestionList);
req.setDataset(StringUtils.isNotBlank(dataset.getDatasetFileUrl())?dataset.getDatasetFileUrl():"storage/json/2025/01/_xVKpfDH8.json");
req.setDataset(dataset.getFileUrl());
req.setDataset(fileUrl);
req.setSuffix("T-"+String.valueOf(fineTuningTask.getId()));
FineTuningTaskDO updateObj = new FineTuningTaskDO();
ServerNameDO serverNameDO = serverNameMapper.selectById(fineTuningTask.getGpuType());
if (serverNameDO == null){
updateObj.setStatus(FinetuningTaskStatusEnum.CANCELLED.getStatus());
}
AigcFineTuningCreateRespVO resp = trainHttpService.finetuningCreate(new HashMap<>(),serverNameDO.getHost(), req);
updateObj.setId(fineTuningTask.getId());
if (resp != null) {
if (resp != null && resp.getId()!=0) {
updateObj.setJobId(resp.getJobId());
updateObj.setStatus(FinetuningTaskStatusEnum.WAITING.getStatus());
updateObj.setJobModelName(resp.getFineTunedModel());
updateObj.setTrainLog(resp.getTrainLog());
} else {
updateObj.setStatus(FinetuningTaskStatusEnum.CANCELLED.getStatus());
updateObj.setStatus(FinetuningTaskStatusEnum.FAILED.getStatus());
}
fineTuningTaskMapper.updateById(updateObj);
} catch (Exception e){

View File

@ -295,13 +295,13 @@ public class TrainHttpService {
* @throws UnirestException
* @throws IOException
*/
public AigcDatasetFileRespV0 AigcUploadFile(Map<String, String> headers, InputStream inputStream, String fileName) throws UnirestException, IOException {
public AigcDatasetFileRespV0 AigcUploadFile(Map<String, String> headers, String url,InputStream inputStream, String fileName) throws UnirestException, IOException {
String aigcFileUpload = llmBackendProperties.getAigcFileUpload();
login(headers);
headers.remove("Content-Type");
try {
// 上传文件
HttpResponse<String> uploadResponse = Unirest.post(aigcFileUpload)
HttpResponse<String> uploadResponse = Unirest.post(url+aigcFileUpload)
.headers(headers)
.field("file", inputStream, fileName)
.field("purpose", "fine-tune")

View File

@ -256,10 +256,11 @@ llm:
model_create: http://36.103.199.104:9000/api/models
# aigc模型推理
aigc_model_completions: http://36.103.199.104:9000/api/channels/chat/completions
# 微调文件上传
aigc_file_upload: http://36.103.199.104:9000/api/files
#################### 5123: 微调任务、模型部署、文件管理、提示词优化、自动评估、文生图等API。 ###################
# 微调文件上传
aigc_file_upload: /api/files
# 创建微调任务 POST
finetuning_create: /llm/finetuning
# 日志获取

View File

@ -246,7 +246,8 @@ justauth:
--- #################### 大模型训练相关配置 ###################
llm:
backend:
#### RAG服务
#################### 8123: RAG服务、训练集和标注相关API。 ###################
### RAG服务
#RAG健康检查 GET
rag_health: http://36.103.199.104:8123/health
#上传并向量化 POST
@ -261,8 +262,12 @@ llm:
rag_query: http://36.103.199.104:8123/query
#支持多个文件id查询向量 GET
rag_query_multiple: http://36.103.199.104:8123/query_multiple
# 知识库向量嵌入
embed: http://36.103.199.104:8123/embed
# 知识库查询
embed_query: http://36.103.199.104:8123/query
#### LLM train and service api
#### LLM train and service api 训练集、标注相关API
# 训练集列表 GET
dataset_list: http://localhost:8123/api/mgr/datasets/list
# 上传训练集 POST
@ -275,6 +280,8 @@ llm:
annotation_task: http://localhost:8123/api/mgr/annotation/task
# 保存标注 POST
annotation_task_save: http://localhost:8123/api/mgr/annotation/task/task-6025001b-692c-44a1-9bc7-2a34bd7c0efe/segment/das-2eedd7bf-3770-4816-a961-b30c446b7a4f/mark
#################### 9000: 大模型管理、微调任务、文件上传和模型部署相关API。 ###################
# 大模型列表 GET
models_list: http://36.103.199.104:9000/api/models
# 登录 POST
@ -282,10 +289,6 @@ llm:
account: http://36.103.199.104:9000/api/auth/account
login_username: admin
login_password: admin
# 创建微调任务 POST
finetuning_create: http://36.103.199.104:5123/llm/finetuning
# 日志获取
finetuning_log: http://36.103.199.104:5123/llm/get_log
# 微调任务详情 GET
finetuning_detail: http://36.103.199.104:9000/api/finetuning
# 微调任务取消
@ -294,41 +297,47 @@ llm:
finetuning_file_list: http://36.103.199.104:9000/api/files?purpose=fine-tune
# 模型部署
model_create: http://36.103.199.104:9000/api/models
# aigc模型推理
aigc_model_completions: http://36.103.199.104:9000/api/channels/chat/completions
#################### 5123: 微调任务、模型部署、文件管理、提示词优化、自动评估、文生图等API。 ###################
# 创建微调任务 POST
# 微调文件上传
aigc_file_upload: /api/files
finetuning_create: /llm/finetuning
# 日志获取
finetuning_log: /llm/get_log
# 开始部署
model_deploy: http://36.103.199.104:5123/llm/deploy
model_deploy: /llm/deploy
# 取消部署
model_undeploy: http://36.103.199.104:5123/llm/deploy/stop?deploy_id=
model_undeploy: /llm/deploy/stop?deploy_id=
# aigc表数据查询接口
table_data_query: /table/%s
# 模型文件列表
model_file_list: /models/?path=
# 模型文件下载
model_file_download: /models/download/?file_path=
# 提示词优化
optimize_prompt: /optimize-prompt
# 自动评估
auto_evaluation: /llm-eval
# 文生图
text_to_image: /generate-image
# 检查点文件列表
check_file_list: /llm/finetuning/checkpoints?model_name=
#################### 30000: 大模型对话相关API。 ###################
#### 大模型对话
# 模型列表 GET
base_model_list: http://36.103.199.104:30000/model/v1/models
# 模型对话 POST
model_completions: http://36.103.199.104:30000/v1/chat/completions
# aigc表数据查询接口
table_data_query: http://36.103.199.104:5123/table/%s
# aigc模型推理
aigc_model_completions: http://36.103.199.104:9000/api/channels/chat/completions
# 微调文件上传
aigc_file_upload: http://36.103.199.104:9000/api/files
#################### 48080: 应用和管理服务相关API。 ###################
application_api: http://localhost:48080/admin-api/llm/application/api/apiKey/chat
model_service_api : http://localhost:48080/admin-api/llm/model-service/api/apiKey/chat
model_file_list: http://36.103.199.104:5123/models/?path=
model_file_download: http://36.103.199.104:5123/models/download/?file_path=
# 提示词优化
optimize_prompt: http://36.103.199.104:5123/optimize-prompt
auto_evaluation: http://36.103.199.104:5123/llm-eval
# 文生图
text_to_image: http://36.103.199.104:5123/generate-image
# 知识库向量嵌入
embed: http://36.103.199.104:8123/embed
# 知识库查询
embed_query: http://36.103.199.104:8123/query
check_file_list: http://36.103.199.104:5123/llm/finetuning/checkpoints?model_name=
model_service_api: http://localhost:48080/admin-api/llm/model-service/api/apiKey/chat
--- #################### iot相关配置 TODO 芋艿:再瞅瞅 ####################