diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/basemodel/BaseModelTaskService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/basemodel/BaseModelTaskService.java index 5dbec90fd..eca11d2d2 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/basemodel/BaseModelTaskService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/basemodel/BaseModelTaskService.java @@ -11,6 +11,7 @@ import cn.iocoder.yudao.module.llm.service.http.TrainHttpService; import cn.iocoder.yudao.module.llm.service.http.vo.AigcModelDeploySaveReq; import cn.iocoder.yudao.module.llm.service.http.vo.AigcModelDeployVO; import cn.iocoder.yudao.module.llm.service.http.vo.ModelDeployRespVO; +import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONObject; import com.esotericsoftware.minlog.Log; @@ -19,6 +20,7 @@ import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.module.SimpleModule; import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; +import lombok.extern.slf4j.Slf4j; import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Component; @@ -29,6 +31,7 @@ import java.util.List; import java.util.stream.Collectors; @Component +@Slf4j public class BaseModelTaskService { @Resource @@ -44,44 +47,90 @@ public class BaseModelTaskService { // 减少维护 先注释掉 @Scheduled(cron ="0 0/1 * * * ?") public void synchronous() throws JsonProcessingException { - List baseModelList = baseModelService.getBaseModelList(); - for (BaseModelDO baseModelDO : baseModelList) { - Long modelId = baseModelDO.getModelId(); - Long gpuId = baseModelDO.getGpuId(); - ServerNameDO serverNameDO1 = serverNameMapper.selectById(gpuId); - String query = "?filter={\"id\":" + modelId + "}"; - String res = trainHttpService.modelTableQuery(new HashMap<>(), serverNameDO1.getHost(),"model_deploy", query); + try { + log.info("开始同步基础模型信息..."); - ObjectMapper mapper = new ObjectMapper(); - mapper.registerModule(new JavaTimeModule()); - SimpleModule module = new SimpleModule(); - module.addDeserializer(LocalDateTime.class, new AigcCustomDateTimeDeserializer()); - mapper.registerModule(module); - // 获取gpuhost主机 - ServerNameDO serverNameDO = serverNameMapper.selectById(baseModelDO.getGpuId()); - List aigcModelDeployVOS = mapper.readValue(res,new TypeReference>() {}); - if (!aigcModelDeployVOS.isEmpty()) { - AigcModelDeployVO latestRecord = aigcModelDeployVOS.get(0); - String status = latestRecord.getStatus(); - if(status.equals("stop")){ + // 获取所有基础模型列表 + log.debug("正在查询所有基础模型列表..."); + List baseModelList = baseModelService.getBaseModelList(); + log.info("成功查询到 {} 个基础模型。", baseModelList.size()); - AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq(baseModelDO.getAigcModelName(), - "gpu"); - ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy(new HashMap<>(),serverNameDO.getHost(), aigcModelDeploySaveReq); - if (!modelDeployRespVO.getMessage().equals("error")) { - BaseModelSaveReqVO baseModelSaveReqVO = new BaseModelSaveReqVO(); - baseModelSaveReqVO.setId(baseModelDO.getId()); - baseModelSaveReqVO.setModelId(modelDeployRespVO.getId()); - baseModelSaveReqVO.setChatUrl(modelDeployRespVO.getPort() + DEFAULT_MODEL_URL_SUFFIX); - baseModelService.updateBaseModel(new BaseModelSaveReqVO()); + // 遍历每个基础模型 + for (BaseModelDO baseModelDO : baseModelList) { + Long modelId = baseModelDO.getModelId(); + Long gpuId = baseModelDO.getGpuId(); + log.debug("正在处理基础模型,模型ID: {}, GPU ID: {}", modelId, gpuId); + + // 查询 GPU 服务器信息 + log.debug("正在查询 GPU 服务器信息,GPU ID: {}", gpuId); + ServerNameDO serverName = serverNameMapper.selectById(gpuId); + if (serverName == null) { + log.error("未找到 GPU 服务器信息,GPU ID: {}", gpuId); + continue; + } + log.debug("GPU 服务器信息查询成功。主机地址: {}", serverName.getHost()); + + // 构建查询参数并查询模型部署信息 + String query = "?filter={\"id\":" + modelId + "}"; + log.debug("正在查询模型部署信息,查询参数: {}", query); + String res = trainHttpService.modelTableQuery(new HashMap<>(), serverName.getHost(), "model_deploy", query); + log.debug("模型部署信息查询成功。响应内容: {}", res); + + // 解析响应内容 + log.debug("正在解析模型部署信息..."); + ObjectMapper mapper = new ObjectMapper(); + mapper.registerModule(new JavaTimeModule()); + SimpleModule module = new SimpleModule(); + module.addDeserializer(LocalDateTime.class, new AigcCustomDateTimeDeserializer()); + mapper.registerModule(module); + List aigcModelDeploys = mapper.readValue(res, new TypeReference>() {}); + log.debug("模型部署信息解析完成。记录数量: {}", aigcModelDeploys.size()); + + if (!aigcModelDeploys.isEmpty()) { + AigcModelDeployVO latestRecord = aigcModelDeploys.get(0); + String status = latestRecord.getStatus(); + log.debug("最新模型部署记录状态: {}", status); + + // 如果模型状态为 "stop",则重新部署 + if ("stop".equals(status)) { + log.info("模型状态为 'stop',正在重新部署模型..."); + + // 构建模型部署请求 + AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq( + baseModelDO.getAigcModelName(), "gpu"); + log.debug("模型部署请求参数: {}", JSON.toJSONString(aigcModelDeploySaveReq)); + + // 发起模型部署请求 + ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy( + new HashMap<>(), serverName.getHost(), aigcModelDeploySaveReq); + log.debug("模型部署请求完成。响应内容: {}", JSON.toJSONString(modelDeployRespVO)); + + // 更新基础模型信息 + if (!"error".equals(modelDeployRespVO.getMessage())) { + log.info("模型部署成功。正在更新基础模型信息..."); + BaseModelSaveReqVO baseModelSaveReqVO = new BaseModelSaveReqVO(); + baseModelSaveReqVO.setId(baseModelDO.getId()); + baseModelSaveReqVO.setModelId(modelDeployRespVO.getId()); + baseModelSaveReqVO.setChatUrl(modelDeployRespVO.getPort() + DEFAULT_MODEL_URL_SUFFIX); + baseModelService.updateBaseModel(baseModelSaveReqVO); + log.info("基础模型信息更新完成。模型ID: {}", baseModelDO.getId()); + } else { + log.error("模型部署失败。模型ID: {}", baseModelDO.getId()); + } } + } else { + log.warn("未找到模型部署记录。模型ID: {}", modelId); } } + + log.info("基础模型信息同步完成。"); + } catch (Exception e) { + log.error("同步基础模型信息时发生异常。", e); + throw e; } } - // @Scheduled(cron ="0 0/1 * * * ?") public void updateBaseModel() { Log.info("定时任务启动"); diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/finetuningtask/FineTuningTaskServiceImpl.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/finetuningtask/FineTuningTaskServiceImpl.java index 8afabb096..976396488 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/finetuningtask/FineTuningTaskServiceImpl.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/finetuningtask/FineTuningTaskServiceImpl.java @@ -263,6 +263,10 @@ public class FineTuningTaskServiceImpl implements FineTuningTaskService { .map(FineTuningTaskDO::getGpuType) .filter(Objects::nonNull) .collect(Collectors.toList()); + + if (CollectionUtils.isEmpty(gpuTypeIds)){ + return respVOS; + } List serverNameDOS = serverNameMapper.selectList(new LambdaQueryWrapper() .in(ServerNameDO::getId, gpuTypeIds)); Map longServerNameDOMap = cn.iocoder.yudao.framework.common.util.collection.CollectionUtils diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/TrainHttpService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/TrainHttpService.java index 614f5e0c2..9c014e423 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/TrainHttpService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/TrainHttpService.java @@ -157,8 +157,8 @@ public class TrainHttpService { try { // 记录请求信息 log.info("开始创建微调任务,请求URL: {}", url + llmBackendProperties.getFinetuningCreate()); - log.debug("请求头: {}", headers); - log.debug("请求体: {}", JSON.toJSONString(req)); + log.info("请求头: {}", headers); + log.info("请求体: {}", JSON.toJSONString(req)); // 发起 HTTP 请求 log.debug("正在发起 HTTP POST 请求...");