diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncModelServiceService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncModelServiceService.java index d561d87c8..cc76c78f6 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncModelServiceService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncModelServiceService.java @@ -9,6 +9,7 @@ import cn.iocoder.yudao.module.llm.dal.mysql.modelservice.ModelServiceMapper; import cn.iocoder.yudao.module.llm.dal.mysql.servername.ServerNameMapper; import cn.iocoder.yudao.module.llm.service.http.TrainHttpService; import cn.iocoder.yudao.module.llm.service.http.vo.*; +import com.alibaba.fastjson.JSON; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.scheduling.annotation.Async; @@ -80,29 +81,90 @@ public class AsyncModelServiceService { //模型服务开启 @Async public void startModelService(ModelServiceDO updateObj) { +// try { +// ModelServiceDO modelServiceDO = modelServiceMapper.selectById(updateObj.getId()); +// ServerNameDO serverNameDO = serverNameMapper.selectById(modelServiceDO.getGpuType()); +// if (modelServiceDO.getJobId() != null){ +// AigcModelDeployVO modelDeployVO = trainHttpService.modelUndeploy(new HashMap<>(),serverNameDO.getHost(), modelServiceDO.getJobId()); +// } +// AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq(modelServiceDO.getBaseModelName(), +// "gpu"); +// ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy(new HashMap<>(),serverNameDO.getHost(), aigcModelDeploySaveReq); +// log.info("modelDeploy info {}",modelDeployRespVO); +// if (modelDeployRespVO.getMessage().equals("error")){ +// updateObj.setStatus(3); +// }else { +// updateObj.setStatus(1); +// updateObj.setJobId(modelDeployRespVO.getId()); +// updateObj.setModelUrl(modelDeployRespVO.getPort()+DEFAULT_MODEL_URL_SUFFIX); +// } +// +// modelServiceMapper.updateById(updateObj); +// }catch(Exception e){ +//// updateObj.setStatus(3); +// modelServiceMapper.updateById(updateObj); +// }; try { + log.info("开始启动模型服务,服务ID: {}", updateObj.getId()); + + // 查询模型服务信息 + log.info("正在查询模型服务信息,服务ID: {}", updateObj.getId()); ModelServiceDO modelServiceDO = modelServiceMapper.selectById(updateObj.getId()); - ServerNameDO serverNameDO = serverNameMapper.selectById(modelServiceDO.getGpuType()); - if (modelServiceDO.getJobId() != null){ - AigcModelDeployVO modelDeployVO = trainHttpService.modelUndeploy(new HashMap<>(),serverNameDO.getHost(), modelServiceDO.getJobId()); + if (modelServiceDO == null) { + log.error("未找到模型服务信息,服务ID: {}", updateObj.getId()); + throw new RuntimeException("模型服务信息不存在"); } - AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq(modelServiceDO.getBaseModelName(), - "gpu"); - ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy(new HashMap<>(),serverNameDO.getHost(), aigcModelDeploySaveReq); - log.info("modelDeploy info {}",modelDeployRespVO); - if (modelDeployRespVO.getMessage().equals("error")){ - updateObj.setStatus(3); - }else { - updateObj.setStatus(1); - updateObj.setJobId(modelDeployRespVO.getId()); - updateObj.setModelUrl(modelDeployRespVO.getPort()+DEFAULT_MODEL_URL_SUFFIX); + log.info("模型服务信息查询成功。服务名称: {}", modelServiceDO.getBaseModelName()); + + // 查询 GPU 服务器信息 + log.info("正在查询 GPU 服务器信息,GPU 类型: {}", modelServiceDO.getGpuType()); + ServerNameDO serverNameDO = serverNameMapper.selectById(modelServiceDO.getGpuType()); + if (serverNameDO == null) { + log.error("未找到 GPU 服务器信息,GPU 类型: {}", modelServiceDO.getGpuType()); + throw new RuntimeException("GPU 服务器信息不存在"); + } + log.info("GPU 服务器信息查询成功。主机地址: {}", serverNameDO.getHost()); + + // 如果已有任务 ID,则先卸载模型 + if (modelServiceDO.getJobId() != null) { + log.info("检测到已有任务 ID,正在卸载模型,任务ID: {}", modelServiceDO.getJobId()); + AigcModelDeployVO modelDeployVO = trainHttpService.modelUndeploy(new HashMap<>(), serverNameDO.getHost(), modelServiceDO.getJobId()); + log.info("模型卸载完成。卸载结果: {}", JSON.toJSONString(modelDeployVO)); } + // 构建模型部署请求 + log.debug("正在构建模型部署请求..."); + AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq( + modelServiceDO.getBaseModelName(), "gpu"); + log.debug("模型部署请求参数: {}", JSON.toJSONString(aigcModelDeploySaveReq)); + + // 发起模型部署请求 + log.info("正在发起模型部署请求..."); + ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy(new HashMap<>(), serverNameDO.getHost(), aigcModelDeploySaveReq); + log.info("模型部署请求完成。部署结果: {}", JSON.toJSONString(modelDeployRespVO)); + + // 更新模型服务状态 + if ("error".equals(modelDeployRespVO.getMessage())) { + log.error("模型部署失败。服务ID: {}", updateObj.getId()); + updateObj.setStatus(3); + } else { + log.info("模型部署成功。服务ID: {}", updateObj.getId()); + updateObj.setStatus(1); + updateObj.setJobId(modelDeployRespVO.getId()); + updateObj.setModelUrl(modelDeployRespVO.getPort() + DEFAULT_MODEL_URL_SUFFIX); + } + + // 更新数据库 + log.info("正在更新数据库中的模型服务状态..."); modelServiceMapper.updateById(updateObj); - }catch(Exception e){ -// updateObj.setStatus(3); + log.info("数据库更新完成。服务ID: {}", updateObj.getId()); + + } catch (Exception e) { + log.error("启动模型服务时发生异常。服务ID: {}", updateObj.getId(), e); + updateObj.setStatus(3); modelServiceMapper.updateById(updateObj); - }; + } + } //调型服务关闭