refactor(yudao-module-llm): 模型服务启动逻辑

- 添加日志记录,提高可追踪性
This commit is contained in:
Liuyang 2025-02-26 15:57:12 +08:00
parent 84562ba94d
commit acc34b7a1a

View File

@ -9,6 +9,7 @@ import cn.iocoder.yudao.module.llm.dal.mysql.modelservice.ModelServiceMapper;
import cn.iocoder.yudao.module.llm.dal.mysql.servername.ServerNameMapper;
import cn.iocoder.yudao.module.llm.service.http.TrainHttpService;
import cn.iocoder.yudao.module.llm.service.http.vo.*;
import com.alibaba.fastjson.JSON;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Async;
@ -80,29 +81,90 @@ public class AsyncModelServiceService {
//模型服务开启
@Async
public void startModelService(ModelServiceDO updateObj) {
// try {
// ModelServiceDO modelServiceDO = modelServiceMapper.selectById(updateObj.getId());
// ServerNameDO serverNameDO = serverNameMapper.selectById(modelServiceDO.getGpuType());
// if (modelServiceDO.getJobId() != null){
// AigcModelDeployVO modelDeployVO = trainHttpService.modelUndeploy(new HashMap<>(),serverNameDO.getHost(), modelServiceDO.getJobId());
// }
// AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq(modelServiceDO.getBaseModelName(),
// "gpu");
// ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy(new HashMap<>(),serverNameDO.getHost(), aigcModelDeploySaveReq);
// log.info("modelDeploy info {}",modelDeployRespVO);
// if (modelDeployRespVO.getMessage().equals("error")){
// updateObj.setStatus(3);
// }else {
// updateObj.setStatus(1);
// updateObj.setJobId(modelDeployRespVO.getId());
// updateObj.setModelUrl(modelDeployRespVO.getPort()+DEFAULT_MODEL_URL_SUFFIX);
// }
//
// modelServiceMapper.updateById(updateObj);
// }catch(Exception e){
//// updateObj.setStatus(3);
// modelServiceMapper.updateById(updateObj);
// };
try {
log.info("开始启动模型服务服务ID: {}", updateObj.getId());
// 查询模型服务信息
log.info("正在查询模型服务信息服务ID: {}", updateObj.getId());
ModelServiceDO modelServiceDO = modelServiceMapper.selectById(updateObj.getId());
ServerNameDO serverNameDO = serverNameMapper.selectById(modelServiceDO.getGpuType());
if (modelServiceDO.getJobId() != null){
AigcModelDeployVO modelDeployVO = trainHttpService.modelUndeploy(new HashMap<>(),serverNameDO.getHost(), modelServiceDO.getJobId());
if (modelServiceDO == null) {
log.error("未找到模型服务信息服务ID: {}", updateObj.getId());
throw new RuntimeException("模型服务信息不存在");
}
AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq(modelServiceDO.getBaseModelName(),
"gpu");
ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy(new HashMap<>(),serverNameDO.getHost(), aigcModelDeploySaveReq);
log.info("modelDeploy info {}",modelDeployRespVO);
if (modelDeployRespVO.getMessage().equals("error")){
updateObj.setStatus(3);
}else {
updateObj.setStatus(1);
updateObj.setJobId(modelDeployRespVO.getId());
updateObj.setModelUrl(modelDeployRespVO.getPort()+DEFAULT_MODEL_URL_SUFFIX);
log.info("模型服务信息查询成功。服务名称: {}", modelServiceDO.getBaseModelName());
// 查询 GPU 服务器信息
log.info("正在查询 GPU 服务器信息GPU 类型: {}", modelServiceDO.getGpuType());
ServerNameDO serverNameDO = serverNameMapper.selectById(modelServiceDO.getGpuType());
if (serverNameDO == null) {
log.error("未找到 GPU 服务器信息GPU 类型: {}", modelServiceDO.getGpuType());
throw new RuntimeException("GPU 服务器信息不存在");
}
log.info("GPU 服务器信息查询成功。主机地址: {}", serverNameDO.getHost());
// 如果已有任务 ID则先卸载模型
if (modelServiceDO.getJobId() != null) {
log.info("检测到已有任务 ID正在卸载模型任务ID: {}", modelServiceDO.getJobId());
AigcModelDeployVO modelDeployVO = trainHttpService.modelUndeploy(new HashMap<>(), serverNameDO.getHost(), modelServiceDO.getJobId());
log.info("模型卸载完成。卸载结果: {}", JSON.toJSONString(modelDeployVO));
}
// 构建模型部署请求
log.debug("正在构建模型部署请求...");
AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq(
modelServiceDO.getBaseModelName(), "gpu");
log.debug("模型部署请求参数: {}", JSON.toJSONString(aigcModelDeploySaveReq));
// 发起模型部署请求
log.info("正在发起模型部署请求...");
ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy(new HashMap<>(), serverNameDO.getHost(), aigcModelDeploySaveReq);
log.info("模型部署请求完成。部署结果: {}", JSON.toJSONString(modelDeployRespVO));
// 更新模型服务状态
if ("error".equals(modelDeployRespVO.getMessage())) {
log.error("模型部署失败。服务ID: {}", updateObj.getId());
updateObj.setStatus(3);
} else {
log.info("模型部署成功。服务ID: {}", updateObj.getId());
updateObj.setStatus(1);
updateObj.setJobId(modelDeployRespVO.getId());
updateObj.setModelUrl(modelDeployRespVO.getPort() + DEFAULT_MODEL_URL_SUFFIX);
}
// 更新数据库
log.info("正在更新数据库中的模型服务状态...");
modelServiceMapper.updateById(updateObj);
}catch(Exception e){
// updateObj.setStatus(3);
log.info("数据库更新完成。服务ID: {}", updateObj.getId());
} catch (Exception e) {
log.error("启动模型服务时发生异常。服务ID: {}", updateObj.getId(), e);
updateObj.setStatus(3);
modelServiceMapper.updateById(updateObj);
};
}
}
//调型服务关闭