refactor(yudao-module-llm): 模型服务启动逻辑
- 添加日志记录,提高可追踪性
This commit is contained in:
parent
84562ba94d
commit
acc34b7a1a
@ -9,6 +9,7 @@ import cn.iocoder.yudao.module.llm.dal.mysql.modelservice.ModelServiceMapper;
|
||||
import cn.iocoder.yudao.module.llm.dal.mysql.servername.ServerNameMapper;
|
||||
import cn.iocoder.yudao.module.llm.service.http.TrainHttpService;
|
||||
import cn.iocoder.yudao.module.llm.service.http.vo.*;
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
@ -80,29 +81,90 @@ public class AsyncModelServiceService {
|
||||
//模型服务开启
|
||||
@Async
|
||||
public void startModelService(ModelServiceDO updateObj) {
|
||||
// try {
|
||||
// ModelServiceDO modelServiceDO = modelServiceMapper.selectById(updateObj.getId());
|
||||
// ServerNameDO serverNameDO = serverNameMapper.selectById(modelServiceDO.getGpuType());
|
||||
// if (modelServiceDO.getJobId() != null){
|
||||
// AigcModelDeployVO modelDeployVO = trainHttpService.modelUndeploy(new HashMap<>(),serverNameDO.getHost(), modelServiceDO.getJobId());
|
||||
// }
|
||||
// AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq(modelServiceDO.getBaseModelName(),
|
||||
// "gpu");
|
||||
// ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy(new HashMap<>(),serverNameDO.getHost(), aigcModelDeploySaveReq);
|
||||
// log.info("modelDeploy info {}",modelDeployRespVO);
|
||||
// if (modelDeployRespVO.getMessage().equals("error")){
|
||||
// updateObj.setStatus(3);
|
||||
// }else {
|
||||
// updateObj.setStatus(1);
|
||||
// updateObj.setJobId(modelDeployRespVO.getId());
|
||||
// updateObj.setModelUrl(modelDeployRespVO.getPort()+DEFAULT_MODEL_URL_SUFFIX);
|
||||
// }
|
||||
//
|
||||
// modelServiceMapper.updateById(updateObj);
|
||||
// }catch(Exception e){
|
||||
//// updateObj.setStatus(3);
|
||||
// modelServiceMapper.updateById(updateObj);
|
||||
// };
|
||||
try {
|
||||
log.info("开始启动模型服务,服务ID: {}", updateObj.getId());
|
||||
|
||||
// 查询模型服务信息
|
||||
log.info("正在查询模型服务信息,服务ID: {}", updateObj.getId());
|
||||
ModelServiceDO modelServiceDO = modelServiceMapper.selectById(updateObj.getId());
|
||||
ServerNameDO serverNameDO = serverNameMapper.selectById(modelServiceDO.getGpuType());
|
||||
if (modelServiceDO.getJobId() != null){
|
||||
AigcModelDeployVO modelDeployVO = trainHttpService.modelUndeploy(new HashMap<>(),serverNameDO.getHost(), modelServiceDO.getJobId());
|
||||
if (modelServiceDO == null) {
|
||||
log.error("未找到模型服务信息,服务ID: {}", updateObj.getId());
|
||||
throw new RuntimeException("模型服务信息不存在");
|
||||
}
|
||||
AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq(modelServiceDO.getBaseModelName(),
|
||||
"gpu");
|
||||
ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy(new HashMap<>(),serverNameDO.getHost(), aigcModelDeploySaveReq);
|
||||
log.info("modelDeploy info {}",modelDeployRespVO);
|
||||
if (modelDeployRespVO.getMessage().equals("error")){
|
||||
updateObj.setStatus(3);
|
||||
}else {
|
||||
updateObj.setStatus(1);
|
||||
updateObj.setJobId(modelDeployRespVO.getId());
|
||||
updateObj.setModelUrl(modelDeployRespVO.getPort()+DEFAULT_MODEL_URL_SUFFIX);
|
||||
log.info("模型服务信息查询成功。服务名称: {}", modelServiceDO.getBaseModelName());
|
||||
|
||||
// 查询 GPU 服务器信息
|
||||
log.info("正在查询 GPU 服务器信息,GPU 类型: {}", modelServiceDO.getGpuType());
|
||||
ServerNameDO serverNameDO = serverNameMapper.selectById(modelServiceDO.getGpuType());
|
||||
if (serverNameDO == null) {
|
||||
log.error("未找到 GPU 服务器信息,GPU 类型: {}", modelServiceDO.getGpuType());
|
||||
throw new RuntimeException("GPU 服务器信息不存在");
|
||||
}
|
||||
log.info("GPU 服务器信息查询成功。主机地址: {}", serverNameDO.getHost());
|
||||
|
||||
// 如果已有任务 ID,则先卸载模型
|
||||
if (modelServiceDO.getJobId() != null) {
|
||||
log.info("检测到已有任务 ID,正在卸载模型,任务ID: {}", modelServiceDO.getJobId());
|
||||
AigcModelDeployVO modelDeployVO = trainHttpService.modelUndeploy(new HashMap<>(), serverNameDO.getHost(), modelServiceDO.getJobId());
|
||||
log.info("模型卸载完成。卸载结果: {}", JSON.toJSONString(modelDeployVO));
|
||||
}
|
||||
|
||||
// 构建模型部署请求
|
||||
log.debug("正在构建模型部署请求...");
|
||||
AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq(
|
||||
modelServiceDO.getBaseModelName(), "gpu");
|
||||
log.debug("模型部署请求参数: {}", JSON.toJSONString(aigcModelDeploySaveReq));
|
||||
|
||||
// 发起模型部署请求
|
||||
log.info("正在发起模型部署请求...");
|
||||
ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy(new HashMap<>(), serverNameDO.getHost(), aigcModelDeploySaveReq);
|
||||
log.info("模型部署请求完成。部署结果: {}", JSON.toJSONString(modelDeployRespVO));
|
||||
|
||||
// 更新模型服务状态
|
||||
if ("error".equals(modelDeployRespVO.getMessage())) {
|
||||
log.error("模型部署失败。服务ID: {}", updateObj.getId());
|
||||
updateObj.setStatus(3);
|
||||
} else {
|
||||
log.info("模型部署成功。服务ID: {}", updateObj.getId());
|
||||
updateObj.setStatus(1);
|
||||
updateObj.setJobId(modelDeployRespVO.getId());
|
||||
updateObj.setModelUrl(modelDeployRespVO.getPort() + DEFAULT_MODEL_URL_SUFFIX);
|
||||
}
|
||||
|
||||
// 更新数据库
|
||||
log.info("正在更新数据库中的模型服务状态...");
|
||||
modelServiceMapper.updateById(updateObj);
|
||||
}catch(Exception e){
|
||||
// updateObj.setStatus(3);
|
||||
log.info("数据库更新完成。服务ID: {}", updateObj.getId());
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("启动模型服务时发生异常。服务ID: {}", updateObj.getId(), e);
|
||||
updateObj.setStatus(3);
|
||||
modelServiceMapper.updateById(updateObj);
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//模型服务关闭
|
||||
|
Loading…
x
Reference in New Issue
Block a user