模型部署 修改

This commit is contained in:
limin 2025-02-13 13:36:03 +08:00
parent 9287973dc8
commit 465694a3ff
13 changed files with 55 additions and 43 deletions

View File

@ -13,7 +13,7 @@ public class ModelServiceSaveReqVO {
private Long id;
@Schema(description = "服务名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "芋艿")
@NotEmpty(message = "服务名称不能为空")
// @NotEmpty(message = "服务名称不能为空")
private String serviceName;
@Schema(description = "微调任务ID")
@ -29,7 +29,7 @@ public class ModelServiceSaveReqVO {
private Integer gpuCount;
@Schema(description = "模型服务状态使用字典llm_model_status", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
@NotNull(message = "模型服务状态使用字典llm_model_status不能为空")
// @NotNull(message = "模型服务状态使用字典llm_model_status不能为空")
private Integer status;
private Long jobId;
@ -45,4 +45,10 @@ public class ModelServiceSaveReqVO {
*/
@Schema(description = "模型版本号", example = "1.0.0")
private String version;
/**
* 模型url
*/
@Schema(description = "模型url", example = "www.sdada.com")
private String modelUrl;
}

View File

@ -72,4 +72,6 @@ public class ModelServiceDO extends BaseDO {
* 模型版本号
*/
private String version;
private String modelUrl;
}

View File

@ -103,10 +103,12 @@ public class AsyncModelAccessManualService {
messages.add(message);
String modelName = "";
String modelUrl = "";
if(modelAssessTaskAuto.getModelType() == 0){
ModelServiceDO modelServiceDO = modelServiceMapper.selectById(modelAssessTaskAuto.getModelService());
if (modelServiceDO != null){
modelName = modelServiceDO.getBaseModelName();
modelUrl = modelServiceDO.getModelUrl();
}
}else{
BaseModelDO baseModelDO = baseModelMapper.selectById(modelAssessTaskAuto.getModelService());
@ -118,7 +120,7 @@ public class AsyncModelAccessManualService {
ModelCompletionsReqVO modelCompletionsReqVO = new ModelCompletionsReqVO();
modelCompletionsReqVO.setMessages(messages);
modelCompletionsReqVO.setModel(modelName);
ModelCompletionsRespVO modelCompletionsRespVO = modelService.modelCompletions(modelCompletionsReqVO);
ModelCompletionsRespVO modelCompletionsRespVO = modelService.modelCompletions(modelUrl,modelCompletionsReqVO);
String prompt = modelCompletionsRespVO.getAnswer();
String res = trainHttpService.autoEvaluation(prompt, datasetPrompt);
@ -199,7 +201,9 @@ public class AsyncModelAccessManualService {
try {
if (!CollectionUtils.isAnyEmpty(datasetQuestionList)){
String modelName;
String modelUrl;
if (task.getModelType()==1){
modelUrl = "";
BaseModelDO baseModelDO = baseModelMapper.selectById(task.getModelService());
if (baseModelDO != null){
modelName = baseModelDO.getAigcModelName();
@ -210,7 +214,9 @@ public class AsyncModelAccessManualService {
ModelServiceDO modelServiceDO = modelServiceMapper.selectById(task.getModelService());
if (modelServiceDO != null){
modelName = modelServiceDO.getBaseModelName();
modelUrl = modelServiceDO.getModelUrl();
} else {
modelUrl = "";
modelName = "";
}
}
@ -225,7 +231,7 @@ public class AsyncModelAccessManualService {
List<ModelCompletionsReqVO.ModelCompletionsMessage> messages = new ArrayList<>();
messages.add(message);
modelCompletionsReqVO.setMessages(messages);
ModelCompletionsRespVO modelCompletionsRespVO = modelService.modelCompletions(modelCompletionsReqVO);
ModelCompletionsRespVO modelCompletionsRespVO = modelService.modelCompletions(modelUrl,modelCompletionsReqVO);
if (modelCompletionsRespVO != null){
ManualModelAnswerDO manualModelAnswerDO = new ManualModelAnswerDO();
manualModelAnswerDO.setCreator(task.getCreator());

View File

@ -6,10 +6,7 @@ import cn.iocoder.yudao.module.llm.dal.mysql.basemodel.BaseModelMapper;
import cn.iocoder.yudao.module.llm.dal.mysql.finetuningtask.FineTuningTaskMapper;
import cn.iocoder.yudao.module.llm.dal.mysql.modelservice.ModelServiceMapper;
import cn.iocoder.yudao.module.llm.service.http.TrainHttpService;
import cn.iocoder.yudao.module.llm.service.http.vo.AIgcModelCreateSaveReq;
import cn.iocoder.yudao.module.llm.service.http.vo.AigcModelCreateRespVO;
import cn.iocoder.yudao.module.llm.service.http.vo.AigcModelDeploySaveReq;
import cn.iocoder.yudao.module.llm.service.http.vo.AigcRespVO;
import cn.iocoder.yudao.module.llm.service.http.vo.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Async;
@ -23,6 +20,7 @@ import java.util.HashMap;
public class AsyncModelServiceService {
private static final Logger log = LoggerFactory.getLogger(AsyncModelServiceService.class);
private static final String DEFAULT_MODEL_URL_SUFFIX = "/v1/chat/completions";
@Resource
private FineTuningTaskMapper fineTuningTaskMapper;
@ -104,15 +102,11 @@ public class AsyncModelServiceService {
*/
ModelServiceDO modelServiceDO = modelServiceMapper.selectById(updateObj.getId());
AigcModelDeploySaveReq aigcModelDeploySaveReq = new AigcModelDeploySaveReq(modelServiceDO.getBaseModelName(),
1,
updateObj.getGpuCount(),
"cpu",
"cpu-aigc-model",
0,
"",
"float16", 1);
AigcRespVO aigcRespVO = trainHttpService.modelDeploy(new HashMap<>(), aigcModelDeploySaveReq);
"gpu");
ModelDeployRespVO modelDeployRespVO = trainHttpService.modelDeploy(new HashMap<>(), aigcModelDeploySaveReq);
updateObj.setStatus(1);
updateObj.setJobId(modelDeployRespVO.getId());
updateObj.setModelUrl(modelDeployRespVO.getPort()+DEFAULT_MODEL_URL_SUFFIX);
modelServiceMapper.updateById(updateObj);
}catch(Exception e){
// updateObj.setStatus(3);
@ -126,7 +120,7 @@ public class AsyncModelServiceService {
try {
ModelServiceDO modelServiceDO = modelServiceMapper.selectById(updateObj.getId());
// Thread.sleep(30000);
AigcRespVO aigcRespVO = trainHttpService.modelUndeploy(new HashMap<>(), modelServiceDO.getBaseModelName());
AigcRespVO aigcRespVO = trainHttpService.modelUndeploy(new HashMap<>(), modelServiceDO.getJobId());
if (aigcRespVO.isSuccess()){
updateObj.setStatus(0);
}else {

View File

@ -163,6 +163,7 @@ public class ConversationServiceImpl implements ConversationService {
chatReqVO.setUuid(UUID.randomUUID().toString());
}
String model = null;
String selfModelUrl = "";
if (Objects.equals(1, chatReqVO.getModelType())) {
// 预制模型
BaseModelDO baseModelDO = baseModelService.getBaseModel(chatReqVO.getModelId());
@ -177,6 +178,7 @@ public class ConversationServiceImpl implements ConversationService {
throw exception(MODEL_SERVICE_NOT_EXISTS);
}
model = modelServiceDO.getBaseModelName();
selfModelUrl = modelServiceDO.getModelUrl();
}else {
throw exception(BASE_MODEL_NOT_EXISTS);
}
@ -251,7 +253,7 @@ public class ConversationServiceImpl implements ConversationService {
modelCompletionsReqVO.setMessages(messages);
// baseModel aigcModelName 为aigc中的模型名称
modelCompletionsReqVO.setModel(model);
ModelCompletionsRespVO modelCompletionsRespVO = modelService.modelCompletions(modelCompletionsReqVO);
ModelCompletionsRespVO modelCompletionsRespVO = modelService.modelCompletions(selfModelUrl,modelCompletionsReqVO);
if (modelCompletionsRespVO == null) {
throw exception(MODEL_COMPLETIONS_ERROR);
}

View File

@ -60,13 +60,20 @@ public class ModelService {
* @param req
* @return
*/
public ModelCompletionsRespVO modelCompletions(ModelCompletionsReqVO req) {
public ModelCompletionsRespVO modelCompletions(String url,ModelCompletionsReqVO req) {
if (StringUtils.isBlank(req.getModel())) {
req.setModel(DEFAULT_MODEL_ID);
}
log.info("url: {}", llmBackendProperties.getModelCompletions());
log.info("request: {}", req);
String result = HttpUtils.post(llmBackendProperties.getModelCompletions(), null, JSON.toJSONString(req));
String result;
if (StringUtils.isBlank(url)){
log.info("url: {}", llmBackendProperties.getModelCompletions());
result = HttpUtils.post(llmBackendProperties.getModelCompletions(), null, JSON.toJSONString(req));
}else {
log.info("url: {}", url);
result = HttpUtils.post(url, null, JSON.toJSONString(req));
}
log.info("response: {}", result);
if (StringUtils.isBlank(result)) {
return null;

View File

@ -238,18 +238,18 @@ public class TrainHttpService {
return res;
}
public AigcRespVO modelDeploy(Map<String, String> headers,AigcModelDeploySaveReq req){
public ModelDeployRespVO modelDeploy(Map<String, String> headers,AigcModelDeploySaveReq req){
login(headers);
String modelDeploy = llmBackendProperties.getModelDeploy();
String res = HttpUtils.post(modelDeploy, headers, JSON.toJSONString(req));
log.info(" modelDeploy:{}", res);
AigcRespVO aigcRespVO = JSON.parseObject(res, AigcRespVO.class);
return aigcRespVO;
ModelDeployRespVO modelDeployRespVO = JSON.parseObject(res.getBytes(), ModelDeployRespVO.class);
return modelDeployRespVO;
}
public AigcRespVO modelUndeploy(Map<String, String> headers, String baseModelName) {
public AigcRespVO modelUndeploy(Map<String, String> headers, Long deployId) {
login(headers);
String modelDeploy = llmBackendProperties.getModelUndeploy();
String res = HttpUtils.post(modelDeploy + baseModelName, headers,"");
String res = HttpUtils.post(modelDeploy + deployId, headers,"");
log.info(" modelDeploy:{}", res);
AigcRespVO aigcRespVO = JSON.parseObject(res, AigcRespVO.class);
log.info(" modelDeploy:{}", aigcRespVO);

View File

@ -20,15 +20,8 @@ public class AigcModelDeploySaveReq {
*/
private String model;
private Integer cpu;
private Integer gpu;
/**
* 类型cpu/gpu
*/
private String inferredType;
private String label;
private Integer maxGpuMemory;
private String modelWorker;
private String quantization;
private Integer replicas;
}

View File

@ -60,4 +60,6 @@ public class AigcModelDeployVO {
private String label;
private Integer cpu;
private Integer vllm;
private String host;
private String port;
}

View File

@ -62,7 +62,7 @@ public class ModelServiceTaskSyncService {
// 使用 TypeReference 解析 JSON 字符串为 List<String>
try {
String query = "?filter={\"deploy_name\":"+modelServiceDO.getBaseModelName()+"}";
String query = "?filter={\"id\":"+jobid+"}";
String res = trainHttpService.modelTableQuery(new HashMap<>(), "model_deploy",query);
log.info("获取 aigc model_deploy 表数据 info {}",res);
ObjectMapper mapper = new ObjectMapper();

View File

@ -254,15 +254,15 @@ llm:
# 开始部署
model_deploy: http://36.103.199.104:5123/llm/deploy
# 取消部署
model_undeploy: http://36.103.199.104:5123/llm/stop_task?model_name=
model_undeploy: http://36.103.199.104:5123/llm/deploy/stop?deploy_id=
# 微调文件上传
aigc_file_upload: http://36.103.199.104:9000/api/files
#
#### 大模型对话
# 基础模型列表 GET
base_model_list: http://36.103.199.104:9997/model/v1/models
base_model_list: http://36.103.199.104:30000/model/v1/models
# 模型对话 POST
model_completions: http://36.103.199.104:9997/v1/chat/completions
model_completions: http://36.103.199.104:30000/v1/chat/completions
# aigc表数据查询接口
table_data_query: http://36.103.199.104:5123/table/%s
# aigc模型推理

View File

@ -297,12 +297,12 @@ llm:
# 开始部署
model_deploy: http://36.103.199.104:5123/llm/deploy
# 取消部署
model_undeploy: http://36.103.199.104:5123/llm/stop_task?model_name=
model_undeploy: http://36.103.199.104:5123/llm/deploy/stop?deploy_id=
#### 大模型对话
# 模型列表 GET
base_model_list: http://36.103.199.104:9997/model/v1/models
base_model_list: http://36.103.199.104:30000/model/v1/models
# 模型对话 POST
model_completions: http://36.103.199.104:9997/v1/chat/completions
model_completions: http://36.103.199.104:30000/v1/chat/completions
# aigc表数据查询接口
table_data_query: http://36.103.199.104:5123/table/%s
# aigc模型推理

View File

@ -297,12 +297,12 @@ llm:
# 开始部署
model_deploy: http://36.103.199.104:5123/llm/deploy
# 取消部署
model_undeploy: http://36.103.199.104:5123/llm/stop_task?model_name=
model_undeploy: http://36.103.199.104:5123/llm/deploy/stop?deploy_id=
#### 大模型对话
# 模型列表 GET
base_model_list: http://36.103.199.104:9997/model/v1/models
base_model_list: http://36.103.199.104:30000/model/v1/models
# 模型对话 POST
model_completions: http://36.103.199.104:9997/v1/chat/completions
model_completions: http://36.103.199.104:30000/v1/chat/completions
# aigc表数据查询接口
table_data_query: http://36.133.1.230:5123/table/%s
# aigc模型推理