feat(llm): 优化知识库文档上传功能
- 新增知识库文档状态枚举,用于记录文档上传状态 - 在文档上传过程中更新状态,包括未上传、上传中、上传成功和上传失败 - 优化了 embedUploadFile 和 knowledgeEmbed 方法,增加了状态更新逻辑 - 新增 getKnowledgeDocuments 方法获取知识库文档对象
This commit is contained in:
parent
1f9603fc65
commit
aae2c064cb
@ -0,0 +1,66 @@
|
||||
package cn.iocoder.yudao.module.llm.enums;
|
||||
|
||||
/**
 * Upload status of a knowledge-base file.
 *
 * <p>Each constant carries the integer code that is stored on the document record
 * and a human-readable display name.
 */
public enum KnowledgeStatusEnum {

    /**
     * Not uploaded.
     */
    NOT_UPLOADED(0, "未上传"),

    /**
     * Uploading.
     */
    UPLOADING(1, "上传中"),

    /**
     * Upload succeeded.
     */
    UPLOAD_SUCCESS(2, "上传成功"),

    /**
     * Upload failed.
     */
    UPLOAD_FAILED(3, "上传失败");

    /**
     * Status code.
     */
    private final Integer status;

    /**
     * Status display name.
     */
    private final String name;

    /**
     * Creates a status constant.
     *
     * @param status status code
     * @param name   status display name
     */
    KnowledgeStatusEnum (Integer status, String name) {
        this.status = status;
        this.name = name;
    }

    /**
     * Returns the status code.
     *
     * @return status code
     */
    public Integer getStatus () {
        return status;
    }

    /**
     * Returns the status display name.
     *
     * @return status display name
     */
    public String getName () {
        return name;
    }

    /**
     * Resolves a status code back to its constant.
     *
     * @param status status code to look up; may be {@code null}
     * @return the constant whose code equals {@code status}, or {@code null} when
     *         {@code status} is {@code null} or matches no constant
     */
    public static KnowledgeStatusEnum fromStatus (Integer status) {
        if (status == null) {
            return null;
        }
        for (KnowledgeStatusEnum candidate : values()) {
            // Compare with equals: the code is a boxed Integer, so == would compare references.
            if (candidate.status.equals(status)) {
                return candidate;
            }
        }
        return null;
    }
}
|
@ -3,6 +3,8 @@ package cn.iocoder.yudao.module.llm.service.async;
|
||||
import cn.iocoder.yudao.framework.common.exception.ErrorCode;
|
||||
import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
|
||||
import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgedocuments.KnowledgeDocumentsDO;
|
||||
import cn.iocoder.yudao.module.llm.dal.mysql.knowledgedocuments.KnowledgeDocumentsMapper;
|
||||
import cn.iocoder.yudao.module.llm.enums.KnowledgeStatusEnum;
|
||||
import cn.iocoder.yudao.module.llm.framework.backend.config.LLMBackendProperties;
|
||||
import cn.iocoder.yudao.module.llm.service.http.RagHttpService;
|
||||
import cn.iocoder.yudao.module.llm.service.http.vo.KnowledgeRagEmbedReqVO;
|
||||
@ -33,6 +35,10 @@ public class AsyncKnowledgeBase {
|
||||
@Resource
|
||||
private LLMBackendProperties llmBackendProperties;
|
||||
|
||||
@Resource
|
||||
private KnowledgeDocumentsMapper knowledgeDocumentsMapper;
|
||||
|
||||
|
||||
// 向向量知识库创建文件
|
||||
@Async
|
||||
public void createKnowledgeBase (List<KnowledgeDocumentsDO> knowledgeList, List<Long> ids) {
|
||||
@ -40,37 +46,59 @@ public class AsyncKnowledgeBase {
|
||||
String mes = ragHttpService.ragDocumentsDel(llmBackendProperties.getRagDocumentsDel(), ids);
|
||||
log.info("delete knowledge base info {}", mes);
|
||||
}
|
||||
|
||||
// 注释调试
|
||||
if (!CollectionUtils.isAnyEmpty(knowledgeList)) {
|
||||
knowledgeList.stream().forEach(knowledge -> {
|
||||
knowledgeList.forEach(knowledge -> {
|
||||
try {
|
||||
log.info("knowledge base begin create {}",knowledge);
|
||||
log.info("knowledge base begin create {}", knowledge);
|
||||
|
||||
// 修改状态为 未上传
|
||||
updateFileState(knowledge, KnowledgeStatusEnum.NOT_UPLOADED);
|
||||
|
||||
RegUploadReqVO regUploadReqVO = new RegUploadReqVO()
|
||||
.setUrl(llmBackendProperties.getRagEmbed())
|
||||
.setFileId(String.valueOf(knowledge.getId()))
|
||||
.setFileName(knowledge.getDocumentName())
|
||||
.setFileUrl(knowledge.getFileUrl());
|
||||
|
||||
int lastIndex = knowledge.getDocumentName().lastIndexOf(".");
|
||||
if (lastIndex != -1){
|
||||
if (lastIndex != -1) {
|
||||
|
||||
String extension = knowledge.getDocumentName().substring(lastIndex + 1).toLowerCase();
|
||||
if ("txt".equals(extension)) {
|
||||
ragHttpService.embedUploadFile(regUploadReqVO);
|
||||
}else {
|
||||
} else {
|
||||
KnowledgeRagEmbedReqVO knowledgeRagEmbedReqVO = new KnowledgeRagEmbedReqVO()
|
||||
.setFileId(String.valueOf(knowledge.getId()))
|
||||
.setFileName(knowledge.getDocumentName())
|
||||
.setFileInputStream(new ByteArrayInputStream(Objects.requireNonNull(getFileByte(knowledge.getFileUrl()))));
|
||||
ragHttpService.knowledgeEmbed(knowledgeRagEmbedReqVO,knowledge.getKnowledgeBaseId());
|
||||
ragHttpService.knowledgeEmbed(knowledgeRagEmbedReqVO, knowledge.getKnowledgeBaseId());
|
||||
}
|
||||
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("the creation of the knowledge base error {}", e.getMessage());
|
||||
// 修改状态为 上传失败
|
||||
updateFileState(knowledge, KnowledgeStatusEnum.UPLOAD_FAILED);
|
||||
}
|
||||
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 修改知识库文档状态
|
||||
*
|
||||
* @param documents 知识库文档
|
||||
* @param status 状态 (0:未上传,1:上传中,2:上传成功,3:上传失败)
|
||||
*/
|
||||
private void updateFileState (KnowledgeDocumentsDO documents, KnowledgeStatusEnum status) {
|
||||
documents.setStatus(status.getStatus());
|
||||
knowledgeDocumentsMapper.updateById(documents);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 知识库向量嵌入
|
||||
*
|
||||
@ -96,7 +124,7 @@ public class AsyncKnowledgeBase {
|
||||
.setFileName(knowledge.getDocumentName())
|
||||
.setFileInputStream(new ByteArrayInputStream(Objects.requireNonNull(getFileByte(knowledge.getFileUrl()))));
|
||||
|
||||
ragHttpService.knowledgeEmbed(ragEmbedReqVo,id);
|
||||
ragHttpService.knowledgeEmbed(ragEmbedReqVo, id);
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("the creation of the knowledge base error {}", e.getMessage(), e);
|
||||
|
@ -3,8 +3,11 @@ package cn.iocoder.yudao.module.llm.service.http;
|
||||
|
||||
import cn.iocoder.yudao.framework.common.exception.ErrorCode;
|
||||
import cn.iocoder.yudao.framework.common.util.http.HttpUtils;
|
||||
import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgebase.KnowledgeBaseDO;
|
||||
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
|
||||
import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgedocuments.KnowledgeDocumentsDO;
|
||||
import cn.iocoder.yudao.module.llm.dal.mysql.knowledgebase.KnowledgeBaseMapper;
|
||||
import cn.iocoder.yudao.module.llm.dal.mysql.knowledgedocuments.KnowledgeDocumentsMapper;
|
||||
import cn.iocoder.yudao.module.llm.enums.KnowledgeStatusEnum;
|
||||
import cn.iocoder.yudao.module.llm.framework.backend.config.LLMBackendProperties;
|
||||
import cn.iocoder.yudao.module.llm.service.http.vo.*;
|
||||
import com.alibaba.fastjson.JSON;
|
||||
@ -41,9 +44,13 @@ public class RagHttpService {
|
||||
|
||||
@Resource
|
||||
private LLMBackendProperties llmBackendProperties;
|
||||
|
||||
@Resource
|
||||
private KnowledgeBaseMapper knowledgeBaseMapper;
|
||||
|
||||
@Resource
|
||||
private KnowledgeDocumentsMapper knowledgeDocumentsMapper;
|
||||
|
||||
/**
|
||||
* RAG健康检查API
|
||||
*/
|
||||
@ -73,11 +80,19 @@ public class RagHttpService {
|
||||
* 向量知识库文档上传
|
||||
*
|
||||
* @param ragUploadReqVO
|
||||
* @return
|
||||
* @throws UnirestException
|
||||
* @throws IOException
|
||||
*/
|
||||
public RagEmbedRespVO embedUploadFile (RegUploadReqVO ragUploadReqVO) throws UnirestException, IOException {
|
||||
public void embedUploadFile (RegUploadReqVO ragUploadReqVO) throws UnirestException, IOException {
|
||||
// 根据 fileId 查询知识库文档
|
||||
KnowledgeDocumentsDO documents = getKnowledgeDocuments(ragUploadReqVO.getFileId());
|
||||
if (documents == null) {
|
||||
throw exception(new ErrorCode(10047, "知识库文档不存在"));
|
||||
}
|
||||
|
||||
// 修改状态为 上传中
|
||||
updateFileState(documents, KnowledgeStatusEnum.UPLOADING);
|
||||
|
||||
CloseableHttpClient httpClient = HttpClients.createDefault();
|
||||
RagEmbedRespVO ragEmbedRespVO = new RagEmbedRespVO();
|
||||
HttpGet request = new HttpGet(ragUploadReqVO.getFileUrl());
|
||||
@ -107,15 +122,22 @@ public class RagHttpService {
|
||||
.field("file_id", ragUploadReqVO.getFileId())
|
||||
.field("file", new ByteArrayInputStream(utf8Bytes), ragUploadReqVO.getFileName()) // 使用文件名 "file.txt" 作为示例
|
||||
.asString();
|
||||
log.info("Response Body: " + uploadResponse.getBody());
|
||||
ragEmbedRespVO = JSON.parseObject(uploadResponse.getBody(), RagEmbedRespVO.class);
|
||||
log.info(" ragEmbedRespVO:{}", ragEmbedRespVO);
|
||||
|
||||
log.info("Response Body: {}", uploadResponse.getBody());
|
||||
ragEmbedRespVO = JSON.parseObject(uploadResponse.getBody(), RagEmbedRespVO.class);
|
||||
log.info("ragEmbedRespVO:{}", ragEmbedRespVO);
|
||||
|
||||
if (ragEmbedRespVO.isStatus()) {
|
||||
// 修改状态为 上传成功
|
||||
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_SUCCESS);
|
||||
} else {
|
||||
// 修改状态为 上传失败
|
||||
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_FAILED);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return ragEmbedRespVO;
|
||||
}
|
||||
|
||||
private static String detectCharset (InputStream inputStream) throws IOException {
|
||||
@ -129,7 +151,7 @@ public class RagHttpService {
|
||||
detector.dataEnd();
|
||||
String charset = detector.getDetectedCharset();
|
||||
detector.reset();
|
||||
if(charset == null){
|
||||
if (charset == null) {
|
||||
return StandardCharsets.UTF_8.toString();
|
||||
}
|
||||
return charset;
|
||||
@ -224,35 +246,74 @@ public class RagHttpService {
|
||||
public void knowledgeEmbed (KnowledgeRagEmbedReqVO reqVO, Long id) {
|
||||
// 获取知识库向量嵌入的url
|
||||
String ragEmbed = llmBackendProperties.getEmbed();
|
||||
|
||||
log.info("url : {}", ragEmbed);
|
||||
|
||||
KnowledgeBaseDO aDo = knowledgeBaseMapper.selectById(id);
|
||||
// fileId 是 llm_knowledge_documents 表 ID
|
||||
String fileId = reqVO.getFileId();
|
||||
String fileName = reqVO.getFileName();
|
||||
|
||||
aDo.setKnowledgeBaseIntro(ragEmbed);
|
||||
knowledgeBaseMapper.updateById(aDo);
|
||||
// 根据 fileId 查询知识库文档
|
||||
KnowledgeDocumentsDO documents = getKnowledgeDocuments(id, fileId);
|
||||
if (documents == null) {
|
||||
throw exception(new ErrorCode(10047, "知识库文档不存在"));
|
||||
}
|
||||
|
||||
// 修改状态为 上传中
|
||||
updateFileState(documents, KnowledgeStatusEnum.UPLOADING);
|
||||
|
||||
// 构建请求参数
|
||||
HttpResponse<String> response = Unirest.post(ragEmbed)
|
||||
.field("file_id", reqVO.getFileId())
|
||||
.field("file", reqVO.getFileInputStream(), reqVO.getFileName())
|
||||
.field("file_id", fileId)
|
||||
.field("file", reqVO.getFileInputStream(), fileName)
|
||||
.asString();
|
||||
String body = response.getBody();
|
||||
log.info("body : {}", body);
|
||||
|
||||
JSONObject jsonObject = JSON.parseObject(body);
|
||||
|
||||
aDo.setKnowledgeBaseIntro(body);
|
||||
knowledgeBaseMapper.updateById(aDo);
|
||||
|
||||
if (jsonObject.getBoolean("status")) {
|
||||
log.info(" ========= Response Body Result: {}", response.getBody());
|
||||
knowledgeBaseMapper.updateById(aDo);
|
||||
throw exception(new ErrorCode(10047, " ------------ 知识库上传成功"));
|
||||
String responseBody = response.getBody();
|
||||
JSONObject parseObject = JSON.parseObject(responseBody);
|
||||
log.info(" ========= Response Body Result: {}", responseBody);
|
||||
|
||||
if (parseObject.getBoolean("status")) {
|
||||
// 修改状态为 上传成功
|
||||
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_SUCCESS);
|
||||
} else {
|
||||
knowledgeBaseMapper.updateById(aDo);
|
||||
throw exception(new ErrorCode(10047, " xxxxxxxxxxxx 知识库上传失败"));
|
||||
// 修改状态为 上传失败
|
||||
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_FAILED);
|
||||
throw exception(new ErrorCode(10047, responseBody));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* 修改知识库文档状态
|
||||
*
|
||||
* @param documents 知识库文档
|
||||
* @param status 状态 (0:未上传,1:上传中,2:上传成功,3:上传失败)
|
||||
*/
|
||||
private void updateFileState (KnowledgeDocumentsDO documents, KnowledgeStatusEnum status) {
|
||||
documents.setStatus(status.getStatus());
|
||||
knowledgeDocumentsMapper.updateById(documents);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取知识库文档对象
|
||||
*
|
||||
* @param id 知识库ID
|
||||
* @param fileId 知识库文档ID
|
||||
* @return 知识库对象
|
||||
*/
|
||||
private KnowledgeDocumentsDO getKnowledgeDocuments (Long id, String fileId) {
|
||||
return knowledgeDocumentsMapper.selectOne(new LambdaQueryWrapperX<KnowledgeDocumentsDO>()
|
||||
.eq(KnowledgeDocumentsDO::getKnowledgeBaseId, id)
|
||||
.eq(KnowledgeDocumentsDO::getId, fileId));
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取知识库文档对象
|
||||
*
|
||||
* @param fileId 知识库ID
|
||||
* @return 知识库对象
|
||||
*/
|
||||
private KnowledgeDocumentsDO getKnowledgeDocuments (String fileId) {
|
||||
return knowledgeDocumentsMapper.selectById(fileId);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user