feat(llm): 优化知识库文档上传功能

- 新增知识库文档状态枚举,用于记录文档上传状态
- 在文档上传过程中更新状态,包括未上传、上传中、上传成功和上传失败
- 优化了 embedUploadFile 和 knowledgeEmbed 方法,增加了状态更新逻辑
- 新增 getKnowledgeDocuments 方法获取知识库文档对象
This commit is contained in:
Liuyang 2025-02-12 11:12:21 +08:00
parent 1f9603fc65
commit aae2c064cb
3 changed files with 187 additions and 32 deletions

View File

@ -0,0 +1,66 @@
package cn.iocoder.yudao.module.llm.enums;
/**
 * Upload status of a knowledge-base document.
 *
 * <p>Every constant pairs a numeric status code with a display name.
 */
public enum KnowledgeStatusEnum {

    /** Not uploaded (code 0). */
    NOT_UPLOADED(0, "未上传"),

    /** Upload in progress (code 1). */
    UPLOADING(1, "上传中"),

    /** Upload succeeded (code 2). */
    UPLOAD_SUCCESS(2, "上传成功"),

    /** Upload failed (code 3). */
    UPLOAD_FAILED(3, "上传失败");

    /** Numeric status code exposed through {@link #getStatus()}. */
    private final Integer code;

    /** Display name exposed through {@link #getName()}. */
    private final String label;

    /**
     * Binds a constant to its status code and display name.
     *
     * @param code  numeric status code
     * @param label display name of the status
     */
    KnowledgeStatusEnum (Integer code, String label) {
        this.code = code;
        this.label = label;
    }

    /**
     * Returns the numeric status code of this constant.
     *
     * @return the status code
     */
    public Integer getStatus () {
        return this.code;
    }

    /**
     * Returns the display name of this constant.
     *
     * @return the status name
     */
    public String getName () {
        return this.label;
    }
}

View File

@ -3,6 +3,8 @@ package cn.iocoder.yudao.module.llm.service.async;
import cn.iocoder.yudao.framework.common.exception.ErrorCode;
import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgedocuments.KnowledgeDocumentsDO;
import cn.iocoder.yudao.module.llm.dal.mysql.knowledgedocuments.KnowledgeDocumentsMapper;
import cn.iocoder.yudao.module.llm.enums.KnowledgeStatusEnum;
import cn.iocoder.yudao.module.llm.framework.backend.config.LLMBackendProperties;
import cn.iocoder.yudao.module.llm.service.http.RagHttpService;
import cn.iocoder.yudao.module.llm.service.http.vo.KnowledgeRagEmbedReqVO;
@ -33,6 +35,10 @@ public class AsyncKnowledgeBase {
@Resource
private LLMBackendProperties llmBackendProperties;
@Resource
private KnowledgeDocumentsMapper knowledgeDocumentsMapper;
// 向向量知识库创建文件
@Async
public void createKnowledgeBase (List<KnowledgeDocumentsDO> knowledgeList, List<Long> ids) {
@ -40,37 +46,59 @@ public class AsyncKnowledgeBase {
String mes = ragHttpService.ragDocumentsDel(llmBackendProperties.getRagDocumentsDel(), ids);
log.info("delete knowledge base info {}", mes);
}
// 注释调试
if (!CollectionUtils.isAnyEmpty(knowledgeList)) {
knowledgeList.stream().forEach(knowledge -> {
knowledgeList.forEach(knowledge -> {
try {
log.info("knowledge base begin create {}",knowledge);
log.info("knowledge base begin create {}", knowledge);
// 修改状态为 未上传
updateFileState(knowledge, KnowledgeStatusEnum.NOT_UPLOADED);
RegUploadReqVO regUploadReqVO = new RegUploadReqVO()
.setUrl(llmBackendProperties.getRagEmbed())
.setFileId(String.valueOf(knowledge.getId()))
.setFileName(knowledge.getDocumentName())
.setFileUrl(knowledge.getFileUrl());
int lastIndex = knowledge.getDocumentName().lastIndexOf(".");
if (lastIndex != -1){
if (lastIndex != -1) {
String extension = knowledge.getDocumentName().substring(lastIndex + 1).toLowerCase();
if ("txt".equals(extension)) {
ragHttpService.embedUploadFile(regUploadReqVO);
}else {
} else {
KnowledgeRagEmbedReqVO knowledgeRagEmbedReqVO = new KnowledgeRagEmbedReqVO()
.setFileId(String.valueOf(knowledge.getId()))
.setFileName(knowledge.getDocumentName())
.setFileInputStream(new ByteArrayInputStream(Objects.requireNonNull(getFileByte(knowledge.getFileUrl()))));
ragHttpService.knowledgeEmbed(knowledgeRagEmbedReqVO,knowledge.getKnowledgeBaseId());
ragHttpService.knowledgeEmbed(knowledgeRagEmbedReqVO, knowledge.getKnowledgeBaseId());
}
}
} catch (Exception e) {
log.error("the creation of the knowledge base error {}", e.getMessage());
// 修改状态为 上传失败
updateFileState(knowledge, KnowledgeStatusEnum.UPLOAD_FAILED);
}
});
}
}
/**
 * Writes a new upload status onto a knowledge-base document and persists it.
 *
 * <p>The status code is set on the passed-in object itself, which is then
 * handed to the mapper's {@code updateById}.
 *
 * @param documents knowledge-base document to update
 * @param status    new status (0: not uploaded, 1: uploading, 2: upload succeeded, 3: upload failed)
 */
private void updateFileState (KnowledgeDocumentsDO documents, KnowledgeStatusEnum status) {
    final Integer statusCode = status.getStatus();
    documents.setStatus(statusCode);
    knowledgeDocumentsMapper.updateById(documents);
}
/**
* 知识库向量嵌入
*
@ -96,7 +124,7 @@ public class AsyncKnowledgeBase {
.setFileName(knowledge.getDocumentName())
.setFileInputStream(new ByteArrayInputStream(Objects.requireNonNull(getFileByte(knowledge.getFileUrl()))));
ragHttpService.knowledgeEmbed(ragEmbedReqVo,id);
ragHttpService.knowledgeEmbed(ragEmbedReqVo, id);
} catch (Exception e) {
log.error("the creation of the knowledge base error {}", e.getMessage(), e);

View File

@ -3,8 +3,11 @@ package cn.iocoder.yudao.module.llm.service.http;
import cn.iocoder.yudao.framework.common.exception.ErrorCode;
import cn.iocoder.yudao.framework.common.util.http.HttpUtils;
import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgebase.KnowledgeBaseDO;
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX;
import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgedocuments.KnowledgeDocumentsDO;
import cn.iocoder.yudao.module.llm.dal.mysql.knowledgebase.KnowledgeBaseMapper;
import cn.iocoder.yudao.module.llm.dal.mysql.knowledgedocuments.KnowledgeDocumentsMapper;
import cn.iocoder.yudao.module.llm.enums.KnowledgeStatusEnum;
import cn.iocoder.yudao.module.llm.framework.backend.config.LLMBackendProperties;
import cn.iocoder.yudao.module.llm.service.http.vo.*;
import com.alibaba.fastjson.JSON;
@ -41,9 +44,13 @@ public class RagHttpService {
@Resource
private LLMBackendProperties llmBackendProperties;
@Resource
private KnowledgeBaseMapper knowledgeBaseMapper;
@Resource
private KnowledgeDocumentsMapper knowledgeDocumentsMapper;
/**
* RAG健康检查API
*/
@ -73,11 +80,19 @@ public class RagHttpService {
* 向量知识库文档上传
*
* @param ragUploadReqVO
* @return
* @throws UnirestException
* @throws IOException
*/
public RagEmbedRespVO embedUploadFile (RegUploadReqVO ragUploadReqVO) throws UnirestException, IOException {
public void embedUploadFile (RegUploadReqVO ragUploadReqVO) throws UnirestException, IOException {
// 根据 fileId 查询知识库文档
KnowledgeDocumentsDO documents = getKnowledgeDocuments(ragUploadReqVO.getFileId());
if (documents == null) {
throw exception(new ErrorCode(10047, "知识库文档不存在"));
}
// 修改状态为 上传中
updateFileState(documents, KnowledgeStatusEnum.UPLOADING);
CloseableHttpClient httpClient = HttpClients.createDefault();
RagEmbedRespVO ragEmbedRespVO = new RagEmbedRespVO();
HttpGet request = new HttpGet(ragUploadReqVO.getFileUrl());
@ -107,15 +122,22 @@ public class RagHttpService {
.field("file_id", ragUploadReqVO.getFileId())
.field("file", new ByteArrayInputStream(utf8Bytes), ragUploadReqVO.getFileName()) // 使用文件名 "file.txt" 作为示例
.asString();
log.info("Response Body: " + uploadResponse.getBody());
ragEmbedRespVO = JSON.parseObject(uploadResponse.getBody(), RagEmbedRespVO.class);
log.info(" ragEmbedRespVO:{}", ragEmbedRespVO);
log.info("Response Body: {}", uploadResponse.getBody());
ragEmbedRespVO = JSON.parseObject(uploadResponse.getBody(), RagEmbedRespVO.class);
log.info("ragEmbedRespVO:{}", ragEmbedRespVO);
if (ragEmbedRespVO.isStatus()) {
// 修改状态为 上传成功
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_SUCCESS);
} else {
// 修改状态为 上传失败
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_FAILED);
}
}
}
}
}
return ragEmbedRespVO;
}
private static String detectCharset (InputStream inputStream) throws IOException {
@ -129,7 +151,7 @@ public class RagHttpService {
detector.dataEnd();
String charset = detector.getDetectedCharset();
detector.reset();
if(charset == null){
if (charset == null) {
return StandardCharsets.UTF_8.toString();
}
return charset;
@ -224,35 +246,74 @@ public class RagHttpService {
public void knowledgeEmbed (KnowledgeRagEmbedReqVO reqVO, Long id) {
// 获取知识库向量嵌入的url
String ragEmbed = llmBackendProperties.getEmbed();
log.info("url : {}", ragEmbed);
KnowledgeBaseDO aDo = knowledgeBaseMapper.selectById(id);
// fileId llm_knowledge_documents ID
String fileId = reqVO.getFileId();
String fileName = reqVO.getFileName();
aDo.setKnowledgeBaseIntro(ragEmbed);
knowledgeBaseMapper.updateById(aDo);
// 根据 fileId 查询知识库文档
KnowledgeDocumentsDO documents = getKnowledgeDocuments(id, fileId);
if (documents == null) {
throw exception(new ErrorCode(10047, "知识库文档不存在"));
}
// 修改状态为 上传中
updateFileState(documents, KnowledgeStatusEnum.UPLOADING);
// 构建请求参数
HttpResponse<String> response = Unirest.post(ragEmbed)
.field("file_id", reqVO.getFileId())
.field("file", reqVO.getFileInputStream(), reqVO.getFileName())
.field("file_id", fileId)
.field("file", reqVO.getFileInputStream(), fileName)
.asString();
String body = response.getBody();
log.info("body : {}", body);
JSONObject jsonObject = JSON.parseObject(body);
aDo.setKnowledgeBaseIntro(body);
knowledgeBaseMapper.updateById(aDo);
if (jsonObject.getBoolean("status")) {
log.info(" ========= Response Body Result: {}", response.getBody());
knowledgeBaseMapper.updateById(aDo);
throw exception(new ErrorCode(10047, " ------------ 知识库上传成功"));
String responseBody = response.getBody();
JSONObject parseObject = JSON.parseObject(responseBody);
log.info(" ========= Response Body Result: {}", responseBody);
if (parseObject.getBoolean("status")) {
// 修改状态为 上传成功
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_SUCCESS);
} else {
knowledgeBaseMapper.updateById(aDo);
throw exception(new ErrorCode(10047, " xxxxxxxxxxxx 知识库上传失败"));
// 修改状态为 上传失败
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_FAILED);
throw exception(new ErrorCode(10047, responseBody));
}
}
/**
 * Records the given upload status on a knowledge-base document and saves it.
 *
 * <p>The document object passed in is mutated (its status field is set)
 * before being passed to the mapper's {@code updateById}.
 *
 * @param documents knowledge-base document whose status changes
 * @param status    target status (0: not uploaded, 1: uploading, 2: upload succeeded, 3: upload failed)
 */
private void updateFileState (KnowledgeDocumentsDO documents, KnowledgeStatusEnum status) {
    final Integer newStatus = status.getStatus();
    documents.setStatus(newStatus);
    knowledgeDocumentsMapper.updateById(documents);
}
/**
 * Looks up a knowledge-base document by its knowledge base and its own ID.
 *
 * <p>Both conditions must match: the document's knowledge-base ID equals
 * {@code id} and the document's ID equals {@code fileId}.
 *
 * @param id     knowledge-base ID
 * @param fileId knowledge-base document ID (passed as a string)
 * @return the result of the mapper's {@code selectOne} for these two conditions
 */
private KnowledgeDocumentsDO getKnowledgeDocuments (Long id, String fileId) {
    LambdaQueryWrapperX<KnowledgeDocumentsDO> criteria = new LambdaQueryWrapperX<>();
    // Same two equality conditions as before, applied in the same order.
    criteria.eq(KnowledgeDocumentsDO::getKnowledgeBaseId, id);
    criteria.eq(KnowledgeDocumentsDO::getId, fileId);
    return knowledgeDocumentsMapper.selectOne(criteria);
}
/**
 * Looks up a knowledge-base document by its primary key.
 *
 * @param fileId knowledge-base document ID (passed as a string), used as the
 *               key for the mapper's {@code selectById}
 * @return the result of {@code selectById} for that key
 */
private KnowledgeDocumentsDO getKnowledgeDocuments (String fileId) {
    KnowledgeDocumentsDO document = knowledgeDocumentsMapper.selectById(fileId);
    return document;
}
}