refactor(llm): 重构知识库更新流程并添加 Embedding 功能
- 新增 KnowledgeEmbeddingReqVO 类用于知识库 Embedding 请求参数
- 在 KnowledgeBaseController 中添加 embeddingKnowledge 方法处理 Embedding 请求
- 在 KnowledgeBaseService 接口中新增 embeddingKnowledge 方法
- 在 KnowledgeBaseServiceImpl 中实现 embeddingKnowledge 方法,重构知识库更新流程
- 修改 AsyncKnowledgeBase 类,分离创建和删除知识库的方法
- 更新 KnowledgeDocumentsMapper,调整删除文档的查询逻辑
- 移除 KnowledgeDocumentsSaveReqVO 中的冗余注释
This commit is contained in:
parent
31a13cdfd0
commit
18f4aeeda1
@ -55,6 +55,14 @@ public class KnowledgeBaseController {
|
||||
return success(result);
|
||||
}
|
||||
|
||||
@PutMapping("/updateHitParam")
|
||||
@Operation(summary = "更新知识库命中测试参数")
|
||||
public CommonResult<Boolean> updateHitParam(@Valid @RequestBody KnowledgeBaseSaveReqVO updateReqVO) {
|
||||
knowledgeBaseService.updateHitParam(updateReqVO);
|
||||
return success(true);
|
||||
}
|
||||
|
||||
|
||||
@PutMapping("/update")
|
||||
@Operation(summary = "更新知识库")
|
||||
// @PreAuthorize("@ss.hasPermission('llm:knowledge-base:update')")
|
||||
@ -63,6 +71,13 @@ public class KnowledgeBaseController {
|
||||
return success(true);
|
||||
}
|
||||
|
||||
@PutMapping("/embeddingKnowledge")
|
||||
@Operation(summary = "更新 Embedding", description = "对文件内容进行 Embedding 处理并更新到知识库")
|
||||
public CommonResult<Boolean> embeddingKnowledge(@Valid @RequestBody KnowledgeEmbeddingReqVO request) {
|
||||
knowledgeBaseService.embeddingKnowledge(request);
|
||||
return success(true);
|
||||
}
|
||||
|
||||
@PutMapping("/updateKnowledgeBaseInfo")
|
||||
@Operation(summary = "更新知识库")
|
||||
// @PreAuthorize("@ss.hasPermission('llm:knowledge-base:update')")
|
||||
|
@ -0,0 +1,30 @@
|
||||
package cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo;
|
||||
|
||||
import cn.iocoder.yudao.module.llm.controller.admin.knowledgedocuments.vo.KnowledgeDocumentsSaveReqVO;
|
||||
import io.swagger.v3.oas.annotations.media.Schema;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @Description 知识库 Embedding 请求参数
|
||||
* @Date 2025/3/14 13:05
|
||||
*/
|
||||
@Schema(description = "知识库 Embedding 请求参数")
|
||||
@Data
|
||||
public class KnowledgeEmbeddingReqVO {
|
||||
@Schema(description = "知识库ID", requiredMode = Schema.RequiredMode.REQUIRED, example = "25984")
|
||||
private Long id;
|
||||
|
||||
@Schema(description = "知识内容长度", example = "1000")
|
||||
private Integer contentLength;
|
||||
|
||||
@Schema(description = "分块大小(单位:字符)", example = "512")
|
||||
private Integer chunkSize;
|
||||
|
||||
@Schema(description = "分块重叠大小(单位:字符)", example = "128")
|
||||
private Integer chunkOverlap;
|
||||
|
||||
@Schema(description = "关联的知识文档列表")
|
||||
private List<KnowledgeDocumentsSaveReqVO> documents;
|
||||
}
|
@ -25,7 +25,7 @@ public class KnowledgeHitRateTestReqVO {
|
||||
* 返回结果的条数(k值)
|
||||
*/
|
||||
// @NotNull(message = "k值不能为空")
|
||||
private Integer k;
|
||||
private Integer topK;
|
||||
|
||||
/**
|
||||
* Score阈值
|
||||
|
@ -13,7 +13,6 @@ public class KnowledgeDocumentsSaveReqVO {
|
||||
private Long id;
|
||||
|
||||
@Schema(description = "知识库ID", requiredMode = Schema.RequiredMode.REQUIRED, example = "18229")
|
||||
// @NotNull(message = "知识库ID不能为空")
|
||||
private Long knowledgeBaseId;
|
||||
|
||||
@Schema(description = "文档名称", example = "芋艿")
|
||||
@ -28,4 +27,4 @@ public class KnowledgeDocumentsSaveReqVO {
|
||||
@Schema(description = "文件id", example = "1")
|
||||
private Long fileId;
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -28,7 +28,6 @@ public interface KnowledgeDocumentsMapper extends BaseMapperX<KnowledgeDocuments
|
||||
.orderByDesc(KnowledgeDocumentsDO::getId));
|
||||
}
|
||||
|
||||
@Select("SELECT id FROM llm_knowledge_documents WHERE knowledge_base_id = #{id}")
|
||||
@Select("SELECT id FROM llm_knowledge_documents WHERE knowledge_base_id = #{id} and deleted = 1")
|
||||
List<Long> selectDeleteIds(Long id);
|
||||
|
||||
}
|
||||
|
@ -10,9 +10,9 @@ import cn.iocoder.yudao.module.llm.enums.KnowledgeStatusEnum;
|
||||
import cn.iocoder.yudao.module.llm.framework.backend.config.LLMBackendProperties;
|
||||
import cn.iocoder.yudao.module.llm.service.http.RagHttpService;
|
||||
import cn.iocoder.yudao.module.llm.service.http.vo.KnowledgeRagEmbedReqVO;
|
||||
import cn.iocoder.yudao.module.llm.service.http.vo.RegUploadReqVO;
|
||||
import cn.iocoder.yudao.module.llm.service.http.vo.query.multiple.QueryMultipleReqVO;
|
||||
import cn.iocoder.yudao.module.llm.service.http.vo.query.multiple.QueryResultPairVO;
|
||||
import com.alibaba.fastjson.JSON;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
@ -23,7 +23,6 @@ import java.io.IOException;
|
||||
import java.math.RoundingMode;
|
||||
import java.text.DecimalFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
@ -44,24 +43,12 @@ public class AsyncKnowledgeBase {
|
||||
private KnowledgeDocumentsMapper knowledgeDocumentsMapper;
|
||||
|
||||
|
||||
// 向向量知识库创建文件
|
||||
// @Async
|
||||
public void createKnowledgeBase (List<KnowledgeDocumentsDO> knowledgeList, List<Long> ids, Map<String, Integer> knowledgeParameters) {
|
||||
log.info("开始执行 createKnowledgeBase 方法。knowledgeList 大小: {}, ids 大小: {}", knowledgeList.size(), ids.size());
|
||||
|
||||
// 如果提供了 ids,则删除现有的知识库文档
|
||||
if (!CollectionUtils.isAnyEmpty(ids)) {
|
||||
log.info("正在删除现有的知识库文档,ids: {}", ids);
|
||||
try {
|
||||
String mes = ragHttpService.ragDocumentsDel(llmBackendProperties.getRagDocumentsDel(), ids);
|
||||
log.info("删除知识库信息: {}", mes);
|
||||
} catch (Exception e) {
|
||||
log.error("删除知识库信息失败: {}", e.getMessage());
|
||||
throw exception(new ErrorCode(10047, "删除知识库信息失败!"));
|
||||
}
|
||||
} else {
|
||||
log.info("未提供 ids,跳过删除操作。");
|
||||
}
|
||||
/**
|
||||
* 向向量知识库创建文件
|
||||
* @param knowledgeList
|
||||
* @param knowledgeParameters
|
||||
*/
|
||||
public void createKnowledgeBase (List<KnowledgeDocumentsDO> knowledgeList, Map<String, Integer> knowledgeParameters) {
|
||||
|
||||
// 处理 knowledgeList 中的每个知识文档
|
||||
if (!CollectionUtils.isAnyEmpty(knowledgeList)) {
|
||||
@ -100,6 +87,24 @@ public class AsyncKnowledgeBase {
|
||||
log.info("createKnowledgeBase 方法执行完成。");
|
||||
}
|
||||
|
||||
public void deletedKnowledgeBase ( List<Long> ids) {
|
||||
log.info("开始执行 deletedKnowledgeBase 方法。 ids 大小: {} ,ids:{} ", ids.size(), JSON.toJSON(ids));
|
||||
|
||||
// 如果提供了 ids,则删除现有的知识库文档
|
||||
if (!CollectionUtils.isAnyEmpty(ids)) {
|
||||
log.info("正在删除现有的知识库文档,ids: {}", ids);
|
||||
try {
|
||||
String mes = ragHttpService.ragDocumentsDel(llmBackendProperties.getRagDocumentsDel(), ids);
|
||||
log.info("删除知识库信息: {}", mes);
|
||||
} catch (Exception e) {
|
||||
log.error("删除知识库信息失败: {}", e.getMessage());
|
||||
throw exception(new ErrorCode(10047, "删除知识库信息失败!"));
|
||||
}
|
||||
} else {
|
||||
log.info("未提供 ids,跳过删除操作。");
|
||||
}
|
||||
|
||||
}
|
||||
/**
|
||||
* 修改知识库文档状态
|
||||
*
|
||||
|
@ -75,4 +75,16 @@ public interface KnowledgeBaseService {
|
||||
* @return 返回结果
|
||||
*/
|
||||
List<KnowledgeHitRateTestResultVO> executeHitRateTest (@Valid KnowledgeHitRateTestReqVO testReqVO);
|
||||
|
||||
/**
|
||||
* Runs Embedding for a knowledge base.
|
||||
* @param request Embedding request parameters (validated via {@code @Valid})
|
||||
*/
|
||||
void embeddingKnowledge (@Valid KnowledgeEmbeddingReqVO request);
|
||||
|
||||
/**
|
||||
* Updates the hit-test parameters of a knowledge base.
|
||||
* @param updateReqVO request carrying the knowledge base to update (validated via {@code @Valid})
|
||||
*/
|
||||
void updateHitParam (@Valid KnowledgeBaseSaveReqVO updateReqVO);
|
||||
}
|
||||
|
@ -60,31 +60,28 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public void updateKnowledgeBase (KnowledgeBaseSaveReqVO updateReqVO) {
|
||||
// 1. 校验知识库是否存在
|
||||
validateKnowledgeParam(updateReqVO);
|
||||
|
||||
// 2. 更新知识库主表基础信息
|
||||
KnowledgeBaseDO updateObj = BeanUtils.toBean(updateReqVO, KnowledgeBaseDO.class);
|
||||
knowledgeBaseMapper.updateById(updateObj);
|
||||
|
||||
// 3. 处理附表(知识文档)数据
|
||||
handleKnowledgeDocuments(updateReqVO, updateObj);
|
||||
// // 1. 校验知识库是否存在
|
||||
// validateKnowledgeParam(updateReqVO);
|
||||
//
|
||||
// // 2. 更新知识库主表基础信息
|
||||
// KnowledgeBaseDO updateObj = BeanUtils.toBean(updateReqVO, KnowledgeBaseDO.class);
|
||||
// knowledgeBaseMapper.updateById(updateObj);
|
||||
//
|
||||
// // 3. 处理附表(知识文档)数据
|
||||
// handleKnowledgeDocuments(updateReqVO, updateObj);
|
||||
}
|
||||
|
||||
/**
|
||||
* 校验知识库参数
|
||||
*
|
||||
* @param updateReqVO 更新知识库参数
|
||||
* @param request 更新知识库参数
|
||||
*/
|
||||
private void validateKnowledgeParam (KnowledgeBaseSaveReqVO updateReqVO) {
|
||||
private void validateKnowledgeParam (KnowledgeEmbeddingReqVO request) {
|
||||
// 1. 校验知识库是否存在
|
||||
validateKnowledgeBaseExists(updateReqVO.getId());
|
||||
validateKnowledgeBaseExists(request.getId());
|
||||
|
||||
// 2. 校验知识库名称是否重复
|
||||
validateKnowledgeBaseNameExists(updateReqVO);
|
||||
|
||||
// 3. 校验分块大小和分块重叠是否正确
|
||||
validateChunkParameters(updateReqVO.getChunkSize(), updateReqVO.getChunkOverlap());
|
||||
// 2. 校验分块大小和分块重叠是否正确
|
||||
validateChunkParameters(request.getChunkSize(), request.getChunkOverlap());
|
||||
}
|
||||
|
||||
/**
|
||||
@ -109,36 +106,21 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
|
||||
/**
|
||||
* 处理知识文档数据
|
||||
*
|
||||
* @param updateReqVO 更新知识库参数
|
||||
* @param updateObj 更新知识库对象
|
||||
* @param request 更新知识库参数
|
||||
* @param updateObj 更新知识库对象
|
||||
*/
|
||||
private void handleKnowledgeDocuments (KnowledgeBaseSaveReqVO updateReqVO, KnowledgeBaseDO updateObj) {
|
||||
List<KnowledgeDocumentsSaveReqVO> documents = updateReqVO.getKnowledgeDocuments();
|
||||
if (CollectionUtils.isAnyEmpty(documents)) {
|
||||
// 如果传入的文档列表为空,则删除所有关联文档
|
||||
deleteAllDocuments(updateReqVO.getId());
|
||||
return;
|
||||
}
|
||||
private void handleKnowledgeDocuments (KnowledgeEmbeddingReqVO request, KnowledgeBaseDO updateObj) {
|
||||
// 1: 删除所有旧文档
|
||||
deleteAllDocuments(updateObj.getId());
|
||||
|
||||
|
||||
// 获取需要保留的文档 ID
|
||||
List<Long> retainedIds = documents.stream()
|
||||
.map(KnowledgeDocumentsSaveReqVO::getId)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
// 删除不需要保留的文档
|
||||
deleteUnretainedDocuments(updateReqVO.getId(), retainedIds);
|
||||
|
||||
// 更新或插入文档数据
|
||||
List<KnowledgeDocumentsDO> newDocuments = updateOrInsertDocuments(documents, updateReqVO.getId(), updateObj.getKnowledgeLength());
|
||||
// 2: 重新上传文档
|
||||
List<KnowledgeDocumentsDO> newDocuments = updateOrInsertDocuments(request.getDocuments(), request.getId());
|
||||
|
||||
Map<String, Integer> knowledgeParameters = new HashMap<>();
|
||||
knowledgeParameters.put("chunkSize", updateReqVO.getChunkSize());
|
||||
knowledgeParameters.put("chunkOverlap", updateReqVO.getChunkOverlap());
|
||||
knowledgeParameters.put("chunkSize", request.getChunkSize());
|
||||
knowledgeParameters.put("chunkOverlap", request.getChunkOverlap());
|
||||
|
||||
// 异步处理新增文档和删除的文档
|
||||
List<Long> deleteIds = knowledgeDocumentsMapper.selectDeleteIds(updateReqVO.getId());
|
||||
asyncKnowledgeBase.createKnowledgeBase(newDocuments, deleteIds, knowledgeParameters);
|
||||
asyncKnowledgeBase.createKnowledgeBase(newDocuments, knowledgeParameters);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -153,7 +135,7 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
|
||||
// 异步处理删除的文档
|
||||
List<Long> deleteIds = knowledgeDocumentsMapper.selectDeleteIds(knowledgeBaseId);
|
||||
if (!CollectionUtils.isAnyEmpty(deleteIds)) {
|
||||
asyncKnowledgeBase.createKnowledgeBase(new ArrayList<>(), deleteIds, new HashMap<>());
|
||||
asyncKnowledgeBase.deletedKnowledgeBase(deleteIds);
|
||||
}
|
||||
}
|
||||
|
||||
@ -177,10 +159,9 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
|
||||
*
|
||||
* @param documents 需要更新的文档数据
|
||||
* @param knowledgeBaseId 知识库 ID
|
||||
* @param chunkSize
|
||||
* @return 更新或插入的文档数据
|
||||
*/
|
||||
private List<KnowledgeDocumentsDO> updateOrInsertDocuments (List<KnowledgeDocumentsSaveReqVO> documents, Long knowledgeBaseId, Integer chunkSize) {
|
||||
private List<KnowledgeDocumentsDO> updateOrInsertDocuments (List<KnowledgeDocumentsSaveReqVO> documents, Long knowledgeBaseId) {
|
||||
List<KnowledgeDocumentsDO> newDocuments = new ArrayList<>();
|
||||
documents.forEach(doc -> {
|
||||
KnowledgeDocumentsDO docDO = BeanUtils.toBean(doc, KnowledgeDocumentsDO.class);
|
||||
@ -315,17 +296,17 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
|
||||
}
|
||||
Integer topK = 4;
|
||||
if (baseDO.getTopK() == null || baseDO.getTopK() <= 0) {
|
||||
testReqVO.setK(topK);
|
||||
testReqVO.setTopK(topK);
|
||||
} else {
|
||||
topK = baseDO.getTopK();
|
||||
testReqVO.setK(topK);
|
||||
topK = testReqVO.getTopK();
|
||||
testReqVO.setTopK(topK);
|
||||
}
|
||||
|
||||
Double score = 0.2;
|
||||
if (baseDO.getScore() == null || baseDO.getTopK() <= 0.0 || baseDO.getScore() > 1) {
|
||||
testReqVO.setScore(score);
|
||||
} else {
|
||||
score = baseDO.getScore();
|
||||
score = testReqVO.getScore();
|
||||
testReqVO.setScore(score);
|
||||
}
|
||||
|
||||
@ -349,6 +330,39 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService {
|
||||
|
||||
return result;
|
||||
}
|
||||
/**
|
||||
* 知识库Embedding
|
||||
* @param request
|
||||
*/
|
||||
@Override
|
||||
public void embeddingKnowledge (KnowledgeEmbeddingReqVO request) {
|
||||
// 1. 校验知识库是否存在
|
||||
validateKnowledgeParam(request);
|
||||
|
||||
// 2. 更新知识库主表基础信息
|
||||
KnowledgeBaseDO updateObj = BeanUtils.toBean(request, KnowledgeBaseDO.class);
|
||||
knowledgeBaseMapper.updateById(updateObj);
|
||||
|
||||
// 3. 处理附表(知识文档)数据
|
||||
handleKnowledgeDocuments(request, updateObj);
|
||||
}
|
||||
|
||||
/**
|
||||
* 更新知识库命中测试参数
|
||||
* @param updateReqVO
|
||||
*/
|
||||
@Override
|
||||
public void updateHitParam (KnowledgeBaseSaveReqVO updateReqVO) {
|
||||
// 校验存在
|
||||
validateKnowledgeBaseExists(updateReqVO.getId());
|
||||
// 更新
|
||||
KnowledgeBaseDO updateObj = new KnowledgeBaseDO();
|
||||
updateObj.setId(updateReqVO.getId())
|
||||
.setTopK(updateReqVO.getTopK())
|
||||
.setScore(updateReqVO.getScore())
|
||||
;
|
||||
knowledgeBaseMapper.updateById(updateObj);
|
||||
}
|
||||
|
||||
/**
|
||||
* 校验知识库是否存在
|
||||
|
Loading…
x
Reference in New Issue
Block a user