From 8923987afb811d7a708b9e4ef58d80666f0ebfb9 Mon Sep 17 00:00:00 2001 From: Liuyang <2746366019@qq.com> Date: Thu, 13 Mar 2025 11:01:43 +0800 Subject: [PATCH 1/7] =?UTF-8?q?refactor(yudao-module-llm):=20=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E6=B5=81=E5=BC=8F=E8=81=8A=E5=A4=A9=E6=8E=A5=E5=8F=A3?= =?UTF-8?q?=E7=9A=84=E5=BC=82=E6=AD=A5=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -移除了未使用的代码块,简化了代码结构 - 保留了 SseEmitter 的创建和异步处理逻辑 - 删除了冗余的注释和未使用的 ExecutorService 相关代码 --- .../conversation/ConversationController.java | 23 ++----------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/conversation/ConversationController.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/conversation/ConversationController.java index 555951f8b..cb4cd11ec 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/conversation/ConversationController.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/conversation/ConversationController.java @@ -109,27 +109,8 @@ public class ConversationController { @PostMapping("/stream-chat") public SseEmitter streamChat (@Valid @RequestBody ChatReqVO chatReqVO, HttpServletResponse response) { log.info("收到对话推理请求,请求参数: {}", chatReqVO); - SseEmitter emitter = new SseEmitter(120_000L); - // ExecutorService executor = Executors.newSingleThreadExecutor(); - // try { - // executor.execute(() -> { - // try { - // conversationService.chatStream(chatReqVO, emitter, response); - // } catch (Exception e) { - // emitter.completeWithError(e); - // } finally { - // executor.shutdown(); - // } - // }); - // } catch (Exception e) { - // log.error("处理对话推理请求时发生异常", e); - // try { - // emitter.completeWithError(e); - // } catch 
(Exception ex) { - // log.error("无法完成 SseEmitter 错误处理", ex); - // } - // } - // log.info("返回 SseEmitter 对象,准备进行流式响应"); + SseEmitter emitter = new SseEmitter(120_0000L); + // 异步处理,避免阻塞主线程 CompletableFuture.runAsync(() -> { try { From 0018c535a79fe556ee73012283cc861a99fe0eb8 Mon Sep 17 00:00:00 2001 From: Liuyang <2746366019@qq.com> Date: Thu, 13 Mar 2025 13:41:39 +0800 Subject: [PATCH 2/7] =?UTF-8?q?feat(llm):=20=E7=9F=A5=E8=AF=86=E5=BA=93?= =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E5=88=86=E5=9D=97=E9=87=8D=E5=8F=A0=E5=8F=82?= =?UTF-8?q?=E6=95=B0=E5=B9=B6=E4=BC=98=E5=8C=96=E7=9B=B8=E5=85=B3=E9=80=BB?= =?UTF-8?q?=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 KnowledgeBaseDO、KnowledgeBaseSaveReqVO 和 KnowledgeRagEmbedReqVO 中添加分块重叠字段 - 优化知识库更新逻辑,增加参数校验和错误处理 - 调整文档处理流程,支持分块大小和重叠参数 - 新增错误码常量,用于处理分块参数相关的错误 --- .../module/llm/enums/ErrorCodeConstants.java | 6 + .../vo/KnowledgeBaseSaveReqVO.java | 7 + .../knowledgebase/KnowledgeBaseDO.java | 10 + .../llm/service/async/AsyncKnowledgeBase.java | 17 +- .../llm/service/http/RagHttpService.java | 4 +- .../http/vo/KnowledgeRagEmbedReqVO.java | 5 + .../KnowledgeBaseServiceImpl.java | 182 ++++++++++++------ 7 files changed, 161 insertions(+), 70 deletions(-) diff --git a/yudao-module-llm/yudao-module-llm-api/src/main/java/cn/iocoder/yudao/module/llm/enums/ErrorCodeConstants.java b/yudao-module-llm/yudao-module-llm-api/src/main/java/cn/iocoder/yudao/module/llm/enums/ErrorCodeConstants.java index 035b54fdb..fb4a79563 100644 --- a/yudao-module-llm/yudao-module-llm-api/src/main/java/cn/iocoder/yudao/module/llm/enums/ErrorCodeConstants.java +++ b/yudao-module-llm/yudao-module-llm-api/src/main/java/cn/iocoder/yudao/module/llm/enums/ErrorCodeConstants.java @@ -103,6 +103,12 @@ public interface ErrorCodeConstants { ErrorCode KNOWLEDGE_BASE_NAME_NOT_EXISTS = new ErrorCode(10040, "知识库名称已存在"); + ErrorCode CHUNK_SIZE_MUST_BE_GREATER_THAN_ZERO = new ErrorCode(10040_1, "分块大小必须大于 0"); + + ErrorCode 
CHUNK_OVERLAP_MUST_BE_GREATER_THAN_OR_EQUAL_TO_ZERO = new ErrorCode(10040_2, "分块重叠必须大于或等于 0"); + + ErrorCode CHUNK_OVERLAP_MUST_BE_LESS_THAN_CHUNK_SIZE = new ErrorCode(10040_3, "分块重叠必须小于分块大小"); + ErrorCode APPLICATION_NAME_NOT_EXISTS = new ErrorCode(10041, "应用中心名称已存在"); ErrorCode MODEL_SERVIC_ENAME_NOT_EXISTS = new ErrorCode(10043, "模型名称已存在"); diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeBaseSaveReqVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeBaseSaveReqVO.java index 0c5d43d69..c0a9be386 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeBaseSaveReqVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeBaseSaveReqVO.java @@ -35,8 +35,15 @@ public class KnowledgeBaseSaveReqVO { /** * 分块大小 */ + @Schema(description = "分块大小") private Integer chunkSize; + /** + * 分块重叠 + */ + @Schema(description = "分块重叠,") + private Integer chunkOverlap; + @Schema(description = "文件引用") private String knowledgeFile; diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/knowledgebase/KnowledgeBaseDO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/knowledgebase/KnowledgeBaseDO.java index 6f1b957b3..79aefc497 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/knowledgebase/KnowledgeBaseDO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/knowledgebase/KnowledgeBaseDO.java @@ -4,6 +4,7 @@ import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; import com.baomidou.mybatisplus.annotation.KeySequence; import 
com.baomidou.mybatisplus.annotation.TableId; import com.baomidou.mybatisplus.annotation.TableName; +import io.swagger.v3.oas.annotations.media.Schema; import lombok.*; /** @@ -55,4 +56,13 @@ public class KnowledgeBaseDO extends BaseDO { */ private String knowledgeFile; + /** + * 分块大小 + */ + private Integer chunkSize; + + /** + * 分块重叠 + */ + private Integer chunkOverlap; } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java index 2823f33a9..b6e8213c6 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java @@ -11,15 +11,12 @@ import cn.iocoder.yudao.module.llm.service.http.vo.KnowledgeRagEmbedReqVO; import cn.iocoder.yudao.module.llm.service.http.vo.RegUploadReqVO; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; import javax.annotation.Resource; -import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.io.InputStream; -import java.net.URL; import java.util.List; +import java.util.Map; import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception; @@ -38,8 +35,8 @@ public class AsyncKnowledgeBase { // 向向量知识库创建文件 -// @Async - public void createKnowledgeBase(List knowledgeList, List ids) { + // @Async + public void createKnowledgeBase (List knowledgeList, List ids, Map knowledgeParameters) { log.info("开始执行 createKnowledgeBase 方法。knowledgeList 大小: {}, ids 大小: {}", knowledgeList.size(), ids.size()); // 如果提供了 ids,则删除现有的知识库文档 @@ -80,7 +77,7 @@ public class AsyncKnowledgeBase { if (lastIndex != -1) { String extension = 
knowledge.getDocumentName().substring(lastIndex + 1).toLowerCase(); log.info("文档扩展名: {}", extension); - knowledgeEmbed(knowledge, knowledge.getKnowledgeBaseId()); + knowledgeEmbed(knowledge, knowledge.getKnowledgeBaseId(), knowledgeParameters); } else { log.warn("文档无扩展名,跳过处理,文档 ID: {}", knowledge.getId()); } @@ -119,13 +116,15 @@ public class AsyncKnowledgeBase { * @param knowledge 文件 * @param id 知识库id */ - public void knowledgeEmbed (KnowledgeDocumentsDO knowledge, Long id) { + public void knowledgeEmbed (KnowledgeDocumentsDO knowledge, Long id, Map knowledgeParameters) { // 创建知识向量 KnowledgeRagEmbedReqVO ragEmbedReqVo = new KnowledgeRagEmbedReqVO() .setFileId(String.valueOf(knowledge.getId())) .setFileName(knowledge.getDocumentName()) - .setFileUrl(knowledge.getFileUrl()); + .setFileUrl(knowledge.getFileUrl()) + .setChunkSize(knowledgeParameters.get("chunkSize")) + .setChunkOverlap(knowledgeParameters.get("chunkOverlap")); try { ragHttpService.knowledgeEmbed(ragEmbedReqVo, id); diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java index 4380fdb3f..5ebbebaef 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java @@ -390,9 +390,10 @@ public class RagHttpService { String fileName = reqVO.getFileName(); String fileUrl = reqVO.getFileUrl(); Integer chunkSize = Optional.ofNullable(reqVO.getChunkSize()).orElse(1500); + Integer chunkOverlap = Optional.ofNullable(reqVO.getChunkOverlap()).orElse(300); String mediaType = getMediaType(fileName); - log.info("文件ID: {}, 文件名: {}, 文件URL: {}, 文件类型: {}, 分块大小:{}", fileId, fileName, fileUrl, mediaType,chunkSize); + log.info("文件ID: {}, 文件名: {}, 文件URL: {}, 文件类型: {}, 
分块大小:{}, 分块重叠:{}", fileId, fileName, fileUrl, mediaType,chunkSize,chunkOverlap); // 获取知识库文档 log.info("开始获取知识库文档,知识库ID: {}, 文件ID: {}", id, fileId); @@ -450,6 +451,7 @@ public class RagHttpService { .setType(MultipartBody.FORM) .addFormDataPart("file_id", fileId) .addFormDataPart("chunk_size", String.valueOf(chunkSize)) + .addFormDataPart("chunk_overlap", String.valueOf(chunkOverlap)) .addFormDataPart("file", fileName, RequestBody.create(tempFilePath.toFile(), MediaType.parse(mediaType)) ) diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/KnowledgeRagEmbedReqVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/KnowledgeRagEmbedReqVO.java index 646c3a2bb..5cf945d91 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/KnowledgeRagEmbedReqVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/KnowledgeRagEmbedReqVO.java @@ -46,4 +46,9 @@ public class KnowledgeRagEmbedReqVO { * 分块大小 */ private Integer chunkSize; + + /** + * 分块重叠 + */ + private Integer chunkOverlap; } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java index 3229b0bfb..2b0fc74d9 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java @@ -17,24 +17,17 @@ import cn.iocoder.yudao.module.llm.dal.mysql.knowledgedocuments.KnowledgeDocumen import cn.iocoder.yudao.module.llm.service.application.ApplicationService; import 
cn.iocoder.yudao.module.llm.service.async.AsyncKnowledgeBase; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; -import kong.unirest.Unirest; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; -import org.springframework.transaction.annotation.Transactional; import org.springframework.validation.annotation.Validated; import javax.annotation.Resource; -import javax.annotation.Tainted; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Objects; +import java.util.*; import java.util.stream.Collectors; import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception; -import static cn.iocoder.yudao.module.llm.enums.ErrorCodeConstants.KNOWLEDGE_BASE_NAME_NOT_EXISTS; -import static cn.iocoder.yudao.module.llm.enums.ErrorCodeConstants.KNOWLEDGE_BASE_NOT_EXISTS; +import static cn.iocoder.yudao.module.llm.enums.ErrorCodeConstants.*; /** * 知识库 Service 实现类 @@ -66,68 +59,137 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService { } @Override -// @Transactional(rollbackFor = Exception.class) + // @Transactional(rollbackFor = Exception.class) public void updateKnowledgeBase (KnowledgeBaseSaveReqVO updateReqVO) { + // 1. 校验知识库是否存在 + validateKnowledgeParam(updateReqVO); + + // 2. 更新知识库主表基础信息 + KnowledgeBaseDO updateObj = BeanUtils.toBean(updateReqVO, KnowledgeBaseDO.class); + knowledgeBaseMapper.updateById(updateObj); + + // 3. 处理附表(知识文档)数据 + handleKnowledgeDocuments(updateReqVO, updateObj); + } + + /** + * 校验知识库参数 + * + * @param updateReqVO 更新知识库参数 + */ + private void validateKnowledgeParam (KnowledgeBaseSaveReqVO updateReqVO) { // 1. 校验知识库是否存在 validateKnowledgeBaseExists(updateReqVO.getId()); // 2. 校验知识库名称是否重复 validateKnowledgeBaseNameExists(updateReqVO); - // 3. 
更新知识库主表 - KnowledgeBaseDO updateObj = BeanUtils.toBean(updateReqVO, KnowledgeBaseDO.class); - knowledgeBaseMapper.updateById(updateObj); + // 3. 校验分块大小和分块重叠是否正确 + validateChunkParameters(updateReqVO.getChunkSize(), updateReqVO.getChunkOverlap()); + } -// Unirest.config().reset(); -// Unirest.config() -// .socketTimeout(86400000) -// .connectTimeout(100000) -// .concurrency(10, 5) -// .setDefaultHeader("Accept", "application/json"); + /** + * 校验分块大小和分块重叠是否合法 + * + * @param chunkSize 分块大小 + * @param chunkOverlap 分块重叠 + * @throws IllegalArgumentException 如果校验不通过 + */ + private void validateChunkParameters (int chunkSize, int chunkOverlap) { + if (chunkSize < 1) { + throw exception(CHUNK_SIZE_MUST_BE_GREATER_THAN_ZERO); + } + if (chunkOverlap < 0) { + throw exception(CHUNK_OVERLAP_MUST_BE_GREATER_THAN_OR_EQUAL_TO_ZERO); + } + if (chunkOverlap >= chunkSize) { + throw exception(CHUNK_OVERLAP_MUST_BE_LESS_THAN_CHUNK_SIZE); + } + } - // 4. 处理附表(知识文档)数据 - if (!CollectionUtils.isAnyEmpty(updateReqVO.getKnowledgeDocuments())) { - // 4.1 获取需要保留的文档 ID - List retainedIds = updateReqVO.getKnowledgeDocuments().stream() - .map(KnowledgeDocumentsSaveReqVO::getId) - .filter(Objects::nonNull) - .collect(Collectors.toList()); - - // 4.2 删除不需要保留的文档 - LambdaQueryWrapperX deleteWrapper = new LambdaQueryWrapperX() - .eq(KnowledgeDocumentsDO::getKnowledgeBaseId, updateReqVO.getId()); - if (!CollectionUtils.isAnyEmpty(retainedIds)) { - deleteWrapper.notIn(KnowledgeDocumentsDO::getId, retainedIds); - } - knowledgeDocumentsMapper.delete(deleteWrapper); - - // 4.3 更新或插入文档数据 - List newDocuments = new ArrayList<>(); - updateReqVO.getKnowledgeDocuments().forEach(doc -> { - KnowledgeDocumentsDO docDO = BeanUtils.toBean(doc, KnowledgeDocumentsDO.class); - docDO.setKnowledgeBaseId(updateReqVO.getId()); - docDO.setChunkSize(updateObj.getKnowledgeLength()); - if (doc.getId() == null) { - newDocuments.add(docDO); // 收集新增文档 - } - knowledgeDocumentsMapper.insertOrUpdate(docDO); // 更新或插入文档 - }); - - // 4.4 
异步处理新增文档和删除的文档 - List deleteIds = knowledgeDocumentsMapper.selectDeleteIds(updateReqVO.getId()); - asyncKnowledgeBase.createKnowledgeBase(newDocuments, deleteIds); - } else { - // 5. 如果传入的文档列表为空,则删除所有关联文档 - knowledgeDocumentsMapper.delete(new LambdaQueryWrapperX() - .eq(KnowledgeDocumentsDO::getKnowledgeBaseId, updateReqVO.getId())); - - // 5.1 异步处理删除的文档 - List deleteIds = knowledgeDocumentsMapper.selectDeleteIds(updateReqVO.getId()); - if (!CollectionUtils.isAnyEmpty(deleteIds)) { - asyncKnowledgeBase.createKnowledgeBase(null, deleteIds); - } + /** + * 处理知识文档数据 + * + * @param updateReqVO 更新知识库参数 + * @param updateObj 更新知识库对象 + */ + private void handleKnowledgeDocuments (KnowledgeBaseSaveReqVO updateReqVO, KnowledgeBaseDO updateObj) { + List documents = updateReqVO.getKnowledgeDocuments(); + if (CollectionUtils.isAnyEmpty(documents)) { + // 如果传入的文档列表为空,则删除所有关联文档 + deleteAllDocuments(updateReqVO.getId()); + return; } + // 获取需要保留的文档 ID + List retainedIds = documents.stream() + .map(KnowledgeDocumentsSaveReqVO::getId) + .filter(Objects::nonNull) + .collect(Collectors.toList()); + + // 删除不需要保留的文档 + deleteUnretainedDocuments(updateReqVO.getId(), retainedIds); + + // 更新或插入文档数据 + List newDocuments = updateOrInsertDocuments(documents, updateReqVO.getId(), updateObj.getKnowledgeLength()); + + Map knowledgeParameters = new HashMap<>(); + knowledgeParameters.put("chunkSize",updateReqVO.getChunkSize()); + knowledgeParameters.put("chunkOverlap",updateReqVO.getChunkOverlap()); + + // 异步处理新增文档和删除的文档 + List deleteIds = knowledgeDocumentsMapper.selectDeleteIds(updateReqVO.getId()); + asyncKnowledgeBase.createKnowledgeBase(newDocuments, deleteIds,knowledgeParameters); + } + + /** + * 删除所有关联的文档 + * + * @param knowledgeBaseId 知识库 ID + */ + private void deleteAllDocuments (Long knowledgeBaseId) { + knowledgeDocumentsMapper.delete(new LambdaQueryWrapperX() + .eq(KnowledgeDocumentsDO::getKnowledgeBaseId, knowledgeBaseId)); + + // 异步处理删除的文档 + List deleteIds = 
knowledgeDocumentsMapper.selectDeleteIds(knowledgeBaseId); + if (!CollectionUtils.isAnyEmpty(deleteIds)) { + asyncKnowledgeBase.createKnowledgeBase(new ArrayList<>(), deleteIds,new HashMap<>()); + } + } + + /** + * 删除不需要保留的文档 + * @param knowledgeBaseId 知识库 ID + * @param retainedIds 需要保留的文档 ID + */ + private void deleteUnretainedDocuments (Long knowledgeBaseId, List retainedIds) { + LambdaQueryWrapperX deleteWrapper = new LambdaQueryWrapperX() + .eq(KnowledgeDocumentsDO::getKnowledgeBaseId, knowledgeBaseId); + if (!CollectionUtils.isAnyEmpty(retainedIds)) { + deleteWrapper.notIn(KnowledgeDocumentsDO::getId, retainedIds); + } + knowledgeDocumentsMapper.delete(deleteWrapper); + } + + /** + * 更新或插入文档数据 + * @param documents 需要更新的文档数据 + * @param knowledgeBaseId 知识库 ID + * @param chunkSize + * @return 更新或插入的文档数据 + */ + private List updateOrInsertDocuments (List documents, Long knowledgeBaseId, Integer chunkSize) { + List newDocuments = new ArrayList<>(); + documents.forEach(doc -> { + KnowledgeDocumentsDO docDO = BeanUtils.toBean(doc, KnowledgeDocumentsDO.class); + docDO.setKnowledgeBaseId(knowledgeBaseId); + if (doc.getId() == null) { + newDocuments.add(docDO); // 收集新增文档 + } + knowledgeDocumentsMapper.insertOrUpdate(docDO); // 更新或插入文档 + }); + return newDocuments; } @Override From 52f0a6a463aa7fd91f44b9296c83ba6abab6ccf8 Mon Sep 17 00:00:00 2001 From: Liuyang <2746366019@qq.com> Date: Thu, 13 Mar 2025 14:46:22 +0800 Subject: [PATCH 3/7] =?UTF-8?q?feat(llm):=20=E5=A2=9E=E5=8A=A0=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E5=91=BD=E4=B8=AD=E7=8E=87=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 KnowledgeHitRateTestReqVO 和 KnowledgeHitRateTestResultVO 类用于命中率测试请求和响应- 在 KnowledgeBaseController 中添加 executeHitRateTest 方法处理命中率测试请求 - 在 KnowledgeBaseService接口中定义 executeHitRateTest 方法 - 在 KnowledgeBaseServiceImpl 中实现 executeHitRateTest 方法,包括查询知识库文档、调用 RAG 查询接口和解析结果 - 新增 
DocumentInfoVO、MetadataVO、QueryMultipleReqVO 和 QueryResultPairVO 类用于 RAG 查询请求和响应 - 修改 AsyncKnowledgeBase 和 RagHttpService 以支持命中率测试功能 --- .../KnowledgeBaseController.java | 18 +++- .../vo/KnowledgeHitRateTestReqVO.java | 29 +++++ .../vo/KnowledgeHitRateTestResultVO.java | 29 +++++ .../llm/service/async/AsyncKnowledgeBase.java | 25 +++++ .../llm/service/http/RagHttpService.java | 100 ++++++++++++++++-- .../vo/query/multiple/DocumentInfoVO.java | 29 +++++ .../http/vo/query/multiple/MetadataVO.java | 30 ++++++ .../vo/query/multiple/QueryMultipleReqVO.java | 32 ++++++ .../vo/query/multiple/QueryResultPairVO.java | 19 ++++ .../knowledgebase/KnowledgeBaseService.java | 11 +- .../KnowledgeBaseServiceImpl.java | 35 +++++- 11 files changed, 338 insertions(+), 19 deletions(-) create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestReqVO.java create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestResultVO.java create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/DocumentInfoVO.java create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/MetadataVO.java create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryMultipleReqVO.java create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryResultPairVO.java diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/KnowledgeBaseController.java 
b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/KnowledgeBaseController.java index eef3bc6bc..2098c82a4 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/KnowledgeBaseController.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/KnowledgeBaseController.java @@ -6,9 +6,7 @@ import cn.iocoder.yudao.framework.common.pojo.PageParam; import cn.iocoder.yudao.framework.common.pojo.PageResult; import cn.iocoder.yudao.framework.common.util.object.BeanUtils; import cn.iocoder.yudao.framework.excel.core.util.ExcelUtils; -import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeBasePageReqVO; -import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeBaseRespVO; -import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeBaseSaveReqVO; +import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.*; import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgebase.KnowledgeBaseDO; import cn.iocoder.yudao.module.llm.service.knowledgebase.KnowledgeBaseService; import io.swagger.v3.oas.annotations.Operation; @@ -43,6 +41,20 @@ public class KnowledgeBaseController { return success(knowledgeBaseService.createKnowledgeBase(createReqVO)); } + /** + * 执行知识库命中测试 + * + * @param testReqVO 命中测试请求参数 + * @return 命中测试结果 + */ + @PostMapping("/hit-test") + @Operation(summary = "执行知识库命中测试") + public CommonResult> executeHitRateTest( + @Valid @RequestBody KnowledgeHitRateTestReqVO testReqVO) { + List result = knowledgeBaseService.executeHitRateTest(testReqVO); + return success(result); + } + @PutMapping("/update") @Operation(summary = "更新知识库") // @PreAuthorize("@ss.hasPermission('llm:knowledge-base:update')") diff --git 
a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestReqVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestReqVO.java new file mode 100644 index 000000000..4163e848e --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestReqVO.java @@ -0,0 +1,29 @@ +package cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo; + +import lombok.Data; + +import javax.validation.constraints.NotNull; + +/** + * @Description 知识库命中率测试请求参数 + */ +@Data +public class KnowledgeHitRateTestReqVO { + /** + * 查询内容 + */ + @NotNull(message = "查询内容不能为空") + private String query; + + /** + * 知识库ID + */ + @NotNull(message = "知识库ID不能为空") + private Long knowledgeId; + + /** + * 返回结果的条数(k值) + */ +// @NotNull(message = "k值不能为空") + private Integer k; +} diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestResultVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestResultVO.java new file mode 100644 index 000000000..c85eeb00e --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestResultVO.java @@ -0,0 +1,29 @@ +package cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo; + +import lombok.Data; + +/** + * @Description 知识库命中率测试返回结果 + */ +@Data +public class KnowledgeHitRateTestResultVO { + /** + * 页面内容 + */ + private String pageContent; + + /** + * 命中率 + */ + private Double hitRate; + + /** + * 摘要信息 + */ + private String digest; + + /** + * 文件ID + */ + private Long fileId; +} diff --git 
a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java index b6e8213c6..c08c61969 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java @@ -2,6 +2,7 @@ package cn.iocoder.yudao.module.llm.service.async; import cn.iocoder.yudao.framework.common.exception.ErrorCode; import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils; +import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeHitRateTestResultVO; import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgedocuments.KnowledgeDocumentsDO; import cn.iocoder.yudao.module.llm.dal.mysql.knowledgedocuments.KnowledgeDocumentsMapper; import cn.iocoder.yudao.module.llm.enums.KnowledgeStatusEnum; @@ -9,12 +10,16 @@ import cn.iocoder.yudao.module.llm.framework.backend.config.LLMBackendProperties import cn.iocoder.yudao.module.llm.service.http.RagHttpService; import cn.iocoder.yudao.module.llm.service.http.vo.KnowledgeRagEmbedReqVO; import cn.iocoder.yudao.module.llm.service.http.vo.RegUploadReqVO; +import cn.iocoder.yudao.module.llm.service.http.vo.query.multiple.QueryMultipleReqVO; +import cn.iocoder.yudao.module.llm.service.http.vo.query.multiple.QueryResultPairVO; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import javax.annotation.Resource; import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -133,4 +138,24 @@ public class AsyncKnowledgeBase { } } + + public List executeHitRateTest (String query, List fileIds, Integer k) { + QueryMultipleReqVO vo = new 
QueryMultipleReqVO(); + vo.setQuery(query); + vo.setFileIds(Collections.singletonList(String.valueOf(fileIds))); + vo.setK(k); + + List resultList = new ArrayList<>(); + + List result = ragHttpService.executeHitRateTest(vo); + for (QueryResultPairVO pair : result) { + KnowledgeHitRateTestResultVO resultVO = new KnowledgeHitRateTestResultVO(); + resultVO.setPageContent(pair.getDocument().getPageContent()); + resultVO.setHitRate(pair.getHitRate()); + resultVO.setDigest(pair.getDocument().getMetadata().getDigest()); + resultVO.setFileId(Long.parseLong(pair.getDocument().getMetadata().getFileId())); + resultList.add(resultVO); + } + return resultList; + } } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java index 5ebbebaef..732cb041c 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java @@ -1,6 +1,7 @@ package cn.iocoder.yudao.module.llm.service.http; +import cn.hutool.http.Header; import cn.hutool.http.HttpRequest; import cn.iocoder.yudao.framework.common.exception.ErrorCode; import cn.iocoder.yudao.framework.common.util.http.HttpUtils; @@ -11,6 +12,9 @@ import cn.iocoder.yudao.module.llm.dal.mysql.knowledgedocuments.KnowledgeDocumen import cn.iocoder.yudao.module.llm.enums.KnowledgeStatusEnum; import cn.iocoder.yudao.module.llm.framework.backend.config.LLMBackendProperties; import cn.iocoder.yudao.module.llm.service.http.vo.*; +import cn.iocoder.yudao.module.llm.service.http.vo.query.multiple.DocumentInfoVO; +import cn.iocoder.yudao.module.llm.service.http.vo.query.multiple.QueryMultipleReqVO; +import cn.iocoder.yudao.module.llm.service.http.vo.query.multiple.QueryResultPairVO; import 
com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONException; @@ -49,6 +53,7 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardCopyOption; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Optional; @@ -393,7 +398,7 @@ public class RagHttpService { Integer chunkOverlap = Optional.ofNullable(reqVO.getChunkOverlap()).orElse(300); String mediaType = getMediaType(fileName); - log.info("文件ID: {}, 文件名: {}, 文件URL: {}, 文件类型: {}, 分块大小:{}, 分块重叠:{}", fileId, fileName, fileUrl, mediaType,chunkSize,chunkOverlap); + log.info("文件ID: {}, 文件名: {}, 文件URL: {}, 文件类型: {}, 分块大小:{}, 分块重叠:{}", fileId, fileName, fileUrl, mediaType, chunkSize, chunkOverlap); // 获取知识库文档 log.info("开始获取知识库文档,知识库ID: {}, 文件ID: {}", id, fileId); @@ -418,15 +423,15 @@ public class RagHttpService { Path tempFilePath = downloadFileToTemp(fileUrl, fileName); log.info("文件已下载到临时目录: {}", tempFilePath); -// String fileSuffix = getFileSuffix(fileName); -// if ("doc".equals(fileSuffix)) { -// log.info("正在处理 doc 文件"); -// try { -// tempFilePath = converterDocToDocx(tempFilePath.toString(), tempFilePath.toString().replace(".doc", ".docx")); -// } catch (Exception e) { -// throw new RuntimeException(e); -// } -// } + // String fileSuffix = getFileSuffix(fileName); + // if ("doc".equals(fileSuffix)) { + // log.info("正在处理 doc 文件"); + // try { + // tempFilePath = converterDocToDocx(tempFilePath.toString(), tempFilePath.toString().replace(".doc", ".docx")); + // } catch (Exception e) { + // throw new RuntimeException(e); + // } + // } // if ("md".equals(fileSuffix)) { // log.info("正在处理 md 文件"); @@ -730,4 +735,79 @@ public class RagHttpService { private KnowledgeDocumentsDO getKnowledgeDocuments (String fileId) { return knowledgeDocumentsMapper.selectById(fileId); } + + public List executeHitRateTest (QueryMultipleReqVO vo) { + + String jsonString = JSON.toJSONString(vo); + String 
url = llmBackendProperties.getRagQueryMultiple(); + //链式构建请求 + String result2 = HttpRequest.post(url) + .header(Header.ACCEPT, "application/json") + .header(Header.CONTENT_TYPE, "application/json") + .body(jsonString) + .timeout(20000) + .execute().body(); + cn.hutool.core.lang.Console.log(result2); + return parseHitRateTestResults(result2); + } + + private static List parseHitRateTestResults (String json) { + // 将 JSON 转换为 List + // 解析 JSON 数组 + JSONArray jsonArray = JSON.parseArray(json); + + // 创建结果列表 + List results = new ArrayList<>(); + + // 遍历 JSON 数组 + for (int i = 0; i < jsonArray.size(); i++) { + JSONArray pairArray = jsonArray.getJSONArray(i); + + // 解析文档信息 + JSONObject documentJson = pairArray.getJSONObject(0); + DocumentInfoVO document = JSON.parseObject(documentJson.toJSONString(), DocumentInfoVO.class); + + // 解析命中率 + double hitRate = pairArray.getDoubleValue(1); + + // 创建 QueryResultPair 对象并添加到结果列表 + QueryResultPairVO pair = new QueryResultPairVO(); + pair.setDocument(document); + pair.setHitRate(hitRate); + results.add(pair); + } + +// // 访问数据 +// for (QueryResultPairVO pair : results) { +// System.out.println("Page Content: " + pair.getDocument().getPageContent()); +// System.out.println("Hit Rate: " + pair.getHitRate()); +// System.out.println("File ID: " + pair.getDocument().getMetadata().getFileId()); +// System.out.println("----------------------"); +// } + + return results; + } + + public static void main (String[] args) { + List ids = new ArrayList<>(); + ids.add("1111"); + ids.add("1234"); + QueryMultipleReqVO vo = new QueryMultipleReqVO(); + vo.setQuery("可乐鸡翅怎么做"); + vo.setFileIds(ids); + vo.setK(4); + String jsonString = JSON.toJSONString(vo); + String url = "http://192.168.18.66:8123/query_multiple"; + //链式构建请求 + String result2 = HttpRequest.post(url) + .header(Header.ACCEPT, "application/json") + .header(Header.CONTENT_TYPE, "application/json") + .body(jsonString) + .timeout(20000) + .execute().body(); + 
cn.hutool.core.lang.Console.log(result2); + // extracted(result2); + } + + } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/DocumentInfoVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/DocumentInfoVO.java new file mode 100644 index 000000000..e261a6a57 --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/DocumentInfoVO.java @@ -0,0 +1,29 @@ +package cn.iocoder.yudao.module.llm.service.http.vo.query.multiple; + +import lombok.Data; + +/** + * @Description 文档信息类 + */ +@Data +public class DocumentInfoVO { + /** + * 文档ID(可为空) + */ + private String id; + + /** + * 元数据 + */ + private MetadataVO metadata; + + /** + * 页面内容 + */ + private String pageContent; + + /** + * 文档类型 + */ + private String type; +} diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/MetadataVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/MetadataVO.java new file mode 100644 index 000000000..aac7c7285 --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/MetadataVO.java @@ -0,0 +1,30 @@ +package cn.iocoder.yudao.module.llm.service.http.vo.query.multiple; + +import lombok.Data; + +/** + * @Description 文档元数据类 + */ +@Data +public class MetadataVO { + /** + * 文件ID + */ + private String fileId; + + /** + * 用户ID + */ + private String userId; + + /** + * 文件摘要 + */ + private String digest; + + /** + * 文件来源路径 + */ + private String source; + +} diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryMultipleReqVO.java 
b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryMultipleReqVO.java new file mode 100644 index 000000000..2f14cf62e --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryMultipleReqVO.java @@ -0,0 +1,32 @@ +package cn.iocoder.yudao.module.llm.service.http.vo.query.multiple; + +import com.alibaba.fastjson.annotation.JSONField; +import lombok.Data; + +import javax.validation.constraints.NotNull; +import java.util.List; + +/** + * @Description 知识库多文件查询 + */ +@Data +public class QueryMultipleReqVO { + /** + * 查询内容 + */ + @NotNull(message = "查询内容不能为空") + private String query; + + /** + * 文件ID列表 + */ + @NotNull(message = "文件ID列表不能为空") + @JSONField(name = "file_ids") + private List fileIds; + + /** + * 返回结果的条数(k值) + */ +// @NotNull(message = "k值不能为空") + private Integer k; +} diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryResultPairVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryResultPairVO.java new file mode 100644 index 000000000..0b8c20e7e --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryResultPairVO.java @@ -0,0 +1,19 @@ +package cn.iocoder.yudao.module.llm.service.http.vo.query.multiple; + +import lombok.Data; + +/** + * @Description 查询结果对(包含文档信息和命中率) + */ +@Data +public class QueryResultPairVO { + /** + * 文档信息 + */ + private DocumentInfoVO document; + + /** + * 命中率 + */ + private Double hitRate; +} diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseService.java index 
26f3117b0..35653d8b3 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseService.java @@ -1,9 +1,7 @@ package cn.iocoder.yudao.module.llm.service.knowledgebase; import cn.iocoder.yudao.framework.common.pojo.PageResult; -import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeBasePageReqVO; -import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeBaseRespVO; -import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeBaseSaveReqVO; +import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.*; import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgebase.KnowledgeBaseDO; import javax.validation.Valid; @@ -70,4 +68,11 @@ public interface KnowledgeBaseService { * @param updateReqVO 更新信息 */ void updateKnowledgeBaseInfo (@Valid KnowledgeBaseSaveReqVO updateReqVO); + + /** + * 执行知识库命中测试 + * @param testReqVO 测试信息 + * @return 返回结果 + */ + List executeHitRateTest (@Valid KnowledgeHitRateTestReqVO testReqVO); } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java index 2b0fc74d9..7791ec316 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java @@ -5,9 +5,7 @@ import cn.iocoder.yudao.framework.common.pojo.PageResult; import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils; import cn.iocoder.yudao.framework.common.util.object.BeanUtils; 
import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX; -import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeBasePageReqVO; -import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeBaseRespVO; -import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeBaseSaveReqVO; +import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.*; import cn.iocoder.yudao.module.llm.controller.admin.knowledgedocuments.vo.KnowledgeDocumentsRespVO; import cn.iocoder.yudao.module.llm.controller.admin.knowledgedocuments.vo.KnowledgeDocumentsSaveReqVO; import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgebase.KnowledgeBaseDO; @@ -298,6 +296,37 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService { knowledgeBaseMapper.updateById(knowledgeBaseDO); } + /** + * 执行知识库命中测试 + * + * @param testReqVO 测试信息 + * @return 返回结果 + */ + @Override + public List executeHitRateTest (KnowledgeHitRateTestReqVO testReqVO) { + Long knowledgeId = testReqVO.getKnowledgeId(); + + // 根据知识库ID获取参数信息,关联文档 + List documentsDOS = knowledgeDocumentsMapper.selectList(new LambdaQueryWrapper() + .eq(KnowledgeDocumentsDO::getKnowledgeBaseId, knowledgeId)); + if (com.baomidou.mybatisplus.core.toolkit.CollectionUtils.isEmpty(documentsDOS)) { + throw exception(KNOWLEDGE_DOCUMENTS_NOT_EXISTS); + } + + // 获取fileId列表 + List fileIds = documentsDOS.stream() + .map(KnowledgeDocumentsDO::getFileId) + .collect(Collectors.toList()); + + List result = asyncKnowledgeBase.executeHitRateTest(testReqVO.getQuery(), fileIds, testReqVO.getK()); + + if (com.baomidou.mybatisplus.core.toolkit.CollectionUtils.isEmpty(result)){ + return Collections.emptyList(); + } + + return result; + } + /** * 校验知识库是否存在 * From 8cb60e82a8c737de2ad16ad16da19b9d74b08637 Mon Sep 17 00:00:00 2001 From: Liuyang <2746366019@qq.com> Date: Thu, 13 Mar 2025 14:47:32 +0800 Subject: [PATCH 4/7] =?UTF-8?q?feat(module-llm):=20=E4=B8=BA=20KnowledgeBa?= 
=?UTF-8?q?seRespVO=20=E7=B1=BB=E5=A2=9E=E5=8A=A0=E5=88=86=E5=9D=97?= =?UTF-8?q?=E5=A4=A7=E5=B0=8F=E5=92=8C=E9=87=8D=E5=8F=A0=E5=AD=97=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 在 KnowledgeBaseRespVO 类中添加 chunkSize 和 chunkOverlap 字段 - 为新字段添加 @Schema 注解以描述其用途 --- .../admin/knowledgebase/vo/KnowledgeBaseRespVO.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeBaseRespVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeBaseRespVO.java index 6425b3cf6..d7524603a 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeBaseRespVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeBaseRespVO.java @@ -52,4 +52,15 @@ public class KnowledgeBaseRespVO { @Schema(description = "文件引用上传") private List knowledgeDocuments; -} \ No newline at end of file + /** + * 分块大小 + */ + @Schema(description = "分块大小") + private Integer chunkSize; + + /** + * 分块重叠 + */ + @Schema(description = "分块重叠,") + private Integer chunkOverlap; +} From 9e82ebdf5a9f781b3993c64a103c9820ad53b13a Mon Sep 17 00:00:00 2001 From: Liuyang <2746366019@qq.com> Date: Thu, 13 Mar 2025 15:23:40 +0800 Subject: [PATCH 5/7] =?UTF-8?q?refactor(llm):=20=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E7=9F=A5=E8=AF=86=E5=BA=93=E5=91=BD=E4=B8=AD=E7=8E=87=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加文件名字段并格式化命中率显示 - 增加知识库存在性检查和默认 topK 值设置 - 优化日志输出,记录请求参数和结果 - 统一数据类型:将命中率从 Double 改为 String --- .../vo/KnowledgeHitRateTestResultVO.java | 7 ++++++- .../llm/service/async/AsyncKnowledgeBase.java | 21 
+++++++++++++++++-- .../llm/service/http/RagHttpService.java | 9 ++++++-- .../KnowledgeBaseServiceImpl.java | 12 +++++++++++ 4 files changed, 44 insertions(+), 5 deletions(-) diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestResultVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestResultVO.java index c85eeb00e..886f99153 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestResultVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestResultVO.java @@ -15,7 +15,7 @@ public class KnowledgeHitRateTestResultVO { /** * 命中率 */ - private Double hitRate; + private String hitRate; /** * 摘要信息 @@ -26,4 +26,9 @@ public class KnowledgeHitRateTestResultVO { * 文件ID */ private Long fileId; + + /** + * 文件名称 + */ + private String fileName; } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java index c08c61969..d3cfc49f8 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java @@ -12,12 +12,15 @@ import cn.iocoder.yudao.module.llm.service.http.vo.KnowledgeRagEmbedReqVO; import cn.iocoder.yudao.module.llm.service.http.vo.RegUploadReqVO; import cn.iocoder.yudao.module.llm.service.http.vo.query.multiple.QueryMultipleReqVO; import cn.iocoder.yudao.module.llm.service.http.vo.query.multiple.QueryResultPairVO; +import 
org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import javax.annotation.Resource; import java.io.IOException; +import java.math.RoundingMode; +import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -151,9 +154,23 @@ public class AsyncKnowledgeBase { for (QueryResultPairVO pair : result) { KnowledgeHitRateTestResultVO resultVO = new KnowledgeHitRateTestResultVO(); resultVO.setPageContent(pair.getDocument().getPageContent()); - resultVO.setHitRate(pair.getHitRate()); + + DecimalFormat df = new DecimalFormat("0.00%"); + df.setRoundingMode(RoundingMode.HALF_UP); + String rateResult = df.format(pair.getHitRate()); + resultVO.setHitRate(rateResult); resultVO.setDigest(pair.getDocument().getMetadata().getDigest()); - resultVO.setFileId(Long.parseLong(pair.getDocument().getMetadata().getFileId())); + long fileId = Long.parseLong(pair.getDocument().getMetadata().getFileId()); + resultVO.setFileId(fileId); + + // 根据 fileId 查找文件名 + KnowledgeDocumentsDO documents = knowledgeDocumentsMapper.selectOne(KnowledgeDocumentsDO::getFileId, fileId); + if (documents!=null && StringUtils.isNotBlank(documents.getDocumentName())){ + resultVO.setFileName(documents.getDocumentName()); + }else { + resultVO.setFileName("未知文件"); + } + resultList.add(resultVO); } return resultList; diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java index 732cb041c..66c7c2781 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java @@ -47,12 +47,14 @@ import 
org.springframework.stereotype.Service; import javax.annotation.Resource; import java.io.*; +import java.math.RoundingMode; import java.net.URL; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardCopyOption; +import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -748,6 +750,9 @@ public class RagHttpService { .timeout(20000) .execute().body(); cn.hutool.core.lang.Console.log(result2); + + log.info("请求参数: {}",JSON.toJSONString(jsonString)); + log.info("请求结果: {}",JSON.toJSONString(result2)); return parseHitRateTestResults(result2); } @@ -768,12 +773,12 @@ public class RagHttpService { DocumentInfoVO document = JSON.parseObject(documentJson.toJSONString(), DocumentInfoVO.class); // 解析命中率 - double hitRate = pairArray.getDoubleValue(1); + Double rate = pairArray.getDoubleValue(1); // 创建 QueryResultPair 对象并添加到结果列表 QueryResultPairVO pair = new QueryResultPairVO(); pair.setDocument(document); - pair.setHitRate(hitRate); + pair.setHitRate(rate); results.add(pair); } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java index 7791ec316..77d3a7289 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java @@ -306,6 +306,18 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService { public List executeHitRateTest (KnowledgeHitRateTestReqVO testReqVO) { Long knowledgeId = testReqVO.getKnowledgeId(); + KnowledgeBaseDO baseDO = 
knowledgeBaseMapper.selectOne(KnowledgeBaseDO::getId, knowledgeId); + if (baseDO == null) { + throw exception(KNOWLEDGE_BASE_NOT_EXISTS); + } + Integer topK=4; + if(baseDO.getTopK()==null||baseDO.getTopK()<=0){ + testReqVO.setK(topK); + }else { + topK=baseDO.getTopK(); + testReqVO.setK(topK); + } + // 根据知识库ID获取参数信息,关联文档 List documentsDOS = knowledgeDocumentsMapper.selectList(new LambdaQueryWrapper() .eq(KnowledgeDocumentsDO::getKnowledgeBaseId, knowledgeId)); From b29d9c5b0c95606f8d51fbc3929568c5e02d1af5 Mon Sep 17 00:00:00 2001 From: Liuyang <2746366019@qq.com> Date: Thu, 13 Mar 2025 15:51:10 +0800 Subject: [PATCH 6/7] =?UTF-8?q?refactor(llm):=20=E9=87=8D=E6=9E=84?= =?UTF-8?q?=E7=9F=A5=E8=AF=86=E5=BA=93=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增 KnowledgeBaseService 接口并注入到 ConversationServiceImpl - 优化知识库字符串处理逻辑,增加空字符串处理 - 重构系统提示和知识库字符串的组合方式 - 新增知识库命中率测试相关功能 - 优化知识库数据结构,支持段落命中率计算 --- .../conversation/ConversationServiceImpl.java | 108 ++++++++++++++---- 1 file changed, 85 insertions(+), 23 deletions(-) diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/conversation/ConversationServiceImpl.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/conversation/ConversationServiceImpl.java index 3204ec00b..ee5bbfca3 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/conversation/ConversationServiceImpl.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/conversation/ConversationServiceImpl.java @@ -13,6 +13,9 @@ import cn.iocoder.yudao.module.llm.controller.admin.application.vo.ApplicationSa import cn.iocoder.yudao.module.llm.controller.admin.conversation.vo.*; import cn.iocoder.yudao.module.llm.controller.admin.conversation.vo.ChatReqVO; import 
cn.iocoder.yudao.module.llm.controller.admin.datarefluxdata.vo.DataRefluxDataSaveReqVO; +import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeBaseSaveReqVO; +import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeHitRateTestReqVO; +import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeHitRateTestResultVO; import cn.iocoder.yudao.module.llm.dal.dataobject.basemodel.BaseModelDO; import cn.iocoder.yudao.module.llm.dal.dataobject.conversation.ConversationDO; import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgedocuments.KnowledgeDocumentsDO; @@ -26,6 +29,7 @@ import cn.iocoder.yudao.module.llm.service.basemodel.BaseModelService; import cn.iocoder.yudao.module.llm.service.datarefluxdata.DataRefluxDataService; import cn.iocoder.yudao.module.llm.service.http.ModelService; import cn.iocoder.yudao.module.llm.service.http.vo.*; +import cn.iocoder.yudao.module.llm.service.knowledgebase.KnowledgeBaseService; import cn.iocoder.yudao.module.llm.service.prompttemplates.PromptTemplatesService; import com.alibaba.excel.util.StringUtils; import com.alibaba.fastjson.JSON; @@ -45,6 +49,7 @@ import javax.servlet.http.HttpServletResponse; import java.math.RoundingMode; import java.text.DecimalFormat; import java.util.*; +import java.util.stream.Collectors; import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception; import static cn.iocoder.yudao.module.llm.enums.ErrorCodeConstants.*; @@ -80,6 +85,9 @@ public class ConversationServiceImpl implements ConversationService { @Resource private LLMBackendProperties llmBackendProperties; + @Resource + private KnowledgeBaseService knowledgeBaseService; + // 聊天会话历史记录缓存Key private final static String CHAT_HIStORY_REDIS_KEY = "llm:chat:history"; // 聊天会话历史记录缓存时间 @@ -439,14 +447,16 @@ public class ConversationServiceImpl implements ConversationService { // 处理 knowledgeBaseString if (StringUtils.isNotBlank(knowledgeBaseString)) { 
knowledgeBaseString = "" + knowledgeBaseString + ""; + }else { + knowledgeBaseString = "" + ""; } // 处理 systemPrompt systemPrompt = StringUtils.isBlank(chatReqVO.getSystemPrompt()) ? PROMPT - : chatReqVO.getSystemPrompt() + "\n" + PROMPT; + : chatReqVO.getSystemPrompt() + " \n " + PROMPT; } - String mess = systemPrompt + knowledgeBaseString; + String mess = systemPrompt + " \n "+knowledgeBaseString; // // 查询历史记录消息,并将查询出来的知识信息放入到 role = system 的消息中 // List messageHistoryList = stringRedisTemplate.opsForList().range(CHAT_HIStORY_REDIS_KEY + ":" + chatReqVO.getUuid(), 0, -1); @@ -532,30 +542,14 @@ public class ConversationServiceImpl implements ConversationService { ParagraphHitRateListVO paragraphHitRateListVO = new ParagraphHitRateListVO(); paragraphHitRateListVO.setUuid(chatReqVO.getUuid()); paragraphHitRateListVO.setGroupId(chatReqVO.getGroupId()); - List words = new ArrayList<>(); - // 2. 遍历处理每个文档 - for (KnowledgeDocumentsDO document : documentList) { - ParagraphHitRateWordVO rateWordVO = processDocument(document, chatReqVO, knowledgeBase); - if (rateWordVO != null) { - words.add(rateWordVO); - } - } - if (CollectionUtils.isEmpty(words)) { - paragraphHitRateListVO.setWordList(Collections.emptyList()); - paragraphHitRateListVO.setGroupId(""); - } else { - paragraphHitRateListVO.setWordList(words); - } + KnowledgeHitRateTestReqVO testReqVO=new KnowledgeHitRateTestReqVO(); + testReqVO.setKnowledgeId(chatReqVO.getKnowledge()); + testReqVO.setQuery(chatReqVO.getPrompt()); - // 请求结果添加到 Redis,查询段落命中率 - String redisKey = String.format("%s:%s", KNOWLEDGE_DOCUMENTS_REDIS_KEY, chatReqVO.getUuid()); - stringRedisTemplate.opsForList().rightPush(redisKey, JSON.toJSONString(paragraphHitRateListVO)); + List result = knowledgeBaseService.executeHitRateTest(testReqVO); + knowledgeBase = handlerResult(result, paragraphHitRateListVO); - List paragraphHitRateList = stringRedisTemplate.opsForList().range(redisKey, 0, -1); - if (paragraphHitRateList != null && 
!paragraphHitRateList.isEmpty()) { - log.info("{} 知识库查询段落命中率: {}", "[KnowledgeBase]", paragraphHitRateList); - } log.info("{} 知识库构建完成,内容长度: {}", LOG_PREFIX, knowledgeBase.length()); } catch (Exception e) { @@ -567,6 +561,74 @@ public class ConversationServiceImpl implements ConversationService { return knowledgeBase; } + private StringBuilder handlerResult (List result, ParagraphHitRateListVO paragraphHitRateListVO) { + if (CollectionUtils.isEmpty(result)){ + return new StringBuilder(); + } + + // 1: 存储到redis + saveRedis(result, paragraphHitRateListVO); + + // 2: 组成返回数据 + StringBuilder knowledgeBase = new StringBuilder(); + result.forEach(item -> { + knowledgeBase.append(item.getPageContent()); + }); + return knowledgeBase; + } + + private void saveRedis (List result, ParagraphHitRateListVO paragraphHitRateListVO) { + if (CollectionUtils.isEmpty(result)){ + return; + } + List words = new ArrayList<>(); + + // 按照fileId分组,存到Map中 + Map> groupedByFileId = result.stream() + .collect(Collectors.groupingBy(KnowledgeHitRateTestResultVO::getFileId)); + + // 遍历Map,查看分组结果 + groupedByFileId.forEach((fileId, list) -> { + System.out.println("File ID: " + fileId); + list.forEach(i->{ + ParagraphHitRateWordVO rateWordVO = new ParagraphHitRateWordVO(); + // 设置文档名称 + rateWordVO.setDocumentName(i.getFileName()); + + // 设置段落命中率 + List paragraphHitRate=new ArrayList<>(); + for (KnowledgeHitRateTestResultVO i1 : list) { + ParagraphHitRateVO rateVO = new ParagraphHitRateVO(); + rateVO.setParagraph(i1.getPageContent()); + rateVO.setHitRate(i1.getHitRate()); + rateVO.setWordCount(i1.getPageContent().length()); + paragraphHitRate.add(rateVO); + } + + rateWordVO.setParagraphHitRate(paragraphHitRate); + + words.add(rateWordVO); + }); + }); + + if (CollectionUtils.isEmpty(words)) { + paragraphHitRateListVO.setWordList(Collections.emptyList()); + paragraphHitRateListVO.setIsExist(false); + } else { + paragraphHitRateListVO.setWordList(words); + paragraphHitRateListVO.setIsExist(true); + } + + 
// 请求结果添加到 Redis,查询段落命中率 + String redisKey = String.format("%s:%s", KNOWLEDGE_DOCUMENTS_REDIS_KEY, paragraphHitRateListVO.getUuid()); + stringRedisTemplate.opsForList().rightPush(redisKey, JSON.toJSONString(paragraphHitRateListVO)); + + List paragraphHitRateList = stringRedisTemplate.opsForList().range(redisKey, 0, -1); + if (paragraphHitRateList != null && !paragraphHitRateList.isEmpty()) { + log.info("{} 知识库查询段落命中率: {}", "[KnowledgeBase]", paragraphHitRateList); + } + } + /** * 处理单个知识库文档的检索逻辑 */ From dff7904e39e6a46f7717f164f87ad292d5a3cb64 Mon Sep 17 00:00:00 2001 From: Liuyang <2746366019@qq.com> Date: Thu, 13 Mar 2025 16:56:27 +0800 Subject: [PATCH 7/7] =?UTF-8?q?feat(llm):=20=E4=BC=98=E5=8C=96=E7=9F=A5?= =?UTF-8?q?=E8=AF=86=E5=BA=93=E5=91=BD=E4=B8=AD=E7=8E=87=E6=B5=8B=E8=AF=95?= =?UTF-8?q?=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 修改 executeHitRateTest 方法签名,使用 KnowledgeHitRateTestReqVO 作为参数 - 优化命中率测试逻辑,增加对 score 阈值的处理 - 调整 KnowledgeBaseDO 中 score 字段类型,从 Integer 改为 Double- 优化 hit rate 测试结果解析逻辑,增加错误处理 - 移除不必要的 DecimalFormat 使用,简化代码 --- .../vo/ParagraphHitRateListVO.java | 1 + .../vo/KnowledgeHitRateTestReqVO.java | 5 +++ .../knowledgebase/KnowledgeBaseDO.java | 2 +- .../llm/service/async/AsyncKnowledgeBase.java | 22 +++++++---- .../llm/service/http/RagHttpService.java | 37 ++++++++++++++----- .../vo/query/multiple/QueryMultipleReqVO.java | 2 + .../KnowledgeBaseServiceImpl.java | 36 +++++++++++------- 7 files changed, 73 insertions(+), 32 deletions(-) diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/conversation/vo/ParagraphHitRateListVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/conversation/vo/ParagraphHitRateListVO.java index 95595e67d..4c4d7a0d8 100644 --- 
a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/conversation/vo/ParagraphHitRateListVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/conversation/vo/ParagraphHitRateListVO.java @@ -11,5 +11,6 @@ import java.util.List; public class ParagraphHitRateListVO { private String uuid; private String groupId; + private Boolean isExist; private List wordList; } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestReqVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestReqVO.java index 4163e848e..5b23ee7b6 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestReqVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/knowledgebase/vo/KnowledgeHitRateTestReqVO.java @@ -26,4 +26,9 @@ public class KnowledgeHitRateTestReqVO { */ // @NotNull(message = "k值不能为空") private Integer k; + + /** + * Score阈值 + */ + private Double score; } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/knowledgebase/KnowledgeBaseDO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/knowledgebase/KnowledgeBaseDO.java index 79aefc497..26e57133f 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/knowledgebase/KnowledgeBaseDO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/knowledgebase/KnowledgeBaseDO.java @@ -46,7 +46,7 @@ public class KnowledgeBaseDO extends BaseDO { /** * Score阈值 */ - private Integer score; + private Double score; /** * 知识长度 */ 
diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java index d3cfc49f8..22c486038 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java @@ -2,6 +2,7 @@ package cn.iocoder.yudao.module.llm.service.async; import cn.iocoder.yudao.framework.common.exception.ErrorCode; import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils; +import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeHitRateTestReqVO; import cn.iocoder.yudao.module.llm.controller.admin.knowledgebase.vo.KnowledgeHitRateTestResultVO; import cn.iocoder.yudao.module.llm.dal.dataobject.knowledgedocuments.KnowledgeDocumentsDO; import cn.iocoder.yudao.module.llm.dal.mysql.knowledgedocuments.KnowledgeDocumentsMapper; @@ -25,6 +26,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception; @@ -142,11 +144,15 @@ public class AsyncKnowledgeBase { } - public List executeHitRateTest (String query, List fileIds, Integer k) { + public List executeHitRateTest (KnowledgeHitRateTestReqVO testReqVO , List fileIds) { + List fileIdStr = fileIds.stream() + .map(Object::toString) + .collect(Collectors.toList()); QueryMultipleReqVO vo = new QueryMultipleReqVO(); - vo.setQuery(query); - vo.setFileIds(Collections.singletonList(String.valueOf(fileIds))); - vo.setK(k); + vo.setQuery(testReqVO.getQuery()); + vo.setFileIds(fileIdStr); + vo.setK(testReqVO.getK()); + vo.setScore(testReqVO.getScore()); List resultList = new 
ArrayList<>(); @@ -155,10 +161,10 @@ public class AsyncKnowledgeBase { KnowledgeHitRateTestResultVO resultVO = new KnowledgeHitRateTestResultVO(); resultVO.setPageContent(pair.getDocument().getPageContent()); - DecimalFormat df = new DecimalFormat("0.00%"); - df.setRoundingMode(RoundingMode.HALF_UP); - String rateResult = df.format(pair.getHitRate()); - resultVO.setHitRate(rateResult); +// DecimalFormat df = new DecimalFormat("0.00%"); +// df.setRoundingMode(RoundingMode.HALF_UP); +// String rateResult = df.format(pair.getHitRate()); + resultVO.setHitRate(String.valueOf(pair.getHitRate())); resultVO.setDigest(pair.getDocument().getMetadata().getDigest()); long fileId = Long.parseLong(pair.getDocument().getMetadata().getFileId()); resultVO.setFileId(fileId); diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java index 66c7c2781..e34a2cb7e 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java @@ -47,14 +47,12 @@ import org.springframework.stereotype.Service; import javax.annotation.Resource; import java.io.*; -import java.math.RoundingMode; import java.net.URL; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardCopyOption; -import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -751,12 +749,29 @@ public class RagHttpService { .execute().body(); cn.hutool.core.lang.Console.log(result2); - log.info("请求参数: {}",JSON.toJSONString(jsonString)); + log.info("请求参数: {}",jsonString); log.info("请求结果: {}",JSON.toJSONString(result2)); - return 
parseHitRateTestResults(result2); + return parseHitRateTestResults(result2,vo.getScore()); } - private static List parseHitRateTestResults (String json) { + private static List parseHitRateTestResults (String json, Double score) { + boolean array= json.trim().startsWith("["); + // 先判断 JSON 是否是一个数组 + + if (!array){ + // 判断是否存在 detail 字段 + JSONObject jsonObject = JSON.parseObject(json); + if (jsonObject.containsKey("detail")) { + String detail = jsonObject.getString("detail"); + + if (detail.contains("No documents found for the given query")) { + throw exception(new ErrorCode(100_100_1, "未找到符合条件的文档,请检查查询条件!")); + } + return new ArrayList<>(); + } + } + + // 将 JSON 转换为 List // 解析 JSON 数组 JSONArray jsonArray = JSON.parseArray(json); @@ -775,11 +790,13 @@ public class RagHttpService { // 解析命中率 Double rate = pairArray.getDoubleValue(1); - // 创建 QueryResultPair 对象并添加到结果列表 - QueryResultPairVO pair = new QueryResultPairVO(); - pair.setDocument(document); - pair.setHitRate(rate); - results.add(pair); + if (rate >= score) { + QueryResultPairVO pair = new QueryResultPairVO(); + pair.setDocument(document); + pair.setHitRate(rate); + results.add(pair); + } + } // // 访问数据 diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryMultipleReqVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryMultipleReqVO.java index 2f14cf62e..1252f1366 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryMultipleReqVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/query/multiple/QueryMultipleReqVO.java @@ -29,4 +29,6 @@ public class QueryMultipleReqVO { */ // @NotNull(message = "k值不能为空") private Integer k; + + private Double score; } diff --git 
a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java index 77d3a7289..abff96751 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/knowledgebase/KnowledgeBaseServiceImpl.java @@ -131,13 +131,13 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService { // 更新或插入文档数据 List newDocuments = updateOrInsertDocuments(documents, updateReqVO.getId(), updateObj.getKnowledgeLength()); - Map knowledgeParameters = new HashMap<>(); - knowledgeParameters.put("chunkSize",updateReqVO.getChunkSize()); - knowledgeParameters.put("chunkOverlap",updateReqVO.getChunkOverlap()); + Map knowledgeParameters = new HashMap<>(); + knowledgeParameters.put("chunkSize", updateReqVO.getChunkSize()); + knowledgeParameters.put("chunkOverlap", updateReqVO.getChunkOverlap()); // 异步处理新增文档和删除的文档 List deleteIds = knowledgeDocumentsMapper.selectDeleteIds(updateReqVO.getId()); - asyncKnowledgeBase.createKnowledgeBase(newDocuments, deleteIds,knowledgeParameters); + asyncKnowledgeBase.createKnowledgeBase(newDocuments, deleteIds, knowledgeParameters); } /** @@ -152,14 +152,15 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService { // 异步处理删除的文档 List deleteIds = knowledgeDocumentsMapper.selectDeleteIds(knowledgeBaseId); if (!CollectionUtils.isAnyEmpty(deleteIds)) { - asyncKnowledgeBase.createKnowledgeBase(new ArrayList<>(), deleteIds,new HashMap<>()); + asyncKnowledgeBase.createKnowledgeBase(new ArrayList<>(), deleteIds, new HashMap<>()); } } /** * 删除不需要保留的文档 + * * @param knowledgeBaseId 知识库 ID - * @param retainedIds 需要保留的文档 ID + * @param retainedIds 需要保留的文档 ID */ private void 
deleteUnretainedDocuments (Long knowledgeBaseId, List retainedIds) { LambdaQueryWrapperX deleteWrapper = new LambdaQueryWrapperX() @@ -172,7 +173,8 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService { /** * 更新或插入文档数据 - * @param documents 需要更新的文档数据 + * + * @param documents 需要更新的文档数据 * @param knowledgeBaseId 知识库 ID * @param chunkSize * @return 更新或插入的文档数据 @@ -310,14 +312,22 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService { if (baseDO == null) { throw exception(KNOWLEDGE_BASE_NOT_EXISTS); } - Integer topK=4; - if(baseDO.getTopK()==null||baseDO.getTopK()<=0){ + Integer topK = 4; + if (baseDO.getTopK() == null || baseDO.getTopK() <= 0) { testReqVO.setK(topK); - }else { - topK=baseDO.getTopK(); + } else { + topK = baseDO.getTopK(); testReqVO.setK(topK); } + Double score = 0.2; + if (baseDO.getScore() == null || baseDO.getScore() <= 0.0 || baseDO.getScore() > 1) { // keep the 0.2 default unless the stored score lies in (0, 1] + testReqVO.setScore(score); + } else { + score = baseDO.getScore(); + testReqVO.setScore(score); + } + // 根据知识库ID获取参数信息,关联文档 List documentsDOS = knowledgeDocumentsMapper.selectList(new LambdaQueryWrapper() .eq(KnowledgeDocumentsDO::getKnowledgeBaseId, knowledgeId)); @@ -330,9 +340,9 @@ public class KnowledgeBaseServiceImpl implements KnowledgeBaseService { .map(KnowledgeDocumentsDO::getFileId) .collect(Collectors.toList()); - List result = asyncKnowledgeBase.executeHitRateTest(testReqVO.getQuery(), fileIds, testReqVO.getK()); + List result = asyncKnowledgeBase.executeHitRateTest(testReqVO, fileIds); - if (com.baomidou.mybatisplus.core.toolkit.CollectionUtils.isEmpty(result)){ + if (com.baomidou.mybatisplus.core.toolkit.CollectionUtils.isEmpty(result)) { return Collections.emptyList(); }