From 681c12206e28b8d9d0b8dcc1e6f57dbe05774ab9 Mon Sep 17 00:00:00 2001 From: Liuyang <2746366019@qq.com> Date: Thu, 20 Feb 2025 14:26:37 +0800 Subject: [PATCH] =?UTF-8?q?refactor(module-llm):=E9=87=8D=E6=9E=84?= =?UTF-8?q?=E7=9F=A5=E8=AF=86=E5=BA=93=E5=90=91=E9=87=8F=E5=B5=8C=E5=85=A5?= =?UTF-8?q?=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 移除 Async 注解和不必要的导入 - 优化 knowledgeEmbed 方法,使用 HTTP 客户端替代 Unirest - 添加 fileUrl 字段到 KnowledgeRagEmbedReqVO - 重构 ragHttpService.knowledgeEmbed 方法,支持文件 URL 上传 --- .../llm/service/async/AsyncKnowledgeBase.java | 17 ++-- .../llm/service/http/RagHttpService.java | 95 +++++++++++++++---- .../http/vo/KnowledgeRagEmbedReqVO.java | 5 + 3 files changed, 93 insertions(+), 24 deletions(-) diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java index ab837341a..c5479e11e 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/async/AsyncKnowledgeBase.java @@ -11,17 +11,14 @@ import cn.iocoder.yudao.module.llm.service.http.vo.KnowledgeRagEmbedReqVO; import cn.iocoder.yudao.module.llm.service.http.vo.RegUploadReqVO; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.scheduling.annotation.Async; import org.springframework.stereotype.Service; import javax.annotation.Resource; -import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.util.List; -import java.util.Objects; import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception; @@ -101,7 +98,7 @@ public class AsyncKnowledgeBase { * @param knowledge 文件 * @param id 知识库id */ - public void knowledgeEmbed (KnowledgeDocumentsDO knowledge, Long id) { + public void knowledgeEmbed (KnowledgeDocumentsDO knowledge, Long id) { // TODO:本地调试时打开 // String tmpUrl = "http://xhllm.xinnuojinzhi.com/admin-api/infra/file/29/get/ca3d06d24f80c127ec0300408a035176f5e0bf90ce319fda17018303226e2298.doc"; @@ -112,10 +109,16 @@ public class AsyncKnowledgeBase { KnowledgeRagEmbedReqVO ragEmbedReqVo = new KnowledgeRagEmbedReqVO() .setFileId(String.valueOf(knowledge.getId())) .setFileName(knowledge.getDocumentName()) - .setFileInputStream(new ByteArrayInputStream(Objects.requireNonNull(getFileByte(knowledge.getFileUrl())))) - .setFileBytes(getFileByte(knowledge.getFileUrl())); + .setFileUrl(knowledge.getFileUrl()); +// .setFileInputStream(new ByteArrayInputStream(Objects.requireNonNull(getFileByte(knowledge.getFileUrl())))) +// .setFileBytes(getFileByte(knowledge.getFileUrl() - ragHttpService.knowledgeEmbed(ragEmbedReqVo, id); + + try { + ragHttpService.knowledgeEmbed(ragEmbedReqVo, id); + } catch (IOException e) { + throw new RuntimeException(e); + } } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java index 1537b301e..881681371 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/RagHttpService.java @@ -304,15 +304,20 @@ public class RagHttpService { /** * 知识库向量嵌入 * - * @param reqVO 请求参数 + * @param reqVO 请求参数,包含文件ID、文件名和文件URL等信息 * @param id 知识库ID + * @throws IOException 如果发生I/O错误 */ - public void knowledgeEmbed (KnowledgeRagEmbedReqVO reqVO, Long id) { + public void knowledgeEmbed(KnowledgeRagEmbedReqVO reqVO, Long id) throws IOException { + // 获取向量嵌入接口的URL String ragEmbed = llmBackendProperties.getEmbed(); - log.info("知识库向量嵌入接口URL: {}", ragEmbed); + // 从请求参数中获取文件ID和文件名 String fileId = reqVO.getFileId(); String fileName = reqVO.getFileName(); + String fileUrl = reqVO.getFileUrl(); + + log.info("URL: {}, fileId: {} ,fileNam: {}, fileUrl: {}, ", ragEmbed, fileId, fileName, fileUrl); // 获取知识库文档 KnowledgeDocumentsDO documents = getKnowledgeDocuments(id, fileId); @@ -323,25 +328,81 @@ public class RagHttpService { // 更新文件状态为上传中 updateFileState(documents, KnowledgeStatusEnum.UPLOADING); - // 初始化 Unirest 配置(只需一次) -// Unirest.config().socketTimeout(86400000); + // 创建HTTP客户端 + CloseableHttpClient httpClient = HttpClients.createDefault(); - // 发送 POST 请求 - try { - HttpResponse response = Unirest.post(ragEmbed) - .field("file_id", fileId) - .field("file", reqVO.getFileInputStream(), fileName) - .asString(); + // 创建HTTP GET请求,获取文件内容 + HttpGet request = new HttpGet(fileUrl); + try (CloseableHttpResponse response = httpClient.execute(request)) { + HttpEntity entity = response.getEntity(); + if (entity != null) { + try (InputStream inputStream = entity.getContent(); + BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream)) { - String responseBody = response.getBody(); - log.info("响应原始内容: {}", responseBody); + // 标记流以便后续重置 + bufferedInputStream.mark(Integer.MAX_VALUE); + // 检测文件编码 + String encoding = detectCharset(bufferedInputStream); - processResponse(responseBody, documents); + // 重置流以便重新读取 + bufferedInputStream.reset(); - } catch (Exception e) { - handleFailure(documents, FILE_UPLOAD_FAILED_MSG, e); + // 使用检测到的编码读取文件内容 + try (InputStreamReader reader = new InputStreamReader(bufferedInputStream, encoding); + BufferedReader bufferedReader = new BufferedReader(reader)) { + StringBuilder fileContentBuilder = new StringBuilder(); + String line; + while ((line = bufferedReader.readLine()) != null) { + fileContentBuilder.append(line).append(System.lineSeparator()); + } + String fileContent = fileContentBuilder.toString(); + + // 将文件内容转换为UTF-8编码的字节数组 + byte[] utf8Bytes = fileContent.getBytes(StandardCharsets.UTF_8); + + try (ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(utf8Bytes); + ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { + + int bufferSize = 1024; + byte[] byteArray = new byte[bufferSize]; + int bytesRead; + + // 读取字节数组并写入输出流 + while ((bytesRead = byteArrayInputStream.read(byteArray)) != -1) { + outputStream.write(byteArray, 0, bytesRead); + } + + // 将输出流转换为字节数组 + byte[] result = outputStream.toByteArray(); + + // 发送HTTP POST请求,上传文件内容 + String body = HttpRequest.post(ragEmbed) + .form("file", result, fileName) + .form("file_id", fileId) + .execute() + .body(); + + // 打印响应内容 + log.info("响应原始内容 String: {}", body); + + // 解析响应内容 + RagEmbedRespVO ragEmbedRespVO = JSON.parseObject(body, RagEmbedRespVO.class); + log.info("解析响应原始内容 ragEmbedRespVO:{}", ragEmbedRespVO); + + // 根据响应状态更新文件状态 + if (ragEmbedRespVO.isStatus()) { + updateFileState(documents, KnowledgeStatusEnum.UPLOAD_SUCCESS); + } else { + updateFileState(documents, KnowledgeStatusEnum.UPLOAD_FAILED); + throw new RuntimeException("文件上传失败:" + ragEmbedRespVO.getMessage()); + } + } catch (UnirestException e) { + throw new RuntimeException("文件上传失败: " + e.getMessage()); + } + } + } + } } - } /** diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/KnowledgeRagEmbedReqVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/KnowledgeRagEmbedReqVO.java index 2652d3b86..708f0f406 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/KnowledgeRagEmbedReqVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/http/vo/KnowledgeRagEmbedReqVO.java @@ -27,6 +27,11 @@ public class KnowledgeRagEmbedReqVO { */ private String fileName; + /** + * 文件 Url + */ + private String fileUrl; + /** * 文件流 */