refactor(module-llm):重构知识库向量嵌入功能

- 移除 Async 注解和不必要的导入
- 优化 knowledgeEmbed 方法,使用 HTTP 客户端替代 Unirest
- 添加 fileUrl 字段到 KnowledgeRagEmbedReqVO
- 重构 ragHttpService.knowledgeEmbed 方法,支持文件 URL 上传
This commit is contained in:
Liuyang 2025-02-20 14:26:37 +08:00
parent 06c832fa3f
commit 681c12206e
3 changed files with 93 additions and 24 deletions

View File

@ -11,17 +11,14 @@ import cn.iocoder.yudao.module.llm.service.http.vo.KnowledgeRagEmbedReqVO;
import cn.iocoder.yudao.module.llm.service.http.vo.RegUploadReqVO;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import javax.annotation.Resource;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.List;
import java.util.Objects;
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
@ -101,7 +98,7 @@ public class AsyncKnowledgeBase {
* @param knowledge 文件
* @param id 知识库id
*/
public void knowledgeEmbed (KnowledgeDocumentsDO knowledge, Long id) {
public void knowledgeEmbed (KnowledgeDocumentsDO knowledge, Long id) {
// TODO:本地调试时打开
// String tmpUrl = "http://xhllm.xinnuojinzhi.com/admin-api/infra/file/29/get/ca3d06d24f80c127ec0300408a035176f5e0bf90ce319fda17018303226e2298.doc";
@ -112,10 +109,16 @@ public class AsyncKnowledgeBase {
KnowledgeRagEmbedReqVO ragEmbedReqVo = new KnowledgeRagEmbedReqVO()
.setFileId(String.valueOf(knowledge.getId()))
.setFileName(knowledge.getDocumentName())
.setFileInputStream(new ByteArrayInputStream(Objects.requireNonNull(getFileByte(knowledge.getFileUrl()))))
.setFileBytes(getFileByte(knowledge.getFileUrl()));
.setFileUrl(knowledge.getFileUrl());
// .setFileInputStream(new ByteArrayInputStream(Objects.requireNonNull(getFileByte(knowledge.getFileUrl()))))
// .setFileBytes(getFileByte(knowledge.getFileUrl()
ragHttpService.knowledgeEmbed(ragEmbedReqVo, id);
try {
ragHttpService.knowledgeEmbed(ragEmbedReqVo, id);
} catch (IOException e) {
throw new RuntimeException(e);
}
}

View File

@ -304,15 +304,20 @@ public class RagHttpService {
/**
* 知识库向量嵌入
*
* @param reqVO 请求参数
* @param reqVO 请求参数,包含文件ID、文件名和文件URL等信息
* @param id 知识库ID
* @throws IOException 如果发生I/O错误
*/
public void knowledgeEmbed (KnowledgeRagEmbedReqVO reqVO, Long id) {
public void knowledgeEmbed(KnowledgeRagEmbedReqVO reqVO, Long id) throws IOException {
// 获取向量嵌入接口的URL
String ragEmbed = llmBackendProperties.getEmbed();
log.info("知识库向量嵌入接口URL: {}", ragEmbed);
// 从请求参数中获取文件ID和文件名
String fileId = reqVO.getFileId();
String fileName = reqVO.getFileName();
String fileUrl = reqVO.getFileUrl();
log.info("URL: {}, fileId: {} ,fileNam: {}, fileUrl: {}, ", ragEmbed, fileId, fileName, fileUrl);
// 获取知识库文档
KnowledgeDocumentsDO documents = getKnowledgeDocuments(id, fileId);
@ -323,25 +328,81 @@ public class RagHttpService {
// 更新文件状态为上传中
updateFileState(documents, KnowledgeStatusEnum.UPLOADING);
// 初始化 Unirest 配置只需一次
// Unirest.config().socketTimeout(86400000);
// 创建HTTP客户端
CloseableHttpClient httpClient = HttpClients.createDefault();
// 发送 POST 请求
try {
HttpResponse<String> response = Unirest.post(ragEmbed)
.field("file_id", fileId)
.field("file", reqVO.getFileInputStream(), fileName)
.asString();
// 创建HTTP GET请求获取文件内容
HttpGet request = new HttpGet(fileUrl);
try (CloseableHttpResponse response = httpClient.execute(request)) {
HttpEntity entity = response.getEntity();
if (entity != null) {
try (InputStream inputStream = entity.getContent();
BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream)) {
String responseBody = response.getBody();
log.info("响应原始内容: {}", responseBody);
// 标记流以便后续重置
bufferedInputStream.mark(Integer.MAX_VALUE);
// 检测文件编码
String encoding = detectCharset(bufferedInputStream);
processResponse(responseBody, documents);
// 重置流以便重新读取
bufferedInputStream.reset();
} catch (Exception e) {
handleFailure(documents, FILE_UPLOAD_FAILED_MSG, e);
// 使用检测到的编码读取文件内容
try (InputStreamReader reader = new InputStreamReader(bufferedInputStream, encoding);
BufferedReader bufferedReader = new BufferedReader(reader)) {
StringBuilder fileContentBuilder = new StringBuilder();
String line;
while ((line = bufferedReader.readLine()) != null) {
fileContentBuilder.append(line).append(System.lineSeparator());
}
String fileContent = fileContentBuilder.toString();
// 将文件内容转换为UTF-8编码的字节数组
byte[] utf8Bytes = fileContent.getBytes(StandardCharsets.UTF_8);
try (ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(utf8Bytes);
ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
int bufferSize = 1024;
byte[] byteArray = new byte[bufferSize];
int bytesRead;
// 读取字节数组并写入输出流
while ((bytesRead = byteArrayInputStream.read(byteArray)) != -1) {
outputStream.write(byteArray, 0, bytesRead);
}
// 将输出流转换为字节数组
byte[] result = outputStream.toByteArray();
// 发送HTTP POST请求上传文件内容
String body = HttpRequest.post(ragEmbed)
.form("file", result, fileName)
.form("file_id", fileId)
.execute()
.body();
// 打印响应内容
log.info("响应原始内容 String: {}", body);
// 解析响应内容
RagEmbedRespVO ragEmbedRespVO = JSON.parseObject(body, RagEmbedRespVO.class);
log.info("解析响应原始内容 ragEmbedRespVO:{}", ragEmbedRespVO);
// 根据响应状态更新文件状态
if (ragEmbedRespVO.isStatus()) {
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_SUCCESS);
} else {
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_FAILED);
throw new RuntimeException("文件上传失败:" + ragEmbedRespVO.getMessage());
}
} catch (UnirestException e) {
throw new RuntimeException("文件上传失败: " + e.getMessage());
}
}
}
}
}
}
/**

View File

@ -27,6 +27,11 @@ public class KnowledgeRagEmbedReqVO {
*/
private String fileName;
/**
* 文件 Url
*/
private String fileUrl;
/**
* 文件流
*/