refactor(module-llm):重构知识库向量嵌入功能
- 移除 Async 注解和不必要的导入
- 优化 knowledgeEmbed 方法,使用 HTTP 客户端替代 Unirest
- 添加 fileUrl 字段到 KnowledgeRagEmbedReqVO
- 重构 ragHttpService.knowledgeEmbed 方法,支持文件 URL 上传
This commit is contained in:
parent
06c832fa3f
commit
681c12206e
@ -11,17 +11,14 @@ import cn.iocoder.yudao.module.llm.service.http.vo.KnowledgeRagEmbedReqVO;
|
||||
import cn.iocoder.yudao.module.llm.service.http.vo.RegUploadReqVO;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
|
||||
|
||||
@ -101,7 +98,7 @@ public class AsyncKnowledgeBase {
|
||||
* @param knowledge 文件
|
||||
* @param id 知识库id
|
||||
*/
|
||||
public void knowledgeEmbed (KnowledgeDocumentsDO knowledge, Long id) {
|
||||
public void knowledgeEmbed (KnowledgeDocumentsDO knowledge, Long id) {
|
||||
|
||||
// TODO:本地调试时打开
|
||||
// String tmpUrl = "http://xhllm.xinnuojinzhi.com/admin-api/infra/file/29/get/ca3d06d24f80c127ec0300408a035176f5e0bf90ce319fda17018303226e2298.doc";
|
||||
@ -112,10 +109,16 @@ public class AsyncKnowledgeBase {
|
||||
KnowledgeRagEmbedReqVO ragEmbedReqVo = new KnowledgeRagEmbedReqVO()
|
||||
.setFileId(String.valueOf(knowledge.getId()))
|
||||
.setFileName(knowledge.getDocumentName())
|
||||
.setFileInputStream(new ByteArrayInputStream(Objects.requireNonNull(getFileByte(knowledge.getFileUrl()))))
|
||||
.setFileBytes(getFileByte(knowledge.getFileUrl()));
|
||||
.setFileUrl(knowledge.getFileUrl());
|
||||
// .setFileInputStream(new ByteArrayInputStream(Objects.requireNonNull(getFileByte(knowledge.getFileUrl()))))
|
||||
// .setFileBytes(getFileByte(knowledge.getFileUrl()
|
||||
|
||||
ragHttpService.knowledgeEmbed(ragEmbedReqVo, id);
|
||||
|
||||
try {
|
||||
ragHttpService.knowledgeEmbed(ragEmbedReqVo, id);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -304,15 +304,20 @@ public class RagHttpService {
|
||||
/**
|
||||
* 知识库向量嵌入
|
||||
*
|
||||
* @param reqVO 请求参数
|
||||
* @param reqVO 请求参数,包含文件ID、文件名和文件URL等信息
|
||||
* @param id 知识库ID
|
||||
* @throws IOException 如果发生I/O错误
|
||||
*/
|
||||
public void knowledgeEmbed (KnowledgeRagEmbedReqVO reqVO, Long id) {
|
||||
public void knowledgeEmbed(KnowledgeRagEmbedReqVO reqVO, Long id) throws IOException {
|
||||
// 获取向量嵌入接口的URL
|
||||
String ragEmbed = llmBackendProperties.getEmbed();
|
||||
log.info("知识库向量嵌入接口URL: {}", ragEmbed);
|
||||
|
||||
// 从请求参数中获取文件ID和文件名
|
||||
String fileId = reqVO.getFileId();
|
||||
String fileName = reqVO.getFileName();
|
||||
String fileUrl = reqVO.getFileUrl();
|
||||
|
||||
log.info("URL: {}, fileId: {} ,fileNam: {}, fileUrl: {}, ", ragEmbed, fileId, fileName, fileUrl);
|
||||
|
||||
// 获取知识库文档
|
||||
KnowledgeDocumentsDO documents = getKnowledgeDocuments(id, fileId);
|
||||
@ -323,25 +328,81 @@ public class RagHttpService {
|
||||
// 更新文件状态为上传中
|
||||
updateFileState(documents, KnowledgeStatusEnum.UPLOADING);
|
||||
|
||||
// 初始化 Unirest 配置(只需一次)
|
||||
// Unirest.config().socketTimeout(86400000);
|
||||
// 创建HTTP客户端
|
||||
CloseableHttpClient httpClient = HttpClients.createDefault();
|
||||
|
||||
// 发送 POST 请求
|
||||
try {
|
||||
HttpResponse<String> response = Unirest.post(ragEmbed)
|
||||
.field("file_id", fileId)
|
||||
.field("file", reqVO.getFileInputStream(), fileName)
|
||||
.asString();
|
||||
// 创建HTTP GET请求,获取文件内容
|
||||
HttpGet request = new HttpGet(fileUrl);
|
||||
try (CloseableHttpResponse response = httpClient.execute(request)) {
|
||||
HttpEntity entity = response.getEntity();
|
||||
if (entity != null) {
|
||||
try (InputStream inputStream = entity.getContent();
|
||||
BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream)) {
|
||||
|
||||
String responseBody = response.getBody();
|
||||
log.info("响应原始内容: {}", responseBody);
|
||||
// 标记流以便后续重置
|
||||
bufferedInputStream.mark(Integer.MAX_VALUE);
|
||||
// 检测文件编码
|
||||
String encoding = detectCharset(bufferedInputStream);
|
||||
|
||||
processResponse(responseBody, documents);
|
||||
// 重置流以便重新读取
|
||||
bufferedInputStream.reset();
|
||||
|
||||
} catch (Exception e) {
|
||||
handleFailure(documents, FILE_UPLOAD_FAILED_MSG, e);
|
||||
// 使用检测到的编码读取文件内容
|
||||
try (InputStreamReader reader = new InputStreamReader(bufferedInputStream, encoding);
|
||||
BufferedReader bufferedReader = new BufferedReader(reader)) {
|
||||
StringBuilder fileContentBuilder = new StringBuilder();
|
||||
String line;
|
||||
while ((line = bufferedReader.readLine()) != null) {
|
||||
fileContentBuilder.append(line).append(System.lineSeparator());
|
||||
}
|
||||
String fileContent = fileContentBuilder.toString();
|
||||
|
||||
// 将文件内容转换为UTF-8编码的字节数组
|
||||
byte[] utf8Bytes = fileContent.getBytes(StandardCharsets.UTF_8);
|
||||
|
||||
try (ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream(utf8Bytes);
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
|
||||
|
||||
int bufferSize = 1024;
|
||||
byte[] byteArray = new byte[bufferSize];
|
||||
int bytesRead;
|
||||
|
||||
// 读取字节数组并写入输出流
|
||||
while ((bytesRead = byteArrayInputStream.read(byteArray)) != -1) {
|
||||
outputStream.write(byteArray, 0, bytesRead);
|
||||
}
|
||||
|
||||
// 将输出流转换为字节数组
|
||||
byte[] result = outputStream.toByteArray();
|
||||
|
||||
// 发送HTTP POST请求,上传文件内容
|
||||
String body = HttpRequest.post(ragEmbed)
|
||||
.form("file", result, fileName)
|
||||
.form("file_id", fileId)
|
||||
.execute()
|
||||
.body();
|
||||
|
||||
// 打印响应内容
|
||||
log.info("响应原始内容 String: {}", body);
|
||||
|
||||
// 解析响应内容
|
||||
RagEmbedRespVO ragEmbedRespVO = JSON.parseObject(body, RagEmbedRespVO.class);
|
||||
log.info("解析响应原始内容 ragEmbedRespVO:{}", ragEmbedRespVO);
|
||||
|
||||
// 根据响应状态更新文件状态
|
||||
if (ragEmbedRespVO.isStatus()) {
|
||||
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_SUCCESS);
|
||||
} else {
|
||||
updateFileState(documents, KnowledgeStatusEnum.UPLOAD_FAILED);
|
||||
throw new RuntimeException("文件上传失败:" + ragEmbedRespVO.getMessage());
|
||||
}
|
||||
} catch (UnirestException e) {
|
||||
throw new RuntimeException("文件上传失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -27,6 +27,11 @@ public class KnowledgeRagEmbedReqVO {
|
||||
*/
|
||||
private String fileName;
|
||||
|
||||
/**
|
||||
* 文件 Url
|
||||
*/
|
||||
private String fileUrl;
|
||||
|
||||
/**
|
||||
* 文件流
|
||||
*/
|
||||
|
Loading…
x
Reference in New Issue
Block a user