增加xlsx文件

This commit is contained in:
limin 2025-01-13 17:43:24 +08:00
parent fb0e045008
commit 7abd3d5e96
3 changed files with 84 additions and 0 deletions

View File

@ -55,4 +55,6 @@ public interface DatasetService {
PageResult<DatasetDO> getDatasetPage(DatasetPageReqVO pageReqVO);
List<DatasetTreeNode> queryAll();
}

View File

@ -25,6 +25,9 @@ import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.opencsv.exceptions.CsvValidationException;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.springframework.stereotype.Service;
import org.springframework.validation.annotation.Validated;
@ -342,6 +345,58 @@ public class DatasetServiceImpl implements DatasetService {
});
}
/**
 * Parses each .xlsx file and persists its rows as dataset questions and answers.
 *
 * <p>Only the first sheet of every workbook is read, and the row with index 0 is skipped
 * (presumably a header row). For every other row, column 0 is stored as the question's
 * {@code system} value, column 1 as the question text and column 2 as the answer text;
 * any further columns are ignored. The question is always inserted. Its status is set to
 * 2 when the answer text is not blank and to 0 when it is blank; when column 2 has no cell
 * at all the status is left unset. The answer is inserted only when its text is not blank.
 *
 * <p>Files for which {@code DataSetReadFileUtils.readXlsxFromUrl} returns {@code null} are
 * skipped. Every workbook that was opened is closed once its rows have been processed.
 *
 * @param xlsxFiles dataset file records whose file URL points at an .xlsx document
 */
public void readXlsxFile(List<DatasetFilesDO> xlsxFiles) {
    for (DatasetFilesDO datasetFilesDO : xlsxFiles) {
        String fileUrl = datasetFilesDO.getDatasetFileUrl();
        // try-with-resources tolerates a null resource, so the null check can live inside.
        try (Workbook workbook = DataSetReadFileUtils.readXlsxFromUrl(fileUrl)) {
            if (workbook == null) {
                continue;
            }
            Sheet firstSheet = workbook.getSheetAt(0);
            firstSheet.forEach(row -> {
                if (row.getRowNum() == 0) {
                    return;
                }
                // Question
                DatasetQuestionDO datasetQuestionDO = new DatasetQuestionDO();
                datasetQuestionDO.setDatasetId(datasetFilesDO.getDatasetId());
                datasetQuestionDO.setDatasetFilesId(datasetFilesDO.getId());
                // Answer
                DatasetAnswerDO datasetAnswerDO = new DatasetAnswerDO();
                datasetAnswerDO.setDatasetId(datasetFilesDO.getDatasetId());
                datasetAnswerDO.setDatasetFilesId(datasetFilesDO.getId());
                // NOTE(review): per the POI Cell Javadoc, getStringCellValue() throws
                // IllegalStateException for numeric cells; confirm whether such cells can
                // occur in uploaded files.
                for (Cell cell : row) {
                    switch (cell.getColumnIndex()) {
                        case 0:
                            datasetQuestionDO.setSystem(cell.getStringCellValue());
                            break;
                        case 1:
                            datasetQuestionDO.setQuestion(cell.getStringCellValue());
                            break;
                        case 2:
                            String answerText = cell.getStringCellValue();
                            datasetQuestionDO.setStatus(StringUtils.isNotBlank(answerText) ? 2 : 0);
                            datasetAnswerDO.setAnswer(answerText);
                            break;
                        default:
                            // Columns beyond the third carry no imported data.
                            break;
                    }
                }
                // Insert the question first; its id is read back afterwards to link the answer.
                datasetQuestionMapper.insert(datasetQuestionDO);
                datasetAnswerDO.setQuestionId(datasetQuestionDO.getId());
                // Insert the answer only when one was provided.
                if (StringUtils.isNotBlank(datasetAnswerDO.getAnswer())) {
                    datasetAnswerMapper.insert(datasetAnswerDO);
                }
            });
        } catch (java.io.IOException e) {
            // No checked exception is thrown by the body, so this can only come from close();
            // the rows are already persisted, so report it instead of failing the import.
            log.warn("Failed to close XLSX workbook: {}", fileUrl, e);
        }
    }
}
public void parseFile (List<DatasetFilesSaveReqVO> datasetFiles) {
List<DatasetFilesDO> insertDatasetFiles = BeanUtils.toBean(datasetFiles, DatasetFilesDO.class);
@ -368,6 +423,13 @@ public class DatasetServiceImpl implements DatasetService {
if (CollectionUtils.isNotEmpty(csvFiles)) {
readCsvFile(csvFiles);
}
// .xlsx格式
List<DatasetFilesDO> xlsxFiles = insertDatasetFiles.stream()
.filter(datasetFilesDO -> datasetFilesDO.getDatasetFileUrl().toLowerCase().endsWith(".xlsx"))
.collect(Collectors.toList());
if (CollectionUtils.isNotEmpty(xlsxFiles)) {
readXlsxFile(xlsxFiles);
}
}

View File

@ -1,5 +1,6 @@
package cn.iocoder.yudao.module.llm.utils;
import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetQuestionDO;
import cn.iocoder.yudao.module.llm.utils.vo.CsvDataSetVO;
import com.opencsv.CSVParser;
import com.opencsv.CSVParserBuilder;
@ -7,8 +8,13 @@ import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.exceptions.CsvValidationException;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.springframework.stereotype.Component;
import java.io.InputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
@ -115,4 +121,18 @@ public class DataSetReadFileUtils {
}
return -1;
}
/**
 * Opens a connection for the given location via {@code readFile} and parses the response
 * body as an .xlsx workbook.
 *
 * <p>The response stream is closed and the connection is disconnected before this method
 * returns, whether parsing succeeded or not. The returned workbook is not closed here;
 * closing it is left to the caller.
 *
 * @param filePath location handed to {@code readFile} to obtain the connection
 * @return the parsed workbook, or {@code null} when {@code readFile} returned no connection
 *         or the content could not be read as an .xlsx document
 */
public static Workbook readXlsxFromUrl(String filePath) {
    HttpURLConnection connection = readFile(filePath);
    if (connection == null) {
        return null;
    }
    try (InputStream inputStream = connection.getInputStream()) {
        return new XSSFWorkbook(inputStream);
    } catch (Exception e) {
        // Broad catch kept on purpose: callers rely on a null return for any read or parse
        // failure. Log through the logger with the full stack trace rather than stdout.
        log.error("Error reading XLSX file from URL: {}", filePath, e);
        return null;
    } finally {
        connection.disconnect();
    }
}
}