tar.gz 数据集文件上传
This commit is contained in:
parent
2b97fc0053
commit
530659f12b
@ -3,6 +3,7 @@ package cn.iocoder.yudao.module.llm.service.dataset;
|
||||
|
||||
import cn.hutool.core.io.IoUtil;
|
||||
import cn.iocoder.yudao.framework.common.exception.ErrorCode;
|
||||
import cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil;
|
||||
import cn.iocoder.yudao.framework.common.pojo.PageResult;
|
||||
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
|
||||
import cn.iocoder.yudao.module.infra.api.file.FileApi;
|
||||
@ -29,10 +30,13 @@ import com.opencsv.exceptions.CsvValidationException;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.compress.archivers.ArchiveEntry;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
|
||||
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
|
||||
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.validation.annotation.Validated;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
@ -70,6 +74,8 @@ public class DatasetServiceImpl implements DatasetService {
|
||||
@Resource
|
||||
private FileApi fileApi;
|
||||
|
||||
|
||||
@Transactional
|
||||
@Override
|
||||
public Long createDataset (DatasetSaveReqVO createReqVO) {
|
||||
// 校验
|
||||
@ -270,7 +276,53 @@ public class DatasetServiceImpl implements DatasetService {
|
||||
});
|
||||
parseFile(res);
|
||||
}
|
||||
|
||||
public void readTarGzFile(List<DatasetFilesDO> tarGzFiles, List<DatasetFilesSaveReqVO> datasetFiles) {
|
||||
DatasetFilesSaveReqVO datasetFilesSaveReqVO = datasetFiles.get(0);
|
||||
List<DatasetFilesSaveReqVO> res = new ArrayList<>();
|
||||
tarGzFiles.forEach(datasetFilesDO -> {
|
||||
HttpURLConnection connection = DataSetReadFileUtils.readFile(datasetFilesDO.getDatasetFileUrl());
|
||||
if (connection != null) {
|
||||
try {
|
||||
InputStream inputStream = connection.getInputStream();
|
||||
GzipCompressorInputStream gzipInputStream = new GzipCompressorInputStream(inputStream);
|
||||
TarArchiveInputStream tarArchiveInputStream = new TarArchiveInputStream(gzipInputStream, Charset.forName("GBK").name());
|
||||
TarArchiveEntry tarEntry;
|
||||
try {
|
||||
while ((tarEntry = tarArchiveInputStream.getNextTarEntry()) != null) {
|
||||
if (!tarEntry.isDirectory()) {
|
||||
try {
|
||||
final String name = tarEntry.getName();
|
||||
byte[] fileBytes = IoUtil.readBytes(tarArchiveInputStream, false);
|
||||
Map<String, Object> map = fileApi.llmCreateFile(name, fileBytes);
|
||||
String url = map.get("url").toString();
|
||||
Long id = Long.parseLong(map.get("id").toString());
|
||||
DatasetFilesSaveReqVO vo = new DatasetFilesSaveReqVO();
|
||||
vo.setDatasetId(datasetFilesSaveReqVO.getDatasetId());
|
||||
vo.setDatasetFile(id);
|
||||
vo.setDatasetFileUrl(url);
|
||||
vo.setId(datasetFilesSaveReqVO.getId());
|
||||
res.add(vo);
|
||||
} catch (Exception e) {
|
||||
// Handle exception for individual file
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
tarArchiveInputStream.close();
|
||||
} finally {
|
||||
tarArchiveInputStream.close();
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw ServiceExceptionUtil.exception(new ErrorCode(
|
||||
11001, "请正确上传tar.gz格式的数据!!!"));
|
||||
} finally {
|
||||
connection.disconnect();
|
||||
}
|
||||
}
|
||||
});
|
||||
parseFile(res);
|
||||
}
|
||||
// 暂时先不用
|
||||
public void readTarFile (List<DatasetFilesDO> tarFiles, List<DatasetFilesSaveReqVO> datasetFiles) {
|
||||
DatasetFilesSaveReqVO datasetFilesSaveReqVO = datasetFiles.get(0);
|
||||
List<DatasetFilesSaveReqVO> res = new ArrayList<>();
|
||||
@ -444,7 +496,7 @@ public class DatasetServiceImpl implements DatasetService {
|
||||
*
|
||||
* @param xlsxFiles
|
||||
*/
|
||||
public void readXlsxFile(List<DatasetFilesDO> xlsxFiles) {
|
||||
public void readXlsxFile(List<DatasetFilesDO> xlsxFiles) {
|
||||
xlsxFiles.forEach(datasetFilesDO -> {
|
||||
Workbook sheets = DataSetReadFileUtils.readXlsxFromUrl(datasetFilesDO.getDatasetFileUrl());
|
||||
if (sheets != null){
|
||||
@ -506,10 +558,10 @@ public class DatasetServiceImpl implements DatasetService {
|
||||
|
||||
// tar文件
|
||||
List<DatasetFilesDO> tarFiles = insertDatasetFiles.stream()
|
||||
.filter(datasetFilesDO -> datasetFilesDO.getDatasetFileUrl().toLowerCase().endsWith(".tar"))
|
||||
.filter(datasetFilesDO -> datasetFilesDO.getDatasetFileUrl().toLowerCase().endsWith(".tar.gz"))
|
||||
.collect(Collectors.toList());
|
||||
if (CollectionUtils.isNotEmpty(tarFiles)) {
|
||||
readTarFile(tarFiles,datasetFiles);
|
||||
readTarGzFile(tarFiles,datasetFiles);
|
||||
}
|
||||
|
||||
// 提取文件
|
||||
|
Loading…
x
Reference in New Issue
Block a user