关于数据集 更新数据集文件解析处理

This commit is contained in:
limin 2024-12-30 15:20:26 +08:00
parent f9c1405323
commit c823592ba0
2 changed files with 50 additions and 27 deletions

View File

@ -105,7 +105,7 @@ public class DatasetQuestionServiceImpl implements DatasetQuestionService {
LambdaQueryWrapper<DatasetQuestionDO> wrapper = new LambdaQueryWrapper<DatasetQuestionDO>()
.eq(DatasetQuestionDO::getDatasetId, updateReqVOS.get(0).getDatasetId());
Long sumCount = datasetQuestionMapper.selectCount(wrapper);
wrapper.eq(DatasetQuestionDO::getStatus,1);
wrapper.eq(DatasetQuestionDO::getStatus,2);
Long annoCount = datasetQuestionMapper.selectCount(wrapper);
double ratio = sumCount == 0 ? 0 : ((double) annoCount / sumCount) *100;
String formattedRatio = String.format("%.2f", ratio);

View File

@ -59,33 +59,18 @@ public class DatasetServiceImpl implements DatasetService {
DatasetDO dataset = BeanUtils.toBean(createReqVO, DatasetDO.class);
datasetMapper.insert(dataset);
List<DatasetFilesSaveReqVO> datasetFiles = createReqVO.getDatasetFiles();
datasetFiles.stream().forEach(
datasetFilesSaveReqVO -> {
datasetFilesSaveReqVO.setDatasetId(dataset.getId());
}
);
List<DatasetFilesDO> insertDatasetFiles = BeanUtils.toBean(datasetFiles, DatasetFilesDO.class);
System.out.println(insertDatasetFiles);
datasetFilesMapper.insertBatch(insertDatasetFiles, 100);
System.out.println(insertDatasetFiles);
// 提取文件
List<DatasetFilesDO> jsonFiles = insertDatasetFiles.stream()
.filter(datasetFilesDO -> datasetFilesDO.getDatasetFileUrl().toLowerCase().endsWith(".json"))
.collect(Collectors.toList());
if (CollectionUtils.isNotEmpty(jsonFiles)){
readJsonFile(jsonFiles);
if (CollectionUtils.isNotEmpty(datasetFiles)){
datasetFiles.stream().forEach(
datasetFilesSaveReqVO -> {
datasetFilesSaveReqVO.setDatasetId(dataset.getId());
}
);
parseFile(datasetFiles);
Long count = datasetQuestionMapper.selectCount(new LambdaQueryWrapper<DatasetQuestionDO>()
.eq(DatasetQuestionDO::getDatasetId, dataset.getId()));
dataset.setDataLength(count);
datasetMapper.updateById(dataset);
}
List<DatasetFilesDO> txtFiles = insertDatasetFiles.stream()
.filter(datasetFilesDO -> datasetFilesDO.getDatasetFileUrl().toLowerCase().endsWith(".txt"))
.collect(Collectors.toList());
if (CollectionUtils.isNotEmpty(txtFiles)){
readTxtFile(txtFiles);
}
Long count = datasetQuestionMapper.selectCount(new LambdaQueryWrapper<DatasetQuestionDO>()
.eq(DatasetQuestionDO::getDatasetId, dataset.getId()));
dataset.setDataLength(count);
datasetMapper.updateById(dataset);
return dataset.getId();
}
@ -107,6 +92,26 @@ public class DatasetServiceImpl implements DatasetService {
validateDatasetExists(updateReqVO.getId());
// 更新
DatasetDO updateObj = BeanUtils.toBean(updateReqVO, DatasetDO.class);
List<DatasetFilesSaveReqVO> datasetFiles = updateReqVO.getDatasetFiles();
if (CollectionUtils.isNotEmpty(datasetFiles)){
datasetFiles.stream().forEach(
datasetFilesSaveReqVO -> {
datasetFilesSaveReqVO.setDatasetId(updateObj.getId());
}
);
parseFile(datasetFiles);
Long count = datasetQuestionMapper.selectCount(new LambdaQueryWrapper<DatasetQuestionDO>()
.eq(DatasetQuestionDO::getDatasetId, updateObj.getId()));
updateObj.setDataLength(count);
Long annoCount = datasetQuestionMapper.selectCount(new LambdaQueryWrapper<DatasetQuestionDO>()
.eq(DatasetQuestionDO::getDatasetId, updateObj.getId())
.eq(DatasetQuestionDO::getStatus,2));
double ratio = count == 0 ? 0 : ((double) annoCount / count) *100;
String formattedRatio = String.format("%.2f%%", ratio);
if (formattedRatio != null){
updateObj.setAnnotateProgress(formattedRatio);
}
}
datasetMapper.updateById(updateObj);
}
@ -232,4 +237,22 @@ public class DatasetServiceImpl implements DatasetService {
});
}
/**
 * Persists the given dataset file records and parses their contents into dataset questions.
 * <p>
 * The files are batch-inserted (batch size 100), then dispatched to the matching reader
 * based on the file URL's extension: {@code .json} files go to {@code readJsonFile},
 * {@code .txt} files go to {@code readTxtFile}. Files with any other extension (or a
 * null URL) are stored but not parsed.
 *
 * @param datasetFiles file records to persist and parse; callers are expected to have
 *                     set the dataset id on each entry before calling
 */
public void parseFile(List<DatasetFilesSaveReqVO> datasetFiles) {
    List<DatasetFilesDO> insertDatasetFiles = BeanUtils.toBean(datasetFiles, DatasetFilesDO.class);
    datasetFilesMapper.insertBatch(insertDatasetFiles, 100);
    // 提取文件: route each stored file to the parser that matches its extension
    List<DatasetFilesDO> jsonFiles = filterByExtension(insertDatasetFiles, ".json");
    if (CollectionUtils.isNotEmpty(jsonFiles)) {
        readJsonFile(jsonFiles);
    }
    List<DatasetFilesDO> txtFiles = filterByExtension(insertDatasetFiles, ".txt");
    if (CollectionUtils.isNotEmpty(txtFiles)) {
        readTxtFile(txtFiles);
    }
}

/**
 * Returns the files whose URL ends (case-insensitively) with the given extension.
 * Entries with a null URL are skipped instead of throwing a NullPointerException.
 *
 * @param files     candidate file records
 * @param extension lowercase extension including the dot, e.g. {@code ".json"}
 * @return matching files, possibly empty, never null
 */
private List<DatasetFilesDO> filterByExtension(List<DatasetFilesDO> files, String extension) {
    return files.stream()
            .filter(f -> f.getDatasetFileUrl() != null
                    && f.getDatasetFileUrl().toLowerCase().endsWith(extension))
            .collect(Collectors.toList());
}
}