From 6604a24e307d2035f214d8ec5fb6e4899f2f74b3 Mon Sep 17 00:00:00 2001 From: baggio19852005 Date: Sat, 27 Sep 2025 15:44:45 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E5=A4=9A=E6=A8=A1=E6=80=81?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E9=9B=86=E7=9B=B8=E5=85=B3=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- yudao-module-llm/yudao-module-llm-biz/pom.xml | 12 +- .../module/llm/constant/DataConstants.java | 1 + .../constant/DatasetMoreModalConstants.java | 10 + .../admin/dataset/DatasetController.java | 15 + .../dataset/DatasetQuestionController.java | 45 +- .../admin/dataset/vo/DatasetPageReqVO.java | 2 + .../dataset/vo/DatasetQuestionRespVO.java | 3 + .../admin/dataset/vo/DatasetSaveReqVO.java | 2 + .../dataset/vo/ZipProcessingResultVo.java | 23 ++ .../modelservice/ModelServiceController.java | 8 + .../modelservice/vo/ModelServiceRespVO.java | 4 + .../dataobject/dataset/DatasetAnswerDO.java | 2 + .../llm/dal/dataobject/dataset/DatasetDO.java | 2 + .../dataobject/dataset/DatasetImagesDO.java | 48 +++ .../dataset/DatasetQuestionAnswerImageDO.java | 41 ++ .../dataobject/dataset/DatasetQuestionDO.java | 2 + .../mysql/dataset/DatasetImagesMapper.java | 30 ++ .../llm/dal/mysql/dataset/DatasetMapper.java | 1 + .../DatasetQuestionAnswerImageMapper.java | 27 ++ .../basemodel/BaseModelServiceImpl.java | 2 +- .../dataset/DatasetQuestionServiceImpl.java | 20 + .../llm/service/dataset/DatasetService.java | 5 + .../service/dataset/DatasetServiceImpl.java | 391 +++++++++++++++++- .../modelservice/ModelServiceService.java | 2 + .../modelservice/ModelServiceServiceImpl.java | 27 +- .../mapper/dataset/DatasetImagesMapper.xml | 12 + .../DatasetQuestionAnswerImagesMapper.xml | 12 + 27 files changed, 708 insertions(+), 41 deletions(-) create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/constant/DatasetMoreModalConstants.java create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/ZipProcessingResultVo.java create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetImagesDO.java create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetQuestionAnswerImageDO.java create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetImagesMapper.java create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetQuestionAnswerImageMapper.java create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/resources/mapper/dataset/DatasetImagesMapper.xml create mode 100644 yudao-module-llm/yudao-module-llm-biz/src/main/resources/mapper/dataset/DatasetQuestionAnswerImagesMapper.xml diff --git a/yudao-module-llm/yudao-module-llm-biz/pom.xml b/yudao-module-llm/yudao-module-llm-biz/pom.xml index a33f79feb..6474d8cab 100644 --- a/yudao-module-llm/yudao-module-llm-biz/pom.xml +++ b/yudao-module-llm/yudao-module-llm-biz/pom.xml @@ -19,12 +19,22 @@ - + + org.springframework + spring-test + + + cn.iocoder.boot yudao-module-llm-api ${revision} + + cn.iocoder.boot + yudao-module-infra-biz + 2.3.0-jdk8-SNAPSHOT + diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/constant/DataConstants.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/constant/DataConstants.java index 8befa2e26..258283d8d 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/constant/DataConstants.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/constant/DataConstants.java @@ -5,4 +5,5 @@ public class DataConstants { // 个人数据集标识 public static final int dataTypePrivate = 0; public static final int dataTypePublic = 1; + } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/constant/DatasetMoreModalConstants.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/constant/DatasetMoreModalConstants.java new file mode 100644 index 000000000..f0f804810 --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/constant/DatasetMoreModalConstants.java @@ -0,0 +1,10 @@ +package cn.iocoder.yudao.module.llm.constant; + +public class DatasetMoreModalConstants { + + public static String train_jsonfile="train.json"; + public static String images_path="images/"; + + public static int BUFFER_SIZE = 4096; // 缓冲区大小 + +} diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/DatasetController.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/DatasetController.java index 82f0f4188..0f56468fb 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/DatasetController.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/DatasetController.java @@ -50,6 +50,13 @@ public class DatasetController { return success(datasetService.createDataset(createReqVO)); } + @PostMapping("/createDatasetMoreModal") + @Operation(summary = "创建数据集") + // @PreAuthorize("@ss.hasPermission('llm:dataset:create')") + public CommonResult createDatasetMoreModal (@Valid @RequestBody DatasetSaveReqVO createReqVO) { + return success(datasetService.createDatasetMoreModal(createReqVO)); + } + @PutMapping("/update") @Operation(summary = "更新数据集") // @PreAuthorize("@ss.hasPermission('llm:dataset:update')") @@ -66,6 +73,14 @@ public class DatasetController { return success(list); } + @GetMapping("/all/{type}") + @Operation(summary = "查询所有数据集接口") + // @PreAuthorize("@ss.hasPermission('llm:dataset:query')") + public CommonResult> queryAllByBaseModelType (@PathVariable("type") Integer type) { + List list = datasetService.queryAllByBaseModelType(type); + return success(list); + } + @DeleteMapping("/delete") @Operation(summary = "删除数据集") @Parameter(name = "id", description = "编号", required = true) diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/DatasetQuestionController.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/DatasetQuestionController.java index fa585ce3b..76d8e0754 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/DatasetQuestionController.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/DatasetQuestionController.java @@ -1,11 +1,9 @@ package cn.iocoder.yudao.module.llm.controller.admin.dataset; -import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetAnswerRespVO; -import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetQuestionPageReqVO; -import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetQuestionRespVO; -import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetQuestionSaveReqVO; +import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.*; import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetQuestionDO; import cn.iocoder.yudao.module.llm.service.dataset.DatasetQuestionService; +import cn.iocoder.yudao.module.llm.service.dataset.DatasetService; import org.apache.poi.hssf.usermodel.HSSFCellStyle; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; @@ -49,6 +47,9 @@ public class DatasetQuestionController { @Resource private DatasetQuestionService datasetQuestionService; + @Resource + private DatasetService datasetService; + @PutMapping("data-anno") @Operation(summary = "保存标注接口") @@ -71,6 +72,10 @@ public class DatasetQuestionController { @ApiAccessLog(operateType = EXPORT) public void exportDatasetFilesExcel(@Valid DatasetQuestionPageReqVO pageReqVO, HttpServletResponse response) throws IOException { +// DatasetRespVO dataset = datasetService.getDataset(pageReqVO.getDatasetId()); +// if(dataset!=null&&dataset.getStatus()!=2){ +// throw new RuntimeException("只有状态为已完成的数据才能导出"); +// } HSSFWorkbook template = new HSSFWorkbook(); HSSFSheet sheet = template.createSheet(); // 创建样式并设置垂直居中 @@ -94,29 +99,31 @@ public class DatasetQuestionController { String system = item.getSystem(); String question = item.getQuestion(); List datasetAnswerRespVO = item.getDatasetAnswerRespVO(); - List collect = datasetAnswerRespVO.stream().map(DatasetAnswerRespVO::getAnswer).collect(Collectors.toList()); - if (collect.size() == 0){ - row = sheet.createRow(count); - row.createCell(0).setCellValue(system); - row.getCell(0).setCellStyle(cellStyle); - row.createCell(1).setCellValue(question); - row.getCell(1).setCellStyle(cellStyle); - row.createCell(2).setCellValue(""); - row.getCell(2).setCellStyle(cellStyle); - id.add(count); - count++; - }else { - for (String s : collect) { + if(datasetAnswerRespVO!=null&&datasetAnswerRespVO.size()>0){ + List collect = datasetAnswerRespVO.stream().map(DatasetAnswerRespVO::getAnswer).collect(Collectors.toList()); + if (collect.size() == 0){ row = sheet.createRow(count); row.createCell(0).setCellValue(system); row.getCell(0).setCellStyle(cellStyle); row.createCell(1).setCellValue(question); row.getCell(1).setCellStyle(cellStyle); - row.createCell(2).setCellValue(s); + row.createCell(2).setCellValue(""); row.getCell(2).setCellStyle(cellStyle); + id.add(count); count++; + }else { + for (String s : collect) { + row = sheet.createRow(count); + row.createCell(0).setCellValue(system); + row.getCell(0).setCellStyle(cellStyle); + row.createCell(1).setCellValue(question); + row.getCell(1).setCellStyle(cellStyle); + row.createCell(2).setCellValue(s); + row.getCell(2).setCellStyle(cellStyle); + count++; + } + id.add(count-1); } - id.add(count-1); } } //合并相同内容的单元格 diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetPageReqVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetPageReqVO.java index 8ab2b84cb..1bc1c08f1 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetPageReqVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetPageReqVO.java @@ -49,4 +49,6 @@ public class DatasetPageReqVO extends PageParam { @Schema(description = "标注进度", example = "20") private Integer annotateProgress; + private Integer datasetParentType; + } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetQuestionRespVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetQuestionRespVO.java index a9407517a..6d753c74f 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetQuestionRespVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetQuestionRespVO.java @@ -40,4 +40,7 @@ public class DatasetQuestionRespVO { @Schema(description = "标注内容") private List datasetAnswerRespVO; + @Schema(description = "问题对应的图片") + private List imagesList; + } \ No newline at end of file diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetSaveReqVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetSaveReqVO.java index e25f32e2a..05d7428b3 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetSaveReqVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/DatasetSaveReqVO.java @@ -46,5 +46,7 @@ public class DatasetSaveReqVO { @Schema(description = "数据集数据文件", example = "[]") private List datasetFiles; + @Schema(description = "数据集父类型(1文本数据据,2多模态数据集)", example = "1") + private Integer datasetParentType; } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/ZipProcessingResultVo.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/ZipProcessingResultVo.java new file mode 100644 index 000000000..2f1e3e3d2 --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/dataset/vo/ZipProcessingResultVo.java @@ -0,0 +1,23 @@ +package cn.iocoder.yudao.module.llm.controller.admin.dataset.vo; + +import lombok.Data; + +import java.io.Serializable; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +@Data +public class ZipProcessingResultVo implements Serializable { + + private String trailJson; + + private List> imagesList; + + + public ZipProcessingResultVo(String trailJson,List> imagesList){ + this.imagesList=imagesList; + this.trailJson=trailJson; + } + +} diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/modelservice/ModelServiceController.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/modelservice/ModelServiceController.java index 61c948946..48e8ae788 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/modelservice/ModelServiceController.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/modelservice/ModelServiceController.java @@ -157,6 +157,14 @@ public class ModelServiceController { return success(true); } + @PutMapping("/startDatasetMoreModal") + @Operation(summary = "启动模型") +// @PreAuthorize("@ss.hasPermission('llm:base-model:update')") + public CommonResult startDatasetMoreModal(@Valid @RequestBody ModelServiceSaveReqVO updateReqVO) { + modelServiceService.startDatasetMoreModal(updateReqVO); + return success(true); + } + @PutMapping("/disable") @Operation(summary = "禁用模型") // @PreAuthorize("@ss.hasPermission('llm:base-model:update')") diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/modelservice/vo/ModelServiceRespVO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/modelservice/vo/ModelServiceRespVO.java index 3b936c8fb..e57fcc7b9 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/modelservice/vo/ModelServiceRespVO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/controller/admin/modelservice/vo/ModelServiceRespVO.java @@ -78,4 +78,8 @@ public class ModelServiceRespVO { @Schema(description = "api秘钥") private String BaseApiKey; + + @Schema(description = "模型类型") + @ExcelProperty("模型类型") + private String modelType; } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetAnswerDO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetAnswerDO.java index 42937de06..a84d2d43b 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetAnswerDO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetAnswerDO.java @@ -44,4 +44,6 @@ public class DatasetAnswerDO extends BaseDO { */ private String answer; + private String answerFrom; + } \ No newline at end of file diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetDO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetDO.java index 3d8ecbb9a..e2059eb7a 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetDO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetDO.java @@ -74,5 +74,7 @@ public class DatasetDO extends BaseDO { private String fileUrl; + private Integer datasetParentType; + } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetImagesDO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetImagesDO.java new file mode 100644 index 000000000..4c22547d4 --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetImagesDO.java @@ -0,0 +1,48 @@ +package cn.iocoder.yudao.module.llm.dal.dataobject.dataset; + +import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; +import com.baomidou.mybatisplus.annotation.KeySequence; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.*; + +/** + * 数据集数据文件 DO + * + * @author 华大大模型 + */ +@TableName("llm_dataset_images") +@KeySequence("llm_dataset_images_seq") // 用于 Oracle、PostgreSQL、Kingbase、DB2、H2 数据库的主键自增。如果是 MySQL 等数据库,可不写。 +@Data +@EqualsAndHashCode(callSuper = true) +@ToString(callSuper = true) +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class DatasetImagesDO extends BaseDO { + + /** + * 数据集ID + */ + @TableId + private Long id; + /** + * 数据集ID + */ + private Long datasetId; + /** + * 数据长度 + */ + private Long dataLength; + /** + * 数据文件(文件表的ID) + */ + private Long datasetFile; + /** + * 文件URL地址 + */ + private String datasetImageUrl; + + private String datasetImageName; + +} \ No newline at end of file diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetQuestionAnswerImageDO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetQuestionAnswerImageDO.java new file mode 100644 index 000000000..5b7428afc --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetQuestionAnswerImageDO.java @@ -0,0 +1,41 @@ +package cn.iocoder.yudao.module.llm.dal.dataobject.dataset; + +import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO; +import com.baomidou.mybatisplus.annotation.KeySequence; +import com.baomidou.mybatisplus.annotation.TableId; +import com.baomidou.mybatisplus.annotation.TableName; +import lombok.*; + +/** + * 数据集数据文件 DO + * + * @author 华大大模型 + */ +@TableName("llm_dataset_question_answer_image") +@KeySequence("llm_dataset_question_answer_image_seq") // 用于 Oracle、PostgreSQL、Kingbase、DB2、H2 数据库的主键自增。如果是 MySQL 等数据库,可不写。 +@Data +@EqualsAndHashCode(callSuper = true) +@ToString(callSuper = true) +@Builder +@NoArgsConstructor +@AllArgsConstructor +public class DatasetQuestionAnswerImageDO extends BaseDO { + + /** + * 数据集ID + */ + @TableId + private Long id; + /** + * 数据集ID + */ + private Long datasetId; + + private Long questionId; + + private Long answerId; + private Long dataImageId; + private String imageUrl; + + +} \ No newline at end of file diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetQuestionDO.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetQuestionDO.java index eca10c37e..cdbe4e6f0 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetQuestionDO.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/dataobject/dataset/DatasetQuestionDO.java @@ -46,4 +46,6 @@ public class DatasetQuestionDO extends BaseDO { @TableField("`system`") private String system; + private String questionFrom; + } \ No newline at end of file diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetImagesMapper.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetImagesMapper.java new file mode 100644 index 000000000..de90a77d7 --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetImagesMapper.java @@ -0,0 +1,30 @@ +package cn.iocoder.yudao.module.llm.dal.mysql.dataset; + +import cn.iocoder.yudao.framework.common.pojo.PageResult; +import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX; +import cn.iocoder.yudao.framework.mybatis.core.query.LambdaQueryWrapperX; +import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.DatasetFilesPageReqVO; +import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetFilesDO; +import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetImagesDO; +import org.apache.ibatis.annotations.Mapper; + + +/** + * 数据集数据文件 Mapper + * + * @author 华大大模型 + */ +@Mapper +public interface DatasetImagesMapper extends BaseMapperX { + +// default PageResult selectPage(DatasetFilesPageReqVO reqVO) { +// return selectPage(reqVO, new LambdaQueryWrapperX() +// .eqIfPresent(DatasetFilesDO::getDatasetId, reqVO.getDatasetId()) +// .eqIfPresent(DatasetFilesDO::getDataLength, reqVO.getDataLength()) +// .eqIfPresent(DatasetFilesDO::getDatasetFile, reqVO.getDatasetFile()) +// .eqIfPresent(DatasetFilesDO::getDatasetFileUrl, reqVO.getDatasetFileUrl()) +// .betweenIfPresent(DatasetFilesDO::getCreateTime, reqVO.getCreateTime()) +// .orderByDesc(DatasetFilesDO::getId)); +// } + +} \ No newline at end of file diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetMapper.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetMapper.java index ee5fd3189..9fb290faf 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetMapper.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetMapper.java @@ -30,6 +30,7 @@ public interface DatasetMapper extends BaseMapperX { .eqIfPresent(DatasetDO::getDatasetFile, reqVO.getDatasetFile()) .eqIfPresent(DatasetDO::getDatasetType, reqVO.getDatasetType()) .eqIfPresent(DatasetDO::getDatasetFileUrl, reqVO.getDatasetFileUrl()) + .eq(DatasetDO::getDatasetParentType,reqVO.getDatasetParentType()) .betweenIfPresent(DatasetDO::getCreateTime, reqVO.getCreateTime()) .orderByDesc(DatasetDO::getId)); } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetQuestionAnswerImageMapper.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetQuestionAnswerImageMapper.java new file mode 100644 index 000000000..c38674067 --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/dal/mysql/dataset/DatasetQuestionAnswerImageMapper.java @@ -0,0 +1,27 @@ +package cn.iocoder.yudao.module.llm.dal.mysql.dataset; + +import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX; +import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetImagesDO; +import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetQuestionAnswerImageDO; +import org.apache.ibatis.annotations.Mapper; + + +/** + * 数据集数据文件 Mapper + * + * @author 华大大模型 + */ +@Mapper +public interface DatasetQuestionAnswerImageMapper extends BaseMapperX { + +// default PageResult selectPage(DatasetFilesPageReqVO reqVO) { +// return selectPage(reqVO, new LambdaQueryWrapperX() +// .eqIfPresent(DatasetFilesDO::getDatasetId, reqVO.getDatasetId()) +// .eqIfPresent(DatasetFilesDO::getDataLength, reqVO.getDataLength()) +// .eqIfPresent(DatasetFilesDO::getDatasetFile, reqVO.getDatasetFile()) +// .eqIfPresent(DatasetFilesDO::getDatasetFileUrl, reqVO.getDatasetFileUrl()) +// .betweenIfPresent(DatasetFilesDO::getCreateTime, reqVO.getCreateTime()) +// .orderByDesc(DatasetFilesDO::getId)); +// } + +} \ No newline at end of file diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/basemodel/BaseModelServiceImpl.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/basemodel/BaseModelServiceImpl.java index 1d09a254f..6a84f6982 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/basemodel/BaseModelServiceImpl.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/basemodel/BaseModelServiceImpl.java @@ -292,7 +292,7 @@ public class BaseModelServiceImpl implements BaseModelService { @Override public List listBaseModels() { LambdaQueryWrapper select = new LambdaQueryWrapper() - .select(BaseModelDO::getId, BaseModelDO::getModelName, BaseModelDO::getTheTuningName); + .select(BaseModelDO::getId, BaseModelDO::getModelName, BaseModelDO::getTheTuningName,BaseModelDO::getModelType); List selects = baseModelMapper.selectList(select); diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetQuestionServiceImpl.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetQuestionServiceImpl.java index a57e0dc38..03c9d0731 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetQuestionServiceImpl.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetQuestionServiceImpl.java @@ -2,12 +2,16 @@ package cn.iocoder.yudao.module.llm.service.dataset; import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.*; import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetAnswerDO; +import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetDO; +import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetQuestionAnswerImageDO; import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetQuestionDO; import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetAnswerMapper; import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetMapper; +import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetQuestionAnswerImageMapper; import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetQuestionMapper; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.toolkit.CollectionUtils; +import com.baomidou.mybatisplus.core.toolkit.Wrappers; import jodd.util.StringUtil; import org.springframework.stereotype.Service; @@ -43,6 +47,9 @@ public class DatasetQuestionServiceImpl implements DatasetQuestionService { @Resource private DatasetMapper datasetMapper; + @Resource + private DatasetQuestionAnswerImageMapper datasetQuestionAnswerImageMapper; + @Override public Long createDatasetQuestion(DatasetQuestionSaveReqVO createReqVO) { // 插入 @@ -83,6 +90,9 @@ public class DatasetQuestionServiceImpl implements DatasetQuestionService { @Override public PageResult getDatasetQuestionPage(DatasetQuestionPageReqVO pageReqVO) { PageResult datasetQuestionDOPageResult = datasetQuestionMapper.selectPage(pageReqVO); + Long datasetId = pageReqVO.getDatasetId(); + DatasetDO datasetDO = datasetMapper.selectById(datasetId); + Integer datasetParentType = datasetDO.getDatasetParentType(); PageResult result = BeanUtils.toBean(datasetQuestionDOPageResult, DatasetQuestionRespVO.class); if (CollectionUtils.isNotEmpty(result.getList())) { // result.getList().forEach(item -> { @@ -102,7 +112,17 @@ public class DatasetQuestionServiceImpl implements DatasetQuestionService { Map> collect1 = respVOS.stream().collect(Collectors.groupingBy(DatasetAnswerRespVO::getQuestionId)); list.forEach(item -> { item.setDatasetAnswerRespVO(collect1.get(item.getId())); + if(datasetParentType==2){ + LambdaQueryWrapper imagewrapper = new LambdaQueryWrapper() + .eq(DatasetQuestionAnswerImageDO::getQuestionId, item.getId()) + .eq(DatasetQuestionAnswerImageDO::getDatasetId,item.getDatasetId()); + + List datasetQuestionAnswerImageDOList = datasetQuestionAnswerImageMapper.selectList(imagewrapper); + List imageUrlList = datasetQuestionAnswerImageDOList.stream().map(DatasetQuestionAnswerImageDO::getImageUrl).collect(Collectors.toList()); + item.setImagesList(imageUrlList); + } }); + } return result; } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetService.java index e9c4545c5..f5a8998d5 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetService.java @@ -8,6 +8,7 @@ import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.*; import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetDO; import cn.iocoder.yudao.framework.common.pojo.PageResult; import cn.iocoder.yudao.framework.common.pojo.PageParam; +import org.springframework.transaction.annotation.Transactional; /** * 数据集 Service 接口 @@ -24,6 +25,9 @@ public interface DatasetService { */ Long createDataset(@Valid DatasetSaveReqVO createReqVO); + @Transactional + Long createDatasetMoreModal(DatasetSaveReqVO createReqVO); + /** * 更新数据集 * @@ -57,4 +61,5 @@ public interface DatasetService { List queryAll(); + List queryAllByBaseModelType(Integer type); } \ No newline at end of file diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetServiceImpl.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetServiceImpl.java index 8bc0d4ddc..5155c75af 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetServiceImpl.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/dataset/DatasetServiceImpl.java @@ -8,17 +8,13 @@ import cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil; import cn.iocoder.yudao.framework.common.pojo.PageResult; import cn.iocoder.yudao.framework.common.util.object.BeanUtils; import cn.iocoder.yudao.module.infra.api.file.FileApi; +import cn.iocoder.yudao.module.infra.service.file.FileService; import cn.iocoder.yudao.module.llm.constant.DataConstants; +import cn.iocoder.yudao.module.llm.constant.DatasetMoreModalConstants; import cn.iocoder.yudao.module.llm.controller.admin.dataset.dto.DataJsonTemplate; import cn.iocoder.yudao.module.llm.controller.admin.dataset.vo.*; -import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetAnswerDO; -import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetDO; -import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetFilesDO; -import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.DatasetQuestionDO; -import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetAnswerMapper; -import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetFilesMapper; -import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetMapper; -import cn.iocoder.yudao.module.llm.dal.mysql.dataset.DatasetQuestionMapper; +import cn.iocoder.yudao.module.llm.dal.dataobject.dataset.*; +import cn.iocoder.yudao.module.llm.dal.mysql.dataset.*; import cn.iocoder.yudao.module.llm.enums.DatasetStatusEnum; import cn.iocoder.yudao.module.llm.service.finetuningtask.FineTuningTaskService; import cn.iocoder.yudao.module.llm.service.modelassesstaskauto.ModelAssessTaskAutoService; @@ -26,6 +22,8 @@ import cn.iocoder.yudao.module.llm.service.modelassesstaskmanual.ModelAssessTask import cn.iocoder.yudao.module.llm.service.modelassesstaskmanualbackup.ModelAssessTaskManualBackupService; import cn.iocoder.yudao.module.llm.utils.DataSetReadFileUtils; import cn.iocoder.yudao.module.llm.utils.vo.CsvDataSetVO; +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.toolkit.CollectionUtils; import com.baomidou.mybatisplus.core.toolkit.StringUtils; @@ -41,6 +39,7 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; +import org.springframework.mock.web.MockMultipartFile; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.springframework.validation.annotation.Validated; @@ -49,9 +48,12 @@ import javax.annotation.Resource; import java.io.*; import java.net.HttpURLConnection; import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; +import java.nio.charset.StandardCharsets; +import java.nio.file.Paths; +import java.time.LocalDateTime; +import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; @@ -92,6 +94,20 @@ public class DatasetServiceImpl implements DatasetService { @Resource private ModelAssessTaskManualBackupService modelAssessTaskManualBackupService; + + @Resource + private DatasetImagesMapper datasetImagesMapper; + @Resource + private FileService fileService; + + @Resource + private DatasetQuestionAnswerImageMapper datasetQuestionAnswerImageMapper; + + private static final Pattern IMAGE_FILE_EXTENSION_PATTERN = Pattern.compile("\\.([^.]+)$"); + + //json文件中的地址前缀都有一个image/; + private static final String JSON_IMAGE_PERFEX="image/"; + private static long getFileContentLength(File file) throws IOException { FileInputStream fis = new FileInputStream(file); byte[] buffer = new byte[1024]; @@ -164,6 +180,318 @@ public class DatasetServiceImpl implements DatasetService { return dataset.getId(); } + + + + + + /**多模让i数据集创建 + * + * */ + @Override + @Transactional + public Long createDatasetMoreModal(DatasetSaveReqVO createReqVO) { + // 校验 + validateDatasetNameExists(createReqVO); + if (createReqVO.getType() == null) { + createReqVO.setType(0); + } + List datasetFiles = createReqVO.getDatasetFiles(); + // 插入 + DatasetDO dataset = BeanUtils.toBean(createReqVO, DatasetDO.class); + datasetMapper.insert(dataset); + + if (CollectionUtils.isNotEmpty(datasetFiles)) { + datasetFiles.stream().forEach( + datasetFilesSaveReqVO -> { + datasetFilesSaveReqVO.setDatasetId(dataset.getId()); + } + ); +// parseFile(datasetFiles); + resolveZipFileDatasetMoreModal(datasetFiles); + + Long count = datasetQuestionMapper.selectCount(new LambdaQueryWrapper() + .eq(DatasetQuestionDO::getDatasetId, dataset.getId())); + + if (count <= 0) { + throw new ServiceException(new ErrorCode(20000, "数据集问题不能为空")); + } + + dataset.setDataLength(count); + Long annoCount = datasetQuestionMapper.selectCount(new LambdaQueryWrapper() + .eq(DatasetQuestionDO::getDatasetId, dataset.getId()) + .eq(DatasetQuestionDO::getStatus, 2)); + double ratio = count == 0 ? 0 : ((double) annoCount / count) * 100; + Integer formattedRatio = ratio == 0 ? 0 : (int) ratio; + Integer status = formattedRatio == 100 ? 2 : 1; + if (formattedRatio != null) { + dataset.setAnnotateProgress(formattedRatio); + } + if (annoCount == 0) { + status = 0; + } + if (CollectionUtils.isEmpty(datasetFiles)) { + throw new ServiceException(new ErrorCode( + 20000, "数据集文件不能为空")); + } +// if (dataset.getDatasetType() == 2) { +// if (status != 2) { +// throw new ServiceException(new ErrorCode( +// 20000, "评估数据集只能上传标注完成的数据")); +// } +// } else { +// if (dataset.getStatus() != status) { +// throw new ServiceException(new ErrorCode( +// 20000, "数据集标注状态错误!应该是【" + DatasetStatusEnum.getStatusByName(status) + "】")); +// } +// } + + dataset.setStatus(status); + datasetMapper.updateById(dataset); + } + return dataset.getId(); + } + + + //解析多模态的zip文件 + public void resolveZipFileDatasetMoreModal(List datasetFiles){ + List insertDatasetFiles = BeanUtils.toBean(datasetFiles, DatasetFilesDO.class); + datasetFilesMapper.insertBatch(insertDatasetFiles, 100); + List zipFiles = insertDatasetFiles.stream() + .filter(datasetFilesDO -> datasetFilesDO.getDatasetFileUrl().toLowerCase().endsWith(".zip")) + .collect(Collectors.toList()); +// List> + zipFiles.forEach(datasetFilesDO ->{ + HttpURLConnection connection = DataSetReadFileUtils.readFile(datasetFilesDO.getDatasetFileUrl()); + List> questionAnswerList=new ArrayList<>(); + if (connection != null) { + try { + InputStream inputStream = connection.getInputStream(); + ZipProcessingResultVo zipProcessingResultVo = processZipFileStream(inputStream); + String trailJson = zipProcessingResultVo.getTrailJson(); + JSONArray jsonArray=JSONArray.parseArray(trailJson); + if(jsonArray!=null&&jsonArray.size()>0){ + for (int i=0;i quesionanswermap=new HashMap<>(); + JSONObject conversationsJson=conversationsJsonArray.getJSONObject(j); + if(j%2==0){ + DatasetQuestionDO qdo=new DatasetQuestionDO(); + qdo.setDatasetId(datasetFiles.get(0).getDatasetId()); + qdo.setDatasetFilesId(datasetFilesDO.getId()); + String question=conversationsJson.getString("value"); + String questionFrom=conversationsJson.getString("from"); + qdo.setQuestion(question); + qdo.setCreateTime(LocalDateTime.now()); + qdo.setQuestionFrom(questionFrom); + datasetQuestionMapper.insert(qdo); + //获取回答数据 + JSONObject conversationsAnswerJson=conversationsJsonArray.getJSONObject(j+1); + DatasetAnswerDO ado=new DatasetAnswerDO(); + ado.setDatasetId(datasetFiles.get(0).getDatasetId()); + ado.setDatasetFilesId(datasetFilesDO.getId()); + String answer=conversationsAnswerJson.getString("value"); + String answerfrom=conversationsAnswerJson.getString("from"); + ado.setAnswer(answer); + ado.setAnswerFrom(answerfrom); + ado.setCreateTime(LocalDateTime.now()); + ado.setQuestionId(qdo.getId()); + datasetAnswerMapper.insert(ado); + quesionanswermap.put("questionid",qdo.getId()); + quesionanswermap.put("answerid",ado.getId()); + quesionanswermap.put("images",imagesJsonArray); + questionAnswerList.add(quesionanswermap); + } + } + } + } + //将图片路径入库 + List> imagesList = zipProcessingResultVo.getImagesList(); + List datasetimagesdolist=new ArrayList<>(); + List datasetQuestionAnswerImageDOList=new ArrayList<>(); + imagesList.forEach(url ->{ + DatasetImagesDO ido=new DatasetImagesDO(); + ido.setDatasetId(datasetFilesDO.getDatasetId()); + ido.setDatasetFile(datasetFilesDO.getDatasetFile()); + ido.setDatasetImageUrl(url.get("url").toString()); + ido.setCreateTime(LocalDateTime.now()); +// ido.setDatasetImageName() +// datasetimagesdolist.add(ido); + datasetImagesMapper.insert(ido); + //向llm_dataset_question_answer_image表中插入数据 + for(int i=0;i map = questionAnswerList.get(i); + Long questionid=Long.parseLong(map.get("questionid").toString()); + Long answerid=Long.parseLong(map.get("answerid").toString()); + JSONArray imagesJsonArray= (JSONArray) map.get("images"); + String imagename=url.get("imagename"); + for (int k=0;k> uploadedImageUrls = new ArrayList<>(); + + // 使用 try-with-resources 确保 ZipInputStream 被正确关闭 + try (ZipInputStream zis = new ZipInputStream(zipInputStream, StandardCharsets.UTF_8)) { // 假设ZIP文件条目名称是UTF-8编码 + ZipEntry entry; + // 遍历ZIP文件中的每一个条目 + while ((entry = zis.getNextEntry()) != null) { + String entryName = entry.getName(); + + // 忽略目录条目 + if (entry.isDirectory()) { + zis.closeEntry(); + continue; + } + String jsonfilename=entryName.split("/")[entryName.split("/").length-1]; + // 1. 获取 train.jsonl 文件内容 + if (DatasetMoreModalConstants.train_jsonfile.equals(jsonfilename)) { + trainJsonlContent = readEntryContentAsString(zis); + System.out.println("成功读取 train.jsonl 内容,大小: " + (trainJsonlContent != null ? trainJsonlContent.length() : 0) + " 字符。"); + } + // 2. 处理 images 目录下的图片 + else if (isImageFile(entryName)) { + Map map=new HashMap<>(); + // 提取文件名 (例如:img1.jpg) + String filename = Paths.get(entryName).getFileName().toString(); + String contentType = getContentType(filename); + + // 获取图片大小,用于上传服务 + long imageSize = entry.getSize(); + + // 核心:直接将zis(当前entry的流)传递给ImageUploadService + // 注意:如果entry.getSize()为-1,依赖于ImageUploadService的实现 + // 如果ImageUploadService不能处理未知大小的流,则此处需要先将流读入内存 + String imageUrl; + if (imageSize == -1) { + System.err.println("Warning: Image entry " + entryName + " has unknown size (-1). Reading to ByteArrayOutputStream first for upload."); + byte[] imageData = readEntryContentAsBytes(zis); // 读取到内存 + imageSize = imageData.length; + InputStream tempStream = new java.io.ByteArrayInputStream(imageData); + MockMultipartFile file = new MockMultipartFile("file", filename, contentType, tempStream); + imageUrl = fileService.createFile(file.getOriginalFilename(), "", IoUtil.readBytes(file.getInputStream())); + } else { + byte[] imageData = readEntryContentAsBytes(zis); + InputStream tempStream = new java.io.ByteArrayInputStream(imageData); + MockMultipartFile file = new MockMultipartFile("file", filename, contentType, tempStream); + // 推荐方式:直接传递zis,ImageUploadService从zis中读取entry内容 + imageUrl = fileService.createFile(file.getOriginalFilename(), "", IoUtil.readBytes(file.getInputStream())); + } + map.put("url",imageUrl); + map.put("imagename",filename); + uploadedImageUrls.add(map); + System.out.println("成功上传图片: " + filename + ",URL: " + imageUrl); + } + + zis.closeEntry(); // 关闭当前条目,准备读取下一个 + } + } catch (IOException e) { + System.err.println("处理ZIP文件流失败: " + e.getMessage()); + throw e; // 重新抛出,让控制器处理 + } + + // 检查是否找到了必要文件/图片 (可选,根据业务需求) +// if (trainJsonlContent == null) { +// System.out.println("警告: ZIP文件中未找到 " + TRAIN_JSONL_PATH); +// // throw new IOException(TRAIN_JSONL_PATH + " 文件缺失。"); +// } +// if (uploadedImageUrls.isEmpty()) { +// System.out.println("警告: ZIP文件中未找到 " + IMAGES_DIR_PATH_PREFIX + " 目录下的任何图片。"); +// } + + return new ZipProcessingResultVo(trainJsonlContent, uploadedImageUrls); + } + + + private String readEntryContentAsString(ZipInputStream zis) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + byte[] buffer = new byte[DatasetMoreModalConstants.BUFFER_SIZE]; + int len; + while ((len = zis.read(buffer)) != -1) { + baos.write(buffer, 0, len); + } + return baos.toString(StandardCharsets.UTF_8.name()); // 假设 train.jsonl 是 UTF-8 编码 + } + + /** + * 从 ZipInputStream 中读取当前条目的所有内容作为字节数组。 + * 主要用于 ZipEntry.getSize() 为 -1 时作为备用方案,或者 ImageUploadService 不支持流式上传时。 + */ + private byte[] readEntryContentAsBytes(ZipInputStream zis) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + byte[] buffer = new byte[DatasetMoreModalConstants.BUFFER_SIZE]; + int len; + while ((len = zis.read(buffer)) != -1) { + baos.write(buffer, 0, len); + } + return baos.toByteArray(); + } + + /** + * 判断文件名是否为常见图片格式。 + */ + private boolean isImageFile(String filename) { + String lowerCaseFilename = filename.toLowerCase(); + return lowerCaseFilename.endsWith(".jpg") || + lowerCaseFilename.endsWith(".jpeg") || + lowerCaseFilename.endsWith(".png") || + lowerCaseFilename.endsWith(".gif") || + lowerCaseFilename.endsWith(".bmp") || + lowerCaseFilename.endsWith(".webp"); + } + + /** + * 根据文件名后缀获取MIME Content-Type。 + */ + private String getContentType(String filename) { + Matcher matcher = IMAGE_FILE_EXTENSION_PATTERN.matcher(filename.toLowerCase()); + if (matcher.find()) { + String extension = matcher.group(1); + switch (extension) { + case "jpg": + case "jpeg": return "image/jpeg"; + case "png": return "image/png"; + case "gif": return "image/gif"; + case "bmp": return "image/bmp"; + case "webp": return "image/webp"; + } + } + return "application/octet-stream"; // 默认MIME类型 + } + + + @Override @Transactional public void updateDataset(DatasetSaveReqVO updateReqVO) { @@ -364,8 +692,10 @@ public class DatasetServiceImpl implements DatasetService { public DatasetRespVO getDataset(Long id) { DatasetDO datasetDO = datasetMapper.selectById(id); DatasetRespVO datasetRespVO = BeanUtils.toBean(datasetDO, DatasetRespVO.class); + Integer datasetParentType = datasetDO.getDatasetParentType(); List datasetFilesDOS = datasetFilesMapper.selectList(new LambdaQueryWrapper().eq(DatasetFilesDO::getDatasetId, id)); datasetRespVO.setDatasetFiles(BeanUtils.toBean(datasetFilesDOS, DatasetFilesRespVO.class)); + /*List datasetQuestionDO = datasetQuestionMapper.selectList(new LambdaQueryWrapper().eq(DatasetQuestionDO::getDatasetId, id)); List datasetQuestionRespVOS = BeanUtils.toBean(datasetQuestionDO, DatasetQuestionRespVO.class); datasetRespVO.setDatasetQuestionRespVOS(datasetQuestionRespVOS);*/ @@ -387,7 +717,41 @@ public class DatasetServiceImpl implements DatasetService { result.add(datasetRespVOS0); result.add(datasetRespVOS1);*/ List datasetDOS = datasetMapper.selectList(new LambdaQueryWrapper() - .eq(DatasetDO::getStatus, 2)); // 获取所有数据集 + .eq(DatasetDO::getStatus, 2) + ); // 获取所有数据集 + + // 创建两个根节点,分别代表两种 type + DatasetTreeNode privateRoot = new DatasetTreeNode(DataConstants.dataTypePrivate); + DatasetTreeNode publicRoot = new DatasetTreeNode(DataConstants.dataTypePublic); + + for (DatasetDO datasetDO : datasetDOS) { + DatasetRespVO datasetRespVO = BeanUtils.toBean(datasetDO, DatasetRespVO.class); + // 根据 type 字段决定节点的位置 + if (datasetRespVO.getType() == DataConstants.dataTypePrivate) { + privateRoot.getChildren().add(datasetRespVO); + } else if (datasetRespVO.getType() == DataConstants.dataTypePublic) { + publicRoot.getChildren().add(datasetRespVO); + } + } + List root = new ArrayList<>(); + root.add(privateRoot); + root.add(publicRoot); + return root; + } + + @Override + public List queryAllByBaseModelType(Integer type) { + /*List datasetDOS0 = datasetMapper.selectList(new LambdaQueryWrapper().eq(DatasetDO::getType, DataConstants.dataTypePrivate)); + List datasetRespVOS0 = BeanUtils.toBean(datasetDOS0, DatasetRespVO.class); + List datasetDOS1 = datasetMapper.selectList(new LambdaQueryWrapper().eq(DatasetDO::getType, DataConstants.dataTypePublic)); + List datasetRespVOS1 = BeanUtils.toBean(datasetDOS1, DatasetRespVO.class); + List> result = new ArrayList<>(); + result.add(datasetRespVOS0); + result.add(datasetRespVOS1);*/ + List datasetDOS = datasetMapper.selectList(new LambdaQueryWrapper() + .eq(DatasetDO::getStatus, 2) + .eq(DatasetDO::getDatasetParentType,type) + ); // 获取所有数据集 // 创建两个根节点,分别代表两种 type DatasetTreeNode privateRoot = new DatasetTreeNode(DataConstants.dataTypePrivate); @@ -554,7 +918,8 @@ public class DatasetServiceImpl implements DatasetService { // 解析JSON数据 jsonParsing(content, datasetFilesDO); } catch (Exception e) { - throw exception(new ErrorCode(11000, "请正确上传json格式得数据!!!")); + e.printStackTrace(); +// throw exception(new ErrorCode(11000, "请正确上传json格式得数据!!!")); } finally { connection.disconnect(); } diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/modelservice/ModelServiceService.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/modelservice/ModelServiceService.java index bcebd2bfe..b20ac189b 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/modelservice/ModelServiceService.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/modelservice/ModelServiceService.java @@ -92,6 +92,8 @@ public interface ModelServiceService { void startTheModel(ModelServiceSaveReqVO updateReqVO); + void startDatasetMoreModal(ModelServiceSaveReqVO updateReqVO); + void disableTheModel(ModelServiceSaveReqVO updateReqVO); void inspectTheApplication(ModelServiceSaveReqVO updateReqVO); diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/modelservice/ModelServiceServiceImpl.java b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/modelservice/ModelServiceServiceImpl.java index 193c79d64..666d2da5e 100644 --- a/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/modelservice/ModelServiceServiceImpl.java +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/java/cn/iocoder/yudao/module/llm/service/modelservice/ModelServiceServiceImpl.java @@ -1,28 +1,23 @@ package cn.iocoder.yudao.module.llm.service.modelservice; -import cn.hutool.json.JSONUtil; import cn.iocoder.yudao.framework.common.exception.ErrorCode; import cn.iocoder.yudao.framework.common.pojo.PageResult; import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils; import cn.iocoder.yudao.framework.common.util.http.HttpUtils; import cn.iocoder.yudao.framework.common.util.object.BeanUtils; -import cn.iocoder.yudao.module.llm.controller.admin.basemodel.BaseModelController; import cn.iocoder.yudao.module.llm.controller.admin.modelservice.vo.ModelServicePageReqVO; import cn.iocoder.yudao.module.llm.controller.admin.modelservice.vo.ModelServiceRespVO; import cn.iocoder.yudao.module.llm.controller.admin.modelservice.vo.ModelServiceSaveReqVO; -import cn.iocoder.yudao.module.llm.dal.dataobject.application.ApplicationDO; import cn.iocoder.yudao.module.llm.dal.dataobject.basemodel.BaseModelDO; import cn.iocoder.yudao.module.llm.dal.dataobject.finetuningtask.FineTuningTaskDO; import cn.iocoder.yudao.module.llm.dal.dataobject.modelservice.ModelServiceDO; import cn.iocoder.yudao.module.llm.dal.dataobject.servername.ServerNameDO; -import cn.iocoder.yudao.module.llm.dal.mysql.basemodel.BaseModelMapper; import cn.iocoder.yudao.module.llm.dal.mysql.finetuningtask.FineTuningTaskMapper; import cn.iocoder.yudao.module.llm.dal.mysql.modelservice.ModelServiceMapper; import cn.iocoder.yudao.module.llm.framework.backend.config.LLMBackendProperties; import cn.iocoder.yudao.module.llm.service.application.ApplicationService; import cn.iocoder.yudao.module.llm.service.async.AsyncModelServiceService; import cn.iocoder.yudao.module.llm.service.basemodel.BaseModelService; -import cn.iocoder.yudao.module.llm.service.basemodel.BaseModelTaskService; import cn.iocoder.yudao.module.llm.service.http.ModelService; import cn.iocoder.yudao.module.llm.service.http.TrainHttpService; import cn.iocoder.yudao.module.llm.service.modelassesstaskauto.ModelAssessTaskAutoService; @@ -33,8 +28,6 @@ import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.ObjectUtils; -import org.apache.commons.lang3.StringUtils; import org.springframework.context.annotation.Lazy; import org.springframework.stereotype.Service; import org.springframework.validation.annotation.Validated; @@ -343,6 +336,7 @@ public class ModelServiceServiceImpl implements ModelServiceService { item.setFineTuningTaskName(fineTuningTaskDO.getModelName()); item.setBaseApiUrl(fineTuningTaskDO.getApiUrl()); item.setBaseApiKey(fineTuningTaskDO.getApiKey()); + item.setModelType(fineTuningTaskDO.getModelType()==null?"":fineTuningTaskDO.getModelType()); } }); } @@ -474,6 +468,25 @@ public class ModelServiceServiceImpl implements ModelServiceService { } } + @Override + public void startDatasetMoreModal(ModelServiceSaveReqVO updateReqVO) { + try { + Long fineTuningTask = updateReqVO.getFineTuningTask(); + BaseModelDO byAigcId = baseModelService.getById(fineTuningTask); + Map map = new HashMap<>(); + map.put("model",byAigcId.getModelName()); + log.info("开始请求", llmBackendProperties.getDeployModel()); + String resStr = HttpUtils.post(llmBackendProperties.getDeployModel(), null,JSON.toJSONString(map)); + log.info(" unActive:{}", resStr); + ModelServiceDO updateObj = BeanUtils.toBean(updateReqVO, ModelServiceDO.class); + updateObj.setStatus(1); + updateObj.setNumber(1); + modelServiceMapper.updateById(updateObj); + }catch (Exception e){ + log.error("启动基础模型状态时发生异常: {}", e.getMessage(), e); + } + } + @Override public void disableTheModel(ModelServiceSaveReqVO updateReqVO) { try { diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/resources/mapper/dataset/DatasetImagesMapper.xml b/yudao-module-llm/yudao-module-llm-biz/src/main/resources/mapper/dataset/DatasetImagesMapper.xml new file mode 100644 index 000000000..39c7cd57f --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/resources/mapper/dataset/DatasetImagesMapper.xml @@ -0,0 +1,12 @@ + + + + + + + \ No newline at end of file diff --git a/yudao-module-llm/yudao-module-llm-biz/src/main/resources/mapper/dataset/DatasetQuestionAnswerImagesMapper.xml b/yudao-module-llm/yudao-module-llm-biz/src/main/resources/mapper/dataset/DatasetQuestionAnswerImagesMapper.xml new file mode 100644 index 000000000..10699d9b6 --- /dev/null +++ b/yudao-module-llm/yudao-module-llm-biz/src/main/resources/mapper/dataset/DatasetQuestionAnswerImagesMapper.xml @@ -0,0 +1,12 @@ + + + + + + + \ No newline at end of file