Merge remote-tracking branch 'origin/master'
This commit is contained in:
commit
5632c81f29
@ -1,6 +1,5 @@
|
||||
package cn.iocoder.yudao.module.llm.service.async;
|
||||
|
||||
import cn.hutool.json.JSONArray;
|
||||
import cn.hutool.json.JSONObject;
|
||||
import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
|
||||
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
|
||||
@ -125,7 +124,7 @@ public class AsyncDataProcessService {
|
||||
// Traditional-to-Simplified Chinese conversion, e.g. “不經意,妳的笑容” is cleaned to “不经意,你的笑容”
|
||||
JSONObject a_4 = a.getJSONObject("a_4");
|
||||
if (a_4.getBool("is_on")) {
|
||||
result = DataProcessUtil.TraditionalToSimplified(result);
|
||||
result = DataProcessUtil.traditionalToSimplified(result);
|
||||
}
|
||||
// Remove web markup: strip HTML tags from the document, e.g. <html>, <div>, <p>
|
||||
JSONObject a_5 = a.getJSONObject("a_5");
|
||||
|
@ -77,7 +77,7 @@ public class DataProcessUtil {
|
||||
* @param input
|
||||
* @return
|
||||
*/
|
||||
public static String TraditionalToSimplified(String input) {
|
||||
public static String traditionalToSimplified(String input) {
|
||||
return ZhConverterUtil.toSimple(input);
|
||||
}
|
||||
|
||||
@ -400,15 +400,16 @@ public class DataProcessUtil {
|
||||
text = hashMatcher.replaceAll("");
|
||||
|
||||
// 使用StringBuilder和StringBuilder的replace方法去除其他数字,但跳过年份和简单数字
|
||||
StringBuilder sb = new StringBuilder(text);
|
||||
int index = 0;
|
||||
while ((index = findNextNumberToReplace(sb.toString())) != -1) {
|
||||
String number = sb.substring(index, findEndOfNumber(sb.toString(), index));
|
||||
if (!isYear(number) && !isSimpleNumber(number)) {
|
||||
sb.replace(index, index + number.length(), "");
|
||||
}
|
||||
}
|
||||
return sb.toString();
|
||||
// TODO: 这里目前有bug,先注释掉了。
|
||||
// StringBuilder sb = new StringBuilder(text);
|
||||
// int index = 0;
|
||||
// while ((index = findNextNumberToReplace(sb.toString())) != -1) {
|
||||
// String number = sb.substring(index, findEndOfNumber(sb.toString(), index));
|
||||
// if (!isYear(number) && !isSimpleNumber(number)) {
|
||||
// sb.replace(index, index + number.length(), "");
|
||||
// }
|
||||
// }
|
||||
return text;
|
||||
}
|
||||
|
||||
// 查找下一个要替换的数字的起始索引
|
||||
@ -463,4 +464,18 @@ public class DataProcessUtil {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Ad-hoc manual check for two helpers of this class: passes a sample English
 * sentence to {@code removeIdentifiers} and a sample Traditional Chinese phrase
 * to {@code traditionalToSimplified}, printing the results to standard output.
 *
 * @param args command-line arguments; not read by this method
 */
public static void main(String[] args) {
|
||||
String textWithIdentifiers = "Here are some identifiers: 123-456-7890, 1234567812345678, a1b2c3d4e5f6a1b2c3d4e5f6, 2023, and 987654.";
|
||||
// Remove identifiers
|
||||
String textWithoutIdentifiers = removeIdentifiers(textWithIdentifiers);
|
||||
// Print the result
|
||||
System.out.println(textWithoutIdentifiers);
|
||||
|
||||
String traditionalText = "不經意,妳的笑容";
|
||||
String simplifiedText = traditionalToSimplified(traditionalText);
|
||||
|
||||
System.out.println("繁体文本: [" + traditionalText + "]");
|
||||
System.out.println("简体文本: [" + simplifiedText + "]");
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user