根据规则提取字符串中的内容

提取稿件内容文末的署名
以下格式:

 




/**
* 提取编辑器署名
*/
String TEMPLATE_WORDS_REPORT = "文/广州日报·新花城记者:";
String TEMPLATE_IMG_REPORT = "图/广州日报·新花城记者:";
String TEMPLATE_VIDEO_REPORT = "视频/广州日报·新花城记者:";
String TEMPLATE_WORDS_IMG_REPORT = "文、图/广州日报·新花城记者:";
String TEMPLATE_TXY = "通讯员:";
String TEMPLATE_EDITOR = "广州日报·新花城编辑:";



@ResponseBody
@RequestMapping(value = "/test1")
public String test1() {

TCmsContentVo content = dataHelper.getContent("e679bbb1c78c4a4a961e34cc7ba4c209");
// 媒体号
String txt = content.getTxt();
String result = "";
String wordsReport = extractStr(txt, PublishConstants.TEMPLATE_WORDS_REPORT);
String imgReport = extractStr(txt, PublishConstants.TEMPLATE_IMG_REPORT);
String videoReport = extractStr(txt, PublishConstants.TEMPLATE_VIDEO_REPORT);
String wordsImgReport = extractStr(txt, PublishConstants.TEMPLATE_WORDS_IMG_REPORT);
String editor = extractStr(txt, PublishConstants.TEMPLATE_EDITOR);
result = addStr(result, wordsReport);
result = addStr(result, imgReport);
result = addStr(result, videoReport);
result = addStr(result, wordsImgReport);
result = addStr(result, editor);

    // 过滤特殊字符并提取汉字
reportNames = getChinese(reportNames);
    // 记者信息
String reporterStr = content.getReporter();

JSONArray reporterArr = new JSONArray();
if (StringUtils.isNotBlank(reporterStr)) {
JSONArray reporterArray = JSONArray.fromObject(reporterStr);
for (Object reporter : reporterArray) {
JSONObject reporterObj = JSONObject.fromObject(reporter);
String reporterName = reporterObj.getString("reporterName");
result += "," + reporterName;
}
}
result = reomveRepeat(result);



return result;
}

private String addStr(String str, String add) {
if (str != null && !"".equals(str)) {
if (add != null && !"".equals(add)) {
str += "," + add;
}
} else {
if (add != null && !"".equals(add)) {
str = add;
}
}

return str;
}

private String reomveRepeat(String str){//数组去重
StringBuffer reStr = new StringBuffer();
if(reStr != null && !"".equals(reStr)){
String [] curr = str.split(",");
Set set = new HashSet();
for(String index:curr){//去重
set.add(index);
}
Object[] array = set.toArray();
String splitSetWithComma = StringUtils.join(array, ",");
reStr.append(splitSetWithComma) ;
}
return reStr.toString();
}


public String extractStr(String txt, String ruleStr) {
// 返回结构
String result = "";
// 截取规则之后的字符串
String wordsReportStr = "";
// 截取规则至</p>的字符串
String wordsPStr = "";
// 截取通讯录字符串
String wordsTXYStr = "";
// 通讯录
String txl = "";
// </p>所在位置
int wordsPInt = -1;
// 通讯录所在位置
int wordsTXYInt = -1;
// </span>所在位置
int wordsSpanInt = -1;

// 取得规则所在位置
int wordsReportInt = txt.indexOf(ruleStr);
// 如果存在
if (wordsReportInt != -1) {
// 截取规则字符串
wordsReportStr = txt.substring(wordsReportInt);

// 截至到“</p>”
wordsPInt = wordsReportStr.indexOf("</p>");
if (wordsPInt != -1) {
wordsPStr = wordsReportStr.substring(ruleStr.length(), wordsPInt);
// 如果有通讯员
wordsTXYInt = wordsPStr.indexOf(PublishConstants.TEMPLATE_TXY);
if (wordsTXYInt != -1) {
result = wordsPStr.substring(0, wordsTXYInt);
wordsTXYStr = wordsPStr.substring(wordsTXYInt);

// 是否有</span>
wordsSpanInt = wordsTXYStr.indexOf("</span>");
if (wordsSpanInt != -1) {
txl = wordsTXYStr.substring(PublishConstants.TEMPLATE_TXY.length(), wordsSpanInt);
result = result.trim() + "," +txl.trim();
} else {
txl = wordsTXYStr.substring(PublishConstants.TEMPLATE_TXY.length());
result = result.trim() + "," +txl.trim();
}

} else {
// 没有通讯员,是否有</span>
wordsSpanInt = wordsPStr.indexOf("</span>");
if (wordsSpanInt != -1) {
result = wordsPStr.substring(0, wordsSpanInt);
result = result.trim();
} else {
result = wordsPStr.trim();
}
}

}

}
result = result.replace("<br/>","");
result = result.replace("&nbsp;","");
return result;
}

/*1、至少匹配一个汉字的写法。
2、这两个unicode值正好是Unicode表中的汉字的头和尾。
3、"[]"代表里边的值出现一个就可以,后边的“+”代表至少出现1次,合起来即至少匹配一个汉字。
*/
public static String getChinese(String paramValue) {
paramValue = paramValue.replace("<br/>","");
paramValue = paramValue.replace("&nbsp;","");
paramValue = paramValue.replace("、",",");

String regex = "([\u4e00-\u9fa5,]+)";
String str = "";
Matcher matcher = Pattern.compile(regex).matcher(paramValue);
while (matcher.find()) {
str+= matcher.group(0);
}
return str;
}
posted @   FY丶  阅读(66)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· Open-Sora 2.0 重磅开源!
· 周边上新:园子的第一款马克杯温暖上架
点击右上角即可分享
微信分享提示