对不规则格式的文本进行文字识别
/**
 * Punctuation that terminates a field value.
 * NOTE(review): this character class lists '(', ')' and ':' twice; full-width
 * variants may have been intended for the duplicates - confirm against real input.
 */
private static final java.util.regex.Pattern FIELD_DELIMITER =
        java.util.regex.Pattern.compile("[\\.,。()()::]");

/** Number of characters taken as the ID number after its label. */
private static final int ID_NUMBER_LENGTH = 18;

/**
 * Demo entry point: pulls the applicant name, ID number, address and contact
 * information out of one loosely formatted sentence and prints each value that
 * has a label in the text on its own line, in that order.
 *
 * @param args unused
 * @throws Exception kept for signature compatibility; nothing in the body is
 *                   expected to throw
 */
public static void main(String[] args) throws Exception {
    String strText = "申请人:张三,男,汉族,1999年9月9日生,身份证号:123456789012345678,住址:测试地址8幢8单元8号,联系方式:028—123456。";
    // Spaces are stripped up front so the fixed offsets after each label stay valid.
    String applicantInformation = strText.replace(" ", "");

    // Applicant name
    printIfPresent(extractField(applicantInformation, "申请人"));
    // ID number
    printIfPresent(extractIdNumber(applicantInformation));
    // Address
    printIfPresent(extractField(applicantInformation, "住址"));
    // Contact information
    printIfPresent(extractField(applicantInformation, "联系方式"));
}

/**
 * Prints {@code value} followed by a line break, or does nothing when the
 * field's label was not found (other input formats are not handled here).
 *
 * @param value extracted field value, or {@code null} when the label is absent
 */
private static void printIfPresent(String value) {
    if (value != null) {
        System.out.println(value);
    }
}

/**
 * Returns the text that follows the first occurrence of {@code label}, after an
 * optional ':' directly behind the label, up to the next delimiter.
 *
 * @param text  whitespace-free source text
 * @param label field label to look for, e.g. "住址"
 * @return the field value (possibly empty); the whole remainder when no
 *         delimiter follows; {@code null} when {@code label} does not occur
 */
private static String extractField(String text, String label) {
    int labelStart = text.indexOf(label);
    if (labelStart < 0) {
        return null;
    }
    // Inspect the character right after THIS occurrence of the label, so the
    // colon check and the offset always refer to the same position.
    int valueStart = skipColon(text, labelStart + label.length());
    String remainder = text.substring(valueStart);

    // find() locates the first delimiter directly; unlike split(...)[0] it
    // cannot fail on delimiter-only input, and it is not confused by line breaks.
    java.util.regex.Matcher delimiter = FIELD_DELIMITER.matcher(remainder);
    return delimiter.find() ? remainder.substring(0, delimiter.start()) : remainder;
}

/**
 * Returns up to {@link #ID_NUMBER_LENGTH} characters following the label
 * "身份证号", tolerating an optional "码" suffix on the label and an optional
 * ':' separator.
 *
 * @param text whitespace-free source text
 * @return the ID number (shorter than 18 characters if the text ends early),
 *         or {@code null} when the label does not occur
 */
private static String extractIdNumber(String text) {
    String label = "身份证号";
    int labelStart = text.indexOf(label);
    if (labelStart < 0) {
        return null;
    }
    int pos = labelStart + label.length();
    // Accept the longer label form "身份证号码".
    if (pos < text.length() && text.charAt(pos) == '码') {
        pos++;
    }
    pos = skipColon(text, pos);
    // Clamp to the end of the text so truncated input cannot throw.
    int end = Math.min(pos + ID_NUMBER_LENGTH, text.length());
    return text.substring(pos, end);
}

/**
 * Returns {@code pos + 1} when the character at {@code pos} is ':', otherwise
 * {@code pos} unchanged.
 */
private static int skipColon(String text, int pos) {
    if (pos < text.length() && text.charAt(pos) == ':') {
        return pos + 1;
    }
    return pos;
}