提取数据(实验室检查)

复制代码

/**
 * Extracts the laboratory-examination ("实验室检查") section from every PDF in a folder
 * and prints it to standard output as {@code name!type!content} records.
 *
 * <p>Parsing is line-based and driven by these markers in the extracted text:
 * <ul>
 *   <li>a line starting with "附件" announces that the plan title follows;</li>
 *   <li>the next line containing "诊疗方案" supplies the record name (the text before it);</li>
 *   <li>a line containing "、实验室检查" opens the section of interest;</li>
 *   <li>inside the section, a line starting with "(" opens a sub-section whose type is the
 *       text after the first three characters, and a line starting with "digit." opens a
 *       new item;</li>
 *   <li>a line whose second character is '、' closes the section.</li>
 * </ul>
 */
public class BeginPathology {

    /** Folder scanned when no folder is passed on the command line. */
    private static final String DEFAULT_FOLDER =
            "C:\\Users\\kfeng5\\OneDrive - DXC Production\\Desktop\\年假工作";

    /** Leading digits followed by the rest of the line; used to drop digit-only lines. */
    private static final Pattern LEADING_DIGITS = Pattern.compile("^\\d+(.*)");

    /** Plan title line; group 1 is the text before "诊疗方案". */
    private static final Pattern PLAN_TITLE = Pattern.compile("(.*)诊疗方案");

    /** Heading that opens the laboratory-examination section. */
    private static final Pattern LAB_HEADING = Pattern.compile("(.*)、实验室检查");

    /** Numbered item inside a sub-section; group 1 is the text after "digit.". */
    private static final Pattern NUMBERED_ITEM = Pattern.compile("^\\d\\.(.*)");

    /**
     * Scans a folder for {@code .pdf} files and prints the laboratory-examination records
     * found in each one.
     *
     * @param args optional; {@code args[0]} overrides the default folder to scan
     * @throws Exception if a PDF cannot be loaded or its text cannot be extracted
     */
    public static void main(String[] args) throws Exception {
        String folderPath = (args != null && args.length > 0) ? args[0] : DEFAULT_FOLDER;
        File[] files = new File(folderPath).listFiles();
        if (files == null) {
            // Not a directory, or it could not be listed: nothing to process.
            return;
        }

        for (File file : files) {
            if (!file.isFile() || !file.getName().endsWith(".pdf")) {
                continue;
            }
            String text;
            // try-with-resources closes the document even when text extraction throws.
            try (PDDocument document = PDDocument.load(file)) {
                PDFTextStripper stripper = new PDFTextStripper();
                text = stripper.getText(document);
            }
            printLabSections(text.split("\n"));
        }
    }

    /**
     * Walks the text lines of one document and prints one record per item of the
     * laboratory-examination section.
     *
     * @param lines the document text split on {@code '\n'}; lines may still end in {@code '\r'}
     */
    private static void printLabSections(String[] lines) {
        boolean nameFlag = false;   // a "附件" line was seen; waiting for the title line
        boolean infoFlag = false;   // currently inside the laboratory-examination section
        boolean underFlag = false;  // at least one "(" sub-section heading has been seen
        String name = null;
        String type = null;
        StringBuilder sb = new StringBuilder();

        for (String line : lines) {
            String trimmed = line.trim();

            // Skip lines that consist of digits only.
            Matcher digitsMatcher = LEADING_DIGITS.matcher(trimmed);
            if (digitsMatcher.find() && StringUtils.isBlank(digitsMatcher.group(1))) {
                continue;
            }

            if (line.startsWith("附件")) {
                nameFlag = true;
                continue;
            }

            if (nameFlag) {
                Matcher titleMatcher = PLAN_TITLE.matcher(line);
                if (titleMatcher.find()) {
                    name = titleMatcher.group(1);
                    nameFlag = false;
                    continue;
                }
            }

            if (LAB_HEADING.matcher(trimmed).find()) {
                infoFlag = true;
                continue;
            }

            if (!infoFlag) {
                continue;
            }

            if (line.length() > 1) {
                // A line whose second character is '、' closes the section: emit what is
                // pending and stop reading this document.
                if (line.charAt(1) == '、') {
                    System.out.println(name + "!" + type + "!" + sb.toString().replaceAll("\r", ""));
                    infoFlag = false;
                    break;
                }
            } else {
                System.out.println(name + "!" + "此行没有第二个字符,不能检测是否有顿号");
            }

            // TODO: unrelated lines that also start with "(" are treated as sub-section
            // headings and produce malformed records; needs a stricter check.
            if (line.startsWith("(")) {
                sb = getNewBuilder(sb, name, type);
                // The heading prefix is assumed to be three characters; a shorter line
                // yields an empty type instead of throwing.
                type = (line.length() >= 3 ? line.substring(3) : "").replaceAll("\r", "");
                underFlag = true;
                continue;
            }

            if (underFlag) {
                String content = line;
                Matcher itemMatcher = NUMBERED_ITEM.matcher(trimmed);
                if (itemMatcher.find()) {
                    // A new numbered item starts: emit the previous one first.
                    sb = getNewBuilder(sb, name, type);
                    content = itemMatcher.group(1);
                }
                sb.append(content);
            }
        }

        // The document ended while still inside the section, so no closing heading
        // triggered the final print; emit whatever is still pending.
        if (infoFlag) {
            getNewBuilder(sb, name, type);
        }
    }

    /**
     * Prints the pending record when the buffer holds text and hands back an empty buffer;
     * otherwise returns the buffer unchanged.
     *
     * @param sb   buffer holding the text accumulated so far; may be {@code null}
     * @param name record name printed as the first field
     * @param type sub-section type printed as the second field
     * @return a fresh builder if {@code sb} was {@code null} or was printed, else {@code sb}
     */
    private static StringBuilder getNewBuilder(StringBuilder sb, String name, String type) {
        if (sb == null) {
            return new StringBuilder();
        }
        if (sb.length() != 0) {
            System.out.println(name + "!" + type + "!" + sb.toString().replaceAll("\r", ""));
            return new StringBuilder();
        }
        return sb;
    }
}

取 String 中指定位置的字符:对象.charAt(下标),下标从 0 开始,返回值为 char,可以用 == 与字符字面量(如 '、')比较。
比较两个 String 对象的内容时,用 对象1.equals(对象2) 方法,不要用 ==(== 比较的是引用是否相同,而不是内容)。
复制代码
posted @   Anne起飞记  阅读(17)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 全程不用写代码,我用AI程序员写了一个飞机大战
· DeepSeek 开源周回顾「GitHub 热点速览」
· 记一次.NET内存居高不下排查解决与启示
· MongoDB 8.0这个新功能碉堡了,比商业数据库还牛
· .NET10 - 预览版1新功能体验(一)
点击右上角即可分享
微信分享提示