【Java解析PDF并获取想要的字符串】
public class pdfAnalysis { /** * @throws IOException * @param从网络上下载PDF,截取PDF字符串, */ public static void main(String[] args) throws IOException { // 下载的连接 下载下来的名字 下载下来的路径 // pdfAnalysis.downLoadByUrl("", "KK.pdf", "F:/"); // 读取文件 pdfAnalysis pdf = new pdfAnalysis(); // 读取文件 String pdfName = "F:\\CC.pdf"; // 解析PDF里的值 存入变量pdf_Body String pdf_Body = pdf.readFileOfPDF(pdfName); //System.out.println(pdf_Body); /* String str = pdf_Body.substring(pdf_Body.indexOf("Arrival"), pdf_Body.indexOf("Payment Details")); String str1 = str.substring(str.indexOf("H (")); String [] pp ={"Monday","Tuesday","Wednesday","Thursday","Friday","Saturday" ,"Sunday" }; for(String sto:pp){ if(str1.contains(sto)){ String result = str1.substring(str1.indexOf(sto)); //System.out.println(result); //System.out.println(result.length()); String result2 = result.substring(0,result.indexOf(",")); String result3 = result2.trim(); System.out.println("我要的时间:"+result3+"我是"+pdfName+"文件"); } }*/ /*if(str1.contains("Monday")||str1.contains("Tuesday")|| str1.contains("Wednesday")||str1.contains("Thursday")|| str1.contains("Friday")||str1.contains("Saturday")||str1.contains("Sunday")){ }*/ // System.out.println(str1); // 取出人名值 String name_Temp = pdf_Body.substring(pdf_Body.indexOf("Arrive"), pdf_Body.indexOf("passenger details")); // System.out.println(str); String name_Temp1 = null; String result_name = null; List<String> list_Name = new ArrayList<>(); for (int i = 1; i < name_Temp.length(); i++) { if (name_Temp.contains(i + ".")) { name_Temp1 = name_Temp.substring(name_Temp.indexOf(i + ".")); result_name = name_Temp1.substring(name_Temp1.indexOf(i + ".") + 3, name_Temp1.indexOf("Seat Number Services")); list_Name.add(result_name); } // System.out.println(add); // System.out.println(str2); if (name_Temp1.equals("null")) { continue; } } for (String i : list_Name) { System.out.println("所有的人名:" + i); }*/ if (pdfAnalysis.infile != null) { pdfAnalysis.infile.close(); System.out.println("我要准备关闭PDF文档了"); } } public static int appearNumber(String srcText, String temp) { int count = 0; Pattern p = Pattern.compile(temp); Matcher m = p.matcher(srcText); while (m.find()) { count++; } return count; } public static FileInputStream infile = null; public String readFileOfPDF(String pdfName) throws IOException { String context = null; File file = new File(pdfName);// 创建一个文件对象 try { infile = new FileInputStream(pdfName);// 创建一个文件输入流 // 新建一个PDF解析器对象 PDFParser parser = new PDFParser(infile); // 对PDF文件进行解析 parser.parse(); // 获取解析后得到的PDF文档对象 PDDocument pdfdocument = parser.getPDDocument(); // 新建一个PDF文本剥离器 PDFTextStripper stripper = new PDFTextStripper(); // 从PDF文档对象中剥离文本 context = stripper.getText(pdfdocument); System.out.println("PDF文件" + file.getAbsolutePath() + "的文本内容如下:"); // System.out.println(context); } catch (Exception e) { System.out.println("读取PDF文件" + file.getAbsolutePath() + "失败!" + e.getMessage()); } finally { if (infile != null) { try { infile.close(); } catch (IOException e1) { } } } return context; }