Java定位PDF中关键字的坐标
本文演示如何使用 itextpdf 操作 PDF 文件,并定位 PDF 文件中关键字的坐标。
测试结果:
测试的PDF文件如下:
junit测试输出坐标:
maven配置中引入itextpdf:
<!-- Pull in the iText PDF library -->
<dependency>
    <groupId>com.itextpdf</groupId>
    <artifactId>itextpdf</artifactId>
    <version>5.5.13</version>
</dependency>
定位工具类PdfHelper.java:
package com.alphajuns.util;

import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;

import java.io.IOException;

/**
 * Helper for locating the position of a keyword inside a PDF document.
 *
 * @author AlphaJunS
 * @version 1.0
 * @since 2020/3/7
 */
public class PdfHelper {

    /**
     * Opens the PDF at {@code filepath} and searches it for {@code keyWords}.
     *
     * <p>An {@link IOException} raised while opening or parsing the document is
     * printed to standard error and reported to the caller as {@code null}.
     * The reader is always closed before this method returns.
     *
     * @param filepath location of the PDF, handed unchanged to the {@link PdfReader} constructor
     * @param keyWords text to search for
     * @return a three-element array {@code {x, y, page}} as filled in by
     *         {@link CustomRenderListener}, or {@code null} when the keyword was
     *         not found or the document could not be read
     */
    public static float[] getKeyWordsByPath(String filepath, String keyWords) {
        float[] coordinate = null;
        PdfReader pdfReader = null;
        try {
            pdfReader = new PdfReader(filepath);
            coordinate = getKeyWords(pdfReader, keyWords);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release the underlying file handle even when parsing fails.
            if (pdfReader != null) {
                pdfReader.close();
            }
        }
        return coordinate;
    }

    /**
     * Walks the pages of an already opened document, in order, and stops at
     * the first page on which the listener reports a match.
     *
     * @param pdfReader open reader for the document; not closed by this method
     * @param keyWords  text to search for
     * @return the {@code {x, y, page}} array produced by the listener, or
     *         {@code null} when no page matched or parsing failed
     */
    private static float[] getKeyWords(PdfReader pdfReader, String keyWords) {
        float[] coordinate = null;
        try {
            int pageNum = pdfReader.getNumberOfPages();
            PdfReaderContentParser parser = new PdfReaderContentParser(pdfReader);
            CustomRenderListener renderListener = new CustomRenderListener();
            renderListener.setKeyWord(keyWords);
            // PDF page numbers are 1-based.
            for (int page = 1; page <= pageNum; page++) {
                // Tell the listener which page it is on so it can stamp the result.
                renderListener.setPage(page);
                parser.processContent(page, renderListener);
                coordinate = renderListener.getPcoordinate();
                if (coordinate != null) {
                    break;
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return coordinate;
    }
}
渲染监听器类CustomRenderListener.java:
package com.alphajuns.util; import com.itextpdf.awt.geom.Rectangle2D.Float; import com.itextpdf.text.pdf.parser.ImageRenderInfo; import com.itextpdf.text.pdf.parser.RenderListener; import com.itextpdf.text.pdf.parser.TextRenderInfo; /** * @Author AlphaJunS * @Date 12:53 2020/3/7 * @Description pdf签名帮助类 */ public class CustomRenderListener implements RenderListener{ private float[] pcoordinate = null; private String keyWord; private int page; public int getPage() { return page; } public void setPage(int page) { this.page = page; } public float[] getPcoordinate(){ return pcoordinate; } public String getKeyWord() { return keyWord; } public void setKeyWord(String keyWord) { this.keyWord = keyWord; } @Override public void beginTextBlock() {} @Override public void endTextBlock() {} @Override public void renderImage(ImageRenderInfo arg0) {} @Override public void renderText(TextRenderInfo textRenderInfo) { String text = textRenderInfo.getText(); if (null != text && text.contains(keyWord)) { Float boundingRectange = textRenderInfo.getBaseline().getBoundingRectange(); pcoordinate = new float[3]; pcoordinate[0] = boundingRectange.x; pcoordinate[1] = boundingRectange.y; pcoordinate[2] = page; } } }