读取 PDF 文档中的文字并写入本地文档

/**
 * Extracts the text of every page of a fixed PDF file and writes it,
 * UTF-8 encoded, to a fixed output file.
 *
 * <p>The output is whatever {@code PDFTextStripper.writeText} emits through a
 * UTF-8 {@code OutputStreamWriter}; it is merely stored under a {@code .doc}
 * file name.
 *
 * <p>Error handling is best-effort: any {@link IOException} raised while
 * opening the PDF, creating the output file or extracting the text is printed
 * to standard error and the method returns without throwing. All resources
 * opened so far are closed in every case.
 */
private static void pdfToWord() {
    File file = new File("F:/使用Acrobat制作PDF模板说明.pdf");

    // Resources are closed in reverse order (writer, stream, document) even
    // when a later step fails, and a failed step aborts the conversion instead
    // of continuing with a null reference.
    try (PDDocument doc = PDDocument.load(file);
         FileOutputStream fos = new FileOutputStream("F:/使用Acrobat制作PDF模板说明.doc");
         Writer writer = new OutputStreamWriter(fos, "UTF-8")) {

        int pageCount = doc.getNumberOfPages(); // total number of pages

        PDFTextStripper stripper = new PDFTextStripper();
        stripper.setSortByPosition(true);  // enable position-based sorting of the text
        stripper.setStartPage(1);          // first page to convert
        stripper.setEndPage(pageCount);    // last page to convert

        stripper.writeText(doc, writer);
    } catch (IOException e) {
        // Covers FileNotFoundException and UnsupportedEncodingException too,
        // since both are IOException subclasses.
        e.printStackTrace();
    }
}

 

posted @ 2019-07-18 14:53  An-Optimistic-Person  阅读(419)  评论(0)  编辑  收藏  举报