Java 实现 PDF 文件转 Word
1 import java.io.File; 2 import java.io.FileOutputStream; 3 import java.io.IOException; 4 import java.io.OutputStreamWriter; 5 import java.io.Writer; 6 7 import org.apache.pdfbox.pdmodel.PDDocument; 8 import org.apache.pdfbox.util.PDFTextStripper; 9 10 public class PdfToWord{ 11 public static void main(String[] args){ 12 try{ 13 String pdfFile = "C:/xxxxx.pdf"; 14 PDDocument doc = PDDocument.load(new File(pdfFile)); 15 int pagenumber = doc.getNumberOfPages(); 16 pdfFile = pdfFile.substring(0, pdfFile.lastIndexOf(".")); 17 String fileName = pdfFile + ".doc"; 18 File file = new File(fileName); 19 if (!file.exists()){ 20 file.createNewFile(); 21 } 22 FileOutputStream fos = new FileOutputStream(fileName); 23 Writer writer = new OutputStreamWriter(fos, "UTF-8"); 24 PDFTextStripper stripper = new PDFTextStripper(); 25 stripper.setSortByPosition(true);// 排序 26 stripper.setStartPage(1);// 设置转换的开始页 27 stripper.setEndPage(pagenumber);// 设置转换的结束页 28 stripper.writeText(doc, writer); 29 writer.close(); 30 doc.close(); 31 System.out.println("pdf转换word成功!"); 32 } 33 catch (IOException e){ 34 e.printStackTrace(); 35 } 36 } 37 }
代码所需的 jar 包:pdfbox-1.8.2.jar(需另行下载)。