//今天根据课本写了个 解析pdf文档的小程序
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;
/**
 * Small utility that extracts the text content of a PDF file using PDFBox,
 * either as an in-memory string or written straight to a text file.
 */
public class ExtractorPDF {

    /**
     * Extracts all text from a PDF file.
     *
     * @param file path of the PDF file to read
     * @return the extracted text, or {@code null} if the file could not be
     *         loaded or parsed (the stack trace is printed in that case)
     */
    public static String getText(String file) {
        PDDocument document = null;
        String text = null;
        try {
            // Load the PDF file.
            document = PDDocument.load(file);
            // Use PDFTextStripper to extract the text.
            PDFTextStripper stripper = new PDFTextStripper();
            text = stripper.getText(document);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeDocument(document);
        }
        return text;
    }

    /**
     * Extracts all text from a PDF file and writes it to a text file.
     * Prints a success message to standard output when the text was written,
     * otherwise prints a failure message followed by the stack trace.
     *
     * @param file path of the PDF file to read
     * @param txt  path of the text file to create or overwrite
     */
    public static void toTextFile(String file, String txt) {
        PDDocument document = null;
        PrintWriter pw = null;
        try {
            // Load the PDF file.
            document = PDDocument.load(file);
            // Use PDFTextStripper to extract the text.
            PDFTextStripper stripper = new PDFTextStripper();
            pw = new PrintWriter(new FileWriter(txt));
            stripper.writeText(document, pw);
            // PrintWriter never throws on write errors; checkError() flushes
            // the stream and reports whether any write failed, so that we do
            // not announce success for a truncated or empty output file.
            if (pw.checkError()) {
                throw new IOException("Failed to write text to " + txt);
            }
            System.out.println("成功写入文本文件" + txt);
        } catch (IOException e) {
            System.out.println("文本写入失败");
            e.printStackTrace();
        } finally {
            // Close the writer on every path so the output file handle is
            // released even when extraction fails part-way through.
            if (pw != null) {
                pw.close();
            }
            closeDocument(document);
        }
    }

    /**
     * Closes the given document if it is non-null, printing (rather than
     * propagating) any I/O error raised while closing.
     */
    private static void closeDocument(PDDocument document) {
        if (document == null) {
            return;
        }
        try {
            document.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: prints the text of a sample PDF and then writes the
     * same text to a file.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String s = getText("G:/学习资料/软件大赛学习资料/网上淘宝.pdf ");
        System.out.println(s);
        toTextFile("G:/学习资料/软件大赛学习资料/网上淘宝.pdf ", "G:/Lucene/PDF.txt");
    }
}