maven之读写pdf简单实例(pdfbox与itext)与pdfbox源码解析(访问者模式)

记录学习的脚步

本文是用pdfbox读写pdf。但是因为pdfbox在写pdf的时候对中文的支持不好，会有乱码。我尝试着修改COSString的源码，试了UTF-8、UTF-16BE几种编码，中文输出还是乱码；接着把pdfbox parent中的pom 的 <project.build.sourceEncoding>ISO-8859-1</project.build.sourceEncoding> 属性改为UTF-8，还是不行。好吧，能力有限，还是放弃了。

所幸itext对中文的支持还不错使用itext进行写pdf

参考

pdfbox 官网 http://pdfbox.apache.org/cookbook/documentcreation.html

itext 官网 http://itextpdf.com/learn

下面 itext 中的代码来源于这位哥们写的 http://www.iteye.com/topic/1006313 本来是打算自己写的但是这哥们写的不错还有注释就直接用了

itext的更多详细的操作可参考 http://rensanning.iteye.com/blog/1538689 他写的很详细

1、先看pdfbox的读写pdf的代码

产生pdf的 SavePdfDocument.java类必要的地方都加了注释

package com.undergrowth.pdfbox;

import java.io.IOException;



import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.exceptions.COSVisitorException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;

/**
 * Generates a minimal one-page PDF document with PDFBox.
 *
 * @author Administrator
 * @date 2014-8-31
 * @version 1.0.0
 */
public class SavePdfDocument {

	/**
	 * Shared logger for this class.
	 */
	public static final Log logger=LogFactory.getLog(SavePdfDocument.class);

	/**
	 * Creates a new document with a single page, draws {@code sayWhat} on it
	 * and saves the result to {@code filePath}.
	 *
	 * <p>The content stream and the document are closed in {@code finally}
	 * blocks so that neither is leaked when drawing or saving fails.
	 *
	 * <p>NOTE: the surrounding article reports that non-Latin (e.g. Chinese)
	 * text comes out garbled when written this way.
	 *
	 * @param sayWhat  text to draw on the page
	 * @param filePath path of the PDF file to write
	 * @return {@code true} once the file has been written; failures are
	 *         reported through the declared exceptions instead
	 * @throws IOException if writing the page content or the file fails
	 * @throws COSVisitorException if PDFBox fails while generating the output
	 */
	public  boolean helloPdf(String sayWhat,String filePath) throws IOException, COSVisitorException{
		PDDocument document=getPdDocument();
		try {
			PDPage page=getPdPage();
			document.addPage(page);

			PDFont font=getFont();

			PDPageContentStream contentStream=getPdPageContentStream(document, page);
			try {
				contentStream.beginText();
				contentStream.setFont(font, 20);
				contentStream.moveTextPositionByAmount(200, 300);
				contentStream.drawString(sayWhat);
				contentStream.endText();
			} finally {
				// The page content stream must be closed before the document is saved.
				contentStream.close();
			}

			document.save(filePath);
		} finally {
			document.close();
		}

		logger.info("成功创建pdf");
		return true;
	}

	/**
	 * Creates a new, empty PDF document.
	 *
	 * @return a fresh {@link PDDocument}; the caller is responsible for closing it
	 */
	public PDDocument getPdDocument(){
		return new PDDocument();
	}

	/**
	 * Loads an existing PDF document from a file.
	 *
	 * @param fileName path of the PDF file to load
	 * @return the loaded {@link PDDocument}; the caller is responsible for closing it
	 * @throws IOException if the file cannot be read or parsed
	 */
	public PDDocument getPdDocument(String fileName) throws IOException{
		return PDDocument.load(fileName);
	}

	/**
	 * Creates a new, empty page.
	 *
	 * @return a fresh {@link PDPage}
	 */
	public PDPage getPdPage(){
		return new PDPage();
	}

	/**
	 * Returns the font used for the generated text.
	 *
	 * @return {@link PDType1Font#HELVETICA_BOLD}
	 */
	public PDFont getFont(){
		return PDType1Font.HELVETICA_BOLD;
	}

	/**
	 * Opens a content stream used to add content to {@code page}.
	 *
	 * @param document document the page belongs to
	 * @param page     page to write to
	 * @return a new {@link PDPageContentStream}; the caller must close it
	 * @throws IOException if the stream cannot be created
	 */
	public PDPageContentStream getPdPageContentStream(PDDocument document,PDPage page) throws IOException{
		return new PDPageContentStream(document, page);
	}

}

提取pdf的 PdfTextStripperTest.java

package com.undergrowth.pdfbox;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

public class PdfTextStripperTest {

	public static Log log=LogFactory.getLog(PdfTextStripperTest.class);

	/**
	 * Extracts the text of {@code document} and writes it to {@code writer}.
	 *
	 * @param document document to extract text from
	 * @param writer   destination for the extracted text
	 * @throws IOException if extraction or writing fails
	 */
	public void getTextStripper(PDDocument document, Writer writer)
			throws IOException {
		PDFTextStripper textStripper = new PDFTextStripper();
		textStripper.writeText(document, writer);
	}

	/**
	 * Loads the PDF at {@code fileName} and returns its text content.
	 *
	 * <p>The text is buffered as UTF-8 bytes and decoded with the same charset,
	 * so the result does not depend on the platform default encoding. The
	 * document is closed even when extraction fails.
	 *
	 * @param fileName path of the PDF document to load
	 * @return the extracted text
	 * @throws IOException if the document cannot be loaded or read
	 */
	public String getText(String fileName) throws IOException {
		SavePdfDocument pdfDocument = new SavePdfDocument();
		PDDocument document = pdfDocument.getPdDocument(fileName);
		ByteArrayOutputStream out = new ByteArrayOutputStream();
		try {
			OutputStreamWriter writer = new OutputStreamWriter(out, "UTF-8");
			getTextStripper(document, writer);
			// Closing the writer flushes any buffered characters into the byte buffer.
			writer.close();
		} finally {
			document.close();
		}
		String textString = new String(out.toByteArray(), "UTF-8");
		log.info("提取的文本内容为:"+textString);
		return textString;
	}
}

测试类

package com.undergrowth.pdfbox;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.exceptions.COSVisitorException;

import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;

/**
 * Unit test that writes a PDF with PDFBox and extracts text from a PDF.
 */
public class AppTest 
    extends TestCase
{

    /**
     * Create the test case
     *
     * @param testName name of the test case
     */
    public AppTest( String testName )
    {
        super( testName );
    }

    /**
     * @return the suite of tests being tested
     */
    public static Test suite()
    {
        return new TestSuite( AppTest.class );
    }

    /**
     * Writes "hello world" to a PDF, then extracts the text of a second PDF
     * and checks that something was extracted.
     *
     * <p>NOTE: the second file (E:\test11.pdf) is not produced by this test;
     * in the article it is generated by running PdfUtils.main beforehand.
     *
     * @throws IOException if a file cannot be written or read
     * @throws COSVisitorException if PDFBox fails while generating the output
     */
    public void testApp() throws COSVisitorException, IOException
    {
        SavePdfDocument pdfDocument=new SavePdfDocument();
        String filePath="e:\\hello.pdf";
        boolean f=pdfDocument.helloPdf(("hello world"), filePath);
        assertTrue( f );

        filePath="E:\\test11.pdf";

        PdfTextStripperTest textStripperTest=new PdfTextStripperTest();
        String stripperText = textStripperTest.getText(filePath);

        // Compare content, not references: assertNotSame only checks object
        // identity and would pass even for an empty extracted string.
        assertNotNull( stripperText );
        assertTrue( "extracted text must not be empty", stripperText.length() > 0 );
    }

}

2、使用itext进行写pdf

PdfUtils.java

package com.undergrowth.pdfbox;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;

import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Chapter;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.List;
import com.itextpdf.text.ListItem;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.Rectangle;
import com.itextpdf.text.Section;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PdfWriter;

/**
 * Helper for writing PDF files with iText and reading them back with PDFBox.
 * Source: http://www.iteye.com/topic/1006313
 *
 * @author Administrator
 */
public class PdfUtils {

	/**
	 * File name of the TrueType font used for Chinese text (SimHei regular).
	 * SIMFANG.TTF (FangSong regular) was the alternative considered.
	 */
	public static final String CHARACTOR_FONT_CH_FILE = "SIMHEI.TTF";

	public static final Rectangle PAGE_SIZE = PageSize.A4;
	public static final float MARGIN_LEFT = 50;
	public static final float MARGIN_RIGHT = 50;
	public static final float MARGIN_TOP = 50;
	public static final float MARGIN_BOTTOM = 50;
	public static final float SPACING = 20;

	/** Document currently being written, or null before createDocument is called. */
	private Document document = null;
	/** Output stream backing the current document. */
	private FileOutputStream out=null;

	/**
	 * Creates the target document for the exported data and opens it for writing.
	 * Failures while creating the file or the writer are printed, not thrown.
	 *
	 * @param fileName path of the file to write
	 */
	public void createDocument(String fileName) {
		File file = new File(fileName);
		out = null;
		document = new Document(PAGE_SIZE, MARGIN_LEFT, MARGIN_RIGHT, MARGIN_TOP, MARGIN_BOTTOM);
		try {
			out = new FileOutputStream(file);
			PdfWriter.getInstance(document, out);
		} catch (FileNotFoundException e) {
			e.printStackTrace();
		} catch (DocumentException e) {
			e.printStackTrace();
		}
		// Open the document so content can be added.
		document.open();
	}

	/**
	 * Writes a chapter to the current document, opening the document first if
	 * necessary. Does nothing when no document has been created.
	 *
	 * @param chapter chapter to add
	 */
	public void writeChapterToDoc(Chapter chapter) {
		try {
			if(document != null) {
				if(!document.isOpen()) document.open();
				document.add(chapter);
			}
		} catch (DocumentException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Creates a chapter.
	 *
	 * @param title chapter title
	 * @param chapterNum chapter number
	 * @param alignment 0 for left alignment, 1 for centered
	 * @param numberDepth 1 to show numbering (1. chapter, 1.1 section, ...), 0 to hide it
	 * @param font font of the title
	 * @return the new chapter
	 */
	public static Chapter createChapter(String title, int chapterNum, int alignment, int numberDepth, Font font) {
		Paragraph chapterTitle = new Paragraph(title, font);
		chapterTitle.setAlignment(alignment);
		Chapter chapter = new Chapter(chapterTitle, chapterNum);
		chapter.setNumberDepth(numberDepth);
		return chapter;
	}

	/**
	 * Appends a section to the given chapter.
	 *
	 * @param chapter chapter to append to
	 * @param title section title
	 * @param font font of the title
	 * @param numberDepth 1 to show numbering (1. chapter, 1.1 section, ...), 0 to hide it
	 * @return the new section, or null when {@code chapter} is null
	 */
	public static Section createSection(Chapter chapter, String title, Font font, int numberDepth) {
		Section section = null;
		if(chapter != null) {
			Paragraph sectionTitle = new Paragraph(title, font);
			sectionTitle.setSpacingBefore(SPACING);
			section = chapter.addSection(sectionTitle);
			section.setNumberDepth(numberDepth);
		}
		return section;
	}

	/**
	 * Creates a piece of text content.
	 *
	 * @param text the content
	 * @param font font of the content
	 * @return the content as a {@link Phrase} (a {@link Paragraph} instance)
	 */
	public static Phrase createPhrase(String text,Font font) {
		return new Paragraph(text,font);
	}

	/**
	 * Creates a list.
	 *
	 * @param numbered true to create a numbered list
	 * @param lettered true to number with letters, false to number with digits
	 * @param symbolIndent indentation passed to the iText list
	 * @return the new list
	 */
	public static List createList(boolean numbered, boolean lettered, float symbolIndent) {
		return new List(numbered, lettered, symbolIndent);
	}

	/**
	 * Creates a list item.
	 *
	 * @param content text of the item
	 * @param font font of the item
	 * @return the new list item
	 */
	public static ListItem createListItem(String content, Font font) {
		return new ListItem(content, font);
	}

	/**
	 * Creates a font through the iText FontFactory.
	 *
	 * @param fontname font name
	 * @param size font size
	 * @param style font style
	 * @param color font color
	 * @return the font
	 */
	public static Font createFont(String fontname, float size, int style, BaseColor color) {
		return FontFactory.getFont(fontname, size, style, color);
	}

	/**
	 * Creates a font that supports Chinese text, based on the embedded font
	 * file {@link #CHARACTOR_FONT_CH_FILE}. When the base font cannot be
	 * created the error is printed and the returned font has a null base font.
	 *
	 * @param size font size
	 * @param style font style
	 * @param color font color
	 * @return the font
	 */
	public static Font createCHineseFont(float size, int style, BaseColor color) {
		BaseFont bfChinese = null;
		try {
			bfChinese = BaseFont.createFont(CHARACTOR_FONT_CH_FILE,BaseFont.IDENTITY_H, BaseFont.EMBEDDED);
		} catch (DocumentException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		return new Font(bfChinese, size, style, color);
	}

	/**
	 * Closes the current document, if any.
	 */
	public void closeDocument() {
		if(document != null) {
			document.close();
		}
	}

	/**
	 * Reads a PDF file with PDFBox and prints its text to standard output.
	 * Errors are printed, not thrown. Both the input stream and the parsed
	 * document are closed before returning.
	 *
	 * @param fileName path of the PDF file to read
	 */
	public static void readPDF(String fileName) {
		File file = new File(fileName);
		FileInputStream in = null;
		PDDocument pdfdocument = null;
		try {
			in = new FileInputStream(file);
			// Parse the PDF file.
			PDFParser parser = new PDFParser(in);
			parser.parse();
			// Obtain the parsed document and strip its text.
			pdfdocument = parser.getPDDocument();
			PDFTextStripper stripper = new PDFTextStripper();
			String result = stripper.getText(pdfdocument);
			System.out.println("PDF文件的文本内容如下：");
			System.out.println(result);

		} catch (Exception e) {
			System.out.println("读取PDF文件" + file.getAbsolutePath() + "生失败！" + e);
			e.printStackTrace();
		} finally {
			if (pdfdocument != null) {
				try {
					pdfdocument.close();
				} catch (IOException ignored) {
					// Best effort: nothing useful can be done if closing fails.
				}
			}
			if (in != null) {
				try {
					in.close();
				} catch (IOException ignored) {
					// Best effort: nothing useful can be done if closing fails.
				}
			}
		}
	}

	/**
	 * Demo: creates a PDF containing Chinese text and reads it back.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {

		// The directory of this absolute path must already exist.
		String fileName = "E:\\test11.pdf";
		PdfUtils pdfUtils = new PdfUtils();

		Font chapterFont = createCHineseFont(20, Font.BOLD, new BaseColor(0, 0, 255)); // chapter title font
		Font sectionFont = createCHineseFont(16, Font.BOLD, new BaseColor(0, 0, 255)); // section title font
		Font textFont = createCHineseFont(10, Font.NORMAL, new BaseColor(0, 0, 0));    // body text font

		pdfUtils.createDocument(fileName);
		Chapter chapter = createChapter("糖尿病病例1", 1, 1, 0, chapterFont);
		Section section1 = createSection(chapter, "病例联系人信息", sectionFont,0);
		Phrase text1 = createPhrase("如您手中有同类现成病例，在填写完以上基础信息后，传病例附件",textFont);
		section1.add(text1);

		Section section2 = createSection(chapter, "病例个人体会", sectionFont,0);
		Phrase text2 = createPhrase("1.下载病例生成PDF文档",textFont);
		section2.add(text1);
		section2.add(text2);

		List list = createList(true, false, 20);
		String tmp = "还有什么能够文档。文档是 PDF 文档的所有元素的容器。 ";
		ListItem listItem1 = createListItem(tmp,textFont);
		ListItem listItem2 = createListItem("列表2",textFont);
		list.add(listItem1);
		list.add(listItem2);
		section2.add(list);

		pdfUtils.writeChapterToDoc(chapter);
		pdfUtils.closeDocument();

		// Read the generated file back.
		readPDF(fileName);
	}
}

上面使用了黑体字体，需要将黑体字体的ttf文件（SIMHEI.TTF）放在resources目录下。

上面即是使用pdfbox与itext的简单实例。

附pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<groupId>com.undergrowth</groupId>
	<artifactId>pdfbox</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<packaging>jar</packaging>

	<name>pdfbox</name>
	<url>http://maven.apache.org</url>

	<properties>
		<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
	</properties>

	<dependencies>
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>3.8.1</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.apache.pdfbox</groupId>
			<artifactId>pdfbox</artifactId>
			<version>1.8.6</version>
		</dependency>
		<dependency>
			<groupId>com.ibm.icu</groupId>
			<artifactId>icu4j</artifactId>
			<version>3.8</version>
		</dependency>
		<dependency>
			<groupId>com.itextpdf</groupId>
			<artifactId>itextpdf</artifactId>
			<version>5.5.1</version>
			<type>jar</type>
		</dependency>
	</dependencies>

	<build>
		<plugins>
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-javadoc-plugin</artifactId>
				<version>2.9.1</version>
				<configuration>
					<tags>
						<tag>
							<name>date</name>
							<placement>a</placement>
							<head>日期:</head>
						</tag>
					</tags>
				</configuration>
			</plugin>
		</plugins>
	</build>

</project>

3、再来看看pdfbox的源码吧说起pdfbox的源码编译就郁闷

因为pdfbox核心库pdfbox中测试需要用到

<dependency>
            <groupId>com.levigo.jbig2</groupId>
            <artifactId>levigo-jbig2-imageio</artifactId>
            <version>1.6.2</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>net.java.dev.jai-imageio</groupId>
            <artifactId>jai-imageio-core-standalone</artifactId>
            <version>1.2-pre-dr-b04-2011-07-04</version>
            <scope>test</scope>
        </dependency>

<repository>
            <id>jbig2.googlecode</id>
            <name>JBIG2 ImageIO-Plugin repository at googlecode.com</name>
            <url>http://jbig2-imageio.googlecode.com/svn/maven-repository/</url>
        </repository>

位于googlecode上的jar包可 google 与我天朝的关系貌似不太友好啊一直编译不过去后来干脆只有将pdfbox核心库中pdfbox的测试库全删了再把上面的依赖注释掉

哈哈编译成功

好吧还是来看看 pdfbox中用到的访问者模式吧也正是由于这个模式中的访问者的操作才将最终的document内容输出到输出流中去

访问者模式是什么啊好吧

看看这里有两篇文章有个大致印象吧

访问者模式 http://blog.csdn.net/hfmbook/article/details/7684175

访问者模式 http://www.2cto.com/kf/201402/278957.html

还是从头看起先看 PDdocument的构造器

 /**
     * Constructor, creates a new PDF Document with no pages.  You need to add
     * at least one page for the document to be valid.
     */
    public PDDocument()
    {
        // The PD-level document is backed by a new, empty COS-level document.
        document = new COSDocument();

        //First we need a trailer
        COSDictionary trailer = new COSDictionary();
        document.setTrailer( trailer );

        //Next we need the root dictionary.
        // It is registered in the trailer under ROOT, typed as CATALOG and given version "1.4".
        COSDictionary rootDictionary = new COSDictionary();
        trailer.setItem( COSName.ROOT, rootDictionary );
        rootDictionary.setItem( COSName.TYPE, COSName.CATALOG );
        rootDictionary.setItem( COSName.VERSION, COSName.getPDFName( "1.4" ) );

        //next we need the pages tree structure
        // The pages dictionary starts with an empty KIDS array and a COUNT of zero.
        COSDictionary pages = new COSDictionary();
        rootDictionary.setItem( COSName.PAGES, pages );
        pages.setItem( COSName.TYPE, COSName.PAGES );
        COSArray kidsArray = new COSArray();
        pages.setItem( COSName.KIDS, kidsArray );
        pages.setItem( COSName.COUNT, COSInteger.ZERO );
    }

在构建一个新的PDDocument的时候底层使用了一个COSDocument进行替代然后在document中有一个全局的字典记录器 trailer

大致示意图画得太丑了完全没有艺术细胞哎

其实上面如果有兴趣你追踪看一下发现 new COSDocument();

 /**
     * Constructor.  Uses memory to store stream.
     */
    public COSDocument()
    {
        // Delegates to the two-argument constructor with a fresh RandomAccessBuffer.
        // NOTE(review): the meaning of the boolean argument is not visible in this excerpt.
        this(new RandomAccessBuffer(), false);
    }

 /**
     * Default constructor.
     */
    public RandomAccessBuffer()
    {
        // starting with one chunk
        bufferList = new ArrayList<byte[]>();
        // The first chunk is BUFFER_SIZE bytes long and becomes the current buffer.
        currentBuffer = new byte[BUFFER_SIZE];
        bufferList.add(currentBuffer);
        // All positions, the size and the chunk indexes start at zero.
        pointer = 0;
        currentBufferPointer = 0;
        size = 0;
        bufferListIndex = 0;
        bufferListMaxIndex = 0;
    }

会发现 COSDocument 实际上是初始化一个16k的内存堆块

接着看创建了一个PDPage的构造函数

 /**
     * Creates a new instance of PDPage with a size of 8.5x11.
     */
    public PDPage()
    {
        // A page is represented by a dictionary whose TYPE entry is PAGE.
        page = new COSDictionary();
        page.setItem( COSName.TYPE, COSName.PAGE );
        // The media box defaults to the LETTER page size.
        setMediaBox( PAGE_SIZE_LETTER );
    }

创建了一个page，页面的矩形大小为 LETTER（8.5x11，见下面的 PAGE_SIZE_LETTER），page拥有type和media_box两个属性

 /**
     * A page size of LETTER or 8.5x11.
     */
    // Width 8.5 and height 11, each multiplied by DEFAULT_USER_SPACE_UNIT_DPI.
    public static final PDRectangle PAGE_SIZE_LETTER = 
        new PDRectangle( 8.5f*DEFAULT_USER_SPACE_UNIT_DPI, 11f*DEFAULT_USER_SPACE_UNIT_DPI );

page.setItem( COSName.MEDIA_BOX, mediaBoxValue.getCOSArray() );

接着看 document.addPage(page); 将创建的页面添加到文档中估计就是和上面画的那张不太好看的图挂上钩额

  /**
     * This will add a page to the document.  This is a convenience method, that
     * will add the page to the root of the hierarchy and set the parent of the
     * page to the root.
     *
     * @param page The page to add to the document.
     */
    public void addPage( PDPage page )
    {
        // Root node of the page tree, obtained from the document catalog.
        PDPageNode rootPages = getDocumentCatalog().getPages();
        // Register the page as a kid of the root node ...
        rootPages.getKids().add( page );
        // ... point the page back at the root node as its parent ...
        page.setParent( rootPages );
        // ... and refresh the page count stored on the root node.
        rootPages.updateCount();
    }

看看第一个方法 getDocumentCatalog().getPages();

这个方法就是返回rootDictionary中所包含的所有页面

 /**
     * This will get the root node for the pages.
     *
     * @return The parent page node.
     */
    public PDPageNode getPages()
    {
        // Wraps the PAGES dictionary of the root dictionary in a new PDPageNode on every call.
        return new PDPageNode( (COSDictionary)root.getDictionaryObject( COSName.PAGES ) );
    }

初始化的时候 rootDictionary中的page的计数是为0的

接着第三行 page.setParent( rootPages ); 将page指向root的page页

再看PDPageContentStream的构造器

/**
     * Create a new PDPage content stream.
     *
     * @param document The document the page is part of.
     * @param sourcePage The page to write the contents to.
     * @throws IOException If there is an error writing to the page contents.
     */
    public PDPageContentStream(PDDocument document, PDPage sourcePage) throws IOException
    {
        // Defaults: appendContent = false, compress = true.
        this(document, sourcePage, false, true);
    }

 /**
     * Create a new PDPage content stream.
     *
     * @param document The document the page is part of.
     * @param sourcePage The page to write the contents to.
     * @param appendContent Indicates whether content will be overwritten. If false all previous content is deleted.
     * @param compress Tell if the content stream should compress the page contents.
     * @throws IOException If there is an error writing to the page contents.
     */
    public PDPageContentStream(PDDocument document, PDPage sourcePage, boolean appendContent, boolean compress)
            throws IOException
    {
        // Default: resetContext = false.
        this(document, sourcePage, appendContent, compress, false);
    }

 /**
     * Create a new PDPage content stream.
     *
     * @param document The document the page is part of.
     * @param sourcePage The page to write the contents to.
     * @param appendContent Indicates whether content will be overwritten. If false all previous content is deleted.
     * @param compress Tell if the content stream should compress the page contents.
     * @param resetContext Tell if the graphic context should be reseted.
     * @throws IOException If there is an error writing to the page contents.
     */
    public PDPageContentStream(PDDocument document, PDPage sourcePage, boolean appendContent, boolean compress,
            boolean resetContext) throws IOException
    {
        // Get the pdstream from the source page instead of creating a new one
        PDStream contents = sourcePage.getContents();
        boolean hasContent = contents != null;

        // If request specifies the need to append to the document
        // (append mode only applies when the page already has content)
        if (appendContent && hasContent)
        {

            // Create a pdstream to append new content
            PDStream contentsToAppend = new PDStream(document);

            // This will be the resulting COSStreamArray after existing and new streams are merged
            COSStreamArray compoundStream = null;

            // If contents is already an array, a new stream is simply appended to it
            if (contents.getStream() instanceof COSStreamArray)
            {
                compoundStream = (COSStreamArray) contents.getStream();
                compoundStream.appendStream(contentsToAppend.getStream());
            }
            else
            {
                // Creates the COSStreamArray and adds the current stream plus a new one to it
                COSArray newArray = new COSArray();
                newArray.add(contents.getCOSObject());
                newArray.add(contentsToAppend.getCOSObject());
                compoundStream = new COSStreamArray(newArray);
            }

            // Compression is requested by setting FLATE_DECODE as the only filter of the new stream.
            if (compress)
            {
                List<COSName> filters = new ArrayList<COSName>();
                filters.add(COSName.FLATE_DECODE);
                contentsToAppend.setFilters(filters);
            }

            if (resetContext)
            {
                // create a new stream to encapsulate the existing stream
                PDStream saveGraphics = new PDStream(document);
                output = saveGraphics.createOutputStream();
                // save the initial/unmodified graphics context
                saveGraphicsState();
                // close() here finishes the temporary stream; output is reassigned further below.
                close();
                if (compress)
                {
                    List<COSName> filters = new ArrayList<COSName>();
                    filters.add(COSName.FLATE_DECODE);
                    saveGraphics.setFilters(filters);
                }
                // insert the new stream at the beginning
                compoundStream.insertCOSStream(saveGraphics);
            }

            // Sets the compoundStream as page contents
            sourcePage.setContents(new PDStream(compoundStream));
            // From here on, everything written goes to the appended stream.
            output = contentsToAppend.createOutputStream();
            if (resetContext)
            {
                // restore the initial/unmodified graphics context
                restoreGraphicsState();
            }
        }
        else
        {
            // Overwrite mode: any existing content is replaced by a brand-new stream.
            if (hasContent)
            {
                LOG.warn("You are overwriting an existing content, you should use the append mode");
            }
            contents = new PDStream(document);
            if (compress)
            {
                List<COSName> filters = new ArrayList<COSName>();
                filters.add(COSName.FLATE_DECODE);
                contents.setFilters(filters);
            }
            sourcePage.setContents(contents);
            output = contents.createOutputStream();
        }
        // Number formatting used by this stream: up to 10 fraction digits, no grouping separators.
        formatDecimal.setMaximumFractionDigits(10);
        formatDecimal.setGroupingUsed(false);
        // this has to be done here, as the resources will be set to null when reseting the content stream
        resources = sourcePage.getResources();
        if (resources == null)
        {
            // A page without resources gets a new, empty PDResources object.
            resources = new PDResources();
            sourcePage.setResources(resources);
        }

    }

其实这个方法这么多对于第一次创建PDPageContentStream的话

contents = new PDStream(document);

用document的记录文件创建 PDStream

output = contents.createOutputStream();

并且将输出流指向PDStream 即指向document的记录文件中

至于 contentStream.beginText();
contentStream.setFont(font, 20);
contentStream.moveTextPositionByAmount(200, 300); 这几个方法都比较简单就是写一些命令移动上面所见的page的位置

看drawString

/**
     * This will draw a string at the current location on the screen.
     *
     * @param text The text to draw.
     * @throws IOException If an io exception occurs.
     */
    public void drawString(String text) throws IOException
    {
        // Text may only be drawn while in text mode.
        if (!inTextMode)
        {
            throw new IOException("Error: must call beginText() before drawString");
        }
        // Wrap the text in a COSString and serialise it to bytes in PDF syntax.
        COSString string = new COSString(text);
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        string.writePDF(buffer);
        // Append the serialised string, then SPACE and SHOW_TEXT, to the content stream.
        // NOTE(review): the values of SPACE and SHOW_TEXT are not visible in this excerpt.
        appendRawCommands(buffer.toByteArray());
        appendRawCommands(SPACE);
        appendRawCommands(SHOW_TEXT);
    }

这里有一个COSString类，我估计就是这个类导致了中文的乱码，还是看看它的构造器吧

  /**
     * Explicit constructor for ease of manual PDF construction.
     *
     * @param value
     *            The string value of the object.
     */
    public COSString(String value)
    {
        try
        {
            // Scan for any character above 255; one such character is enough
            // to switch the whole string to the 16-bit encoding.
            boolean unicode16 = false;
            char[] chars = value.toCharArray();
            int length = chars.length;
            for (int i = 0; i < length; i++)
            {
                if (chars[i] > 255)
                {
                    unicode16 = true;
                    break;
                }
            }
            if (unicode16)
            {
                // UTF-16BE bytes, preceded by the two marker bytes 0xFE 0xFF.
                byte[] data = value.getBytes("UTF-16BE");
                out = new ByteArrayOutputStream(data.length + 2);
                out.write(0xFE);
                out.write(0xFF);
                out.write(data);
            }
            else
            {
                // Every character is <= 255, so ISO-8859-1 is used with one byte per character.
                byte[] data = value.getBytes("ISO-8859-1");
                out = new ByteArrayOutputStream(data.length);
                out.write(data);
            }
        }
        catch (IOException ignore)
        {
            LOG.error(ignore,ignore);
            // should never happen
        }
    }

可以看到：只有当所有字符的编码都不大于255的时候，才使用ISO-8859-1获取字节；只要有一个字符的编码大于255（例如中文），就会改用UTF-16BE编码，并在前面写入0xFE、0xFF两个字节。也就是说中文走的是UTF-16BE分支，而ISO-8859-1本身是不支持中文的。其实还有很多地方都用的是ISO-8859-1，所以目前还不清楚到底需要改哪些地方才能正确输出中文，貌似对中文的支持确实不太好

好吧接着看其实上面就是向PDPageContentStream的output输出流中写入字节其实就是向document的记录文件中写入字节

接着 contentStream.close(); 一定要 close() 因为

public class PDPageContentStream implements Closeable

实现了Closeable接口

接下来的这步操作就是执行输出操作的地方了，也是用到了访问者模式的地方

document.save(filePath);

/**
     * Save the document to a file.
     *
     * @param fileName The file to save as.
     *
     * @throws IOException If there is an error saving the document.
     * @throws COSVisitorException If an error occurs while generating the data.
     */
    public void save( String fileName ) throws IOException, COSVisitorException
    {
        // Delegates to save(File).
        save( new File( fileName ) );
    }

/**
     * Save the document to a file.
     *
     * @param file The file to save as.
     *
     * @throws IOException If there is an error saving the document.
     * @throws COSVisitorException If an error occurs while generating the data.
     */
    public void save( File file ) throws IOException, COSVisitorException
    {
        // Delegates to save(OutputStream) with a new FileOutputStream.
        // NOTE(review): the stream is not closed in this method; presumably
        // COSWriter.close() closes it - confirm against the COSWriter source.
        save( new FileOutputStream( file ) );
    }

 /**
     * This will save the document to an output stream.
     *
     * @param output The stream to write to.
     *
     * @throws IOException If there is an error writing the document.
     * @throws COSVisitorException If an error occurs while generating the data.
     */
    public void save( OutputStream output ) throws IOException, COSVisitorException
    {
        //update the count in case any pages have been added behind the scenes.
        getDocumentCatalog().getPages().updateCount();
        COSWriter writer = null;
        try
        {
            // The COSWriter does the actual serialisation of this document.
            writer = new COSWriter( output );
            writer.write( this );
            writer.close();
        }
        finally
        {
            // NOTE(review): on the success path close() has already been called
            // above, so the writer is closed a second time here.
            if( writer != null )
            {
                writer.close();
            }
        }
    }

上面有一句

writer.write( this );

看看它的源码

/**
     * This will write the pdf document.
     *
     * @param doc The document to write.
     *
     * @throws COSVisitorException If an error occurs while generating the data.
     */
    public void write(PDDocument doc) throws COSVisitorException
    {
        // Seed for the ID digest below: the document id if present, otherwise the current time.
        Long idTime = doc.getDocumentId() == null ? System.currentTimeMillis() : 
                                                    doc.getDocumentId();
        
        document = doc;
        if(incrementalUpdate)
        {
            prepareIncrement(doc);
        }
        
        // if the document says we should remove encryption, then we shouldn't encrypt
        if(doc.isAllSecurityToBeRemoved())
        {
            this.willEncrypt = false;
            // also need to get rid of the "Encrypt" in the trailer so readers 
            // don't try to decrypt a document which is not encrypted
            COSDocument cosDoc = doc.getDocument();
            COSDictionary trailer = cosDoc.getTrailer();
            trailer.removeItem(COSName.ENCRYPT);
        }
        else
        {
            // Encrypt only when the document has a security handler; any failure
            // while preparing encryption is rethrown as a COSVisitorException.
            SecurityHandler securityHandler = document.getSecurityHandler();
            if(securityHandler != null)
            {
                try
                {
                    securityHandler.prepareDocumentForEncryption(document);
                    this.willEncrypt = true;
                }
                catch(IOException e)
                {
                    throw new COSVisitorException( e );
                }
                catch(CryptographyException e)
                {
                    throw new COSVisitorException( e );
                }
            }
            else
            {
                    this.willEncrypt = false;
            }        
        }

        COSDocument cosDoc = document.getDocument();
        COSDictionary trailer = cosDoc.getTrailer();
        COSArray idArray = (COSArray)trailer.getDictionaryObject( COSName.ID );
        // Generate an ID array when the trailer has none, or always for incremental updates.
        if( idArray == null || incrementalUpdate)
        {
            try
            {

                //algorithm says to use time/path/size/values in doc to generate
                //the id.  We don't have path or size, so do the best we can
                MessageDigest md = MessageDigest.getInstance( "MD5" );
                md.update( Long.toString(idTime).getBytes("ISO-8859-1") );
                // Mix the string form of every value of the INFO dictionary, if present, into the digest.
                COSDictionary info = (COSDictionary)trailer.getDictionaryObject( COSName.INFO );
                if( info != null )
                {
                    Iterator<COSBase> values = info.getValues().iterator();
                    while( values.hasNext() )
                    {
                        md.update( values.next().toString().getBytes("ISO-8859-1") );
                    }
                }
                // The same digest is stored twice in the ID array.
                idArray = new COSArray();
                COSString id = new COSString( md.digest() );
                idArray.add( id );
                idArray.add( id );
                trailer.setItem( COSName.ID, idArray );
            }
            catch( NoSuchAlgorithmException e )
            {
                throw new COSVisitorException( e );
            }
            catch( UnsupportedEncodingException e )
            {
                throw new COSVisitorException( e );
            }
        }
        // Visitor entry point: the COS document accepts this writer as its visitor.
        cosDoc.accept(this);
    }

上面那个方法重点在 cosDoc.accept(this); 方法上即COSDocument接受COSWriter对象的访问

那么访问者模式必有的四要素

访问者接口

访问者实现类

目标对象接口

目标对象实现类

在这里

访问者接口即是 ICOSVisitor

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.cos;

import org.apache.pdfbox.exceptions.COSVisitorException;

/**
 * An interface for visiting a PDF document at the type (COS) level.
 *
 * @author Michael Traut
 * @version $Revision: 1.6 $
 */
public interface ICOSVisitor
{
    /**
     * Called when a {@link COSArray} is visited.
     *
     * @param obj the array being visited
     * @return an implementation-defined result, possibly null
     * @throws COSVisitorException if the visit fails
     */
    Object visitFromArray( COSArray obj ) throws COSVisitorException;

    /**
     * Called when a {@link COSBoolean} is visited.
     *
     * @param obj the boolean being visited
     * @return an implementation-defined result, possibly null
     * @throws COSVisitorException if the visit fails
     */
    Object visitFromBoolean( COSBoolean obj ) throws COSVisitorException;

    /**
     * Called when a {@link COSDictionary} is visited.
     *
     * @param obj the dictionary being visited
     * @return an implementation-defined result, possibly null
     * @throws COSVisitorException if the visit fails
     */
    Object visitFromDictionary( COSDictionary obj ) throws COSVisitorException;

    /**
     * Called when a {@link COSDocument} is visited.
     *
     * @param obj the document being visited
     * @return an implementation-defined result, possibly null
     * @throws COSVisitorException if the visit fails
     */
    Object visitFromDocument( COSDocument obj ) throws COSVisitorException;

    /**
     * Called when a {@link COSFloat} is visited.
     *
     * @param obj the float being visited
     * @return an implementation-defined result, possibly null
     * @throws COSVisitorException if the visit fails
     */
    Object visitFromFloat( COSFloat obj ) throws COSVisitorException;

    /**
     * Called when a {@link COSInteger} is visited.
     *
     * @param obj the integer being visited
     * @return an implementation-defined result, possibly null
     * @throws COSVisitorException if the visit fails
     */
    Object visitFromInt( COSInteger obj ) throws COSVisitorException;

    /**
     * Called when a {@link COSName} is visited.
     *
     * @param obj the name being visited
     * @return an implementation-defined result, possibly null
     * @throws COSVisitorException if the visit fails
     */
    Object visitFromName( COSName obj ) throws COSVisitorException;

    /**
     * Called when a {@link COSNull} is visited.
     *
     * @param obj the null object being visited
     * @return an implementation-defined result, possibly null
     * @throws COSVisitorException if the visit fails
     */
    Object visitFromNull( COSNull obj ) throws COSVisitorException;

    /**
     * Called when a {@link COSStream} is visited.
     *
     * @param obj the stream being visited
     * @return an implementation-defined result, possibly null
     * @throws COSVisitorException if the visit fails
     */
    Object visitFromStream( COSStream obj ) throws COSVisitorException;

    /**
     * Called when a {@link COSString} is visited.
     *
     * @param obj the string being visited
     * @return an implementation-defined result, possibly null
     * @throws COSVisitorException if the visit fails
     */
    Object visitFromString( COSString obj ) throws COSVisitorException;
}

可以看到，接口中为每一种 COS 对象类型都定义了一个对应的访问方法。

访问者实现类有很多，这里只列举 COSWriter 的一个实现方法：

 /**
     * Visitor callback for a whole document: emits the document through the
     * writer's output stream.
     *
     * <p>Steps, in order: the header (skipped for incremental updates), the
     * body, the cross-reference section, the trailer when applicable, the
     * STARTXREF marker followed by the startxref value and the EOF marker,
     * and finally the signature for incremental updates.
     *
     * @param doc The document being visited.
     *
     * @throws COSVisitorException Wraps any IOException or SignatureException
     *         raised while writing.
     *
     * @return always null
     */
    public Object visitFromDocument(COSDocument doc) throws COSVisitorException
    {
        try
        {
            if (!incrementalUpdate)
            {
                doWriteHeader(doc);
            }
            doWriteBody(doc);

            // Look up XREF_STM in the existing trailer; -1 when there is no trailer.
            final COSDictionary previousTrailer = doc.getTrailer();
            final long hybridPrev = (previousTrailer == null)
                    ? -1
                    : previousTrailer.getLong(COSName.XREF_STM);

            if (!incrementalUpdate)
            {
                doWriteXRef(doc);
            }
            else
            {
                doWriteXRefInc(doc, hybridPrev);
            }

            // The trailer section belongs to xref tables, not xref streams: it is
            // skipped only for an incremental update of an xref-stream document
            // for which no XREF_STM value was found.
            final boolean skipTrailer =
                    incrementalUpdate && doc.isXRefStream() && hybridPrev == -1;
            if (!skipTrailer)
            {
                doWriteTrailer(doc);
            }

            // Closing section: STARTXREF marker, offset value, EOF marker.
            getStandardOutput().write(STARTXREF);
            getStandardOutput().writeEOL();
            getStandardOutput().write(String.valueOf(getStartxref()).getBytes("ISO-8859-1"));
            getStandardOutput().writeEOL();
            getStandardOutput().write(EOF);
            getStandardOutput().writeEOL();

            if (incrementalUpdate)
            {
                doWriteSignature(doc);
            }

            return null;
        }
        catch (IOException ioFailure)
        {
            throw new COSVisitorException(ioFailure);
        }
        catch (SignatureException signatureFailure)
        {
            throw new COSVisitorException(signatureFailure);
        }
    }

目标对象接口（这里是抽象类）COSBase 中声明了一个抽象的、可接受访问者访问的方法：

 /**
     * Double-dispatch entry point of the visitor pattern. Each concrete
     * subclass implements this by calling the {@code visitFromXxx} method of
     * the given visitor that matches its own type, passing itself.
     *
     * @param visitor The visitor to notify when visiting this object.
     * @return any object, depending on the visitor implementation, or null
     * @throws COSVisitorException If an error occurs while visiting this object.
     */
    public abstract Object accept(ICOSVisitor visitor) throws COSVisitorException;

目标对象实现类也很多，这里只列举 COSDocument 的实现：

  /**
     * Accepts a visitor by handing this document to the visitor's
     * document-specific callback.
     *
     * @param visitor The visitor whose visitFromDocument method is invoked.
     * @return whatever the visitor's visitFromDocument call returns
     * @throws COSVisitorException If the visitor fails while processing this document.
     */
    @Override
    public Object accept(ICOSVisitor visitor) throws COSVisitorException
    {
        final Object visitResult = visitor.visitFromDocument( this );
        return visitResult;
    }

所以在上面的 write 方法中调用 cosDoc.accept(this); 的时候，

实际上是调用了上面 COSDocument 的 accept 方法，接着由它调用 COSWriter 的 visitFromDocument 方法，实现最终的文档内容的输出。

对于访问者模式，感觉就是：对同一类对象，不同的访问者实现类可以做不同的事情。额，好像还是不太明白，其实主要是没有真正地用过，只是学过、看过，理解还是不深啊。记录学习的脚步，接着努力学习。。。

posted on 2014-09-08 17:22 liangxinzhi 阅读(1592) 评论(1) 编辑收藏举报

刷新页面返回顶部

liangxinzhi

maven之读写pdf简单实例(pdfbox与itext)与pdfbox源码解析(访问者模式)

导航

公告