java编程将HTML文件转换成PDF文件(转)

网上有好几种方法可以将HTML文件转换成PDF文件，但是有些对HTML文件格式要求比较严格，稍微错了一些就不能生成我们所要的PDF文件，这里我推荐一个

PD4ML，它可以解决HTML文件格式不正确的问题，可以生成一个比较好的PDF文件，其处理速度快，而且对CSS文件兼容得非常好。下面是最基本的

PD4ML编程：

package samples;

import java.awt.Insets;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.InvalidParameterException;

import org.zefer.pd4ml.PD4Constants;
import org.zefer.pd4ml.PD4ML;

/**
 * Minimal PD4ML sample: renders an HTML page addressed by URL into a PDF file.
 *
 * <p>The page margins and the virtual browser width are held in protected
 * fields so that a subclass can adjust them before calling
 * {@link #doConversion(String, String)}.
 */
public class GettingStarted1 {
    /** Top page margin, passed to {@code setPageInsetsMM}. */
    protected int topValue = 10;
    /** Left page margin, passed to {@code setPageInsetsMM}. */
    protected int leftValue = 20;
    /** Right page margin, passed to {@code setPageInsetsMM}. */
    protected int rightValue = 10;
    /** Bottom page margin, passed to {@code setPageInsetsMM}. */
    protected int bottomValue = 10;
    /** Frame width of the "virtual web browser" used to lay out the HTML. */
    protected int userSpaceWidth = 1300;

    /**
     * Converts the PD4ML sample page to {@code c:/pd4ml.pdf}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            GettingStarted1 jt = new GettingStarted1();
            jt.doConversion("http://pd4ml.com/sample.htm", "c:/pd4ml.pdf");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Renders the HTML document at {@code url} to a PDF file at {@code outputPath}.
     *
     * <p>The URL is parsed and the converter is configured before the output
     * file is opened, so a malformed URL or a configuration failure does not
     * leave an empty or truncated file behind. The output stream is always
     * closed, even when rendering fails.
     *
     * @param url        address of the source HTML document
     * @param outputPath path of the PDF file to create (overwritten if present)
     * @throws InvalidParameterException propagated from PD4ML
     * @throws MalformedURLException     if {@code url} cannot be parsed
     * @throws IOException               if the output file cannot be written or
     *                                   PD4ML reports an I/O failure
     */
    public void doConversion( String url, String outputPath ) 
                throws InvalidParameterException, MalformedURLException, IOException {
        // Fail fast on a bad URL, before any file is created.
        URL source = new URL(url);

        PD4ML pd4ml = new PD4ML();

        pd4ml.setHtmlWidth(userSpaceWidth); // set frame width of "virtual web browser"

        // choose target paper format and "rotate" it to landscape orientation
        pd4ml.setPageSize(pd4ml.changePageOrientation(PD4Constants.A4));

        // define PDF page margins (java.awt.Insets order: top, left, bottom, right)
        pd4ml.setPageInsetsMM(new Insets(topValue, leftValue, bottomValue, rightValue));

        // source HTML document also may have margins, could be suppressed this way
        // (PD4ML *Pro* feature):
        pd4ml.addStyle("BODY {margin: 0}", true);

        // If built-in basic PDF fonts are not sufficient or
        // if you need to output non-Latin texts,
        // TTF embedding feature should help (PD4ML *Pro*)
        pd4ml.useTTF("c:/windows/fonts", true);

        File output = new File(outputPath);
        java.io.FileOutputStream fos = new java.io.FileOutputStream(output);
        try {
            pd4ml.render(source, fos); // actual document conversion from URL to file
        } finally {
            // Release the file handle on both the success and the failure path.
            fos.close();
        }

        System.out.println( outputPath + "\ndone." );
    }
}

The following Java class slightly changes the above example. Now it pre-reads the source HTML into a string and passes it to the render() method wrapped in a StringReader. It first writes the PDF bytes to a ByteArrayOutputStream, which makes it possible to measure the size of the resulting document.

A disadvantage of this approach is higher RAM usage, because both the HTML source and the generated PDF are held in memory.

package samples;

import java.awt.Insets;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.InvalidParameterException;

import org.zefer.pd4ml.PD4Constants;
import org.zefer.pd4ml.PD4ML;

/**
 * PD4ML sample that converts an HTML document held in a {@code String}.
 *
 * <p>The PDF is first rendered into memory so that its size is known before
 * it is written to disk; the price is that the whole document is buffered
 * in RAM.
 */
public class GettingStarted2 {
    /** Top page margin, passed to {@code setPageInsetsMM}. */
    protected int topValue = 10;
    /** Left page margin, passed to {@code setPageInsetsMM}. */
    protected int leftValue = 20;
    /** Right page margin, passed to {@code setPageInsetsMM}. */
    protected int rightValue = 10;
    /** Bottom page margin, passed to {@code setPageInsetsMM}. */
    protected int bottomValue = 10;
    /** Frame width of the "virtual web browser" used to lay out the HTML. */
    protected int userSpaceWidth = 1300;

    /**
     * Reads {@code c:/sample.htm} as UTF-8 and converts it to {@code c:/pd4ml.pdf}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            GettingStarted2 jt = new GettingStarted2();
            String html = readFile("c:/sample.htm", "UTF-8");
            jt.doConversion2(html, "c:/pd4ml.pdf");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Renders {@code htmlDocument} to PDF in memory, prints the resulting size
     * and then writes the PDF to {@code outputPath}.
     *
     * @param htmlDocument the HTML source to convert
     * @param outputPath   path of the PDF file to create (overwritten if present)
     * @throws InvalidParameterException propagated from PD4ML
     * @throws MalformedURLException     propagated from PD4ML
     * @throws IOException               if rendering or writing the file fails
     */
    public void doConversion2( String htmlDocument, String outputPath ) 
                throws InvalidParameterException, MalformedURLException, IOException {

        PD4ML pd4ml = new PD4ML();

        pd4ml.setHtmlWidth(userSpaceWidth); // set frame width of "virtual web browser"

        // choose target paper format and "rotate" it to landscape orientation
        pd4ml.setPageSize(pd4ml.changePageOrientation(PD4Constants.A4));

        // define PDF page margins (java.awt.Insets order: top, left, bottom, right)
        pd4ml.setPageInsetsMM(new Insets(topValue, leftValue, bottomValue, rightValue));

        // source HTML document also may have margins, could be suppressed this way
        // (PD4ML *Pro* feature):
        pd4ml.addStyle("BODY {margin: 0}", true);

        // If built-in basic PDF fonts are not sufficient or
        // if you need to output non-Latin texts, TTF embedding feature should help
        // (PD4ML *Pro*)
        pd4ml.useTTF("c:/windows/fonts", true);

        // ByteArrayOutputStream holds no system resources, so it needs no close().
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // actual document conversion from HTML string to byte array
        pd4ml.render(new StringReader(htmlDocument), baos);
        // if the HTML has relative references to images etc,
        // use render() method with baseDirectory parameter instead

        System.out.println( "resulting PDF size: " + baos.size() + " bytes" );
        // in Web scenarios it is a good idea to send the size with
        // "Content-length" HTTP header

        File output = new File(outputPath);
        FileOutputStream fos = new FileOutputStream(output);
        try {
            // writeTo() streams the internal buffer directly, avoiding the
            // extra full copy of the PDF that toByteArray() would allocate.
            baos.writeTo(fos);
        } finally {
            // Release the file handle on both the success and the failure path.
            fos.close();
        }

        System.out.println( outputPath + "\ndone." );
    }

    /**
     * Reads a whole file into a string.
     *
     * @param path     path of the file to read
     * @param encoding name of the charset used to decode the file's bytes
     * @return the decoded file content
     * @throws IOException if the file cannot be read or {@code encoding} is
     *                     not supported
     */
    private final static String readFile( String path, String encoding ) throws IOException {

        // Closing the buffered wrapper also closes the underlying file stream.
        BufferedInputStream bis = new BufferedInputStream(new FileInputStream(new File( path )));
        try {
            ByteArrayOutputStream content = new ByteArrayOutputStream();
            byte[] buffer = new byte[2048];

            int read;
            // Read through the buffered stream; -1 signals end of file.
            while ((read = bis.read(buffer, 0, buffer.length)) != -1) {
                content.write(buffer, 0, read);
            }

            return content.toString(encoding);
        } finally {
            bis.close();
        }
    }
}

posted @ 2013-05-03 13:51 licomeback 阅读(2960) 评论(0) 收藏举报

刷新页面返回顶部

licomeback

java编程将HTML文件转换成PDF文件(转)

PD4ML，它可以解决HTML文件格式不正确的问题，可以生成一个比较好的PDF文件，其处理速度快，而且对CSS文件兼容的非常好。下面是最基本的

PD4ML编程：

公告