Java实现PDF转HTML
问题场景:
在使用PB嵌入HTML页面时发现调不起查看PDF的插件
解决方法:
将PDF转换为HTML来展示
解决步骤:
1.下载PDF转换工具pdf2htmlEX.exe
下载地址:http://pan.baidu.com/s/1eSHq3JG
2.创建工具类(Pdf2htmlEXUtil和StreamGobbler)
package org.common.util.pdftohtml; import org.common.util.pdftohtml.StreamGobbler; /** * @author liuzhengyong * @version 1.0 时间:2013-12-30 下午2:24:10 pdf文件转html工具类 */ public class Pdf2htmlEXUtil { /** * 调用pdf2htmlEX将pdf文件转换为html文件 * * @param exeFilePath * pdf2htmlEX.exe文件路径 * @param pdfFile * pdf文件绝对路径 * @param [destDir] 生成的html文件存放路径 * @param htmlName * 生成的html文件名称 * @return */ public static boolean pdf2html(String exeFilePath, String pdfFile, String destDir, String htmlFileName) { if (!(exeFilePath != null && !"".equals(exeFilePath) && pdfFile != null && !"".equals(pdfFile) && htmlFileName != null && !"" .equals(htmlFileName))) { System.out.println("传递的参数有误!"); return false; } Runtime rt = Runtime.getRuntime(); StringBuilder command = new StringBuilder(); command.append(exeFilePath).append(" "); if (destDir != null && !"".equals(destDir.trim()))// 生成文件存放位置,需要替换文件路径中的空格 command.append("--dest-dir ").append(destDir.replace(" ", "\" \"")) .append(" "); command.append("--optimize-text 1 ");// 尽量减少用于文本的HTML元素的数目 (default: 0) command.append("--zoom 1.4 "); command.append("--process-outline 0 ");// html中显示链接:0——false,1——true command.append("--font-format woff ");// 嵌入html中的字体后缀(default ttf) // ttf,otf,woff,svg command.append(pdfFile.replace(" ", "\" \"")).append(" ");// 需要替换文件路径中的空格 if (htmlFileName != null && !"".equals(htmlFileName.trim())) { command.append(htmlFileName); if (htmlFileName.indexOf(".html") == -1) command.append(".html"); } try { System.out.println("Command:" + command.toString()); Process p = rt.exec(command.toString()); StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(), "ERROR"); // 开启屏幕标准错误流 errorGobbler.start(); StreamGobbler outGobbler = new StreamGobbler(p.getInputStream(), "STDOUT"); // 开启屏幕标准输出流 outGobbler.start(); int w = p.waitFor(); int v = p.exitValue(); if (w == 0 && v == 0) { return true; } } catch (Exception e) { e.printStackTrace(); } return false; } public static boolean pdf2html_linux(String pdfFile, String destDir, 
String htmlFileName) { if (!(pdfFile != null && !"".equals(pdfFile) && htmlFileName != null && !"" .equals(htmlFileName))) { System.out.println("传递的参数有误!"); return false; } Runtime rt = Runtime.getRuntime(); StringBuilder command = new StringBuilder(); command.append("pdf2htmlEX").append(" "); if (destDir != null && !"".equals(destDir.trim()))// 生成文件存放位置,需要替换文件路径中的空格 command.append("--dest-dir ").append(destDir.replace(" ", "\" \"")) .append(" "); command.append("--optimize-text 1 ");// 尽量减少用于文本的HTML元素的数目 (default: 0) command.append("--process-outline 0 ");// html中显示链接:0——false,1——true command.append("--font-format woff ");// 嵌入html中的字体后缀(default ttf) // ttf,otf,woff,svg command.append(pdfFile.replace(" ", "\" \"")).append(" ");// 需要替换文件路径中的空格 if (htmlFileName != null && !"".equals(htmlFileName.trim())) { command.append(htmlFileName); if (htmlFileName.indexOf(".html") == -1) command.append(".html"); } try { System.out.println("Command:" + command.toString()); Process p = rt.exec(command.toString()); StreamGobbler errorGobbler = new StreamGobbler(p.getErrorStream(), "ERROR"); // 开启屏幕标准错误流 errorGobbler.start(); StreamGobbler outGobbler = new StreamGobbler(p.getInputStream(), "STDOUT"); // 开启屏幕标准输出流 outGobbler.start(); int w = p.waitFor(); int v = p.exitValue(); if (w == 0 && v == 0) { return true; } } catch (Exception e) { e.printStackTrace(); } return false; } public static void main(String[] args) { pdf2html("D:\\pdf2htmlEX-v1.0\\pdf2htmlEX.exe","D:\\pdf2htmlEX-v1.0\\PDF\\my.pdf","D:\\pdf2htmlEX-v1.0\\HTML","my.html"); } }
package org.common.util.pdftohtml; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStream; import java.io.PrintWriter; /** * 用于处理Runtime.getRuntime().exec产生的错误流及输出流 * * @author shaojing * */ public class StreamGobbler extends Thread { InputStream is; String type; OutputStream os; public StreamGobbler(InputStream is, String type) { this(is, type, null); } StreamGobbler(InputStream is, String type, OutputStream redirect) { this.is = is; this.type = type; this.os = redirect; } public void run() { InputStreamReader isr = null; BufferedReader br = null; PrintWriter pw = null; try { if (os != null) pw = new PrintWriter(os); isr = new InputStreamReader(is); br = new BufferedReader(isr); String line = null; while ((line = br.readLine()) != null) { if (pw != null) pw.println(line); System.out.println(type + ">" + line); } if (pw != null) pw.flush(); } catch (IOException ioe) { ioe.printStackTrace(); } finally { try { if (pw != null) pw.close(); if (br != null) br.close(); if (isr != null) isr.close(); } catch (IOException e) { e.printStackTrace(); } } } }
3.运行Pdf2htmlEXUtil工具类中的main方法。
注:运行上述步骤,问题解决,成功生成html文件
(注意:pdf2htmlEX.exe文件不要单独复制出来使用,必须和pdf2htmlEX-v1.0文件夹中的其他文件放在一起,否则会报错:Error: Cannot open the manifest file)