利用jpedal进行pdf转换成jpeg,jpg,png,tiff,tif等格式的图片
项目中运用到pdf文件转换成image图片,开始时使用pdfbox开源库进行图片转换,但是转换出来的文件中含有部分乱码的情况.下面是pdfBox 的pdf转换图片的代码示例.
// PDFBox example: render every page of a PDF to an image file.
try {
    String password = null;
    int startPage = 1;
    String imageType = "jpg";
    File imageFile = new File("E:\\upload\\pdf\\20140424\\Servlet." + imageType);
    File pdfFile = new File("E:\\upload\\pdf\\20140424\\Servlet.pdf");
    PDDocument document = PDDocument.load(pdfFile);
    try {
        // endPage was assigned without ever being declared in the original snippet
        int endPage = document.getPageCount();
        PDFImageWriter imageWriter = new PDFImageWriter();
        imageWriter.writeImage(document, imageType, password, startPage, endPage, imageFile.getAbsolutePath());
    } finally {
        // always release the document, even when rendering fails
        document.close();
    }
} catch (IOException e) {
    e.printStackTrace();
}
比较了其他的开源库之后,准备采用jpedal。但是jpedal的资料非常少,除了官方网站外,即使是英文资料也很少。而且官方提供的代码示例中的一些方法,在lgpl授权的
jpedal代码库中并不存在。下面是收集到的一些资料
1、jpedal文档:http://javadoc.idrsolutions.com/org/jpedal/PdfDecoder.html
2、简单调用示例:http://www.idrsolutions.com/java-pdf-code-faq/#pdf2img
3、lgpl授权的jpedal库的下载地址:http://sourceforge.net/projects/jpedal/
4、转换示例地址:http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToImages.java.html
5、高清图片转换示例地址:http://files.idrsolutions.com/samplecode/org/jpedal/examples/images/ConvertPagesToHiResImages.java.html
于是稍微修改了官方的转换示例,下面是经过测试可以使用的转换代码
import cn.com.pujiConvert.util.Common; import com.sun.imageio.plugins.jpeg.JPEGImageWriter; import org.jpedal.*; import org.jpedal.color.ColorSpaces; import org.jpedal.constants.PageInfo; import org.jpedal.exception.PdfException; import org.jpedal.external.Options; import org.jpedal.fonts.FontMappings; import org.jpedal.objects.PdfFileInformation; import org.jpedal.utils.LogWriter; import org.w3c.dom.Element; import javax.imageio.IIOImage; import javax.imageio.ImageIO; import javax.imageio.ImageTypeSpecifier; import javax.imageio.metadata.IIOMetadata; import javax.imageio.plugins.jpeg.JPEGImageWriteParam; import javax.imageio.stream.ImageOutputStream; import java.awt.*; import java.awt.image.BufferedImage; import java.io.*; import java.util.Iterator; public class ConvertPagesToImages{ /** * show if image transparent */ boolean isTransparent=false; /**output where we put files */ private String user_dir = System.getProperty("user.dir"); /**use 96 dpi as default so pages correct size (72 will be smaller) */ private float pageScaling =1.33f; /**flag to show if we print messages */ public static boolean outputMessages = false; String output_dir=null; /**correct separator for OS */ String separator = System.getProperty("file.separator"); /**the decoder object which decodes the pdf and returns a data object */ PdfDecoder decode_pdf = null; //type of image to save thumbnails private String format = "png"; /** holding all creators that produce OCR pdf's ocr*/ private String[] ocr = {"TeleForm"}; /**scaling to use - default is 100 percent */ private int scaling=100; /**file password or null */ private String password=null; //only used if between 0 and 1 private float JPEGcompression=-1f; private int pageCount = 0; public ConvertPagesToImages() { } public void init(String file_name, int scaling, String format, String output_dir, String password, int pageCount){ /*缩小比率*/ this.scaling = scaling; /*图片格式*/ this.format = format; /*输出目录*/ this.output_dir = output_dir; /*pdf密码*/ 
this.password = password; /*输出图片数*/ this.pageCount = pageCount; /*判断文件是否存在*/ File pdf_file = new File(file_name); if (!pdf_file.exists()) { System.out.println("File " + pdf_file + " not found"); System.out.println("May need full path"); return; } extraction(file_name, output_dir); } private void extraction(String file_name, String output_dir) { this.output_dir=output_dir; if (!user_dir.endsWith(separator)){ user_dir = user_dir + separator; } if (file_name.toLowerCase().endsWith(".pdf")) { if(output_dir==null){ output_dir=user_dir + "thumbnails" + separator; } decodeFile(file_name,output_dir); } else { String[] files = null; File inputFiles; if (!file_name.endsWith(separator)){ file_name = file_name + separator; } try { inputFiles = new File(file_name); if (!inputFiles.isDirectory()) { System.err.println(file_name + " is not a directory. Exiting program"); }else{ files = inputFiles.list(); } } catch (Exception ee) { LogWriter.writeLog("Exception trying to access file " + ee.getMessage()); } if(files!=null){ for (String file : files) { if (file.toLowerCase().endsWith(".pdf")) { if (outputMessages){ System.out.println(file_name + file); } decodeFile(file_name + file, output_dir); } } } } if(outputMessages){ System.out.println("Thumbnails created"); } } /** * routine to decode a file */ private void decodeFile(String file_name,String output_dir) { String name = "demo"; //set a default just in case int pointer = file_name.lastIndexOf(separator); if(pointer==-1){ pointer = file_name.lastIndexOf('/'); } if (pointer != -1){ name = file_name.substring(pointer + 1, file_name.length() - 4); }else if((file_name.toLowerCase().endsWith(".pdf"))){ name=file_name.substring(0,file_name.length()-4); } //fix for odd files on Linux created when you view pages if(name.startsWith(".")){ return; } //create output dir for images if(output_dir==null){ output_dir = user_dir + "thumbnails" + separator ; } //PdfDecoder returns a PdfException if there is a problem try { if(decode_pdf==null){ 
decode_pdf = new PdfDecoder(true); } /**optional JAI code for faster rendering*/ org.jpedal.external.ImageHandler myExampleImageHandler=new org.jpedal.examples.handlers.ExampleImageDrawOnScreenHandler(); decode_pdf.addExternalHandler(myExampleImageHandler, Options.ImageHandler); //mappings for non-embedded fonts to use FontMappings.setFontReplacements(); //true as we are rendering page decode_pdf.setExtractionMode(0, pageScaling); //don't bother to extract text and images /** * open the file (and read metadata including pages in file) */ if (outputMessages){ System.out.println("Opening file :" + file_name); } if(password != null && password != ""){ decode_pdf.openPdfFile(file_name,password); }else{ decode_pdf.openPdfFile(file_name); } } catch (Exception e) { System.err.println("8.Exception " + e + " in pdf code in "+file_name); } /** * extract data from pdf (if allowed). */ if(decode_pdf.isEncrypted() && !decode_pdf.isFileViewable()){ throw new RuntimeException("Wrong password password used=>"+password+ '<'); }else if ((decode_pdf.isEncrypted()&&(!decode_pdf.isPasswordSupplied())) && (!decode_pdf.isExtractionAllowed())) { throw new RuntimeException("Extraction not allowed"); } else { extractPageAsImage(file_name, output_dir, name, isTransparent); } /**close the pdf file */ decode_pdf.closePdfFile(); } private void extractPageAsImage(String file_name, String output_dir, String name, boolean isTransparent) { //create a directory if it doesn't exist File output_path = new File(output_dir); if (!output_path.exists()){ output_path.mkdirs(); } boolean isSingleOutputFile=false; boolean compressTiffs = false; String rawJPEGComp = null; String jpgFlag = "96"; //page range int start = 1, end = decode_pdf.getPageCount(); end = (pageCount == 0) ? 
end : pageCount; if (outputMessages){ System.out.println("Thumbnails will be in " + output_dir); } try { BufferedImage[] multiPages = new BufferedImage[1 + (end - start)]; for (int page = start; page < end + 1; page++){ getPage(output_dir, name, isTransparent, isSingleOutputFile,rawJPEGComp, jpgFlag, compressTiffs, start, end,multiPages, page); } } catch (Exception e) { decode_pdf.closePdfFile(); throw new RuntimeException("Exception " + e.getMessage()+" with thumbnails on File="+file_name); } } private void getPage( String output_dir, String name, boolean isTransparent, boolean isSingleOutputFile, String rawJPEGComp, String jpgFlag, boolean compressTiffs, int start, int end, BufferedImage[] multiPages, int page ) throws PdfException, IOException, FileNotFoundException { if (outputMessages ){ System.out.println("Page " + page); } /** * 补0操作 */ String pageAsString = String.valueOf(page); String maxPageSize = String.valueOf(end); int padding = maxPageSize.length()-pageAsString.length(); for(int ii = 0; ii < padding; ii++){ pageAsString = '0' + pageAsString; } String image_name; if(isSingleOutputFile){ image_name =name; }else{ image_name =name+"_page_" + pageAsString; } /** * get PRODUCER and if OCR disable text printing */ PdfFileInformation currentFileInformation = decode_pdf.getFileInformationData(); String[] values=currentFileInformation.getFieldValues(); String[] fields=PdfFileInformation.getFieldNames(); for(int i=0;i<fields.length;i++){ if(fields[i].equals("Creator")){ for (String anOcr : ocr) { if (values[i].equals(anOcr)) { decode_pdf.setRenderMode(PdfDecoder.RENDERIMAGES); } } } } BufferedImage image_to_save; if(!isTransparent){ image_to_save=decode_pdf.getPageAsImage(page); }else{ //use this if you want a transparent image image_to_save =decode_pdf.getPageAsTransparentImage(page); //java adds odd tint if you save this as JPEG which does not have transparency // so put as RGB on white background // (or save as PNG or TIFF which has transparency) // or just 
call decode_pdf.getPageAsImage(page) if(image_to_save!=null && format.toLowerCase().startsWith("jp")){ BufferedImage rawVersion=image_to_save; int w=rawVersion.getWidth(), h=rawVersion.getHeight(); //blank canvas image_to_save = new BufferedImage(w,h , BufferedImage.TYPE_INT_RGB); // Graphics2D g2 = image_to_save.createGraphics(); //white background g2.setPaint(Color.WHITE); g2.fillRect(0,0,w,h); //paint on image g2.drawImage(rawVersion, 0, 0,null); } } /*if just gray we can reduce memory usage by converting image to Grayscale @SuppressWarnings("rawtypes") Iterator colorspacesUsed = decode_pdf.getPageInfo(PageInfo.COLORSPACES); int nextID; boolean isGrayOnly=colorspacesUsed!=null; //assume true and disprove while(colorspacesUsed!=null && colorspacesUsed.hasNext()){ nextID= (Integer) (colorspacesUsed.next()); if(nextID!= ColorSpaces.DeviceGray && nextID!=ColorSpaces.CalGray){ isGrayOnly=false; } } //draw onto GRAY image to reduce colour depth if(isGrayOnly){ BufferedImage image_to_save2=new BufferedImage(image_to_save.getWidth(),image_to_save.getHeight(), BufferedImage.TYPE_BYTE_GRAY); image_to_save2.getGraphics().drawImage(image_to_save,0,0,null); image_to_save = image_to_save2; } //put image in array if multi-images if(isSingleOutputFile){ multiPages[page-start] = image_to_save; } if (image_to_save != null) { /**BufferedImage does not support any dpi concept. A higher dpi can be created * using JAI to convert to a higher dpi image*/ //shrink the page to 50% with graphics2D transformation //- add your own parameters as needed //you may want to replace null with a hints object if you //want to fine tune quality. 
/** example 1 biliniear scaling AffineTransform scale = new AffineTransform(); scale.scale(.5, .5); //50% as a decimal AffineTransformOp scalingOp =new AffineTransformOp(scale, null); image_to_save =scalingOp.filter(image_to_save, null); */ /** example 2 bicubic scaling - better quality but slower to preserve aspect ratio set newWidth or newHeight to -1*/ /**allow user to specify maximum dimension for thumbnail*/ int maxDimension = -1; if(scaling!=100 || maxDimension != -1){ int newWidth=image_to_save.getWidth()*scaling/100; int newHeight=image_to_save.getHeight()*scaling/100; Image scaledImage; if(maxDimension != -1 && (newWidth > maxDimension || newHeight > maxDimension)){ if(newWidth > newHeight){ newWidth = maxDimension; scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH); } else { newHeight = maxDimension; scaledImage= image_to_save.getScaledInstance(-1,newHeight,BufferedImage.SCALE_SMOOTH); } } else { scaledImage= image_to_save.getScaledInstance(newWidth,-1,BufferedImage.SCALE_SMOOTH); } if(format.toLowerCase().startsWith("jp")){ image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_RGB); }else{ image_to_save = new BufferedImage(scaledImage.getWidth(null),scaledImage.getHeight(null) , BufferedImage.TYPE_INT_ARGB); } Graphics2D g2 = image_to_save.createGraphics(); g2.drawImage(scaledImage, 0, 0,null); } if (format.startsWith("jp")) { saveAsJPEG(jpgFlag, image_to_save, JPEGcompression, new BufferedOutputStream(new FileOutputStream(output_dir + pageAsString + image_name + '.' 
+ format))); } else { //save image decode_pdf.getObjectStore().saveStoredImage( output_dir + pageAsString + image_name, image_to_save, true, false, format); } } //flush images in case we do more than 1 page so only contains //images from current page decode_pdf.flushObjectValues(true); } private static void saveAsJPEG(String jpgFlag,BufferedImage image_to_save, float JPEGcompression, BufferedOutputStream fos) throws IOException { JPEGImageWriter imageWriter = (JPEGImageWriter) ImageIO.getImageWritersBySuffix("jpeg").next(); ImageOutputStream ios = ImageIO.createImageOutputStream(fos); imageWriter.setOutput(ios); IIOMetadata imageMetaData = imageWriter.getDefaultImageMetadata(new ImageTypeSpecifier(image_to_save), null); if (Common.isInteger(jpgFlag)){ int dpi = 96; try { dpi = Integer.parseInt(jpgFlag); } catch (Exception e) { e.printStackTrace(); } Element tree = (Element) imageMetaData.getAsTree("javax_imageio_jpeg_image_1.0"); Element jfif = (Element)tree.getElementsByTagName("app0JFIF").item(0); jfif.setAttribute("Xdensity", Integer.toString(dpi)); jfif.setAttribute("Ydensity", Integer.toString(dpi)); } JPEGImageWriteParam jpegParams = (JPEGImageWriteParam) imageWriter.getDefaultWriteParam(); if(JPEGcompression>=0 && JPEGcompression<=1f){ jpegParams.setCompressionMode(JPEGImageWriteParam.MODE_EXPLICIT); jpegParams.setCompressionQuality(JPEGcompression); } imageWriter.write(imageMetaData, new IIOImage(image_to_save, null, null), jpegParams); ios.close(); imageWriter.dispose(); } public static void main(String[] args) { long start=System.currentTimeMillis(); String pdfPath = "E:\\upload\\pdf\\20140424\\Servlet.pdf"; int scaling = -1; String format = "jpg"; String output_dir = "E:\\upload\\pdf\\20140424\\jpg\\"; String password = null; int pageCount = 10; ConvertPagesToImages convertPagesToImages = new ConvertPagesToImages(); convertPagesToImages.init(pdfPath, scaling, format, output_dir, password, pageCount); 
System.out.println("花费时间为="+(System.currentTimeMillis()-start)/1000 + "秒"); } }
功能说明:
1、支持对文件夹下的所有pdf转换成图片,同时也支持对单个pdf进行转换操作。
2、支持转换成jpg,jpeg,tiff,tif,png格式的图片
3、支持指定转换的图片数。
4、支持指定图片的存储位置
传入参数说明
1、pdfPath pdf文件绝对路径,可以是pdf所在的目录也可以是pdf文件路径
2、format 图片格式 (支持jpg,jpeg,tiff,png) ,传参时不能带有点号
3、scaling 图片比率从1到100(100 = 全尺寸) 支持设置为-1 将保持高质量
4、output_dir 输出路径,输出路径为绝对路径
5、password 文件密码,若pdf没有密码则传入null
6、pageCount 需要转换的页数,传入0表示转换全部页