【java爬虫--2】将批量图片地址转成pdf文件

在第一步【Htmlunit+Jsoup解析非静态页面爬取图片】获得图片地址并按行保存成文件后,接着又通过下面的方法将图片批量转成pdf文件。

引入第三方依赖(iText,Maven 坐标如下):

<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.4.2</version>
</dependency>

代码如下:

import com.itextpdf.text.*;
import com.itextpdf.text.pdf.PdfWriter;
import java.io.*;
import java.net.URLEncoder;
import java.util.*;

/**
 * @program: receiveDemo
 * @description: 将批量图片地址转成pdf文件
 * @author: huang wei
 * @create: 2021-04-13 16:19
 */
/**
 * Converts text files that list image locations (one per line) into PDF files,
 * placing each image on its own page sized to match the image.
 */
public class ImgToPdfUtil {
	/** List file (or directory of list files) used when no argument is given. */
	private static final String DEFAULT_INPUT_PATH = "H:\\upload\\111.txt";

	/**
	 * Entry point. Converts a single list file, or every regular file directly
	 * inside a directory, into a PDF stored next to it.
	 *
	 * @param args optional; {@code args[0]} is the list file or directory to
	 *             process. Falls back to the built-in default path when absent.
	 */
	public static void main(String[] args) {
		String imgPath = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
		File directory = new File(imgPath);
		if (directory.isDirectory()) {
			File[] files = directory.listFiles();
			// listFiles() returns null on an I/O error or when access is denied.
			if (files == null) {
				System.err.println("Unable to list directory: " + directory.getAbsolutePath());
				return;
			}
			for (File file : files) {
				if (!file.isDirectory()) {
					imgOfPdf(file);
				}
			}
		} else {
			imgOfPdf(directory);
		}
	}

	/**
	 * Reads image locations from {@code imgFile} (one per line, blank lines
	 * ignored) and writes them into a PDF that has the same path as the list
	 * file but with a {@code .pdf} extension.
	 *
	 * <p>Failures are reported on standard error and never propagate, so a
	 * directory run continues with the next file.
	 *
	 * @param imgFile text file containing one image path or URL per line
	 */
	public static void imgOfPdf(File imgFile) {
		if (imgFile == null || !imgFile.isFile()) {
			System.err.println("Not a readable list file: " + imgFile);
			return;
		}

		String filePath = imgFile.getAbsolutePath();
		String pdfPath = toPdfPath(filePath);
		// A leftover PDF from an earlier run would otherwise be parsed as a
		// list file and then overwritten by its own output.
		if (pdfPath.equalsIgnoreCase(filePath)) {
			System.err.println("Skipping PDF file: " + filePath);
			return;
		}

		ArrayList<String> imageList = new ArrayList<>();
		// NOTE(review): FileReader decodes with the platform default charset;
		// confirm it matches the encoding the list file was written with.
		try (FileReader reader = new FileReader(imgFile);
			 BufferedReader br = new BufferedReader(reader)) {
			String line;
			while ((line = br.readLine()) != null) {
				String location = line.trim();
				if (!location.isEmpty()) {
					imageList.add(location);
				}
			}
		} catch (IOException e) {
			e.printStackTrace();
			return;
		}

		try {
			File pdf = imgToPdf(imageList, pdfPath);
			if (pdf == null) {
				System.err.println("No PDF was generated for: " + filePath);
			}
		} catch (RuntimeException e) {
			// Keep the "never throws" contract so batch processing continues.
			e.printStackTrace();
		}
	}

	/**
	 * Writes every image in {@code imageList} to its own page of a new PDF.
	 *
	 * <p>If any image cannot be loaded or added, the conversion is aborted and
	 * the partially written output file is removed.
	 *
	 * @param imageList          image paths or URLs, in page order
	 * @param mOutputPdfFileName path of the PDF file to create
	 * @return the generated PDF file, or {@code null} when the list is
	 *         {@code null} or empty, or when the PDF could not be written
	 */
	public static File imgToPdf(ArrayList<String> imageList, String mOutputPdfFileName) {
		// A document without pages cannot be closed successfully, so bail out early.
		if (imageList == null || imageList.isEmpty()) {
			return null;
		}

		File outputFile = new File(mOutputPdfFileName);
		Document doc = new Document();
		// No page margins: the image fills the whole page.
		doc.setMargins(0, 0, 0, 0);

		boolean success = false;
		try (FileOutputStream out = new FileOutputStream(outputFile)) {
			PdfWriter writer = PdfWriter.getInstance(doc, out);
			// Keep images in the order they are added.
			writer.setStrictImageSequence(true);
			doc.open();
			for (String location : imageList) {
				// Percent-encode non-ASCII characters, then restore the
				// separators ('/' and ':') that the encoder also escaped.
				String imgUrl = URLEncoder.encode(location, "utf-8");
				imgUrl = imgUrl.replaceAll("%2F", "/").replaceAll("%3A", ":");
				Image image = Image.getInstance(imgUrl);
				image.setAlignment(Image.MIDDLE);
				// Size the page to match the image, then start that page.
				doc.setPageSize(new Rectangle(image.getWidth(), image.getHeight()));
				doc.newPage();
				doc.add(image);
			}
			doc.close();
			success = true;
		} catch (DocumentException | IOException e) {
			e.printStackTrace();
		} finally {
			// The stream is already closed here; drop any truncated output so
			// callers never receive a corrupt PDF.
			if (!success && outputFile.exists() && !outputFile.delete()) {
				outputFile.deleteOnExit();
			}
		}

		return success ? outputFile : null;
	}

	/**
	 * Derives the output path by swapping the extension of the file name for
	 * {@code .pdf}; appends {@code .pdf} when the file name has no extension.
	 * Only the last path segment is considered, so directory names that
	 * contain the extension text are left untouched.
	 */
	private static String toPdfPath(String path) {
		int lastSeparator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
		int lastDot = path.lastIndexOf('.');
		String base = lastDot > lastSeparator ? path.substring(0, lastDot) : path;
		return base + ".pdf";
	}
}

image

image

posted @ 2021-04-13 16:03  逐梦寻欢  阅读(344)  评论(0编辑  收藏  举报