Java中一些文件操作常用转换
工作中偶尔会遇到操作文件时需要转换文件格式的情况,以下为一些常用操作:
1、上传文件时将图片转pdf再上传
@SneakyThrows public static void main(String[] args) { File file = new File("C:\\Users\\Administrator\\Downloads\\1.jpg"); FileInputStream fileInputStream; MultipartFile multipartFile = null; try { fileInputStream = new FileInputStream(file); multipartFile = new MockMultipartFile(file.getName(),file.getName(), ContentType.APPLICATION_OCTET_STREAM.toString(),fileInputStream); } catch (Exception e) { log.error("file转MultipartFile失败", e); } MultipartFile[] multipartFiles = {multipartFile}; byte[] bytes = getPdfBytes(multipartFiles); System.out.println(Base64.getEncoder().encodeToString(bytes)); InputStream inputStream = new ByteArrayInputStream(bytes); MultipartFile mfile = new MockMultipartFile(ContentType.APPLICATION_OCTET_STREAM.toString(), inputStream); //...以下为上传部分 } @SneakyThrows public static byte[] getPdfBytes(MultipartFile[] imagesFiles) { PDDocument document = new PDDocument(); for (MultipartFile datum : imagesFiles) { String filename = datum.getOriginalFilename(); String fileSuffix = filename.substring(filename.lastIndexOf(".") + 1); Iterator readers = ImageIO.getImageReadersByFormatName(fileSuffix); ImageReader reader = (ImageReader) readers.next(); ImageInputStream input = ImageIO.createImageInputStream(datum.getInputStream()); reader.setInput(input, true); int width = reader.getWidth(0); int height = reader.getHeight(0); PDPage pdPage = new PDPage(new PDRectangle(width, height)); document.addPage(pdPage); PDImageXObject pdImageXObject = PDImageXObject.createFromByteArray(document, datum.getBytes(), "图片转pdf失败"); PDPageContentStream contentStream = new PDPageContentStream(document, pdPage); //写入图片 contentStream.drawImage(pdImageXObject, 0, 0); contentStream.close(); } ByteArrayOutputStream output = new ByteArrayOutputStream(); document.save(output); document.close(); return output.toByteArray(); }
2、将pdf转换成jpg
@SneakyThrows public static void main(String[] args) { String filePath = "C:\\Users\\Administrator\\Downloads\\202xxxxx税票.pdf"; // 替换为你的文件路径 byte[] fileContent = Files.readAllBytes(Paths.get(filePath)); String base64 = Base64.getEncoder().encodeToString(fileContent); String jpg_base64 = base64pdftojpg(base64); System.out.println(jpg_base64); }
下面的方法传入Base64编码的PDF文件,返回Base64编码的JPG图片(多页PDF会按页纵向拼接成一张长图)
private static String base64pdftojpg(String base64) { String jpg_base64 = null; Base64Decoder decoder = new Base64Decoder(); try { // Base64解码 byte[] pdf_bytes = decoder.decode(base64); PDDocument doc = PDDocument.load(pdf_bytes); int size = doc.getNumberOfPages(); /*图像合并使用的参数*/ //定义宽度 int width = 0; // 保存一张图片中的RGB数据 int[] singleImgRGB; // 定义高度,后面用于叠加 int shiftHeight = 0; //保存每张图片的像素值 BufferedImage imageResult = null; // 利用PdfBox生成图像 PDDocument pdDocument = doc; PDFRenderer renderer = new PDFRenderer(pdDocument); /*根据总页数, 按照50页生成一张长图片的逻辑, 进行拆分*/ // 每50页转成1张图片 int pageLength = size; //有多少转多少 // 总计循环的次数 int totalCount = pdDocument.getNumberOfPages() / pageLength + 1; for (int m = 0; m < totalCount; m++) { for (int i = 0; i < pageLength; i++) { int pageIndex = i + (m * pageLength); if (pageIndex == pdDocument.getNumberOfPages()) { break; } // 96为图片的dpi,dpi越大,则图片越清晰,图片越大,转换耗费的时间也越多 BufferedImage image = renderer.renderImageWithDPI(pageIndex, 106, ImageType.RGB); int imageHeight = image.getHeight(); int imageWidth = image.getWidth(); if (i == 0) { //计算高度和偏移量 //使用第一张图片宽度; width = imageWidth; // 保存每页图片的像素值 // 加个判断:如果m次循环后所剩的图片总数小于pageLength,则图片高度按剩余的张数绘制,否则会出现长图片下面全是黑色的情况 if ((pdDocument.getNumberOfPages() - m * pageLength) < pageLength) { imageResult = new BufferedImage(width, imageHeight * (pdDocument.getNumberOfPages() - m * pageLength), BufferedImage.TYPE_INT_RGB); } else { imageResult = new BufferedImage(width, imageHeight * pageLength, BufferedImage.TYPE_INT_RGB); } } else { // 将高度不断累加 shiftHeight += imageHeight; } singleImgRGB = image.getRGB(0, 0, width, imageHeight, null, 0, width); imageResult.setRGB(0, shiftHeight, width, imageHeight, singleImgRGB, 0, width); } // 这个很重要,下面会有说明 shiftHeight = 0; } pdDocument.close(); ByteArrayOutputStream baos = new ByteArrayOutputStream();//io流 ImageIO.write(imageResult, "jpg", baos);//写入流中 byte[] jpg_Bytes = baos.toByteArray();//转换成字节 BASE64Encoder encoder = new BASE64Encoder(); jpg_base64 = 
encoder.encodeBuffer(jpg_Bytes).trim();//转换成base64串 jpg_base64 = jpg_base64.replaceAll("\n", "").replaceAll("\r", "");//删除 \r\n baos.close(); doc.close(); } catch (IOException e) { e.printStackTrace(); } return jpg_base64; }
以上方法使用到的Maven依赖如下:
<!-- pdfbox: PDF creation and page rendering used by the snippets above; fontbox: font support for pdfbox; spring-test: supplies MockMultipartFile. -->
<!-- NOTE(review): spring-test declares no version here, so it presumably relies on a parent POM/BOM to manage it; confirm. -->
<dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>pdfbox</artifactId> <version>2.0.24</version> </dependency> <dependency> <groupId>org.apache.pdfbox</groupId> <artifactId>fontbox</artifactId> <version>2.0.24</version> </dependency> <dependency> <groupId>org.springframework</groupId> <artifactId>spring-test</artifactId> </dependency>
有一点需要注意:使用 pdfbox 转换时,抽象类 NativeFontDirFinder 的 getSearchableDirectories() 方法(由各平台对应的子类实现)指定了各个平台上字体的存放路径
/**
 * Base class for font directory finders: subclasses supply candidate directory paths and
 * {@link #find()} keeps the ones that exist and are readable.
 */
public abstract class NativeFontDirFinder implements FontDirFinder {
    public NativeFontDirFinder() {
    }

    /**
     * Collects the candidate directories that exist and can be read.
     *
     * @return the usable font directories, in the order the candidates were supplied; empty
     *         when there are no candidates or none is usable
     */
    public List<File> find() {
        List<File> usableDirs = new ArrayList<>();
        String[] candidates = this.getSearchableDirectories();
        if (candidates == null) {
            return usableDirs;
        }
        for (String candidate : candidates) {
            File dir = new File(candidate);
            try {
                if (dir.exists() && dir.canRead()) {
                    usableDirs.add(dir);
                }
            } catch (SecurityException ignored) {
                // The directory may not be inspected; skip it and try the next candidate.
            }
        }
        return usableDirs;
    }

    /**
     * Supplies the directory paths to probe for fonts.
     *
     * @return the candidate directory paths; {@code null} is treated as "no candidates"
     */
    protected abstract String[] getSearchableDirectories();
}
Linux平台的字体存放路径如下。转换时需要把所需字体放到会被扫描到的字体目录中(任选其中一个即可),否则会出现中文丢失不显示、部分字体乱码等情况
/**
 * Font directory finder that supplies a fixed list of font locations: the current user's
 * {@code .fonts} directory followed by several system-wide font directories.
 */
public class UnixFontDirFinder extends NativeFontDirFinder {
    public UnixFontDirFinder() {
    }

    /**
     * Lists the directories to probe for fonts.
     *
     * @return the per-user font directory followed by the system-wide ones
     */
    protected String[] getSearchableDirectories() {
        // Per-user fonts live under the home directory of the user running the JVM.
        String userFontDir = System.getProperty("user.home") + "/.fonts";
        String[] searchPath = {
            userFontDir,
            "/usr/local/fonts",
            "/usr/local/share/fonts",
            "/usr/share/fonts",
            "/usr/X11R6/lib/X11/fonts",
            "/usr/share/X11/fonts"
        };
        return searchPath;
    }
}
3、图片文件压缩:可以通过调整 desFileSize 的大小,把图片压缩到可接受的清晰度。这样做的目的是降低图片文件大小,从而在图片以 base64 形式在接口间传输时提高处理速率。
/**
 * Compresses an image until it fits the requested size.
 *
 * <p>The image is repeatedly scaled down and re-encoded at reduced quality until its byte size
 * no longer exceeds the target. Compression stops early when a pass no longer shrinks the
 * image, so the result may still be larger than the target in that case.
 *
 * @param base64String the source image as a Base64 string
 * @param desFileSize  the target image size in KB; must be positive for compression to run
 * @return the Base64 string of the compressed image; the input is returned unchanged when it
 *         is {@code null}, empty, already within the target size, or when
 *         {@code desFileSize} is not positive
 */
public static String compressPicForScale(String base64String, long desFileSize) {
    if (base64String == null || base64String.isEmpty()) {
        return base64String;
    }
    if (desFileSize <= 0) {
        // A non-positive target can never be reached and would make the loop below spin forever.
        log.warn("【图片压缩】msg=目标大小必须大于0,跳过压缩 | desFileSize={}kb", desFileSize);
        return base64String;
    }

    byte[] imageBytes = Base64.getDecoder().decode(base64String);
    final long targetBytes = desFileSize * 1024;
    if (imageBytes.length <= targetBytes) {
        return base64String;
    }

    long srcSize = imageBytes.length;
    double accuracy = getAccuracy(srcSize / 1024);
    try {
        while (imageBytes.length > targetBytes) {
            ByteArrayInputStream inputStream = new ByteArrayInputStream(imageBytes);
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream(imageBytes.length);
            Thumbnails.of(inputStream)
                    .scale(accuracy)
                    .outputQuality(accuracy)
                    .toOutputStream(outputStream);
            byte[] compressed = outputStream.toByteArray();
            if (compressed.length == 0 || compressed.length >= imageBytes.length) {
                // No further progress is possible; keep the smallest result obtained so far
                // instead of looping forever.
                log.warn("【图片压缩】msg=图片无法继续压缩 | 当前大小={}kb | 目标大小={}kb",
                        imageBytes.length / 1024, desFileSize);
                break;
            }
            imageBytes = compressed;
        }
        log.info("【图片压缩】 | 图片原大小={}kb | 压缩后大小={}kb", srcSize / 1024, imageBytes.length / 1024);
    } catch (Exception e) {
        // Best effort: fall through and return whatever has been produced so far.
        log.error("【图片压缩】msg=图片压缩失败!", e);
    }
    return Base64.getEncoder().encodeToString(imageBytes);
}

/**
 * Picks the scale/quality ratio for one compression pass (empirical values): the larger the
 * source image, the more aggressive the ratio.
 *
 * @param size the source image size in KB
 * @return the ratio used both as the scale factor and as the output quality
 */
private static double getAccuracy(long size) {
    if (size < 900) {
        return 0.85;
    }
    if (size < 2047) {
        return 0.6;
    }
    if (size < 3275) {
        return 0.44;
    }
    return 0.4;
}
NativeFontDirFinder