python pdf 转图片

1. 需要安装 requests 和 PyMuPDF 两个依赖：pip install requests PyMuPDF。可以通过自定义的缩放因子和旋转因子对生成的图片进行缩放和旋转。

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import datetime
import os
import requests

import fitz  # fitz就是 pip install PyMuPDF

# Request headers attached to the PDF download request; the values imitate a
# desktop Chrome browser and ask for Chinese-language content first.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/61.0.3163.100 Safari/537.36"
    ),
    "Connection": "keep-alive",
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8"
    ),
    "Accept-Language": "zh-CN,zh;q=0.8",
}


def get_pdf(url: str, save_path: str = "demo1.pdf", timeout: float = 30.0) -> None:
    """Download the PDF at ``url`` and write its bytes to ``save_path``.

    :param url: address of the PDF to download
    :param save_path: local file to (over)write; defaults to ``demo1.pdf``,
        the name this function always used before the parameter existed
    :param timeout: seconds to wait for the server before giving up
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status
    :raises requests.RequestException: on connection failure or timeout
    """
    # Without an explicit timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly rather than saving an HTML error page under a .pdf name.
    response.raise_for_status()
    with open(save_path, "wb") as pdf_file:
        pdf_file.write(response.content)


def reset_width_height(width: int, height: int, carrier_code: str, rotate: int = 0):
    """Return the ``(zoom_x, zoom_y)`` factors used to render a page for a carrier.

    The carrier's entry in the ``scale`` configuration decides the result:

    * a "fixed" carrier yields its configured ``zoom`` value for both axes;
    * any other carrier yields the ratio between its standard size
      (``stander_width`` / ``stander_height``) and the given page size;
    * a carrier code that is not configured yields ``(1.0, 1.0)``, i.e. no
      scaling, instead of falling through and returning ``None`` (which made
      the caller's tuple unpacking fail with an unrelated ``TypeError``).

    :param width: width of the page as rendered without any zoom
    :param height: height of the page as rendered without any zoom
    :param carrier_code: key into the ``scale`` configuration
    :param rotate: rotation factor in quarter turns; only its parity is used here
    :return: tuple ``(zoom_x, zoom_y)``
    """
    try:
        from sacle import scale
    except ModuleNotFoundError:
        # NOTE(review): "sacle" looks like a misspelling of "scale" - confirm
        # the real module name. Fall back so the import works either way.
        from scale import scale

    carrier = scale.get(carrier_code)
    if carrier is None:
        # Unknown carrier: keep the page at its native size.
        return 1.0, 1.0

    if carrier.get("fixed"):
        zoom = carrier.get("zoom")
        return zoom, zoom

    # The carrier defines a standard image size; derive the zoom from it.
    standard = carrier.get("scale")
    standard_width = standard.get("stander_width")
    standard_height = standard.get("stander_height")

    # Width and height are exchanged only for an odd number of quarter turns
    # on a page that is at least as wide as it is tall.
    if rotate % 2 == 1 and width >= height:
        width, height = height, width
    return standard_width / width, standard_height / height


def decompose_pdf(pdf_path, image_path, rotate: int = 0, carrier_code: str = "EXPRESS"):
    """Render every page of a PDF to a PNG file inside ``image_path``.

    Each page is rendered once without zoom to learn its pixel size, the zoom
    factors are obtained from ``reset_width_height`` and the page is rendered
    again with the zoom and rotation applied. Output files are named
    ``images_zoom_<page index>.png``.

    :param pdf_path: path of the PDF to convert
    :param image_path: directory that receives the images; created if missing
    :param rotate: rotation factor, 0 (default) means no rotation;
        1 --> 90 degrees, 2 --> 180 degrees, 3 --> 270 degrees
    :param carrier_code: carrier key handed to ``reset_width_height``;
        defaults to ``"EXPRESS"``, the value that used to be hard-coded
    :return: None
    """
    start_time = datetime.datetime.now()
    # The context manager closes the document even if rendering a page fails.
    with fitz.open(pdf_path) as pdf_doc:
        # Create the output directory once instead of re-checking it per page.
        os.makedirs(image_path, exist_ok=True)
        for pg, page in enumerate(pdf_doc):
            # First pass at zoom 1 only to measure the page in pixels.
            pixmap = page.get_pixmap()
            width, height = pixmap.width, pixmap.height
            print(rotate)
            zoom_x, zoom_y = reset_width_height(width, height, carrier_code, rotate)
            print(f"pix原来的的宽度: {width}, 高度: {height}")
            print(f"缩放因子zoom_x:{zoom_x}, zoom_y:{zoom_y}")
            # Combine the zoom with a rotation of rotate * 90 degrees.
            mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate * 90)
            pix = page.get_pixmap(matrix=mat, alpha=False)
            width, height = pix.width, pix.height
            print(f"pix 的宽度: {width}, 高度: {height}")
            img_path = f"{image_path}/images_zoom_{pg}.png"
            pix.save(img_path)  # write the rendered page into the target directory
    end_time = datetime.datetime.now()
    print('pdf2img时间=', (end_time - start_time).seconds)


if __name__ == "__main__":
    # Optional first step - download the sample PDF:
    # get_pdf("https://file.minio.com/7d8-92bb-469350c17b1d-1729670297074_0.pdf")

    # Input PDF and the directory that receives the generated images.
    pdf_path_, image_path_ = 'demo1.pdf', './imgs'
    decompose_pdf(pdf_path=pdf_path_, image_path=image_path_)

scale.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# Per-carrier scaling configuration, keyed by carrier code.
#   "fixed": True  -> "zoom" holds the zoom value configured for that carrier.
#   "fixed": False -> "scale" holds the standard width/height for that carrier.
scale = {
    "UPS": {"fixed": True, "zoom": 15},
    "FEDEX": {"fixed": True, "zoom": 4},
    "USPS": {"fixed": True, "zoom": 4},
    "EXPRESS": {
        "fixed": False,
        "scale": {"stander_width": 600, "stander_height": 1000},
    },
}

``
posted @   lyu6  阅读(18)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
点击右上角即可分享
微信分享提示