python pdf 转图片
1. 需要安装 requests 和 PyMuPDF 依赖:pip install requests PyMuPDF。可以通过自定义的缩放因子和旋转因子来缩放和旋转图片。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import datetime
import os
import requests
import fitz # fitz就是 pip install PyMuPDF
# Request headers sent along with the PDF download request.
headers = {
    # Present the request as a desktop Chrome browser.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/61.0.3163.100 Safari/537.36"
    ),
    "Connection": "keep-alive",
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8"
    ),
    "Accept-Language": "zh-CN,zh;q=0.8",
}
def get_pdf(url: str, save_path: str = "demo1.pdf", timeout: float = 30):
    """Download the PDF at ``url`` and write its bytes to ``save_path``.

    :param url: address of the PDF to download.
    :param save_path: file the response body is written to; defaults to
        ``demo1.pdf`` in the current working directory.
    :param timeout: seconds to wait for the server before giving up, so a
        stalled connection cannot block the script forever.
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status.
    :return: None
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error status instead of saving an error page as a PDF.
    response.raise_for_status()
    with open(save_path, "wb") as pdf_file:
        pdf_file.write(response.content)
def reset_width_height(width: int, height: int, carrier_code: str, rotate: int = 0):
    """Return the ``(zoom_x, zoom_y)`` factors used to render a page.

    The factors come from the carrier table:

    * a carrier marked ``fixed`` uses its ``zoom`` value for both axes;
    * any other known carrier is scaled so that the page reaches the
      carrier's ``stander_width`` x ``stander_height``;
    * an unknown carrier gets ``(1, 1)``, i.e. the page keeps its size.

    :param width: width of the page rendered without zoom, in pixels.
    :param height: height of the page rendered without zoom, in pixels.
    :param carrier_code: key into the carrier table, e.g. ``"EXPRESS"``.
    :param rotate: rotation factor in quarter turns (0-3).
    :return: tuple ``(zoom_x, zoom_y)``.
    """
    # NOTE(review): the table is published under the name ``scale`` while the
    # original import spelled the module ``sacle``. Try the original spelling
    # first and fall back to ``scale`` -- confirm the real module name.
    try:
        from sacle import scale
    except ImportError:
        from scale import scale

    carrier = scale.get(carrier_code)
    if carrier is None:
        # Previously this fell through and returned None, which the caller
        # cannot unpack into two zoom factors. Use identity zoom instead.
        return 1, 1

    if carrier.get("fixed"):
        zoom = carrier.get("zoom")
        return zoom, zoom

    # Target ("standard") size of the image for this carrier.
    target = carrier.get("scale")
    stander_width = target.get("stander_width")
    stander_height = target.get("stander_height")

    # Only an odd number of quarter turns swaps the axes; in that case a
    # landscape page is measured as portrait before computing the ratios.
    if rotate % 2 == 1 and width >= height:
        width, height = height, width
    return stander_width / width, stander_height / height
def decompose_pdf(pdf_path, image_path, rotate: int = 0, carrier_code: str = "EXPRESS"):
    """Render every page of a PDF to a PNG image.

    Each page is first rendered without zoom to measure it, the zoom factors
    for ``carrier_code`` are obtained from ``reset_width_height``, and the
    page is then rendered again with that zoom and a rotation of
    ``rotate * 90`` degrees. Images are written to
    ``{image_path}/images_zoom_{page index}.png``.

    :param pdf_path: path of the PDF file to open.
    :param image_path: directory the images are written to; created when it
        does not exist yet.
    :param rotate: rotation factor, 0 by default (no rotation);
        1 -> 90 degrees, 2 -> 180 degrees, 3 -> 270 degrees.
    :param carrier_code: carrier whose zoom settings are applied; defaults to
        ``"EXPRESS"``, the value that used to be hard-coded here.
    :return: None
    """
    start_time = datetime.datetime.now()

    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as pdf_doc:
        # Create the output directory once instead of checking on every page.
        os.makedirs(image_path, exist_ok=True)

        for pg, page in enumerate(pdf_doc):
            # Unzoomed render, used only to read the page's pixel size.
            pixmap = page.get_pixmap()
            width, height = pixmap.width, pixmap.height
            print(rotate)
            zoom_x, zoom_y = reset_width_height(width, height, carrier_code, rotate)
            print(f"pix原来的的宽度: {width}, 高度: {height}")
            print(f"缩放因子zoom_x:{zoom_x}, zoom_y:{zoom_y}")

            # Combine the zoom with the requested rotation.
            mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate * 90)
            pix = page.get_pixmap(matrix=mat, alpha=False)
            width, height = pix.width, pix.height
            print(f"pix 的宽度: {width}, 高度: {height}")

            img_path = f"{image_path}/images_zoom_{pg}.png"
            pix.save(img_path)

    end_time = datetime.datetime.now()
    # Elapsed time, reported in whole seconds.
    print('pdf2img时间=', (end_time - start_time).seconds)
if __name__ == "__main__":
    # Optional first step: download the sample PDF.
    # get_pdf("https://file.minio.com/7d8-92bb-469350c17b1d-1729670297074_0.pdf")
    source_pdf = "demo1.pdf"  # 1. path of the PDF to convert
    output_dir = "./imgs"  # 2. directory that receives the images
    decompose_pdf(pdf_path=source_pdf, image_path=output_dir)
scale.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Zoom configuration keyed by carrier code.
# Carriers flagged "fixed" carry a single "zoom" value.
scale = {
    code: {"fixed": True, "zoom": zoom}
    for code, zoom in (("UPS", 15), ("FEDEX", 4), ("USPS", 4))
}
# Non-fixed carriers carry a target ("stander") width and height instead.
scale["EXPRESS"] = {
    "fixed": False,
    "scale": {"stander_width": 600, "stander_height": 1000},
}
``
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现