使用python切割pdf文件
应用场景:需要使用 Python 按指定的页码范围切割 PDF 文件(页码从 0 开始,起始页和结束页都包含在结果中)。
首先使用pip安装以下包
pip install PyPDF2
pip install flask
实现代码如下:
import sys
from PyPDF2 import PdfReader, PdfWriter
def split_pdf(input_path, output_path, start_page, end_page):
    """Copy an inclusive, zero-based page range of a PDF into a new file.

    Args:
        input_path: Path of the PDF to read.
        output_path: Path the extracted pages are written to.
        start_page: Zero-based index of the first page to keep. Negative
            values are clamped to 0.
        end_page: Zero-based index of the last page to keep (inclusive).
            Values past the last page are clamped to the last page.

    Raises:
        ValueError: If the range selects no pages, i.e. ``end_page`` lies
            before ``start_page`` or ``start_page`` is past the end of the
            document. ``output_path`` is not opened in that case.
    """
    # Clamp a negative start to the first page.
    start_page = max(start_page, 0)

    # An inverted range can be rejected before any file is opened; without
    # this check the loop below would run zero times and an empty PDF would
    # silently overwrite output_path.
    if end_page < start_page:
        raise ValueError(
            f"empty page range: start_page={start_page}, end_page={end_page}"
        )

    with open(input_path, 'rb') as input_file:
        pdf = PdfReader(input_file)
        total_pages = len(pdf.pages)

        # A start beyond the last page (or a document without pages) would
        # also produce an empty output file.
        if start_page >= total_pages:
            raise ValueError(
                f"start_page={start_page} is out of range for a document "
                f"with {total_pages} page(s)"
            )

        # Clamp an end past the last page to the last page.
        end_page = min(end_page, total_pages - 1)

        # Copy the requested pages into a fresh writer.
        output_pdf = PdfWriter()
        for page_num in range(start_page, end_page + 1):
            output_pdf.add_page(pdf.pages[page_num])

        # Write while the source file is still open: the writer may still
        # need to read page data from the reader's stream at this point.
        with open(output_path, 'wb') as output_file:
            output_pdf.write(output_file)
# Read the four positional command-line arguments:
#   <input path> <output path> <start page> <end page>
# The two page arguments are converted to integers.
inputPath, outputPath = sys.argv[1], sys.argv[2]
startPage, endPage = int(sys.argv[3]), int(sys.argv[4])

# Run the split with the values taken from the command line.
split_pdf(inputPath, outputPath, startPage, endPage)
也可以封装成一个 Flask 接口(通过 POST 表单传入 input_path、output_path、start_page、end_page 四个参数):
import logging
from flask import Flask, request, jsonify
from PyPDF2 import PdfReader, PdfWriter
#pip install PyPDF2
#pip install flask
#pip install waitress
# Create the Flask application object that the route below is registered on.
app = Flask(__name__)

# Send log records of level INFO and above to the file 'app.log'.
logging.basicConfig(level=logging.INFO, filename='app.log')
@app.route('/api/split_pdf', methods=['POST'])
def split_pdf():
    """Split a PDF by an inclusive, zero-based page range (POST form API).

    Form fields:
        input_path: Path of the PDF to read.
        output_path: Path the extracted pages are written to.
        start_page: Zero-based first page to keep; negative values are
            clamped to 0.
        end_page: Zero-based last page to keep (inclusive); values past the
            last page are clamped to the last page.

    Returns:
        A JSON object. On success: ``status`` ("success"), ``message`` and
        ``output_path``. On any failure: ``status`` ("error") and
        ``message``. Failures are reported in the body only; the HTTP status
        is left unchanged so existing clients keep working.
    """
    # Log the raw request parameters.
    logging.info(f"Request Parameters: {request.form}")

    # NOTE(security): both paths come straight from the client and are used
    # as-is, so a caller can read or overwrite any file this process can
    # access. Restrict them to a dedicated directory before exposing this
    # endpoint to untrusted callers.
    input_path = request.form.get('input_path')
    output_path = request.form.get('output_path')

    try:
        # Validate inside the try block so that bad input produces the same
        # JSON error shape as every other failure instead of an unhandled
        # exception.
        if not input_path or not output_path:
            raise ValueError("input_path and output_path are required")
        try:
            start_page = int(request.form.get('start_page'))
            end_page = int(request.form.get('end_page'))
        except (TypeError, ValueError):
            # TypeError: field missing (int(None)); ValueError: not a number.
            raise ValueError(
                "start_page and end_page are required and must be integers"
            ) from None

        with open(input_path, 'rb') as input_file:
            pdf = PdfReader(input_file)
            total_pages = len(pdf.pages)

            # Clamp both ends of the range to the document.
            start_page = max(start_page, 0)
            end_page = min(end_page, total_pages - 1)

            # After clamping, start > end means the request selects no pages
            # (inverted range, start past the last page, or empty document).
            # Fail here rather than silently writing an empty PDF.
            if start_page > end_page:
                raise ValueError(
                    f"page range selects no pages "
                    f"(document has {total_pages} page(s))"
                )

            # Copy the requested pages into a fresh writer.
            output_pdf = PdfWriter()
            for page_num in range(start_page, end_page + 1):
                output_pdf.add_page(pdf.pages[page_num])

            # Write while the source file is still open: the writer may
            # still need to read page data from the reader's stream.
            with open(output_path, 'wb') as output_file:
                output_pdf.write(output_file)

        logging.info(f"Split PDF: {input_path} -> {output_path}")

        # Success response.
        response = {
            'status': 'success',
            'message': 'PDF 文件切割成功。',
            'output_path': output_path
        }
    except Exception as e:
        # Top-level boundary of the request handler: report every failure to
        # the client as JSON and keep the traceback in the log.
        response = {
            'status': 'error',
            'message': str(e)
        }
        logging.exception("Split PDF error: %s", e)

    return jsonify(response)
if __name__ == '__main__':
    import os

    # Debug mode turns on the interactive Werkzeug debugger, which allows
    # code execution from the browser. It is therefore off by default and
    # only enabled when FLASK_DEBUG is explicitly set to a truthy value.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in {
        '1', 'true', 'yes', 'on',
    }
    app.run(debug=debug_enabled)