# Python 之合并 PDF (merging PDF files with Python)

import os
from pathlib import Path
from PyPDF2 import PdfReader, PdfWriter

def extract_number_from_filename(path):
    """Return the integer prefix of a file name such as ``12_intro.pdf``.

    The directory part and the extension are discarded; whatever precedes the
    first underscore of the remaining stem is converted with ``int``. A stem
    without any underscore is converted as a whole.

    Raises:
        ValueError: If that leading part is not a valid integer literal.
    """
    stem = Path(path).stem
    prefix, _, _ = stem.partition('_')
    return int(prefix)


def merge_pdfs_in_folder(folder, output):
    """Merge every PDF found directly in *folder* into one file at *output*.

    Input files are appended in ascending order of the integer prefix that
    precedes the first underscore of their name (computed by
    ``extract_number_from_filename``); files sharing a prefix are ordered by
    path so the result does not depend on directory listing order.

    Args:
        folder: Directory to scan for PDF files (not recursive).
        output: Path of the merged PDF; created or overwritten.

    Raises:
        ValueError: If a PDF's name does not start with an integer prefix.
    """
    # Resolve once so the comparison below works for relative paths/symlinks.
    output_real = os.path.realpath(output)

    candidates = []
    for filename in os.listdir(folder):
        # Case-insensitive match so files named e.g. "1_a.PDF" are not dropped.
        if not filename.lower().endswith('.pdf'):
            continue
        path = os.path.join(folder, filename)
        # A previous merge result stored inside the same folder must not be
        # fed back into the merge (and would usually lack a numeric prefix).
        if os.path.realpath(path) == output_real:
            continue
        candidates.append(path)

    # os.listdir returns entries in arbitrary order; the path tie-breaker
    # keeps the page order reproducible when two files share a prefix.
    paths = sorted(
        candidates,
        key=lambda p: (extract_number_from_filename(p), p),
    )

    pdf_writer = PdfWriter()
    for path in paths:
        pdf_reader = PdfReader(path)
        for page in pdf_reader.pages:
            pdf_writer.add_page(page)

    with open(output, 'wb') as out:
        pdf_writer.write(out)

# File name of the merged PDF that gets written.
output = 'merged.pdf'
# Folder containing the numbered PDF files to combine.
folder = './ray_serve_pdfs/'
merge_pdfs_in_folder(folder=folder, output=output)

 

# posted @ 2023-08-07 00:27  Shiyu_Huang  阅读(122)  评论(0)  编辑  收藏  举报