alex_bn_lee

导航

[1008] PyPDF2, Merge PDF files, Insert PDF files

Ref: The PdfMerger Class: merges multiple PDFs into a single PDF.

  • merge(): Merge the pages from the given file into the output file at the specified page number.

  • append(): Identical to the merge() method, but assumes you want to concatenate all pages onto the end of the file instead of specifying a position.

  • write(): Write all data that has been merged to the given output file.

Ref: The PdfReader Class

Ref: The PdfWriter Class


1. Merge PDF files

  • Use the PdfMerger class from PyPDF2 to merge the PDFs.
  • Here’s an example:
Python
import PyPDF2

# Input documents, listed in the order they should appear in the result
input_paths = ("file1.pdf", "file2.pdf", "file3.pdf")

merger = PyPDF2.PdfMerger()
for path in input_paths:
    # append() adds all pages of the file after the pages merged so far
    merger.append(path)

# Save the combined document, then close the merger
merger.write("merged.pdf")
merger.close()

2. Insert PDF files

  • You can use the PdfMerger class to merge PDFs and insert them at specific positions.
  • Here’s an example:
Python
# Open the PDF files you want to merge. The `with` block closes both inputs
# even if PyPDF2 raises part-way through.
with open("file1.pdf", "rb") as input1, open("file2.pdf", "rb") as input2:
    # Create a PdfMerger object
    merger = PyPDF2.PdfMerger()
    try:
        # Append the first three pages of input1. `pages` is a (start, stop)
        # range with an exclusive stop, so (0, 3) selects indices 0, 1 and 2.
        merger.append(fileobj=input1, pages=(0, 3))

        # Insert the first page of input2 after the second page
        merger.merge(position=2, fileobj=input2, pages=(0, 1))

        # Write the merged PDF to an output file while the inputs are still open
        with open("output.pdf", "wb") as output:
            merger.write(output)
    finally:
        # Release whatever the merger still holds, whether or not writing succeeded
        merger.close()

3. Get the number of pages

# PdfReader exposes the document's pages as a sequence; its length is the page count
reader = PyPDF2.PdfReader("file4.pdf")
page_count = len(reader.pages)
print(page_count)

The big example:

import os, PyPDF2

# Project folder to process. Alternative that finds the root directory from
# this script's own path:
# root_dir = __file__[:__file__.find("\\Working\\GIS\\Data\\Models")]

root_dir = r"S:\TRAINING\Bingnan\02_Test_Data\Data_DDR\LI-4155 DDR Merrylands  NSW"

# Split the folder name on whitespace; the words are reused to name the exports
file_name = os.path.basename(root_dir).split()
# The second word is the report type (ESR or DDR); remove it from the list
esr_ddr = file_name.pop(1)
# The remaining words, joined by single spaces, prefix every exported file
file_name_new = " ".join(file_name)

# Folder that holds the individual PDFs
PDFs_dir = os.path.join(root_dir, "Delivery\\PDFs")

map_path_list = []   # names starting with "Map" that do not contain "Map B"
hist_path_list = []  # names containing "Map B" (the historic maps)
report_list = []     # names containing both "LI-" and ".pdf"

for file in os.listdir(PDFs_dir):
    # The checks run in this order, so a "Map B" file never lands in map_path_list
    if "Map B" in file:
        bucket = hist_path_list
    elif file.startswith("Map"):
        bucket = map_path_list
    elif "LI-" in file and ".pdf" in file:
        bucket = report_list
    else:
        continue
    bucket.append(file)
        
# Combine the map PDFs into a single "Report Maps" file
# Create a PDF merger object
pdf_merger = PyPDF2.PdfMerger()
try:
    # Pages are appended in the order the files appear in map_path_list
    for file in map_path_list:
        pdf_merger.append(os.path.join(PDFs_dir, file))

    # Write the merged PDF to the output file
    with open(os.path.join(root_dir, f"Delivery\\Final\\{file_name_new} - Report Maps.pdf"), "wb") as output:
        pdf_merger.write(output)
finally:
    # Close the merger even if appending or writing failed
    pdf_merger.close()
    
# Order the historic map files by their " B<n> " tag: for n = 1, 2, ... up to
# the number of files, pick every name containing " B<n> ". Names without a
# matching tag are left out.
hist_path_list_new = [
    name
    for number in range(1, len(hist_path_list) + 1)
    for name in hist_path_list
    if f" B{number} " in name
]
            
# Combine the historic map PDFs into a single "Historic Imagery" file
# Create a PDF merger object
pdf_merger2 = PyPDF2.PdfMerger()
try:
    # hist_path_list_new is already in B1, B2, ... order
    for file in hist_path_list_new:
        pdf_merger2.append(os.path.join(PDFs_dir, file))

    # Write the merged PDF to the output file
    with open(os.path.join(root_dir, f"Delivery\\Final\\{file_name_new} - Historic Imagery.pdf"), "wb") as output:
        pdf_merger2.write(output)
finally:
    # Close the merger even if appending or writing failed
    pdf_merger2.close()
    
# Page count of the report (the first entry of report_list)
with open(os.path.join(PDFs_dir, report_list[0]), "rb") as report_stream:
    report_pages_num = len(PyPDF2.PdfReader(report_stream).pages)

# Page count of the combined "Report Maps" PDF written earlier
maps_pdf_path = os.path.join(root_dir, f"Delivery\\Final\\{file_name_new} - Report Maps.pdf")
with open(maps_pdf_path, "rb") as maps_stream:
    map_pages_num = len(PyPDF2.PdfReader(maps_stream).pages)
    
# Assemble the final document: the report with the two combined PDFs inserted.
report_maps_pdf = os.path.join(root_dir, f"Delivery\\Final\\{file_name_new} - Report Maps.pdf")
historic_pdf = os.path.join(root_dir, f"Delivery\\Final\\{file_name_new} - Historic Imagery.pdf")
final_pdf = os.path.join(root_dir, f"Delivery\\Final\\{file_name_new} - {esr_ddr} new.pdf")

merger = PyPDF2.PdfMerger()
try:
    # Add the whole report, if for specific pages, can add parameter like "pages=..."
    merger.append(os.path.join(PDFs_dir, report_list[0]))

    # Open the inputs and the output with `with` so every handle is closed
    # (and the output flushed) even if a merge step raises.
    with open(report_maps_pdf, "rb") as maps_file, open(historic_pdf, "rb") as historic_file:
        # Insert the combined maps so that the report's last three pages
        # follow them (presumably right after the Appendix A cover page;
        # verify against the report layout).
        merger.merge(position=report_pages_num - 3, fileobj=maps_file)

        # The document now has report_pages_num + map_pages_num pages; insert
        # the historic imagery just before the final page (presumably right
        # after the Appendix B cover page; verify against the report layout).
        merger.merge(position=report_pages_num + map_pages_num - 1, fileobj=historic_file)

        # Save the output PDF
        with open(final_pdf, "wb") as output:
            merger.write(output)
finally:
    # Close the merger even if a merge or write step failed
    merger.close()

 

posted on 2024-06-13 13:45  McDelfino  阅读(7)  评论(0)  编辑  收藏  举报