如何使用python进行pdf文件分割
1.安装 PyPDF2 包
pip install PyPDF2
然后import PyPDF2
2.在 PyPDF2 库中,可以使用以下代码打开 PDF 文件:
# Open the PDF in binary mode and count its pages.
# The `with` block guarantees the file handle is closed even if parsing fails.
# PdfReader / len(reader.pages) replace PdfFileReader / numPages, which
# PyPDF2 3.0 removed.
with open('filename.pdf', 'rb') as pdf_file:
    pdf_reader = PyPDF2.PdfReader(pdf_file)
    total_pages = len(pdf_reader.pages)
    # Do any further page access inside this block: the reader reads from
    # pdf_file, which is closed as soon as the block exits.
3.下面的代码将 PDF 的每一页拆分为单独的文件
from PyPDF2 import PdfReader, PdfWriter

pdf_path = r"F:\工作\1.pdf"
# Prefix of every output file; the page index and ".pdf" are appended to it.
save_path = r"F:\工作\a\a"

# Split the PDF: write every page to its own single-page file.
# PdfReader / PdfWriter / add_page replace PdfFileReader / PdfFileWriter /
# addPage, which PyPDF2 3.0 removed.
pdf_reader = PdfReader(pdf_path)
for i, page in enumerate(pdf_reader.pages):
    # A fresh writer per page so each output file holds exactly one page.
    pdf_writer = PdfWriter()
    pdf_writer.add_page(page)
    # Output name uses the 0-based page index: a0.pdf, a1.pdf, ...
    with open(save_path + '{}.pdf'.format(str(i)), 'wb') as fh:
        pdf_writer.write(fh)
    print('{} Save Sucessfully !\n'.format(str(i)))
4. 2个PDF 文件合并为1个
from PyPDF2 import PdfReader, PdfWriter

merge_pdf = r"F:\工作\z.pdf"
p1_pdf = r"F:\工作\a\a0.pdf"
p2_pdf = r"F:\工作\a\a1.pdf"

# PdfReader / PdfWriter / add_page replace PdfFileReader / PdfFileWriter /
# addPage, which PyPDF2 3.0 removed.
p1_reader = PdfReader(p1_pdf)
p2_reader = PdfReader(p2_pdf)
merge = PdfWriter()

# Append every page of p1, then every page of p2, preserving page order.
for reader in (p1_reader, p2_reader):
    for page in reader.pages:
        merge.add_page(page)

# Write the combined document out.
with open(merge_pdf, 'wb') as f:
    merge.write(f)
5.将多个单页 PDF 文件合并成一个 PDF 文件
from PyPDF2 import PdfReader, PdfWriter

merge_pdf = r"F:\工作\z.pdf"
# Prefix of the input files; the file number and ".pdf" are appended to it.
p_pdf = r"F:\工作\a\a"

# PdfReader / PdfWriter / add_page replace PdfFileReader / PdfFileWriter /
# addPage, which PyPDF2 3.0 removed.
merge = PdfWriter()

# Merge the files a220.pdf .. a226.pdf (range end is exclusive) in order.
for file_no in range(220, 227):
    p_reader = PdfReader(p_pdf + str(file_no) + '.pdf')
    # Iterate the pages directly instead of reusing the outer loop variable
    # as the page index, which the original did.
    for page in p_reader.pages:
        merge.add_page(page)

# Write the combined document out.
with open(merge_pdf, 'wb') as f:
    merge.write(f)
6. 直接截取 PDF 中的某几页
from PyPDF2 import PdfReader, PdfWriter

# Offset that converts a printed-book page number into a PDF page index.
# reader.pages is indexed from 0, so the offset must map a printed page
# number onto that 0-based index.
offset = 11
# Printed-book page range to extract (both ends inclusive).
page_start = 10
page_end = 15

all_pdf = r"F:\工作\a.pdf"
part_pdf = r"F:\工作\p.pdf"

# PdfReader / PdfWriter / pages[i] / add_page replace PdfFileReader /
# PdfFileWriter / getPage / addPage, which PyPDF2 3.0 removed.
p_reader = PdfReader(all_pdf)
p_writer = PdfWriter()

# "+ 1" because range() excludes its end and page_end itself is wanted.
for i in range(page_start + offset, page_end + offset + 1):
    p_writer.add_page(p_reader.pages[i])

with open(part_pdf, 'wb') as f:
    p_writer.write(f)
参考:https://pythonjishu.com/ugmvrnorpclhikd/
https://zhuanlan.zhihu.com/p/357378479?utm_id=0