PDF 转 PPTX

用途

  • beamer 生成的 PDF 转 PPTX
  • marp 导出的 PDF 转 PPTX
  • PPTX 导出的 PDF 再转为高清图片档 PPTX

过程

  • 将 PDF 文档导出为 PNG 图片,每页一张。
  • 每张 PNG 作为一张 slide,做成一个 PPTX 文档。

安装

需安装 python-pptx 和 PyMuPDF:pip install python-pptx pymupdf

使用

用法:python pdf_to_pptx.py test.pdf {RATIO} {ZOOM-FACTOR}

其中 {RATIO} 表示 PDF 页面的宽高比例,{ZOOM-FACTOR} 控制图片清晰度。

python pdf_to_pptx.py test.pdf 4:3 5

python pdf_to_pptx.py test.pdf 16:9 10

代码

pdf_to_pptx.py

# encoding: UTF-8

# pip install python-pptx
#     https://python-pptx.readthedocs.io/
# pip install pymupdf
#     https://pymupdf.readthedocs.io/en/latest/

import sys, math, io
from pptx import Presentation
from pptx.util import Inches
from pptx.parts.image import Image 
import fitz 

# args: pdf_filename, aspect_ratio, zoom_factor


def parse_ratio(text):
    """Parse an aspect ratio such as '16:9' into a (width, height) tuple.

    Raises ValueError when the text is not two positive integers
    separated by a single ':'.
    """
    parts = text.split(':')
    if len(parts) != 2:
        raise ValueError('aspect ratio must look like W:H, got {!r}'.format(text))
    width, height = int(parts[0]), int(parts[1])
    if width <= 0 or height <= 0:
        raise ValueError('aspect ratio must be positive, got {!r}'.format(text))
    return width, height


def converted_name(pdf_filename):
    """Return the output path: the input minus a trailing '.pdf' plus '_converted.pptx'.

    Only a trailing extension is stripped (case-insensitively), so the
    result always differs from the input and the source PDF can never be
    overwritten by the save step.
    """
    suffix = '.pdf'
    if pdf_filename.lower().endswith(suffix):
        stem = pdf_filename[:-len(suffix)]
    else:
        stem = pdf_filename
    return stem + '_converted.pptx'


def main(argv):
    """Render every page of a PDF to PNG and place each one on its own slide.

    argv[1] is the PDF path, argv[2] the slide aspect ratio (e.g. '4:3'),
    argv[3] the zoom factor applied in both dimensions when rasterising.
    """
    if len(argv) < 4:
        sys.exit('usage: python pdf_to_pptx.py test.pdf {RATIO} {ZOOM-FACTOR}')
    pdf_filename = argv[1]
    width, height = parse_ratio(argv[2])
    zoom_factor = float(argv[3])  # fractional zoom such as 2.5 is allowed
    if zoom_factor <= 0:
        sys.exit('zoom factor must be positive')

    prs = Presentation()
    # The ratio numbers are used directly as the slide size in inches.
    prs.slide_width = Inches(width)
    prs.slide_height = Inches(height)
    blank_slide_layout = prs.slide_layouts[6]

    mat = fitz.Matrix(zoom_factor, zoom_factor)  # zoom factor in each dimension
    doc = fitz.open(pdf_filename)  # open document
    try:
        print('#pages', len(doc))
        for i, page in enumerate(doc):  # iterate through the pages
            # Prefer the current snake_case PyMuPDF names; fall back to the
            # legacy camelCase ones so old installations keep working.
            render = getattr(page, 'get_pixmap', None) or page.getPixmap
            pix = render(matrix=mat, alpha=False)
            to_bytes = getattr(pix, 'tobytes', None) or pix.getImageData
            png_stream = io.BytesIO(to_bytes('png'))

            slide = prs.slides.add_slide(blank_slide_layout)
            # Only the width is given, so the picture keeps its own aspect ratio.
            slide.shapes.add_picture(png_stream, 0, 0, width=prs.slide_width)
            print('page {}, image size: ({}, {})'.format(i+1, pix.width, pix.height))
    finally:
        doc.close()

    pptx_filename = converted_name(pdf_filename)
    prs.save(pptx_filename)
    print('saved to', pptx_filename)


if __name__ == '__main__':
    main(sys.argv)






旧版本

安装及使用

需安装 python-pptx(pip install python-pptx),以及命令行工具 pdftk 和 pdftoppm。

用法:python pdf_to_pptx.py test.pdf {RATIO} {DPI}

其中 {RATIO} 表示 PDF 页面的宽高比例,{DPI} 表示图片 DPI,控制图片清晰度。

python pdf_to_pptx.py test.pdf 4:3 500

python pdf_to_pptx.py test.pdf 16:9 1000

代码

pdf_to_pptx.py

# encoding: UTF-8

# pip install python-pptx
#     https://python-pptx.readthedocs.io/
# need pdftk, pdftoppm installed

import sys
import os
import re
import math
from pptx import Presentation
from pptx.util import Inches
from pptx.parts.image import Image 

# args: pdf_filename, aspect_ratio, dpi
import subprocess


def parse_ratio(text):
    """Parse an aspect ratio such as '16:9' into a (width, height) tuple.

    Raises ValueError when the text is not two positive integers
    separated by a single ':'.
    """
    parts = text.split(':')
    if len(parts) != 2:
        raise ValueError('aspect ratio must look like W:H, got {!r}'.format(text))
    width, height = int(parts[0]), int(parts[1])
    if width <= 0 or height <= 0:
        raise ValueError('aspect ratio must be positive, got {!r}'.format(text))
    return width, height


def converted_name(pdf_filename):
    """Return the output path: the input minus a trailing '.pdf' plus '_converted.pptx'.

    Only a trailing extension is stripped (case-insensitively), so the
    result always differs from the input and the source PDF can never be
    overwritten by the save step.
    """
    suffix = '.pdf'
    if pdf_filename.lower().endswith(suffix):
        stem = pdf_filename[:-len(suffix)]
    else:
        stem = pdf_filename
    return stem + '_converted.pptx'


def parse_page_count(data_content):
    """Extract the page count from the text written by `pdftk ... dump_data_utf8`.

    Raises ValueError when no 'NumberOfPages: N' entry is present.
    """
    match = re.search(r'NumberOfPages: (\d+)', data_content)
    if match is None:
        raise ValueError('NumberOfPages not found in pdftk output')
    return int(match.group(1))


def main(argv):
    """Rasterise each PDF page with pdftoppm and place it on its own slide.

    argv[1] is the PDF path, argv[2] the slide aspect ratio (e.g. '4:3'),
    argv[3] the DPI passed to pdftoppm.
    """
    if len(argv) < 4:
        sys.exit('usage: python pdf_to_pptx.py test.pdf {RATIO} {DPI}')
    pdf_filename = argv[1]
    width, height = parse_ratio(argv[2])
    dpi = int(argv[3])

    tmp_dir = 'png_tmps'
    os.makedirs(tmp_dir, exist_ok=True)

    # dump pdf meta-data to get number of pages
    # Commands are passed as argument lists (no shell), so file names with
    # spaces or shell metacharacters are handled safely.
    data_path = os.path.join(tmp_dir, 'data.txt')
    subprocess.run(
        ['pdftk', pdf_filename, 'dump_data_utf8', 'output', data_path],
        check=True,
    )
    with open(data_path, encoding='utf-8', errors='replace') as data_file:
        page_num = parse_page_count(data_file.read())
    print('#pages:', page_num)

    prs = Presentation()
    # The ratio numbers are used directly as the slide size in inches.
    prs.slide_width = Inches(width)
    prs.slide_height = Inches(height)

    blank_slide_layout = prs.slide_layouts[6]
    for i in range(1, page_num+1):
        # With -singlefile, pdftoppm writes exactly '<prefix>.png' for page i.
        prefix = os.path.join(tmp_dir, str(i))
        subprocess.run(
            ['pdftoppm', pdf_filename, prefix,
             '-png', '-f', str(i), '-singlefile', '-r', str(dpi)],
            check=True,
        )
        img_path = prefix + '.png'
        img = Image.from_file(img_path)

        slide = prs.slides.add_slide(blank_slide_layout)
        # Only the width is given, so the picture keeps its own aspect ratio.
        slide.shapes.add_picture(img_path, 0, 0, width=prs.slide_width)

        # Report the rendered image size and its reduced aspect ratio.
        img_width, img_height = img.size
        divisor = math.gcd(img_width, img_height)
        print(i, img.size, '{}:{}'.format(img_width//divisor, img_height//divisor))

    pptx_filename = converted_name(pdf_filename)
    prs.save(pptx_filename)
    print('saved to', pptx_filename)


if __name__ == '__main__':
    main(sys.argv)




END

2020.5.10 晚 22:13
2020.6.14 晚 21:08 添加新版本

posted @ 2020-05-10 22:15  maxuewei2  阅读(893)  评论(0)  编辑  收藏  举报
这是一段经过10次base64加密的密文:Vm0wd2VFMUhSblJXYTFwT1ZsWndUMVV3WkRSV2JHeDBZM3BHYUZKc1ZqTldiVEZIVjBaS2RHVkVRbFZXYkhCUVdWWlZlRll5U2tWVWJHUk9ZV3hhVFZkWGRHRlRNazE1Vkd0YVlWSnRhRzlVVnpGdlZWWmFjMWt6YUZOTlJGWjZWakkxVDJGc1NuTmpTRUpXWWxoU00xWkdXbUZqYkhCRlZXeHdWMkV5ZHpCV2FrbzBZekpHYzFOWVpGaGlSa3BoV1ZSS2IxSkdWbk5YYlVacVlraENSbFpYZUhkV01rVjZVV3BhVjJKVVFYaFdha1poWkVaT2MySkdTbWxXUjNoWFZtMTBWMWxXVWtkV1dHaFlZbGhTV0ZSV1pGTk5SbFowVFZoa1ZXSkdiRFJWTW5oelZqSktTRlJZYUZkV1JYQk1WV3BHVDJNeFduUmlSazVzWWxob2IxWXhXbE5TTWxGNFZXdGthbEp0YUhOVk1GVXhWMFpTV0dSSFJsTk5WMUo1VmpKek5WWXdNVVZTYTNCV1ZqTlJkMVpxUm1GU2JHUnpWV3hhVjFKV2NIbFhhMVpoVkRKTmVWTnJhR2hTYkVwVVZGUktiMWRXV25KWGJVWmFWbXN4TlZaSE5VOWhiRXBZVlcxb1ZtSkhhRlJXTVZwWFl6RldkVlJzYUZOaVNFRjNWa1phYjFReFdYaFRia3BxVW01Q1YxWnVjRUpOVmxweFVWaG9hbFpyV25oV1IzaFhWakpLVjFOc2JGZGlXRUpJVmxSR2ExZEdUbkphUmxwcFZqTm9kbFpHVWtOVE1EVlhWMjVTVGxaR1NuQlVWM1J6VGtaYVdFNVZPV2hpUlhCWldWVmFRMVl5Um5KVGJXaFhZa1p3ZWxsNlJtdGtSa3B5VGxaT2FXRXdjRmxXTVZwWFlUQXhTRkpyWkZoaVJscFVXVlJPUTFsV1duTlhhM1JUVW14c05WUldWakJXTVZwelkwaHNWMVl6YUZoWlZscGhVbXhrY21GR2FHbFNNVVYzVjFaU1MxVXhUa2RUYmtwaFVteGFjRlZzVWxkbGJHUllaRWRHYWsxRVZraFdNalZQVm0xRmVWVnVRbFZXYlZFd1ZqRmFZVkl5UmtsVWJGcE9ZVE5DU1ZkVVFtOVVNVnAwVTJ0a2FsSXlhR0ZVVlZwM1ZrWmFjMWRyZEd0V2F6VXdXbFZhVDJGV1pFaGFSRTVYWVRGd1dGbHFTa3BsVms1eVdrWm9XRkl4U2xGV2FrSnZVVEZzVjFkdVRtRlNlbXhYVlcweE5GWXhXWGxrUkVKVlRXdHdWMWt3Vm05WGF6RkhZMFJPV2xaV1ZqUmFSV1JIVW1zeFYyRkhiRk5pYTBvMVZteG9kMU14VlhoVWEyUlhZbXR3V0ZsclZURmpSbHB4VkcwNVZsSnRVbGhXVjNSM1ZERmFWVlpzYUZoaE1taE1WMVphUzFKc1RuVlNiRlpYVm10d1dWWkdWbUZXYlZaSVVtdHNZVkp0VWxSWmEyaERVMFphU0dWSGRHbE5WMUl3VlRKMGIyRkdUa2RqUmxwWFlsaG9NMVl3V2xOa1IxWkdUMWQwVTFaR1dscFhiRlpyWXpGYVIxTnNXbXBTVjJoWVdXeG9VMk5XY0ZaYVJrcHNWbXR3V2xsVldtOVhSa2w0VTI1b1YxWXpVbGhWZWtaaFl6RldjMXBHYUdoTk1VcFZWbGN3ZUZVeFpFZFhXR3hzVWpOU1ZsUlhkSGRUUm10M1lVYzVWMkpHYkRaWlZWSlBWakZKZWxScVVtRlNiSEJVVmpGa1IxSXlSa2RhUjJ4VVVsVndNbFpxUm05a01VbDVVbGhvV0ZkSGFGaFpiWGhoVmpGc2MyRkdUbXBOVjNoV1ZXMDFhMVpzV25OalJFSlZWbGRvZGxadGMzaGpiR1J5WVVaa1YyVnNXbFZYVmxKSFV6RktjMVJ1VmxOaVJuQndWakJhUzJJeFduTlZ
hMlJYVFZWc05GWnRlSE5aVmtweVYyeGtWMkV4U2tOVWJFVTVVRkU5UFE9PQ==