打赏

Python PDF 转 JPG 推荐使用

我和同事分别用.net 和 python来实现这个功能。

做好后发现 .NET 版本转换速度很慢,而 Python 版本代码少、速度快,最终采用了我用 Python 写的代码。

需要特别注意的是:pdf2image 依赖 poppler(本文使用的是 poppler-0.68.0),需要单独安装,具体安装方法大家可以百度一下。

具体代码如下:

from pdf2image import convert_from_path
from pathlib import Path 

from os import listdir
from PIL import Image

import os,time
from shutil import copyfile
import shutil

def pdf_to_image(pdf_filename):
    """Convert one PDF from ``path_to_watch`` into a single merged JPEG.

    Every page is rendered at 400 dpi and saved as ``1.jpg``, ``2.jpg``, ...
    inside a working directory named after the PDF. ``contact_image`` then
    merges those pages into ``<name>.jpg`` next to the PDF, and the working
    directory is removed again (also when conversion or merging fails).

    Args:
        pdf_filename: File name (not a full path) of a PDF that lives in
            ``path_to_watch``. Names that do not end in ``.pdf``
            (case-insensitive) or that have an empty base name are ignored.

    Returns:
        None.
    """
    if not pdf_filename.upper().endswith(".PDF"):
        return
    print('处理 pdf_filename:', pdf_filename)

    # The base name is the text before the FIRST dot. watch() derives the
    # expected "<name>.jpg" with the same rule, so the two must stay in sync.
    filename_withoutext = pdf_filename.split('.')[0]
    if not filename_withoutext:
        # A name such as ".pdf" has an empty base name; the working directory
        # would then be path_to_watch itself and would be deleted on cleanup.
        print('跳过无效文件名:', pdf_filename)
        return

    print('out_path', filename_withoutext)
    out_path_full = os.path.join(path_to_watch, filename_withoutext)
    print('完整路径:', out_path_full)
    if not os.path.isdir(out_path_full):
        print('创建目录:', out_path_full)
        os.makedirs(out_path_full, exist_ok=True)

    try:
        print('开始转换')
        pdf_path = os.path.join(path_to_watch, pdf_filename)
        print('filename:', pdf_path)
        pages = convert_from_path(pdf_path, dpi=400, output_folder=None,
                                  fmt="JPEG", thread_count=5)
        for pindex, page in enumerate(pages, start=1):
            page.save(os.path.join(out_path_full, str(pindex) + '.jpg'))

        # Kept from the original; presumably lets the filesystem settle
        # before the pages are read back -- TODO confirm it is still needed.
        time.sleep(1)
        print('转换完成')
        contact_image(out_path_full)
        print('合并完成')
    finally:
        # Remove exactly the directory created above. Re-deriving it from the
        # full PDF path (split on the first '.') targets the wrong directory
        # whenever the watch path itself contains a dot.
        print('删除目录', out_path_full)
        shutil.rmtree(out_path_full)

def watch():
    """Poll ``path_to_watch`` forever and convert PDFs that have no JPEG yet.

    Every three seconds the directory is listed. For each entry ending in
    ``.pdf`` (case-insensitive) the expected output ``<name>.jpg`` is
    computed, where ``<name>`` is the text before the first dot of the file
    name. If that JPEG does not exist, ``pdf_to_image`` is called for the
    entry. This function never returns.
    """
    while True:
        time.sleep(3)
        candidates = [entry for entry in os.listdir(path_to_watch)
                      if entry.upper().endswith('.PDF')]
        for pdf_name in candidates:
            pdf_full = os.path.join(path_to_watch, pdf_name)
            jpg_full = os.path.join(path_to_watch,
                                    pdf_name.split('.')[0] + '.jpg')
            print(jpg_full)
            if os.path.exists(jpg_full):
                # Already converted on an earlier pass.
                continue
            print(pdf_full)
            # Presumably gives a file that is still being copied time to
            # finish -- TODO confirm.
            time.sleep(1)
            print('文件名:', pdf_full)
            pdf_to_image(pdf_name)

    #while 1:
        #return
        # before = dict([(f, None) for f in os.listdir(path_to_watch)])
        # time.sleep(1)
        # after = dict([(f,None) for f in os.listdir(path_to_watch)])
        # added = [ f for f in after if not f in before]
        # removed =[f for f in before if not f in after]
        # if added:
        #     for f_add in added:
        #         time.sleep(1)
        #         print('文件名:',os.path.join(path_to_watch,f_add))
        #         pdf_to_image(f_add)
        #         path_file=f_add.split('.')[0]
        #         print('删除目录')
        #         shutil.rmtree(os.path.join(path_to_watch, path_file))
                

        # if removed:
        #     for f_r in removed:
        #         print('删除:', os.path.join(path_to_watch, f_r))

        # before = after 

def open_image(out_path_full,fn):
    """Open the image file ``fn`` found in directory ``out_path_full``.

    Prints the resolved path and returns whatever ``Image.open`` yields.
    The returned image is not closed here; that is left to the caller.
    """
    target = os.path.join(out_path_full, fn)
    print('打开图片路径', target)
    picture = Image.open(target)
    return picture

def _page_sort_key(fn):
    """Sort key that orders numerically named pages (1, 2, ..., 10) by number."""
    stem = os.path.splitext(fn)[0]
    if stem.isdecimal():
        return (0, int(stem), fn)
    # Non-numeric names sort after the numbered pages, in name order.
    return (1, 0, fn)


def contact_image(out_path_full):
    """Stack all page images of a directory into one tall JPEG.

    Every ``*.jpg`` file in ``out_path_full`` is resized to a width of
    1102 px (1564 px high when the page is taller than wide, otherwise
    776 px high). The pages are pasted top to bottom, in page-number order,
    onto a white canvas that is saved as ``out_path_full + ".jpg"``.

    Args:
        out_path_full: Directory holding the page images, named ``1.jpg``,
            ``2.jpg``, ... as written by ``pdf_to_image``.

    Returns:
        None. Nothing is written when the directory contains no ``.jpg``.
    """
    print('开始合并')
    print('合并路径:', out_path_full)

    target_width = 1102
    portrait_height = 1564  # 1102 x 1564 is twice 551 x 782
    landscape_height = 776

    # os.listdir() gives no ordering guarantee, so sort by page number
    # explicitly; a plain string sort would put "10.jpg" before "2.jpg".
    page_files = sorted(
        (fn for fn in listdir(out_path_full) if fn.endswith('.jpg')),
        key=_page_sort_key)
    print('图片数量:', len(page_files))
    if not page_files:
        return

    images = []
    total_height = 0
    for fn in page_files:
        # resize() returns an independent image, so the source file handle
        # can be released as soon as the page has been scaled.
        with open_image(out_path_full, fn) as page:
            # Decide orientation from THIS page's own size, not from the
            # largest page seen so far.
            width, height = page.size
            print('width %d,height %d ' % (width, height))
            new_height = portrait_height if height > width else landscape_height
            images.append(
                page.resize((target_width, new_height), Image.BILINEAR))
        total_height += new_height

    print('total_height:', total_height)
    save_path = out_path_full + ".jpg"
    print('save path:', save_path)

    # NOTE(review): JPEG limits image dimensions, so very long documents
    # may fail to save -- confirm the limit against the Pillow/JPEG docs.
    result = Image.new(images[0].mode, (target_width, total_height), "white")
    height_total = 0
    for im in images:
        height_im = im.size[1]
        print('height_im %d' % height_im)
        result.paste(im, box=(0, height_total))
        height_total += height_im

    # Write the merged image once, after every page has been pasted.
    result.save(save_path)

# Directory that watch() polls for incoming PDF files.
path_to_watch = r"D:\PDFS"
print('监听目录:', path_to_watch)

# Start polling only when executed as a script, not when imported.
if __name__ == '__main__':
    watch()

  

posted @ 2021-01-20 08:40  DanielXiaoyc  阅读(1533)  评论(0)  编辑  收藏  举报