Python PDF 转 JPG 推荐使用
我和同事分别用 .NET 和 Python 来实现这个功能。
做好后发现 .NET 版本转换很慢,而 Python 版本代码少、速度快,所以最终采用了我用 Python 写的代码。
比较特殊的是需要额外安装 poppler(我用的是 poppler-0.68.0):pdf2image 依赖 poppler 来解析 PDF,Windows 下需要下载后把它的 bin 目录加入 PATH。具体安装方法大家可以百度一下。
具体代码如下:
import os
import shutil
import time
from os import listdir
from pathlib import Path
from shutil import copyfile

from pdf2image import convert_from_path
from PIL import Image

# Directory that is polled for incoming PDF files.
path_to_watch = "D:\\PDFS"
print('监听目录:', path_to_watch)

# Every page is scaled to a fixed width before stacking; the height depends
# on the page orientation. 1102x1564 is twice the 551x782 size mentioned in
# the original comment.
PAGE_WIDTH = 1102
PORTRAIT_HEIGHT = 1564
LANDSCAPE_HEIGHT = 776


def _stem(filename):
    """Return *filename* without its final extension ("a.b.pdf" -> "a.b").

    Splitting on the first dot would map "a.b.pdf" and "a.c.pdf" to the same
    output name, so only the last extension is stripped.
    """
    return os.path.splitext(filename)[0]


def pdf_to_image(pdf_filename):
    """Convert one PDF in ``path_to_watch`` into a single stacked JPG.

    Each page is rendered to ``<stem>/<n>.jpg``, the pages are merged into
    ``<stem>.jpg`` by ``contact_image``, and the page folder is then removed.

    Args:
        pdf_filename: File name (not a full path) of a PDF located in
            ``path_to_watch``. Names not ending in ".pdf" (case-insensitive)
            are ignored.
    """
    if not pdf_filename.upper().endswith(".PDF"):
        return
    print('处理 pdf_filename:', pdf_filename)

    out_path = _stem(pdf_filename)
    print('out_path', out_path)
    out_path_full = os.path.join(path_to_watch, out_path)
    print('完整路径:', out_path_full)
    if not os.path.exists(out_path_full):
        print('创建目录:', out_path_full)
        os.makedirs(out_path_full, exist_ok=True)

    print('开始转换')
    pdf_path = os.path.join(path_to_watch, pdf_filename)
    print('filename:', pdf_path)
    pages = convert_from_path(pdf_path, dpi=400, output_folder=None,
                              fmt="JPEG", thread_count=5)
    # save() is synchronous, so no pause is needed between pages.
    for page_number, page in enumerate(pages, start=1):
        page.save(os.path.join(out_path_full, str(page_number) + '.jpg'))
    print('转换完成')

    contact_image(out_path_full)
    print('合并完成')

    # Remove exactly the folder created above. Re-deriving it by splitting
    # the full path on '.' would point elsewhere whenever path_to_watch
    # itself contains a dot.
    print('删除目录', out_path_full)
    shutil.rmtree(out_path_full)


def watch():
    """Poll ``path_to_watch`` forever and convert PDFs lacking a JPG.

    A PDF counts as already converted when ``<stem>.jpg`` exists next to it.
    An earlier before/after directory-diff approach was replaced by this
    polling loop.
    """
    while True:
        time.sleep(3)
        pdf_files = [f for f in os.listdir(path_to_watch)
                     if f.upper().endswith('.PDF')]
        for f in pdf_files:
            f_full = os.path.join(path_to_watch, f)
            f_jpg_full = os.path.join(path_to_watch, _stem(f) + '.jpg')
            print(f_jpg_full)
            if os.path.exists(f_jpg_full):
                continue
            print(f_full)
            # NOTE(review): presumably gives a file that is still being
            # copied into the folder time to finish - confirm.
            time.sleep(1)
            print('文件名:', f_full)
            pdf_to_image(f)


def open_image(out_path_full, fn):
    """Open image ``fn`` inside ``out_path_full`` and return the PIL image."""
    image_file = os.path.join(out_path_full, fn)
    print('打开图片路径', image_file)
    return Image.open(image_file)


def contact_image(out_path_full):
    """Stack the page images in ``out_path_full`` into ``<out_path_full>.jpg``.

    Pages are the ``<n>.jpg`` files written by ``pdf_to_image``. They are
    merged top to bottom in numeric page order, each resized to
    ``PAGE_WIDTH`` wide with a height chosen by its own orientation.

    Args:
        out_path_full: Folder holding the numbered page images.

    Raises:
        ValueError: If the folder contains no page images.
    """
    print('开始合并')
    print('合并路径:', out_path_full)
    # listdir order is unspecified and a plain string sort would put
    # "10.jpg" before "2.jpg", so sort by the numeric page index.
    page_files = sorted(
        (fn for fn in listdir(out_path_full)
         if fn.endswith('.jpg') and fn[:-4].isdigit()),
        key=lambda fn: int(fn[:-4]),
    )
    print('图片数量:', len(page_files))
    if not page_files:
        raise ValueError('no page images found in %s' % out_path_full)

    images = []
    total_height = 0
    for fn in page_files:
        # Close each source file as soon as its resized copy exists.
        with open_image(out_path_full, fn) as page:
            width, height = page.size
            print('width %d,height %d ' % (width, height))
            # Orientation is decided per page, not from earlier pages.
            if height > width:
                target_height = PORTRAIT_HEIGHT
            else:
                target_height = LANDSCAPE_HEIGHT
            images.append(
                page.resize((PAGE_WIDTH, target_height), Image.BILINEAR))
        total_height += target_height

    # NOTE(review): JPEG limits image dimensions to roughly 65k pixels, so
    # about 41 portrait pages is the ceiling for one output file - confirm
    # and split the output if longer PDFs are expected.
    result = Image.new(images[0].mode, (PAGE_WIDTH, total_height), "white")
    print('total_height:', total_height)
    save_path = out_path_full + ".jpg"
    print('save path:', save_path)

    offset = 0
    for im in images:
        height_im = im.size[1]
        print('height_im %d' % height_im)
        result.paste(im, box=(0, offset))
        offset += height_im
    # Encode once after all pages are pasted instead of once per page.
    result.save(save_path)


if __name__ == '__main__':
    watch()