#!/usr/bin/env python
# -*- coding:utf-8 -*-
# filedeal.py
"""Directory housekeeping helpers.

The module can:

* list every file below a directory (``getAllFiles``),
* find byte-identical files by MD5 and move the extra copies into a
  sub-folder (``paixuMd5``),
* convert every PDF below a directory into images (``getDpfToImg``),
* rename files with a timestamp prefix and sort them into video / image /
  other folders (``deal_file``).
"""

import datetime
import hashlib
import io
import os
import shutil
import time

# The third-party packages are imported defensively so that the helpers which
# do not need them (hashing, de-duplication, sorting of non-image files) stay
# usable when a package is not installed.  The functions that do need one
# raise an explicit ImportError when it is missing.
try:
    from PIL import Image
except ImportError:
    Image = None

try:
    import requests
except ImportError:
    requests = None

try:
    import office
except ImportError:
    office = None


# Folder names and the size limit used when sorting files.  The folder names
# are runtime values (they end up on disk), so they are kept exactly as-is.
_SAME_DIR_NAME = "相同"
_LANDSCAPE_DIR = "横图"
_PORTRAIT_DIR = "竖图"
_LARGE_DIR = "大于等于1M"
_SMALL_DIR = "小于1M"
_SIZE_LIMIT_BYTES = 1024000
_IMAGE_SUFFIXES = ('.jpg', '.png', '.jpeg')
_HASH_CHUNK_BYTES = 1024 * 1024


def _move_into_dir(file_path, dst_dir):
    """Move ``file_path`` into ``dst_dir`` and return the final path.

    If ``dst_dir`` already holds an entry with the same name, ``_1``, ``_2``,
    ... is inserted before the extension until a free name is found, so an
    existing file is never overwritten and the move does not fail on a name
    clash.
    """
    base_name = os.path.basename(file_path)
    stem, ext = os.path.splitext(base_name)
    target = os.path.join(dst_dir, base_name)
    counter = 1
    while os.path.exists(target):
        target = os.path.join(dst_dir, "%s_%d%s" % (stem, counter, ext))
        counter += 1
    shutil.move(file_path, target)
    return target


def _image_destination(dst2, width, height, size_bytes):
    """Return the folder below ``dst2`` an image belongs in.

    Images wider than tall go to the landscape folder, everything else
    (including square images) to the portrait folder; each of those is split
    by whether the file is smaller than ``_SIZE_LIMIT_BYTES``.
    """
    orientation = _LANDSCAPE_DIR if width > height else _PORTRAIT_DIR
    bucket = _SMALL_DIR if size_bytes < _SIZE_LIMIT_BYTES else _LARGE_DIR
    return os.path.join(dst2, orientation, bucket)


# List all files
def getAllFiles(fire_dir):
    """Return the paths of all files found by walking ``fire_dir``.

    Every path is printed as it is found and the complete list is printed
    once at the end.  Paths are built from the walked directory, so they are
    relative when ``fire_dir`` is relative.
    """
    filepath_list = []
    for root, folder_names, file_names in os.walk(fire_dir):
        for file_name in file_names:
            file_path = os.path.join(root, file_name)
            filepath_list.append(file_path)
            print(file_path)
    print(filepath_list)
    return filepath_list


# Read the pixel dimensions of a picture
def getPicsize(pic_file):
    """Return ``(width, height, size_in_bytes)`` for the image ``pic_file``.

    The three values are also printed (size first, then width, then height).

    Raises:
        ImportError: if Pillow is not installed.
    """
    if Image is None:
        raise ImportError("Pillow (PIL) is required to read image sizes")
    # Close the image right away: a handle left open can prevent the file
    # from being moved afterwards (notably on Windows).
    with Image.open(pic_file) as img:
        w = img.width
        h = img.height
    image_size = os.path.getsize(pic_file)
    print(image_size)
    print(w)
    print(h)
    return w, h, image_size


def getBaseName(file_name):
    """Return the final path component of ``file_name``."""
    return os.path.basename(file_name)


def getNewName(old_file_name):
    """Return ``old_file_name`` with a timestamp prefixed to its base name.

    The prefix is the current local time formatted as
    ``%Y%m%d%H%M%S%f_``.  The directory part is kept unchanged; only the
    last path component gets the prefix.  The new name is printed.
    """
    file_base_name = os.path.basename(old_file_name)
    timestrhaomiao = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f_')
    # Cut the base name off the end instead of splitting on it: splitting
    # breaks when the same text also occurs in a directory name ("a/a").
    directory_part = old_file_name[:len(old_file_name) - len(file_base_name)]
    new_file_name = directory_part + timestrhaomiao + file_base_name
    print(new_file_name)
    return new_file_name


# Compute the MD5 of a file
def getmd5(file):
    """Return the hex MD5 digest of ``file``, or ``None`` if it is not a file."""
    if not os.path.isfile(file):
        return None
    md5 = hashlib.md5()
    with open(file, 'rb') as fd:
        # Hash in chunks so large files are not loaded into memory at once.
        for chunk in iter(lambda: fd.read(_HASH_CHUNK_BYTES), b""):
            md5.update(chunk)
    return md5.hexdigest()


# Hash every file and collect the files that share an MD5
# src is a directory
def paixuMd5(src):
    """Find duplicate files below ``src`` and move the extra copies aside.

    Every file below ``src`` is hashed.  The first file seen with a given
    MD5 is kept in place; every later file with the same MD5 is treated as a
    duplicate.  For each duplicate a line ``[md5, first_file, duplicate]`` is
    written to ``youchong.txt`` in the current working directory, and the
    duplicate is moved into the ``相同`` folder directly below ``src`` (the
    folder is created if needed).

    Files that cannot be hashed (``getmd5`` returned ``None``) and files that
    already sit in the ``相同`` folder are listed in the result but are never
    treated as duplicates, so the function can be run repeatedly.

    Returns:
        A list of ``[md5, path]`` pairs, one per file found.
    """
    same_dir = os.path.join(src, _SAME_DIR_NAME)
    same_dir_prefix = os.path.normcase(os.path.abspath(same_dir)) + os.sep

    all_file_with_md5_list = []
    all_with_many_file_list = []
    duplicate_files = []
    first_file_by_md5 = {}

    for f in getAllFiles(src):
        f_md5 = getmd5(f)
        all_file_with_md5_list.append([f_md5, f])
        if f_md5 is None:
            continue
        if os.path.normcase(os.path.abspath(f)).startswith(same_dir_prefix):
            continue
        if f_md5 not in first_file_by_md5:
            first_file_by_md5[f_md5] = f
        else:
            duplicate_files.append(f)
            all_with_many_file_list.append([f_md5, first_file_by_md5[f_md5], f])
    print("-----------------")
    print("........")

    print("all_with_many_file_list:")
    print(all_with_many_file_list)
    print(len(all_with_many_file_list))
    with open("youchong.txt", 'w', encoding="utf-8") as report:
        for one_list in all_with_many_file_list:
            print(one_list)
            report.write(str(one_list) + "\n")

    # Move the duplicate copies into one folder
    os.makedirs(same_dir, exist_ok=True)
    for one_f in duplicate_files:
        _move_into_dir(one_f, same_dir)
    return all_file_with_md5_list


# Convert one PDF into images
def pdfTOimg(pfd_file_path, out_dir_path):
    """Convert the PDF ``pfd_file_path`` into images under ``out_dir_path``.

    The work is delegated to ``office.pdf.pdf2imgs``.

    Raises:
        ImportError: if the ``office`` (python-office) package is missing.
    """
    if office is None:
        raise ImportError("python-office is required to convert PDF files")
    office.pdf.pdf2imgs(pdf_path=pfd_file_path, out_dir=out_dir_path)


# Convert every PDF below a directory into images
def getDpfToImg(src):
    """Convert every ``.pdf`` file below ``src`` into images.

    The output directory passed for each PDF is the PDF's own path without
    the extension (``a/b/doc.pdf`` -> ``a/b/doc``).  The extension check is
    case-insensitive.
    """
    for f in getAllFiles(fire_dir=src):
        if f.lower().endswith('.pdf'):
            # splitext removes exactly the extension; str.strip(".pdf")
            # removes any of the characters '.', 'p', 'd', 'f' from both
            # ends and mangles names such as "app.pdf".
            out_dir_path = os.path.splitext(f)[0]
            pdfTOimg(f, out_dir_path)


# Rename the files and sort them into folders
def deal_file(src, dst1, dst2, dst3):
    """Rename every file below ``src`` and sort the files into folders.

    Steps:

    1. Every file gets a timestamp prefix (see ``getNewName``).
    2. ``.mp4`` files are moved to ``dst1``.
    3. ``.jpg`` / ``.png`` / ``.jpeg`` files are moved below ``dst2`` into
       ``横图`` (wider than tall) or ``竖图`` (otherwise), and inside that
       into ``小于1M`` (< 1024000 bytes) or ``大于等于1M``.
    4. All remaining files are moved to ``dst3``.

    Extensions are matched case-insensitively.  Target folders are created
    when missing.  A file that cannot be processed is reported with
    ``print`` and left where it is; the remaining files are still handled.
    """
    # Rename everything first
    for old_file_name in getAllFiles(src):
        os.rename(old_file_name, getNewName(old_file_name))

    # Walk again to pick up the new names, then split by extension
    videos = []
    images = []
    others = []
    for f in getAllFiles(src):
        print(f)
        lowered = f.lower()
        if lowered.endswith('.mp4'):
            videos.append(f)
        elif lowered.endswith(_IMAGE_SUFFIXES):
            images.append(f)
        else:
            others.append(f)

    # Create the target folders
    os.makedirs(dst1, exist_ok=True)
    os.makedirs(dst2, exist_ok=True)
    for orientation in (_LANDSCAPE_DIR, _PORTRAIT_DIR):
        for bucket in (_LARGE_DIR, _SMALL_DIR):
            os.makedirs(os.path.join(dst2, orientation, bucket), exist_ok=True)
    os.makedirs(dst3, exist_ok=True)

    # Move the files.  The paths from getAllFiles already include src, so
    # they are used as they are.  Errors are reported per file on purpose so
    # one bad file does not stop the run.
    for video in videos:
        try:
            _move_into_dir(video, dst1)
        except Exception as e:
            print(e)

    for image in images:
        try:
            w, h, image_size = getPicsize(pic_file=image)
            _move_into_dir(image, _image_destination(dst2, w, h, image_size))
        except Exception as e:
            print(e)

    for other in others:
        try:
            _move_into_dir(other, dst3)
        except Exception as e:
            print(e)


if __name__ == "__main__":
    src = r"E:\englishpic\pic\manhua"
    getDpfToImg(src)
# Install the PDF conversion dependency with: pip install -i https://mirrors.aliyun.com/pypi/simple python-office