使用python对文件进行批量处理
代码有点长,已封装为一个类,主要用于对文件进行批量处理
1、批量移动文件,符合某种后缀的
2、批量查找两个文件夹重复的文件
3、批量同步两个文件夹的文件
2和3,我现在用duplicate这个软件,同步的话使用File Synchronizer这个软件,代码的话不怎么用了
4、批量移动和复制文件时会遇到一个问题:若目标位置已存在同名文件,可以在文件名后追加“_1”、“_2”这样的序号来解决;这比之前追加随机时间戳的做法要好,后者生成的文件名太长了
5、批量提取docx文档中的图片(如何批量提取doc文档中的图片呢?去excelhome论坛搜索用vba把doc转为docx的代码,先批量转换再提取即可,示例代码如下)
Sub doc2docx()
    ' Convert the .doc files picked in a file dialog to .docx.
    ' Each converted copy is saved next to its source file; the source is kept.
    Dim myDialog As FileDialog
    Dim oFile As Object
    Dim oFilePath As Variant
    Dim newPath As String

    Set myDialog = Application.FileDialog(msoFileDialogFilePicker)
    With myDialog
        .Filters.Clear                                  ' drop every existing file filter
        .Filters.Add "所有 WORD2007 文件", "*.doc", 1    ' only offer *.doc files
        .AllowMultiSelect = True                        ' allow picking several files
        If .Show = -1 Then                              ' user confirmed the dialog
            For Each oFilePath In .SelectedItems
                ' Only touch the extension: replacing "doc" anywhere in the path
                ' would also rewrite folder names such as "Documents" and would
                ' turn an already converted ".docx" into ".docxx".
                If LCase(Right(oFilePath, 4)) = ".doc" Then
                    newPath = Left(oFilePath, Len(oFilePath) - 4) & ".docx"
                    Set oFile = Documents.Open(oFilePath)
                    ' 16 = wdFormatDocumentDefault (the .docx format)
                    oFile.SaveAs FileName:=newPath, FileFormat:=16
                    oFile.Close
                End If
            Next
        End If
    End With
End Sub

from genericpath import exists
import os
import shutil
from os import path
from pathlib import Path
from hashlib import md5
import zipfile

# Pillow and Send2Trash are only needed by a few helpers, so a missing package
# must not prevent the remaining tools from being used.
try:
    from PIL import Image
except ImportError:
    Image = None

try:
    from send2trash import send2trash
except ImportError:
    send2trash = None

# Revised by Stephen Shen @zju, 2021-04-08 10:14:22
# https://rednafi.github.io/digressions/python/2020/04/13/python-pathlib.html#pathrenametarget
# https://pypi.org/project/Send2Trash/
# Revised by Stephen Shen @zju, 2021-03-15 09:24:10
# Notes on the zipfile module:
# https://www.cnblogs.com/ManyQian/p/9193199.html
#
# shutil.copyfile("oldfile", "newfile")  # both arguments must be files
# shutil.copy("oldfile", "newfile")      # oldfile must be a file; newfile may be a file or a directory
# Copy a directory tree:
# shutil.copytree("olddir", "newdir")    # both must be directories and newdir must not exist yet
# Rename a file or directory:
# os.rename("oldname", "newname")
# Move a file or directory:
# shutil.move("oldpos", "newpos")


class PathBox():
    """Static helpers for batch operations on files and directories."""

    # Lower-case suffixes treated as pictures (compared case-insensitively).
    PIC_SUFFIXES = ('.jpg', '.jpeg', '.png')

    def __init__(self):
        pass

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _require(dependency, package):
        """Return *dependency*, raising ImportError if the package is missing."""
        if dependency is None:
            raise ImportError(
                '{0} is required for this operation (pip install {0})'.format(package))
        return dependency

    @staticmethod
    def _samePath(first, second):
        """Tell whether two paths point at the same location (textually)."""
        def normalise(p):
            return os.path.normcase(os.path.abspath(str(p)))
        return normalise(first) == normalise(second)

    @staticmethod
    def _uniquePath(dst_path):
        """Return *dst_path*, or ``stem_1``, ``stem_2``, ... if it is taken."""
        dst_path = Path(dst_path)
        candidate = dst_path
        index = 1
        while candidate.exists():
            # Always derive from the requested name so suffixes do not pile
            # up ("a_1_2"); the result is "a_1", "a_2", ...
            candidate = dst_path.with_name(
                '{}_{}{}'.format(dst_path.stem, index, dst_path.suffix))
            index += 1
        return candidate

    # ------------------------------------------------------------------
    # docx picture extraction
    # ------------------------------------------------------------------
    @staticmethod
    def batchExtractPicsFromDocs(srcDir, dstDir, zipDir):
        """Copy every .docx under *srcDir* to *zipDir* as .zip and unpack it.

        The three directories are created when missing.  Archives are named
        after the document stem; a stem that already has an archive is
        skipped.  The archives are unpacked by :meth:`extractZipFile`.
        """
        src_dir = Path(srcDir)
        dst_dir = Path(dstDir)
        zip_dir = Path(zipDir)
        for folder in (dst_dir, src_dir, zip_dir):
            folder.mkdir(parents=True, exist_ok=True)

        for root, dirs, files in os.walk(str(src_dir)):
            for f in files:
                src_path = Path(root).joinpath(f)
                if src_path.suffix.lower() != '.docx':
                    print('{} is not docx'.format(str(src_path)))
                    continue
                zip_path = zip_dir.joinpath(src_path.stem + '.zip')
                if zip_path.exists():
                    print('{} is existed'.format(str(zip_path)))
                else:
                    PathBox.copyAsZip(src_path, zip_path)
                    print('{} is copied as zip file'.format(zip_path))

        PathBox.extractZipFile(zip_dir, dst_dir)

    @staticmethod
    def copyAsZip(srcpath, dstpath):
        """Copy *srcpath* to *dstpath* (a .docx file is already a zip archive)."""
        shutil.copyfile(str(srcpath), str(dstpath))

    @staticmethod
    def extractPics(dstDir, zippath, zipDir):
        """Unpack *zippath* into *zipDir*.

        Returns the ``word/media`` directory below *zipDir* on success and
        ``None`` when the archive cannot be unpacked.  *dstDir* is accepted
        for interface compatibility and is not used.
        """
        try:
            with zipfile.ZipFile(str(zippath), 'r') as archive:
                archive.extractall(str(zipDir))
        except (zipfile.BadZipFile, OSError, RuntimeError):
            print('{zippath} cannot be extracted'.format(zippath=zippath))
            return None
        print('{zippath} is extracted'.format(zippath=zippath))
        return Path(zipDir).joinpath('word/media')

    @staticmethod
    def extractZipFile(srcDir, dstDir):
        """Unpack every archive found under *srcDir*.

        Each archive is unpacked into ``srcDir/<archive stem>``.  *dstDir* is
        accepted for interface compatibility and is not used: callers collect
        the wanted files afterwards (see the ``__main__`` examples).
        ``.rar`` files are attempted as well, but the zipfile module can only
        read real zip archives, so they are normally reported as failures.
        """
        src_dir = Path(srcDir)
        for root, dirs, files in os.walk(str(src_dir)):
            for f in files:
                file_path = Path(root).joinpath(f)
                if file_path.suffix not in ('.zip', '.rar'):
                    continue
                try:
                    with zipfile.ZipFile(str(file_path), 'r') as archive:
                        zip_dir = src_dir.joinpath(file_path.stem)
                        zip_dir.mkdir(parents=True, exist_ok=True)
                        archive.extractall(str(zip_dir))
                    print('{} is extracted'.format(str(file_path)))
                except (zipfile.BadZipFile, OSError, RuntimeError):
                    print('{} cannot be extracted'.format(str(file_path)))

    # ------------------------------------------------------------------
    # hashing
    # ------------------------------------------------------------------
    @staticmethod
    def getFileMd5(file_name):
        """Return the hexadecimal MD5 digest of a file, read in 4 KiB chunks."""
        digest = md5()
        with open(str(file_name), 'rb') as fobj:
            for data in iter(lambda: fobj.read(4096), b''):
                digest.update(data)
        return digest.hexdigest()

    @staticmethod
    def getImageMd5(img_path):
        """Return the MD5 digest of a file, or ``None`` if it cannot be read."""
        try:
            return PathBox.getFileMd5(img_path)
        except (OSError, TypeError, ValueError):
            return None

    # ------------------------------------------------------------------
    # synchronising / comparing directories
    # ------------------------------------------------------------------
    @staticmethod
    def syncFiles(srcDir, dstDir):
        """Copy files from *srcDir* to the same relative place in *dstDir*.

        A file whose counterpart exists with the same size is skipped.  When
        the counterpart exists with a different size the source is copied
        under a numbered name (see :meth:`copyFile`); nothing is overwritten.
        """
        src_dir = Path(srcDir)
        dst_dir = Path(dstDir)
        for root, dirs, files in os.walk(str(src_dir)):
            for f in files:
                src_path = Path(root).joinpath(f)
                dst_path = dst_dir.joinpath(src_path.relative_to(src_dir))
                if (dst_path.exists()
                        and os.path.getsize(str(src_path)) == os.path.getsize(str(dst_path))):
                    print('{} is existed'.format(str(src_path)))
                    continue
                dst_path.parent.mkdir(parents=True, exist_ok=True)
                PathBox.copyFile(src_path, dst_path)
                print('{} is copied'.format(str(src_path)))

    @staticmethod
    def compareTwoDirsByCount(srcDir, dstDir):
        """Remove sub-directories of *srcDir* that hold as many files as their twin.

        Only entries that are directories on both sides are considered, so
        plain files and directories without a counterpart are left alone.
        """
        for rootdir in os.listdir(srcDir):
            srcpath = os.path.join(srcDir, rootdir)
            dstpath = os.path.join(dstDir, rootdir)
            if not (os.path.isdir(srcpath) and os.path.isdir(dstpath)):
                continue
            src_count = sum(len(files) for _, _, files in os.walk(srcpath))
            dst_count = sum(len(files) for _, _, files in os.walk(dstpath))
            if src_count == dst_count:
                shutil.rmtree(srcpath)
                print('{} is removed'.format(srcpath))

    @staticmethod
    def compareDirsDeleteTheSameFile(srcDir, dstDir, mode='keep'):
        """Trash files in *srcDir* that also exist in *dstDir*.

        Files are matched by relative path only (contents are not compared).
        Nothing is removed unless *mode* is ``'delete'``; removed files go to
        the recycle bin through Send2Trash.
        """
        if mode != 'delete':
            return
        trash = PathBox._require(send2trash, 'Send2Trash')
        src_dir = Path(srcDir)
        dst_dir = Path(dstDir)
        for root, dirs, files in os.walk(str(src_dir)):
            for f in files:
                src_path = Path(root).joinpath(f)
                dst_path = dst_dir.joinpath(src_path.relative_to(src_dir))
                if not dst_path.exists():
                    continue
                try:
                    trash(str(src_path))
                    print('{} is removed'.format(src_path))
                except Exception as err:  # best effort: report and continue
                    print('{} cannot be removed'.format(src_path))
                    print(err)

    @staticmethod
    def batchRemoveTheSameFileByMD5(srcKeepDir, srcCompareDirs=None):
        """Trash duplicate files, identified by their MD5 digest.

        *srcKeepDir* is scanned first, then every directory in
        *srcCompareDirs*.  The first file seen with a given digest is kept;
        later files with the same digest go to the recycle bin.
        """
        trash = PathBox._require(send2trash, 'Send2Trash')
        kept = {}
        for folder in [srcKeepDir] + list(srcCompareDirs or []):
            for root, dirs, files in os.walk(str(Path(folder))):
                for f in files:
                    f_path = Path(root).joinpath(f)
                    file_md5 = PathBox.getFileMd5(f_path)
                    if file_md5 not in kept:
                        kept[file_md5] = f_path
                    elif PathBox._samePath(kept[file_md5], f_path):
                        # Overlapping directories: this is the kept file
                        # itself, seen a second time.
                        continue
                    else:
                        trash(str(f_path))
                        print('{} is removed'.format(f_path))

    # ------------------------------------------------------------------
    # renaming
    # ------------------------------------------------------------------
    @staticmethod
    def batchRenameFileSuffix(srcDir):
        """Rename every ``.jpeg`` / ``.jpg`` file under *srcDir* to ``.JPG``."""
        for root, dirs, files in os.walk(srcDir):
            for f in files:
                srcpath = Path(os.path.join(root, f))
                if srcpath.suffix not in ('.jpeg', '.jpg'):
                    continue
                target = srcpath.parent / (srcpath.stem + '.JPG')
                # Never overwrite a different file.  On case-insensitive file
                # systems the target "exists" but is the source itself.
                if target.exists() and not target.samefile(srcpath):
                    print('{} is existed'.format(target))
                    continue
                srcpath.rename(target)
                print('{} is renamed'.format(srcpath))

    @staticmethod
    def batchRenameFileName(srcDir):
        """Rename every file under *srcDir* to the lowest free number in its folder."""
        for root, dirs, files in os.walk(srcDir):
            root_path = Path(root)
            for f in files:
                file_path = root_path.joinpath(f)
                index = 1
                new_file_path = file_path.with_name(str(index) + file_path.suffix)
                while new_file_path.exists():
                    index += 1
                    new_file_path = file_path.with_name(str(index) + file_path.suffix)
                file_path.rename(new_file_path)
                print('{} is renamed'.format(str(file_path)))

    # ------------------------------------------------------------------
    # statistics / clean-up
    # ------------------------------------------------------------------
    @staticmethod
    def tongji(srcDir):
        """Print the number of entries of every sub-directory ("tongji" = statistics)."""
        for root, dirs, files in os.walk(srcDir):
            if PathBox._samePath(root, srcDir):
                continue
            count = len(os.listdir(root))
            print('{0} have total of {1} files'.format(root, count))

    @staticmethod
    def rmEmptyDirs(srcDir):
        """Remove every empty directory below *srcDir* (never *srcDir* itself).

        The tree is walked bottom-up so that a directory which only held
        empty directories is removed as well.
        """
        for root, dirs, files in os.walk(srcDir, topdown=False):
            if files or PathBox._samePath(root, srcDir):
                continue
            try:
                os.rmdir(root)
            except OSError:
                # Still contains a non-empty sub-directory (or is locked).
                pass

    # ------------------------------------------------------------------
    # pictures
    # ------------------------------------------------------------------
    @staticmethod
    def batchResizePics(srcDir):
        """Write 800x600 thumbnails of all pictures into ``<srcDir>-resize``.

        The directory layout is mirrored.  Pictures that already measure
        exactly 800x600 are moved instead of re-encoded.  Thumbnails are
        saved as JPEG data under the original file name.

        Returns the path of the target directory as a string.
        """
        image_module = PathBox._require(Image, 'Pillow')
        src_root = Path(srcDir)
        dstDir = str(src_root) + "-resize"
        size = (800, 600)

        for root, dirs, files in os.walk(str(src_root)):
            # Mirror the relative location rather than string-replacing,
            # which would also hit repeated path fragments.
            newroot = Path(dstDir).joinpath(Path(root).relative_to(src_root))
            newroot.mkdir(parents=True, exist_ok=True)

            for file in files:
                if Path(file).suffix.lower() not in PathBox.PIC_SUFFIXES:
                    continue
                oldfile = Path(root).joinpath(file)
                newfile = newroot.joinpath(file)
                try:
                    with image_module.open(oldfile) as im:
                        needs_resize = im.size != size
                        if needs_resize:
                            im.thumbnail(size)
                            # JPEG cannot store alpha / palette images.
                            if im.mode in ('RGB', 'L', 'CMYK'):
                                im.save(newfile, "jpeg")
                            else:
                                im.convert('RGB').save(newfile, "jpeg")
                            print('{} is thumbnailed'.format(oldfile))
                    if not needs_resize:
                        # Move only after the image file has been closed.
                        PathBox.moveFile(oldfile, newfile)
                except OSError as err:
                    print('{} cannot be resized: {}'.format(oldfile, err))
        return dstDir

    # ------------------------------------------------------------------
    # moving / copying
    # ------------------------------------------------------------------
    @staticmethod
    def batchMoveNonePicsToOneFolder(srcDir, dstDir='parent'):
        """Move every non-picture file under *srcDir* into one folder.

        The target is *srcDir* itself when *dstDir* is ``'parent'`` (or
        empty), otherwise *dstDir*.  Name clashes get a numbered name.
        """
        dst_dir = Path(srcDir) if dstDir == 'parent' or not dstDir else Path(dstDir)
        for root, dirs, files in os.walk(str(srcDir)):
            if PathBox._samePath(root, dst_dir):
                continue  # already where they belong
            for f in files:
                src_path = Path(root).joinpath(f)
                if src_path.suffix.lower() not in PathBox.PIC_SUFFIXES:
                    PathBox.moveFile(src_path, dst_dir.joinpath(f))

    @staticmethod
    def excludeFilesByName(srcDir, dstDir='parent'):
        """Move every file whose name does not start with ``IMG`` into one folder.

        The target is *srcDir* itself when *dstDir* is ``'parent'``,
        otherwise *dstDir*.  Files already in the target are left alone.
        """
        src_dir = Path(srcDir)
        dst_dir = src_dir if dstDir == 'parent' else Path(dstDir)
        for root, dirs, files in os.walk(str(src_dir)):
            if PathBox._samePath(root, dst_dir):
                continue
            for f in files:
                if not f.startswith("IMG"):
                    PathBox.moveFile(Path(root).joinpath(f), dst_dir.joinpath(f))

    @staticmethod
    def batchMoveFilesToOneFolder(srcDir, dstDir='parent', fexts='all'):
        """Flatten *srcDir*: move the files of all sub-folders into one folder.

        :param srcDir: directory tree to collect files from
        :param dstDir: target folder; ``'parent'`` (or empty) means *srcDir*
        :param fexts: ``'all'`` or a collection of suffixes such as
            ``['.jpg', '.png']`` (matched exactly, including case)

        Files that already sit directly in the target are not touched; name
        clashes are resolved with a numbered name (see :meth:`moveFile`).
        """
        src_dir = Path(srcDir)
        dst_dir = src_dir if dstDir == 'parent' or not dstDir else Path(dstDir)

        for root, dirs, files in os.walk(str(src_dir)):
            if PathBox._samePath(root, dst_dir):
                continue
            for f in files:
                src_path = Path(root).joinpath(f)
                if fexts == 'all' or src_path.suffix in fexts:
                    PathBox.moveFile(src_path, dst_dir.joinpath(f))

    @staticmethod
    def moveFile(src_path, dst_path):
        """Move a file without overwriting anything.

        If *dst_path* is taken the file is stored as ``stem_1``, ``stem_2``,
        ... next to it.  Returns the path the file ended up at.
        """
        src_path = Path(src_path)
        dst_path = Path(dst_path)
        if PathBox._samePath(src_path, dst_path):
            return dst_path  # nothing to do
        new_dst_path = PathBox._uniquePath(dst_path)
        new_dst_path.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(src_path), str(new_dst_path))
        print('{} is moved'.format(src_path))
        return new_dst_path

    @staticmethod
    def copyFile(src_path, dst_path):
        """Copy a file without overwriting anything.

        If *dst_path* is taken the copy is stored as ``stem_1``, ``stem_2``,
        ... next to it.  Returns the path of the copy, or ``None`` when the
        copy failed (the error is printed).
        """
        new_dst_path = PathBox._uniquePath(dst_path)
        try:
            shutil.copyfile(str(src_path), str(new_dst_path))
        except OSError as err:
            print(err)
            return None
        return new_dst_path


if __name__ == '__main__':
    # ---------------- extract pictures from docx ----------------
    # srcDir = r'D:\Civil\32109012\_工具包\_参考资料\_桥梁检测报告'
    # dstDir = r'D:\Civil\extract'
    # zipDir = r'D:\Civil\zip'
    # PathBox.batchExtractPicsFromDocs(srcDir, dstDir, zipDir)
    # PathBox.batchMoveFilesToOneFolder(zipDir, dstDir, fexts=['.jpg', '.png', '.emf', '.jpeg'])

    # ---------------- batch move files by suffix ----------------
    # PathBox.batchMoveFilesToOneFolder(srcDir, fexts=['.doc'])
    # srcDir = r'D:\Civil\32109012\_工具包\_softSmall'
    # PathBox.batchMoveFilesToOneFolder(srcDir)

    # ---------------- batch remove duplicate files ----------------
    srcKeepDir = r'D:\_soft'
    # srcCompareDirs = [r'D:\test']
    PathBox.batchRemoveTheSameFileByMD5(srcKeepDir)
    # PathBox.batchRemoveTheSameFileByMD5(srcKeepDir, srcCompareDirs)

    # ---------------- batch resize the images in a tree ----------------
    # srcDir = r'D:\BaiduNetdiskDownload\温州东瓯DAQIAO'
    # dstDir = PathBox.batchResizePics(srcDir)
    # PathBox.compareDirsDeleteTheSameFile(srcDir, dstDir, mode='delete')
    # PathBox.rmEmptyDirs(srcDir)

    # ---------------- batch sync the files between two dirs ----------------
    # srcDir = r'C:\Users\Administrator\Documents\debug\wolf'
    # dstDir = r'C:\Users\Administrator\Documents\debug\_待整理'
    # PathBox.syncFiles(srcDir, dstDir)
    # PathBox.compareDirsDeleteTheSameFile(srcDir, dstDir, mode='delete')
    # PathBox.rmEmptyDirs(srcDir)

    # ---------------- move files that do not start with IMG ----------------
    # srcDir = r'D:\衢州报告'
    # PathBox.excludeFilesByName(srcDir)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:基于图像分类模型对图像进行分类
· go语言实现终端里的倒计时
· 如何编写易于单元测试的代码
· 10年+ .NET Coder 心语,封装的思维:从隐藏、稳定开始理解其本质意义
· .NET Core 中如何实现缓存的预热?
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列01:轻松3步本地部署deepseek,普通电脑可用
· 25岁的心里话
· 按钮权限的设计及实现