python查找并删除相同文件-UNIQ File-wxPython版本
今天用 wxPython 做了一个 GUI 程序,我称之为 UNIQ File,用于查找并删除指定目录内的相同文件。主要原理是比较文件的 md5 值:计算前先找出大小相同的文件,然后只计算这些文件的 md5 值,而不是对所有文件都计算,从而大大减少了 md5 的计算量。此外还加入了多线程功能。
运行该程序需要安装wxPython。
界面图
源代码:
UNIQFile-wxPython.py
# -*- coding: utf-8 -*-

'''
UNIQ File: find (and optionally remove) duplicated files in a directory.

Files are first grouped by size; MD5 digests are computed only for files
that share their size with at least one other file.

Author:@DoNotSpyOnMe
Blog: http://www.cnblogs.com/aaronhoo
'''

import wx
import hashlib
import os
import threading


class WorkerThread(threading.Thread):
    """Background thread that lists or removes duplicated files.

    The thread starts itself from the constructor.  Every widget access is
    marshalled to the GUI thread with ``wx.CallAfter`` because wx widgets
    must not be touched from a worker thread.
    """

    def __init__(self, frame, dir, operation, msg):
        """Create and immediately start the worker.

        frame     -- main window; must expose ``txtContent`` and ``SetButtons``
        dir       -- directory to scan
        operation -- 'list' to report duplicates, 'remove' to delete them
        msg       -- banner text already shown by the caller (stored only)
        """
        threading.Thread.__init__(self)
        self.frame = frame
        self.dir = dir
        self.operation = operation
        self.msg = msg
        # Daemon thread: it must not keep the process alive after the UI exits.
        self.daemon = True
        self.start()

    # ------------------------------------------------------------------
    # GUI marshalling helpers
    # ------------------------------------------------------------------
    def _append_on_gui(self, text):
        """Append text to the log control; runs on the GUI thread."""
        # The frame evaluates false once it has been destroyed.
        if self.frame:
            self.frame.txtContent.AppendText(text)

    def _set_buttons_on_gui(self, status):
        """Switch the button state; runs on the GUI thread."""
        if self.frame:
            self.frame.SetButtons(status)

    def appendMsg(self, msg):
        """Append one line to the log; AppendText keeps the view scrolled
        to the newest line."""
        wx.CallAfter(self._append_on_gui, msg + '\n')

    # ------------------------------------------------------------------
    def run(self):
        """Execute the requested operation and restore the buttons."""
        wx.CallAfter(self._set_buttons_on_gui, 'operating')
        try:
            if self.operation == 'list':
                self.listSameFile(self.dir)
            elif self.operation == 'remove':
                self.removeSameFile(self.dir)
        except Exception as e:
            # Report instead of silently swallowing the failure.
            self.appendMsg('Exception occured: %s' % e)
        finally:
            wx.CallAfter(self._set_buttons_on_gui, 'completed')

    # ------------------------------------------------------------------
    # File primitives
    # ------------------------------------------------------------------
    def getFileSize(self, filePath):
        """Return the size of filePath in bytes (raises OSError on failure)."""
        return os.path.getsize(filePath)

    def CalcMD5(self, filepath):
        """Return the MD5 hex digest of a file, reading it in one go.

        Only suitable for small files; see GetFileMd5 for the chunked version.
        """
        with open(filepath, 'rb') as f:
            return hashlib.md5(f.read()).hexdigest()

    def GetFileMd5(self, filename):
        """Return the MD5 hex digest of a file, read in chunks so that large
        files do not exhaust memory.  Returns None if filename is not a
        regular file."""
        if not os.path.isfile(filename):
            return None
        digest = hashlib.md5()
        with open(filename, 'rb') as f:
            while True:
                chunk = f.read(64 * 1024)
                if not chunk:
                    break
                digest.update(chunk)
        return digest.hexdigest()

    def GetAllFiles(self, directory):
        """Return every file path below directory, shortest path first."""
        files = []
        for dirpath, dirnames, filenames in os.walk(directory):
            for name in filenames:
                files.append(os.path.join(dirpath, name))
        # Stable sort by path length: the first (shortest) path of each
        # duplicate group is the copy removeSameFile keeps.
        files.sort(key=len)
        return files

    # ------------------------------------------------------------------
    # Grouping
    # ------------------------------------------------------------------
    def _safe_size(self, path):
        """Return the file size, or None (with a log line) if unreadable."""
        try:
            return self.getFileSize(path)
        except OSError as e:
            self.appendMsg('Skipping %s: %s' % (path, e))
            return None

    def _safe_md5(self, path):
        """Return the MD5 digest, or None (with a log line) if unreadable."""
        self.appendMsg('calculating the md5 value of file %s' % path)
        try:
            md5 = self.GetFileMd5(path)
        except (IOError, OSError) as e:
            self.appendMsg('Skipping %s: %s' % (path, e))
            return None
        if md5 is None:
            self.appendMsg('Skipping %s: not a regular file' % path)
        return md5

    def _group_files(self, files, key_func):
        """Group files by key_func(path).

        Returns a list of (key, [paths]) for every key shared by at least
        two files, in first-seen order.  Paths whose key is None are
        skipped, so unreadable files are never treated as duplicates.
        """
        order = []
        groups = {}
        for path in files:
            key = key_func(path)
            if key is None:
                continue
            if key not in groups:
                groups[key] = []
                order.append(key)
            groups[key].append(path)
        return [(key, groups[key]) for key in order if len(groups[key]) > 1]

    def findSameSizeFiles(self, files):
        """Return {size: 'path1;path2;...'} for sizes shared by 2+ files.

        Kept for backward compatibility.  The ';'-joined value is ambiguous
        for paths that contain ';', so the list/remove operations use
        _group_files directly instead of splitting this string.
        """
        return dict((size, ';'.join(paths))
                    for size, paths in self._group_files(files, self._safe_size))

    def findSameMD5Files(self, files):
        """Return {md5: 'path1;path2;...'} for digests shared by 2+ files.

        Kept for backward compatibility; see findSameSizeFiles.
        """
        return dict((md5, ';'.join(paths))
                    for md5, paths in self._group_files(files, self._safe_md5))

    def _find_duplicate_groups(self, mydir):
        """Return [(md5, [paths])] for every set of identical files in mydir."""
        files = self.GetAllFiles(mydir)
        self.appendMsg('%s files found in directory %s\n' % (len(files), mydir))
        duplicates = []
        # Only files that share their size can be identical, so MD5 is
        # computed inside each same-size group only.
        for _size, same_size in self._group_files(files, self._safe_size):
            duplicates.extend(self._group_files(same_size, self._safe_md5))
        return duplicates

    def _describe_groups(self, groups):
        """Return one report line per duplicate group."""
        return ['md5 %s: %s' % (md5, ';'.join(paths)) for md5, paths in groups]

    # ------------------------------------------------------------------
    # Operations
    # ------------------------------------------------------------------
    def removeSameFile(self, mydir):
        """Delete all but the first file of every duplicate group in mydir
        and log what was done."""
        lines = []
        try:
            groups = self._find_duplicate_groups(mydir)
            if not groups:
                lines.append('Congratulations,no file is removed since they are all uniq.')
                return
            lines.append('Duplicated files:')
            lines.extend(self._describe_groups(groups))
            lines.append('')
            removeCount = 0
            for _md5, paths in groups:
                # Keep the first path, remove the remaining copies.
                for path in paths[1:]:
                    lines.append('Removing file: %s' % path)
                    try:
                        os.remove(path)
                    except OSError as e:
                        # One undeletable file must not abort the whole run.
                        lines.append('Failed to remove %s: %s' % (path, e))
                    else:
                        removeCount += 1
            lines.append('%s files are removed.' % removeCount)
        except Exception as e:
            # Keep what was logged so far; removals already done stay visible.
            lines.append('Exception occured: %s' % e)
        finally:
            lines.append('')
            lines.append('Operation finished.')
            self.appendMsg('\n'.join(lines))

    def listSameFile(self, mydir):
        """Log every group of identical files found in mydir."""
        lines = []
        try:
            groups = self._find_duplicate_groups(mydir)
            if groups:
                lines.append('Duplicated files:')
                lines.extend(self._describe_groups(groups))
            else:
                lines.append('Congratulations,all files are uniq.')
        except Exception as e:
            lines.append('Exception occured: %s' % e)
        finally:
            lines.append('')
            lines.append('Operation finished.')
            self.appendMsg('\n'.join(lines))


class MyFrame(wx.Frame):
    """Main window: directory picker, two action buttons and a log area."""

    # status -> enabled flags for (btnOpen, btnList, btnRemove)
    _BUTTON_STATES = {
        'init': (True, False, False),
        'operating': (False, False, False),
        'completed': (True, True, True),
        'selected': (True, True, True),
    }

    def __init__(self):
        super(MyFrame, self).__init__(None, title='UNIQ File-wxPython', size=(780, 450))
        pan = wx.Panel(self)
        self.lblDir = wx.StaticText(pan, -1, 'Dir:', style=wx.ALIGN_LEFT)
        self.txtFile = wx.TextCtrl(pan, size=(380, 30))
        self.btnOpen = wx.Button(pan, label='Pick Directory')
        self.btnOpen.Bind(wx.EVT_BUTTON, self.BtnOpenHandler)
        self.btnList = wx.Button(pan, label='Find Same')
        self.btnList.Bind(wx.EVT_BUTTON, self.BtnListHandler)
        self.btnRemove = wx.Button(pan, label='Remove duplicated')
        self.btnRemove.Bind(wx.EVT_BUTTON, self.BtnRemoveHandler)

        hbox = wx.BoxSizer()
        for control in (self.lblDir, self.txtFile, self.btnOpen,
                        self.btnList, self.btnRemove):
            hbox.Add(control, proportion=0, flag=wx.LEFT, border=5)

        self.txtContent = wx.TextCtrl(pan, style=wx.TE_MULTILINE | wx.HSCROLL)
        vbox = wx.BoxSizer(wx.VERTICAL)
        vbox.Add(hbox, proportion=0, flag=wx.EXPAND | wx.ALL, border=5)
        vbox.Add(self.txtContent, proportion=1, flag=wx.EXPAND, border=5)
        pan.SetSizer(vbox)

    def BtnOpenHandler(self, event):
        """Let the user pick a directory and show it in the path field."""
        # The title is the Chinese text for "select folder", written as
        # escapes so the source stays ASCII regardless of file encoding.
        dlg = wx.DirDialog(None, u"\u9009\u62e9\u6587\u4ef6\u5939",
                           style=wx.DD_DEFAULT_STYLE)
        try:
            if dlg.ShowModal() != wx.ID_OK:
                return
            # Read the path before the dialog is destroyed.
            path = dlg.GetPath()
        finally:
            dlg.Destroy()
        if not path:
            return
        self.dirSelected = path
        self.txtFile.SetValue(path)
        self.SetButtons('selected')
        self.txtContent.SetValue('Selected dirctory: %s\n' % path)

    def _start_operation(self, operation, banner):
        """Validate the directory field and launch a worker for operation.

        banner is a format string that receives the directory path.
        """
        path = self.txtFile.GetValue()
        if not path or not os.path.isdir(path):
            wx.MessageBox('please select a valid directory first.', 'Tip Message',
                          wx.YES_DEFAULT | wx.ICON_INFORMATION)
            return
        self.dirSelected = path
        msg = banner % path
        self.txtContent.SetValue(msg)
        # Disable the buttons before the thread starts so a quick second
        # click cannot launch a concurrent operation.
        self.SetButtons('operating')
        WorkerThread(self, path, operation, msg)

    def BtnListHandler(self, event):
        """Start listing duplicated files in the chosen directory."""
        self._start_operation('list', 'Listing same files in %s\n')

    def BtnRemoveHandler(self, event):
        """Start removing duplicated files in the chosen directory."""
        self._start_operation('remove', 'Removing duplicated files in %s\n')

    def BtnStopHandler(self, event):
        """Placeholder: stopping a running operation is not implemented."""
        pass

    def SetButtons(self, status):
        """Enable/disable the buttons for the given status.

        Known statuses: 'init', 'operating', 'completed', 'selected'.
        Unknown statuses leave the buttons untouched.
        """
        states = self._BUTTON_STATES.get(status)
        if states is None:
            return
        buttons = (self.btnOpen, self.btnList, self.btnRemove)
        for button, enabled in zip(buttons, states):
            button.Enable(enabled)


if __name__ == "__main__":
    app = wx.App()
    MyFrame().Show()
    app.MainLoop()