python查找并删除相同文件-UNIQ File-wxPython版本

今天用wxPython做了一个GUI程序,我称之为UNIQ File,用于查找(并可删除)指定目录内的相同文件。主要原理是比较文件的md5值:先找出大小相同的文件,只对这些文件计算md5,而不是对所有文件都计算,从而大大减少了md5的计算量。程序还加入了多线程功能。

运行该程序需要安装wxPython。

界面图

源代码:

UNIQFile-wxPython.py

  1 # -*- coding: gbk -*-
  2 
  3 '''
  4 Author:@DoNotSpyOnMe
  5 Blog: http://www.cnblogs.com/aaronhoo
  6 '''
  7 
  8 import wx
  9 import hashlib
 10 import os
 11 import threading
 12 
 13 class WorkerThread(threading.Thread):
 14     def __init__(self, frame,dir,operation,msg):
 15         """初始化工作线程: 把主窗口传进来"""
 16         threading.Thread.__init__(self)
 17         self.frame = frame
 18         self.dir=dir
 19         self.operation=operation
 20         self.msg=msg
 21         self.setDaemon(True)#设置子线程随UI主线程结束而结束
 22         self.start() 
 23 
 24     #----------------------------------------------------------------------
 25     def run(self):
 26         """执行工作线程"""
 27         self.frame.SetButtons('operating')
 28         try:
 29             if self.operation=='list':
 30                 self.listSameFile(self.dir)
 31                 self.frame.btnList.Enable()
 32             elif self.operation=='remove':
 33                 self.removeSameFile(self.dir)
 34                 self.frame.btnRemove.Enable()            
 35         except:
 36             pass
 37         finally:
 38             self.frame.SetButtons('completed')
 39 #         
 40 #     def stop(self):
 41 #         self.keepRunning=False
 42     def appendMsg(self,msg):
 43         if self.frame:
 44             #以下方式可以实现终端式的刷新:自动滚动到最新行
 45             self.frame.txtContent.AppendText(msg+'\n')
 46             #废弃的方式
 47 #             currentMsg=self.frame.txtContent.GetValue()
 48 #             currentMsg=currentMsg+'\n'+msg
 49 #             self.frame.txtContent.SetValue(currentMsg)
 50             
 51     def getFileSize(self,filePath):
 52         return os.path.getsize(filePath)        
 53     
 54     ''' 一般文件的md5计算方法,一次读取文件的全部内容'''           
 55     def CalcMD5(self,filepath):
 56         with open(filepath,'rb') as f:
 57             md5obj = hashlib.md5()
 58             md5obj.update(f.read())
 59             hash = md5obj.hexdigest()
 60             return hash    
 61     '''大文件计算md5的方法,分批读取文件内容,防止内存爆掉'''    
 62     def GetFileMd5(self,filename):
 63         if not os.path.isfile(filename):
 64             return
 65         myhash = hashlib.md5()
 66         f = open(filename,'rb')
 67         while True:
 68             b = f.read(8*1024)
 69             if not b :
 70                 break
 71             myhash.update(b)
 72         f.close()
 73         return myhash.hexdigest()
 74     
 75     def GetAllFiles(self,directory):
 76         files=[]
 77         for dirpath, dirnames,filenames in os.walk(directory):
 78             if filenames!=[]:
 79                 for file in filenames:
 80                     files.append(dirpath+'\\'+file)
       files.sort(key=len)#按照文件名的长度排序 
81 return files 82 83 def findSameSizeFiles(self,files): 84 dicSize={} 85 for f in files: 86 size=self.getFileSize(f) 87 if not dicSize.has_key(size): 88 dicSize[size]=f 89 else: 90 dicSize[size]=dicSize[size]+';'+f 91 dicCopy=dicSize.copy() 92 for k in dicSize.iterkeys(): 93 if dicSize[k].find(';')==-1: 94 dicCopy.pop(k) 95 del dicSize 96 return dicCopy 97 98 def findSameMD5Files(self,files): 99 dicMD5={} 100 for f in files: 101 self.appendMsg('calculating the md5 value of file %s'%f) 102 md5=self.GetFileMd5(f) 103 if not dicMD5.has_key(md5): 104 dicMD5[md5]=f 105 else: 106 dicMD5[md5]=dicMD5[md5]+';'+f 107 dicCopy=dicMD5.copy() 108 for k in dicMD5.iterkeys(): 109 if dicMD5[k].find(';')==-1: 110 dicCopy.pop(k) 111 del dicMD5 112 return dicCopy 113 114 def removeSameFile(self,mydir): 115 msg='' 116 msgUniq='Congratulations,no file is removed since they are all uniq.' 117 try: 118 existsFlag=False 119 files=self.GetAllFiles(mydir) 120 self.appendMsg('%s files found in directory %s\n'%(len(files),mydir)) 121 dicFileOfSameSize=self.findSameSizeFiles(files) 122 if dicFileOfSameSize=={}: 123 self.appendMsg(msgUniq) 124 return 125 else: 126 #list the duplicated files first: 127 dicFiltered={} 128 for k in dicFileOfSameSize.iterkeys(): 129 filesOfSameSize=dicFileOfSameSize[k].split(';') 130 dicSameMD5file=self.findSameMD5Files(filesOfSameSize) 131 if dicSameMD5file!={}: 132 existsFlag=True 133 for k in dicSameMD5file.iterkeys(): 134 msg=msg+'md5 %s: %s'%(k,dicSameMD5file[k])+'\n' 135 dicFiltered[k]=dicSameMD5file[k] 136 if not existsFlag: 137 msg=msgUniq 138 return 139 else: 140 msg='Duplicated files:\n'+msg+'\n' 141 #then remove the duplicated files: 142 removeCount=0 143 for k in dicFiltered.iterkeys(): 144 sameFiles=dicFiltered[k].split(';') 145 flagRemove=False 146 for f in sameFiles: 147 if not flagRemove: 148 flagRemove=True 149 else: 150 msg=msg+'Removing file: %s'%f+'\n' 151 os.remove(f) 152 removeCount=removeCount+1 153 msg=msg+'%s files are 
removed.\n'%removeCount 154 except Exception,e: 155 # print e 156 msg='Exception occured.' 157 finally: 158 self.appendMsg(msg+'\n'+'Operation finished.') 159 160 161 def listSameFile(self,mydir): 162 msg='' 163 msgUniq='Congratulations,all files are uniq.' 164 try: 165 existsFlag=False 166 files=self.GetAllFiles(mydir) 167 self.appendMsg('%s files found in directory %s\n'%(len(files),mydir)) 168 dicFileOfSameSize=self.findSameSizeFiles(files) 169 if dicFileOfSameSize=={}: 170 self.appendMsg(msgUniq) 171 return 172 else: 173 for k in dicFileOfSameSize.iterkeys(): 174 filesOfSameSize=dicFileOfSameSize[k].split(';') 175 dicSameMD5file=self.findSameMD5Files(filesOfSameSize) 176 if dicSameMD5file!={}: 177 existsFlag=True 178 for k in dicSameMD5file.iterkeys(): 179 msg=msg+'md5 %s: %s'%(k,dicSameMD5file[k])+'\n' 180 if not existsFlag: 181 msg=msgUniq 182 else: 183 msg='Duplicated files:\n'+msg 184 except Exception,e: 185 # print e 186 msg='Exception occured.' 187 finally: 188 self.appendMsg(msg+'\n'+'Operation finished.') 189 190 191 class MyFrame(wx.Frame): 192 def __init__(self): 193 super(MyFrame,self).__init__(None,title='UNIQ File-wxPython',size=(780,450)) 194 pan=wx.Panel(self) 195 self.lblDir=wx.StaticText(pan,-1,'Dir:',style=wx.ALIGN_LEFT) 196 self.txtFile=wx.TextCtrl(pan,size=(380,30)) 197 # self.txtFile.Disable() 198 self.btnOpen=wx.Button(pan,label='Pick Directory') 199 self.btnOpen.Bind(wx.EVT_BUTTON, self.BtnOpenHandler) 200 self.btnList=wx.Button(pan,label='Find Same') 201 self.btnList.Bind(wx.EVT_BUTTON, self.BtnListHandler) 202 self.btnRemove=wx.Button(pan,label='Remove duplicated') 203 self.btnRemove.Bind(wx.EVT_BUTTON, self.BtnRemoveHandler) 204 # self.btnStop=wx.Button(pan,label='Stop') 205 # self.btnStop.Bind(wx.EVT_BUTTON, self.BtnStopHandler) 206 207 hbox=wx.BoxSizer() 208 hbox.Add(self.lblDir,proportion=0,flag=wx.LEFT,border=5) 209 hbox.Add(self.txtFile,proportion=0,flag=wx.LEFT,border=5) 210 
hbox.Add(self.btnOpen,proportion=0,flag=wx.LEFT,border=5) 211 hbox.Add(self.btnList,proportion=0,flag=wx.LEFT,border=5) 212 hbox.Add(self.btnRemove,proportion=0,flag=wx.LEFT,border=5) 213 # hbox.Add(self.btnStop,proportion=0,flag=wx.LEFT,border=5) 214 215 self.txtContent=wx.TextCtrl(pan,style=wx.TE_MULTILINE|wx.HSCROLL) 216 vbox=wx.BoxSizer(wx.VERTICAL) 217 vbox.Add(hbox,proportion=0,flag=wx.EXPAND|wx.ALL,border=5) 218 vbox.Add(self.txtContent,proportion=1,flag=wx.EXPAND,border=5) 219 pan.SetSizer(vbox) 220 # self.SetButtons('init') 221 222 def BtnOpenHandler(self,event): 223 dlg = wx.DirDialog(None,u"选择文件夹",style=wx.DD_DEFAULT_STYLE) 224 if dlg.ShowModal() == wx.ID_OK: 225 dlg.Destroy() 226 if dlg.GetPath(): 227 self.dirSelected=dlg.GetPath() #文件夹路径 228 self.txtFile.SetValue(self.dirSelected) 229 230 self.SetButtons('selected') 231 self.txtContent.SetValue('Selected dirctory: %s\n'%self.dirSelected) 232 233 def BtnListHandler(self,event): 234 if not self.txtFile.GetValue() or not os.path.isdir(self.txtFile.GetValue()): 235 wx.MessageBox('please select a valid directory first.','Tip Message',wx.YES_DEFAULT|wx.ICON_INFORMATION) 236 return 237 self.dirSelected=self.txtFile.GetValue() 238 self.txtContent.SetValue('') 239 msg='Listing same files in %s\n'%self.dirSelected 240 self.txtContent.SetValue(msg) 241 workThread=WorkerThread(self,self.dirSelected,'list',msg) 242 243 def BtnRemoveHandler(self,event): 244 if not self.txtFile.GetValue() or not os.path.isdir(self.txtFile.GetValue()): 245 wx.MessageBox('please select a valid directory first.','Tip Message',wx.YES_DEFAULT|wx.ICON_INFORMATION) 246 return 247 self.dirSelected=self.txtFile.GetValue() 248 self.txtContent.SetValue('') 249 msg='Removing duplicated files in %s\n'%self.dirSelected 250 self.txtContent.SetValue(msg) 251 WorkerThread(self,self.dirSelected,'remove',msg) 252 253 def BtnStopHandler(self,event): 254 pass 255 256 def SetButtons(self,status): 257 if status=='init': 258 self.btnOpen.Enable() 259 
self.btnList.Disable() 260 self.btnRemove.Disable() 261 # self.btnStop.Disable() 262 elif status=='operating': 263 self.btnOpen.Disable() 264 self.btnList.Disable() 265 self.btnRemove.Disable() 266 # self.btnStop.Enable() 267 elif status=='completed': 268 self.btnOpen.Enable() 269 self.btnList.Enable() 270 self.btnRemove.Enable() 271 # self.btnStop.Disable() 272 elif status=='selected': 273 self.btnOpen.Enable() 274 self.btnList.Enable() 275 self.btnRemove.Enable() 276 # self.btnStop.Disable() 277 278 if __name__=="__main__": 279 app=wx.App() 280 MyFrame().Show() 281 app.MainLoop()

 

posted @ 2016-04-17 19:57  morein2008  阅读(731)  评论(0)  编辑  收藏  举报