import functools
import hashlib
import os
import time

import send2trash
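'''
Find identical files under a directory by comparing their MD5 digests, and
optionally delete the duplicates (they are only moved to the recycle bin).
'''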
def getMD5(path, chunk_size=40960):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in binary mode, ``chunk_size`` bytes at a time, so large
    files are never loaded into memory in one piece.

    Args:
        path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (defaults to 40960).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    # A single ``with`` owns the only handle, so it is always closed; the
    # previous version opened the file twice and leaked the first handle.
    with open(path, 'rb') as stream:
        # ``read`` returns b'' at end of file, which stops the iteration.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
def getSha512(path, chunk_size=40960):
    """Return the hexadecimal SHA-512 digest of the file at *path*.

    The file is read in binary mode, ``chunk_size`` bytes at a time, so large
    files are never loaded into memory in one piece.

    Args:
        path: Path of the file to hash.
        chunk_size: Number of bytes read per iteration (defaults to 40960).

    Returns:
        The digest as a lowercase hexadecimal string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.sha512()
    # A single ``with`` owns the only handle, so it is always closed; the
    # previous version opened the file twice and leaked the first handle.
    with open(path, 'rb') as stream:
        # ``read`` returns b'' at end of file, which stops the iteration.
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
def timer(func):
    """Decorator that prints how long each call to *func* took.

    The wrapper forwards all positional and keyword arguments to *func*,
    prints ``Elapsed <seconds>s`` (rounded to one decimal place) once the call
    completes, and returns whatever *func* returned. Nothing is printed if
    *func* raises.

    Args:
        func: The callable to time.

    Returns:
        A wrapper with the same call signature and metadata as *func*.
    """
    @functools.wraps(func)  # keep the wrapped function's __name__ / __doc__
    def deco(*args, **kwargs):
        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments the way time.time() can.
        started = time.perf_counter()
        result = func(*args, **kwargs)
        use_time = round(time.perf_counter() - started, 1)
        print('Elapsed %ss' % (use_time))
        # Propagate the result; it was previously dropped, so every decorated
        # function appeared to return None.
        return result
    return deco
@timer
def walk(path):
    """Scan *path* recursively and report files that share an MD5 digest.

    The user is first asked on stdin whether duplicates should be deleted.
    If the answer is ``y`` (case-insensitive), then for every duplicate found
    the previously seen copy is sent to the recycle bin with ``send2trash``
    and the newly found copy is kept as the reference. Each duplicate pair is
    printed, and the accumulated size of the duplicates is reported at the
    end, in units of 1024 * 1024 bytes.

    Files whose hashing raises ``OSError`` are reported and skipped so that a
    single unreadable file does not abort the whole scan.

    Args:
        path: Root directory to walk.
    """
    delete = input('Want to delete duplicate file? y/n\n').lower() == 'y'

    seen = {}  # MD5 hex digest -> most recently seen path with that digest
    duplicate_bytes = 0
    scanned = 0
    for folder, _subfolders, filenames in os.walk(path):
        for filename in filenames:
            scanned += 1
            # '\r' rewrites the same console line to act as a progress counter.
            print('\rHas scanned %s files' % scanned, end='')
            file_path = os.path.join(folder, filename)
            try:
                digest = getMD5(file_path)
            except OSError as err:
                print('\nSkipped unreadable file %s (%s)' % (file_path, err))
                continue

            previous = seen.get(digest)
            if previous is not None:
                # NOTE(review): duplicates are decided on MD5 alone; an extra
                # SHA-512 comparison was left disabled in the original code.
                if delete:
                    # Move the earlier copy to the recycle bin.
                    send2trash.send2trash(previous)
                print('\n%s\n%s\n' % (file_path, previous))
                # Both copies hash the same, so the surviving file's size is
                # counted as the duplicated amount.
                duplicate_bytes += os.path.getsize(file_path)
            # Always remember the newest path: in delete mode the older copy
            # is gone, so later duplicates must be compared against this one.
            seen[digest] = file_path

    # Convert bytes to units of 1024 * 1024 bytes, rounded to two decimals.
    size = round(duplicate_bytes / (1024 * 1024), 2)
    if delete:
        print('\nSaved %sM space.' % size)
    else:
        print("\nWasted %sM space." % size)
if __name__ == "__main__":
    # Script entry point: ask for the directory to scan, then run the search.
    walk(input('Input path:'))