python 遍历 统计文件尺寸,列出最大25个

原文来自newsmth python版


#coding=utf-8
#Find the MAXC (default: 25) largest files under a directory tree (current directory by default) - multithreaded
import os,os.path
from ConfigParser import RawConfigParser as rcp
from threading import Thread,Lock
import sys
import time
MAXC=25

def insert(tenmax,filename,filesize):
    """Insert [filename, filesize] into tenmax, which is ordered by size, largest first.

    The new entry goes in front of the first existing entry whose size is
    not greater than filesize (so it lands ahead of entries of equal size),
    or at the end when every existing entry is larger. tenmax is modified
    in place; nothing is returned.
    """
    pos=len(tenmax) #default: append after all larger entries
    for idx,entry in enumerate(tenmax):
        if not entry[1]>filesize:
            pos=idx
            break
    tenmax.insert(pos,[filename,filesize])
    
def keepTenMax(tenmax,filename,filesize):
    """Offer one file to the bounded "largest files" list tenmax.

    tenmax -- list of [filename, filesize] entries ordered by size, largest
              first; modified in place and never grown beyond MAXC entries
    filename, filesize -- the candidate file

    While the list holds fewer than MAXC entries the candidate is always
    inserted. Once it is full, the candidate is inserted only when it is
    strictly larger than the smallest kept entry, which is then dropped.
    A candidate that merely ties with the smallest kept entry is ignored:
    the list is already full, so keeping it would exceed the MAXC limit.
    """
    if len(tenmax)<MAXC:
        insert(tenmax,filename,filesize)
    elif tenmax and filesize>tenmax[-1][1]:
        #tenmax[-1] is the smallest kept entry; the "tenmax and" guard avoids
        #indexing an empty list when MAXC is 0 or negative
        insert(tenmax,filename,filesize)
        tenmax.pop()

class MyThread(Thread):#multithreaded search: one instance handles one directory
    """Worker thread that ranks the files of a single directory by size.

    After run() completes, self.tenmax holds the [filename, filesize]
    entries collected through keepTenMax for this directory.
    """
    def __init__(self,root,files,tname):
        """Store the work description.

        root  -- path of the directory that contains the files
        files -- names of the files inside root
        tname -- value assigned as the thread's name
        """
        Thread.__init__(self)
        self.root=root
        self.files=files
        self.name=tname
        self.tenmax=[]
    def run(self):
        """Stat each file, bump the global file counter and record its size.

        Relies on the module-level names count, mylock and keepTenMax.
        Files that cannot be stat'ed are skipped.
        """
        global count
        for f in self.files:
            filename=os.path.join(self.root,f)
            try:
                filesize=os.stat(filename).st_size
            except OSError:
                #e.g. dangling symlink, permission denied, or a file removed
                #after the directory was listed; skip it so the remaining
                #files of this directory are still examined
                continue
            with mylock: #the lock is released even if the body raises
                count+=1
            keepTenMax(self.tenmax,filename,filesize)
        
def allDone(threadlist):
    """Return True when no thread in threadlist is still running.

    threadlist -- iterable of threading.Thread objects; an empty iterable
                  yields True.
    """
    #is_alive() is available since Python 2.6; the older camelCase alias
    #isAlive() was removed in Python 3.9
    return not any(t.is_alive() for t in threadlist)
        
#main
if __name__=="__main__":
    global count #全局 文件计数
    mylock=Lock() #define a lock 
    count=1
    tenmax=[]
    threadlist=[]
    if len(sys.argv)==1:
        wdir='.'
    elif len(sys.argv)==2:
        wdir=sys.argv[1]
    else:
        print 'usage:find--.py [wdir]'
        sys.exit()
    tname=1
    begin=time.time()
    for root,dirs,files in os.walk(wdir):
        if files:
            sthread=MyThread(root,files,tname)#对每一个目录开启一个线程搜索
            threadlist.append(sthread)
            sthread.start()
            print 'thread-'+str(tname)+'-start search dir:'+root
            tname+=1
    for t in threadlist:
        t.join()
    if allDone(threadlist):#统计结果
        for i in threadlist:
            tenmax.extend(i.tenmax)
    if len(tenmax)<MAXC:
        tenmax=tenmax[:len(tenmax)]
    else:
        a=[]
        for f in tenmax:
            keepTenMax(a,f[0],f[1])
        tenmax=a
    #打印并输出到parser文件-当前目录下的result.ini文件
    print
    print '[=========================threads count',len(threadlist),'====================]'
    print '[=========================try',count,'files=======================]'
    print '[=========================the ',MAXC,' thMax files list===========]'
    print
    c=1
    myrcp=rcp()
    myrcp.add_section('Result')
    for fname,fsize in tenmax:
        size='%.3fMB' % (fsize/1024.0/1024.0)
        print '[%d]%s-%s' % (c,fname,size)
        myrcp.set('Result','[%d]%s' % (c,fname),size)
        c+=1
    myrcp.write(open('result.ini','w'))
    end=time.time()
    usetime=end-begin
    print
    print '[=============================================================]'
    print 'all time:%.3fs' % usetime

这里用了多线程, 值得学习一下, 执行的速度是真快啊



posted on 2012-12-13 13:25  js.yeyong  阅读(230)  评论(0)  编辑  收藏  举报

导航