#!/usr/bin/python3.5
import os
import sqlite3
from multiprocessing import Process,Queue
import time
##############################################################
DAY_SECONDS = 24*60*60
QUESIZE=1000
# c.execute('''CREATE TABLE memory (user VARCHAR(20), filename VARCHAR(100), size FLOAT, ctime TIMESTAMP, mtime TIMESTAMP, atime TIMESTAMP)''')
def checkFile(file, user, que):
    """Stat one file and hand its metadata to the queue.

    Args:
        file: Path of the file to inspect.
        user: Name of the user the file is attributed to.
        que: Queue-like object; on success it receives one
            ``(user, file, os.stat_result)`` tuple through ``put``.

    A path that cannot be stat'ed (removed since it was listed, permission
    denied, dangling link, ...) is skipped and nothing is enqueued.
    """
    try:
        statinfo = os.stat(file)
    except OSError:
        # Best effort: one unreadable or vanished file must not abort the
        # whole scan that is calling us.
        return
    que.put((user, file, statinfo))
def listDir(dir, user, que):
    """Recursively walk ``dir`` and report every regular file via ``checkFile``.

    Args:
        dir: Directory to scan.
        user: Name of the user the files are attributed to.
        que: Queue-like object forwarded unchanged to ``checkFile``.

    Directories that cannot be listed are skipped. Symbolic links to
    directories are not descended into; entries that are neither a file nor
    a directory are ignored.
    """
    try:
        entries = os.listdir(dir)
    except OSError:
        # Unreadable or vanished directory: skip it rather than abort the
        # whole scan.
        return
    for name in entries:
        path = os.path.join(dir, name)
        if os.path.isfile(path):
            checkFile(path, user, que)
        elif os.path.isdir(path) and not os.path.islink(path):
            # os.path.isdir follows symlinks; without the islink guard a
            # link to a directory would be traversed again (double counting)
            # and a link cycle would be walked repeatedly.
            listDir(path, user, que)
def checkPersonFile(dir, user, que):
    """Scan one user's directory tree, logging start and completion.

    Args:
        dir: Root directory belonging to ``user``.
        user: Name used in the progress messages and passed to ``listDir``.
        que: Queue-like object forwarded to ``listDir``.

    Progress messages are printed and appended to ``log/check_file.log``
    (relative to the current working directory).
    """
    # The script starts one process per user directory, all running this
    # function at once; exist_ok avoids the check-then-create race that made
    # the losers fail with FileExistsError.
    os.makedirs('log', exist_ok=True)
    logfile = "log/" + "check_file.log"
    # The context manager closes (and flushes) the log even if the scan raises.
    with open(logfile, 'a') as f:
        msg = 'checking %s ...' % user
        print(msg)
        f.write(msg + '\n')
        listDir(dir, user, que)
        msg = 'check %s complete' % user
        print(msg)
        f.write(msg + '\n')
def write_to_db(que, db, batch_size=None):
    """Drain ``que`` into the ``memory`` table of the SQLite database ``db``.

    Args:
        que: Queue-like object whose ``get()`` returns
            ``(user, file, stat_result)`` tuples; a ``None`` item is the
            sentinel that ends the loop.
        db: Path of the SQLite database. The six-column ``memory`` table
            must already exist.
        batch_size: Number of inserted rows between commits. ``None``
            (the default) means ``QUESIZE - 100``.

    Prints ``end`` once every row has been committed.
    """
    if batch_size is None:
        batch_size = QUESIZE - 100
    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        pending = 0
        while True:
            info = que.get()
            if info is None:
                break
            user = info[0]
            statinfo = info[2]
            # The file path (info[1]) is intentionally not stored; the
            # filename column only receives this placeholder.
            file = 'notused'
            c.execute('INSERT INTO memory values (?,?,?,?,?,?)',
                      (user, file, statinfo.st_size, statinfo.st_ctime,
                       statinfo.st_mtime, statinfo.st_atime))
            # Python has no ``++`` operator: the former ``++cnt`` was a
            # no-op, so the periodic commit below never ran.
            pending += 1
            if pending >= batch_size:
                conn.commit()
                pending = 0
        conn.commit()
    finally:
        conn.close()
    print('end')
def _emit(log, msg):
    """Print ``msg`` and append it, newline-terminated, to the open file ``log``."""
    print(msg)
    log.write(msg + '\n')


def _banner(head, width):
    """Return ``head`` framed by equal runs of '*' so the line is about ``width`` wide."""
    pad = (width - len(head)) // 2
    return '*' * pad + head + '*' * pad


def _report(log, cursor, head, sql, params, width):
    """Emit a banner for ``head`` followed by one ``user / MB`` line per result row.

    ``sql`` must select two columns (user, total size in MB); ``params`` are
    the values bound to its ``?`` placeholders.
    """
    user_width = width // 2
    memory_width = width - user_width
    _emit(log, _banner(head, width))
    for name, total_mb in cursor.execute(sql, params):
        _emit(log, '%-*s%-*.2f' % (user_width, name, memory_width, total_mb))


if __name__ == '__main__':
    rootDir = "/train/blue/users/"
    db = "memory.db"
    width = 80
    mega = 1024.0 * 1024

    conn = sqlite3.connect(db)
    try:
        c = conn.cursor()
        c.execute('''CREATE TABLE if not exists memory (user VARCHAR(20), filename VARCHAR(100), size FLOAT, ctime TIMESTAMP, mtime TIMESTAMP, atime TIMESTAMP)''')
        # Every run starts from an empty table.
        c.execute('''DELETE FROM memory''')
        conn.commit()

        # One writer process owns all inserts; one scanner process per user
        # directory feeds it through the bounded queue.
        que = Queue(QUESIZE)
        pdb = Process(target=write_to_db, args=(que, db))
        pdb.start()

        processes = []
        for filename in os.listdir(rootDir):
            file = os.path.join(rootDir, filename)
            if os.path.isdir(file):
                user = os.path.basename(file)
                p = Process(target=checkPersonFile, args=(file, user, que))
                p.start()
                processes.append(p)
        for p in processes:
            p.join()
        # All scanners are done: send the sentinel that stops the writer.
        que.put(None)
        pdb.join()
        print("check all file complete")

        # analyze the data in db
        os.makedirs('log', exist_ok=True)
        logfile = "log/" + "check_file.log"
        with open(logfile, 'a') as f:
            f.write('*' * 80 + '\n')
            f.write(time.strftime("%Y/%b/%d/%H:%M:%S") + '\n')

            _report(f, c, 'total memory',
                    '''SELECT user, sum(size)/(1024*1024) as total_size from memory GROUP BY user ORDER BY total_size DESC''',
                    (), width)

            days = [2, 5, 15]
            whens = ['ctime', 'mtime', 'atime']
            for day in days:
                for when in whens:
                    last_when = time.time() - day * DAY_SECONDS
                    # NOTE(review): ``<`` selects files whose timestamp is
                    # OLDER than the cutoff, while the header says "latest";
                    # confirm which one is intended.
                    # ``when`` comes from the fixed list above, so it is safe
                    # to interpolate; the cutoff is bound as a parameter.
                    sql = '''SELECT user, sum(size)/(1024*1024) as total_size from memory where %s < ? GROUP BY user ORDER BY total_size DESC''' % when
                    _report(f, c, 'latest %d days by %s' % (day, when),
                            sql, (last_when,), width)

            # bigger than 4 M
            sizes = [4.0 * 1024 * 1024]
            for size in sizes:
                # The threshold is in bytes; show it in megabytes to match
                # the "M" suffix (it used to print the raw byte count).
                head = 'bigger then %fM total memory:' % (size / mega,)
                _report(f, c, head,
                        '''SELECT user, sum(size)/(1024*1024) as total_size from memory where size > ? GROUP BY user ORDER BY total_size DESC''',
                        (size,), width)

            f.write('*' * 80 + '\n')
    finally:
        conn.close()
    print('finish')