Analyzing nginx logs with a Python script

To rank the most frequent client IPs across a month's worth of rotated nginx logs (30 daily files), I put together the script below. The idea is simple: count the IPs in each log file, then merge the per-file dictionaries into one and sort it to get the monthly ranking.
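For reference, nginx's default combined log format puts the client address as the first field of every line, which is why the script only needs to match an IP at the start of each line. A typical line looks like this (all values invented):

192.168.1.100 - - [24/Jun/2015:23:16:01 +0800] "GET /index.html HTTP/1.1" 200 612 "-" "Mozilla/5.0"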
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import re


# ip -> hit count, accumulated across all log files by IpSort()
ipsort = {}

def IpSort(logfile):
    # The client IP is the first field of every line, so an anchored
    # match at the start of the line is enough.
    ipadd = r'\.'.join([r'\d{1,3}'] * 4)
    re_ip = re.compile(ipadd)
    with open(logfile) as f:
        for line in f:
            match = re_ip.match(line)
            if match:
                ip = match.group()
                ipsort[ip] = ipsort.get(ip, 0) + 1

def ReadFile():
    filedir = raw_input("Enter the path>").strip()
    for name in os.listdir(filedir):
        if name.endswith('.txt'):
            # os.listdir() returns bare file names, so join them with
            # the directory before opening.
            IpSort(os.path.join(filedir, name))

def mergeipnum(*ipns):
    # Merge any number of ip->count dictionaries by summing the counts
    # over the union of their keys.
    _keys = set(sum([ipn.keys() for ipn in ipns], []))
    _ipnum_dic = {}
    for _key in _keys:
        _ipnum_dic[_key] = sum([ipn.get(_key, 0) for ipn in ipns])
    return _ipnum_dic

ReadFile()
monthly = mergeipnum(ipsort)
# Print the monthly ranking, busiest IP first.
for ip, hits in sorted(monthly.items(), key=lambda kv: kv[1], reverse=True):
    print ip, hits
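As a side note, on Python 3 the per-file counting and the dictionary merge collapse into a few lines with collections.Counter, since Counter objects can be added together. The following is only a minimal sketch under the same assumptions as the script above (rotated logs named *.txt in a single directory, client IP at the start of each line):

#!/usr/bin/env python3
import os
import re
from collections import Counter

IP_RE = re.compile(r'\d{1,3}(?:\.\d{1,3}){3}')

def count_ips(logfile):
    """Return a Counter of client IPs for one rotated log file."""
    counts = Counter()
    with open(logfile) as f:
        for line in f:
            m = IP_RE.match(line)  # client IP is the first field
            if m:
                counts[m.group()] += 1
    return counts

logdir = input("Enter the path>").strip()
monthly = Counter()  # adding Counters merges the per-file counts
for name in os.listdir(logdir):
    if name.endswith('.txt'):
        monthly += count_ips(os.path.join(logdir, name))

for ip, hits in monthly.most_common(10):  # top 10 busiest IPs
    print(ip, hits)

Counter.most_common() also takes care of the final sort, so no separate merge-and-sort step needs to be maintained.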
