Python正则表达式,统计分析nginx访问日志
目标:
1.正则表达式
2.oop编程,统计nginx访问日志中不同IP地址出现的次数并排序
1.正则表达式
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Demonstrate the basic ``re`` entry points: match, search, split, findall, finditer."""
import re

# match: only succeeds if the pattern matches at the start of the string.
# Method 1: compile the pattern first, then call the compiled object's method.
pattern1 = re.compile(r'hello', re.I)
match = pattern1.match('Hello World')
if match:
    print(match.group())

# Method 2: use the module-level helper directly.
m = re.match(r'hello', 'hello world.')
if m:  # re.match returns None on failure, so guard before calling .group()
    print(m.group())

# search: finds the first occurrence anywhere in the string.
pattern1 = re.compile(r'World')
match = pattern1.search('Hello, hello World.')
if match:
    print(match.group())

# split: cut the string at every run of digits.
pattern1 = re.compile(r'\d+')
parts = pattern1.split('one1two2three3')
print(parts)
for part in parts:
    print(part)

# findall: return every non-overlapping match as a list of strings.
numbers = pattern1.findall('one1two2three3')
print(numbers)

# finditer: like findall, but yields match objects lazily.
for found in pattern1.finditer('one1two2three3'):
    print(found.group())
•运行代码,测试效果
2.oop编程,统计nginx访问日志中不同IP地址出现的次数并排序
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Count how often a regex pattern occurs in a log file and rank the matches."""
import re


class CountPatt(object):
    """Tally the text matched by a regular expression, at most one match per line.

    Counts are kept in ``self.result`` (matched text -> occurrences) and
    accumulate across successive ``count_patt`` calls on the same instance.
    """

    def __init__(self, patt):
        """Compile ``patt`` once and start with an empty tally."""
        self.patt = re.compile(patt)
        self.result = {}

    def count_patt(self, fname):
        """Scan ``fname`` line by line and count the first match on each line.

        Lines without a match are ignored. Returns ``self.result`` (the same
        dict object that is stored on the instance, not a copy). Any error
        raised by ``open`` propagates to the caller.
        """
        with open(fname) as fobj:
            for line in fobj:
                match = self.patt.search(line)
                if match:
                    key = match.group()
                    self.result[key] = self.result.get(key, 0) + 1
        return self.result

    def sort(self):
        """Return ``(matched_text, count)`` tuples ordered by descending count.

        ``sorted`` is stable even with ``reverse=True``, so entries with equal
        counts keep the order in which the dict yields them. ``self.result``
        itself is left untouched.
        """
        return sorted(self.result.items(), key=lambda item: item[1], reverse=True)


if __name__ == "__main__":
    httpd_log = '/tmp/access.log'
    ip_pattern = r'^(\d+\.){3}\d+'
    # Alternative pattern: pass it to CountPatt instead of ip_pattern to
    # count browser names rather than client addresses.
    browser_pattern = r'Chrome|Safari|Firefox'
    a = CountPatt(ip_pattern)
    print(a.count_patt(httpd_log))
    print(a.sort())
•运行代码,测试效果
handetiandeMacBook-Pro:test xkops$ python test2.py
{'192.168.207.21': 25, '192.168.80.165': 20, '192.168.207.1': 46, '127.0.0.1': 10}
[('192.168.207.1', 46), ('192.168.207.21', 25), ('192.168.80.165', 20), ('127.0.0.1', 10)]