简单日志处理

 1 import datetime
 2 import re
 3 logfile='''58.61.164.141 - - [22/Feb/2010:09:51:46 +0800] "GET /reference-and-source/weblog-format/ HTTP/1.1" 200 6326 "-" "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)"'''
 4 
 5 def resolve_log():
 6     _pattern = '''(?P<IP>[\d\.]{7,}) - - \[(?P<datetime>[^\[\]]+)\] "(?P<request>[^"]+)" (?P<status>\d+) (?P<size>\d+) "-" "(?P<useraAgent>[^"]+)"'''
 7     _regex = re.compile(_pattern, re.S)
 8 
 9     def _getHuman(size):
10         units = ["","K","M","G","T","p"]
11         deps = 0
12         size = int(size)
13         while(size >1000):
14             deps += 1
15             size = size // 1000
16         return str(size)+units[deps]
17 
18     ops = {
19         "datetime": lambda time:datetime.datetime.strptime(time,"%d/%b/%Y:%H:%M:%S %z"),
20         "size": _getHuman,
21         "status": int,
22         "request": lambda request:dict(zip(("method","url","protocal"),request.split()))
23     }
24 
25     def _extract(logfile):
26         matcher = _regex.match(logfile)
27         if matcher:
28             return {k: ops.get(k, lambda x: x)(v) for k, v in matcher.groupdict().items()}
29         else:
30             return None
31     return _extract
32 
# Demo: parse the sample line and print the resulting dict.
# resolve_log() builds a new parser on every call, so it is created once
# here and reused for the sample line and for every line of the file below.
_parse_line = resolve_log()
res = _parse_line(logfile)
print(res)

# A log file can also be used to exercise the parser.
# "www.log" is opened relative to the current working directory; open()
# raises if it is missing.
# Lines that do not match the pattern are stored in `loged` as None.
loged = []
with open("www.log", mode="rt", encoding="utf8") as f:
    for line in f:
        res = _parse_line(line)
        loged.append(res)
View Code

 

处理结果:

{'size': '6K', 'datetime': datetime.datetime(2010, 2, 22, 9, 51, 46, tzinfo=datetime.timezone(datetime.timedelta(0, 28800))), 'IP': '58.61.164.141', 'status': 200, 'request': {'url': '/reference-and-source/weblog-format/', 'protocal': 'HTTP/1.1', 'method': 'GET'}, 'useraAgent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'}