Python 用正则表达式处理日志的实例

 

前提
    了解正则表达式的基本语法
 
 
 1 import re
 2 with open('top10_xiaozhuang_net.log','r') as f1:    #读取日志文件
 3  
 4     subject=f1.readlines()
 5     with open('slice_log.log','w') as f2:      #将切割结果存储到slice_log.log
 6         for line in subject:
 7  #line:
 8 2019-04-15 00:00:00 192.168.254.253 info LinkProof: 14/04/2019 22:51:53 14/04/2019 22:52:48 114. 80.179.132 210. 29.144.  1  211.65.207.189    UDP 17224    53 0.0.0.0   OTHER         84,
 9 14/04/2019 22:51:53 14/04/2019 22:52:48 120.221.144.117 210. 29.144.  1  211.65.207.189    UDP 38883    53 0.0.0.0   OTHER         80,
10 14/04/2019 22:51:53 14/04/2019 22:52:48 112. 47. 12.154 210. 29.144.  1  211.65.207.189    UDP 34323    53 0.0.0.0   OTHER         76,
11             #将log切块,使得结果成为结构统一的块
12             result = re.split(
13                 #用问号和"...LinkProof"和","来切
14                 r""".*LinkProof:\s+|\,
15                 """,
16                 line.strip('\n'), 0, re.VERBOSE)
17                 
18  
19  
20             #result : ['', '14/04/2019 22:51:53 14/04/2019 22:52:48 120.221.145.  4 210. 29.144.  1  211.65.207.189    UDP 64777    53 0.0.0.0   OTHER        305','...','']  lenth = 9
21             #用切片去除头尾的空
22             for block in result[1:8]:
23                 f2.write(block+'\n')
# Step 2: describe one sliced record as a sequence of whitespace-separated
# fields. Date and time are captured as separate tokens, which keeps each
# sub-pattern trivial.
date1 = r"\S*"  # any run of non-whitespace characters
time1 = r"\S*"
date2 = r"\S*"
time2 = r"\S*"

# The first three IP addresses may contain padding spaces after a dot
# (e.g. "210. 29.144.  1"), so whitespace is tolerated after each dot.
ip1 = r"(?:\d{1,3}\.\s*){3}\d{1,3}"
ip2 = r"(?:\d{1,3}\.\s*){3}\d{1,3}"
ip3 = r"(?:\d{1,3}\.\s*){3}\d{1,3}"
protocal = r"\w{3}"
sizelike = r"\d*"
portlike = r"\d*"
ip4 = r"\S*"
type_ = r"\w*"  # trailing underscore avoids shadowing the builtin `type`
num = r"\d*"

# Pre-compile the full record pattern: thirteen capture groups, in field
# order, separated by one or more whitespace characters.
log_pattern = re.compile(
    r"(%s)\s+(%s)\s+(%s)\s+(%s)\s+(%s)\s+(%s)\s+(%s)\s+(%s)\s+(%s)\s+(%s)\s+(%s)\s+(%s)\s+(%s)"
    % (date1, time1, date2, time2, ip1, ip2, ip3,
       protocal, sizelike, portlike, ip4, type_, num)
)
# Step 3: parse every sliced record into a dict and collect the results.
records = []
with open('slice_log.log', 'r') as f2:
    for line in f2:
        line_matchs = log_pattern.match(line)
        if line_matchs is None:
            # Lines that do not match the record pattern are skipped.
            continue

        all_groups = line_matchs.groups()
        records.append({
            # Re-join each date with its time using a single space.
            "date1": all_groups[0] + " " + all_groups[1],
            "date2": all_groups[2] + " " + all_groups[3],
            # Remove the padding spaces captured inside the IP addresses.
            "ip1": all_groups[4].replace(" ", ""),
            "ip2": all_groups[5].replace(" ", ""),
            "ip3": all_groups[6].replace(" ", ""),
            "protocal": all_groups[7],
            "sizelike": all_groups[8],
            "portlike": all_groups[9],
            # This field is captured as non-whitespace only, so it needs
            # no space removal.
            "ip4": all_groups[10],
            "type": all_groups[11],
            "num": all_groups[12],
        })

for item in records:
    print(item)
74  

 

得到的结果:
 
 
可再参考文章:
posted @ 2019-04-19 17:33  夏天换上冬装  阅读(744)  评论(0)  编辑  收藏  举报