使用python找出nginx访问日志中访问次数最多的10个ip排序生成网页
方法1:
linux下使用awk命令
# cat access1.log | awk '{print $1" "$7" "$9}'|sort -n|uniq -c |sort -n -r|head -10
方法2:
通过python处理日志
#encoding=utf-8
# Find the top-N most frequent (ip, url, http_code) entries in an nginx
# access log and render them into an HTML table.
#
# Expected log line format (whitespace-separated):
# 100.116.167.9 - - [22/Oct/2017:03:55:53 +0800] "HEAD /check HTTP/1.0" 200 0 "-" "-" "-" ut = 0.001
#   field 0 = client ip, field 6 = url, field 8 = http status code


def log_analysis(log_file, dpath, topn=10):
    """Count (ip, url, code) occurrences in *log_file* and write the
    *topn* most frequent entries into *dpath* as an HTML table.

    :param log_file: path of the nginx access log to read
    :param dpath:    path of the HTML report to write
    :param topn:     number of entries to keep (default 10)
    """
    log_dict = {}
    # Context manager closes the handle even if parsing raises.
    with open(log_file, 'r') as shandle:
        for line in shandle:
            nodes = line.split()
            if len(nodes) < 9:
                # Skip blank or malformed lines instead of raising IndexError.
                continue
            key = (nodes[0], nodes[6], nodes[8])  # (ip, url, http_code)
            log_dict[key] = log_dict.get(key, 0) + 1

    # Sort by count, most frequent first, and keep the top N.
    # (The original code did a single bubble-sort pass, which does not
    # actually sort the list, and indexed dict.items(), which fails on
    # Python 3.)
    need_list = sorted(log_dict.items(), key=lambda kv: kv[1], reverse=True)[:topn]

    # Render the rows; each row is count / ip / url / http_code.
    title = 'nginx访问日志'
    tbody = ''
    for key, count in need_list:
        # Original emitted an unclosed trailing <tr>; close the row properly.
        tbody += '<tr>\n<td>%s</td><td>%s</td><td>%s</td><td>%s</td>\n</tr>\n' % (
            count, key[0], key[1], key[2])

    html_tpl = '''
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{title}</title>
</head>
<body>
<table border="1" cellspacing="0" cellpadding="0" color='pink'>
<thead>
<tr cellspacing="0" cellpadding="0">
<th>访问次数</th>
<th>ip</th>
<th>url</th>
<th>http_code</th>
</tr>
</thead>
{tbody}
</table>
</body>
</html>
'''
    with open(dpath, 'w') as html_handle:
        html_handle.write(html_tpl.format(title=title, tbody=tbody))


# Script entry point.
if __name__ == '__main__':
    # nginx log file to analyse and report destination.
    log_file = 'access1.log'
    dpath = 'top10.html'
    # topn = how many entries to keep (defaults to 10 if omitted).
    topn = 10
    log_analysis(log_file, dpath, topn)
方法3:
# Report the top 10 (ip, status_code) pairs from an nginx access log.


def static_file(file_name):
    """Count (ip, status_code) occurrences in *file_name*.

    :param file_name: path of the nginx access log to read
    :return: dict mapping (ip, status_code) -> occurrence count
    """
    res_dict = {}
    with open(file_name) as f:
        for line in f:
            if line == '\n':
                continue
            # Fields, e.g.: ['100.116.x.x', '-', '-', '[08/Feb/2018:14:37:13',
            # '+0800]', '"HEAD', '/check', 'HTTP/1.0"', '200', '0', ...]
            tmp = line.split()
            if len(tmp) < 9:
                # Guard against malformed lines instead of raising IndexError.
                continue
            tup = (tmp[0], tmp[8])  # (ip, status_code)
            res_dict[tup] = res_dict.get(tup, 0) + 1
    return res_dict


def generate_html(rst_list):
    """Render [((ip, status), count), ...] rows as an HTML table string.

    Renders exactly the rows passed in; the caller decides how many.
    (The original sliced [-20:] here *and* in main(), contradicting the
    stated top-10 behaviour.)
    """
    # Fixed 'cellpading' typo so the attribute is valid HTML.
    str_html = '<table border="1" cellpadding=0 cellspacing=0>'
    str_html += "<tr><th>ip地址</th><th>状态码</th><th>次数</th></tr>"
    html_tmpl = '<tr><td>%s</td><td>%s</td><td>%s</td></tr>'
    for (ip, status), count in rst_list:
        str_html += html_tmpl % (ip, status, count)
    str_html += '</table>'
    return str_html


def write_to_html(html_list):
    """Write the HTML string *html_list* to res.html in the working dir."""
    with open('res.html', 'w') as f:
        f.write(html_list)


def main():
    """Build the top-10 report for the hard-coded log file."""
    res_dict = static_file('voice20180208.log')
    # Sort by count, most frequent first, and keep exactly the top 10.
    res_list = sorted(res_dict.items(), key=lambda x: x[1], reverse=True)[:10]
    html_content = generate_html(res_list)
    write_to_html(html_content)


if __name__ == "__main__":
    main()