统计nginx日志里每五分钟的访问量
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:Random_lee
#
# Count how many requests were written to today's Tomcat access log during
# the last few minutes (300 seconds by default) and print that number.
import time
import os
import re


class StatusPV(object):
    """Count recent requests (page views) recorded in a Tomcat access log.

    The log file is ``<log_path>/localhost_access_log.<YYYY-MM-DD>.txt`` for
    the current date.  Expected line format::

        10.116.201.71 - - [02/Sep/2018:09:44:13 +0800] "POST /x HTTP/1.0" 200 268

    Attributes:
        log_path: Directory that holds the access logs.
        log_time: Today's date as ``YYYY-MM-DD``.
        log_name: File name of today's access log.
        logfile: Full path of today's access log.
        interval: Length of the observation window in seconds.
        seek: Byte offset at which reading starts (set by get_filesize()).
    """

    # Files larger than this many bytes are not read from the beginning.
    LARGE_FILE_BYTES = 1000000000
    # Only entries logged with this UTC offset are counted.  The timestamp is
    # converted with time.mktime(), i.e. interpreted as host local time, so
    # the host is expected to run in this zone.
    EXPECTED_TZ = '+0800'
    TIME_FORMAT = '%d/%b/%Y:%H:%M:%S'
    # Compiled once instead of once per line.  The last field also accepts
    # '-', which Tomcat writes when a response carries no body bytes.
    LOG_PATTERN = re.compile(
        r'^(?P<RemoteIP>.*) - - (?P<datatime>.*) (?P<request>".+") '
        r'(?P<status>\d{3}) (?P<web_size>\d{1,10}|-)'
    )

    def __init__(self, log_path='/opt/apache-tomcat-7.0.69/logs/', interval=300):
        """Build the path of today's access log.

        Args:
            log_path: Directory containing the access logs.
            interval: Observation window in seconds.
        """
        self.log_path = log_path
        self.interval = interval
        self.log_time = time.strftime("%Y-%m-%d")
        self.log_name = 'localhost_access_log.%s.txt' % (self.log_time)
        self.logfile = os.path.join(self.log_path, self.log_name)
        self.seek = 0

    def get_filesize(self):
        """Choose the byte offset at which count_pv() starts reading.

        Sets ``self.seek`` to one fifth of the file size when the log is
        larger than LARGE_FILE_BYTES, otherwise to 0.

        Raises:
            OSError: If the log file does not exist or cannot be inspected.
        """
        file_size = os.path.getsize(self.logfile)
        if file_size > self.LARGE_FILE_BYTES:
            # Floor division: file.seek() rejects floats on Python 3.
            self.seek = file_size // 5
        else:
            self.seek = 0

    def _hour_prefixes(self, now):
        """Return the 'dd/Mon/YYYY:HH:' prefixes of every hour in the window."""
        prefixes = []
        moment = now - self.interval
        while True:
            prefix = time.strftime('%d/%b/%Y:%H:', time.localtime(moment))
            if prefix not in prefixes:
                prefixes.append(prefix)
            if moment >= now:
                break
            moment = min(moment + 3600, now)
        return tuple(prefixes)

    def _parse_timestamp(self, line):
        """Return the epoch time of an access-log line, or None if unusable."""
        match = self.LOG_PATTERN.search(line)
        if match is None:
            return None
        # '[02/Sep/2018:09:44:13 +0800]' -> '02/Sep/2018:09:44:13', '+0800'
        stamp = match.group('datatime').replace('[', '').replace(']', '')
        fields = stamp.split(' ')
        if len(fields) < 2:
            return None
        if fields[1] != self.EXPECTED_TZ:
            print("log format error")
            return None
        try:
            parsed = time.strptime(fields[0], self.TIME_FORMAT)
        except ValueError:
            return None
        return time.mktime(parsed)

    def count_pv(self, now=None):
        """Print and return the number of requests logged within the window.

        Args:
            now: Reference time as seconds since the epoch; defaults to the
                current time.

        Returns:
            The number of counted log lines; 0 when the log file is missing
            (an error message is printed instead of the count in that case).
        """
        # Check for the file before measuring it; os.path.getsize() raises
        # on a missing file.
        if not os.path.exists(self.logfile):
            print('error:' + self.logfile + ' not existed.')
            return 0
        self.get_filesize()
        if now is None:
            now = time.time()
        # Cheap substring pre-filter.  It covers every hour the window
        # touches, so entries from the previous hour are not lost right
        # after an hour boundary.
        prefixes = self._hour_prefixes(now)
        num = 0
        # Read bytes and decode leniently: the start offset may fall in the
        # middle of a line or of a multi-byte character.
        with open(self.logfile, 'rb') as f:
            f.seek(self.seek, 0)
            for raw in f:
                line = raw.decode('utf-8', 'replace')
                if not any(prefix in line for prefix in prefixes):
                    continue
                time_stamp = self._parse_timestamp(line)
                if time_stamp is not None and now - time_stamp <= self.interval:
                    num += 1
        print(num)
        return num


if __name__ == '__main__':
    obj_StatusPV = StatusPV()
    obj_StatusPV.count_pv()