Python: Log Analysis
# Print every whitespace-separated field of every line in the file.
# `file` (the path to read) is expected to be defined before this snippet runs.
with open(file) as f:
    for line in f:
        for field in line.split():
            # Fixed: the original printed the undefined name `filed` (NameError).
            print(field)
import re
from collections import defaultdict

# Sample access-log line used to exercise the splitter below.
valve = '''51.222.253.18 - - [02/Mar/2022:02:30:16 +0800] "GET /robots.txt HTTP/1.1" 301 169 "-" "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)" "-" '''


def valour(b: str):
    """Split a log line into fields, keeping bracketed and quoted groups whole.

    The line is split on whitespace. A word that starts with ``[`` or ``"``
    opens a group; the following words are joined back with single spaces
    until a word ends with the matching closing delimiter (``]`` for ``[``,
    ``"`` for ``"``). The delimiters themselves are removed from the result.

    :param b: the raw log line
    :return: list of field strings; an unterminated group at the end of the
        line is discarded
    """
    closers = {'[': ']', '"': '"'}
    valor = []
    parts = []     # words collected for the group currently being read
    closer = None  # closing delimiter being waited for; None outside a group
    for word in b.split():
        if closer is None:
            expected = closers.get(word[:1])
            if expected is None:
                # Plain field, no delimiters involved.
                valor.append(word)
            elif word.endswith(expected):
                # Group opened and closed within a single word, e.g. "-".
                valor.append(word.strip('[]"'))
            else:
                closer = expected
                parts = [word[1:]]
        elif word.endswith(closer):
            # Only the delimiter matching the opener ends the group, so a
            # stray ']' inside a quoted field no longer cuts it short.
            parts.append(word[:-1])
            valor.append(' '.join(parts))
            closer = None
        else:
            parts.append(word)
    return valor


print(valour(valve))
import re
import datetime
from collections import defaultdict

# Sample access-log line used to exercise the parser below.
valve = '''51.222.253.18 - - [02/Mar/2022:02:30:16 +0800] "GET /robots.txt HTTP/1.1" 301 169 "-" "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)" "-" '''


def normalize_time(timestr):
    """Parse a log timestamp such as ``02/Mar/2022:02:30:16 +0800``.

    :param timestr: timestamp text without the surrounding brackets
    :return: timezone-aware ``datetime.datetime``
    """
    fmtstr = '%d/%b/%Y:%H:%M:%S %z'
    return datetime.datetime.strptime(timestr, fmtstr)


def normalize_request(request: str):
    """Split a request line into a dict with method, url and protocol.

    :param request: e.g. ``GET /robots.txt HTTP/1.1``
    :return: dict keyed by ``method``, ``url``, ``protocol``; keys with no
        corresponding word are omitted
    """
    return dict(zip(('method', 'url', 'protocol'), request.split()))


# Field name for each position of the split line; '' marks a skipped position.
names = ['remote', '', '', 'datetime', 'request', 'status', 'size', '', 'useragent']
# Converter for each position; None keeps the raw string.
dispose = [None, None, None, normalize_time, normalize_request, int, int, None, None]


def valour(b: str):
    """Split a log line into fields, keeping bracketed and quoted groups whole.

    The line is split on whitespace. A word that starts with ``[`` or ``"``
    opens a group; the following words are joined back with single spaces
    until a word ends with the matching closing delimiter (``]`` for ``[``,
    ``"`` for ``"``). The delimiters themselves are removed from the result.

    :param b: the raw log line
    :return: list of field strings; an unterminated group at the end of the
        line is discarded
    """
    closers = {'[': ']', '"': '"'}
    valor = []
    parts = []     # words collected for the group currently being read
    closer = None  # closing delimiter being waited for; None outside a group
    for word in b.split():
        if closer is None:
            expected = closers.get(word[:1])
            if expected is None:
                valor.append(word)
            elif word.endswith(expected):
                # Group opened and closed within a single word, e.g. "-".
                valor.append(word.strip('[]"'))
            else:
                closer = expected
                parts = [word[1:]]
        elif word.endswith(closer):
            # Only the delimiter matching the opener ends the group, so a
            # stray ']' inside a quoted field no longer cuts it short.
            parts.append(word[:-1])
            valor.append(' '.join(parts))
            closer = None
        else:
            parts.append(word)
    return valor


vandal = valour(valve)

# Build the named record: skip unnamed positions, convert where a converter
# is configured, keep the raw string otherwise.
vane = {}
for name, convert, raw in zip(names, dispose, vandal):
    if not name:
        continue
    vane[name] = convert(raw) if convert else raw

print(vandal)
print(vane)
import datetime


def normalize_time(time_str):
    """Convert a log timestamp like ``02/Mar/2022:02:30:16 +0800`` to a datetime.

    :param time_str: timestamp text in day/month-abbreviation/year:time offset form
    :return: timezone-aware ``datetime.datetime``
    """
    log_time_format = '%d/%b/%Y:%H:%M:%S %z'
    parsed = datetime.datetime.strptime(time_str, log_time_format)
    return parsed
def distill(line) -> dict:
    """Match one log line and return its named groups, converted per field.

    Relies on two names defined outside this snippet: ``regex`` (used via
    ``regex.match``) and ``ops`` (used via ``ops.get`` to look up a converter
    by group name). Groups without an entry in ``ops`` keep their matched text.

    :param line: the log line to match
    :return: dict mapping group name to converted value
    :raises Exception: with message ``Not Match`` when the line does not match
    """
    matcher = regex.match(line)
    if not matcher:
        raise Exception('Not Match')

    def keep(value):
        # Fallback converter: leave the matched text unchanged.
        return value

    converted = {}
    for key, raw in matcher.groupdict().items():
        converted[key] = ops.get(key, keep)(raw)
    return converted
import datetime
import re

# Sample access-log line parsed at the bottom of this snippet.
valve = '''51.222.253.18 - - [02/Mar/2022:02:30:16 +0800] "GET /robots.txt HTTP/1.1" 301 169 "-" "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)" "-" '''

# One named group per field of interest.
pattern = r'(?P<remote>[\d\.]{7,}) - - \[(?P<datetime>[^\[\]]+)\] "(?P<request>[^"]+)" (?P<status>\d+) (?P<size>\d+) "(?P<referer>[^"]+)" "(?P<useragent>[^"]+)" "-"'
regex = re.compile(pattern, flags = re.M)


def normalize_time(time: str):
    """Parse a timestamp such as ``02/Mar/2022:02:30:16 +0800`` into a datetime."""
    return datetime.datetime.strptime(time, '%d/%b/%Y:%H:%M:%S %z')


def normalize_request(request: str):
    """Split a request line into a dict keyed by method, url and protocol."""
    keys = ('method', 'url', 'protocol')
    return dict(zip(keys, request.split()))


names = ['remote', '', '', 'datetime', 'request', 'status', 'size', '', 'useragent']

# Converter per group name; groups not listed here stay as matched text.
# The 'request' entry is an inline equivalent of normalize_request.
dispose = {
    'datetime': normalize_time,
    'request': lambda request: dict(zip(('method', 'url', 'protocol'), request.split())),
    'status': int,
    'size': int,
}


def distill(line, regex: re.Pattern):
    """Return the named groups of ``regex`` matched at the start of ``line``.

    :param line: the log line to match
    :param regex: compiled pattern with named groups
    :return: dict of group name to matched text (no conversion applied)
    """
    return regex.match(line).groupdict()


def _apply(field, raw):
    # Run the converter registered for this field, or pass the text through.
    return dispose.get(field, lambda v: v)(raw)


vane = {field: _apply(field, raw) for field, raw in distill(valve, regex).items()}
print(vane)
import datetime
import random


def semen():
    """Endlessly yield samples: a random odd ``value`` in [1, 99] and the current time."""
    while True:
        sample = {'value': random.randrange(1, 100, 2), 'datetime': datetime.datetime.now()}
        yield sample


germ = semen()

# Take the first three samples from the generator.
items = []
for _ in range(3):
    items.append(next(germ))


def handler(iterable):
    """Return the arithmetic mean of the ``value`` entries of the given records.

    :param iterable: records (dicts) that each carry a ``value`` key
    :return: mean of the values
    """
    valor = list(map(lambda item: item['value'], iterable))
    return sum(valor) / len(valor)


print(items)
print(f'{handler(items):.2f}')
import random
import datetime
import time


class UTC(datetime.tzinfo):
    """Fixed-offset time zone, ``offset`` hours east of UTC."""

    def __init__(self, offset):
        super().__init__()
        self.__offset = offset

    def utcoffset(self, dt: datetime.datetime) -> datetime.timedelta:
        """Return the constant offset from UTC."""
        return datetime.timedelta(hours = self.__offset)

    def tzname(self, dt: datetime.datetime) -> str:
        """Return a label such as ``UTC+8`` or ``UTC-5``."""
        # Explicit sign formatting avoids the malformed 'UTC+-5' for negative offsets.
        return f'UTC{self.__offset:+}'

    def dst(self, dt: datetime.datetime) -> datetime.timedelta:
        """Return the daylight-saving adjustment, which is always zero here."""
        # A fixed-offset zone has no DST; the original returned the full UTC
        # offset, which misreports the zone's standard offset.
        return datetime.timedelta(0)


def semen():
    """Endlessly yield one sample per second: a random odd value and the current UTC+8 time."""
    while True:
        yield {'value': random.randrange(1, 100, 2), 'datetime': datetime.datetime.now(UTC(8))}
        time.sleep(1)


germ = semen()


def handler(iterable):
    """Return the arithmetic mean of the ``value`` entries of the given records."""
    valor = [item['value'] for item in iterable]
    return sum(valor) / len(valor)


def window(semen, handler: callable, width: int, interval: int):
    """
    Sliding time-window processing.

    Records are buffered as they arrive. Whenever at least ``interval``
    seconds separate the newest record from the previous trigger point,
    ``handler`` is called on the buffer and its result is printed with two
    decimals; afterwards only records newer than ``width - interval`` seconds
    before the newest record are kept.

    :param semen: data source; an iterator of dicts carrying a timezone-aware ``datetime``
    :param handler: function applied to the buffered records
    :param width: width of the data window, in seconds
    :param interval: time between two handler runs, in seconds
    :return: None once the source is exhausted; records buffered after the
        last trigger are not processed
    """
    start = datetime.datetime.strptime('2021-1-1 0:0:0 +0800', '%Y-%m-%d %H:%M:%S %z')
    current = start
    buffer = []  # records waiting to be processed in the current window
    delta = datetime.timedelta(seconds = width - interval)
    # Iterating (instead of calling next() in `while True`) ends cleanly when
    # the source runs out rather than leaking StopIteration to the caller.
    for data in semen:
        if data:
            buffer.append(data)
            current = data['datetime']
        if (current - start).total_seconds() >= interval:
            ret = handler(buffer)
            print(f'{ret:.2f}')
            buffer = [item for item in buffer if item['datetime'] > current - delta]
            start = current


# The demo sleeps and then loops forever, so it only runs when the file is
# executed as a script; importing the module no longer hangs.
if __name__ == '__main__':
    items = [next(germ) for _ in range(3)]
    # print(items)
    # print(f'{handler(items):.2f}')
    window(germ, handler, 5, 3)
import re
import datetime

# Sample access-log line matching the pattern below.
valve = '''51.222.253.18 - - [02/Mar/2022:02:30:16 +0800] "GET /robots.txt HTTP/1.1" 301 169 "-" "Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)" "(" '''

# One named group per field of the log line.
pattern = r'(?P<remote>[\d\.]{7,}) - - \[(?P<datetime>[^\[\]]+)\] "(?P<request>[^"]+)" (?P<status>\d+) (?P<size>\d+) "(?P<referer>[^"]+)" "(?P<useragent>[^"]+)" "(?P<x_forwarded_for>[^"]+)"'
regex = re.compile(pattern, flags = re.M)

# Converter per group name; groups not listed here stay as matched text.
dispose = {
    'datetime': lambda time: datetime.datetime.strptime(time, '%d/%b/%Y:%H:%M:%S %z'),
    'request': lambda request: dict(zip(('method', 'url', 'protocol'), request.split())),
    'status': int,
    'size': int
}


def distill(line, regex: re.Pattern):
    """Parse one log line into a dict of converted fields.

    :param line: the log line to match
    :param regex: compiled pattern with named groups
    :return: dict of group name to value (converted via ``dispose`` where an
        entry exists), or an empty dict when the line does not match
    """
    matcher = regex.match(line)
    if not matcher:
        return {}
    return {k: dispose.get(k, lambda v: v)(v) for k, v in matcher.groupdict().items()}


def vandal(file):
    """Yield a parsed record for every matching line of a log file.

    Lines that do not match the pattern are reported on stdout and skipped.

    :param file: path of the log file to read (UTF-8 text)
    :return: generator of dicts as produced by ``distill``
    """
    # Read-only mode: the original 'r+t' also demanded write permission even
    # though the file is never written, so read-only logs could not be opened.
    with open(file = file, mode = 'rt', encoding = 'utf8') as f:
        for line in f:
            d = distill(line, regex)
            if d:
                yield d
            else:
                print(f'unqualified line {line}')


# Example usage:
#     for d in vandal('valour.log'):
#         print(d)
#     print(distill(valve, regex))


def window(semen, handler: callable, width: int, interval: int):
    """
    Sliding time-window processing.

    Records are buffered as they arrive. Whenever at least ``interval``
    seconds separate the newest record from the previous trigger point,
    ``handler`` is called on the buffer and its result is printed with two
    decimals; afterwards only records newer than ``width - interval`` seconds
    before the newest record are kept.

    :param semen: data source; an iterator of dicts carrying a timezone-aware ``datetime``
    :param handler: function applied to the buffered records
    :param width: width of the data window, in seconds
    :param interval: time between two handler runs, in seconds
    :return: None once the source is exhausted; records buffered after the
        last trigger are not processed
    """
    start = datetime.datetime.strptime('2021-1-1 0:0:0 +0800', '%Y-%m-%d %H:%M:%S %z')
    current = start
    buffer = []  # records waiting to be processed in the current window
    delta = datetime.timedelta(seconds = width - interval)
    # Iterating (instead of calling next() in `while True`) ends cleanly when
    # a finite source such as a log file runs out, rather than leaking
    # StopIteration to the caller.
    for data in semen:
        if data:
            buffer.append(data)
            current = data['datetime']
        if (current - start).total_seconds() >= interval:
            ret = handler(buffer)
            print(f'{ret:.2f}')
            buffer = [item for item in buffer if item['datetime'] > current - delta]
            start = current
from queue import Queue
import random

# FIFO queue demo; maxsize of 0 places no upper bound on the queue size.
q = Queue(maxsize = 0)

# Enqueue two random integers in [1, 100].
for _ in range(2):
    q.put(random.randint(1, 100))

# Dequeue and print both of them.
for _ in range(2):
    print(q.get())

# A plain blocking get on the now-empty queue would wait indefinitely:
# print(q.get())

# With a timeout the call waits up to 2 seconds and then raises queue.Empty.
print(q.get(block=True,timeout=2))
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律