切割web日志
# Sample access-log line used as input by the examples below.
# The pieces are joined by implicit string concatenation into one line.
log = (
    '10.1.1.95 - e800 [18/Mar/2005:12:21:42 +0800] '
    '"GET /stats/awstats.pl?config=e800 HTTP/1.1" 200 899 "http://10.1.1.1/pv/" '
    '"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Maxthon)"'
)
def jpm(b: str) -> list:
    """Split a log line into fields, keeping bracketed/quoted fields whole.

    The line is split on whitespace. A word starting with ``[`` or ``"``
    opens a field that runs until a word ending with ``]`` or ``"``; the
    words in between are re-joined with single spaces and the delimiters
    are removed.

    Args:
        b: The log line to split.

    Returns:
        The list of field strings. A delimited field that is never closed
        is not added to the result.
    """
    lst = []
    flag = False  # True while collecting the words of a multi-word field
    tmp = ''
    for word in b.split():
        if not flag and word.startswith(('[', '"')):
            if word.endswith((']', '"')):
                # Opens and closes in one word, e.g. "-" or "http://host/".
                lst.append(word.strip('[]"'))
            else:
                flag = True
                tmp = word[1:]  # drop the opening delimiter
            continue
        if flag:
            if word.endswith((']', '"')):
                lst.append(tmp + ' ' + word[:-1])  # drop the closing delimiter
                flag = False
            else:
                tmp += ' ' + word
            continue
        lst.append(word)
    return lst
# Demo: split the sample log line with jpm() and print the resulting fields.
print(jpm(log))
def dip(sub: str) -> list:
    """Split a log line into fields, keeping bracketed/quoted fields whole.

    Words are produced by whitespace splitting. A word starting with ``[``
    or ``"`` opens a field; following words are appended (joined by single
    spaces) until one ends with ``]`` or ``"``. The assembled text then has
    any leading/trailing ``[``, ``]`` and ``"`` characters stripped.

    Args:
        sub: The log line to split.

    Returns:
        The list of field strings. A delimited field that is never closed
        is not added to the result.
    """
    lst = []
    flag = False  # True while collecting the words of a multi-word field
    tmp = ''
    for word in sub.split():
        # Testing the flag first skips the prefix checks while inside a field.
        if not flag and word.startswith(('[', '"')):
            if word.endswith((']', '"')):
                # Single-word delimited field such as "-".
                lst.append(word.strip('[]"'))
            else:
                tmp = word
                flag = True
            continue
        if flag:
            tmp += ' ' + word
            if word.endswith((']', '"')):
                flag = False
                lst.append(tmp.strip('[]"'))
            continue
        lst.append(word)
    return lst
# Demo: split the sample log line with dip() and print the resulting fields.
print(dip(log))
import datetime
# Sample access-log line used as input by the examples below.
# The pieces are joined by implicit string concatenation into one line.
log = (
    '10.1.1.95 - e800 [18/Mar/2005:12:21:42 +0800] '
    '"GET /stats/awstats.pl?config=e800 HTTP/1.1" 200 899 "http://10.1.1.1/pv/" '
    '"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Maxthon)"'
)
def convert_time(timestr: str) -> datetime.datetime:
    """Parse a timestamp such as ``18/Mar/2005:12:21:42 +0800``.

    The parsed value and its type are printed before being returned.

    Args:
        timestr: Timestamp text in ``%d/%b/%Y:%H:%M:%S %z`` format.

    Returns:
        The parsed ``datetime.datetime``.
    """
    parsed = datetime.datetime.strptime(timestr, '%d/%b/%Y:%H:%M:%S %z')
    print(parsed, type(parsed))
    return parsed
def convert_request(request: str) -> dict:
    """Map the whitespace-separated parts of a request line to named keys.

    Args:
        request: Request text such as ``GET /path HTTP/1.1``.

    Returns:
        A dict pairing ``method``, ``url`` and ``protocol`` with the parts
        in order; missing parts leave the later keys out.
    """
    keys = ('method', 'url', 'protocol')
    parts = request.split()
    return dict(zip(keys, parts))
def jix(b: str) -> list:
    """Split a log line into fields, keeping bracketed/quoted fields whole.

    The line is split on whitespace. A word starting with ``[`` or ``"``
    opens a field that ends at the first word ending with ``]`` or ``"``;
    the first and last characters of the assembled field (the delimiters)
    are dropped.

    Args:
        b: The log line to split.

    Returns:
        The list of field strings. A delimited field that is never closed
        is not added to the result.
    """
    lst = []
    flag = False  # True while collecting the words of a multi-word field
    tmp = ''
    for word in b.split():
        opens = word.startswith(('[', '"'))
        closes = word.endswith((']', '"'))
        if not flag:
            if not opens:
                lst.append(word)
            elif closes:
                lst.append(word[1:-1])  # single-word delimited field
            else:
                flag = True
                tmp = word[1:]
            continue
        # Inside a multi-word field: extend it, or finish on a closing word.
        if closes:
            lst.append(tmp + ' ' + word[:-1])
            flag = False
        else:
            tmp += ' ' + word
    return lst
# Demo: split the sample log line with jix() and print the resulting fields.
print(jix(log))
def pim():
    """Build a dict of named, converted fields from the module-level ``log``.

    The fields returned by ``jix(log)`` are paired by position with a field
    name and an optional converter; fields without a converter are stored
    unchanged.

    Returns:
        A dict mapping each field name to its (possibly converted) value.
    """
    names = ['remote', 'logname', 'username', 'datetime',
             'request', 'status', 'size', 'referer', 'useragent']
    # One entry per field position; None means "keep the raw string".
    converters = (None, None, None, convert_time,
                  convert_request, int, int, None, None)
    record = {}
    for idx, raw in enumerate(jix(log)):
        convert = converters[idx]
        value = raw if convert is None else convert(raw)
        record.setdefault(names[idx], value)
    return record
# Demo: print the named/converted fields built by pim().
print(pim())
import re,datetime
# Sample access-log line used as input by the examples below.
# The pieces are joined by implicit string concatenation into one line.
log = (
    '10.1.1.95 - e800 [18/Mar/2005:12:21:42 +0800] '
    '"GET /stats/awstats.pl?config=e800 HTTP/1.1" 200 899 "http://10.1.1.1/pv/" '
    '"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Maxthon)"'
)
def extract(line):
    """Extract the named fields of an access-log line with a regex.

    Args:
        line: The log line to parse; the pattern is matched at its start.

    Returns:
        A dict of the named groups ``remote``, ``logname``, ``username``,
        ``time``, ``request``, ``status``, ``size``, ``referer`` and
        ``useragent``, all as strings.

    Raises:
        AttributeError: If the line does not match the pattern (the match
            result is ``None``).
    """
    # Raw string so the regex backslashes are not read as string escapes.
    pattern = r'''(?P<remote>[\d\.]{7,})(?= ) (?P<logname>[\w-]+) (?P<username>[\w-]+) \[(?P<time>[^][]+)\] "(?P<request>[^"]+)" (?P<status>\d{3}) (?P<size>\d+) "(?P<referer>[^"]+)" "(?P<useragent>[^"]+)"'''
    regex = re.compile(pattern, flags=re.S | re.I)
    matcher = regex.match(line)
    return matcher.groupdict()
# Demo: print the raw (string) fields extracted from the sample log line.
print(extract(log))
def convert_time(timestr: str) -> datetime.datetime:
    """Parse a timestamp such as ``18/Mar/2005:12:21:42 +0800``.

    Args:
        timestr: Timestamp text in ``%d/%b/%Y:%H:%M:%S %z`` format.

    Returns:
        The parsed ``datetime.datetime``.
    """
    return datetime.datetime.strptime(timestr, '%d/%b/%Y:%H:%M:%S %z')
def convert_request(request: str) -> dict:
    """Map the whitespace-separated parts of a request line to named keys.

    Args:
        request: Request text such as ``GET /path HTTP/1.1``.

    Returns:
        A dict pairing ``method``, ``url`` and ``protocol`` with the parts
        in order; missing parts leave the later keys out.
    """
    # Split first: zipping the string itself would pair the keys with its
    # first three characters instead of its three parts.
    return dict(zip(('method', 'url', 'protocol'), request.split()))
# Converters keyed by regex group name; groups without an entry are kept
# as the raw matched string by the code that looks them up.
funcs = dict(
    time=convert_time,
    status=int,
    size=int,
    request=convert_request,
)
# Build the converted record with an explicit loop: use the converter
# registered for the field name, otherwise keep the value unchanged.
ciz = {}
for k, v in extract(log).items():
    if k in funcs:
        ciz[k] = funcs[k](v)
    else:
        ciz[k] = v
print(ciz)

# The same conversion written as a single dict comprehension.
bb = {k: (funcs[k](v) if k in funcs else v) for k, v in extract(log).items()}
print(bb)
import re,datetime
# Sample access-log line used as input by the examples below.
# The pieces are joined by implicit string concatenation into one line.
log = (
    '10.1.1.95 - e800 [18/Mar/2005:12:21:42 +0800] '
    '"GET /stats/awstats.pl?config=e800 HTTP/1.1" 200 899 "http://10.1.1.1/pv/" '
    '"Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; Maxthon)"'
)
# Access-log pattern with one named group per field. Written as a raw
# string so the regex backslashes are not read as string escapes, and
# compiled once at module level for reuse by the functions below.
pattern = r'''(?P<remote>[\d\.]{7,})(?= ) (?P<logname>[\w-]+) (?P<username>[\w-]+) \[(?P<time>[^][]+)\] "(?P<request>[^"]+)" (?P<status>\d{3}) (?P<size>\d+) "(?P<referer>[^"]+)" "(?P<useragent>[^"]+)"'''
regex = re.compile(pattern, flags=re.M | re.I)
def extract(line):
    """Match a log line against the module-level ``regex`` and convert it.

    Each named group is passed through the converter registered for it in
    the module-level ``funcs`` mapping; groups without a converter are
    kept as matched.

    Args:
        line: The log line to parse.

    Returns:
        A dict of converted fields, or ``None`` when the line does not
        match.
    """
    matcher = regex.match(line)
    if matcher:
        # Dict comprehension (braces): parentheses around ``k: v`` pairs
        # are a syntax error.
        return {k: funcs.get(k, lambda m: m)(v) for k, v in matcher.groupdict().items()}
    return None
# Converters keyed by regex group name; groups without an entry are kept
# as the raw matched string by the code that looks them up.
funcs = {
    'time': convert_time,
    'status': int,
    'size': int,
    'request': convert_request,
}
lines = []

# Variant 1: for an extract() that raises on a non-matching line.
for line in lines:
    try:
        d = extract(line)
    except Exception:
        # Skip lines that fail to parse. Catching Exception rather than
        # using a bare except keeps KeyboardInterrupt/SystemExit working.
        continue

# Variant 2: for an extract() that returns None on a non-matching line.
for line in lines:
    d = extract(line)
    if d:
        pass
    else:
        # TODO: count the malformed log lines
        pass
def load(path):
    """Yield the extracted fields of each line in a log file.

    Every line is passed to ``extract``; lines for which it returns a
    falsy value are skipped.

    Args:
        path: Path of the UTF-8 encoded log file to read.

    Yields:
        The truthy value returned by ``extract`` for each line.
    """
    # Read-only text mode: the file is never written, and 'rt+' would
    # needlessly require write permission on the log file.
    with open(path, mode='rt', encoding='utf8') as f:
        for line in f:
            fields = extract(line)
            if fields:
                yield fields
            # TODO: lines that do not produce fields are dropped for now
def extract(sub):
    """Return the text of the first match of the module-level ``regex``.

    Args:
        sub: The string to search.

    Returns:
        The whole matched substring.

    Raises:
        Exception: With message ``'None match'`` when nothing matches.
    """
    matcher = regex.search(sub)
    if not matcher:
        raise Exception('None match')
    return matcher.group()
# Demo: print the matched text for `n`.
# NOTE(review): `n` is not defined in the visible part of this file — confirm where it comes from.
print(extract(n))
def extract(line) -> dict:
    """Parse a log line into converted fields, raising when it does not match.

    Uses the module-level ``regex`` and the ``funcs`` converter mapping;
    groups without a converter are kept as matched.

    Args:
        line: The log line to parse.

    Returns:
        A dict mapping each named group to its converted value.

    Raises:
        Exception: With message ``"None match"`` when the line does not match.
    """
    matcher = regex.match(line)
    if not matcher:
        raise Exception("None match")
    converted = {}
    for k, v in matcher.groupdict().items():
        converted[k] = funcs[k](v) if k in funcs else v
    return converted
def extract(line) -> dict:
    """Parse a log line into converted fields, returning None when it does not match.

    Uses the module-level ``regex`` and the ``funcs`` converter mapping;
    groups without a converter are kept as matched.

    Args:
        line: The log line to parse.

    Returns:
        A dict mapping each named group to its converted value, or ``None``
        when the line does not match.
    """
    matcher = regex.match(line)
    if not matcher:
        return None
    return {k: (funcs[k](v) if k in funcs else v) for k, v in matcher.groupdict().items()}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律