python pandas学习记录 二
api_access_20200821.log 日志示例：
3.16.25.4 - - [21/Aug/2020:00:00:02 +0800] "GET /erp/scp/connect/health HTTP/1.1" 200 54 0.060 "-" "-"
1.4.134.24 - - [21/Aug/2020:00:00:02 +0800] "GET /erp/scp/connect/health HTTP/1.1" 200 54 0.733 "-" "-"
1.4.134.24 - - [21/Aug/2020:00:00:02 +0800] "GET /api/index/home HTTP/1.1" 200 65 0.003 "-" "-"
1.6.227.186 - - [21/Aug/2020:00:00:08 +0800] "GET /erp/android/query/patch?versionName=2.2.7 HTTP/1.1" 200 50 0.026 "device:android22;model:vivovivo V3M A;version:2.2.7;imei:862350031383868" "0e04d119bb"
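# 在 git 窗口中按空格切分日志，提取第 7、11 个字段（在上面的示例日志中分别是请求 URL 和形如 0.060 的数值），生成 csv 文件：
cut -d' ' -f7,11 api_access_20200821.log >api_access_20200821.csv
# 文件生成后，用文本编辑器打开 csv，在首行输入以空格分隔的列标题；URL 列必须命名为 url（例如 `url time`），因为后面的脚本会读取 data['url']。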
# 过滤掉包含 .png、.js、/app/ 的行，结果另存为 21ok.csv：
grep -v .png api_access_20200821.csv |grep -v .js |grep -v '/app/' > 21ok.csv
开始解析
from pathlib import Path

import pandas as pd

# Static assets were stripped from the raw logs beforehand with shell commands
# such as:
# grep -v '.html' t21.log | grep -v '.js' | grep -v '.css' | grep -v '.png' | grep -v '.txt' | grep -v '.jpg' | grep -v '.woff' | grep -v '.ttf' | grep -v '.ico'| grep -v '/check/image' > t21_api.log
# grep -v '.html' t25.log | grep -v '.js' | grep -v '.css' | grep -v '.png' | grep -v '.txt' | grep -v '.jpg' | grep -v '.woff' | grep -v '.ttf' | grep -v '.ico'| grep -v '/check/image' > t25_api.log


def a(f):
    """Aggregate the numeric columns of a log extract per URI and export to Excel.

    The file is read as a space-separated table with a header row. A ``uri``
    column is derived from ``url`` by dropping everything from the first ``?``
    onwards, the rows are grouped by ``uri``, and ``count``/``min``/``max``/
    ``mean`` are computed for every numeric column. The parsed table, the
    aggregated table and its type are printed, and the aggregated table is
    written next to the input file as ``agg_<file name>.xlsx``.

    Args:
        f: Path of the space-separated input file; it must contain a ``url``
            column and at least one numeric column.

    Raises:
        KeyError: If the input has no ``url`` column.
        ValueError: If the input has no numeric column to aggregate.
    """
    data = pd.read_csv(f, sep=' ')
    data['uri'] = data['url'].str.split('?').str[0]
    print(data)

    # Only numeric columns can be averaged; including the textual ``url``
    # column makes ``mean`` raise TypeError on pandas 2.x.
    numeric_columns = list(data.select_dtypes(include='number').columns)
    if not numeric_columns:
        raise ValueError('no numeric columns to aggregate in ' + str(f))

    agg = data.groupby('uri')[numeric_columns].agg(['count', 'min', 'max', 'mean'])
    print(agg)
    print(type(agg))

    # Prefix the file name only, so inputs given with a directory component
    # still produce a valid output path.
    source = Path(f)
    agg.to_excel(source.with_name('agg_' + source.name + '.xlsx'))


if __name__ == '__main__':
    # a('t21_api.log')
    # a('t25_api.log')
    a('21ok.csv')