Python统计web日志中每天用户访问情况
背景
统计 Web(Nginx)日志中每天的用户访问情况:日志按天切分为多个 .log 文件,脚本针对每个文件统计总访问次数和唯一 IP 数量。
日志目录
import os
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager
# Global log-directory setting: the directory whose .log files are analysed.
LOG_DIR = "/var/log/nginx"
def get_log_access(file_name, count_dict, log_dir=None):
    """Count requests and distinct client IPs in one access-log file.

    On success the result is stored as
    ``count_dict[file_name] = {"total": <requests>, "ip": <distinct IPs>}``.
    When the file cannot be read nothing is stored and an error line is
    printed instead, so one bad file does not abort the other workers.

    Args:
        file_name: Name of the log file, relative to the log directory.
        count_dict: Mapping that receives the result (a ``Manager().dict()``
            proxy when this runs in a worker process; a plain dict works too).
        log_dir: Directory that contains ``file_name``. Defaults to the
            module-level ``LOG_DIR`` when omitted.
    """
    if log_dir is None:
        log_dir = LOG_DIR
    file_path = os.path.join(log_dir, file_name)

    unique_ips = set()
    total_count = 0
    try:
        # errors="replace": with strict decoding a single undecodable byte
        # raises UnicodeDecodeError and the counts for the whole file are lost.
        with open(file_path, "r", encoding="utf-8", errors="replace") as file_object:
            for line in file_object:
                # Blank lines are not requests.
                if not line.strip():
                    continue
                # The client address is the text before the first " - -";
                # if that field is a comma-separated list, keep the first entry.
                user_ip = line.split(" - -", maxsplit=1)[0].split(",")[0].strip()
                total_count += 1
                unique_ips.add(user_ip)
        # Publish the result only after the whole file was read successfully.
        count_dict[file_name] = {"total": total_count, "ip": len(unique_ips)}
    except FileNotFoundError:
        print(f"Error: File {file_name} not found in {log_dir}.")
    except Exception as e:
        # Deliberately broad: this is the outermost frame of a worker task,
        # and an escaping exception would otherwise sit unread in its future.
        print(f"Error processing file {file_name}: {e}")
def filter_log_files(log_dir):
    """Return the names of the ``.log`` files directly inside ``log_dir``.

    Only regular files (or symlinks to them) are returned; a directory whose
    name happens to end in ``.log`` is skipped because it cannot be read as a
    log. The list is sorted by name so the result is deterministic
    (``os.listdir`` returns entries in arbitrary order).

    Args:
        log_dir: Path of the directory to scan (not recursive).

    Returns:
        Sorted list of file names (not full paths) ending in ``.log``.

    Raises:
        FileNotFoundError: ``log_dir`` does not exist.
        OSError: ``log_dir`` could not be listed for any other reason
            (for example missing permissions or not a directory).
    """
    try:
        entries = os.listdir(log_dir)
    except FileNotFoundError as err:
        raise FileNotFoundError(f"The directory {log_dir} does not exist.") from err
    except OSError as err:
        # Keep the error in the OSError family instead of a bare Exception so
        # callers can still tell I/O failures apart from programming errors.
        raise OSError(f"Error accessing directory {log_dir}: {err}") from err

    return sorted(
        name
        for name in entries
        if name.endswith(".log") and os.path.isfile(os.path.join(log_dir, name))
    )
def run(max_workers=4):
    """Analyse every log file in ``LOG_DIR`` in parallel and print a report.

    Each ``.log`` file is handed to ``get_log_access`` in a worker process;
    the workers write their results into a manager-backed shared dict. One
    line per successfully processed file is printed, ordered by file name.

    Args:
        max_workers: Maximum number of worker processes (default 4).

    Raises:
        Whatever ``filter_log_files`` raises when ``LOG_DIR`` cannot be listed.
    """
    log_files = filter_log_files(LOG_DIR)
    if not log_files:
        # Nothing to analyse: do not start a manager and a pool for no work.
        return

    with Manager() as manager:
        count_dict = manager.dict()

        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            futures = {
                file_name: pool.submit(get_log_access, file_name, count_dict)
                for file_name in log_files
            }
        # Leaving the pool's ``with`` block has already waited for all tasks,
        # so no explicit shutdown() is needed and every future is finished.

        for file_name, future in futures.items():
            # A failure that escaped the worker function (e.g. a broken pool)
            # would otherwise stay hidden inside the future; report it.
            error = future.exception()
            if error is not None:
                print(f"Error processing file {file_name}: {error}")

        # Copy the results out while the manager process is still alive.
        results = dict(count_dict.items())

    # Sort by file name: the shared dict is filled in completion order,
    # which varies from run to run.
    for file_name in sorted(results):
        result = results[file_name]
        print(f"{file_name}: Total Requests = {result['total']}, Unique IPs = {result['ip']}")
# Script entry point. The guard keeps run() from executing when this module
# is imported rather than executed directly.
if __name__ == "__main__":
    run()
运行示例
[root@nginx script]# python3 log_ip_analyzer.py
20240811.log: Total Requests = 20528, Unique IPs = 253
20240812.log: Total Requests = 25023, Unique IPs = 125
20240813.log: Total Requests = 22023, Unique IPs = 323
20240814.log: Total Requests = 26356, Unique IPs = 364
20240815.log: Total Requests = 15563, Unique IPs = 89
本文来自博客园,作者:&UnstopPable,转载请注明原文链接:https://www.cnblogs.com/Unstoppable9527/p/18360685