统计日志的不同条数
"""Count matching log records per key, then build a histogram of those counts.

Two chained map/reduce jobs run on a ``multiprocessing`` pool:

1. ``mapper_match`` / ``reduce_match``: total the matching records per key.
2. ``mapper_count`` / ``reducer_count``: count how many keys share each total.
"""
import bz2
import collections
import glob
import itertools
import multiprocessing
import operator


class MapReduce(object):
    """Minimal map/reduce driver backed by a ``multiprocessing.Pool``."""

    def __init__(self, map_func, reduce_func, num_workers=None):
        """Store the two phase callables and start the worker pool.

        map_func: called with one input item; must return an iterable of
            ``(key, value)`` pairs.
        reduce_func: called with one ``(key, [values])`` pair; its return
            value becomes one element of the final result.
        num_workers: forwarded to ``multiprocessing.Pool``; ``None`` lets
            the pool choose its own size.
        """
        self.map_func = map_func
        self.reduce_func = reduce_func
        self.pool = multiprocessing.Pool(num_workers)

    def partition(self, mapped_values):
        """Group ``(key, value)`` pairs into ``(key, [values])`` items."""
        partitioned = collections.defaultdict(list)
        for key, value in mapped_values:
            partitioned[key].append(value)
        return partitioned.items()

    def __call__(self, inputs, chunksize=1):
        """Run the map phase, group by key, run the reduce phase.

        Returns the list produced by mapping ``reduce_func`` over the
        grouped data.
        """
        map_responses = self.pool.map(
            self.map_func, inputs, chunksize=chunksize)
        partitioned_data = self.partition(
            itertools.chain.from_iterable(map_responses))
        return self.pool.map(self.reduce_func, partitioned_data)


def mapper_match(one_file):
    """Return ``(field4, 1)`` for every matching line of a bz2 log file.

    A line matches when, after splitting on whitespace, field 3 is
    ``'web'`` and field 5 is ``'0'``. Lines with fewer than six fields are
    skipped instead of raising ``IndexError``.
    """
    output = []
    with bz2.BZ2File(one_file) as handle:
        for raw_line in handle:
            # BZ2File yields bytes; decode so the str comparisons below can
            # match. Assumes UTF-8-compatible log text -- TODO confirm.
            fields = raw_line.decode('utf-8', 'replace').split()
            if len(fields) < 6:
                continue
            if fields[3] == 'web' and fields[5] == '0':
                output.append((fields[4], 1))
    # The map phase chains the per-file lists together, so the list must be
    # returned; a missing return would hand ``None`` to itertools.chain.
    return output


def reduce_match(item):
    """Collapse ``(cookie, [1, 1, ...])`` into ``(cookie, total)``."""
    cookie, occurrences = item
    return (cookie, sum(occurrences))


def mapper_count(item):
    """Turn ``(cookie, total)`` into ``[(total, 1)]``; the key is dropped."""
    _, count = item
    return [(count, 1)]


def reducer_count(item):
    """Collapse ``(freq, [1, 1, ...])`` into ``(freq, number_of_keys)``."""
    freq, occurrences = item
    return (freq, sum(occurrences))


def main():
    """Run both jobs and print ``freq count`` pairs, largest count first."""
    # 'sssssss' is the placeholder from the original listing; replace it
    # with the real glob pattern of the .bz2 log files. The map phase needs
    # a list of paths: a bare string would be iterated character by
    # character.
    input_files = glob.glob('sssssss')

    match_job = MapReduce(mapper_match, reduce_match)
    cookie_freq = match_job(input_files)

    # The second job maps with mapper_count; using reducer_count as the
    # mapper would call sum() on an int.
    count_job = MapReduce(mapper_count, reducer_count)
    freq_histogram = count_job(cookie_freq)

    freq_histogram.sort(key=operator.itemgetter(1), reverse=True)
    for key, value in freq_histogram:
        print(key, value)


# Guard the driver so worker processes that re-import this module (spawn
# start method) do not start the jobs again.
if __name__ == '__main__':
    main()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律
2017-06-11 布尔值数据类型