统计列表中各元素出现次数的多种方法(collections.Counter 方法的应用)
应用1:
# 对['a','2',2,4,5,'2','b',4,7,'a',5,'d','a','z']该列表的数据进行计数统计
# 使用dict ----> 只需遍历一次,时间复杂度为O(n),这个一般面试会问到
# Count occurrences with a plain dict: a single pass over the data, with one
# dictionary lookup/update per element.
some_data = ['a', '2', 2, 4, 5, '2', 'b', 4, 7, 'a', 5, 'd', 'a', 'z']
count = {}
for item in some_data:
    # dict.get supplies 0 the first time an element is seen, so no explicit
    # membership test is needed.
    count[item] = count.get(item, 0) + 1
print(count)
# 使用defaultdict
from collections import defaultdict

# Count occurrences with a defaultdict: missing keys are created on first
# access with the value int() == 0, so the loop body is a bare increment.
some_data = ['a', '2', 2, 4, 5, '2', 'b', 4, 7, 'a', 5, 'd', 'a', 'z']
count = defaultdict(int)  # default value for unseen keys is the integer 0
for item in some_data:
    count[item] += 1
print(count)
# 使用list和set结合的方法
# Count occurrences by combining a set (distinct elements) with list.count.
some_data = ['a', '2', 2, 4, 5, '2', 'b', 4, 7, 'a', 5, 'd', 'a', 'z']
count_set = set(some_data)
# list.count rescans the whole list for every distinct element, so this
# approach does more work than the single-pass dict-based variants.
# The order of the resulting pairs follows set iteration order, which is
# not guaranteed.
lis = [(item, some_data.count(item)) for item in count_set]
print(lis)
# 使用collections.Counter方法
from collections import Counter

# Count occurrences with collections.Counter, which builds the
# element -> count mapping directly from the iterable.
some_data = ['a', '2', 2, 4, 5, '2', 'b', 4, 7, 'a', 5, 'd', 'a', 'z']
print(Counter(some_data))
应用2:
# 统计一篇英文文章内每个单词出现频率,并返回出现频率最高的前10个单词及其出现次数
from collections import Counter
import re

# Read the whole article from a.txt (UTF-8) into memory.
with open('a.txt', 'r', encoding='utf-8') as f:
    txt = f.read()

# Tally how many times each word occurs.
# re.findall(r'\w+') extracts the runs of word characters directly; unlike
# re.split on \W+, it never produces empty-string tokens when the text starts
# or ends with punctuation/whitespace. The raw string avoids the invalid
# "\W" escape sequence in a normal string literal.
# NOTE(review): matching is case-sensitive, so "The" and "the" are counted
# separately -- confirm whether case-folding is wanted.
c = Counter(re.findall(r'\w+', txt))
print(c)

# The 10 most frequent words with their counts, most frequent first.
ret = c.most_common(10)
print(ret)