小练习
from collections import Counter
from itertools import groupby
from math import ceil
from operator import itemgetter


# ---------------------------------------------------------------------------
# 1. Find the position of the n-th occurrence
# ---------------------------------------------------------------------------
def search_n(s, c, n):
    """Return the index of the n-th occurrence of ``c`` in ``s``.

    Args:
        s: Any iterable (a string in the examples below).
        c: The item to look for; compared with ``==``.
        n: 1-based occurrence number.

    Returns:
        The 0-based index of the n-th match, or -1 when ``s`` holds fewer
        than ``n`` matches (which includes every ``n < 1``).
    """
    size = 0
    for i, x in enumerate(s):
        if x == c:
            size += 1
            if size == n:
                return i
    return -1


print(search_n("fdasadfadf", "a", 3))  # prints 7, correct
print(search_n("fdasadfadf", "a", 30))  # prints -1, correct


# ---------------------------------------------------------------------------
# 2. First n terms of the Fibonacci sequence
# ---------------------------------------------------------------------------
def fibonacci(n):
    """Yield the first ``n`` Fibonacci numbers, starting 1, 1, 2, ...

    Nothing is yielded when ``n`` is zero or negative.
    """
    a, b = 1, 1
    for _ in range(n):
        yield a
        a, b = b, a + b


print(list(fibonacci(5)))  # [1, 1, 2, 3, 5]


# ---------------------------------------------------------------------------
# 3. Find all duplicated elements
# ---------------------------------------------------------------------------
def find_all_duplicates(lst):
    """Return the distinct elements that occur more than once in ``lst``.

    Elements must be hashable because they are counted with ``Counter``.
    The result follows the order in which ``Counter`` stores its keys.
    """
    counts = Counter(lst)
    return [item for item, count in counts.items() if count > 1]


print(find_all_duplicates([1, 2, 2, 3, 3, 3]))  # [2, 3]


# ---------------------------------------------------------------------------
# 4. Counting occurrences across several lists
# ---------------------------------------------------------------------------
a = ['apple', 'orange', 'computer', 'orange']
b = ['computer', 'orange']
ca = Counter(a)
cb = Counter(b)
# Counter objects support arithmetic between each other.
print(ca + cb)  # Counter({'orange': 3, 'computer': 2, 'apple': 1})


# Going one step further: count the elements of any number of lists.
def sumc(*c):
    """Return one ``Counter`` holding the combined counts of all arguments.

    Args:
        *c: Zero or more iterables of hashable items.

    Returns:
        A ``Counter`` with the summed counts. Called without arguments it
        returns an empty ``Counter`` so the return type is always the same.
    """
    total = Counter()
    for items in c:
        # Each argument becomes its own Counter before being added in.
        total += Counter(items)
    return total


# Counter({'orange': 3, 'computer': 3, 'apple': 1, 'abc': 1, 'face': 1})
print(sumc(a, b, ['abc'], ['face', 'computer']))


# ---------------------------------------------------------------------------
# 5. groupby on a single field
# ---------------------------------------------------------------------------
# Weather records:
a = [
    {'date': '2019-12-15', 'weather': 'cloud'},
    {'date': '2019-12-13', 'weather': 'sunny'},
    {'date': '2019-12-14', 'weather': 'cloud'},
]

# Group and summarise by the ``weather`` field:
for k, _ in groupby(a, key=lambda x: x['weather']):
    print(k)
# The output shows that the grouping failed! Reason: the data must be sorted
# by the grouping field before grouping -- an easy trap to fall into.
#   cloud
#   sunny
#   cloud

# Fixed code: sort by the grouping field first.
a.sort(key=lambda x: x['weather'])
for k, items in groupby(a, key=lambda x: x['weather']):
    print(k)
    for i in items:
        print(i)
# Output:
#   cloud
#   {'date': '2019-12-15', 'weather': 'cloud'}
#   {'date': '2019-12-14', 'weather': 'cloud'}
#   sunny
#   {'date': '2019-12-13', 'weather': 'sunny'}


# ---------------------------------------------------------------------------
# 6. itemgetter as the key function
# ---------------------------------------------------------------------------
# Besides a lambda, the key function used by ``sort`` and ``groupby`` has a
# shorter spelling: ``operator.itemgetter``.
a = [
    {'date': '2019-12-15', 'weather': 'cloud'},
    {'date': '2019-12-13', 'weather': 'sunny'},
    {'date': '2019-12-14', 'weather': 'cloud'},
]
a.sort(key=itemgetter('weather'))
for k, items in groupby(a, key=itemgetter('weather')):
    print(k)
    for i in items:
        print(i)
# Result:
#   cloud
#   {'date': '2019-12-15', 'weather': 'cloud'}
#   {'date': '2019-12-14', 'weather': 'cloud'}
#   sunny
#   {'date': '2019-12-13', 'weather': 'sunny'}


# ---------------------------------------------------------------------------
# 7. groupby on several fields
# ---------------------------------------------------------------------------
a = [
    {'date': '2019-12-15', 'weather': 'cloud'},
    {'date': '2019-12-13', 'weather': 'sunny'},
    {'date': '2019-12-14', 'weather': 'cloud'},
]
a.sort(key=itemgetter('weather', 'date'))
for k, items in groupby(a, key=itemgetter('weather', 'date')):
    print(k)
    for i in items:
        print(i)
# Result:
#   ('cloud', '2019-12-14')
#   {'date': '2019-12-14', 'weather': 'cloud'}
#   ('cloud', '2019-12-15')
#   {'date': '2019-12-15', 'weather': 'cloud'}
#   ('sunny', '2019-12-13')
#   {'date': '2019-12-13', 'weather': 'sunny'}


# ---------------------------------------------------------------------------
# 8. Computing and aggregating at the same time with sum
# ---------------------------------------------------------------------------
# The aggregate functions sum, min and max take an iterable as their first
# argument. A common way to use them:
a = [4, 2, 5, 1]
print(sum([i + 1 for i in a]))  # 16
# The list comprehension [i+1 for i in a] first builds a temporary list of
# the same length as ``a``; only after that does ``sum`` aggregate it.
# If ``a`` holds hundreds of thousands or millions of items, building such a
# temporary list is wasteful. It is better to aggregate while computing, which
# needs only a small change:
a = [4, 2, 5, 1]
print(sum(i + 1 for i in a))  # 16
# Here ``i+1 for i in a`` is shorthand for ``(i+1 for i in a)``, which produces
# a generator object:
#   In [8]: (i+1 for i in a)
#   Out[8]: <generator object <genexpr> at 0x000002AC7FFA8CF0>
# The generator yields one element per iteration step; that element is
# computed and aggregated before the next step starts, until the end.


# ---------------------------------------------------------------------------
# 9. Splitting a list into groups (generator version)
# ---------------------------------------------------------------------------
def divide_iter(lst, n):
    """Yield ``n`` consecutive slices of ``lst``.

    Every slice spans ``ceil(len(lst) / n)`` positions, so trailing slices
    can be shorter than the others, or empty.

    Args:
        lst: A sliceable sequence.
        n: Number of slices to produce. When ``n <= 0`` the whole ``lst`` is
            yielded once, unsplit.
    """
    if n <= 0:
        yield lst
        return
    i, div = 0, ceil(len(lst) / n)
    while i < n:
        yield lst[i * div: (i + 1) * div]
        i += 1


print(list(divide_iter([1, 2, 3, 4, 5], 0)))  # [[1, 2, 3, 4, 5]]
print(list(divide_iter([1, 2, 3, 4, 5], 2)))  # [[1, 2, 3], [4, 5]]


# ---------------------------------------------------------------------------
# 10. Fully flattening a list (generator version)
# ---------------------------------------------------------------------------
# Flatten a multi-level list into a single-level list.
a = [1, 2, [3, 4, [5, 6], 7], 8, ["python", 6], 9]


def function(lst):
    """Yield the items of ``lst`` in order, descending into nested lists.

    Only ``list`` instances (including subclasses) are expanded; any other
    item, such as a string or tuple, is yielded unchanged.
    """
    for i in lst:
        if isinstance(i, list):
            yield from function(i)
        else:
            yield i


print(list(function(a)))  # [1, 2, 3, 4, 5, 6, 7, 8, 'python', 6, 9]