Python之itertools模块的使用
itertools模块的作用
itertools模块包括一组用于处理序列数据集的函数。它们的目的是快速、高效地使用内存,并且可以连接在一起来表达更复杂的基于迭代的算法。
基于迭代器的代码比使用列表的代码提供更好的内存消耗特征。由于数据在需要时才从迭代器生成,所以不需要同时将所有数据存储在内存中。这种“惰性”处理模型可以减少大数据集的交换和其他副作用,从而提高性能。
1、多个列表合并成迭代器
from itertools import chain

# chain() walks its arguments one after another, so both lists are visited
# as a single sequence without building a combined list first.
numbers = [1, 2, 3]
letters = ['a', 'b', 'c']
for item in chain(numbers, letters):
    print(item, end=' ')
print()
运行效果
1 2 3 a b c
2、惰性取出迭代器的数据
from itertools import chain


def make_iterables_to_chain():
    """Yield, one at a time, the lists that chain.from_iterable() flattens."""
    yield from ([1, 2, 3], ['a', 'b', 'c'])


# from_iterable() pulls the next list from the generator only when the
# previous one has been fully consumed.
flattened = chain.from_iterable(make_iterables_to_chain())
for item in flattened:
    print(item, end=' ')
print()
运行效果
1 2 3 a b c
3、内置函数zip的示例,主要功能:将多个迭代器的元素组合到一个元组中
# zip() combines the elements at the same position of each input into a tuple.
numbers = [1, 2, 3]
letters = ['a', 'b', 'c']
for pair in zip(numbers, letters):
    print(pair)
运行效果
(1, 'a') (2, 'b') (3, 'c')
4、zip_longest()的示例:多个输入长度不一致时,内置zip()在最短的输入耗尽后停止,而zip_longest()会处理完所有输入,并用None填充缺失的值
from itertools import zip_longest

# range objects can be iterated any number of times, so the same two ranges
# serve both demonstrations; there is no need to create them again.
r1 = range(3)
r2 = range(2)

# zip() stops as soon as the shortest input is exhausted.
print('zip stops early:')
print(list(zip(r1, r2)))

# zip_longest() keeps going and fills the missing positions with None.
print('\nzip_longest processes all of the values:')
print(list(zip_longest(r1, r2)))
运行效果
zip stops early: #内置的zip函数在最短的输入耗尽时就停止,较长输入中多出的元素会被丢弃 [(0, 0), (1, 1)] zip_longest processes all of the values: [(0, 0), (1, 1), (2, None)]
5、islice():从迭代器中按位置切片获取数据
格式:
islice(iterable, start, stop[, step])
from itertools import islice

# Each entry pairs a heading with the positional arguments handed to
# islice() after the iterable: (stop,), (start, stop) or (start, stop, step).
demos = (
    ('Stop at 5:', (5,)),
    ('Start at 5, Stop at 10:', (5, 10)),
    ('By tens to 100:', (0, 100, 10)),
)

for heading, slice_args in demos:
    print(heading)
    for value in islice(range(100), *slice_args):
        print(value, end=' ')
    print('\n')
运行效果
Stop at 5: 0 1 2 3 4 Start at 5, Stop at 10: 5 6 7 8 9 By tens to 100: 0 10 20 30 40 50 60 70 80 90
6、输入一个迭代器返回多个迭代器,默认是返回2个的示例
from itertools import tee, islice, count

# Show the repr of a fresh counter.
print(count())

# Take the first five values of the counter, then split that iterator in two.
r = islice(count(), 5)
i1, i2 = tee(r)

for label, branch in (('i1', i1), ('i2', i2)):
    print(label, list(branch))
运行效果
count(0) i1 [0, 1, 2, 3, 4] i2 [0, 1, 2, 3, 4]
7、tee()创建的新迭代器与原迭代器共享同一份输入:如果原迭代器已经消费了部分元素,新迭代器就不会再生成这些元素
from itertools import count, tee, islice

r = islice(count(), 5)
i1, i2 = tee(r)

# Consume the first three values (0, 1, 2) straight from the original
# iterator before touching the tee() branches.
print('r:', end='')
for value in r:
    print(value, end=' ')
    if value >= 2:
        break
print()

for label, branch in (('i1:', i1), ('i2:', i2)):
    print(label, list(branch))
运行效果
r:0 1 2 i1: [3, 4] i2: [3, 4]
8、内置函数map()的使用
def times_two(x):
    """Return x doubled."""
    doubled = 2 * x
    return doubled


def multiply(x, y):
    """Return both operands together with their product as a 3-tuple."""
    product = x * y
    return (x, y, product)


print('Doubles:')
for doubled in map(times_two, range(5)):
    print(doubled)

# With several inputs, map() passes one element from each to the function.
print('\nMultiples:')
r1 = range(5)
r2 = range(5, 10)
for left, right, result in map(multiply, r1, r2):
    print('{:d} * {:d} = {:d}'.format(left, right, result))

# map() stops once the shortest input (here r2) runs out.
print('\nStopping:')
r1 = range(5)
r2 = range(2)
for row in map(multiply, r1, r2):
    print(row)
运行效果
Doubles: 0 2 4 6 8 Multiples: 0 * 5 = 0 1 * 6 = 6 2 * 7 = 14 3 * 8 = 24 4 * 9 = 36 Stopping: (0, 0, 0) (1, 1, 1)
9、starmap()遍历输入时,使用*号把每个元组拆分成函数的多个参数
from itertools import starmap

values = [(0, 5), (1, 6), (2, 7), (3, 8), (4, 9)]

# starmap() unpacks every tuple into the positional arguments of the lambda.
rows = starmap(lambda x, y: (x, y, x * y), values)
for left, right, result in rows:
    print('{} * {} = {}'.format(left, right, result))
运行效果
0 * 5 = 0
1 * 6 = 6
2 * 7 = 14
3 * 8 = 24
4 * 9 = 36
10、生成新值,count()函数返回一个迭代器,该迭代器能够无限地生成连续的整数
# Import only the name that is used; a wildcard import would pull every
# public itertools name into this namespace.
from itertools import count

# count(1) yields 1, 2, 3, ... without end; zip() stops when the list of
# letters is exhausted, which bounds the loop.
for pair in zip(count(1), ['a', 'b', 'c']):
    print(pair)
运行效果
(1, 'a') (2, 'b') (3, 'c')
11、count(),设置开始位置和步长的示例
import fractions
from itertools import count

# Start value and step are both one third.
start = fractions.Fraction(1, 3)
step = fractions.Fraction(1, 3)

labelled = zip(count(start, step), ['a', 'b', 'c'])
for value, letter in labelled:
    print('{}:{}'.format(value, letter))
运行效果
1/3:a 2/3:b 1:c
12、cycle()函数返回一个迭代器,它会无限地重复给定参数的内容。由于必须记住输入迭代器的全部内容,所以如果这个迭代器很长,则可能会耗费大量内存
from itertools import cycle

# The letters repeat endlessly; range(7) is what ends the zip().
letters = cycle(['a', 'b', 'c'])
for pair in zip(range(7), letters):
    print(pair)
运行效果
(0, 'a') (1, 'b') (2, 'c') (3, 'a') (4, 'b') (5, 'c') (6, 'a')
13、repeat()返回一个迭代器,每次访问都会返回相同的值
from itertools import repeat

# The second argument limits how many times the value is produced.
phrases = repeat('over-and-over', 5)
for phrase in phrases:
    print(phrase)
运行效果
over-and-over over-and-over over-and-over over-and-over over-and-over
14、通过map()进行多个参数的处理,其中用repeat()提供固定不变的参数
from itertools import repeat

# repeat(2) supplies the constant first operand for every value of range(5);
# map() ends when range(5) is exhausted.
rows = map(lambda x, y: (x, y, x * y), repeat(2), range(5))
for factor, number, result in rows:
    print('{:d} * {:d} = {:d}'.format(factor, number, result))
运行效果
2 * 0 = 0
2 * 1 = 2
2 * 2 = 4
2 * 3 = 6
2 * 4 = 8
15、dropwhile():跳过开头所有使条件为True的元素,从第一个使条件为False的元素开始,返回其后的全部元素(之后不再检查条件)的示例
from itertools import dropwhile


def should_drop(x):
    """Print the value under test and return whether it is below 1."""
    print('Testing:', x)
    below_one = x < 1
    return below_one


# Leading items are skipped while the predicate holds; once it fails, the
# remaining items are yielded without being tested again.
remaining = dropwhile(should_drop, [-1, 0, 1, 2, -2])
for value in remaining:
    print('Yielding:', value)
运行效果
Testing: -1 Testing: 0 Testing: 1 Yielding: 1 Yielding: 2 Yielding: -2
16、takewhile():只要条件为True就一直返回元素,遇到第一个使条件为False的元素就停止的示例
from itertools import takewhile


def should_drop(x):
    """Print the value under test and return whether it is below 1."""
    print('Testing:', x)
    below_one = x < 1
    return below_one


# Items are yielded while the predicate holds; iteration ends at the first
# item for which it fails.
leading = takewhile(should_drop, [-1, 0, 1, 2, -2])
for value in leading:
    print('Yielding:', value)
运行效果
Testing: -1 Yielding: -1 Testing: 0 Yielding: 0 Testing: 1
17、内置函数filter()的过滤示例:检查每一个元素,只返回条件为True的元素
def check_item(x):
    """Print the value under test and return whether it is below 1."""
    print('Testing:', x)
    below_one = x < 1
    return below_one


# filter() tests every item and yields the ones for which the predicate
# is true.
kept = filter(check_item, [-1, 0, 1, 2, -2])
for value in kept:
    print('Yielding:', value)
运行效果
Testing: -1 Yielding: -1 Testing: 0 Yielding: 0 Testing: 1 Testing: 2 Testing: -2 Yielding: -2
18、itertools模块的filterfalse()的过滤示例:检查每一个元素,只返回条件为False的元素
from itertools import filterfalse


def check_item(x):
    """Print the value under test and return whether it is below 1."""
    print('Testing:', x)
    below_one = x < 1
    return below_one


# filterfalse() tests every item and yields the ones for which the
# predicate is false.
rejected = filterfalse(check_item, [-1, 0, 1, 2, -2])
for value in rejected:
    print('Yielding:', value)
运行效果
Testing: -1 Testing: 0 Testing: 1 Yielding: 1 Testing: 2 Yielding: 2 Testing: -2
19、compress定义一个数据迭代器和选择迭代器进行过滤的示例
from itertools import cycle, compress

every_third = cycle([False, False, True])
data = range(1, 10)

# compress() takes two arguments:
#   1. data        - the iterable holding the values to process
#   2. every_third - the selector iterable; a true selector keeps the value
print(*compress(data, every_third), end=' ')
print()
运行效果
3 6 9
20、functools.total_ordering与groupby结合使用
import functools
import operator
import pprint
from itertools import count, cycle, groupby, islice


@functools.total_ordering
class Point:
    """A 2-D point compared and ordered by its (x, y) coordinate pair.

    Only __eq__ and __gt__ are written out; functools.total_ordering
    derives the remaining rich comparisons from them.
    """

    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):
        return '({}, {})'.format(self.x, self.y)

    def __eq__(self, other):
        # Returning NotImplemented lets Python fall back to its default
        # handling instead of failing with AttributeError on a non-Point.
        if not isinstance(other, Point):
            return NotImplemented
        return (self.x, self.y) == (other.x, other.y)

    def __gt__(self, other):
        if not isinstance(other, Point):
            return NotImplemented
        return (self.x, self.y) > (other.x, other.y)


# Create a dataset of Point instances:
#   islice - slices an iterator
#   count  - generates consecutive integers
#   map    - calls Point with one value taken from each input
data = list(map(Point, cycle(islice(count(), 3)), islice(count(), 7)))
print('Data:')
pprint.pprint(data, width=35)
print()

# Group the unsorted data by its x value.
print('分组没有排序:')
for k, g in groupby(data, operator.attrgetter('x')):
    print(k, list(g))
print()

# Sort the data.
data.sort()
print('Sorted:')
pprint.pprint(data, width=35)
print()

# Group the sorted data by its x value.
print('分组排序:')
for k, g in groupby(data, operator.attrgetter('x')):
    print(k, list(g))
print()

# Takeaway: sort first, then group. groupby() only merges adjacent items
# that share a key.
运行效果
Data: [(0, 0), (1, 1), (2, 2), (0, 3), (1, 4), (2, 5), (0, 6)] 分组没有排序: 0 [(0, 0)] 1 [(1, 1)] 2 [(2, 2)] 0 [(0, 3)] 1 [(1, 4)] 2 [(2, 5)] 0 [(0, 6)] Sorted: [(0, 0), (0, 3), (0, 6), (1, 1), (1, 4), (2, 2), (2, 5)] 分组排序: 0 [(0, 0), (0, 3), (0, 6)] 1 [(1, 1), (1, 4)] 2 [(2, 2), (2, 5)]
21、accumulate()累积处理输入:默认把当前元素与之前的累积结果相加,数字得到前缀和,字符串得到逐步拼接的前缀
from itertools import accumulate

# Without an explicit function accumulate() adds each element to the running
# result: numbers give running sums, strings give growing prefixes.
for source in (range(5), 'abcde'):
    print(list(accumulate(source)))
运行效果
[0, 1, 3, 6, 10] ['a', 'ab', 'abc', 'abcd', 'abcde']
22、自己定义合并的规则
from itertools import accumulate


def f(a, b):
    """Print both arguments, then return a with b added on each side.

    accumulate() calls this with the running result as a and the next
    input element as b.
    """
    print(a, b)
    wrapped = b + a + b
    return wrapped


results = list(accumulate('abcde', f))
print(results)
运行效果
a b bab c cbabc d dcbabcd e ['a', 'bab', 'cbabc', 'dcbabcd', 'edcbabcde']
23、product():迭代处理多个序列的嵌套for循环的【扑克牌按牌面大小排序】示例
from itertools import product, chain

FACE_CARDS = ('J', 'Q', 'K', 'A')
SUITS = ('H', 'D', 'C', 'S')

# Ranks come first in product(), so the suit varies fastest and the deck is
# ordered rank by rank.
DECK = list(product(chain(range(2, 11), FACE_CARDS), SUITS))

for rank, suit in DECK:
    print('{:>2}{}'.format(rank, suit), end=' ')
    # End the row after the last suit of each rank.
    if suit == SUITS[-1]:
        print()
运行效果
2H 2D 2C 2S
3H 3D 3C 3S
4H 4D 4C 4S
5H 5D 5C 5S
6H 6D 6C 6S
7H 7D 7C 7S
8H 8D 8C 8S
9H 9D 9C 9S
10H 10D 10C 10S
JH JD JC JS
QH QD QC QS
KH KD KC KS
AH AD AC AS
24、 product():迭代处理多个序列的嵌套for循环的【扑克牌按牌面大小排序】改变排序的示例
from itertools import chain, product

FACE_CARDS = ('J', 'Q', 'K', 'A')
SUITS = ('H', 'D', 'C', 'S')

# Suits come first in product(), so the rank varies fastest and the deck is
# ordered suit by suit.
DECK = list(product(SUITS, chain(range(2, 11), FACE_CARDS)))

for suit, rank in DECK:
    print('{:>2}{}'.format(rank, suit), end=' ')
    # End the row after the last rank of each suit.
    if rank == FACE_CARDS[-1]:
        print()
运行效果
2H 3H 4H 5H 6H 7H 8H 9H 10H JH QH KH AH
2D 3D 4D 5D 6D 7D 8D 9D 10D JD QD KD AD
2C 3C 4C 5C 6C 7C 8C 9C 10C JC QC KC AC
2S 3S 4S 5S 6S 7S 8S 9S 10S JS QS KS AS
25、product()计算一个序列与自身的积,可以指定输入重复多少次的示例
from itertools import product


def show(iterable):
    """Print the items of iterable three per row.

    A newline is printed after every third item, and one more newline is
    printed after the last item.
    """
    for i, item in enumerate(iterable, 1):
        print(item, end=' ')
        if i % 3 == 0:
            print()
    print()


# show() only iterates its argument once, so the product() iterators are
# passed directly instead of being copied into lists first.
print('Repeat 2:\n')
show(product(range(3), repeat=2))

print('Repeat 3:\n')
show(product(range(3), repeat=3))
运行效果
Repeat 2: (0, 0) (0, 1) (0, 2) (1, 0) (1, 1) (1, 2) (2, 0) (2, 1) (2, 2) Repeat 3: (0, 0, 0) (0, 0, 1) (0, 0, 2) (0, 1, 0) (0, 1, 1) (0, 1, 2) (0, 2, 0) (0, 2, 1) (0, 2, 2) (1, 0, 0) (1, 0, 1) (1, 0, 2) (1, 1, 0) (1, 1, 1) (1, 1, 2) (1, 2, 0) (1, 2, 1) (1, 2, 2) (2, 0, 0) (2, 0, 1) (2, 0, 2) (2, 1, 0) (2, 1, 1) (2, 1, 2) (2, 2, 0) (2, 2, 1) (2, 2, 2)
26、permutations(),生成所有排列的全集的示例
from itertools import permutations


def show(iterable):
    """Print each item joined into a string, one row per leading element.

    A new row starts whenever the first element of an item differs from
    the first element of the previous item.
    """
    first = None
    for item in iterable:
        if first != item[0]:
            # No line break before the very first row.
            if first is not None:
                print()
            first = item[0]
        print(''.join(item), end=' ')
    print()


print('All permutations:\n')
show(permutations('abcd'))

# r limits the length of each permutation.
print('\nPairs:\n')
show(permutations('abcd', r=2))
运行效果
All permutations:
abcd abdc acbd acdb adbc adcb
bacd badc bcad bcda bdac bdca
cabd cadb cbad cbda cdab cdba
dabc dacb dbac dbca dcab dcba
Pairs:
ab ac ad
ba bc bd
ca cb cd
da db dc
27、combinations():生成不考虑顺序的组合;只要输入的成员是唯一的,输出就不包括任何重复的值的示例
from itertools import combinations


def show(iterable):
    """Print each item joined into a string, one row per leading element.

    A new row starts whenever the first element of an item differs from
    the first element of the previous item.
    """
    first = None
    for item in iterable:
        if first != item[0]:
            # No line break before the very first row.
            if first is not None:
                print()
            first = item[0]
        print(''.join(item), end=' ')
    print()


print('Unique pairs:\n')
show(combinations('abcd', r=2))
运行效果
Unique pairs:
ab ac ad
bc bd
cd
28、combinations_with_replacement():生成允许同一个元素重复出现的组合(如aa、bb)的示例
from itertools import combinations_with_replacement


def show(iterable):
    """Print each item joined into a string, one row per leading element.

    A new row starts whenever the first element of an item differs from
    the first element of the previous item.
    """
    first = None
    for item in iterable:
        if first != item[0]:
            # No line break before the very first row.
            if first is not None:
                print()
            first = item[0]
        print(''.join(item), end=' ')
    print()


print('Unique pairs:\n')
show(combinations_with_replacement('abcd', r=2))
运行效果
Unique pairs:
aa ab ac ad
bb bc bd
cc cd
dd