python3迭代器和生成器
1、手动访问迭代器中的元素
#要手动访问迭代器中的元素,可以使用next()函数 In [3]: with open('/etc/passwd') as f: ...: try: ...: while True: ...: print(next(f)) #next()函数访问迭代器中的下一个元素 ...: except StopIteration: #捕获结束异常 ...: print('None') #也可以给next()指定一个默认返回值(如None)来判断迭代结束 In [28]: with open('/etc/passwd') as f: ...: while True: ...: line = next(f,None) ...: if line is None: ...: break ...: print(line)
2、委托迭代
class node(object):
    """Tree node that delegates iteration to its list of children.

    Iterating over a node walks its direct children in insertion order.
    """

    def __init__(self, value):
        # Payload displayed by __repr__.
        self._value = value
        # Direct children, kept in the order they were added.
        self._children = []

    def __repr__(self):
        """Return the ``Node(<value>)`` display form of this node."""
        template = 'Node({!r})'
        return template.format(self._value)

    def add_child(self, node):
        """Append ``node`` to this node's children."""
        self._children.append(node)

    def __iter__(self):
        """Hand iteration over to the underlying children list."""
        children_iterator = iter(self._children)
        return children_iterator


if __name__ == '__main__':
    root = node(0)
    for value in (1, 2):
        root.add_child(node(value))
    for child in root:
        print(child)
    # Prints: Node(1) Node(2)
3、用生成器创建新的迭代模式
def frange(start, stop, setup):
    """Yield numbers from ``start`` towards ``stop`` in increments of ``setup``.

    A generator counterpart of ``range`` that also accepts floats. ``stop``
    itself is never yielded. Values are produced by repeated addition, so
    float rounding error accumulates over long runs.

    Args:
        start: first value to yield.
        stop: exclusive bound of the sequence.
        setup: increment between values; a negative value counts downwards.

    Raises:
        ValueError: if ``setup`` is zero (raised when iteration starts,
            because this is a generator function).
    """
    # A zero step can never reach ``stop``; fail instead of looping forever.
    if setup == 0:
        raise ValueError('frange() step must not be zero')
    if setup > 0:
        while start < stop:
            yield start
            start += setup
    else:
        # Negative step: walk downwards until ``stop`` is reached or passed.
        while start > stop:
            yield start
            start += setup


for i in frange(0, 10, 0.8):
    print(i)
4、实现迭代协议
class Node:
    """Tree node supporting child iteration and depth-first traversal."""

    def __init__(self, value):
        # Payload displayed by __repr__.
        self._value = value
        # Direct children, kept in the order they were added.
        self._children = []

    def __repr__(self):
        """Return the ``Node(<value>)`` display form of this node."""
        template = 'Node({!r})'
        return template.format(self._value)

    def add_child(self, node):
        """Append ``node`` to this node's children."""
        self._children.append(node)

    def __iter__(self):
        """Iterate over the direct children only."""
        return iter(self._children)

    def depth_first(self):
        """Yield this node first, then each child's subtree in order."""
        yield self
        for child in self:
            # Delegate to the child's own traversal so its subtree follows it.
            yield from child.depth_first()


if __name__ == '__main__':
    root = Node(123)
    left, right = Node(10), Node(20)
    for branch in (left, right):
        root.add_child(branch)
    for leaf_value in (111, 222):
        left.add_child(Node(leaf_value))
    right.add_child(Node(333))
    for visited in root.depth_first():
        print(visited)
    # Prints: Node(123) Node(10) Node(111) Node(222) Node(20) Node(333)
5、反向迭代
In [29]: a = [1,2,3,4] In [30]: for i in reversed(a): #反向迭代 ...: print(i) ...: 4 3 2 1
6、对迭代器做切片操作
#要对迭代器和生成器做切片处理,可以使用itertools.islice()函数 In [31]: def count(n): ...: while True: ...: yield n ...: n += 1 ...: In [33]: c = count(0) #生成器对象是不可切片操作的 In [34]: c[10:20] --------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-34-d27b6259daf3> in <module>() ----> 1 c[10:20] TypeError: 'generator' object is not subscriptable #只有使用itertools.islice()函数才可切片 In [35]: import itertools In [36]: for i in itertools.islice(c,10,20): ...: print(i,end=' ') ...: 10 11 12 13 14 15 16 17 18 19 #迭代器和生成器是没法执行普通的切片操作的,这是因为不知道它们的长度和索引,而islice()产生的结果是一个迭代器,它可以产生出所需要切片元素,它是通过访问并丢弃所有起始索引之前的元素来实现的,
#之后的元素会由islice对象来产生直到结束索引为止。并且islice()会消耗掉提供的迭代器中的数据,它只能访问一次的
7、跳过可迭代对象中的前一部分元素
#itertools模块中有一个函数dropwhile()它会迭代丢弃需要过滤的元素,但只丢弃开头的过滤元素 #cat test.txt #aaa #bbb ccc ddd #eee from itertools import dropwhile #使用dropwhile函数过滤开始元素,startswith函数指定判断元素的值 with open('test.txt') as f: for i in dropwhile(lambda x:x.startswith('#'),f): print(i,end='') #输出结果:只有文件开头连续以#号开头的行会被过滤掉 ccc ddd #eee #也可以使用itertools模块中的islice函数来指定跳过多少个元素 from itertools import islice items = ['a','b','c',1,2,3] for x in islice(items,4,None): print(x) #输出将跳过前面的4个元素,只输出后面的2,3 #如果需要跳过所有需要过滤的元素,只需要指定判定规则值即可 with open('test.txt') as f: lines = (line for line in f if not line.startswith('#')) for line in lines: print(line,end='') #输出就只有不以#号开头的行了 ccc ddd
8、迭代所有可能的组合或排列
#itertools模块中提供了3个函数来解决所有元素的重排列的可能情况,先来看第一个itertools.permutations()使用方法: In [6]: from itertools import permutations In [7]: items = ['a','b','c'] In [8]: for i in permutations(items): ...: print(i) ...: ('a', 'b', 'c') ('a', 'c', 'b') ('b', 'a', 'c') ('b', 'c', 'a') ('c', 'a', 'b') ('c', 'b', 'a') #如果要限定排列的长度可以指定长度参数 In [9]: for i in permutations(items,2): ...: print(i) .... ('c', 'a') ('c', 'b') #使用第二个函数itertools.combinations()它将输出序列中所有元素的全部组合形式,但元素之间是不考虑顺序的,比如(a,b)和(b,a)是一种组合形式将只出现一次 In [11]: from itertools import combinations In [12]: for j in combinations(range(4),3): ...: print(j) ...: (0, 1, 2) (0, 1, 3) (0, 2, 3) (1, 2, 3) In [13]: for j in combinations(range(4),4): ...: print(j) ...: (0, 1, 2, 3) #第三个函数itertools.combinations_with_replacement()它允许同一个元素被多次选择进行组合 In [14]: from itertools import combinations_with_replacement In [15]: for i in combinations_with_replacement(range(1,3),3): ...: print(i) ...: (1, 1, 1) (1, 1, 2) (1, 2, 2) (2, 2, 2)
9、以索引-值对的形式迭代序列
#内建的enumerate()函数可以同时输出元素的序号和值,并可以指定序号的起始值 In [19]: my_list = ['a','b','c'] In [20]: for k,v in enumerate(my_list,1): ...: print(k,'==',v) ...: 1 == a 2 == b 3 == c #打印嵌套数组 In [21]: list1 = [(1,2),(11,22),(33,44)] In [22]: for n,(k,v) in enumerate(list1,1): ...: print(n,'<===>',k,v) ...: 1 <===> 1 2 2 <===> 11 22 3 <===> 33 44
10、同时迭代多个序列
#同时迭代多个序列可以使用zip()函数,它每次从各个序列中各取一个元素组成一个元组,整个迭代的长度取其中最短的序列长度 In [23]: list1 = [1,2,3,4,5] In [24]: list2 = ['a','b','c','d'] In [25]: list3 = ['jar','py','jc'] In [26]: for x,y,z in zip(list1,list2,list3): ...: print(x,y,z) ...: 1 a jar 2 b py 3 c jc #如果需要输出完整的序列可以使用itertools模块中的zip_longest函数 In [27]: from itertools import zip_longest In [28]: for i in zip_longest(list1,list2,list3): ...: print(i) ...: (1, 'a', 'jar') (2, 'b', 'py') (3, 'c', 'jc') (4, 'd', None) (5, None, None) #zip()通常用于处理数据配对的,如将以字典形式配对名称和值 In [29]: heard = ['name','age','salely'] In [30]: values = ['zhangsan',30,99999] In [31]: s = dict(zip(heard,values)) In [32]: s Out[32]: {'name': 'zhangsan', 'age': 30, 'salely': 99999}
11、在不同的容器中进行迭代
#itertools.chain()方法可以在多个容器中迭代对象 In [33]: from itertools import chain In [34]: a = [1,2,3] In [35]: b = ['a','b','c'] In [36]: for i in chain(a,b):print(i) 1 2 3 a b c
12、创建处理数据的管道
import bz2
import fnmatch
import gzip
import os
import re


def gen_find(filepat, top):
    """Yield paths of files under ``top`` whose names match ``filepat``.

    Args:
        filepat: shell-style wildcard pattern matched against file names.
        top: directory at which the recursive walk starts.
    """
    for path, dirlist, filelist in os.walk(top):
        for name in fnmatch.filter(filelist, filepat):
            yield os.path.join(path, name)


def gen_opener(filenames):
    """Open each file in turn and yield the open text-mode file object.

    Names ending in ``.gz`` or ``.bz2`` are opened with the matching
    decompressor; everything else is opened as plain text. Each file is
    closed as soon as the consumer asks for the next one, so a yielded file
    must be fully used before advancing the generator.

    Args:
        filenames: iterable of file paths.
    """
    for filename in filenames:
        if filename.endswith('.gz'):
            f = gzip.open(filename, 'rt')
        elif filename.endswith('.bz2'):
            f = bz2.open(filename, 'rt')
        else:
            f = open(filename, 'rt')
        # ``with`` guarantees the file is closed even when the generator is
        # closed or abandoned while suspended at the yield.
        with f:
            yield f


def gen_concatenate(iterators):
    """Chain several iterables into one continuous stream of items."""
    for it in iterators:
        yield from it


def gen_grep(pattern, lines):
    """Yield only the lines in which the regular expression ``pattern`` is found."""
    pat = re.compile(pattern)
    for line in lines:
        if pat.search(line):
            yield line


if __name__ == '__main__':
    file_name = input('please in to file:')
    directory_name = input('pease in to directory:')
    # Each stage lazily consumes the previous one, forming a pipeline.
    lognames = gen_find(file_name, directory_name)
    files = gen_opener(lognames)
    lines = gen_concatenate(files)
    pylines = gen_grep('(?i)python', lines)
    for line in pylines:
        print(line)
13、扁平化处理嵌套型的序列
# ``yield from`` recursively delegates to nested iterables and produces all of
# their values, so the result is a single flat sequence with no nesting.
from collections.abc import Iterable  # ``collections.Iterable`` was removed in Python 3.10


def flatten(items, ignore_types=(str, bytes)):
    """Yield the leaf values of an arbitrarily nested iterable, in order.

    Args:
        items: iterable that may contain further iterables at any depth.
        ignore_types: types that are yielded as-is even though they are
            iterable. The default keeps ``str`` and ``bytes`` whole.
    """
    for i in items:
        if isinstance(i, Iterable) and not isinstance(i, ignore_types):
            # Pass ignore_types down so nested levels honour the caller's choice.
            yield from flatten(i, ignore_types)
        else:
            yield i


items = [1, 2, [3, 4, [5, 6], 7], 8]
for i in flatten(items):
    print(i, end=' ')
14、合并多个有序序列,再对整个有序序列进行迭代
#对多个有序序列先进行合并再进行排序,可以使用heapq.merge()函数,它不会将所有的数据读取到堆中,也不会做排序操作,
#它只是简单的检查每个输入序列中的第一个元素,将最小的发送出去,然后再重复执行这一步操作,直到序列耗尽为止 In [8]: import heapq In [9]: a = [12,3,4,9] In [10]: b = [5,32,15,1] In [11]: c = heapq.merge(a,b) In [12]: c Out[12]: <generator object merge at 0x7f781eb934f8> In [13]: d = list(c) In [14]: d Out[14]: [5, 12, 3, 4, 9, 32, 15, 1] #注意:heapq.merge()要求每个输入序列本身已经排好序;上例中的a和b是无序的,所以得到的结果也不是有序的。应当先排序再合并,例如list(heapq.merge(sorted(a),sorted(b)))的结果为[1, 3, 4, 5, 9, 12, 15, 32]
15、用迭代器取代while循环
# When doing I/O we often write a loop that keeps receiving data and checks
# whether the end of the stream has been reached.
def readder(s):
    """Drain ``s`` by calling ``s.recv(8192)`` until it returns ``b''``.

    Args:
        s: object with a ``recv(size)`` method returning ``bytes``.
    """
    while True:
        data = s.recv(8192)
        if data == b'':
            break


# The loop above can be replaced with iter(): given a zero-argument callable
# and a sentinel, it builds an iterator that keeps calling the callable until
# the sentinel value is returned.
import sys


def reader(s):
    """Copy everything received from ``s`` to standard output.

    After each chunk, the value returned by the write call is printed on its
    own line.

    Args:
        s: object with a ``recv(size)`` method returning ``bytes``; a return
            value of ``b''`` ends the loop.
    """
    # recv() returns bytes, which the text-mode sys.stdout.write() rejects
    # with TypeError; write to the underlying binary buffer instead.
    binary_out = getattr(sys.stdout, 'buffer', None)
    for i in iter(lambda: s.recv(8192), b''):
        if binary_out is not None:
            # Flush the text layer first so output stays in order.
            sys.stdout.flush()
            data = binary_out.write(i)
            binary_out.flush()
        else:
            # Fallback for a replaced stdout without a binary buffer
            # (e.g. io.StringIO): decode leniently and write text.
            data = sys.stdout.write(i.decode(errors='replace'))
        print(data)