使用协程‘并发’爬网站
"""Fetch several web pages 'concurrently' with gevent greenlets and time the run."""
from gevent import monkey

# Patch the standard library BEFORE importing anything that pulls in
# socket/ssl (urllib.request does). Patching makes blocking I/O cooperative,
# so gevent can switch to another greenlet while one is waiting on the network.
monkey.patch_all()

import time  # noqa: E402
from urllib.request import urlopen  # noqa: E402

import gevent  # noqa: E402


def f(url):
    """Download *url* and print how many bytes were received.

    Args:
        url: Absolute URL to fetch.

    Raises:
        urllib.error.URLError: If the request fails (propagated from urlopen).
    """
    print('GET:%s' % url)
    # The context manager closes the HTTP response even if read() raises.
    with urlopen(url) as resp:
        data = resp.read()
    print('%d bytes received from %s' % (len(data), url))


start = time.time()
# Spawn one greenlet per URL and block until all of them have finished.
gevent.joinall([
    gevent.spawn(f, 'http://www.qq.com'),
    gevent.spawn(f, 'http://www.baidu.com'),
    gevent.spawn(f, 'http://www.sina.com.cn'),
])
# Elapsed wall-clock time, in seconds, for all downloads together.
print(time.time() - start)
输出:
GET:http://www.qq.com
GET:http://www.baidu.com
GET:http://www.sina.com.cn
535921 bytes received from http://www.sina.com.cn
328386 bytes received from http://www.baidu.com
24834 bytes received from http://www.qq.com
0.6621875762939453