# Python 学习之实现简单的高并发爬虫爬取网页
"""Compare serial and gevent-based concurrent fetching of a few web pages.

The script downloads every URL in ``urls`` twice: first one after another,
then concurrently with one gevent greenlet per URL, printing the elapsed
time of each run.
"""
# gevent cannot detect urllib's blocking socket I/O on its own, so greenlets
# would never yield while waiting on the network. monkey.patch_all() replaces
# the blocking standard-library primitives with cooperative ones. It must run
# BEFORE urllib (which pulls in socket/ssl) is imported; patching afterwards
# triggers gevent's MonkeyPatchWarning and can break ssl.
from gevent import monkey

monkey.patch_all()

import time  # noqa: E402  (must come after monkey.patch_all())
from urllib import request  # noqa: E402

import gevent  # noqa: E402

# Pages fetched by both the serial and the concurrent run, so that the two
# timings are measured against the same workload.
urls = [
    'https://www.python.org/',
    'https://www.nginx.org/',
    'https://github.com/',
]


def f(url, timeout=30):
    """Download *url* and print how many bytes were received.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on blocking network operations before
            ``urlopen`` gives up; prevents one stalled host from hanging
            the whole run.

    Raises:
        urllib.error.URLError: If the request fails (including HTTP errors).
        OSError: On lower-level socket failures such as a timeout while
            reading the body.
    """
    print(f'GET: {url}')
    # The context manager closes the response (and its socket) even when
    # read() raises, which the original open-and-forget call did not.
    with request.urlopen(url, timeout=timeout) as resp:
        data = resp.read()
    print(f'{len(data)} bytes received from {url}.')


def main():
    """Time a serial run and a concurrent (greenlet) run over ``urls``."""
    # Serial: each download starts only after the previous one finished.
    # perf_counter() is monotonic, so the measured interval cannot be
    # distorted by system clock adjustments.
    time_start = time.perf_counter()
    for url in urls:
        f(url)
    print("同步cost", time.perf_counter() - time_start)

    # Concurrent: one greenlet per URL; joinall() blocks until all are done.
    async_time_start = time.perf_counter()
    gevent.joinall([gevent.spawn(f, url) for url in urls])
    print("异步cost", time.perf_counter() - async_time_start)


if __name__ == "__main__":
    main()