使用协程‘并发’爬网站

# Apply gevent's monkey patches before importing anything else. gevent
# recommends patching as early as possible: urllib.request pulls in socket and
# ssl, and patching ssl after it has already been imported triggers a
# MonkeyPatchWarning and can lead to errors. Patching makes blocking standard
# library I/O cooperative, so greenlets yield to each other while waiting.
from gevent import monkey
monkey.patch_all()

import time
from urllib.request import urlopen

import gevent
def f(url):
    """Fetch ``url`` and print how many bytes the response body contained.

    Prints a ``GET:`` line before issuing the request and a byte-count line
    once the whole body has been read. Returns ``None``. Any exception raised
    by ``urlopen`` or ``read`` propagates to the caller.
    """
    print('GET:%s' % url)
    # Use the response as a context manager so the connection is closed
    # deterministically instead of lingering until garbage collection.
    with urlopen(url) as resp:
        data = resp.read()

    # To keep a copy of the page, write ``data`` to a file opened in 'wb' mode.
    print('%d bytes received from %s' % (len(data), url))

#f('http://www.python.org')
# Fetch every site in its own greenlet and report the total wall-clock time.
start = time.time()
urls = (
    'http://www.qq.com',
    'http://www.baidu.com',
    'http://www.sina.com.cn',
)
# Spawn one greenlet per URL, in order, then block until all have finished.
jobs = [gevent.spawn(f, url) for url in urls]
gevent.joinall(jobs)
elapsed = time.time() - start
print(elapsed)

输出:

GET:http://www.qq.com
GET:http://www.baidu.com
GET:http://www.sina.com.cn
535921 bytes received from http://www.sina.com.cn
328386 bytes received from http://www.baidu.com
24834 bytes received from http://www.qq.com
0.6621875762939453

posted on 2021-12-21 22:41  csy113  阅读(15)  评论(0)  编辑  收藏  举报