一个 Python 爬虫协程的写法(gevent 模块)

# Apply gevent's monkey patches before any other import so that the socket/ssl
# modules pulled in by `requests` are the cooperative versions. Patching after
# those modules have been imported makes gevent emit a MonkeyPatchWarning and
# can leave blocking calls unpatched.
from gevent import monkey
monkey.patch_all()

from bs4 import BeautifulSoup
import requests
import gevent
from gevent import pool

# Greenlets spawned for the crawl; joined at the end of the script.
jobs = []
# Shared list intended to receive the <a> tags found by get_links().
links = []
# Pool limiting the crawl to 10 concurrently running greenlets.
p = pool.Pool(10)
# Pages to crawl.
urls = [
    'http://www.google.com',
    # ... another 100 urls
]
def get_links(url):
    """Fetch *url* and add every ``<a>`` tag on the page to ``links``.

    The tags are appended to the module-level ``links`` list. Responses
    whose status code is not 200 are ignored. Any exception raised by
    ``requests.get`` propagates to the caller.

    Args:
        url: Address of the page to download.
    """
    r = requests.get(url)
    if r.status_code == 200:
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed.
        soup = BeautifulSoup(r.text, 'html.parser')
        # Mutate the shared list in place; `links + ...` would only build a
        # temporary list and discard it.
        links.extend(soup.find_all('a'))
# Start one get_links greenlet per URL through the pool and remember each one.
for target in urls:
    greenlet = p.spawn(get_links, target)
    jobs.append(greenlet)

# Wait until every spawned greenlet has finished.
gevent.joinall(jobs)

 

posted @ 2017-09-05 15:35  安阳小栈-客官歇会吧  阅读(443)  评论(0)  编辑  收藏  举报