gevent协程为啥返回的是同一页

import requests
import gevent
from gevent import monkey
monkey.patch_all()


def next_payload():
for i in range(2, 8):
request_payload = {
'CategoryId': 808,
'CategoryType': 'SiteHome',
'ItemListActionName': 'Postlist',
'PageIndex': i,
'ParentCategoryId': 0,
'TotalPostCount': 4000
}
yield request_payload


def start_crawl():
response = requests.post(
url='https://www.cnblogs.com/mvc/AggSite/PostList.aspx',
data=next(next_payload()),
headers={
'Referer': 'https://www.cnblogs.com/',
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36',
}
)
f = open('cnblogs.txt', 'a', encoding='utf8')
f.write(response.text)
f.close()


gevent.joinall([
gevent.spawn(start_crawl()),
gevent.spawn(start_crawl()),
gevent.spawn(start_crawl()),
gevent.spawn(start_crawl()),
gevent.spawn(start_crawl()),
gevent.spawn(start_crawl())
])
爬虫小白一枚,想练练手,选择了博客园首页的最新博客,想练习一下多页爬取,可是不知道为什么,爬出来的东西,好像还是同一页,是我哪里写错了么。希望有大佬能指导我一下。
posted @ 2018-03-10 14:36  M_o  阅读(96)  评论(0编辑  收藏  举报