# Lagou crawler
# Lagou
# AJAX requests; cookie-based anti-scraping
# 第一页
# https://www.lagou.com/jobs/list_python/p-city_6
'''
TG-TRACK-CODE=search_code; user_trace_token=20200106214534-53c939b1-10b4-45a1-bb34-daebd661d4ab;
X_HTTP_TOKEN=acb1a28e7bde8ee74338138751eaff2f5fc5651c92; WEBTJ-ID=20200106214541-16f7b1aa6b61-0363b02d88bb6-2393f61-2073600-16f7b1aa6b719;
JSESSIONID=ABAAABAABEEAAJAA8CD0EDA72E15C2EF8CEA34B3CEB748A; _ga=GA1.2.1798328149.157831
'''
'''
TG-TRACK-CODE=search_code; user_trace_token=20200106214534-53c939b1-10b4-45a1-bb34-daebd661d4ab;
X_HTTP_TOKEN=acb1a28e7bde8ee74338138751eaff2f5fc5651c92; WEBTJ-ID=20200106214541-16f7b1aa6b61-0363b02d88bb6-2393f61-2073600-16f7b1aa6b719;
JSESSIONID=ABAAABAABEEAAJAA8CD0EDA72E15C2EF8CEA34B3CEB748A; _ga=GA1.2.1798328149.157831
'''
import requests
# Script body: open the Lagou HTML listing page once so the session collects
# the site's cookies, then page through the AJAX job-search endpoint and print
# each JSON response.

# Browser-like User-Agent shared by both header sets below.
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'

# HTML listing page requested first, before any AJAX call.
LIST_URL = 'https://www.lagou.com/jobs/list_python/p-city_6'

# AJAX endpoint returning job positions as JSON. The percent-encoded `city`
# value decodes (UTF-8) to "杭州" (Hangzhou).
AJAX_URL = 'https://www.lagou.com/jobs/positionAjax.json?city=%E6%9D%AD%E5%B7%9E&needAddtionalResult=false'

# Seconds to wait on each request; without a timeout, requests can block
# forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Headers for the initial page load.
header1 = {
    'user-agent': USER_AGENT,
}

# Headers for the AJAX calls; these additionally send a Referer.
header2 = {
    'user-agent': USER_AGENT,
    'Referer': 'https://www.lagou.com/jobs/list_python',
}

# The context manager closes the session's pooled connections on exit, even
# if one of the requests raises.
with requests.session() as session:
    # The session keeps any cookies set by this response and sends them with
    # the POSTs below (presumably what the site's cookie check requires --
    # confirm against the live site).
    r = session.get(url=LIST_URL, headers=header1, timeout=REQUEST_TIMEOUT)
    r.raise_for_status()

    for i in range(10):
        # `pn` runs from 2 through 11; page 1 is presumably the HTML listing
        # page fetched above -- TODO confirm that skipping pn=1 is intended.
        data = {
            'first': False,
            'pn': i + 2,
            'kd': 'python',
        }
        jobs = session.post(
            url=AJAX_URL,
            headers=header2,
            data=data,
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on an HTTP error status instead of trying to decode an
        # error page as JSON.
        jobs.raise_for_status()
        print(jobs.json())