拉勾爬虫

拉勾

数据通过 Ajax 请求获取,站点使用 Cookies 进行反爬

# 第一页
# https://www.lagou.com/jobs/list_python/p-city_6
'''
TG-TRACK-CODE=search_code; user_trace_token=20200106214534-53c939b1-10b4-45a1-bb34-daebd661d4ab;
X_HTTP_TOKEN=acb1a28e7bde8ee74338138751eaff2f5fc5651c92; WEBTJ-ID=20200106214541-16f7b1aa6b61-0363b02d88bb6-2393f61-2073600-16f7b1aa6b719;
JSESSIONID=ABAAABAABEEAAJAA8CD0EDA72E15C2EF8CEA34B3CEB748A; _ga=GA1.2.1798328149.157831
'''
'''
TG-TRACK-CODE=search_code; user_trace_token=20200106214534-53c939b1-10b4-45a1-bb34-daebd661d4ab; 
X_HTTP_TOKEN=acb1a28e7bde8ee74338138751eaff2f5fc5651c92; WEBTJ-ID=20200106214541-16f7b1aa6b61-0363b02d88bb6-2393f61-2073600-16f7b1aa6b719;
 JSESSIONID=ABAAABAABEEAAJAA8CD0EDA72E15C2EF8CEA34B3CEB748A; _ga=GA1.2.1798328149.157831
'''
import requests

# Script: fetch Python job listings from Lagou's Ajax endpoint and print each
# page's JSON payload. A normal listing page is requested first so that the
# session holds the cookies returned by the site before the Ajax calls are made.

# Browser-like User-Agent shared by both header sets.
USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/78.0.3904.108 Safari/537.36'
)

# HTML listing page, requested once to populate the session's cookie jar.
LIST_URL = 'https://www.lagou.com/jobs/list_python/p-city_6'

# Ajax endpoint that returns the job data as JSON.
AJAX_URL = (
    'https://www.lagou.com/jobs/positionAjax.json'
    '?city=%E6%9D%AD%E5%B7%9E&needAddtionalResult=false'
)

# Seconds to wait for the server before giving up; without a timeout,
# requests can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10

# Headers for the initial listing-page request.
header1 = {
    'user-agent': USER_AGENT,
}

# Headers for the Ajax requests; these additionally carry a Referer.
header2 = {
    'user-agent': USER_AGENT,
    'Referer': 'https://www.lagou.com/jobs/list_python',
}

# The context manager closes the session's pooled connections on exit,
# including when a request raises.
with requests.Session() as session:
    # Only the cookies stored on the session matter here, so the response
    # body is discarded; a failed request is surfaced immediately.
    session.get(
        url=LIST_URL, headers=header1, timeout=REQUEST_TIMEOUT
    ).raise_for_status()

    for i in range(10):
        # 'pn' is the page number: i + 2 requests pages 2 through 11.
        data = {
            'first': False,
            'pn': i + 2,
            'kd': 'python',
        }
        jobs = session.post(
            url=AJAX_URL,
            headers=header2,
            data=data,
            timeout=REQUEST_TIMEOUT,
        )
        # Fail with a clear HTTP error rather than an opaque JSON decode
        # error when the server answers with a 4xx/5xx status.
        jobs.raise_for_status()
        print(jobs.json())

posted @ 2020-01-07 17:20  zx125  阅读(166)  评论(0)  编辑  收藏  举报