爬虫05-requests库用法
1.常用函数
import requests

# Demo 1: fetch a page and inspect the basic attributes of the Response object.
# A timeout is passed so the script cannot block forever on an unresponsive
# server (requests applies no timeout unless one is given).
response = requests.get("http://www.baidu.com", timeout=10)

# response.text is the body decoded to str using the encoding requests guessed;
# uncomment to inspect it.
# print(type(response.text))
# print(response.text)

print(response.url)            # final URL of the request
print(response.encoding)       # encoding requests will use for response.text
print(response.status_code)    # HTTP status code
print(type(response.content))  # response.content is the raw body as bytes
# Decode the raw bytes explicitly instead of relying on the guessed encoding.
print(response.content.decode("utf-8"))
2.伪装浏览器
import requests

# Demo 2: send a search request that looks like it comes from a real browser
# and save the returned HTML to disk.

# Query-string parameters; passed via ``params=`` so requests builds the URL.
params = {
    "wd": "杰伦",
}
# A browser User-Agent is sent in place of the default requests one.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
}

# The timeout keeps the script from hanging indefinitely on a stalled connection.
response = requests.get(
    "http://www.baidu.com/s",
    params=params,
    headers=headers,
    timeout=10,
)

# Decode the raw bytes as UTF-8 and write them out with the same encoding.
with open("baidurequests.html", "w", encoding="utf-8") as fp:
    fp.write(response.content.decode("utf-8"))

# Show the final URL, including the encoded query string.
print(response.url)
3.实战拉勾
import requests

# Demo 3: POST a job search to Lagou's Ajax endpoint and print the result.

url = "https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false"

# NOTE(review): this Cookie is a captured, logged-in browser session. It is
# tied to one account and one point in time, so it will stop working, and
# session tokens should not be kept in source control. Replace it with a fresh
# value (ideally loaded from outside the file) before running.
# The value is split into adjacent string literals purely for readability;
# Python joins them into the single header string.
cookie = (
    "user_trace_token=20200226133453-084540c1-9531-4fa8-873f-0dda32aa3ca4; "
    "_ga=GA1.2.836052667.1582695295; "
    "LGUID=20200226133454-167deda5-1930-4e79-8834-719427ac01be; "
    "index_location_city=%E5%85%A8%E5%9B%BD; "
    "lagou_utm_source=A; "
    "_gid=GA1.2.1642855754.1583813092; "
    "sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221707ffdf39c2c3-0001957fd8ade1-3a614f0b-2073600-1707ffdf39de5f%22%2C%22%24device_id%22%3A%221707ffdf39c2c3-0001957fd8ade1-3a614f0b-2073600-1707ffdf39de5f%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_referrer_host%22%3A%22%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%7D%7D; "
    "Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1583857959; "
    "JSESSIONID=ABAAAECABGFABFF0E739278063E3324CD2FE653F32A6622; "
    "WEBTJ-ID=20200311144337-170c855977310b-0f5296a11457c4-3a614f0b-2073600-170c8559774c80; "
    "PRE_UTM=; "
    "PRE_LAND=https%3A%2F%2Fwww.lagou.com%2F; "
    "LGSID=20200311144338-e560d149-c95f-4bd7-8ae9-1e69626b0f80; "
    "PRE_HOST=www.baidu.com; "
    "PRE_SITE=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DFjfU-JD2bRU1R9CgLM1PGWfu8IKwxQNv4B2yvPQ55Fe%26wd%3D%26eqid%3Dc8e6a11c0004f43f000000055e688896; "
    "TG-TRACK-CODE=index_search; "
    "gate_login_token=5976db005818f45ed7756b1348563965e46f1400511d886af3d4d57dd9d9166a; "
    "LG_LOGIN_USER_ID=5b895ff2a4e23c48dc4c9110a6a1361bbf709630b5b17ac6756340fef1babfbf; "
    "LG_HAS_LOGIN=1; "
    "_putrc=387928C58CE0A7D1123F89F2B170EADC; "
    "login=true; "
    "unick=%E7%90%B3%E7%90%B3; "
    "showExpriedIndex=1; "
    "showExpriedCompanyHome=1; "
    "showExpriedMyPublish=1; "
    "hasDeliver=0; "
    "privacyPolicyPopup=false; "
    "_gat=1; "
    "SEARCH_ID=6eed253b1a0140ed8837d520d33f469b; "
    "X_HTTP_TOKEN=9944cc335d13b0d38539093851b568c7665cd1a0ff; "
    "LGRID=20200311144918-950d603b-ada0-4eba-9e4e-40cdc9bd6fdf"
)

# Browser-like headers: User-Agent, the Referer of the search results page,
# and the session cookie above.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
    "Referer": "https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=",
    "Cookie": cookie,
}

# Form fields sent in the POST body. The original spelled these
# "frist"/"ture"; corrected to "first"/"true".
# presumably pn is the page number and kd the search keyword - confirm against
# the request the browser actually sends.
data = {
    "first": "true",
    "pn": "1",
    "kd": "python",
}

# The timeout keeps the script from hanging indefinitely on a stalled connection.
response = requests.post(url, data=data, headers=headers, timeout=10)

print(response.text)
# response.json() parses the body as JSON and returns a dict or a list.
print(response.json())