使用 Python 的 urllib.request.Request() 构建带 User-Agent(UA)请求头的 HTTP 请求
1. 代码案例:构建 HTTP 请求头
# coding=utf-8
# Fetch the Baidu home page while sending a browser-like User-Agent header.
import urllib.request
import urllib.parse

url = "http://www.baidu.com/"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}

# Wrap the URL in a Request object so the custom headers travel with it.
request = urllib.request.Request(url=url, headers=headers)

# The context manager closes the HTTP response even if read()/decode() raises.
with urllib.request.urlopen(request) as response:
    print(response.read().decode())
2. 代码案例:使用 POST 请求调用百度翻译接口
# Call the Baidu Translate suggestion endpoint with a POST request.
# coding=utf-8
import urllib.request
import urllib.parse

word = "china"
post_url = "https://fanyi.baidu.com/sug"
form_data = {
    'kw': word,
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}

# POST bodies must be bytes: URL-encode the form fields, then encode to bytes.
form_data = urllib.parse.urlencode(form_data).encode()

# Pass headers by keyword. The second positional parameter of Request() is
# `data`, so Request(post_url, headers) would silently drop the headers.
request = urllib.request.Request(url=post_url, data=form_data, headers=headers)

# Supplying `data` makes urllib issue a POST; the context manager closes the
# response when done.
with urllib.request.urlopen(request) as response:
    print(response.read().decode())
3.使用百度翻译接口案例
# coding=utf-8
# POST a word to the Baidu Translate suggestion endpoint, sending a full set of
# browser-captured request headers.
import urllib.request
import urllib.parse

form_data = {
    'kw': 'wolf',
}
post_url = "https://fanyi.baidu.com/sug"

# NOTE(review): the Cookie value below looks like a captured browser session;
# it will presumably expire and should not be treated as a stable credential.
headers = {
    'Host': 'fanyi.baidu.com',
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'Origin': 'https://fanyi.baidu.com',
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Referer': 'https://fanyi.baidu.com/translate?aldtype=16047&query=&keyfrom=baidu&smartresult=dict&lang=auto2zh',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,und;q=0.7',
    'Cookie': 'BAIDUID=78CF95260BCDB8D770F51009AABFDD42:FG=1; BIDUPSID=78CF95260BCDB8D770F51009AABFDD42; PSTM=1585656674; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BDORZ=B490B5EBF6F3CD402E515D22BCDA1598; H_PS_PSSID=31356_1447_21097_31425_31341_31464_30824_26350_31164_31472_22158; delPer=0; PSINO=3; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1588584480,1588693180; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1588693180; __yjsv5_shitong=1.0_7_5362048e36179917959b4ef57e3e3a96d953_300_1588693181749_111.196.240.198_f74edd74; yjs_js_security_passport=607757242e95a02e997e4fc526bc95ce8671c676_1588693182_js',
}

# POST bodies must be bytes: URL-encode the form fields, then encode to bytes.
form_data = urllib.parse.urlencode(form_data).encode()

# Pass headers by keyword: the second positional parameter of Request() is
# `data`, not `headers`.
request = urllib.request.Request(url=post_url, data=form_data, headers=headers)

# Open the Request object (not the bare URL) so the headers above are actually
# sent; the context manager closes the response when done.
with urllib.request.urlopen(request) as response:
    print(response.read().decode())
4. 使用肯德基接口获取肯德基门店列表
# coding=utf8
# Query the KFC store-list endpoint by keyword with a POST request.
import urllib.request
import urllib.parse

post_data = {
    'cname': '',
    'pid': '',
    'keyword': '北京',
    'pageIndex': '1',
    'pageSize': '10',
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}
url = "http://www.kfc.com.cn/kfccda/ashx/GetStoreList.ashx?op=keyword"

# POST bodies must be bytes: URL-encode the form fields, then encode to bytes.
post_data = urllib.parse.urlencode(post_data).encode()

# Pass headers by keyword. The second positional parameter of Request() is
# `data`, so Request(url, headers) would silently drop the User-Agent.
request = urllib.request.Request(url=url, data=post_data, headers=headers)

# The context manager closes the HTTP response once the body has been printed.
with urllib.request.urlopen(request) as response:
    print(response.read().decode())
5.使用python爬取百度贴吧案例
# coding=utf8
# Practice script: download the first three listing pages of the "python"
# Baidu Tieba forum and save each one as an HTML file.
import urllib.request
import urllib.parse
import os
import time

url = "https://tieba.baidu.com/f?"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
}

# Output directory; exist_ok avoids the check-then-create race of
# os.path.exists() followed by os.mkdir().
flod = "./baiduba"
os.makedirs(flod, exist_ok=True)

for page in range(1, 4):
    # The `pn` query parameter advances by 50 per page
    # (presumably the number of posts per listing page).
    pn = (page - 1) * 50
    data = {
        "kw": "python",
        "ie": "utf-8",
        "pn": pn,
    }
    print("第"+str(page)+ "开始下载...")

    query_string = urllib.parse.urlencode(data)
    url_info = url + query_string

    # Request the page-specific URL. Using the bare `url` here would download
    # the same unparameterised page on every iteration.
    request = urllib.request.Request(url=url_info, headers=headers)

    filename = "python_" + str(page) + ".html"
    filepath = flod + "/" + filename

    # Write the raw response bytes to disk; both the HTTP response and the
    # file are closed by the context managers.
    with urllib.request.urlopen(request) as response, open(filepath, "wb") as fp:
        fp.write(response.read())

    print("第"+str(page)+ "结束下载...")
    # Pause between pages to avoid hammering the server.
    time.sleep(3)