参考网址
https://blog.csdn.net/qq_35709559/article/details/84859927
爬虫示例: 将百度的图片下载到本地
'''
1.找到图片的url
2.发送请求获取响应
3.保存图片 (流媒体必须以二进制方式写入)
'''
import requests
def uploadBaidu():
    """Download the Baidu logo and save it as ``baidu_log.png``.

    Despite its name, this function downloads: it sends a GET request for
    the logo URL, writes the response body to ``baidu_log.png`` in the
    current working directory and prints ``OK``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.RequestException: on connection failure or timeout.
    """
    url = "https://ss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superman/img/logo/logo_redBlue_32fe2c69.png"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
    }
    # Bound the wait so an unresponsive server cannot hang the script.
    res = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly instead of saving an error page under a .png file name.
    res.raise_for_status()
    # Image data is binary, so the file must be opened in "wb" mode.
    with open("baidu_log.png", "wb") as f:
        f.write(res.content)
    print("OK")
# Run the download only when this file is executed as a script.
if __name__ == "__main__":
    uploadBaidu()
发送带参数的请求
- 以下示例的响应结果,被百度拦截了
import requests
# Base search URL; the query string is supplied separately through `params`.
url = "https://www.baidu.com/s"
# Query-string parameters; requests appends them to the URL.
params = dict(wd="python")
# Browser-like User-Agent header.
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
headers = {"User-Agent": user_agent}
# Send the GET request with both the headers and the query parameters.
resp = requests.get(url, params=params, headers=headers)
# Decode the raw body bytes and show the page source.
body = resp.content.decode()
print(body)
import requests
# Here the query string is written directly into the URL instead of
# being passed as a separate params dict.
url = "https://www.baidu.com/s?wd=python"
# Browser-like User-Agent header.
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
headers = {"User-Agent": user_agent}
# Send the GET request with the custom headers.
resp = requests.get(url, headers=headers)
# Decode the raw body bytes and show the page source.
body = resp.content.decode()
print(body)
- 模拟登录
# 百度翻译移动端
# -*-coding:utf-8-*-
import requests
import json
class BaiduTran(object):
    """Client for the Baidu Translate mobile endpoint (``/basetrans``).

    Call :meth:`get_resp` to send a query, then :meth:`get_result` to read
    the translated text out of the stored response.
    """

    def __init__(self, f="zh", to="en", timeout=10):
        """Store the request settings.

        :param f: value sent as the ``from`` form field.
        :param to: value sent as the ``to`` form field.
        :param timeout: seconds to wait for the server before giving up.
        """
        self.url = "https://fanyi.baidu.com/basetrans"
        self.f = f
        self.to = to
        self.timeout = timeout
        # No request has been sent yet; get_result() checks for this.
        self.resp = None
        self.headers = {
            "User-Agent": "Mozilla / 5.0(Linux;Android 5.0;SM - G900P Build / LRX21T) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 69.0.3497.81 Mobile Safari / 537.36"
        }

    def get_resp(self, query):
        """POST the text to translate and remember the response.

        :param query: text sent as the ``query`` form field.
        :return: the response object, also stored on ``self.resp``.
        """
        data = {
            "query": query,
            "from": self.f,
            "to": self.to,
        }
        # Bound the wait so an unresponsive server cannot hang the caller.
        self.resp = requests.post(
            url=self.url,
            headers=self.headers,
            data=data,
            timeout=self.timeout,
        )
        return self.resp

    def get_result(self):
        """Return the ``dst`` text of the first ``trans`` entry in the response.

        :raises RuntimeError: if no response has been stored yet.
        """
        # getattr keeps this safe even for instances built without __init__.
        resp = getattr(self, "resp", None)
        if resp is None:
            raise RuntimeError("get_resp() must be called before get_result()")
        payload = json.loads(resp.content.decode())
        return payload["trans"][0]["dst"]
# Demo: translate one sample sentence and print the result.
if __name__ == "__main__":
    tran = BaiduTran(f="zh", to="en")
    # The response is also stored on the instance, where get_result() reads it.
    resp = tran.get_resp("今天天气真不错")
    # Debug aid: uncomment to inspect the raw response body.
    # print(resp.content.decode())
    print(tran.get_result())
- 使用IP代理
# -*-coding:utf-8-*-
import requests
import json
# Translate user input through a proxy, switching proxies on failure.
query = input("请输入要翻译的中文:")
url = "https://fanyi.baidu.com/basetrans"
headers = {
    "User-Agent": "Mozilla / 5.0(Linux;Android 5.0;SM - G900P Build / LRX21T) AppleWebKit / 537.36(KHTML, like Gecko) Chrome / 69.0.3497.81 Mobile Safari / 537.36"
}
data = {
    'query': query,
    'from': 'zh',
    'to': 'en'
}

# Upper bound on proxy attempts so a dead proxy pool cannot spin forever.
max_attempts = 5
resp = None
for _ in range(max_attempts):
    try:
        # Fetch one proxy address from the pool ("代理ip池的url" is a
        # placeholder that must be replaced with the real pool URL).
        proxy_str = requests.get("代理ip池的url", timeout=3).text
        print("代理ip:%s" % proxy_str)
        proxies = {"https": proxy_str}
        # Bound the wait so a stalled proxy triggers a switch instead of a hang.
        resp = requests.post(url=url, data=data, headers=headers, proxies=proxies, timeout=10)
        break
    except requests.RequestException:
        # Catch only request failures, so Ctrl-C and real bugs still surface.
        print("更换代理ip...")

if resp is None:
    raise SystemExit("代理全部不可用,已重试 %d 次" % max_attempts)

# Decode once and reuse the text for both the raw dump and the JSON parse.
body = resp.content.decode()
print(body)
# Extract the translated text from the JSON response.
print(json.loads(body)["trans"][0]["dst"])
- 使用requests处理cookie有三种方法
- cookie字符串放在headers中
# Option 1: send the cookies as a raw "Cookie" request header.
headers = dict()
headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"
headers["Cookie"] = " Pycharm-26c2d973=dbb9b300-2483-478f-9f5a-16ca4580177e; Hm_lvt_98b9d8c2fd6608d564bf2ac2ae642948=1512607763; Pycharm-26c2d974=f645329f-338e-486c-82c2-29e2a0205c74; _xsrf=2|d1a3d8ea|c5b07851cbce048bd5453846445de19d|1522379036"
# NOTE: `url` is not defined in this snippet; it must come from surrounding code.
requests.get(url, headers=headers)
- 把cookie字典传给请求方法的cookies参数
# Option 2: pass the cookies as a dict through the `cookies` keyword argument.
# NOTE: `url`, `headers` and `cookie_dict` are not defined in this snippet.
requests.get(url,headers=headers,cookies=cookie_dict)
- 使用requests提供的Session(会话)对象
# Option 3: use a Session, which keeps cookies between requests.
session = requests.Session()
# Session.get() accepts only `url` positionally, so headers must be passed
# by keyword; passing them positionally raises TypeError.
# NOTE: `url` and `headers` are not defined in this snippet.
response = session.get(url, headers=headers)
session实例在请求了一个网站后,对方服务器设置在本地的cookie会保存在session中
下一次再使用session请求对方服务器的时候,会带上前一次的cookie
- 使用 cookies
import requests
url = "http://www.baidu.com"
# Send the request and keep the response object.
response = requests.get(url)
# The cookies set by the server are exposed as a cookie jar.
jar = response.cookies
print(type(jar))  # <class 'requests.cookies.RequestsCookieJar'>
# Flatten the cookie jar into a plain name -> value dict.
cookies = requests.utils.dict_from_cookiejar(jar)
print(cookies)  # e.g. {'BDORZ': '27315'}
- 证书错误的处理
import requests
url = "https://www.12306.cn/mormhweb/"
# Uncomment to suppress the insecure-request warning printed when
# certificate verification is turned off.
# requests.packages.urllib3.disable_warnings()
# WARNING: verify=False disables TLS certificate verification, so the
# server's identity is not checked. Use it only for sites you trust or
# for local experiments, never for sensitive traffic.
response = requests.get(url,verify=False)
- 超时响应
response = requests.get(url,timeout=3) # timeout=3: raise an error if the server does not respond within 3 seconds