Python中的requests库
requests使用了urllib3,但是API更加友好,推荐使用。
安装
$ pip install requests
使用
import requests
import json
# Query Douban's movie-search JSON endpoint and print the main attributes of
# the response object.
url = 'https://movie.douban.com/j/search_subjects'
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
# Query-string parameters; requests URL-encodes them (including the
# non-ASCII tag) and appends them to the URL.
d = {
    'type': 'movie',
    'tag': '热门',
    'page_limit': '10',
    'page_start': '0',
}
# Without a timeout requests waits indefinitely on an unresponsive server.
with requests.get(url, headers={'user-agent': user_agent}, params=d, timeout=10) as res:
    print(res.url)  # final URL, query string included
    print(res.reason)  # textual status, e.g. "OK"
    print(res.status_code)  # numeric HTTP status code
    print(res.cookies)  # cookies set by the server
    print(res.encoding)  # encoding used to decode res.text
    print(res.content)  # raw response body as bytes
    print(json.loads(res.text))  # body text parsed as JSON
    print(res.request.headers)  # headers that were sent
    print(res.headers)  # headers that were received
使用session
requests的顶层函数(如requests.get)每次调用都会在内部创建一个临时的Session对象,调用结束后会话信息不会保留;显式创建并复用同一个Session对象,才能在多次和服务器端交互中保留会话的信息,例如cookie。
import requests
# Send the same request twice through one Session to show that cookies
# received on the first response are sent back automatically on the second.

# The request settings never change between iterations, so build them once.
url = 'https://movie.douban.com/j/search_subjects'
# Alternative endpoint to try instead: 'http://httpbin.org/get'
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
d = {
    'type': 'movie',
    'tag': '热门',
    'page_limit': '10',
    'page_start': '0',
}

# Create a single Session object; the context manager closes it on exit.
with requests.Session() as session:
    # Visit the URL twice.
    for _ in range(2):
        # Both requests go through the same session, so they are related:
        # the second one carries the cookies set by the first response.
        with session.get(url, headers={'user-agent': user_agent}, params=d, timeout=10) as res:
            print('=' * 100)
            print(res.request.headers)  # headers that were sent
            print(res.headers)  # headers that were received
            print('=' * 100)
使用session访问,第二次带上了cookie。
使用代理
import requests
# Send a request through an HTTP proxy and print the main attributes of the
# response object.
ip = "xxx.xxx.xxx.xxx:16255"  # placeholder: substitute the proxy's host:port
# Both plain-HTTP and HTTPS traffic are routed through the same proxy URL.
proxy_url = 'http://{}'.format(ip)
proxies = {
    'http': proxy_url,
    'https': proxy_url,
}
url = "https://httpbin.org/get"
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
# Without a timeout requests waits indefinitely on an unresponsive proxy.
with requests.get(url, headers={'user-agent': user_agent}, proxies=proxies, timeout=10) as res:
    print(res.url)  # final URL
    print(res.reason)  # textual status, e.g. "OK"
    print(res.status_code)  # numeric HTTP status code
    print(res.cookies)  # cookies set by the server
    print(res.encoding)  # encoding used to decode res.text
    print(res.content)  # raw response body as bytes
    print(res.text)  # response body decoded to text
    print(res.request.headers)  # headers that were sent
    print(res.headers)  # headers that were received
"""
https://httpbin.org/get
OK
200
<RequestsCookieJar[]>
utf-8
b'{\n "args": {}, \n "headers": {\n "Accept": "*/*", \n "Accept-Encoding": "gzip, deflate", \n "Host": "httpbin.org", \n "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0", \n "X-Amzn-Trace-Id": "Root=1-653f4b28-49b2e5b7060xxx6d233b3"\n }, \n "origin": "xxx.xxx.xxx.xxx", \n "url": "https://httpbin.org/get"\n}\n'
{
"args": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Host": "httpbin.org",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0",
"X-Amzn-Trace-Id": "Root=1-653f4b28-49b2e5b7060xxx6d233b3"
},
"origin": "xxx.xxx.xxx.xxx",
"url": "https://httpbin.org/get"
}
{'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}
{'Date': 'Mon, 30 Oct 2023 06:20:24 GMT', 'Content-Type': 'application/json', 'Content-Length': '364', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true'}
"""
忽略证书
关闭证书验证参数
verify=False
import requests
# Request an HTTPS host while skipping certificate verification.
url = "https://192.168.140.3"
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
# WARNING: verify=False turns off TLS certificate checking, so the server's
# identity is not validated. urllib3 emits an InsecureRequestWarning for
# each such request.
with requests.get(url=url, headers={'user-agent': user_agent}, verify=False, timeout=10) as res:
    print(res.status_code)  # numeric HTTP status code
"""
\venv\lib\site-packages\urllib3\connectionpool.py:1100: InsecureRequestWarning: Unverified HTTPS request is being made to host '192.168.140.3'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings
warnings.warn(
200
"""
会有警告的信息
关闭警告信息
import requests
from requests.packages.urllib3.exceptions import InsecureRequestWarning
# Request an HTTPS host without certificate verification, with the
# resulting InsecureRequestWarning silenced.
url = "https://192.168.140.3"
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
# Suppress the warning urllib3 raises for unverified HTTPS requests. This
# hides the message only; the connection is still unverified.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# WARNING: verify=False turns off TLS certificate checking.
with requests.get(url=url, headers={'user-agent': user_agent}, verify=False, timeout=10) as res:
    print(res.status_code)  # numeric HTTP status code
"""
200
"""
本文来自博客园,作者:厚礼蝎,转载请注明原文链接:https://www.cnblogs.com/guangdelw/p/17798019.html