Python中的requests库

requests使用了urllib3,但是API更加友好,推荐使用。

安装

$ pip install requests

使用

import requests
import json

# Endpoint queried by this demo; its body is parsed as JSON further down.
url = 'https://movie.douban.com/j/search_subjects'
# Browser-like User-Agent sent in place of the library's default one.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
# Query-string parameters, passed via `params=` so requests builds the URL.
d = {
    'type': 'movie',
    'tag': '热门',
    'page_limit': '10',
    'page_start': '0',
}
# Always pass a timeout: without one, requests can wait forever on a server
# that never answers.
with requests.get(url, headers={'user-agent': user_agent}, params=d, timeout=10) as res:
    print(res.url)  # final request URL, including the query string
    print(res.reason)  # textual reason phrase of the HTTP status
    print(res.status_code)  # numeric HTTP status code
    print(res.cookies)  # cookies returned by the server
    print(res.encoding)  # encoding used to decode res.text
    print(res.content)  # raw response body as bytes
    print(json.loads(res.text))  # body decoded to str, then parsed as JSON
    print(res.request.headers)  # headers of the request that was sent
    print(res.headers)  # headers of the response

使用session

requests.get()等顶层函数每次调用都会在内部创建一个临时的Session对象,请求结束后即丢弃,因此不会在多次请求之间保留cookie。如果要在多次和服务器端交互中保留会话的信息(例如cookie),需要显式创建并复用同一个Session对象。

import requests

# Request settings are identical for every call, so build them once up front.
url = 'https://movie.douban.com/j/search_subjects'
# Alternative endpoint for experimenting: http://httpbin.org/get
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
d = {
    'type': 'movie',
    'tag': '热门',
    'page_limit': '10',
    'page_start': '0',
}

# Create a single Session and reuse it for every request.
with requests.Session() as session:
    # Send the same request twice so the two sets of request headers can be
    # compared.
    for _ in range(2):
        # Going through the same session links the requests: cookies received
        # on the first response are sent automatically with the second request.
        # The timeout keeps the demo from hanging on an unresponsive server.
        with session.get(url, headers={'user-agent': user_agent}, params=d, timeout=10) as res:
            print('=' * 100)
            print(res.request.headers)  # headers of the request that was sent
            print(res.headers)  # headers of the response
            print('=' * 100)

使用session访问,第二次带上了cookie。

使用代理

import requests

# Placeholder proxy address in host:port form; substitute a real proxy.
ip = "xxx.xxx.xxx.xxx:16255"
# The dict key is the scheme of the target URL; the value is the proxy to use.
# Both http and https targets are routed through the same HTTP proxy here.
proxy_url = f'http://{ip}'
proxies = {
    'http': proxy_url,
    'https': proxy_url,
}
url = "https://httpbin.org/get"
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
# A timeout keeps the request from hanging forever if the proxy is dead or slow.
with requests.get(url, headers={'user-agent': user_agent}, proxies=proxies, timeout=10) as res:
    print(res.url)  # final request URL
    print(res.reason)  # textual reason phrase of the HTTP status
    print(res.status_code)  # numeric HTTP status code
    print(res.cookies)  # cookies returned by the server
    print(res.encoding)  # encoding used to decode res.text
    print(res.content)  # raw response body as bytes
    print(res.text)  # response body decoded to str
    print(res.request.headers)  # headers of the request that was sent
    print(res.headers)  # headers of the response
"""
https://httpbin.org/get
OK
200
<RequestsCookieJar[]>
utf-8
b'{\n  "args": {}, \n  "headers": {\n    "Accept": "*/*", \n    "Accept-Encoding": "gzip, deflate", \n    "Host": "httpbin.org", \n    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0", \n    "X-Amzn-Trace-Id": "Root=1-653f4b28-49b2e5b7060xxx6d233b3"\n  }, \n  "origin": "xxx.xxx.xxx.xxx", \n  "url": "https://httpbin.org/get"\n}\n'
{
  "args": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Host": "httpbin.org", 
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0", 
    "X-Amzn-Trace-Id": "Root=1-653f4b28-49b2e5b7060xxx6d233b3"
  }, 
  "origin": "xxx.xxx.xxx.xxx", 
  "url": "https://httpbin.org/get"
}

{'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0', 'Accept-Encoding': 'gzip, deflate', 'Accept': '*/*', 'Connection': 'keep-alive'}
{'Date': 'Mon, 30 Oct 2023 06:20:24 GMT', 'Content-Type': 'application/json', 'Content-Length': '364', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true'}
"""

忽略证书

关闭证书验证参数(关闭后无法防范中间人攻击,仅建议在测试环境或使用自签名证书的内网环境中使用)

verify=False

import requests

url = "https://192.168.140.3"
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'
# verify=False skips TLS certificate verification, so the server's identity is
# not checked; urllib3 emits an InsecureRequestWarning for such requests.
# The timeout keeps the demo from hanging if the host is unreachable.
with requests.get(url=url, headers={'user-agent': user_agent}, verify=False, timeout=10) as res:
    print(res.status_code)  # numeric HTTP status code
"""
\venv\lib\site-packages\urllib3\connectionpool.py:1100: InsecureRequestWarning: Unverified HTTPS request is being made to host '192.168.140.3'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings
  warnings.warn(
200
"""

会有警告的信息

关闭警告信息

import requests
# Import urllib3 directly rather than through the legacy `requests.packages`
# compatibility alias.
import urllib3

url = "https://192.168.140.3"
user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'

# Silence the InsecureRequestWarning that unverified HTTPS requests trigger.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# verify=False skips TLS certificate verification; the timeout keeps the demo
# from hanging if the host is unreachable.
with requests.get(url=url, headers={'user-agent': user_agent}, verify=False, timeout=10) as res:
    print(res.status_code)  # numeric HTTP status code
"""
200
"""
posted @ 2023-10-30 15:37  厚礼蝎  阅读(21)  评论(0)  编辑  收藏  举报