requests 包功能大全
requests
['ConnectTimeout', 'ConnectionError', 'DependencyWarning', 'FileModeWarning', 'HTTPError', 'NullHandler', 'PreparedRequest', 'ReadTimeout', 'Request', 'RequestException', 'RequestsDependencyWarning', 'Response', 'Session', 'Timeout', 'TooManyRedirects', 'URLRequired'
, 'adapters', 'api', 'auth', 'certs', 'chardet', 'check_compatibility', 'codes', 'compat', 'cookies', 'delete', 'exceptions', 'get', 'head', 'hooks', 'logging', 'models', 'options', 'packages', 'patch', 'post', 'put', 'request', 'session', 'sessions', 'ssl', 'status_codes', 'structures', 'urllib3', 'utils', 'warnings']
requests.cookies
['CookieConflictError', 'MockRequest', 'MockResponse', 'Morsel', 'MutableMapping', 'RequestsCookieJar', 'calendar', 'cookiejar_from_dict', 'cookielib', 'copy', 'create_cookie', 'extract_cookies_to_jar', 'get_cookie_header', 'merge_cookies', 'morsel_to_cookie', 'remove_cookie_by_name', 'threading', 'time', 'to_native_string', 'urlparse', 'urlunparse']
requests.cookies.RequestsCookieJar
'get_dict', 'list_domains', 'list_paths'
'keys', 'values', 'items', 'iteritems', 'iterkeys', 'itervalues'
'domain_re', 'dots_re', 'magic_re', 'non_word_re', 'quote_re', 'strict_domain_re'
'add_cookie_header', 'copy', 'extract_cookies', 'get', 'get_policy', 'make_cookies', 'multiple_domains', 'update'
'clear', 'clear_expired_cookies', 'clear_session_cookies'
'pop', 'popitem', 'set', 'set_cookie', 'set_cookie_if_ok', 'set_policy', 'setdefault'
requests.utils
['CaseInsensitiveDict', 'DEFAULT_CA_BUNDLE_PATH', 'DEFAULT_PORTS', 'FileModeWarning', 'InvalidHeader', 'InvalidURL', 'Mapping', 'NETRC_FILES', 'OrderedDict', 'UNRESERVED_SET', 'UnrewindableBodyError', 'add_dict_to_cookiejar', 'address_in_network', 'basestring', 'bytes', 'certs', 'check_header_validity', 'codecs', 'contextlib', 'cookiejar_from_dict', 'default_headers', 'default_user_agent', 'dict_from_cookiejar', 'dict_to_sequence', 'dotted_netmask', 'extract_zipped_paths', 'from_key_val_list', 'get_auth_from_url', 'get_encoding_from_headers', 'get_encodings_from_content', 'get_environ_proxies', 'get_netrc_auth', 'get_unicode_from_response', 'getproxies', 'getproxies_environment', 'guess_filename', 'guess_json_utf', 'integer_types', 'io', 'is_ipv4_address', 'is_py3', 'is_valid_cidr', 'iter_slices', 'os', 'parse_dict_header', 'parse_header_links', 'parse_list_header', 'prepend_scheme_if_needed', 'proxy_bypass', 'proxy_bypass_environment', 'proxy_bypass_registry', 'quote', 're', 'requote_uri', 'rewind_body', 'select_proxy', 'set_environ', 'should_bypass_proxies', 'socket', 'str', 'stream_decode_response_unicode', 'struct', 'super_len', 'sys', 'tempfile', 'to_key_val_list', 'to_native_string', 'unquote', 'unquote_header_value', 'unquote_unreserved', 'urldefragauth', 'urlparse', 'urlunparse', 'warnings', 'zipfile']
requests.session
requests.sessions.Session
['adapters', 'auth', 'cert', 'close', 'cookies', 'delete', 'get', 'get_adapter', 'get_redirect_target', 'head', 'headers', 'hooks', 'max_redirects', 'merge_environment_settings', 'mount', 'options', 'params', 'patch', 'post', 'prepare_request', 'proxies', 'put', 'rebuild_auth', 'rebuild_method', 'rebuild_proxies', 'request', 'resolve_redirects', 'send', 'should_strip_auth', 'stream', 'trust_env', 'verify']
1. 根据字典生成cookiejar
requests.cookies.cookiejar_from_dict和requests.utils.cookiejar_from_dict等价,都是根据字典生成cookiejar(requests.cookies.RequestsCookieJar)
2.根据cookiejar生成字典
requests.utils.dict_from_cookiejar(cookies),是根据cookiejar(requests.cookies.RequestsCookieJar)生成字典
方法(1):直接把字典作为cookies参数传入
# Log in first; the response carries the cookies set by the server.
res = requests.post(url=login_url, headers=headers, data=body)
# Convert the response's cookie jar into a plain dict.
cookie = requests.utils.dict_from_cookiejar(res.cookies)
# Send the follow-up request with that dict passed as ``cookies``.
res = requests.get(url=get_data_url, cookies=cookie)
方法(2):把cookie拼接成字符串,放入请求头的cookie字段
# Log in first; the response carries the cookies set by the server.
res = requests.post(url=login_url, headers=headers, data=body)
cookies = res.cookies.items()
# Render every (name, value) pair as "name=value;" and concatenate them
# into a single string for the "cookie" request header.
cookie = ''.join('{0}={1};'.format(key, val) for key, val in cookies)
headers = {"cookie": cookie}
res = requests.get(url=get_data_url, headers=headers)
selenium涉及的cookie函数
'add_cookie',
'get_cookies'
根据selenium获取的cookie值,设置requests的cookie值
# Start from a requests session with its headers emptied.
sess = requests.Session()
sess.headers.clear()
# Read the cookies currently held by the selenium driver.
cookies = driver.get_cookies()
# Copy each cookie's name and value into the session's cookie jar.
for entry in cookies:
    sess.cookies.set(entry['name'], entry['value'])
requests.utils.dict_from_cookiejar是requests库提供的方法,用于把RequestsCookieJar对象转换为字典,该字典只保留每个cookie的name和value;而selenium的get_cookies()返回的每个cookie除name、value外,还包含domain、path等信息。因此上面把selenium的cookie设置到requests时,只取了name和value两个字段。
根据requests获取的cookie值,设置selenium的cookie值
import requests
import time
from selenium import webdriver
def get_requests_cookies():
    """Fetch http://baidu.com with requests and return the response cookies.

    Returns:
        The value of ``response.cookies.get_dict()`` for that request.
    """
    url = "http://baidu.com"
    response = requests.get(url)
    # Return the dict so the caller can pass it on (e.g. to get_webdriver).
    return response.cookies.get_dict()
def get_webdriver(cookies):
    """Load baidu.com in headless Chrome with the given cookies applied.

    Args:
        cookies: Mapping of cookie name to cookie value; iterated with
            ``.items()``.

    Returns:
        The page source of http://baidu.com after it is reloaded with the
        cookies set.
    """
    opt = webdriver.ChromeOptions()
    # Headless mode (no visible window). ``set_headless()`` was deprecated
    # and later removed from selenium, so pass the Chrome flag directly.
    opt.add_argument("--headless")
    brows = webdriver.Chrome(options=opt)
    try:
        # add_cookie() has to come after a first get(); calling it before
        # any page is loaded raises an error.
        brows.get("http://baidu.com")
        for k, v in cookies.items():
            # "name" and "value" are the minimal form. Optional keys such as
            # "domain" may be added, but a "domain" that does not match the
            # loaded page is rejected by the driver.
            brows.add_cookie({"name": k, "value": v, "path": "/"})
        time.sleep(2)
        # Load the page again so the request is sent with the new cookies.
        brows.get("http://baidu.com")
        # Source of the page as currently rendered by the browser.
        return brows.page_source
    finally:
        # quit() ends the whole driver session, also when a step above raised.
        brows.quit()