Python爬虫之Requests库的基本使用

  1 import requests
  2 response = requests.get('http://www.baidu.com/')
  3 print(type(response))
  4 print(response.status_code)
  5 print(type(response.text))
  6 print(response.text)
  7 print(response.cookies)
  8 
  9 # 各种请求方式
 10 import requests
 11 requests.post('http://httpbin.org/post')
 12 requests.put('http://httpbin.org/put')
 13 requests.delete('http://httpbin.org/delete')
 14 requests.head('http://httpbin.org/get')
 15 requests.options('http://httpbin.org/get')
 16 
 17 # 基本GET请求
 18 import requests
 19 response = requests.get('http://httpbin.org/get')
 20 print(response.text)
 21 
 22 # 带参数GET请求
 23 import requests
 24 response = requests.get('http://httpbin.org/get?name=germey&age=22')
 25 print(response.text)
 26 
 27 import requests
 28 data = {
 29     'name': 'germey',
 30     'age': 22
 31 }
 32 response = requests.get('http://httpbin.org/get', params = data)
 33 print(response.text)
 34 
 35 # 解析json
 36 import requests
 37 import json
 38 response = requests.get('http://httpbin.org/get')
 39 print(type(response.text))
 40 print(response.json())
 41 print(json.loads(response.text))
 42 print(type(response.json()))
 43 
 44 # 获取二进制数据
 45 import requests
 46 response = requests.get('http://github.com/favicon.ico')
 47 print(type(response.text), type(response.content))
 48 print(response.text)
 49 print(response.content)
 50 
 51 # 保存图片
 52 import requests
 53 response = requests.get('http://github.com/favicon.ico')
 54 with open('1.ico', 'wb') as f:
 55     f.write(response.content)
 56     f.close()
 57 
 58 # 添加headers 不添加的话会请求失败的
 59 import requests
 60 response = requests.get('http://www.zhihu.com/explore')
 61 print(response.text)
 62 
 63 import requests
 64 headers = {
 65     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
 66 }
 67 response = requests.get('http://zhihu.com/explore', headers = headers)
 68 print(response.text)
 69 
 70 # 基本的POST请求
 71 import requests
 72 data = {'name': 'germey', 'age': 22}
 73 response = requests.post('http://httpbin.org/post', data = data)
 74 print(response.text)
 75 
 76 import requests
 77 data = {'name':'germey', 'age':22}
 78 headers = {
 79     'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'
 80 }
 81 response = requests.post('http://httpbin.org/post', data = data, headers = headers)
 82 print(response.json())
 83 
 84 # response属性
 85 import requests
 86 response = requests.get('http://www.jianshu.com')
 87 print(type(response.status_code), response.status_code)
 88 print(type(response.headers), response.headers)
 89 print(type(response.cookies), response.cookies)
 90 print(type(response.url), response.url)
 91 print(type(response.history), response.history)
 92 
 93 # 文件上传
 94 import requests
 95 files = {'file':open('1.ico', 'rb')}
 96 response = requests.post('http://httpbin.org/post', files = files)
 97 print(response.text)
 98 
 99 # 获取cookie
100 import requests
101 response = requests.get('http://www.baidu.com')
102 print(response.cookies)
103 for key, value in response.cookies.items():
104     print(key + ' = ' + value)
105 
106 # 会话维持 模拟登陆(第一个例子,相当于在两个不同的浏览器请求页面,所以获取不到cookies,所以要用第二个session对象)
107 import requests
108 requests.get('http://httpbin.org/cookies/set/number/123456789')
109 response = requests.get('http://httpbin.org/cookies')
110 print(response.text)
111 
112 import requests
113 s = requests.session()
114 s.get('http://httpbin.org/cookies/set/number/123456789')
115 response = s.get('http://httpbin.org/cookies')
116 print(response.text)
117 
118 # 证书验证
119 import requests
120 response = requests.get('https://www.12306.cn')
121 print(response.status_code)

 

 1 import requests
 2 from requests.exceptions import ConnectTimeout, HTTPError, ReadTimeout, RequestException
 3 from requests.auth import HTTPBasicAuth
 4 import urllib3
 5 
 6 # 证书验证
 7 # 消除警告(下面的HTTPS页面的请求如果不进行验证就会报警告)
 8 urllib3.disable_warnings()
 9 # verify=False 请求HTTPS页面的时候不进行证书验证,默认为True
10 response = requests.get('https://www.12306.cn', verify=False)
11 print(response.status_code)
12 
13 # 代理设置
14 proxies = {
15     "http": "http://127.0.0.1:9743",
16     "https": "https://127.0.0.1:9743",
17 }
18 response = requests.get("https://www.taobao.com", proxies = proxies)
19 print(response.status_code)
20 
21 # 代理有用户名和密码的情况
22 proxies = {
23     "http": "http://user:password@127.0.0.1:9743/",
24 }
25 response = requests.get("https://www.taobao.com", proxies = proxies)
26 print(response.status_code)
27 
28 # socks代理
29 proxies = {
30     "http": "socks5://127.0.0.1:9742",
31     "https": "socks5://127.0.0.1:9742",
32 }
33 response = requests.get("https://www.taobao.com", proxies = proxies)
34 print(response.status_code)
35 
36 # 超时设置
37 try:
38     response = requests.get("http://httpbin.org/get", timeout=0.2)
39     print(response.status_code)
40 except ConnectTimeout:
41     print("timeout!")
42 
43 # 认证设置 下面两种方法都可以
44 response = requests.get("http://120.27.34.24:9001", auth=HTTPBasicAuth("user", "123"))
45 print(response.status_code)
46 
47 response = requests.get("http://120.27.34.24:9001", auth=("user", "123"))
48 print(response.status_code)
49 
50 # 异常处理
51 try:
52     response = requests.get("http://httpbin.org/get", timeout=0.2)
53     print(response.status_code)
54 except ConnectTimeout:
55     print("timeout!")
56 except RequestException:
57     print("RequestException!")
58 except HTTPError:
59     print("HttpError!")
60 except ReadTimeout:
61     print("ReadTimeout")

 

 1 import requests
 2 response = requests.get("http://www.baidu.com")
 3 print(response.cookies)
 4 print("----------")
 5 # 把cookie对象转化为字典
 6 d = requests.utils.dict_from_cookiejar(response.cookies)
 7 print(d)
 8 print("----------")
 9 # 把字典转化为cookie对象
10 print(requests.utils.cookiejar_from_dict(d))

 

# URL percent-decoding / encoding via the helpers re-exported in
# requests.utils (thin wrappers around urllib.parse.unquote/quote).
# The original snippet used requests without importing it.
import requests

# URL decode — the query value here is GB18030-encoded percent escapes.
print(requests.utils.unquote("http://tieba.baidu.com/f?kw=%D2%D7%D3%EF%D1%D4&fr=ala0&tpl=5", encoding="gb18030"))
# URL encode.
print(requests.utils.quote("http://tieba.baidu.com/f?kw=%D2%D7%D3%EF%D1%D4&fr=ala0&tpl=5"))

 

posted @ 2018-11-27 10:30  李道臣  阅读(402)  评论(0编辑  收藏  举报