# Python3 requests库基本使用 (Python 3 requests library: basic usage)
"""Walk-through of everyday ``requests`` usage.

``requests`` wraps urllib3 and offers a simpler way to issue GET and POST
requests. Topics covered below (every example except the last one is left
commented out, so running the script performs a single request):

- JSON data handling
- adding request headers
- sending POST / GET requests
- file upload
- skipping certificate verification and silencing the warning
- pages that require a user name and password
- response status codes
- request / response exception handling
- response attributes
- session persistence (keeping cookies)
"""

# requests is built on top of urllib3.
import requests
# Timeout is the common parent of ConnectTimeout and ReadTimeout, so catching
# it below covers both; ReadTimeout stays imported for the commented examples.
from requests.exceptions import (
    HTTPError,
    RequestException,
    ReadTimeout,
    Timeout,
)

# ######## the other request methods
# requests.post('http://httpbin.org/post')
# requests.put('http://httpbin.org/put')
# requests.delete('http://httpbin.org/delete')
# requests.head('http://httpbin.org/get')
# requests.options('http://httpbin.org/get')

# ######## send a GET request with query parameters
# data = {
#     'name': 'kaige',
#     'age': 22
# }
# response = requests.get('http://httpbin.org/get', params=data)
# print(response.text)  # response body

# If the server returns a JSON object, read it with .json()
# response.json()  # equivalent to json.loads(response.text)

# Get the raw binary data
# response.content

# Save the binary data
# with open('xxx.ico', 'wb') as f:  # use the extension matching the data's format
#     f.write(response.content)
#     f.close()  # redundant: the with-block already closes f

# ------------------------------------- adding headers
# ########
# response = requests.get('https://www.zhihu.com/explore')  # access denied
# print(response.text)

# headers = {
#     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
# }
# response = requests.get('https://www.zhihu.com/explore', headers=headers)
# print(response.text)

# ------------------------- POST request
# Basic request with form data; headers are added the same way as for GET
# data = {
#     'name': 'kaige', 'age': 22
# }
# response = requests.post('http://httpbin.org/post', data=data)
# print(response.text)

# --------------------------- attributes of the response
# response = requests.get('http://www.jianshu.com')
# print(type(response.status_code), response.status_code)  # status code
# print(type(response.headers), response.headers)  # headers
# print(type(response.cookies), response.cookies)  # cookies
# print(type(response.url), response.url)  # url
# print(type(response.history), response.history)

# ---------------------------- checking the status code
# response = requests.get('http://www.jianshu.com/hello.html')
# exit() if not response.status_code == requests.codes.not_found else print('404 Not Found')
# exit() if not response.status_code == 404 else print('404 Not Found')
# print(response.status_code)
# The numeric code can also be compared directly; see the official docs
# for the list of codes.

# ------------------------- file upload
# import requests
# files = {'file': open('favicon.ico', 'rb')}
# response = requests.post('http://httpbin.org/post', files=files)
# print(response.text)

# ------------------------ reading cookies
# import requests
# response = requests.get('https://www.baidu.com')
# print(response.cookies)
# for key, value in response.cookies.items():
#     print(key, value)

# ------------------------- keeping a session
# import requests
# s = requests.Session()
# To send a POST use s.post(); a Session behaves like working in one browser
# s.get('http://httpbin.org/cookies/set/number/123456789')
# response = s.get('http://httpbin.org/cookies')
# print(response.text)

# ----------------------------- certificate verification
# import requests
# If the certificate is not trusted the request fails. Passing verify=False
# skips verification but a warning is still emitted; it can be silenced with
# urllib3.disable_warnings() (urllib3 imported from requests.packages).
# from requests.packages import urllib3
# # urllib3.disable_warnings()
# response = requests.get('https://www.12306.cn', verify=False)
# print(response.status_code)

# Specify the certificate to use
# requests.get('https://www.12306.cn', cert=('xxx.crt', 'xxx/key'))

# ----------------------------- proxy settings
# import requests
# proxies = {
#     'http': 'http://xxx.xx.xx.x:xxxx',
#     'https': 'https://111.1.1.1:2222',
# }
# response = requests.get('https://www.taobao.com', proxies=proxies)
# print(response.status_code)

# ######### proxy that needs a user name and password
# import requests
# proxies = {
#     'http': 'http://user:password@111.11.1.1:2222',
# }
# response = requests.get('https://www.taobao.com', proxies=proxies)
# print(response)

# ########## SOCKS proxy
# pip install 'requests[socks]'
# proxies = {
#     'http': 'socks5://111.1.1.1:2222',
# }
# (used the same way as above)

# ------------------------------ timeout setting
# import requests
# from requests.exceptions import ReadTimeout, Timeout
# try:
#     response = requests.get('https://www.baidu.com', timeout=0.01)
#     print(response.status_code)
# except Timeout:
#     print('Timeout')

# ---------------------- authentication (the site requires a login)
# import requests
# r = requests.get('http://120.27.34.24:9001', auth=('user', '123'), timeout=1)
# print(r.status_code)

# --------------------------- exception handling
# Handlers go from most specific to most general: Timeout and HTTPError are
# both subclasses of RequestException, so the base class must come last.
try:
    # The very small timeout is what makes the Timeout branch easy to hit.
    response = requests.get('http://httpbin.org/get', timeout=0.01)
    print(response.status_code)
    # requests.get() does not raise for 4xx/5xx replies on its own; without
    # this call the HTTPError handler below could never run.
    response.raise_for_status()
except Timeout:
    print('TImeout')
except HTTPError:
    print('http err')
except RequestException:
    print('父类异常')