python3 urllib
官方文档
官方文档:https://docs.python.org/3/library/urllib.html
获取页面内容
第一种方式
import urllib.request

url = 'https://www.baidu.com/'

# Fetch a page by passing the URL string straight to urlopen().
# The with-statement closes the response (and its underlying connection)
# even if read() or decode() raises.
with urllib.request.urlopen(url) as r:
    print(r)  # <http.client.HTTPResponse object at 0x00000201E6C66CF8>
    print(r.read().decode('utf-8'))
另一种方式
import urllib.request

url = 'http://www.cnblogs.com/0bug/'

# Alternative: wrap the URL in a Request object first, then hand that
# object to urlopen() instead of the bare URL string.
req = urllib.request.Request(url)

# Close the response deterministically once the body has been printed.
with urllib.request.urlopen(req) as res:
    print(res.read().decode('utf-8'))
发送内容
import urllib.parse
import urllib.request

url = 'http://httpbin.org/post'

# urlopen() needs the request body as bytes: URL-encode the form fields,
# then encode the resulting string as UTF-8.
data = urllib.parse.urlencode({'name': 'lcg'}).encode('utf-8')

# Supplying data sends the form fields as the request body.
# The with-statement closes the response when the block exits.
with urllib.request.urlopen(url, data=data) as r:
    print(r.read().decode('utf-8'))
设置超时时间
通过 urlopen 的 timeout 参数设置超时时间(单位:秒)
import urllib.request

url = 'http://www.cnblogs.com/0bug/'

# timeout is given in seconds and applies to blocking operations such as
# the connection attempt.
# The with-statement closes the response when the block exits.
with urllib.request.urlopen(url, timeout=1) as r:
    print(r.read().decode('utf8'))
异常处理
import socket
import urllib.error
import urllib.request

url = 'http://www.cnblogs.com/0bug/'

try:
    # Very small timeout (seconds) -- presumably chosen so that the request
    # times out and the handlers below are exercised.
    with urllib.request.urlopen(url, timeout=0.01) as r:
        print(r.read().decode('utf8'))
except socket.timeout:
    # A timeout while waiting for or reading the response is raised
    # directly, not wrapped in URLError, so it needs its own handler.
    print('请求超时')
except urllib.error.URLError as e:
    # A timeout during the connection attempt is wrapped: the original
    # exception is available as e.reason.
    if isinstance(e.reason, socket.timeout):
        print('请求超时')
    else:
        # Any other failure (DNS error, refused connection, HTTP error
        # status, ...) is not a timeout; do not hide it.
        raise
响应码、响应头
import urllib.request

url = 'http://www.cnblogs.com/0bug/'

# Inspect the status code and headers of the response, then let the
# with-statement close it.
with urllib.request.urlopen(url) as r:
    print(r.status)  # 200
    # All headers as a list of (name, value) tuples.
    print(r.getheaders())  # [('Content-Type', 'text/html; charset=utf-8'), ...]
    # A single header looked up by name.
    print(r.getheader('Content-Type'))  # text/html; charset=utf-8
构造请求信息
import urllib.parse
import urllib.request

url = 'http://www.cnblogs.com/0bug/'

# Custom request headers, supplied up front through the Request constructor.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36',
    'Host': 'www.cnblogs.com',
}

# Form fields, URL-encoded and converted to bytes for the request body.
dic = {'name': 'lcg'}
data = urllib.parse.urlencode(dic).encode('utf-8')

# Build the full request: target URL, body, headers and explicit HTTP method.
req = urllib.request.Request(url=url, data=data, headers=headers, method='POST')

# The with-statement closes the response when the block exits.
with urllib.request.urlopen(req) as res:
    print(res.read().decode('utf-8'))
另一种添加请求头的方式
import urllib.parse
import urllib.request

url = 'http://www.cnblogs.com/0bug/'

# Form fields, URL-encoded and converted to bytes for the request body.
dic = {'name': 'lcg'}
data = urllib.parse.urlencode(dic).encode('utf-8')

req = urllib.request.Request(url=url, data=data, method='POST')
# Alternative to the headers= argument: attach headers one at a time after
# the Request has been created.
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/53...')

# The with-statement closes the response when the block exits.
with urllib.request.urlopen(req) as res:
    print(res.read().decode('utf-8'))
代理
import urllib.request

url = 'http://www.cnblogs.com/0bug/'

# Map each URL scheme to the proxy that requests of that scheme go through.
proxy_handler = urllib.request.ProxyHandler({
    'http': 'http://122.114.31.177:808',
    'https': 'https://124.133.75.183:8118',
})

# Build an opener that uses the proxy handler, and send the request through
# it instead of the module-level urlopen().
opener = urllib.request.build_opener(proxy_handler)

# The with-statement closes the response when the block exits.
with opener.open(url) as r:
    print(r.read().decode('utf-8'))