urllib/requests基本库的使用
urllib的使用
import urllib.request

# Send a GET request and inspect the response. The context manager closes
# the underlying connection as soon as the block finishes, so the socket is
# not left open.
with urllib.request.urlopen("https://www.baidu.com/") as response:
    print(type(response))
    print(response.status)  # HTTP status code
    print(response.getheaders())  # all response headers
    print(response.getheader('Server'))  # value of the Server header
    print(response.read().decode('utf-8'))
data参数(模拟表单的提交方式,以POST方式传输数据)
import urllib.parse
import urllib.request

# Passing `data` makes urlopen issue a POST. The body must be bytes, so the
# form fields are URL-encoded and then encoded as UTF-8.
data = urllib.parse.urlencode({'word': 'hello'}).encode('utf-8')

# The context manager closes the connection once the body has been read.
with urllib.request.urlopen('http://httpbin.org/post', data=data) as response:
    print(response.read().decode('utf-8'))
timeout参数(设置timeout实现超时处理)
import socket
import urllib.error
import urllib.request

# Use a deliberately tiny timeout so the request is expected to time out.
try:
    response = urllib.request.urlopen('http://httpbin.org/get', timeout=0.1)
except urllib.error.URLError as e:
    # urlopen wraps the low-level socket error; the cause is in e.reason.
    if isinstance(e.reason, socket.timeout):
        print('Time Out')
    else:
        # Any other failure (DNS, refused connection, ...) is not a timeout;
        # surface it instead of silently ignoring it.
        raise
else:
    # The request unexpectedly succeeded; release the connection.
    response.close()
Request(urlopen方法可以实现最基本的请求发起,但几个简单的参数不足以构建一个完整的请求,所以利用更强大的Request类来构建)
from urllib import request, parse

url = 'http://httpbin.org/post'

# Request headers sent along with the POST.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:58.0) Gecko/20100101 Firefox/58.0',
    'Host': 'httpbin.org',
}

# Form fields to submit. Named `form_data` rather than `dict` so the builtin
# `dict` type is not shadowed for the rest of the module.
form_data = {
    'name': 'Germey',
}

# The request body must be bytes: URL-encode the form, then encode as UTF-8.
data = parse.urlencode(form_data).encode('utf-8')

# A Request object carries the URL, body, headers and HTTP method together.
req = request.Request(url=url, data=data, headers=headers, method='POST')

# The context manager closes the connection once the body has been read.
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))