python3 urllib

官方文档

官方文档:https://docs.python.org/3/library/urllib.html

获取页面内容

第一种方式

import urllib.request

# Fetch a page with a plain GET: urlopen accepts the URL string directly.
url = 'https://www.baidu.com/'
# The response object is a context manager; using `with` guarantees the
# underlying connection is closed even if read()/decode() raises.
with urllib.request.urlopen(url) as r:
    print(r)  # <http.client.HTTPResponse object at 0x00000201E6C66CF8>
    # read() returns bytes, so decode before printing as text.
    print(r.read().decode('utf-8'))

另一种方式

import urllib.request

# Same GET request, but built explicitly as a Request object first;
# urlopen accepts either a URL string or a Request.
url = 'http://www.cnblogs.com/0bug/'
req = urllib.request.Request(url)
# `with` closes the response (and its socket) once the body has been read.
with urllib.request.urlopen(req) as res:
    print(res.read().decode('utf-8'))

发送内容

import urllib.request
import urllib.parse

# Send form data: passing `data` to urlopen turns the request into a POST.
url = 'http://httpbin.org/post'
# The body must be bytes: URL-encode the form fields, then encode the string.
data = urllib.parse.urlencode({'name': 'lcg'}).encode('utf-8')
# `with` closes the response once the body has been read.
with urllib.request.urlopen(url, data=data) as r:
    print(r.read().decode('utf-8'))

设置超时时间

设置超时时间

import urllib.request

url = 'http://www.cnblogs.com/0bug/'
# `timeout` is in seconds; an exception is raised if it is exceeded
# (this snippet does not catch it).
with urllib.request.urlopen(url, timeout=1) as r:  # set the timeout
    print(r.read().decode('utf-8'))

异常处理

import urllib.request
import urllib.error
import socket

url = 'http://www.cnblogs.com/0bug/'
try:
    # Deliberately tiny timeout so the request is expected to time out.
    with urllib.request.urlopen(url, timeout=0.01) as r:  # set the timeout
        print(r.read().decode('utf-8'))
except socket.timeout:
    # A timeout while waiting for or reading the response is raised
    # directly as socket.timeout rather than wrapped in URLError.
    print('请求超时')
except urllib.error.URLError as e:
    # A timeout while connecting/sending arrives wrapped in URLError.reason.
    if isinstance(e.reason, socket.timeout):
        print('请求超时')
    else:
        # Surface other failures (DNS, refused connection, HTTP errors)
        # instead of silently swallowing them.
        print(e.reason)

响应码、响应头

import urllib.request

# Inspect the response status code and headers.
url = 'http://www.cnblogs.com/0bug/'
# `with` closes the response when the block exits.
with urllib.request.urlopen(url) as r:
    print(r.status)  # 200
    # getheaders() returns a list of (name, value) tuples.
    print(r.getheaders())  # [('Content-Type', 'text/html; charset=utf-8'), ......]
    # getheader(name) looks up a single header by name.
    print(r.getheader('Content-Type'))  # text/html; charset=utf-8

构造请求信息

import urllib.request
import urllib.parse

# Build a full request: URL, form body, custom headers and explicit method.
url = 'http://www.cnblogs.com/0bug/'
# Headers passed up front to Request; the User-Agent is a desktop Chrome string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.119 Safari/537.36',
    'Host': 'www.cnblogs.com'
}
dic = {'name': 'lcg'}
# The body must be bytes: URL-encode the form fields, then encode the string.
data = urllib.parse.urlencode(dic).encode('utf-8')
req = urllib.request.Request(url=url, data=data, headers=headers, method='POST')
# `with` closes the response once the body has been read.
with urllib.request.urlopen(req) as res:
    print(res.read().decode('utf-8'))

另一种添加请求头的方式

import urllib.request
import urllib.parse

# Alternative way to set headers: create the Request, then call add_header().
url = 'http://www.cnblogs.com/0bug/'
dic = {'name': 'lcg'}
# The body must be bytes: URL-encode the form fields, then encode the string.
data = urllib.parse.urlencode(dic).encode('utf-8')
req = urllib.request.Request(url=url, data=data, method='POST')
# Headers can be attached one at a time after the Request exists.
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/53...')
# `with` closes the response once the body has been read.
with urllib.request.urlopen(req) as res:
    print(res.read().decode('utf-8'))

代理

import urllib.request

# Route requests through a proxy by building a custom opener.
url = 'http://www.cnblogs.com/0bug/'
# ProxyHandler takes a mapping of URL scheme -> proxy URL.
# NOTE(review): these are the sample proxy addresses from the original
# example; replace them with a proxy you can actually reach.
proxy_handler = urllib.request.ProxyHandler({
    'http': 'http://122.114.31.177:808',
    'https': 'https://124.133.75.183:8118'
})
opener = urllib.request.build_opener(proxy_handler)
# opener.open() is used in place of urlopen(); `with` closes the response.
with opener.open(url) as r:
    print(r.read().decode('utf-8'))

  

posted @ 2018-03-27 22:15  0bug  阅读(227)  评论(0)  编辑  收藏  举报