Python Requests 使用摘要 一
Requests是由Kenneth Reitz推出的一个Python HTTP 请求操作包,在使用上,比系统自带的urllib2方便了很多,现在是1.2版本,可以通过easy_install安装。
一. 基本操作
r = requests.get('http://httpbin.org/get')
r = requests.post("http://httpbin.org/post")
r = requests.put("http://httpbin.org/put")
r = requests.delete("http://httpbin.org/delete")
r = requests.head("http://httpbin.org/get")
r = requests.options("http://httpbin.org/get")
print r.headers['allow']
HEAD, OPTIONS, GET
二. 查看返回内容
>>> r = requests.get('http://httpbin.org/get')
1. 响应内容,以 bytes表示
>>> r.content
'{\n "url": "http://httpbin.org/get",\n "headers": {\n "Content-Length": "0",\n "Accept-Encoding": "gzip, deflate, compress",\n "Connection": "close",\n "Accept": "*/*",\n "User-Agent": "python-requests/1.1.0 CPython/2.7.1 Darwin/11.4.2",\n "Host": "httpbin.org"\n },\n "args": {},\n "origin": "..."\n}'
2. 响应内容,以 unicode表示
>>> r.text
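u'{\n "url": "http://httpbin.org/get",\n "headers": {\n "Content-Length": "0",\n "Accept-Encoding": "gzip, deflate, compress",\n "Connection": "close",\n "Accept": "*/*",\n "User-Agent": "python-requests/1.1.0 CPython/2.7.1 Darwin/11.4.2",\n "Host": "httpbin.org"\n },\n "args": {},\n "origin": "..."\n}'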
3. 友好提示。访问无异常,为True,否则,为False。响应HTTP状态码400及以上均为False。
>>> r.ok
True
4. 访问无异常,返回空,否则,抛出异常
>>> r.raise_for_status()
5. 响应HTTP状态码
>>> r.status_code
200
6. 访问URL
>>> r.url
u'http://httpbin.org/get'
7. 探测到headers里面的charset,要注意这里不是页面上指定的charset
>>> r.encoding
8. 根据响应内容探测出的编码(借助chardet,不依赖headers)。当headers里面没有指定charset时,可以用它来判断编码,不过速度很慢
>>> r.apparent_encoding
9. HTTP Headers内容
>>> r.headers.keys()
['date', 'content-length', 'content-type', 'connection', 'server']
>>> r.headers['Content-Type']
'application/json'
>>> r.headers.get('content-type')
'application/json'
10. Cookies内容
>>> r.cookies.keys()
[]
>>> url = 'http://httpbin.org/cookies'
>>> cookies = dict(cookies_are='working')
>>> r = requests.get(url, cookies=cookies)
>>> r.text
u'{\n "cookies": {\n "cookies_are": "working"\n }\n}'
11. 转化成json格式输出
>>> r.json()
{u'url': u'http://httpbin.org/get', u'headers': {u'Content-Length': u'0', u'Accept-Encoding': u'gzip, deflate, compress', u'Connection': u'close', u'Accept': u'*/*', u'User-Agent': u'python-requests/1.1.0 CPython/2.7.1 Darwin/11.4.2', u'Host': u'httpbin.org'}, u'args': {}, u'origin': u'...'}
12. 发出请求到接到回应的时间差
>>> r.elapsed
datetime.timedelta(0, 1, 695693)
13. iterate内容
>>> a = r.iter_lines()
>>> a
<generator object iter_lines at 0x10e338280>
>>> a.next()
'<!DOCTYPE html>'
>>> a.next()
'<html>'
>>> a.next()
'<head>'
>>> a = r.iter_content()
>>> a
<generator object iter_slices at 0x10e3382d0>
>>> a.next()
'<'
>>> a.next()
'!'
>>> a.next()
'D'
>>> a.next()
'O'
>>> a.next()
'C'
14. Timeouts
>>> requests.get('http://github.com', timeout=0.001)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
requests.exceptions.Timeout: HTTPConnectionPool(host='github.com', port=80): Request timed out. (timeout=0.001)
三. 访问出错返回的内容
>>> r = requests.get('http://127.0.0.1:6543/abcccc') # 这是一个不存在的链接
1. 响应内容,以 unicode表示
>>> r.text
u'<html>\n <head>\n <title>404 Not Found</title>\n </head>\n <body>\n <h1>404 Not Found</h1>\n The resource could not be found.<br/><br/>\n/abcccc\n\n\n </body>\n</html>'
2. 响应内容,以bytes表示
>>> r.content
'<html>\n <head>\n <title>404 Not Found</title>\n </head>\n <body>\n <h1>404 Not Found</h1>\n The resource could not be found.<br/><br/>\n/abcccc\n\n\n </body>\n</html>'
3. 友好提示,返回False
>>> r.ok
False
4. 响应HTTP状态码
>>> r.status_code
404
5. 将访问异常抛出
>>> r.raise_for_status()
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/Users/eryxlee/Workshops/python/sandbox/lib/python2.7/site-packages/requests-1.2.0-py2.7.egg/requests/models.py", line 670, in raise_for_status
raise HTTPError(http_error_msg, response=self)
requests.exceptions.HTTPError: 404 Client Error: Not Found
6. 友好提示,返回一个适合阅读的出错提示
>>> r.reason
'Not Found'
四. 跳转情况的返回
>>> r = requests.get('http://github.com')
1. 返回内容
>>> r.status_code
200
>>> r.text[:100]
u'<!DOCTYPE html>\n<html>\n <head prefix="og: http://ogp.me/ns# fb: http://ogp.me/ns/fb#githubog: http'
2. 跳转历史
>>> r.history
(<Response [301]>,)
3. 跳转内容
>>> r.history[0].text
u'<html>\r\n<head><title>301 Moved Permanently</title></head>\r\n<body bgcolor="white">\r\n<center><h1>301 Moved Permanently</h1></center>\r\n<hr><center>nginx</center>\r\n</body>\r\n</html>\r\n'
4. 禁止跳转
>>> r = requests.get('http://github.com', allow_redirects=False)
>>> r.status_code
301
>>> r.history
[]
[转自]EryxLee的博客http://blog.sina.com.cn/eryxlee