爬虫学习:一些有用的函数
1. geturl() —— 获取真实的 URL(即经过重定向后最终访问到的地址)
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
# Demo: geturl() reports the URL of the resource that was actually retrieved,
# which differs from old_url when the server redirected the request.
old_url = 'http://rrurl.cn/b1UZuP'
req = Request(old_url)
# Use the response as a context manager so the underlying HTTP connection is
# closed as soon as the final URL has been read (the original never closed it).
with urlopen(req) as response:
    real_url = response.geturl()
print('Old url :' + old_url)
print('Real url :' + real_url)
2. info() —— 返回一个类似字典的对象,用来描述所获取页面的情况,通常是服务器发送的响应头(headers)。在 Python 3 中它是 http.client.HTTPMessage 实例。
典型的 headers 包含 "Content-length"、"Content-type" 以及其他内容。
from urllib.request import Request, urlopen
from urllib.error import URLError, HTTPError
# Demo: info() returns the response headers sent by the server.
old_url = 'http://www.sina.com'
req = Request(old_url)
# Use the response as a context manager so the underlying HTTP connection is
# closed once the headers have been captured (the original never closed it).
with urlopen(req) as response:
    headers = response.info()
print('Info():')
print(headers)