Python—HTTP处理Gzip压缩数据
http://blog.sina.com.cn/s/blog_4bd8e63a0100mmeo.html
HTTP 请求中包含 Accept-encoding: gzip 头信息可以告诉服务器,如果它有任何新数据要发送给我时,请以压缩的格式发送。如果服务器支持压缩,它将返回由 gzip 压缩的数据并且使用Content-encoding: gzip 头信息标记。
#codeing:utf-8
import urllib2, httplib
import StringIO
import gzip
def findUrlGzip(url):
request = urllib2.Request(url)
request.add_header('Accept-encoding', 'gzip')
opener = urllib2.build_opener()
f = opener.open(request)
isGzip = f.headers.get('Content-Encoding')
#print isGzip
if isGzip :
compresseddata = f.read()
compressedstream = StringIO.StringIO(compresseddata)
gzipper = gzip.GzipFile(fileobj=compressedstream)
data = gzipper.read()
else:
data = f.read()
return data
def findUrlTitle(url):
html = findUrlGzip(url)
html = html.lower()
spos = html.find("<title>")
epos = html.find("</title>")
if spos != -1 and epos != -1 and spos < epos:
title = html[spos+7:epos]
title = title[:-9]
else:
title = ""
return title
if __name__ == "__main__":
url = 'http://business.sohu.com/20101010/n275509607.shtml'
title = findUrlTitle(url)
print title