gzip 压缩格式的网站处理方法---sina.com 分类: python python基础学习 2013-07-16 17:40 362人阅读 评论(0) 收藏
gzip 压缩格式的网站处理方法---sina.com
#coding:utf-8
import urllib2
import re
import zlib
# Fetch the Sina home page and print its HTML. The body is gunzipped only
# when the response headers say it is gzip-compressed, so an uncompressed
# reply is printed as-is instead of raising zlib.error.
url_address = 'http://www.sina.com.cn'
f = urllib2.urlopen(url_address)
try:
    content_encoding = f.headers.get('Content-Encoding') or ''
    buf = f.read()
finally:
    # Release the socket even if reading the body fails.
    f.close()

if content_encoding.strip().lower() in ('gzip', 'x-gzip'):
    # 16 + MAX_WBITS makes zlib expect a gzip header and trailer rather
    # than a bare zlib stream.
    buf = zlib.decompress(buf, 16 + zlib.MAX_WBITS)

print(buf)
练习:先打印响应头中的 Content-Encoding,再用 zlib 解压:
#coding:utf-8
import urllib2
import zlib


def main():
    """Download the Sina home page and print its Content-Encoding and body.

    The body is gunzipped only when the server reports a gzip
    Content-Encoding; otherwise it is printed exactly as received.
    """
    request = urllib2.urlopen('http://www.sina.com.cn')
    try:
        # Report the encoding the server declared for the body.
        encoding = request.headers.get('Content-Encoding')
        print(encoding)
        buf = request.read()
    finally:
        # Release the socket even if reading the body fails.
        request.close()

    if (encoding or '').strip().lower() in ('gzip', 'x-gzip'):
        # 16 + MAX_WBITS makes zlib expect a gzip header and trailer
        # rather than a bare zlib stream.
        buf = zlib.decompress(buf, 16 + zlib.MAX_WBITS)
    print(buf)


if __name__ == '__main__':
    main()
方法二:用 StringIO 和 gzip.GzipFile 解压:
#coding:utf-8
import gzip
import StringIO
import urllib2

# Fetch the Sina home page, print the response headers, then print the
# page body. The body goes through gzip.GzipFile only when the server
# reports a gzip Content-Encoding; otherwise it is printed as received.
f = urllib2.urlopen('http://www.sina.com.cn')
try:
    print(f.headers)
    content_encoding = f.headers.get('Content-Encoding') or ''
    compressdata = f.read()
finally:
    # Release the socket even if reading the body fails.
    f.close()

if content_encoding.strip().lower() in ('gzip', 'x-gzip'):
    # GzipFile needs a file-like object, so wrap the raw bytes in memory.
    gzipper = gzip.GzipFile(fileobj=StringIO.StringIO(compressdata))
    try:
        data = gzipper.read()
    finally:
        gzipper.close()
else:
    data = compressdata

print(data)