Python UNICODE GBK UTF-8 之间相互转换

Python 编码格式检测,可以使用 chardet ,

例如: 

    

import urllib
rawdata = urllib.urlopen('http://www.google.cn/').read()
import chardet
print chardet.detect(rawdata)

输出结果是:
    {'confidence': 0.98999999999999999, 'encoding': 'GB2312'}

 

# win下命令行参数为gbk编码:star.gbk2unicode(sys.argv[1]) + u'也有'
def gbk2unicode(s):
    return s.decode('gbk', 'ignore')

# 脚本文件#coding:utf-8时默认不带u的字符串为utf8字符串:star.utf82unicode('我')
def utf82unicode(s):
    return s.decode('utf-8', 'ignore')

# 带u的字符串为unicode
# star.unicode2gbk(u'\u4e5f\u6709')
# star.unicode2gbk(u'也有')
def unicode2gbk(s):
    return s.encode('gbk')

# 带u的字符串为unicode
# star.unicode2utf8(u'\u4e5f\u6709')
# star.unicode2utf8(u'也有')
def unicode2utf8(s):
    return s.encode('utf-8')

# win下命令行参数为gbk编码:star.gbk2utf8(sys.argv[1]) + '也有'
def gbk2utf8(s):
    return s.decode('gbk', 'ignore').encode('utf-8')

def utf82gbk(s):
    return s.decode('utf-8', 'ignore').encode('gbk')

 

posted @ 2017-07-29 10:41  淋哥  阅读(3682)  评论(0编辑  收藏  举报