Python - 编码转换
# coding: utf-8
#
# Walk-through of str <-> unicode conversions in Python 2.
# Each step rebinds ``s`` and then reports its type and length so the effect
# of every encode/decode call can be observed.  The trailing comment on each
# report line gives the expected type (with the byte encoding, where the
# value is a byte string) and the expected length.


def _report(value):
    # Print the type of ``value`` on one line and its length on the next.
    print(type(value))
    print(len(value))


# --- ASCII-only text ---------------------------------------------------------
s = 'abc'
_report(s)                  # str (utf-8), 3

# str -> unicode; every character value in the str must be below 128.
s = unicode(s)
_report(s)                  # unicode, 3

s = u'abc'
_report(s)                  # unicode, 3

# unicode -> str (utf-8)
s = s.encode('utf-8')
_report(s)                  # str, 3

# str (utf-8) -> unicode; here the character values may be arbitrary.
s = s.decode('utf-8')
_report(s)                  # unicode, 3

# --- Chinese text ------------------------------------------------------------
# The literal is UTF-8 bytes because the whole file is saved as UTF-8.
s = '中国'
_report(s)                  # str (utf-8), 6

s = u'中国'
_report(s)                  # unicode, 2

s = s.encode('utf-8')
_report(s)                  # str (utf-8), 6

s = s.decode('utf-8')
_report(s)                  # unicode, 2

# --- Console input -----------------------------------------------------------
# On Windows, Chinese input appears to arrive GBK-encoded, with each Chinese
# character taking 2 bytes.
s = raw_input(u'输入:')
_report(s)                  # str (gbk), 4 (i.e. two Chinese characters typed)

# To turn GBK bytes into UTF-8, first decode the GBK bytes into unicode.
s = s.decode('gbk')
_report(s)                  # unicode, 2

s = s.encode('gbk')
_report(s)                  # str (gbk), 4

# Conclusions drawn from the checks above:
#
# Every encoding can be converted by going through unicode.  Unicode can be
# pictured as a lookup table of all kinds of characters, in which any
# character used anywhere in the world can be found -- Chinese included.
# Each character maps to a number, e.g. 'a' -> 0x61, '中' -> 0x4e2d.
#
#   unicode -> utf-8 : unicode.encode('utf-8')
#   utf-8 -> unicode : str.decode('utf-8')
#   gbk -> unicode   : str.decode('gbk')
#   unicode -> gbk   : unicode.encode('gbk')