python2.7 字符处理小节
Unicode 是字符集,为每个字符分配唯一的码点。
UTF-8、GBK 是编码方式,负责把字符集中的字符编码为机器可识别的字节序列。
# -*- coding: utf-8 -*-
# NOTE: per PEP 263 the coding declaration above only takes effect when it is
# on the first or second line of the file.
#
# Python 2.7 demo: byte strings (str) versus unicode objects, and how
# encode()/decode() convert between them.

# A plain literal is a byte string in Python 2. With a UTF-8 source file it
# holds the UTF-8 encoding of the text: \xE4\xB8\xAD\xE6\x96\x87
s = "中文"
# A u"" literal is a unicode object holding code points: \u4E2D \u6587
us = u"中文"

print(repr(s))
print(repr(us))

# Decode the UTF-8 bytes into a unicode object (code points \u4E2D \u6587).
print(s.decode('utf-8'))
# Encode the unicode object into UTF-8 bytes.
print(us.encode('utf-8'))
# Encode the unicode object into GBK bytes: '\xd6\xd0\xce\xc4'
print(repr(us.encode('gbk')))

# In a Python 2 byte-string literal "\u" is NOT an escape, so this holds the
# twelve literal characters \u4E2D\u6587 rather than two Chinese characters.
# (Named `escaped` instead of `str` to avoid shadowing the builtin.)
escaped = '\u4E2D\u6587'
print(escaped)

# The 'unicode-escape' codec interprets those \uXXXX sequences and returns
# the corresponding unicode object.
unescaped = escaped.decode('unicode-escape')
print(unescaped)

# repr() shows the backslashes doubled, confirming the original value is the
# literal escape text.
print(repr(escaped))