python学习Day06--编码
【主要内容】
1. is 和 == 区别
id()函数
== 判断两边的值
is 判断内存地址
回顾编码:
1. ASCII: 英文, 特殊字符, 数字, 8bit, 1byte
2. GBK: 中文 16bit, 2byte. 兼容ASCII
3. unicode: 万国码, 32bit 4byte. 兼容ASCII
4. UTF-8: 长度可变的unicode. 英文:8bit, 欧洲:16bit, 中文:24bit 3byte
python2 默认编码是ASCII (源文件不声明编码, 如 # -*- coding: utf-8 -*-, 就不能直接写中文)
python3 的字符串(str)就是unicode, 源文件默认的编码是UTF-8
内存里用的是unicode. 硬盘和网络传输用的是utf-8或者GBK
2. encode() 编码. 获取到的是编码之后的结果. bytes
3. decode() 解码. 把bytes变成我们熟悉的字符串
【代码】
1、is和==
1 # s = "alex 是 大 xx" 2 # abc = id(s) # 得到内存地址 3 # print(abc) 4 5 # lst = ["大阳哥", "佳琪哥", "小花生", "燃哥"] 6 # print(id(lst)) # 就是一个内存地址. 毫无意义 7 8 9 # lst = ["周杰伦", "燃哥"] 10 # lst1 = ["周杰伦", "燃哥"] 11 # print(id(lst)) 12 # print(id(lst1)) 13 14 # s = "燃哥" 15 # s1 = "燃哥" 16 # # 小数据池. 会对字符串进行缓存, 为了节省内存 17 # print(id(s)) 18 # print(id(s1)) 19 20 # tu = ("燃哥", "周杰伦") 21 # tu1 = ("燃哥", "周杰伦") 22 # print(id(tu), id(tu1)) 23 24 # dic = {"a": "b", "c":"d"} 25 # dic1 = {"a": "b", "c":"d"} 26 # print(id(dic), id(dic1)) 27 28 # a = 10 29 # b = 10 30 # print(id(a), id(b)) 31 32 # 布尔也有, 33 # a = True 34 # b = True 35 # print(id(a), id(b)) 36 37 # -5 38 # a = 257 39 # b = 257 40 # print(id(a), id(b)) 41 42 # a = "小威" 43 # b = "小威" 44 # print(id(a), id(b)) 45 46 # 1. id() 查看内存地址 47 # 2. str 有小数据池的 48 49 # == is id 50 # == 判断. 左右两端是否相等和一致, 比较的是内容 51 # is 判断. 判断的是内存地址 id()的值来判断 内存地址 52 53 # lst = ["马化腾", "小威"] 54 # lst2 = ["马化腾", "小威"] 55 # print(lst == lst2) # True 56 # print(lst is lst2) # False 57 58 59 # s = "alex" 60 # print("1111111111") 61 # print("1111111111") 62 # print("1111111111") 63 # print("1111111111") 64 # print("1111111111") 65 # print("1111111111") 66 # print("1111111111") 67 # print("1111111111") 68 # print("1111111111") 69 # 70 # s2 = "alex" 71 # print(s == s2) # True 72 # print(s is s2) # True. 
小数据池 73 74 # s1 = "@akljflkasdjklfjkasdlfjklsdajfklsdajfklasdjkflasdjklfjsdaklfjsdakljfklasdjfklsdajfklsdajfklsdajklfsjadklfjsadklfjasdkljfklsdjfklsdjfklsdjfklsdjfklasdjfklasdjklfjasdklakljflkasdjklfjkasdlfjklsdajfklsdajfklasdjkflasdjklfjsdaklfjsdakljfklasdjfklsdajfklsdajfklsdajklfsjadklfjsadklfjasdkljfklsdjfklsdjfklsdjfklsdjfklasdjfklasdjklfjasdkl" 75 # s2 = "@akljflkasdjklfjkasdlfjklsdajfklsdajfklasdjkflasdjklfjsdaklfjsdakljfklasdjfklsdajfklsdajfklsdajklfsjadklfjsadklfjasdkljfklsdjfklsdjfklsdjfklsdjfklasdjfklasdjklfjasdklakljflkasdjklfjkasdlfjklsdajfklsdajfklasdjkflasdjklfjsdaklfjsdakljfklasdjfklsdajfklsdajfklsdajklfsjadklfjsadklfjasdkljfklsdjfklsdjfklsdjfklsdjfklasdjfklasdjklfjasdkl" 76 # print(id(s1), id(s2)) 77 # 78 # s = "abc中def" 79 # print(s.title())
2、编码
# Demo: encoding str -> bytes, decoding bytes -> str, and converting
# GBK-encoded bytes to UTF-8 by going through a str in between.

# --- encode(): str -> bytes ---------------------------------------------
# s = "alex马"
# A string has to be encoded before it can be stored.
# encode() returns the encoded content as a bytes object.
#
# Sample UTF-8 output: 30 bytes for 10 characters, 3 bytes per character.
# b'\xe6\x9d\x8e\xe5\x98\x89\xe8\xaf\x9a\xe7\x9a\x84\xe5\x84\xbf\xe5\xad\x90\xe8\xa2\xab\xe7\xbb\x91\xe6\x9e\xb6\xe4\xba\x86'
# bs = s.encode("UTF-8")  # encode the string as UTF-8
# print(bs)
#
# English text: the encoded result is the English text itself.
# Chinese text: under UTF-8 each Chinese character takes 3 bytes.

# s = "饿了么"
# bs = s.encode("GBK")  # b'\xb6\xf6\xc1\xcb\xc3\xb4' -- GBK: 2 bytes per Chinese character
# print(bs)

# s = "中"
# print(s.encode("utf-8"))
# print(s.encode("GBK"))

# --- decode(): bytes -> str ---------------------------------------------
# bs = b'\xb6\xf6\xc1\xcb\xc3\xb4'  # GBK bytes received from someone else
#
# Turn them into something a programmer can read.
# s = bs.decode("GBK")  # decoding yields a str; decode with the codec that was used to encode
# print(s)

# --- GBK => UTF-8 -------------------------------------------------------
bs = b'\xb6\xf6\xc1\xcb\xc3\xb4'
# First decode the GBK bytes into a (unicode) str ...
s = bs.decode("GBK")
# ... then encode that str as UTF-8.
bss = s.encode("UTF-8")
print(bss)
3、作业与练习
1 ''' 2 3 s = "k:1|k1:2|k2:3|k3:4" 4 lst=s.split("|") 5 dic={} 6 for i in lst: 7 k,v = i.split(":") 8 dic[k]=int(v) 9 print(dic) 10 11 # 有如下值li=[11,22,33,44,55,66,77,88,99,90] 12 # 将所有大于66的值保存至字典的第一个key中,将小于66的所有值保存到第二个key值中 13 li=[11,22,33,44,55,66,77,88,99,90] 14 #法一 15 # dic={"k1":[],"k2":[]} 16 # for el in li: 17 # if el > 66: 18 # dic["k1"].append(el) 19 # elif el < 66: 20 # dic["k2"].append(el) 21 # else: 22 # pass 23 # print(dic) 24 dic={} 25 for el in li: 26 if el > 66: 27 dic.setdefault("k1",[]).append(el) 28 elif el < 66: 29 dic.setdefault("k2",[]).append(el) 30 print(dic) 31 '''