开发基础 - 总结 - 数据类型(特性 方法) 字符编码
1.数据类型 整数int 字符串str 列表list 元组tuple 字典dict 集合set 它们的特性,方法
2.字符编码 gbk unicode utf-8 encode() decode()
1.1.'bit_length', bin(23) = 0b10111 (23).bit_length() = 5 #求整数对应的bit长度 1.2.'from_bytes', int.from_bytes(b'1',byteorder='big') = 49 int.from_bytes(b'1',byteorder='little') = 49 #bytes转化成整数 int.from_bytes(b'10',byteorder='big') = 12592 int.from_bytes(b'10',byteorder='little') = 12337 #If byteorder is 'big', the most significant byte is at the beginning of the byte array. #If byteorder is 'little', the most significant byte is at the end of the byte array. 1.3.'to_bytes' (1024).to_bytes(4,byteorder='big') = b'\x00\x00\x04\x00' (1024).to_bytes(4,byteorder='little') = b'\x00\x04\x00\x00' #整数转化成bytes 1.4.'conjugate', (12).conjugate() = 12 (2+4j).conjugate() = 2-4j (-4-1j).conjugate() = -4+1j #共轭(实部不变,虚部取反) 'denominator', #分母 'imag', 'numerator', #分子 'real', denominator = property(lambda self: object(), lambda self, v: None, lambda self: None) # default """the denominator of a rational number in lowest terms""" imag = property(lambda self: object(), lambda self, v: None, lambda self: None) # default """the imaginary part of a complex number""" numerator = property(lambda self: object(), lambda self, v: None, lambda self: None) # default """the numerator of a rational number in lowest terms""" real = property(lambda self: object(), lambda self, v: None, lambda self: None) # default """the real part of a complex number"""
2.1.字符串大小写转换: 'capitalize', s = "hello world" s.capitalize() = 'Hello world' #首字母变大写 'casefold', s = "HELLO WORLD" s.casefold() = 'hello world' #转化为小写 适合ascii码和其他语言eg:德语 'lower', s = 'Hello world' s.lower() = 'hello world' #转化为小写 'swapcase', s = "Hello world" s.swapcase() = 'hELLO WORLD' #大写变小写 小写变大写 'title', s = 'Hello world' s.title() = 'Hello World' #首字母转化为大写 'upper', s = 'Hello world' s.upper() = 'HELLO WORLD' #转化为大写 2.2.字符串格式输出: 'center', s = "Hello world" s.center(30,'*') = '*********Hello world**********' s.center(20) = ' Hello world ' 'ljust', s = 'Hello world' s.ljust(20,'*') = 'Hello world*********' 'rjust', s = 'Hello world' s.rjust(20,'*') = '*********Hello world' 'zfill' s = "Hello world" s.zfill(20) = '000000000Hello world' 'expandtabs', x = 'a\tb' x.expandtabs() = 'a b' x.expandtabs(20) = 'a b' #扩充标签 'format', q ='{0}多大了,{1}' q.format("alcie",12) = 'alcie多大了,12' q='{name}多大了,{age}' q.format(name='alice',age=12) = 'alice多大了,12' 'format_map', people = {'name':'alice','age':12} "my name is {name},i am {age} old".format_map(people) = 'my name is alice,i am 12 old' 2.3.字符串搜索定位与替换 'count', s = "Hello world" s.count('o') = 2 s.count('o',0,7) = 1 s.count('o',0,8) = 2 'find', p = 'Hello world' p.find('o') = 4 p.find('o',5,8) = 7 'rfind', p = 'Hello world' p.rfind('o') = 7 'index', s = "hello world" s.index('o') = 4 s.index('o',5,8) = 7 'rindex', s = "hello world" s.rindex('o') = 7 'replace', s = "hello world" s.replace('o','O') = 'hellO wOrld' s.replace('o','O',1) = 'hellO world' s.replace('o','O',2) = 'hellO wOrld' 'strip', s = '\n hello world \r \t \r\n' s.strip() = 'hello world' 'lstrip', s.lstrip() = 'hello world \r \t \r\n' 'rstrip', s.rstrip() = '\n hello world' 'maketrans', table = str.maketrans('abcde','!@#$%') table = {97: 33, 98: 64, 99: 35, 100: 36, 101: 37} 'translate', "abcdefghj".translate(table) = '!@#$%fghj' 2.4.字符串的联合与分割 'join', s = ['alice','name'] ' - '.join(s) = 'alice - name' '-'.join(['2018','2','10']) = '2018-2-10' 
','.join({"name":"alice","age":12}) = 'name,age' ','.join({"name":"alice","age":'12'}.values()) = 'alice,12' 'partition', s='hello world' s.partition('w') = ('hello ', 'w', 'orld') s.partition('d') = ('hello worl', 'd', '') #str分割成了元祖 'rpartition', s='hello world' s.rpartition('o') = ('hello w', 'o', 'rld') s.partition('o') = ('hell', 'o', ' world') 'split', 'a,b,,c'.split(',') = ['a', 'b', '', 'c'] 'a,b,,c'.split(',',maxsplit=1) = ['a', 'b,,c'] 'rsplit', 'a,b,,c'.rsplit(',') = ['a', 'b', '', 'c'] 'a,b,,c'.rsplit(',',maxsplit=1) = ['a,b,', 'c'] 'splitlines', 'a b\n\nde fg\rk1\r\ns'.splitlines() = ['a b', '', 'de fg', 'k1', 's'] 'a b\n\nde fg\rk1\r\ns'.splitlines(keepends = True) = ['a b\n', '\n', 'de fg\r', 'k1\r\n', 's'] "".splitlines() = [] "".split('\n') = [''] 2.5.字符串条件判断 'startswith', s = "Hello world" s.startswith('He') = True 'endswith', p = 'Hello world' p.endswith('ld') = True p.endswith('lo',0,5) = True p.endswith('lo',0,4) = False 'isalnum', 12sdsa'.isalnum() = True 'sd'.isalnum() = True '131'.isalnum() = True '书店'.isalnum() = True 'sdas_231'.isalnum() = False #字符串与数字的任意组合即为真 'isalpha', '2'.isalpha() = False 'hello'.isalpha() = True '中国'.isalpha() = True '232hello'.isalpha() = False #不含数字的字符串为真 'isdecimal', '21'.isdecimal() = True '21.12'.isdecimal() = False 'sda2321'.isdecimal() = False 'das'.isdecimal() = False s='\u00b2' s=2 s.isdecimal() = False #只有整数为真 'isdigit', '21'.isdigit() = True '21.23'.isdigit() = False 'dsasa'.isdigit() s='\u00b2' s=2 s.isdigit() = True #只有整数为真 # all decimals are digits, but not all digits are decimals. 
'isnumeric' s='\u00b2' s=2 s.isnumeric() = True '12'.isnumeric() = True 'da21'.isnumeric() = False #只有整数为真 # isdecimal: Nd, 不支持unicodede的整数 str # isdigit: No, Nd, 支持unicode的整数 str # isnumeric: No, Nd, Nl 支持unicode的小数 str num = '\u2155' print(num)=1/5 num.isnumeric() = True num.isdigit() = False num.isdecimal() = False 'isidentifier', 'dsad123'.isidentifier() = True '123_sd'.isidentifier() = False 'while'.isidentifier() = True #是否为合法的标识符 'islower', 'ASDA'.islower() = False '123sada'.islower() = True 'sds'.islower() = True #是否是小写 'isupper', 'SD'.isupper() = True 'SDaas'.isupper() = False '中'.isupper() = False 'A_213'.isupper() = True '23'.isupper() = False #判断是否是大写 'isprintable', ' '.isprintable() = True 'asdas23'.isprintable() = True 'das232\n'.isprintable() = False 'dass\tdas'.isprintable() = False #只有\n\t\r 为False 'isspace', ' '.isspace() = True '\n\r\t'.isspace() = True ' 1'.isspace() = False 's \n s'.isspace() = False #只有那两种为True 'istitle', 'Hello World'.istitle() = True 'HeLLo World'.istitle() = False '中Hello World'.istitle() = True #判断首字母是否为大写 2.6.字符串编码: 'encode', s = '中国' s.encode() = b'\xe4\xb8\xad\xe5\x9b\xbd' s.encode("utf-8") = b'\xe4\xb8\xad\xe5\x9b\xbd' s.encode("gbk") = b'\xd6\xd0\xb9\xfa' s.encode('utf-8').decode('utf-8') = '中国' 参考:https://segmentfault.com/a/1190000004598007
列表的功能:创建 查询 切片 增加 修改 删除 循环 排序
li = ['hello','world',1,2] li = [] li = list() li = ['abc',['def','ghi']] li = ['HELLO', 'world', 1, 2, 3, 'alice', 'world'] #顾头不顾尾 li[2:5] = [1, 2, 3] = ['HELLO', 1, 3] li[0:6:2] = ['HELLO', 1, 3] li[li.index('2')] = 'hello' range(10) 0,1,2,3,4,5,6,7,8,9 range(1,10,2) 1,3,5,7,9 enumerate(li) enumerate(li,3) len(li) 'append', li.append(3) li = ['hello', 'world', 1, 2, 3] li.append('alice') li = ['hello', 'world', 1, 2, 3, 'alice'] 'insert', li.insert(1,'baijing') li = ['HELLO', 'baijing', 'world', 1, 2, 3, 'alice', 'world'] 'extend', li2 = ['bai','jing'] li.extend(li2) li = ['hello', '1', '2', '3', ' ', '4', '5', 3, 'alice', 'world', ['300', '2'], 'bai', 'jing'] li3 = li2 + li 'index', li[1] = 'world' li.index('world') = 1 li[li.index('world')] = 'world' 'count', li = ['HELLO', 'world', 1, 2, 3, 'alice', 'world'] li.count('world') = 2 'clear', li.clear() li = [] 'pop', li.pop(li.index('2')) = 2 li.pop(-1) = 'jing' li.pop() #删了并返回值 'remove', li.remove('6') #删了,若没有这个值 就会报错 del li[1] del li[1:4] #根据索引删了 批量删 'sort' li.sort() #根据ascii排序的 'reverse', li.reverse() #反转li 'copy', li2 = li.copy() #浅copy两列表独立,但列表里面的列表不独立 #深copy import copy li4=copy.deepcopy(li) li 和 li4 相互独立 列表里面的列表也是相互独立的
4.2.功能:index count 切片
4.3.使用场景:显式地告知别人,此处数据不可修改 数据库连接配置信息等
li=('alice',12,12,21) 'count', li.count(12) = 2 'index' li.index(12) = 1 li[0:3] = ('alice', 12, 12)
特性:1.key必须可哈希,必须唯一 2.value可存放任意多个值,可修改,可以不唯一 3.无序(Python 3.7起按插入顺序保存,但仍不支持按位置索引) 4.查找速度快
hash("hello") = 8886137942810973477 (19位,字符串的hash值在每次启动解释器时可能不同) #能被hash()的只有不可变的元素 不可变:数字,字符串,元组;可变:列表
2.MD5加密 --- 无法反解
用法:增 删 改 查 循环 li = {1: 2, 'alice': 'hello world', 'name': [1, 2, 3]} li['age']=12 #增 li = {1: 2, 'alice': 'hello world', 'name': [1, 2, 3], 'age': 12} li['age']=121 #改 {1: 2, 'alice': 'hello world', 'name': [1, 2, 3], 'age': 121} li['alice'] #查 找不到就报错 'hello world' 'get', li.get('alice') = 'hello world' print(li.get('sds')) = None #查 若没有找到 不会报错 'clear', s = {1: 2, 2: 2} s.clear() s = {} #清空 'copy', li2=li.copy() #深浅copy 'keys', li.keys() = dict_keys([1, 'alice', 'name', 'age']) #获取字典中的 keys 'values' li.values() = dict_values([121, '哈哈', [1, 2, 3], 121]) #获取字典中的 values 'items', li.items() = dict_items([(1, 121), ('alice', '哈哈'), ('name', [1, 2, 3]), ('age', 121)]) 'fromkeys', j.fromkeys([1,2,3],'alice') = {1: 'alice', 2: 'alice', 3: 'alice'} #批量生成字典 'pop', s = {1: 2, 2: 3} s.pop(2) = 3 s.pop(9,'error') # 删除并返回value的值 'popitem', s.popitem() = ('s', None) #随机删 若是空字典 popitem()时将会报错 'setdefault', print(s.setdefault(3,'alice')) = None s.setdefault('c','aa') = aa #有的话不加,没有的话就加进去 'update', s.update(li) #有的话替换,没有的话,就新加
作用:1.去重,把一个列表变成集合,就自动去重了 2.关系测试,测试两组数据之间的交集,差集,并集
'add', s.add('alice') s = {1, 2, 'alice'} #只有可hash的才能增加进去,可hash的有 数字 字符串 元组 'update' s.update((1,2,3,4,5,'alex')) s = {1, 2, 3, 4, 5, 'alex', 'alice'} #update()原地修改,返回None 'pop', s.pop() = 1 #随机删除一个元素并返回 空集合pop()会报错 'remove', s.remove('alice') #删除 若没有元素 就会报错 'discard', s.discard(3) #删除 若没有元素 不会报错 'clear', s.clear() 'copy', s1=s.copy() 'difference', s.difference(li) #差集 - 'difference_update', s.difference_update(li) #差集 赋值给s 'intersection', s.intersection(li) #交集 & 'intersection_update', s.intersection_update(li) #交集 赋值给s 'union', li.union(s) #并集 | 'symmetric_difference', s.symmetric_difference(li) #对称差集 'symmetric_difference_update', s.symmetric_difference_update(li) #对称差集 并赋值给了s 'issubset', s.issubset(li) = True <= #s是li的子集吗 'issuperset', li.issuperset(s) = True >= # li是s的超集吗 'isdisjoint', s.isdisjoint(li) = True / False #是否不相交(没有交集为True)
1.列表 li=[1,2,3,4,5] str(li) tuple(li) 2.元组 li=(1,2,3,4,5) str(li) li.__str__() list(li) 3.字典 li={"name":"alice","age":12} str(li) list(li) li.values() tuple(li) tuple(li.values()) 4.字符串 li='Hello world' list(li) eval("[1,2,3]") = [1, 2, 3] tuple(li) eval("(1,2,3)") eval("{1:2,2:3}")
字符编码 gbk unicode utf-8 encode() decode()
utf-8 一个中文字符通常占3个字节
gbk 一个中文字符占2个字节
Unicode 是字符集,常用中文字符按 UCS-2/UTF-16 存储时占2个字节
Mac 和 linux 是 UTF-8
Unicode --> encode 编码 --> GBK中文,Shift-JIS日本编码
GBK -------> decode 解码 --> Unicode
Shift-JIS --> decode 解码 --> Unicode
"中国".encode("utf-8") = b'\xe4\xb8\xad\xe5\x9b\xbd'
"中国".encode("gbk") = b'\xd6\xd0\xb9\xfa'
"中国".encode("shift-jis") = b'\x92\x86\x8d\x91'
"中国".encode("utf-8").decode("utf-8") = '中国'
"中国".encode("gbk").decode("gbk") = '中国'
"中国".encode("shift-jis").decode("shift-jis") = '中国'