开发基础 - 总结 - 数据类型（特性方法）字符编码

1.数据类型整数int 字符串str 列表list 元组tuple 字典dict 集合set 它们的特性，方法
2.字符编码 gbk unicode utf-8 encode() decode()

------------------------------------------------------------
1.int:方法

1.1.'bit_length',    bin(23) = 0b10111       (23).bit_length() = 5    #求整数对应的bit长度
1.2.'from_bytes',    int.from_bytes(b'1',byteorder='big') = 49   int.from_bytes(b'1',byteorder='little') = 49     #bytes转化成整数 
                     int.from_bytes(b'10',byteorder='big') = 12592   int.from_bytes(b'10',byteorder='little') = 12337  
                     #If byteorder is 'big', the most significant byte is at the beginning of the byte array.  
                     #If byteorder is 'little', the most significant byte is at the end of the byte array.
1.3.'to_bytes'      (1024).to_bytes(4,byteorder='big') = b'\x00\x00\x04\x00'   (1024).to_bytes(4,byteorder='little') = b'\x00\x04\x00\x00'   #整数转化成bytes       
1.4.'conjugate',    (12).conjugate() = 12    (2+4j).conjugate() = 2-4j   (-4-1j).conjugate() = -4+1j   #共轭复数（实部不变，虚部取反）
                 
'denominator',   #分母                
'imag',  
'numerator',     #分子                    
'real',

    denominator = property(lambda self: object(), lambda self, v: None, lambda self: None)  # default
    """the denominator of a rational number in lowest terms"""

    imag = property(lambda self: object(), lambda self, v: None, lambda self: None)  # default
    """the imaginary part of a complex number"""

    numerator = property(lambda self: object(), lambda self, v: None, lambda self: None)  # default
    """the numerator of a rational number in lowest terms"""

    real = property(lambda self: object(), lambda self, v: None, lambda self: None)  # default
    """the real part of a complex number"""

------------------------------------------------------------

2.str:方法

2.1.字符串大小写转换： 
'capitalize',     s = "hello world"     s.capitalize() = 'Hello world'   #首字母变大写
'casefold',       s = "HELLO WORLD"     s.casefold() = 'hello world'     #转化为小写 适合ascii码和其他语言eg:德语 
'lower',          s = 'Hello world'     s.lower() = 'hello world'        #转化为小写 
'swapcase',       s = "Hello world"     s.swapcase() = 'hELLO WORLD'     #大写变小写 小写变大写 
'title',          s = 'Hello world'     s.title() = 'Hello World'        #每个单词的首字母转化为大写  
'upper',          s = 'Hello world'     s.upper() = 'HELLO WORLD'        #转化为大写

2.2.字符串格式输出：
'center',         s = "Hello world"     s.center(30,'*') = '*********Hello world**********'  s.center(20) = '    Hello world     '
'ljust',          s = 'Hello world'     s.ljust(20,'*') = 'Hello world*********'
'rjust',          s = 'Hello world'     s.rjust(20,'*') = '*********Hello world'
'zfill'           s = "Hello world"     s.zfill(20) = '000000000Hello world'
'expandtabs',     x = 'a\tb'            x.expandtabs() = 'a       b'  x.expandtabs(20) = 'a                   b'  #把制表符\t扩展为空格（默认制表位宽度为8）
'format',         q ='{0}多大了，{1}'   q.format("alcie",12) = 'alcie多大了，12'   q='{name}多大了，{age}'  q.format(name='alice',age=12) = 'alice多大了，12'
'format_map',     people = {'name':'alice','age':12}   "my name is {name},i am {age} old".format_map(people) = 'my name is alice,i am 12 old'

2.3.字符串搜索定位与替换
'count',          s = "Hello world"     s.count('o') = 2    s.count('o',0,7) = 1   s.count('o',0,8) = 2 
'find',           p = 'Hello world'     p.find('o') = 4     p.find('o',5,8) = 7
'rfind',          p = 'Hello world'     p.rfind('o') = 7    
'index',          s = "hello world"     s.index('o') = 4    s.index('o',5,8) = 7  
'rindex',         s = "hello world"     s.rindex('o') = 7  
'replace',        s = "hello world"     s.replace('o','O') = 'hellO wOrld'  s.replace('o','O',1) = 'hellO world'  s.replace('o','O',2) = 'hellO wOrld'  
'strip',          s = '\n hello world \r \t \r\n'      s.strip() = 'hello world' 
'lstrip',                                              s.lstrip() = 'hello world \r \t \r\n'
'rstrip',                                              s.rstrip() = '\n hello world'
'maketrans',      table = str.maketrans('abcde','!@#$%')  table = {97: 33, 98: 64, 99: 35, 100: 36, 101: 37}
'translate',      "abcdefghj".translate(table) = '!@#$%fghj'  

2.4.字符串的联合与分割
'join',           s = ['alice','name']    ' - '.join(s) = 'alice - name'   '-'.join(['2018','2','10']) = '2018-2-10'  
                  ','.join({"name":"alice","age":12}) = 'name,age'    ','.join({"name":"alice","age":'12'}.values()) = 'alice,12'  
'partition',      s='hello world'   s.partition('w') = ('hello ', 'w', 'orld')    s.partition('d') = ('hello worl', 'd', '')   #str分割成了元组
'rpartition',     s='hello world'   s.rpartition('o') = ('hello w', 'o', 'rld')   s.partition('o') = ('hell', 'o', ' world')
'split',          'a,b,,c'.split(',') =  ['a', 'b', '', 'c']  'a,b,,c'.split(',',maxsplit=1) = ['a', 'b,,c']  
'rsplit',         'a,b,,c'.rsplit(',') = ['a', 'b', '', 'c']  'a,b,,c'.rsplit(',',maxsplit=1) = ['a,b,', 'c'] 
'splitlines',     'a b\n\nde fg\rk1\r\ns'.splitlines() = ['a b', '', 'de fg', 'k1', 's']   'a b\n\nde fg\rk1\r\ns'.splitlines(keepends = True) = ['a b\n', '\n', 'de fg\r', 'k1\r\n', 's']
                   "".splitlines() = []   "".split('\n') = ['']  
              
2.5.字符串条件判断
'startswith',     s = "Hello world"     s.startswith('He') = True 
'endswith',       p = 'Hello world'     p.endswith('ld') = True  p.endswith('lo',0,5) = True  p.endswith('lo',0,4) = False 
'isalnum',        '12sdsa'.isalnum() = True    'sd'.isalnum() = True    '131'.isalnum() = True  '书店'.isalnum() = True  'sdas_231'.isalnum() = False  #字符串与数字的任意组合即为真  
'isalpha',        '2'.isalpha() = False   'hello'.isalpha() = True   '中国'.isalpha() = True   '232hello'.isalpha() = False     #不含数字的字符串为真 
'isdecimal',      '21'.isdecimal() = True  '21.12'.isdecimal() = False  'sda2321'.isdecimal() = False  'das'.isdecimal() = False  s='\u00b2' print(s)=²  s.isdecimal() = False  #只有整数为真
'isdigit',        '21'.isdigit() = True   '21.23'.isdigit() = False   'dsasa'.isdigit() = False  s='\u00b2' print(s)=²  s.isdigit() = True  #只有整数为真   
                   # all decimals are digits, but not all digits are decimals.
'isnumeric'        s='\u00b2' print(s)=²  s.isnumeric() = True  '12'.isnumeric() = True 'da21'.isnumeric() = False  #只有整数为真
                   # isdecimal: Nd, 不支持unicode的整数（如'²'） str
                   # isdigit: No, Nd, 支持unicode的整数  str 
                   # isnumeric: No, Nd, Nl 支持unicode的小数 str
                   num = '\u2155' print(num)=1/5  num.isnumeric() = True   num.isdigit() = False   num.isdecimal() = False
'isidentifier',   'dsad123'.isidentifier() = True   '123_sd'.isidentifier() = False   'while'.isidentifier() = True  #是否为合法的标识符    
'islower',        'ASDA'.islower() = False  '123sada'.islower() = True  'sds'.islower() = True  #是否是小写 
'isupper',        'SD'.isupper() = True     'SDaas'.isupper() = False   '中'.isupper() = False  'A_213'.isupper() = True  '23'.isupper() = False  #判断是否是大写
'isprintable',    ' '.isprintable() = True   'asdas23'.isprintable() = True  'das232\n'.isprintable() = False  'dass\tdas'.isprintable() = False  #只有\n\t\r 为False
'isspace',        ' '.isspace() = True   '\n\r\t'.isspace() = True   ' 1'.isspace() = False   's \n s'.isspace() = False   #只有那两种为True 
'istitle',        'Hello World'.istitle() = True   'HeLLo World'.istitle() = False  '中Hello World'.istitle() = True  #判断每个单词是否首字母大写、其余字母小写 

2.6.字符串编码：
'encode',         s = '中国'  s.encode() = b'\xe4\xb8\xad\xe5\x9b\xbd'  s.encode("utf-8") = b'\xe4\xb8\xad\xe5\x9b\xbd'  s.encode("gbk") = b'\xd6\xd0\xb9\xfa' 
                              s.encode('utf-8').decode('utf-8') = '中国'     

参考：https://segmentfault.com/a/1190000004598007

------------------------------------------------------------
3.列表list：
列表的功能：创建查询切片增加修改删除循环排序

li = ['hello','world',1,2] li = []  li = list()  li = ['abc',['def','ghi']]
li = ['HELLO', 'world', 1, 2, 3, 'alice', 'world']   #顾头不顾尾 
li[2:5] = [1, 2, 3] 
li[0:6:2] = ['HELLO', 1, 3]  
li[li.index(2)] = 'hello'
range(10)   0,1,2,3,4,5,6,7,8,9
range(1,10,2)  1,3,5,7,9
enumerate(li)   
enumerate(li,3)    
len(li)  

'append',      li.append(3)   li = ['hello', 'world', 1, 2, 3]  li.append('alice')  li = ['hello', 'world', 1, 2, 3, 'alice']
'insert',      li.insert(1,'baijing')    li = ['HELLO', 'baijing', 'world', 1, 2, 3, 'alice', 'world']  
'extend',      li2 = ['bai','jing']   li.extend(li2)  li = ['hello', '1', '2', '3', ' ', '4', '5', 3, 'alice', 'world', ['300', '2'], 'bai', 'jing']
               li3 = li2 + li 
'index',       li[1] = 'world'  li.index('world') = 1   li[li.index('world')] = 'world'  
'count',       li = ['HELLO', 'world', 1, 2, 3, 'alice', 'world']   li.count('world') = 2  
'clear',       li.clear()     li = []
'pop',         li.pop(li.index('2')) = '2'  li.pop(-1) = 'jing'  li.pop() #删了并返回值 
'remove',      li.remove('6')  #删了，若没有这个值 就会报错   
               del li[1]   del li[1:4]   #根据索引删了 批量删  
'sort'         li.sort()  #根据ascii排序的   
'reverse',     li.reverse()   #反转li  
'copy',        li2 = li.copy()  #浅copy两列表独立，但列表里面的列表不独立       
               #深copy  import copy  li4=copy.deepcopy(li)  li 和 li4 相互独立 列表里面的列表也是相互独立的

------------------------------------------------------------
4.元组tuple：

4.1.特性：不可变，元组本身不可变，如果元组中包含其他可变元素，这些可变元素可以改变
4.2.功能：index count 切片
4.3.使用场景：显式地告知别人此处数据不可修改，例如数据库连接配置信息等

li=('alice',12,12,21) 
'count',      li.count(12) = 2     
'index'       li.index(12) = 1      
li[0:3] = ('alice', 12, 12)

------------------------------------------------------------

5.字典dict：

特性：1.key必须可哈希，必须唯一 2.可存放任意多个值，可修改，可以不唯一 3.无序 4.查找速度快

hash("hello") = 8886137942810973477 （19位） #能被hash()的只有不可变的元素。不可变：数字，字符串，元组；可变：列表
hash的用途：
1.文件签名
2.MD5加密 --- 无法反解
3.密码验证

用法：增 删 改 查 循环
li = {1: 2, 'alice': 'hello world', 'name': [1, 2, 3]} 
li['age']=12     #增  
li = {1: 2, 'alice': 'hello world', 'name': [1, 2, 3], 'age': 12} 
li['age']=121    #改  
{1: 2, 'alice': 'hello world', 'name': [1, 2, 3], 'age': 121}
li['alice']      #查  找不到就报错
'hello world' 
'get',  li.get('alice') = 'hello world'  print(li.get('sds')) = None  #查 若没有找到 不会报错

'clear',       s = {1: 2, 2: 2}   s.clear()   s = {}  #清空
'copy',        li2=li.copy()    #深浅copy 
'keys',        li.keys()  = dict_keys([1, 'alice', 'name', 'age'])  #获取字典中的 keys
'values'       li.values() = dict_values([121, '哈哈', [1, 2, 3], 121])  #获取字典中的 values
'items',       li.items() = dict_items([(1, 121), ('alice', '哈哈'), ('name', [1, 2, 3]), ('age', 121)])
'fromkeys',    dict.fromkeys([1,2,3],'alice') = {1: 'alice', 2: 'alice', 3: 'alice'} #批量生成字典  
'pop',         s = {1: 2, 2: 3}  s.pop(2) = 3   s.pop(9,'error') # 删除并返回value的值 
'popitem',     s.popitem() = ('s', None)  #删除并返回一个键值对（Python 3.7+ 删除最后插入的一项，更早的版本为随机删）  若是空字典 popitem()时将会报错
'setdefault',  print(s.setdefault(3,'alice')) = None  s.setdefault('c','aa') = aa #有的话不加，没有的话就加进去 
'update',      s.update(li)  #有的话替换，没有的话，就新加

------------------------------------------------------------
6.集合set：

特性：无序，不重复的数据组合
作用：1.去重，把一个列表变成集合，就自动去重了 2.关系测试，测试两组数据之间的交集，差集，并集

'add',                   s.add('alice')   s = {1, 2, 'alice'}    #只有可hash的才能增加进去，可hash的有 数字 字符串 元组 
'update'                 s = {1, 2, 'alice'}  s.update((1,2,3,4,5,'alex'))  s = {1, 2, 3, 4, 5, 'alex', 'alice'}  #原地修改s，update()本身返回None 
'pop',                   s.pop() = 1  #随机删除并返回一个元素（集合无序，不保证删的是"第一个"） 若是空集合 pop()时将会报错 
'remove',                s.remove('alice')   #删除 若没有元素 就会报错  
'discard',               s.discard(3)   #删除 若没有元素 不会报错 
'clear',                 s.clear()     
'copy',                  s1=s.copy()  
'difference',                    s.difference(li)   #差集 -
'difference_update',             s.difference_update(li)   #差集 赋值给s          
'intersection',                  s.intersection(li)   #交集  &
'intersection_update',           s.intersection_update(li) #交集 赋值给s 
'union',                         li.union(s)  #并集 | 
'symmetric_difference',          s.symmetric_difference(li) #对称差集  
'symmetric_difference_update',   s.symmetric_difference_update(li) #对称差集 并赋值给了s            
'issubset',                      s.issubset(li) = True  <=  #s是li的子集吗 
'issuperset',                    li.issuperset(s) = True   >= # li是s的超集
'isdisjoint',                    s.isdisjoint(li) = True / False   #不相交（没有共同元素时为True）

------------------------------------------------------------

转换：

1.列表  li=[1,2,3,4,5]
   str(li)    tuple(li)  
2.元组  li=(1,2,3,4,5)
   str(li)  li.__str__()  list(li)
3.字典  li={"name":"alice","age":12}
   str(li)  list(li) li.values()  tuple(li) tuple(li.values())  
4.字符串  li='Hello world'
   list(li)  eval("[1,2,3]") = [1, 2, 3]  tuple(li)  eval("(1,2,3)")   eval("{1:2,2:3}")

------------------------------------------------------------

字符编码 gbk unicode utf-8 encode() decode()

utf-8：一个中文字符（常用汉字）占3个字节
gbk：一个中文字符占2个字节
Unicode（UCS-2/UTF-16）：一个中文字符（常用汉字）占2个字节

Windows系统默认编码是GBK(两个字节)
Mac 和 linux 是 UTF-8

Unicode --> encode 编码 --> GBK中文，Shift-JIS日本编码
GBK -------> decode 解码 --> Unicode
Shift-JIS --> decode 解码 --> Unicode

"中国".encode("utf-8") = b'\xe4\xb8\xad\xe5\x9b\xbd'
"中国".encode("gbk") = b'\xd6\xd0\xb9\xfa'
"中国".encode("shift-jis") = b'\x92\x86\x8d\x91'
"中国".encode("utf-8").decode("utf-8") = '中国'
"中国".encode("gbk").decode("gbk") = '中国'
"中国".encode("shift-jis").decode("shift-jis") = '中国'

posted @ 2018-02-11 19:18 Alice的小屋阅读(279) 评论(0) 编辑收藏举报

刷新页面返回顶部

开发基础 - 总结 - 数据类型（特性 方法） 字符编码

公告

开发基础 - 总结 - 数据类型（特性方法）字符编码