Python序列结构--字典
-
字典(dict)是包含若干“键:值”元素的无序可变序列(这里的“无序”指不能按位置下标访问元素;自Python 3.7起,字典会保持元素的插入顺序)
-
字典中元素的“键”可以是Python中任意可哈希的数据,例如整数、实数、复数、字符串、元组(其元素也必须可哈希)等不可变类型,但不能使用列表、集合、字典等可变类型;“键”不允许重复,“值”是可以重复的。字典在内部维护的哈希表使得检索操作非常快。
字典创建与删除
-
使用赋值运算符“=”将一个字典赋值给一个变量,即可创建一个字典变量
>>> aDict = {'server':'db.diveintopython3.org','database':'mysql'}
>>> x = dict() # 创建空字典
>>> x
{}
>>> y = {} # 创建空字典
>>> keys = ['a','b','c','d']
>>> values = [1,2,3,4]
>>> dictionary = dict(zip(keys,values)) # 根据已有数据创建字典
>>> dictionary
{'a': 1, 'b': 2, 'c': 3, 'd': 4}
>>> d = dict(name = 'Dong', age = 39) # 以关键参数的形式创建字典
>>> d
{'name': 'Dong', 'age': 39}
>>> aDict = dict.fromkeys(['name','age','sex']) # 以给定内容为“键”,创建值为空的字典
>>> aDict
{'name': None, 'age': None, 'sex': None}
字典推导式
-
使用字典推导式快速生成符合特定条件的字典
>>> {i:str(i) for i in range(1,5)}
{1: '1', 2: '2', 3: '3', 4: '4'}
>>> x = ['A','B','C','D']
>>> y = ['a','b','c','d']
>>> {i:j for i,j in zip(x,y)}
{'A': 'a', 'B': 'b', 'C': 'c', 'D': 'd'}
字典元素的访问
-
字典中每个元素表示一种映射关系或对应关系
>>> aDict = {'age':30,'score':[98,97],'name':'Dong','sex':'male'}
>>> aDict['age'] # 以“键”作为下标,访问对应的“值”
30
>>> aDict['address'] # 字典中不存在该key时,抛出异常
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
KeyError: 'address'
# 处理方式一:先用in判断“键”是否存在,避免抛出异常
>>> if 'address' in aDict:
...     print(aDict['address'])
... else:
...     print('No Exists')
...
No Exists
# 处理方式二:使用try...except捕获异常
>>> try:
...     print(aDict['address'])
... except:
...     print('No Exist')
...
No Exist
-
get()方法:返回指定“键”的值,并且允许指定该键不存在时返回特定的“值”
>>> aDict.get('age')
30
>>> aDict.get('adress','No Exist.') # 指定键不存在时返回指定默认值
'No Exist.'
>>> import string
>>> import random
>>> x = string.ascii_letters + string.digits
>>> z = ''.join((random.choice(x) for i in range(1000)))
>>> d = dict()
>>> for ch in z: # 遍历字符串,统计每个字符出现的次数
...     d[ch] = d.get(ch,0) + 1
...
>>> for k,v in sorted(d.items()): # 查看统计结果
...     print(k,':',v)
...
0 : 13
1 : 19
2 : 17
3 : 17
4 : 19
5 : 25
6 : 21
7 : 12
8 : 17
9 : 17
A : 11
B : 20
C : 15
D : 21
E : 22
F : 9
G : 15
H : 12
I : 9
J : 16
K : 13
L : 16
M : 19
N : 14
O : 17
P : 11
Q : 14
R : 16
S : 11
T : 22
U : 13
V : 20
W : 21
X : 17
Y : 14
Z : 21
a : 17
b : 9
c : 17
d : 15
e : 14
f : 11
g : 18
h : 20
i : 21
j : 19
k : 20
l : 9
m : 16
n : 10
o : 13
p : 14
q : 25
r : 17
s : 12
t : 20
u : 10
v : 20
w : 17
x : 10
y : 15
z : 25
-
setdefault()方法:用于返回指定“键”对应的值,如果字典中不存在该“键”,就添加一个新元素并设置该“键”对应的“值”(默认为None)
>>> aDict.setdefault('adress','SDIBT')
'SDIBT'
>>> aDict
{'age': 30, 'score': [98, 97], 'name': 'Dong', 'sex': 'male', 'adress': 'SDIBT'}
>>> aDict.setdefault('age',23)
30
-
对字典直接进行迭代或者遍历时默认是遍历字典的“键”
>>> aDict
{'age': 30, 'score': [98, 97], 'name': 'Dong', 'sex': 'male', 'adress': 'SDIBT'}
>>> for item in aDict: # 默认遍历字典的“键”
...     print(item,end=' ')
...
age score name sex adress >>>
>>> for item in aDict.items(): # 明确指定遍历字典的元素
...     print(item,end=' ')
...
('age', 30) ('score', [98, 97]) ('name', 'Dong') ('sex', 'male') ('adress', 'SDIBT') >>>
>>> aDict.items()
dict_items([('age', 30), ('score', [98, 97]), ('name', 'Dong'), ('sex', 'male'), ('adress', 'SDIBT')])
>>> aDict.keys()
dict_keys(['age', 'score', 'name', 'sex', 'adress'])
>>> aDict.values()
dict_values([30, [98, 97], 'Dong', 'male', 'SDIBT'])
元素的添加、修改与删除
-
当以指定“键”为下标为字典元素赋值时,该键存在表示修改,不存在表示添加
>>> aDict = {'age': 30, 'score': [98, 97], 'name': 'Dong', 'sex': 'male', 'adress': 'SDIBT'}
>>> aDict['age'] = 39
>>> aDict
{'age': 39, 'score': [98, 97], 'name': 'Dong', 'sex': 'male', 'adress': 'SDIBT'}
>>> aDict['school'] = 'sichuandaxue'
>>> aDict
{'age': 39, 'score': [98, 97], 'name': 'Dong', 'sex': 'male', 'adress': 'SDIBT', 'school': 'sichuandaxue'}
-
update()方法可以将另外一个字典的“键:值”一次性全部添加到当前字典对象,如果两个字典中存在相同的“键”,则以另一个字典中的“值”为准对当前字典进行更新
>>> aDict = {'age': 30, 'score': [98, 97], 'name': 'Dong', 'sex': 'male', 'adress': 'SDIBT'}
>>> aDict.update({'a': 87, 'age':39})
>>> aDict
{'age': 39, 'score': [98, 97], 'name': 'Dong', 'sex': 'male', 'adress': 'SDIBT', 'a': 87}
-
del命令删除字典中指定的元素
>>> aDict = {'age': 30, 'score': [98, 97], 'name': 'Dong', 'sex': 'male', 'adress': 'SDIBT'}
>>> del aDict['adress']
>>> aDict
{'age': 30, 'score': [98, 97], 'name': 'Dong', 'sex': 'male'}
>>> del aDict
>>> aDict
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'aDict' is not defined
-
字典对象的pop()方法可以弹出并删除指定“键”对应的元素,popitem()方法可以弹出并删除一个元素(自Python 3.7起为最后插入的元素)
>>> aDict = {'age': 30, 'score': [98, 97], 'name': 'Dong', 'sex': 'male', 'adress': 'SDIBT'}
>>> aDict.pop('name') # 弹出指定键对应的值
'Dong'
>>> aDict
{'age': 30, 'score': [98, 97], 'sex': 'male', 'adress': 'SDIBT'}
>>> aDict.popitem() # 弹出一个元素
('adress', 'SDIBT')
>>> aDict
{'age': 30, 'score': [98, 97], 'sex': 'male'}
标准库collections中与字典有关的类
-
OrderedDict类
传统上字典dict是无序的(自Python 3.7起,内置dict也保证保持元素的插入顺序);如果需要一个明确记住元素插入顺序、并支持move_to_end()等顺序相关操作的字典,可以使用collections.OrderedDict
>>> import collections
>>> x = collections.OrderedDict()
>>> x['a'] = 3
>>> x['b'] = 5
>>> x['c'] = 8
>>> x
OrderedDict([('a', 3), ('b', 5), ('c', 8)])
-
defaultdict类
字母出现频次统计问题,也可以使用collections模块的defaultdict类来实现
>>> import string
>>> import random
>>> x = string.ascii_letters+string.digits+string.punctuation
>>> z = ''.join([random.choice(x) for i in range(100)])
>>> from collections import defaultdict
>>> frequences = defaultdict(int) # 所有值默认为0
>>> frequences
defaultdict(<class 'int'>, {})
>>> for item in z:
...     frequences[item] += 1
...
>>> frequences.items()
dict_items([('F', 1), ('[', 2), ('q', 1), ('>', 2), ('d', 5), ('`', 1), ('e', 2), ('!', 3), ('A', 1), ('R', 1), ('Z', 2), ('V', 2), ('g', 2), ('n', 2), ('2', 1), ('w', 1), ('|', 1), ('v', 3), ('c', 2), ('u', 3), ('&', 4), ('m', 2), ('S', 2), (',', 2), ('@', 3), ('$', 2), ('{', 1), ('j', 1), ('\\', 1), ('~', 1), ('U', 1), ('=', 1), ('M', 4), ('l', 1), ('^', 1), ('}', 1), (']', 2), ('0', 1), ('+', 2), ('(', 1), ('"', 1), ('Q', 1), ('4', 2), ('.', 1), ('x', 1), ("'", 1), ('<', 2), ('/', 2), (';', 1), ('E', 1), (')', 1), ('o', 1), ('P', 1), ('W', 1), ('B', 1), ('K', 1), ('8', 1), ('_', 1), ('N', 1), ('h', 1), ('7', 1), ('I', 1), ('G', 1), ('*', 1), ('y', 1)])
创建defaultdict对象时,传递的参数表示字典中值的类型
>>> from collections import defaultdict
>>> games = defaultdict(list)
>>> games
defaultdict(<class 'list'>, {})
>>> games['name'].append('dong')
>>> games['name'].append('zhang')
>>> games['score'].append(90)
>>> games['score'].append(93)
>>> games
defaultdict(<class 'list'>, {'name': ['dong', 'zhang'], 'score': [90, 93]})
-
Counter类
对于词频统计的问题,使用collections模块的Counter类可以更加快速地实现这个功能,并且能够提供更多的功能,例如,查找出现次数最多的元素
>>> import string
>>> import random
>>> x = string.ascii_letters+string.digits+string.punctuation
>>> z = ''.join([random.choice(x) for i in range(1000)])
>>> from collections import Counter
>>> frequences = Counter(z)
>>> frequences.items()
dict_items([('H', 12), ('%', 18), ('K', 13), ('A', 12), ('\\', 6), ('N', 11), ('2', 14), ('y', 13), ('z', 12), ('T', 10), (':', 8), ('m', 8), ("'", 11), ('R', 12), (',', 10), ('E', 7), ('e', 16), ('b', 10), ('f', 16), ('+', 8), ('7', 15), ('v', 9), ('l', 15), ('"', 9), ('.', 12), ('^', 20), ('_', 16), ('>', 7), ('h', 12), ('C', 12), ('p', 13), ('n', 8), ('Y', 14), ('L', 11), ('O', 12), ('{', 5), ('3', 10), (')', 15), ('}', 4), ('|', 14), ('a', 10), ('@', 9), ('w', 10), ('B', 11), ('6', 8), ('Q', 11), ('`', 10), ('/', 8), ('<', 5), ('=', 12), ('M', 12), ('4', 6), ('s', 18), ('[', 7), ('G', 12), ('#', 16), ('o', 13), ('*', 8), ('i', 16), ('P', 12), ('k', 17), ('j', 4), ('-', 15), ('D', 4), (']', 6), ('q', 16), ('$', 17), ('J', 15), ('U', 14), ('t', 11), ('I', 11), ('0', 7), ('r', 12), ('&', 6), ('!', 12), ('u', 10), ('F', 9), ('W', 6), ('c', 11), ('1', 8), ('5', 6), (';', 5), ('V', 12), ('~', 10), ('Z', 11), ('d', 9), ('9', 9), ('X', 13), ('8', 9), ('?', 5), ('S', 6), ('x', 7), ('(', 7), ('g', 6)])
>>> frequences.most_common(1) # 返回出现次数最多的一个字符及其词频
[('^', 20)]
>>> frequences.most_common(3)
[('^', 20), ('%', 18), ('s', 18)]
>>> frequences.most_common(10)
[('^', 20), ('%', 18), ('s', 18), ('k', 17), ('$', 17), ('e', 16), ('f', 16), ('_', 16), ('#', 16), ('i', 16)]
>>> z = ''.join([random.choice(x) for i in range(10000)])
>>> frequences = Counter(z)
>>> frequences.most_common(10)
[('O', 127), ('c', 125), ('5', 121), ('-', 121), ('\\', 121), ("'", 120), ('~', 118), (',', 118), ('J', 118), ('<', 117)]
>>> z = [1,2,3,4,1,'a','v','wer','wer','wer',1]
>>> frequences = Counter(z)
>>> frequences.most_common(3)
[(1, 3), ('wer', 3), (2, 1)]