字典和集合
字典构造方法:
# Five equivalent ways to build the same dict.
a = dict(one=1, two=2, three=3)                    # keyword arguments
b = {'one': 1, 'two': 2, 'three': 3}               # dict literal
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))  # zipped keys and values
d = dict([('one', 1), ('two', 2), ('three', 3)])   # iterable of (key, value) pairs
e = dict({'one': 1, 'two': 2, 'three': 3})         # copy of an existing mapping
# All five compare equal because they hold the same key/value pairs.
print(a == b == c == d == e)
输出:
True
字典新增元素
cat_names = ['andy', 'bob', 'candy']
cat_ages = [12, 3, 6]
cat_dict = dict(zip(cat_names, cat_ages))
print('origin dict:', cat_dict)

print('-' * 20, '__setitem__', '-' * 50)
# Subscript assignment (__setitem__) adds a new key/value pair to the dict.
cat_dict['fancl'] = 13
print('after inserted:', cat_dict)

print('-' * 20, 'setdefault()', '-' * 50)
# setdefault(): if the key already exists, return its current value;
# otherwise insert the key with the given default and return that default.
print('andy:', cat_dict.setdefault('andy', 10))
print('david:', cat_dict.setdefault('david', 10))
print('after inserted:', cat_dict)
输出:
origin dict: {'andy': 12, 'bob': 3, 'candy': 6}
-------------------- __setitem__ --------------------------------------------------
after inserted: {'andy': 12, 'bob': 3, 'candy': 6, 'fancl': 13}
-------------------- setdefault() --------------------------------------------------
andy: 12
david: 10
after inserted: {'andy': 12, 'bob': 3, 'candy': 6, 'fancl': 13, 'david': 10}
字典推导
字典推导可以从任何以键值对作为元素的可迭代对象中构建出字典
numbers = [('one', 1), ('two', 2), ('three', 3)]

# Swap each (name, value) pair so that the number becomes the key.
num_dict = {value: name for name, value in numbers}
print(num_dict)

# Build a second dict from the first one, upper-casing every name.
num_dict_upper = {value: name.upper() for value, name in num_dict.items()}
print(num_dict_upper)
输出:
{1: 'one', 2: 'two', 3: 'three'}
{1: 'ONE', 2: 'TWO', 3: 'THREE'}
字典迭代
cat_names = ['andy', 'bob', 'candy']
cat_ages = [12, 3, 6]
# Initialise the dict by zipping keys with values.
cat_dict = dict(zip(cat_names, cat_ages))

# iter() on a dict yields an iterator over its keys.
for idx in iter(cat_dict):
    print(idx)
print('-' * 100)

# keys() also gives access to all the keys.
for idx in cat_dict.keys():
    print(idx)
print('-' * 100)

# items() yields every (key, value) pair as a tuple.
for item in cat_dict.items():
    print(item)
输出:
andy
bob
candy
----------------------------------------------------------------------------------------------------
andy
bob
candy
----------------------------------------------------------------------------------------------------
('andy', 12)
('bob', 3)
('candy', 6)
dict.update方法
numbers = [('one', 1), ('two', 2), ('three', 3)]
# Keys are a built-in type (str).
num_dict = {name: value for name, value in numbers}
print(num_dict)
# update() overwrites the value of a key that already exists ...
num_dict.update({'two': 4})
print(num_dict)
# ... and inserts the pair when the key is not in the dict yet.
num_dict.update({'five': 5})
print(num_dict)
print('-' * 100)


class myname:
    """Custom key type that implements both __eq__ and __hash__.

    Two instances are equal when their ``name`` attributes are equal, and
    the hash is derived from ``name`` as well, so equal instances always
    hash alike and can replace one another as dict keys.
    """

    def __init__(self, name):
        self.name = name

    def __eq__(self, other):
        # Comparing with an unrelated type must not raise AttributeError;
        # returning NotImplemented lets Python fall back to its default.
        if not isinstance(other, myname):
            return NotImplemented
        return self.name == other.name

    def __str__(self):
        return self.name

    def __repr__(self):
        return self.name

    def __hash__(self):
        return hash(self.name)


numbers = [(myname('one'), 1), (myname('two'), 2), (myname('three'), 3)]
# Keys are instances of the custom type.
num_dict = {name: value for name, value in numbers}
print(num_dict)
# A fresh but equal instance (same __hash__, __eq__ is True) updates the
# existing entry instead of adding a new one.
num_dict.update({myname('two'): 4})
print(num_dict)
输出:
{'one': 1, 'two': 2, 'three': 3}
{'one': 1, 'two': 4, 'three': 3}
{'one': 1, 'two': 4, 'three': 3, 'five': 5}
----------------------------------------------------------------------------------------------------
{one: 1, two: 2, three: 3}
{one: 1, two: 4, three: 3}
dict.setdefault
import re

s = '''dog hits cat, cat hates dog, you eat dog but not cat. '''
patt = re.compile(r'\w+')
index = {}
for match in patt.finditer(s):
    word = match.group()
    # If the word is already a key, setdefault() returns its current count;
    # otherwise it inserts the word with a count of 0 and returns 0.
    count = index.setdefault(word, 0)
    index[word] = count + 1

# Print the words ordered by number of occurrences, most frequent first.
# sorted() is stable, so ties keep their first-seen order.
for word, count in sorted(index.items(), key=lambda item: item[1], reverse=True):
    print(word, ':', count)
输出:
dog : 3
cat : 3
hits : 1
hates : 1
you : 1
eat : 1
but : 1
not : 1
defaultdict
import re
import collections

s = '''dog hits cat, cat hates dog, you eat dog but not cat. '''
patt = re.compile(r'\w+')
# defaultdict takes a callable that is invoked to produce the default value
# whenever a missing key is looked up.
index = collections.defaultdict(int)
for match in patt.finditer(s):
    word = match.group()
    # A word that is not in the dict yet is given the default int() == 0
    # automatically, so it can be incremented straight away.
    index[word] += 1

# Print the words ordered by number of occurrences, most frequent first.
for word, count in sorted(index.items(), key=lambda item: item[1], reverse=True):
    print(word, ':', count)
输出:
dog : 3
cat : 3
hits : 1
hates : 1
you : 1
eat : 1
but : 1
not : 1
import re
from collections import defaultdict

# Contents of dogcat.txt:
#   a dog sits by a cat
#   a cat sits by a dog
#   two animals sit together

word_re = re.compile(r'\w+')
index = defaultdict(list)
with open('d:\\dogcat.txt') as fp:
    # line_no is zero-based because enumerate() starts counting at 0.
    for line_no, line in enumerate(fp):
        for match in word_re.finditer(line):
            word = match.group()
            # match.start() is zero-based; +1 gives a one-based column.
            col_no = match.start() + 1
            location = (line_no, col_no)
            # defaultdict uses its constructor argument to initialise a
            # value for any key not yet in the mapping -- here list().
            index[word].append(location)

# Print the words in case-insensitive alphabetical order.
for word in sorted(index, key=str.upper):
    print(word, index[word])
输出:
a [(0, 1), (0, 15), (1, 1), (1, 15)]
animals [(2, 5)]
by [(0, 12), (1, 12)]
cat [(0, 17), (1, 3)]
dog [(0, 3), (1, 17)]
sit [(2, 13)]
sits [(0, 7), (1, 7)]
together [(2, 17)]
two [(2, 1)]
dict.__missing__方法
__missing__方法只会被__getitem__调用
提供__missing__方法对get或者__contains__的行为没有影响(这两个方法不会调用__missing__)
class StrKeyDict0(dict):
    """dict that retries a failed lookup with the str() form of the key."""

    def __missing__(self, key):
        # Invoked by __getitem__ when the key is not found.
        if isinstance(key, str):
            # A str key that is missing really is missing; raising here also
            # stops the retry below from recursing forever.
            raise KeyError(key)
        return self[str(key)]

    def get(self, key, default=None):
        # Use self[key] so the lookup is delegated to __getitem__, which
        # gives __missing__ a chance to run.
        try:
            return self[key]
        except KeyError:
            return default

    def __contains__(self, item):
        # Look the key up as given and in its str() form.
        return item in self.keys() or str(item) in self.keys()


sk = StrKeyDict0([('2', 'two'), ('4', 'four')])
# Goes through __getitem__.
print(sk[2])
# Goes through get().
print(sk.get(4))
输出:
two
four
popitem():dict和OrderedDict比较
OrderedDict在添加键的时候会保持顺序,因此键的迭代顺序与输入保持一致
OrderedDict的popitem方法默认删除并返回字典里最后一个元素;如果传递last=False参数,它会删除并返回第一个被添加进去的元素
from collections import OrderedDict

dog_names = ['daniel', 'fancl', 'grace']
dog_ages = [9, 10, 11]
dog_dict = dict(zip(dog_names, dog_ages))
print('before pop:', dog_dict)
# dict.popitem() removes and returns the most recently inserted pair
# (LIFO order is guaranteed since Python 3.7; earlier versions removed an
# arbitrary pair).
print('pop item:', dog_dict.popitem())
print('after pop:', dog_dict)
print('-' * 100)

cat_names = ['andy', 'bob', 'candy']
cat_ages = [12, 3, 6]
cat_dict = OrderedDict(zip(cat_names, cat_ages))
# Optional: cat_dict.move_to_end('bob', False) would move 'bob' to the front.
print('before pop:', cat_dict)
# last=False removes in first-in, first-out order;
# last=True removes in last-in, first-out order.
print('pop item:', cat_dict.popitem(last=False))
print('after pop:', cat_dict)
输出:
before pop: {'daniel': 9, 'fancl': 10, 'grace': 11}
pop item: ('grace', 11)
after pop: {'daniel': 9, 'fancl': 10}
----------------------------------------------------------------------------------------------------
before pop: OrderedDict([('andy', 12), ('bob', 3), ('candy', 6)])
pop item: ('andy', 12)
after pop: OrderedDict([('bob', 3), ('candy', 6)])
collections.Counter
一个计数器字典
对传入的可迭代对象进行计数,形成一个字典
每次update的时候会增加这个计数器
most_common(n)会按照次序返回映射里最常见的n个键和它们的计数
import collections

# Count how often each character occurs in the string.
count = collections.Counter('abrabcablle')
print('count :', count)
# update() adds the new occurrences to the existing counts.
count.update('abrabcablle')
print('count :', count)
# most_common(2) returns the two most frequent keys with their counts.
print('count.most_common(2) : ', count.most_common(2))
输出:
count : Counter({'a': 3, 'b': 3, 'l': 2, 'r': 1, 'c': 1, 'e': 1})
count : Counter({'a': 6, 'b': 6, 'l': 4, 'r': 2, 'c': 2, 'e': 2})
count.most_common(2) : [('a', 6), ('b', 6)]
继承collections.UserDict
UserDict不是dict的子类,但UserDict有一个叫data的属性,是dict的实例
import collections


class StrKeyDict(collections.UserDict):
    """Mapping that stores every key as a str and looks keys up by str()."""

    def __missing__(self, key):
        # A str key that is missing really is missing.
        if isinstance(key, str):
            raise KeyError(key)
        # Otherwise retry against the underlying dict with the str() form.
        return self.data[str(key)]

    def __setitem__(self, key, value):
        # Normalise every key to str before storing it.
        self.data[str(key)] = value

    def __contains__(self, item):
        # All stored keys are str, so one check on the str() form suffices.
        return str(item) in self.data


sk = StrKeyDict([('2', 'two'), ('4', 'four')])
# Goes through __getitem__.
print(sk[2])
# Goes through get(). The get() inherited via UserDict already falls back to
# the default for a missing key, so unlike StrKeyDict0 no override is needed.
print(sk.get(4))
输出:
two
four
不可变映射类型
标准库里的所有映射都是可变的
types模块引入了一个封装类名叫MappingProxyType,如果给这个类一个映射,会返回一个只读的映射视图
但它是动态的,意味着如果对原映射做出改动,我们通过这个视图可以观察到
from types import MappingProxyType

d = dict([(1, 'one')])
md = MappingProxyType(d)
print('md :', md)
print('md[1] : ', md[1])
# The proxy is a read-only view, so item assignment is rejected:
#   md[2] = 'two'
#   TypeError: 'mappingproxy' object does not support item assignment
d[2] = 'two'
# The view is dynamic: a change made to the original mapping shows through.
print('md :', md)
输出:
md : {1: 'one'}
md[1] : one
md : {1: 'one', 2: 'two'}
建立集合和frozenset
# Create an empty set ({} would create an empty dict instead).
s = set()
print(s)
# Create a set from a literal.
s = {1, 2, 3}
print(s)
# Set comprehension.
s = {x for x in range(10)}
print(s)
# Create a frozenset (the immutable counterpart) from the set.
fs = frozenset(s)
print(fs)
输出:
set()
{1, 2, 3}
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})
关于字典和集合的散列
如果你实现了一个类的__eq__方法,并且希望它是可散列的,那么它一定要有个恰当的__hash__方法,保证在a==b为真的情况下hash(a)==hash(b)也必定为真。否则会破坏恒定的散列表算法
另一方面,如果一个类自定义的__eq__依赖于可变的状态,那就不要在这个类中实现__hash__方法,因为它的实例是不可散列的