字典和集合

字典构造方法:

# Five interchangeable ways of building the very same dictionary.
a = dict(one=1, two=2, three=3)                     # keyword arguments
b = {'one': 1, 'two': 2, 'three': 3}                # dict literal
c = dict(zip(('one', 'two', 'three'), (1, 2, 3)))   # zipped keys and values
d = dict([('one', 1), ('two', 2), ('three', 3)])    # iterable of (key, value) pairs
e = dict({'one': 1, 'two': 2, 'three': 3})          # copy of another mapping

# All five objects compare equal regardless of how they were built.
all_equal = a == b == c == d == e
print(all_equal)

输出:
True

 

字典新增元素

# Pair each cat name with its age.
cat_names = ['andy', 'bob', 'candy']
cat_ages = [12, 3, 6]
cat_dict = dict(zip(cat_names, cat_ages))

print('origin dict:', cat_dict)
print('-' * 20, '__setitem__', '-' * 50)

# Subscript assignment adds a new key/value pair to the dict.
cat_dict['fancl'] = 13
print('after inserted:', cat_dict)

print('-' * 20, 'setdefault()', '-' * 50)

# setdefault(): when the key already exists its stored value is returned;
# when it does not, the key is inserted with the given default.
andy_age = cat_dict.setdefault('andy', 10)
print('andy:', andy_age)
david_age = cat_dict.setdefault('david', 10)
print('david:', david_age)
print('after inserted:', cat_dict)

输出:

origin dict: {'andy': 12, 'bob': 3, 'candy': 6}
-------------------- __setitem__ --------------------------------------------------
after inserted: {'andy': 12, 'bob': 3, 'candy': 6, 'fancl': 13}
-------------------- setdefault() --------------------------------------------------
andy: 12
david: 10
after inserted: {'andy': 12, 'bob': 3, 'candy': 6, 'fancl': 13, 'david': 10}

 

 

 

字典推导

  字典推导可以从任何以键值对作为元素的可迭代对象中构建出字典

numbers = [('one', 1), ('two', 2), ('three', 3)]

# Swap every (name, value) pair so that the number becomes the key.
num_dict = {number: word for word, number in numbers}
print(num_dict)

# Build a second dict from the first one, upper-casing each name.
num_dict_upper = {number: word.upper() for number, word in num_dict.items()}
print(num_dict_upper)

输出:

{1: 'one', 2: 'two', 3: 'three'}
{1: 'ONE', 2: 'TWO', 3: 'THREE'}

 

字典迭代

cat_names = ['andy', 'bob', 'candy']
cat_ages = [12, 3, 6]

# Initialise the dict by zipping names and ages together.
cat_dict = dict(zip(cat_names, cat_ages))

# iter() on a dict yields an iterator over its keys.
for name in iter(cat_dict):
    print(name)

print('-' * 100)

# keys() exposes the same keys explicitly.
for name in cat_dict.keys():
    print(name)

print('-' * 100)

# items() yields every (key, value) pair as a tuple.
for pair in cat_dict.items():
    print(pair)

输出:

andy
bob
candy
----------------------------------------------------------------------------------------------------
andy
bob
candy
----------------------------------------------------------------------------------------------------
('andy', 12)
('bob', 3)
('candy', 6)

 

 

dict.update方法

numbers = [('one', 1), ('two', 2), ('three', 3)]

# The keys of this dict are instances of a built-in type (str).
num_dict = dict(numbers)
print(num_dict)

# update() overwrites the value stored under an existing key ...
num_dict.update(two=4)
print(num_dict)

# ... and inserts the pair when the key is not present yet.
num_dict.update(five=5)
print(num_dict)

print('-' * 100)

# 一个实现了__eq__和__hash__的自定义类
class myname:
    """Wrapper around a name that can be used as a dictionary key.

    Equality and hashing are both derived from ``name``, so two instances
    wrapping equal names are treated as the same key by dicts and sets.
    """

    def __init__(self, name):
        """Store *name*, the value that drives equality, hashing and display."""
        self.name = name

    def __eq__(self, other):
        """Return True when *other* is a ``myname`` wrapping an equal name.

        For any other type ``NotImplemented`` is returned so that Python falls
        back to its default comparison. Reading ``other.name`` unconditionally
        would raise ``AttributeError`` -- which can even happen inside a dict
        lookup, because ``hash(myname('one')) == hash('one')`` makes the dict
        compare the two keys.
        """
        if not isinstance(other, myname):
            return NotImplemented
        return self.name == other.name

    def __str__(self):
        """Return the wrapped name."""
        return self.name

    def __repr__(self):
        """Return the wrapped name, so containers print e.g. ``{one: 1}``."""
        return self.name

    def __hash__(self):
        """Hash by ``name`` so that equal instances have equal hashes."""
        return hash(self.name)


numbers = [
    (myname(word), value)
    for word, value in (('one', 1), ('two', 2), ('three', 3))
]

# This time the keys are instances of the user-defined class.
num_dict = dict(numbers)
print(num_dict)

# A fresh myname('two') is equal to (and hashes like) the stored key,
# so update() replaces the existing value instead of adding a new entry.
num_dict.update({myname('two'): 4})
print(num_dict)

输出:

{'one': 1, 'two': 2, 'three': 3}
{'one': 1, 'two': 4, 'three': 3}
{'one': 1, 'two': 4, 'three': 3, 'five': 5}
----------------------------------------------------------------------------------------------------
{one: 1, two: 2, three: 3}
{one: 1, two: 4, three: 3}

 

dict.setdefault

import re

s = '''dog hits cat, cat hates dog, you eat dog but not cat.
'''

patt = re.compile(r'\w+')

index = {}

for found in patt.finditer(s):
    token = found.group()
    # setdefault() returns the tally stored for the word; an unseen word is
    # first added with a tally of 0, and that 0 is returned.
    index[token] = index.setdefault(token, 0) + 1

# Print the words ordered by how often they occur, most frequent first.
ranked = sorted(index.items(), key=lambda pair: pair[1], reverse=True)
for word, count in ranked:
    print(word, ':', count)

输出:

dog : 3
cat : 3
hits : 1
hates : 1
you : 1
eat : 1
but : 1
not : 1

 defaultdict

import re
import collections

s = '''dog hits cat, cat hates dog, you eat dog but not cat.
'''

patt = re.compile(r'\w+')

# defaultdict takes a callable that produces the value for a missing key;
# int() returns 0, which is the starting tally of every word.
index = collections.defaultdict(int)

for found in patt.finditer(s):
    # An unseen word is created with the default 0 before being incremented.
    index[found.group()] += 1

# Print the words ordered by how often they occur, most frequent first.
ranked = sorted(index.items(), key=lambda pair: pair[1], reverse=True)
for word, count in ranked:
    print(word, ':', count)

输出:

dog : 3
cat : 3
hits : 1
hates : 1
you : 1
eat : 1
but : 1
not : 1

 

import re
from collections import defaultdict

'''
contents in dogcat.txt:
a dog sits by a cat
a cat sits by a dog
two animals sit together
'''

word_re = re.compile(r'\w+')

# Maps each word to the list of (line, column) positions where it occurs.
index = defaultdict(list)

with open('d:\\dogcat.txt') as fp:
    for line_no, line in enumerate(fp):
        for found in word_re.finditer(line):
            # line_no comes straight from enumerate() and is therefore 0-based,
            # while the column is shifted by one to be 1-based.
            location = (line_no, found.start() + 1)
            # defaultdict calls the factory given to its constructor -- list()
            # here -- to create the value of a key that is not in the mapping.
            index[found.group()].append(location)

# List the words alphabetically, ignoring case.
for word in sorted(index, key=str.upper):
    print(word, index[word])

输出:

a [(0, 1), (0, 15), (1, 1), (1, 15)]
animals [(2, 5)]
by [(0, 12), (1, 12)]
cat [(0, 17), (1, 3)]
dog [(0, 3), (1, 17)]
sit [(2, 13)]
sits [(0, 7), (1, 7)]
together [(2, 17)]
two [(2, 1)]

 

dict.__missing__方法

  __missing__方法只会被__getitem__调用

  提供__missing__方法对get或者__contains__这些方法的行为没有影响

class StrKeyDict0(dict):
    """dict that retries a failed lookup with the key converted to ``str``."""

    def __missing__(self, key):
        # Called by __getitem__ when ``key`` is not found.
        if not isinstance(key, str):
            # Give the lookup a second chance with the string form of the key.
            return self[str(key)]
        # A str key that is absent is a real miss; raising here also keeps
        # the retry above from recursing forever.
        raise KeyError(key)

    def get(self, key, default=None):
        # Look the key up through self[key] so that __missing__ can kick in.
        try:
            found = self[key]
        except KeyError:
            return default
        else:
            return found

    def __contains__(self, item):
        # Search the keys view: ``item in self`` would call this method again.
        existing = self.keys()
        if item in existing:
            return True
        return str(item) in existing

pairs = [('2', 'two'), ('4', 'four')]
sk = StrKeyDict0(pairs)

# Subscript access goes through __getitem__, which falls back to __missing__.
print(sk[2])

# get() is overridden to go through the same lookup path.
print(sk.get(4))

输出:

two
four

 popitem():dict和OrderedDict比较

  OrderedDict在添加键的时候会保持顺序,因此键的迭代顺序与输入保持一致

  OrderedDict的popitem方法默认删除并返回字典里最后一个元素,但如果传递last=False参数,它会删除并返回第一个被添加进去的元素

 

from collections import OrderedDict

dog_names = ['daniel', 'fancl', 'grace']
dog_ages = [9, 10, 11]
dog_dict = dict(zip(dog_names, dog_ages))

print('before pop:', dog_dict)
# A plain dict pops in LIFO order (guaranteed since Python 3.7):
# the most recently inserted pair is removed and returned.
removed_dog = dog_dict.popitem()
print('pop item:', removed_dog)
print('after pop:', dog_dict)


print('-' * 100)

cat_names = ['andy', 'bob', 'candy']
cat_ages = [12, 3, 6]
cat_dict = OrderedDict(zip(cat_names, cat_ages))

# Optional experiment: cat_dict.move_to_end('bob', False) would move 'bob'
# to the front before popping.

print('before pop:', cat_dict)
# last=False removes in first-in, first-out order;
# last=True removes in last-in, first-out order.
removed_cat = cat_dict.popitem(last=False)
print('pop item:', removed_cat)
print('after pop:', cat_dict)

输出:

before pop: {'daniel': 9, 'fancl': 10, 'grace': 11}
pop item: ('grace', 11)
after pop: {'daniel': 9, 'fancl': 10}
----------------------------------------------------------------------------------------------------
before pop: OrderedDict([('andy', 12), ('bob', 3), ('candy', 6)])
pop item: ('andy', 12)
after pop: OrderedDict([('bob', 3), ('candy', 6)])

 

 

collections.Counter

  一个计数器字典

  对传入的可迭代对象进行计数,形成一个字典

  每次update的时候会增加这个计数器

  most_common(n)会按照次序返回映射里最常见的n个键和它们的计数

import collections

letters = 'abrabcablle'

# Counter tallies how often each item of the iterable occurs.
count = collections.Counter(letters)
print('count :', count)

# update() adds the new occurrences to the existing tallies.
count.update(letters)
print('count :', count)

# most_common(2) lists the two items with the highest counts.
top_two = count.most_common(2)
print('count.most_common(2) : ', top_two)

输出:

count : Counter({'a': 3, 'b': 3, 'l': 2, 'r': 1, 'c': 1, 'e': 1})
count : Counter({'a': 6, 'b': 6, 'l': 4, 'r': 2, 'c': 2, 'e': 2})
count.most_common(2) : [('a', 6), ('b', 6)]

 

继承collections.UserDict

  UserDict不是dict的子类,但UserDict有一个叫data的属性,是dict的实例

import collections

class StrKeyDict(collections.UserDict):
    """Mapping that stores every key as ``str`` and accepts non-str keys on lookup."""

    def __missing__(self, key):
        # Only non-str keys get a second chance, using their string form
        # against the underlying ``data`` dict.
        if not isinstance(key, str):
            return self.data[str(key)]
        raise KeyError(key)

    def __setitem__(self, key, value):
        # Every key is converted to str before it is stored.
        text_key = str(key)
        self.data[text_key] = value

    def __contains__(self, item):
        # Stored keys are all str, so one check on the string form suffices.
        text_key = str(item)
        return text_key in self.data

pairs = [('2', 'two'), ('4', 'four')]
sk = StrKeyDict(pairs)

# Subscript access goes through __getitem__, which falls back to __missing__.
print(sk[2])

# The inherited get() already handles the int key here, so unlike
# StrKeyDict0 this class does not need to override it.
print(sk.get(4))

输出:

two
four

 

不可变映射类型

  标准库里的所有映射都是可变的

  types模块引入了一个封装类名叫MappingProxyType,如果给这个类一个映射,会返回一个只读的映射视图

  但它是动态的,意味着如果对原映射做出改动,我们通过这个视图可以观察到

from types import MappingProxyType

d = {1: 'one'}

# Wrap the dict in a read-only view.
md = MappingProxyType(d)

print('md :', md)
print('md[1] : ', md[1])

# The view cannot be assigned to; uncommenting the next line raises
# TypeError: 'mappingproxy' object does not support item assignment
# md[2] = 'two'

# Changes made to the underlying dict are visible through the view.
d[2] = 'two'
print('md :', md)

输出:

md : {1: 'one'}
md[1] : one
md : {1: 'one', 2: 'two'}

 

建立集合和frozenset

# An empty set has to be created with set(); {} would be an empty dict.
s = set()
print(s)

# Set literal.
s = {1, 2, 3}
print(s)

# Set comprehension.
s = {n for n in range(10)}
print(s)

# frozenset: an immutable set built from any iterable.
fs = frozenset(s)
print(fs)

输出:

set()
{1, 2, 3}
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

 

关于字典和集合的散列

  如果你实现了一个类的__eq__方法,并且希望它是可散列的,那么它一定要有个恰当的__hash__方法,保证在a==b为真的情况下hash(a)==hash(b)也必定为真。否则会破坏恒定的散列表算法

  另一方面,如果一个类自定义的__eq__依赖于可变的状态,那就不要在这个类中实现__hash__方法,因为它的实例不应该是可散列的

posted @ 2018-10-29 22:20  StackNeverOverFlow  阅读(157)  评论(0编辑  收藏  举报