第五章：Python高级编程-深入Python的dict和set

5.1 dict的abc继承关系

和list（Sequence）相似，都继承于Collection，添加了一些方法

from collections.abc import Mapping,MutableMapping
# dict是属于Mapping类型的
a = {}
print(type(a)) # dict
print(isinstance(a,MutableMapping)) # 是属于MutableMapping类型的
"""
<class 'dict'>
True
"""
# 但是它不是通过继承的方式,而是实现了这个类中的一些方法,通过MutableMapping.register(dict)的方法

collections.abc模块

class Mapping(Collection):

    __slots__ = ()

    """A Mapping is a generic container for associating key/value
    pairs.

    This class provides concrete generic implementations of all
    methods except for __getitem__, __iter__, and __len__.

    """

    @abstractmethod
    def __getitem__(self, key):
        raise KeyError

    def get(self, key, default=None):
        'D.get(k[,d]) -> D[k] if k in D, else d.  d defaults to None.'
        try:
            return self[key]
        except KeyError:
            return default

    def __contains__(self, key):
        try:
            self[key]
        except KeyError:
            return False
        else:
            return True

    def keys(self):
        "D.keys() -> a set-like object providing a view on D's keys"
        return KeysView(self)

    def items(self):
        "D.items() -> a set-like object providing a view on D's items"
        return ItemsView(self)

    def values(self):
        "D.values() -> an object providing a view on D's values"
        return ValuesView(self)

    def __eq__(self, other):
        if not isinstance(other, Mapping):
            return NotImplemented
        return dict(self.items()) == dict(other.items())

    __reversed__ = None


class MutableMapping(Mapping):

    __slots__ = ()

    """A MutableMapping is a generic container for associating
    key/value pairs.

    This class provides concrete generic implementations of all
    methods except for __getitem__, __setitem__, __delitem__,
    __iter__, and __len__.

    """

    @abstractmethod
    def __setitem__(self, key, value):
        raise KeyError

    @abstractmethod
    def __delitem__(self, key):
        raise KeyError

    __marker = object()

    def pop(self, key, default=__marker):
        '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
          If key is not found, d is returned if given, otherwise KeyError is raised.
        '''
        try:
            value = self[key]
        except KeyError:
            if default is self.__marker:
                raise
            return default
        else:
            del self[key]
            return value

    def popitem(self):
        '''D.popitem() -> (k, v), remove and return some (key, value) pair
           as a 2-tuple; but raise KeyError if D is empty.
        '''
        try:
            key = next(iter(self))
        except StopIteration:
            raise KeyError from None
        value = self[key]
        del self[key]
        return key, value

    def clear(self):
        'D.clear() -> None.  Remove all items from D.'
        try:
            while True:
                self.popitem()
        except KeyError:
            pass

    def update(*args, **kwds):
        ''' D.update([E, ]**F) -> None.  Update D from mapping/iterable E and F.
            If E present and has a .keys() method, does:     for k in E: D[k] = E[k]
            If E present and lacks .keys() method, does:     for (k, v) in E: D[k] = v
            In either case, this is followed by: for k, v in F.items(): D[k] = v
        '''
        if not args:
            raise TypeError("descriptor 'update' of 'MutableMapping' object "
                            "needs an argument")
        self, *args = args
        if len(args) > 1:
            raise TypeError('update expected at most 1 arguments, got %d' %
                            len(args))
        if args:
            other = args[0]
            if isinstance(other, Mapping):
                for key in other:
                    self[key] = other[key]
            elif hasattr(other, "keys"):
                for key in other.keys():
                    self[key] = other[key]
            else:
                for key, value in other:
                    self[key] = value
        for key, value in kwds.items():
            self[key] = value

    def setdefault(self, key, default=None):
        'D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D'
        try:
            return self[key]
        except KeyError:
            self[key] = default
        return default

5.2 dict的常用方法

浅拷贝

a = {'LYQ1':{'SWPU':'软件工程'},
     'LYQ2':{'SWPU2':'软件工程2'}}
#这是浅拷贝，指向的是同一值，修改一个，另一个也会修改,所以我们看到下面a和b输出是一样的
b=a.copy()
b['LYQ1']['SWPU']='我是浅拷贝'
print(b)
print(a)

"""
{'LYQ1': {'SWPU': '我是浅拷贝'}, 'LYQ2': {'SWPU2': '软件工程2'}}
{'LYQ1': {'SWPU': '我是浅拷贝'}, 'LYQ2': {'SWPU2': '软件工程2'}}
"""

# 值是不可变对象，copy方法是浅拷贝：深拷贝父对象（一级目录），子对象（二级目录）不拷贝，还是引用
c = {"a": "b"}
d = c.copy()
d["a"] = "sx"
print(c)
print(d)

"""
{'a': 'b'}
{'a': 'sx'}
"""

深拷贝

a = {'LYQ1':{'SWPU':'软件工程'},
     'LYQ2':{'SWPU2':'软件工程2'}}
import copy
#深拷贝，指向不同的对象
deep_b=copy.deepcopy(a)
deep_b['LYQ1']['SWPU']='我是深拷贝'
print(deep_b)
print(a)

"""
{'LYQ1': {'SWPU': '我是深拷贝'}, 'LYQ2': {'SWPU2': '软件工程2'}}
{'LYQ1': {'SWPU': '软件工程'}, 'LYQ2': {'SWPU2': '软件工程2'}}
"""

fromkeys():

#把一个可迭代对象转换为dict，{'SWPU':'软件工程'}为默认值
my_list=['Stu1','Stu2']
my_dict=dict.fromkeys(my_list,{'SWPU':'软件工程'})
print(my_dict)

"""
{'Stu1': {'SWPU': '软件工程'}, 'Stu2': {'SWPU': '软件工程'}}
"""

get(key, value)

# 为了预防keyerror
new_dict = {'Stu1': {'SWPU': '软件工程'}, 'Stu2': {'SWPU': '软件工程'}}
aa = new_dict.get("stu6", {"age": 18})
print(aa)

items()：循环，返回key，value

new_dict = {'Stu1': {'SWPU': '软件工程'}, 'Stu2': {'SWPU': '软件工程'}}
for k, v in new_dict.items():
    print(k, v)
    
"""
Stu1 {'SWPU': '软件工程'}
Stu2 {'SWPU': '软件工程'}
"""

setdefault(): 有值直接取值，没有值则将值设置进去，并获取该值返回

new_dict = {'Stu1': {'SWPU': '软件工程'}, 'Stu2': {'SWPU': '软件工程'}}
default_value1 = new_dict.setdefault("Stu12", "kobe")
default_value2 = new_dict.setdefault("Stu2", "kobe")
print(default_value1)
print(default_value2)
print(new_dict)

"""
kobe
{'SWPU': '软件工程'}
{'Stu1': {'SWPU': '软件工程'}, 'Stu2': {'SWPU': '软件工程'}, 'Stu12': 'kobe'}
"""

update():添加键值对或更新键值对

a = {'kobe':{'SWPU':'软件工程'},
     'james':{'SWPU2':'软件工程2'}}
#添加新键值对（即合并两个字典）
a.update({'LYQ3':'NEW'})
#第二种方式
a.update(LYQ4='NEW2',LYQ5='NEW3')
#第三种方式，list里面放tuple，tuple里面放tuple等（可迭代就行）
a.update([('LYQ6','NEW6')])
print(a)
print("*"*60)
#修改键值对
a.update({'kobe':'我修改了'})
print(a)

"""
{'kobe': {'SWPU': '软件工程'}, 'james': {'SWPU2': '软件工程2'}, 'LYQ3': 'NEW', 'LYQ4': 'NEW2', 'LYQ5': 'NEW3', 'LYQ6': 'NEW6'}
************************************************************
{'kobe': '我修改了', 'james': {'SWPU2': '软件工程2'}, 'LYQ3': 'NEW', 'LYQ4': 'NEW2', 'LYQ5': 'NEW3', 'LYQ6': 'NEW6'}
"""

dict源码:

class dict(object):
    """
    dict() -> new empty dictionary
    dict(mapping) -> new dictionary initialized from a mapping object's
        (key, value) pairs
    dict(iterable) -> new dictionary initialized as if via:
        d = {}
        for k, v in iterable:
            d[k] = v
    dict(**kwargs) -> new dictionary initialized with the name=value pairs
        in the keyword argument list.  For example:  dict(one=1, two=2)
    """
    def clear(self): # real signature unknown; restored from __doc__
        """ D.clear() -> None.  Remove all items from D. """
        pass

    def copy(self): # real signature unknown; restored from __doc__
        """ D.copy() -> a shallow copy of D """
        pass

    @staticmethod # known case
    def fromkeys(*args, **kwargs): # real signature unknown
        """ Create a new dictionary with keys from iterable and values set to value. """
        pass

    def get(self, *args, **kwargs): # real signature unknown
        """ Return the value for key if key is in the dictionary, else default. """
        pass

    def items(self): # real signature unknown; restored from __doc__
        """ D.items() -> a set-like object providing a view on D's items """
        pass

    def keys(self): # real signature unknown; restored from __doc__
        """ D.keys() -> a set-like object providing a view on D's keys """
        pass

    def pop(self, k, d=None): # real signature unknown; restored from __doc__
        """
        D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
        If key is not found, d is returned if given, otherwise KeyError is raised
        """
        pass

    def popitem(self): # real signature unknown; restored from __doc__
        """
        D.popitem() -> (k, v), remove and return some (key, value) pair as a
        2-tuple; but raise KeyError if D is empty.
        """
        pass

    def setdefault(self, *args, **kwargs): # real signature unknown
        """
        Insert key with a value of default if key is not in the dictionary.
        
        Return the value for key if key is in the dictionary, else default.
        """
        pass

    def update(self, E=None, **F): # known special case of dict.update
        """
        D.update([E, ]**F) -> None.  Update D from dict/iterable E and F.
        If E is present and has a .keys() method, then does:  for k in E: D[k] = E[k]
        If E is present and lacks a .keys() method, then does:  for k, v in E: D[k] = v
        In either case, this is followed by: for k in F:  D[k] = F[k]
        """
        pass

    def values(self): # real signature unknown; restored from __doc__
        """ D.values() -> an object providing a view on D's values """
        pass

    def __contains__(self, *args, **kwargs): # real signature unknown
        """ True if the dictionary has the specified key, else False. """
        pass

    def __delitem__(self, *args, **kwargs): # real signature unknown
        """ Delete self[key]. """
        pass

    def __eq__(self, *args, **kwargs): # real signature unknown
        """ Return self==value. """
        pass

    def __getattribute__(self, *args, **kwargs): # real signature unknown
        """ Return getattr(self, name). """
        pass

    def __getitem__(self, y): # real signature unknown; restored from __doc__
        """ x.__getitem__(y) <==> x[y] """
        pass

    def __ge__(self, *args, **kwargs): # real signature unknown
        """ Return self>=value. """
        pass

    def __gt__(self, *args, **kwargs): # real signature unknown
        """ Return self>value. """
        pass

    def __init__(self, seq=None, **kwargs): # known special case of dict.__init__
        """
        dict() -> new empty dictionary
        dict(mapping) -> new dictionary initialized from a mapping object's
            (key, value) pairs
        dict(iterable) -> new dictionary initialized as if via:
            d = {}
            for k, v in iterable:
                d[k] = v
        dict(**kwargs) -> new dictionary initialized with the name=value pairs
            in the keyword argument list.  For example:  dict(one=1, two=2)
        # (copied from class doc)
        """
        pass

    def __iter__(self, *args, **kwargs): # real signature unknown
        """ Implement iter(self). """
        pass

    def __len__(self, *args, **kwargs): # real signature unknown
        """ Return len(self). """
        pass

    def __le__(self, *args, **kwargs): # real signature unknown
        """ Return self<=value. """
        pass

    def __lt__(self, *args, **kwargs): # real signature unknown
        """ Return self<value. """
        pass

    @staticmethod # known case of __new__
    def __new__(*args, **kwargs): # real signature unknown
        """ Create and return a new object.  See help(type) for accurate signature. """
        pass

    def __ne__(self, *args, **kwargs): # real signature unknown
        """ Return self!=value. """
        pass

    def __repr__(self, *args, **kwargs): # real signature unknown
        """ Return repr(self). """
        pass

    def __setitem__(self, *args, **kwargs): # real signature unknown
        """ Set self[key] to value. """
        pass

    def __sizeof__(self): # real signature unknown; restored from __doc__
        """ D.__sizeof__() -> size of D in memory, in bytes """
        pass

    __hash__ = None

5.3 dict的子类

当我们要自定义一个字典的时候,不要使用直接继承自dict,因为有些操作会不生效

"""
不建议直接继承dict，而是collections.UserDict
"""
# 不建议继承list和dict
class MyDict(dict):
    def __setitem__(self, key, value):
        super().__setitem__(key, value * 2)
#未调用自己写的方法, c语言编写的dict某些时候不会去调用覆盖的方法
my_dict=MyDict(one=1)
print(my_dict)

print("*"*10)

#调用自己写的方法
my_dict['one']=1
print(my_dict)

"""
{'one': 1}
**********
{'one': 2}
"""

使用继承UserDict的方式来实现自定义的字典.

from collections import UserDict
class MyDict2(UserDict):
    def __setitem__(self, key, value):
        super().__setitem__(key, value * 2)
my_dict2=MyDict2(one=1)
print(my_dict2)

Userdict源码：当取不到某个key时，就会调用__missing__方法（如果有__missing__）获取默认值

创建带有默认值的字典. collections中的defaultdict

字典之所以可以实现带有默认值,其实是它内部实现了__missing__方法,在UserDict类里面的__getitem__方法中会调用__missing__方法

from collections import defaultdict
#可以时dict,int,str,list,tuple等等
my_dict=defaultdict(dict)
#找不到key，实际调用的时__missing__方法
print(my_dict['haha'])

defaultdict源码：之所以可以设置默认值就是因为实现了__missing__方法

5.4 set和frozenset

"""
set 集合 
frozenset(不可变集合) 无序 不重复
"""

s = set("abcde")  # 接受迭代类型；字符串，列表...
print(s)

# 向set添加数据
s.add()
s.update()

difference()  # 差值
-  # 差集 实现于__ior__魔法函数

# / & -

#set 集合 fronzenset (不可变集合) 无序， 不重复
# s = set('abcdee')
# s = set(['a','b','c','d','e'])
s = {'a','b', 'c'}
# s = frozenset("abcde") # frozenset 可以作为dict的key
# print(s)

# clear() 清空集合
# copy() 浅拷贝集合
# pop() 弹出最后一个元素
# remove() 删除一个集合元素

#向set添加数据
another_set = set("cef")
re_set = s.difference(another_set)
re_set = s - another_set
re_set = s & another_set # 交集
re_set = s | another_set # 并集

#set性能很高
# | & -  #集合运算
print(re_set)

print (s.issubset(re_set))
# 也可以用if in判断（实现于__contains__魔法函数）
# if "c" in re_set:
#     print ("i am in set")

5.5 dict和set的实现原理

"""
测试list和dict的性能
"""
from random import randint


def load_list_data(total_nums, target_nums):
    """
    从文件中读取数据，以list的方式返回
    :param total_nums: 读取的数量
    :param target_nums: 需要查询的数据的数量
    """
    all_data = []
    target_data = []
    file_name = "D:/note/fbobject_idnew.txt"
    with open(file_name, encoding="utf8", mode="r") as f_open:
        for count, line in enumerate(f_open):
            if count < total_nums:
                all_data.append(line)
            else:
                break

    for x in range(target_nums):
        random_index = randint(0, total_nums)
        if all_data[random_index] not in target_data:
            target_data.append(all_data[random_index])
            if len(target_data) == target_nums:
                break

    return all_data, target_data

def load_dict_data(total_nums, target_nums):
    """
    从文件中读取数据，以dict的方式返回
    :param total_nums: 读取的数量
    :param target_nums: 需要查询的数据的数量
    """
    all_data = {}
    target_data = []
    ## 1000万或上百万字符串的文本
    file_name = "D:/note/fbobject_idnew.txt"
    with open(file_name, encoding="utf8", mode="r") as f_open:
        for count, line in enumerate(f_open):
            if count < total_nums:
                all_data[line] = 0
            else:
                break
    all_data_list = list(all_data)
    for x in range(target_nums):
        random_index = randint(0, total_nums-1)
        if all_data_list[random_index] not in target_data:
            target_data.append(all_data_list[random_index])
            if len(target_data) == target_nums:
                break

    return all_data, target_data


def find_test(all_data, target_data):
    #测试运行时间
    test_times = 100
    total_times = 0
    import time
    for i in range(test_times):
        find = 0
        start_time = time.time()
        for data in target_data:
            if data in all_data:
                find += 1
        last_time = time.time() - start_time
        total_times += last_time
    return total_times/test_times


if __name__ == "__main__":
    # all_data, target_data = load_list_data(10000, 1000)
    # all_data, target_data = load_list_data(100000, 1000)
    # all_data, target_data = load_list_data(1000000, 1000)


    # all_data, target_data = load_dict_data(10000, 1000)
    # all_data, target_data = load_dict_data(100000, 1000)
    # all_data, target_data = load_dict_data(1000000, 1000)
    all_data, target_data = load_dict_data(2000000, 1000)
    last_time = find_test(all_data, target_data)

    #dict查找的性能远远大于list
    #在list中随着list数据的增大 查找时间会增大
    #在dict中查找元素不会随着dict的增大而增大
    print(last_time)
"""
1.dict的key或者set的值，都必须是可以hash的（不可变对象都是可以hash的，如str，frozenset，tuple，自己实现的类【实现__hash__魔法函数】）；
2.dict内存花销大，但是查询速度快，自定义的对象或者python内置的对象都是用dict包装的；
3.dict的存储顺序与元素添加顺序有关；
4.添加数据有可能改变已有数据的顺序；
5.取数据的时间复杂度为O(1)
"""

通过hash函数计算key（有很多的算法），这里是通过hash函数计算然后与7进行与运算，在计算过程中有可能冲突，得到同样的位置（有很多的解决方法），如’abc‘取一位'c'加一位随机数，如果冲突，就向前多取一位再计算...（还有先声明一个很小的内存空间，可能存在一些空白，计算空白，如果小于1/3，然后声明一个更大的空间，拷贝过去，减少冲突）

查找数据，先计算hash值定位，查找是否为空，为空就抛出错误，如果不为空查看是否相等，如果被其他占领就不相等，然后又进行冲突解决

posted @ 2020-05-18 22:57 公众号海哥python 阅读(165) 评论(0) 编辑收藏举报

刷新页面返回顶部

公众号-海哥python

体系化持续输出

第五章：Python高级编程-深入Python的dict和set

公告