Python deepcopy src analysis

先从入口开始看

from copy import deepcopy

# Minimal driver: deep-copy a small list so the call into deepcopy() can be traced.
if __name__ == '__main__':
    l1 = [1, 2, 3]
    l2 = deepcopy(l1)  # entry point of the walkthrough

进入deepcopy函数

def deepcopy(x, memo=None, _nil=[]):
    """Deep copy operation on arbitrary Python objects.

    See the module's __doc__ string for more info.

    Parameters:
        x: the object to copy.
        memo: dict mapping id(original) -> copy for objects already copied
            during this operation; a fresh dict is created when omitted.
        _nil: private sentinel used as the "not found" default for the memo
            lookup, so that a memoized copy of any value (including None)
            can be told apart from a miss.

    Returns:
        The copy of x, or x itself when the selected copier returns x.

    Raises:
        Error: if no copier, __deepcopy__, or reduce hook is available for x.
    """

    # memo caches copies already made in this operation; start empty if absent.
    if memo is None:
        memo = {}

    d = id(x)  # id of the object being copied; used as the memo key
    y = memo.get(d, _nil)  # memo maps id(original) -> copy
    if y is not _nil:
        # x was already copied earlier in this operation: reuse that copy.
        return y

    cls = type(x)  # exact type of the object being copied

    copier = _deepcopy_dispatch.get(cls)  # per-type copy function, looked up by exact type
    # A registered copier takes priority over every other mechanism.
    if copier is not None:
        y = copier(x, memo)
    else:
        # x is itself a class (its type is a subclass of `type`):
        # hand it to the atomic copier.
        if issubclass(cls, type):
            y = _deepcopy_atomic(x, memo)
        else:
            # Let the object supply its own __deepcopy__(memo) hook.
            copier = getattr(x, "__deepcopy__", None)
            if copier is not None:
                y = copier(memo)
            else:
                # No __deepcopy__ hook: obtain a reduce value from a reductor
                # and rebuild the object with _reconstruct.
                # NOTE(review): dispatch_table is defined outside this
                # snippet -- presumably copyreg.dispatch_table; confirm.
                reductor = dispatch_table.get(cls)
                if reductor:
                    rv = reductor(x)
                else:
                    reductor = getattr(x, "__reduce_ex__", None)
                    if reductor is not None:
                        # presumably 4 is the pickle protocol version -- confirm
                        rv = reductor(4)
                    else:
                        reductor = getattr(x, "__reduce__", None)
                        if reductor:
                            rv = reductor()
                        else:
                            raise Error(
                                "un(deep)copyable object of type %s" % cls)
                # A string reduce value means x is used as its own copy.
                if isinstance(rv, str):
                    y = x
                else:
                    y = _reconstruct(x, memo, *rv)

    # If is its own copy, don't memoize.
    if y is not x:
        memo[d] = y
        _keep_alive(x, memo) # Make sure x lives at least as long as d
    return y

在上面的例子,我们要复制的对象是个list,而copier来源于_deepcopy_dispatch,看看_deepcopy_dispatch的定义。

# Registry mapping an exact type to the function that deep-copies its
# instances; `d` is a short alias used by the registration statements.
_deepcopy_dispatch = d = {}


def _deepcopy_atomic(x, memo):
    """Return x unchanged; memo is accepted for signature parity and ignored."""
    return x


# Register every type whose instances are handed back as-is.
d.update(dict.fromkeys(
    (type(None), type(Ellipsis), type(NotImplemented), int, float, bool),
    _deepcopy_atomic,
))
# Copier registrations for the remaining types are omitted here.

def _deepcopy_list(x, memo, deepcopy=deepcopy):
    """Return a new list whose elements are deep copies of the elements of x.

    Parameters:
        x: the list to copy.
        memo: dict mapping id(original) -> copy, shared across the whole
            copy operation.
        deepcopy: the function used to copy each element (defaults to the
            module-level deepcopy).
    """
    copied = []
    # Record the new list before descending into the elements, so a nested
    # reference back to x finds this entry in memo.
    memo[id(x)] = copied
    for item in x:
        copied.append(deepcopy(item, memo))
    return copied
d[list] = _deepcopy_list

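看到这里就很清晰了:memo实际上是"被复制对象的id"到"其副本"的映射。在_deepcopy_list中,先新建一个列表y并立即把它登记到memo里,然后循环调用deepcopy复制各个子元素;先登记再递归,遇到引用自身的列表时才能直接从memo中取回y,而不会无限递归。另外,由于memo以id作为键,而对象被析构后它的id可能被新对象复用,所以deepcopy会调用_keep_alive方法保活x,防止被复制对象x在复制过程中被提前析构。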

def _keep_alive(x, memo):
    """Hold a reference to x inside memo so x cannot be freed early.

    Objects are remembered by their id, so a possibly temporary object
    must stay referenced for as long as memo is in use.  The references
    are collected in a list stored under the key id(memo), a slot that
    should not otherwise be used unless someone deep-copies the memo
    itself.
    """
    slot = id(memo)
    try:
        keepers = memo[slot]
    except KeyError:
        # First object to be kept alive: create the list.
        memo[slot] = [x]
    else:
        keepers.append(x)

这里多提一下,当自定义类型继承了基础类型(比如继承dict),要怎么实现__deepcopy__方法呢?这个方法很容易写成shallow copy或写出死循环,举个反例。

# Counter-example (labelled "infinite loop" by the author): a broken
# __deepcopy__ that must not be used as a template.
class MyDict(dict):
    # memo and _nil are accepted but never read or updated in this method.
    def __deepcopy__(self, memo=None, _nil=[]):
        # Rebinding the name `dict` shadows the built-in inside this method.
        dict = MyDict()
        for key in self.keys():
            # Values are passed through without a recursive deepcopy, and they
            # are written with __setattr__ (attributes), not as mapping items.
            # NOTE(review): no __getattr__ is defined on this class, and dict
            # does not appear to provide one, so this lookup looks like it
            # raises AttributeError for any non-empty dict -- confirm.
            dict.__setattr__(key, self.__getattr__(key))
        # There is no return statement, so the method returns None.

正确做法是学习deepcopy写法,利用memo参数规避死循环。

class MyDict(dict):
    """dict subclass with a hand-written, cycle-safe __deepcopy__."""

    def __deepcopy__(self, memo=None, _nil=[]):
        """Return a deep copy of this mapping's items as a new MyDict.

        Parameters:
            memo: dict mapping id(original) -> copy for objects already
                copied in this operation; a fresh dict is used when omitted.
            _nil: private sentinel for the memo lookup; it is never mutated,
                so the shared default list is safe.

        Returns:
            A new MyDict whose keys and values are deep copies of the
            originals.  Only mapping items are copied, not instance
            attributes.
        """
        if memo is None:
            memo = {}
        d = id(self)
        y = memo.get(d, _nil)
        if y is not _nil:
            # Already copied earlier in this operation: reuse that copy.
            return y

        ret_dict = MyDict()
        # Register the copy itself (not an id) BEFORE recursing, so that a
        # value referring back to self resolves to ret_dict instead of
        # recursing forever.
        memo[d] = ret_dict
        for key, value in self.items():
            ret_dict[deepcopy(key, memo)] = deepcopy(value, memo)

        return ret_dict
posted @ 2022-11-11 12:06  JHSeng  阅读(50)  评论(0编辑  收藏  举报