Python deepcopy source code analysis
先从入口开始看
from copy import deepcopy
if __name__ == '__main__':
    # Minimal driver for the walkthrough: deep-copy a flat list of ints.
    l1 = [1, 2, 3]
    l2 = deepcopy(l1)  # entry point traced through the rest of the analysis
进入deepcopy函数
def deepcopy(x, memo=None, _nil=[]):
    """Deep copy operation on arbitrary Python objects.

    See the module's __doc__ string for more info.

    Args:
        x: The object to copy.
        memo: Dict mapping ``id(original)`` to the copy already produced for
            that original. A fresh dict is created when it is None.
        _nil: Sentinel used as the ``memo.get`` default so that "not in memo"
            can be told apart from any stored value. It is only compared by
            identity and never mutated.

    Returns:
        The copy of ``x``, or ``x`` itself when the chosen copier returns the
        same object.

    Raises:
        Error: If no copier, ``__deepcopy__`` or reduce method can be found.
    """
    if memo is None:
        memo = {}

    d = id(x)  # id of the object being copied; used as the memo key
    y = memo.get(d, _nil)  # has this exact object already been copied?
    if y is not _nil:
        return y

    cls = type(x)  # type of the object being copied

    # Each registered type has its own copier function.
    copier = _deepcopy_dispatch.get(cls)
    if copier is not None:
        y = copier(x, memo)
    else:
        # type(x) is ``type`` or a subclass of it, i.e. x is itself a class:
        # handled by the atomic copier.
        if issubclass(cls, type):
            y = _deepcopy_atomic(x, memo)
        else:
            # Let the object supply its own __deepcopy__ implementation.
            copier = getattr(x, "__deepcopy__", None)
            if copier is not None:
                y = copier(memo)
            else:
                # No __deepcopy__: obtain a reduce value (dispatch_table, then
                # __reduce_ex__, then __reduce__) and rebuild the object from
                # it with _reconstruct.
                reductor = dispatch_table.get(cls)
                if reductor:
                    rv = reductor(x)
                else:
                    reductor = getattr(x, "__reduce_ex__", None)
                    if reductor is not None:
                        rv = reductor(4)
                    else:
                        reductor = getattr(x, "__reduce__", None)
                        if reductor:
                            rv = reductor()
                        else:
                            raise Error(
                                "un(deep)copyable object of type %s" % cls)
                if isinstance(rv, str):
                    # A string reduce value: the object is returned uncopied.
                    y = x
                else:
                    y = _reconstruct(x, memo, *rv)

    # If is its own copy, don't memoize.
    if y is not x:
        memo[d] = y
        _keep_alive(x, memo)  # Make sure x lives at least as long as d
    return y
在上面的例子中,我们要复制的对象是一个 list,而 copier 来自 _deepcopy_dispatch。下面看看 _deepcopy_dispatch 的定义。
# Maps a type to the function that deep-copies objects of exactly that type.
# ``d`` is a short alias used while the table is being filled in.
_deepcopy_dispatch = d = {}


def _deepcopy_atomic(x, memo):
    """Return ``x`` unchanged; ``memo`` is accepted but not used."""
    return x


d[type(None)] = _deepcopy_atomic
d[type(Ellipsis)] = _deepcopy_atomic
d[type(NotImplemented)] = _deepcopy_atomic
d[int] = _deepcopy_atomic
d[float] = _deepcopy_atomic
d[bool] = _deepcopy_atomic
# Copier registrations for the remaining types are omitted here.


def _deepcopy_list(x, memo, deepcopy=deepcopy):
    """Return a new list whose elements are deep copies of the elements of ``x``.

    Args:
        x: The list to copy.
        memo: Dict mapping ``id(original)`` to its copy; updated in place.
        deepcopy: Function used to copy each element; bound as a default
            argument when this function is defined.
    """
    y = []
    # Register the new list before copying the elements, so that an element
    # referring back to ``x`` can be resolved from the memo.
    memo[id(x)] = y
    append = y.append
    for a in x:
        append(deepcopy(a, memo))
    return y


d[list] = _deepcopy_list
看到这里就很清晰了:memo 实际上是"原对象的 id → 其副本"的映射。在 _deepcopy_list 中,先新建列表 y 并立即登记到 memo(这样当列表引用自身时可以直接从 memo 取回 y,而不会无限递归),再循环调用 deepcopy 复制各个子元素。另外,由于 memo 以 id 作为键,为了防止被复制对象 x 被提前回收,deepcopy 会调用 _keep_alive 方法保活 x。
def _keep_alive(x, memo):
    """Keeps a reference to the object x in the memo.

    Because we remember objects by their id, we have
    to assure that possibly temporary objects are kept
    alive by referencing them.
    We store a reference at the id of the memo, which should
    normally not be used unless someone tries to deepcopy
    the memo itself...

    Args:
        x: The object to keep referenced.
        memo: The memo dict; a list of kept objects is stored under the key
            ``id(memo)``.
    """
    try:
        memo[id(memo)].append(x)
    except KeyError:
        # aha, this is the first one :-)
        memo[id(memo)] = [x]
这里多提一下:当自定义类型继承了内置类型(比如 dict)时,该怎么实现 __deepcopy__ 方法呢?这个方法很容易写成浅拷贝(shallow copy),或者写出死循环。先看一个反例。
# Counter-example, labelled "infinite loop" by the author -- do not copy this.
# NOTE(review): as shown, this code does not actually loop forever:
#   * ``memo`` and ``_nil`` are accepted but never used;
#   * the local name ``dict`` shadows the builtin inside the method;
#   * the values are passed on as-is, without calling deepcopy on them;
#   * ``dict`` defines no ``__getattr__``, so ``self.__getattr__(key)`` raises
#     AttributeError unless MyDict defines one elsewhere -- TODO confirm intent;
#   * there is no ``return``, so the method yields None even for an empty dict.
class MyDict(dict):
    def __deepcopy__(self, memo=None, _nil=[]):
        dict = MyDict()
        for key in self.keys():
            dict.__setattr__(key, self.__getattr__(key))
正确做法是参照 deepcopy 的写法:先把新建的副本登记到 memo,再把同一个 memo 传给每一次递归的 deepcopy 调用,从而规避死循环。
class MyDict(dict):
    """``dict`` subclass whose ``__deepcopy__`` uses the memo to handle cycles."""

    def __deepcopy__(self, memo=None, _nil=[]):
        """Return a deep copy of this mapping.

        Args:
            memo: Dict mapping ``id(original)`` to the copy already made for
                it. A fresh dict is created when it is None.
            _nil: Sentinel used as the ``memo.get`` default; it is only
                compared by identity and never mutated.

        Returns:
            A new ``MyDict`` whose keys and values are deep copies, or the
            copy already recorded in ``memo`` for this instance.
        """
        if memo is None:
            memo = {}
        d = id(self)
        y = memo.get(d, _nil)
        if y is not _nil:
            return y
        ret_dict = MyDict()
        # Record the copy itself (not an id) before descending, so that a
        # value referring back to ``self`` resolves to ``ret_dict`` instead
        # of recursing forever.
        memo[d] = ret_dict
        for key, value in self.items():
            # Copy the mapping's items; attribute access would not touch them.
            ret_dict[deepcopy(key, memo)] = deepcopy(value, memo)
        return ret_dict