Python module: pickle
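What follows is the pure-Python implementation of the standard-library pickle module (the reference code behind the faster _pickle C accelerator that is imported near the bottom of the file). Before reading the internals, a minimal usage sketch of the public API it exports (dump, dumps, load, loads) may help; the sample data and the io.BytesIO buffer below are just illustrative choices.

import io
import pickle

# Round-trip an object through an in-memory binary buffer.
data = {"name": "pickle", "protocols": list(range(pickle.HIGHEST_PROTOCOL + 1))}

buf = io.BytesIO()
pickle.dump(data, buf)          # write a pickle data stream to a file-like object
buf.seek(0)
assert pickle.load(buf) == data

# dumps()/loads() work on bytes directly; the protocol can be chosen explicitly.
blob = pickle.dumps(data, protocol=pickle.HIGHEST_PROTOCOL)
assert pickle.loads(blob) == data

The full module source follows.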

   1 """Create portable serialized representations of Python objects.
   2 
   3 See module copyreg for a mechanism for registering custom picklers.
   4 See module pickletools source for extensive comments.
   5 
   6 Classes:
   7 
   8     Pickler
   9     Unpickler
  10 
  11 Functions:
  12 
  13     dump(object, file)
  14     dumps(object) -> string
  15     load(file) -> object
  16     loads(string) -> object
  17 
  18 Misc variables:
  19 
  20     __version__
  21     format_version
  22     compatible_formats
  23 
  24 """
  25 
  26 from types import FunctionType
  27 from copyreg import dispatch_table
  28 from copyreg import _extension_registry, _inverted_registry, _extension_cache
  29 from itertools import islice
  30 from functools import partial
  31 import sys
  32 from sys import maxsize
  33 from struct import pack, unpack
  34 import re
  35 import io
  36 import codecs
  37 import _compat_pickle
  38 
  39 __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
  40            "Unpickler", "dump", "dumps", "load", "loads"]
  41 
  42 # Shortcut for use in isinstance testing
  43 bytes_types = (bytes, bytearray)
  44 
  45 # These are purely informational; no code uses these.
  46 format_version = "4.0"                  # File format version we write
  47 compatible_formats = ["1.0",            # Original protocol 0
  48                       "1.1",            # Protocol 0 with INST added
  49                       "1.2",            # Original protocol 1
  50                       "1.3",            # Protocol 1 with BINFLOAT added
  51                       "2.0",            # Protocol 2
  52                       "3.0",            # Protocol 3
  53                       "4.0",            # Protocol 4
  54                       ]                 # Old format versions we can read
  55 
  56 # This is the highest protocol number we know how to read.
  57 HIGHEST_PROTOCOL = 4
  58 
  59 # The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
  60 # We intentionally write a protocol that Python 2.x cannot read;
  61 # there are too many issues with that.
  62 DEFAULT_PROTOCOL = 3
  63 
  64 class PickleError(Exception):
  65     """A common base class for the other pickling exceptions."""
  66     pass
  67 
  68 class PicklingError(PickleError):
  69     """This exception is raised when an unpicklable object is passed to the
  70     dump() method.
  71 
  72     """
  73     pass
  74 
  75 class UnpicklingError(PickleError):
  76     """This exception is raised when there is a problem unpickling an object,
  77     such as a security violation.
  78 
  79     Note that other exceptions may also be raised during unpickling, including
  80     (but not necessarily limited to) AttributeError, EOFError, ImportError,
  81     and IndexError.
  82 
  83     """
  84     pass
  85 
  86 # An instance of _Stop is raised by Unpickler.load_stop() in response to
  87 # the STOP opcode, passing the object that is the result of unpickling.
  88 class _Stop(Exception):
  89     def __init__(self, value):
  90         self.value = value
  91 
  92 # Jython has PyStringMap; it's a dict subclass with string keys
  93 try:
  94     from org.python.core import PyStringMap
  95 except ImportError:
  96     PyStringMap = None
  97 
  98 # Pickle opcodes.  See pickletools.py for extensive docs.  The listing
  99 # here is in kind-of alphabetical order of 1-character pickle code.
 100 # pickletools groups them by purpose.
 101 
 102 MARK           = b'('   # push special markobject on stack
 103 STOP           = b'.'   # every pickle ends with STOP
 104 POP            = b'0'   # discard topmost stack item
 105 POP_MARK       = b'1'   # discard stack top through topmost markobject
 106 DUP            = b'2'   # duplicate top stack item
 107 FLOAT          = b'F'   # push float object; decimal string argument
 108 INT            = b'I'   # push integer or bool; decimal string argument
 109 BININT         = b'J'   # push four-byte signed int
 110 BININT1        = b'K'   # push 1-byte unsigned int
 111 LONG           = b'L'   # push long; decimal string argument
 112 BININT2        = b'M'   # push 2-byte unsigned int
 113 NONE           = b'N'   # push None
 114 PERSID         = b'P'   # push persistent object; id is taken from string arg
 115 BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
 116 REDUCE         = b'R'   # apply callable to argtuple, both on stack
 117 STRING         = b'S'   # push string; NL-terminated string argument
 118 BINSTRING      = b'T'   # push string; counted binary string argument
 119 SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
 120 UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
 121 BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
 122 APPEND         = b'a'   # append stack top to list below it
 123 BUILD          = b'b'   # call __setstate__ or __dict__.update()
 124 GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
 125 DICT           = b'd'   # build a dict from stack items
 126 EMPTY_DICT     = b'}'   # push empty dict
 127 APPENDS        = b'e'   # extend list on stack by topmost stack slice
 128 GET            = b'g'   # push item from memo on stack; index is string arg
 129 BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
 130 INST           = b'i'   # build & push class instance
 131 LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
 132 LIST           = b'l'   # build list from topmost stack items
 133 EMPTY_LIST     = b']'   # push empty list
 134 OBJ            = b'o'   # build & push class instance
 135 PUT            = b'p'   # store stack top in memo; index is string arg
 136 BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
 137 LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
 138 SETITEM        = b's'   # add key+value pair to dict
 139 TUPLE          = b't'   # build tuple from topmost stack items
 140 EMPTY_TUPLE    = b')'   # push empty tuple
 141 SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
 142 BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding
 143 
 144 TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
 145 FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py
 146 
 147 # Protocol 2
 148 
 149 PROTO          = b'\x80'  # identify pickle protocol
 150 NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
 151 EXT1           = b'\x82'  # push object from extension registry; 1-byte index
 152 EXT2           = b'\x83'  # ditto, but 2-byte index
 153 EXT4           = b'\x84'  # ditto, but 4-byte index
 154 TUPLE1         = b'\x85'  # build 1-tuple from stack top
 155 TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
 156 TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
 157 NEWTRUE        = b'\x88'  # push True
 158 NEWFALSE       = b'\x89'  # push False
 159 LONG1          = b'\x8a'  # push long from < 256 bytes
 160 LONG4          = b'\x8b'  # push really big long
 161 
 162 _tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
 163 
 164 # Protocol 3 (Python 3.x)
 165 
 166 BINBYTES       = b'B'   # push bytes; counted binary string argument
 167 SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes
 168 
 169 # Protocol 4
 170 SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
 171 BINUNICODE8      = b'\x8d'  # push very long string
 172 BINBYTES8        = b'\x8e'  # push very long bytes string
 173 EMPTY_SET        = b'\x8f'  # push empty set on the stack
 174 ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
 175 FROZENSET        = b'\x91'  # build frozenset from topmost stack items
 176 NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
 177 STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
 178 MEMOIZE          = b'\x94'  # store top of the stack in memo
 179 FRAME            = b'\x95'  # indicate the beginning of a new frame
 180 
 181 __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
 182 
 183 
 184 class _Framer:
 185 
 186     _FRAME_SIZE_TARGET = 64 * 1024
 187 
 188     def __init__(self, file_write):
 189         self.file_write = file_write
 190         self.current_frame = None
 191 
 192     def start_framing(self):
 193         self.current_frame = io.BytesIO()
 194 
 195     def end_framing(self):
 196         if self.current_frame and self.current_frame.tell() > 0:
 197             self.commit_frame(force=True)
 198             self.current_frame = None
 199 
 200     def commit_frame(self, force=False):
 201         if self.current_frame:
 202             f = self.current_frame
 203             if f.tell() >= self._FRAME_SIZE_TARGET or force:
 204                 with f.getbuffer() as data:
 205                     n = len(data)
 206                     write = self.file_write
 207                     write(FRAME)
 208                     write(pack("<Q", n))
 209                     write(data)
 210                 f.seek(0)
 211                 f.truncate()
 212 
 213     def write(self, data):
 214         if self.current_frame:
 215             return self.current_frame.write(data)
 216         else:
 217             return self.file_write(data)
 218 
 219 
 220 class _Unframer:
 221 
 222     def __init__(self, file_read, file_readline, file_tell=None):
 223         self.file_read = file_read
 224         self.file_readline = file_readline
 225         self.current_frame = None
 226 
 227     def read(self, n):
 228         if self.current_frame:
 229             data = self.current_frame.read(n)
 230             if not data and n != 0:
 231                 self.current_frame = None
 232                 return self.file_read(n)
 233             if len(data) < n:
 234                 raise UnpicklingError(
 235                     "pickle exhausted before end of frame")
 236             return data
 237         else:
 238             return self.file_read(n)
 239 
 240     def readline(self):
 241         if self.current_frame:
 242             data = self.current_frame.readline()
 243             if not data:
 244                 self.current_frame = None
 245                 return self.file_readline()
 246             if data[-1] != b'\n'[0]:
 247                 raise UnpicklingError(
 248                     "pickle exhausted before end of frame")
 249             return data
 250         else:
 251             return self.file_readline()
 252 
 253     def load_frame(self, frame_size):
 254         if self.current_frame and self.current_frame.read() != b'':
 255             raise UnpicklingError(
 256                 "beginning of a new frame before end of current frame")
 257         self.current_frame = io.BytesIO(self.file_read(frame_size))
 258 
 259 
 260 # Tools used for pickling.
 261 
 262 def _getattribute(obj, name):
 263     for subpath in name.split('.'):
 264         if subpath == '<locals>':
 265             raise AttributeError("Can't get local attribute {!r} on {!r}"
 266                                  .format(name, obj))
 267         try:
 268             parent = obj
 269             obj = getattr(obj, subpath)
 270         except AttributeError:
 271             raise AttributeError("Can't get attribute {!r} on {!r}"
 272                                  .format(name, obj))
 273     return obj, parent
 274 
 275 def whichmodule(obj, name):
 276     """Find the module an object belong to."""
 277     module_name = getattr(obj, '__module__', None)
 278     if module_name is not None:
 279         return module_name
 280     # Protect the iteration by using a list copy of sys.modules against dynamic
 281     # modules that trigger imports of other modules upon calls to getattr.
 282     for module_name, module in list(sys.modules.items()):
 283         if module_name == '__main__' or module is None:
 284             continue
 285         try:
 286             if _getattribute(module, name)[0] is obj:
 287                 return module_name
 288         except AttributeError:
 289             pass
 290     return '__main__'
 291 
 292 def encode_long(x):
 293     r"""Encode a long to a two's complement little-endian binary string.
 294     Note that 0 is a special case, returning an empty string, to save a
 295     byte in the LONG1 pickling context.
 296 
 297     >>> encode_long(0)
 298     b''
 299     >>> encode_long(255)
 300     b'\xff\x00'
 301     >>> encode_long(32767)
 302     b'\xff\x7f'
 303     >>> encode_long(-256)
 304     b'\x00\xff'
 305     >>> encode_long(-32768)
 306     b'\x00\x80'
 307     >>> encode_long(-128)
 308     b'\x80'
 309     >>> encode_long(127)
 310     b'\x7f'
 311     >>>
 312     """
 313     if x == 0:
 314         return b''
 315     nbytes = (x.bit_length() >> 3) + 1
 316     result = x.to_bytes(nbytes, byteorder='little', signed=True)
 317     if x < 0 and nbytes > 1:
 318         if result[-1] == 0xff and (result[-2] & 0x80) != 0:
 319             result = result[:-1]
 320     return result
 321 
 322 def decode_long(data):
 323     r"""Decode a long from a two's complement little-endian binary string.
 324 
 325     >>> decode_long(b'')
 326     0
 327     >>> decode_long(b"\xff\x00")
 328     255
 329     >>> decode_long(b"\xff\x7f")
 330     32767
 331     >>> decode_long(b"\x00\xff")
 332     -256
 333     >>> decode_long(b"\x00\x80")
 334     -32768
 335     >>> decode_long(b"\x80")
 336     -128
 337     >>> decode_long(b"\x7f")
 338     127
 339     """
 340     return int.from_bytes(data, byteorder='little', signed=True)
 341 
 342 
 343 # Pickling machinery
 344 
 345 class _Pickler:
 346 
 347     def __init__(self, file, protocol=None, *, fix_imports=True):
 348         """This takes a binary file for writing a pickle data stream.
 349 
 350         The optional *protocol* argument tells the pickler to use the
 351         given protocol; supported protocols are 0, 1, 2, 3 and 4.  The
 352         default protocol is 3; a backward-incompatible protocol designed
 353         for Python 3.
 354 
 355         Specifying a negative protocol version selects the highest
 356         protocol version supported.  The higher the protocol used, the
 357         more recent the version of Python needed to read the pickle
 358         produced.
 359 
 360         The *file* argument must have a write() method that accepts a
 361         single bytes argument. It can thus be a file object opened for
 362         binary writing, an io.BytesIO instance, or any other custom
 363         object that meets this interface.
 364 
 365         If *fix_imports* is True and *protocol* is less than 3, pickle
 366         will try to map the new Python 3 names to the old module names
 367         used in Python 2, so that the pickle data stream is readable
 368         with Python 2.
 369         """
 370         if protocol is None:
 371             protocol = DEFAULT_PROTOCOL
 372         if protocol < 0:
 373             protocol = HIGHEST_PROTOCOL
 374         elif not 0 <= protocol <= HIGHEST_PROTOCOL:
 375             raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
 376         try:
 377             self._file_write = file.write
 378         except AttributeError:
 379             raise TypeError("file must have a 'write' attribute")
 380         self.framer = _Framer(self._file_write)
 381         self.write = self.framer.write
 382         self.memo = {}
 383         self.proto = int(protocol)
 384         self.bin = protocol >= 1
 385         self.fast = 0
 386         self.fix_imports = fix_imports and protocol < 3
 387 
 388     def clear_memo(self):
 389         """Clears the pickler's "memo".
 390 
 391         The memo is the data structure that remembers which objects the
 392         pickler has already seen, so that shared or recursive objects
 393         are pickled by reference and not by value.  This method is
 394         useful when re-using picklers.
 395         """
 396         self.memo.clear()
 397 
 398     def dump(self, obj):
 399         """Write a pickled representation of obj to the open file."""
 400         # Check whether Pickler was initialized correctly. This is
 401         # only needed to mimic the behavior of _pickle.Pickler.dump().
 402         if not hasattr(self, "_file_write"):
 403             raise PicklingError("Pickler.__init__() was not called by "
 404                                 "%s.__init__()" % (self.__class__.__name__,))
 405         if self.proto >= 2:
 406             self.write(PROTO + pack("<B", self.proto))
 407         if self.proto >= 4:
 408             self.framer.start_framing()
 409         self.save(obj)
 410         self.write(STOP)
 411         self.framer.end_framing()
 412 
 413     def memoize(self, obj):
 414         """Store an object in the memo."""
 415 
 416         # The Pickler memo is a dictionary mapping object ids to 2-tuples
 417         # that contain the Unpickler memo key and the object being memoized.
 418         # The memo key is written to the pickle and will become
 419         # the key in the Unpickler's memo.  The object is stored in the
 420         # Pickler memo so that transient objects are kept alive during
 421         # pickling.
 422 
 423         # The use of the Unpickler memo length as the memo key is just a
 424         # convention.  The only requirement is that the memo values be unique.
 425         # But there appears no advantage to any other scheme, and this
 426         # scheme allows the Unpickler memo to be implemented as a plain (but
 427         # growable) array, indexed by memo key.
 428         if self.fast:
 429             return
 430         assert id(obj) not in self.memo
 431         idx = len(self.memo)
 432         self.write(self.put(idx))
 433         self.memo[id(obj)] = idx, obj
 434 
 435     # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
 436     def put(self, idx):
 437         if self.proto >= 4:
 438             return MEMOIZE
 439         elif self.bin:
 440             if idx < 256:
 441                 return BINPUT + pack("<B", idx)
 442             else:
 443                 return LONG_BINPUT + pack("<I", idx)
 444         else:
 445             return PUT + repr(idx).encode("ascii") + b'\n'
 446 
 447     # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
 448     def get(self, i):
 449         if self.bin:
 450             if i < 256:
 451                 return BINGET + pack("<B", i)
 452             else:
 453                 return LONG_BINGET + pack("<I", i)
 454 
 455         return GET + repr(i).encode("ascii") + b'\n'
 456 
 457     def save(self, obj, save_persistent_id=True):
 458         self.framer.commit_frame()
 459 
 460         # Check for persistent id (defined by a subclass)
 461         pid = self.persistent_id(obj)
 462         if pid is not None and save_persistent_id:
 463             self.save_pers(pid)
 464             return
 465 
 466         # Check the memo
 467         x = self.memo.get(id(obj))
 468         if x is not None:
 469             self.write(self.get(x[0]))
 470             return
 471 
 472         # Check the type dispatch table
 473         t = type(obj)
 474         f = self.dispatch.get(t)
 475         if f is not None:
 476             f(self, obj) # Call unbound method with explicit self
 477             return
 478 
 479         # Check private dispatch table if any, or else copyreg.dispatch_table
 480         reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
 481         if reduce is not None:
 482             rv = reduce(obj)
 483         else:
 484             # Check for a class with a custom metaclass; treat as regular class
 485             try:
 486                 issc = issubclass(t, type)
 487             except TypeError: # t is not a class (old Boost; see SF #502085)
 488                 issc = False
 489             if issc:
 490                 self.save_global(obj)
 491                 return
 492 
 493             # Check for a __reduce_ex__ method, fall back to __reduce__
 494             reduce = getattr(obj, "__reduce_ex__", None)
 495             if reduce is not None:
 496                 rv = reduce(self.proto)
 497             else:
 498                 reduce = getattr(obj, "__reduce__", None)
 499                 if reduce is not None:
 500                     rv = reduce()
 501                 else:
 502                     raise PicklingError("Can't pickle %r object: %r" %
 503                                         (t.__name__, obj))
 504 
 505         # Check for string returned by reduce(), meaning "save as global"
 506         if isinstance(rv, str):
 507             self.save_global(obj, rv)
 508             return
 509 
 510         # Assert that reduce() returned a tuple
 511         if not isinstance(rv, tuple):
 512             raise PicklingError("%s must return string or tuple" % reduce)
 513 
 514         # Assert that it returned an appropriately sized tuple
 515         l = len(rv)
 516         if not (2 <= l <= 5):
 517             raise PicklingError("Tuple returned by %s must have "
 518                                 "two to five elements" % reduce)
 519 
 520         # Save the reduce() output and finally memoize the object
 521         self.save_reduce(obj=obj, *rv)
 522 
 523     def persistent_id(self, obj):
 524         # This exists so a subclass can override it
 525         return None
 526 
 527     def save_pers(self, pid):
 528         # Save a persistent id reference
 529         if self.bin:
 530             self.save(pid, save_persistent_id=False)
 531             self.write(BINPERSID)
 532         else:
 533             try:
 534                 self.write(PERSID + str(pid).encode("ascii") + b'\n')
 535             except UnicodeEncodeError:
 536                 raise PicklingError(
 537                     "persistent IDs in protocol 0 must be ASCII strings")
 538 
 539     def save_reduce(self, func, args, state=None, listitems=None,
 540                     dictitems=None, obj=None):
 541         # This API is called by some subclasses
 542 
 543         if not isinstance(args, tuple):
 544             raise PicklingError("args from save_reduce() must be a tuple")
 545         if not callable(func):
 546             raise PicklingError("func from save_reduce() must be callable")
 547 
 548         save = self.save
 549         write = self.write
 550 
 551         func_name = getattr(func, "__name__", "")
 552         if self.proto >= 2 and func_name == "__newobj_ex__":
 553             cls, args, kwargs = args
 554             if not hasattr(cls, "__new__"):
 555                 raise PicklingError("args[0] from {} args has no __new__"
 556                                     .format(func_name))
 557             if obj is not None and cls is not obj.__class__:
 558                 raise PicklingError("args[0] from {} args has the wrong class"
 559                                     .format(func_name))
 560             if self.proto >= 4:
 561                 save(cls)
 562                 save(args)
 563                 save(kwargs)
 564                 write(NEWOBJ_EX)
 565             else:
 566                 func = partial(cls.__new__, cls, *args, **kwargs)
 567                 save(func)
 568                 save(())
 569                 write(REDUCE)
 570         elif self.proto >= 2 and func_name == "__newobj__":
 571             # A __reduce__ implementation can direct protocol 2 or newer to
 572             # use the more efficient NEWOBJ opcode, while still
 573             # allowing protocol 0 and 1 to work normally.  For this to
 574             # work, the function returned by __reduce__ should be
 575             # called __newobj__, and its first argument should be a
 576             # class.  The implementation for __newobj__
 577             # should be as follows, although pickle has no way to
 578             # verify this:
 579             #
 580             # def __newobj__(cls, *args):
 581             #     return cls.__new__(cls, *args)
 582             #
 583             # Protocols 0 and 1 will pickle a reference to __newobj__,
 584             # while protocol 2 (and above) will pickle a reference to
 585             # cls, the remaining args tuple, and the NEWOBJ code,
 586             # which calls cls.__new__(cls, *args) at unpickling time
 587             # (see load_newobj below).  If __reduce__ returns a
 588             # three-tuple, the state from the third tuple item will be
 589             # pickled regardless of the protocol, calling __setstate__
 590             # at unpickling time (see load_build below).
 591             #
 592             # Note that no standard __newobj__ implementation exists;
 593             # you have to provide your own.  This is to enforce
 594             # compatibility with Python 2.2 (pickles written using
 595             # protocol 0 or 1 in Python 2.3 should be unpicklable by
 596             # Python 2.2).
 597             cls = args[0]
 598             if not hasattr(cls, "__new__"):
 599                 raise PicklingError(
 600                     "args[0] from __newobj__ args has no __new__")
 601             if obj is not None and cls is not obj.__class__:
 602                 raise PicklingError(
 603                     "args[0] from __newobj__ args has the wrong class")
 604             args = args[1:]
 605             save(cls)
 606             save(args)
 607             write(NEWOBJ)
 608         else:
 609             save(func)
 610             save(args)
 611             write(REDUCE)
 612 
 613         if obj is not None:
 614             # If the object is already in the memo, this means it is
 615             # recursive. In this case, throw away everything we put on the
 616             # stack, and fetch the object back from the memo.
 617             if id(obj) in self.memo:
 618                 write(POP + self.get(self.memo[id(obj)][0]))
 619             else:
 620                 self.memoize(obj)
 621 
 622         # More new special cases (that work with older protocols as
 623         # well): when __reduce__ returns a tuple with 4 or 5 items,
 624         # the 4th and 5th item should be iterators that provide list
 625         # items and dict items (as (key, value) tuples), or None.
 626 
 627         if listitems is not None:
 628             self._batch_appends(listitems)
 629 
 630         if dictitems is not None:
 631             self._batch_setitems(dictitems)
 632 
 633         if state is not None:
 634             save(state)
 635             write(BUILD)
 636 
 637     # Methods below this point are dispatched through the dispatch table
 638 
 639     dispatch = {}
 640 
 641     def save_none(self, obj):
 642         self.write(NONE)
 643     dispatch[type(None)] = save_none
 644 
 645     def save_bool(self, obj):
 646         if self.proto >= 2:
 647             self.write(NEWTRUE if obj else NEWFALSE)
 648         else:
 649             self.write(TRUE if obj else FALSE)
 650     dispatch[bool] = save_bool
 651 
 652     def save_long(self, obj):
 653         if self.bin:
 654             # If the int is small enough to fit in a signed 4-byte 2's-comp
 655             # format, we can store it more efficiently than the general
 656             # case.
 657             # First one- and two-byte unsigned ints:
 658             if obj >= 0:
 659                 if obj <= 0xff:
 660                     self.write(BININT1 + pack("<B", obj))
 661                     return
 662                 if obj <= 0xffff:
 663                     self.write(BININT2 + pack("<H", obj))
 664                     return
 665             # Next check for 4-byte signed ints:
 666             if -0x80000000 <= obj <= 0x7fffffff:
 667                 self.write(BININT + pack("<i", obj))
 668                 return
 669         if self.proto >= 2:
 670             encoded = encode_long(obj)
 671             n = len(encoded)
 672             if n < 256:
 673                 self.write(LONG1 + pack("<B", n) + encoded)
 674             else:
 675                 self.write(LONG4 + pack("<i", n) + encoded)
 676             return
 677         self.write(LONG + repr(obj).encode("ascii") + b'L\n')
 678     dispatch[int] = save_long
 679 
 680     def save_float(self, obj):
 681         if self.bin:
 682             self.write(BINFLOAT + pack('>d', obj))
 683         else:
 684             self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
 685     dispatch[float] = save_float
 686 
 687     def save_bytes(self, obj):
 688         if self.proto < 3:
 689             if not obj: # bytes object is empty
 690                 self.save_reduce(bytes, (), obj=obj)
 691             else:
 692                 self.save_reduce(codecs.encode,
 693                                  (str(obj, 'latin1'), 'latin1'), obj=obj)
 694             return
 695         n = len(obj)
 696         if n <= 0xff:
 697             self.write(SHORT_BINBYTES + pack("<B", n) + obj)
 698         elif n > 0xffffffff and self.proto >= 4:
 699             self.write(BINBYTES8 + pack("<Q", n) + obj)
 700         else:
 701             self.write(BINBYTES + pack("<I", n) + obj)
 702         self.memoize(obj)
 703     dispatch[bytes] = save_bytes
 704 
 705     def save_str(self, obj):
 706         if self.bin:
 707             encoded = obj.encode('utf-8', 'surrogatepass')
 708             n = len(encoded)
 709             if n <= 0xff and self.proto >= 4:
 710                 self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
 711             elif n > 0xffffffff and self.proto >= 4:
 712                 self.write(BINUNICODE8 + pack("<Q", n) + encoded)
 713             else:
 714                 self.write(BINUNICODE + pack("<I", n) + encoded)
 715         else:
 716             obj = obj.replace("\\", "\\u005c")
 717             obj = obj.replace("\n", "\\u000a")
 718             self.write(UNICODE + obj.encode('raw-unicode-escape') +
 719                        b'\n')
 720         self.memoize(obj)
 721     dispatch[str] = save_str
 722 
 723     def save_tuple(self, obj):
 724         if not obj: # tuple is empty
 725             if self.bin:
 726                 self.write(EMPTY_TUPLE)
 727             else:
 728                 self.write(MARK + TUPLE)
 729             return
 730 
 731         n = len(obj)
 732         save = self.save
 733         memo = self.memo
 734         if n <= 3 and self.proto >= 2:
 735             for element in obj:
 736                 save(element)
 737             # Subtle.  Same as in the big comment below.
 738             if id(obj) in memo:
 739                 get = self.get(memo[id(obj)][0])
 740                 self.write(POP * n + get)
 741             else:
 742                 self.write(_tuplesize2code[n])
 743                 self.memoize(obj)
 744             return
 745 
 746         # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
 747         # has more than 3 elements.
 748         write = self.write
 749         write(MARK)
 750         for element in obj:
 751             save(element)
 752 
 753         if id(obj) in memo:
 754             # Subtle.  d was not in memo when we entered save_tuple(), so
 755             # the process of saving the tuple's elements must have saved
 756             # the tuple itself:  the tuple is recursive.  The proper action
 757             # now is to throw away everything we put on the stack, and
 758             # simply GET the tuple (it's already constructed).  This check
 759             # could have been done in the "for element" loop instead, but
 760             # recursive tuples are a rare thing.
 761             get = self.get(memo[id(obj)][0])
 762             if self.bin:
 763                 write(POP_MARK + get)
 764             else:   # proto 0 -- POP_MARK not available
 765                 write(POP * (n+1) + get)
 766             return
 767 
 768         # No recursion.
 769         write(TUPLE)
 770         self.memoize(obj)
 771 
 772     dispatch[tuple] = save_tuple
 773 
 774     def save_list(self, obj):
 775         if self.bin:
 776             self.write(EMPTY_LIST)
 777         else:   # proto 0 -- can't use EMPTY_LIST
 778             self.write(MARK + LIST)
 779 
 780         self.memoize(obj)
 781         self._batch_appends(obj)
 782 
 783     dispatch[list] = save_list
 784 
 785     _BATCHSIZE = 1000
 786 
 787     def _batch_appends(self, items):
 788         # Helper to batch up APPENDS sequences
 789         save = self.save
 790         write = self.write
 791 
 792         if not self.bin:
 793             for x in items:
 794                 save(x)
 795                 write(APPEND)
 796             return
 797 
 798         it = iter(items)
 799         while True:
 800             tmp = list(islice(it, self._BATCHSIZE))
 801             n = len(tmp)
 802             if n > 1:
 803                 write(MARK)
 804                 for x in tmp:
 805                     save(x)
 806                 write(APPENDS)
 807             elif n:
 808                 save(tmp[0])
 809                 write(APPEND)
 810             # else tmp is empty, and we're done
 811             if n < self._BATCHSIZE:
 812                 return
 813 
 814     def save_dict(self, obj):
 815         if self.bin:
 816             self.write(EMPTY_DICT)
 817         else:   # proto 0 -- can't use EMPTY_DICT
 818             self.write(MARK + DICT)
 819 
 820         self.memoize(obj)
 821         self._batch_setitems(obj.items())
 822 
 823     dispatch[dict] = save_dict
 824     if PyStringMap is not None:
 825         dispatch[PyStringMap] = save_dict
 826 
 827     def _batch_setitems(self, items):
 828         # Helper to batch up SETITEMS sequences; proto >= 1 only
 829         save = self.save
 830         write = self.write
 831 
 832         if not self.bin:
 833             for k, v in items:
 834                 save(k)
 835                 save(v)
 836                 write(SETITEM)
 837             return
 838 
 839         it = iter(items)
 840         while True:
 841             tmp = list(islice(it, self._BATCHSIZE))
 842             n = len(tmp)
 843             if n > 1:
 844                 write(MARK)
 845                 for k, v in tmp:
 846                     save(k)
 847                     save(v)
 848                 write(SETITEMS)
 849             elif n:
 850                 k, v = tmp[0]
 851                 save(k)
 852                 save(v)
 853                 write(SETITEM)
 854             # else tmp is empty, and we're done
 855             if n < self._BATCHSIZE:
 856                 return
 857 
 858     def save_set(self, obj):
 859         save = self.save
 860         write = self.write
 861 
 862         if self.proto < 4:
 863             self.save_reduce(set, (list(obj),), obj=obj)
 864             return
 865 
 866         write(EMPTY_SET)
 867         self.memoize(obj)
 868 
 869         it = iter(obj)
 870         while True:
 871             batch = list(islice(it, self._BATCHSIZE))
 872             n = len(batch)
 873             if n > 0:
 874                 write(MARK)
 875                 for item in batch:
 876                     save(item)
 877                 write(ADDITEMS)
 878             if n < self._BATCHSIZE:
 879                 return
 880     dispatch[set] = save_set
 881 
 882     def save_frozenset(self, obj):
 883         save = self.save
 884         write = self.write
 885 
 886         if self.proto < 4:
 887             self.save_reduce(frozenset, (list(obj),), obj=obj)
 888             return
 889 
 890         write(MARK)
 891         for item in obj:
 892             save(item)
 893 
 894         if id(obj) in self.memo:
 895             # If the object is already in the memo, this means it is
 896             # recursive. In this case, throw away everything we put on the
 897             # stack, and fetch the object back from the memo.
 898             write(POP_MARK + self.get(self.memo[id(obj)][0]))
 899             return
 900 
 901         write(FROZENSET)
 902         self.memoize(obj)
 903     dispatch[frozenset] = save_frozenset
 904 
 905     def save_global(self, obj, name=None):
 906         write = self.write
 907         memo = self.memo
 908 
 909         if name is None:
 910             name = getattr(obj, '__qualname__', None)
 911         if name is None:
 912             name = obj.__name__
 913 
 914         module_name = whichmodule(obj, name)
 915         try:
 916             __import__(module_name, level=0)
 917             module = sys.modules[module_name]
 918             obj2, parent = _getattribute(module, name)
 919         except (ImportError, KeyError, AttributeError):
 920             raise PicklingError(
 921                 "Can't pickle %r: it's not found as %s.%s" %
 922                 (obj, module_name, name))
 923         else:
 924             if obj2 is not obj:
 925                 raise PicklingError(
 926                     "Can't pickle %r: it's not the same object as %s.%s" %
 927                     (obj, module_name, name))
 928 
 929         if self.proto >= 2:
 930             code = _extension_registry.get((module_name, name))
 931             if code:
 932                 assert code > 0
 933                 if code <= 0xff:
 934                     write(EXT1 + pack("<B", code))
 935                 elif code <= 0xffff:
 936                     write(EXT2 + pack("<H", code))
 937                 else:
 938                     write(EXT4 + pack("<i", code))
 939                 return
 940         lastname = name.rpartition('.')[2]
 941         if parent is module:
 942             name = lastname
 943         # Non-ASCII identifiers are supported only with protocols >= 3.
 944         if self.proto >= 4:
 945             self.save(module_name)
 946             self.save(name)
 947             write(STACK_GLOBAL)
 948         elif parent is not module:
 949             self.save_reduce(getattr, (parent, lastname))
 950         elif self.proto >= 3:
 951             write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
 952                   bytes(name, "utf-8") + b'\n')
 953         else:
 954             if self.fix_imports:
 955                 r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
 956                 r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
 957                 if (module_name, name) in r_name_mapping:
 958                     module_name, name = r_name_mapping[(module_name, name)]
 959                 elif module_name in r_import_mapping:
 960                     module_name = r_import_mapping[module_name]
 961             try:
 962                 write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
 963                       bytes(name, "ascii") + b'\n')
 964             except UnicodeEncodeError:
 965                 raise PicklingError(
 966                     "can't pickle global identifier '%s.%s' using "
 967                     "pickle protocol %i" % (module, name, self.proto))
 968 
 969         self.memoize(obj)
 970 
 971     def save_type(self, obj):
 972         if obj is type(None):
 973             return self.save_reduce(type, (None,), obj=obj)
 974         elif obj is type(NotImplemented):
 975             return self.save_reduce(type, (NotImplemented,), obj=obj)
 976         elif obj is type(...):
 977             return self.save_reduce(type, (...,), obj=obj)
 978         return self.save_global(obj)
 979 
 980     dispatch[FunctionType] = save_global
 981     dispatch[type] = save_type
 982 
 983 
 984 # Unpickling machinery
 985 
 986 class _Unpickler:
 987 
 988     def __init__(self, file, *, fix_imports=True,
 989                  encoding="ASCII", errors="strict"):
 990         """This takes a binary file for reading a pickle data stream.
 991 
 992         The protocol version of the pickle is detected automatically, so
 993         no proto argument is needed.
 994 
 995         The argument *file* must have two methods, a read() method that
 996         takes an integer argument, and a readline() method that requires
 997         no arguments.  Both methods should return bytes.  Thus *file*
 998         can be a binary file object opened for reading, an io.BytesIO
 999         object, or any other custom object that meets this interface.
1000 
1001         The file-like object must have two methods, a read() method
1002         that takes an integer argument, and a readline() method that
1003         requires no arguments.  Both methods should return bytes.
1004         Thus file-like object can be a binary file object opened for
1005         reading, a BytesIO object, or any other custom object that
1006         meets this interface.
1007 
1008         Optional keyword arguments are *fix_imports*, *encoding* and
1009         *errors*, which are used to control compatibility support for
1010         pickle stream generated by Python 2.  If *fix_imports* is True,
1011         pickle will try to map the old Python 2 names to the new names
1012         used in Python 3.  The *encoding* and *errors* tell pickle how
1013         to decode 8-bit string instances pickled by Python 2; these
1014         default to 'ASCII' and 'strict', respectively. *encoding* can be
1015         'bytes' to read theses 8-bit string instances as bytes objects.
1016         """
1017         self._file_readline = file.readline
1018         self._file_read = file.read
1019         self.memo = {}
1020         self.encoding = encoding
1021         self.errors = errors
1022         self.proto = 0
1023         self.fix_imports = fix_imports
1024 
1025     def load(self):
1026         """Read a pickled object representation from the open file.
1027 
1028         Return the reconstituted object hierarchy specified in the file.
1029         """
1030         # Check whether Unpickler was initialized correctly. This is
1031         # only needed to mimic the behavior of _pickle.Unpickler.dump().
1032         if not hasattr(self, "_file_read"):
1033             raise UnpicklingError("Unpickler.__init__() was not called by "
1034                                   "%s.__init__()" % (self.__class__.__name__,))
1035         self._unframer = _Unframer(self._file_read, self._file_readline)
1036         self.read = self._unframer.read
1037         self.readline = self._unframer.readline
1038         self.metastack = []
1039         self.stack = []
1040         self.append = self.stack.append
1041         self.proto = 0
1042         read = self.read
1043         dispatch = self.dispatch
1044         try:
1045             while True:
1046                 key = read(1)
1047                 if not key:
1048                     raise EOFError
1049                 assert isinstance(key, bytes_types)
1050                 dispatch[key[0]](self)
1051         except _Stop as stopinst:
1052             return stopinst.value
1053 
1054     # Return a list of items pushed in the stack after last MARK instruction.
1055     def pop_mark(self):
1056         items = self.stack
1057         self.stack = self.metastack.pop()
1058         self.append = self.stack.append
1059         return items
1060 
1061     def persistent_load(self, pid):
1062         raise UnpicklingError("unsupported persistent id encountered")
1063 
1064     dispatch = {}
1065 
1066     def load_proto(self):
1067         proto = self.read(1)[0]
1068         if not 0 <= proto <= HIGHEST_PROTOCOL:
1069             raise ValueError("unsupported pickle protocol: %d" % proto)
1070         self.proto = proto
1071     dispatch[PROTO[0]] = load_proto
1072 
1073     def load_frame(self):
1074         frame_size, = unpack('<Q', self.read(8))
1075         if frame_size > sys.maxsize:
1076             raise ValueError("frame size > sys.maxsize: %d" % frame_size)
1077         self._unframer.load_frame(frame_size)
1078     dispatch[FRAME[0]] = load_frame
1079 
1080     def load_persid(self):
1081         try:
1082             pid = self.readline()[:-1].decode("ascii")
1083         except UnicodeDecodeError:
1084             raise UnpicklingError(
1085                 "persistent IDs in protocol 0 must be ASCII strings")
1086         self.append(self.persistent_load(pid))
1087     dispatch[PERSID[0]] = load_persid
1088 
1089     def load_binpersid(self):
1090         pid = self.stack.pop()
1091         self.append(self.persistent_load(pid))
1092     dispatch[BINPERSID[0]] = load_binpersid
1093 
1094     def load_none(self):
1095         self.append(None)
1096     dispatch[NONE[0]] = load_none
1097 
1098     def load_false(self):
1099         self.append(False)
1100     dispatch[NEWFALSE[0]] = load_false
1101 
1102     def load_true(self):
1103         self.append(True)
1104     dispatch[NEWTRUE[0]] = load_true
1105 
1106     def load_int(self):
1107         data = self.readline()
1108         if data == FALSE[1:]:
1109             val = False
1110         elif data == TRUE[1:]:
1111             val = True
1112         else:
1113             val = int(data, 0)
1114         self.append(val)
1115     dispatch[INT[0]] = load_int
1116 
1117     def load_binint(self):
1118         self.append(unpack('<i', self.read(4))[0])
1119     dispatch[BININT[0]] = load_binint
1120 
1121     def load_binint1(self):
1122         self.append(self.read(1)[0])
1123     dispatch[BININT1[0]] = load_binint1
1124 
1125     def load_binint2(self):
1126         self.append(unpack('<H', self.read(2))[0])
1127     dispatch[BININT2[0]] = load_binint2
1128 
1129     def load_long(self):
1130         val = self.readline()[:-1]
1131         if val and val[-1] == b'L'[0]:
1132             val = val[:-1]
1133         self.append(int(val, 0))
1134     dispatch[LONG[0]] = load_long
1135 
1136     def load_long1(self):
1137         n = self.read(1)[0]
1138         data = self.read(n)
1139         self.append(decode_long(data))
1140     dispatch[LONG1[0]] = load_long1
1141 
1142     def load_long4(self):
1143         n, = unpack('<i', self.read(4))
1144         if n < 0:
1145             # Corrupt or hostile pickle -- we never write one like this
1146             raise UnpicklingError("LONG pickle has negative byte count")
1147         data = self.read(n)
1148         self.append(decode_long(data))
1149     dispatch[LONG4[0]] = load_long4
1150 
1151     def load_float(self):
1152         self.append(float(self.readline()[:-1]))
1153     dispatch[FLOAT[0]] = load_float
1154 
1155     def load_binfloat(self):
1156         self.append(unpack('>d', self.read(8))[0])
1157     dispatch[BINFLOAT[0]] = load_binfloat
1158 
1159     def _decode_string(self, value):
1160         # Used to allow strings from Python 2 to be decoded either as
1161         # bytes or Unicode strings.  This should be used only with the
1162         # STRING, BINSTRING and SHORT_BINSTRING opcodes.
1163         if self.encoding == "bytes":
1164             return value
1165         else:
1166             return value.decode(self.encoding, self.errors)
1167 
1168     def load_string(self):
1169         data = self.readline()[:-1]
1170         # Strip outermost quotes
1171         if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'':
1172             data = data[1:-1]
1173         else:
1174             raise UnpicklingError("the STRING opcode argument must be quoted")
1175         self.append(self._decode_string(codecs.escape_decode(data)[0]))
1176     dispatch[STRING[0]] = load_string
1177 
1178     def load_binstring(self):
1179         # Deprecated BINSTRING uses signed 32-bit length
1180         len, = unpack('<i', self.read(4))
1181         if len < 0:
1182             raise UnpicklingError("BINSTRING pickle has negative byte count")
1183         data = self.read(len)
1184         self.append(self._decode_string(data))
1185     dispatch[BINSTRING[0]] = load_binstring
1186 
1187     def load_binbytes(self):
1188         len, = unpack('<I', self.read(4))
1189         if len > maxsize:
1190             raise UnpicklingError("BINBYTES exceeds system's maximum size "
1191                                   "of %d bytes" % maxsize)
1192         self.append(self.read(len))
1193     dispatch[BINBYTES[0]] = load_binbytes
1194 
1195     def load_unicode(self):
1196         self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
1197     dispatch[UNICODE[0]] = load_unicode
1198 
1199     def load_binunicode(self):
1200         len, = unpack('<I', self.read(4))
1201         if len > maxsize:
1202             raise UnpicklingError("BINUNICODE exceeds system's maximum size "
1203                                   "of %d bytes" % maxsize)
1204         self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1205     dispatch[BINUNICODE[0]] = load_binunicode
1206 
1207     def load_binunicode8(self):
1208         len, = unpack('<Q', self.read(8))
1209         if len > maxsize:
1210             raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
1211                                   "of %d bytes" % maxsize)
1212         self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1213     dispatch[BINUNICODE8[0]] = load_binunicode8
1214 
1215     def load_binbytes8(self):
1216         len, = unpack('<Q', self.read(8))
1217         if len > maxsize:
1218             raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
1219                                   "of %d bytes" % maxsize)
1220         self.append(self.read(len))
1221     dispatch[BINBYTES8[0]] = load_binbytes8
1222 
1223     def load_short_binstring(self):
1224         len = self.read(1)[0]
1225         data = self.read(len)
1226         self.append(self._decode_string(data))
1227     dispatch[SHORT_BINSTRING[0]] = load_short_binstring
1228 
1229     def load_short_binbytes(self):
1230         len = self.read(1)[0]
1231         self.append(self.read(len))
1232     dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
1233 
1234     def load_short_binunicode(self):
1235         len = self.read(1)[0]
1236         self.append(str(self.read(len), 'utf-8', 'surrogatepass'))
1237     dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
1238 
1239     def load_tuple(self):
1240         items = self.pop_mark()
1241         self.append(tuple(items))
1242     dispatch[TUPLE[0]] = load_tuple
1243 
1244     def load_empty_tuple(self):
1245         self.append(())
1246     dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
1247 
1248     def load_tuple1(self):
1249         self.stack[-1] = (self.stack[-1],)
1250     dispatch[TUPLE1[0]] = load_tuple1
1251 
1252     def load_tuple2(self):
1253         self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
1254     dispatch[TUPLE2[0]] = load_tuple2
1255 
1256     def load_tuple3(self):
1257         self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
1258     dispatch[TUPLE3[0]] = load_tuple3
1259 
1260     def load_empty_list(self):
1261         self.append([])
1262     dispatch[EMPTY_LIST[0]] = load_empty_list
1263 
1264     def load_empty_dictionary(self):
1265         self.append({})
1266     dispatch[EMPTY_DICT[0]] = load_empty_dictionary
1267 
1268     def load_empty_set(self):
1269         self.append(set())
1270     dispatch[EMPTY_SET[0]] = load_empty_set
1271 
1272     def load_frozenset(self):
1273         items = self.pop_mark()
1274         self.append(frozenset(items))
1275     dispatch[FROZENSET[0]] = load_frozenset
1276 
1277     def load_list(self):
1278         items = self.pop_mark()
1279         self.append(items)
1280     dispatch[LIST[0]] = load_list
1281 
1282     def load_dict(self):
1283         items = self.pop_mark()
1284         d = {items[i]: items[i+1]
1285              for i in range(0, len(items), 2)}
1286         self.append(d)
1287     dispatch[DICT[0]] = load_dict
1288 
1289     # INST and OBJ differ only in how they get a class object.  It's not
1290     # only sensible to do the rest in a common routine, the two routines
1291     # previously diverged and grew different bugs.
1292     # klass is the class to instantiate, and k points to the topmost mark
1293     # object, following which are the arguments for klass.__init__.
1294     def _instantiate(self, klass, args):
1295         if (args or not isinstance(klass, type) or
1296             hasattr(klass, "__getinitargs__")):
1297             try:
1298                 value = klass(*args)
1299             except TypeError as err:
1300                 raise TypeError("in constructor for %s: %s" %
1301                                 (klass.__name__, str(err)), sys.exc_info()[2])
1302         else:
1303             value = klass.__new__(klass)
1304         self.append(value)
1305 
1306     def load_inst(self):
1307         module = self.readline()[:-1].decode("ascii")
1308         name = self.readline()[:-1].decode("ascii")
1309         klass = self.find_class(module, name)
1310         self._instantiate(klass, self.pop_mark())
1311     dispatch[INST[0]] = load_inst
1312 
1313     def load_obj(self):
1314         # Stack is ... markobject classobject arg1 arg2 ...
1315         args = self.pop_mark()
1316         cls = args.pop(0)
1317         self._instantiate(cls, args)
1318     dispatch[OBJ[0]] = load_obj
1319 
1320     def load_newobj(self):
1321         args = self.stack.pop()
1322         cls = self.stack.pop()
1323         obj = cls.__new__(cls, *args)
1324         self.append(obj)
1325     dispatch[NEWOBJ[0]] = load_newobj
1326 
1327     def load_newobj_ex(self):
1328         kwargs = self.stack.pop()
1329         args = self.stack.pop()
1330         cls = self.stack.pop()
1331         obj = cls.__new__(cls, *args, **kwargs)
1332         self.append(obj)
1333     dispatch[NEWOBJ_EX[0]] = load_newobj_ex
1334 
1335     def load_global(self):
1336         module = self.readline()[:-1].decode("utf-8")
1337         name = self.readline()[:-1].decode("utf-8")
1338         klass = self.find_class(module, name)
1339         self.append(klass)
1340     dispatch[GLOBAL[0]] = load_global
1341 
1342     def load_stack_global(self):
1343         name = self.stack.pop()
1344         module = self.stack.pop()
1345         if type(name) is not str or type(module) is not str:
1346             raise UnpicklingError("STACK_GLOBAL requires str")
1347         self.append(self.find_class(module, name))
1348     dispatch[STACK_GLOBAL[0]] = load_stack_global
1349 
1350     def load_ext1(self):
1351         code = self.read(1)[0]
1352         self.get_extension(code)
1353     dispatch[EXT1[0]] = load_ext1
1354 
1355     def load_ext2(self):
1356         code, = unpack('<H', self.read(2))
1357         self.get_extension(code)
1358     dispatch[EXT2[0]] = load_ext2
1359 
1360     def load_ext4(self):
1361         code, = unpack('<i', self.read(4))
1362         self.get_extension(code)
1363     dispatch[EXT4[0]] = load_ext4
1364 
1365     def get_extension(self, code):
1366         nil = []
1367         obj = _extension_cache.get(code, nil)
1368         if obj is not nil:
1369             self.append(obj)
1370             return
1371         key = _inverted_registry.get(code)
1372         if not key:
1373             if code <= 0: # note that 0 is forbidden
1374                 # Corrupt or hostile pickle.
1375                 raise UnpicklingError("EXT specifies code <= 0")
1376             raise ValueError("unregistered extension code %d" % code)
1377         obj = self.find_class(*key)
1378         _extension_cache[code] = obj
1379         self.append(obj)
1380 
1381     def find_class(self, module, name):
1382         # Subclasses may override this.
1383         if self.proto < 3 and self.fix_imports:
1384             if (module, name) in _compat_pickle.NAME_MAPPING:
1385                 module, name = _compat_pickle.NAME_MAPPING[(module, name)]
1386             elif module in _compat_pickle.IMPORT_MAPPING:
1387                 module = _compat_pickle.IMPORT_MAPPING[module]
1388         __import__(module, level=0)
1389         if self.proto >= 4:
1390             return _getattribute(sys.modules[module], name)[0]
1391         else:
1392             return getattr(sys.modules[module], name)
1393 
1394     def load_reduce(self):
1395         stack = self.stack
1396         args = stack.pop()
1397         func = stack[-1]
1398         stack[-1] = func(*args)
1399     dispatch[REDUCE[0]] = load_reduce
1400 
1401     def load_pop(self):
1402         if self.stack:
1403             del self.stack[-1]
1404         else:
1405             self.pop_mark()
1406     dispatch[POP[0]] = load_pop
1407 
1408     def load_pop_mark(self):
1409         self.pop_mark()
1410     dispatch[POP_MARK[0]] = load_pop_mark
1411 
1412     def load_dup(self):
1413         self.append(self.stack[-1])
1414     dispatch[DUP[0]] = load_dup
1415 
1416     def load_get(self):
1417         i = int(self.readline()[:-1])
1418         self.append(self.memo[i])
1419     dispatch[GET[0]] = load_get
1420 
1421     def load_binget(self):
1422         i = self.read(1)[0]
1423         self.append(self.memo[i])
1424     dispatch[BINGET[0]] = load_binget
1425 
1426     def load_long_binget(self):
1427         i, = unpack('<I', self.read(4))
1428         self.append(self.memo[i])
1429     dispatch[LONG_BINGET[0]] = load_long_binget
1430 
1431     def load_put(self):
1432         i = int(self.readline()[:-1])
1433         if i < 0:
1434             raise ValueError("negative PUT argument")
1435         self.memo[i] = self.stack[-1]
1436     dispatch[PUT[0]] = load_put
1437 
1438     def load_binput(self):
1439         i = self.read(1)[0]
1440         if i < 0:
1441             raise ValueError("negative BINPUT argument")
1442         self.memo[i] = self.stack[-1]
1443     dispatch[BINPUT[0]] = load_binput
1444 
1445     def load_long_binput(self):
1446         i, = unpack('<I', self.read(4))
1447         if i > maxsize:
1448             raise ValueError("negative LONG_BINPUT argument")
1449         self.memo[i] = self.stack[-1]
1450     dispatch[LONG_BINPUT[0]] = load_long_binput
1451 
1452     def load_memoize(self):
1453         memo = self.memo
1454         memo[len(memo)] = self.stack[-1]
1455     dispatch[MEMOIZE[0]] = load_memoize
1456 
1457     def load_append(self):
1458         stack = self.stack
1459         value = stack.pop()
1460         list = stack[-1]
1461         list.append(value)
1462     dispatch[APPEND[0]] = load_append
1463 
1464     def load_appends(self):
1465         items = self.pop_mark()
1466         list_obj = self.stack[-1]
1467         if isinstance(list_obj, list):
1468             list_obj.extend(items)
1469         else:
1470             append = list_obj.append
1471             for item in items:
1472                 append(item)
1473     dispatch[APPENDS[0]] = load_appends
1474 
1475     def load_setitem(self):
1476         stack = self.stack
1477         value = stack.pop()
1478         key = stack.pop()
1479         dict = stack[-1]
1480         dict[key] = value
1481     dispatch[SETITEM[0]] = load_setitem
1482 
1483     def load_setitems(self):
1484         items = self.pop_mark()
1485         dict = self.stack[-1]
1486         for i in range(0, len(items), 2):
1487             dict[items[i]] = items[i + 1]
1488     dispatch[SETITEMS[0]] = load_setitems
1489 
1490     def load_additems(self):
1491         items = self.pop_mark()
1492         set_obj = self.stack[-1]
1493         if isinstance(set_obj, set):
1494             set_obj.update(items)
1495         else:
1496             add = set_obj.add
1497             for item in items:
1498                 add(item)
1499     dispatch[ADDITEMS[0]] = load_additems
1500 
1501     def load_build(self):
1502         stack = self.stack
1503         state = stack.pop()
1504         inst = stack[-1]
1505         setstate = getattr(inst, "__setstate__", None)
1506         if setstate is not None:
1507             setstate(state)
1508             return
1509         slotstate = None
1510         if isinstance(state, tuple) and len(state) == 2:
1511             state, slotstate = state
1512         if state:
1513             inst_dict = inst.__dict__
1514             intern = sys.intern
1515             for k, v in state.items():
1516                 if type(k) is str:
1517                     inst_dict[intern(k)] = v
1518                 else:
1519                     inst_dict[k] = v
1520         if slotstate:
1521             for k, v in slotstate.items():
1522                 setattr(inst, k, v)
1523     dispatch[BUILD[0]] = load_build
1524 
1525     def load_mark(self):
1526         self.metastack.append(self.stack)
1527         self.stack = []
1528         self.append = self.stack.append
1529     dispatch[MARK[0]] = load_mark
1530 
1531     def load_stop(self):
1532         value = self.stack.pop()
1533         raise _Stop(value)
1534     dispatch[STOP[0]] = load_stop
1535 
1536 
1537 # Shorthands
1538 
1539 def _dump(obj, file, protocol=None, *, fix_imports=True):
1540     _Pickler(file, protocol, fix_imports=fix_imports).dump(obj)
1541 
1542 def _dumps(obj, protocol=None, *, fix_imports=True):
1543     f = io.BytesIO()
1544     _Pickler(f, protocol, fix_imports=fix_imports).dump(obj)
1545     res = f.getvalue()
1546     assert isinstance(res, bytes_types)
1547     return res
1548 
1549 def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"):
1550     return _Unpickler(file, fix_imports=fix_imports,
1551                      encoding=encoding, errors=errors).load()
1552 
1553 def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"):
1554     if isinstance(s, str):
1555         raise TypeError("Can't load pickle from unicode string")
1556     file = io.BytesIO(s)
1557     return _Unpickler(file, fix_imports=fix_imports,
1558                       encoding=encoding, errors=errors).load()
1559 
1560 # Use the faster _pickle if possible
1561 try:
1562     from _pickle import (
1563         PickleError,
1564         PicklingError,
1565         UnpicklingError,
1566         Pickler,
1567         Unpickler,
1568         dump,
1569         dumps,
1570         load,
1571         loads
1572     )
1573 except ImportError:
1574     Pickler, Unpickler = _Pickler, _Unpickler
1575     dump, dumps, load, loads = _dump, _dumps, _load, _loads
1576 
1577 # Doctest
1578 def _test():
1579     import doctest
1580     return doctest.testmod()
1581 
1582 if __name__ == "__main__":
1583     import argparse
1584     parser = argparse.ArgumentParser(
1585         description='display contents of the pickle files')
1586     parser.add_argument(
1587         'pickle_file', type=argparse.FileType('br'),
1588         nargs='*', help='the pickle file')
1589     parser.add_argument(
1590         '-t', '--test', action='store_true',
1591         help='run self-test suite')
1592     parser.add_argument(
1593         '-v', action='store_true',
1594         help='run verbosely; only affects self-test run')
1595     args = parser.parse_args()
1596     if args.test:
1597         _test()
1598     else:
1599         if not args.pickle_file:
1600             parser.print_help()
1601         else:
1602             import pprint
1603             for f in args.pickle_file:
1604                 obj = load(f)
1605                 pprint.pprint(obj)
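As the UnpicklingError docstring above warns, loading a pickle can resolve and call arbitrary globals, and find_class() is the hook the module provides for controlling that ("Subclasses may override this"). Below is a minimal sketch of one common mitigation, assuming that only a small whitelist of builtins should ever be resolvable from a GLOBAL/STACK_GLOBAL opcode; the whitelist contents and helper names are illustrative.

import builtins
import io
import pickle

class RestrictedUnpickler(pickle.Unpickler):
    # Whitelist of harmless built-in names that untrusted pickles may reference.
    _ALLOWED = {"list", "dict", "set", "tuple", "frozenset"}

    def find_class(self, module, name):
        if module == "builtins" and name in self._ALLOWED:
            return getattr(builtins, name)
        raise pickle.UnpicklingError(
            "global '%s.%s' is forbidden" % (module, name))

def restricted_loads(data):
    """loads() equivalent that refuses to resolve unexpected globals."""
    return RestrictedUnpickler(io.BytesIO(data)).load()

# Plain containers round-trip normally...
assert restricted_loads(pickle.dumps([1, 2, 3])) == [1, 2, 3]

# ...but a pickle that references an arbitrary global (here os.system) is refused.
try:
    restricted_loads(b"cos\nsystem\n(S'echo pwned'\ntR.")
except pickle.UnpicklingError:
    print("blocked as expected")

With the override in place the forbidden lookup is rejected when the GLOBAL opcode is decoded, before REDUCE can call anything.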