# Python module: pickle
"""Create portable serialized representations of Python objects.

See module copyreg for a mechanism for registering custom picklers.
See module pickletools source for extensive comments.

Classes:

    Pickler
    Unpickler

Functions:

    dump(object, file)
    dumps(object) -> string
    load(file) -> object
    loads(string) -> object

Misc variables:

    __version__
    format_version
    compatible_formats

"""

from types import FunctionType
from copyreg import dispatch_table
from copyreg import _extension_registry, _inverted_registry, _extension_cache
from itertools import islice
from functools import partial
import sys
from sys import maxsize
from struct import pack, unpack
import re
import io
import codecs
import _compat_pickle

__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "4.0"              # File format version we write
compatible_formats = ["1.0",        # Original protocol 0
                      "1.1",        # Protocol 0 with INST added
                      "1.2",        # Original protocol 1
                      "1.3",        # Protocol 1 with BINFLOAT added
                      "2.0",        # Protocol 2
                      "3.0",        # Protocol 3
                      "4.0",        # Protocol 4
                      ]             # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 4

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# We intentionally write a protocol that Python 2.x cannot read;
# there are too many issues with that.
DEFAULT_PROTOCOL = 3


class PickleError(Exception):
    """A common base class for the other pickling exceptions."""
    pass


class PicklingError(PickleError):
    """This exception is raised when an unpicklable object is passed to the
    dump() method.

    """
    pass


class UnpicklingError(PickleError):
    """This exception is raised when there is a problem unpickling an object,
    such as a security violation.

    Note that other exceptions may also be raised during unpickling, including
    (but not necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.

    """
    pass


# An instance of _Stop is raised by Unpickler.load_stop() in response to
# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    def __init__(self, value):
        self.value = value

# Jython has PyStringMap; it's a dict subclass with string keys
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT    = b'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   #  "     "   ;    "      "       "      " < 256 bytes

# Protocol 4
SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])


class _Framer:
    """Buffer protocol-4 output and emit it as FRAME-prefixed chunks.

    While framing is active, writes accumulate in an in-memory buffer;
    once the buffer reaches _FRAME_SIZE_TARGET (or on a forced commit)
    the buffered bytes are flushed to the underlying file preceded by a
    FRAME opcode and an 8-byte little-endian length.
    """

    _FRAME_SIZE_TARGET = 64 * 1024

    def __init__(self, file_write):
        # file_write: the write() method of the underlying binary file.
        self.file_write = file_write
        self.current_frame = None

    def start_framing(self):
        """Begin buffering output into a new frame."""
        self.current_frame = io.BytesIO()

    def end_framing(self):
        """Flush any pending frame data and stop framing."""
        if self.current_frame and self.current_frame.tell() > 0:
            self.commit_frame(force=True)
            self.current_frame = None

    def commit_frame(self, force=False):
        """Emit the buffered frame if it is large enough (or *force*)."""
        frame = self.current_frame
        if not frame:
            return
        if frame.tell() >= self._FRAME_SIZE_TARGET or force:
            # getbuffer() avoids copying the frame payload.
            with frame.getbuffer() as payload:
                emit = self.file_write
                emit(FRAME)
                emit(pack("<Q", len(payload)))
                emit(payload)
            # Reuse the buffer for the next frame.
            frame.seek(0)
            frame.truncate()

    def write(self, data):
        """Write *data* into the current frame, or straight through."""
        frame = self.current_frame
        if frame is not None:
            return frame.write(data)
        return self.file_write(data)


class _Unframer:
    """Mirror of _Framer: serve reads from the current frame's buffer,
    falling back to the underlying file between frames."""

    def __init__(self, file_read, file_readline, file_tell=None):
        # NOTE(review): *file_tell* is accepted but never used — kept
        # only for interface compatibility.
        self.file_read = file_read
        self.file_readline = file_readline
        self.current_frame = None

    def read(self, n):
        """Read exactly *n* bytes, honoring frame boundaries."""
        frame = self.current_frame
        if frame is None:
            return self.file_read(n)
        data = frame.read(n)
        if not data and n != 0:
            # Frame exhausted exactly at a boundary: leave framing mode.
            self.current_frame = None
            return self.file_read(n)
        if len(data) < n:
            raise UnpicklingError(
                "pickle exhausted before end of frame")
        return data

    def readline(self):
        """Read one newline-terminated line, honoring frame boundaries."""
        frame = self.current_frame
        if frame is None:
            return self.file_readline()
        data = frame.readline()
        if not data:
            self.current_frame = None
            return self.file_readline()
        if data[-1] != b'\n'[0]:
            raise UnpicklingError(
                "pickle exhausted before end of frame")
        return data

    def load_frame(self, frame_size):
        """Slurp the next *frame_size* bytes and make them the frame."""
        if self.current_frame and self.current_frame.read() != b'':
            raise UnpicklingError(
                "beginning of a new frame before end of current frame")
        self.current_frame = io.BytesIO(self.file_read(frame_size))


# Tools used for pickling.
261 262 def _getattribute(obj, name): 263 for subpath in name.split('.'): 264 if subpath == '<locals>': 265 raise AttributeError("Can't get local attribute {!r} on {!r}" 266 .format(name, obj)) 267 try: 268 parent = obj 269 obj = getattr(obj, subpath) 270 except AttributeError: 271 raise AttributeError("Can't get attribute {!r} on {!r}" 272 .format(name, obj)) 273 return obj, parent 274 275 def whichmodule(obj, name): 276 """Find the module an object belong to.""" 277 module_name = getattr(obj, '__module__', None) 278 if module_name is not None: 279 return module_name 280 # Protect the iteration by using a list copy of sys.modules against dynamic 281 # modules that trigger imports of other modules upon calls to getattr. 282 for module_name, module in list(sys.modules.items()): 283 if module_name == '__main__' or module is None: 284 continue 285 try: 286 if _getattribute(module, name)[0] is obj: 287 return module_name 288 except AttributeError: 289 pass 290 return '__main__' 291 292 def encode_long(x): 293 r"""Encode a long to a two's complement little-endian binary string. 294 Note that 0 is a special case, returning an empty string, to save a 295 byte in the LONG1 pickling context. 296 297 >>> encode_long(0) 298 b'' 299 >>> encode_long(255) 300 b'\xff\x00' 301 >>> encode_long(32767) 302 b'\xff\x7f' 303 >>> encode_long(-256) 304 b'\x00\xff' 305 >>> encode_long(-32768) 306 b'\x00\x80' 307 >>> encode_long(-128) 308 b'\x80' 309 >>> encode_long(127) 310 b'\x7f' 311 >>> 312 """ 313 if x == 0: 314 return b'' 315 nbytes = (x.bit_length() >> 3) + 1 316 result = x.to_bytes(nbytes, byteorder='little', signed=True) 317 if x < 0 and nbytes > 1: 318 if result[-1] == 0xff and (result[-2] & 0x80) != 0: 319 result = result[:-1] 320 return result 321 322 def decode_long(data): 323 r"""Decode a long from a two's complement little-endian binary string. 
324 325 >>> decode_long(b'') 326 0 327 >>> decode_long(b"\xff\x00") 328 255 329 >>> decode_long(b"\xff\x7f") 330 32767 331 >>> decode_long(b"\x00\xff") 332 -256 333 >>> decode_long(b"\x00\x80") 334 -32768 335 >>> decode_long(b"\x80") 336 -128 337 >>> decode_long(b"\x7f") 338 127 339 """ 340 return int.from_bytes(data, byteorder='little', signed=True) 341 342 343 # Pickling machinery 344 345 class _Pickler: 346 347 def __init__(self, file, protocol=None, *, fix_imports=True): 348 """This takes a binary file for writing a pickle data stream. 349 350 The optional *protocol* argument tells the pickler to use the 351 given protocol; supported protocols are 0, 1, 2, 3 and 4. The 352 default protocol is 3; a backward-incompatible protocol designed 353 for Python 3. 354 355 Specifying a negative protocol version selects the highest 356 protocol version supported. The higher the protocol used, the 357 more recent the version of Python needed to read the pickle 358 produced. 359 360 The *file* argument must have a write() method that accepts a 361 single bytes argument. It can thus be a file object opened for 362 binary writing, an io.BytesIO instance, or any other custom 363 object that meets this interface. 364 365 If *fix_imports* is True and *protocol* is less than 3, pickle 366 will try to map the new Python 3 names to the old module names 367 used in Python 2, so that the pickle data stream is readable 368 with Python 2. 
369 """ 370 if protocol is None: 371 protocol = DEFAULT_PROTOCOL 372 if protocol < 0: 373 protocol = HIGHEST_PROTOCOL 374 elif not 0 <= protocol <= HIGHEST_PROTOCOL: 375 raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL) 376 try: 377 self._file_write = file.write 378 except AttributeError: 379 raise TypeError("file must have a 'write' attribute") 380 self.framer = _Framer(self._file_write) 381 self.write = self.framer.write 382 self.memo = {} 383 self.proto = int(protocol) 384 self.bin = protocol >= 1 385 self.fast = 0 386 self.fix_imports = fix_imports and protocol < 3 387 388 def clear_memo(self): 389 """Clears the pickler's "memo". 390 391 The memo is the data structure that remembers which objects the 392 pickler has already seen, so that shared or recursive objects 393 are pickled by reference and not by value. This method is 394 useful when re-using picklers. 395 """ 396 self.memo.clear() 397 398 def dump(self, obj): 399 """Write a pickled representation of obj to the open file.""" 400 # Check whether Pickler was initialized correctly. This is 401 # only needed to mimic the behavior of _pickle.Pickler.dump(). 402 if not hasattr(self, "_file_write"): 403 raise PicklingError("Pickler.__init__() was not called by " 404 "%s.__init__()" % (self.__class__.__name__,)) 405 if self.proto >= 2: 406 self.write(PROTO + pack("<B", self.proto)) 407 if self.proto >= 4: 408 self.framer.start_framing() 409 self.save(obj) 410 self.write(STOP) 411 self.framer.end_framing() 412 413 def memoize(self, obj): 414 """Store an object in the memo.""" 415 416 # The Pickler memo is a dictionary mapping object ids to 2-tuples 417 # that contain the Unpickler memo key and the object being memoized. 418 # The memo key is written to the pickle and will become 419 # the key in the Unpickler's memo. The object is stored in the 420 # Pickler memo so that transient objects are kept alive during 421 # pickling. 
422 423 # The use of the Unpickler memo length as the memo key is just a 424 # convention. The only requirement is that the memo values be unique. 425 # But there appears no advantage to any other scheme, and this 426 # scheme allows the Unpickler memo to be implemented as a plain (but 427 # growable) array, indexed by memo key. 428 if self.fast: 429 return 430 assert id(obj) not in self.memo 431 idx = len(self.memo) 432 self.write(self.put(idx)) 433 self.memo[id(obj)] = idx, obj 434 435 # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i. 436 def put(self, idx): 437 if self.proto >= 4: 438 return MEMOIZE 439 elif self.bin: 440 if idx < 256: 441 return BINPUT + pack("<B", idx) 442 else: 443 return LONG_BINPUT + pack("<I", idx) 444 else: 445 return PUT + repr(idx).encode("ascii") + b'\n' 446 447 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i. 448 def get(self, i): 449 if self.bin: 450 if i < 256: 451 return BINGET + pack("<B", i) 452 else: 453 return LONG_BINGET + pack("<I", i) 454 455 return GET + repr(i).encode("ascii") + b'\n' 456 457 def save(self, obj, save_persistent_id=True): 458 self.framer.commit_frame() 459 460 # Check for persistent id (defined by a subclass) 461 pid = self.persistent_id(obj) 462 if pid is not None and save_persistent_id: 463 self.save_pers(pid) 464 return 465 466 # Check the memo 467 x = self.memo.get(id(obj)) 468 if x is not None: 469 self.write(self.get(x[0])) 470 return 471 472 # Check the type dispatch table 473 t = type(obj) 474 f = self.dispatch.get(t) 475 if f is not None: 476 f(self, obj) # Call unbound method with explicit self 477 return 478 479 # Check private dispatch table if any, or else copyreg.dispatch_table 480 reduce = getattr(self, 'dispatch_table', dispatch_table).get(t) 481 if reduce is not None: 482 rv = reduce(obj) 483 else: 484 # Check for a class with a custom metaclass; treat as regular class 485 try: 486 issc = issubclass(t, type) 487 except TypeError: # t is not a class 
(old Boost; see SF #502085) 488 issc = False 489 if issc: 490 self.save_global(obj) 491 return 492 493 # Check for a __reduce_ex__ method, fall back to __reduce__ 494 reduce = getattr(obj, "__reduce_ex__", None) 495 if reduce is not None: 496 rv = reduce(self.proto) 497 else: 498 reduce = getattr(obj, "__reduce__", None) 499 if reduce is not None: 500 rv = reduce() 501 else: 502 raise PicklingError("Can't pickle %r object: %r" % 503 (t.__name__, obj)) 504 505 # Check for string returned by reduce(), meaning "save as global" 506 if isinstance(rv, str): 507 self.save_global(obj, rv) 508 return 509 510 # Assert that reduce() returned a tuple 511 if not isinstance(rv, tuple): 512 raise PicklingError("%s must return string or tuple" % reduce) 513 514 # Assert that it returned an appropriately sized tuple 515 l = len(rv) 516 if not (2 <= l <= 5): 517 raise PicklingError("Tuple returned by %s must have " 518 "two to five elements" % reduce) 519 520 # Save the reduce() output and finally memoize the object 521 self.save_reduce(obj=obj, *rv) 522 523 def persistent_id(self, obj): 524 # This exists so a subclass can override it 525 return None 526 527 def save_pers(self, pid): 528 # Save a persistent id reference 529 if self.bin: 530 self.save(pid, save_persistent_id=False) 531 self.write(BINPERSID) 532 else: 533 try: 534 self.write(PERSID + str(pid).encode("ascii") + b'\n') 535 except UnicodeEncodeError: 536 raise PicklingError( 537 "persistent IDs in protocol 0 must be ASCII strings") 538 539 def save_reduce(self, func, args, state=None, listitems=None, 540 dictitems=None, obj=None): 541 # This API is called by some subclasses 542 543 if not isinstance(args, tuple): 544 raise PicklingError("args from save_reduce() must be a tuple") 545 if not callable(func): 546 raise PicklingError("func from save_reduce() must be callable") 547 548 save = self.save 549 write = self.write 550 551 func_name = getattr(func, "__name__", "") 552 if self.proto >= 2 and func_name == 
"__newobj_ex__": 553 cls, args, kwargs = args 554 if not hasattr(cls, "__new__"): 555 raise PicklingError("args[0] from {} args has no __new__" 556 .format(func_name)) 557 if obj is not None and cls is not obj.__class__: 558 raise PicklingError("args[0] from {} args has the wrong class" 559 .format(func_name)) 560 if self.proto >= 4: 561 save(cls) 562 save(args) 563 save(kwargs) 564 write(NEWOBJ_EX) 565 else: 566 func = partial(cls.__new__, cls, *args, **kwargs) 567 save(func) 568 save(()) 569 write(REDUCE) 570 elif self.proto >= 2 and func_name == "__newobj__": 571 # A __reduce__ implementation can direct protocol 2 or newer to 572 # use the more efficient NEWOBJ opcode, while still 573 # allowing protocol 0 and 1 to work normally. For this to 574 # work, the function returned by __reduce__ should be 575 # called __newobj__, and its first argument should be a 576 # class. The implementation for __newobj__ 577 # should be as follows, although pickle has no way to 578 # verify this: 579 # 580 # def __newobj__(cls, *args): 581 # return cls.__new__(cls, *args) 582 # 583 # Protocols 0 and 1 will pickle a reference to __newobj__, 584 # while protocol 2 (and above) will pickle a reference to 585 # cls, the remaining args tuple, and the NEWOBJ code, 586 # which calls cls.__new__(cls, *args) at unpickling time 587 # (see load_newobj below). If __reduce__ returns a 588 # three-tuple, the state from the third tuple item will be 589 # pickled regardless of the protocol, calling __setstate__ 590 # at unpickling time (see load_build below). 591 # 592 # Note that no standard __newobj__ implementation exists; 593 # you have to provide your own. This is to enforce 594 # compatibility with Python 2.2 (pickles written using 595 # protocol 0 or 1 in Python 2.3 should be unpicklable by 596 # Python 2.2). 
597 cls = args[0] 598 if not hasattr(cls, "__new__"): 599 raise PicklingError( 600 "args[0] from __newobj__ args has no __new__") 601 if obj is not None and cls is not obj.__class__: 602 raise PicklingError( 603 "args[0] from __newobj__ args has the wrong class") 604 args = args[1:] 605 save(cls) 606 save(args) 607 write(NEWOBJ) 608 else: 609 save(func) 610 save(args) 611 write(REDUCE) 612 613 if obj is not None: 614 # If the object is already in the memo, this means it is 615 # recursive. In this case, throw away everything we put on the 616 # stack, and fetch the object back from the memo. 617 if id(obj) in self.memo: 618 write(POP + self.get(self.memo[id(obj)][0])) 619 else: 620 self.memoize(obj) 621 622 # More new special cases (that work with older protocols as 623 # well): when __reduce__ returns a tuple with 4 or 5 items, 624 # the 4th and 5th item should be iterators that provide list 625 # items and dict items (as (key, value) tuples), or None. 626 627 if listitems is not None: 628 self._batch_appends(listitems) 629 630 if dictitems is not None: 631 self._batch_setitems(dictitems) 632 633 if state is not None: 634 save(state) 635 write(BUILD) 636 637 # Methods below this point are dispatched through the dispatch table 638 639 dispatch = {} 640 641 def save_none(self, obj): 642 self.write(NONE) 643 dispatch[type(None)] = save_none 644 645 def save_bool(self, obj): 646 if self.proto >= 2: 647 self.write(NEWTRUE if obj else NEWFALSE) 648 else: 649 self.write(TRUE if obj else FALSE) 650 dispatch[bool] = save_bool 651 652 def save_long(self, obj): 653 if self.bin: 654 # If the int is small enough to fit in a signed 4-byte 2's-comp 655 # format, we can store it more efficiently than the general 656 # case. 
657 # First one- and two-byte unsigned ints: 658 if obj >= 0: 659 if obj <= 0xff: 660 self.write(BININT1 + pack("<B", obj)) 661 return 662 if obj <= 0xffff: 663 self.write(BININT2 + pack("<H", obj)) 664 return 665 # Next check for 4-byte signed ints: 666 if -0x80000000 <= obj <= 0x7fffffff: 667 self.write(BININT + pack("<i", obj)) 668 return 669 if self.proto >= 2: 670 encoded = encode_long(obj) 671 n = len(encoded) 672 if n < 256: 673 self.write(LONG1 + pack("<B", n) + encoded) 674 else: 675 self.write(LONG4 + pack("<i", n) + encoded) 676 return 677 self.write(LONG + repr(obj).encode("ascii") + b'L\n') 678 dispatch[int] = save_long 679 680 def save_float(self, obj): 681 if self.bin: 682 self.write(BINFLOAT + pack('>d', obj)) 683 else: 684 self.write(FLOAT + repr(obj).encode("ascii") + b'\n') 685 dispatch[float] = save_float 686 687 def save_bytes(self, obj): 688 if self.proto < 3: 689 if not obj: # bytes object is empty 690 self.save_reduce(bytes, (), obj=obj) 691 else: 692 self.save_reduce(codecs.encode, 693 (str(obj, 'latin1'), 'latin1'), obj=obj) 694 return 695 n = len(obj) 696 if n <= 0xff: 697 self.write(SHORT_BINBYTES + pack("<B", n) + obj) 698 elif n > 0xffffffff and self.proto >= 4: 699 self.write(BINBYTES8 + pack("<Q", n) + obj) 700 else: 701 self.write(BINBYTES + pack("<I", n) + obj) 702 self.memoize(obj) 703 dispatch[bytes] = save_bytes 704 705 def save_str(self, obj): 706 if self.bin: 707 encoded = obj.encode('utf-8', 'surrogatepass') 708 n = len(encoded) 709 if n <= 0xff and self.proto >= 4: 710 self.write(SHORT_BINUNICODE + pack("<B", n) + encoded) 711 elif n > 0xffffffff and self.proto >= 4: 712 self.write(BINUNICODE8 + pack("<Q", n) + encoded) 713 else: 714 self.write(BINUNICODE + pack("<I", n) + encoded) 715 else: 716 obj = obj.replace("\\", "\\u005c") 717 obj = obj.replace("\n", "\\u000a") 718 self.write(UNICODE + obj.encode('raw-unicode-escape') + 719 b'\n') 720 self.memoize(obj) 721 dispatch[str] = save_str 722 723 def save_tuple(self, obj): 
724 if not obj: # tuple is empty 725 if self.bin: 726 self.write(EMPTY_TUPLE) 727 else: 728 self.write(MARK + TUPLE) 729 return 730 731 n = len(obj) 732 save = self.save 733 memo = self.memo 734 if n <= 3 and self.proto >= 2: 735 for element in obj: 736 save(element) 737 # Subtle. Same as in the big comment below. 738 if id(obj) in memo: 739 get = self.get(memo[id(obj)][0]) 740 self.write(POP * n + get) 741 else: 742 self.write(_tuplesize2code[n]) 743 self.memoize(obj) 744 return 745 746 # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple 747 # has more than 3 elements. 748 write = self.write 749 write(MARK) 750 for element in obj: 751 save(element) 752 753 if id(obj) in memo: 754 # Subtle. d was not in memo when we entered save_tuple(), so 755 # the process of saving the tuple's elements must have saved 756 # the tuple itself: the tuple is recursive. The proper action 757 # now is to throw away everything we put on the stack, and 758 # simply GET the tuple (it's already constructed). This check 759 # could have been done in the "for element" loop instead, but 760 # recursive tuples are a rare thing. 761 get = self.get(memo[id(obj)][0]) 762 if self.bin: 763 write(POP_MARK + get) 764 else: # proto 0 -- POP_MARK not available 765 write(POP * (n+1) + get) 766 return 767 768 # No recursion. 
769 write(TUPLE) 770 self.memoize(obj) 771 772 dispatch[tuple] = save_tuple 773 774 def save_list(self, obj): 775 if self.bin: 776 self.write(EMPTY_LIST) 777 else: # proto 0 -- can't use EMPTY_LIST 778 self.write(MARK + LIST) 779 780 self.memoize(obj) 781 self._batch_appends(obj) 782 783 dispatch[list] = save_list 784 785 _BATCHSIZE = 1000 786 787 def _batch_appends(self, items): 788 # Helper to batch up APPENDS sequences 789 save = self.save 790 write = self.write 791 792 if not self.bin: 793 for x in items: 794 save(x) 795 write(APPEND) 796 return 797 798 it = iter(items) 799 while True: 800 tmp = list(islice(it, self._BATCHSIZE)) 801 n = len(tmp) 802 if n > 1: 803 write(MARK) 804 for x in tmp: 805 save(x) 806 write(APPENDS) 807 elif n: 808 save(tmp[0]) 809 write(APPEND) 810 # else tmp is empty, and we're done 811 if n < self._BATCHSIZE: 812 return 813 814 def save_dict(self, obj): 815 if self.bin: 816 self.write(EMPTY_DICT) 817 else: # proto 0 -- can't use EMPTY_DICT 818 self.write(MARK + DICT) 819 820 self.memoize(obj) 821 self._batch_setitems(obj.items()) 822 823 dispatch[dict] = save_dict 824 if PyStringMap is not None: 825 dispatch[PyStringMap] = save_dict 826 827 def _batch_setitems(self, items): 828 # Helper to batch up SETITEMS sequences; proto >= 1 only 829 save = self.save 830 write = self.write 831 832 if not self.bin: 833 for k, v in items: 834 save(k) 835 save(v) 836 write(SETITEM) 837 return 838 839 it = iter(items) 840 while True: 841 tmp = list(islice(it, self._BATCHSIZE)) 842 n = len(tmp) 843 if n > 1: 844 write(MARK) 845 for k, v in tmp: 846 save(k) 847 save(v) 848 write(SETITEMS) 849 elif n: 850 k, v = tmp[0] 851 save(k) 852 save(v) 853 write(SETITEM) 854 # else tmp is empty, and we're done 855 if n < self._BATCHSIZE: 856 return 857 858 def save_set(self, obj): 859 save = self.save 860 write = self.write 861 862 if self.proto < 4: 863 self.save_reduce(set, (list(obj),), obj=obj) 864 return 865 866 write(EMPTY_SET) 867 self.memoize(obj) 868 869 
it = iter(obj) 870 while True: 871 batch = list(islice(it, self._BATCHSIZE)) 872 n = len(batch) 873 if n > 0: 874 write(MARK) 875 for item in batch: 876 save(item) 877 write(ADDITEMS) 878 if n < self._BATCHSIZE: 879 return 880 dispatch[set] = save_set 881 882 def save_frozenset(self, obj): 883 save = self.save 884 write = self.write 885 886 if self.proto < 4: 887 self.save_reduce(frozenset, (list(obj),), obj=obj) 888 return 889 890 write(MARK) 891 for item in obj: 892 save(item) 893 894 if id(obj) in self.memo: 895 # If the object is already in the memo, this means it is 896 # recursive. In this case, throw away everything we put on the 897 # stack, and fetch the object back from the memo. 898 write(POP_MARK + self.get(self.memo[id(obj)][0])) 899 return 900 901 write(FROZENSET) 902 self.memoize(obj) 903 dispatch[frozenset] = save_frozenset 904 905 def save_global(self, obj, name=None): 906 write = self.write 907 memo = self.memo 908 909 if name is None: 910 name = getattr(obj, '__qualname__', None) 911 if name is None: 912 name = obj.__name__ 913 914 module_name = whichmodule(obj, name) 915 try: 916 __import__(module_name, level=0) 917 module = sys.modules[module_name] 918 obj2, parent = _getattribute(module, name) 919 except (ImportError, KeyError, AttributeError): 920 raise PicklingError( 921 "Can't pickle %r: it's not found as %s.%s" % 922 (obj, module_name, name)) 923 else: 924 if obj2 is not obj: 925 raise PicklingError( 926 "Can't pickle %r: it's not the same object as %s.%s" % 927 (obj, module_name, name)) 928 929 if self.proto >= 2: 930 code = _extension_registry.get((module_name, name)) 931 if code: 932 assert code > 0 933 if code <= 0xff: 934 write(EXT1 + pack("<B", code)) 935 elif code <= 0xffff: 936 write(EXT2 + pack("<H", code)) 937 else: 938 write(EXT4 + pack("<i", code)) 939 return 940 lastname = name.rpartition('.')[2] 941 if parent is module: 942 name = lastname 943 # Non-ASCII identifiers are supported only with protocols >= 3. 
944 if self.proto >= 4: 945 self.save(module_name) 946 self.save(name) 947 write(STACK_GLOBAL) 948 elif parent is not module: 949 self.save_reduce(getattr, (parent, lastname)) 950 elif self.proto >= 3: 951 write(GLOBAL + bytes(module_name, "utf-8") + b'\n' + 952 bytes(name, "utf-8") + b'\n') 953 else: 954 if self.fix_imports: 955 r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING 956 r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING 957 if (module_name, name) in r_name_mapping: 958 module_name, name = r_name_mapping[(module_name, name)] 959 elif module_name in r_import_mapping: 960 module_name = r_import_mapping[module_name] 961 try: 962 write(GLOBAL + bytes(module_name, "ascii") + b'\n' + 963 bytes(name, "ascii") + b'\n') 964 except UnicodeEncodeError: 965 raise PicklingError( 966 "can't pickle global identifier '%s.%s' using " 967 "pickle protocol %i" % (module, name, self.proto)) 968 969 self.memoize(obj) 970 971 def save_type(self, obj): 972 if obj is type(None): 973 return self.save_reduce(type, (None,), obj=obj) 974 elif obj is type(NotImplemented): 975 return self.save_reduce(type, (NotImplemented,), obj=obj) 976 elif obj is type(...): 977 return self.save_reduce(type, (...,), obj=obj) 978 return self.save_global(obj) 979 980 dispatch[FunctionType] = save_global 981 dispatch[type] = save_type 982 983 984 # Unpickling machinery 985 986 class _Unpickler: 987 988 def __init__(self, file, *, fix_imports=True, 989 encoding="ASCII", errors="strict"): 990 """This takes a binary file for reading a pickle data stream. 991 992 The protocol version of the pickle is detected automatically, so 993 no proto argument is needed. 994 995 The argument *file* must have two methods, a read() method that 996 takes an integer argument, and a readline() method that requires 997 no arguments. Both methods should return bytes. Thus *file* 998 can be a binary file object opened for reading, an io.BytesIO 999 object, or any other custom object that meets this interface. 
1000 1001 The file-like object must have two methods, a read() method 1002 that takes an integer argument, and a readline() method that 1003 requires no arguments. Both methods should return bytes. 1004 Thus file-like object can be a binary file object opened for 1005 reading, a BytesIO object, or any other custom object that 1006 meets this interface. 1007 1008 Optional keyword arguments are *fix_imports*, *encoding* and 1009 *errors*, which are used to control compatibility support for 1010 pickle stream generated by Python 2. If *fix_imports* is True, 1011 pickle will try to map the old Python 2 names to the new names 1012 used in Python 3. The *encoding* and *errors* tell pickle how 1013 to decode 8-bit string instances pickled by Python 2; these 1014 default to 'ASCII' and 'strict', respectively. *encoding* can be 1015 'bytes' to read theses 8-bit string instances as bytes objects. 1016 """ 1017 self._file_readline = file.readline 1018 self._file_read = file.read 1019 self.memo = {} 1020 self.encoding = encoding 1021 self.errors = errors 1022 self.proto = 0 1023 self.fix_imports = fix_imports 1024 1025 def load(self): 1026 """Read a pickled object representation from the open file. 1027 1028 Return the reconstituted object hierarchy specified in the file. 1029 """ 1030 # Check whether Unpickler was initialized correctly. This is 1031 # only needed to mimic the behavior of _pickle.Unpickler.dump(). 
1032 if not hasattr(self, "_file_read"): 1033 raise UnpicklingError("Unpickler.__init__() was not called by " 1034 "%s.__init__()" % (self.__class__.__name__,)) 1035 self._unframer = _Unframer(self._file_read, self._file_readline) 1036 self.read = self._unframer.read 1037 self.readline = self._unframer.readline 1038 self.metastack = [] 1039 self.stack = [] 1040 self.append = self.stack.append 1041 self.proto = 0 1042 read = self.read 1043 dispatch = self.dispatch 1044 try: 1045 while True: 1046 key = read(1) 1047 if not key: 1048 raise EOFError 1049 assert isinstance(key, bytes_types) 1050 dispatch[key[0]](self) 1051 except _Stop as stopinst: 1052 return stopinst.value 1053 1054 # Return a list of items pushed in the stack after last MARK instruction. 1055 def pop_mark(self): 1056 items = self.stack 1057 self.stack = self.metastack.pop() 1058 self.append = self.stack.append 1059 return items 1060 1061 def persistent_load(self, pid): 1062 raise UnpicklingError("unsupported persistent id encountered") 1063 1064 dispatch = {} 1065 1066 def load_proto(self): 1067 proto = self.read(1)[0] 1068 if not 0 <= proto <= HIGHEST_PROTOCOL: 1069 raise ValueError("unsupported pickle protocol: %d" % proto) 1070 self.proto = proto 1071 dispatch[PROTO[0]] = load_proto 1072 1073 def load_frame(self): 1074 frame_size, = unpack('<Q', self.read(8)) 1075 if frame_size > sys.maxsize: 1076 raise ValueError("frame size > sys.maxsize: %d" % frame_size) 1077 self._unframer.load_frame(frame_size) 1078 dispatch[FRAME[0]] = load_frame 1079 1080 def load_persid(self): 1081 try: 1082 pid = self.readline()[:-1].decode("ascii") 1083 except UnicodeDecodeError: 1084 raise UnpicklingError( 1085 "persistent IDs in protocol 0 must be ASCII strings") 1086 self.append(self.persistent_load(pid)) 1087 dispatch[PERSID[0]] = load_persid 1088 1089 def load_binpersid(self): 1090 pid = self.stack.pop() 1091 self.append(self.persistent_load(pid)) 1092 dispatch[BINPERSID[0]] = load_binpersid 1093 1094 def 
load_none(self): 1095 self.append(None) 1096 dispatch[NONE[0]] = load_none 1097 1098 def load_false(self): 1099 self.append(False) 1100 dispatch[NEWFALSE[0]] = load_false 1101 1102 def load_true(self): 1103 self.append(True) 1104 dispatch[NEWTRUE[0]] = load_true 1105 1106 def load_int(self): 1107 data = self.readline() 1108 if data == FALSE[1:]: 1109 val = False 1110 elif data == TRUE[1:]: 1111 val = True 1112 else: 1113 val = int(data, 0) 1114 self.append(val) 1115 dispatch[INT[0]] = load_int 1116 1117 def load_binint(self): 1118 self.append(unpack('<i', self.read(4))[0]) 1119 dispatch[BININT[0]] = load_binint 1120 1121 def load_binint1(self): 1122 self.append(self.read(1)[0]) 1123 dispatch[BININT1[0]] = load_binint1 1124 1125 def load_binint2(self): 1126 self.append(unpack('<H', self.read(2))[0]) 1127 dispatch[BININT2[0]] = load_binint2 1128 1129 def load_long(self): 1130 val = self.readline()[:-1] 1131 if val and val[-1] == b'L'[0]: 1132 val = val[:-1] 1133 self.append(int(val, 0)) 1134 dispatch[LONG[0]] = load_long 1135 1136 def load_long1(self): 1137 n = self.read(1)[0] 1138 data = self.read(n) 1139 self.append(decode_long(data)) 1140 dispatch[LONG1[0]] = load_long1 1141 1142 def load_long4(self): 1143 n, = unpack('<i', self.read(4)) 1144 if n < 0: 1145 # Corrupt or hostile pickle -- we never write one like this 1146 raise UnpicklingError("LONG pickle has negative byte count") 1147 data = self.read(n) 1148 self.append(decode_long(data)) 1149 dispatch[LONG4[0]] = load_long4 1150 1151 def load_float(self): 1152 self.append(float(self.readline()[:-1])) 1153 dispatch[FLOAT[0]] = load_float 1154 1155 def load_binfloat(self): 1156 self.append(unpack('>d', self.read(8))[0]) 1157 dispatch[BINFLOAT[0]] = load_binfloat 1158 1159 def _decode_string(self, value): 1160 # Used to allow strings from Python 2 to be decoded either as 1161 # bytes or Unicode strings. This should be used only with the 1162 # STRING, BINSTRING and SHORT_BINSTRING opcodes. 
1163 if self.encoding == "bytes": 1164 return value 1165 else: 1166 return value.decode(self.encoding, self.errors) 1167 1168 def load_string(self): 1169 data = self.readline()[:-1] 1170 # Strip outermost quotes 1171 if len(data) >= 2 and data[0] == data[-1] and data[0] in b'"\'': 1172 data = data[1:-1] 1173 else: 1174 raise UnpicklingError("the STRING opcode argument must be quoted") 1175 self.append(self._decode_string(codecs.escape_decode(data)[0])) 1176 dispatch[STRING[0]] = load_string 1177 1178 def load_binstring(self): 1179 # Deprecated BINSTRING uses signed 32-bit length 1180 len, = unpack('<i', self.read(4)) 1181 if len < 0: 1182 raise UnpicklingError("BINSTRING pickle has negative byte count") 1183 data = self.read(len) 1184 self.append(self._decode_string(data)) 1185 dispatch[BINSTRING[0]] = load_binstring 1186 1187 def load_binbytes(self): 1188 len, = unpack('<I', self.read(4)) 1189 if len > maxsize: 1190 raise UnpicklingError("BINBYTES exceeds system's maximum size " 1191 "of %d bytes" % maxsize) 1192 self.append(self.read(len)) 1193 dispatch[BINBYTES[0]] = load_binbytes 1194 1195 def load_unicode(self): 1196 self.append(str(self.readline()[:-1], 'raw-unicode-escape')) 1197 dispatch[UNICODE[0]] = load_unicode 1198 1199 def load_binunicode(self): 1200 len, = unpack('<I', self.read(4)) 1201 if len > maxsize: 1202 raise UnpicklingError("BINUNICODE exceeds system's maximum size " 1203 "of %d bytes" % maxsize) 1204 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1205 dispatch[BINUNICODE[0]] = load_binunicode 1206 1207 def load_binunicode8(self): 1208 len, = unpack('<Q', self.read(8)) 1209 if len > maxsize: 1210 raise UnpicklingError("BINUNICODE8 exceeds system's maximum size " 1211 "of %d bytes" % maxsize) 1212 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1213 dispatch[BINUNICODE8[0]] = load_binunicode8 1214 1215 def load_binbytes8(self): 1216 len, = unpack('<Q', self.read(8)) 1217 if len > maxsize: 1218 raise 
UnpicklingError("BINBYTES8 exceeds system's maximum size " 1219 "of %d bytes" % maxsize) 1220 self.append(self.read(len)) 1221 dispatch[BINBYTES8[0]] = load_binbytes8 1222 1223 def load_short_binstring(self): 1224 len = self.read(1)[0] 1225 data = self.read(len) 1226 self.append(self._decode_string(data)) 1227 dispatch[SHORT_BINSTRING[0]] = load_short_binstring 1228 1229 def load_short_binbytes(self): 1230 len = self.read(1)[0] 1231 self.append(self.read(len)) 1232 dispatch[SHORT_BINBYTES[0]] = load_short_binbytes 1233 1234 def load_short_binunicode(self): 1235 len = self.read(1)[0] 1236 self.append(str(self.read(len), 'utf-8', 'surrogatepass')) 1237 dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode 1238 1239 def load_tuple(self): 1240 items = self.pop_mark() 1241 self.append(tuple(items)) 1242 dispatch[TUPLE[0]] = load_tuple 1243 1244 def load_empty_tuple(self): 1245 self.append(()) 1246 dispatch[EMPTY_TUPLE[0]] = load_empty_tuple 1247 1248 def load_tuple1(self): 1249 self.stack[-1] = (self.stack[-1],) 1250 dispatch[TUPLE1[0]] = load_tuple1 1251 1252 def load_tuple2(self): 1253 self.stack[-2:] = [(self.stack[-2], self.stack[-1])] 1254 dispatch[TUPLE2[0]] = load_tuple2 1255 1256 def load_tuple3(self): 1257 self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])] 1258 dispatch[TUPLE3[0]] = load_tuple3 1259 1260 def load_empty_list(self): 1261 self.append([]) 1262 dispatch[EMPTY_LIST[0]] = load_empty_list 1263 1264 def load_empty_dictionary(self): 1265 self.append({}) 1266 dispatch[EMPTY_DICT[0]] = load_empty_dictionary 1267 1268 def load_empty_set(self): 1269 self.append(set()) 1270 dispatch[EMPTY_SET[0]] = load_empty_set 1271 1272 def load_frozenset(self): 1273 items = self.pop_mark() 1274 self.append(frozenset(items)) 1275 dispatch[FROZENSET[0]] = load_frozenset 1276 1277 def load_list(self): 1278 items = self.pop_mark() 1279 self.append(items) 1280 dispatch[LIST[0]] = load_list 1281 1282 def load_dict(self): 1283 items = self.pop_mark() 1284 d = 
{items[i]: items[i+1] 1285 for i in range(0, len(items), 2)} 1286 self.append(d) 1287 dispatch[DICT[0]] = load_dict 1288 1289 # INST and OBJ differ only in how they get a class object. It's not 1290 # only sensible to do the rest in a common routine, the two routines 1291 # previously diverged and grew different bugs. 1292 # klass is the class to instantiate, and k points to the topmost mark 1293 # object, following which are the arguments for klass.__init__. 1294 def _instantiate(self, klass, args): 1295 if (args or not isinstance(klass, type) or 1296 hasattr(klass, "__getinitargs__")): 1297 try: 1298 value = klass(*args) 1299 except TypeError as err: 1300 raise TypeError("in constructor for %s: %s" % 1301 (klass.__name__, str(err)), sys.exc_info()[2]) 1302 else: 1303 value = klass.__new__(klass) 1304 self.append(value) 1305 1306 def load_inst(self): 1307 module = self.readline()[:-1].decode("ascii") 1308 name = self.readline()[:-1].decode("ascii") 1309 klass = self.find_class(module, name) 1310 self._instantiate(klass, self.pop_mark()) 1311 dispatch[INST[0]] = load_inst 1312 1313 def load_obj(self): 1314 # Stack is ... markobject classobject arg1 arg2 ... 
1315 args = self.pop_mark() 1316 cls = args.pop(0) 1317 self._instantiate(cls, args) 1318 dispatch[OBJ[0]] = load_obj 1319 1320 def load_newobj(self): 1321 args = self.stack.pop() 1322 cls = self.stack.pop() 1323 obj = cls.__new__(cls, *args) 1324 self.append(obj) 1325 dispatch[NEWOBJ[0]] = load_newobj 1326 1327 def load_newobj_ex(self): 1328 kwargs = self.stack.pop() 1329 args = self.stack.pop() 1330 cls = self.stack.pop() 1331 obj = cls.__new__(cls, *args, **kwargs) 1332 self.append(obj) 1333 dispatch[NEWOBJ_EX[0]] = load_newobj_ex 1334 1335 def load_global(self): 1336 module = self.readline()[:-1].decode("utf-8") 1337 name = self.readline()[:-1].decode("utf-8") 1338 klass = self.find_class(module, name) 1339 self.append(klass) 1340 dispatch[GLOBAL[0]] = load_global 1341 1342 def load_stack_global(self): 1343 name = self.stack.pop() 1344 module = self.stack.pop() 1345 if type(name) is not str or type(module) is not str: 1346 raise UnpicklingError("STACK_GLOBAL requires str") 1347 self.append(self.find_class(module, name)) 1348 dispatch[STACK_GLOBAL[0]] = load_stack_global 1349 1350 def load_ext1(self): 1351 code = self.read(1)[0] 1352 self.get_extension(code) 1353 dispatch[EXT1[0]] = load_ext1 1354 1355 def load_ext2(self): 1356 code, = unpack('<H', self.read(2)) 1357 self.get_extension(code) 1358 dispatch[EXT2[0]] = load_ext2 1359 1360 def load_ext4(self): 1361 code, = unpack('<i', self.read(4)) 1362 self.get_extension(code) 1363 dispatch[EXT4[0]] = load_ext4 1364 1365 def get_extension(self, code): 1366 nil = [] 1367 obj = _extension_cache.get(code, nil) 1368 if obj is not nil: 1369 self.append(obj) 1370 return 1371 key = _inverted_registry.get(code) 1372 if not key: 1373 if code <= 0: # note that 0 is forbidden 1374 # Corrupt or hostile pickle. 
1375 raise UnpicklingError("EXT specifies code <= 0") 1376 raise ValueError("unregistered extension code %d" % code) 1377 obj = self.find_class(*key) 1378 _extension_cache[code] = obj 1379 self.append(obj) 1380 1381 def find_class(self, module, name): 1382 # Subclasses may override this. 1383 if self.proto < 3 and self.fix_imports: 1384 if (module, name) in _compat_pickle.NAME_MAPPING: 1385 module, name = _compat_pickle.NAME_MAPPING[(module, name)] 1386 elif module in _compat_pickle.IMPORT_MAPPING: 1387 module = _compat_pickle.IMPORT_MAPPING[module] 1388 __import__(module, level=0) 1389 if self.proto >= 4: 1390 return _getattribute(sys.modules[module], name)[0] 1391 else: 1392 return getattr(sys.modules[module], name) 1393 1394 def load_reduce(self): 1395 stack = self.stack 1396 args = stack.pop() 1397 func = stack[-1] 1398 stack[-1] = func(*args) 1399 dispatch[REDUCE[0]] = load_reduce 1400 1401 def load_pop(self): 1402 if self.stack: 1403 del self.stack[-1] 1404 else: 1405 self.pop_mark() 1406 dispatch[POP[0]] = load_pop 1407 1408 def load_pop_mark(self): 1409 self.pop_mark() 1410 dispatch[POP_MARK[0]] = load_pop_mark 1411 1412 def load_dup(self): 1413 self.append(self.stack[-1]) 1414 dispatch[DUP[0]] = load_dup 1415 1416 def load_get(self): 1417 i = int(self.readline()[:-1]) 1418 self.append(self.memo[i]) 1419 dispatch[GET[0]] = load_get 1420 1421 def load_binget(self): 1422 i = self.read(1)[0] 1423 self.append(self.memo[i]) 1424 dispatch[BINGET[0]] = load_binget 1425 1426 def load_long_binget(self): 1427 i, = unpack('<I', self.read(4)) 1428 self.append(self.memo[i]) 1429 dispatch[LONG_BINGET[0]] = load_long_binget 1430 1431 def load_put(self): 1432 i = int(self.readline()[:-1]) 1433 if i < 0: 1434 raise ValueError("negative PUT argument") 1435 self.memo[i] = self.stack[-1] 1436 dispatch[PUT[0]] = load_put 1437 1438 def load_binput(self): 1439 i = self.read(1)[0] 1440 if i < 0: 1441 raise ValueError("negative BINPUT argument") 1442 self.memo[i] = self.stack[-1] 
1443 dispatch[BINPUT[0]] = load_binput 1444 1445 def load_long_binput(self): 1446 i, = unpack('<I', self.read(4)) 1447 if i > maxsize: 1448 raise ValueError("negative LONG_BINPUT argument") 1449 self.memo[i] = self.stack[-1] 1450 dispatch[LONG_BINPUT[0]] = load_long_binput 1451 1452 def load_memoize(self): 1453 memo = self.memo 1454 memo[len(memo)] = self.stack[-1] 1455 dispatch[MEMOIZE[0]] = load_memoize 1456 1457 def load_append(self): 1458 stack = self.stack 1459 value = stack.pop() 1460 list = stack[-1] 1461 list.append(value) 1462 dispatch[APPEND[0]] = load_append 1463 1464 def load_appends(self): 1465 items = self.pop_mark() 1466 list_obj = self.stack[-1] 1467 if isinstance(list_obj, list): 1468 list_obj.extend(items) 1469 else: 1470 append = list_obj.append 1471 for item in items: 1472 append(item) 1473 dispatch[APPENDS[0]] = load_appends 1474 1475 def load_setitem(self): 1476 stack = self.stack 1477 value = stack.pop() 1478 key = stack.pop() 1479 dict = stack[-1] 1480 dict[key] = value 1481 dispatch[SETITEM[0]] = load_setitem 1482 1483 def load_setitems(self): 1484 items = self.pop_mark() 1485 dict = self.stack[-1] 1486 for i in range(0, len(items), 2): 1487 dict[items[i]] = items[i + 1] 1488 dispatch[SETITEMS[0]] = load_setitems 1489 1490 def load_additems(self): 1491 items = self.pop_mark() 1492 set_obj = self.stack[-1] 1493 if isinstance(set_obj, set): 1494 set_obj.update(items) 1495 else: 1496 add = set_obj.add 1497 for item in items: 1498 add(item) 1499 dispatch[ADDITEMS[0]] = load_additems 1500 1501 def load_build(self): 1502 stack = self.stack 1503 state = stack.pop() 1504 inst = stack[-1] 1505 setstate = getattr(inst, "__setstate__", None) 1506 if setstate is not None: 1507 setstate(state) 1508 return 1509 slotstate = None 1510 if isinstance(state, tuple) and len(state) == 2: 1511 state, slotstate = state 1512 if state: 1513 inst_dict = inst.__dict__ 1514 intern = sys.intern 1515 for k, v in state.items(): 1516 if type(k) is str: 1517 
inst_dict[intern(k)] = v 1518 else: 1519 inst_dict[k] = v 1520 if slotstate: 1521 for k, v in slotstate.items(): 1522 setattr(inst, k, v) 1523 dispatch[BUILD[0]] = load_build 1524 1525 def load_mark(self): 1526 self.metastack.append(self.stack) 1527 self.stack = [] 1528 self.append = self.stack.append 1529 dispatch[MARK[0]] = load_mark 1530 1531 def load_stop(self): 1532 value = self.stack.pop() 1533 raise _Stop(value) 1534 dispatch[STOP[0]] = load_stop 1535 1536 1537 # Shorthands 1538 1539 def _dump(obj, file, protocol=None, *, fix_imports=True): 1540 _Pickler(file, protocol, fix_imports=fix_imports).dump(obj) 1541 1542 def _dumps(obj, protocol=None, *, fix_imports=True): 1543 f = io.BytesIO() 1544 _Pickler(f, protocol, fix_imports=fix_imports).dump(obj) 1545 res = f.getvalue() 1546 assert isinstance(res, bytes_types) 1547 return res 1548 1549 def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict"): 1550 return _Unpickler(file, fix_imports=fix_imports, 1551 encoding=encoding, errors=errors).load() 1552 1553 def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict"): 1554 if isinstance(s, str): 1555 raise TypeError("Can't load pickle from unicode string") 1556 file = io.BytesIO(s) 1557 return _Unpickler(file, fix_imports=fix_imports, 1558 encoding=encoding, errors=errors).load() 1559 1560 # Use the faster _pickle if possible 1561 try: 1562 from _pickle import ( 1563 PickleError, 1564 PicklingError, 1565 UnpicklingError, 1566 Pickler, 1567 Unpickler, 1568 dump, 1569 dumps, 1570 load, 1571 loads 1572 ) 1573 except ImportError: 1574 Pickler, Unpickler = _Pickler, _Unpickler 1575 dump, dumps, load, loads = _dump, _dumps, _load, _loads 1576 1577 # Doctest 1578 def _test(): 1579 import doctest 1580 return doctest.testmod() 1581 1582 if __name__ == "__main__": 1583 import argparse 1584 parser = argparse.ArgumentParser( 1585 description='display contents of the pickle files') 1586 parser.add_argument( 1587 'pickle_file', 
type=argparse.FileType('br'), 1588 nargs='*', help='the pickle file') 1589 parser.add_argument( 1590 '-t', '--test', action='store_true', 1591 help='run self-test suite') 1592 parser.add_argument( 1593 '-v', action='store_true', 1594 help='run verbosely; only affects self-test run') 1595 args = parser.parse_args() 1596 if args.test: 1597 _test() 1598 else: 1599 if not args.pickle_file: 1600 parser.print_help() 1601 else: 1602 import pprint 1603 for f in args.pickle_file: 1604 obj = load(f) 1605 pprint.pprint(obj)
每天更新一点点,温习一点点,进步一点点