python模块:xml.etree.ElementTree
1 """Lightweight XML support for Python. 2 3 XML is an inherently hierarchical data format, and the most natural way to 4 represent it is with a tree. This module has two classes for this purpose: 5 6 1. ElementTree represents the whole XML document as a tree and 7 8 2. Element represents a single node in this tree. 9 10 Interactions with the whole document (reading and writing to/from files) are 11 usually done on the ElementTree level. Interactions with a single XML element 12 and its sub-elements are done on the Element level. 13 14 Element is a flexible container object designed to store hierarchical data 15 structures in memory. It can be described as a cross between a list and a 16 dictionary. Each Element has a number of properties associated with it: 17 18 'tag' - a string containing the element's name. 19 20 'attributes' - a Python dictionary storing the element's attributes. 21 22 'text' - a string containing the element's text content. 23 24 'tail' - an optional string containing text after the element's end tag. 25 26 And a number of child elements stored in a Python sequence. 27 28 To create an element instance, use the Element constructor, 29 or the SubElement factory function. 30 31 You can also use the ElementTree class to wrap an element structure 32 and convert it to and from XML. 33 34 """ 35 36 #--------------------------------------------------------------------- 37 # Licensed to PSF under a Contributor Agreement. 38 # See http://www.python.org/psf/license for licensing details. 39 # 40 # ElementTree 41 # Copyright (c) 1999-2008 by Fredrik Lundh. All rights reserved. 42 # 43 # fredrik@pythonware.com 44 # http://www.pythonware.com 45 # -------------------------------------------------------------------- 46 # The ElementTree toolkit is 47 # 48 # Copyright (c) 1999-2008 by Fredrik Lundh 49 # 50 # By obtaining, using, and/or copying this software and/or its 51 # associated documentation, you agree that you have read, understood, 52 # and will comply with the following terms and conditions: 53 # 54 # Permission to use, copy, modify, and distribute this software and 55 # its associated documentation for any purpose and without fee is 56 # hereby granted, provided that the above copyright notice appears in 57 # all copies, and that both that copyright notice and this permission 58 # notice appear in supporting documentation, and that the name of 59 # Secret Labs AB or the author not be used in advertising or publicity 60 # pertaining to distribution of the software without specific, written 61 # prior permission. 62 # 63 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD 64 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- 65 # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR 66 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY 67 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, 68 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS 69 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE 70 # OF THIS SOFTWARE. 71 # -------------------------------------------------------------------- 72 73 __all__ = [ 74 # public symbols 75 "Comment", 76 "dump", 77 "Element", "ElementTree", 78 "fromstring", "fromstringlist", 79 "iselement", "iterparse", 80 "parse", "ParseError", 81 "PI", "ProcessingInstruction", 82 "QName", 83 "SubElement", 84 "tostring", "tostringlist", 85 "TreeBuilder", 86 "VERSION", 87 "XML", "XMLID", 88 "XMLParser", "XMLPullParser", 89 "register_namespace", 90 ] 91 92 VERSION = "1.3.0" 93 94 import sys 95 import re 96 import warnings 97 import io 98 import collections 99 import contextlib 100 101 from . import ElementPath 102 103 104 class ParseError(SyntaxError): 105 """An error when parsing an XML document. 106 107 In addition to its exception value, a ParseError contains 108 two extra attributes: 109 'code' - the specific exception code 110 'position' - the line and column of the error 111 112 """ 113 pass 114 115 # -------------------------------------------------------------------- 116 117 118 def iselement(element): 119 """Return True if *element* appears to be an Element.""" 120 return hasattr(element, 'tag') 121 122 123 class Element: 124 """An XML element. 125 126 This class is the reference implementation of the Element interface. 127 128 An element's length is its number of subelements. That means if you 129 want to check if an element is truly empty, you should check BOTH 130 its length AND its text attribute. 131 132 The element tag, attribute names, and attribute values can be either 133 bytes or strings. 134 135 *tag* is the element name. *attrib* is an optional dictionary containing 136 element attributes. *extra* are additional element attributes given as 137 keyword arguments. 138 139 Example form: 140 <tag attrib>text<child/>...</tag>tail 141 142 """ 143 144 tag = None 145 """The element's name.""" 146 147 attrib = None 148 """Dictionary of the element's attributes.""" 149 150 text = None 151 """ 152 Text before first subelement. This is either a string or the value None. 153 Note that if there is no text, this attribute may be either 154 None or the empty string, depending on the parser. 155 156 """ 157 158 tail = None 159 """ 160 Text after this element's end tag, but before the next sibling element's 161 start tag. This is either a string or the value None. Note that if there 162 was no text, this attribute may be either None or an empty string, 163 depending on the parser. 164 165 """ 166 167 def __init__(self, tag, attrib={}, **extra): 168 if not isinstance(attrib, dict): 169 raise TypeError("attrib must be dict, not %s" % ( 170 attrib.__class__.__name__,)) 171 attrib = attrib.copy() 172 attrib.update(extra) 173 self.tag = tag 174 self.attrib = attrib 175 self._children = [] 176 177 def __repr__(self): 178 return "<%s %r at %#x>" % (self.__class__.__name__, self.tag, id(self)) 179 180 def makeelement(self, tag, attrib): 181 """Create a new element with the same type. 182 183 *tag* is a string containing the element name. 184 *attrib* is a dictionary containing the element attributes. 185 186 Do not call this method, use the SubElement factory function instead. 187 188 """ 189 return self.__class__(tag, attrib) 190 191 def copy(self): 192 """Return copy of current element. 193 194 This creates a shallow copy. Subelements will be shared with the 195 original tree. 196 197 """ 198 elem = self.makeelement(self.tag, self.attrib) 199 elem.text = self.text 200 elem.tail = self.tail 201 elem[:] = self 202 return elem 203 204 def __len__(self): 205 return len(self._children) 206 207 def __bool__(self): 208 warnings.warn( 209 "The behavior of this method will change in future versions. " 210 "Use specific 'len(elem)' or 'elem is not None' test instead.", 211 FutureWarning, stacklevel=2 212 ) 213 return len(self._children) != 0 # emulate old behaviour, for now 214 215 def __getitem__(self, index): 216 return self._children[index] 217 218 def __setitem__(self, index, element): 219 # if isinstance(index, slice): 220 # for elt in element: 221 # assert iselement(elt) 222 # else: 223 # assert iselement(element) 224 self._children[index] = element 225 226 def __delitem__(self, index): 227 del self._children[index] 228 229 def append(self, subelement): 230 """Add *subelement* to the end of this element. 231 232 The new element will appear in document order after the last existing 233 subelement (or directly after the text, if it's the first subelement), 234 but before the end tag for this element. 235 236 """ 237 self._assert_is_element(subelement) 238 self._children.append(subelement) 239 240 def extend(self, elements): 241 """Append subelements from a sequence. 242 243 *elements* is a sequence with zero or more elements. 244 245 """ 246 for element in elements: 247 self._assert_is_element(element) 248 self._children.extend(elements) 249 250 def insert(self, index, subelement): 251 """Insert *subelement* at position *index*.""" 252 self._assert_is_element(subelement) 253 self._children.insert(index, subelement) 254 255 def _assert_is_element(self, e): 256 # Need to refer to the actual Python implementation, not the 257 # shadowing C implementation. 258 if not isinstance(e, _Element_Py): 259 raise TypeError('expected an Element, not %s' % type(e).__name__) 260 261 def remove(self, subelement): 262 """Remove matching subelement. 263 264 Unlike the find methods, this method compares elements based on 265 identity, NOT ON tag value or contents. To remove subelements by 266 other means, the easiest way is to use a list comprehension to 267 select what elements to keep, and then use slice assignment to update 268 the parent element. 269 270 ValueError is raised if a matching element could not be found. 271 272 """ 273 # assert iselement(element) 274 self._children.remove(subelement) 275 276 def getchildren(self): 277 """(Deprecated) Return all subelements. 278 279 Elements are returned in document order. 280 281 """ 282 warnings.warn( 283 "This method will be removed in future versions. " 284 "Use 'list(elem)' or iteration over elem instead.", 285 DeprecationWarning, stacklevel=2 286 ) 287 return self._children 288 289 def find(self, path, namespaces=None): 290 """Find first matching element by tag name or path. 291 292 *path* is a string having either an element tag or an XPath, 293 *namespaces* is an optional mapping from namespace prefix to full name. 294 295 Return the first matching element, or None if no element was found. 296 297 """ 298 return ElementPath.find(self, path, namespaces) 299 300 def findtext(self, path, default=None, namespaces=None): 301 """Find text for first matching element by tag name or path. 302 303 *path* is a string having either an element tag or an XPath, 304 *default* is the value to return if the element was not found, 305 *namespaces* is an optional mapping from namespace prefix to full name. 306 307 Return text content of first matching element, or default value if 308 none was found. Note that if an element is found having no text 309 content, the empty string is returned. 310 311 """ 312 return ElementPath.findtext(self, path, default, namespaces) 313 314 def findall(self, path, namespaces=None): 315 """Find all matching subelements by tag name or path. 316 317 *path* is a string having either an element tag or an XPath, 318 *namespaces* is an optional mapping from namespace prefix to full name. 319 320 Returns list containing all matching elements in document order. 321 322 """ 323 return ElementPath.findall(self, path, namespaces) 324 325 def iterfind(self, path, namespaces=None): 326 """Find all matching subelements by tag name or path. 327 328 *path* is a string having either an element tag or an XPath, 329 *namespaces* is an optional mapping from namespace prefix to full name. 330 331 Return an iterable yielding all matching elements in document order. 332 333 """ 334 return ElementPath.iterfind(self, path, namespaces) 335 336 def clear(self): 337 """Reset element. 338 339 This function removes all subelements, clears all attributes, and sets 340 the text and tail attributes to None. 341 342 """ 343 self.attrib.clear() 344 self._children = [] 345 self.text = self.tail = None 346 347 def get(self, key, default=None): 348 """Get element attribute. 349 350 Equivalent to attrib.get, but some implementations may handle this a 351 bit more efficiently. *key* is what attribute to look for, and 352 *default* is what to return if the attribute was not found. 353 354 Returns a string containing the attribute value, or the default if 355 attribute was not found. 356 357 """ 358 return self.attrib.get(key, default) 359 360 def set(self, key, value): 361 """Set element attribute. 362 363 Equivalent to attrib[key] = value, but some implementations may handle 364 this a bit more efficiently. *key* is what attribute to set, and 365 *value* is the attribute value to set it to. 366 367 """ 368 self.attrib[key] = value 369 370 def keys(self): 371 """Get list of attribute names. 372 373 Names are returned in an arbitrary order, just like an ordinary 374 Python dict. Equivalent to attrib.keys() 375 376 """ 377 return self.attrib.keys() 378 379 def items(self): 380 """Get element attributes as a sequence. 381 382 The attributes are returned in arbitrary order. Equivalent to 383 attrib.items(). 384 385 Return a list of (name, value) tuples. 386 387 """ 388 return self.attrib.items() 389 390 def iter(self, tag=None): 391 """Create tree iterator. 392 393 The iterator loops over the element and all subelements in document 394 order, returning all elements with a matching tag. 395 396 If the tree structure is modified during iteration, new or removed 397 elements may or may not be included. To get a stable set, use the 398 list() function on the iterator, and loop over the resulting list. 399 400 *tag* is what tags to look for (default is to return all elements) 401 402 Return an iterator containing all the matching elements. 403 404 """ 405 if tag == "*": 406 tag = None 407 if tag is None or self.tag == tag: 408 yield self 409 for e in self._children: 410 yield from e.iter(tag) 411 412 # compatibility 413 def getiterator(self, tag=None): 414 # Change for a DeprecationWarning in 1.4 415 warnings.warn( 416 "This method will be removed in future versions. " 417 "Use 'elem.iter()' or 'list(elem.iter())' instead.", 418 PendingDeprecationWarning, stacklevel=2 419 ) 420 return list(self.iter(tag)) 421 422 def itertext(self): 423 """Create text iterator. 424 425 The iterator loops over the element and all subelements in document 426 order, returning all inner text. 427 428 """ 429 tag = self.tag 430 if not isinstance(tag, str) and tag is not None: 431 return 432 t = self.text 433 if t: 434 yield t 435 for e in self: 436 yield from e.itertext() 437 t = e.tail 438 if t: 439 yield t 440 441 442 def SubElement(parent, tag, attrib={}, **extra): 443 """Subelement factory which creates an element instance, and appends it 444 to an existing parent. 445 446 The element tag, attribute names, and attribute values can be either 447 bytes or Unicode strings. 448 449 *parent* is the parent element, *tag* is the subelements name, *attrib* is 450 an optional directory containing element attributes, *extra* are 451 additional attributes given as keyword arguments. 452 453 """ 454 attrib = attrib.copy() 455 attrib.update(extra) 456 element = parent.makeelement(tag, attrib) 457 parent.append(element) 458 return element 459 460 461 def Comment(text=None): 462 """Comment element factory. 463 464 This function creates a special element which the standard serializer 465 serializes as an XML comment. 466 467 *text* is a string containing the comment string. 468 469 """ 470 element = Element(Comment) 471 element.text = text 472 return element 473 474 475 def ProcessingInstruction(target, text=None): 476 """Processing Instruction element factory. 477 478 This function creates a special element which the standard serializer 479 serializes as an XML comment. 480 481 *target* is a string containing the processing instruction, *text* is a 482 string containing the processing instruction contents, if any. 483 484 """ 485 element = Element(ProcessingInstruction) 486 element.text = target 487 if text: 488 element.text = element.text + " " + text 489 return element 490 491 PI = ProcessingInstruction 492 493 494 class QName: 495 """Qualified name wrapper. 496 497 This class can be used to wrap a QName attribute value in order to get 498 proper namespace handing on output. 499 500 *text_or_uri* is a string containing the QName value either in the form 501 {uri}local, or if the tag argument is given, the URI part of a QName. 502 503 *tag* is an optional argument which if given, will make the first 504 argument (text_or_uri) be interpreted as a URI, and this argument (tag) 505 be interpreted as a local name. 506 507 """ 508 def __init__(self, text_or_uri, tag=None): 509 if tag: 510 text_or_uri = "{%s}%s" % (text_or_uri, tag) 511 self.text = text_or_uri 512 def __str__(self): 513 return self.text 514 def __repr__(self): 515 return '<%s %r>' % (self.__class__.__name__, self.text) 516 def __hash__(self): 517 return hash(self.text) 518 def __le__(self, other): 519 if isinstance(other, QName): 520 return self.text <= other.text 521 return self.text <= other 522 def __lt__(self, other): 523 if isinstance(other, QName): 524 return self.text < other.text 525 return self.text < other 526 def __ge__(self, other): 527 if isinstance(other, QName): 528 return self.text >= other.text 529 return self.text >= other 530 def __gt__(self, other): 531 if isinstance(other, QName): 532 return self.text > other.text 533 return self.text > other 534 def __eq__(self, other): 535 if isinstance(other, QName): 536 return self.text == other.text 537 return self.text == other 538 539 # -------------------------------------------------------------------- 540 541 542 class ElementTree: 543 """An XML element hierarchy. 544 545 This class also provides support for serialization to and from 546 standard XML. 547 548 *element* is an optional root element node, 549 *file* is an optional file handle or file name of an XML file whose 550 contents will be used to initialize the tree with. 551 552 """ 553 def __init__(self, element=None, file=None): 554 # assert element is None or iselement(element) 555 self._root = element # first node 556 if file: 557 self.parse(file) 558 559 def getroot(self): 560 """Return root element of this tree.""" 561 return self._root 562 563 def _setroot(self, element): 564 """Replace root element of this tree. 565 566 This will discard the current contents of the tree and replace it 567 with the given element. Use with care! 568 569 """ 570 # assert iselement(element) 571 self._root = element 572 573 def parse(self, source, parser=None): 574 """Load external XML document into element tree. 575 576 *source* is a file name or file object, *parser* is an optional parser 577 instance that defaults to XMLParser. 578 579 ParseError is raised if the parser fails to parse the document. 580 581 Returns the root element of the given source document. 582 583 """ 584 close_source = False 585 if not hasattr(source, "read"): 586 source = open(source, "rb") 587 close_source = True 588 try: 589 if parser is None: 590 # If no parser was specified, create a default XMLParser 591 parser = XMLParser() 592 if hasattr(parser, '_parse_whole'): 593 # The default XMLParser, when it comes from an accelerator, 594 # can define an internal _parse_whole API for efficiency. 595 # It can be used to parse the whole source without feeding 596 # it with chunks. 597 self._root = parser._parse_whole(source) 598 return self._root 599 while True: 600 data = source.read(65536) 601 if not data: 602 break 603 parser.feed(data) 604 self._root = parser.close() 605 return self._root 606 finally: 607 if close_source: 608 source.close() 609 610 def iter(self, tag=None): 611 """Create and return tree iterator for the root element. 612 613 The iterator loops over all elements in this tree, in document order. 614 615 *tag* is a string with the tag name to iterate over 616 (default is to return all elements). 617 618 """ 619 # assert self._root is not None 620 return self._root.iter(tag) 621 622 # compatibility 623 def getiterator(self, tag=None): 624 # Change for a DeprecationWarning in 1.4 625 warnings.warn( 626 "This method will be removed in future versions. " 627 "Use 'tree.iter()' or 'list(tree.iter())' instead.", 628 PendingDeprecationWarning, stacklevel=2 629 ) 630 return list(self.iter(tag)) 631 632 def find(self, path, namespaces=None): 633 """Find first matching element by tag name or path. 634 635 Same as getroot().find(path), which is Element.find() 636 637 *path* is a string having either an element tag or an XPath, 638 *namespaces* is an optional mapping from namespace prefix to full name. 639 640 Return the first matching element, or None if no element was found. 641 642 """ 643 # assert self._root is not None 644 if path[:1] == "/": 645 path = "." + path 646 warnings.warn( 647 "This search is broken in 1.3 and earlier, and will be " 648 "fixed in a future version. If you rely on the current " 649 "behaviour, change it to %r" % path, 650 FutureWarning, stacklevel=2 651 ) 652 return self._root.find(path, namespaces) 653 654 def findtext(self, path, default=None, namespaces=None): 655 """Find first matching element by tag name or path. 656 657 Same as getroot().findtext(path), which is Element.findtext() 658 659 *path* is a string having either an element tag or an XPath, 660 *namespaces* is an optional mapping from namespace prefix to full name. 661 662 Return the first matching element, or None if no element was found. 663 664 """ 665 # assert self._root is not None 666 if path[:1] == "/": 667 path = "." + path 668 warnings.warn( 669 "This search is broken in 1.3 and earlier, and will be " 670 "fixed in a future version. If you rely on the current " 671 "behaviour, change it to %r" % path, 672 FutureWarning, stacklevel=2 673 ) 674 return self._root.findtext(path, default, namespaces) 675 676 def findall(self, path, namespaces=None): 677 """Find all matching subelements by tag name or path. 678 679 Same as getroot().findall(path), which is Element.findall(). 680 681 *path* is a string having either an element tag or an XPath, 682 *namespaces* is an optional mapping from namespace prefix to full name. 683 684 Return list containing all matching elements in document order. 685 686 """ 687 # assert self._root is not None 688 if path[:1] == "/": 689 path = "." + path 690 warnings.warn( 691 "This search is broken in 1.3 and earlier, and will be " 692 "fixed in a future version. If you rely on the current " 693 "behaviour, change it to %r" % path, 694 FutureWarning, stacklevel=2 695 ) 696 return self._root.findall(path, namespaces) 697 698 def iterfind(self, path, namespaces=None): 699 """Find all matching subelements by tag name or path. 700 701 Same as getroot().iterfind(path), which is element.iterfind() 702 703 *path* is a string having either an element tag or an XPath, 704 *namespaces* is an optional mapping from namespace prefix to full name. 705 706 Return an iterable yielding all matching elements in document order. 707 708 """ 709 # assert self._root is not None 710 if path[:1] == "/": 711 path = "." + path 712 warnings.warn( 713 "This search is broken in 1.3 and earlier, and will be " 714 "fixed in a future version. If you rely on the current " 715 "behaviour, change it to %r" % path, 716 FutureWarning, stacklevel=2 717 ) 718 return self._root.iterfind(path, namespaces) 719 720 def write(self, file_or_filename, 721 encoding=None, 722 xml_declaration=None, 723 default_namespace=None, 724 method=None, *, 725 short_empty_elements=True): 726 """Write element tree to a file as XML. 727 728 Arguments: 729 *file_or_filename* -- file name or a file object opened for writing 730 731 *encoding* -- the output encoding (default: US-ASCII) 732 733 *xml_declaration* -- bool indicating if an XML declaration should be 734 added to the output. If None, an XML declaration 735 is added if encoding IS NOT either of: 736 US-ASCII, UTF-8, or Unicode 737 738 *default_namespace* -- sets the default XML namespace (for "xmlns") 739 740 *method* -- either "xml" (default), "html, "text", or "c14n" 741 742 *short_empty_elements* -- controls the formatting of elements 743 that contain no content. If True (default) 744 they are emitted as a single self-closed 745 tag, otherwise they are emitted as a pair 746 of start/end tags 747 748 """ 749 if not method: 750 method = "xml" 751 elif method not in _serialize: 752 raise ValueError("unknown method %r" % method) 753 if not encoding: 754 if method == "c14n": 755 encoding = "utf-8" 756 else: 757 encoding = "us-ascii" 758 enc_lower = encoding.lower() 759 with _get_writer(file_or_filename, enc_lower) as write: 760 if method == "xml" and (xml_declaration or 761 (xml_declaration is None and 762 enc_lower not in ("utf-8", "us-ascii", "unicode"))): 763 declared_encoding = encoding 764 if enc_lower == "unicode": 765 # Retrieve the default encoding for the xml declaration 766 import locale 767 declared_encoding = locale.getpreferredencoding() 768 write("<?xml version='1.0' encoding='%s'?>\n" % ( 769 declared_encoding,)) 770 if method == "text": 771 _serialize_text(write, self._root) 772 else: 773 qnames, namespaces = _namespaces(self._root, default_namespace) 774 serialize = _serialize[method] 775 serialize(write, self._root, qnames, namespaces, 776 short_empty_elements=short_empty_elements) 777 778 def write_c14n(self, file): 779 # lxml.etree compatibility. use output method instead 780 return self.write(file, method="c14n") 781 782 # -------------------------------------------------------------------- 783 # serialization support 784 785 @contextlib.contextmanager 786 def _get_writer(file_or_filename, encoding): 787 # returns text write method and release all resources after using 788 try: 789 write = file_or_filename.write 790 except AttributeError: 791 # file_or_filename is a file name 792 if encoding == "unicode": 793 file = open(file_or_filename, "w") 794 else: 795 file = open(file_or_filename, "w", encoding=encoding, 796 errors="xmlcharrefreplace") 797 with file: 798 yield file.write 799 else: 800 # file_or_filename is a file-like object 801 # encoding determines if it is a text or binary writer 802 if encoding == "unicode": 803 # use a text writer as is 804 yield write 805 else: 806 # wrap a binary writer with TextIOWrapper 807 with contextlib.ExitStack() as stack: 808 if isinstance(file_or_filename, io.BufferedIOBase): 809 file = file_or_filename 810 elif isinstance(file_or_filename, io.RawIOBase): 811 file = io.BufferedWriter(file_or_filename) 812 # Keep the original file open when the BufferedWriter is 813 # destroyed 814 stack.callback(file.detach) 815 else: 816 # This is to handle passed objects that aren't in the 817 # IOBase hierarchy, but just have a write method 818 file = io.BufferedIOBase() 819 file.writable = lambda: True 820 file.write = write 821 try: 822 # TextIOWrapper uses this methods to determine 823 # if BOM (for UTF-16, etc) should be added 824 file.seekable = file_or_filename.seekable 825 file.tell = file_or_filename.tell 826 except AttributeError: 827 pass 828 file = io.TextIOWrapper(file, 829 encoding=encoding, 830 errors="xmlcharrefreplace", 831 newline="\n") 832 # Keep the original file open when the TextIOWrapper is 833 # destroyed 834 stack.callback(file.detach) 835 yield file.write 836 837 def _namespaces(elem, default_namespace=None): 838 # identify namespaces used in this tree 839 840 # maps qnames to *encoded* prefix:local names 841 qnames = {None: None} 842 843 # maps uri:s to prefixes 844 namespaces = {} 845 if default_namespace: 846 namespaces[default_namespace] = "" 847 848 def add_qname(qname): 849 # calculate serialized qname representation 850 try: 851 if qname[:1] == "{": 852 uri, tag = qname[1:].rsplit("}", 1) 853 prefix = namespaces.get(uri) 854 if prefix is None: 855 prefix = _namespace_map.get(uri) 856 if prefix is None: 857 prefix = "ns%d" % len(namespaces) 858 if prefix != "xml": 859 namespaces[uri] = prefix 860 if prefix: 861 qnames[qname] = "%s:%s" % (prefix, tag) 862 else: 863 qnames[qname] = tag # default element 864 else: 865 if default_namespace: 866 # FIXME: can this be handled in XML 1.0? 867 raise ValueError( 868 "cannot use non-qualified names with " 869 "default_namespace option" 870 ) 871 qnames[qname] = qname 872 except TypeError: 873 _raise_serialization_error(qname) 874 875 # populate qname and namespaces table 876 for elem in elem.iter(): 877 tag = elem.tag 878 if isinstance(tag, QName): 879 if tag.text not in qnames: 880 add_qname(tag.text) 881 elif isinstance(tag, str): 882 if tag not in qnames: 883 add_qname(tag) 884 elif tag is not None and tag is not Comment and tag is not PI: 885 _raise_serialization_error(tag) 886 for key, value in elem.items(): 887 if isinstance(key, QName): 888 key = key.text 889 if key not in qnames: 890 add_qname(key) 891 if isinstance(value, QName) and value.text not in qnames: 892 add_qname(value.text) 893 text = elem.text 894 if isinstance(text, QName) and text.text not in qnames: 895 add_qname(text.text) 896 return qnames, namespaces 897 898 def _serialize_xml(write, elem, qnames, namespaces, 899 short_empty_elements, **kwargs): 900 tag = elem.tag 901 text = elem.text 902 if tag is Comment: 903 write("<!--%s-->" % text) 904 elif tag is ProcessingInstruction: 905 write("<?%s?>" % text) 906 else: 907 tag = qnames[tag] 908 if tag is None: 909 if text: 910 write(_escape_cdata(text)) 911 for e in elem: 912 _serialize_xml(write, e, qnames, None, 913 short_empty_elements=short_empty_elements) 914 else: 915 write("<" + tag) 916 items = list(elem.items()) 917 if items or namespaces: 918 if namespaces: 919 for v, k in sorted(namespaces.items(), 920 key=lambda x: x[1]): # sort on prefix 921 if k: 922 k = ":" + k 923 write(" xmlns%s=\"%s\"" % ( 924 k, 925 _escape_attrib(v) 926 )) 927 for k, v in sorted(items): # lexical order 928 if isinstance(k, QName): 929 k = k.text 930 if isinstance(v, QName): 931 v = qnames[v.text] 932 else: 933 v = _escape_attrib(v) 934 write(" %s=\"%s\"" % (qnames[k], v)) 935 if text or len(elem) or not short_empty_elements: 936 write(">") 937 if text: 938 write(_escape_cdata(text)) 939 for e in elem: 940 _serialize_xml(write, e, qnames, None, 941 short_empty_elements=short_empty_elements) 942 write("</" + tag + ">") 943 else: 944 write(" />") 945 if elem.tail: 946 write(_escape_cdata(elem.tail)) 947 948 HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr", 949 "img", "input", "isindex", "link", "meta", "param") 950 951 try: 952 HTML_EMPTY = set(HTML_EMPTY) 953 except NameError: 954 pass 955 956 def _serialize_html(write, elem, qnames, namespaces, **kwargs): 957 tag = elem.tag 958 text = elem.text 959 if tag is Comment: 960 write("<!--%s-->" % _escape_cdata(text)) 961 elif tag is ProcessingInstruction: 962 write("<?%s?>" % _escape_cdata(text)) 963 else: 964 tag = qnames[tag] 965 if tag is None: 966 if text: 967 write(_escape_cdata(text)) 968 for e in elem: 969 _serialize_html(write, e, qnames, None) 970 else: 971 write("<" + tag) 972 items = list(elem.items()) 973 if items or namespaces: 974 if namespaces: 975 for v, k in sorted(namespaces.items(), 976 key=lambda x: x[1]): # sort on prefix 977 if k: 978 k = ":" + k 979 write(" xmlns%s=\"%s\"" % ( 980 k, 981 _escape_attrib(v) 982 )) 983 for k, v in sorted(items): # lexical order 984 if isinstance(k, QName): 985 k = k.text 986 if isinstance(v, QName): 987 v = qnames[v.text] 988 else: 989 v = _escape_attrib_html(v) 990 # FIXME: handle boolean attributes 991 write(" %s=\"%s\"" % (qnames[k], v)) 992 write(">") 993 ltag = tag.lower() 994 if text: 995 if ltag == "script" or ltag == "style": 996 write(text) 997 else: 998 write(_escape_cdata(text)) 999 for e in elem: 1000 _serialize_html(write, e, qnames, None) 1001 if ltag not in HTML_EMPTY: 1002 write("</" + tag + ">") 1003 if elem.tail: 1004 write(_escape_cdata(elem.tail)) 1005 1006 def _serialize_text(write, elem): 1007 for part in elem.itertext(): 1008 write(part) 1009 if elem.tail: 1010 write(elem.tail) 1011 1012 _serialize = { 1013 "xml": _serialize_xml, 1014 "html": _serialize_html, 1015 "text": _serialize_text, 1016 # this optional method is imported at the end of the module 1017 # "c14n": _serialize_c14n, 1018 } 1019 1020 1021 def register_namespace(prefix, uri): 1022 """Register a namespace prefix. 1023 1024 The registry is global, and any existing mapping for either the 1025 given prefix or the namespace URI will be removed. 1026 1027 *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and 1028 attributes in this namespace will be serialized with prefix if possible. 1029 1030 ValueError is raised if prefix is reserved or is invalid. 1031 1032 """ 1033 if re.match(r"ns\d+$", prefix): 1034 raise ValueError("Prefix format reserved for internal use") 1035 for k, v in list(_namespace_map.items()): 1036 if k == uri or v == prefix: 1037 del _namespace_map[k] 1038 _namespace_map[uri] = prefix 1039 1040 _namespace_map = { 1041 # "well-known" namespace prefixes 1042 "http://www.w3.org/XML/1998/namespace": "xml", 1043 "http://www.w3.org/1999/xhtml": "html", 1044 "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", 1045 "http://schemas.xmlsoap.org/wsdl/": "wsdl", 1046 # xml schema 1047 "http://www.w3.org/2001/XMLSchema": "xs", 1048 "http://www.w3.org/2001/XMLSchema-instance": "xsi", 1049 # dublin core 1050 "http://purl.org/dc/elements/1.1/": "dc", 1051 } 1052 # For tests and troubleshooting 1053 register_namespace._namespace_map = _namespace_map 1054 1055 def _raise_serialization_error(text): 1056 raise TypeError( 1057 "cannot serialize %r (type %s)" % (text, type(text).__name__) 1058 ) 1059 1060 def _escape_cdata(text): 1061 # escape character data 1062 try: 1063 # it's worth avoiding do-nothing calls for strings that are 1064 # shorter than 500 character, or so. assume that's, by far, 1065 # the most common case in most applications. 1066 if "&" in text: 1067 text = text.replace("&", "&") 1068 if "<" in text: 1069 text = text.replace("<", "<") 1070 if ">" in text: 1071 text = text.replace(">", ">") 1072 return text 1073 except (TypeError, AttributeError): 1074 _raise_serialization_error(text) 1075 1076 def _escape_attrib(text): 1077 # escape attribute value 1078 try: 1079 if "&" in text: 1080 text = text.replace("&", "&") 1081 if "<" in text: 1082 text = text.replace("<", "<") 1083 if ">" in text: 1084 text = text.replace(">", ">") 1085 if "\"" in text: 1086 text = text.replace("\"", """) 1087 # The following business with carriage returns is to satisfy 1088 # Section 2.11 of the XML specification, stating that 1089 # CR or CR LN should be replaced with just LN 1090 # http://www.w3.org/TR/REC-xml/#sec-line-ends 1091 if "\r\n" in text: 1092 text = text.replace("\r\n", "\n") 1093 if "\r" in text: 1094 text = text.replace("\r", "\n") 1095 #The following four lines are issue 17582 1096 if "\n" in text: 1097 text = text.replace("\n", " ") 1098 if "\t" in text: 1099 text = text.replace("\t", "	") 1100 return text 1101 except (TypeError, AttributeError): 1102 _raise_serialization_error(text) 1103 1104 def _escape_attrib_html(text): 1105 # escape attribute value 1106 try: 1107 if "&" in text: 1108 text = text.replace("&", "&") 1109 if ">" in text: 1110 text = text.replace(">", ">") 1111 if "\"" in text: 1112 text = text.replace("\"", """) 1113 return text 1114 except (TypeError, AttributeError): 1115 _raise_serialization_error(text) 1116 1117 # -------------------------------------------------------------------- 1118 1119 def tostring(element, encoding=None, method=None, *, 1120 short_empty_elements=True): 1121 """Generate string representation of XML element. 1122 1123 All subelements are included. If encoding is "unicode", a string 1124 is returned. Otherwise a bytestring is returned. 1125 1126 *element* is an Element instance, *encoding* is an optional output 1127 encoding defaulting to US-ASCII, *method* is an optional output which can 1128 be one of "xml" (default), "html", "text" or "c14n". 1129 1130 Returns an (optionally) encoded string containing the XML data. 1131 1132 """ 1133 stream = io.StringIO() if encoding == 'unicode' else io.BytesIO() 1134 ElementTree(element).write(stream, encoding, method=method, 1135 short_empty_elements=short_empty_elements) 1136 return stream.getvalue() 1137 1138 class _ListDataStream(io.BufferedIOBase): 1139 """An auxiliary stream accumulating into a list reference.""" 1140 def __init__(self, lst): 1141 self.lst = lst 1142 1143 def writable(self): 1144 return True 1145 1146 def seekable(self): 1147 return True 1148 1149 def write(self, b): 1150 self.lst.append(b) 1151 1152 def tell(self): 1153 return len(self.lst) 1154 1155 def tostringlist(element, encoding=None, method=None, *, 1156 short_empty_elements=True): 1157 lst = [] 1158 stream = _ListDataStream(lst) 1159 ElementTree(element).write(stream, encoding, method=method, 1160 short_empty_elements=short_empty_elements) 1161 return lst 1162 1163 1164 def dump(elem): 1165 """Write element tree or element structure to sys.stdout. 1166 1167 This function should be used for debugging only. 1168 1169 *elem* is either an ElementTree, or a single Element. The exact output 1170 format is implementation dependent. In this version, it's written as an 1171 ordinary XML file. 1172 1173 """ 1174 # debugging 1175 if not isinstance(elem, ElementTree): 1176 elem = ElementTree(elem) 1177 elem.write(sys.stdout, encoding="unicode") 1178 tail = elem.getroot().tail 1179 if not tail or tail[-1] != "\n": 1180 sys.stdout.write("\n") 1181 1182 # -------------------------------------------------------------------- 1183 # parsing 1184 1185 1186 def parse(source, parser=None): 1187 """Parse XML document into element tree. 1188 1189 *source* is a filename or file object containing XML data, 1190 *parser* is an optional parser instance defaulting to XMLParser. 1191 1192 Return an ElementTree instance. 1193 1194 """ 1195 tree = ElementTree() 1196 tree.parse(source, parser) 1197 return tree 1198 1199 1200 def iterparse(source, events=None, parser=None): 1201 """Incrementally parse XML document into ElementTree. 1202 1203 This class also reports what's going on to the user based on the 1204 *events* it is initialized with. The supported events are the strings 1205 "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get 1206 detailed namespace information). If *events* is omitted, only 1207 "end" events are reported. 1208 1209 *source* is a filename or file object containing XML data, *events* is 1210 a list of events to report back, *parser* is an optional parser instance. 1211 1212 Returns an iterator providing (event, elem) pairs. 1213 1214 """ 1215 # Use the internal, undocumented _parser argument for now; When the 1216 # parser argument of iterparse is removed, this can be killed. 1217 pullparser = XMLPullParser(events=events, _parser=parser) 1218 def iterator(): 1219 try: 1220 while True: 1221 yield from pullparser.read_events() 1222 # load event buffer 1223 data = source.read(16 * 1024) 1224 if not data: 1225 break 1226 pullparser.feed(data) 1227 root = pullparser._close_and_return_root() 1228 yield from pullparser.read_events() 1229 it.root = root 1230 finally: 1231 if close_source: 1232 source.close() 1233 1234 class IterParseIterator(collections.Iterator): 1235 __next__ = iterator().__next__ 1236 it = IterParseIterator() 1237 it.root = None 1238 del iterator, IterParseIterator 1239 1240 close_source = False 1241 if not hasattr(source, "read"): 1242 source = open(source, "rb") 1243 close_source = True 1244 1245 return it 1246 1247 1248 class XMLPullParser: 1249 1250 def __init__(self, events=None, *, _parser=None): 1251 # The _parser argument is for internal use only and must not be relied 1252 # upon in user code. It will be removed in a future release. 1253 # See http://bugs.python.org/issue17741 for more details. 1254 1255 self._events_queue = collections.deque() 1256 self._parser = _parser or XMLParser(target=TreeBuilder()) 1257 # wire up the parser for event reporting 1258 if events is None: 1259 events = ("end",) 1260 self._parser._setevents(self._events_queue, events) 1261 1262 def feed(self, data): 1263 """Feed encoded data to parser.""" 1264 if self._parser is None: 1265 raise ValueError("feed() called after end of stream") 1266 if data: 1267 try: 1268 self._parser.feed(data) 1269 except SyntaxError as exc: 1270 self._events_queue.append(exc) 1271 1272 def _close_and_return_root(self): 1273 # iterparse needs this to set its root attribute properly :( 1274 root = self._parser.close() 1275 self._parser = None 1276 return root 1277 1278 def close(self): 1279 """Finish feeding data to parser. 1280 1281 Unlike XMLParser, does not return the root element. Use 1282 read_events() to consume elements from XMLPullParser. 1283 """ 1284 self._close_and_return_root() 1285 1286 def read_events(self): 1287 """Return an iterator over currently available (event, elem) pairs. 1288 1289 Events are consumed from the internal event queue as they are 1290 retrieved from the iterator. 1291 """ 1292 events = self._events_queue 1293 while events: 1294 event = events.popleft() 1295 if isinstance(event, Exception): 1296 raise event 1297 else: 1298 yield event 1299 1300 1301 def XML(text, parser=None): 1302 """Parse XML document from string constant. 1303 1304 This function can be used to embed "XML Literals" in Python code. 1305 1306 *text* is a string containing XML data, *parser* is an 1307 optional parser instance, defaulting to the standard XMLParser. 1308 1309 Returns an Element instance. 1310 1311 """ 1312 if not parser: 1313 parser = XMLParser(target=TreeBuilder()) 1314 parser.feed(text) 1315 return parser.close() 1316 1317 1318 def XMLID(text, parser=None): 1319 """Parse XML document from string constant for its IDs. 1320 1321 *text* is a string containing XML data, *parser* is an 1322 optional parser instance, defaulting to the standard XMLParser. 1323 1324 Returns an (Element, dict) tuple, in which the 1325 dict maps element id:s to elements. 1326 1327 """ 1328 if not parser: 1329 parser = XMLParser(target=TreeBuilder()) 1330 parser.feed(text) 1331 tree = parser.close() 1332 ids = {} 1333 for elem in tree.iter(): 1334 id = elem.get("id") 1335 if id: 1336 ids[id] = elem 1337 return tree, ids 1338 1339 # Parse XML document from string constant. Alias for XML(). 1340 fromstring = XML 1341 1342 def fromstringlist(sequence, parser=None): 1343 """Parse XML document from sequence of string fragments. 1344 1345 *sequence* is a list of other sequence, *parser* is an optional parser 1346 instance, defaulting to the standard XMLParser. 1347 1348 Returns an Element instance. 1349 1350 """ 1351 if not parser: 1352 parser = XMLParser(target=TreeBuilder()) 1353 for text in sequence: 1354 parser.feed(text) 1355 return parser.close() 1356 1357 # -------------------------------------------------------------------- 1358 1359 1360 class TreeBuilder: 1361 """Generic element structure builder. 1362 1363 This builder converts a sequence of start, data, and end method 1364 calls to a well-formed element structure. 1365 1366 You can use this class to build an element structure using a custom XML 1367 parser, or a parser for some other XML-like format. 1368 1369 *element_factory* is an optional element factory which is called 1370 to create new Element instances, as necessary. 1371 1372 """ 1373 def __init__(self, element_factory=None): 1374 self._data = [] # data collector 1375 self._elem = [] # element stack 1376 self._last = None # last element 1377 self._tail = None # true if we're after an end tag 1378 if element_factory is None: 1379 element_factory = Element 1380 self._factory = element_factory 1381 1382 def close(self): 1383 """Flush builder buffers and return toplevel document Element.""" 1384 assert len(self._elem) == 0, "missing end tags" 1385 assert self._last is not None, "missing toplevel element" 1386 return self._last 1387 1388 def _flush(self): 1389 if self._data: 1390 if self._last is not None: 1391 text = "".join(self._data) 1392 if self._tail: 1393 assert self._last.tail is None, "internal error (tail)" 1394 self._last.tail = text 1395 else: 1396 assert self._last.text is None, "internal error (text)" 1397 self._last.text = text 1398 self._data = [] 1399 1400 def data(self, data): 1401 """Add text to current element.""" 1402 self._data.append(data) 1403 1404 def start(self, tag, attrs): 1405 """Open new element and return it. 1406 1407 *tag* is the element name, *attrs* is a dict containing element 1408 attributes. 1409 1410 """ 1411 self._flush() 1412 self._last = elem = self._factory(tag, attrs) 1413 if self._elem: 1414 self._elem[-1].append(elem) 1415 self._elem.append(elem) 1416 self._tail = 0 1417 return elem 1418 1419 def end(self, tag): 1420 """Close and return current Element. 1421 1422 *tag* is the element name. 1423 1424 """ 1425 self._flush() 1426 self._last = self._elem.pop() 1427 assert self._last.tag == tag,\ 1428 "end tag mismatch (expected %s, got %s)" % ( 1429 self._last.tag, tag) 1430 self._tail = 1 1431 return self._last 1432 1433 1434 # also see ElementTree and TreeBuilder 1435 class XMLParser: 1436 """Element structure builder for XML source data based on the expat parser. 1437 1438 *html* are predefined HTML entities (deprecated and not supported), 1439 *target* is an optional target object which defaults to an instance of the 1440 standard TreeBuilder class, *encoding* is an optional encoding string 1441 which if given, overrides the encoding specified in the XML file: 1442 http://www.iana.org/assignments/character-sets 1443 1444 """ 1445 1446 def __init__(self, html=0, target=None, encoding=None): 1447 try: 1448 from xml.parsers import expat 1449 except ImportError: 1450 try: 1451 import pyexpat as expat 1452 except ImportError: 1453 raise ImportError( 1454 "No module named expat; use SimpleXMLTreeBuilder instead" 1455 ) 1456 parser = expat.ParserCreate(encoding, "}") 1457 if target is None: 1458 target = TreeBuilder() 1459 # underscored names are provided for compatibility only 1460 self.parser = self._parser = parser 1461 self.target = self._target = target 1462 self._error = expat.error 1463 self._names = {} # name memo cache 1464 # main callbacks 1465 parser.DefaultHandlerExpand = self._default 1466 if hasattr(target, 'start'): 1467 parser.StartElementHandler = self._start 1468 if hasattr(target, 'end'): 1469 parser.EndElementHandler = self._end 1470 if hasattr(target, 'data'): 1471 parser.CharacterDataHandler = target.data 1472 # miscellaneous callbacks 1473 if hasattr(target, 'comment'): 1474 parser.CommentHandler = target.comment 1475 if hasattr(target, 'pi'): 1476 parser.ProcessingInstructionHandler = target.pi 1477 # Configure pyexpat: buffering, new-style attribute handling. 1478 parser.buffer_text = 1 1479 parser.ordered_attributes = 1 1480 parser.specified_attributes = 1 1481 self._doctype = None 1482 self.entity = {} 1483 try: 1484 self.version = "Expat %d.%d.%d" % expat.version_info 1485 except AttributeError: 1486 pass # unknown 1487 1488 def _setevents(self, events_queue, events_to_report): 1489 # Internal API for XMLPullParser 1490 # events_to_report: a list of events to report during parsing (same as 1491 # the *events* of XMLPullParser's constructor. 1492 # events_queue: a list of actual parsing events that will be populated 1493 # by the underlying parser. 1494 # 1495 parser = self._parser 1496 append = events_queue.append 1497 for event_name in events_to_report: 1498 if event_name == "start": 1499 parser.ordered_attributes = 1 1500 parser.specified_attributes = 1 1501 def handler(tag, attrib_in, event=event_name, append=append, 1502 start=self._start): 1503 append((event, start(tag, attrib_in))) 1504 parser.StartElementHandler = handler 1505 elif event_name == "end": 1506 def handler(tag, event=event_name, append=append, 1507 end=self._end): 1508 append((event, end(tag))) 1509 parser.EndElementHandler = handler 1510 elif event_name == "start-ns": 1511 def handler(prefix, uri, event=event_name, append=append): 1512 append((event, (prefix or "", uri or ""))) 1513 parser.StartNamespaceDeclHandler = handler 1514 elif event_name == "end-ns": 1515 def handler(prefix, event=event_name, append=append): 1516 append((event, None)) 1517 parser.EndNamespaceDeclHandler = handler 1518 else: 1519 raise ValueError("unknown event %r" % event_name) 1520 1521 def _raiseerror(self, value): 1522 err = ParseError(value) 1523 err.code = value.code 1524 err.position = value.lineno, value.offset 1525 raise err 1526 1527 def _fixname(self, key): 1528 # expand qname, and convert name string to ascii, if possible 1529 try: 1530 name = self._names[key] 1531 except KeyError: 1532 name = key 1533 if "}" in name: 1534 name = "{" + name 1535 self._names[key] = name 1536 return name 1537 1538 def _start(self, tag, attr_list): 1539 # Handler for expat's StartElementHandler. Since ordered_attributes 1540 # is set, the attributes are reported as a list of alternating 1541 # attribute name,value. 1542 fixname = self._fixname 1543 tag = fixname(tag) 1544 attrib = {} 1545 if attr_list: 1546 for i in range(0, len(attr_list), 2): 1547 attrib[fixname(attr_list[i])] = attr_list[i+1] 1548 return self.target.start(tag, attrib) 1549 1550 def _end(self, tag): 1551 return self.target.end(self._fixname(tag)) 1552 1553 def _default(self, text): 1554 prefix = text[:1] 1555 if prefix == "&": 1556 # deal with undefined entities 1557 try: 1558 data_handler = self.target.data 1559 except AttributeError: 1560 return 1561 try: 1562 data_handler(self.entity[text[1:-1]]) 1563 except KeyError: 1564 from xml.parsers import expat 1565 err = expat.error( 1566 "undefined entity %s: line %d, column %d" % 1567 (text, self.parser.ErrorLineNumber, 1568 self.parser.ErrorColumnNumber) 1569 ) 1570 err.code = 11 # XML_ERROR_UNDEFINED_ENTITY 1571 err.lineno = self.parser.ErrorLineNumber 1572 err.offset = self.parser.ErrorColumnNumber 1573 raise err 1574 elif prefix == "<" and text[:9] == "<!DOCTYPE": 1575 self._doctype = [] # inside a doctype declaration 1576 elif self._doctype is not None: 1577 # parse doctype contents 1578 if prefix == ">": 1579 self._doctype = None 1580 return 1581 text = text.strip() 1582 if not text: 1583 return 1584 self._doctype.append(text) 1585 n = len(self._doctype) 1586 if n > 2: 1587 type = self._doctype[1] 1588 if type == "PUBLIC" and n == 4: 1589 name, type, pubid, system = self._doctype 1590 if pubid: 1591 pubid = pubid[1:-1] 1592 elif type == "SYSTEM" and n == 3: 1593 name, type, system = self._doctype 1594 pubid = None 1595 else: 1596 return 1597 if hasattr(self.target, "doctype"): 1598 self.target.doctype(name, pubid, system[1:-1]) 1599 elif self.doctype != self._XMLParser__doctype: 1600 # warn about deprecated call 1601 self._XMLParser__doctype(name, pubid, system[1:-1]) 1602 self.doctype(name, pubid, system[1:-1]) 1603 self._doctype = None 1604 1605 def doctype(self, name, pubid, system): 1606 """(Deprecated) Handle doctype declaration 1607 1608 *name* is the Doctype name, *pubid* is the public identifier, 1609 and *system* is the system identifier. 1610 1611 """ 1612 warnings.warn( 1613 "This method of XMLParser is deprecated. Define doctype() " 1614 "method on the TreeBuilder target.", 1615 DeprecationWarning, 1616 ) 1617 1618 # sentinel, if doctype is redefined in a subclass 1619 __doctype = doctype 1620 1621 def feed(self, data): 1622 """Feed encoded data to parser.""" 1623 try: 1624 self.parser.Parse(data, 0) 1625 except self._error as v: 1626 self._raiseerror(v) 1627 1628 def close(self): 1629 """Finish feeding data to parser and return element structure.""" 1630 try: 1631 self.parser.Parse("", 1) # end of data 1632 except self._error as v: 1633 self._raiseerror(v) 1634 try: 1635 close_handler = self.target.close 1636 except AttributeError: 1637 pass 1638 else: 1639 return close_handler() 1640 finally: 1641 # get rid of circular references 1642 del self.parser, self._parser 1643 del self.target, self._target 1644 1645 1646 # Import the C accelerators 1647 try: 1648 # Element is going to be shadowed by the C implementation. We need to keep 1649 # the Python version of it accessible for some "creative" by external code 1650 # (see tests) 1651 _Element_Py = Element 1652 1653 # Element, SubElement, ParseError, TreeBuilder, XMLParser 1654 from _elementtree import * 1655 except ImportError: 1656 pass
每天更新一点点,温习一点点点,进步一点点