src/ElementTreePython3.py

   1 """Lightweight XML support for Python.
   2
   3  XML is an inherently hierarchical data format, and the most natural way to
   4  represent it is with a tree.  This module has two classes for this purpose:
   5
   6     1. ElementTree represents the whole XML document as a tree and
   7
   8     2. Element represents a single node in this tree.
   9
  10  Interactions with the whole document (reading and writing to/from files) are
  11  usually done on the ElementTree level.  Interactions with a single XML element
  12  and its sub-elements are done on the Element level.
  13
  14  Element is a flexible container object designed to store hierarchical data
  15  structures in memory. It can be described as a cross between a list and a
  16  dictionary.  Each Element has a number of properties associated with it:
  17
  18     'tag' - a string containing the element's name.
  19
  20     'attributes' - a Python dictionary storing the element's attributes.
  21
  22     'text' - a string containing the element's text content.
  23
  24     'tail' - an optional string containing text after the element's end tag.
  25
  26     And a number of child elements stored in a Python sequence.
  27
  28  To create an element instance, use the Element constructor,
  29  or the SubElement factory function.
  30
  31  You can also use the ElementTree class to wrap an element structure
  32  and convert it to and from XML.
  33
  34 """
  35
  36 #---------------------------------------------------------------------
  37 # Licensed to PSF under a Contributor Agreement.
  38 # See http://www.python.org/psf/license for licensing details.
  39 #
  40 # ElementTree
  41 # Copyright (c) 1999-2008 by Fredrik Lundh.  All rights reserved.
  42 #
  43 # fredrik@pythonware.com
  44 # http://www.pythonware.com
  45 # --------------------------------------------------------------------
  46 # The ElementTree toolkit is
  47 #
  48 # Copyright (c) 1999-2008 by Fredrik Lundh
  49 #
  50 # By obtaining, using, and/or copying this software and/or its
  51 # associated documentation, you agree that you have read, understood,
  52 # and will comply with the following terms and conditions:
  53 #
  54 # Permission to use, copy, modify, and distribute this software and
  55 # its associated documentation for any purpose and without fee is
  56 # hereby granted, provided that the above copyright notice appears in
  57 # all copies, and that both that copyright notice and this permission
  58 # notice appear in supporting documentation, and that the name of
  59 # Secret Labs AB or the author not be used in advertising or publicity
  60 # pertaining to distribution of the software without specific, written
  61 # prior permission.
  62 #
  63 # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
  64 # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
  65 # ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
  66 # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
  67 # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  68 # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  69 # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  70 # OF THIS SOFTWARE.
  71 # --------------------------------------------------------------------
  72
# Names exported by a star-import of this module.  Only the names listed
# here are considered part of the public API.
__all__ = [
    # public symbols
    "Comment",
    "dump",
    "Element", "ElementTree",
    "fromstring", "fromstringlist",
    "iselement", "iterparse",
    "parse", "ParseError",
    "PI", "ProcessingInstruction",
    "QName",
    "SubElement",
    "tostring", "tostringlist",
    "TreeBuilder",
    "VERSION",
    "XML", "XMLID",
    "XMLParser",
    "register_namespace",
    ]

# Version string of this module; it is itself exported through __all__.
VERSION = "1.3.0"
  93
  94 import sys
  95 import re
  96 import warnings
  97 import io
  98 import contextlib
  99
 100 import ElementPath
 101
 102
 103 class ParseError(SyntaxError):
 104     """An error when parsing an XML document.
 105
 106     In addition to its exception value, a ParseError contains
 107     two extra attributes:
 108         'code'     - the specific exception code
 109         'position' - the line and column of the error
 110
 111     """
 112     pass
 113
 114 # --------------------------------------------------------------------
 115
 116
 117 def iselement(element):
 118     """Return True if *element* appears to be an Element."""
 119     return hasattr(element, 'tag')
 120
 121
 122 class Element:
 123     """An XML element.
 124
 125     This class is the reference implementation of the Element interface.
 126
 127     An element's length is its number of subelements.  That means if you
 128     want to check if an element is truly empty, you should check BOTH
 129     its length AND its text attribute.
 130
 131     The element tag, attribute names, and attribute values can be either
 132     bytes or strings.
 133
 134     *tag* is the element name.  *attrib* is an optional dictionary containing
 135     element attributes. *extra* are additional element attributes given as
 136     keyword arguments.
 137
 138     Example form:
 139         <tag attrib>text<child/>...</tag>tail
 140
 141     """
 142
 143     tag = None
 144     """The element's name."""
 145
 146     attrib = None
 147     """Dictionary of the element's attributes."""
 148
 149     text = None
 150     """
 151     Text before first subelement. This is either a string or the value None.
 152     Note that if there is no text, this attribute may be either
 153     None or the empty string, depending on the parser.
 154
 155     """
 156
 157     tail = None
 158     """
 159     Text after this element's end tag, but before the next sibling element's
 160     start tag.  This is either a string or the value None.  Note that if there
 161     was no text, this attribute may be either None or an empty string,
 162     depending on the parser.
 163
 164     """
 165
 166     def __init__(self, tag, attrib={}, **extra):
 167         if not isinstance(attrib, dict):
 168             raise TypeError("attrib must be dict, not %s" % (
 169                 attrib.__class__.__name__,))
 170         attrib = attrib.copy()
 171         attrib.update(extra)
 172         self.tag = tag
 173         self.attrib = attrib
 174         self._children = []
 175
 176     def __repr__(self):
 177         return "<Element %s at 0x%x>" % (repr(self.tag), id(self))
 178
 179     def makeelement(self, tag, attrib):
 180         """Create a new element with the same type.
 181
 182         *tag* is a string containing the element name.
 183         *attrib* is a dictionary containing the element attributes.
 184
 185         Do not call this method, use the SubElement factory function instead.
 186
 187         """
 188         return self.__class__(tag, attrib)
 189
 190     def copy(self):
 191         """Return copy of current element.
 192
 193         This creates a shallow copy. Subelements will be shared with the
 194         original tree.
 195
 196         """
 197         elem = self.makeelement(self.tag, self.attrib)
 198         elem.text = self.text
 199         elem.tail = self.tail
 200         elem[:] = self
 201         return elem
 202
 203     def __len__(self):
 204         return len(self._children)
 205
 206     def __bool__(self):
 207         warnings.warn(
 208             "The behavior of this method will change in future versions.  "
 209             "Use specific 'len(elem)' or 'elem is not None' test instead.",
 210             FutureWarning, stacklevel=2
 211             )
 212         return len(self._children) != 0 # emulate old behaviour, for now
 213
 214     def __getitem__(self, index):
 215         return self._children[index]
 216
 217     def __setitem__(self, index, element):
 218         # if isinstance(index, slice):
 219         #     for elt in element:
 220         #         assert iselement(elt)
 221         # else:
 222         #     assert iselement(element)
 223         self._children[index] = element
 224
 225     def __delitem__(self, index):
 226         del self._children[index]
 227
 228     def append(self, subelement):
 229         """Add *subelement* to the end of this element.
 230
 231         The new element will appear in document order after the last existing
 232         subelement (or directly after the text, if it's the first subelement),
 233         but before the end tag for this element.
 234
 235         """
 236         self._assert_is_element(subelement)
 237         self._children.append(subelement)
 238
 239     def extend(self, elements):
 240         """Append subelements from a sequence.
 241
 242         *elements* is a sequence with zero or more elements.
 243
 244         """
 245         for element in elements:
 246             self._assert_is_element(element)
 247         self._children.extend(elements)
 248
 249     def insert(self, index, subelement):
 250         """Insert *subelement* at position *index*."""
 251         self._assert_is_element(subelement)
 252         self._children.insert(index, subelement)
 253
 254     def _assert_is_element(self, e):
 255         # Need to refer to the actual Python implementation, not the
 256         # shadowing C implementation.
 257         if not isinstance(e, _Element_Py):
 258             raise TypeError('expected an Element, not %s' % type(e).__name__)
 259
 260     def remove(self, subelement):
 261         """Remove matching subelement.
 262
 263         Unlike the find methods, this method compares elements based on
 264         identity, NOT ON tag value or contents.  To remove subelements by
 265         other means, the easiest way is to use a list comprehension to
 266         select what elements to keep, and then use slice assignment to update
 267         the parent element.
 268
 269         ValueError is raised if a matching element could not be found.
 270
 271         """
 272         # assert iselement(element)
 273         self._children.remove(subelement)
 274
 275     def getchildren(self):
 276         """(Deprecated) Return all subelements.
 277
 278         Elements are returned in document order.
 279
 280         """
 281         warnings.warn(
 282             "This method will be removed in future versions.  "
 283             "Use 'list(elem)' or iteration over elem instead.",
 284             DeprecationWarning, stacklevel=2
 285             )
 286         return self._children
 287
 288     def find(self, path, namespaces=None):
 289         """Find first matching element by tag name or path.
 290
 291         *path* is a string having either an element tag or an XPath,
 292         *namespaces* is an optional mapping from namespace prefix to full name.
 293
 294         Return the first matching element, or None if no element was found.
 295
 296         """
 297         return ElementPath.find(self, path, namespaces)
 298
 299     def findtext(self, path, default=None, namespaces=None):
 300         """Find text for first matching element by tag name or path.
 301
 302         *path* is a string having either an element tag or an XPath,
 303         *default* is the value to return if the element was not found,
 304         *namespaces* is an optional mapping from namespace prefix to full name.
 305
 306         Return text content of first matching element, or default value if
 307         none was found.  Note that if an element is found having no text
 308         content, the empty string is returned.
 309
 310         """
 311         return ElementPath.findtext(self, path, default, namespaces)
 312
 313     def findall(self, path, namespaces=None):
 314         """Find all matching subelements by tag name or path.
 315
 316         *path* is a string having either an element tag or an XPath,
 317         *namespaces* is an optional mapping from namespace prefix to full name.
 318
 319         Returns list containing all matching elements in document order.
 320
 321         """
 322         return ElementPath.findall(self, path, namespaces)
 323
 324     def iterfind(self, path, namespaces=None):
 325         """Find all matching subelements by tag name or path.
 326
 327         *path* is a string having either an element tag or an XPath,
 328         *namespaces* is an optional mapping from namespace prefix to full name.
 329
 330         Return an iterable yielding all matching elements in document order.
 331
 332         """
 333         return ElementPath.iterfind(self, path, namespaces)
 334
 335     def clear(self):
 336         """Reset element.
 337
 338         This function removes all subelements, clears all attributes, and sets
 339         the text and tail attributes to None.
 340
 341         """
 342         self.attrib.clear()
 343         self._children = []
 344         self.text = self.tail = None
 345
 346     def get(self, key, default=None):
 347         """Get element attribute.
 348
 349         Equivalent to attrib.get, but some implementations may handle this a
 350         bit more efficiently.  *key* is what attribute to look for, and
 351         *default* is what to return if the attribute was not found.
 352
 353         Returns a string containing the attribute value, or the default if
 354         attribute was not found.
 355
 356         """
 357         return self.attrib.get(key, default)
 358
 359     def set(self, key, value):
 360         """Set element attribute.
 361
 362         Equivalent to attrib[key] = value, but some implementations may handle
 363         this a bit more efficiently.  *key* is what attribute to set, and
 364         *value* is the attribute value to set it to.
 365
 366         """
 367         self.attrib[key] = value
 368
 369     def keys(self):
 370         """Get list of attribute names.
 371
 372         Names are returned in an arbitrary order, just like an ordinary
 373         Python dict.  Equivalent to attrib.keys()
 374
 375         """
 376         return self.attrib.keys()
 377
 378     def items(self):
 379         """Get element attributes as a sequence.
 380
 381         The attributes are returned in arbitrary order.  Equivalent to
 382         attrib.items().
 383
 384         Return a list of (name, value) tuples.
 385
 386         """
 387         return self.attrib.items()
 388
 389     def iter(self, tag=None):
 390         """Create tree iterator.
 391
 392         The iterator loops over the element and all subelements in document
 393         order, returning all elements with a matching tag.
 394
 395         If the tree structure is modified during iteration, new or removed
 396         elements may or may not be included.  To get a stable set, use the
 397         list() function on the iterator, and loop over the resulting list.
 398
 399         *tag* is what tags to look for (default is to return all elements)
 400
 401         Return an iterator containing all the matching elements.
 402
 403         """
 404         if tag == "*":
 405             tag = None
 406         if tag is None or self.tag == tag:
 407             yield self
 408         for e in self._children:
 409             yield from e.iter(tag)
 410
 411     # compatibility
 412     def getiterator(self, tag=None):
 413         # Change for a DeprecationWarning in 1.4
 414         warnings.warn(
 415             "This method will be removed in future versions.  "
 416             "Use 'elem.iter()' or 'list(elem.iter())' instead.",
 417             PendingDeprecationWarning, stacklevel=2
 418         )
 419         return list(self.iter(tag))
 420
 421     def itertext(self):
 422         """Create text iterator.
 423
 424         The iterator loops over the element and all subelements in document
 425         order, returning all inner text.
 426
 427         """
 428         tag = self.tag
 429         if not isinstance(tag, str) and tag is not None:
 430             return
 431         if self.text:
 432             yield self.text
 433         for e in self:
 434             yield from e.itertext()
 435             if e.tail:
 436                 yield e.tail
 437
 438
 439 def SubElement(parent, tag, attrib={}, **extra):
 440     """Subelement factory which creates an element instance, and appends it
 441     to an existing parent.
 442
 443     The element tag, attribute names, and attribute values can be either
 444     bytes or Unicode strings.
 445
 446     *parent* is the parent element, *tag* is the subelements name, *attrib* is
 447     an optional directory containing element attributes, *extra* are
 448     additional attributes given as keyword arguments.
 449
 450     """
 451     attrib = attrib.copy()
 452     attrib.update(extra)
 453     element = parent.makeelement(tag, attrib)
 454     parent.append(element)
 455     return element
 456
 457
 458 def Comment(text=None):
 459     """Comment element factory.
 460
 461     This function creates a special element which the standard serializer
 462     serializes as an XML comment.
 463
 464     *text* is a string containing the comment string.
 465
 466     """
 467     element = Element(Comment)
 468     element.text = text
 469     return element
 470
 471
 472 def ProcessingInstruction(target, text=None):
 473     """Processing Instruction element factory.
 474
 475     This function creates a special element which the standard serializer
 476     serializes as an XML comment.
 477
 478     *target* is a string containing the processing instruction, *text* is a
 479     string containing the processing instruction contents, if any.
 480
 481     """
 482     element = Element(ProcessingInstruction)
 483     element.text = target
 484     if text:
 485         element.text = element.text + " " + text
 486     return element
 487
 488 PI = ProcessingInstruction
 489
 490
 491 class QName:
 492     """Qualified name wrapper.
 493
 494     This class can be used to wrap a QName attribute value in order to get
 495     proper namespace handing on output.
 496
 497     *text_or_uri* is a string containing the QName value either in the form
 498     {uri}local, or if the tag argument is given, the URI part of a QName.
 499
 500     *tag* is an optional argument which if given, will make the first
 501     argument (text_or_uri) be interpreted as a URI, and this argument (tag)
 502     be interpreted as a local name.
 503
 504     """
 505     def __init__(self, text_or_uri, tag=None):
 506         if tag:
 507             text_or_uri = "{%s}%s" % (text_or_uri, tag)
 508         self.text = text_or_uri
 509     def __str__(self):
 510         return self.text
 511     def __repr__(self):
 512         return '<QName %r>' % (self.text,)
 513     def __hash__(self):
 514         return hash(self.text)
 515     def __le__(self, other):
 516         if isinstance(other, QName):
 517             return self.text <= other.text
 518         return self.text <= other
 519     def __lt__(self, other):
 520         if isinstance(other, QName):
 521             return self.text < other.text
 522         return self.text < other
 523     def __ge__(self, other):
 524         if isinstance(other, QName):
 525             return self.text >= other.text
 526         return self.text >= other
 527     def __gt__(self, other):
 528         if isinstance(other, QName):
 529             return self.text > other.text
 530         return self.text > other
 531     def __eq__(self, other):
 532         if isinstance(other, QName):
 533             return self.text == other.text
 534         return self.text == other
 535     def __ne__(self, other):
 536         if isinstance(other, QName):
 537             return self.text != other.text
 538         return self.text != other
 539
 540 # --------------------------------------------------------------------
 541
 542
 543 class ElementTree:
 544     """An XML element hierarchy.
 545
 546     This class also provides support for serialization to and from
 547     standard XML.
 548
 549     *element* is an optional root element node,
 550     *file* is an optional file handle or file name of an XML file whose
 551     contents will be used to initialize the tree with.
 552
 553     """
 554     def __init__(self, element=None, file=None):
 555         # assert element is None or iselement(element)
 556         self._root = element # first node
 557         if file:
 558             self.parse(file)
 559
 560     def getroot(self):
 561         """Return root element of this tree."""
 562         return self._root
 563
 564     def _setroot(self, element):
 565         """Replace root element of this tree.
 566
 567         This will discard the current contents of the tree and replace it
 568         with the given element.  Use with care!
 569
 570         """
 571         # assert iselement(element)
 572         self._root = element
 573
 574     def parse(self, source, parser=None):
 575         """Load external XML document into element tree.
 576
 577         *source* is a file name or file object, *parser* is an optional parser
 578         instance that defaults to XMLParser.
 579
 580         ParseError is raised if the parser fails to parse the document.
 581
 582         Returns the root element of the given source document.
 583
 584         """
 585         close_source = False
 586         if not hasattr(source, "read"):
 587             source = open(source, "rb")
 588             close_source = True
 589         try:
 590             if parser is None:
 591                 # If no parser was specified, create a default XMLParser
 592                 parser = XMLParser()
 593                 if hasattr(parser, '_parse_whole'):
 594                     # The default XMLParser, when it comes from an accelerator,
 595                     # can define an internal _parse_whole API for efficiency.
 596                     # It can be used to parse the whole source without feeding
 597                     # it with chunks.
 598                     self._root = parser._parse_whole(source)
 599                     return self._root
 600             while True:
 601                 data = source.read(65536)
 602                 if not data:
 603                     break
 604                 parser.feed(data)
 605             self._root = parser.close()
 606             return self._root
 607         finally:
 608             if close_source:
 609                 source.close()
 610
 611     def iter(self, tag=None):
 612         """Create and return tree iterator for the root element.
 613
 614         The iterator loops over all elements in this tree, in document order.
 615
 616         *tag* is a string with the tag name to iterate over
 617         (default is to return all elements).
 618
 619         """
 620         # assert self._root is not None
 621         return self._root.iter(tag)
 622
 623     # compatibility
 624     def getiterator(self, tag=None):
 625         # Change for a DeprecationWarning in 1.4
 626         warnings.warn(
 627             "This method will be removed in future versions.  "
 628             "Use 'tree.iter()' or 'list(tree.iter())' instead.",
 629             PendingDeprecationWarning, stacklevel=2
 630         )
 631         return list(self.iter(tag))
 632
 633     def find(self, path, namespaces=None):
 634         """Find first matching element by tag name or path.
 635
 636         Same as getroot().find(path), which is Element.find()
 637
 638         *path* is a string having either an element tag or an XPath,
 639         *namespaces* is an optional mapping from namespace prefix to full name.
 640
 641         Return the first matching element, or None if no element was found.
 642
 643         """
 644         # assert self._root is not None
 645         if path[:1] == "/":
 646             path = "." + path
 647             warnings.warn(
 648                 "This search is broken in 1.3 and earlier, and will be "
 649                 "fixed in a future version.  If you rely on the current "
 650                 "behaviour, change it to %r" % path,
 651                 FutureWarning, stacklevel=2
 652                 )
 653         return self._root.find(path, namespaces)
 654
 655     def findtext(self, path, default=None, namespaces=None):
 656         """Find first matching element by tag name or path.
 657
 658         Same as getroot().findtext(path),  which is Element.findtext()
 659
 660         *path* is a string having either an element tag or an XPath,
 661         *namespaces* is an optional mapping from namespace prefix to full name.
 662
 663         Return the first matching element, or None if no element was found.
 664
 665         """
 666         # assert self._root is not None
 667         if path[:1] == "/":
 668             path = "." + path
 669             warnings.warn(
 670                 "This search is broken in 1.3 and earlier, and will be "
 671                 "fixed in a future version.  If you rely on the current "
 672                 "behaviour, change it to %r" % path,
 673                 FutureWarning, stacklevel=2
 674                 )
 675         return self._root.findtext(path, default, namespaces)
 676
 677     def findall(self, path, namespaces=None):
 678         """Find all matching subelements by tag name or path.
 679
 680         Same as getroot().findall(path), which is Element.findall().
 681
 682         *path* is a string having either an element tag or an XPath,
 683         *namespaces* is an optional mapping from namespace prefix to full name.
 684
 685         Return list containing all matching elements in document order.
 686
 687         """
 688         # assert self._root is not None
 689         if path[:1] == "/":
 690             path = "." + path
 691             warnings.warn(
 692                 "This search is broken in 1.3 and earlier, and will be "
 693                 "fixed in a future version.  If you rely on the current "
 694                 "behaviour, change it to %r" % path,
 695                 FutureWarning, stacklevel=2
 696                 )
 697         return self._root.findall(path, namespaces)
 698
 699     def iterfind(self, path, namespaces=None):
 700         """Find all matching subelements by tag name or path.
 701
 702         Same as getroot().iterfind(path), which is element.iterfind()
 703
 704         *path* is a string having either an element tag or an XPath,
 705         *namespaces* is an optional mapping from namespace prefix to full name.
 706
 707         Return an iterable yielding all matching elements in document order.
 708
 709         """
 710         # assert self._root is not None
 711         if path[:1] == "/":
 712             path = "." + path
 713             warnings.warn(
 714                 "This search is broken in 1.3 and earlier, and will be "
 715                 "fixed in a future version.  If you rely on the current "
 716                 "behaviour, change it to %r" % path,
 717                 FutureWarning, stacklevel=2
 718                 )
 719         return self._root.iterfind(path, namespaces)
 720
 721     def write(self, file_or_filename,
 722               encoding=None,
 723               xml_declaration=None,
 724               default_namespace=None,
 725               method=None, *,
 726               short_empty_elements=True):
 727         """Write element tree to a file as XML.
 728
 729         Arguments:
 730           *file_or_filename* -- file name or a file object opened for writing
 731
 732           *encoding* -- the output encoding (default: US-ASCII)
 733
 734           *xml_declaration* -- bool indicating if an XML declaration should be
 735                                added to the output. If None, an XML declaration
 736                                is added if encoding IS NOT either of:
 737                                US-ASCII, UTF-8, or Unicode
 738
 739           *default_namespace* -- sets the default XML namespace (for "xmlns")
 740
 741           *method* -- either "xml" (default), "html, "text", or "c14n"
 742
 743           *short_empty_elements* -- controls the formatting of elements
 744                                     that contain no content. If True (default)
 745                                     they are emitted as a single self-closed
 746                                     tag, otherwise they are emitted as a pair
 747                                     of start/end tags
 748
 749         """
 750         if not method:
 751             method = "xml"
 752         elif method not in _serialize:
 753             raise ValueError("unknown method %r" % method)
 754         if not encoding:
 755             if method == "c14n":
 756                 encoding = "utf-8"
 757             else:
 758                 encoding = "us-ascii"
 759         enc_lower = encoding.lower()
 760         with _get_writer(file_or_filename, enc_lower) as write:
 761             if method == "xml" and (xml_declaration or
 762                     (xml_declaration is None and
 763                      enc_lower not in ("utf-8", "us-ascii", "unicode"))):
 764                 declared_encoding = encoding
 765                 if enc_lower == "unicode":
 766                     # Retrieve the default encoding for the xml declaration
 767                     import locale
 768                     declared_encoding = locale.getpreferredencoding()
 769                 write("<?xml version='1.0' encoding='%s'?>\n" % (
 770                     declared_encoding,))
 771             if method == "text":
 772                 _serialize_text(write, self._root)
 773             else:
 774                 qnames, namespaces = _namespaces(self._root, default_namespace)
 775                 serialize = _serialize[method]
 776                 serialize(write, self._root, qnames, namespaces,
 777                           short_empty_elements=short_empty_elements)
 778
 779     def write_c14n(self, file):
 780         # lxml.etree compatibility.  use output method instead
 781         return self.write(file, method="c14n")
 782
 783 # --------------------------------------------------------------------
 784 # serialization support
 785
 786 @contextlib.contextmanager
 787 def _get_writer(file_or_filename, encoding):
 788     # returns text write method and release all resources after using
 789     try:
 790         write = file_or_filename.write
 791     except AttributeError:
 792         # file_or_filename is a file name
 793         if encoding == "unicode":
 794             file = open(file_or_filename, "w")
 795         else:
 796             file = open(file_or_filename, "w", encoding=encoding,
 797                         errors="xmlcharrefreplace")
 798         with file:
 799             yield file.write
 800     else:
 801         # file_or_filename is a file-like object
 802         # encoding determines if it is a text or binary writer
 803         if encoding == "unicode":
 804             # use a text writer as is
 805             yield write
 806         else:
 807             # wrap a binary writer with TextIOWrapper
 808             with contextlib.ExitStack() as stack:
 809                 if isinstance(file_or_filename, io.BufferedIOBase):
 810                     file = file_or_filename
 811                 elif isinstance(file_or_filename, io.RawIOBase):
 812                     file = io.BufferedWriter(file_or_filename)
 813                     # Keep the original file open when the BufferedWriter is
 814                     # destroyed
 815                     stack.callback(file.detach)
 816                 else:
 817                     # This is to handle passed objects that aren't in the
 818                     # IOBase hierarchy, but just have a write method
 819                     file = io.BufferedIOBase()
 820                     file.writable = lambda: True
 821                     file.write = write
 822                     try:
 823                         # TextIOWrapper uses this methods to determine
 824                         # if BOM (for UTF-16, etc) should be added
 825                         file.seekable = file_or_filename.seekable
 826                         file.tell = file_or_filename.tell
 827                     except AttributeError:
 828                         pass
 829                 file = io.TextIOWrapper(file,
 830                                         encoding=encoding,
 831                                         errors="xmlcharrefreplace",
 832                                         newline="\n")
 833                 # Keep the original file open when the TextIOWrapper is
 834                 # destroyed
 835                 stack.callback(file.detach)
 836                 yield file.write
 837
 838 def _namespaces(elem, default_namespace=None):
 839     # identify namespaces used in this tree
 840
 841     # maps qnames to *encoded* prefix:local names
 842     qnames = {None: None}
 843
 844     # maps uri:s to prefixes
 845     namespaces = {}
 846     if default_namespace:
 847         namespaces[default_namespace] = ""
 848
 849     def add_qname(qname):
 850         # calculate serialized qname representation
 851         try:
 852             if qname[:1] == "{":
 853                 uri, tag = qname[1:].rsplit("}", 1)
 854                 prefix = namespaces.get(uri)
 855                 if prefix is None:
 856                     prefix = _namespace_map.get(uri)
 857                     if prefix is None:
 858                         prefix = "ns%d" % len(namespaces)
 859                     if prefix != "xml":
 860                         namespaces[uri] = prefix
 861                 if prefix:
 862                     qnames[qname] = "%s:%s" % (prefix, tag)
 863                 else:
 864                     qnames[qname] = tag # default element
 865             else:
 866                 if default_namespace:
 867                     # FIXME: can this be handled in XML 1.0?
 868                     raise ValueError(
 869                         "cannot use non-qualified names with "
 870                         "default_namespace option"
 871                         )
 872                 qnames[qname] = qname
 873         except TypeError:
 874             _raise_serialization_error(qname)
 875
 876     # populate qname and namespaces table
 877     for elem in elem.iter():
 878         tag = elem.tag
 879         if isinstance(tag, QName):
 880             if tag.text not in qnames:
 881                 add_qname(tag.text)
 882         elif isinstance(tag, str):
 883             if tag not in qnames:
 884                 add_qname(tag)
 885         elif tag is not None and tag is not Comment and tag is not PI:
 886             _raise_serialization_error(tag)
 887         for key, value in elem.items():
 888             if isinstance(key, QName):
 889                 key = key.text
 890             if key not in qnames:
 891                 add_qname(key)
 892             if isinstance(value, QName) and value.text not in qnames:
 893                 add_qname(value.text)
 894         text = elem.text
 895         if isinstance(text, QName) and text.text not in qnames:
 896             add_qname(text.text)
 897     return qnames, namespaces
 898
 899 def _serialize_xml(write, elem, qnames, namespaces,
 900                    short_empty_elements, **kwargs):
 901     tag = elem.tag
 902     text = elem.text
 903     if tag is Comment:
 904         write("<!--%s-->" % text)
 905     elif tag is ProcessingInstruction:
 906         write("<?%s?>" % text)
 907     else:
 908         tag = qnames[tag]
 909         if tag is None:
 910             if text:
 911                 write(_escape_cdata(text))
 912             for e in elem:
 913                 _serialize_xml(write, e, qnames, None,
 914                                short_empty_elements=short_empty_elements)
 915         else:
 916             write("<" + tag)
 917             items = list(elem.items())
 918             if items or namespaces:
 919                 if namespaces:
 920                     for v, k in sorted(namespaces.items(),
 921                                        key=lambda x: x[1]):  # sort on prefix
 922                         if k:
 923                             k = ":" + k
 924                         write(" xmlns%s=\"%s\"" % (
 925                             k,
 926                             _escape_attrib(v)
 927                             ))
 928                 for k, v in sorted(items):  # lexical order
 929                     if isinstance(k, QName):
 930                         k = k.text
 931                     if isinstance(v, QName):
 932                         v = qnames[v.text]
 933                     else:
 934                         v = _escape_attrib(v)
 935                     write(" %s=\"%s\"" % (qnames[k], v))
 936             if text or len(elem) or not short_empty_elements:
 937                 write(">")
 938                 if text:
 939                     write(_escape_cdata(text))
 940                 for e in elem:
 941                     _serialize_xml(write, e, qnames, None,
 942                                    short_empty_elements=short_empty_elements)
 943                 write("</" + tag + ">")
 944             else:
 945                 write(" />")
 946     if elem.tail:
 947         write(_escape_cdata(elem.tail))
 948
 949 # add from cvw jan 2019
 950 def _serialize_pretty_xml(write, elem, qnames, namespaces,
 951                      short_empty_elements, indent=0):
 952     # print("*****pretty***** indent", elem.tag, indent)
 953     tag = elem.tag
 954     text = elem.text
 955     if tag is Comment:
 956       write("<!--%s-->" % text)
 957     elif tag is ProcessingInstruction:
 958       write("<?%s?>" % text)
 959     else:
 960       tag = qnames[tag]
 961       if tag is None:
 962         if text:
 963           write(_escape_cdata(text))
 964         for e in elem:
 965           _serialize_pretty_xml(write, e, qnames, None,
 966                          short_empty_elements=short_empty_elements, indent=indent)
 967       else:
 968         write(" "*indent + "<" + tag)
 969         items = list(elem.items())
 970         if items or namespaces:
 971           if namespaces:
 972             for v, k in sorted(namespaces.items(),
 973                                key=lambda x: x[1]):  # sort on prefix
 974               if k:
 975                 k = ":" + k
 976               write(" xmlns%s=\"%s\"" % (
 977                 k,
 978                 _escape_attrib(v)
 979               ))
 980           for k, v in sorted(items):  # lexical order
 981             # print("atrrib ", k, v)
 982             if isinstance(k, QName):
 983               k = k.text
 984             if isinstance(v, QName):
 985               v = qnames[v.text]
 986             else:
 987               v = _escape_attrib(v)
 988             write(" %s=\"%s\"" % (qnames[k], v))
 989         if text or len(elem) or not short_empty_elements:
 990           if text:
 991             write(">")
 992             write(_escape_cdata(text))
 993           else:
 994             write(">\n")
 995
 996           for e in elem:
 997             _serialize_pretty_xml(write, e, qnames, None,
 998                            short_empty_elements=short_empty_elements, indent=indent+2)
 999           write(" "*indent + "</" + tag + ">\n")
1000         else:
1001           write(" />\n")
1002     if elem.tail:
1003       write(_escape_cdata(elem.tail))
1004
1005
# Lower-case names of the HTML elements for which _serialize_html() writes
# no end tag.  Kept as a set for fast membership tests.
HTML_EMPTY = {
    "area", "base", "basefont", "br", "col", "frame", "hr",
    "img", "input", "isindex", "link", "meta", "param",
}
1013
def _serialize_html(write, elem, qnames, namespaces, **kwargs):
    """Write *elem* and its subtree as HTML by calling *write* with text.

    *qnames* maps tags and attribute names to their serialized names and
    *namespaces* maps URIs to prefixes; the namespace declarations are
    emitted on this element only (children are serialized with None).
    The text of ``script`` and ``style`` elements is written unescaped,
    and elements listed in HTML_EMPTY get no end tag.  The element's tail,
    if any, is written after it.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text))
    else:
        name = qnames[tag]
        if name is None:
            # No serialized name: emit only the content, without markup.
            if text:
                write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
        else:
            write("<" + name)
            attributes = list(elem.items())
            if namespaces:
                declarations = sorted(namespaces.items(),
                                      key=lambda pair: pair[1])  # by prefix
                for uri, prefix in declarations:
                    if prefix:
                        prefix = ":" + prefix
                    write(' xmlns%s="%s"' % (prefix, _escape_attrib(uri)))
            for key, value in sorted(attributes):  # lexical order
                if isinstance(key, QName):
                    key = key.text
                if isinstance(value, QName):
                    value = qnames[value.text]
                else:
                    value = _escape_attrib_html(value)
                # FIXME: handle boolean attributes
                write(' %s="%s"' % (qnames[key], value))
            write(">")
            lowered = name.lower()
            if text:
                if lowered in ("script", "style"):
                    write(text)
                else:
                    write(_escape_cdata(text))
            for child in elem:
                _serialize_html(write, child, qnames, None)
            if lowered not in HTML_EMPTY:
                write("</" + name + ">")
    if elem.tail:
        write(_escape_cdata(elem.tail))
1063
1064 def _serialize_text(write, elem):
1065     for part in elem.itertext():
1066         write(part)
1067     if elem.tail:
1068         write(elem.tail)
1069
# Table of serializer functions keyed by output-method name.
# NOTE(review): presumably consulted by ElementTree.write() to resolve its
# *method* argument -- confirm against the caller.
_serialize = {
    "xml": _serialize_xml,
    "pretty_xml": _serialize_pretty_xml,
    "html": _serialize_html,
    "text": _serialize_text,
# this optional method is imported at the end of the module
#   "c14n": _serialize_c14n,
}
1078
1079
def register_namespace(prefix, uri):
    """Register a namespace prefix.

    The registry is global, and any existing mapping for either the
    given prefix or the namespace URI will be removed.

    *prefix* is the namespace prefix, *uri* is a namespace uri. Tags and
    attributes in this namespace will be serialized with prefix if possible.

    ValueError is raised if prefix is reserved or is invalid.

    """
    # "ns<digits>" is the form the serializer generates for namespaces
    # that have no registered prefix, so callers may not claim it.
    # (Raw string: "\d" is not a valid escape in an ordinary literal.)
    if re.match(r"ns\d+$", prefix):
        raise ValueError("Prefix format reserved for internal use")
    # Iterate over a snapshot because entries are deleted while scanning.
    for k, v in list(_namespace_map.items()):
        if k == uri or v == prefix:
            del _namespace_map[k]
    _namespace_map[uri] = prefix
1098
# Global registry mapping namespace URIs to their preferred prefixes.
# register_namespace() updates it, and _namespaces() falls back to it
# when a URI has not been given a prefix yet.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # dublin core
    "http://purl.org/dc/elements/1.1/": "dc",
}
# For tests and troubleshooting: expose the live registry (the same dict
# object, not a copy) as an attribute of register_namespace.
register_namespace._namespace_map = _namespace_map
1113
1114 def _raise_serialization_error(text):
1115     raise TypeError(
1116         "cannot serialize %r (type %s)" % (text, type(text).__name__)
1117         )
1118
1119 def _escape_cdata(text):
1120     # escape character data
1121     try:
1122         # it's worth avoiding do-nothing calls for strings that are
1123         # shorter than 500 character, or so.  assume that's, by far,
1124         # the most common case in most applications.
1125         if "&" in text:
1126             text = text.replace("&", "&amp;")
1127         if "<" in text:
1128             text = text.replace("<", "&lt;")
1129         if ">" in text:
1130             text = text.replace(">", "&gt;")
1131         return text
1132     except (TypeError, AttributeError):
1133         _raise_serialization_error(text)
1134
1135 def _escape_attrib(text):
1136     # escape attribute value
1137     try:
1138         if "&" in text:
1139             text = text.replace("&", "&amp;")
1140         if "<" in text:
1141             text = text.replace("<", "&lt;")
1142         if ">" in text:
1143             text = text.replace(">", "&gt;")
1144         if "\"" in text:
1145             text = text.replace("\"", "&quot;")
1146         if "\n" in text:
1147             text = text.replace("\n", "&#10;")
1148         return text
1149     except (TypeError, AttributeError):
1150         _raise_serialization_error(text)
1151
1152 def _escape_attrib_html(text):
1153     # escape attribute value
1154     try:
1155         if "&" in text:
1156             text = text.replace("&", "&amp;")
1157         if ">" in text:
1158             text = text.replace(">", "&gt;")
1159         if "\"" in text:
1160             text = text.replace("\"", "&quot;")
1161         return text
1162     except (TypeError, AttributeError):
1163         _raise_serialization_error(text)
1164
1165 # --------------------------------------------------------------------
1166
def tostring(element, encoding=None, method=None, *,
             short_empty_elements=True):
    """Generate string representation of XML element.

    All subelements are included.  If encoding is "unicode", a string
    is returned. Otherwise a bytestring is returned.

    *element* is an Element instance, *encoding* is an optional output
    encoding defaulting to US-ASCII, *method* is an optional output which can
    be one of "xml" (default), "html", "text" or "c14n".
    *short_empty_elements* is passed through to ElementTree.write().

    Returns an (optionally) encoded string containing the XML data.

    """
    # Text output is collected in a str buffer, encoded output in bytes.
    if encoding == 'unicode':
        buffer = io.StringIO()
    else:
        buffer = io.BytesIO()
    tree = ElementTree(element)
    tree.write(buffer, encoding, method=method,
               short_empty_elements=short_empty_elements)
    return buffer.getvalue()
1185
1186 class _ListDataStream(io.BufferedIOBase):
1187     """An auxiliary stream accumulating into a list reference."""
1188     def __init__(self, lst):
1189         self.lst = lst
1190
1191     def writable(self):
1192         return True
1193
1194     def seekable(self):
1195         return True
1196
1197     def write(self, b):
1198         self.lst.append(b)
1199
1200     def tell(self):
1201         return len(self.lst)
1202
def tostringlist(element, encoding=None, method=None, *,
                 short_empty_elements=True):
    """Generate the representation of *element* as a list of chunks.

    Takes the same arguments as tostring(), but returns a list holding
    the pieces handed to the output stream instead of one joined value.

    """
    chunks = []
    sink = _ListDataStream(chunks)
    tree = ElementTree(element)
    tree.write(sink, encoding, method=method,
               short_empty_elements=short_empty_elements)
    return chunks
1210
1211
def dump(elem):
    """Write element tree or element structure to sys.stdout.

    This function should be used for debugging only.

    *elem* is either an ElementTree, or a single Element.  The exact output
    format is implementation dependent.  In this version, it's written as an
    ordinary XML file.

    """
    # debugging
    tree = elem if isinstance(elem, ElementTree) else ElementTree(elem)
    tree.write(sys.stdout, encoding="unicode")
    # Finish the output with a newline unless the root's tail supplied one.
    tail = tree.getroot().tail
    needs_newline = not tail or tail[-1] != "\n"
    if needs_newline:
        sys.stdout.write("\n")
1229
1230 # --------------------------------------------------------------------
1231 # parsing
1232
1233
def parse(source, parser=None):
    """Parse XML document into element tree.

    *source* is a filename or file object containing XML data,
    *parser* is an optional parser instance defaulting to XMLParser.

    Return an ElementTree instance.

    """
    document = ElementTree()
    document.parse(source, parser)
    return document
1246
1247
def iterparse(source, events=None, parser=None):
    """Incrementally parse XML document into ElementTree.

    This function also reports what's going on to the user based on the
    *events* it is called with.  The supported events are the strings
    "start", "end", "start-ns" and "end-ns" (the "ns" events are used to get
    detailed namespace information).  If *events* is omitted, only
    "end" events are reported.

    *source* is a filename or file object containing XML data, *events* is
    a list of events to report back, *parser* is an optional parser instance.

    Returns an iterator providing (event, elem) pairs.

    """
    # Anything without a read attribute is treated as a file name; a file
    # opened here is owned here and must be closed if construction fails.
    opened_here = not hasattr(source, "read")
    if opened_here:
        source = open(source, "rb")
    try:
        return _IterParseIterator(source, events, parser, opened_here)
    except BaseException:
        if opened_here:
            source.close()
        raise
1273
1274
class XMLPullParser:
    """Parser that is fed data in pieces and queues the resulting events.

    *events* is the sequence of event names to report; when omitted only
    "end" events are reported.  Queued events are retrieved with
    read_events().
    """

    def __init__(self, events=None, *, _parser=None):
        # The _parser argument is for internal use only and must not be relied
        # upon in user code. It will be removed in a future release.
        # See http://bugs.python.org/issue17741 for more details.

        # _elementtree.c expects a list, not a deque
        self._events_queue = []
        self._index = 0
        self._parser = _parser or XMLParser(target=TreeBuilder())
        # Hook the parser up so it reports into the queue.
        requested = ("end",) if events is None else events
        self._parser._setevents(self._events_queue, requested)

    def feed(self, data):
        """Feed encoded data to parser."""
        parser = self._parser
        if parser is None:
            raise ValueError("feed() called after end of stream")
        if not data:
            return
        try:
            parser.feed(data)
        except SyntaxError as exc:
            # Queue the error so read_events() raises it in order.
            self._events_queue.append(exc)

    def _close_and_return_root(self):
        # iterparse needs this to set its root attribute properly :(
        root = self._parser.close()
        self._parser = None
        return root

    def close(self):
        """Finish feeding data to parser.

        Unlike XMLParser, does not return the root element. Use
        read_events() to consume elements from XMLPullParser.
        """
        self._close_and_return_root()

    def read_events(self):
        """Return an iterator over currently available (event, elem) pairs.

        Events are consumed from the internal event queue as they are
        retrieved from the iterator.
        """
        queue = self._events_queue
        while True:
            position = self._index
            try:
                event = queue[position]
                # Drop the reference so past events are not kept alive.
                queue[position] = None
            except IndexError:
                return
            consumed = position + 1
            # Compact the list in a O(1) amortized fashion
            # As noted above, _elementree.c needs a list, not a deque
            if consumed * 2 >= len(queue):
                del queue[:consumed]
                self._index = 0
            else:
                self._index = consumed
            if isinstance(event, Exception):
                raise event
            yield event
1342
1343
1344 class _IterParseIterator:
1345
1346     def __init__(self, source, events, parser, close_source=False):
1347         # Use the internal, undocumented _parser argument for now; When the
1348         # parser argument of iterparse is removed, this can be killed.
1349         self._parser = XMLPullParser(events=events, _parser=parser)
1350         self._file = source
1351         self._close_file = close_source
1352         self.root = self._root = None
1353
1354     def __next__(self):
1355         try:
1356             while 1:
1357                 for event in self._parser.read_events():
1358                     return event
1359                 if self._parser._parser is None:
1360                     break
1361                 # load event buffer
1362                 data = self._file.read(16 * 1024)
1363                 if data:
1364                     self._parser.feed(data)
1365                 else:
1366                     self._root = self._parser._close_and_return_root()
1367             self.root = self._root
1368         except:
1369             if self._close_file:
1370                 self._file.close()
1371             raise
1372         if self._close_file:
1373             self._file.close()
1374         raise StopIteration
1375
1376     def __iter__(self):
1377         return self
1378
1379
def XML(text, parser=None):
    """Parse XML document from string constant.

    This function can be used to embed "XML Literals" in Python code.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    # Any falsy *parser* is replaced by a fresh default parser.
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    root = parser.close()
    return root
1395
1396
def XMLID(text, parser=None):
    """Parse XML document from string constant for its IDs.

    *text* is a string containing XML data, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an (Element, dict) tuple, in which the
    dict maps element id:s to elements.

    """
    parser = parser or XMLParser(target=TreeBuilder())
    parser.feed(text)
    tree = parser.close()
    # Index every element with a non-empty "id" attribute; if an id
    # occurs more than once, the last element in document order wins.
    keyed = ((node.get("id"), node) for node in tree.iter())
    ids = {ident: node for ident, node in keyed if ident}
    return tree, ids
1417
# Parse an XML document from a string constant.  fromstring is an alias
# for XML(): both names refer to the same function object.
fromstring = XML
1420
def fromstringlist(sequence, parser=None):
    """Parse XML document from sequence of string fragments.

    *sequence* is a list or other sequence of fragments, *parser* is an
    optional parser instance, defaulting to the standard XMLParser.

    Returns an Element instance.

    """
    parser = parser or XMLParser(target=TreeBuilder())
    # Feed the fragments one at a time, in order.
    for fragment in sequence:
        parser.feed(fragment)
    root = parser.close()
    return root
1435
1436 # --------------------------------------------------------------------
1437
1438
class TreeBuilder:
    """Generic element structure builder.

    This builder converts a sequence of start, data, and end method
    calls to a well-formed element structure.

    You can use this class to build an element structure using a custom XML
    parser, or a parser for some other XML-like format.

    *element_factory* is an optional element factory which is called
    to create new Element instances, as necessary.

    """
    def __init__(self, element_factory=None):
        self._data = []  # text pieces collected since the last flush
        self._elem = []  # stack of currently open elements
        self._last = None  # most recently opened or closed element
        self._tail = None  # true if we're after an end tag
        self._factory = Element if element_factory is None else element_factory

    def close(self):
        """Flush builder buffers and return toplevel document Element."""
        assert len(self._elem) == 0, "missing end tags"
        assert self._last is not None, "missing toplevel element"
        return self._last

    def _flush(self):
        # Attach the collected text to the last element: as its tail when
        # an end tag was seen last, otherwise as its text.
        pieces = self._data
        if not pieces:
            return
        last = self._last
        if last is not None:
            joined = "".join(pieces)
            if self._tail:
                assert last.tail is None, "internal error (tail)"
                last.tail = joined
            else:
                assert last.text is None, "internal error (text)"
                last.text = joined
        self._data = []

    def data(self, data):
        """Add text to current element."""
        self._data.append(data)

    def start(self, tag, attrs):
        """Open new element and return it.

        *tag* is the element name, *attrs* is a dict containing element
        attributes.

        """
        self._flush()
        elem = self._factory(tag, attrs)
        self._last = elem
        stack = self._elem
        if stack:
            # Nest the new element under the innermost open element.
            stack[-1].append(elem)
        stack.append(elem)
        self._tail = 0
        return elem

    def end(self, tag):
        """Close and return current Element.

        *tag* is the element name.

        """
        self._flush()
        closed = self._elem.pop()
        self._last = closed
        assert closed.tag == tag,\
               "end tag mismatch (expected %s, got %s)" % (
                   closed.tag, tag)
        self._tail = 1
        return closed
1511
1512
1513 # also see ElementTree and TreeBuilder
1514 class XMLParser:
1515     """Element structure builder for XML source data based on the expat parser.
1516
1517     *html* are predefined HTML entities (not supported currently),
1518     *target* is an optional target object which defaults to an instance of the
1519     standard TreeBuilder class, *encoding* is an optional encoding string
1520     which if given, overrides the encoding specified in the XML file:
1521     http://www.iana.org/assignments/character-sets
1522
1523     """
1524
1525     def __init__(self, html=0, target=None, encoding=None):
1526         try:
1527             from xml.parsers import expat
1528         except ImportError:
1529             try:
1530                 import pyexpat as expat
1531             except ImportError:
1532                 raise ImportError(
1533                     "No module named expat; use SimpleXMLTreeBuilder instead"
1534                     )
1535         parser = expat.ParserCreate(encoding, "}")
1536         if target is None:
1537             target = TreeBuilder()
1538         # underscored names are provided for compatibility only
1539         self.parser = self._parser = parser
1540         self.target = self._target = target
1541         self._error = expat.error
1542         self._names = {} # name memo cache
1543         # main callbacks
1544         parser.DefaultHandlerExpand = self._default
1545         if hasattr(target, 'start'):
1546             parser.StartElementHandler = self._start
1547         if hasattr(target, 'end'):
1548             parser.EndElementHandler = self._end
1549         if hasattr(target, 'data'):
1550             parser.CharacterDataHandler = target.data
1551         # miscellaneous callbacks
1552         if hasattr(target, 'comment'):
1553             parser.CommentHandler = target.comment
1554         if hasattr(target, 'pi'):
1555             parser.ProcessingInstructionHandler = target.pi
1556         # Configure pyexpat: buffering, new-style attribute handling.
1557         parser.buffer_text = 1
1558         parser.ordered_attributes = 1
1559         parser.specified_attributes = 1
1560         self._doctype = None
1561         self.entity = {}
1562         try:
1563             self.version = "Expat %d.%d.%d" % expat.version_info
1564         except AttributeError:
1565             pass # unknown
1566
1567     def _setevents(self, events_queue, events_to_report):
1568         # Internal API for XMLPullParser
1569         # events_to_report: a list of events to report during parsing (same as
1570         # the *events* of XMLPullParser's constructor.
1571         # events_queue: a list of actual parsing events that will be populated
1572         # by the underlying parser.
1573         #
1574         parser = self._parser
1575         append = events_queue.append
1576         for event_name in events_to_report:
1577             if event_name == "start":
1578                 parser.ordered_attributes = 1
1579                 parser.specified_attributes = 1
1580                 def handler(tag, attrib_in, event=event_name, append=append,
1581                             start=self._start):
1582                     append((event, start(tag, attrib_in)))
1583                 parser.StartElementHandler = handler
1584             elif event_name == "end":
1585                 def handler(tag, event=event_name, append=append,
1586                             end=self._end):
1587                     append((event, end(tag)))
1588                 parser.EndElementHandler = handler
1589             elif event_name == "start-ns":
1590                 def handler(prefix, uri, event=event_name, append=append):
1591                     append((event, (prefix or "", uri or "")))
1592                 parser.StartNamespaceDeclHandler = handler
1593             elif event_name == "end-ns":
1594                 def handler(prefix, event=event_name, append=append):
1595                     append((event, None))
1596                 parser.EndNamespaceDeclHandler = handler
1597             else:
1598                 raise ValueError("unknown event %r" % event_name)
1599
1600     def _raiseerror(self, value):
1601         err = ParseError(value)
1602         err.code = value.code
1603         err.position = value.lineno, value.offset
1604         raise err
1605
1606     def _fixname(self, key):
1607         # expand qname, and convert name string to ascii, if possible
1608         try:
1609             name = self._names[key]
1610         except KeyError:
1611             name = key
1612             if "}" in name:
1613                 name = "{" + name
1614             self._names[key] = name
1615         return name
1616
1617     def _start(self, tag, attr_list):
1618         # Handler for expat's StartElementHandler. Since ordered_attributes
1619         # is set, the attributes are reported as a list of alternating
1620         # attribute name,value.
1621         fixname = self._fixname
1622         tag = fixname(tag)
1623         attrib = {}
1624         if attr_list:
1625             for i in range(0, len(attr_list), 2):
1626                 attrib[fixname(attr_list[i])] = attr_list[i+1]
1627         return self.target.start(tag, attrib)
1628
1629     def _end(self, tag):
1630         return self.target.end(self._fixname(tag))
1631
1632     def _default(self, text):
1633         prefix = text[:1]
1634         if prefix == "&":
1635             # deal with undefined entities
1636             try:
1637                 data_handler = self.target.data
1638             except AttributeError:
1639                 return
1640             try:
1641                 data_handler(self.entity[text[1:-1]])
1642             except KeyError:
1643                 from xml.parsers import expat
1644                 err = expat.error(
1645                     "undefined entity %s: line %d, column %d" %
1646                     (text, self.parser.ErrorLineNumber,
1647                     self.parser.ErrorColumnNumber)
1648                     )
1649                 err.code = 11 # XML_ERROR_UNDEFINED_ENTITY
1650                 err.lineno = self.parser.ErrorLineNumber
1651                 err.offset = self.parser.ErrorColumnNumber
1652                 raise err
1653         elif prefix == "<" and text[:9] == "<!DOCTYPE":
1654             self._doctype = [] # inside a doctype declaration
1655         elif self._doctype is not None:
1656             # parse doctype contents
1657             if prefix == ">":
1658                 self._doctype = None
1659                 return
1660             text = text.strip()
1661             if not text:
1662                 return
1663             self._doctype.append(text)
1664             n = len(self._doctype)
1665             if n > 2:
1666                 type = self._doctype[1]
1667                 if type == "PUBLIC" and n == 4:
1668                     name, type, pubid, system = self._doctype
1669                     if pubid:
1670                         pubid = pubid[1:-1]
1671                 elif type == "SYSTEM" and n == 3:
1672                     name, type, system = self._doctype
1673                     pubid = None
1674                 else:
1675                     return
1676                 if hasattr(self.target, "doctype"):
1677                     self.target.doctype(name, pubid, system[1:-1])
1678                 elif self.doctype != self._XMLParser__doctype:
1679                     # warn about deprecated call
1680                     self._XMLParser__doctype(name, pubid, system[1:-1])
1681                     self.doctype(name, pubid, system[1:-1])
1682                 self._doctype = None
1683
1684     def doctype(self, name, pubid, system):
1685         """(Deprecated)  Handle doctype declaration
1686
1687         *name* is the Doctype name, *pubid* is the public identifier,
1688         and *system* is the system identifier.
1689
1690         """
1691         warnings.warn(
1692             "This method of XMLParser is deprecated.  Define doctype() "
1693             "method on the TreeBuilder target.",
1694             DeprecationWarning,
1695             )
1696
    # Sentinel: a name-mangled reference to the doctype() defined above.
    # _default() compares self.doctype against it to detect whether
    # doctype has been redefined (e.g. in a subclass).
    __doctype = doctype
1699
1700     def feed(self, data):
1701         """Feed encoded data to parser."""
1702         try:
1703             self.parser.Parse(data, 0)
1704         except self._error as v:
1705             self._raiseerror(v)
1706
1707     def close(self):
1708         """Finish feeding data to parser and return element structure."""
1709         try:
1710             self.parser.Parse("", 1) # end of data
1711         except self._error as v:
1712             self._raiseerror(v)
1713         try:
1714             close_handler = self.target.close
1715         except AttributeError:
1716             pass
1717         else:
1718             return close_handler()
1719         finally:
1720             # get rid of circular references
1721             del self.parser, self._parser
1722             del self.target, self._target
1723
1724
# Import the C accelerators
try:
    # Element is going to be shadowed by the C implementation. We need to keep
    # the Python version of it accessible for some "creative" use by external
    # code (see tests).
    _Element_Py = Element

    # Names expected to be rebound by the star import:
    # Element, SubElement, ParseError, TreeBuilder, XMLParser
    from _elementtree import *
except ImportError:
    # The accelerator module could not be imported: the definitions made
    # earlier in this module stay in effect.
    pass