diff --git a/lib/lxml/ElementInclude.py b/lib/lxml/ElementInclude.py
new file mode 100644
index 00000000..f7806709
--- /dev/null
+++ b/lib/lxml/ElementInclude.py
@@ -0,0 +1,223 @@
+#
+# ElementTree
+# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
+#
+# limited xinclude support for element trees
+#
+# history:
+# 2003-08-15 fl created
+# 2003-11-14 fl fixed default loader
+#
+# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
+#
+# fredrik@pythonware.com
+# http://www.pythonware.com
+#
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+#
+# Copyright (c) 1999-2004 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+"""
+Limited XInclude support for the ElementTree package.
+
+While lxml.etree has full support for XInclude (see
+`etree.ElementTree.xinclude()`), this module provides a simpler, pure
+Python, ElementTree compatible implementation that supports a simple
+form of custom URL resolvers.
+"""
+
+from lxml import etree
+import copy
+try:
+ from urlparse import urljoin
+ from urllib2 import urlopen
+except ImportError:
+ # Python 3
+ from urllib.parse import urljoin
+ from urllib.request import urlopen
+
+try:
+ set
+except NameError:
+ # Python 2.3
+ from sets import Set as set
+
+XINCLUDE = "{http://www.w3.org/2001/XInclude}"
+
+XINCLUDE_INCLUDE = XINCLUDE + "include"
+XINCLUDE_FALLBACK = XINCLUDE + "fallback"
+
+##
+# Fatal include error.
+
+class FatalIncludeError(etree.LxmlSyntaxError):
+ pass
+
+##
+# ET compatible default loader.
+# This loader reads an included resource from disk.
+#
+# @param href Resource reference.
+# @param parse Parse mode. Either "xml" or "text".
+# @param encoding Optional text encoding.
+# @return The expanded resource. If the parse mode is "xml", this
+# is the root element of the parsed document. If the parse mode is
+# "text", this is a Unicode string. If the loader fails, it can
+# return None or raise an IOError exception.
+# @throws IOError If the loader fails to load the resource.
+
+def default_loader(href, parse, encoding=None):
+ file = open(href, 'rb')
+ if parse == "xml":
+ data = etree.parse(file).getroot()
+ else:
+ data = file.read()
+ if not encoding:
+ encoding = 'utf-8'
+ data = data.decode(encoding)
+ file.close()
+ return data
+
+##
+# Default loader used by lxml.etree - handles custom resolvers properly
+#
+
+def _lxml_default_loader(href, parse, encoding=None, parser=None):
+ if parse == "xml":
+ data = etree.parse(href, parser).getroot()
+ else:
+ if "://" in href:
+ f = urlopen(href)
+ else:
+ f = open(href, 'rb')
+ data = f.read()
+ f.close()
+ if not encoding:
+ encoding = 'utf-8'
+ data = data.decode(encoding)
+ return data
+
+##
+# Wrapper for ET compatibility - drops the parser
+
+def _wrap_et_loader(loader):
+ def load(href, parse, encoding=None, parser=None):
+ return loader(href, parse, encoding)
+ return load
+
+
+##
+# Expand XInclude directives.
+#
+# @param elem Root element.
+# @param loader Optional resource loader. If omitted, lxml's internal
+# default loader (_lxml_default_loader) is used. If given, it should be
+# a callable that implements the same interface as default_loader.
+# @throws FatalIncludeError If the function fails to include a given
+# resource, or if the tree contains malformed XInclude elements.
+# @throws IOError If the function fails to load a given resource.
+# @returns None. Include elements that have a parent are expanded in place.
+
+def include(elem, loader=None, base_url=None):
+ if base_url is None:
+ if hasattr(elem, 'getroot'):
+ tree = elem
+ elem = elem.getroot()
+ else:
+ tree = elem.getroottree()
+ if hasattr(tree, 'docinfo'):
+ base_url = tree.docinfo.URL
+ elif hasattr(elem, 'getroot'):
+ elem = elem.getroot()
+ _include(elem, loader, base_url=base_url)
+
+def _include(elem, loader=None, _parent_hrefs=None, base_url=None):
+ if loader is not None:
+ load_include = _wrap_et_loader(loader)
+ else:
+ load_include = _lxml_default_loader
+
+ if _parent_hrefs is None:
+ _parent_hrefs = set()
+
+ parser = elem.getroottree().parser
+
+ include_elements = list(
+ elem.iter('{http://www.w3.org/2001/XInclude}*'))
+
+ for e in include_elements:
+ if e.tag == XINCLUDE_INCLUDE:
+ # process xinclude directive
+ href = urljoin(base_url, e.get("href"))
+ parse = e.get("parse", "xml")
+ parent = e.getparent()
+ if parse == "xml":
+ if href in _parent_hrefs:
+ raise FatalIncludeError(
+ "recursive include of %r detected" % href
+ )
+ _parent_hrefs.add(href)
+ node = load_include(href, parse, parser=parser)
+ if node is None:
+ raise FatalIncludeError(
+ "cannot load %r as %r" % (href, parse)
+ )
+ node = _include(node, loader, _parent_hrefs)
+ if e.tail:
+ node.tail = (node.tail or "") + e.tail
+ if parent is None:
+ return node # replaced the root node!
+ parent.replace(e, node)
+ elif parse == "text":
+ text = load_include(href, parse, encoding=e.get("encoding"))
+ if text is None:
+ raise FatalIncludeError(
+ "cannot load %r as %r" % (href, parse)
+ )
+ predecessor = e.getprevious()
+ if predecessor is not None:
+ predecessor.tail = (predecessor.tail or "") + text
+ elif parent is None:
+ return text # replaced the root node!
+ else:
+ parent.text = (parent.text or "") + text + (e.tail or "")
+ parent.remove(e)
+ else:
+ raise FatalIncludeError(
+ "unknown parse type in xi:include tag (%r)" % parse
+ )
+ elif e.tag == XINCLUDE_FALLBACK:
+ parent = e.getparent()
+ if parent is not None and parent.tag != XINCLUDE_INCLUDE:
+ raise FatalIncludeError(
+ "xi:fallback tag must be child of xi:include (%r)" % e.tag
+ )
+ else:
+ raise FatalIncludeError(
+ "Invalid element found in XInclude namespace (%r)" % e.tag
+ )
+ return elem
diff --git a/lib/lxml/__init__.py b/lib/lxml/__init__.py
new file mode 100644
index 00000000..07cbe3a2
--- /dev/null
+++ b/lib/lxml/__init__.py
@@ -0,0 +1,20 @@
+# this is a package
+
+def get_include():
+ """
+ Returns a list of header include paths (for lxml itself, libxml2
+ and libxslt) needed to compile C code against lxml if it was built
+ with statically linked libraries.
+ """
+ import os
+ lxml_path = __path__[0]
+ include_path = os.path.join(lxml_path, 'includes')
+ includes = [include_path, lxml_path]
+
+ for name in os.listdir(include_path):
+ path = os.path.join(include_path, name)
+ if os.path.isdir(path):
+ includes.append(path)
+
+ return includes
+
diff --git a/lib/lxml/_elementpath.py b/lib/lxml/_elementpath.py
new file mode 100644
index 00000000..bc9176e8
--- /dev/null
+++ b/lib/lxml/_elementpath.py
@@ -0,0 +1,306 @@
+#
+# ElementTree
+# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
+#
+# limited xpath support for element trees
+#
+# history:
+# 2003-05-23 fl created
+# 2003-05-28 fl added support for // etc
+# 2003-08-27 fl fixed parsing of periods in element names
+# 2007-09-10 fl new selection engine
+# 2007-09-12 fl fixed parent selector
+# 2007-09-13 fl added iterfind; changed findall to return a list
+# 2007-11-30 fl added namespaces support
+# 2009-10-30 fl added child element value filter
+#
+# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
+#
+# fredrik@pythonware.com
+# http://www.pythonware.com
+#
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+#
+# Copyright (c) 1999-2009 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+##
+# Implementation module for XPath support. There's usually no reason
+# to import this module directly; the ElementTree does this for
+# you, if needed.
+##
+
+import re
+
+xpath_tokenizer_re = re.compile(
+ "("
+ "'[^']*'|\"[^\"]*\"|"
+ "::|"
+ "//?|"
+ "\.\.|"
+ "\(\)|"
+ "[/.*:\[\]\(\)@=])|"
+ "((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
+ "\s+"
+ )
+
+def xpath_tokenizer(pattern, namespaces=None):
+ for token in xpath_tokenizer_re.findall(pattern):
+ tag = token[1]
+ if tag and tag[0] != "{" and ":" in tag:
+ try:
+ prefix, uri = tag.split(":", 1)
+ if not namespaces:
+ raise KeyError
+ yield token[0], "{%s}%s" % (namespaces[prefix], uri)
+ except KeyError:
+ raise SyntaxError("prefix %r not found in prefix map" % prefix)
+ else:
+ yield token
+
+
+def prepare_child(next, token):
+ tag = token[1]
+ def select(result):
+ for elem in result:
+ for e in elem.iterchildren(tag):
+ yield e
+ return select
+
+def prepare_star(next, token):
+ def select(result):
+ for elem in result:
+ for e in elem.iterchildren('*'):
+ yield e
+ return select
+
+def prepare_self(next, token):
+ def select(result):
+ return result
+ return select
+
+def prepare_descendant(next, token):
+ token = next()
+ if token[0] == "*":
+ tag = "*"
+ elif not token[0]:
+ tag = token[1]
+ else:
+ raise SyntaxError("invalid descendant")
+ def select(result):
+ for elem in result:
+ for e in elem.iterdescendants(tag):
+ yield e
+ return select
+
+def prepare_parent(next, token):
+ def select(result):
+ for elem in result:
+ parent = elem.getparent()
+ if parent is not None:
+ yield parent
+ return select
+
+def prepare_predicate(next, token):
+ # FIXME: replace with real parser!!! refs:
+ # http://effbot.org/zone/simple-iterator-parser.htm
+ # http://javascript.crockford.com/tdop/tdop.html
+ signature = []
+ predicate = []
+ while 1:
+ token = next()
+ if token[0] == "]":
+ break
+ if token[0] and token[0][:1] in "'\"":
+ token = "'", token[0][1:-1]
+ signature.append(token[0] or "-")
+ predicate.append(token[1])
+ signature = "".join(signature)
+ # use signature to determine predicate type
+ if signature == "@-":
+ # [@attribute] predicate
+ key = predicate[1]
+ def select(result):
+ for elem in result:
+ if elem.get(key) is not None:
+ yield elem
+ return select
+ if signature == "@-='":
+ # [@attribute='value']
+ key = predicate[1]
+ value = predicate[-1]
+ def select(result):
+ for elem in result:
+ if elem.get(key) == value:
+ yield elem
+ return select
+ if signature == "-" and not re.match("-?\d+$", predicate[0]):
+ # [tag]
+ tag = predicate[0]
+ def select(result):
+ for elem in result:
+ for _ in elem.iterchildren(tag):
+ yield elem
+ break
+ return select
+ if signature == "-='" and not re.match("-?\d+$", predicate[0]):
+ # [tag='value']
+ tag = predicate[0]
+ value = predicate[-1]
+ def select(result):
+ for elem in result:
+ for e in elem.iterchildren(tag):
+ if "".join(e.itertext()) == value:
+ yield elem
+ break
+ return select
+ if signature == "-" or signature == "-()" or signature == "-()-":
+ # [index] or [last()] or [last()-index]
+ if signature == "-":
+ # [index]
+ index = int(predicate[0]) - 1
+ if index < 0:
+ if index == -1:
+ raise SyntaxError(
+ "indices in path predicates are 1-based, not 0-based")
+ else:
+ raise SyntaxError("path index >= 1 expected")
+ else:
+ if predicate[0] != "last":
+ raise SyntaxError("unsupported function")
+ if signature == "-()-":
+ try:
+ index = int(predicate[2]) - 1
+ except ValueError:
+ raise SyntaxError("unsupported expression")
+ else:
+ index = -1
+ def select(result):
+ for elem in result:
+ parent = elem.getparent()
+ if parent is None:
+ continue
+ try:
+ # FIXME: what if the selector is "*" ?
+ elems = list(parent.iterchildren(elem.tag))
+ if elems[index] is elem:
+ yield elem
+ except IndexError:
+ pass
+ return select
+ raise SyntaxError("invalid predicate")
+
+# Dispatch table used by _build_path_iterator(): maps the first item of a
+# token (the operator; "" for a plain name) to the prepare_* function
+# that compiles it into a selector.
+ops = {
+ "": prepare_child,
+ "*": prepare_star,
+ ".": prepare_self,
+ "..": prepare_parent,
+ "//": prepare_descendant,
+ "[": prepare_predicate,
+ }
+
+# Compiled selector lists; looked up and filled by _build_path_iterator(),
+# which also clears it once it holds more than 100 entries.
+_cache = {}
+
+# --------------------------------------------------------------------
+
+def _build_path_iterator(path, namespaces):
+ # compile selector pattern
+ if path[-1:] == "/":
+ path = path + "*" # implicit all (FIXME: keep this?)
+ try:
+ return _cache[(path, namespaces and tuple(sorted(namespaces.items())) or None)]
+ except KeyError:
+ pass
+ if len(_cache) > 100:
+ _cache.clear()
+
+ if path[:1] == "/":
+ raise SyntaxError("cannot use absolute path on element")
+ stream = iter(xpath_tokenizer(path, namespaces))
+ try:
+ _next = stream.next
+ except AttributeError:
+ # Python 3
+ _next = stream.__next__
+ try:
+ token = _next()
+ except StopIteration:
+ raise SyntaxError("empty path expression")
+ selector = []
+ while 1:
+ try:
+ selector.append(ops[token[0]](_next, token))
+ except StopIteration:
+ raise SyntaxError("invalid path")
+ try:
+ token = _next()
+ if token[0] == "/":
+ token = _next()
+ except StopIteration:
+ break
+ _cache[path] = selector
+ return selector
+
+##
+# Iterate over the matching nodes
+
+def iterfind(elem, path, namespaces=None):
+ selector = _build_path_iterator(path, namespaces)
+ result = iter((elem,))
+ for select in selector:
+ result = select(result)
+ return result
+
+##
+# Find first matching object.
+
+def find(elem, path, namespaces=None):
+ it = iterfind(elem, path, namespaces)
+ try:
+ try:
+ _next = it.next
+ except AttributeError:
+ return next(it)
+ else:
+ return _next()
+ except StopIteration:
+ return None
+
+##
+# Find all matching objects.
+
+def findall(elem, path, namespaces=None):
+ return list(iterfind(elem, path, namespaces))
+
+##
+# Find text for first matching object.
+
+def findtext(elem, path, default=None, namespaces=None):
+ el = find(elem, path, namespaces)
+ if el is None:
+ return default
+ else:
+ return el.text or ''
diff --git a/lib/lxml/apihelpers.pxi b/lib/lxml/apihelpers.pxi
new file mode 100644
index 00000000..c41e3044
--- /dev/null
+++ b/lib/lxml/apihelpers.pxi
@@ -0,0 +1,1645 @@
+# Private/public helper functions for API functions
+
+from lxml.includes cimport uri
+
+cdef object OrderedDict = None
+try:
+ from collections import OrderedDict
+except ImportError:
+ pass
+
+cdef void displayNode(xmlNode* c_node, indent):
+ # to help with debugging
+ # Prints c_node prefixed by 'indent' spaces, then recurses into its
+ # children with indent + 1.
+ cdef xmlNode* c_child
+ try:
+ print indent * u' ', c_node
+ c_child = c_node.children
+ while c_child is not NULL:
+ displayNode(c_child, indent + 1)
+ c_child = c_child.next
+ finally:
+ return # swallow any exceptions
+
+cdef inline int _assertValidNode(_Element element) except -1:
+ # Asserts that the Element proxy still has a C node attached.
+ assert element._c_node is not NULL, u"invalid Element proxy at %s" % id(element)
+
+cdef inline int _assertValidDoc(_Document doc) except -1:
+ # Asserts that the Document proxy still has a C document attached.
+ assert doc._c_doc is not NULL, u"invalid Document proxy at %s" % id(doc)
+
+cdef _Document _documentOrRaise(object input):
+ u"""Call this to get the document of a _Document, _ElementTree or _Element
+ object, or to raise an exception if it can't be determined.
+
+ Should be used in all API functions for consistency.
+ """
+ # Raises TypeError for any other input type and ValueError if the input
+ # has no document (e.g. an _ElementTree without context node).
+ cdef _Document doc
+ if isinstance(input, _ElementTree):
+ if (<_ElementTree>input)._context_node is not None:
+ doc = (<_ElementTree>input)._context_node._doc
+ else:
+ doc = None
+ elif isinstance(input, _Element):
+ doc = (<_Element>input)._doc
+ elif isinstance(input, _Document):
+ doc = <_Document>input
+ else:
+ raise TypeError, u"Invalid input object: %s" % \
+ python._fqtypename(input).decode('utf8')
+ if doc is None:
+ raise ValueError, u"Input object has no document: %s" % \
+ python._fqtypename(input).decode('utf8')
+ _assertValidDoc(doc)
+ return doc
+
+cdef _Element _rootNodeOrRaise(object input):
+ u"""Call this to get the root node of a _Document, _ElementTree or
+ _Element object, or to raise an exception if it can't be determined.
+
+ Should be used in all API functions for consistency.
+ """
+ # Raises TypeError for any other input type and ValueError if no node
+ # was found or the node is not an element node.
+ cdef _Element node
+ if isinstance(input, _ElementTree):
+ node = (<_ElementTree>input)._context_node
+ elif isinstance(input, _Element):
+ node = <_Element>input
+ elif isinstance(input, _Document):
+ node = (<_Document>input).getroot()
+ else:
+ raise TypeError, u"Invalid input object: %s" % \
+ python._fqtypename(input).decode('utf8')
+ if (node is None or not node._c_node or
+ node._c_node.type != tree.XML_ELEMENT_NODE):
+ raise ValueError, u"Input object has no element: %s" % \
+ python._fqtypename(input).decode('utf8')
+ _assertValidNode(node)
+ return node
+
+cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc,
+ _BaseParser parser, text, tail, attrib, nsmap,
+ dict extra_attrs):
+ u"""Create a new element and initialize text content, namespaces and
+ attributes.
+
+ This helper function will reuse as much of the existing document as
+ possible:
+
+ If 'parser' is None, the parser will be inherited from 'doc' or the
+ default parser will be used.
+
+ If 'doc' is None, 'c_doc' is used to create a new _Document and the new
+ element is made its root node.
+
+ If 'c_doc' is also NULL, a new xmlDoc will be created.
+ """
+ cdef xmlNode* c_node
+ if doc is not None:
+ # an existing document always takes precedence over the c_doc argument
+ c_doc = doc._c_doc
+ ns_utf, name_utf = _getNsTag(tag)
+ # the tag name is validated by HTML or XML rules, depending on the parser
+ if parser is not None and parser._for_html:
+ _htmlTagValidOrRaise(name_utf)
+ if c_doc is NULL:
+ c_doc = _newHTMLDoc()
+ else:
+ _tagValidOrRaise(name_utf)
+ if c_doc is NULL:
+ c_doc = _newXMLDoc()
+ c_node = _createElement(c_doc, name_utf)
+ if c_node is NULL:
+ if doc is None and c_doc is not NULL:
+ tree.xmlFreeDoc(c_doc)
+ raise MemoryError()
+ try:
+ if doc is None:
+ tree.xmlDocSetRootElement(c_doc, c_node)
+ doc = _documentFactory(c_doc, parser)
+ if text is not None:
+ _setNodeText(c_node, text)
+ if tail is not None:
+ _setTailText(c_node, tail)
+ # add namespaces to node if necessary
+ _initNodeNamespaces(c_node, doc, ns_utf, nsmap)
+ _initNodeAttributes(c_node, doc, attrib, extra_attrs)
+ return _elementFactory(doc, c_node)
+ except:
+ # free allocated c_node/c_doc unless Python does it for us
+ if c_node.doc is not c_doc:
+ # node not yet in document => will not be freed by document
+ if tail is not None:
+ _removeText(c_node.next) # tail
+ tree.xmlFreeNode(c_node)
+ if doc is None:
+ # c_doc will not be freed by doc
+ tree.xmlFreeDoc(c_doc)
+ raise
+
+cdef int _initNewElement(_Element element, bint is_html, name_utf, ns_utf,
+ _BaseParser parser, attrib, nsmap, dict extra_attrs) except -1:
+ u"""Initialise a new Element object.
+
+ This is used when users instantiate a Python Element subclass
+ directly, without it being mapped to an existing XML node.
+ """
+ cdef xmlDoc* c_doc
+ cdef xmlNode* c_node
+ cdef _Document doc
+ # always creates a fresh (HTML or XML) document for the new element
+ if is_html:
+ _htmlTagValidOrRaise(name_utf)
+ c_doc = _newHTMLDoc()
+ else:
+ _tagValidOrRaise(name_utf)
+ c_doc = _newXMLDoc()
+ c_node = _createElement(c_doc, name_utf)
+ if c_node is NULL:
+ if c_doc is not NULL:
+ tree.xmlFreeDoc(c_doc)
+ raise MemoryError()
+ # the new node becomes the root of the new document
+ tree.xmlDocSetRootElement(c_doc, c_node)
+ doc = _documentFactory(c_doc, parser)
+ # add namespaces to node if necessary
+ _initNodeNamespaces(c_node, doc, ns_utf, nsmap)
+ _initNodeAttributes(c_node, doc, attrib, extra_attrs)
+ # bind the given Python object to the C node, then run its _init() hook
+ _registerProxy(element, doc, c_node)
+ element._init()
+ return 0
+
+cdef _Element _makeSubElement(_Element parent, tag, text, tail,
+ attrib, nsmap, dict extra_attrs):
+ u"""Create a new child element and initialize text content, namespaces and
+ attributes.
+ """
+ # Returns None if 'parent' is None or has no document.
+ cdef xmlNode* c_node
+ cdef xmlDoc* c_doc
+ if parent is None or parent._doc is None:
+ return None
+ _assertValidNode(parent)
+ ns_utf, name_utf = _getNsTag(tag)
+ c_doc = parent._doc._c_doc
+
+ if parent._doc._parser is not None and parent._doc._parser._for_html:
+ _htmlTagValidOrRaise(name_utf)
+ else:
+ _tagValidOrRaise(name_utf)
+
+ c_node = _createElement(c_doc, name_utf)
+ if c_node is NULL:
+ raise MemoryError()
+ # link the node into the tree first; it is removed again below if the
+ # initialisation fails
+ tree.xmlAddChild(parent._c_node, c_node)
+
+ try:
+ if text is not None:
+ _setNodeText(c_node, text)
+ if tail is not None:
+ _setTailText(c_node, tail)
+
+ # add namespaces to node if necessary
+ _initNodeNamespaces(c_node, parent._doc, ns_utf, nsmap)
+ _initNodeAttributes(c_node, parent._doc, attrib, extra_attrs)
+ return _elementFactory(parent._doc, c_node)
+ except:
+ # make sure we clean up in case of an error
+ _removeNode(parent._doc, c_node)
+ raise
+
+cdef int _initNodeNamespaces(xmlNode* c_node, _Document doc,
+ object node_ns_utf, object nsmap) except -1:
+ u"""Lookup current namespace prefixes, then set namespace structure for
+ node and register new ns-prefix mappings.
+
+ This only works for a newly created node!
+ """
+ cdef xmlNs* c_ns
+ cdef list nsdefs
+ if not nsmap:
+ # no mapping given: only the node's own namespace (if any) is set
+ if node_ns_utf is not None:
+ _uriValidOrRaise(node_ns_utf)
+ doc._setNodeNs(c_node, _xcstr(node_ns_utf))
+ return 0
+
+ nsdefs = list(nsmap.items())
+ if None in nsmap and len(nsdefs) > 1:
+ # Move the default namespace to the end. This makes sure libxml2
+ # prefers a prefix if the ns is defined redundantly on the same
+ # element. That way, users can work around a problem themselves
+ # where default namespace attributes on non-default namespaced
+ # elements serialise without prefix (i.e. into the non-default
+ # namespace).
+ item = (None, nsmap[None])
+ nsdefs.remove(item)
+ nsdefs.append(item)
+
+ for prefix, href in nsdefs:
+ href_utf = _utf8(href)
+ _uriValidOrRaise(href_utf)
+ c_href = _xcstr(href_utf)
+ if prefix is not None:
+ prefix_utf = _utf8(prefix)
+ _prefixValidOrRaise(prefix_utf)
+ c_prefix = _xcstr(prefix_utf)
+ else:
+ c_prefix = NULL
+ # add namespace with prefix if it is not already known
+ c_ns = tree.xmlSearchNs(doc._c_doc, c_node, c_prefix)
+ if c_ns is NULL or \
+ c_ns.href is NULL or \
+ tree.xmlStrcmp(c_ns.href, c_href) != 0:
+ c_ns = tree.xmlNewNs(c_node, c_href, c_prefix)
+ if href_utf == node_ns_utf:
+ # this declaration is the node's own namespace: assign it and
+ # clear node_ns_utf so that it is not set again after the loop
+ tree.xmlSetNs(c_node, c_ns)
+ node_ns_utf = None
+
+ if node_ns_utf is not None:
+ # the node's namespace was not part of nsmap
+ doc._setNodeNs(c_node, _xcstr(node_ns_utf))
+ return 0
+
+cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, dict extra):
+ u"""Initialise the attributes of an element node.
+ """
+ # Attributes from 'extra' are added first; _addAttributeToNode() skips
+ # names it has already seen, so they win over same-named entries of
+ # 'attrib'. Raises TypeError if 'attrib' has no 'items' attribute.
+ cdef bint is_html
+ cdef xmlNs* c_ns
+ if attrib is not None and not hasattr(attrib, u'items'):
+ raise TypeError, u"Invalid attribute dictionary: %s" % \
+ python._fqtypename(attrib).decode('utf8')
+ if not attrib and not extra:
+ return # nothing to do
+ # NOTE(review): doc._parser is dereferenced without a None check here,
+ # whereas _makeSubElement() checks "_parser is not None" first -- confirm
+ # that _parser cannot be None at this point.
+ is_html = doc._parser._for_html
+ seen = set()
+ if extra:
+ for name, value in sorted(extra.items()):
+ _addAttributeToNode(c_node, doc, is_html, name, value, seen)
+ if attrib:
+ # attrib will usually be a plain unordered dict
+ if type(attrib) is dict:
+ attrib = sorted(attrib.items())
+ elif isinstance(attrib, _Attrib) or (
+ OrderedDict is not None and isinstance(attrib, OrderedDict)):
+ # ordered mappings keep their own order
+ attrib = attrib.items()
+ else:
+ # assume it's an unordered mapping of some kind
+ attrib = sorted(attrib.items())
+ for name, value in attrib:
+ _addAttributeToNode(c_node, doc, is_html, name, value, seen)
+
+cdef int _addAttributeToNode(xmlNode* c_node, _Document doc, bint is_html,
+ name, value, set seen_tags) except -1:
+ # Adds one attribute to c_node unless its (namespace, name) pair is
+ # already in seen_tags. Attribute names are only validated for
+ # non-HTML documents.
+ ns_utf, name_utf = tag = _getNsTag(name)
+ if tag in seen_tags:
+ return 0
+ seen_tags.add(tag)
+ if not is_html:
+ _attributeValidOrRaise(name_utf)
+ value_utf = _utf8(value)
+ if ns_utf is None:
+ tree.xmlNewProp(c_node, _xcstr(name_utf), _xcstr(value_utf))
+ else:
+ _uriValidOrRaise(ns_utf)
+ c_ns = doc._findOrBuildNodeNs(c_node, _xcstr(ns_utf), NULL, 1)
+ tree.xmlNewNsProp(c_node, c_ns,
+ _xcstr(name_utf), _xcstr(value_utf))
+ return 0
+
+# Pairs a namespace declaration with the node whose nsDef list holds it;
+# used by _removeUnusedNamespaceDeclarations().
+ctypedef struct _ns_node_ref:
+ xmlNs* ns
+ xmlNode* node
+
+cdef int _removeUnusedNamespaceDeclarations(xmlNode* c_element) except -1:
+ u"""Remove any namespace declarations from a subtree that are not used by
+ any of its elements (or attributes).
+ """
+ # Works in three steps: collect all declarations into c_ns_list, drop
+ # those from the list that some element/attribute refers to, then unlink
+ # and free what is left. Raises MemoryError if the list cannot be grown.
+ cdef _ns_node_ref* c_ns_list
+ cdef _ns_node_ref* c_nsref_ptr
+ cdef xmlNs* c_nsdef
+ cdef xmlNode* c_node
+ cdef size_t c_ns_list_size
+ cdef size_t c_ns_list_len
+ cdef size_t i
+
+ c_ns_list = NULL
+ c_ns_list_size = 0
+ c_ns_list_len = 0
+
+ if c_element.parent is not NULL and \
+ c_element.parent.type == tree.XML_DOCUMENT_NODE:
+ # include the document node
+ c_nsdef = c_element.parent.nsDef
+ while c_nsdef is not NULL:
+ if c_ns_list_len >= c_ns_list_size:
+ # grow the list: 20 entries initially, doubled afterwards
+ if c_ns_list is NULL:
+ c_ns_list_size = 20
+ else:
+ c_ns_list_size *= 2
+ c_nsref_ptr = <_ns_node_ref*> stdlib.realloc(
+ c_ns_list, c_ns_list_size * sizeof(_ns_node_ref))
+ if c_nsref_ptr is NULL:
+ if c_ns_list is not NULL:
+ stdlib.free(c_ns_list)
+ raise MemoryError()
+ c_ns_list = c_nsref_ptr
+
+ c_ns_list[c_ns_list_len].ns = c_nsdef
+ c_ns_list[c_ns_list_len].node = c_element.parent
+ c_ns_list_len += 1
+ c_nsdef = c_nsdef.next
+
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_element, c_element, 1)
+ # collect all new namespace declarations into the ns list
+ c_nsdef = c_element.nsDef
+ while c_nsdef is not NULL:
+ if c_ns_list_len >= c_ns_list_size:
+ # same growth strategy as above
+ if c_ns_list is NULL:
+ c_ns_list_size = 20
+ else:
+ c_ns_list_size *= 2
+ c_nsref_ptr = <_ns_node_ref*> stdlib.realloc(
+ c_ns_list, c_ns_list_size * sizeof(_ns_node_ref))
+ if c_nsref_ptr is NULL:
+ if c_ns_list is not NULL:
+ stdlib.free(c_ns_list)
+ raise MemoryError()
+ c_ns_list = c_nsref_ptr
+
+ c_ns_list[c_ns_list_len].ns = c_nsdef
+ c_ns_list[c_ns_list_len].node = c_element
+ c_ns_list_len += 1
+ c_nsdef = c_nsdef.next
+
+ # remove all namespace declarations from the list that are referenced
+ if c_element.type == tree.XML_ELEMENT_NODE:
+ c_node = c_element
+ while c_node is not NULL:
+ if c_node.ns is not NULL:
+ for i in range(c_ns_list_len):
+ if c_node.ns is c_ns_list[i].ns:
+ # drop entry i by overwriting it with the last entry
+ c_ns_list_len -= 1
+ c_ns_list[i].ns = c_ns_list[c_ns_list_len].ns
+ c_ns_list[i].node = c_ns_list[c_ns_list_len].node
+ c_ns_list[c_ns_list_len].ns = NULL
+ c_ns_list[c_ns_list_len].node = NULL
+ break
+ if c_node is c_element:
+ # continue with attributes
+ c_node = c_element.properties
+ else:
+ c_node = c_node.next
+ tree.END_FOR_EACH_ELEMENT_FROM(c_element)
+
+ if c_ns_list is NULL:
+ return 0
+
+ # free all namespace declarations that remained in the list
+ for i in range(c_ns_list_len):
+ c_node = c_ns_list[i].node
+ c_nsdef = c_node.nsDef
+ # unlink the declaration from its node's nsDef list before freeing it
+ if c_nsdef is c_ns_list[i].ns:
+ c_node.nsDef = c_node.nsDef.next
+ else:
+ while c_nsdef.next is not c_ns_list[i].ns:
+ c_nsdef = c_nsdef.next
+ c_nsdef.next = c_nsdef.next.next
+ tree.xmlFreeNs(c_ns_list[i].ns)
+
+ if c_ns_list is not NULL:
+ stdlib.free(c_ns_list)
+ return 0
+
+cdef xmlNs* _searchNsByHref(xmlNode* c_node, const_xmlChar* c_href, bint is_attribute):
+ u"""Search a namespace declaration that covers a node (element or
+ attribute).
+
+ For attributes, try to find a prefixed namespace declaration
+ instead of the default namespaces. This helps in supporting
+ round-trips for attributes on elements with a different namespace.
+ """
+ # Returns NULL if c_href or c_node is NULL, if c_node is an entity
+ # reference, or if no declaration in scope matches.
+ cdef xmlNs* c_ns
+ cdef xmlNs* c_default_ns = NULL
+ cdef xmlNode* c_element
+ if c_href is NULL or c_node is NULL or c_node.type == tree.XML_ENTITY_REF_NODE:
+ return NULL
+ if tree.xmlStrcmp(c_href, tree.XML_XML_NAMESPACE) == 0:
+ # no special cases here, let libxml2 handle this
+ return tree.xmlSearchNsByHref(c_node.doc, c_node, c_href)
+ if c_node.type == tree.XML_ATTRIBUTE_NODE:
+ is_attribute = 1
+ # start the search at the nearest element node
+ while c_node is not NULL and c_node.type != tree.XML_ELEMENT_NODE:
+ c_node = c_node.parent
+ c_element = c_node
+ # walk up the ancestors, checking their declarations
+ while c_node is not NULL:
+ if c_node.type == tree.XML_ELEMENT_NODE:
+ c_ns = c_node.nsDef
+ while c_ns is not NULL:
+ if c_ns.href is not NULL and tree.xmlStrcmp(c_href, c_ns.href) == 0:
+ if c_ns.prefix is NULL and is_attribute:
+ # for attributes, continue searching a named
+ # prefix, but keep the first default namespace
+ # declaration that we found
+ if c_default_ns is NULL:
+ c_default_ns = c_ns
+ elif tree.xmlSearchNs(
+ c_element.doc, c_element, c_ns.prefix) is c_ns:
+ # start node is in namespace scope => found!
+ return c_ns
+ c_ns = c_ns.next
+ if c_node is not c_element and c_node.ns is not NULL:
+ # optimise: the node may have the namespace itself
+ c_ns = c_node.ns
+ if c_ns.href is not NULL and tree.xmlStrcmp(c_href, c_ns.href) == 0:
+ if c_ns.prefix is NULL and is_attribute:
+ # for attributes, continue searching a named
+ # prefix, but keep the first default namespace
+ # declaration that we found
+ if c_default_ns is NULL:
+ c_default_ns = c_ns
+ elif tree.xmlSearchNs(
+ c_element.doc, c_element, c_ns.prefix) is c_ns:
+ # start node is in namespace scope => found!
+ return c_ns
+ c_node = c_node.parent
+ # nothing found => use a matching default namespace or fail
+ if c_default_ns is not NULL:
+ if tree.xmlSearchNs(c_element.doc, c_element, NULL) is c_default_ns:
+ return c_default_ns
+ return NULL
+
+cdef int _replaceNodeByChildren(_Document doc, xmlNode* c_node) except -1:
+ # NOTE: this does not deallocate the node, just unlink it!
+ # Splices the children of c_node into the position of c_node in its
+ # parent's child list.
+ cdef xmlNode* c_parent
+ cdef xmlNode* c_child
+ if c_node.children is NULL:
+ # nothing to splice in: unlinking the node is all that is needed
+ tree.xmlUnlinkNode(c_node)
+ return 0
+
+ c_parent = c_node.parent
+ # fix parent links of children
+ c_child = c_node.children
+ while c_child is not NULL:
+ c_child.parent = c_parent
+ c_child = c_child.next
+
+ # fix namespace references of children if their parent's namespace
+ # declarations get lost
+ if c_node.nsDef is not NULL:
+ c_child = c_node.children
+ while c_child is not NULL:
+ moveNodeToDocument(doc, doc._c_doc, c_child)
+ c_child = c_child.next
+
+ # fix sibling links to/from child slice
+ if c_node.prev is NULL:
+ c_parent.children = c_node.children
+ else:
+ c_node.prev.next = c_node.children
+ c_node.children.prev = c_node.prev
+ if c_node.next is NULL:
+ c_parent.last = c_node.last
+ else:
+ c_node.next.prev = c_node.last
+ c_node.last.next = c_node.next
+
+ # unlink c_node
+ c_node.children = c_node.last = NULL
+ c_node.parent = c_node.next = c_node.prev = NULL
+ return 0
+
+cdef object _attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node):
+ # Returns the value of the given attribute node as a Python string.
+ # NOTE(review): unlike _attributeValueFromNsName(), the result of
+ # xmlGetNsProp() is not checked for NULL here -- presumably because the
+ # attribute node is known to exist; confirm.
+ c_href = _getNs(c_attrib_node)
+ value = tree.xmlGetNsProp(c_element, c_attrib_node.name, c_href)
+ try:
+ result = funicode(value)
+ finally:
+ # the C string returned by xmlGetNsProp() is freed in any case
+ tree.xmlFree(value)
+ return result
+
+cdef object _attributeValueFromNsName(xmlNode* c_element,
+ const_xmlChar* c_href, const_xmlChar* c_name):
+ # Returns the value of the attribute (c_href, c_name) of c_element, or
+ # None if xmlGetNsProp() returns NULL.
+ c_result = tree.xmlGetNsProp(c_element, c_name, c_href)
+ if c_result is NULL:
+ return None
+ try:
+ result = funicode(c_result)
+ finally:
+ # the C string returned by xmlGetNsProp() is freed in any case
+ tree.xmlFree(c_result)
+ return result
+
+cdef object _getNodeAttributeValue(xmlNode* c_node, key, default):
+ # Return the value of the attribute named by 'key' on c_node, or
+ # 'default' if xmlGetNsProp() finds nothing. 'key' is split into
+ # namespace URI and local name by _getNsTag().
+ ns, tag = _getNsTag(key)
+ # no namespace in the key => look up with a NULL namespace URI
+ c_href = NULL if ns is None else _xcstr(ns)
+ c_result = tree.xmlGetNsProp(c_node, _xcstr(tag), c_href)
+ if c_result is NULL:
+ # XXX free namespace that is not in use..?
+ return default
+ try:
+ result = funicode(c_result)
+ finally:
+ # always release the string returned by xmlGetNsProp()
+ tree.xmlFree(c_result)
+ return result
+
+cdef inline object _getAttributeValue(_Element element, key, default):
+ # Convenience wrapper: same as _getNodeAttributeValue(), applied to the
+ # C node of a Python-level _Element.
+ return _getNodeAttributeValue(element._c_node, key, default)
+
+cdef int _setAttributeValue(_Element element, key, value) except -1:
+ # Set the attribute named by 'key' on 'element' to 'value'. Returns 0.
+ # 'key' is split into namespace URI and local name by _getNsTag().
+ # Errors raised by the helpers called here (name validation, _utf8())
+ # propagate to the caller.
+ cdef xmlNs* c_ns
+ ns, tag = _getNsTag(key)
+ # attribute names are only validated when the document's parser is not
+ # an HTML parser
+ if not element._doc._parser._for_html:
+ _attributeValidOrRaise(tag)
+ c_tag = _xcstr(tag)
+ if isinstance(value, QName):
+ # QName values are turned into text by _resolveQNameText()
+ value = _resolveQNameText(element, value)
+ else:
+ value = _utf8(value)
+ c_value = _xcstr(value)
+ if ns is None:
+ c_ns = NULL
+ else:
+ # NOTE(review): the meaning of the trailing (NULL, 1) arguments is
+ # defined by _findOrBuildNodeNs(), which is not visible here
+ c_ns = element._doc._findOrBuildNodeNs(element._c_node, _xcstr(ns), NULL, 1)
+ tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value)
+ return 0
+
+cdef int _delAttribute(_Element element, key) except -1:
+ # Delete the attribute named by 'key' from 'element'. Returns 0.
+ # Raises KeyError(key) if _delAttributeFromNsName() reports failure.
+ ns, tag = _getNsTag(key)
+ c_href = NULL if ns is None else _xcstr(ns)
+ # a non-zero result means the attribute was not found
+ if _delAttributeFromNsName(element._c_node, c_href, _xcstr(tag)):
+ raise KeyError, key
+ return 0
+
+cdef int _delAttributeFromNsName(xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
+ # Remove the attribute (c_href, c_name) from c_node.
+ # Returns 0 on success and -1 if xmlHasNsProp() does not find the
+ # attribute. The function has no 'except' clause, so -1 is a plain
+ # status code here and no Python exception is set.
+ c_attr = tree.xmlHasNsProp(c_node, c_name, c_href)
+ if c_attr is NULL:
+ # XXX free namespace that is not in use..?
+ return -1
+ tree.xmlRemoveProp(c_attr)
+ return 0
+
+cdef list _collectAttributes(xmlNode* c_node, int collecttype):
+ u"""Collect all attributes of a node in a list.
+
+ Depending on collecttype, the list holds the attribute names (1), the
+ attribute values (2) or, for any other value, (name, value) tuples.
+ Only entries of c_node.properties whose type is XML_ATTRIBUTE_NODE are
+ considered. Returns an empty list if there are none.
+ """
+ cdef Py_ssize_t count
+ # first pass: count the attributes so the list can be preallocated
+ c_attr = c_node.properties
+ count = 0
+ while c_attr is not NULL:
+ if c_attr.type == tree.XML_ATTRIBUTE_NODE:
+ count += 1
+ c_attr = c_attr.next
+
+ if not count:
+ return []
+
+ # second pass: fill the preallocated list
+ attributes = [None] * count
+ c_attr = c_node.properties
+ count = 0
+ while c_attr is not NULL:
+ if c_attr.type == tree.XML_ATTRIBUTE_NODE:
+ if collecttype == 1:
+ item = _namespacedName(c_attr)
+ elif collecttype == 2:
+ item = _attributeValue(c_node, c_attr)
+ else:
+ item = (_namespacedName(c_attr),
+ _attributeValue(c_node, c_attr))
+ attributes[count] = item
+ count += 1
+ c_attr = c_attr.next
+ return attributes
+
+# Matches an XML declaration at the start of a string, up to and including
+# its encoding pseudo-attribute. Group 1 is the declaration text before the
+# encoding, group 2 is the closing '?>' (with leading whitespace) if it
+# follows directly, otherwise the empty string.
+cdef object __RE_XML_ENCODING = re.compile(
+ ur'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U)
+
+# bound methods of the compiled pattern, looked up once at module level
+cdef object __REPLACE_XML_ENCODING = __RE_XML_ENCODING.sub
+cdef object __HAS_XML_ENCODING = __RE_XML_ENCODING.match
+
+cdef object _stripEncodingDeclaration(object xml_string):
+ # this is a hack to remove the XML encoding declaration from unicode
+ # The substitution keeps groups 1 and 2 of __RE_XML_ENCODING, i.e. the
+ # text matched around the encoding pseudo-attribute, and drops the
+ # pseudo-attribute itself. Strings without a match are returned unchanged
+ # by the regex substitution.
+ return __REPLACE_XML_ENCODING(ur'\g<1>\g<2>', xml_string)
+
+cdef bint _hasEncodingDeclaration(object xml_string) except -1:
+ # check if a (unicode) string has an XML encoding declaration
+ # True iff __RE_XML_ENCODING matches at the start of xml_string.
+ return __HAS_XML_ENCODING(xml_string) is not None
+
+cdef inline bint _hasText(xmlNode* c_node):
+    u"""Tell whether _textNodeOrSkip() finds a text node at the start of
+    the node's child list. A NULL node has no text.
+    """
+    if c_node is NULL:
+        return 0
+    return _textNodeOrSkip(c_node.children) is not NULL
+
+cdef inline bint _hasTail(xmlNode* c_node):
+    u"""Tell whether _textNodeOrSkip() finds a text node directly behind
+    the node. A NULL node has no tail.
+    """
+    if c_node is NULL:
+        return 0
+    return _textNodeOrSkip(c_node.next) is not NULL
+
+cdef _collectText(xmlNode* c_node):
+ u"""Collect all text nodes and return them as a unicode string.
+
+ Start collecting at c_node. The run of text nodes is walked with
+ _textNodeOrSkip(), so it ends at the first node that function does not
+ accept or skip.
+
+ If there was no text to collect, return None
+ """
+ cdef Py_ssize_t scount
+ cdef xmlChar* c_text
+ cdef xmlNode* c_node_cur
+ # check for multiple text nodes
+ # (c_text ends up pointing at the content of the last non-empty text
+ # node that was seen, or stays NULL if there was none)
+ scount = 0
+ c_text = NULL
+ c_node_cur = c_node = _textNodeOrSkip(c_node)
+ while c_node_cur is not NULL:
+ if c_node_cur.content[0] != c'\0':
+ c_text = c_node_cur.content
+ scount += 1
+ c_node_cur = _textNodeOrSkip(c_node_cur.next)
+
+ # handle two most common cases first
+ if c_text is NULL:
+ return '' if scount > 0 else None
+ if scount == 1:
+ return funicode(c_text)
+
+ # the rest is not performance critical anymore
+ # (concatenate the raw content of every text node in the run, then
+ # convert the joined byte string once)
+ result = b''
+ while c_node is not NULL:
+ result += c_node.content
+ c_node = _textNodeOrSkip(c_node.next)
+ return funicode(result)
+
+cdef void _removeText(xmlNode* c_node):
+ u"""Remove all text nodes.
+
+ Start removing at c_node. Every node of the run found by
+ _textNodeOrSkip() is unlinked from the tree and freed.
+ """
+ cdef xmlNode* c_next
+ c_node = _textNodeOrSkip(c_node)
+ while c_node is not NULL:
+ # find the successor first, c_node is freed below
+ c_next = _textNodeOrSkip(c_node.next)
+ tree.xmlUnlinkNode(c_node)
+ tree.xmlFreeNode(c_node)
+ c_node = c_next
+
+cdef int _setNodeText(xmlNode* c_node, value) except -1:
+ # Replace the leading text of c_node by 'value'. Returns 0 on success.
+ # 'value' may be None (text is only removed), a string (new text node)
+ # or a CDATA instance (new CDATA block). For any other value, _utf8()
+ # is called to raise the appropriate error.
+ cdef xmlNode* c_text_node
+ # remove all text nodes at the start first
+ _removeText(c_node.children)
+ if value is None:
+ return 0
+ # now add new text node with value at start
+ if python._isString(value):
+ text = _utf8(value)
+ c_text_node = tree.xmlNewDocText(c_node.doc, _xcstr(text))
+ elif isinstance(value, CDATA):
+ c_text_node = tree.xmlNewCDataBlock(
+ c_node.doc, _xcstr((value)._utf8_data),
+ python.PyBytes_GET_SIZE((value)._utf8_data))
+ else:
+ # this will raise the right error
+ _utf8(value)
+ return -1
+ if c_node.children is NULL:
+ tree.xmlAddChild(c_node, c_text_node)
+ else:
+ # insert in front of the current first child
+ tree.xmlAddPrevSibling(c_node.children, c_text_node)
+ return 0
+
+cdef int _setTailText(xmlNode* c_node, value) except -1:
+ # Replace the tail text of c_node (the text nodes directly behind it)
+ # by 'value'. Passing None only removes the existing tail. Returns 0.
+ cdef xmlNode* c_text_node
+ # remove all text nodes that directly follow the node first
+ _removeText(c_node.next)
+ if value is None:
+ return 0
+ text = _utf8(value)
+ c_text_node = tree.xmlNewDocText(c_node.doc, _xcstr(text))
+ # XXX what if we're the top element?
+ tree.xmlAddNextSibling(c_node, c_text_node)
+ return 0
+
+cdef bytes _resolveQNameText(_Element element, value):
+ # Turn a qualified name into the byte string to store as text.
+ # 'value' is split by _getNsTag(). Without a namespace, the plain tag
+ # is returned; otherwise the result is b'prefix:tag', using the prefix
+ # of the namespace that _findOrBuildNodeNs() returns for the element.
+ cdef xmlNs* c_ns
+ ns, tag = _getNsTag(value)
+ if ns is None:
+ return tag
+ else:
+ # NOTE(review): the meaning of the trailing (NULL, 0) arguments is
+ # defined by _findOrBuildNodeNs(), which is not visible here
+ c_ns = element._doc._findOrBuildNodeNs(
+ element._c_node, _xcstr(ns), NULL, 0)
+ return python.PyBytes_FromFormat('%s:%s', c_ns.prefix, _cstr(tag))
+
+cdef inline bint _hasChild(xmlNode* c_node):
+    u"""Tell whether _findChildForwards() finds a child at index 0.
+    A NULL node has no children.
+    """
+    if c_node is NULL:
+        return 0
+    return _findChildForwards(c_node, 0) is not NULL
+
+cdef inline Py_ssize_t _countElements(xmlNode* c_node):
+    u"""Count the nodes accepted by _isElement() among c_node itself and
+    all of its following siblings.
+    """
+    cdef Py_ssize_t n_elements = 0
+    cdef xmlNode* c_sibling = c_node
+    while c_sibling is not NULL:
+        if _isElement(c_sibling):
+            n_elements += 1
+        c_sibling = c_sibling.next
+    return n_elements
+
+cdef int _findChildSlice(
+ slice sliceobject, xmlNode* c_parent,
+ xmlNode** c_start_node, Py_ssize_t* c_step, Py_ssize_t* c_length) except -1:
+ u"""Resolve a children slice.
+
+ Returns the start node, step size and the slice length in the
+ pointer arguments. The function result itself is 0.
+
+ If c_parent has no element children, the start node is NULL, the
+ length is 0 and the step is 1 unless the slice specifies one.
+ """
+ cdef Py_ssize_t start = 0, stop = 0, childcount
+ childcount = _countElements(c_parent.children)
+ if childcount == 0:
+ c_start_node[0] = NULL
+ c_length[0] = 0
+ if sliceobject.step is None:
+ c_step[0] = 1
+ else:
+ python._PyEval_SliceIndex(sliceobject.step, c_step)
+ return 0
+ python.PySlice_GetIndicesEx(
+ sliceobject, childcount, &start, &stop, c_step, c_length)
+ # search from whichever end of the child list is closer to 'start'
+ if start > childcount / 2:
+ c_start_node[0] = _findChildBackwards(c_parent, childcount - start - 1)
+ else:
+ c_start_node[0] = _findChild(c_parent, start)
+ return 0
+
+cdef bint _isFullSlice(slice sliceobject) except -1:
+    u"""Conservatively decide whether a slice selects everything, as in
+    ``s[:]``: start and stop must both be None and the step must be
+    either None or 1. A None slice object is never a full slice.
+    """
+    cdef Py_ssize_t c_step = 0
+    if sliceobject is None:
+        return 0
+    if sliceobject.start is not None or sliceobject.stop is not None:
+        return 0
+    if sliceobject.step is None:
+        return 1
+    python._PyEval_SliceIndex(sliceobject.step, &c_step)
+    return c_step == 1
+
+cdef _collectChildren(_Element element):
+ # Return a list with one _elementFactory() proxy per child of 'element'
+ # that _isElement()/_nextElement() accept, in document order.
+ cdef xmlNode* c_node
+ cdef list result = []
+ c_node = element._c_node.children
+ if c_node is not NULL:
+ # advance to the first element if the child list starts with
+ # something else
+ if not _isElement(c_node):
+ c_node = _nextElement(c_node)
+ while c_node is not NULL:
+ result.append(_elementFactory(element._doc, c_node))
+ c_node = _nextElement(c_node)
+ return result
+
+cdef inline xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index):
+    u"""Find a child by index. Non-negative indices are resolved from the
+    start of the child list, negative ones from its end (-1 is the first
+    candidate when searching backwards).
+    """
+    if index >= 0:
+        return _findChildForwards(c_node, index)
+    return _findChildBackwards(c_node, -index - 1)
+
+cdef inline xmlNode* _findChildForwards(xmlNode* c_node, Py_ssize_t index):
+ u"""Return child element of c_node with index, or return NULL if not found.
+
+ The child list is walked from c_node.children via the 'next' links.
+ Only children for which _isElement() is true can be returned.
+ """
+ cdef xmlNode* c_child
+ cdef Py_ssize_t c
+ c_child = c_node.children
+ c = 0
+ while c_child is not NULL:
+ if _isElement(c_child):
+ if c == index:
+ return c_child
+ c += 1
+ c_child = c_child.next
+ return NULL
+
+cdef inline xmlNode* _findChildBackwards(xmlNode* c_node, Py_ssize_t index):
+ u"""Return child element of c_node with index, or return NULL if not found.
+ Search from the end.
+
+ The child list is walked from c_node.last via the 'prev' links, so
+ index 0 refers to the end of the list. Only children for which
+ _isElement() is true can be returned.
+ """
+ cdef xmlNode* c_child
+ cdef Py_ssize_t c
+ c_child = c_node.last
+ c = 0
+ while c_child is not NULL:
+ if _isElement(c_child):
+ if c == index:
+ return c_child
+ c += 1
+ c_child = c_child.prev
+ return NULL
+
+cdef inline xmlNode* _textNodeOrSkip(xmlNode* c_node) nogil:
+ u"""Return the node if it's a text node. Skip over ignorable nodes in a
+ series of text nodes. Return NULL if a non-ignorable node is found.
+
+ Text nodes are nodes of type XML_TEXT_NODE or XML_CDATA_SECTION_NODE.
+ Ignorable nodes are those of type XML_XINCLUDE_START or
+ XML_XINCLUDE_END. NULL is also returned for a NULL argument or when
+ the sibling list ends while skipping.
+
+ This is used to skip over XInclude nodes when collecting adjacent text
+ nodes.
+ """
+ while c_node is not NULL:
+ if c_node.type == tree.XML_TEXT_NODE or \
+ c_node.type == tree.XML_CDATA_SECTION_NODE:
+ return c_node
+ elif c_node.type == tree.XML_XINCLUDE_START or \
+ c_node.type == tree.XML_XINCLUDE_END:
+ # XInclude marker => keep looking behind it
+ c_node = c_node.next
+ else:
+ return NULL
+ return NULL
+
+cdef inline xmlNode* _nextElement(xmlNode* c_node):
+    u"""Return the closest following sibling that _isElement() accepts,
+    or NULL if c_node is NULL or there is no such sibling.
+    """
+    cdef xmlNode* c_sibling
+    if c_node is NULL:
+        return NULL
+    c_sibling = c_node.next
+    while c_sibling is not NULL and not _isElement(c_sibling):
+        c_sibling = c_sibling.next
+    return c_sibling
+
+cdef inline xmlNode* _previousElement(xmlNode* c_node):
+    u"""Return the closest preceding sibling that _isElement() accepts,
+    or NULL if c_node is NULL or there is no such sibling.
+    """
+    cdef xmlNode* c_sibling
+    if c_node is NULL:
+        return NULL
+    c_sibling = c_node.prev
+    while c_sibling is not NULL and not _isElement(c_sibling):
+        c_sibling = c_sibling.prev
+    return c_sibling
+
+cdef inline xmlNode* _parentElement(xmlNode* c_node):
+    u"""Return the parent of c_node if both c_node and its parent are
+    accepted by _isElement(); return NULL in every other case.
+    """
+    cdef xmlNode* c_parent
+    if c_node is not NULL and _isElement(c_node):
+        c_parent = c_node.parent
+        if c_parent is not NULL and _isElement(c_parent):
+            return c_parent
+    return NULL
+
+cdef inline bint _tagMatches(xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
+ u"""Tests if the node matches namespace URI and tag name.
+
+ A node matches if it matches both c_href and c_name.
+
+ A node matches c_href if any of the following is true:
+ * c_href is NULL
+ * its namespace is NULL and c_href is the empty string
+ * its namespace string equals the c_href string
+
+ A node matches c_name if any of the following is true:
+ * c_name is NULL
+ * its name string equals the c_name string
+
+ Two exceptions are visible in the code: a NULL node never matches, and
+ a node that is not of type XML_ELEMENT_NODE only matches if both c_href
+ and c_name are NULL. Also, when c_name is given and c_href is NULL,
+ only nodes without a namespace match.
+ """
+ if c_node is NULL:
+ return 0
+ if c_node.type != tree.XML_ELEMENT_NODE:
+ # not an element, only succeed if we match everything
+ return c_name is NULL and c_href is NULL
+ if c_name is NULL:
+ if c_href is NULL:
+ # always match
+ return 1
+ else:
+ # any name, namespace must match
+ c_node_href = _getNs(c_node)
+ if c_node_href is NULL:
+ return c_href[0] == c'\0'
+ else:
+ return tree.xmlStrcmp(c_node_href, c_href) == 0
+ elif c_href is NULL:
+ # name given without namespace => node must not have a namespace
+ if _getNs(c_node) is not NULL:
+ return 0
+ # pointer comparison first as a shortcut, string comparison otherwise
+ return c_node.name == c_name or tree.xmlStrcmp(c_node.name, c_name) == 0
+ elif c_node.name == c_name or tree.xmlStrcmp(c_node.name, c_name) == 0:
+ # name matches, now compare the namespace
+ c_node_href = _getNs(c_node)
+ if c_node_href is NULL:
+ return c_href[0] == c'\0'
+ else:
+ return tree.xmlStrcmp(c_node_href, c_href) == 0
+ else:
+ return 0
+
+cdef inline bint _tagMatchesExactly(xmlNode* c_node, qname* c_qname):
+ u"""Tests if the node matches namespace URI and tag name.
+
+ Delegates to _nsTagMatchesExactly() with the namespace URI and the
+ name of the node.
+
+ This differs from _tagMatches() in that it expects the c_name
+ to be taken from the doc dict, i.e. it only compares the names by
+ address.
+
+ A node matches if it matches both href and c_name of the qname.
+
+ A node matches c_href if any of the following is true:
+ * the href of the qname is NULL
+ * its namespace is NULL or empty and c_href is the empty string
+ * its namespace string equals the c_href string
+
+ A node matches c_name if any of the following is true:
+ * c_name is NULL
+ * its name string points to the same address (!) as c_name
+ """
+ return _nsTagMatchesExactly(_getNs(c_node), c_node.name, c_qname)
+
+cdef inline bint _nsTagMatchesExactly(const_xmlChar* c_node_href,
+ const_xmlChar* c_node_name,
+ qname* c_qname):
+ u"""Tests if name and namespace URI match those of c_qname.
+
+ This differs from _tagMatches() in that it expects the c_name
+ to be taken from the doc dict, i.e. it only compares the names by
+ address.
+
+ A node matches if it matches both href and c_name of the qname.
+
+ A node matches c_href if any of the following is true:
+ * the href of the qname is NULL (any namespace is accepted then)
+ * its namespace is NULL or empty and c_href is the empty string
+ * its namespace string equals the c_href string
+
+ A node matches c_name if any of the following is true:
+ * c_name is NULL
+ * its name string points to the same address (!) as c_name
+ """
+ cdef char* c_href
+ # names are compared by pointer identity, not by content
+ if c_qname.c_name is not NULL and c_qname.c_name is not c_node_name:
+ return 0
+ if c_qname.href is NULL:
+ # no namespace restriction in the qname
+ return 1
+ c_href = python.__cstr(c_qname.href)
+ if c_href[0] == '\0':
+ # empty href => only match nodes without a namespace
+ return c_node_href is NULL or c_node_href[0] == '\0'
+ elif c_node_href is NULL:
+ return 0
+ else:
+ return tree.xmlStrcmp(c_href, c_node_href) == 0
+
+cdef Py_ssize_t _mapTagsToQnameMatchArray(xmlDoc* c_doc, list ns_tags,
+ qname* c_ns_tags, bint force_into_dict) except -1:
+ u"""Map a sequence of (namespace, name) pairs to a qname array for
+ efficient matching with _tagMatchesExactly() above.
+
+ Names are resolved to pointers in the dict of c_doc. With
+ force_into_dict, they are looked up with xmlDictLookup() and a
+ MemoryError is raised if that fails. Without it, names that
+ xmlDictExists() does not find are skipped. A name of None is stored
+ as NULL.
+
+ Returns the number of entries written to c_ns_tags.
+
+ Note that each qname struct in the array owns its href byte string object
+ if it is not NULL.
+ """
+ cdef Py_ssize_t count = 0, i
+ cdef bytes ns, tag
+ for ns, tag in ns_tags:
+ if tag is None:
+ c_tag = NULL
+ elif force_into_dict:
+ c_tag = tree.xmlDictLookup(c_doc.dict, _xcstr(tag), len(tag))
+ if c_tag is NULL:
+ # clean up before raising the error
+ # (release the href references stored so far)
+ for i in xrange(count):
+ cpython.ref.Py_XDECREF(c_ns_tags[i].href)
+ raise MemoryError()
+ else:
+ c_tag = tree.xmlDictExists(c_doc.dict, _xcstr(tag), len(tag))
+ if c_tag is NULL:
+ # not in the dict => not in the document
+ continue
+ c_ns_tags[count].c_name = c_tag
+ if ns is None:
+ c_ns_tags[count].href = NULL
+ else:
+ cpython.ref.Py_INCREF(ns) # keep an owned reference!
+ c_ns_tags[count].href = ns
+ count += 1
+ return count
+
+cdef int _removeNode(_Document doc, xmlNode* c_node) except -1:
+ u"""Unlink and free a node and subnodes if possible. Otherwise, make sure
+ it's self-contained.
+
+ The tail text of the node is moved along with it. Returns 0.
+ """
+ cdef xmlNode* c_next
+ # remember the successor before unlinking, the tail starts there
+ c_next = c_node.next
+ tree.xmlUnlinkNode(c_node)
+ _moveTail(c_next, c_node)
+ if not attemptDeallocation(c_node):
+ # make namespaces absolute
+ moveNodeToDocument(doc, c_node.doc, c_node)
+ return 0
+
+cdef int _removeSiblings(xmlNode* c_element, tree.xmlElementType node_type, bint with_tail) except -1:
+ # Remove siblings of c_element whose type equals node_type, first
+ # behind the element, then in front of it. Returns 0.
+ # The walk starts at the direct neighbour and continues with
+ # _nextElement() / _previousElement(), so only nodes reached that way
+ # are examined. With with_tail set, the text behind a removed node is
+ # removed as well. Removed nodes are unlinked and passed to
+ # attemptDeallocation().
+ cdef xmlNode* c_node
+ cdef xmlNode* c_next
+ c_node = c_element.next
+ while c_node is not NULL:
+ # find the successor first, c_node may get freed below
+ c_next = _nextElement(c_node)
+ if c_node.type == node_type:
+ if with_tail:
+ _removeText(c_node.next)
+ tree.xmlUnlinkNode(c_node)
+ attemptDeallocation(c_node)
+ c_node = c_next
+ c_node = c_element.prev
+ while c_node is not NULL:
+ # same as above, walking backwards
+ c_next = _previousElement(c_node)
+ if c_node.type == node_type:
+ if with_tail:
+ _removeText(c_node.next)
+ tree.xmlUnlinkNode(c_node)
+ attemptDeallocation(c_node)
+ c_node = c_next
+ return 0
+
+cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target):
+ # Move the run of text nodes that starts at c_tail behind c_target,
+ # keeping their order. c_tail is typically the former 'next' sibling of
+ # a node that was just moved.
+ cdef xmlNode* c_next
+ # tail support: look for any text nodes trailing this node and
+ # move them too
+ c_tail = _textNodeOrSkip(c_tail)
+ while c_tail is not NULL:
+ # find the successor before the node is relinked
+ c_next = _textNodeOrSkip(c_tail.next)
+ # continue behind whatever node xmlAddNextSibling() returns
+ c_target = tree.xmlAddNextSibling(c_target, c_tail)
+ c_tail = c_next
+
+cdef int _copyTail(xmlNode* c_tail, xmlNode* c_target) except -1:
+ # Copy the run of text nodes that starts at c_tail and append the
+ # copies behind c_target. Returns 0; raises MemoryError if a copy
+ # cannot be created.
+ cdef xmlNode* c_new_tail
+ # tail copying support: look for any text nodes trailing this node and
+ # copy it to the target node
+ c_tail = _textNodeOrSkip(c_tail)
+ while c_tail is not NULL:
+ # copy into the target document if the documents differ
+ if c_target.doc is not c_tail.doc:
+ c_new_tail = tree.xmlDocCopyNode(c_tail, c_target.doc, 0)
+ else:
+ c_new_tail = tree.xmlCopyNode(c_tail, 0)
+ if c_new_tail is NULL:
+ raise MemoryError()
+ c_target = tree.xmlAddNextSibling(c_target, c_new_tail)
+ c_tail = _textNodeOrSkip(c_tail.next)
+ return 0
+
+cdef int _copyNonElementSiblings(xmlNode* c_node, xmlNode* c_target) except -1:
+ # Copy the processing instructions and comments that directly surround
+ # c_node into the document of c_target and link the copies around
+ # c_target. Raises MemoryError if a copy cannot be created.
+ cdef xmlNode* c_copy
+ cdef xmlNode* c_sibling = c_node
+ # rewind to the first PI/comment of the run in front of c_node
+ while c_sibling.prev != NULL and \
+ (c_sibling.prev.type == tree.XML_PI_NODE or \
+ c_sibling.prev.type == tree.XML_COMMENT_NODE):
+ c_sibling = c_sibling.prev
+ # copy the preceding siblings in front of the target
+ while c_sibling != c_node:
+ c_copy = tree.xmlDocCopyNode(c_sibling, c_target.doc, 1)
+ if c_copy is NULL:
+ raise MemoryError()
+ tree.xmlAddPrevSibling(c_target, c_copy)
+ c_sibling = c_sibling.next
+ # copy the PIs/comments that follow c_node behind the target
+ while c_sibling.next != NULL and \
+ (c_sibling.next.type == tree.XML_PI_NODE or \
+ c_sibling.next.type == tree.XML_COMMENT_NODE):
+ c_sibling = c_sibling.next
+ c_copy = tree.xmlDocCopyNode(c_sibling, c_target.doc, 1)
+ if c_copy is NULL:
+ raise MemoryError()
+ tree.xmlAddNextSibling(c_target, c_copy)
+
+cdef int _deleteSlice(_Document doc, xmlNode* c_node,
+ Py_ssize_t count, Py_ssize_t step) except -1:
+ u"""Delete slice, ``count`` items starting with ``c_node`` with a step
+ width of ``step``.
+
+ A positive step walks forwards with _nextElement(), any other step
+ walks backwards with _previousElement() using its negated value.
+ Nodes are removed with _removeNode(). Nothing happens if c_node is
+ NULL. Returns 0.
+ """
+ cdef xmlNode* c_next
+ cdef Py_ssize_t c, i
+ cdef _node_to_node_function next_element
+ if c_node is NULL:
+ return 0
+ if step > 0:
+ next_element = _nextElement
+ else:
+ step = -step
+ next_element = _previousElement
+ # now start deleting nodes
+ c = 0
+ c_next = c_node
+ while c_node is not NULL and c < count:
+ # find the next slice item before removing the current one
+ for i in range(step):
+ c_next = next_element(c_next)
+ _removeNode(doc, c_node)
+ c += 1
+ c_node = c_next
+ return 0
+
+cdef int _replaceSlice(_Element parent, xmlNode* c_node,
+ Py_ssize_t slicelength, Py_ssize_t step,
+ bint left_to_right, elements) except -1:
+ u"""Replace the slice of ``slicelength`` elements starting at ``c_node``
+ with positive step width ``step`` by the Elements in ``elements``. The
+ direction is given by the boolean argument ``left_to_right``.
+
+ ``c_node`` may be NULL to indicate the end of the children list.
+
+ ``elements`` may be any iterable; it is turned into a list unless it
+ already is a list or tuple. For ``step > 1``, a ValueError is raised
+ if its length differs from ``slicelength``. Returns 0.
+ """
+ cdef xmlNode* c_orig_neighbour
+ cdef xmlNode* c_next
+ cdef xmlDoc* c_source_doc
+ cdef _Element element
+ cdef Py_ssize_t seqlength, i, c
+ cdef _node_to_node_function next_element
+ assert step > 0
+ if left_to_right:
+ next_element = _nextElement
+ else:
+ next_element = _previousElement
+
+ if not isinstance(elements, (list, tuple)):
+ elements = list(elements)
+
+ if step > 1:
+ # *replacing* children stepwise with list => check size!
+ seqlength = len(elements)
+ if seqlength != slicelength:
+ raise ValueError, u"attempt to assign sequence of size %d " \
+ u"to extended slice of size %d" % (seqlength, slicelength)
+
+ if c_node is NULL:
+ # no children yet => add all elements straight away
+ if left_to_right:
+ for element in elements:
+ assert element is not None, u"Node must not be None"
+ _appendChild(parent, element)
+ else:
+ for element in elements:
+ assert element is not None, u"Node must not be None"
+ _prependChild(parent, element)
+ return 0
+
+ # remove the elements first as some might be re-added
+ if left_to_right:
+ # L->R, remember left neighbour
+ c_orig_neighbour = _previousElement(c_node)
+ else:
+ # R->L, remember right neighbour
+ c_orig_neighbour = _nextElement(c_node)
+
+ # We remove the original slice elements one by one. Since we hold
+ # a Python reference to all elements that we will insert, it is
+ # safe to let _removeNode() try (and fail) to free them even if
+ # the element itself or one of its descendents will be reinserted.
+ c = 0
+ c_next = c_node
+ while c_node is not NULL and c < slicelength:
+ for i in range(step):
+ c_next = next_element(c_next)
+ _removeNode(parent._doc, c_node)
+ c += 1
+ c_node = c_next
+
+ # make sure each element is inserted only once
+ # (all loops below consume the same iterator)
+ elements = iter(elements)
+
+ # find the first node right of the new insertion point
+ if left_to_right:
+ if c_orig_neighbour is not NULL:
+ c_node = next_element(c_orig_neighbour)
+ else:
+ # before the first element
+ c_node = _findChildForwards(parent._c_node, 0)
+ elif c_orig_neighbour is NULL:
+ # at the end, but reversed stepping
+ # append one element and go to the next insertion point
+ for element in elements:
+ assert element is not None, u"Node must not be None"
+ _appendChild(parent, element)
+ c_node = element._c_node
+ if slicelength > 0:
+ slicelength -= 1
+ for i in range(1, step):
+ c_node = next_element(c_node)
+ break
+
+ if left_to_right:
+ # adjust step size after removing slice as we are not stepping
+ # over the newly inserted elements
+ step -= 1
+
+ # now insert elements where we removed them
+ if c_node is not NULL:
+ for element in elements:
+ assert element is not None, u"Node must not be None"
+ _assertValidNode(element)
+ # move element and tail over
+ c_source_doc = element._c_node.doc
+ c_next = element._c_node.next
+ tree.xmlAddPrevSibling(c_node, element._c_node)
+ _moveTail(c_next, element._c_node)
+
+ # integrate element into new document
+ moveNodeToDocument(parent._doc, c_source_doc, element._c_node)
+
+ # stop at the end of the slice
+ if slicelength > 0:
+ slicelength = slicelength - 1
+ for i in range(step):
+ c_node = next_element(c_node)
+ if c_node is NULL:
+ break
+ else:
+ # everything inserted
+ return 0
+
+ # append the remaining elements at the respective end
+ if left_to_right:
+ for element in elements:
+ assert element is not None, u"Node must not be None"
+ _assertValidNode(element)
+ _appendChild(parent, element)
+ else:
+ for element in elements:
+ assert element is not None, u"Node must not be None"
+ _assertValidNode(element)
+ _prependChild(parent, element)
+
+ return 0
+
+cdef int _appendChild(_Element parent, _Element child) except -1:
+ u"""Append a new child to a parent element.
+
+ The child is unlinked from its current position and its tail text is
+ moved along with it. Raises ValueError if the child is the parent
+ itself or one of its ancestors. Returns 0.
+ """
+ c_node = child._c_node
+ # remember the source document for moveNodeToDocument() below
+ c_source_doc = c_node.doc
+ # prevent cycles
+ c_parent = parent._c_node
+ while c_parent:
+ if c_parent is c_node:
+ raise ValueError("cannot append parent to itself")
+ c_parent = c_parent.parent
+ # store possible text node
+ c_next = c_node.next
+ # move node itself
+ tree.xmlUnlinkNode(c_node)
+ tree.xmlAddChild(parent._c_node, c_node)
+ _moveTail(c_next, c_node)
+ # uh oh, elements may be pointing to different doc when
+ # parent element has moved; change them too..
+ moveNodeToDocument(parent._doc, c_source_doc, c_node)
+ return 0
+
+cdef int _prependChild(_Element parent, _Element child) except -1:
+ u"""Prepend a new child to a parent element.
+
+ The child is inserted in front of the first child found by
+ _findChildForwards(), or added as only child if there is none. Its
+ tail text is moved along with it. Raises ValueError if the child is
+ the parent itself or one of its ancestors. Returns 0.
+ """
+ c_node = child._c_node
+ # remember the source document for moveNodeToDocument() below
+ c_source_doc = c_node.doc
+ # prevent cycles
+ c_parent = parent._c_node
+ while c_parent:
+ if c_parent is c_node:
+ raise ValueError("cannot append parent to itself")
+ c_parent = c_parent.parent
+ # store possible text node
+ c_next = c_node.next
+ # move node itself
+ c_child = _findChildForwards(parent._c_node, 0)
+ if c_child is NULL:
+ tree.xmlUnlinkNode(c_node)
+ tree.xmlAddChild(parent._c_node, c_node)
+ else:
+ tree.xmlAddPrevSibling(c_child, c_node)
+ _moveTail(c_next, c_node)
+ # uh oh, elements may be pointing to different doc when
+ # parent element has moved; change them too..
+ moveNodeToDocument(parent._doc, c_source_doc, c_node)
+ return 0
+
+cdef int _appendSibling(_Element element, _Element sibling) except -1:
+ u"""Add a new sibling behind an element.
+
+ The tail text of the sibling is moved along with it. Nothing happens
+ if both arguments refer to the same node. Returns 0.
+ """
+ c_node = sibling._c_node
+ if element._c_node is c_node:
+ return 0 # nothing to do
+ # remember the source document for moveNodeToDocument() below
+ c_source_doc = c_node.doc
+ # store possible text node
+ c_next = c_node.next
+ # move node itself
+ tree.xmlAddNextSibling(element._c_node, c_node)
+ _moveTail(c_next, c_node)
+ # uh oh, elements may be pointing to different doc when
+ # parent element has moved; change them too..
+ moveNodeToDocument(element._doc, c_source_doc, c_node)
+ return 0
+
+cdef int _prependSibling(_Element element, _Element sibling) except -1:
+ u"""Add a new sibling before an element.
+
+ The tail text of the sibling is moved along with it. Nothing happens
+ if both arguments refer to the same node. Returns 0.
+ """
+ c_node = sibling._c_node
+ if element._c_node is c_node:
+ return 0 # nothing to do
+ # remember the source document for moveNodeToDocument() below
+ c_source_doc = c_node.doc
+ # store possible text node
+ c_next = c_node.next
+ # move node itself
+ tree.xmlAddPrevSibling(element._c_node, c_node)
+ _moveTail(c_next, c_node)
+ # uh oh, elements may be pointing to different doc when
+ # parent element has moved; change them too..
+ moveNodeToDocument(element._doc, c_source_doc, c_node)
+ return 0
+
+cdef inline int isutf8(const_xmlChar* s):
+    u"""Return 1 if the NUL-terminated string contains at least one byte
+    with the high bit set, 0 if it is plain 7-bit ASCII.
+    """
+    while s[0] != c'\0':
+        if s[0] & 0x80:
+            return 1
+        s += 1
+    return 0
+
+cdef int check_string_utf8(bytes pystring):
+ u"""Check if a string looks like valid UTF-8 XML content. Returns 0
+ for ASCII, 1 for UTF-8 and -1 in the case of errors, such as NULL
+ bytes or ASCII control characters.
+
+ Bytes with the high bit set are only skipped, the multi byte sequences
+ themselves are not validated here. All other bytes must pass
+ xmlIsChar_ch().
+ """
+ cdef const_xmlChar* s = _xcstr(pystring)
+ # the length of the Python object is used, so embedded NUL bytes are
+ # scanned as well
+ cdef const_xmlChar* c_end = s + len(pystring)
+ cdef bint is_non_ascii = 0
+ while s < c_end:
+ if s[0] & 0x80:
+ # skip over multi byte sequences
+ while s < c_end and s[0] & 0x80:
+ s += 1
+ is_non_ascii = 1
+ if s < c_end and not tree.xmlIsChar_ch(s[0]):
+ return -1 # invalid!
+ s += 1
+ return is_non_ascii
+
+cdef inline object funicodeOrNone(const_xmlChar* s):
+    u"""Convert a C string with funicode(), mapping NULL to None.
+    """
+    if s is NULL:
+        return None
+    return funicode(s)
+
+cdef inline object funicodeOrEmpty(const_xmlChar* s):
+    u"""Convert a C string with funicode(), mapping NULL to ''.
+    """
+    if s is NULL:
+        return ''
+    return funicode(s)
+
+cdef object funicode(const_xmlChar* s):
+ # Convert a NUL-terminated UTF-8 C string into a Python string.
+ # If python.LXML_UNICODE_STRINGS is set, the string is always decoded
+ # from UTF-8. Otherwise, it is only decoded if it contains a byte with
+ # the high bit set, and returned as a plain byte slice if not.
+ # NOTE(review): s is dereferenced without a NULL check; see
+ # funicodeOrNone() / funicodeOrEmpty() for NULL-safe variants.
+ cdef Py_ssize_t slen
+ cdef const_xmlChar* spos
+ cdef bint is_non_ascii
+ if python.LXML_UNICODE_STRINGS:
+ return s.decode('UTF-8')
+ # scan up to the first non-ASCII byte or the end of the string
+ spos = s
+ is_non_ascii = 0
+ while spos[0] != c'\0':
+ if spos[0] & 0x80:
+ is_non_ascii = 1
+ break
+ spos += 1
+ slen = spos - s
+ if spos[0] != c'\0':
+ # stopped early => add the length of the unscanned rest
+ slen += tree.xmlStrlen(spos)
+ if is_non_ascii:
+ return s[:slen].decode('UTF-8')
+ return s[:slen]
+
+cdef bytes _utf8(object s):
+ """Test if a string is valid user input and encode it to UTF-8.
+ Reject all bytes/unicode input that contains non-XML characters.
+ Reject all bytes input that contains non-ASCII characters.
+
+ Raises TypeError for input that is neither bytes, bytearray nor
+ unicode, and ValueError for rejected content.
+ """
+ cdef int invalid
+ cdef bytes utf8_string
+ if not python.IS_PYTHON3 and type(s) is bytes:
+ utf8_string = s
+ # any non-zero result (non-ASCII or error) rejects byte strings
+ invalid = check_string_utf8(utf8_string)
+ elif isinstance(s, unicode):
+ utf8_string = (s).encode('utf8')
+ invalid = check_string_utf8(utf8_string) == -1 # non-XML?
+ elif isinstance(s, (bytes, bytearray)):
+ utf8_string = bytes(s)
+ # any non-zero result (non-ASCII or error) rejects byte strings
+ invalid = check_string_utf8(utf8_string)
+ else:
+ raise TypeError("Argument must be bytes or unicode, got '%.200s'" % type(s).__name__)
+ if invalid:
+ raise ValueError(
+ "All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters")
+ return utf8_string
+
+cdef bytes _utf8orNone(object s):
+    u"""Encode a string with _utf8(), passing None through unchanged.
+    """
+    if s is None:
+        return None
+    return _utf8(s)
+
+cdef bint _isFilePath(const_xmlChar* c_path):
+ u"""simple heuristic to see if a path is a filename
+
+ Returns 1 for paths that start with '/', for a drive letter followed
+ by ':' and either the end of the string or a backslash, and for
+ everything that does not start with letters followed by '://'.
+ Returns 0 for the latter, which looks like a URL.
+ """
+ cdef xmlChar c
+ # test if it looks like an absolute Unix path or a Windows network path
+ if c_path[0] == c'/':
+ return 1
+
+ # test if it looks like an absolute Windows path or URL
+ if (c_path[0] >= c'a' and c_path[0] <= c'z') or \
+ (c_path[0] >= c'A' and c_path[0] <= c'Z'):
+ c_path += 1
+ if c_path[0] == c':' and c_path[1] in b'\0\\':
+ return 1 # C: or C:\...
+
+ # test if it looks like a URL with scheme://
+ # (skip over the remaining letters of a potential scheme name)
+ while (c_path[0] >= c'a' and c_path[0] <= c'z') or \
+ (c_path[0] >= c'A' and c_path[0] <= c'Z'):
+ c_path += 1
+ if c_path[0] == c':' and c_path[1] == c'/' and c_path[2] == c'/':
+ return 0
+
+ # assume it's a relative path
+ return 1
+
+cdef object _encodeFilename(object filename):
+ u"""Make sure a filename is 8-bit encoded (or None).
+
+ None and byte strings are returned unchanged. Unicode strings that
+ _isFilePath() considers file paths are encoded with
+ _C_FILENAME_ENCODING if possible; everything else, including paths
+ that fail to encode, is returned UTF-8 encoded. Raises TypeError for
+ other input types.
+ """
+ if filename is None:
+ return None
+ elif isinstance(filename, bytes):
+ return filename
+ elif isinstance(filename, unicode):
+ filename8 = (filename).encode('utf8')
+ if _isFilePath(filename8):
+ try:
+ return python.PyUnicode_AsEncodedString(
+ filename, _C_FILENAME_ENCODING, NULL)
+ except UnicodeEncodeError:
+ # fall back to the UTF-8 encoded name below
+ pass
+ return filename8
+ else:
+ raise TypeError("Argument must be string or unicode.")
+
+cdef object _decodeFilename(const_xmlChar* c_path):
+ u"""Make the filename a unicode string if we are in Py3.
+
+ Same as _decodeFilenameWithLength(), with the length taken from
+ xmlStrlen().
+ """
+ return _decodeFilenameWithLength(c_path, tree.xmlStrlen(c_path))
+
+cdef object _decodeFilenameWithLength(const_xmlChar* c_path, size_t c_len):
+ u"""Make the filename a unicode string if we are in Py3.
+
+ Decoding is tried in this order: _C_FILENAME_ENCODING (only if
+ _isFilePath() considers c_path a file path), UTF-8, and finally
+ latin-1 with the 'replace' error handler.
+ """
+ if _isFilePath(c_path):
+ try:
+ return python.PyUnicode_Decode(
+ c_path, c_len, _C_FILENAME_ENCODING, NULL)
+ except UnicodeDecodeError:
+ # fall through to the UTF-8 attempt below
+ pass
+ try:
+ return (c_path)[:c_len].decode('UTF-8')
+ except UnicodeDecodeError:
+ # this is a stupid fallback, but it might still work...
+ return (c_path)[:c_len].decode('latin-1', 'replace')
+
+cdef object _encodeFilenameUTF8(object filename):
+ u"""Recode filename as UTF-8. Tries ASCII, local filesystem encoding and
+ UTF-8 as source encoding.
+
+ None is passed through. Raises TypeError for input that is neither a
+ byte string nor unicode. For byte strings that decode with neither
+ _C_FILENAME_ENCODING nor UTF-8, the UnicodeDecodeError of the first
+ attempt is raised.
+ """
+ cdef char* c_filename
+ if filename is None:
+ return None
+ elif isinstance(filename, bytes):
+ if not check_string_utf8(filename):
+ # plain ASCII!
+ return filename
+ c_filename = _cstr(filename)
+ try:
+ # try to decode with default encoding
+ filename = python.PyUnicode_Decode(
+ c_filename, len(filename),
+ _C_FILENAME_ENCODING, NULL)
+ except UnicodeDecodeError as decode_exc:
+ try:
+ # try if it's proper UTF-8
+ (filename).decode('utf8')
+ return filename
+ except UnicodeDecodeError:
+ raise decode_exc # otherwise re-raise original exception
+ # a successfully decoded byte string continues here as unicode
+ if isinstance(filename, unicode):
+ return (filename).encode('utf8')
+ else:
+ raise TypeError("Argument must be string or unicode.")
+
+cdef tuple _getNsTag(tag):
+ u"""Given a tag, find namespace URI and tag name.
+ Return None for NS uri if no namespace URI provided.
+
+ Returns a (namespace, name) tuple. Raises ValueError for an
+ unterminated '{' or an empty tag name, see __getNsTag().
+ """
+ return __getNsTag(tag, 0)
+
+cdef tuple _getNsTagWithEmptyNs(tag):
+ u"""Given a tag, find namespace URI and tag name. Return None for NS uri
+ if no namespace URI provided, or the empty string if namespace
+ part is '{}'.
+
+ Returns a (namespace, name) tuple. Raises ValueError for an
+ unterminated '{' or an empty tag name, see __getNsTag().
+ """
+ return __getNsTag(tag, 1)
+
+cdef tuple __getNsTag(tag, bint empty_ns):
+ # Split a tag in '{namespace}name' notation into a (namespace, name)
+ # tuple of byte strings. The namespace is None if the tag has no
+ # '{...}' part. For an empty '{}' part, it is b'' if empty_ns is set
+ # and None otherwise. QName instances are accepted through their
+ # .text attribute.
+ # Raises ValueError for a missing '}' or an empty tag name.
+ cdef char* c_tag
+ cdef char* c_ns_end
+ cdef Py_ssize_t taglen
+ cdef Py_ssize_t nslen
+ cdef bytes ns = None
+ # _isString() is much faster than isinstance()
+ if not _isString(tag) and isinstance(tag, QName):
+ tag = (tag).text
+ tag = _utf8(tag)
+ c_tag = _cstr(tag)
+ if c_tag[0] == c'{':
+ # skip the opening brace and look for the closing one
+ c_tag += 1
+ c_ns_end = cstring_h.strchr(c_tag, c'}')
+ if c_ns_end is NULL:
+ raise ValueError, u"Invalid tag name"
+ nslen = c_ns_end - c_tag
+ # total size minus the namespace and the two braces
+ taglen = python.PyBytes_GET_SIZE(tag) - nslen - 2
+ if taglen == 0:
+ raise ValueError, u"Empty tag name"
+ if nslen > 0:
+ ns = c_tag[:nslen]
+ elif empty_ns:
+ ns = b''
+ # the name starts right behind the closing brace
+ tag = c_ns_end[1:taglen+1]
+ elif python.PyBytes_GET_SIZE(tag) == 0:
+ raise ValueError, u"Empty tag name"
+ return ns, tag
+
+cdef inline int _pyXmlNameIsValid(name_utf8):
+ # _xmlNameIsValid() for a Python byte string (presumably UTF-8 encoded,
+ # as the parameter name suggests).
+ return _xmlNameIsValid(_xcstr(name_utf8))
+
+cdef inline int _pyHtmlNameIsValid(name_utf8):
+ # _htmlNameIsValid() for a Python byte string (presumably UTF-8 encoded,
+ # as the parameter name suggests).
+ return _htmlNameIsValid(_xcstr(name_utf8))
+
+cdef inline int _xmlNameIsValid(const_xmlChar* c_name):
+ # True iff xmlValidateNCName() returns 0 for the name, i.e. accepts it.
+ # The second argument (0) is passed through to libxml2 as is.
+ return tree.xmlValidateNCName(c_name, 0) == 0
+
+cdef int _htmlNameIsValid(const_xmlChar* c_name):
+ # Return 1 if c_name is acceptable as an HTML name, 0 otherwise.
+ # NULL and empty names are rejected, as are names that contain any of
+ # the characters & < > / " ' or ASCII whitespace (tab, newline,
+ # vertical tab, form feed, carriage return, space).
+ if c_name is NULL or c_name[0] == c'\0':
+ return 0
+ while c_name[0] != c'\0':
+ if c_name[0] in b'&<>/"\'\t\n\x0B\x0C\r ':
+ return 0
+ c_name += 1
+ return 1
+
+cdef bint _characterReferenceIsValid(const_xmlChar* c_name):
+ # Check the numeric part of a character reference: either decimal
+ # digits only, or a leading 'x' followed by hexadecimal digits.
+ # At least one digit is required.
+ # NOTE(review): presumably c_name is the text between '&#' and ';' -
+ # confirm against callers.
+ cdef bint is_hex
+ if c_name[0] == c'x':
+ c_name += 1
+ is_hex = 1
+ else:
+ is_hex = 0
+ if c_name[0] == c'\0':
+ # no digits at all
+ return 0
+ while c_name[0] != c'\0':
+ if c_name[0] < c'0' or c_name[0] > c'9':
+ # not a decimal digit => only valid as a hex digit a-f / A-F
+ if not is_hex:
+ return 0
+ if not (c'a' <= c_name[0] <= c'f'):
+ if not (c'A' <= c_name[0] <= c'F'):
+ return 0
+ c_name += 1
+ return 1
+
+cdef int _tagValidOrRaise(tag_utf) except -1:
+ # Return 0 if the byte string passes _pyXmlNameIsValid(); raise
+ # ValueError naming the offending tag otherwise.
+ if not _pyXmlNameIsValid(tag_utf):
+ raise ValueError(u"Invalid tag name %r" %
+ (tag_utf).decode('utf8'))
+ return 0
+
+cdef int _htmlTagValidOrRaise(tag_utf) except -1:
+ # Return 0 if the byte string passes _pyHtmlNameIsValid(); raise
+ # ValueError naming the offending tag otherwise.
+ if not _pyHtmlNameIsValid(tag_utf):
+ raise ValueError(u"Invalid HTML tag name %r" %
+ (tag_utf).decode('utf8'))
+ return 0
+
+cdef int _attributeValidOrRaise(name_utf) except -1:
+ # Return 0 if the byte string passes _pyXmlNameIsValid(); raise
+ # ValueError naming the offending attribute name otherwise.
+ if not _pyXmlNameIsValid(name_utf):
+ raise ValueError(u"Invalid attribute name %r" %
+ (name_utf).decode('utf8'))
+ return 0
+
+cdef int _prefixValidOrRaise(tag_utf) except -1:
+ # Return 0 if the byte string passes _pyXmlNameIsValid(); raise
+ # ValueError naming the offending namespace prefix otherwise.
+ if not _pyXmlNameIsValid(tag_utf):
+ raise ValueError(u"Invalid namespace prefix %r" %
+ (tag_utf).decode('utf8'))
+ return 0
+
+cdef int _uriValidOrRaise(uri_utf) except -1:
+ # Return 0 if xmlParseURI() can parse the byte string; raise ValueError
+ # naming the offending URI otherwise.
+ cdef uri.xmlURI* c_uri = uri.xmlParseURI(_cstr(uri_utf))
+ if c_uri is NULL:
+ raise ValueError(u"Invalid namespace URI %r" %
+ (uri_utf).decode('utf8'))
+ # the parse result is only needed as a validity check
+ uri.xmlFreeURI(c_uri)
+ return 0
+
+cdef inline object _namespacedName(xmlNode* c_node):
+ # Build the name of a node in '{namespace}name' notation from its
+ # namespace URI and its name, see _namespacedNameFromNsName().
+ return _namespacedNameFromNsName(_getNs(c_node), c_node.name)
+
+cdef object _namespacedNameFromNsName(const_xmlChar* href, const_xmlChar* name):
+ # Build a Python string '{href}name', or just the name (converted with
+ # funicode()) if href is NULL.
+ if href is NULL:
+ return funicode(name)
+ elif python.LXML_UNICODE_STRINGS and not python.IS_PYPY:
+ # format directly into a unicode string
+ return python.PyUnicode_FromFormat("{%s}%s", href, name)
+ else:
+ # format as bytes first, then decode if unicode strings are wanted
+ # or the result contains non-ASCII bytes
+ s = python.PyBytes_FromFormat("{%s}%s", href, name)
+ if python.LXML_UNICODE_STRINGS or isutf8(_xcstr(s)):
+ return (s).decode('utf8')
+ else:
+ return s
+
+cdef _getFilenameForFile(source):
+ u"""Given a Python File or Gzip object, give filename back.
+
+ The following are tried in order: the result of source.geturl(),
+ the absolute path of source.name and the absolute path of
+ source.filename. The two attributes are only used if they are
+ strings. Any exception raised by one attempt is swallowed and the
+ next one is tried.
+
+ Returns None if not a file object.
+ """
+ # urllib2 provides a geturl() method
+ try:
+ return source.geturl()
+ except:
+ pass
+ # file instances have a name attribute
+ try:
+ filename = source.name
+ if _isString(filename):
+ return os_path_abspath(filename)
+ except:
+ pass
+ # gzip file instances have a filename attribute (before Py3k)
+ try:
+ filename = source.filename
+ if _isString(filename):
+ return os_path_abspath(filename)
+ except:
+ pass
+ # can't determine filename
+ return None
diff --git a/lib/lxml/builder.py b/lib/lxml/builder.py
new file mode 100644
index 00000000..ad61a80e
--- /dev/null
+++ b/lib/lxml/builder.py
@@ -0,0 +1,238 @@
+#
+# Element generator factory by Fredrik Lundh.
+#
+# Source:
+# http://online.effbot.org/2006_11_01_archive.htm#et-builder
+# http://effbot.python-hosting.com/file/stuff/sandbox/elementlib/builder.py
+#
+# --------------------------------------------------------------------
+# The ElementTree toolkit is
+#
+# Copyright (c) 1999-2004 by Fredrik Lundh
+#
+# By obtaining, using, and/or copying this software and/or its
+# associated documentation, you agree that you have read, understood,
+# and will comply with the following terms and conditions:
+#
+# Permission to use, copy, modify, and distribute this software and
+# its associated documentation for any purpose and without fee is
+# hereby granted, provided that the above copyright notice appears in
+# all copies, and that both that copyright notice and this permission
+# notice appear in supporting documentation, and that the name of
+# Secret Labs AB or the author not be used in advertising or publicity
+# pertaining to distribution of the software without specific, written
+# prior permission.
+#
+# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
+# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
+# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
+# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
+# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+# OF THIS SOFTWARE.
+# --------------------------------------------------------------------
+
+"""
+The ``E`` Element factory for generating XML documents.
+"""
+
+import lxml.etree as ET
+
+# Compatibility shims: each block probes for a name and defines a
+# replacement only when the running interpreter does not provide it.
+try:
+ from functools import partial
+except ImportError:
+ # fake it for pre-2.5 releases
+ # (binds exactly one leading argument, which is how
+ # ElementMaker.__getattr__ uses it)
+ def partial(func, tag):
+ return lambda *args, **kwargs: func(tag, *args, **kwargs)
+
+try:
+ callable
+except NameError:
+ # Python 3
+ # (only reached when the interpreter has no callable() builtin)
+ def callable(f):
+ return hasattr(f, '__call__')
+
+# without a separate basestring type, plain str is the base string type
+try:
+ basestring
+except NameError:
+ basestring = str
+
+# without a separate unicode type, str takes its place
+try:
+ unicode
+except NameError:
+ unicode = str
+
+
+class ElementMaker(object):
+ """Element generator factory.
+
+ Unlike the ordinary Element factory, the E factory allows you to pass in
+ more than just a tag and some optional attributes; you can also pass in
+ text and other elements. The text is added as either text or tail
+ attributes, and elements are inserted at the right spot. Some small
+ examples::
+
+ >>> from lxml import etree as ET
+ >>> from lxml.builder import E
+
+ >>> ET.tostring(E("tag"))
+ ''
+ >>> ET.tostring(E("tag", "text"))
+ 'text'
+ >>> ET.tostring(E("tag", "text", key="value"))
+ 'text'
+ >>> ET.tostring(E("tag", E("subtag", "text"), "tail"))
+ 'texttail'
+
+ For simple tags, the factory also allows you to write ``E.tag(...)`` instead
+ of ``E('tag', ...)``::
+
+ >>> ET.tostring(E.tag())
+ ''
+ >>> ET.tostring(E.tag("text"))
+ 'text'
+ >>> ET.tostring(E.tag(E.subtag("text"), "tail"))
+ 'texttail'
+
+ Here's a somewhat larger example; this shows how to generate HTML
+ documents, using a mix of prepared factory functions for inline elements,
+ nested ``E.tag`` calls, and embedded XHTML fragments::
+
+ # some common inline elements
+ A = E.a
+ I = E.i
+ B = E.b
+
+ def CLASS(v):
+ # helper function, 'class' is a reserved word
+ return {'class': v}
+
+ page = (
+ E.html(
+ E.head(
+ E.title("This is a sample document")
+ ),
+ E.body(
+ E.h1("Hello!", CLASS("title")),
+ E.p("This is a paragraph with ", B("bold"), " text in it!"),
+ E.p("This is another paragraph, with a ",
+ A("link", href="http://www.python.org"), "."),
+ E.p("Here are some reservered characters: ."),
+ ET.XML("
And finally, here is an embedded XHTML fragment.
"),
+ )
+ )
+ )
+
+ print ET.tostring(page)
+
+ Here's a prettyprinted version of the output from the above script::
+
+
+
+ This is a sample document
+
+
+
+
+
+
+ For namespace support, you can pass a namespace map (``nsmap``)
+ and/or a specific target ``namespace`` to the ElementMaker class::
+
+ >>> E = ElementMaker(namespace="http://my.ns/")
+ >>> print(ET.tostring( E.test ))
+
+
+ >>> E = ElementMaker(namespace="http://my.ns/", nsmap={'p':'http://my.ns/'})
+ >>> print(ET.tostring( E.test ))
+
+ """
+
+ def __init__(self, typemap=None,
+ namespace=None, nsmap=None, makeelement=None):
+ if namespace is not None:
+ self._namespace = '{' + namespace + '}'
+ else:
+ self._namespace = None
+
+ if nsmap:
+ self._nsmap = dict(nsmap)
+ else:
+ self._nsmap = None
+
+ if makeelement is not None:
+ assert callable(makeelement)
+ self._makeelement = makeelement
+ else:
+ self._makeelement = ET.Element
+
+ # initialize type map for this element factory
+
+ if typemap:
+ typemap = typemap.copy()
+ else:
+ typemap = {}
+
+ def add_text(elem, item):
+ try:
+ elem[-1].tail = (elem[-1].tail or "") + item
+ except IndexError:
+ elem.text = (elem.text or "") + item
+ if str not in typemap:
+ typemap[str] = add_text
+ if unicode not in typemap:
+ typemap[unicode] = add_text
+
+ def add_dict(elem, item):
+ attrib = elem.attrib
+ for k, v in item.items():
+ if isinstance(v, basestring):
+ attrib[k] = v
+ else:
+ attrib[k] = typemap[type(v)](None, v)
+ if dict not in typemap:
+ typemap[dict] = add_dict
+
+ self._typemap = typemap
+
+ def __call__(self, tag, *children, **attrib):
+ get = self._typemap.get
+
+ if self._namespace is not None and tag[0] != '{':
+ tag = self._namespace + tag
+ elem = self._makeelement(tag, nsmap=self._nsmap)
+ if attrib:
+ get(dict)(elem, attrib)
+
+ for item in children:
+ if callable(item):
+ item = item()
+ t = get(type(item))
+ if t is None:
+ if ET.iselement(item):
+ elem.append(item)
+ continue
+ for basetype in type(item).__mro__:
+ # See if the typemap knows of any of this type's bases.
+ t = get(basetype)
+ if t is not None:
+ break
+ else:
+ raise TypeError("bad argument type: %s(%r)" %
+ (type(item).__name__, item))
+ v = t(elem, item)
+ if v:
+ get(type(v))(elem, v)
+
+ return elem
+
+ def __getattr__(self, tag):
+ return partial(self, tag)
+
+# create factory object
+# (module-level default instance: no namespace, no nsmap, default type map)
+E = ElementMaker()
diff --git a/lib/lxml/classlookup.pxi b/lib/lxml/classlookup.pxi
new file mode 100644
index 00000000..82740a51
--- /dev/null
+++ b/lib/lxml/classlookup.pxi
@@ -0,0 +1,565 @@
+# Configurable Element class lookup
+
+################################################################################
+# Custom Element classes
+
+cdef public class ElementBase(_Element) [ type LxmlElementBaseType,
+ object LxmlElementBase ]:
+ u"""ElementBase(*children, attrib=None, nsmap=None, **_extra)
+
+ The public Element class. All custom Element classes must inherit
+ from this one. To create an Element, use the `Element()` factory.
+
+ BIG FAT WARNING: Subclasses *must not* override __init__ or
+ __new__ as it is absolutely undefined when these objects will be
+ created or destroyed. All persistent state of Elements must be
+ stored in the underlying XML. If you really need to initialize
+ the object after creation, you can implement an ``_init(self)``
+ method that will be called directly after object creation.
+
+ Subclasses of this class can be instantiated to create a new
+ Element. By default, the tag name will be the class name and the
+ namespace will be empty. You can modify this with the following
+ class attributes:
+
+ * TAG - the tag name, possibly containing a namespace in Clark
+ notation
+
+ * NAMESPACE - the default namespace URI, unless provided as part
+ of the TAG attribute.
+
+ * HTML - flag if the class is an HTML tag, as opposed to an XML
+ tag. This only applies to un-namespaced tags and defaults to
+ false (i.e. XML).
+
+ * PARSER - the parser that provides the configuration for the
+ newly created document. Providing an HTML parser here will
+ default to creating an HTML element.
+
+ In user code, the latter three are commonly inherited in class
+ hierarchies that implement a common namespace.
+
+ Positional arguments become the content of the new Element: strings
+ are added as text (before the first child) or as tail text of the
+ preceding child, Elements are appended, and ElementBase subclasses are
+ instantiated without arguments and then appended. Any other child
+ raises TypeError.
+ """
+ def __init__(self, *children, attrib=None, nsmap=None, **_extra):
+ u"""ElementBase(*children, attrib=None, nsmap=None, **_extra)
+ """
+ cdef bint is_html = 0
+ cdef _BaseParser parser
+ cdef _Element last_child
+ # don't use normal attribute access as it might be overridden
+ _getattr = object.__getattribute__
+ # NAMESPACE is optional
+ try:
+ namespace = _utf8(_getattr(self, 'NAMESPACE'))
+ except AttributeError:
+ namespace = None
+ # a namespace given as part of TAG takes precedence over NAMESPACE;
+ # without TAG, the class name (the part after the last dot) is used
+ try:
+ ns, tag = _getNsTag(_getattr(self, 'TAG'))
+ if ns is not None:
+ namespace = ns
+ except AttributeError:
+ tag = _utf8(_getattr(_getattr(self, '__class__'), '__name__'))
+ if b'.' in tag:
+ tag = tag.split(b'.')[-1]
+ # without PARSER, borrow the parser of the first Element child, if any
+ try:
+ parser = _getattr(self, 'PARSER')
+ except AttributeError:
+ parser = None
+ for child in children:
+ if isinstance(child, _Element):
+ parser = (<_Element>child)._doc._parser
+ break
+ # an HTML parser selects HTML; for un-namespaced tags the HTML class
+ # attribute, when present, has the final word
+ if isinstance(parser, HTMLParser):
+ is_html = 1
+ if namespace is None:
+ try:
+ is_html = _getattr(self, 'HTML')
+ except AttributeError:
+ pass
+ _initNewElement(self, is_html, tag, namespace, parser,
+ attrib, nsmap, _extra)
+ # add the children; last_child tracks where following text belongs
+ last_child = None
+ for child in children:
+ if _isString(child):
+ if last_child is None:
+ # no child element yet: extend the text of this element
+ _setNodeText(self._c_node,
+ (_collectText(self._c_node.children) or '') + child)
+ else:
+ # extend the tail text of the most recently added child
+ _setTailText(last_child._c_node,
+ (_collectText(last_child._c_node.next) or '') + child)
+ elif isinstance(child, _Element):
+ last_child = child
+ _appendChild(self, last_child)
+ elif isinstance(child, type) and issubclass(child, ElementBase):
+ # an Element class: instantiate it without arguments
+ last_child = child()
+ _appendChild(self, last_child)
+ else:
+ raise TypeError, "Invalid child type: %r" % type(child)
+
+cdef class CommentBase(_Comment):
+ u"""All custom Comment classes must inherit from this one.
+
+ To create an XML Comment instance, use the ``Comment()`` factory.
+
+ Subclasses *must not* override __init__ or __new__ as it is
+ absolutely undefined when these objects will be created or
+ destroyed. All persistent state of Comments must be stored in the
+ underlying XML. If you really need to initialize the object after
+ creation, you can implement an ``_init(self)`` method that will be
+ called after object creation.
+ """
+ def __init__(self, text):
+ # copied from Comment() factory
+ cdef _Document doc
+ cdef xmlDoc* c_doc
+ # None stands for an empty comment
+ if text is None:
+ text = b''
+ else:
+ text = _utf8(text)
+ # the comment node gets a new document of its own
+ c_doc = _newXMLDoc()
+ doc = _documentFactory(c_doc, None)
+ self._c_node = _createComment(c_doc, _xcstr(text))
+ if self._c_node is NULL:
+ raise MemoryError()
+ tree.xmlAddChild(c_doc, self._c_node)
+ _registerProxy(self, doc, self._c_node)
+ # give subclasses their initialisation hook
+ self._init()
+
+cdef class PIBase(_ProcessingInstruction):
+ u"""All custom Processing Instruction classes must inherit from this one.
+
+ To create an XML ProcessingInstruction instance, use the ``PI()``
+ factory.
+
+ Subclasses *must not* override __init__ or __new__ as it is
+ absolutely undefined when these objects will be created or
+ destroyed. All persistent state of PIs must be stored in the
+ underlying XML. If you really need to initialize the object after
+ creation, you can implement an ``_init(self)`` method that will be
+ called after object creation.
+ """
+ def __init__(self, target, text=None):
+ # copied from PI() factory
+ cdef _Document doc
+ cdef xmlDoc* c_doc
+ target = _utf8(target)
+ # None stands for empty PI content
+ if text is None:
+ text = b''
+ else:
+ text = _utf8(text)
+ # the PI node gets a new document of its own
+ c_doc = _newXMLDoc()
+ doc = _documentFactory(c_doc, None)
+ self._c_node = _createPI(c_doc, _xcstr(target), _xcstr(text))
+ if self._c_node is NULL:
+ raise MemoryError()
+ tree.xmlAddChild(c_doc, self._c_node)
+ _registerProxy(self, doc, self._c_node)
+ # give subclasses their initialisation hook
+ self._init()
+
+cdef class EntityBase(_Entity):
+ u"""All custom Entity classes must inherit from this one.
+
+ To create an XML Entity instance, use the ``Entity()`` factory.
+
+ Subclasses *must not* override __init__ or __new__ as it is
+ absolutely undefined when these objects will be created or
+ destroyed. All persistent state of Entities must be stored in the
+ underlying XML. If you really need to initialize the object after
+ creation, you can implement an ``_init(self)`` method that will be
+ called after object creation.
+ """
+ def __init__(self, name):
+ cdef _Document doc
+ cdef xmlDoc* c_doc
+ name_utf = _utf8(name)
+ c_name = _xcstr(name_utf)
+ # a leading '#' marks a character reference, whose remainder is checked
+ # by _characterReferenceIsValid(); any other name must pass
+ # _xmlNameIsValid()
+ if c_name[0] == c'#':
+ if not _characterReferenceIsValid(c_name + 1):
+ raise ValueError, u"Invalid character reference: '%s'" % name
+ elif not _xmlNameIsValid(c_name):
+ raise ValueError, u"Invalid entity reference: '%s'" % name
+ # the entity node gets a new document of its own
+ c_doc = _newXMLDoc()
+ doc = _documentFactory(c_doc, None)
+ self._c_node = _createEntity(c_doc, c_name)
+ if self._c_node is NULL:
+ raise MemoryError()
+ tree.xmlAddChild(c_doc, self._c_node)
+ _registerProxy(self, doc, self._c_node)
+ # give subclasses their initialisation hook
+ self._init()
+
+
+cdef int _validateNodeClass(xmlNode* c_node, cls) except -1:
+    # Check that ``cls``, the result of a class lookup for ``c_node``, is a
+    # class derived from the public base class that matches the node type
+    # (ElementBase, CommentBase, EntityBase or PIBase).
+    # Returns 0 on success and raises TypeError otherwise.
+    if c_node.type == tree.XML_ELEMENT_NODE:
+        expected = ElementBase
+    elif c_node.type == tree.XML_COMMENT_NODE:
+        expected = CommentBase
+    elif c_node.type == tree.XML_ENTITY_REF_NODE:
+        expected = EntityBase
+    elif c_node.type == tree.XML_PI_NODE:
+        expected = PIBase
+    else:
+        assert 0, u"Unknown node type: %s" % c_node.type
+
+    if isinstance(cls, type) and issubclass(cls, expected):
+        return 0
+
+    # Name the classes themselves in the message: type(expected) is always
+    # the metaclass, and so is type(cls) for a class outside the hierarchy.
+    # For a result that is not a class at all, its type is the useful part.
+    if isinstance(cls, type):
+        found = cls
+    else:
+        found = type(cls)
+    raise TypeError(
+        "result of class lookup must be subclass of %s, got %s"
+        % (expected, found))
+
+
+################################################################################
+# Element class lookup
+
+# Signature shared by the class lookup functions in this file: they receive
+# the lookup state object, the document and the C node, and return a Python
+# object (the class to use for that node).
+ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*)
+
+# class to store element class lookup functions
+cdef public class ElementClassLookup [ type LxmlElementClassLookupType,
+ object LxmlElementClassLookup ]:
+ u"""ElementClassLookup(self)
+ Superclass of Element class lookups.
+
+ The lookup itself is a C function stored in ``_lookup_function``. This
+ base class leaves it at NULL; FallbackElementClassLookup treats a NULL
+ function of its fallback as a request for the default lookup.
+ """
+ # C-level function that implements this lookup scheme
+ cdef _element_class_lookup_function _lookup_function
+ def __cinit__(self):
+ self._lookup_function = NULL # use default lookup
+
+cdef public class FallbackElementClassLookup(ElementClassLookup) \
+ [ type LxmlFallbackElementClassLookupType,
+ object LxmlFallbackElementClassLookup ]:
+ u"""FallbackElementClassLookup(self, fallback=None)
+
+ Superclass of Element class lookups with additional fallback.
+
+ The fallback lookup object is available as the read-only ``fallback``
+ attribute. Without a fallback, the default class lookup is used.
+ """
+ # the fallback lookup object (None unless one was set) ...
+ cdef readonly ElementClassLookup fallback
+ # ... and the C function that is called to run it
+ cdef _element_class_lookup_function _fallback_function
+ def __cinit__(self):
+ # fall back to default lookup
+ self._fallback_function = _lookupDefaultElementClass
+
+ def __init__(self, ElementClassLookup fallback=None):
+ if fallback is not None:
+ self._setFallback(fallback)
+ else:
+ self._fallback_function = _lookupDefaultElementClass
+
+ cdef void _setFallback(self, ElementClassLookup lookup):
+ u"""Sets the fallback scheme for this lookup method.
+ """
+ self.fallback = lookup
+ self._fallback_function = lookup._lookup_function
+ # a lookup without a function of its own means "default lookup"
+ if self._fallback_function is NULL:
+ self._fallback_function = _lookupDefaultElementClass
+
+ def set_fallback(self, ElementClassLookup lookup not None):
+ u"""set_fallback(self, lookup)
+
+ Sets the fallback scheme for this lookup method.
+ """
+ self._setFallback(lookup)
+
+# Run the fallback of ``lookup`` for the given node: the stored fallback
+# function is called with the fallback object as its state.
+cdef inline object _callLookupFallback(FallbackElementClassLookup lookup,
+ _Document doc, xmlNode* c_node):
+ return lookup._fallback_function(lookup.fallback, doc, c_node)
+
+
+################################################################################
+# default lookup scheme
+
+cdef class ElementDefaultClassLookup(ElementClassLookup):
+    u"""ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None)
+    Element class lookup scheme that returns one fixed class per node type.
+
+    The keyword arguments ``element``, ``comment``, ``pi`` and ``entity``
+    select the class for the respective node type.  A class passed here
+    must be a subclass of ElementBase, CommentBase, PIBase or EntityBase
+    respectively, otherwise TypeError is raised.  Where no class is given,
+    the default class is used.
+    """
+    cdef readonly object element_class
+    cdef readonly object comment_class
+    cdef readonly object pi_class
+    cdef readonly object entity_class
+
+    def __cinit__(self):
+        self._lookup_function = _lookupDefaultElementClass
+
+    def __init__(self, element=None, comment=None, pi=None, entity=None):
+        # for each node type: substitute the default or validate, then store
+        if element is None:
+            element = _Element
+        elif not issubclass(element, ElementBase):
+            raise TypeError, u"element class must be subclass of ElementBase"
+        self.element_class = element
+
+        if comment is None:
+            comment = _Comment
+        elif not issubclass(comment, CommentBase):
+            raise TypeError, u"comment class must be subclass of CommentBase"
+        self.comment_class = comment
+
+        if entity is None:
+            entity = _Entity
+        elif not issubclass(entity, EntityBase):
+            raise TypeError, u"Entity class must be subclass of EntityBase"
+        self.entity_class = entity
+
+        # special case: a missing PI class is stored as None and resolved
+        # by the lookup function
+        if pi is not None and not issubclass(pi, PIBase):
+            raise TypeError, u"PI class must be subclass of PIBase"
+        self.pi_class = pi
+
+# ``state`` is either None or an object providing the element_class,
+# comment_class, entity_class and pi_class attributes; with None, the
+# built-in classes are returned.
+cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node):
+ u"Trivial class lookup function that always returns the default class."
+ if c_node.type == tree.XML_ELEMENT_NODE:
+ if state is not None:
+ return (state).element_class
+ else:
+ return _Element
+ elif c_node.type == tree.XML_COMMENT_NODE:
+ if state is not None:
+ return (state).comment_class
+ else:
+ return _Comment
+ elif c_node.type == tree.XML_ENTITY_REF_NODE:
+ if state is not None:
+ return (state).entity_class
+ else:
+ return _Entity
+ elif c_node.type == tree.XML_PI_NODE:
+ if state is None or (state).pi_class is None:
+ # special case XSLT-PI
+ # (an "xml-stylesheet" PI whose content mentions "text/xsl" or
+ # "text/xml" gets the XSLT PI class)
+ if c_node.name is not NULL and c_node.content is not NULL:
+ if tree.xmlStrcmp(c_node.name, "xml-stylesheet") == 0:
+ if tree.xmlStrstr(c_node.content, "text/xsl") is not NULL or \
+ tree.xmlStrstr(c_node.content, "text/xml") is not NULL:
+ return _XSLTProcessingInstruction
+ return _ProcessingInstruction
+ else:
+ return (state).pi_class
+ else:
+ assert 0, u"Unknown node type: %s" % c_node.type
+
+
+################################################################################
+# attribute based lookup scheme
+
+cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup):
+ u"""AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None)
+ Checks an attribute of an Element and looks up the value in a
+ class dictionary.
+
+ Arguments:
+ - attribute name - '{ns}name' style string
+ - class mapping - Python dict mapping attribute values to Element classes
+ - fallback - optional fallback lookup mechanism
+
+ A None key in the class mapping will be checked if the attribute is
+ missing.
+ """
+ # private copy of the class mapping passed to __init__
+ cdef object _class_mapping
+ # (ns, name) pair of the attribute as returned by _getNsTag()
+ cdef tuple _pytag
+ # C strings obtained from the two items of _pytag
+ # NOTE(review): presumably these point into the objects held by _pytag,
+ # which would be why the tuple is kept on the instance - confirm
+ cdef const_xmlChar* _c_ns
+ cdef const_xmlChar* _c_name
+ def __cinit__(self):
+ self._lookup_function = _attribute_class_lookup
+
+ def __init__(self, attribute_name, class_mapping,
+ ElementClassLookup fallback=None):
+ self._pytag = _getNsTag(attribute_name)
+ ns, name = self._pytag
+ # an attribute without namespace is looked up with a NULL namespace
+ if ns is None:
+ self._c_ns = NULL
+ else:
+ self._c_ns = _xcstr(ns)
+ self._c_name = _xcstr(name)
+ # copy the mapping so that later changes by the caller have no effect
+ self._class_mapping = dict(class_mapping)
+
+ FallbackElementClassLookup.__init__(self, fallback)
+
+cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node):
+ cdef AttributeBasedElementClassLookup lookup
+ cdef python.PyObject* dict_result
+
+ lookup = state
+ if c_node.type == tree.XML_ELEMENT_NODE:
+ value = _attributeValueFromNsName(
+ c_node, lookup._c_ns, lookup._c_name)
+ dict_result = python.PyDict_GetItem(lookup._class_mapping, value)
+ if dict_result is not NULL:
+ cls =