我们从 Python 开源项目中，提取了以下 16 个代码示例，用于说明如何使用 lxml.etree._ElementTree()。
def map_node_to_class(self, node):
    """Pick the ``nodes`` wrapper class that corresponds to an implementation node.

    The node is tested against each known implementation type in a fixed
    order and the wrapper class paired with the first match is returned.
    ``LXMLText`` nodes map to ``nodes.CDATA`` when their ``is_cdata`` flag
    is set and to ``nodes.Text`` otherwise.

    Raises ``exceptions.Xml4hImplementationBug`` when no type matches.
    """
    # NOTE(review): the specialised etree checks stay ahead of the generic
    # etree._Element one, presumably because lxml's comment and processing
    # instruction classes derive from _Element -- confirm before reordering.
    type_to_wrapper = (
        (etree._ProcessingInstruction, nodes.ProcessingInstruction),
        (etree._Comment, nodes.Comment),
        (etree._ElementTree, nodes.Document),
        (etree._Element, nodes.Element),
        (LXMLAttribute, nodes.Attribute),
    )
    for impl_type, wrapper_class in type_to_wrapper:
        if isinstance(node, impl_type):
            return wrapper_class
    if isinstance(node, LXMLText):
        return nodes.CDATA if node.is_cdata else nodes.Text
    raise exceptions.Xml4hImplementationBug(
        'Unrecognized type for implementation node: %s' % node)
def match_xpath(xpath, doc):
    """Evaluate expression `xpath` against document `doc`.

    Four combinations are supported, depending on whether each argument is
    already in its compiled/callable form (as reported by
    `is_compiled_xpath` and `is_compiled_doc`):

    * compiled doc: the doc is called with the expression text
      (``xpath.path`` when the expression is compiled as well);
    * compiled xpath only: the xpath object is called with the doc;
    * neither: ``doc.xpath(xpath)`` is used.

    The value produced by that call is returned unchanged.

    :type xpath: Either `unicode` or `etree.XPath`
    :type doc: Either `etree._ElementTree` or `etree.XPathDocumentEvaluator`
    """
    xpath_is_compiled = is_compiled_xpath(xpath)
    doc_is_compiled = is_compiled_doc(doc)

    if doc_is_compiled:
        # An evaluator takes the expression as text, so unwrap a compiled one.
        expression = xpath.path if xpath_is_compiled else xpath
        return doc(expression)
    if xpath_is_compiled:
        return xpath(doc)
    return doc.xpath(xpath)
def try_match_xpath(xpath, doc, logger=logging):
    """Report whether the XPath expression matches the given XML document.

    An expression that fails with ``etree.XPathEvalError`` is logged as a
    warning on `logger` and counted as a non-match.

    :type xpath: Either `unicode` or `etree.XPath`
    :type doc: Either `etree._ElementTree` or `etree.XPathDocumentEvaluator`
    :rtype: bool
    """
    try:
        # The evaluation result may be a list, a string, or possibly some
        # other type; its truthiness is used as the match indicator.
        matched = bool(match_xpath(xpath, doc))
    except etree.XPathEvalError as error:
        # Recover the textual form of the expression for the log message.
        if is_compiled_xpath(xpath):
            expr = xpath.path
        else:
            expr = xpath
        logger.warning("Invalid expression '%s': %s", expr, str(error))
        return False
    else:
        return matched
def getTreesFor(self, document, content_type):
    """Provides all XML documents for that content type

    This is a generator: trees are parsed lazily, one per override path
    registered in ``self.overrides[content_type]``.

    @param document: a Document or subclass object (its ``_cache_dir``
        attribute is the directory the override paths are relative to)
    @param content_type: a MIME content type
    @return: generator yielding one parsed tree (``etree.parse`` result)
        per override path of that content type
    """
    for rel_path in self.overrides[content_type]:
        # Relative path without potential leading path separator,
        # otherwise os.path.join doesn't work.  startswith() is also safe
        # for an empty string, where indexing [0] would raise IndexError.
        if rel_path.startswith(('/', '\\')):
            rel_path = rel_path[1:]
        file_path = os.path.join(document._cache_dir, rel_path)
        source = utils.xmlFile(file_path, 'rb')
        try:
            tree = etree.parse(source)
        finally:
            # NOTE(review): xmlFile is given an open mode, so it presumably
            # returns an open file object; close it once parsing is done so
            # handles are not leaked.  If it returns something without
            # close(), this is a no-op.
            close = getattr(source, 'close', None)
            if close is not None:
                close()
        yield tree
def parse_file(args: Namespace) -> etree._ElementTree:
    """Parse the XML source named by ``args.target`` and return the tree.

    The parser is an ``etree.XMLParser`` whose ``recover`` option is taken
    from ``args.recover``.
    """
    xml_parser = etree.XMLParser(recover=args.recover)
    tree = etree.parse(args.target, parser=xml_parser)
    return tree
def write_file(document: etree._ElementTree, args: Namespace) -> None:
    """Write `document` to ``args.target`` and emit a debug message.

    Pretty-printing follows ``args.pretty``; the output is always written
    as UTF-8 with an XML declaration.
    """
    # TODO obtain options from source:
    fixed_options = {'encoding': 'utf-8', 'xml_declaration': True}
    document.write(args.target, pretty_print=args.pretty, **fixed_options)
    dbg('Wrote result back to file.')
def test_make_tree(self):
    """``helper.make_tree`` must return an ``ET._ElementTree`` for the sample."""
    tree = self.helper.make_tree(self.sample)
    # assertIsInstance reports the offending object and expected type on
    # failure, whereas assertTrue(isinstance(...)) only says
    # "False is not true".
    self.assertIsInstance(tree, ET._ElementTree)
def xpath_on_node(self, node, xpath, **kwargs):
    """
    Run the XPath query `xpath` on `node` and return the result.

    The namespace prefix-to-URI mappings used for the query are built from:

    - the ``nsmap`` of the node (or of the root element, when the node is
      the document itself);
    - any mappings passed by the caller as the ``namespaces`` keyword
      argument, which take precedence over the node's own mappings.

    A default namespace (prefix ``None``) is re-registered under the prefix
    ``'_'``, because empty namespace prefixes are unsupported by XPath.
    The ``'xmlns'`` prefix is added when it is not already defined.
    """
    # Document node lxml.etree._ElementTree has no nsmap, lookup root
    if isinstance(node, etree._ElementTree):
        nsmap_owner = self.get_impl_root(node)
    else:
        nsmap_owner = node
    prefix_map = nsmap_owner.nsmap.copy()
    prefix_map.update(kwargs.get('namespaces', {}))

    # Empty namespace prefix is not supported, convert to '_' prefix
    if None in prefix_map:
        prefix_map['_'] = prefix_map.pop(None)

    # Include XMLNS namespace if it's not already defined
    prefix_map.setdefault('xmlns', nodes.Node.XMLNS_URI)

    return node.xpath(xpath, namespaces=prefix_map)

# Node implementation methods
def get_node_namespace_uri(self, node):
    """Return the namespace URI of an implementation node, or None.

    Resolution order:

    1. a textual ``tag`` in ``{uri}local`` form: the URI between the braces;
    2. ``LXMLAttribute``: its ``namespace_uri`` attribute;
    3. ``etree._Element`` with a textual tag: the second item returned by
       ``self._unpack_name(tag, node)``;
    4. anything else (including the document node): None.
    """
    # Read the tag defensively: the document node (etree._ElementTree) has
    # no ``tag`` attribute, and lxml comment / processing-instruction nodes
    # expose a factory function rather than a string there.  Testing
    # ``'}' in node.tag`` unconditionally would raise for both.
    tag = getattr(node, 'tag', None)
    has_text_tag = tag is not None and not callable(tag)

    if has_text_tag and '}' in tag:
        # Strip the leading '{' from the '{uri' part before the first '}'.
        return tag.split('}')[0][1:]
    if isinstance(node, LXMLAttribute):
        return node.namespace_uri
    if isinstance(node, etree._ElementTree):
        return None
    if has_text_tag and isinstance(node, etree._Element):
        _qname, ns_uri = self._unpack_name(tag, node)[:2]
        return ns_uri
    return None
def get_node_parent(self, node):
    """Return the parent of an implementation node.

    The document node (``etree._ElementTree``) has no parent, so None is
    returned for it.  For any other node, the result of ``getparent()`` is
    returned, except that a missing parent is replaced by
    ``self.impl_document``.
    """
    if isinstance(node, etree._ElementTree):
        return None
    parent = node.getparent()
    # Return ElementTree as root element's parent
    return self.impl_document if parent is None else parent
def get_node_children(self, node):
    """Return the list of child nodes of an implementation node.

    - The document node (``etree._ElementTree``) has exactly one child,
      its root element.
    - A node without a ``getchildren`` method has no children.
    - Otherwise the result of ``getchildren()`` is returned; when the
      node's ``text`` is not None, an ``LXMLText`` wrapping it is placed
      first in the list.
    """
    if isinstance(node, etree._ElementTree):
        return [node.getroot()]
    if not hasattr(node, 'getchildren'):
        return []
    child_nodes = node.getchildren()
    # Hack to treat text attribute as child text nodes
    leading_text = node.text
    if leading_text is not None:
        child_nodes.insert(0, LXMLText(leading_text, parent=node))
    return child_nodes
def is_etree(tree):
    """Tell whether `tree` is an instance of ``etree._ElementTree``."""
    expected_type = etree._ElementTree
    return isinstance(tree, expected_type)
def fromstring(context, parser=None, custom_parser=None):
    """Parse `context` and return a list of resulting root elements.

    `context` is either markup text or a file-like object (anything with a
    callable ``read``).  File-like input is handled by the ``parse`` entry
    point of the selected library, other input by ``fromstring``.

    Parser selection:

    - `custom_parser` given: it is called with `context`; `parser` is
      ignored.
    - `parser` is None: try ``etree`` first and fall back to ``lxml.html``
      when the input is not clean XML (``etree.XMLSyntaxError``).
    - `parser` is one of ``'xml'``, ``'html'``, ``'html5'``, ``'soup'``,
      ``'html_fragments'``: the matching lxml parser is used.

    The parser's result is normalised to a list: a list (or list subclass)
    is returned as is, an ``etree._ElementTree`` is replaced by its root
    element, None becomes an empty list, anything else is wrapped in a
    one-item list.

    Raises ``ValueError`` for an unknown `parser` name.
    """
    if callable(getattr(context, 'read', None)):
        meth = 'parse'
    else:
        meth = 'fromstring'

    if custom_parser is None and parser is None:
        try:
            result = getattr(etree, meth)(context)
        except etree.XMLSyntaxError:
            # Not clean XML: rewind the stream (the failed attempt may have
            # consumed it) and retry with the HTML parser.
            if hasattr(context, 'seek'):
                context.seek(0)
            result = getattr(lxml.html, meth)(context)
        if isinstance(result, etree._ElementTree):
            return [result.getroot()]
        return [result]

    if custom_parser is None:
        if parser == 'xml':
            custom_parser = getattr(etree, meth)
        elif parser == 'html':
            custom_parser = getattr(lxml.html, meth)
        elif parser == 'html5':
            # Imported lazily: only needed when this parser is requested.
            from lxml.html import html5parser
            custom_parser = getattr(html5parser, meth)
        elif parser == 'soup':
            # Imported lazily: only needed when this parser is requested.
            from lxml.html import soupparser
            custom_parser = getattr(soupparser, meth)
        elif parser == 'html_fragments':
            custom_parser = lxml.html.fragments_fromstring
        else:
            raise ValueError('No such parser: "%s"' % parser)

    result = custom_parser(context)
    # isinstance() rather than an exact type check, so that list subclasses
    # are passed through instead of being wrapped in another list.
    if isinstance(result, list):
        return result
    if isinstance(result, etree._ElementTree):
        return [result.getroot()]
    if result is not None:
        return [result]
    return []