我们从 Python 开源项目中提取了以下 41 个代码示例，用于说明如何使用 lxml.etree.ProcessingInstruction()。
def saxify(self):
    """Replay the wrapped element as one complete SAX document.

    The content handler receives ``startDocument``, then every processing
    instruction that immediately precedes the element (in document order),
    then the element itself, then every processing instruction that
    immediately follows it, and finally ``endDocument``.
    """
    self._content_handler.startDocument()

    root = self._element

    if hasattr(root, 'getprevious'):
        # Walking backwards meets the nearest PI first, so gather them and
        # replay the list reversed to restore document order.
        leading = []
        node = root.getprevious()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            leading.append(node)
            node = node.getprevious()
        for node in reversed(leading):
            self._recursive_saxify(node, {})

    self._recursive_saxify(root, {})

    if hasattr(root, 'getnext'):
        # Trailing PIs are already met in document order.
        node = root.getnext()
        while getattr(node, 'tag', None) is ProcessingInstruction:
            self._recursive_saxify(node, {})
            node = node.getnext()

    self._content_handler.endDocument()
def map_node_to_class(self, node):
    """Return the ``nodes`` wrapper class that corresponds to *node*.

    *node* is an implementation-level object: an lxml processing
    instruction, comment, element tree or element, or one of the
    ``LXMLAttribute`` / ``LXMLText`` helper objects.

    Raises ``exceptions.Xml4hImplementationBug`` when *node* matches none
    of the recognised types.
    """
    # NOTE(review): the order of these checks is kept exactly as written;
    # presumably the PI/comment checks must precede the generic element
    # check because those lxml classes derive from the element class --
    # confirm before reordering.
    if isinstance(node, etree._ProcessingInstruction):
        return nodes.ProcessingInstruction
    if isinstance(node, etree._Comment):
        return nodes.Comment
    if isinstance(node, etree._ElementTree):
        return nodes.Document
    if isinstance(node, etree._Element):
        return nodes.Element
    if isinstance(node, LXMLAttribute):
        return nodes.Attribute
    if isinstance(node, LXMLText):
        # Text helpers carry a flag that separates CDATA from plain text.
        return nodes.CDATA if node.is_cdata else nodes.Text
    raise exceptions.Xml4hImplementationBug(
        'Unrecognized type for implementation node: %s' % node)
def test_html(self, data):
    """Validate *data* as a restricted XHTML fragment.

    When ``etree`` is available the fragment is parsed and every node is
    checked:

    * tags listed in ``BAD_HTML_TAGS``, comments and processing
      instructions are rejected;
    * ``a`` may only carry ``href``; ``img`` may only carry ``src`` and
      ``alt``;
    * any other tag may carry no attributes at all, and triggers a
      warning via ``self.maybe_warn`` when it is not in
      ``GOOD_HTML_TAGS``.

    Raises ``DataError`` on unparsable input or on any rejected construct.
    When ``etree`` is falsy no validation is performed.
    """
    if etree:
        # NOTE(review): `data` is handled here as untrusted markup. Whether
        # etree.XML() resolves entities / touches the network depends on the
        # parser defaults of the installed lxml -- consider passing an
        # explicitly hardened XMLParser.
        try:
            dom = etree.XML(data)
        except Exception as e:  # "as" form is valid on Python 2.6+ and 3
            raise DataError("Invalid XHTML in '%s': %s" % (data, e), self)

        for elm in dom.iter():
            if elm.tag in BAD_HTML_TAGS:
                raise DataError(
                    "HTML vulnerability '%s' in '%s'" % (elm.tag, data), self)
            elif elm.tag in [etree.Comment, etree.ProcessingInstruction]:
                raise DataError(
                    "HTML Comment vulnerability '%s'" % elm, self)
            elif elm.tag == 'a':
                for attr_name in elm.attrib:
                    if attr_name != "href":
                        raise DataError(
                            "Vulnerable attribute '%s' on a tag" % attr_name,
                            self)
            elif elm.tag == 'img':
                for attr_name in elm.attrib:
                    if attr_name not in ['src', 'alt']:
                        raise DataError(
                            "Vulnerable attribute '%s' on img tag" % attr_name,
                            self)
            else:
                if elm.attrib:
                    raise DataError(
                        "Attributes not allowed on %s tag" % (elm.tag), self)
                # NOTE(review): assumed to belong to this `else` branch (the
                # original layout was lost) -- confirm against upstream.
                if elm.tag not in GOOD_HTML_TAGS:
                    self.maybe_warn(
                        "Risky HTML tag '%s' in '%s'" % (elm.tag, data))
    # Cannot keep CDATA sections separate from text when parsing in LXML :(
def _parse_element_r(self, el, specials, refs, id=None, element_cls=Paragraph):
    """Recursively convert *el* and its children into a flat list of Document elements.

    Comments and processing instructions produce nothing. An element found
    in *specials* is replaced wholesale by its pre-built value. Otherwise
    the element's own text, each child's parsed output and each child's
    tail text are appended in order; pieces belonging to inline children
    are merged into the preceding element instead of starting a new one.

    *id* is inherited from the nearest ancestor unless *el* carries its own
    ``id`` attribute. *refs* maps elements to their reference lists.
    """
    non_content_tags = {etree.Comment, etree.ProcessingInstruction}
    if el.tag in non_content_tags:
        return []
    if el in specials:
        return specials[el]

    elements = []
    id = el.get('id', id)
    references = refs.get(el, [])

    # Leading text; an otherwise text-less element still yields an empty
    # placeholder when it has references to carry.
    if el.text is not None:
        elements.append(element_cls(six.text_type(el.text), id=id, references=references))
    elif references:
        elements.append(element_cls('', id=id, references=references))

    for child in el:
        # br is a special case - technically inline, but we want to split
        if child.tag not in non_content_tags and child.tag.lower() == 'br':
            elements.append(element_cls(''))

        parsed = self._parse_element_r(
            child, specials=specials, refs=refs, id=id, element_cls=element_cls)

        # Fuse the first parsed piece of an inline child into the previous
        # element, but only when both are the very same Text/Sentence type.
        if (self._is_inline(child)
                and elements
                and len(parsed) > 0
                and isinstance(elements[-1], (Text, Sentence))
                and isinstance(parsed[0], (Text, Sentence))
                and type(elements[-1]) == type(parsed[0])):
            elements[-1] += parsed.pop(0)
        elements.extend(parsed)

        if child.tail is None:
            continue
        tail_piece_follows_inline = (
            self._is_inline(child) and elements and isinstance(elements[-1], element_cls))
        if tail_piece_follows_inline:
            elements[-1] += element_cls(six.text_type(child.tail), id=id)
        else:
            elements.append(element_cls(six.text_type(child.tail), id=id))

    return elements
def _is_inline(self, element):
    """Tell whether *element* counts as an inline element.

    Returns True only when the element's lower-cased tag is listed in
    ``self.inline_elements``; comments and processing instructions are
    never inline.
    """
    tag = element.tag
    # Comment / PI nodes are recognised by their tag being the etree factory
    # itself, so rule them out before treating the tag as a string.
    if tag in {etree.Comment, etree.ProcessingInstruction}:
        return False
    return tag.lower() in self.inline_elements
def processingInstruction(self, target, data):
    """Handle a SAX processing-instruction event.

    Builds a ``ProcessingInstruction`` node from *target* and *data*. When
    no root element exists yet the node is queued in
    ``self._root_siblings``; otherwise it is appended to the element on top
    of ``self._element_stack``.
    """
    pi = ProcessingInstruction(target, data)
    if self._root is not None:
        self._element_stack[-1].append(pi)
        return
    self._root_siblings.append(pi)
def test_wrap_node_and_is_type_methods(self):
    """Check that ``wrap_node`` yields correctly typed xml4h wrappers.

    Covers the root element, a nested namespaced element, and one node of
    each non-element kind (text, CDATA, comment, processing instruction).
    """
    wrap = self.adapter_class.wrap_node

    # Root element
    root = wrap(self.root_elem, self.doc)
    self.assertEqual(self.root_elem, root.impl_node)
    self.assertEqual('DocRoot', root.name)
    self.assertEqual(self.doc, root.impl_document)
    self.assertTrue(root.is_type(xml4h.nodes.ELEMENT_NODE))
    self.assertTrue(root.is_element)

    # Nested, non-root element
    nested = wrap(self.elem3_second, self.doc)
    self.assertEqual(self.elem3_second, nested.impl_node)
    self.assertEqual('ns2:Element3', nested.name)
    self.assertEqual('Element4', nested.parent.name)
    self.assertTrue(nested.is_type(xml4h.nodes.ELEMENT_NODE))
    self.assertTrue(nested.is_element)

    # Non-element node kinds:
    # (implementation node, wrapper class, node-type constant, flag name)
    typed_cases = (
        (self.text_node, xml4h.nodes.Text,
         xml4h.nodes.TEXT_NODE, 'is_text'),
        (self.cdata_node, xml4h.nodes.CDATA,
         xml4h.nodes.CDATA_NODE, 'is_cdata'),
        (self.comment_node, xml4h.nodes.Comment,
         xml4h.nodes.COMMENT_NODE, 'is_comment'),
        (self.instruction_node, xml4h.nodes.ProcessingInstruction,
         xml4h.nodes.PROCESSING_INSTRUCTION_NODE,
         'is_processing_instruction'),
    )
    for impl_node, wrapper_cls, type_const, flag_name in typed_cases:
        wrapped = wrap(impl_node, self.doc)
        self.assertIsInstance(wrapped, wrapper_cls)
        self.assertTrue(wrapped.is_type(type_const))
        self.assertTrue(getattr(wrapped, flag_name))
def new_impl_instruction(self, target, data):
    """Build and return an lxml processing-instruction node for *target* with content *data*."""
    instruction = etree.ProcessingInstruction(target, data)
    return instruction
def _recursive_saxify(self, element, prefixes):
    """Emit SAX events for *element*, its subtree and its tail text.

    Comments produce no event of their own and processing instructions
    produce a ``processingInstruction`` event; in both cases only the tail
    text follows. For a regular element the sequence is: prefix mappings
    that had to be introduced, ``startElementNS``, text, children
    (recursively), ``endElementNS``, the matching ``endPrefixMapping``
    calls, then tail text.

    *prefixes* is the prefix state handed to ``self._build_qname`` and
    passed on unchanged to the children.
    """
    handler = self._content_handler
    tag = element.tag

    is_pi = tag is ProcessingInstruction
    if tag is Comment or is_pi:
        if is_pi:
            handler.processingInstruction(element.target, element.text)
        if element.tail:
            handler.characters(element.tail)
        return

    # Filled in by _build_qname with the (prefix, uri) pairs it had to
    # introduce for this element.
    introduced = []
    make_qname = self._build_qname

    attribute_items = element.items()
    if not attribute_items:
        sax_attributes = self._empty_attributes
    else:
        values_by_name = {}
        qnames_by_name = {}
        for raw_name, attr_value in attribute_items:
            ns_name = _getNsTag(raw_name)
            values_by_name[ns_name] = attr_value
            qnames_by_name[ns_name] = make_qname(
                ns_name[0], ns_name[1], prefixes, introduced)
        sax_attributes = self._attr_class(values_by_name, qnames_by_name)

    # The element's own qname is resolved after its attributes, as before.
    ns_uri, local_name = _getNsTag(tag)
    qname = make_qname(ns_uri, local_name, prefixes, introduced)
    element_name = (ns_uri, local_name)

    for prefix, uri in introduced:
        handler.startPrefixMapping(prefix, uri)
    handler.startElementNS(element_name, qname, sax_attributes)

    if element.text:
        handler.characters(element.text)
    for child in element:
        self._recursive_saxify(child, prefixes)

    handler.endElementNS(element_name, qname)
    for prefix, _uri in introduced:
        handler.endPrefixMapping(prefix)

    if element.tail:
        handler.characters(element.tail)
def setUp(self):
    """Build the lxml fixture document shared by the tests.

    Skips the test when the lxml adapter is unavailable. Otherwise creates
    a small namespaced tree (root, four child elements, two nested
    elements, a comment and a processing instruction), stores every node
    on ``self`` and wraps the document, root and text node with
    ``xml4h.LXMLAdapter``.
    """
    if not xml4h.LXMLAdapter.is_available():
        self.skipTest("lxml library is not installed")
    from lxml import etree

    # Build the fixture tree directly with lxml.etree
    self.root_elem = etree.Element('{urn:test}DocRoot', nsmap={
        None: 'urn:test'})
    doc = etree.ElementTree(self.root_elem)

    # Non-ASCII tag name (two CJK characters followed by "1"). It was
    # garbled to '??1', which is not a legal XML name; it is spelled with
    # escapes here so it survives encoding mishaps.
    # NOTE(review): the characters are reconstructed -- confirm upstream.
    self.elem1 = etree.Element(u'\u5143\u7d201', nsmap={'ns1': 'urn:ns1'})
    self.elem1.attrib['a'] = '1'
    self.elem1.attrib['{urn:ns1}b'] = '2'
    self.elem2 = etree.Element('Element2')
    self.elem3 = etree.Element('{urn:ns1}Element3',
                               nsmap={None: 'urn:ns1'})
    self.elem4 = etree.Element('{urn:ns1}Element4',
                               nsmap={None: 'urn:ns1'})
    self.elem2_second = etree.Element('Element2')
    self.elem3_second = etree.Element('{urn:ns2}Element3',
                                      nsmap={'ns2': 'urn:ns2'})

    # Text and CDATA are represented by helper objects tied to their parent
    # element; the parent's .text is set from the helper.
    self.text_node = xml4h.impls.lxml_etree.LXMLText(
        'Some text', self.elem1)
    self.elem1.text = self.text_node.text
    self.cdata_node = xml4h.impls.lxml_etree.LXMLText(
        'Some cdata', self.elem2, is_cdata=True)
    self.elem2.text = self.cdata_node.text

    self.comment_node = etree.Comment('A comment')
    self.instruction_node = etree.ProcessingInstruction(
        'pi-target', 'pi-data')

    # Assemble the tree.
    self.root_elem.append(self.elem1)
    self.root_elem.append(self.elem2)
    self.root_elem.append(self.elem3)
    self.root_elem.append(self.elem4)
    self.elem3.append(self.elem2_second)
    self.elem2_second.append(self.comment_node)
    self.elem4.append(self.elem3_second)
    self.elem3_second.append(self.instruction_node)

    self.doc = doc
    self.xml4h_doc = xml4h.LXMLAdapter.wrap_document(doc)
    self.xml4h_root = self.xml4h_doc.root
    self.xml4h_text = xml4h.LXMLAdapter.wrap_node(self.text_node, self.doc)
def setUp(self):
    """Build the ElementTree fixture document shared by the tests.

    Picks the C-accelerated or the pure-Python ElementTree module depending
    on the concrete test class, skipping the test when the matching adapter
    is unavailable. Then creates a small namespaced tree (root, four child
    elements, two nested elements, a comment and a processing instruction),
    stores every node on ``self`` and wraps the document, root and text
    node with ``self.adapter_class``.
    """
    # Use c-based or pure python ElementTree impl based on test class
    if self.__class__ == TestcElementTreeNodes:
        if not self.adapter_class.is_available():
            self.skipTest(
                "C-based ElementTree library is not installed"
                " or is too outdated to be supported by xml4h")
        import xml.etree.cElementTree as ET
    else:
        if not self.adapter_class.is_available():
            # Message previously read "is not too outdated", which inverted
            # its meaning.
            self.skipTest(
                "Pure Python ElementTree library is not installed"
                " or is too outdated to be supported by xml4h")
        import xml.etree.ElementTree as ET

    # Build the fixture tree directly with ElementTree
    self.root_elem = ET.Element('{urn:test}DocRoot')
    doc = ET.ElementTree(self.root_elem)

    # Non-ASCII tag name (two CJK characters followed by "1"). It was
    # garbled to '??1', which is not a legal XML name; it is spelled with
    # escapes here so it survives encoding mishaps.
    # NOTE(review): the characters are reconstructed -- confirm upstream.
    self.elem1 = ET.Element(u'\u5143\u7d201')
    # Namespace declarations are written as plain attributes here.
    self.elem1.attrib['xmlns:ns1'] = 'urn:ns1'
    self.elem1.attrib['a'] = '1'
    self.elem1.attrib['{urn:ns1}b'] = '2'
    self.elem2 = ET.Element('Element2')
    self.elem3 = ET.Element('{urn:ns1}Element3')
    self.elem3.attrib['xmlns'] = 'urn:ns1'
    self.elem4 = ET.Element('{urn:ns1}Element4')
    # NOTE(review): this repeats the elem3 assignment right after elem4 is
    # created; it looks like it was meant to target elem4. Left unchanged
    # because other tests may rely on the current fixture -- confirm.
    self.elem3.attrib['xmlns'] = 'urn:ns1'
    self.elem2_second = ET.Element('Element2')
    self.elem3_second = ET.Element('{urn:ns2}Element3')
    self.elem3_second.attrib['xmlns:ns2'] = 'urn:ns2'

    # Text and CDATA are represented by helper objects tied to their parent
    # element; the parent's .text is set from the helper.
    self.text_node = xml4h.impls.xml_etree_elementtree.ElementTreeText(
        'Some text', self.elem1)
    self.elem1.text = self.text_node.text
    self.cdata_node = xml4h.impls.xml_etree_elementtree.ElementTreeText(
        'Some cdata', self.elem2, is_cdata=True)
    self.elem2.text = self.cdata_node.text

    self.comment_node = ET.Comment('A comment')
    self.instruction_node = ET.ProcessingInstruction(
        'pi-target', 'pi-data')

    # Assemble the tree.
    self.root_elem.append(self.elem1)
    self.root_elem.append(self.elem2)
    self.root_elem.append(self.elem3)
    self.root_elem.append(self.elem4)
    self.elem3.append(self.elem2_second)
    self.elem2_second.append(self.comment_node)
    self.elem4.append(self.elem3_second)
    self.elem3_second.append(self.instruction_node)

    self.doc = doc
    self.xml4h_doc = self.adapter_class.wrap_document(doc)
    self.xml4h_root = self.xml4h_doc.root
    self.xml4h_text = self.adapter_class.wrap_node(
        self.text_node, self.doc)