我们从Python开源项目中,提取了以下21个代码示例,用于说明如何使用xml.dom.pulldom.START_ELEMENT。
def PullDOMAdapter(node): from xml.dom import Node from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, COMMENT, CHARACTERS if node.nodeType in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE): for childNode in node.childNodes: for event in PullDOMAdapter(childNode): yield event elif node.nodeType == Node.DOCUMENT_TYPE_NODE: raise NotImplementedError("DOCTYPE nodes are not supported by PullDOM") elif node.nodeType == Node.COMMENT_NODE: yield COMMENT, node elif node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE): yield CHARACTERS, node elif node.nodeType == Node.ELEMENT_NODE: yield START_ELEMENT, node for childNode in node.childNodes: for event in PullDOMAdapter(childNode): yield event yield END_ELEMENT, node else: raise NotImplementedError("Node type not supported: " + str(node.nodeType))
def test_expandItem(self): """Ensure expandItem works as expected.""" items = pulldom.parseString(SMALL_SAMPLE) # Loop through the nodes until we get to a "title" start tag: for evt, item in items: if evt == pulldom.START_ELEMENT and item.tagName == "title": items.expandNode(item) self.assertEqual(1, len(item.childNodes)) break else: self.fail("No \"title\" element detected in SMALL_SAMPLE!") # Loop until we get to the next start-element: for evt, node in items: if evt == pulldom.START_ELEMENT: break self.assertEqual("hr", node.tagName, "expandNode did not leave DOMEventStream in the correct state.") # Attempt to expand a standalone element: items.expandNode(node) self.assertEqual(next(items)[0], pulldom.CHARACTERS) evt, node = next(items) self.assertEqual(node.tagName, "p") items.expandNode(node) next(items) # Skip character data evt, node = next(items) self.assertEqual(node.tagName, "html") with self.assertRaises(StopIteration): next(items) items.clear() self.assertIsNone(items.parser) self.assertIsNone(items.stream)
def test_basic(self): """Ensure SAX2DOM can parse from a stream.""" with io.StringIO(SMALL_SAMPLE) as fin: sd = SAX2DOMTestHelper(fin, xml.sax.make_parser(), len(SMALL_SAMPLE)) for evt, node in sd: if evt == pulldom.START_ELEMENT and node.tagName == "html": break # Because the buffer is the same length as the XML, all the # nodes should have been parsed and added: self.assertGreater(len(node.childNodes), 0)
def dmoz_reader(filename): doc = pulldom.parse(filename) for event, node in doc: if event == pulldom.START_ELEMENT and node.tagName == 'ExternalPage': doc.expandNode(node) url = node.attributes['about'].value topic_node = node.getElementsByTagName('topic')[0] topics = topic_node.childNodes[0].data yield url, topics
def startElementNS(self, name, tagName , attrs): # Retrieve xml namespace declaration attributes. xmlns_uri = 'http://www.w3.org/2000/xmlns/' xmlns_attrs = getattr(self, '_xmlns_attrs', None) if xmlns_attrs is not None: for aname, value in xmlns_attrs: attrs._attrs[(xmlns_uri, aname)] = value self._xmlns_attrs = [] uri, localname = name if uri: # When using namespaces, the reader may or may not # provide us with the original name. If not, create # *a* valid tagName from the current context. if tagName is None: prefix = self._current_context[uri] if prefix: tagName = prefix + ":" + localname else: tagName = localname if self.document: node = self.document.createElementNS(uri, tagName) else: node = self.buildDocument(uri, tagName) else: # When the tagname is not prefixed, it just appears as # localname if self.document: node = self.document.createElement(localname) else: node = self.buildDocument(None, localname) for aname,value in attrs.iteritems(): a_uri, a_localname = aname if a_uri == xmlns_uri: if a_localname == 'xmlns': qname = a_localname else: qname = 'xmlns:' + a_localname attr = self.document.createAttributeNS(a_uri, qname) node.setAttributeNodeNS(attr) elif a_uri: prefix = self._current_context[a_uri] if prefix: qname = prefix + ":" + a_localname else: qname = a_localname attr = self.document.createAttributeNS(a_uri, qname) node.setAttributeNodeNS(attr) else: attr = self.document.createAttribute(a_localname) node.setAttributeNode(attr) attr.value = value self.lastEvent[1] = [(START_ELEMENT, node), None] self.lastEvent = self.lastEvent[1] self.push(node)
def startElementNS(self, name, tagName , attrs): # Retrieve xml namespace declaration attributes. xmlns_uri = 'http://www.w3.org/2000/xmlns/' xmlns_attrs = getattr(self, '_xmlns_attrs', None) if xmlns_attrs is not None: for aname, value in xmlns_attrs: attrs._attrs[(xmlns_uri, aname)] = value self._xmlns_attrs = [] uri, localname = name if uri: # When using namespaces, the reader may or may not # provide us with the original name. If not, create # *a* valid tagName from the current context. if tagName is None: prefix = self._current_context[uri] if prefix: tagName = prefix + ":" + localname else: tagName = localname if self.document: node = self.document.createElementNS(uri, tagName) else: node = self.buildDocument(uri, tagName) else: # When the tagname is not prefixed, it just appears as # localname if self.document: node = self.document.createElement(localname) else: node = self.buildDocument(None, localname) for aname,value in attrs.items(): a_uri, a_localname = aname if a_uri == xmlns_uri: if a_localname == 'xmlns': qname = a_localname else: qname = 'xmlns:' + a_localname attr = self.document.createAttributeNS(a_uri, qname) node.setAttributeNodeNS(attr) elif a_uri: prefix = self._current_context[a_uri] if prefix: qname = prefix + ":" + a_localname else: qname = a_localname attr = self.document.createAttributeNS(a_uri, qname) node.setAttributeNodeNS(attr) else: attr = self.document.createAttribute(a_localname) node.setAttributeNode(attr) attr.value = value self.lastEvent[1] = [(START_ELEMENT, node), None] self.lastEvent = self.lastEvent[1] self.push(node)
def test_parse_semantics(self): """Test DOMEventStream parsing semantics.""" items = pulldom.parseString(SMALL_SAMPLE) evt, node = next(items) # Just check the node is a Document: self.assertTrue(hasattr(node, "createElement")) self.assertEqual(pulldom.START_DOCUMENT, evt) evt, node = next(items) self.assertEqual(pulldom.START_ELEMENT, evt) self.assertEqual("html", node.tagName) self.assertEqual(2, len(node.attributes)) self.assertEqual(node.attributes.getNamedItem("xmlns:xdc").value, "http://www.xml.com/books") evt, node = next(items) self.assertEqual(pulldom.CHARACTERS, evt) # Line break evt, node = next(items) # XXX - A comment should be reported here! # self.assertEqual(pulldom.COMMENT, evt) # Line break after swallowed comment: self.assertEqual(pulldom.CHARACTERS, evt) evt, node = next(items) self.assertEqual("title", node.tagName) title_node = node evt, node = next(items) self.assertEqual(pulldom.CHARACTERS, evt) self.assertEqual("Introduction to XSL", node.data) evt, node = next(items) self.assertEqual(pulldom.END_ELEMENT, evt) self.assertEqual("title", node.tagName) self.assertTrue(title_node is node) evt, node = next(items) self.assertEqual(pulldom.CHARACTERS, evt) evt, node = next(items) self.assertEqual(pulldom.START_ELEMENT, evt) self.assertEqual("hr", node.tagName) evt, node = next(items) self.assertEqual(pulldom.END_ELEMENT, evt) self.assertEqual("hr", node.tagName) evt, node = next(items) self.assertEqual(pulldom.CHARACTERS, evt) evt, node = next(items) self.assertEqual(pulldom.START_ELEMENT, evt) self.assertEqual("p", node.tagName) evt, node = next(items) self.assertEqual(pulldom.START_ELEMENT, evt) self.assertEqual("xdc:author", node.tagName) evt, node = next(items) self.assertEqual(pulldom.CHARACTERS, evt) evt, node = next(items) self.assertEqual(pulldom.END_ELEMENT, evt) self.assertEqual("xdc:author", node.tagName) evt, node = next(items) self.assertEqual(pulldom.END_ELEMENT, evt) evt, node = next(items) self.assertEqual(pulldom.CHARACTERS, evt) evt, node = next(items) self.assertEqual(pulldom.END_ELEMENT, evt) # XXX No END_DOCUMENT item is ever obtained: #evt, node = next(items) #self.assertEqual(pulldom.END_DOCUMENT, evt)
def _test_thorough(self, pd, before_root=True): """Test some of the hard-to-reach parts of the parser, using a mock parser.""" evt, node = next(pd) self.assertEqual(pulldom.START_DOCUMENT, evt) # Just check the node is a Document: self.assertTrue(hasattr(node, "createElement")) if before_root: evt, node = next(pd) self.assertEqual(pulldom.COMMENT, evt) self.assertEqual("a comment", node.data) evt, node = next(pd) self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) self.assertEqual("target", node.target) self.assertEqual("data", node.data) evt, node = next(pd) self.assertEqual(pulldom.START_ELEMENT, evt) self.assertEqual("html", node.tagName) evt, node = next(pd) self.assertEqual(pulldom.COMMENT, evt) self.assertEqual("a comment", node.data) evt, node = next(pd) self.assertEqual(pulldom.PROCESSING_INSTRUCTION, evt) self.assertEqual("target", node.target) self.assertEqual("data", node.data) evt, node = next(pd) self.assertEqual(pulldom.START_ELEMENT, evt) self.assertEqual("p", node.tagName) evt, node = next(pd) self.assertEqual(pulldom.CHARACTERS, evt) self.assertEqual("text", node.data) evt, node = next(pd) self.assertEqual(pulldom.END_ELEMENT, evt) self.assertEqual("p", node.tagName) evt, node = next(pd) self.assertEqual(pulldom.END_ELEMENT, evt) self.assertEqual("html", node.tagName) evt, node = next(pd) self.assertEqual(pulldom.END_DOCUMENT, evt)