我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用lxml.etree.ElementTree()。
def save_opencv_xml_file(path, xml_generator):
    """
    Save something in opencv's XML format.

    The document is serialized in memory and written to disk once.  Only the
    quotes of the XML declaration are rewritten, so apostrophes inside element
    text or attribute values are left intact.

    @param path: path where to save the file
    @param xml_generator: function that accepts an LXML root element as a
        parameter and generates all the necessary XML to go in the file
    """
    from io import BytesIO

    root = etree.Element("opencv_storage")
    xml_generator(root)
    et = etree.ElementTree(root)

    buffer = BytesIO()
    et.write(buffer, encoding="utf-8", xml_declaration=True, pretty_print=True)

    # The declaration is the first thing in the output and ends at the first
    # "?>"; everything after it is the document body.
    declaration, terminator, body = buffer.getvalue().partition(b"?>")
    # little hack: OpenCV doesn't like the single quotes used in the XML
    # declaration, so swap them for double quotes -- in the declaration only,
    # a file-wide replace would corrupt content that contains apostrophes.
    declaration = declaration.replace(b"'", b'"')

    # Binary mode keeps the UTF-8 bytes exactly as serialized.
    with open(path, 'wb') as f:
        f.write(declaration + terminator + body)
def save_projects(self, *args):
    """Write every project in ``self.projects_list`` to the project XML file.

    Each project becomes a ``<project>`` element holding one child per field.
    Timestamps that are ``None`` are stored as elements without text; the
    others are formatted with ``settings.DATE_FORMAT``.  Extra positional
    arguments are accepted and ignored.
    """
    def stamp(moment):
        # A missing timestamp maps to an element without text.
        if moment is None:
            return None
        return moment.strftime(settings.DATE_FORMAT)

    root = ET.Element("projects")
    for project in self.projects_list:
        node = ET.SubElement(root, "project")
        # (tag, text) pairs in the order they appear in the file.
        fields = (
            ("name", project.name),
            ("description", project.description),
            ("path", project.path),
            ("update", project.update_cmd),
            ("compile", project.compile_cmd),
            ("run", project.run_cmd),
            ("last_update", stamp(project.last_update)),
            ("last_compile", stamp(project.last_compile)),
        )
        for tag, text in fields:
            ET.SubElement(node, tag).text = text

    ET.ElementTree(root).write(settings.PATH_PROJECT_FILE, pretty_print=True)
    logging.debug("All projects are saved")
def readXml(self, simType):
    """Locate and load the SIM file-system description for ``simType``.

    The XML file is looked up next to this module: ``sim_files_3g.xml`` for
    ``types.TYPE_USIM`` and ``sim_files_2g.xml`` for anything else.  When the
    file does not exist a fresh, empty root element (``sim_3G`` / ``sim_2G``)
    is created instead of parsing.

    Returns:
        A ``(path, root)`` tuple: the file path and the root element.
    """
    base_dir = os.path.dirname(__file__)
    if simType == types.TYPE_USIM:
        path = os.path.join(base_dir, "sim_files_3g.xml")
    else:
        path = os.path.join(base_dir, "sim_files_2g.xml")

    if not os.path.exists(path):
        logging.warning("File %s not exists" %path)
        logging.info("Create xml")
        if simType == types.TYPE_USIM:
            root = etree.Element('sim_3G')
        else:
            root = etree.Element('sim_2G')
    else:
        # Drop whitespace-only text nodes while parsing.
        parser = etree.XMLParser(remove_blank_text=True)
        root = etree.parse(path, parser).getroot()
    return path, root
def getPathFromFile(self, file):
    """Build an id-qualified path for ``file`` starting at the master file.

    The positional XPath reported by lxml for ``file`` is walked segment by
    segment; each segment is replaced by ``tag[@id='...']`` using the ``id``
    attribute of the node it resolves to.

    Args:
        file: element of the tree rooted at ``self.root``, or ``None``.

    Returns:
        The id-qualified path string, or ``None`` when ``file`` is ``None``.
    """
    if file is None:
        return

    # One tree wrapper serves both the getpath() call and every xpath() lookup.
    tree = etree.ElementTree(self.root)
    pathXml = tree.getpath(file).split("mf")[1]
    path = "./mf[@id='3F00']"
    for _file in pathXml.split('/'):
        if not _file:
            # Empty segment produced by the leading '/'.
            continue
        absPath = types.addToPath(path, _file)
        fileIdValue = tree.xpath(absPath)[0].attrib['id']
        # e.g. "./mf/df[@id='ADF0']"
        fileId = "%s[@id='%s']" %(_file.split('[')[0], fileIdValue)
        path = types.addToPath(path, fileId)
    return path
def on_post_save_async(self, view):
    """React to a saved view: refresh add-on data and optionally the skin.

    Nothing happens when no add-on is loaded or the view has no file name.
    For ``.xml`` files that were modified, the add-on XML data and the cached
    root/tree are refreshed; if the file is a known window file (or lives in
    the ``colors`` folder) the skin reload and variable check commands are
    run according to the settings.  For ``.po`` files the labels are updated.
    """
    if not INFOS.addon or not view.file_name():
        return False

    if view.file_name().endswith(".xml"):
        if not self.is_modified:
            return False
        INFOS.addon.update_xml_files()
        filename = os.path.basename(view.file_name())
        # Name of the directory that directly contains the saved file.
        folder = view.file_name().split(os.sep)[-2]
        INFOS.addon.reload(view.file_name())
        self.root = utils.get_root_from_file(view.file_name())
        self.tree = ET.ElementTree(self.root)

        window_files = INFOS.addon.window_files
        is_window_file = folder in window_files and filename in window_files[folder]
        if is_window_file or folder == "colors":
            if self.settings.get("auto_reload_skin", True):
                self.is_modified = False
                view.window().run_command("execute_builtin",
                                          {"builtin": "ReloadSkin()"})
            if self.settings.get("auto_skin_check", True):
                view.window().run_command("check_variables",
                                          {"check_type": "file"})

    if view.file_name().endswith(".po"):
        INFOS.addon.update_labels()
def xml_to_text(in_dir, out_dir, tag):
    """Extract the text of XML files in ``in_dir`` into ``.txt`` files.

    For every input file, the text of all descendants of each selected
    element is joined with single spaces; one line is written per selected
    element.  When ``tag`` is ``None`` the root element is the only selected
    element, otherwise every element named ``tag`` (in any namespace) is.
    """
    create_dirs(out_dir)

    for fi in get_files(in_dir):
        with codecs.open(fi, encoding='utf-8') as f:
            root = etree.ElementTree().parse(f)

        if tag is None:
            elements = [root]
        else:
            # '{*}' matches the tag regardless of namespace.
            elements = list(root.iter('{*}' + tag))

        texts = [
            ' '.join([e.text for e in el.iterdescendants()
                      if e.text is not None])
            for el in elements
        ]

        out_file = out_file_name(out_dir, fi, 'txt')
        with codecs.open(out_file, 'wb', encoding='utf-8') as f:
            f.write('\n'.join(texts))
            f.write('\n')
def simpleops():
    """
    Test basic child operations: append, remove, insert and extend.

    >>> elem = ElementTree.XML("<body><tag/></body>")
    >>> serialize(elem)
    '<body><tag/></body>'
    >>> e = ElementTree.Element("tag2")
    >>> elem.append(e)
    >>> serialize(elem)
    '<body><tag/><tag2/></body>'
    >>> elem.remove(e)
    >>> serialize(elem)
    '<body><tag/></body>'
    >>> elem.insert(0, e)
    >>> serialize(elem)
    '<body><tag2/><tag/></body>'
    >>> elem.remove(e)
    >>> elem.extend([e])
    >>> serialize(elem)
    '<body><tag/><tag2/></body>'
    >>> elem.remove(e)
    """
def entity():
    """
    Test entity handling.

    1) bad entities

    >>> ElementTree.XML("<document>&entity;</document>")
    Traceback (most recent call last):
    ExpatError: undefined entity: line 1, column 10

    >>> ElementTree.XML(ENTITY_XML)
    Traceback (most recent call last):
    ExpatError: undefined entity &entity;: line 5, column 10

    (add more tests here)

    """

# doesn't work with lxml.etree
def parsefile():
    """
    Test parsing from file.  Note that we're opening the files in
    here; by default, the 'parse' function opens the file in binary
    mode, and doctest doesn't filter out carriage returns.

    >>> file = open("samples/simple.xml", "rb")
    >>> tree = ElementTree.parse(file)
    >>> file.close()
    >>> tree.write(stdout())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>
    >>> file = open("samples/simple-ns.xml", "rb")
    >>> tree = ElementTree.parse(file)
    >>> file.close()
    >>> tree.write(stdout())
    <root xmlns="http://namespace/">
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>
    """
def qname():
    """
    Test QName handling.

    1) decorated tags

    >>> elem = ElementTree.Element("{uri}tag")
    >>> serialize(elem) # 1.1
    '<ns0:tag xmlns:ns0="uri"/>'

    ## 2) decorated attributes

    ## >>> elem.attrib["{uri}key"] = "value"
    ## >>> serialize(elem) # 2.1
    ## '<ns0:tag ns0:key="value" xmlns:ns0="uri"/>'

    """
def _volume(self, value, cmd='PUT'):
    """Build the main-zone volume request document and hand it on.

    For ``cmd == 'GET'`` the ``Lvl`` element carries ``value`` as its text.
    For any other command ``Lvl`` gets the children ``Val`` (``str(value)``),
    ``Exp`` (``'1'``) and ``Unit`` (``'dB'``).

    Returns:
        Whatever ``self._return_document`` returns for the built tree.
    """
    root = etree.Element('YAMAHA_AV')
    root.set('cmd', cmd)

    # YAMAHA_AV / Main_Zone / Volume / Lvl
    level = root
    for tag in ('Main_Zone', 'Volume', 'Lvl'):
        level = etree.SubElement(level, tag)

    if cmd == 'GET':
        level.text = value
    else:
        for tag, text in (('Val', str(value)), ('Exp', '1'), ('Unit', 'dB')):
            etree.SubElement(level, tag).text = text

    return self._return_document(etree.ElementTree(root))
def save(self, output=None):
    """Serialize the first root object of ``self.contents`` as XML.

    The output stream obtained from ``self.open_out_stream`` is always
    released through ``self.uri.close_stream()``, even when building or
    writing the tree fails.

    Args:
        output: forwarded to ``self.open_out_stream``; defaults to ``None``.
    """
    output = self.open_out_stream(output)
    try:
        self.prefixes.clear()
        self.reverse_nsmap.clear()
        if not self.contents:
            # NOTE(review): an ElementTree without a root presumably cannot
            # provide ``docinfo`` below -- confirm the intended behaviour for
            # empty resources.
            tree = etree.ElementTree()
        else:
            root = self.contents[0]
            # Compute required nsmap for subpackages
            self.register_eobject_epackage(root)
            for eobj in root.eAllContents():
                self.register_eobject_epackage(eobj)
            tree = etree.ElementTree(self._go_across(root))
        tree.write(output,
                   pretty_print=True,
                   xml_declaration=True,
                   encoding=tree.docinfo.encoding)
    finally:
        # Release the stream whether or not serialization succeeded.
        self.uri.close_stream()
def run(self, xml, **kwargs):
    """Set the XML source and run the processor.

    Takes either an already-parsed etree object or raw XML text as the
    first argument.

    Args:
        xml (etree.ElementTree or str): the source document.  Text is parsed
            with ``etree.XML``; if that raises ``ValueError`` the text is
            encoded to bytes and parsed again.
        **kwargs: forwarded to the parent class ``run``.

    Raises:
        ValueError: if the text cannot be parsed in either form.
    """
    self.output = self.__graph__()
    if isinstance(xml, str):
        try:
            self.source = etree.XML(xml)
        except ValueError:
            # Retry with bytes when the text form is rejected.
            try:
                self.source = etree.XML(xml.encode())
            except Exception:
                # ``Exception`` (not a bare except) so KeyboardInterrupt and
                # SystemExit are not converted into ValueError.
                raise ValueError("Cannot run error {}".format(sys.exc_info()[0]))
    else:
        self.source = xml
    super(XMLProcessor, self).run(**kwargs)
def sortFile(fileobj):
    """Write a sorted copy of an XML file to a temporary file.

    Args:
        fileobj: mapping with the keys ``'filename'`` (XML file to read) and
            ``'tmpfilename'`` (where the sorted copy is written).
    """
    # parse the XML file and get a pointer to the top; binary mode lets the
    # parser apply the encoding declared in the document instead of the
    # locale's default text encoding
    with open(fileobj['filename'], 'rb') as original:
        xmldoc = le.parse(original)
    xmlroot = xmldoc.getroot()

    # create a new XML element that will be the top of
    # the sorted copy of the XML file
    newxmlroot = le.Element(xmlroot.tag)

    # create the sorted copy of the XML file
    sortAttrs(xmlroot, newxmlroot)
    sortElements(list(xmlroot), newxmlroot)

    # write the sorted XML file to the temp file
    newtree = le.ElementTree(newxmlroot)
    with open(fileobj['tmpfilename'], 'wb') as newfile:
        newtree.write(newfile, pretty_print=True)

#
# sort each of the specified files
def _toPMML(self,filename):
    """Write the trained model to a PMML file at ``filename``.

    The PMML tree is assembled with the ``translator`` helpers from the
    training data and kernel parameters stored on ``self``, written to disk,
    and a confirmation line is printed.  Nothing is returned.
    """
    x_train = self.xTrain
    y_train = self.yTrain
    gamma = self.gamma
    nugget = self.nugget
    k_lambda = self.k_lambda
    copywrite = "DMG.org"

    xrow, yrow, xcol, ycol = translator.trans_get_dimension(x_train, y_train)
    featureName, targetName = translator.trans_name(xcol, ycol)

    # Start constructing the XML Tree
    pmml_root = translator.trans_root(None, copywrite, None)
    pmml_root = translator.trans_dataDictionary(
        pmml_root, featureName, targetName, xcol, ycol)
    model = translator.trans_GP(pmml_root)
    model = translator.trans_miningSchema(model, featureName, targetName)
    model = translator.trans_output(model)
    model = translator.trans_kernel(
        model, k_lambda, nugget, gamma, xcol, 'squared_exponential')
    training = translator.trans_traininginstances(model, xrow, xcol+ycol)
    translator.trans_instancefields(training, featureName, targetName)
    translator.trans_inlinetable(
        training, featureName, targetName, x_train, y_train)

    # Write the tree to file
    ET.ElementTree(pmml_root).write(
        filename, pretty_print=True, xml_declaration=True, encoding="utf-8")
    print('Wrote PMML file to %s'%filename)
def _file_to_tree(_data_format, _reference):
    """Read a file with the parser matching its format and return an lxml tree.

    Supported formats are ``'HTML'``, ``'XML'`` and ``'JSON'``; JSON content
    is converted to elements under a ``top`` element.  Any other format
    raises ``Exception``.
    """
    print("format_to_tree : " + _data_format)

    if _data_format == 'HTML':
        from lxml import html
        return html.parse(_reference)
    elif _data_format == 'XML':
        from lxml import etree
        return etree.parse(_reference)
    elif _data_format == 'JSON':
        from lxml import etree
        from json_lxml import element
        with open(_reference, "r") as _f:
            _top_element = json.load(_f)
        return etree.ElementTree(element("top",_top_element))

    raise Exception("_file_to_tree: " + _data_format + " is not supported")
def test_certificate_record_with_empty_pub_key():
    """A record with an empty public key is exported by issuer and serial."""
    expected = b"""<?xml version='1.0' encoding='UTF-8'?>
<blocklist lastupdate="1459262434336" \
xmlns="http://www.mozilla.org/2006/addons-blocklist">
  <certItems>
    <certItem issuerName="MBQxEjAQBgNVBAMTCWVEZWxsUm9vdA==">
      <serialNumber>a8V7lRiTqpdLYkrAiPw7tg==</serialNumber>
    </certItem>
  </certItems>
</blocklist>
""".decode('utf-8')

    xml_tree = etree.Element(
        'blocklist',
        xmlns="http://www.mozilla.org/2006/addons-blocklist",
        lastupdate='1459262434336'
    )
    exporter.write_cert_items(xml_tree, [CERTIFICATE_DATA_WITH_EMPTY_PUB_KEY])

    serialized = etree.tostring(
        etree.ElementTree(xml_tree),
        pretty_print=True,
        xml_declaration=True,
        encoding='UTF-8')
    result = serialized.decode('utf-8')

    assert result == expected
def test_certificate_record_with_subject_and_pubKeyHash():
    """A record with subject and pubKeyHash is exported as attributes."""
    expected = b"""<?xml version='1.0' encoding='UTF-8'?>
<blocklist lastupdate="1459262434336" \
xmlns="http://www.mozilla.org/2006/addons-blocklist">
  <certItems>
    <certItem pubKeyHash="VCIlmPM9NkgFQtrs4Oa5TeFcDu6MWRTKSNdePEhOgD8=" \
subject="MCIxIDAeBgNVBAMMF0Fub3RoZXIgVGVzdCBFbmQtZW50aXR5"/>
  </certItems>
</blocklist>
""".decode('utf-8')

    xml_tree = etree.Element(
        'blocklist',
        xmlns="http://www.mozilla.org/2006/addons-blocklist",
        lastupdate='1459262434336'
    )
    exporter.write_cert_items(xml_tree, [CERTIFICATE_DATA_WITH_SUBJECT])

    serialized = etree.tostring(
        etree.ElementTree(xml_tree),
        pretty_print=True,
        xml_declaration=True,
        encoding='UTF-8')
    result = serialized.decode('utf-8')

    assert result == expected
def test_certificate_record_with_subject_and_pubKeyHash_with_empty_issuerName():
    """An empty issuer name does not change the subject/pubKeyHash export."""
    expected = b"""<?xml version='1.0' encoding='UTF-8'?>
<blocklist lastupdate="1459262434336" \
xmlns="http://www.mozilla.org/2006/addons-blocklist">
  <certItems>
    <certItem pubKeyHash="VCIlmPM9NkgFQtrs4Oa5TeFcDu6MWRTKSNdePEhOgD8=" \
subject="MCIxIDAeBgNVBAMMF0Fub3RoZXIgVGVzdCBFbmQtZW50aXR5"/>
  </certItems>
</blocklist>
""".decode('utf-8')

    xml_tree = etree.Element(
        'blocklist',
        xmlns="http://www.mozilla.org/2006/addons-blocklist",
        lastupdate='1459262434336'
    )
    exporter.write_cert_items(xml_tree,
                              [CERTIFICATE_DATA_WITH_SUBJECT_AND_EMPTY_ISSUER])

    serialized = etree.tostring(
        etree.ElementTree(xml_tree),
        pretty_print=True,
        xml_declaration=True,
        encoding='UTF-8')
    result = serialized.decode('utf-8')

    assert result == expected
def CrawData(self, url):
    """Fetch ``url`` and extract data from it with the configured crawl items.

    Logs in first when a login is configured and has not happened yet.

    Returns:
        ``{}`` (when ``self.IsMultiData == 'One'``) or ``[]`` if the page is
        empty or cannot be parsed; ``{'Content': html}`` if ``self.CrawItems``
        is an empty list; otherwise the result of
        ``self.GetDataFromCrawItems`` on the parsed tree.
    """
    if self.Login != "" and self.haslogin == False:
        self.HttpItem.opener = self.autologin(self.Login)
        self.haslogin = True

    html = self.HttpItem.GetHTML(url)
    if html == '':
        root = None
    else:
        root = etree.HTML(html)

    if root is None:
        if self.IsMultiData == 'One':
            return {}
        return []

    tree = etree.ElementTree(root)
    no_items = isinstance(self.CrawItems, list) and len(self.CrawItems) == 0
    if no_items:
        return {'Content': html}
    return self.GetDataFromCrawItems(tree)
def preprocess_xml(doc_path):
    '''
    Parse an XML document and strip the namespaces declared on its root.

    Parameters
    ----------
    doc_path: str
        Path of the document to parse.

    Returns
    -------
    etree.ElementTree
        The parsed tree after ``remove_namespace`` was applied for every
        namespace in the root element's ``nsmap``.
    '''
    tree = etree.parse(doc_path)
    declared = tree.getroot().nsmap
    for namespace in declared.values():
        remove_namespace(tree, namespace)
    return tree
def __init__(self, name):
    """Load validation statuses from the XML file ``name``.

    ``self.labels`` is filled from the children of the ``labels`` element.
    Every ``validation_status`` element adds its status to the entry stored
    in ``self`` under its key: ``(uri, generation)`` when
    ``show_backup_generation`` is set, otherwise the URI alone (entries of
    the ``backup`` generation are then skipped).  ``rsync_transfer_*``
    statuses are skipped unless ``show_rsync_transfer`` is set.
    """
    self.name = name
    tree = ElementTree(file = name)
    self.labels = [label.tag.strip() for label in tree.find("labels")]

    for entry in tree.findall("validation_status"):
        generation = entry.get("generation")
        status = entry.get("status")
        uri = entry.text.strip()

        hidden_rsync = (not show_rsync_transfer
                        and status.startswith("rsync_transfer_"))
        if hidden_rsync:
            continue

        if show_backup_generation:
            key = (uri, generation)
        else:
            if generation == "backup":
                continue
            key = uri

        if key not in self:
            self[key] = Object(self, uri, generation)
        self[key].add(status)
def create_document(cls, width, height, doc_id=None, doc_units=None):
    """Create a minimal SVG document.

    Args:
        width: document width, a number in ``doc_units``.
        height: document height, a number in ``doc_units``.
        doc_id: optional value for the root element's ``id`` attribute.
        doc_units: unit suffix for width and height; defaults to ``'px'``.

    Returns:
        An ElementTree
    """
    def floatystr(fstr):
        # Strip off trailing zeros from fixed point float string
        return ('%f' % fstr).rstrip('0').rstrip('.')

    if doc_units is None:
        doc_units = 'px'

    docroot = etree.Element(svg_ns('svg'), nsmap=SVG_NS)
    width_str = floatystr(width)
    height_str = floatystr(height)
    docroot.set('width', '%s%s' % (width_str, doc_units))
    docroot.set('height', '%s%s' % (height_str, doc_units))
    # XML attribute names are case-sensitive: SVG defines 'viewBox', so the
    # all-lowercase spelling is not the SVG view box attribute.
    docroot.set('viewBox', '0 0 %s %s' % (width_str, height_str))
    if doc_id is not None:
        docroot.set('id', doc_id)
    return etree.ElementTree(docroot)
def test_parent(self):
    """Check the ``parent`` attribute of wrapped document and element nodes."""
    def parent_impl(node):
        # Implementation node of the wrapped node's parent.
        return self.adapter_class.wrap_node(node, self.doc).parent.impl_node

    # Document node has no parent
    xml4h_doc = self.adapter_class.wrap_node(self.doc, self.doc)
    self.assertEqual(None, xml4h_doc.parent)

    # Root element has document as parent
    self.assertIsInstance(self.xml4h_root.parent, xml4h.nodes.Document)

    # Find parents of elements
    self.assertEqual(self.root_elem, parent_impl(self.elem1))
    self.assertEqual(self.elem3, parent_impl(self.elem2_second))

    # Parent of text node (Text not stored as node in lxml/ElementTree)
    text_is_a_node = not isinstance(
        self, (TestLXMLNodes, TestElementTreeNodes))
    if text_is_a_node:
        self.assertEqual(self.elem1, parent_impl(self.text_node))

    # Chain parent calls
    wrapped_elem = self.adapter_class.wrap_node(self.elem3_second, self.doc)
    grandparent = wrapped_elem.parent.parent
    self.assertEqual(self.root_elem, grandparent.impl_node)
def xml_write(self, filename):
    """Writes a depletion chain XML file.

    Every nuclide contributes the element returned by its own
    ``xml_write()``.  With lxml available the file is pretty-printed by the
    writer; otherwise the tree is indented with ``clean_xml_indentation``
    before writing.

    Parameters
    ----------
    filename : str
        The path to the depletion chain XML file.

    """
    root_elem = ET.Element('depletion')
    for nuclide in self.nuclides:
        root_elem.append(nuclide.xml_write())
    tree = ET.ElementTree(root_elem)

    write_options = {'encoding': 'utf-8'}
    if _have_lxml:
        write_options['pretty_print'] = True
    else:
        clean_xml_indentation(root_elem, spaces_per_level=2)
    tree.write(filename, **write_options)
def to_etree(doc, encoding=None):
    """Return the `doc` as an ElementTree object.

    Args:
        doc: A filename, file-like object, or etree object
            (Element/ElementTree).
        encoding: The file encoding, used only when `doc` has to be parsed.

    Returns:
        An etree ElementTree object.

    """
    # Already a tree: hand it back untouched.
    if is_etree(doc):
        return doc

    # A bare element is wrapped in a new tree.
    if etree.iselement(doc):
        return etree.ElementTree(doc)

    # Anything else is treated as a parseable source.
    xml_parser = get_xml_parser(encoding=encoding)
    return etree.parse(doc, parser=xml_parser)
def _details_do_merge(details, root):
    """Merge the remaining details into the composite document.

    Each non-``None`` entry of ``details`` is parsed; entries that are not
    well-formed XML are logged and skipped.  Unqualified elements of a parsed
    entry are moved into the entry's namespace before it is appended to
    ``root``.

    Returns:
        A new ElementTree whose root is ``root``.
    """
    for namespace in sorted(details):
        xmldata = details[namespace]
        if xmldata is None:
            continue
        try:
            detail = etree.fromstring(xmldata)
        except etree.XMLSyntaxError as e:
            maaslog.warning("Invalid %s details: %s", namespace, e)
            continue
        # Add the namespace to all unqualified elements.
        for elem in detail.iter("{}*"):
            elem.tag = etree.QName(namespace, elem.tag)
        root.append(detail)

    # Re-home `root` in a new tree. This ensures that XPath
    # expressions like "/some-tag" work correctly. Without this, when
    # there's well-formed lshw data -- see the backward-compatibility
    # hack further up -- expressions would be evaluated from the first
    # root created in this function, even though that root is now the
    # parent of the current `root`.
    return etree.ElementTree(root)
def _send_request(self, request):
    """Send XML data to OpenVAS Manager and get results.

    An element is serialized straight onto the socket; text is encoded as
    UTF-8 first and bytes are sent as they are.  The response is read in
    1024-byte blocks and fed to an incremental parser until a block shorter
    than the block size arrives.

    Returns:
        The root element of the parsed response.
    """
    block_size = 1024

    if etree.iselement(request):
        etree.ElementTree(request).write(self.socket, encoding="utf-8")
    else:
        if isinstance(request, six.text_type):
            request = request.encode("utf-8")
        self.socket.send(request)

    builder = etree.XMLTreeBuilder()
    chunk = self.socket.recv(block_size)
    builder.feed(chunk)
    # A full block means there may be more data waiting.
    while len(chunk) >= block_size:
        chunk = self.socket.recv(block_size)
        builder.feed(chunk)
    return builder.close()
def saveNet(self, filename):
    '''Save XML tree to file.

    Writes the XML declaration, the ColouredPetriNet doctype and then the
    tree held in ``self.cpn``.

    :param filename: Filepath where the XML tree is saved.
    '''
    doctype = """<!DOCTYPE ColouredPetriNet SYSTEM "ColouredPetriNet.dtd">"""
    net_root = None
    with et.xmlfile(filename, encoding='utf-8') as xf:
        xf.write_declaration(standalone=False)
        xf.write_doctype(doctype)
        net_root = et.ElementTree(self.cpn).getroot()
        xf.write(net_root)

#------------------------------------------------------------------------------------------------
def _write(self, stream):
    """Serialize ``self._img`` to ``stream`` without an XML declaration."""
    document = ET.ElementTree(self._img)
    document.write(stream, xml_declaration=False)
def _write(self, stream):
    """Serialize ``self._img`` to ``stream`` as UTF-8 with an XML declaration."""
    document = ET.ElementTree(self._img)
    document.write(stream, encoding="UTF-8", xml_declaration=True)
def save(filename, pynode=root):
    """Write ``pynode`` to ``filename`` as a ``PropertyList`` XML document.

    A failed write is reported on standard output instead of being raised.

    Args:
        filename: path of the file to write.
        pynode: node to serialize; defaults to the module-level ``root`` as
            it was bound when this function was defined.
    """
    xmlroot = ET.Element('PropertyList')
    xml = ET.ElementTree(xmlroot)
    _buildXML(xmlroot, pynode)
    try:
        xml.write(filename, encoding="us-ascii", xml_declaration=False,
                  pretty_print=True)
    except Exception:
        # Best effort: report and carry on.  ``Exception`` rather than a bare
        # except so KeyboardInterrupt/SystemExit still propagate.
        print(filename + ": xml write error:\n" + str(sys.exc_info()[1]))
        return
def save(self, name):
    """Write ``self.root`` to ``name`` as pretty-printed UTF-8 XML."""
    write_options = {
        "pretty_print": True,
        "xml_declaration": True,
        "encoding": "utf-8",
    }
    etree.ElementTree(self.root).write(name, **write_options)
def getPathFromNode(self, node):
    """Build an id-qualified path for ``node`` starting at the master file.

    The positional XPath reported by lxml for ``node`` is walked segment by
    segment; each segment is replaced by ``tag[@id='...']`` using the ``id``
    attribute of the node it resolves to.  Empty segments add a ``"/"`` to
    the path.

    Args:
        node: element of the tree rooted at ``self.root``.

    Returns:
        The id-qualified path string.
    """
    # One tree wrapper serves both the getpath() call and every xpath() lookup.
    tree = etree.ElementTree(self.root)
    pathXml = tree.getpath(node).split("mf")[1]
    path = "./mf[@id='3F00']"
    for _file in pathXml.split('/'):
        if not _file:
            path = types.addToPath(path, "/")
            continue
        absPath = types.addToPath(path, _file)
        fileIdValue = tree.xpath(absPath)[0].attrib['id']
        # e.g. "./mf/df[@id='ADF0']"
        fileId = "%s[@id='%s']" %(_file.split('[')[0], fileIdValue)
        path = types.addToPath(path, fileId)
    return path
def writeXml(simXmlFile, root):
    """Write ``root`` to ``simXmlFile`` as pretty-printed UTF-8 XML.

    Args:
        simXmlFile: path of the file to write.
        root: root element of the document.
    """
    tree = etree.ElementTree(root)
    xml_document = etree.tostring(tree,
                                  pretty_print=True,
                                  xml_declaration=True,
                                  encoding='utf-8')
    # The document is serialized with an explicit encoding, so it is written
    # in binary mode; the context manager closes the file even if the write
    # fails.
    with open(simXmlFile, mode="wb") as xml_file:
        xml_file.write(xml_document)
def readXml(file):
    """Parse ``file`` and return its root element.

    Whitespace-only text nodes are dropped while parsing.

    Args:
        file: source handed to ``etree.parse``.

    Returns:
        The root element of the parsed document.
    """
    parser = etree.XMLParser(remove_blank_text=True)
    return etree.parse(file, parser).getroot()
def check_status(self):
    """
    Inspect the currently active view: cache its file name and XML tree,
    assign the matching syntax file and re-initialise the add-on info when
    the project folder changed.
    """
    view = sublime.active_window().active_view()
    self.filename = view.file_name()
    self.root = None
    self.tree = None
    if not self.filename:
        return None

    # Pick a syntax by file extension; XML is only handled with an add-on.
    if INFOS.addon and self.filename and self.filename.endswith(".xml"):
        self.root = utils.get_root_from_file(self.filename)
        self.tree = ET.ElementTree(self.root)
        view.assign_syntax('Packages/KodiDevKit/KodiSkinXML.sublime-syntax')
    if self.filename and self.filename.endswith(".po"):
        view.assign_syntax('Packages/KodiDevKit/Gettext.tmLanguage')
    if self.filename and self.filename.endswith(".log"):
        view.assign_syntax('Packages/KodiDevKit/KodiLog.sublime-syntax')

    if not view:
        return

    wnd = view.window()
    if wnd is None:
        return None

    variables = wnd.extract_variables()
    if "folder" not in variables:
        logging.info("Could not find folder path in project file")
        return

    project_folder = variables["folder"]
    if project_folder and project_folder != self.actual_project:
        self.actual_project = project_folder
        logging.info("project change detected: " + project_folder)
        INFOS.init_addon(project_folder)
def save_xml(filename, root):
    """
    Serialize the XML node ``root`` with a declaration, indent it with tabs
    and write the result to the file at ``filename`` as UTF-8.
    """
    serialized = ET.tostring(ET.ElementTree(root),
                             encoding='UTF-8',
                             xml_declaration=True)
    indented = yattag.indent(string=serialized.decode("utf-8"),
                             indentation="\t")
    with open(filename, 'w', encoding="utf-8") as xml_file:
        xml_file.write(indented)
def empty_svg(**attrs):
    """Return a new SVG document tree with only a title and a description.

    Every keyword argument is set as an attribute on the ``svg`` root
    element, overriding the template's value for that attribute.
    """
    template = """<?xml version="1.0" standalone="no"?>
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
   "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
 <svg xmlns="http://www.w3.org/2000/svg" version="1.1"
   width="29.7002cm" height="21.0007cm" viewBox="0 0 116930 82680 ">
 <title>Picutre generated by pcb2svg</title>
 <desc>Picture generated by pcb2svg</desc>
 </svg>"""
    document = etree.ElementTree(etree.fromstring(template))
    svg_root = document.getroot()
    for name in attrs:
        svg_root.attrib[name] = attrs[name]
    return document
def _get_etree(self):
    "Contains the generated ElementTree after parsing is finished."
    parsed_root = self._root
    return ElementTree(parsed_root)
def parse(file, beautifulsoup=None, makeelement=None, **bsargs):
    """Parse a file into an ElementTree using the BeautifulSoup parser.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a different Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are used.

    `file` may be a file name or an object with a ``read`` method.  A file
    opened here from a name is closed again before returning; a file object
    passed in by the caller is left open.
    """
    opened_here = not hasattr(file, 'read')
    if opened_here:
        file = open(file)
    try:
        # NOTE(review): assumes _parse has consumed the stream by the time it
        # returns the root element -- confirm against _parse.
        root = _parse(file, beautifulsoup, makeelement, **bsargs)
    finally:
        if opened_here:
            file.close()
    return etree.ElementTree(root)
def serialize(elem, **options):
    """Serialize ``elem`` and return the text passed through
    ``fix_compatibility``.

    ``options`` are forwarded to the tree's ``write``.  On Python 2 the
    output is decoded with the requested encoding (``utf-8`` by default) and
    encoded back afterwards; on Python 3 it is decoded as ISO8859-1 and
    returned as text.
    """
    legacy_python = sys.version_info[0] < 3

    buffer = BytesIO()
    ElementTree.ElementTree(elem).write(buffer, **options)

    if legacy_python:
        encoding = options.get("encoding", "utf-8")
    else:
        encoding = 'ISO8859-1'

    text = buffer.getvalue().decode(encoding)
    result = fix_compatibility(text)
    if legacy_python:
        result = result.encode(encoding)
    return result
def sanity():
    """
    Check that the elementtree modules can be star-imported.

    >>> from elementtree.ElementTree import *
    >>> from elementtree.ElementInclude import *
    >>> from elementtree.ElementPath import *
    >>> from elementtree.HTMLTreeBuilder import *
    >>> from elementtree.SimpleXMLWriter import *
    >>> from elementtree.TidyTools import *
    """

# doesn't work with lxml.etree
def version():
    """
    Check the version string reported by the ElementTree module.

    >>> ElementTree.VERSION
    '1.3a2'
    """

# doesn't work with lxml.etree
def simplefind():
    """
    Test find methods using the elementpath fallback.

    >>> CurrentElementPath = ElementTree.ElementPath
    >>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
    >>> elem = SAMPLE_XML
    >>> elem.find("tag").tag
    'tag'
    >>> ElementTree.ElementTree(elem).find("tag").tag
    'tag'
    >>> elem.findtext("tag")
    'text'
    >>> elem.findtext("tog")
    >>> elem.findtext("tog", "default")
    'default'
    >>> ElementTree.ElementTree(elem).findtext("tag")
    'text'
    >>> summarize_list(elem.findall("tag"))
    ['tag', 'tag']
    >>> summarize_list(elem.findall(".//tag"))
    ['tag', 'tag', 'tag']

    Path syntax doesn't work in this case.

    >>> elem.find("section/tag")
    >>> elem.findtext("section/tag")
    >>> elem.findall("section/tag")
    []

    >>> ElementTree.ElementPath = CurrentElementPath
    """

# doesn't work with lxml.etree
def parsefile():
    """
    Test parsing from file.

    >>> tree = ElementTree.parse("samples/simple.xml")
    >>> normalize_crlf(tree)
    >>> tree.write(stdout())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>
    >>> tree = ElementTree.parse("samples/simple-ns.xml")
    >>> normalize_crlf(tree)
    >>> tree.write(stdout())
    <root xmlns="http://namespace/">
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>

    ## <ns0:root xmlns:ns0="http://namespace/">
    ##    <ns0:element key="value">text</ns0:element>
    ##    <ns0:element>text</ns0:element>tail
    ##    <ns0:empty-element/>
    ## </ns0:root>
    """
def parsehtml():
    """
    Test HTML parsing.

    >>> # p = HTMLTreeBuilder.TreeBuilder()
    >>> p = ElementTree.HTMLParser()
    >>> p.feed("<p><p>spam<b>egg</b></p>")
    >>> serialize(p.close())
    '<p>spam<b>egg</b></p>'
    """

# doesn't work with lxml.etree
def parseliteral():
    r"""
    Test parsing documents from string literals.

    >>> element = ElementTree.XML("<html><body>text</body></html>")
    >>> ElementTree.ElementTree(element).write(stdout())
    <html><body>text</body></html>
    >>> element = ElementTree.fromstring("<html><body>text</body></html>")
    >>> ElementTree.ElementTree(element).write(stdout())
    <html><body>text</body></html>

    ## >>> sequence = ["<html><body>", "text</bo", "dy></html>"]
    ## >>> element = ElementTree.fromstringlist(sequence)
    ## >>> ElementTree.ElementTree(element).write(stdout())
    ## <html><body>text</body></html>

    >>> print(repr(ElementTree.tostring(element)).lstrip('b'))
    '<html><body>text</body></html>'

    # looks different in lxml
    # >>> print(ElementTree.tostring(element, "ascii"))
    # <?xml version='1.0' encoding='ascii'?>
    # <html><body>text</body></html>

    >>> _, ids = ElementTree.XMLID("<html><body>text</body></html>")
    >>> len(ids)
    0
    >>> _, ids = ElementTree.XMLID("<html><body id='body'>text</body></html>")
    >>> len(ids)
    1
    >>> ids["body"].tag
    'body'
    """