我们从 Python 开源项目中提取了以下 50 个代码示例，用于说明如何使用 lxml.etree.parse()。
def schematron(cls, schema):
    """Compile a Schematron schema into an XSLT validator.

    Args:
        schema: Either an open file-like object holding the schema
            document (anything exposing ``read``), which is parsed first,
            or an already-parsed document, which is used as-is.

    Returns:
        An ``etree.XSLT`` object built from the schema after it has been
        run, in order, through the three packaged ISO Schematron
        stylesheets.
    """
    transforms = [
        "xml/schematron/iso_dsdl_include.xsl",
        "xml/schematron/iso_abstract_expand.xsl",
        "xml/schematron/iso_svrl_for_xslt1.xsl",
    ]
    # Duck-type the stream check: the ``file`` builtin exists only on
    # Python 2, so ``isinstance(schema, file)`` is a NameError on Python 3
    # and also rejected other file-like objects.
    if hasattr(schema, "read"):
        compiled = etree.parse(schema)
    else:
        compiled = schema
    for filename in transforms:
        with resource_stream(__name__, filename) as stream:
            xform_xml = etree.parse(stream)
            xform = etree.XSLT(xform_xml)
            compiled = xform(compiled)
    return etree.XSLT(compiled)
def parse(inFileName):
    """Parse ``inFileName`` and return the built root binding object.

    The binding class is chosen by ``get_root_tag`` from the document's
    root element; when it reports no class, ``softPkg`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    tag_name, binding_class = get_root_tag(root_element)
    if binding_class is None:
        tag_name, binding_class = 'softPkg', softPkg
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop our reference to the parsed document so the DOM can be
    # garbage-collected once nothing else refers to it.
    document = None
    return instance
def parse(inFileName):
    """Parse ``inFileName`` and return the built root binding object.

    The binding class is chosen by ``get_root_tag`` from the document's
    root element; when it reports no class, ``devicepkg`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    tag_name, binding_class = get_root_tag(root_element)
    if binding_class is None:
        tag_name, binding_class = 'devicepkg', devicepkg
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop our reference to the parsed document so the DOM can be
    # garbage-collected once nothing else refers to it.
    document = None
    return instance
def parse(inFileName):
    """Parse ``inFileName`` and return the built root binding object.

    The binding class is chosen by ``get_root_tag`` from the document's
    root element; when it reports no class, ``deviceconfiguration`` is
    used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    tag_name, binding_class = get_root_tag(root_element)
    if binding_class is None:
        tag_name, binding_class = 'deviceconfiguration', deviceconfiguration
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop our reference to the parsed document so the DOM can be
    # garbage-collected once nothing else refers to it.
    document = None
    return instance
def parse(inFileName):
    """Parse ``inFileName`` and return the built root binding object.

    The binding class is chosen by ``get_root_tag`` from the document's
    root element; when it reports no class,
    ``domainmanagerconfiguration`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    tag_name, binding_class = get_root_tag(root_element)
    if binding_class is None:
        tag_name = 'domainmanagerconfiguration'
        binding_class = domainmanagerconfiguration
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop our reference to the parsed document so the DOM can be
    # garbage-collected once nothing else refers to it.
    document = None
    return instance
def parse(inFileName):
    """Parse ``inFileName`` and return the built root binding object.

    The binding class is chosen by ``get_root_tag`` from the document's
    root element; when it reports no class, ``profile`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    tag_name, binding_class = get_root_tag(root_element)
    if binding_class is None:
        tag_name, binding_class = 'profile', profile
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop our reference to the parsed document so the DOM can be
    # garbage-collected once nothing else refers to it.
    document = None
    return instance
def parse(inFileName):
    """Parse ``inFileName`` and return the built root binding object.

    The binding class is chosen by ``get_root_tag`` from the document's
    root element; when it reports no class, ``softwareassembly`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    tag_name, binding_class = get_root_tag(root_element)
    if binding_class is None:
        tag_name, binding_class = 'softwareassembly', softwareassembly
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop our reference to the parsed document so the DOM can be
    # garbage-collected once nothing else refers to it.
    document = None
    return instance
def parse(inFileName):
    """Parse ``inFileName`` and return the built root binding object.

    The binding class is chosen by ``get_root_tag`` from the document's
    root element; when it reports no class, ``softwarecomponent`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    tag_name, binding_class = get_root_tag(root_element)
    if binding_class is None:
        tag_name, binding_class = 'softwarecomponent', softwarecomponent
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop our reference to the parsed document so the DOM can be
    # garbage-collected once nothing else refers to it.
    document = None
    return instance
def parse(inFileName):
    """Parse ``inFileName`` and return the built root binding object.

    The binding class is chosen by ``get_root_tag`` from the document's
    root element; when it reports no class, ``properties`` is used.
    """
    document = parsexml_(inFileName)
    root_element = document.getroot()
    tag_name, binding_class = get_root_tag(root_element)
    if binding_class is None:
        tag_name, binding_class = 'properties', properties
    instance = binding_class.factory()
    instance.build(root_element)
    # Drop our reference to the parsed document so the DOM can be
    # garbage-collected once nothing else refers to it.
    document = None
    return instance
def _load_image_data(self, file_, source):
    """Build an ``ImageData`` from the specially-tagged elements of an SVG.

    The document is parsed with ``self.parser``.  Elements are located by
    ``id`` within the root's default namespace: ``transform``,
    ``background`` and then ``color1``, ``color2``, ... until the first
    missing index.
    """
    document = etree.parse(source, self.parser)
    svg_root = document.getroot()
    namespace = "{%s}" % svg_root.nsmap[None]

    def find_by_id(element_id):
        # Any element in the default namespace carrying the given id.
        return svg_root.find(".//%s*[@id='%s']" % (namespace, element_id))

    image = ImageData(file_, document)
    image.set_transform(find_by_id("transform"))
    image.set_background(find_by_id("background"))

    for number in count(1):
        color_id = "color" + str(number)
        color_tag = find_by_id(color_id)
        if color_tag is None:
            break
        image.set_color(color_tag, color_id)
    return image
def pdb_chain_stoichiometry_biomolone(pdbid):
    """Get the stoichiometry of the chains in biological assembly 1 as a dictionary.

    NOTE: this is currently a stub -- the body is only ``pass``, so the
    function returns ``None``.  The description below is the intended
    behavior, not what the code does today.

    Steps taken are:
    1) Download PDB and parse header, make biomolecule if provided
    2) Count how many times each chain appears in biomolecule #1
    3) Convert chain id to uniprot id
    4) Return final dictionary

    Args:
        pdbid (str): 4 character PDB ID

    Returns:
        dict: {(ChainID,UniProtID): # occurrences}

    """
    pass
def addParamFile(cls, paramsFile):
    """Load parameter definitions from an XML file onto ``cls``.

    For every ``<Parameter name="...">`` child of the document root:

    * ``cls.<NAME>_LABEL`` is set to the parameter name;
    * a ``<Check_Method>`` child has its text executed as Python source,
      and the function it defines is stored as ``cls.<NAME>_CHECK_METHOD``;
    * ``<Attribute name="X">`` children of ``<NeededAttributes>`` assign
      ``cls.X`` from the executed attribute text;
    * any other child ``<Tag>`` assigns ``cls.<NAME>_<TAG>`` from the
      executed element text.

    The parameter name is then appended to ``cls.ALL_PARAMS``.

    SECURITY: element text from the XML file is executed with ``exec``.
    Only load parameter files from a trusted source.

    Raises:
        PYGA_ParametersError: if a parameter name is already present in
            ``cls.ALL_PARAMS``.
    """
    # One explicit namespace for all executed snippets.  Relying on the
    # implicit function locals makes names defined by one ``exec`` invisible
    # to later code on interpreters where ``locals()`` is a snapshot.
    namespace = {"cls": cls}
    paramXmlRoot = etree.parse(paramsFile)
    for param in paramXmlRoot.getroot():
        if param.tag != "Parameter":
            continue
        paramLabel = param.get("name")
        prefix = paramLabel.upper() + "_"
        # Plain attribute assignment: building source code around the label
        # broke on names containing quotes and executed arbitrary text.
        setattr(cls, prefix + "LABEL", paramLabel)
        for element in param:
            if element.tag == "Check_Method":
                exec(element.text, globals(), namespace)
                # "def name(args): ..." -> "name"
                methodName = element.text.split(' ')[1].split('(')[0]
                setattr(cls, prefix + element.tag.upper(),
                        namespace[methodName])
            elif element.tag != "NeededAttributes":
                exec("cls." + prefix + element.tag.upper() + "="
                     + element.text, globals(), namespace)
            else:
                for attribute in element:
                    if attribute.tag == "Attribute":
                        exec("cls." + attribute.get("name") + "="
                             + attribute.text, globals(), namespace)
        if paramLabel not in cls.ALL_PARAMS:
            cls.ALL_PARAMS.append(paramLabel)
        else:
            raise PYGA_ParametersError(
                "ERROR: Parameter %s defined twice." % paramLabel)
def produce_output(inputf, outfile):
    """Append one PRECONDITION line per event pair found in ``inputf``.

    The input is parsed with blank text removed, event mentions are
    extracted and grouped per sentence, and pairs are generated by the
    project helpers.  Each pair becomes a tab-separated line
    ``<mention A>\\t<mention B>\\tPRECONDITION`` where a mention is its
    tokens joined with ``_``.

    Args:
        inputf: XML source accepted by ``etree.parse``.
        outfile: Path of the file to append to.  It is opened only when
            there is at least one line to write.
    """
    ecbplus = etree.parse(inputf, etree.XMLParser(remove_blank_text=True))

    event_mentions = extract_event_CAT(ecbplus)
    event_per_sentence = event_sentence(ecbplus, event_mentions)
    event_pairs = generate_event_pairs(event_per_sentence)

    lines = [
        "_".join(event_mentions[pair[0]]) + "\t"
        + "_".join(event_mentions[pair[1]]) + "\tPRECONDITION" + "\n"
        for pairs in event_pairs.values()
        for pair in pairs
    ]
    if not lines:
        return
    # Open the output once instead of once per line, and let the context
    # manager close it even if a write fails.
    with open(outfile, "a") as output:
        output.writelines(lines)
def __init__(self, file_like):
    """Read a GEXF document and extract its meta and graph objects.

    Sets ``self.gexf_obj`` from the ``meta`` child and ``self.graph_obj``
    from the ``graph`` child of the root element.  When the root is not
    ``gexf``, or a required child is missing, ``msg_unexpected_tag`` is
    called and initialisation stops early.

    Args:
        file_like: Source accepted by ``etree.parse``.
    """
    parser = etree.XMLParser(ns_clean=True)
    tree = etree.parse(file_like, parser)
    gexf_xml = tree.getroot()
    tag = self.ns_clean(gexf_xml.tag).lower()
    if tag != "gexf":
        self.msg_unexpected_tag("gexf", tag)
        return
    self.gexf_obj = None
    graph_xml = None
    for child in gexf_xml:
        tag = self.ns_clean(child.tag).lower()
        if tag == "meta":
            self.gexf_obj = self.extract_gexf_obj(child)
        if tag == "graph":
            graph_xml = child
    if self.gexf_obj is None:
        self.msg_unexpected_tag("meta", tag)
        return
    if graph_xml is None:
        # Previously this fell through to an unbound-variable error.
        self.msg_unexpected_tag("graph", tag)
        return
    self.graph_obj = self.extract_graph_obj(graph_xml)
def test_to_dict_from_etree(self):
    """Check ``to_dict`` on parsed trees for the vehicles and collection examples.

    For each example, decoding without a namespace map must differ from
    the reference dict, while decoding with ``self.namespaces`` -- through
    the schema method and through ``xmlschema.to_dict`` -- must match it.
    """
    vehicles_tree = _ElementTree.parse('examples/vehicles/vehicles.xml')
    collection_tree = _ElementTree.parse('examples/collection/collection.xml')

    cases = (
        (self.vh_schema, vehicles_tree, _VEHICLES_DICT),
        (self.col_schema, collection_tree, _COLLECTION_DICT),
    )
    for schema, tree, expected in cases:
        # Without a namespace map the XSI namespace is left unmapped.
        self.assertNotEqual(schema.to_dict(tree), expected)
        self.assertEqual(
            schema.to_dict(tree, namespaces=self.namespaces), expected)
        self.assertEqual(
            xmlschema.to_dict(tree, schema.url, namespaces=self.namespaces),
            expected)
def parse_log(self, path):
    """Load the records of an Apple Health export archive into the database.

    Reads ``apple_health_export/export.xml`` from the zip at ``path``,
    converts every ``Record`` element with ``self.parse_record`` (falsy
    results are skipped), deletes existing rows for ``self.SOURCE`` in the
    time range covered by the new events, then merges the events.

    Args:
        path: Path of the export zip archive.
    """
    session = DBSession()

    # The file is parsed in one go; memory use is revisited only if it
    # ever becomes a problem.
    with ZipFile(path, 'r') as zfile:
        with zfile.open('apple_health_export/export.xml') as xfile:
            root = etree.parse(xfile).getroot()

    hk_events = []
    for record in root.iterfind('.//Record'):
        event = self.parse_record(record)
        if event:
            hk_events.append(event)

    if not hk_events:
        # Nothing to replace; min()/max() of an empty list would raise.
        print("Adding {} events".format(len(hk_events)))
        return

    # Find earliest/latest and delete any existing rows from this range.
    times = [e.time for e in hk_events]
    self._flush_old_data(session, self.SOURCE, min(times), max(times))
    session.commit()

    for event in hk_events:
        session.merge(event)
    print("Adding {} events".format(len(hk_events)))
    session.commit()
def readXml(self, simType):
    """Return the SIM file-description XML path and its root element.

    The file sits next to this module: ``sim_files_3g.xml`` when
    ``simType`` equals ``types.TYPE_USIM``, otherwise
    ``sim_files_2g.xml``.  When the file does not exist a fresh root
    element (``sim_3G`` / ``sim_2G``) is created instead; nothing is
    written to disk here.

    Returns:
        tuple: ``(path, root)``.
    """
    is_usim = simType == types.TYPE_USIM
    file_name = "sim_files_3g.xml" if is_usim else "sim_files_2g.xml"
    path = os.path.join(os.path.dirname(__file__), file_name)

    if os.path.exists(path):
        parser = etree.XMLParser(remove_blank_text=True)
        root = etree.parse(path, parser).getroot()
    else:
        logging.warning("File %s not exists" % path)
        logging.info("Create xml")
        root = etree.Element('sim_3G' if is_usim else 'sim_2G')
    return path, root
def _le_xml(self, arquivo):
    """Read an XML document into ``self._xml``.

    Args:
        arquivo: ``None``, a string holding XML markup, a string holding
            a file path, or a non-string object that is first serialised
            with ``etree.tounicode``.

    Returns:
        bool: ``False`` when ``arquivo`` is ``None``, ``True`` once
        ``self._xml`` has been set.
    """
    if arquivo is None:
        return False

    # Non-string input is serialised to text, so from here on ``arquivo``
    # is always a string; the old ``etree.parse`` fallback for non-strings
    # could never run.
    if not isinstance(arquivo, basestring):
        arquivo = etree.tounicode(arquivo)

    if NAMESPACE_NFSE in arquivo:
        arquivo = por_acentos(arquivo)

    if u'<' in arquivo:
        # The string is the markup itself.
        txt = arquivo
    else:
        # The string is a path; close the file even if reading fails.
        with open(arquivo) as arq:
            txt = arq.read()

    self._xml = etree.fromstring(tira_abertura(txt))
    return True
def validar(self):
    """Validate ``self.xml`` against the configured XSD schema.

    Messages of error-log entries with level 1 are appended to
    ``self.alertas`` and those with level 2 to ``self.erros``, with the
    NF-e namespace prefix stripped from the text.

    Returns:
        The schema's error log.
    """
    namespace = '{http://www.portalfiscal.inf.br/nfe}'
    schema_path = self.caminho_esquema + self.arquivo_esquema

    # The encoding declaration is removed first to avoid
    # unicode-to-ascii conversion errors.
    xml_bytes = tira_abertura(self.xml).encode(u'utf-8')

    schema = etree.XMLSchema(etree.parse(schema_path))
    is_valid = schema.validate(etree.fromstring(xml_bytes))
    if not is_valid:
        for entry in schema.error_log:
            if entry.level == 1:
                target = self.alertas
            elif entry.level == 2:
                target = self.erros
            else:
                continue
            target.append(entry.message.replace(namespace, ''))

    return schema.error_log
def xml_to_xsd_validation(file_xml, file_xsd):
    """Verify that an XML file complies with an XSD schema.

    Arguments:
        1. file_xml: Input xml file
        2. file_xsd: xsd file the xml is validated against

    Return:
        True when the document validates; False when either file fails
        to parse or the document does not satisfy the schema.
    """
    try:
        print_info("Validating:{0}".format(file_xml))
        print_info("xsd_file:{0}".format(file_xsd))
        xml_doc = parse(file_xml)
        xsd_doc = parse(file_xsd)
        xmlschema = XMLSchema(xsd_doc)
        xmlschema.assert_(xml_doc)
        return True

    except XMLSyntaxError as err:
        print_error("PARSING ERROR:{0}".format(err))
        return False

    # ``except X, err`` is Python 2-only syntax; ``as`` works on 2.6+ and 3.
    except AssertionError as err:
        print_error("Incorrect XML schema: {0}".format(err))
        return False
def getChildTextbyParentAttribute(datafile, pnode, patt, pattval, cnode):
    """Return the text of a child of the first parent with a given attribute value.

    datafile = xml file searched
    pnode = parent node path, searched from the root
    patt = parent node attribute
    pattval = parent node attribute value
    cnode = child node

    Returns the child's text for the first matching parent, None when
    that parent has no such child, and False when no parent matches.
    """
    root = ElementTree.parse(datafile).getroot()
    for parent in root.findall(pnode):
        if parent.get(patt) != pattval:
            continue
        # Only the first matching parent is ever inspected.
        child = parent.find(cnode)
        return None if child is None else child.text
    return False
def getChildTextbyParentTag(datafile, pnode, cnode):
    """Return the text of a child of the first matching parent.

    datafile = xml file searched
    pnode = parent node
    cnode = child node

    Returns the child's text, or False when either the parent or the
    child cannot be found.
    """
    root = ElementTree.parse(datafile).getroot()
    parent = root.find(pnode)
    child = parent.find(cnode) if parent is not None else None
    if child is None:
        return False
    return child.text
def getChildTextbyOtherChild(datafile, pnode, cnode, cvalue, rnode):
    """Return the text of one child, selected by the value of a sibling.

    Searches the parents matching ``pnode`` for the first one whose
    ``cnode`` child has text ``cvalue`` and which also has an ``rnode``
    child, and returns that ``rnode`` child's text.

    datafile = xml file searched
    pnode = parent node
    cnode = child node
    cvalue = child node value
    rnode = reference node

    Returns False when no parent qualifies.  Parents without a ``cnode``
    child are skipped rather than raising AttributeError.
    """
    root = ElementTree.parse(datafile).getroot()
    for node in root.findall(pnode):
        child = node.find(cnode)
        if child is None or child.text != cvalue:
            continue
        reference = node.find(rnode)
        if reference is not None:
            return reference.text
    return False
def verifyParentandChildrenMatch(datafile, pnode, cnode, cvalue, rnode, rvalue):
    """Check whether some parent has two children with the given values.

    Returns True when a parent matching ``pnode`` has a ``cnode`` child
    whose text is ``cvalue`` and an ``rnode`` child whose text is
    ``rvalue``; otherwise False.

    datafile = xml file searched
    pnode = parent node
    cnode = child node
    cvalue = child node value
    rnode = reference node
    rvalue = reference node value

    Parents without a ``cnode`` child are skipped rather than raising
    AttributeError.
    """
    root = ElementTree.parse(datafile).getroot()
    for node in root.findall(pnode):
        child = node.find(cnode)
        if child is None or child.text != cvalue:
            continue
        reference = node.find(rnode)
        if reference is not None and reference.text == rvalue:
            return True
    return False
def getElementsListWithTagAttribValueMatch(datafile, tag, attrib, value):
    """Return every descendant element with a given tag and attribute value.

    Arguments:
        datafile = input xml file to be parsed.
        tag = tag of the elements to be searched for.
        attrib = attribute name the element must carry.
        value = value that attribute must have.

    Returns a list of the matching elements, searched at any depth below
    the root.
    """
    query = ".//%s[@%s='%s']" % (tag, attrib, value)
    root = ElementTree.parse(datafile).getroot()
    return list(root.iterfind(query))
def getElementListWithSpecificXpath(datafile, xpath):
    """Return every element of an XML file matching a path expression.

    Arguments:
        datafile = input xml file to be parsed.
        xpath = a path expression as supported by python's ElementTree,
            refer https://docs.python.org/2/library/xml.etree.elementtree.html

    Returns a list of the matching elements, evaluated relative to the
    document root.
    """
    root = ElementTree.parse(datafile).getroot()
    return list(root.iterfind(xpath))
def getConfigElementTextWithSpecificXpath(datafile, xpath):
    """Return the serialised ``config`` element of a referenced file.

    The first element of ``datafile`` matching ``xpath`` must contain the
    path of a second XML file (a netconf config file); that file is parsed
    and its ``config`` element is serialised with
    ``ElementTree.tostring``.

    Arguments:
        datafile = input xml file to be parsed.
        xpath = a path expression as supported by python's ElementTree,
            refer https://docs.python.org/2/library/xml.etree.elementtree.html
    """
    config_path = ElementTree.parse(datafile).getroot().find(xpath).text
    config_element = ElementTree.parse(config_path).find('config')
    return ElementTree.tostring(config_element)
def getChildElementWithSpecificXpath(start, xpath):
    """Return the first element under ``start`` that matches ``xpath``.

    Arguments:
        start = xml file (a path string or file-like object) or a parent
            element
        xpath = a path expression as supported by python's ElementTree,
            refer https://docs.python.org/2/library/xml.etree.elementtree.html

    Returns the matching element, None when nothing matches, and False
    when ``start`` is a file that is not found or is of an unsupported
    type.
    """
    node = False
    # ``file`` is a Python 2-only builtin; accept strings and anything
    # file-like instead.
    if isinstance(start, str) or hasattr(start, "read"):
        if file_Utils.fileExists(start):
            node = ElementTree.parse(start).getroot()
        else:
            print_warning('The file={0} is not found.'.format(start))
    elif isinstance(start, ElementTree.Element):
        node = start

    # The original test, ``is not False or is not None``, was always true,
    # so a missing file ended in ``False.find(...)``.
    if node is False or node is None:
        return False
    return node.find(xpath)
def get_element_by_attribute(xml_file, tag_name, attr_name, attr_value):
    """Return the first element with the given tag and attribute value.

    The file is parsed with ``minidom``.  Returns False when no element
    named ``tag_name`` has ``attr_name`` equal to ``attr_value``.
    """
    document = minidom.parse(xml_file)
    for candidate in document.getElementsByTagName(tag_name):
        if candidate.getAttribute(attr_name) == attr_value:
            return candidate
    return False
def del_tags_from_xml(xml, tag_list=None):
    """Delete tags from an XML document, by name or by xpath.

    Arguments:
        1. xml: xml file path or xml string
        2. tag_list: list of tags to remove.  A plain entry is a tag name
           and matches at any depth; an entry containing ``xpath=`` uses
           the text after that marker as the path expression.

    Returns:
        The resulting document serialised as a utf-8 xml string.
    """
    if os.path.exists(xml):
        root = ElementTree.parse(xml).getroot()
    else:
        root = ElementTree.fromstring(xml)

    for tag in tag_list or ():
        if 'xpath=' in tag:
            # Cut the marker off explicitly.  ``str.strip('xpath=')``
            # removes any of the characters x, p, a, t, h and = from BOTH
            # ends, which mangled expressions such as ``xpath=.//path``.
            expression = tag.split('xpath=', 1)[1]
        else:
            expression = ".//{0}".format(tag)
        req_tags = getChildElementsListWithSpecificXpath(root, expression)
        recursive_delete_among_children(root, req_tags)

    return ElementTree.tostring(root, encoding='utf-8', method='xml')
def convert_xml_to_list_of_dict(file_name):
    """Convert an xml file into a list of ordered dictionaries.

    Arguments:
        file_name : xml file path

    Returns:
        list_of_dict: one OrderedDict per child of the root, mapping the
        tag of each grandchild to its text.
    """
    root = ElementTree.parse(file_name).getroot()
    return [
        OrderedDict((field.tag, field.text) for field in record)
        for record in root
    ]

#2016/06/22 ymizugaki add begin
def extract_svg_content(filename):
    """Return the top-level elements of an SVG file with ids made unique.

    Every ``id`` other than ``origin`` gets a fresh prefix, and ``#id``
    references in the file text are rewritten to match.  Namespaces are
    stripped from the tags, and ``title`` / ``desc`` children of the root
    are left out of the result.

    NOTE: references are rewritten with a plain text search and replace,
    so unrelated text that happens to contain ``#<id>`` is changed too.
    """
    prefix = unique_prefix() + "_"

    def has_renamable_id(el):
        return "id" in el.attrib and el.attrib["id"] != "origin"

    # First pass: collect the ids.  ``iter()`` replaces the deprecated
    # ``getiterator()``.
    root = etree.parse(filename).getroot()
    ids = [el.attrib["id"] for el in root.iter() if has_renamable_id(el)]

    with open(filename) as f:
        content = f.read()
    for i in ids:
        content = content.replace("#" + i, "#" + prefix + i)

    # Second pass: rename the ids themselves and drop the SVG namespace.
    root = etree.fromstring(content)
    for el in root.iter():
        if has_renamable_id(el):
            el.attrib["id"] = prefix + el.attrib["id"]
        if '}' in str(el.tag):
            el.tag = el.tag.split('}', 1)[1]
    return [x for x in root if x.tag and x.tag not in ["title", "desc"]]
def file(self, path, output, args=None, progress_callback=lambda *x: None):
    """Download ``path`` from the file server into ``output``.

    Args:
        path: Path joined (URL-quoted) onto ``self.file_url``.
        output: Object handed to curl as ``WRITEDATA``.
        args: Optional mapping of query parameters; a copy is remembered
            in ``self.web_cache[path]``.
        progress_callback: Handed to curl as ``XFERINFOFUNCTION``.

    Raises:
        ServerError: if the response status is not 200.
    """
    # ``None`` replaces the shared mutable ``{}`` default.
    args = {} if args is None else args
    self.logger.debug('??????????')
    self.web_cache[path] = dict(args)
    url = urllib.parse.urljoin(self.file_url, urllib.parse.quote(path))
    if len(args) > 0:
        url += '?' + urllib.parse.urlencode(args)
    self.logger.debug('HTTP ?????{}'.format(url))
    self.curl.setopt(pycurl.URL, url)
    self.curl.setopt(pycurl.COOKIE, self.web_cookie)
    self.curl.setopt(pycurl.NOBODY, False)
    self.curl.setopt(pycurl.NOPROGRESS, False)
    self.curl.setopt(pycurl.WRITEDATA, output)
    self.curl.setopt(pycurl.HEADERFUNCTION, lambda *x: None)
    self.curl.setopt(pycurl.XFERINFOFUNCTION, progress_callback)
    self.curl.perform()
    status = self.curl.getinfo(pycurl.RESPONSE_CODE)
    if status != 200:
        raise ServerError(status)
def file_size(self, path, args=None):
    """Return the download size that curl reports for ``path``.

    The request is made with ``NOBODY`` set, so no body is transferred.

    Args:
        path: Path joined (URL-quoted) onto ``self.file_url``.
        args: Optional mapping of query parameters; a copy is remembered
            in ``self.web_cache[path]``.

    Returns:
        The value of curl's ``CONTENT_LENGTH_DOWNLOAD``.

    Raises:
        ServerError: if the response status is not 200.
    """
    # ``None`` replaces the shared mutable ``{}`` default.
    args = {} if args is None else args
    self.logger.debug('????????????')
    self.web_cache[path] = dict(args)
    url = urllib.parse.urljoin(self.file_url, urllib.parse.quote(path))
    if len(args) > 0:
        url += '?' + urllib.parse.urlencode(args)
    self.logger.debug('HTTP ?????{}'.format(url))
    self.curl.setopt(pycurl.URL, url)
    self.curl.setopt(pycurl.COOKIE, self.web_cookie)
    self.curl.setopt(pycurl.NOBODY, True)
    self.curl.setopt(pycurl.NOPROGRESS, True)
    self.curl.setopt(pycurl.WRITEDATA, io.BytesIO())
    self.curl.setopt(pycurl.HEADERFUNCTION, lambda *x: None)
    self.curl.setopt(pycurl.XFERINFOFUNCTION, lambda *x: None)
    self.curl.perform()
    status = self.curl.getinfo(pycurl.RESPONSE_CODE)
    if status != 200:
        raise ServerError(status)
    return self.curl.getinfo(pycurl.CONTENT_LENGTH_DOWNLOAD)
def web_redirect(self, path, args=None):
    """Request ``path`` on the web server and return its redirect target.

    Args:
        path: Path joined (URL-quoted) onto ``self.web_url``.
        args: Optional mapping of query parameters; a copy is remembered
            in ``self.web_cache[path]``.

    Returns:
        The decoded value of the first ``Location:`` response header, or
        None when there is none.

    Raises:
        ServerError: if the response status is not 302.
    """
    # ``None`` replaces the shared mutable ``{}`` default.
    args = {} if args is None else args
    self.logger.debug('????????????')
    self.web_cache[path] = dict(args)
    url = urllib.parse.urljoin(self.web_url, urllib.parse.quote(path))
    if len(args) > 0:
        url += '?' + urllib.parse.urlencode(args)
    self.logger.debug('HTTP ?????{}'.format(url))
    headers = io.BytesIO()
    self.curl.setopt(pycurl.URL, url)
    self.curl.setopt(pycurl.COOKIE, self.web_cookie)
    self.curl.setopt(pycurl.NOBODY, False)
    self.curl.setopt(pycurl.NOPROGRESS, True)
    self.curl.setopt(pycurl.WRITEDATA, NoneIO())
    self.curl.setopt(pycurl.HEADERFUNCTION, headers.write)
    self.curl.setopt(pycurl.XFERINFOFUNCTION, lambda *x: None)
    self.curl.perform()
    status = self.curl.getinfo(pycurl.RESPONSE_CODE)
    if status != 302:
        raise ServerError(status)
    for header_line in headers.getvalue().split(b'\r\n'):
        if header_line.startswith(b'Location:'):
            return header_line.split(b':', maxsplit=1)[1].strip().decode()
    return None
def execute(path, cmd, uuid):
    """Run an nmap command and render its XML output as an HTML report.

    The command is run with ``-oA <OUTPUT_PATH>/<uuid>`` appended.  The
    resulting ``.xml`` file is transformed with the stylesheet referenced
    by the document itself and written to ``<OUTPUT_PATH>/<uuid>.html``.

    Args:
        path: The nmap executable.
        cmd: The nmap arguments, as one string.
        uuid: Base name of the output files.
    """
    filename = "%s/%s" % (OUTPUT_PATH, uuid)
    nmap_cmd = '%s %s -oA %s' % (path, cmd, filename)
    ops = NmapOptions()
    ops.parse_string(nmap_cmd)
    proc = subprocess.Popen(ops.render(), shell=False)
    proc.wait()

    print('\n[%s] Finished execution of command "%s"'
          % (datetime.datetime.now(), cmd))

    dom = ET.parse("%s.xml" % filename)
    # The stylesheet is taken from the second node preceding the root
    # element.  NOTE(review): there is still no error checking here; a
    # document without that node fails with AttributeError.
    stylesheet = dom.getroot().getprevious().getprevious().parseXSL()
    transform = ET.XSLT(stylesheet)
    html = transform(dom)
    # Binary mode, because the serialised tree is written as bytes; the
    # context manager closes the report, which was previously left open.
    with open('%s.html' % filename, 'wb') as html_file:
        html.write(html_file)

    print('[%s] HTML report generated (%s.html)'
          % (datetime.datetime.now(), filename))
def remove_resource_value(issue, filepath, ignore_layouts_value):
    """Remove unused elements from a resource xml file and save it back.

    Nothing is changed when ``filepath`` does not exist, or when
    ``ignore_layouts_value`` is not False and the first entry of
    ``issue.elements`` is of type ``layout``.

    Args:
        issue: Object whose ``elements`` is a sequence of
            ``(tag, name, ...)`` entries to remove.
        filepath: Path of the resource xml file.
        ignore_layouts_value: False to process layout issues as well.
    """
    # A single formatted argument keeps the output identical on Python 2
    # and makes this valid Python 3.
    print("remove_resource_value()...%s --> %s"
          % (issue.elements[0][0], filepath))
    if not os.path.exists(filepath):
        return
    if ignore_layouts_value is not False and issue.elements[0][0] == 'layout':
        return

    doCheck(filepath=filepath, issue=issue)
    # Keep the file as close to its original form as possible.
    parser = etree.XMLParser(remove_blank_text=False, remove_comments=False,
                             remove_pis=False, strip_cdata=False,
                             resolve_entities=False)
    for element in issue.elements:
        print('removing {0} from resource {1}'.format(element, filepath))
        tree = etree.parse(filepath, parser)
        root = tree.getroot()
        query = './/{0}[@name="{1}"]'.format(element[0], element[1])
        for unused_value in root.findall(query):
            # Matches may be nested; ``root.remove`` only accepts direct
            # children of the root, so remove from the real parent.
            unused_value.getparent().remove(unused_value)
        with open(filepath, 'wb') as resource:
            tree.write(resource, encoding='utf-8', xml_declaration=True)
def _lxml_default_loader(href, parse, encoding=None, parser=None):
    """Load a resource referenced by an include directive.

    Args:
        href: File path or, when it contains ``://``, a URL.
        parse: ``"xml"`` to return the parsed root element; any other
            value returns the resource as decoded text.
        encoding: Text encoding for the non-xml case; defaults to utf-8.
        parser: Parser passed to ``etree.parse`` in the xml case.

    Returns:
        The root element, or the decoded text.
    """
    if parse == "xml":
        return etree.parse(href, parser).getroot()

    if "://" in href:
        f = urlopen(href)
    else:
        f = open(href, 'rb')
    # Close the handle even when reading fails.
    try:
        data = f.read()
    finally:
        f.close()
    return data.decode(encoding or 'utf-8')

##
# Wrapper for ET compatibility - drops the parser
def _wrap_et_loader(loader):
    """Adapt a three-argument loader to the four-argument loader signature.

    The returned function accepts ``(href, parse, encoding=None,
    parser=None)`` and forwards everything except ``parser`` to
    ``loader``.
    """
    def load(href, parse, encoding=None, parser=None):
        # ``parser`` is accepted for signature compatibility and dropped.
        result = loader(href, parse, encoding)
        return result
    return load

##
# Expand XInclude directives.
#
# @param elem Root element.
# @param loader Optional resource loader.  If omitted, it defaults
#     to {@link default_loader}.  If given, it should be a callable
#     that implements the same interface as <b>default_loader</b>.
# @throws FatalIncludeError If the function fails to include a given
#     resource, or if the tree contains malformed XInclude elements.
# @throws IOError If the function fails to load a given resource.
# @returns the node or its replacement if it was an XInclude node
def simpleparsefile():
    """
    Test the xmllib-based parser.

    >>> from elementtree import SimpleXMLTreeBuilder
    >>> parser = SimpleXMLTreeBuilder.TreeBuilder()
    >>> tree = ElementTree.parse("samples/simple.xml", parser)
    >>> normalize_crlf(tree)
    >>> tree.write(sys.stdout)
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element />
    </root>
    """
    # NOTE(review): the line breaks and indentation of the expected output
    # above are a reconstruction -- the whitespace was lost in this listing.
    # Confirm it against samples/simple.xml before relying on the doctest.
    # doesn't work with lxml.etree
def parsefile():
    """
    Test parsing from file.  Note that we're opening the files in
    here; by default, the 'parse' function opens the file in binary
    mode, and doctest doesn't filter out carriage returns.

    >>> file = open("samples/simple.xml", "rb")
    >>> tree = ElementTree.parse(file)
    >>> file.close()
    >>> tree.write(stdout())
    <root>
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>
    >>> file = open("samples/simple-ns.xml", "rb")
    >>> tree = ElementTree.parse(file)
    >>> file.close()
    >>> tree.write(stdout())
    <root xmlns="http://namespace/">
       <element key="value">text</element>
       <element>text</element>tail
       <empty-element/>
    </root>
    """
    # NOTE(review): the line breaks and indentation of the expected output
    # above are a reconstruction -- the whitespace was lost in this listing.
    # Confirm it against the sample files before relying on the doctest.
def add_projection_indices(uttlist, token_xpath, attrib_name, outdir):
    ## Taken from: ~/proj/dnn_tts/script/add_token_index.py
    '''
    For utts in uttlist, add attribute called <attrib_name> to all nodes
    matching <token_xpath> with a corpus-unique integer value > 0. Add default
    0-valued attrib at root node.

    Each modified utterance is written to <outdir> under its original
    file name.
    '''
    next_index = 1
    for uttfile in uttlist:
        utt = etree.parse(uttfile)

        ## Clear the target attribute from every node to be safe:
        for node in utt.xpath('//*'):
            node.attrib.pop(attrib_name, None)

        ## 0 is the default 'n/a' value -- *some* ancestor of every node
        ## will have the relevant attribute to fall back on:
        utt.getroot().attrib[attrib_name] = '0'

        for node in utt.xpath(token_xpath):
            node.attrib[attrib_name] = str(next_index)
            next_index += 1

        outfile = os.path.join(outdir, os.path.basename(uttfile))
        utt.write(outfile, encoding='utf-8', pretty_print=True)
def list_courses(self):
    '''
    List courses available in Studio site.

    Fetches ``<BASE>/home/`` and collects every
    ``<li class="course-item">`` element of the page.

    Returns a dict with ``xml`` (the list of elements) and
    ``course_ids`` (their ``data-course-key`` attributes).  Each id is
    printed when ``self.verbose`` is set.
    '''
    self.ensure_studio_site()
    url = "%s/home/" % self.BASE
    ret = self.ses.get(url)
    parser = etree.HTMLParser()
    xml = etree.parse(StringIO(ret.content), parser).getroot()
    courses = []
    course_ids = []
    for course in xml.findall('.//li[@class="course-item"]'):
        cid = course.get("data-course-key")
        if self.verbose:
            # Function-call form: same output on Python 2, valid on 3.
            print(cid)  # etree.tostring(course)
        courses.append(course)
        course_ids.append(cid)
    return {'xml': courses,
            'course_ids': course_ids,
            }
def _get_block_child_info_from_content_preview(self, block_id):
    '''
    Get child info dict from content preview.

    The ``container_preview`` view of the block is fetched and its HTML
    scanned for draggable xblock wrappers; each wrapper's
    ``data-locator`` is then fetched as an xblock.

    Returns a dict with ``children`` (the fetched xblocks) and
    ``child_ids`` (their locators), in page order.
    '''
    preview = self.get_xblock(usage_key=block_id, view="container_preview")
    markup = preview['html']
    html_parser = etree.HTMLParser()
    page = etree.parse(StringIO(markup), html_parser).getroot()

    wrappers = page.findall(
        './/li[@class="studio-xblock-wrapper is-draggable"]')
    child_ids = [wrapper.get('data-locator') for wrapper in wrappers]
    children = [self.get_xblock(usage_key=cid) for cid in child_ids]
    return {'children': children,
            'child_ids': child_ids,
            }
def process_file(self, fn):
    """Build a test specification from a course XML file and print it as YAML.

    Every ``problem`` element yields one test made of its ``url_name``
    and the ``correct_answer`` of each ``textline`` found inside its
    ``customresponse`` elements; every response is expected to be
    ``correct``.  The truthy values of username, password, course_id and
    site_base_url from ``self.optargs`` are copied into the ``config``
    section.  The number of tests is reported on stderr.

    Args:
        fn: XML source accepted by ``etree.parse``.
    """
    xml = etree.parse(fn).getroot()
    tests = []
    for problem in xml.findall('.//problem'):
        url_name = problem.get('url_name')
        responses = []
        for cr in problem.findall('.//customresponse'):
            for line in cr.findall('.//textline'):
                responses.append(line.get('correct_answer'))
        test = {'url_name': url_name,
                'responses': responses,
                'expected': ['correct'] * len(responses)}
        tests.append(test)
    sys.stderr.write("%d tests added\n" % len(tests))
    cut_spec = {'config': {}, 'tests': tests}

    config_keys = ["username", "password", "course_id", "site_base_url"]
    for ck in config_keys:
        val = getattr(self.optargs, ck)
        if val:
            cut_spec['config'][ck] = val

    # Function-call form: same output on Python 2, valid on Python 3.
    print(yaml.dump(cut_spec))
def get_documentation(self, element, namespace=None, schema_str=None):
    """**Helper method:** return the schema documentation for an element.

    The element is given in `Clark's Notation`_ (`{url:schema}Element`),
    for instance as it appears in the error messages of the validate
    method.

    :param str element: Element string following the Clark's Notation
    :param dict namespace: Prefix-to-URI map used for the XPath query;
        defaults to the ``xs`` XML Schema namespace
    :param schema_str: Not used by this implementation
    :returns: The text of the first match, or an empty string when
        nothing matches or the match has no text
    :rtype: unicode

    .. _`Clark's Notation`: http://effbot.org/zone/element-namespaces.htm
    """
    if namespace is None:
        namespace = {'xs': 'http://www.w3.org/2001/XMLSchema'}
    schema_root = etree.parse(StringIO(self.schema))

    query = self.get_element_from_clark(element)
    matches = schema_root.xpath(query, namespaces=namespace)
    if not matches:
        return ''
    return matches[0].text or ''
def _transform_to_html(self, content, xslt_package=None, xslt_path=None):
    """Render an XML document as HTML through an XSLT stylesheet.

    Also sets the ``Content-Type`` and ``Content-Length`` headers on
    ``response``.

    Args:
        content: The XML document as a text string.
        xslt_package: Package holding the stylesheet; defaults to this
            module's package.
        xslt_path: Resource path of the stylesheet; defaults to the
            bundled GEMINI2 HTML stylesheet.

    Returns:
        The pretty-printed serialised HTML.
    """
    xslt_package = xslt_package or __name__
    xslt_path = xslt_path or \
        '../templates/ckanext/spatial/gemini2-html-stylesheet.xsl'

    # TODO: optimise -- read the transform only once and compile it,
    # rather than on every request.
    with resource_stream(xslt_package, xslt_path) as style:
        style_xml = etree.parse(style)
        transformer = etree.XSLT(style_xml)

    xml = etree.parse(StringIO(content.encode('utf-8')))
    html = transformer(xml)
    result = etree.tostring(html, pretty_print=True)

    response.headers['Content-Type'] = 'text/html; charset=utf-8'
    # The length must describe the body actually returned (the HTML),
    # not the XML that went in.
    response.headers['Content-Length'] = len(result)
    return result