我们从Python开源项目中,提取了以下11个代码示例,用于说明如何使用xml.etree.ElementTree()。
def __call__(self, driver, session: requests.Session, element: ElementTree):
    """Accept a driver, an HTTP session and an XML element, and do nothing.

    The body is empty, so the call always returns ``None`` and none of the
    arguments are read.

    Args:
        driver: Not used by this implementation.
        session: A ``requests.Session``; not used by this implementation.
        element: An ``ElementTree``; not used by this implementation.
    """
    return None
def extract_trust_anchors_from_xml(trust_anchor_xml):
    """Takes a bytestring with the XML from IANA; returns a list of trust anchors.

    Args:
        trust_anchor_xml: The trust anchor XML document as bytes.

    Returns:
        A list with one dict per ``KeyDigest`` element. Each dict has the keys
        "KeyTag", "Algorithm", "DigestType" and "Digest" (text of the matching
        subelements) plus "validFrom" and "validUntil" (attribute values, or
        "" when the attribute is absent).

    The helper ``die()`` is called when the document is shorter than 100
    characters, when a ``KeyDigest`` lacks one of the required subelements, or
    when no ``KeyDigest`` element is found at all.
    """
    # Turn the bytes from trust_anchor_xml into a string
    trust_anchor_xml_string = bytes_to_string(trust_anchor_xml)
    # Sanity check: make sure there is enough text in the returned stuff
    if len(trust_anchor_xml_string) < 100:
        die("The XML was too short: {} chars.".format(len(trust_anchor_xml_string)))
    # ElementTree requires a file so use StringIO to turn the string into a file
    try:
        trust_anchor_as_file = StringIO(trust_anchor_xml_string)  # This works for Python 3
    except TypeError:
        # Python 2's io.StringIO rejects byte strings with TypeError; only that
        # failure should trigger the unicode() fallback.
        trust_anchor_as_file = StringIO(unicode(trust_anchor_xml_string))  # noqa: F821
    # Get the tree
    trust_anchor_tree = xml.etree.ElementTree.ElementTree(file=trust_anchor_as_file)
    # Get all the KeyDigest elements
    digest_elements = trust_anchor_tree.findall(".//KeyDigest")
    print("There were {} KeyDigest elements in the trust anchor file.".format(
        len(digest_elements)))
    trust_anchors = []  # List of dicts that is taken from the XML file
    # Collect the values for the KeyDigest subelements and attributes
    for (count, this_digest_element) in enumerate(digest_elements):
        digest_value_dict = {}
        for this_subelement in ["KeyTag", "Algorithm", "DigestType", "Digest"]:
            try:
                this_key_tag_text = this_digest_element.find(this_subelement).text
            except AttributeError:
                # find() returned None: the required subelement is missing.
                die("Did not find {} element in a KeyDigest in a trust anchor.".format(
                    this_subelement))
            digest_value_dict[this_subelement] = this_key_tag_text
        for this_attribute in ["validFrom", "validUntil"]:
            # Missing attributes get empty values
            digest_value_dict[this_attribute] = this_digest_element.get(this_attribute, "")
        # Save this to the trust_anchors list
        print("Added the trust anchor {} to the list:\n{}".format(
            count, pprint.pformat(digest_value_dict)))
        trust_anchors.append(digest_value_dict)
    if not trust_anchors:
        die("There were no trust anchors found in the XML file.")
    return trust_anchors
def trainSVMTK(docs, pairs, dditype, model="svm_tk_classifier.model", excludesentences=None):
    """Write an SVM-light-TK training file for the given pairs and train a model.

    Any previous model and training file under ``ddi_models/`` are removed,
    one training line per eligible pair is written to
    ``ddi_models/<model>.txt``, and ``svm_learn`` is run as a subprocess.
    The process exits via ``sys.exit()`` when no model file exists afterwards.

    Args:
        docs: Mapping indexed by sentence id; ``docs[sid][relations.SENTENCE_PAIRS][p]``
            is passed through to ``get_svm_train_line``.
        pairs: Mapping of pair id to pair data, indexed with the
            ``relations.PAIR_*`` keys.
        dditype: Interaction type to train for, or "all". When it is not
            "all", interacting pairs of another type are skipped.
        model: File name of the model, relative to the model directory.
        excludesentences: Sentence ids whose pairs are left out of training.
            Defaults to no exclusions.
    """
    # A None default avoids sharing one mutable list between calls.
    if excludesentences is None:
        excludesentences = []
    if os.path.isfile("ddi_models/" + model):
        os.remove("ddi_models/" + model)
    if os.path.isfile("ddi_models/" + model + ".txt"):
        os.remove("ddi_models/" + model + ".txt")
    with open("ddi_models/" + model + ".txt", 'w') as train:
        for p in pairs:
            if dditype != "all" and pairs[p][relations.PAIR_DDI] and pairs[p][relations.PAIR_TYPE] != dditype:
                continue
            sid = relations.getSentenceID(p)
            if sid in excludesentences:
                continue
            tree = pairs[p][relations.PAIR_DEP_TREE][:]
            line = get_svm_train_line(tree, pairs[p], sid, docs[sid][relations.SENTENCE_PAIRS][p])
            # Non-interacting pairs become negative examples. Interacting pairs
            # of a different type never reach this point (skipped above), so no
            # second negative-label branch is needed.
            if not pairs[p][relations.PAIR_DDI]:
                line = '-' + line
            train.write(line)
    # NOTE(review): the training file is written under "ddi_models/" but passed
    # to svm_learn as basedir + model + ".txt"; this only works if basedir is
    # "ddi_models/" - confirm.
    svmlightcall = Popen(["./svm-light-TK-1.2/svm-light-TK-1.2.1/svm_learn",
                          "-t", "5", "-L", "0.4", "-T", "2", "-S", "2", "-g", "10",
                          "-D", "0", "-C", "T",
                          basedir + model + ".txt", basedir + model],
                         stdout=PIPE, stderr=PIPE)
    res = svmlightcall.communicate()
    if not os.path.isfile("ddi_models/" + model):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("failed training model " + basedir + model)
        print(res)
        sys.exit()
def get_act(self, scraper):
    """Creates Parse_XML_Action. Higher order function.

    Args:
        scraper: Object with a ``text`` attribute that is read and an
            ``xml_tree`` attribute that is written.

    Returns:
        A zero-argument callable. When called, it runs ``Get_Action`` first if
        ``scraper.text`` is empty, gives up silently if the text is still
        empty, and otherwise stores the parsed document on ``scraper.xml_tree``
        as an ``xml.etree.ElementTree.ElementTree``.
    """
    def act():
        if not scraper.text:
            Get_Action().execute(scraper)
        if not scraper.text:
            return
        # scraper.text is treated as the XML document itself, so it must go
        # through fromstring(); parse() would interpret it as a file name.
        # NOTE(review): assumes scraper.text is the document body, not a path.
        root = xml.etree.ElementTree.fromstring(scraper.text)
        scraper.xml_tree = xml.etree.ElementTree.ElementTree(root)
    return act
def get_act(self, scraper):
    """Creates Find_XML_Elements_Action. Higher-order function.

    Reads ``self.tags``, ``self.attributes`` and ``self.find_subelements``.
    ``self.attributes[i]`` holds the attribute/value pairs an element with tag
    ``self.tags[i]`` must carry to match.

    Args:
        scraper: Object with ``xml_tree`` (an ElementTree or a root Element)
            and ``xml_elements`` (list that matches are appended to).

    Returns:
        A zero-argument callable that appends every matching element to
        ``scraper.xml_elements``. It runs ``Parse_XML_Action`` first when no
        tree is present and does nothing if there is still none afterwards.
    """
    def tree_is_missing(tree):
        # A childless Element is falsy, so truthiness alone cannot tell
        # "no tree yet" from "tree whose root has no children".
        return not hasattr(tree, "tag") and not tree

    def act():
        if tree_is_missing(scraper.xml_tree):
            Parse_XML_Action().execute(scraper)
        if tree_is_missing(scraper.xml_tree):
            return
        # Snapshot: find() appends to scraper.xml_elements while we iterate.
        xml_elements = scraper.xml_elements[:]

        def find(element):
            """Helper function. Recursively traverses the tree and collects
            the elements that satisfy the tag/attribute pairs.

            Args:
                element: the XML element that is about to be examined
            """
            if not self.tags:
                # No tag filter: every element matches. Looking the tag up
                # with index() on the empty list would raise ValueError.
                # NOTE(review): assumes no attribute constraints apply when
                # no tags are configured - confirm.
                required = {}
            elif element.tag in self.tags:
                required = self.attributes[self.tags.index(element.tag)]
            else:
                required = None
            if required is not None and all(
                    key in element.attrib and element.attrib[key] == required[key]
                    for key in required):
                scraper.xml_elements.append(element)
            for sub_element in element:
                find(sub_element)

        if self.find_subelements:
            for e in xml_elements:
                find(e)
        else:
            # An ElementTree has no .tag and is not iterable; start from its
            # root element. A bare Element is used as-is.
            getroot = getattr(scraper.xml_tree, "getroot", None)
            find(getroot() if getroot is not None else scraper.xml_tree)
    return act
def preprocess(self, **k):
    """Read the login name and the hex-encoded credential from an XML document.

    Keyword Args:
        login: Stored on ``self.login`` (``None`` when absent).
        xmlfile: Path or file object of the XML document. Takes precedence
            over ``xml`` when it is not ``None``.
        xml: The XML document as a string; required when ``xmlfile`` is not
            given.

    Sets ``self.cred`` to the hex-decoded text of ``.//Account/Credentials2``,
    falling back to ``.//Account/Credentials3``. ``self.cred`` is ``None``
    when neither element exists or the element has no text.
    """
    import binascii

    self.login = k.get('login')
    if k.get('xmlfile') is not None:
        tree = etree.ElementTree()
        tree.parse(k['xmlfile'])
    else:
        # fromstring() is a module-level function, not an ElementTree method.
        tree = etree.ElementTree(etree.fromstring(k['xml']))
    cred_node = tree.find(".//Account/Credentials2")
    if cred_node is None:
        cred_node = tree.find(".//Account/Credentials3")
    if cred_node is not None and cred_node.text:
        # unhexlify works on Python 2 and 3; str.decode('hex') is Python 2 only.
        self.cred = binascii.unhexlify(cred_node.text)
    else:
        self.cred = None
def get_svm_train_line(tree, pair, sid):
    """Build one SVM-light-TK example line from a bracketed parse tree string.

    The text of the first token of each of the pair's two entities is replaced
    by 'candidatedrug' in the tree. A tree that does not start with '(' is
    wrapped as '(S (<tree> NN))'.

    Args:
        tree: Parse tree as a string.
        pair: Object exposing ``eids`` and ``entities`` (each entity has
            ``tokens`` whose items have ``text``).
        sid: Sentence identifier.

    Returns:
        The string '1 |BT|<tree>|ET| ' followed by a newline.
    """
    lemmatizer = WordNetLemmatizer()
    # The compact ids are computed but not used in the returned line.
    first_entity_id = compact_id(pair.eids[0])
    second_entity_id = compact_id(pair.eids[1])
    for position in (0, 1):
        mention = pair.entities[position].tokens[0].text
        tree = tree.replace(mention, 'candidatedrug')
    entity_prefix = compact_id(sid) + 'e'
    # TODO: replace other entities
    opening = tree[0]
    if opening != '(':
        tree = '(S (' + tree + ' NN))'
    # Tree.fromstring depends on the installed nltk version.
    parsed = Tree.fromstring(tree)
    tagged_leaves = list(parsed.pos())
    # The lemmas are collected but do not contribute to the output line.
    lemmas = []
    for tagged in tagged_leaves:
        wordnet_pos = get_wordnet_pos(tagged[1])
        lemmas.append(lemmatizer.lemmatize(tagged[0].lower(), wordnet_pos))
    logging.debug("tree:" + tree)
    return ''.join(['1 ', '|BT|', tree, '|ET| ', '\n'])
def testSVMTK(sentence, pairs, pairs_list, model="svm_tk_classifier.model", tag=""):
    """Classify the pairs of one sentence with a trained SVM-light-TK model.

    One test line per pair is written to ``<temp_dir><tag>svm_test_data.txt``,
    ``svm_classify`` is run as a subprocess, and the scores are read back from
    ``<temp_dir><tag>svm_test_output.txt``.

    Args:
        sentence: Object whose ``parsetree`` is used as the tree of every pair.
        pairs: Mapping of pair id to pair object (``sid``, ``entities``,
            ``recognized_by``).
        pairs_list: Its length must equal the number of output lines.
        model: Model file name, relative to ``basedir``.
        tag: Prefix for the temporary file names.

    Returns:
        ``pairs``, with ``recognized_by[relations.SST_PRED]`` set to -1 for a
        negative score and to 1 otherwise.

    The process exits via ``sys.exit()`` when the classifier produces no
    output file or the number of output lines differs from ``len(pairs_list)``.
    """
    test_data_path = temp_dir + tag + "svm_test_data.txt"
    test_output_path = temp_dir + tag + "svm_test_output.txt"
    # Files are written and read under temp_dir, so stale copies must be
    # removed there; otherwise an old output file could be taken for a fresh
    # result. The basedir copies are still cleaned up as before.
    stale_candidates = (
        basedir + tag + "svm_test_data.txt",
        basedir + tag + "svm_test_output.txt",
        test_data_path,
        test_output_path,
    )
    for stale_path in stale_candidates:
        if os.path.isfile(stale_path):
            os.remove(stale_path)
    with open(test_data_path, 'w') as test:
        for pid in pairs:
            sid = pairs[pid].sid
            tree = sentence.parsetree
            line = get_svm_train_line(tree, pairs[pid], sid)
            # The leading '-' turns the label of every test example into -1.
            line = '-' + line
            test.write(line)
    svmtklightargs = ["./bin/svm-light-TK-1.2/svm-light-TK-1.2.1/svm_classify",
                      test_data_path, basedir + model, test_output_path]
    svmlightcall = Popen(svmtklightargs, stdout=PIPE, stderr=PIPE)
    res = svmlightcall.communicate()
    if not os.path.isfile(test_output_path):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("something went wrong with SVM-light-TK")
        print(res)
        sys.exit()
    with open(test_output_path, 'r') as out:
        lines = out.readlines()
    if len(lines) != len(pairs_list):
        print("check " + tag + "svm_test_output.txt! something is wrong")
        print(res)
        sys.exit()
    # Output lines are in the order the pairs were written above.
    for p, pid in enumerate(pairs):
        score = float(lines[p])
        if score < 0:
            pairs[pid].recognized_by[relations.SST_PRED] = -1
        else:
            pairs[pid].recognized_by[relations.SST_PRED] = 1
        logging.info("{} - {} SST: {}".format(pairs[pid].entities[0], pairs[pid].entities[1], score))
    return pairs