我们从 Python 开源项目中提取了以下 3 个代码示例，用于说明如何使用 lxml.etree.strip_tags()。
def main(self):
    """Tag the selected elements of every input file and write the result back.

    For each path in ``self.infiles`` the document is loaded with
    ``self.read_xml``, every descendant element named ``self.element`` is
    passed through ``self.tagger.tag_text`` and the tree is handed to
    ``self.serialize``. ``self.counter`` is incremented once per file.

    Two modes, selected by ``self.sentence``:

    * truthy -- each string returned by ``self.get_sentences(e)`` is tagged
      on its own and the output is appended to a new ``<s>`` child of the
      element;
    * falsy -- the serialized element itself is tagged, the output lines are
      joined with newlines, re-parsed as XML and substituted for the
      original element in its parent.

    When ``self.tokenize`` is falsy the tagger is additionally called with
    ``tagonly=True``.
    """
    # Keyword arguments shared by every tagger call.
    # NOTE(review): these look like treetaggerwrapper's tag_text() flags --
    # confirm against the tagger class actually in use.
    tagger_options = {
        'notagdns': True,
        'notagip': True,
        'notagurl': True,
        'notagemail': True,
    }
    if not self.tokenize:
        tagger_options['tagonly'] = True

    def tag(text):
        # Unescape HTML entities for the tagger, then re-escape its output.
        raw = self.tagger.tag_text(html.unescape(text), **tagger_options)
        return self.escape(raw)

    for infile in self.infiles:
        print(infile)
        tree = self.read_xml(infile)
        for e in tree.xpath('.//{}'.format(self.element)):
            if self.sentence:
                for s in self.get_sentences(e):
                    xml = etree.SubElement(e, 's')
                    for line in tag(s):
                        try:
                            xml.append(etree.fromstring(line))
                        except (etree.XMLSyntaxError, ValueError):
                            # The line is not a well-formed XML fragment
                            # (or is a string fromstring() refuses to
                            # parse). Wrap it in a temporary <dummy>
                            # element so it can still be appended.
                            dummy_token = etree.Element('dummy')
                            dummy_token.text = '\n{}\n'.format(line)
                            xml.append(dummy_token)
                    # Drop the temporary wrappers; strip_tags keeps their
                    # text in place inside <s>.
                    etree.strip_tags(xml, 'dummy')
            else:
                serialized = etree.tostring(e, encoding='utf-8').decode()
                xml = etree.fromstring('\n'.join(tag(serialized)))
                e.getparent().replace(e, xml)
        self.serialize(infile, tree)
        self.counter += 1
def stripTags(self, node, *tags):
    """Forward ``node`` and the given tag names to ``etree.strip_tags``.

    Nothing is returned; any effect on ``node`` is whatever
    ``etree.strip_tags`` does with it.
    """
    etree.strip_tags(node, *tags)
def stripTags(self, *tags):
    """Call ``et.strip_tags`` on ``self.tree`` unless ``self.error`` is set.

    ``tags`` are the tag names to strip. They are unpacked into separate
    positional arguments, matching the ``strip_tags(tree_or_element,
    *tag_names)`` signature, rather than passed as one tuple.
    NOTE(review): assumes ``et`` is an alias for ``lxml.etree`` -- confirm
    against the file's imports.
    """
    # Leave the tree untouched when an earlier error has been recorded.
    if self.error:
        return
    et.strip_tags(self.tree, *tags)