我们从 Python 开源项目中提取了以下 5 个代码示例,用于说明如何使用 html5lib.getTreeWalker()。
async def taskTx(sock, message, mtype):
    """Send ``message`` to ``sock``, sanitizing its HTML unless it is a control code.

    Two byte-string control codes are answered with fixed replies and never
    reach the HTML pipeline:

    * ``b"200"`` -- sends ``"Goodbye."`` and then closes the socket.
    * ``b"202"`` -- sends the authentication-success notice.

    Any other message is parsed as an HTML fragment, passed through the
    sanitizer filter and serialized back to a string.  That string is sent
    as-is when the module-level ``revertProtocol`` flag is truthy, otherwise
    it is wrapped in a JSON object with ``MSG_TYPE`` and ``MSG`` keys.

    :arg sock: object exposing awaitable ``send(data)`` and ``close()``
    :arg message: payload to transmit (control codes are compared as bytes)
    :arg mtype: value stored under ``MSG_TYPE`` in the JSON envelope
    :returns: ``None``
    """
    # Control codes carry no user markup, so reply to them before doing any
    # (comparatively expensive) parse/sanitize/serialize work.
    if message == b"200":
        await sock.send("Goodbye.")
        await sock.close()
        return
    if message == b"202":
        await sock.send("Authentication Successful, you are now the admin terminal.")
        return

    # Parse -> walk -> sanitize -> serialize, all on the "dom" tree flavour.
    parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
    walker = html5lib.getTreeWalker("dom")
    fragment = parser.parseFragment(message)
    cleaned = sanitizer.Filter(walker(fragment))
    # serialize() yields text chunks; join them once instead of repeated +=.
    tx = "".join(html5lib.serializer.HTMLSerializer().serialize(cleaned))

    # revertProtocol is only read here, so no ``global`` declaration is needed.
    if revertProtocol:
        await sock.send(tx)
    else:
        await sock.send(json.dumps({"MSG_TYPE": mtype, "MSG": tx}))
def setUp(self):
    """Create the parser, tree walker and serializer fixtures on ``self``.

    Sets three attributes: ``parser`` (an ``etree.XMLParser``), ``treewalker``
    (the html5lib walker registered under ``"lxml"``) and ``serializer`` (an
    ``HTMLSerializer`` built with default options).
    """
    # The XML parser is created with entity resolution switched off.
    xml_parser = etree.XMLParser(resolve_entities=False)
    lxml_walker = html5lib.getTreeWalker("lxml")
    html_serializer = serializer.HTMLSerializer()

    self.parser = xml_parser
    self.treewalker = lxml_walker
    self.serializer = html_serializer
def write_node(node, out):
    """Serialize ``node`` to HTML text and write it to ``out`` chunk by chunk.

    :arg node: tree node handed to the html5lib ``"dom"`` tree walker
    :arg out: object with a ``write(text)`` method that receives each chunk
    """
    serializer_options = {
        "quote_attr_values": 'always',
        "minimize_boolean_attributes": False,
        "use_best_quote_char": True,
        "omit_optional_tags": False,
    }
    token_stream = html5lib.getTreeWalker("dom")(node)
    html_serializer = html5lib.serializer.HTMLSerializer(**serializer_options)

    # Only ``out.write`` is relied upon, so emit the chunks one at a time.
    for chunk in html_serializer.serialize(token_stream):
        out.write(chunk)
def __init__(self, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False,
             url_re=URL_RE, email_re=EMAIL_RE):
    """Set up a Linker: store its configuration and build the html5lib helpers.

    :arg list callbacks: callbacks to run when adjusting tag attributes;
        defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

    :arg list skip_tags: tags whose contents should not be linkified; for
        example ``['pre']`` leaves the contents of ``pre`` tags alone

    :arg bool parse_email: whether or not to linkify email addresses

    :arg re url_re: url matching regex

    :arg re email_re: email matching regex

    """
    # html5lib plumbing: parser without namespaced elements, 'etree' walker.
    self.parser = html5lib.HTMLParser(namespaceHTMLElements=False)
    self.walker = html5lib.getTreeWalker('etree')

    serializer_options = {
        'quote_attr_values': 'always',
        'omit_optional_tags': False,
        # Sanitizing is not linkify's job, so the serializer must not do it.
        'sanitize': False,
        # The serializer's attribute alphabetizing is switched off (the
        # original note reads "linkify alphabetizes").
        'alphabetical_attributes': False,
    }
    self.serializer = HTMLSerializer(**serializer_options)

    # Caller-supplied configuration, stored unchanged.
    self.callbacks = callbacks
    self.skip_tags = skip_tags
    self.parse_email = parse_email
    self.url_re = url_re
    self.email_re = email_re
def __init__(self, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES,
             styles=ALLOWED_STYLES, protocols=ALLOWED_PROTOCOLS, strip=False,
             strip_comments=True, filters=None):
    """Set up a Cleaner: store the allow-lists and build the parsing helpers.

    :arg list tags: allowed list of tags; defaults to
        ``bleach.sanitizer.ALLOWED_TAGS``

    :arg dict attributes: allowed attributes; can be a callable, list or dict;
        defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

    :arg list styles: allowed list of css styles; defaults to
        ``bleach.sanitizer.ALLOWED_STYLES``

    :arg list protocols: allowed list of protocols for links; defaults
        to ``bleach.sanitizer.ALLOWED_PROTOCOLS``

    :arg bool strip: whether or not to strip disallowed elements

    :arg bool strip_comments: whether or not to strip HTML comments

    :arg list filters: list of html5lib Filter classes to pass streamed
        content through; a falsy value is stored as an empty list

        .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

        .. Warning::

           Using filters changes the output of ``bleach.Cleaner.clean``.
           Make sure the way the filters change the output are secure.

    """
    # html5lib plumbing: parser without namespaced elements, 'etree' walker.
    self.parser = BleachHTMLParser(namespaceHTMLElements=False)
    self.walker = html5lib.getTreeWalker('etree')

    serializer_options = {
        'quote_attr_values': 'always',
        'omit_optional_tags': False,
        'escape_lt_in_attrs': True,
        # Entities are left exactly as written: no escaping, resolving or
        # expanding.
        'resolve_entities': False,
        # Bleach ships its own sanitizer, so html5lib's stays off.
        'sanitize': False,
        # The Bleach sanitizer already alphabetizes attributes, so
        # html5lib's alphabetizing stays off as well.
        'alphabetical_attributes': False,
    }
    self.serializer = BleachHTMLSerializer(**serializer_options)

    # Caller-supplied configuration.
    self.tags = tags
    self.attributes = attributes
    self.styles = styles
    self.protocols = protocols
    self.strip = strip
    self.strip_comments = strip_comments
    self.filters = filters if filters else []