Python html5lib 模块,getTreeWalker() 实例源码

我们从Python开源项目中,提取了以下5个代码示例,用于说明如何使用html5lib.getTreeWalker()

项目:Tarnished-Tale    作者:ZAdamMac    | 项目源码 | 文件源码
async def taskTx(sock, message, mtype):  # a poor implementation of an output coroutine.
    """Send ``message`` to the client on ``sock``, sanitizing it as HTML first.

    Two control codes are handled specially and never send the sanitized
    text:

    * ``b"200"`` -- send a farewell string and close the socket.
    * ``b"202"`` -- send the "authentication successful" notice.

    Any other message is parsed as an HTML fragment, passed through
    ``sanitizer.Filter`` and re-serialized. The result is sent as-is when
    the module-level ``revertProtocol`` flag is truthy, otherwise wrapped in
    a JSON object with ``MSG_TYPE`` and ``MSG`` keys.

    This must be a coroutine (``async def``) because it awaits
    ``sock.send`` / ``sock.close``; ``await`` inside a plain ``def`` is a
    syntax error.

    :param sock: connection object exposing awaitable ``send`` and ``close``
    :param message: outgoing payload, or one of the control codes above
    :param mtype: value placed under ``MSG_TYPE`` in the JSON envelope
    """
    # Control codes are answered with fixed strings, so handle them before
    # doing any parsing work that would be thrown away.
    if message == b"200":
        await sock.send("Goodbye.")
        await sock.close()
        return
    if message == b"202":
        await sock.send("Authentication Successful, you are now the admin terminal.")
        return

    # Parse -> walk -> sanitize -> serialize, all on the "dom" tree type.
    parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
    walker = html5lib.getTreeWalker("dom")
    fragment = parser.parseFragment(message)
    clean_stream = sanitizer.Filter(walker(fragment))
    serializer = html5lib.serializer.HTMLSerializer()
    # serialize() yields text chunks; join once instead of repeated +=.
    tx = ''.join(serializer.serialize(clean_stream))

    # revertProtocol is only read here, so no ``global`` declaration is needed.
    if revertProtocol:
        await sock.send(tx)
    else:
        await sock.send(json.dumps({"MSG_TYPE": mtype, "MSG": tx}))
项目:v2ex-tornado-2    作者:coderyy    | 项目源码 | 文件源码
def setUp(self):
            """Build the objects shared by the tests: parser, tree walker, serializer.

            The XML parser is created with entity resolution disabled, and the
            tree walker is the html5lib walker registered under ``"lxml"``.
            """
            xml_parser = etree.XMLParser(resolve_entities=False)
            lxml_walker = html5lib.getTreeWalker("lxml")
            html_serializer = serializer.HTMLSerializer()

            self.parser = xml_parser
            self.treewalker = lxml_walker
            self.serializer = html_serializer
项目:python-magery    作者:caolan    | 项目源码 | 文件源码
def write_node(node, out):
    """Serialize ``node`` to HTML and write the pieces to ``out``.

    The node is walked with html5lib's ``"dom"`` tree walker and rendered by
    an ``HTMLSerializer`` configured to always quote attribute values, keep
    boolean attributes unminimized, pick the best quote character, and keep
    optional tags. Each serialized chunk is passed to ``out.write``.

    :param node: tree node accepted by the ``"dom"`` tree walker
    :param out: object with a ``write`` method taking each text chunk
    """
    token_stream = html5lib.getTreeWalker("dom")(node)
    serializer_options = {
        'quote_attr_values': 'always',
        'minimize_boolean_attributes': False,
        'use_best_quote_char': True,
        'omit_optional_tags': False,
    }
    html_serializer = html5lib.serializer.HTMLSerializer(**serializer_options)
    for chunk in html_serializer.serialize(token_stream):
        out.write(chunk)
项目:appengine_multiblog    作者:tstillwell    | 项目源码 | 文件源码
def __init__(self, callbacks=DEFAULT_CALLBACKS, skip_tags=None, parse_email=False,
                 url_re=URL_RE, email_re=EMAIL_RE):
        """Set up a Linker with its configuration and html5lib machinery.

        :arg list callbacks: callbacks to run when adjusting tag attributes;
            defaults to ``bleach.linkifier.DEFAULT_CALLBACKS``

        :arg list skip_tags: tags whose contents should not be linkified;
            e.g. ``['pre']`` leaves the contents of ``pre`` tags untouched

        :arg bool parse_email: whether or not email addresses are linkified

        :arg re url_re: regex used to match urls

        :arg re email_re: regex used to match email addresses

        """
        # Store the caller-supplied configuration unchanged.
        self.callbacks, self.skip_tags = callbacks, skip_tags
        self.parse_email = parse_email
        self.url_re, self.email_re = url_re, email_re

        # Parser without namespaced HTML elements, paired with the 'etree' walker.
        self.parser = html5lib.HTMLParser(namespaceHTMLElements=False)
        self.walker = html5lib.getTreeWalker('etree')

        # The serializer's own sanitizing and attribute alphabetizing are
        # both switched off for linkification.
        serializer_options = {
            'quote_attr_values': 'always',
            'omit_optional_tags': False,
            'sanitize': False,
            'alphabetical_attributes': False,
        }
        self.serializer = HTMLSerializer(**serializer_options)
项目:appengine_multiblog    作者:tstillwell    | 项目源码 | 文件源码
def __init__(self, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES,
                 styles=ALLOWED_STYLES, protocols=ALLOWED_PROTOCOLS, strip=False,
                 strip_comments=True, filters=None):
        """Set up a Cleaner with its allow-lists and html5lib machinery.

        :arg list tags: tags that are allowed; defaults to
            ``bleach.sanitizer.ALLOWED_TAGS``

        :arg dict attributes: attributes that are allowed; may be a callable,
            list or dict; defaults to ``bleach.sanitizer.ALLOWED_ATTRIBUTES``

        :arg list styles: css styles that are allowed; defaults to
            ``bleach.sanitizer.ALLOWED_STYLES``

        :arg list protocols: link protocols that are allowed; defaults to
            ``bleach.sanitizer.ALLOWED_PROTOCOLS``

        :arg bool strip: whether or not disallowed elements are stripped

        :arg bool strip_comments: whether or not HTML comments are stripped

        :arg list filters: html5lib Filter classes that streamed content is
            passed through; ``None`` (or any falsy value) becomes an empty list

            .. seealso:: http://html5lib.readthedocs.io/en/latest/movingparts.html#filters

            .. Warning::

               Filters change the output of ``bleach.Cleaner.clean``; make
               sure the changes they introduce are secure.

        """
        # Store the caller-supplied configuration unchanged.
        self.tags, self.attributes = tags, attributes
        self.styles, self.protocols = styles, protocols
        self.strip, self.strip_comments = strip, strip_comments
        self.filters = filters if filters else []

        # Parser without namespaced HTML elements, paired with the 'etree' walker.
        self.parser = BleachHTMLParser(namespaceHTMLElements=False)
        self.walker = html5lib.getTreeWalker('etree')

        serializer_options = {
            'quote_attr_values': 'always',
            'omit_optional_tags': False,
            'escape_lt_in_attrs': True,
            # Entities are left exactly as written: no escaping, resolving
            # or expanding.
            'resolve_entities': False,
            # Sanitizing and attribute alphabetizing are handled by Bleach
            # itself, so the html5lib serializer's versions stay off.
            'sanitize': False,
            'alphabetical_attributes': False,
        }
        self.serializer = BleachHTMLSerializer(**serializer_options)