我们从Python开源项目中,提取了以下47个代码示例,用于说明如何使用bleach.linkify()。
def process_text_links(text):
    """Process links in text, adding some attributes and linkifying textual links.

    Every anchor, new or pre-existing, is passed through the standard
    bleach callbacks (``nofollow`` and ``target_blank``), except anchors
    whose ``href`` starts with ``/``, which are returned untouched.
    E-mail addresses are not linkified and ``<code>`` content is skipped.

    :param text: Text (plain or HTML) to process
    :returns: the result of ``bleach.linkify`` for ``text``
    """
    link_callbacks = [callbacks.nofollow, callbacks.target_blank]

    def link_attributes(attrs, new=False):
        """Run standard callbacks except for internal links."""
        # An existing anchor may carry no href at all (e.g. <a name="x">);
        # treat it as "not internal" instead of crashing on None.startswith.
        href = attrs.get((None, "href"))
        if href is not None and href.startswith("/"):
            return attrs
        # Run the standard callbacks
        for callback in link_callbacks:
            attrs = callback(attrs, new)
        return attrs

    return bleach.linkify(
        text,
        callbacks=[link_attributes],
        parse_email=False,
        skip_tags=["code"],
    )
def find_urls_in_text(text):
    """Find url's from text.

    Bleach does the heavy lifting here by identifying the links.

    :param text: Text to search links from
    :returns: set of urls
    """
    urls = set()

    def link_collector(attrs, new=False):
        href = attrs.get((None, "href"))
        # Anchors without an href carry no URL; never record None as a url.
        if href is not None:
            urls.add(href)
        # The linkified output is thrown away, only the collected hrefs
        # matter, so the callback does not need to hand back attributes.
        return None

    bleach.linkify(text, callbacks=[link_collector], parse_email=False, skip_tags=["code"])
    return urls
def preview_body(target, value, oldvalue, initiator):
    """Render ``value`` from Markdown and store sanitized HTML on ``target``.

    The Markdown output is cleaned against a tag/attribute whitelist
    (disallowed tags are stripped, not escaped), then linkified, and the
    result is assigned to ``target.body_html``.

    ``oldvalue`` and ``initiator`` are accepted but not used.
    """
    tag_whitelist = [
        'a', 'abbr', 'acronym', 'b', 'img', 'blockquote', 'code',
        'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul',
        'h1', 'h2', 'h3', 'p',
    ]
    attribute_whitelist = {
        '*': ['class'],
        'a': ['href', 'rel'],
        # Images need their source and alternative text to be kept.
        'img': ['src', 'alt'],
    }
    rendered = markdown(value, output_format='html')
    sanitized = bleach.clean(
        rendered,
        tags=tag_whitelist,
        strip=True,
        attributes=attribute_whitelist,
    )
    target.body_html = bleach.linkify(sanitized)
def test_email_link_escaping():
    """Linkified e-mail addresses must come out as properly escaped HTML.

    Yields one ``(_check, expected_output, input)`` case per address.
    """
    tests = (
        ('''<a href='mailto:"james"@example.com'>'''
         '''"james"@example.com</a>''',
         '"james"@example.com'),
        # The address contains both quote styles, so the double quotes
        # inside the double-quoted href have to be entity-escaped.
        ('''<a href="mailto:&quot;j'ames&quot;@example.com">'''
         '''"j'ames"@example.com</a>''',
         '"j\'ames"@example.com'),
        # A '>' in the link text has to be entity-escaped.
        ('''<a href='mailto:"ja>mes"@example.com'>'''
         '''"ja&gt;mes"@example.com</a>''',
         '"ja>mes"@example.com'),
    )

    def _check(o, i):
        eq_(o, linkify(i, parse_email=True))

    for (o, i) in tests:
        yield _check, o, i
def test_link_query():
    """URLs with a query string are linkified, with or without scheme/path."""
    cases = (
        ('http://xx.com/?test=win',
         ('<a href="http://xx.com/?test=win" rel="nofollow">'
          'http://xx.com/?test=win</a>',
          '<a rel="nofollow" href="http://xx.com/?test=win">'
          'http://xx.com/?test=win</a>')),
        ('xx.com/?test=win',
         ('<a href="http://xx.com/?test=win" rel="nofollow">'
          'xx.com/?test=win</a>',
          '<a rel="nofollow" href="http://xx.com/?test=win">'
          'xx.com/?test=win</a>')),
        ('xx.com?test=win',
         ('<a href="http://xx.com?test=win" rel="nofollow">'
          'xx.com?test=win</a>',
          '<a rel="nofollow" href="http://xx.com?test=win">'
          'xx.com?test=win</a>')),
    )
    # Either attribute ordering is an acceptable serialization.
    for text, accepted in cases:
        in_(accepted, linkify(text))
def test_end_of_sentence():
    """Trailing sentence punctuation must stay outside the generated link.

    Yields one ``(check, url, punctuation)`` case per combination.
    """
    # Both attribute orderings are accepted.
    templates = (
        '<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}',
        '<a rel="nofollow" href="http://{0!s}">{0!s}</a>{1!s}',
    )
    source_template = '{0!s}{1!s}'

    def check(u, p):
        accepted = [template.format(u, p) for template in templates]
        in_(accepted, linkify(source_template.format(u, p)))

    cases = (
        ('example.com', '.'),
        ('example.com', '...'),
        ('ex.com/foo', '.'),
        ('ex.com/foo', '....'),
    )
    for url, punctuation in cases:
        yield check, url, punctuation
def test_url_utf8():
    """Allow UTF8 characters in URLs themselves.

    Yields one ``(check, url, accepted_outputs)`` case per URL.
    """
    templates = (
        '<a href="{0!s}" rel="nofollow">{0!s}</a>',
        '<a rel="nofollow" href="{0!s}">{0!s}</a>',
    )

    def accepted_for(url):
        # Either attribute ordering is an acceptable serialization.
        return [template.format(url) for template in templates]

    urls = (
        'http://éxámplé.com/',
        'http://éxámplé.com/íàñá/',
        'http://éxámplé.com/íàñá/?foo=bar',
        'http://éxámplé.com/íàñá/?fóo=bár',
    )

    def check(test, expected_output):
        in_(expected_output, linkify(test))

    for url in urls:
        yield check, url, accepted_for(url)
def test_end_of_sentence():
    """Trailing sentence punctuation must stay outside the generated link.

    Yields one ``(check, url, punctuation)`` case per combination.
    """
    expected_template = '<a href="http://{0!s}" rel="nofollow">{0!s}</a>{1!s}'
    source_template = '{0!s}{1!s}'

    def check(u, p):
        expected = expected_template.format(u, p)
        eq_(expected, linkify(source_template.format(u, p)))

    cases = (
        ('example.com', '.'),
        ('example.com', '...'),
        ('ex.com/foo', '.'),
        ('ex.com/foo', '....'),
    )
    for url, punctuation in cases:
        yield check, url, punctuation
def test_ports():
    """URLs can contain port numbers.

    Yields one ``(check, input, (linked_part, trailing_text))`` case each;
    a non-numeric or empty port is expected to stay outside the link.
    """
    link_template = '<a href="{0}" rel="nofollow">{0}</a>{1}'

    def check(test, output):
        linked_part, trailing_text = output
        eq_(link_template.format(linked_part, trailing_text), linkify(test))

    cases = (
        ('http://foo.com:8000', ('http://foo.com:8000', '')),
        ('http://foo.com:8000/', ('http://foo.com:8000/', '')),
        ('http://bar.com:xkcd', ('http://bar.com', ':xkcd')),
        ('http://foo.com:81/bar', ('http://foo.com:81/bar', '')),
        ('http://foo.com:', ('http://foo.com', ':')),
    )
    for source, expected_parts in cases:
        yield check, source, expected_parts
def htmlize(text):
    """
    Sanitize ``text`` with Bleach, render it as Markdown, and linkify it.

    Returns the resulting HTML string.
    """
    # NOTE(review): sanitizing happens before Markdown rendering, so the
    # rendered HTML itself is not cleaned afterwards -- confirm this order
    # is intended.
    stripped = bleach.clean(text, strip=True)  # drop disallowed HTML tags
    rendered = markdown(stripped)  # Markdown -> HTML
    return bleach.linkify(rendered)  # linkify bare URLs in the rendered HTML


# Compile regular expression functions for query normalization
def on_changed_about_me(target, value, oldvalue, initiaor):
    """Render ``value`` from Markdown into sanitized HTML on ``target``.

    The Markdown output is cleaned against a tag whitelist (disallowed
    tags are stripped), linkified, and stored in ``target.about_me_html``.

    ``oldvalue`` and ``initiaor`` are accepted but not used; the parameter
    names are kept as-is so existing callers are unaffected.
    """
    # 'blockquote' was previously misspelled in this whitelist, which made
    # bleach strip every real <blockquote> produced by Markdown.
    allowed_tags = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code',
                    'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul',
                    'h1', 'h2', 'h3', 'p']
    target.about_me_html = bleach.linkify(
        bleach.clean(markdown(value, output_format='html'),
                     tags=allowed_tags, strip=True))
def on_changed_summary(target, value, oldvalue, initiaor):
    """Render ``value`` from Markdown into sanitized HTML on ``target``.

    The Markdown output is cleaned against a tag whitelist (disallowed
    tags are stripped), linkified, and stored in ``target.summary_html``.

    ``oldvalue`` and ``initiaor`` are accepted but not used; the parameter
    names are kept as-is so existing callers are unaffected.
    """
    # 'blockquote' was previously misspelled in this whitelist, which made
    # bleach strip every real <blockquote> produced by Markdown.
    allowed_tags = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code',
                    'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul',
                    'h1', 'h2', 'h3', 'p']
    target.summary_html = bleach.linkify(
        bleach.clean(markdown(value, output_format='html'),
                     tags=allowed_tags, strip=True))
def on_changed_catalog(target, value, oldvalue, initiaor):
    """Render ``value`` from Markdown into sanitized HTML on ``target``.

    The Markdown output is cleaned against a tag whitelist (disallowed
    tags are stripped), linkified, and stored in ``target.catalog_html``.

    ``oldvalue`` and ``initiaor`` are accepted but not used; the parameter
    names are kept as-is so existing callers are unaffected.
    """
    # 'blockquote' was previously misspelled in this whitelist, which made
    # bleach strip every real <blockquote> produced by Markdown.
    allowed_tags = ['a', 'abbr', 'acronym', 'b', 'blockquote', 'code',
                    'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul',
                    'h1', 'h2', 'h3', 'p']
    target.catalog_html = bleach.linkify(
        bleach.clean(markdown(value, output_format='html'),
                     tags=allowed_tags, strip=True))
def on_changed_body(target, value, oldvalue, initiator):
    """Render ``value`` from Markdown into sanitized HTML on ``target``.

    The Markdown output is cleaned against a tag whitelist (disallowed
    tags are stripped), linkified, and stored in ``target.body_html``.

    ``oldvalue`` and ``initiator`` are accepted but not used.
    """
    tag_whitelist = [
        'a', 'abbr', 'acronym', 'b', 'blockquote', 'code',
        'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul',
        'h1', 'h2', 'h3', 'p',
    ]
    rendered = markdown(value, output_format='html')
    sanitized = bleach.clean(rendered, tags=tag_whitelist, strip=True)
    target.body_html = bleach.linkify(sanitized)
def on_changed_body(target, value, oldvalue, initiator):
    """Render ``value`` from Markdown into sanitized inline HTML on ``target``.

    Only a small set of inline tags survives cleaning (everything else is
    stripped); the cleaned HTML is linkified and stored in
    ``target.body_html``.

    ``oldvalue`` and ``initiator`` are accepted but not used.
    """
    tag_whitelist = ['a', 'abbr', 'acronym', 'b', 'code', 'em', 'i', 'strong']
    rendered = markdown(value, output_format='html')
    sanitized = bleach.clean(rendered, tags=tag_whitelist, strip=True)
    target.body_html = bleach.linkify(sanitized)
def render_markdown(self):
    """Render ``self.source`` from Markdown into ``self.html``.

    The rendered HTML is cleaned against a whitelist of inline tags
    (disallowed tags are stripped) and then linkified.
    """
    tag_whitelist = ['a', 'abbr', 'acronym', 'b', 'code', 'em', 'i', 'strong']
    rendered = markdown(self.source, output_format='html')
    sanitized = bleach.clean(rendered, tags=tag_whitelist, strip=True)
    self.html = bleach.linkify(sanitized)
def markdown(value):
    """
    Translate markdown to a safe subset of HTML.

    The Markdown output is cleaned with bleach's default tag whitelist
    extended by paragraph and heading tags, then linkified and marked
    safe for template output.
    """
    extra_tags = ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']
    # bleach.ALLOWED_TAGS is a list in older bleach releases and a
    # frozenset in newer ones; going through list() keeps the
    # concatenation working for both instead of raising TypeError.
    allowed_tags = list(bleach.ALLOWED_TAGS) + extra_tags
    cleaned = bleach.clean(markdown_library.markdown(value), tags=allowed_tags)
    linkified = bleach.linkify(cleaned)
    return mark_safe(linkified)
def on_changed_body(target, value, oldvalue, initiator):
    """Render ``value`` from Markdown into sanitized HTML on ``target``.

    The Markdown output is cleaned against tag and attribute whitelists
    (disallowed tags are stripped), linkified, and stored in
    ``target.body_html``.

    ``oldvalue`` and ``initiator`` are accepted but not used.
    """
    allowed_tags = [
        'a', 'abbr', 'acronym', 'b', 'code', 'em', 'img', 'i', 'strong'
    ]
    # bleach's default attribute whitelist has no entry for <img>, so the
    # whitelisted images used to lose their src/alt and never render.
    # The first three entries mirror bleach's documented defaults.
    allowed_attributes = {
        'a': ['href', 'title'],
        'abbr': ['title'],
        'acronym': ['title'],
        'img': ['src', 'alt'],
    }
    target.body_html = bleach.linkify(bleach.clean(
        markdown(value, output_format='html'),
        tags=allowed_tags,
        attributes=allowed_attributes,
        strip=True
    ))
def test_empty():
    """Linkifying the empty string yields the empty string."""
    empty = ''
    result = linkify(empty)
    eq_(empty, result)
def test_simple_link():
    """Bare http, https and scheme-less URLs inside text become links."""
    cases = (
        ('a http://example.com link',
         ('a <a href="http://example.com" rel="nofollow">http://example.com'
          '</a> link',
          'a <a rel="nofollow" href="http://example.com">http://example.com'
          '</a> link')),
        ('a https://example.com link',
         ('a <a href="https://example.com" rel="nofollow">https://example.com'
          '</a> link',
          'a <a rel="nofollow" href="https://example.com">https://example.com'
          '</a> link')),
        ('a example.com link',
         ('a <a href="http://example.com" rel="nofollow">example.com</a> link',
          'a <a rel="nofollow" href="http://example.com">example.com</a> link')),
    )
    # Either attribute ordering is an acceptable serialization.
    for text, accepted in cases:
        in_(accepted, linkify(text))
def test_trailing_slash():
    """A trailing slash is kept as part of the linked URL."""
    cases = (
        ('http://examp.com/',
         ('<a href="http://examp.com/" rel="nofollow">http://examp.com/</a>',
          '<a rel="nofollow" href="http://examp.com/">http://examp.com/</a>')),
        ('http://example.com/foo/',
         ('<a href="http://example.com/foo/" rel="nofollow">'
          'http://example.com/foo/</a>',
          '<a rel="nofollow" href="http://example.com/foo/">'
          'http://example.com/foo/</a>')),
        ('http://example.com/foo/bar/',
         ('<a href="http://example.com/foo/bar/" rel="nofollow">'
          'http://example.com/foo/bar/</a>',
          '<a rel="nofollow" href="http://example.com/foo/bar/">'
          'http://example.com/foo/bar/</a>')),
    )
    # Either attribute ordering is an acceptable serialization.
    for text, accepted in cases:
        in_(accepted, linkify(text))
def test_mangle_text():
    """A callback can replace the inner text of new and existing links."""

    def force_text(attrs, new=False):
        # '_text' is the key this test uses to override the link text.
        attrs['_text'] = 'bar'
        return attrs

    source = 'http://ex.mp <a href="http://ex.mp/foo">foo</a>'
    expected = '<a href="http://ex.mp">bar</a> <a href="http://ex.mp/foo">bar</a>'
    eq_(expected, linkify(source, [force_text]))
def test_prevent_links():
    """Returning None from any callback should remove links or prevent them
    from being created.

    Yields one ``(_check, callbacks, expected_output, message)`` case per
    callback combination.
    """

    def no_new_links(attrs, new=False):
        return None if new else attrs

    def no_old_links(attrs, new=False):
        return attrs if new else None

    def noop(attrs, new=False):
        return attrs

    in_text = 'a ex.mp <a href="http://example.com">example</a>'
    out_text = 'a <a href="http://ex.mp">ex.mp</a> example'
    both_linked = ('a <a href="http://ex.mp">ex.mp</a> '
                   '<a href="http://example.com">example</a>')

    cases = (
        ([noop], both_linked, 'noop'),
        ([no_new_links, noop], in_text, 'no new, noop'),
        ([noop, no_new_links], in_text, 'noop, no new'),
        ([no_old_links, noop], out_text, 'no old, noop'),
        ([noop, no_old_links], out_text, 'noop, no old'),
        ([no_old_links, no_new_links], 'a ex.mp example', 'no links'),
    )

    def _check(cb, o, msg):
        eq_(o, linkify(in_text, cb), msg)

    for callbacks_under_test, expected, message in cases:
        yield _check, callbacks_under_test, expected, message
def test_set_attrs():
    """A callback can add arbitrary attributes to a link."""

    def add_rev(attrs, new=False):
        attrs['rev'] = 'canonical'
        return attrs

    # Either attribute ordering is an acceptable serialization.
    accepted = (
        '<a href="http://ex.mp" rev="canonical">ex.mp</a>',
        '<a rev="canonical" href="http://ex.mp">ex.mp</a>',
    )
    in_(accepted, linkify('ex.mp', [add_rev]))
def test_tlds():
    """Only names ending in a recognized TLD are linkified."""
    dot_com = ('<a href="http://example.com" rel="nofollow">example.com</a>',
               '<a rel="nofollow" href="http://example.com">example.com</a>')
    in_(dot_com, linkify('example.com'))

    dot_co_uk = ('<a href="http://example.co.uk" rel="nofollow">example.co.uk</a>',
                 '<a rel="nofollow" href="http://example.co.uk">example.co.uk</a>')
    in_(dot_co_uk, linkify('example.co.uk'))

    dot_edu = ('<a href="http://example.edu" rel="nofollow">example.edu</a>',
               '<a rel="nofollow" href="http://example.edu">example.edu</a>')
    in_(dot_edu, linkify('example.edu'))

    # These inputs are expected to come back unchanged.
    unchanged = 'example.xxx'
    eq_(unchanged, linkify(unchanged))
    plain_word = ' brie'
    eq_(plain_word, linkify(plain_word))

    with_path = ('<a href="http://bit.ly/fun" rel="nofollow">bit.ly/fun</a>',
                 '<a rel="nofollow" href="http://bit.ly/fun">bit.ly/fun</a>')
    in_(with_path, linkify('bit.ly/fun'))
def test_escaping():
    """A bare ``<`` in plain text must come out entity-escaped."""
    # The expected value has to be the escaped form; with an identical
    # input and output the test would assert that nothing gets escaped.
    eq_('&lt; unrelated', linkify('< unrelated'))
def test_nofollow_off():
    """With an empty callback list no ``rel="nofollow"`` is added."""
    no_callbacks = []
    expected = '<a href="http://example.com">example.com</a>'
    eq_(expected, linkify('example.com', no_callbacks))
def test_link_in_html():
    """URLs nested inside other HTML elements are linkified in place."""
    cases = (
        ('<i>http://yy.com</i>',
         ('<i><a href="http://yy.com" rel="nofollow">http://yy.com</a></i>',
          '<i><a rel="nofollow" href="http://yy.com">http://yy.com</a></i>')),
        ('<em><strong>http://xx.com</strong></em>',
         ('<em><strong><a href="http://xx.com" rel="nofollow">http://xx.com'
          '</a></strong></em>',
          '<em><strong><a rel="nofollow" href="http://xx.com">http://xx.com'
          '</a></strong></em>')),
    )
    # Either attribute ordering is an acceptable serialization.
    for markup, accepted in cases:
        in_(accepted, linkify(markup))
def test_add_rel_nofollow():
    """An already existing link gets ``rel="nofollow"`` added."""
    existing_link = '<a href="http://yy.com">http://yy.com</a>'
    # Either attribute ordering is an acceptable serialization.
    accepted = (
        '<a href="http://yy.com" rel="nofollow">http://yy.com</a>',
        '<a rel="nofollow" href="http://yy.com">http://yy.com</a>',
    )
    in_(accepted, linkify(existing_link))
def test_url_with_path():
    """The full path of a URL is included in the link."""
    url = 'http://example.com/path/to/file'
    # Either attribute ordering is an acceptable serialization.
    accepted = (
        '<a href="http://example.com/path/to/file" rel="nofollow">'
        'http://example.com/path/to/file</a>',
        '<a rel="nofollow" href="http://example.com/path/to/file">'
        'http://example.com/path/to/file</a>',
    )
    in_(accepted, linkify(url))
def test_link_ftp():
    """ftp:// URLs are linkified with their scheme kept."""
    url = 'ftp://ftp.mozilla.org/some/file'
    # Either attribute ordering is an acceptable serialization.
    accepted = (
        '<a href="ftp://ftp.mozilla.org/some/file" rel="nofollow">'
        'ftp://ftp.mozilla.org/some/file</a>',
        '<a rel="nofollow" href="ftp://ftp.mozilla.org/some/file">'
        'ftp://ftp.mozilla.org/some/file</a>',
    )
    in_(accepted, linkify(url))
def test_link_fragment():
    """A ``#fragment`` is kept as part of the linked URL."""
    url = 'http://xx.com/path#frag'
    # Either attribute ordering is an acceptable serialization.
    accepted = (
        '<a href="http://xx.com/path#frag" rel="nofollow">'
        'http://xx.com/path#frag</a>',
        '<a rel="nofollow" href="http://xx.com/path#frag">'
        'http://xx.com/path#frag</a>',
    )
    in_(accepted, linkify(url))
def test_escaped_html():
    """If I pass in escaped HTML, it should probably come out escaped."""
    # The fixture must be entity-escaped markup; a raw <em> element is real
    # HTML and would not exercise the escaped-input case this test is about.
    s = '&lt;em&gt;strong&lt;/em&gt;'
    eq_(s, linkify(s))
def test_non_url():
    """document.vulnerable should absolutely not be linkified."""
    text = 'document.vulnerable'
    result = linkify(text)
    # Expected to come back unchanged.
    eq_(text, result)
def test_javascript_url():
    """javascript: urls should never be linkified."""
    text = 'javascript:document.vulnerable'
    result = linkify(text)
    # Expected to come back unchanged.
    eq_(text, result)
def test_unsafe_url():
    """Any unsafe char ({}[]<>, etc.) in the path should end URL scanning."""
    source = 'All your{"xx.yy.com/grover.png"}base are'
    # Either attribute ordering is an acceptable serialization.
    accepted = (
        'All your{"<a href="http://xx.yy.com/grover.png" '
        'rel="nofollow">xx.yy.com/grover.png</a>"}base are',
        'All your{"<a rel="nofollow" href="http://xx.yy.com/grover.png"'
        '>xx.yy.com/grover.png</a>"}base are',
    )
    in_(accepted, linkify(source))
def test_libgl():
    """libgl.so.1 should not be linkified."""
    library_name = 'libgl.so.1'
    # Expected to come back unchanged.
    eq_(library_name, linkify(library_name))
def test_end_of_clause():
    """example.com/foo, shouldn't include the ,"""
    source = 'ex.com/foo, bar'
    # Either attribute ordering is an acceptable serialization.
    accepted = (
        '<a href="http://ex.com/foo" rel="nofollow">ex.com/foo</a>, bar',
        '<a rel="nofollow" href="http://ex.com/foo">ex.com/foo</a>, bar',
    )
    in_(accepted, linkify(source))