我们从Python开源项目中,提取了以下50个代码示例,用于说明如何使用bleach.clean()。
def safe_text_for_markdown(text): """Clean the text using bleach but keep certain Markdown sections. Markdown code ie ` or ``` combos. For single `, do not allow line breaks between the tag. Quotes ie '> ' which bleach would clean up. """ code_blocks, text = code_blocks_add_markers(text) # Store quotes next text = re.sub(r"(^> )", "%%safe_quote_in_start%%", text) text = re.sub(r"(\n> )", "%%safe_quote_in_new_line%%", text, flags=re.DOTALL) # Nuke all html, scripts, etc text = bleach.clean(text or "") # Return quotes text = text.replace("%%safe_quote_in_start%%", "> ") text = text.replace("%%safe_quote_in_new_line%%", "\n> ") text = code_blocks_restore(code_blocks, text) return text
def test_stripping(): eq_('a test <em>with</em> <b>html</b> tags', bleach.clean('a test <em>with</em> <b>html</b> tags', strip=True)) eq_('a test <em>with</em> <b>html</b> tags', bleach.clean('a test <em>with</em> <img src="http://example.com/"> ' '<b>html</b> tags', strip=True)) s = '<p><a href="http://example.com/">link text</a></p>' eq_('<p>link text</p>', bleach.clean(s, tags=['p'], strip=True)) s = '<p><span>multiply <span>nested <span>text</span></span></span></p>' eq_('<p>multiply nested text</p>', bleach.clean(s, tags=['p'], strip=True)) s = ('<p><a href="http://example.com/"><img src="http://example.com/">' '</a></p>') eq_('<p><a href="http://example.com/"></a></p>', bleach.clean(s, tags=['p', 'a'], strip=True))
def render_markdown(data, auto_link=True, allow_html=False): ''' Returns the data as rendered markdown :param auto_link: Should ckan specific links be created e.g. `group:xxx` :type auto_link: bool :param allow_html: If True then html entities in the markdown data. This is dangerous if users have added malicious content. If False all html tags are removed. :type allow_html: bool ''' if not data: return '' if allow_html: data = markdown(data.strip()) else: data = RE_MD_HTML_TAGS.sub('', data.strip()) data = clean_html( markdown(data), strip=True, tags=MARKDOWN_TAGS, attributes=MARKDOWN_ATTRIBUTES) # tags can be added by tag:... or tag:"...." and a link will be made # from it if auto_link: data = html_auto_link(data) return literal(data)
def clean_html(text): """ Strip disallowed HTML tags and attributes from given input. Allowed tags and attributes can be defined in settings. :param text: HTML to sanitize from disallowed tags and attributes. :type text: str :return: Clean version of given HTML where disallowed tags and attributes have been removed from. :rtype: str """ return bleach.clean( text=text, tags=settings.TEXT_MEDIUMEDITOR_HTML_ALLOWED_TAGS, attributes=settings.TEXT_MEDIUMEDITOR_HTML_ALLOWED_ATTRIBUTES, )
def post(self, request): form = self.form_class(request.POST) if form.is_valid(): title = form.cleaned_data['title'] body = form.cleaned_data['body'] body_html = markdown.markdown(body) body_html = bleach.clean(body_html, tags=settings.ARTICLE_TAGS, strip=True) article = Post(title=title, body=body, user=request.user, body_html=body_html) article.save() vote_obj = VotePost(user=request.user, post=article, value=1) vote_obj.save() article.upvotes += 1 article.net_votes += 1 article.save() messages.success(request, 'Article has been submitted.') return redirect(reverse('ploghubapp:home_page') + '?sort_by=new') else: return render(request, self.template_name, {'form' : form})
def task_instance_link(v, c, m, p): dag_id = bleach.clean(m.dag_id) task_id = bleach.clean(m.task_id) url = url_for( 'airflow.task', dag_id=dag_id, task_id=task_id, execution_date=m.execution_date.isoformat()) url_root = url_for( 'airflow.graph', dag_id=dag_id, root=task_id, execution_date=m.execution_date.isoformat()) return Markup( """ <span style="white-space: nowrap;"> <a href="{url}">{task_id}</a> <a href="{url_root}" title="Filter on this task and upstream"> <span class="glyphicon glyphicon-filter" style="margin-left: 0px;" aria-hidden="true"></span> </a> </span> """.format(**locals()))
def safe_markdown(comment, extensions=[]): html = markdown.markdown(comment, extensions=extensions) return bleach.clean( text=html, tags=[ 'a', 'abbr', 'acronym', 'b', 'blockqote', 'code', 'em', 'i', 'li', 'ol', 'strong', 'ul', 'p', 'span', 'h1', 'h2', 'h3', 'pre', 'blockquote', 'table', 'thead', 'tr', 'th', 'td', 'tbody', 'dl', 'dt', 'sup', 'div', 'hr', ], attributes={ '*': ['class'], 'a': ['href', 'title', 'class', 'id'], 'acronym': ['title'], 'abbr': ['title'], 'sup': ['id'], 'li': ['id'] }, )
def safe_text(text): """Clean text, stripping all tags, attributes and styles.""" return bleach.clean(text or "", tags=[], attributes=[], styles=[], strip=True)
def htmlize(text): """ This helper method renders Markdown then uses Bleach to sanitize it as well as convert all links to actual links. """ text = bleach.clean(text, strip=True) # Clean the text by stripping bad HTML tags text = markdown(text) # Convert the markdown to HTML text = bleach.linkify(text) # Add links from the text and add nofollow to existing links return text # Compile regular expression functions for query normalization
def on_changed_about_me(target, value, oldvalue, initiaor): allowed_tags = ['a', 'abbr', 'acronym', 'b', 'blockquate', 'code', 'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul', 'h1', 'h2', 'h3', 'p'] target.about_me_html = bleach.linkify( bleach.clean(markdown(value, output_format='html'), tags=allowed_tags, strip=True))
def on_changed_summary(target, value, oldvalue, initiaor): allowed_tags = ['a', 'abbr', 'acronym', 'b', 'blockquate', 'code', 'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul', 'h1', 'h2', 'h3', 'p'] target.summary_html = bleach.linkify( bleach.clean(markdown(value, output_format='html'), tags=allowed_tags, strip=True))
def on_changed_catalog(target, value, oldvalue, initiaor): allowed_tags = ['a', 'abbr', 'acronym', 'b', 'blockquate', 'code', 'em', 'i', 'li', 'ol', 'pre', 'strong', 'ul', 'h1', 'h2', 'h3', 'p'] target.catalog_html = bleach.linkify( bleach.clean(markdown(value, output_format='html'), tags=allowed_tags, strip=True))
def md2html(content): cleaned_content = bleach.clean(content, []).replace('>', '>') unclean_html = markdown.markdown( cleaned_content, extensions=settings.MARKDOWN_EXTENSIONS) response = bleach.clean(unclean_html, SAFE_TAGS, SAFE_ATTRS) return response
def markdown(value): """ Translate markdown to a safe subset of HTML. """ cleaned = bleach.clean(markdown_library.markdown(value), tags=bleach.ALLOWED_TAGS + ['p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']) linkified = bleach.linkify(cleaned) return mark_safe(linkified)
def render(text): formatted_html = markdown( text, extensions=[ 'markdown.extensions.extra', 'markdown.extensions.codehilite', 'markdown.extensions.nl2br', 'markdown.extensions.sane_lists', 'markdown.extensions.toc', 'markdown.extensions.wikilinks' ], output_format='html5' ) # Sanitizing html with bleach to avoid code injection sanitized_html = bleach_clean( formatted_html, # Allowed tags, attributes and styles tags=[ 'p', 'div', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'tt', 'pre', 'em', 'strong', 'ul', 'li', 'dl', 'dd', 'dt', 'code', 'img', 'a', 'table', 'tr', 'th', 'td', 'tbody', 'caption', 'colgroup', 'thead', 'tfoot', 'blockquote', 'ol', 'hr', 'br', "sub", "sup" ], attributes={ '*': ['class', 'style', 'id'], 'a': ['href', 'target', 'rel'], 'img': ['src', 'alt'], 'tr': ['rowspan', 'colspan'], 'td': ['rowspan', 'colspan', 'align'] }, styles=[ 'color', 'background-color', 'font-family', 'font-weight', 'font-size', 'width', 'height', 'text-align', 'border', 'border-top', 'border-bottom', 'border-left', 'border-right', 'padding', 'padding-top', 'padding-bottom', 'padding-left', 'padding-right', 'margin', 'margin-top', 'margin-bottom', 'margin-left', 'margin-right' ] ) return mark_safe(sanitized_html)
def format(rawstr): return bleach.clean( markdown( rawstr, output_format='html5', lazy_ol=False ), tags=ALLOWED_TAGS )
def clean(self): #import pdb; pdb.set_trace() data = super(MembershipRequestForm, self).clean() type_of_membership = data["type_of_membership"] number_of_shares = data["number_of_shares"] if type_of_membership == "collective": if int(number_of_shares) < 2: msg = "Number of shares must be at least 2 for a collective." self.add_error('number_of_shares', msg)
def _clean_fields(self): super(MembershipRequestForm, self)._clean_fields() for name, value in self.cleaned_data.items(): self.cleaned_data[name] = bleach.clean(value)
def clean(self): #import pdb; pdb.set_trace() data = super(ProjectCreateForm, self).clean() url = data["url"] if not url[0:3] == "http": data["url"] = "http://" + url #if type_of_user == "collective": #if int(number_of_shares) < 2: # msg = "Number of shares must be at least 2 for a collective." # self.add_error('number_of_shares', msg)
def _clean_fields(self): super(ProjectCreateForm, self)._clean_fields() for name, value in self.cleaned_data.items(): self.cleaned_data[name] = bleach.clean(value)
def clean(self): #import pdb; pdb.set_trace() data = super(JoinRequestForm, self).clean() type_of_user = data["type_of_user"] #number_of_shares = data["number_of_shares"] #if type_of_user == "collective": #if int(number_of_shares) < 2: # msg = "Number of shares must be at least 2 for a collective." # self.add_error('number_of_shares', msg)
def clean(self): #import pdb; pdb.set_trace() data = super(JoinRequestInternalForm, self).clean() #type_of_user = data["type_of_user"] #number_of_shares = data["number_of_shares"] #if type_of_user == "collective": #if int(number_of_shares) < 2: # msg = "Number of shares must be at least 2 for a collective." # self.add_error('number_of_shares', msg)
def _clean_fields(self): super(JoinRequestInternalForm, self)._clean_fields() for name, value in self.cleaned_data.items(): self.cleaned_data[name] = bleach.clean(value)
def test_nested_script_tag(): eq_('<<script>script>evil()<</script>/script>', clean('<<script>script>evil()<</script>/script>')) eq_('<<x>script>evil()<</x>/script>', clean('<<x>script>evil()<</x>/script>'))
def test_nested_script_tag_r(): eq_('<script<script>>evil()</script<>>', clean('<script<script>>evil()</script</script>>'))
def test_invalid_attr(): IMG = ['img', ] IMG_ATTR = ['src'] eq_('<a href="test">test</a>', clean('<a onclick="evil" href="test">test</a>')) eq_('<img src="test">', clean('<img onclick="evil" src="test" />', tags=IMG, attributes=IMG_ATTR)) eq_('<img src="test">', clean('<img href="invalid" src="test" />', tags=IMG, attributes=IMG_ATTR))
def test_unquoted_attr(): eq_('<abbr title="mytitle">myabbr</abbr>', clean('<abbr title=mytitle>myabbr</abbr>'))
def test_unquoted_event_handler(): eq_('<a href="http://xx.com">xx.com</a>', clean('<a href="http://xx.com" onclick=foo()>xx.com</a>'))
def test_invalid_href_attr(): eq_('<a>xss</a>', clean('<a href="javascript:alert(\'XSS\')">xss</a>'))
def test_invalid_filter_attr(): IMG = ['img', ] IMG_ATTR = {'img': lambda n, v: n == 'src' and v == "http://example.com/"} eq_('<img src="http://example.com/">', clean('<img onclick="evil" src="http://example.com/" />', tags=IMG, attributes=IMG_ATTR)) eq_('<img>', clean('<img onclick="evil" src="http://badhost.com/" />', tags=IMG, attributes=IMG_ATTR))
def test_invalid_tag_char(): eq_('<script xss="" src="http://xx.com/xss.js"></script>', clean('<script/xss src="http://xx.com/xss.js"></script>')) eq_('<script src="http://xx.com/xss.js"></script>', clean('<script/src="http://xx.com/xss.js"></script>'))
def test_unclosed_tag(): eq_('<script src="http://xx.com/xss.js&lt;b">', clean('<script src=http://xx.com/xss.js<b>')) eq_('<script src="http://xx.com/xss.js" <b="">', clean('<script src="http://xx.com/xss.js"<b>')) eq_('<script src="http://xx.com/xss.js" <b="">', clean('<script src="http://xx.com/xss.js" <b>'))
def test_strip(): """Using strip=True shouldn't result in malicious content.""" s = '<scri<script>pt>alert(1)</scr</script>ipt>' eq_('pt>alert(1)ipt>', clean(s, strip=True)) s = '<scri<scri<script>pt>pt>alert(1)</script>' eq_('pt>pt>alert(1)', clean(s, strip=True))
def test_poster_attribute(): """Poster attributes should not allow javascript.""" tags = ['video'] attrs = {'video': ['poster']} test = '<video poster="javascript:alert(1)"></video>' expect = '<video></video>' eq_(expect, clean(test, tags=tags, attributes=attrs)) ok = '<video poster="/foo.png"></video>' eq_(ok, clean(ok, tags=tags, attributes=attrs))
def test_feed_protocol(): eq_('<a>foo</a>', clean('<a href="feed:file:///tmp/foo">foo</a>'))
def test_japanese_safe_simple(): eq_('???????????', clean('???????????')) eq_('???????????', linkify('???????????'))
def test_japanese_strip(): eq_('<em>???????????</em>', clean('<em>???????????</em>')) eq_('<span>???????????</span>', clean('<span>???????????</span>'))
def test_russian_simple(): eq_('????????', clean('????????')) eq_('????????', linkify('????????'))
def test_allowed_css(): tests = ( ('font-family: Arial; color: red; float: left; ' 'background-color: red;', 'color: red;', ['color']), ('border: 1px solid blue; color: red; float: left;', 'color: red;', ['color']), ('border: 1px solid blue; color: red; float: left;', 'color: red; float: left;', ['color', 'float']), ('color: red; float: left; padding: 1em;', 'color: red; float: left;', ['color', 'float']), ('color: red; float: left; padding: 1em;', 'color: red;', ['color']), ('cursor: -moz-grab;', 'cursor: -moz-grab;', ['cursor']), ('color: hsl(30,100%,50%);', 'color: hsl(30,100%,50%);', ['color']), ('color: rgba(255,0,0,0.4);', 'color: rgba(255,0,0,0.4);', ['color']), ("text-overflow: ',' ellipsis;", "text-overflow: ',' ellipsis;", ['text-overflow']), ('text-overflow: "," ellipsis;', 'text-overflow: "," ellipsis;', ['text-overflow']), ('font-family: "Arial";', 'font-family: "Arial";', ['font-family']), ) p_single = '<p style="{0!s}">bar</p>' p_double = "<p style='{0!s}'>bar</p>" def check(i, o, s): if '"' in i: eq_(p_double.format(o), clean(p_double.format(i), styles=s)) else: eq_(p_single.format(o), clean(p_single.format(i), styles=s)) for i, o, s in tests: yield check, i, o, s
def test_valid_css(): """The sanitizer should fix missing CSS values.""" styles = ['color', 'float'] eq_('<p style="float: left;">foo</p>', clean('<p style="float: left; color: ">foo</p>', styles=styles)) eq_('<p style="">foo</p>', clean('<p style="color: float: left;">foo</p>', styles=styles))
def test_style_hang(): """The sanitizer should not hang on any inline styles""" # TODO: Neaten this up. It's copypasta from MDN/Kuma to repro the bug style = ("""margin-top: 0px; margin-right: 0px; margin-bottom: 1.286em; """ """margin-left: 0px; padding-top: 15px; padding-right: 15px; """ """padding-bottom: 15px; padding-left: 15px; border-top-width: """ """1px; border-right-width: 1px; border-bottom-width: 1px; """ """border-left-width: 1px; border-top-style: dotted; """ """border-right-style: dotted; border-bottom-style: dotted; """ """border-left-style: dotted; border-top-color: rgb(203, 200, """ """185); border-right-color: rgb(203, 200, 185); """ """border-bottom-color: rgb(203, 200, 185); border-left-color: """ """rgb(203, 200, 185); background-image: initial; """ """background-attachment: initial; background-origin: initial; """ """background-clip: initial; background-color: """ """rgb(246, 246, 242); overflow-x: auto; overflow-y: auto; """ """font: normal normal normal 100%/normal 'Courier New', """ """'Andale Mono', monospace; background-position: initial """ """initial; background-repeat: initial initial;""") html = '<p style="{0!s}">Hello world</p>'.format(style) styles = [ 'border', 'float', 'overflow', 'min-height', 'vertical-align', 'white-space', 'margin', 'margin-left', 'margin-top', 'margin-bottom', 'margin-right', 'padding', 'padding-left', 'padding-top', 'padding-bottom', 'padding-right', 'background', 'background-color', 'font', 'font-size', 'font-weight', 'text-align', 'text-transform', ] expected = ("""<p style="margin-top: 0px; margin-right: 0px; """ """margin-bottom: 1.286em; margin-left: 0px; padding-top: """ """15px; padding-right: 15px; padding-bottom: 15px; """ """padding-left: 15px; background-color: """ """rgb(246, 246, 242); font: normal normal normal """ """100%/normal 'Courier New', 'Andale Mono', monospace;">""" """Hello world</p>""") result = clean(html, styles=styles) eq_(expected, result)
def test_empty(): eq_('', bleach.clean(''))
def test_nbsp(): if six.PY3: expected = '\xa0test string\xa0' else: expected = six.u('\\xa0test string\\xa0') eq_(expected, bleach.clean(' test string '))
def test_with_comments(): html = '<!-- comment -->Just text' eq_('Just text', bleach.clean(html)) eq_(html, bleach.clean(html, strip_comments=False))
def test_no_html(): eq_('no html string', bleach.clean('no html string'))
def test_allowed_html(): eq_('an <strong>allowed</strong> tag', bleach.clean('an <strong>allowed</strong> tag')) eq_('another <em>good</em> tag', bleach.clean('another <em>good</em> tag'))
def test_bad_html(): eq_('a <em>fixed tag</em>', bleach.clean('a <em>fixed tag'))