33"""Utilities for working with HTML."""
44import bleach
55
6+ from html5lib .filters .base import Filter
7+
68import debug # pyflakes:ignore
79
810from django .utils .functional import keep_lazy
911from django .utils import six
1012
# HTML element names that this module passes to bleach as allowed tags.
acceptable_tags = (
    'a', 'abbr', 'acronym', 'address',
    'b', 'big', 'blockquote', 'br',
    'caption', 'center', 'cite', 'code', 'col', 'colgroup',
    'dd', 'del', 'dfn', 'dir', 'div', 'dl', 'dt',
    'em', 'font',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr',
    'i', 'ins', 'kbd',
    'li', 'ol',
    'p', 'pre', 'q',
    's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup',
    'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'tt',
    'u', 'ul', 'var',
)
1820
# Elements whose tokens, and every token nested inside them, are dropped by
# StripFilter instead of being passed on.
strip_completely = ['style', 'script']
22+
class StripFilter(Filter):
    """html5lib filter that drops ``strip_completely`` elements and their content.

    While the token stream is inside an element named in ``strip_completely``
    (at any nesting depth), no tokens are yielded: the element's own start and
    end tags and everything between them are suppressed.  All other tokens
    pass through unchanged.
    """

    def __iter__(self):
        # Build the lookup set once per pass instead of once per token.
        blocked = frozenset(strip_completely)
        open_tags = []
        # Number of currently open elements that are in ``blocked``; tokens
        # are emitted only while this is zero.
        blocked_depth = 0
        for token in Filter.__iter__(self):
            token_type = token["type"]
            if token_type in ("EmptyTag", "StartTag"):
                name = token["name"]
                open_tags.append(name)
                if name in blocked:
                    blocked_depth += 1
            if not blocked_depth:
                yield token
            # An EmptyTag opens and closes in the same token.  The emptiness
            # guard keeps a stray closing token from raising IndexError.
            if token_type in ("EmptyTag", "EndTag") and open_tags:
                if open_tags.pop() in blocked:
                    blocked_depth -= 1
33+
# The strip_completely tags are included in bleach's allowed list so that
# their removal is left to StripFilter.
bleach_tags = list(set(acceptable_tags).union(strip_completely))
cleaner = bleach.sanitizer.Cleaner(
    tags=bleach_tags,
    filters=[StripFilter],
    strip=True,
)
37+
1938def unescape (text ):
2039 """
2140 Returns the given text with ampersands, quotes and angle brackets decoded
@@ -27,13 +46,10 @@ def unescape(text):
2746
# keep_lazy() takes only the result classes and returns a decorator, so it is
# applied as one; passing the function as its first argument would rebind
# remove_tags to that decorator instead of to a wrapped function.
@keep_lazy(six.text_type)
def remove_tags(html, tags):
    """Return ``html`` cleaned by bleach with the given ``tags`` disallowed.

    The allowed elements are ``acceptable_tags`` minus the lower-cased names
    in ``tags``.

    NOTE(review): ``bleach.clean`` is called without ``strip=True``, so per
    bleach's documented default, disallowed tags are escaped rather than
    dropped -- confirm this is the intended meaning of "removed".
    """
    disallowed = {t.lower() for t in tags}
    allowed = set(acceptable_tags) - disallowed
    return bleach.clean(html, tags=allowed)
3352
def sanitize_html(html):
    """Clean ``html`` with the module-level ``cleaner`` and return the result."""
    sanitized = cleaner.clean(html)
    return sanitized
3755
38- def clean_html (html ):
39- return bleach .clean (html )
0 commit comments