Changeset 1174
- Timestamp:
- Sep 3, 2011, 12:01:42 AM (12 years ago)
- Location:
- trunk/genshi/filters
- Files:
- 2 edited
- html.py (modified) (7 diffs)
- tests/test_html.py (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/genshi/filters/html.py
r1158 r1174 231 231 :warn: Note that this special processing of CSS is currently only applied to 232 232 style attributes, **not** style elements. 233 " ""233 " """ 234 234 235 235 SAFE_TAGS = frozenset(['a', 'abbr', 'acronym', 'address', 'area', 'b', … … 255 255 'type', 'usemap', 'valign', 'value', 'vspace', 'width']) 256 256 257 SAFE_CSS = frozenset([ 258 # CSS 3 properties <http://www.w3.org/TR/CSS/#properties> 259 'background', 'background-attachment', 'background-color', 260 'background-image', 'background-position', 'background-repeat', 261 'border', 'border-bottom', 'border-bottom-color', 262 'border-bottom-style', 'border-bottom-width', 'border-collapse', 263 'border-color', 'border-left', 'border-left-color', 264 'border-left-style', 'border-left-width', 'border-right', 265 'border-right-color', 'border-right-style', 'border-right-width', 266 'border-spacing', 'border-style', 'border-top', 'border-top-color', 267 'border-top-style', 'border-top-width', 'border-width', 'bottom', 268 'caption-side', 'clear', 'clip', 'color', 'content', 269 'counter-increment', 'counter-reset', 'cursor', 'direction', 'display', 270 'empty-cells', 'float', 'font', 'font-family', 'font-size', 271 'font-style', 'font-variant', 'font-weight', 'height', 'left', 272 'letter-spacing', 'line-height', 'list-style', 'list-style-image', 273 'list-style-position', 'list-style-type', 'margin', 'margin-bottom', 274 'margin-left', 'margin-right', 'margin-top', 'max-height', 'max-width', 275 'min-height', 'min-width', 'opacity', 'orphans', 'outline', 276 'outline-color', 'outline-style', 'outline-width', 'overflow', 277 'padding', 'padding-bottom', 'padding-left', 'padding-right', 278 'padding-top', 'page-break-after', 'page-break-before', 279 'page-break-inside', 'quotes', 'right', 'table-layout', 280 'text-align', 'text-decoration', 'text-indent', 'text-transform', 281 'top', 'unicode-bidi', 'vertical-align', 'visibility', 'white-space', 282 'widows', 'width', 'word-spacing', 'z-index', 
283 ]) 284 257 285 SAFE_SCHEMES = frozenset(['file', 'ftp', 'http', 'https', 'mailto', None]) 258 286 … … 261 289 262 290 def __init__(self, safe_tags=SAFE_TAGS, safe_attrs=SAFE_ATTRS, 263 safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS): 291 safe_schemes=SAFE_SCHEMES, uri_attrs=URI_ATTRS, 292 safe_css=SAFE_CSS): 264 293 """Create the sanitizer. 265 294 … … 272 301 """ 273 302 self.safe_tags = safe_tags 274 "The set of tag names that are considered safe."303 # The set of tag names that are considered safe. 275 304 self.safe_attrs = safe_attrs 276 "The set of attribute names that are considered safe." 305 # The set of attribute names that are considered safe. 306 self.safe_css = safe_css 307 # The set of CSS properties that are considered safe. 277 308 self.uri_attrs = uri_attrs 278 "The set of names of attributes that may contain URIs."309 # The set of names of attributes that may contain URIs. 279 310 self.safe_schemes = safe_schemes 280 "The set of URI schemes that are considered safe." 311 # The set of URI schemes that are considered safe. 
312 313 # IE6 <http://heideri.ch/jso/#80> 314 _EXPRESSION_SEARCH = re.compile(u""" 315 [eE 316 \uFF25 # FULLWIDTH LATIN CAPITAL LETTER E 317 \uFF45 # FULLWIDTH LATIN SMALL LETTER E 318 ] 319 [xX 320 \uFF38 # FULLWIDTH LATIN CAPITAL LETTER X 321 \uFF58 # FULLWIDTH LATIN SMALL LETTER X 322 ] 323 [pP 324 \uFF30 # FULLWIDTH LATIN CAPITAL LETTER P 325 \uFF50 # FULLWIDTH LATIN SMALL LETTER P 326 ] 327 [rR 328 \u0280 # LATIN LETTER SMALL CAPITAL R 329 \uFF32 # FULLWIDTH LATIN CAPITAL LETTER R 330 \uFF52 # FULLWIDTH LATIN SMALL LETTER R 331 ] 332 [eE 333 \uFF25 # FULLWIDTH LATIN CAPITAL LETTER E 334 \uFF45 # FULLWIDTH LATIN SMALL LETTER E 335 ] 336 [sS 337 \uFF33 # FULLWIDTH LATIN CAPITAL LETTER S 338 \uFF53 # FULLWIDTH LATIN SMALL LETTER S 339 ]{2} 340 [iI 341 \u026A # LATIN LETTER SMALL CAPITAL I 342 \uFF29 # FULLWIDTH LATIN CAPITAL LETTER I 343 \uFF49 # FULLWIDTH LATIN SMALL LETTER I 344 ] 345 [oO 346 \uFF2F # FULLWIDTH LATIN CAPITAL LETTER O 347 \uFF4F # FULLWIDTH LATIN SMALL LETTER O 348 ] 349 [nN 350 \u0274 # LATIN LETTER SMALL CAPITAL N 351 \uFF2E # FULLWIDTH LATIN CAPITAL LETTER N 352 \uFF4E # FULLWIDTH LATIN SMALL LETTER N 353 ] 354 """, re.VERBOSE).search 355 356 # IE6 <http://openmya.hacker.jp/hasegawa/security/expression.txt> 357 # 7) Particular bit of Unicode characters 358 _URL_FINDITER = re.compile( 359 u'[Uu][Rr\u0280][Ll\u029F]\s*\(([^)]+)').finditer 281 360 282 361 def __call__(self, stream): … … 337 416 :since: version 0.6 338 417 """ 339 if propname == 'position':418 if propname not in self.safe_css: 340 419 return False 341 420 if propname.startswith('margin') and '-' in value: … … 431 510 continue 432 511 is_evil = False 433 if 'expression' in value:512 if self._EXPRESSION_SEARCH(value): 434 513 is_evil = True 435 for match in re.finditer(r'url\s*\(([^)]+)',value):514 for match in self._URL_FINDITER(value): 436 515 if not self.is_safe_uri(match.group(1)): 437 516 is_evil = True … … 442 521 443 522 _NORMALIZE_NEWLINES = re.compile(r'\r\n').sub 444 
_UNICODE_ESCAPE = re.compile(r'\\([0-9a-fA-F]{1,6})\s?').sub 523 _UNICODE_ESCAPE = re.compile( 524 r"""\\([0-9a-fA-F]{1,6})\s?|\\([^\r\n\f0-9a-fA-F'"{};:()#*])""", 525 re.UNICODE).sub 445 526 446 527 def _replace_unicode_escapes(self, text): 447 528 def _repl(match): 448 return unichr(int(match.group(1), 16)) 529 t = match.group(1) 530 if t: 531 return unichr(int(t, 16)) 532 t = match.group(2) 533 if t == '\\': 534 return r'\\' 535 else: 536 return t 449 537 return self._UNICODE_ESCAPE(_repl, self._NORMALIZE_NEWLINES('\n', text)) 450 538 -
trunk/genshi/filters/tests/test_html.py
r1158 r1174 362 362 363 363 364 def StyleSanitizer(): 365 safe_attrs = HTMLSanitizer.SAFE_ATTRS | frozenset(['style']) 366 return HTMLSanitizer(safe_attrs=safe_attrs) 367 368 364 369 class HTMLSanitizerTestCase(unittest.TestCase): 365 370 … … 421 426 422 427 def test_sanitize_remove_style_scripts(self): 423 sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))428 sanitizer = StyleSanitizer() 424 429 # Inline style with url() using javascript: scheme 425 430 html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>') … … 454 459 455 460 def test_sanitize_remove_style_phishing(self): 456 sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))461 sanitizer = StyleSanitizer() 457 462 # The position property is not allowed 458 463 html = HTML(u'<div style="position:absolute;top:0"></div>') … … 501 506 self.assertEquals('<img/>', (html | HTMLSanitizer()).render()) 502 507 508 def test_sanitize_expression(self): 509 html = HTML(ur'<div style="top:expression(alert())">XSS</div>') 510 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 511 512 def test_capital_expression(self): 513 html = HTML(ur'<div style="top:EXPRESSION(alert())">XSS</div>') 514 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 515 516 def test_sanitize_url_with_javascript(self): 517 html = HTML(u'<div style="background-image:url(javascript:alert())">' 518 u'XSS</div>') 519 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 520 521 def test_sanitize_capital_url_with_javascript(self): 522 html = HTML(u'<div style="background-image:URL(javascript:alert())">' 523 u'XSS</div>') 524 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 525 526 def test_sanitize_unicode_escapes(self): 527 html = HTML(ur'<div style="top:exp\72 ess\000069 on(alert())">' 528 ur'XSS</div>') 529 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 530 531 def 
test_sanitize_backslash_without_hex(self): 532 html = HTML(ur'<div style="top:e\xp\ression(alert())">XSS</div>') 533 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 534 html = HTML(ur'<div style="top:e\\xp\\ression(alert())">XSS</div>') 535 self.assertEqual(r'<div style="top:e\\xp\\ression(alert())">' 536 'XSS</div>', 537 unicode(html | StyleSanitizer())) 538 539 def test_sanitize_unsafe_props(self): 540 html = HTML(u'<div style="POSITION:RELATIVE">XSS</div>') 541 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 542 543 html = HTML(u'<div style="behavior:url(test.htc)">XSS</div>') 544 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 545 546 html = HTML(u'<div style="-ms-behavior:url(test.htc) url(#obj)">' 547 u'XSS</div>') 548 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 549 550 html = HTML(u"""<div style="-o-link:'javascript:alert(1)';""" 551 u"""-o-link-source:current">XSS</div>""") 552 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 553 554 html = HTML(u"""<div style="-moz-binding:url(xss.xbl)">XSS</div>""") 555 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 556 557 def test_sanitize_negative_margin(self): 558 html = HTML(u'<div style="margin-top:-9999px">XSS</div>') 559 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 560 html = HTML(u'<div style="margin:0 -9999px">XSS</div>') 561 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 562 563 def test_sanitize_css_hack(self): 564 html = HTML(u'<div style="*position:static">XSS</div>') 565 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 566 567 html = HTML(u'<div style="_margin:-10px">XSS</div>') 568 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 569 570 def test_sanitize_property_name(self): 571 html = HTML(u'<div style="display:none;border-left-color:red;' 572 
u'user_defined:1;-moz-user-selct:-moz-all">prop</div>') 573 self.assertEqual('<div style="display:none; border-left-color:red' 574 '">prop</div>', 575 unicode(html | StyleSanitizer())) 576 577 def test_sanitize_unicode_expression(self): 578 # Fullwidth small letters 579 html = HTML(u'<div style="top:ｅｘｐｒｅｓｓｉｏｎ(alert())">' 580 u'XSS</div>') 581 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 582 # Fullwidth capital letters 583 html = HTML(u'<div style="top:ＥＸＰＲＥＳＳＩＯＮ(alert())">' 584 u'XSS</div>') 585 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 586 # IPA extensions 587 html = HTML(u'<div style="top:expʀessɪoɴ(alert())">' 588 u'XSS</div>') 589 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 590 591 def test_sanitize_unicode_url(self): 592 # IPA extensions 593 html = HTML(u'<div style="background-image:uʀʟ(javascript:alert())">' 594 u'XSS</div>') 595 self.assertEqual('<div>XSS</div>', unicode(html | StyleSanitizer())) 596 503 597 504 598 def suite():
Note: See TracChangeset for help on using the changeset viewer.
