Edgewall Software

source: trunk/genshi/filters/tests/test_html.py

Last change on this file was 1246, checked in by hodgestar, 10 years ago

Also allow stripping of unsafe script tags (Python 3.4 parses the second example as a tag whose name is script&xyz).

  • Property svn:eol-style set to native
File size: 26.9 KB
Line 
1# -*- coding: utf-8 -*-
2#
3# Copyright (C) 2006-2009 Edgewall Software
4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
8# are also available at http://genshi.edgewall.org/wiki/License.
9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
12# history and logs, available at http://genshi.edgewall.org/log/.
13
14import doctest
15import unittest
16
17from genshi.input import HTML, ParseError
18from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
19from genshi.template import MarkupTemplate
20
class HTMLFormFillerTestCase(unittest.TestCase):
    """Tests for ``HTMLFormFiller``.

    Each test parses a small form, pipes it through ``HTMLFormFiller`` and
    compares the rendered markup with the expected string.  The expected
    strings show empty elements in self-closed form (``<input .../>``,
    ``<textarea .../>``) even where the input markup spelled them differently.

    Most controls are exercised in three flavours: no data at all, a single
    scalar value (``'bar'``) and a one-element list (``['bar']``).
    """

    # -- <input type="text"> -------------------------------------------------

    def test_fill_input_text_no_value(self):
        # Without data the input must be left untouched.
        html = HTML(u"""<form><p>
          <input type="text" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="text" name="foo"/>
        </p></form>""", html.render())

    def test_fill_input_text_single_value(self):
        # A scalar value becomes the ``value`` attribute.
        html = HTML(u"""<form><p>
          <input type="text" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
        self.assertEqual("""<form><p>
          <input type="text" name="foo" value="bar"/>
        </p></form>""", html.render())

    def test_fill_input_text_multi_value(self):
        # A list value is accepted too; its single item is used.
        html = HTML(u"""<form><p>
          <input type="text" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
        self.assertEqual("""<form><p>
          <input type="text" name="foo" value="bar"/>
        </p></form>""", html.render())

    # -- <input type="hidden"> -----------------------------------------------

    def test_fill_input_hidden_no_value(self):
        # Without data the hidden input must be left untouched.
        html = HTML(u"""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="hidden" name="foo"/>
        </p></form>""", html.render())

    def test_fill_input_hidden_single_value(self):
        # A scalar value becomes the ``value`` attribute.
        html = HTML(u"""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
        self.assertEqual("""<form><p>
          <input type="hidden" name="foo" value="bar"/>
        </p></form>""", html.render())

    def test_fill_input_hidden_multi_value(self):
        # A list value is accepted too; its single item is used.
        html = HTML(u"""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
        self.assertEqual("""<form><p>
          <input type="hidden" name="foo" value="bar"/>
        </p></form>""", html.render())

    # -- <textarea> ----------------------------------------------------------

    def test_fill_textarea_no_value(self):
        # Without data the (empty) textarea stays empty.
        html = HTML(u"""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <textarea name="foo"/>
        </p></form>""", html.render())

    def test_fill_textarea_single_value(self):
        # A scalar value becomes the textarea's text content.
        html = HTML(u"""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
        self.assertEqual("""<form><p>
          <textarea name="foo">bar</textarea>
        </p></form>""", html.render())

    def test_fill_textarea_multi_value(self):
        # A list value is accepted too; its single item is used.
        html = HTML(u"""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
        self.assertEqual("""<form><p>
          <textarea name="foo">bar</textarea>
        </p></form>""", html.render())

    def test_fill_textarea_multiple(self):
        # Ensure that the subsequent textarea doesn't get the data from the
        # first
        html = HTML(u"""<form><p>
          <textarea name="foo"></textarea>
          <textarea name="bar"></textarea>
        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
        self.assertEqual("""<form><p>
          <textarea name="foo">Some text</textarea>
          <textarea name="bar"/>
        </p></form>""", html.render())

    def test_fill_textarea_preserve_original(self):
        # A textarea with no entry in the data keeps its original content.
        html = HTML(u"""<form><p>
          <textarea name="foo"></textarea>
          <textarea name="bar">Original value</textarea>
        </p></form>""") | HTMLFormFiller(data={'foo': 'Some text'})
        self.assertEqual("""<form><p>
          <textarea name="foo">Some text</textarea>
          <textarea name="bar">Original value</textarea>
        </p></form>""", html.render())

    # -- <input type="checkbox"> ---------------------------------------------

    def test_fill_input_checkbox_single_value_auto_no_value(self):
        # No data at all: the checkbox stays unchecked.
        html = HTML(u"""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>""", html.render())

    def test_fill_input_checkbox_single_value_auto(self):
        # Checkbox without a ``value`` attribute: an empty string leaves it
        # unchecked, a non-empty string checks it.
        html = HTML(u"""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': 'on'})).render())

    def test_fill_input_checkbox_single_value_defined(self):
        # Checkbox with an explicit ``value``: checked only when the data
        # matches that value.  The markup is passed as a byte string with an
        # explicit encoding here.
        html = HTML("""<form><p>
          <input type="checkbox" name="foo" value="1" />
        </p></form>""", encoding='ascii')
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())

    def test_fill_input_checkbox_multi_value_auto(self):
        # List data, checkbox without ``value``: an empty list leaves it
        # unchecked, a non-empty list checks it.
        html = HTML("""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""", encoding='ascii')
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': []})).render())
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['on']})).render())

    def test_fill_input_checkbox_multi_value_defined(self):
        # List data, checkbox with ``value``: checked only when the value is
        # contained in the list.
        html = HTML(u"""<form><p>
          <input type="checkbox" name="foo" value="1" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())

    # -- <input type="radio"> ------------------------------------------------

    def test_fill_input_radio_no_value(self):
        # No data at all: the radio button stays unchecked.
        html = HTML(u"""<form><p>
          <input type="radio" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="radio" name="foo"/>
        </p></form>""", html.render())

    def test_fill_input_radio_single_value(self):
        # Checked only when the scalar data equals the button's ``value``.
        html = HTML(u"""<form><p>
          <input type="radio" name="foo" value="1" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': '1'})).render())
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': '2'})).render())

    def test_fill_input_radio_multi_value(self):
        # Checked only when the list data contains the button's ``value``.
        html = HTML(u"""<form><p>
          <input type="radio" name="foo" value="1" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['1']})).render())
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['2']})).render())

    def test_fill_input_radio_empty_string(self):
        # An empty-string value must still match an empty-string datum.
        html = HTML(u"""<form><p>
          <input type="radio" name="foo" value="" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ''})).render())

    def test_fill_input_radio_multi_empty_string(self):
        # Same as above, with the empty string wrapped in a list.
        html = HTML(u"""<form><p>
          <input type="radio" name="foo" value="" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="" checked="checked"/>
        </p></form>""", (html | HTMLFormFiller(data={'foo': ['']})).render())

    # -- <select> / <option> -------------------------------------------------

    def test_fill_select_no_value_auto(self):
        # No data, options without ``value``: nothing gets selected.
        html = HTML(u"""<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_select_no_value_defined(self):
        # No data, options with ``value``: nothing gets selected.
        html = HTML(u"""<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_select_single_value_auto(self):
        # Options without ``value`` are matched on their text content.
        html = HTML(u"""<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
        self.assertEqual("""<form><p>
          <select name="foo">
            <option selected="selected">1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_select_single_value_defined(self):
        # Options with ``value`` are matched on that attribute.
        html = HTML(u"""<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
        self.assertEqual("""<form><p>
          <select name="foo">
            <option value="1" selected="selected">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_select_multi_value_auto(self):
        # Several list items select several options (matched on text).
        html = HTML(u"""<form><p>
          <select name="foo" multiple>
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
        self.assertEqual("""<form><p>
          <select name="foo" multiple="multiple">
            <option selected="selected">1</option>
            <option>2</option>
            <option selected="selected">3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_select_multi_value_defined(self):
        # Several list items select several options (matched on ``value``).
        html = HTML(u"""<form><p>
          <select name="foo" multiple>
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
        self.assertEqual("""<form><p>
          <select name="foo" multiple="multiple">
            <option value="1" selected="selected">1</option>
            <option value="2">2</option>
            <option value="3" selected="selected">3</option>
          </select>
        </p></form>""", html.render())

    def test_fill_option_segmented_text(self):
        # The option text comes from a template with a substitution in it
        # ("foo $x"); the option must still be selected via its ``value``.
        html = MarkupTemplate(u"""<form>
          <select name="foo">
            <option value="1">foo $x</option>
          </select>
        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': '1'})
        self.assertEqual(u"""<form>
          <select name="foo">
            <option value="1" selected="selected">foo 1</option>
          </select>
        </form>""", html.render())

    def test_fill_option_segmented_text_no_value(self):
        # Same templated option text, but without a ``value`` attribute: the
        # data must match the complete rendered text "foo 1 bar".
        html = MarkupTemplate("""<form>
          <select name="foo">
            <option>foo $x bar</option>
          </select>
        </form>""").generate(x=1) | HTMLFormFiller(data={'foo': 'foo 1 bar'})
        self.assertEqual("""<form>
          <select name="foo">
            <option selected="selected">foo 1 bar</option>
          </select>
        </form>""", html.render())

    def test_fill_option_unicode_value(self):
        # A ``value`` written as an entity (&ouml;) must match the non-ASCII
        # character in the data; render without encoding to compare text.
        html = HTML(u"""<form>
          <select name="foo">
            <option value="&ouml;">foo</option>
          </select>
        </form>""") | HTMLFormFiller(data={'foo': u'ö'})
        self.assertEqual(u"""<form>
          <select name="foo">
            <option value="ö" selected="selected">foo</option>
          </select>
        </form>""", html.render(encoding=None))

    # -- <input type="password"> ---------------------------------------------

    def test_fill_input_password_disabled(self):
        # By default password inputs are not filled in.
        html = HTML(u"""<form><p>
          <input type="password" name="pass" />
        </p></form>""") | HTMLFormFiller(data={'pass': 'bar'})
        self.assertEqual("""<form><p>
          <input type="password" name="pass"/>
        </p></form>""", html.render())

    def test_fill_input_password_enabled(self):
        # With ``passwords=True`` password inputs are filled like text ones.
        html = HTML(u"""<form><p>
          <input type="password" name="pass" />
        </p></form>""") | HTMLFormFiller(data={'pass': '1234'}, passwords=True)
        self.assertEqual("""<form><p>
          <input type="password" name="pass" value="1234"/>
        </p></form>""", html.render())
362
363
def StyleSanitizer():
    """Return an ``HTMLSanitizer`` that additionally lets ``style`` through.

    The sanitizer is built with ``HTMLSanitizer.SAFE_ATTRS`` extended by the
    ``style`` attribute, so the CSS-related tests below have something to
    inspect.
    """
    extra_attrs = frozenset(['style'])
    return HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | extra_attrs)
367
368
class HTMLSanitizerTestCase(unittest.TestCase):
    """Tests for ``HTMLSanitizer``.

    Tests that only deal with tags and ordinary attributes use a default
    ``HTMLSanitizer()``; tests that inspect CSS use ``StyleSanitizer()``, which
    also admits the ``style`` attribute.

    Where a unicode result is needed the stream is rendered with
    ``render(encoding=None)`` rather than the Python 2-only ``unicode()``
    builtin, and backslash-bearing markup is written as ``u'...'`` with
    escaped backslashes rather than ``ur'...'`` (a syntax error on Python 3).
    """

    def assert_parse_error_or_equal(self, expected, exploit,
                                    allow_strip=False):
        """Check an exploit string that the parser may or may not accept.

        :param expected: the sanitized output expected if ``exploit`` parses
        :param exploit: the markup to parse and sanitize
        :param allow_strip: if true, an empty sanitized result is accepted as
                            well (the offending markup was removed entirely)

        A ``ParseError`` raised while parsing ``exploit`` also counts as
        success.
        """
        try:
            html = HTML(exploit)
        except ParseError:
            return
        sanitized_html = (html | HTMLSanitizer()).render()
        if not sanitized_html and allow_strip:
            return
        self.assertEqual(expected, sanitized_html)

    def test_sanitize_unchanged(self):
        # Harmless markup passes through (modulo empty-tag normalisation),
        # including a fragment-only href that contains a colon.
        html = HTML(u'<a href="#">fo<br />o</a>')
        self.assertEqual('<a href="#">fo<br/>o</a>',
                         (html | HTMLSanitizer()).render())
        html = HTML(u'<a href="#with:colon">foo</a>')
        self.assertEqual('<a href="#with:colon">foo</a>',
                         (html | HTMLSanitizer()).render())

    def test_sanitize_escape_text(self):
        # Escaped text must stay escaped in the output.
        html = HTML(u'<a href="#">fo&amp;</a>')
        self.assertEqual('<a href="#">fo&amp;</a>',
                         (html | HTMLSanitizer()).render())
        html = HTML(u'<a href="#">&lt;foo&gt;</a>')
        self.assertEqual('<a href="#">&lt;foo&gt;</a>',
                         (html | HTMLSanitizer()).render())

    def test_sanitize_entityref_text(self):
        # A named entity is rendered as the character it stands for.
        html = HTML(u'<a href="#">fo&ouml;</a>')
        self.assertEqual(u'<a href="#">foö</a>',
                         (html | HTMLSanitizer()).render(encoding=None))

    def test_sanitize_escape_attr(self):
        # Escaped attribute values must stay escaped in the output.
        html = HTML(u'<div title="&lt;foo&gt;"></div>')
        self.assertEqual('<div title="&lt;foo&gt;"/>',
                         (html | HTMLSanitizer()).render())

    def test_sanitize_close_empty_tag(self):
        # An unclosed <br> is rendered as a self-closed element.
        html = HTML(u'<a href="#">fo<br>o</a>')
        self.assertEqual('<a href="#">fo<br/>o</a>',
                         (html | HTMLSanitizer()).render())

    def test_sanitize_invalid_entity(self):
        # An unknown entity is treated as literal text and escaped.
        html = HTML(u'&junk;')
        self.assertEqual('&amp;junk;', (html | HTMLSanitizer()).render())

    def test_sanitize_remove_script_elem(self):
        # <script> elements are dropped, whatever their case.
        html = HTML(u'<script>alert("Foo")</script>')
        self.assertEqual('', (html | HTMLSanitizer()).render())
        html = HTML(u'<SCRIPT SRC="http://example.com/"></SCRIPT>')
        self.assertEqual('', (html | HTMLSanitizer()).render())
        # Malformed variants: depending on the parser these raise, are
        # stripped completely, or come out as escaped text.
        src = u'<SCR\0IPT>alert("foo")</SCR\0IPT>'
        self.assert_parse_error_or_equal('&lt;SCR\x00IPT&gt;alert("foo")', src,
                                         allow_strip=True)
        src = u'<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>'
        self.assert_parse_error_or_equal('&lt;SCRIPT&amp;XYZ; '
                                         'SRC="http://example.com/"&gt;', src,
                                         allow_strip=True)

    def test_sanitize_remove_onclick_attr(self):
        # Event-handler attributes are removed.
        html = HTML(u'<div onclick=\'alert("foo")\' />')
        self.assertEqual('<div/>', (html | HTMLSanitizer()).render())

    def test_sanitize_remove_input_password(self):
        # Password inputs are removed from sanitized markup.
        html = HTML(u'<form><input type="password" /></form>')
        self.assertEqual('<form/>', (html | HTMLSanitizer()).render())

    def test_sanitize_remove_comments(self):
        # Comments are removed.
        html = HTML(u'''<div><!-- conditional comment crap --></div>''')
        self.assertEqual('<div/>', (html | HTMLSanitizer()).render())

    def test_sanitize_remove_style_scripts(self):
        sanitizer = StyleSanitizer()
        # Inline style with url() using javascript: scheme
        html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        # Inline style with url() using javascript: scheme, using control char
        html = HTML(u'<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        # Inline style with url() using javascript: scheme, in quotes
        html = HTML(u'<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        # IE expressions in CSS not allowed
        html = HTML(u'<DIV STYLE=\'width: expression(alert("foo"));\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<DIV STYLE=\'width: e/**/xpression(alert("foo"));\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        # Only the offending declaration is dropped; the safe one survives
        html = HTML(u'<DIV STYLE=\'background: url(javascript:alert("foo"));'
                                 'color: #fff\'>')
        self.assertEqual('<div style="color: #fff"/>',
                         (html | sanitizer).render())
        # Inline style with url() using javascript: scheme, using unicode
        # escapes
        html = HTML(u'<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>')
        self.assertEqual('<div/>', (html | sanitizer).render())

    def test_sanitize_remove_style_phishing(self):
        sanitizer = StyleSanitizer()
        # The position property is not allowed
        html = HTML(u'<div style="position:absolute;top:0"></div>')
        self.assertEqual('<div style="top:0"/>', (html | sanitizer).render())
        # Normal margins get passed through
        html = HTML(u'<div style="margin:10px 20px"></div>')
        self.assertEqual('<div style="margin:10px 20px"/>',
                         (html | sanitizer).render())
        # But not negative margins
        html = HTML(u'<div style="margin:-1000px 0 0"></div>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<div style="margin-left:-2000px 0 0"></div>')
        self.assertEqual('<div/>', (html | sanitizer).render())
        html = HTML(u'<div style="margin-left:1em 1em 1em -4000px"></div>')
        self.assertEqual('<div/>', (html | sanitizer).render())

    def test_sanitize_remove_src_javascript(self):
        html = HTML(u'<img src=\'javascript:alert("foo")\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Case-insensitive protocol matching
        html = HTML(u'<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Grave accents (not parsed)
        src = u'<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>'
        self.assert_parse_error_or_equal('<img/>', src)
        # Protocol encoded using UTF-8 numeric entities
        html = HTML(u'<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
                    '&#112;&#116;&#58;alert("foo")\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Protocol encoded using UTF-8 numeric entities without a semicolon
        # (which is allowed because the max number of digits is used)
        html = HTML(u'<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
                    '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
                    '&#0000058alert("foo")\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Protocol encoded using UTF-8 numeric hex entities without a semicolon
        # (which is allowed because the max number of digits is used)
        html = HTML(u'<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
                    '&#x70&#x74&#x3A;alert("foo")\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Embedded tab character in protocol
        html = HTML(u'<IMG SRC=\'jav\tascript:alert("foo");\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())
        # Embedded tab character in protocol, but encoded this time
        html = HTML(u'<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
        self.assertEqual('<img/>', (html | HTMLSanitizer()).render())

    def test_sanitize_expression(self):
        # A CSS expression() removes the whole style attribute.
        html = HTML(u'<div style="top:expression(alert())">XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

    def test_capital_expression(self):
        # Same, spelled in upper case.
        html = HTML(u'<div style="top:EXPRESSION(alert())">XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

    def test_sanitize_url_with_javascript(self):
        # url() with a javascript: target removes the style attribute.
        html = HTML(u'<div style="background-image:url(javascript:alert())">'
                    u'XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

    def test_sanitize_capital_url_with_javascript(self):
        # Same, with URL() spelled in upper case.
        html = HTML(u'<div style="background-image:URL(javascript:alert())">'
                    u'XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

    def test_sanitize_unicode_escapes(self):
        # "expression" hidden behind CSS hex escapes (\72 = r, \000069 = i).
        html = HTML(u'<div style="top:exp\\72 ess\\000069 on(alert())">'
                    u'XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

    def test_sanitize_backslash_without_hex(self):
        # Single backslashes in front of non-hex characters: the style is
        # still removed.
        html = HTML(u'<div style="top:e\\xp\\ression(alert())">XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))
        # Doubled backslashes: the style is kept verbatim.
        html = HTML(u'<div style="top:e\\\\xp\\\\ression(alert())">XSS</div>')
        self.assertEqual(r'<div style="top:e\\xp\\ression(alert())">'
                         'XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

    def test_sanitize_unsafe_props(self):
        # Each of these properties causes the style attribute to be dropped.
        html = HTML(u'<div style="POSITION:RELATIVE">XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

        html = HTML(u'<div style="behavior:url(test.htc)">XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

        html = HTML(u'<div style="-ms-behavior:url(test.htc) url(#obj)">'
                    u'XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

        html = HTML(u"""<div style="-o-link:'javascript:alert(1)';"""
                    u"""-o-link-source:current">XSS</div>""")
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

        html = HTML(u"""<div style="-moz-binding:url(xss.xbl)">XSS</div>""")
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

    def test_sanitize_negative_margin(self):
        # Negative margins are rejected, in longhand and shorthand form.
        html = HTML(u'<div style="margin-top:-9999px">XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))
        html = HTML(u'<div style="margin:0 -9999px">XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

    def test_sanitize_css_hack(self):
        # Property names carrying a "*" or "_" prefix are rejected.
        html = HTML(u'<div style="*position:static">XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

        html = HTML(u'<div style="_margin:-10px">XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

    def test_sanitize_property_name(self):
        # Of the four declarations only the first two survive, re-joined
        # with "; ".
        html = HTML(u'<div style="display:none;border-left-color:red;'
                    u'user_defined:1;-moz-user-selct:-moz-all">prop</div>')
        self.assertEqual('<div style="display:none; border-left-color:red'
                         '">prop</div>',
                         (html | StyleSanitizer()).render(encoding=None))

    def test_sanitize_unicode_expression(self):
        # The look-alike letters are spelled as \uXXXX escapes so that the
        # test data cannot silently degrade to plain ASCII.
        # Fullwidth small letters
        html = HTML(u'<div style="top:\uff45\uff58\uff50\uff52\uff45\uff53'
                    u'\uff53\uff49\uff4f\uff4e(alert())">'
                    u'XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))
        # Fullwidth capital letters
        html = HTML(u'<div style="top:\uff25\uff38\uff30\uff32\uff25\uff33'
                    u'\uff33\uff29\uff2f\uff2e(alert())">'
                    u'XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))
        # IPA extensions
        html = HTML(u'<div style="top:expʀessɪoɴ(alert())">'
                    u'XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))

    def test_sanitize_unicode_url(self):
        # IPA extensions
        html = HTML(u'<div style="background-image:uʀʟ(javascript:alert())">'
                    u'XSS</div>')
        self.assertEqual('<div>XSS</div>',
                         (html | StyleSanitizer()).render(encoding=None))
611
612
def suite():
    """Build the complete test suite for this module.

    The suite contains, in order, the doctests of the module that defines
    ``HTMLFormFiller``, then every ``test*`` method of
    ``HTMLFormFillerTestCase`` and of ``HTMLSanitizerTestCase``.

    :return: a ``unittest.TestSuite``
    """
    # ``unittest.makeSuite`` is deprecated (and gone in Python 3.13); the
    # default loader collects the same ``test``-prefixed methods.
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    tests = unittest.TestSuite()
    tests.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))
    tests.addTest(load(HTMLFormFillerTestCase))
    tests.addTest(load(HTMLSanitizerTestCase))
    return tests
619
620
if __name__ == '__main__':
    # Run via suite() (looked up by name) so that the doctests added there
    # are executed together with the two TestCase classes.
    unittest.main(defaultTest='suite')
Note: See TracBrowser for help on using the repository browser.