Edgewall Software

source: trunk/genshi/util.py

Last change on this file was 1156, checked in by hodgestar, 13 years ago

Merge r1139 from py3k: add compatibility functions for dealing with python 3; factor existing compatibility functions out from genshi utils.

  • Property svn:eol-style set to native
File size: 6.7 KB
RevLine 
[339]1# -*- coding: utf-8 -*-
2#
[1077]3# Copyright (C) 2006-2009 Edgewall Software
[339]4# All rights reserved.
5#
6# This software is licensed as described in the file COPYING, which
7# you should have received as part of this distribution. The terms
8# are also available at http://genshi.edgewall.org/wiki/License.
9#
10# This software consists of voluntary contributions made by many
11# individuals. For the exact contribution history, see the revision
12# history and logs, available at http://genshi.edgewall.org/log/.
13
14"""Various utility classes and functions."""
15
[1082]16import htmlentitydefs as entities
[485]17import re
[339]18
[1156]19from compat import any, all, stringrepr
20
[517]21__docformat__ = 'restructuredtext en'
[485]22
[517]23
class LRUCache(dict):
    """A dictionary-like object that stores only a certain number of items, and
    discards its least recently used item when full.

    >>> cache = LRUCache(3)
    >>> cache['A'] = 0
    >>> cache['B'] = 1
    >>> cache['C'] = 2
    >>> len(cache)
    3

    >>> cache['A']
    0

    Adding new items to the cache does not increase its size. Instead, the least
    recently used item is dropped:

    >>> cache['D'] = 3
    >>> len(cache)
    3
    >>> 'B' in cache
    False

    Iterating over the cache returns the keys, starting with the most recently
    used:

    >>> for key in cache:
    ...     print(key)
    D
    A
    C

    Items can also be looked up with `get()`, which counts as a use of the
    item and returns the default for keys that are not cached:

    >>> cache.get('C')
    2
    >>> cache.get('B', 'n/a')
    'n/a'

    Note that the items live in a private dictionary, not in the storage of
    the ``dict`` base class. Only the operations defined by this class
    (item access and assignment, ``get``, ``in``, ``len`` and iteration) are
    backed by the cache; other inherited ``dict`` methods operate on the
    unused base storage.

    This code is based on the LRUCache class from ``myghtyutils.util``, written
    by Mike Bayer and released under the MIT license. See:

      http://svn.myghty.org/myghtyutils/trunk/lib/myghtyutils/util.py
    """

    class _Item(object):
        """Node of the doubly linked list that records the usage order."""

        def __init__(self, key, value):
            # `prv` points towards the head (more recently used), `nxt`
            # towards the tail (less recently used)
            self.prv = self.nxt = None
            self.key = key
            self.value = value

        def __repr__(self):
            return repr(self.value)

    def __init__(self, capacity):
        """Create an empty cache.

        :param capacity: the maximum number of items to keep in the cache
        """
        self._dict = dict() # maps keys to `_Item` nodes
        self.capacity = capacity
        self.head = None # most recently used item
        self.tail = None # least recently used item

    def __contains__(self, key):
        # Membership tests do not count as a use of the item
        return key in self._dict

    def __iter__(self):
        """Iterate over the keys, starting with the most recently used."""
        cur = self.head
        while cur is not None:
            yield cur.key
            cur = cur.nxt

    def __len__(self):
        return len(self._dict)

    def __getitem__(self, key):
        """Return the value stored for `key` and mark it as most recently used.

        :raise KeyError: if the key is not in the cache
        """
        item = self._dict[key]
        self._update_item(item)
        return item.value

    def __setitem__(self, key, value):
        """Store `value` under `key` and mark it as most recently used."""
        item = self._dict.get(key)
        if item is None:
            item = self._Item(key, value)
            self._dict[key] = item
            self._insert_item(item)
        else:
            item.value = value
            self._update_item(item)
            # The number of items did not change, but `capacity` is a public
            # attribute that may have been lowered in the meantime
            self._manage_size()

    def __repr__(self):
        return repr(self._dict)

    def get(self, key, default=None):
        """Return the value stored for `key`, or `default` if the key is not
        in the cache.

        A successful lookup marks the item as most recently used, exactly
        like ``cache[key]`` does.

        :param key: the key to look up
        :param default: the value to return when the key is not cached
        """
        try:
            return self[key]
        except KeyError:
            return default

    def _insert_item(self, item):
        """Link a new item in at the head of the list, then evict if needed."""
        item.prv = None
        item.nxt = self.head
        if self.head is not None:
            self.head.prv = item
        else:
            # First item in an empty list is both head and tail
            self.tail = item
        self.head = item
        self._manage_size()

    def _manage_size(self):
        """Drop items from the tail until the cache fits its capacity."""
        while len(self._dict) > self.capacity:
            oldest = self.tail
            del self._dict[oldest.key]
            if oldest is not self.head:
                self.tail = oldest.prv
                self.tail.nxt = None
            else:
                # That was the only item, the list is empty now
                self.head = self.tail = None

    def _update_item(self, item):
        """Move an item that is already linked to the head of the list."""
        if self.head is item:
            return

        # Unlink the item; it is not the head, so it has a predecessor
        prv = item.prv
        prv.nxt = item.nxt
        if item.nxt is not None:
            item.nxt.prv = prv
        else:
            self.tail = prv

        # Link it back in front of the current head
        item.prv = None
        item.nxt = self.head
        self.head.prv = item
        self.head = item
[437]141
142
def flatten(items):
    """Flattens a potentially nested sequence into a flat list.

    Only ``frozenset``, ``list``, ``set`` and ``tuple`` items are expanded;
    any other item (including strings) is copied to the result as is.

    :param items: the sequence to flatten
    :return: a new list with the non-sequence items in iteration order
    :raise ValueError: if a nested sequence contains itself

    >>> flatten((1, 2))
    [1, 2]
    >>> flatten([1, (2, 3), 4])
    [1, 2, 3, 4]
    >>> flatten([1, (2, [3, 4]), 5])
    [1, 2, 3, 4, 5]
    """
    nested_types = (frozenset, list, set, tuple)
    retval = []
    # An explicit stack of (identity, iterator) pairs is used instead of
    # recursion, so the nesting depth is not limited by the interpreter's
    # recursion limit and no temporary list is built per nesting level
    pending = [(id(items), iter(items))]
    # Identities of the sequences currently being expanded, to detect cycles
    active = set([id(items)])
    while pending:
        for item in pending[-1][1]:
            if isinstance(item, nested_types):
                ident = id(item)
                if ident in active:
                    raise ValueError('cannot flatten a sequence that '
                                     'contains itself')
                active.add(ident)
                pending.append((ident, iter(item)))
                # Descend; the parent iterator keeps its position and is
                # resumed once the nested sequence is exhausted
                break
            retval.append(item)
        else:
            # The innermost sequence is exhausted
            ident = pending.pop()[0]
            active.discard(ident)
    return retval
[485]162
[1075]163
def plaintext(text, keeplinebreaks=True):
    """Strip all markup from the text, returning only its plain content.

    Tags are removed first, then entities are replaced by the characters they
    stand for:

    >>> plaintext('<b>1 &lt; 2</b>')
    u'1 < 2'

    Line breaks are preserved by default. Pass ``False`` for the
    `keeplinebreaks` parameter to have every newline replaced by a single
    space:

    >>> plaintext('''<b>1
    ... &lt;
    ... 2</b>''', keeplinebreaks=False)
    u'1 < 2'

    :param text: the text to convert to plain text
    :param keeplinebreaks: whether line breaks in the text should be kept intact
    :return: the text with tags and entities removed
    """
    without_tags = striptags(text)
    result = stripentities(without_tags)
    if keeplinebreaks:
        return result
    return result.replace('\n', ' ')
186
[1075]187
# Group 1 captures a numeric reference (decimal, or hexadecimal with a leading
# ``x``/``X``; the trailing semicolon is optional), group 2 the name of a
# character entity (the trailing semicolon is required).
_STRIPENTITIES_RE = re.compile(r'&(?:#((?:\d+)|(?:[xX][0-9a-fA-F]+));?|(\w+);)')


def stripentities(text, keepxmlentities=False):
    """Return a copy of the given text with any character or numeric entities
    replaced by the equivalent Unicode characters.

    >>> stripentities('1 &lt; 2')
    u'1 < 2'
    >>> stripentities('more &hellip;')
    u'more \u2026'
    >>> stripentities('&#8230;')
    u'\u2026'
    >>> stripentities('&#x2026;')
    u'\u2026'
    >>> stripentities('&#X2026;')
    u'\u2026'

    If the `keepxmlentities` parameter is provided and is a truth value, the
    core XML entities (&amp;, &apos;, &gt;, &lt; and &quot;) are left intact.

    >>> stripentities('1 &lt; 2 &hellip;', keepxmlentities=True)
    u'1 &lt; 2 \u2026'

    Character entities with an unknown name are reduced to the bare name, or,
    if `keepxmlentities` is set, kept with their ampersand escaped as
    ``&amp;``.

    :param text: the text in which to replace the entities
    :param keepxmlentities: whether the core XML entities should be left intact
    :return: the text with the entities replaced
    """
    def _replace_entity(match):
        if match.group(1): # numeric entity
            ref = match.group(1)
            # The pattern accepts an upper case ``X`` prefix as well, so both
            # spellings must be treated as hexadecimal here
            if ref[0] in 'xX':
                ref = int(ref[1:], 16)
            else:
                ref = int(ref, 10)
            return unichr(ref)
        else: # character entity
            ref = match.group(2)
            if keepxmlentities and ref in ('amp', 'apos', 'gt', 'lt', 'quot'):
                return '&%s;' % ref
            try:
                return unichr(entities.name2codepoint[ref])
            except KeyError:
                if keepxmlentities:
                    return '&amp;%s;' % ref
                else:
                    return ref
    return _STRIPENTITIES_RE.sub(_replace_entity, text)
228
[1075]229
# Comments are matched first so that markup inside of them is removed together
# with the comment. DOTALL lets a comment span multiple lines; without it a
# multi-line comment containing ``>`` would only be stripped up to that
# character.
_STRIPTAGS_RE = re.compile(r'(<!--.*?-->|<[^>]*>)', re.DOTALL)


def striptags(text):
    """Return a copy of the text with any XML/HTML tags removed.

    >>> striptags('<span>Foo</span> bar')
    'Foo bar'
    >>> striptags('<span class="bar">Foo</span>')
    'Foo'
    >>> striptags('Foo<br />')
    'Foo'

    HTML/XML comments are stripped, too:

    >>> striptags('<!-- <blub>hehe</blah> -->test')
    'test'

    This includes comments that span more than one line:

    >>> striptags('''<!-- <blub>
    ... hehe</blah> -->test''')
    'test'

    :param text: the string to remove tags from
    :return: the text with tags removed
    """
    return _STRIPTAGS_RE.sub('', text)
[1079]250
Note: See TracBrowser for help on using the repository browser.