Edgewall Software

Ticket #183: fast-path.patch

File fast-path.patch, 12.2 KB (added by alecf@…, 16 years ago)

Patch to make py:match faster, first cut

  • genshi/path.py

     
    181181                 stream against the path
    182182        :rtype: ``function``
    183183        """
    184         paths = [(p, len(p), [0], [], [0] * len(p)) for p in [
    185             (ignore_context and [_DOTSLASHSLASH] or []) + p for p in self.paths
    186         ]]
    187184
     185        # paths is path state that is maintained across calls to _test()
     186        # paths is a list of tuples, one for each segment in the path
     187        paths = []
     188        for p in self.paths:
     189            if ignore_context:
     190                p = [_DOTSLASHSLASH] + p
     191            path = (p, len(p), [0], [], [0] * len(p))
     192            paths.append(path)
     193
    188194        def _test(event, namespaces, variables, updateonly=False):
    189195            kind, data, pos = event[:3]
    190196            retval = None
     
    279285                    if ctxtnode and axis is DESCENDANT_OR_SELF:
    280286                        ctxtnode = False
    281287
    282                 if (retval or not matched) and kind is START and \
     288                if (retval or not matched) and \
     289                        kind is START and \
    283290                        not (axis is DESCENDANT or axis is DESCENDANT_OR_SELF):
    284291                    # If this step is not a closure, it cannot be matched until
    285292                    # the current element is closed... so we need to move the
  • genshi/template/base.py

     
    2525
    2626from genshi.core import Attrs, Stream, StreamEventKind, START, TEXT, _ensure
    2727from genshi.input import ParseError
     28from genshi.template.match import MatchSet
    2829
    2930__all__ = ['Context', 'Template', 'TemplateError', 'TemplateRuntimeError',
    3031           'TemplateSyntaxError', 'BadDirectiveError']
     
    136137        self.frames = deque([data])
    137138        self.pop = self.frames.popleft
    138139        self.push = self.frames.appendleft
    139         self._match_templates = []
     140        self._match_set = MatchSet()
    140141        self._choice_stack = []
    141142
    142143        # Helper functions for use in expressions
  • genshi/template/match.py

     
     1from genshi.core import START
     2from genshi.path import CHILD, LocalNameTest
     3
     4from copy import copy
     5
     6def is_simple_path(path):
     7    """
     8    Is the path merely a tag match like "foo"?
     9    """
     10    if len(path.paths) == 1 and len(path.paths[0]) == 1:
     11        axis, nodetest, predicates = path.paths[0][0]
     12        if (axis is CHILD and
     13            not predicates and
     14            isinstance(nodetest, LocalNameTest)):
     15            return True
     16
     17    return False
     18
     19
     20class MatchSet(object):
     21
     22    def __init__(self, parent=None, exclude=None):
     23        """
     24        If a parent is given, it means this is a wrapper around another
     25        set. Just copy references to member variables in parent, but
     26        also set exclude
     27        """
     28        self.parent = parent
     29        if parent is None:
     30            self.tag_templates = {}
     31            self.other_templates = []
     32            self.exclude = []
     33            if exclude is not None:
     34                self.exclude.append(exclude)
     35        else:
     36            self.tag_templates = parent.tag_templates
     37            self.other_templates = parent.other_templates
     38            self.exclude = copy(parent.exclude)
     39            if exclude is not None:
     40                self.exclude.append(exclude)
     41   
     42    def add(self, match_template):
     43        """
     44        match_template is a tuple the form
     45        test, path, template, hints, namespace, directives
     46        """
     47        path = match_template[1]
     48       
     49        if is_simple_path(path):
     50            # special cache of tag
     51            tag_name = path.paths[0][0][1].name
     52            # setdefault is wasteful
     53            if tag_name not in self.tag_templates:
     54                self.tag_templates[tag_name] = [match_template]
     55            else:
     56                self.tag_templates[tag_name].append(match_template)
     57               
     58        else:
     59            self.other_templates.append(match_template)
     60
     61    def remove(self, match_template):
     62        """
     63        Permanently remove a match_template - mainly for match_once
     64        """
     65        path = match_template[1]
     66       
     67        if is_simple_path(path):
     68            tag_name = path.paths[0][0][1].name
     69            if tag_name in self.tag_templates:
     70                template_list = self.tag_templates[tag_name]
     71                template_list.remove(match_template)
     72                if not template_list:
     73                    del self.tag_templates[tag_name]
     74
     75        else:
     76            self.other_templates.remove(match_template)
     77
     78    def single_match(cls, match_template):
     79        """
     80        Factory for creating a MatchSet with just one match
     81        """
     82        match_set = cls()
     83        match_set.add(match_template)
     84        return match_set
     85    single_match = classmethod(single_match)
     86
     87    def with_exclusion(self, exclude):
     88        """
     89        Factory for creating a MatchSet based on another MatchSet, but
     90        with certain templates excluded
     91        """
     92        cls = self.__class__
     93        new_match_set = cls(parent=self, exclude=exclude)
     94        return new_match_set
     95           
     96    def find_matches(self, event):
     97        """
     98        Return a list of all valid templates that can be used for the given event.
     99        """
     100        kind, data, pos = event[:3]
     101
     102        # todo: get the order right
     103        if kind is START:
     104            tag, attrs = data
     105            if tag.localname in self.tag_templates:
     106            for template in templates[tag.localname]:
     107                if template not in self.exclude:
     108                    yield template
     109
     110        for template in self.other_templates:
     111            if template not in self.exclude:
     112                yield template
     113
     114
     115    def __nonzero__(self):
     116        """
     117        allow this to behave as a list
     118        """
     119        return bool(self.tag_templates or self.other_templates)
     120
     121    def __iter__(self):
     122        """
     123        I don't think we really need this, but it lets us behave like a list
     124        """
     125        for template_list in self.tag_templates.iteritems():
     126            for template in template_list:
     127                yield template
     128        for template in self.other_templates:
     129            yield template
     130
     131    def __str__(self):
     132        parent = ""
     133        if self.parent:
     134            parent = ": child of 0x%x" % id(self.parent)
     135
     136        exclude = ""
     137        if self.exclude:
     138            exclude = " / excluding %d items" % len(self.exclude)
     139           
     140        return "<MatchSet 0x%x %d tag templates, %d other templates%s%s>" % (id(self), len(self.tag_templates), len(self.other_templates), parent, exclude)
  • genshi/template/markup.py

     
    2525from genshi.template.interpolation import interpolate
    2626from genshi.template.directives import *
    2727from genshi.template.text import NewTextTemplate
     28from genshi.template.match import MatchSet
    2829
    2930__all__ = ['MarkupTemplate']
    3031__docformat__ = 'restructuredtext en'
     
    225226        assert len(streams) == 1
    226227        return streams[0]
    227228
    228     def _match(self, stream, ctxt, match_templates=None):
     229    def _match(self, stream, ctxt, match_set=None):
    229230        """Internal stream filter that applies any defined match templates
    230231        to the stream.
    231232        """
    232         if match_templates is None:
    233             match_templates = ctxt._match_templates
     233        if match_set is None:
     234            match_set = ctxt._match_set
    234235
    235236        tail = []
    236237        def _strip(stream):
     
    251252
    252253            # We (currently) only care about start and end events for matching
    253254            # We might care about namespace events in the future, though
    254             if not match_templates or (event[0] is not START and
    255                                        event[0] is not END):
     255            if not match_set or (event[0] is not START and
     256                                 event[0] is not END):
    256257                yield event
    257258                continue
    258259
    259             for idx, (test, path, template, hints, namespaces, directives) \
    260                     in enumerate(match_templates):
    261 
     260            match_candidates = list(match_set.find_matches(event))
     261            for idx, match_template in enumerate(match_candidates):
     262               
     263                (test, path, template, hints, namespaces, directives) = \
     264                    match_template
    262265                if test(event, namespaces, ctxt) is True:
    263266                    if 'match_once' in hints:
    264                         del match_templates[idx]
     267                        match_set.remove(match_template)
     268                        del match_candidates[idx]
    265269                        idx -= 1
    266270
    267271                    # Let the remaining match templates know about the event so
    268272                    # they get a chance to update their internal state
    269                     for test in [mt[0] for mt in match_templates[idx + 1:]]:
     273                    for test in [mt[0] for mt in match_candidates[idx + 1:]]:
    270274                        test(event, namespaces, ctxt, updateonly=True)
    271275
    272276                    # Consume and store all events until an end event
     
    274278                    inner = _strip(stream)
    275279                    if 'match_once' not in hints \
    276280                            and 'not_recursive' not in hints:
    277                         inner = self._match(inner, ctxt, [match_templates[idx]])
     281                        inner = self._match(inner, ctxt,
     282                                            MatchSet.single_match(match_template))
    278283                    content = list(self._include(chain([event], inner, tail),
    279284                                                 ctxt))
    280285
    281                     for test in [mt[0] for mt in match_templates]:
     286                    # Now tell all the match templates about the
     287                    # END event (tail[0])
     288                    for test in [mt[0] for mt in match_candidates]:
    282289                        test(tail[0], namespaces, ctxt, updateonly=True)
    283290
    284291                    # Make the select() function available in the body of the
     
    289296
    290297                    # Recursively process the output
    291298                    template = _apply_directives(template, ctxt, directives)
    292                     remaining = match_templates
     299                    remaining = match_set
    293300                    if 'match_once' not in hints:
    294                         remaining = remaining[:idx] + remaining[idx + 1:]
    295                     for event in self._match(self._exec(
    296                                     self._eval(self._flatten(template, ctxt),
    297                                     ctxt), ctxt), ctxt, remaining):
     301                        # match has not been removed, so we need an exclusion matchset
     302                        remaining = match_set.with_exclusion(match_template)
     303                       
     304                    body = self._exec(self._eval(self._flatten(template, ctxt),
     305                                                 ctxt), ctxt)
     306                    for event in self._match(body, ctxt, remaining):
    298307                        yield event
    299308
    300309                    ctxt.pop()
  • genshi/template/directives.py

     
    450450    attach = classmethod(attach)
    451451
    452452    def __call__(self, stream, ctxt, directives):
    453         ctxt._match_templates.append((self.path.test(ignore_context=True),
    454                                       self.path, list(stream), self.hints,
    455                                       self.namespaces, directives))
     453        ctxt._match_set.add((self.path.test(ignore_context=True),
     454                             self.path, list(stream), self.hints,
     455                             self.namespaces, directives))
    456456        return []
    457457
    458458    def __repr__(self):