Edgewall Software

Ticket #392: xpath_engine_mem_opt.patch

File xpath_engine_mem_opt.patch, 9.4 KB (added by Carsten Klein <carsten.klein@…>, 13 years ago)
  • path.py

     
    8282
    8383class GenericStrategy(object):
    8484
    85     @classmethod
    86     def supports(cls, path):
     85    def supports(self, path):
    8786        return True
    8887
    89     def __init__(self, path):
    90         self.path = path
    91 
    92     def test(self, ignore_context):
    93         p = self.path
     88    def test(self, path, ignore_context):
     89        p = path
    9490        if ignore_context:
    9591            if p[0][0] is ATTRIBUTE:
    9692                steps = [_DOTSLASHSLASH] + p
     
    236232class SimplePathStrategy(object):
    237233    """Strategy for path with only local names, attributes and text nodes."""
    238234
    239     @classmethod
    240     def supports(cls, path):
     235    def supports(self, path):
    241236        if path[0][0] is ATTRIBUTE:
    242237            return False
    243238        allowed_tests = (LocalNameTest, CommentNodeTest, TextNodeTest)
     
    248243                return False
    249244        return True
    250245
    251     def __init__(self, path):
     246    def __init__(self):
     247        self._path_fragments = {}
     248
     249    def _init_or_get_path_fragments(self, path):
    252250        # fragments is list of tuples (fragment, pi, attr, self_beginning)
    253251        # fragment is list of nodetests for fragment of path with only
    254252        # child:: axes between
     
    256254        # attr is attribute nodetest if fragment ends with @ and None otherwise
    257255        # self_beginning is True if axis for first fragment element
    258256        # was self (first fragment) or descendant-or-self (farther fragment)
    259         self.fragments = []
    260257
     258        prepr = repr(path)
     259        if prepr in self._path_fragments:
     260            return self._path_fragments[prepr]
     261
    261262        self_beginning = False
     263        fragments = []
    262264        fragment = []
    263265
    264266        def nodes_equal(node1, node2):
     
    293295                    # the same as previous one
    294296                    # for example child::a/self::b is always wrong
    295297                    if axis[1] != fragment[-1][1]:
    296                         self.fragments = None
     298                        fragments = None
    297299                        return
    298300                else:
    299301                    self_beginning = True
     
    302304                fragment.append(axis[1])
    303305            elif axis[0] is ATTRIBUTE:
    304306                pi = calculate_pi(fragment)
    305                 self.fragments.append((fragment, pi, axis[1], self_beginning))
     307                fragments.append((fragment, pi, axis[1], self_beginning))
     308                self._path_fragments[prepr] = fragments
    306309                # attribute has always to be at the end, so we can jump out
    307                 return
     310                return fragments
    308311            else:
    309312                pi = calculate_pi(fragment)
    310                 self.fragments.append((fragment, pi, None, self_beginning))
     313                fragments.append((fragment, pi, None, self_beginning))
    311314                fragment = [axis[1]]
    312315                if axis[0] is DESCENDANT:
    313316                    self_beginning = False
    314317                else: # DESCENDANT_OR_SELF
    315318                    self_beginning = True
    316319        pi = calculate_pi(fragment)
    317         self.fragments.append((fragment, pi, None, self_beginning))
     320        fragments.append((fragment, pi, None, self_beginning))
     321        self._path_fragments[prepr] = fragments
     322        return fragments
    318323
    319     def test(self, ignore_context):
     324    def test(self, path, ignore_context):
    320325        # stack of triples (fid, p, ic)
    321326        # fid is index of current fragment
    322327        # p is position in this fragment
    323328        # ic is if we ignore context in this fragment
     329
    324330        stack = []
    325331        stack_push = stack.append
    326332        stack_pop = stack.pop
    327         frags = self.fragments
     333        frags = self._init_or_get_path_fragments(path)
    328334        frags_len = len(frags)
    329335
    330336        def _test(event, namespaces, variables, updateonly=False):
    331337            # expression found impossible during init
    332338            if frags is None:
    333                 return None
     339                return False
    334340
    335341            kind, data, pos = event[:3]
    336342
     
    338344            if kind is END:
    339345                if stack:
    340346                    stack_pop()
    341                 return None
     347                return False
    342348            if kind is START_NS or kind is END_NS \
    343349                    or kind is START_CDATA or kind is END_CDATA:
    344                 return None
     350                return False
    345351
    346352            if not stack:
    347353                # root node, nothing on stack, special case
     
    359365                if not frags[fid][3] and (not ignore_context or fid > 0):
    360366                    # axis is not self-beginning, we have to skip this node
    361367                    stack_push((fid, p, ic))
    362                     return None
     368                    return False
    363369            else:
    364370                # take position of parent
    365371                fid, p, ic = stack[-1]
     
    389395                # there was no match in fragment not ignoring context
    390396                if kind is START:
    391397                    stack_push((fid, p, ic))
    392                 return None
     398                return False
    393399
    394400            if ic:
    395401                # we are in fragment ignoring context
     
    435441                    return attrib(kind, data, pos, namespaces, variables)
    436442                return True
    437443
    438             return None
     444            return False
    439445
    440446        return _test
    441447
    442448
    443449class SingleStepStrategy(object):
    444450
    445     @classmethod
    446     def supports(cls, path):
     451    def supports(self, path):
    447452        return len(path) == 1
    448453
    449     def __init__(self, path):
    450         self.path = path
    451 
    452     def test(self, ignore_context):
    453         steps = self.path
     454    def test(self, path, ignore_context):
     455        steps = path
    454456        if steps[0][0] is ATTRIBUTE:
    455457            steps = [_DOTSLASH] + steps
    456         select_attr = steps[-1][0] is ATTRIBUTE and steps[-1][1] or None
     458        select_attr = steps[-1][0] is ATTRIBUTE and steps[-1][1] or False
    457459
    458460        # for every position in expression stores counters' list
    459461        # it is used for position based predicates
     
    467469            if kind is END:
    468470                if not ignore_context:
    469471                    depth[0] -= 1
    470                 return None
     472                return False
    471473            elif kind is START_NS or kind is END_NS \
    472474                    or kind is START_CDATA or kind is END_CDATA:
    473475                # should we make namespaces work?
    474                 return None
     476                return False
    475477
    476478            if not ignore_context:
    477479                outside = (steps[0][0] is SELF and depth[0] != 0) \
     
    480482                if kind is START:
    481483                    depth[0] += 1
    482484                if outside:
    483                     return None
     485                    return False
    484486
    485487            axis, nodetest, predicates = steps[0]
    486488            if not nodetest(kind, data, pos, namespaces, variables):
    487                 return None
     489                return False
    488490
    489491            if predicates:
    490492                cnum = 0
     
    500502                            pretval = False
    501503                        cnum += 1
    502504                    if not pretval:
    503                          return None
     505                         return False
    504506
    505507            if select_attr:
    506508                return select_attr(kind, data, pos, namespaces, variables)
     
    509511
    510512        return _test
    511513
     514STRATEGY_INSTANCES = []
     515STRATEGIES = (SingleStepStrategy, SimplePathStrategy, GenericStrategy)
    512516
     517for strategy_class in STRATEGIES:
     518    STRATEGY_INSTANCES.append(strategy_class())
     519
    513520class Path(object):
    514521    """Implements basic XPath support on streams.
    515522   
     
    518525    extracting a substream matching that path.
    519526    """
    520527
    521     STRATEGIES = (SingleStepStrategy, SimplePathStrategy, GenericStrategy)
    522 
    523528    def __init__(self, text, filename=None, lineno=-1):
    524529        """Create the path object from a string.
    525530       
     
    530535        """
    531536        self.source = text
    532537        self.paths = PathParser(text, filename, lineno).parse()
    533         self.strategies = []
     538        self.path_strategies = []
    534539        for path in self.paths:
    535             for strategy_class in self.STRATEGIES:
    536                 if strategy_class.supports(path):
    537                     self.strategies.append(strategy_class(path))
     540            for strategy in STRATEGY_INSTANCES:
     541                if strategy.supports(path):
     542                    self.path_strategies.append((path, strategy))
    538543                    break
    539544            else:
    540                 raise NotImplemented('No strategy found for path')
     545                raise NotImplemented('No strategy found for path "%s" in line "%s" of template "%s"' % (path, lineno, filename))
    541546
    542547    def __repr__(self):
    543548        paths = []
     
    628633                 stream against the path
    629634        :rtype: ``function``
    630635        """
    631         tests = [s.test(ignore_context) for s in self.strategies]
     636        tests = [strategy.test(path, ignore_context) for path, strategy in self.path_strategies]
    632637        if len(tests) == 1:
    633638            return tests[0]
    634639
    635640        def _multi(event, namespaces, variables, updateonly=False):
    636641            retval = None
    637642            for test in tests:
    638                 val = test(event, namespaces, variables, updateonly=updateonly)
    639                 if retval is None:
    640                     retval = val
    641             return retval
     643                if test(event, namespaces, variables, updateonly=updateonly):
     644                    return True
     645            return False
    642646        return _multi
    643647