mercurial/parser.py
changeset 11274 77272d28b53f
child 11278 7df88cdf47fd
equal deleted inserted replaced
11273:d1908cb95a82 11274:77272d28b53f
       
     1 # parser.py - simple top-down operator precedence parser for mercurial
       
     2 #
       
     3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 # see http://effbot.org/zone/simple-top-down-parsing.txt and
       
     9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
       
    10 # for background
       
    11 
       
    12 # takes a tokenizer and elements
       
    13 # tokenizer is an iterator that returns type, value pairs
       
    14 # elements is a mapping of types to binding strength, prefix and infix actions
       
    15 # an action is a tree node name, a tree label, and an optional match
       
    16 # __call__(program) parses program into a labelled tree
       
    17 
       
    18 class parser(object):
       
    19     def __init__(self, tokenizer, elements, methods=None):
       
    20         self._tokenizer = tokenizer
       
    21         self._elements = elements
       
    22         self._methods = methods
       
    23     def _advance(self):
       
    24         'advance the tokenizer'
       
    25         t = self.current
       
    26         self.current = self._iter.next()
       
    27         return t
       
    28     def _match(self, m):
       
    29         'make sure the tokenizer matches an end condition'
       
    30         if self.current[0] != m:
       
    31             raise SyntaxError(self.current)
       
    32         self._advance()
       
    33     def _parse(self, bind=0):
       
    34         token, value = self._advance()
       
    35         # handle prefix rules on current token
       
    36         prefix = self._elements[token][1]
       
    37         if not prefix:
       
    38             raise SyntaxError("not a prefix: %s" % token)
       
    39         if len(prefix) == 1:
       
    40             expr = (prefix[0], value)
       
    41         else:
       
    42             if len(prefix) > 2 and prefix[2] == self.current[0]:
       
    43                 self._match(prefix[2])
       
    44                 expr = (prefix[0], None)
       
    45             else:
       
    46                 expr = (prefix[0], self._parse(prefix[1]))
       
    47                 if len(prefix) > 2:
       
    48                     self._match(prefix[2])
       
    49         # gather tokens until we meet a lower binding strength
       
    50         while bind < self._elements[self.current[0]][0]:
       
    51             token, value = self._advance()
       
    52             # handle infix rules
       
    53             infix = self._elements[token][2]
       
    54             if len(infix) == 3 and infix[2] == self.current[0]:
       
    55                 self._match(infix[2])
       
    56                 expr = (infix[0], expr, (None))
       
    57             else:
       
    58                 if not infix[0]:
       
    59                     raise SyntaxError("not an infix")
       
    60                 expr = (infix[0], expr, self._parse(infix[1]))
       
    61                 if len(infix) == 3:
       
    62                     self._match(infix[2])
       
    63         return expr
       
    64     def parse(self, message):
       
    65         'generate a parse tree from a message'
       
    66         self._iter = self._tokenizer(message)
       
    67         self.current = self._iter.next()
       
    68         return self._parse()
       
    69     def eval(self, tree):
       
    70         'recursively evaluate a parse tree using node methods'
       
    71         if not isinstance(tree, tuple):
       
    72             return tree
       
    73         return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
       
    74     def __call__(self, message):
       
    75         'parse a message into a parse tree and evaluate if methods given'
       
    76         t = self.parse(message)
       
    77         if self._methods:
       
    78             return self.eval(t)
       
    79         return t