mercurial/revsetlang.py
changeset 31024 0b8356705de6
parent 31017 17b5cda5a84a
child 31355 77270ec0cdd9
equal deleted inserted replaced
31023:aea06029919e 31024:0b8356705de6
       
     1 # revsetlang.py - parser, tokenizer and utility for revision set language
       
     2 #
       
     3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
       
     4 #
       
     5 # This software may be used and distributed according to the terms of the
       
     6 # GNU General Public License version 2 or any later version.
       
     7 
       
     8 from __future__ import absolute_import
       
     9 
       
    10 import string
       
    11 
       
    12 from .i18n import _
       
    13 from . import (
       
    14     error,
       
    15     node,
       
    16     parser,
       
    17     pycompat,
       
    18 )
       
    19 
       
    20 elements = {
       
    21     # token-type: binding-strength, primary, prefix, infix, suffix
       
    22     "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
       
    23     "##": (20, None, None, ("_concat", 20), None),
       
    24     "~": (18, None, None, ("ancestor", 18), None),
       
    25     "^": (18, None, None, ("parent", 18), "parentpost"),
       
    26     "-": (5, None, ("negate", 19), ("minus", 5), None),
       
    27     "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
       
    28     "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
       
    29     ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
       
    30     "not": (10, None, ("not", 10), None, None),
       
    31     "!": (10, None, ("not", 10), None, None),
       
    32     "and": (5, None, None, ("and", 5), None),
       
    33     "&": (5, None, None, ("and", 5), None),
       
    34     "%": (5, None, None, ("only", 5), "onlypost"),
       
    35     "or": (4, None, None, ("or", 4), None),
       
    36     "|": (4, None, None, ("or", 4), None),
       
    37     "+": (4, None, None, ("or", 4), None),
       
    38     "=": (3, None, None, ("keyvalue", 3), None),
       
    39     ",": (2, None, None, ("list", 2), None),
       
    40     ")": (0, None, None, None, None),
       
    41     "symbol": (0, "symbol", None, None, None),
       
    42     "string": (0, "string", None, None, None),
       
    43     "end": (0, None, None, None, None),
       
    44 }
       
    45 
       
    46 keywords = set(['and', 'or', 'not'])
       
    47 
       
    48 # default set of valid characters for the initial letter of symbols
       
    49 _syminitletters = set(
       
    50     string.ascii_letters +
       
    51     string.digits + pycompat.sysstr('._@')) | set(map(chr, xrange(128, 256)))
       
    52 
       
    53 # default set of valid characters for non-initial letters of symbols
       
    54 _symletters = _syminitletters | set(pycompat.sysstr('-/'))
       
    55 
       
    56 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
       
    57     '''
       
    58     Parse a revset statement into a stream of tokens
       
    59 
       
    60     ``syminitletters`` is the set of valid characters for the initial
       
    61     letter of symbols.
       
    62 
       
    63     By default, character ``c`` is recognized as valid for initial
       
    64     letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
       
    65 
       
    66     ``symletters`` is the set of valid characters for non-initial
       
    67     letters of symbols.
       
    68 
       
    69     By default, character ``c`` is recognized as valid for non-initial
       
    70     letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
       
    71 
       
    72     Check that @ is a valid unquoted token character (issue3686):
       
    73     >>> list(tokenize("@::"))
       
    74     [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
       
    75 
       
    76     '''
       
    77     if syminitletters is None:
       
    78         syminitletters = _syminitletters
       
    79     if symletters is None:
       
    80         symletters = _symletters
       
    81 
       
    82     if program and lookup:
       
    83         # attempt to parse old-style ranges first to deal with
       
    84         # things like old-tag which contain query metacharacters
       
    85         parts = program.split(':', 1)
       
    86         if all(lookup(sym) for sym in parts if sym):
       
    87             if parts[0]:
       
    88                 yield ('symbol', parts[0], 0)
       
    89             if len(parts) > 1:
       
    90                 s = len(parts[0])
       
    91                 yield (':', None, s)
       
    92                 if parts[1]:
       
    93                     yield ('symbol', parts[1], s + 1)
       
    94             yield ('end', None, len(program))
       
    95             return
       
    96 
       
    97     pos, l = 0, len(program)
       
    98     while pos < l:
       
    99         c = program[pos]
       
   100         if c.isspace(): # skip inter-token whitespace
       
   101             pass
       
   102         elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
       
   103             yield ('::', None, pos)
       
   104             pos += 1 # skip ahead
       
   105         elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
       
   106             yield ('..', None, pos)
       
   107             pos += 1 # skip ahead
       
   108         elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
       
   109             yield ('##', None, pos)
       
   110             pos += 1 # skip ahead
       
   111         elif c in "():=,-|&+!~^%": # handle simple operators
       
   112             yield (c, None, pos)
       
   113         elif (c in '"\'' or c == 'r' and
       
   114               program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
       
   115             if c == 'r':
       
   116                 pos += 1
       
   117                 c = program[pos]
       
   118                 decode = lambda x: x
       
   119             else:
       
   120                 decode = parser.unescapestr
       
   121             pos += 1
       
   122             s = pos
       
   123             while pos < l: # find closing quote
       
   124                 d = program[pos]
       
   125                 if d == '\\': # skip over escaped characters
       
   126                     pos += 2
       
   127                     continue
       
   128                 if d == c:
       
   129                     yield ('string', decode(program[s:pos]), s)
       
   130                     break
       
   131                 pos += 1
       
   132             else:
       
   133                 raise error.ParseError(_("unterminated string"), s)
       
   134         # gather up a symbol/keyword
       
   135         elif c in syminitletters:
       
   136             s = pos
       
   137             pos += 1
       
   138             while pos < l: # find end of symbol
       
   139                 d = program[pos]
       
   140                 if d not in symletters:
       
   141                     break
       
   142                 if d == '.' and program[pos - 1] == '.': # special case for ..
       
   143                     pos -= 1
       
   144                     break
       
   145                 pos += 1
       
   146             sym = program[s:pos]
       
   147             if sym in keywords: # operator keywords
       
   148                 yield (sym, None, s)
       
   149             elif '-' in sym:
       
   150                 # some jerk gave us foo-bar-baz, try to check if it's a symbol
       
   151                 if lookup and lookup(sym):
       
   152                     # looks like a real symbol
       
   153                     yield ('symbol', sym, s)
       
   154                 else:
       
   155                     # looks like an expression
       
   156                     parts = sym.split('-')
       
   157                     for p in parts[:-1]:
       
   158                         if p: # possible consecutive -
       
   159                             yield ('symbol', p, s)
       
   160                         s += len(p)
       
   161                         yield ('-', None, pos)
       
   162                         s += 1
       
   163                     if parts[-1]: # possible trailing -
       
   164                         yield ('symbol', parts[-1], s)
       
   165             else:
       
   166                 yield ('symbol', sym, s)
       
   167             pos -= 1
       
   168         else:
       
   169             raise error.ParseError(_("syntax error in revset '%s'") %
       
   170                                    program, pos)
       
   171         pos += 1
       
   172     yield ('end', None, pos)
       
   173 
       
   174 # helpers
       
   175 
       
   176 _notset = object()
       
   177 
       
   178 def getsymbol(x):
       
   179     if x and x[0] == 'symbol':
       
   180         return x[1]
       
   181     raise error.ParseError(_('not a symbol'))
       
   182 
       
   183 def getstring(x, err):
       
   184     if x and (x[0] == 'string' or x[0] == 'symbol'):
       
   185         return x[1]
       
   186     raise error.ParseError(err)
       
   187 
       
   188 def getinteger(x, err, default=_notset):
       
   189     if not x and default is not _notset:
       
   190         return default
       
   191     try:
       
   192         return int(getstring(x, err))
       
   193     except ValueError:
       
   194         raise error.ParseError(err)
       
   195 
       
   196 def getlist(x):
       
   197     if not x:
       
   198         return []
       
   199     if x[0] == 'list':
       
   200         return list(x[1:])
       
   201     return [x]
       
   202 
       
   203 def getrange(x, err):
       
   204     if not x:
       
   205         raise error.ParseError(err)
       
   206     op = x[0]
       
   207     if op == 'range':
       
   208         return x[1], x[2]
       
   209     elif op == 'rangepre':
       
   210         return None, x[1]
       
   211     elif op == 'rangepost':
       
   212         return x[1], None
       
   213     elif op == 'rangeall':
       
   214         return None, None
       
   215     raise error.ParseError(err)
       
   216 
       
   217 def getargs(x, min, max, err):
       
   218     l = getlist(x)
       
   219     if len(l) < min or (max >= 0 and len(l) > max):
       
   220         raise error.ParseError(err)
       
   221     return l
       
   222 
       
   223 def getargsdict(x, funcname, keys):
       
   224     return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
       
   225                                 keyvaluenode='keyvalue', keynode='symbol')
       
   226 
       
   227 # Constants for ordering requirement, used in _analyze():
       
   228 #
       
   229 # If 'define', any nested functions and operations can change the ordering of
       
   230 # the entries in the set. If 'follow', any nested functions and operations
       
   231 # should take the ordering specified by the first operand to the '&' operator.
       
   232 #
       
   233 # For instance,
       
   234 #
       
   235 #   X & (Y | Z)
       
   236 #   ^   ^^^^^^^
       
   237 #   |   follow
       
   238 #   define
       
   239 #
       
   240 # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
       
   241 # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
       
   242 #
       
   243 # 'any' means the order doesn't matter. For instance,
       
   244 #
       
   245 #   X & !Y
       
   246 #        ^
       
   247 #        any
       
   248 #
       
   249 # 'y()' can either enforce its ordering requirement or take the ordering
       
   250 # specified by 'x()' because 'not()' doesn't care the order.
       
   251 #
       
   252 # Transition of ordering requirement:
       
   253 #
       
   254 # 1. starts with 'define'
       
   255 # 2. shifts to 'follow' by 'x & y'
       
   256 # 3. changes back to 'define' on function call 'f(x)' or function-like
       
   257 #    operation 'x (f) y' because 'f' may have its own ordering requirement
       
   258 #    for 'x' and 'y' (e.g. 'first(x)')
       
   259 #
       
   260 anyorder = 'any'        # don't care the order
       
   261 defineorder = 'define'  # should define the order
       
   262 followorder = 'follow'  # must follow the current order
       
   263 
       
   264 # transition table for 'x & y', from the current expression 'x' to 'y'
       
   265 _tofolloworder = {
       
   266     anyorder: anyorder,
       
   267     defineorder: followorder,
       
   268     followorder: followorder,
       
   269 }
       
   270 
       
   271 def _matchonly(revs, bases):
       
   272     """
       
   273     >>> f = lambda *args: _matchonly(*map(parse, args))
       
   274     >>> f('ancestors(A)', 'not ancestors(B)')
       
   275     ('list', ('symbol', 'A'), ('symbol', 'B'))
       
   276     """
       
   277     if (revs is not None
       
   278         and revs[0] == 'func'
       
   279         and getsymbol(revs[1]) == 'ancestors'
       
   280         and bases is not None
       
   281         and bases[0] == 'not'
       
   282         and bases[1][0] == 'func'
       
   283         and getsymbol(bases[1][1]) == 'ancestors'):
       
   284         return ('list', revs[2], bases[1][2])
       
   285 
       
   286 def _fixops(x):
       
   287     """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
       
   288     handled well by our simple top-down parser"""
       
   289     if not isinstance(x, tuple):
       
   290         return x
       
   291 
       
   292     op = x[0]
       
   293     if op == 'parent':
       
   294         # x^:y means (x^) : y, not x ^ (:y)
       
   295         # x^:  means (x^) :,   not x ^ (:)
       
   296         post = ('parentpost', x[1])
       
   297         if x[2][0] == 'dagrangepre':
       
   298             return _fixops(('dagrange', post, x[2][1]))
       
   299         elif x[2][0] == 'rangepre':
       
   300             return _fixops(('range', post, x[2][1]))
       
   301         elif x[2][0] == 'rangeall':
       
   302             return _fixops(('rangepost', post))
       
   303     elif op == 'or':
       
   304         # make number of arguments deterministic:
       
   305         # x + y + z -> (or x y z) -> (or (list x y z))
       
   306         return (op, _fixops(('list',) + x[1:]))
       
   307 
       
   308     return (op,) + tuple(_fixops(y) for y in x[1:])
       
   309 
       
   310 def _analyze(x, order):
       
   311     if x is None:
       
   312         return x
       
   313 
       
   314     op = x[0]
       
   315     if op == 'minus':
       
   316         return _analyze(('and', x[1], ('not', x[2])), order)
       
   317     elif op == 'only':
       
   318         t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
       
   319         return _analyze(t, order)
       
   320     elif op == 'onlypost':
       
   321         return _analyze(('func', ('symbol', 'only'), x[1]), order)
       
   322     elif op == 'dagrangepre':
       
   323         return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
       
   324     elif op == 'dagrangepost':
       
   325         return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
       
   326     elif op == 'negate':
       
   327         s = getstring(x[1], _("can't negate that"))
       
   328         return _analyze(('string', '-' + s), order)
       
   329     elif op in ('string', 'symbol'):
       
   330         return x
       
   331     elif op == 'and':
       
   332         ta = _analyze(x[1], order)
       
   333         tb = _analyze(x[2], _tofolloworder[order])
       
   334         return (op, ta, tb, order)
       
   335     elif op == 'or':
       
   336         return (op, _analyze(x[1], order), order)
       
   337     elif op == 'not':
       
   338         return (op, _analyze(x[1], anyorder), order)
       
   339     elif op == 'rangeall':
       
   340         return (op, None, order)
       
   341     elif op in ('rangepre', 'rangepost', 'parentpost'):
       
   342         return (op, _analyze(x[1], defineorder), order)
       
   343     elif op == 'group':
       
   344         return _analyze(x[1], order)
       
   345     elif op in ('dagrange', 'range', 'parent', 'ancestor'):
       
   346         ta = _analyze(x[1], defineorder)
       
   347         tb = _analyze(x[2], defineorder)
       
   348         return (op, ta, tb, order)
       
   349     elif op == 'list':
       
   350         return (op,) + tuple(_analyze(y, order) for y in x[1:])
       
   351     elif op == 'keyvalue':
       
   352         return (op, x[1], _analyze(x[2], order))
       
   353     elif op == 'func':
       
   354         f = getsymbol(x[1])
       
   355         d = defineorder
       
   356         if f == 'present':
       
   357             # 'present(set)' is known to return the argument set with no
       
   358             # modification, so forward the current order to its argument
       
   359             d = order
       
   360         return (op, x[1], _analyze(x[2], d), order)
       
   361     raise ValueError('invalid operator %r' % op)
       
   362 
       
   363 def analyze(x, order=defineorder):
       
   364     """Transform raw parsed tree to evaluatable tree which can be fed to
       
   365     optimize() or getset()
       
   366 
       
   367     All pseudo operations should be mapped to real operations or functions
       
   368     defined in methods or symbols table respectively.
       
   369 
       
   370     'order' specifies how the current expression 'x' is ordered (see the
       
   371     constants defined above.)
       
   372     """
       
   373     return _analyze(x, order)
       
   374 
       
   375 def _optimize(x, small):
       
   376     if x is None:
       
   377         return 0, x
       
   378 
       
   379     smallbonus = 1
       
   380     if small:
       
   381         smallbonus = .5
       
   382 
       
   383     op = x[0]
       
   384     if op in ('string', 'symbol'):
       
   385         return smallbonus, x # single revisions are small
       
   386     elif op == 'and':
       
   387         wa, ta = _optimize(x[1], True)
       
   388         wb, tb = _optimize(x[2], True)
       
   389         order = x[3]
       
   390         w = min(wa, wb)
       
   391 
       
   392         # (::x and not ::y)/(not ::y and ::x) have a fast path
       
   393         tm = _matchonly(ta, tb) or _matchonly(tb, ta)
       
   394         if tm:
       
   395             return w, ('func', ('symbol', 'only'), tm, order)
       
   396 
       
   397         if tb is not None and tb[0] == 'not':
       
   398             return wa, ('difference', ta, tb[1], order)
       
   399 
       
   400         if wa > wb:
       
   401             return w, (op, tb, ta, order)
       
   402         return w, (op, ta, tb, order)
       
   403     elif op == 'or':
       
   404         # fast path for machine-generated expression, that is likely to have
       
   405         # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
       
   406         order = x[2]
       
   407         ws, ts, ss = [], [], []
       
   408         def flushss():
       
   409             if not ss:
       
   410                 return
       
   411             if len(ss) == 1:
       
   412                 w, t = ss[0]
       
   413             else:
       
   414                 s = '\0'.join(t[1] for w, t in ss)
       
   415                 y = ('func', ('symbol', '_list'), ('string', s), order)
       
   416                 w, t = _optimize(y, False)
       
   417             ws.append(w)
       
   418             ts.append(t)
       
   419             del ss[:]
       
   420         for y in getlist(x[1]):
       
   421             w, t = _optimize(y, False)
       
   422             if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
       
   423                 ss.append((w, t))
       
   424                 continue
       
   425             flushss()
       
   426             ws.append(w)
       
   427             ts.append(t)
       
   428         flushss()
       
   429         if len(ts) == 1:
       
   430             return ws[0], ts[0] # 'or' operation is fully optimized out
       
   431         # we can't reorder trees by weight because it would change the order.
       
   432         # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
       
   433         #   ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
       
   434         return max(ws), (op, ('list',) + tuple(ts), order)
       
   435     elif op == 'not':
       
   436         # Optimize not public() to _notpublic() because we have a fast version
       
   437         if x[1][:3] == ('func', ('symbol', 'public'), None):
       
   438             order = x[1][3]
       
   439             newsym = ('func', ('symbol', '_notpublic'), None, order)
       
   440             o = _optimize(newsym, not small)
       
   441             return o[0], o[1]
       
   442         else:
       
   443             o = _optimize(x[1], not small)
       
   444             order = x[2]
       
   445             return o[0], (op, o[1], order)
       
   446     elif op == 'rangeall':
       
   447         return smallbonus, x
       
   448     elif op in ('rangepre', 'rangepost', 'parentpost'):
       
   449         o = _optimize(x[1], small)
       
   450         order = x[2]
       
   451         return o[0], (op, o[1], order)
       
   452     elif op in ('dagrange', 'range', 'parent', 'ancestor'):
       
   453         wa, ta = _optimize(x[1], small)
       
   454         wb, tb = _optimize(x[2], small)
       
   455         order = x[3]
       
   456         return wa + wb, (op, ta, tb, order)
       
   457     elif op == 'list':
       
   458         ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
       
   459         return sum(ws), (op,) + ts
       
   460     elif op == 'keyvalue':
       
   461         w, t = _optimize(x[2], small)
       
   462         return w, (op, x[1], t)
       
   463     elif op == 'func':
       
   464         f = getsymbol(x[1])
       
   465         wa, ta = _optimize(x[2], small)
       
   466         if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
       
   467                  'keyword', 'outgoing', 'user', 'destination'):
       
   468             w = 10 # slow
       
   469         elif f in ('modifies', 'adds', 'removes'):
       
   470             w = 30 # slower
       
   471         elif f == "contains":
       
   472             w = 100 # very slow
       
   473         elif f == "ancestor":
       
   474             w = 1 * smallbonus
       
   475         elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
       
   476             w = 0
       
   477         elif f == "sort":
       
   478             w = 10 # assume most sorts look at changelog
       
   479         else:
       
   480             w = 1
       
   481         order = x[3]
       
   482         return w + wa, (op, x[1], ta, order)
       
   483     raise ValueError('invalid operator %r' % op)
       
   484 
       
   485 def optimize(tree):
       
   486     """Optimize evaluatable tree
       
   487 
       
   488     All pseudo operations should be transformed beforehand.
       
   489     """
       
   490     _weight, newtree = _optimize(tree, small=True)
       
   491     return newtree
       
   492 
       
   493 # the set of valid characters for the initial letter of symbols in
       
   494 # alias declarations and definitions
       
   495 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
       
   496 
       
   497 def _parsewith(spec, lookup=None, syminitletters=None):
       
   498     """Generate a parse tree of given spec with given tokenizing options
       
   499 
       
   500     >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
       
   501     ('func', ('symbol', 'foo'), ('symbol', '$1'))
       
   502     >>> _parsewith('$1')
       
   503     Traceback (most recent call last):
       
   504       ...
       
   505     ParseError: ("syntax error in revset '$1'", 0)
       
   506     >>> _parsewith('foo bar')
       
   507     Traceback (most recent call last):
       
   508       ...
       
   509     ParseError: ('invalid token', 4)
       
   510     """
       
   511     p = parser.parser(elements)
       
   512     tree, pos = p.parse(tokenize(spec, lookup=lookup,
       
   513                                  syminitletters=syminitletters))
       
   514     if pos != len(spec):
       
   515         raise error.ParseError(_('invalid token'), pos)
       
   516     return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
       
   517 
       
   518 class _aliasrules(parser.basealiasrules):
       
   519     """Parsing and expansion rule set of revset aliases"""
       
   520     _section = _('revset alias')
       
   521 
       
   522     @staticmethod
       
   523     def _parse(spec):
       
   524         """Parse alias declaration/definition ``spec``
       
   525 
       
   526         This allows symbol names to use also ``$`` as an initial letter
       
   527         (for backward compatibility), and callers of this function should
       
   528         examine whether ``$`` is used also for unexpected symbols or not.
       
   529         """
       
   530         return _parsewith(spec, syminitletters=_aliassyminitletters)
       
   531 
       
   532     @staticmethod
       
   533     def _trygetfunc(tree):
       
   534         if tree[0] == 'func' and tree[1][0] == 'symbol':
       
   535             return tree[1][1], getlist(tree[2])
       
   536 
       
   537 def expandaliases(ui, tree):
       
   538     aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
       
   539     tree = _aliasrules.expand(aliases, tree)
       
   540     # warn about problematic (but not referred) aliases
       
   541     for name, alias in sorted(aliases.iteritems()):
       
   542         if alias.error and not alias.warned:
       
   543             ui.warn(_('warning: %s\n') % (alias.error))
       
   544             alias.warned = True
       
   545     return tree
       
   546 
       
   547 def foldconcat(tree):
       
   548     """Fold elements to be concatenated by `##`
       
   549     """
       
   550     if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
       
   551         return tree
       
   552     if tree[0] == '_concat':
       
   553         pending = [tree]
       
   554         l = []
       
   555         while pending:
       
   556             e = pending.pop()
       
   557             if e[0] == '_concat':
       
   558                 pending.extend(reversed(e[1:]))
       
   559             elif e[0] in ('string', 'symbol'):
       
   560                 l.append(e[1])
       
   561             else:
       
   562                 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
       
   563                 raise error.ParseError(msg)
       
   564         return ('string', ''.join(l))
       
   565     else:
       
   566         return tuple(foldconcat(t) for t in tree)
       
   567 
       
   568 def parse(spec, lookup=None):
       
   569     return _parsewith(spec, lookup=lookup)
       
   570 
       
   571 def formatspec(expr, *args):
       
   572     '''
       
   573     This is a convenience function for using revsets internally, and
       
   574     escapes arguments appropriately. Aliases are intentionally ignored
       
   575     so that intended expression behavior isn't accidentally subverted.
       
   576 
       
   577     Supported arguments:
       
   578 
       
   579     %r = revset expression, parenthesized
       
   580     %d = int(arg), no quoting
       
   581     %s = string(arg), escaped and single-quoted
       
   582     %b = arg.branch(), escaped and single-quoted
       
   583     %n = hex(arg), single-quoted
       
   584     %% = a literal '%'
       
   585 
       
   586     Prefixing the type with 'l' specifies a parenthesized list of that type.
       
   587 
       
   588     >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
       
   589     '(10 or 11):: and ((this()) or (that()))'
       
   590     >>> formatspec('%d:: and not %d::', 10, 20)
       
   591     '10:: and not 20::'
       
   592     >>> formatspec('%ld or %ld', [], [1])
       
   593     "_list('') or 1"
       
   594     >>> formatspec('keyword(%s)', 'foo\\xe9')
       
   595     "keyword('foo\\\\xe9')"
       
   596     >>> b = lambda: 'default'
       
   597     >>> b.branch = b
       
   598     >>> formatspec('branch(%b)', b)
       
   599     "branch('default')"
       
   600     >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
       
   601     "root(_list('a\\x00b\\x00c\\x00d'))"
       
   602     '''
       
   603 
       
   604     def quote(s):
       
   605         return repr(str(s))
       
   606 
       
   607     def argtype(c, arg):
       
   608         if c == 'd':
       
   609             return str(int(arg))
       
   610         elif c == 's':
       
   611             return quote(arg)
       
   612         elif c == 'r':
       
   613             parse(arg) # make sure syntax errors are confined
       
   614             return '(%s)' % arg
       
   615         elif c == 'n':
       
   616             return quote(node.hex(arg))
       
   617         elif c == 'b':
       
   618             return quote(arg.branch())
       
   619 
       
   620     def listexp(s, t):
       
   621         l = len(s)
       
   622         if l == 0:
       
   623             return "_list('')"
       
   624         elif l == 1:
       
   625             return argtype(t, s[0])
       
   626         elif t == 'd':
       
   627             return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)
       
   628         elif t == 's':
       
   629             return "_list('%s')" % "\0".join(s)
       
   630         elif t == 'n':
       
   631             return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
       
   632         elif t == 'b':
       
   633             return "_list('%s')" % "\0".join(a.branch() for a in s)
       
   634 
       
   635         m = l // 2
       
   636         return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
       
   637 
       
   638     ret = ''
       
   639     pos = 0
       
   640     arg = 0
       
   641     while pos < len(expr):
       
   642         c = expr[pos]
       
   643         if c == '%':
       
   644             pos += 1
       
   645             d = expr[pos]
       
   646             if d == '%':
       
   647                 ret += d
       
   648             elif d in 'dsnbr':
       
   649                 ret += argtype(d, args[arg])
       
   650                 arg += 1
       
   651             elif d == 'l':
       
   652                 # a list of some type
       
   653                 pos += 1
       
   654                 d = expr[pos]
       
   655                 ret += listexp(list(args[arg]), d)
       
   656                 arg += 1
       
   657             else:
       
   658                 raise error.Abort(_('unexpected revspec format character %s')
       
   659                                   % d)
       
   660         else:
       
   661             ret += c
       
   662         pos += 1
       
   663 
       
   664     return ret
       
   665 
       
   666 def prettyformat(tree):
       
   667     return parser.prettyformat(tree, ('string', 'symbol'))
       
   668 
       
   669 def depth(tree):
       
   670     if isinstance(tree, tuple):
       
   671         return max(map(depth, tree)) + 1
       
   672     else:
       
   673         return 0
       
   674 
       
   675 def funcsused(tree):
       
   676     if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
       
   677         return set()
       
   678     else:
       
   679         funcs = set()
       
   680         for s in tree[1:]:
       
   681             funcs |= funcsused(s)
       
   682         if tree[0] == 'func':
       
   683             funcs.add(tree[1][1])
       
   684         return funcs