fileset: add kind:pat operator
author Yuya Nishihara <yuya@tcha.org>
Sun, 14 Jan 2018 13:29:15 +0900
changeset 35741 73432eee0ac4
parent 35740 06a757b9e334
child 35742 7a1806e0daea
fileset: add kind:pat operator

":" isn't taken as a symbol character but an infix operator so we can write
e.g. "path:'foo bar'" as well as "'path:foo bar'". An invalid pattern kind
is rejected in the former form as we know a kind is specified explicitly.

The binding strength is copied from "x:y" range operator of revset. Perhaps
it can be adjusted later if we want to parse "foo:bar()" as "(foo:bar)()",
not "foo:(bar())". We can also add "kind:" postfix operator if we want.

One possible source of confusion is the scope of the leading "set:" vs the
"kind:pat" operator. The former is consumed by a matcher so applies to the
whole fileset expression:

    $ hg files 'set:foo() or kind:bar or baz'
                    ^^^^^^^^^^^^^^^^^^^^^^^^

Whereas the scope of kind:pat operator is narrow:

    $ hg files 'set:foo() or kind:bar or baz'
                                  ^^^
hgext/lfs/__init__.py
mercurial/fileset.py
mercurial/help/filesets.txt
mercurial/minifileset.py
tests/test-fileset.t
tests/test-minifileset.py
--- a/hgext/lfs/__init__.py	Sun Jan 14 13:33:56 2018 +0900
+++ b/hgext/lfs/__init__.py	Sun Jan 14 13:29:15 2018 +0900
@@ -45,7 +45,7 @@
 
     # Which files to track in LFS.  Path tests are "**.extname" for file
     # extensions, and "path:under/some/directory" for path prefix.  Both
-    # are relative to the repository root, and the latter must be quoted.
+    # are relative to the repository root.
     # File size can be tested with the "size()" fileset, and tests can be
     # joined with fileset operators.  (See "hg help filesets.operators".)
     #
@@ -55,9 +55,9 @@
     # - size(">20MB")               # larger than 20MB
     # - !**.txt                     # anything not a *.txt file
     # - **.zip | **.tar.gz | **.7z  # some types of compressed files
-    # - "path:bin"                  # files under "bin" in the project root
+    # - path:bin                    # files under "bin" in the project root
     # - (**.php & size(">2MB")) | (**.js & size(">5MB")) | **.tar.gz
-    #     | ("path:bin" & !"path:/bin/README") | size(">1GB")
+    #     | (path:bin & !path:/bin/README) | size(">1GB")
     # (default: none())
     #
     # This is ignored if there is a tracked '.hglfs' file, and this setting
--- a/mercurial/fileset.py	Sun Jan 14 13:33:56 2018 +0900
+++ b/mercurial/fileset.py	Sun Jan 14 13:29:15 2018 +0900
@@ -24,6 +24,7 @@
 elements = {
     # token-type: binding-strength, primary, prefix, infix, suffix
     "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None),
+    ":": (15, None, None, ("kindpat", 15), None),
     "-": (5, None, ("negate", 19), ("minus", 5), None),
     "not": (10, None, ("not", 10), None, None),
     "!": (10, None, ("not", 10), None, None),
@@ -50,7 +51,7 @@
         c = program[pos]
         if c.isspace(): # skip inter-token whitespace
             pass
-        elif c in "(),-|&+!": # handle simple operators
+        elif c in "(),-:|&+!": # handle simple operators
             yield (c, None, pos)
         elif (c in '"\'' or c == 'r' and
               program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
@@ -110,6 +111,18 @@
         return x[1]
     raise error.ParseError(err)
 
+def _getkindpat(x, y, allkinds, err):
+    kind = getsymbol(x)
+    pat = getstring(y, err)
+    if kind not in allkinds:
+        raise error.ParseError(_("invalid pattern kind: %s") % kind)
+    return '%s:%s' % (kind, pat)
+
+def getpattern(x, allkinds, err):
+    if x and x[0] == 'kindpat':
+        return _getkindpat(x[1], x[2], allkinds, err)
+    return getstring(x, err)
+
 def getset(mctx, x):
     if not x:
         raise error.ParseError(_("missing argument"))
@@ -119,6 +132,10 @@
     m = mctx.matcher([x])
     return [f for f in mctx.subset if m(f)]
 
+def kindpatset(mctx, x, y):
+    return stringset(mctx, _getkindpat(x, y, matchmod.allpatternkinds,
+                                       _("pattern must be a string")))
+
 def andset(mctx, x, y):
     return getset(mctx.narrow(getset(mctx, x)), y)
 
@@ -507,8 +524,9 @@
     ctx = mctx.ctx
     sstate = sorted(ctx.substate)
     if x:
-        # i18n: "subrepo" is a keyword
-        pat = getstring(x, _("subrepo requires a pattern or no arguments"))
+        pat = getpattern(x, matchmod.allpatternkinds,
+                         # i18n: "subrepo" is a keyword
+                         _("subrepo requires a pattern or no arguments"))
         fast = not matchmod.patkind(pat)
         if fast:
             def m(s):
@@ -522,6 +540,7 @@
 methods = {
     'string': stringset,
     'symbol': stringset,
+    'kindpat': kindpatset,
     'and': andset,
     'or': orset,
     'minus': minusset,
--- a/mercurial/help/filesets.txt	Sun Jan 14 13:33:56 2018 +0900
+++ b/mercurial/help/filesets.txt	Sun Jan 14 13:29:15 2018 +0900
@@ -9,7 +9,8 @@
 or double quotes if they contain characters outside of
 ``[.*{}[]?/\_a-zA-Z0-9\x80-\xff]`` or if they match one of the
 predefined predicates. This generally applies to file patterns other
-than globs and arguments for predicates.
+than globs and arguments for predicates. Pattern prefixes such as
+``path:`` may be specified without quoting.
 
 Special characters can be used in quoted identifiers by escaping them,
 e.g., ``\n`` is interpreted as a newline. To prevent them from being
@@ -75,4 +76,4 @@
 
 - Remove files listed in foo.lst that contain the letter a or b::
 
-    hg remove "set: 'listfile:foo.lst' and (**a* or **b*)"
+    hg remove "set: listfile:foo.lst and (**a* or **b*)"
--- a/mercurial/minifileset.py	Sun Jan 14 13:33:56 2018 +0900
+++ b/mercurial/minifileset.py	Sun Jan 14 13:29:15 2018 +0900
@@ -17,16 +17,14 @@
     if not tree:
         raise error.ParseError(_("missing argument"))
     op = tree[0]
-    if op in {'symbol', 'string'}:
-        name = fileset.getstring(tree, _('invalid file pattern'))
+    if op in {'symbol', 'string', 'kindpat'}:
+        name = fileset.getpattern(tree, {'path'}, _('invalid file pattern'))
         if name.startswith('**'): # file extension test, ex. "**.tar.gz"
             ext = name[2:]
             for c in ext:
                 if c in '*{}[]?/\\':
                     raise error.ParseError(_('reserved character: %s') % c)
             return lambda n, s: n.endswith(ext)
-        # TODO: teach fileset about 'path:', so that this can be a symbol and
-        # not require quoting.
         elif name.startswith('path:'): # directory or full path test
             p = name[5:] # prefix
             pl = len(p)
@@ -78,7 +76,7 @@
     for prefix test.  The ``size()`` predicate is borrowed from filesets to test
     file size.  The predicates ``all()`` and ``none()`` are also supported.
 
-    '(**.php & size(">10MB")) | **.zip | ("path:bin" & !"path:bin/README")' for
+    '(**.php & size(">10MB")) | **.zip | (path:bin & !path:bin/README)' for
     example, will catch all php files whose size is greater than 10 MB, all
     files whose name ends with ".zip", and all files under "bin" in the repo
     root except for "bin/README".
--- a/tests/test-fileset.t	Sun Jan 14 13:33:56 2018 +0900
+++ b/tests/test-fileset.t	Sun Jan 14 13:29:15 2018 +0900
@@ -27,6 +27,24 @@
   (string 're:a\\d')
   a1
   a2
+  $ fileset -v '!re:"a\d"'
+  (not
+    (kindpat
+      (symbol 're')
+      (string 'a\\d')))
+  b1
+  b2
+  $ fileset -v 'path:a1 or glob:b?'
+  (or
+    (kindpat
+      (symbol 'path')
+      (symbol 'a1'))
+    (kindpat
+      (symbol 'glob')
+      (symbol 'b?')))
+  a1
+  b1
+  b2
   $ fileset -v 'a1 or a2'
   (or
     (symbol 'a1')
@@ -80,6 +98,22 @@
   hg: parse error: can't use negate operator in this context
   [255]
 
+  $ fileset '"path":.'
+  hg: parse error: not a symbol
+  [255]
+  $ fileset 'path:foo bar'
+  hg: parse error at 9: invalid token
+  [255]
+  $ fileset 'foo:bar:baz'
+  hg: parse error: not a symbol
+  [255]
+  $ fileset 'foo:bar()'
+  hg: parse error: pattern must be a string
+  [255]
+  $ fileset 'foo:bar'
+  hg: parse error: invalid pattern kind: foo
+  [255]
+
 Test files status
 
   $ rm a1
@@ -346,6 +380,9 @@
   $ fileset -r4 'subrepo("re:su.*")'
   sub
   sub2
+  $ fileset -r4 'subrepo(re:su.*)'
+  sub
+  sub2
   $ fileset -r4 'subrepo("sub")'
   sub
   $ fileset -r4 'b2 or c1'
--- a/tests/test-minifileset.py	Sun Jan 14 13:33:56 2018 +0900
+++ b/tests/test-minifileset.py	Sun Jan 14 13:29:15 2018 +0900
@@ -23,7 +23,7 @@
 check('!!!!((!(!!all())))', [], [('a.php', 123), ('b.txt', 0)])
 
 check('"path:a" & (**.b | **.c)', [('a/b.b', 0), ('a/c.c', 0)], [('b/c.c', 0)])
-check('("path:a" & **.b) | **.c',
+check('(path:a & **.b) | **.c',
       [('a/b.b', 0), ('a/c.c', 0), ('b/c.c', 0)], [])
 
 check('**.bin - size("<20B")', [('b.bin', 21)], [('a.bin', 11), ('b.txt', 21)])