Patchwork [3,of,3] fileset: add kind:pat operator

login
register
mail settings
Submitter Yuya Nishihara
Date Jan. 18, 2018, 12:08 p.m.
Message ID <77ef795761a1dcc59c30.1516277318@mimosa>
Download mbox | patch
Permalink /patch/26863/
State Accepted
Headers show

Comments

Yuya Nishihara - Jan. 18, 2018, 12:08 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1515904155 -32400
#      Sun Jan 14 13:29:15 2018 +0900
# Node ID 77ef795761a1dcc59c3098f9dec308d01b1d7846
# Parent  d3fce96625e610085c2335f9446b726c74326108
fileset: add kind:pat operator

":" isn't taken as a symbol character but an infix operator so we can write
e.g. "path:'foo bar'" as well as "'path:foo bar'". An invalid pattern kind
is rejected in the former form as we know a kind is specified explicitly.

The binding strength is copied from the "x:y" range operator of revset.
Perhaps it can be adjusted later if we want to parse "foo:bar()" as
"(foo:bar)()", not "foo:(bar())". We can also add a "kind:" postfix operator
if we want.

One possible source of confusion is the scope of the leading "set:" vs. the
"kind:pat" operator. The former is consumed by a matcher, so it applies to the
whole fileset expression:

  $ hg files 'set:foo() or kind:bar or baz'
                  ^^^^^^^^^^^^^^^^^^^^^^^^

Whereas the scope of the kind:pat operator is narrow:

  $ hg files 'set:foo() or kind:bar or baz'
                                ^^^
Matt Harbison - Jan. 19, 2018, 2:46 a.m.
On Thu, 18 Jan 2018 07:08:38 -0500, Yuya Nishihara <yuya@tcha.org> wrote:

> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1515904155 -32400
> #      Sun Jan 14 13:29:15 2018 +0900
> # Node ID 77ef795761a1dcc59c3098f9dec308d01b1d7846
> # Parent  d3fce96625e610085c2335f9446b726c74326108
> fileset: add kind:pat operator

Awesome, thanks!  I really didn't like the idea of quoting paths in the  
tracked file, and then changing it later.
Augie Fackler - Jan. 19, 2018, 7:35 p.m.
On Thu, Jan 18, 2018 at 09:08:38PM +0900, Yuya Nishihara wrote:
> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1515904155 -32400
> #      Sun Jan 14 13:29:15 2018 +0900
> # Node ID 77ef795761a1dcc59c3098f9dec308d01b1d7846
> # Parent  d3fce96625e610085c2335f9446b726c74326108
> fileset: add kind:pat operator

queued, thanks

Patch

diff --git a/hgext/lfs/__init__.py b/hgext/lfs/__init__.py
--- a/hgext/lfs/__init__.py
+++ b/hgext/lfs/__init__.py
@@ -45,7 +45,7 @@  Configs::
 
     # Which files to track in LFS.  Path tests are "**.extname" for file
     # extensions, and "path:under/some/directory" for path prefix.  Both
-    # are relative to the repository root, and the latter must be quoted.
+    # are relative to the repository root.
     # File size can be tested with the "size()" fileset, and tests can be
     # joined with fileset operators.  (See "hg help filesets.operators".)
     #
@@ -55,9 +55,9 @@  Configs::
     # - size(">20MB")               # larger than 20MB
     # - !**.txt                     # anything not a *.txt file
     # - **.zip | **.tar.gz | **.7z  # some types of compressed files
-    # - "path:bin"                  # files under "bin" in the project root
+    # - path:bin                    # files under "bin" in the project root
     # - (**.php & size(">2MB")) | (**.js & size(">5MB")) | **.tar.gz
-    #     | ("path:bin" & !"path:/bin/README") | size(">1GB")
+    #     | (path:bin & !path:/bin/README) | size(">1GB")
     # (default: none())
     #
     # This is ignored if there is a tracked '.hglfs' file, and this setting
diff --git a/mercurial/fileset.py b/mercurial/fileset.py
--- a/mercurial/fileset.py
+++ b/mercurial/fileset.py
@@ -24,6 +24,7 @@  from . import (
 elements = {
     # token-type: binding-strength, primary, prefix, infix, suffix
     "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None),
+    ":": (15, None, None, ("kindpat", 15), None),
     "-": (5, None, ("negate", 19), ("minus", 5), None),
     "not": (10, None, ("not", 10), None, None),
     "!": (10, None, ("not", 10), None, None),
@@ -50,7 +51,7 @@  def tokenize(program):
         c = program[pos]
         if c.isspace(): # skip inter-token whitespace
             pass
-        elif c in "(),-|&+!": # handle simple operators
+        elif c in "(),-:|&+!": # handle simple operators
             yield (c, None, pos)
         elif (c in '"\'' or c == 'r' and
               program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
@@ -110,6 +111,18 @@  def getstring(x, err):
         return x[1]
     raise error.ParseError(err)
 
+def _getkindpat(x, y, allkinds, err):
+    kind = getsymbol(x)
+    pat = getstring(y, err)
+    if kind not in allkinds:
+        raise error.ParseError(_("invalid pattern kind: %s") % kind)
+    return '%s:%s' % (kind, pat)
+
+def getpattern(x, allkinds, err):
+    if x and x[0] == 'kindpat':
+        return _getkindpat(x[1], x[2], allkinds, err)
+    return getstring(x, err)
+
 def getset(mctx, x):
     if not x:
         raise error.ParseError(_("missing argument"))
@@ -119,6 +132,10 @@  def stringset(mctx, x):
     m = mctx.matcher([x])
     return [f for f in mctx.subset if m(f)]
 
+def kindpatset(mctx, x, y):
+    return stringset(mctx, _getkindpat(x, y, matchmod.allpatternkinds,
+                                       _("pattern must be a string")))
+
 def andset(mctx, x, y):
     return getset(mctx.narrow(getset(mctx, x)), y)
 
@@ -507,8 +524,9 @@  def subrepo(mctx, x):
     ctx = mctx.ctx
     sstate = sorted(ctx.substate)
     if x:
-        # i18n: "subrepo" is a keyword
-        pat = getstring(x, _("subrepo requires a pattern or no arguments"))
+        pat = getpattern(x, matchmod.allpatternkinds,
+                         # i18n: "subrepo" is a keyword
+                         _("subrepo requires a pattern or no arguments"))
         fast = not matchmod.patkind(pat)
         if fast:
             def m(s):
@@ -522,6 +540,7 @@  def subrepo(mctx, x):
 methods = {
     'string': stringset,
     'symbol': stringset,
+    'kindpat': kindpatset,
     'and': andset,
     'or': orset,
     'minus': minusset,
diff --git a/mercurial/help/filesets.txt b/mercurial/help/filesets.txt
--- a/mercurial/help/filesets.txt
+++ b/mercurial/help/filesets.txt
@@ -9,7 +9,8 @@  Identifiers such as filenames or pattern
 or double quotes if they contain characters outside of
 ``[.*{}[]?/\_a-zA-Z0-9\x80-\xff]`` or if they match one of the
 predefined predicates. This generally applies to file patterns other
-than globs and arguments for predicates.
+than globs and arguments for predicates. Pattern prefixes such as
+``path:`` may be specified without quoting.
 
 Special characters can be used in quoted identifiers by escaping them,
 e.g., ``\n`` is interpreted as a newline. To prevent them from being
@@ -75,4 +76,4 @@  Some sample queries:
 
 - Remove files listed in foo.lst that contain the letter a or b::
 
-    hg remove "set: 'listfile:foo.lst' and (**a* or **b*)"
+    hg remove "set: listfile:foo.lst and (**a* or **b*)"
diff --git a/mercurial/minifileset.py b/mercurial/minifileset.py
--- a/mercurial/minifileset.py
+++ b/mercurial/minifileset.py
@@ -17,16 +17,14 @@  def _compile(tree):
     if not tree:
         raise error.ParseError(_("missing argument"))
     op = tree[0]
-    if op in {'symbol', 'string'}:
-        name = fileset.getstring(tree, _('invalid file pattern'))
+    if op in {'symbol', 'string', 'kindpat'}:
+        name = fileset.getpattern(tree, {'path'}, _('invalid file pattern'))
         if name.startswith('**'): # file extension test, ex. "**.tar.gz"
             ext = name[2:]
             for c in ext:
                 if c in '*{}[]?/\\':
                     raise error.ParseError(_('reserved character: %s') % c)
             return lambda n, s: n.endswith(ext)
-        # TODO: teach fileset about 'path:', so that this can be a symbol and
-        # not require quoting.
         elif name.startswith('path:'): # directory or full path test
             p = name[5:] # prefix
             pl = len(p)
@@ -78,7 +76,7 @@  def compile(text):
     for prefix test.  The ``size()`` predicate is borrowed from filesets to test
     file size.  The predicates ``all()`` and ``none()`` are also supported.
 
-    '(**.php & size(">10MB")) | **.zip | ("path:bin" & !"path:bin/README")' for
+    '(**.php & size(">10MB")) | **.zip | (path:bin & !path:bin/README)' for
     example, will catch all php files whose size is greater than 10 MB, all
     files whose name ends with ".zip", and all files under "bin" in the repo
     root except for "bin/README".
diff --git a/tests/test-fileset.t b/tests/test-fileset.t
--- a/tests/test-fileset.t
+++ b/tests/test-fileset.t
@@ -27,6 +27,24 @@  Test operators and basic patterns
   (string 're:a\\d')
   a1
   a2
+  $ fileset -v '!re:"a\d"'
+  (not
+    (kindpat
+      (symbol 're')
+      (string 'a\\d')))
+  b1
+  b2
+  $ fileset -v 'path:a1 or glob:b?'
+  (or
+    (kindpat
+      (symbol 'path')
+      (symbol 'a1'))
+    (kindpat
+      (symbol 'glob')
+      (symbol 'b?')))
+  a1
+  b1
+  b2
   $ fileset -v 'a1 or a2'
   (or
     (symbol 'a1')
@@ -80,6 +98,22 @@  Test invalid syntax
   hg: parse error: can't use negate operator in this context
   [255]
 
+  $ fileset '"path":.'
+  hg: parse error: not a symbol
+  [255]
+  $ fileset 'path:foo bar'
+  hg: parse error at 9: invalid token
+  [255]
+  $ fileset 'foo:bar:baz'
+  hg: parse error: not a symbol
+  [255]
+  $ fileset 'foo:bar()'
+  hg: parse error: pattern must be a string
+  [255]
+  $ fileset 'foo:bar'
+  hg: parse error: invalid pattern kind: foo
+  [255]
+
 Test files status
 
   $ rm a1
@@ -346,6 +380,9 @@  Test with a revision
   $ fileset -r4 'subrepo("re:su.*")'
   sub
   sub2
+  $ fileset -r4 'subrepo(re:su.*)'
+  sub
+  sub2
   $ fileset -r4 'subrepo("sub")'
   sub
   $ fileset -r4 'b2 or c1'
diff --git a/tests/test-minifileset.py b/tests/test-minifileset.py
--- a/tests/test-minifileset.py
+++ b/tests/test-minifileset.py
@@ -23,7 +23,7 @@  check('none()', [], [('a.php', 123), ('b
 check('!!!!((!(!!all())))', [], [('a.php', 123), ('b.txt', 0)])
 
 check('"path:a" & (**.b | **.c)', [('a/b.b', 0), ('a/c.c', 0)], [('b/c.c', 0)])
-check('("path:a" & **.b) | **.c',
+check('(path:a & **.b) | **.c',
       [('a/b.b', 0), ('a/c.c', 0), ('b/c.c', 0)], [])
 
 check('**.bin - size("<20B")', [('b.bin', 21)], [('a.bin', 11), ('b.txt', 21)])