Patchwork [STABLE] revset: evaluate filesets against each revision for 'file()' (issue5778)

login
register
mail settings
Submitter Matt Harbison
Date Jan. 31, 2018, 5:14 a.m.
Message ID <105a48b751a294a386c4.1517375667@Envy>
Download mbox | patch
Permalink /patch/27086/
State Accepted
Headers show

Comments

Matt Harbison - Jan. 31, 2018, 5:14 a.m.
# HG changeset patch
# User Matt Harbison <matt_harbison@yahoo.com>
# Date 1517166539 18000
#      Sun Jan 28 14:08:59 2018 -0500
# Branch stable
# Node ID 105a48b751a294a386c46d13041160f29e627aee
# Parent  4ac443b8390179d22010fe49ea57e18ae44e48d4
revset: evaluate filesets against each revision for 'file()' (issue5778)

After f2aeff8a87b6, the fileset was evaluated to a set of files against the
working directory, and then those files were applied against each revision.  The
result was nonsense.  For example, `hg log -r 'file("set:exec()")'` on the
Mercurial repo listed revision 0 because it has the `hg` script, which is
currently +x.  But that bit wasn't applied until revision 280 (which
'contains()' properly indicates).

This technique was borrowed from checkstatus(), which services adds(),
modifies(), and removes(), so it seems safe enough.  The 'r=all()' is kind of a
hack, but necessary because keying off 'r=None' changes the behavior of
`hg log set:...` (test-largefiles-misc.t and test-fileset-generated.t drop
current log output when keying off of 'r=None').  I'm not sure what the right
behavior for that is (1fd352aa08fc explicitly enabled this behavior for
graphlog), but the day before the release isn't the time to experiment.
Yuya Nishihara - Jan. 31, 2018, 11:36 a.m.
On Wed, 31 Jan 2018 00:14:27 -0500, Matt Harbison wrote:
> # HG changeset patch
> # User Matt Harbison <matt_harbison@yahoo.com>
> # Date 1517166539 18000
> #      Sun Jan 28 14:08:59 2018 -0500
> # Branch stable
> # Node ID 105a48b751a294a386c46d13041160f29e627aee
> # Parent  4ac443b8390179d22010fe49ea57e18ae44e48d4
> revset: evaluate filesets against each revision for 'file()' (issue5778)
> 
> After f2aeff8a87b6, the fileset was evaluated to a set of files against the
> working directory, and then those files were applied against each revision.  The
> result was nonsense.  For example, `hg log -r 'file("set:exec()")'` on the
> Mercurial repo listed revision 0 because it has the `hg` script, which is
> currently +x.  But that bit wasn't applied until revision 280 (which
> 'contains()' properly indicates).
> 
> This technique was borrowed from checkstatus(), which services adds(),
> modifies(), and removes(), so it seems safe enough.  The 'r=all()' is kind of a
> hack, but necessary because keying off 'r=None' changes the behavior of
> `hg log set:...` (test-largefiles-misc.t and test-fileset-generated.t drop
> current log output when keying off of 'r=None').  I'm not sure what the right
> behavior for that is (1fd352aa08fc explicitly enabled this behavior for
> graphlog), but the day before the release isn't the time to experiment.
> 
> diff --git a/mercurial/revset.py b/mercurial/revset.py
> --- a/mercurial/revset.py
> +++ b/mercurial/revset.py
> @@ -1051,6 +1051,7 @@
>  
>      l = getargs(x, 1, -1, "_matchfiles requires at least one argument")
>      pats, inc, exc = [], [], []
> +    hasset = False
>      rev, default = None, None
>      for arg in l:
>          s = getstring(arg, "_matchfiles requires string arguments")
> @@ -1074,11 +1075,12 @@
>              default = value
>          else:
>              raise error.ParseError('invalid _matchfiles prefix: %s' % prefix)
> +        if not hasset and matchmod.patkind(value) == 'set':
> +            hasset = True

'r:'/'d:' values shouldn't be tested. I think it can be simply written as
any(patkind(p) == 'set' for p in pats + inc + exc).

>      if not default:
>          default = 'glob'
>  
> -    m = matchmod.match(repo.root, repo.getcwd(), pats, include=inc,
> -                       exclude=exc, ctx=repo[rev], default=default)
> +    mcache = [None]
>  
>      # This directly read the changelog data as creating changectx for all
>      # revisions is quite expensive.
> @@ -1089,6 +1091,16 @@
>              files = repo[x].files()
>          else:
>              files = getfiles(x)
> +
> +        if not mcache[0] or (hasset and rev == 'all()'):
> +            r = rev
> +            if rev == 'all()':
> +                r = x
> +            mcache[0] = matchmod.match(repo.root, repo.getcwd(), pats,
> +                                       include=inc, exclude=exc, ctx=repo[r],
> +                                       default=default)
> +        m = mcache[0]
> +
>          for f in files:
>              if m(f):
>                  return True
> @@ -1110,7 +1122,8 @@
>      """
>      # i18n: "file" is a keyword
>      pat = getstring(x, _("file requires a pattern"))
> -    return _matchfiles(repo, subset, ('string', 'p:' + pat))
> +    return _matchfiles(repo, subset, ('list', ('string', 'p:' + pat),
> +                                              ('string', 'r:all()')))

"all()" can be a valid revision label. Instead, maybe we can take rev=None
as an unspecified value, and parse 'r:' into rev=wdirrev.

Patch

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -1051,6 +1051,7 @@ 
 
     l = getargs(x, 1, -1, "_matchfiles requires at least one argument")
     pats, inc, exc = [], [], []
+    hasset = False
     rev, default = None, None
     for arg in l:
         s = getstring(arg, "_matchfiles requires string arguments")
@@ -1074,11 +1075,12 @@ 
             default = value
         else:
             raise error.ParseError('invalid _matchfiles prefix: %s' % prefix)
+        if not hasset and matchmod.patkind(value) == 'set':
+            hasset = True
     if not default:
         default = 'glob'
 
-    m = matchmod.match(repo.root, repo.getcwd(), pats, include=inc,
-                       exclude=exc, ctx=repo[rev], default=default)
+    mcache = [None]
 
     # This directly read the changelog data as creating changectx for all
     # revisions is quite expensive.
@@ -1089,6 +1091,16 @@ 
             files = repo[x].files()
         else:
             files = getfiles(x)
+
+        if not mcache[0] or (hasset and rev == 'all()'):
+            r = rev
+            if rev == 'all()':
+                r = x
+            mcache[0] = matchmod.match(repo.root, repo.getcwd(), pats,
+                                       include=inc, exclude=exc, ctx=repo[r],
+                                       default=default)
+        m = mcache[0]
+
         for f in files:
             if m(f):
                 return True
@@ -1110,7 +1122,8 @@ 
     """
     # i18n: "file" is a keyword
     pat = getstring(x, _("file requires a pattern"))
-    return _matchfiles(repo, subset, ('string', 'p:' + pat))
+    return _matchfiles(repo, subset, ('list', ('string', 'p:' + pat),
+                                              ('string', 'r:all()')))
 
 @predicate('head()', safe=True)
 def head(repo, subset, x):
diff --git a/tests/test-glog.t b/tests/test-glog.t
--- a/tests/test-glog.t
+++ b/tests/test-glog.t
@@ -1890,7 +1890,9 @@ 
   $ testlog -r "sort(file('set:copied()'), -rev)"
   ["sort(file('set:copied()'), -rev)"]
   []
-  <baseset []>
+  <filteredset
+    <fullreposet- 0:7>,
+    <matchfiles patterns=['set:copied()'], include=[] exclude=[], default='glob', rev='all()'>>
 
 Test --removed
 
diff --git a/tests/test-largefiles-update.t b/tests/test-largefiles-update.t
--- a/tests/test-largefiles-update.t
+++ b/tests/test-largefiles-update.t
@@ -726,6 +726,20 @@ 
 
 #endif
 
+The fileset revset is evaluated for each revision, instead of once on wdir(),
+and then patterns matched on each revision.  Here, no exec bits are set in
+wdir(), but a matching revision is detected.
+
+  $ hg file 'set:exec()'
+  [1]
+  $ hg log -r 'file("set:exec()")'
+  changeset:   9:be1b433a65b1
+  tag:         tip
+  parent:      4:07d6153b5c04
+  user:        test
+  date:        Thu Jan 01 00:00:00 1970 +0000
+  summary:     chmod +x large2
+  
 Test a fatal error interrupting an update. Verify that status report dirty
 files correctly after an interrupted update. Also verify that checking all
 hashes reveals it isn't clean.