Patchwork [4,of,6] cmdutil: rewrite walkchangerevs() by using logcmdutil functions

login
register
mail settings
Submitter Yuya Nishihara
Date Oct. 3, 2020, 4:10 a.m.
Message ID <d399d4ab37adc58b51e1.1601698208@mimosa>
Download mbox | patch
Permalink /patch/47371/
State Accepted
Headers show

Comments

Yuya Nishihara - Oct. 3, 2020, 4:10 a.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1599728503 -32400
#      Thu Sep 10 18:01:43 2020 +0900
# Node ID d399d4ab37adc58b51e15c4f829f7e7b2a150ac6
# Parent  8eec1c384c992f9ce343a4245c2abe5f371b9bcd
cmdutil: rewrite walkchangerevs() by using logcmdutil functions

cmdutil.walkchangerevs() now takes (revs, makefilematcher) in place of
(match, opts), and provides only the "windowing" functionality. The classes
and functions that this leaves unused will be removed by the next patch.

"hg grep --follow" (--all-files) is still broken because there is no logic
to follow copies while traversing the changelog, but it does at least follow
the DAG.

Patch

diff --git a/hgext/churn.py b/hgext/churn.py
--- a/hgext/churn.py
+++ b/hgext/churn.py
@@ -23,7 +23,6 @@  from mercurial import (
     patch,
     pycompat,
     registrar,
-    scmutil,
 )
 from mercurial.utils import dateutil
 
@@ -76,8 +75,6 @@  def countrate(ui, repo, amap, *pats, **o
     if opts.get(b'date'):
         df = dateutil.matchdate(opts[b'date'])
 
-    m = scmutil.match(repo[None], pats, opts)
-
     def prep(ctx, fmatch):
         rev = ctx.rev()
         if df and not df(ctx.date()[0]):  # doesn't match date format
@@ -99,7 +96,15 @@  def countrate(ui, repo, amap, *pats, **o
 
         progress.increment()
 
-    for ctx in cmdutil.walkchangerevs(repo, m, opts, prep):
+    wopts = logcmdutil.walkopts(
+        pats=pats,
+        opts=opts,
+        revspec=opts[b'rev'],
+        include_pats=opts[b'include'],
+        exclude_pats=opts[b'exclude'],
+    )
+    revs, makefilematcher = logcmdutil.makewalker(repo, wopts)
+    for ctx in cmdutil.walkchangerevs(repo, revs, makefilematcher, prep):
         continue
 
     progress.complete()
diff --git a/mercurial/cmdutil.py b/mercurial/cmdutil.py
--- a/mercurial/cmdutil.py
+++ b/mercurial/cmdutil.py
@@ -2428,8 +2428,8 @@  class _followfilter(object):
         return False
 
 
-def walkchangerevs(repo, match, opts, prepare):
-    '''Iterate over files and the revs in which they changed.
+def walkchangerevs(repo, revs, makefilematcher, prepare):
+    '''Iterate over files and the revs in a "windowed" way.
 
     Callers most commonly need to iterate backwards over the history
     in which they are interested. Doing so has awful (quadratic-looking)
@@ -2443,107 +2443,11 @@  def walkchangerevs(repo, match, opts, pr
     yielding each context, the iterator will first call the prepare
     function on each context in the window in forward order.'''
 
-    allfiles = opts.get(b'all_files')
-    follow = opts.get(b'follow') or opts.get(b'follow_first')
-    revs = _walkrevs(repo, opts)
     if not revs:
         return []
-    wanted = set()
-    slowpath = match.anypats() or (not match.always() and opts.get(b'removed'))
-    fncache = {}
     change = repo.__getitem__
 
-    # First step is to fill wanted, the set of revisions that we want to yield.
-    # When it does not induce extra cost, we also fill fncache for revisions in
-    # wanted: a cache of filenames that were changed (ctx.files()) and that
-    # match the file filtering conditions.
-
-    if match.always() or allfiles:
-        # No files, no patterns.  Display all revs.
-        wanted = revs
-    elif not slowpath:
-        # We only have to read through the filelog to find wanted revisions
-
-        try:
-            wanted = walkfilerevs(repo, match, follow, revs, fncache)
-        except FileWalkError:
-            slowpath = True
-
-            # We decided to fall back to the slowpath because at least one
-            # of the paths was not a file. Check to see if at least one of them
-            # existed in history, otherwise simply return
-            for path in match.files():
-                if path == b'.' or path in repo.store:
-                    break
-            else:
-                return []
-
-    if slowpath:
-        # We have to read the changelog to match filenames against
-        # changed files
-
-        if follow:
-            raise error.Abort(
-                _(b'can only follow copies/renames for explicit filenames')
-            )
-
-        # The slow path checks files modified in every changeset.
-        # This is really slow on large repos, so compute the set lazily.
-        class lazywantedset(object):
-            def __init__(self):
-                self.set = set()
-                self.revs = set(revs)
-
-            # No need to worry about locality here because it will be accessed
-            # in the same order as the increasing window below.
-            def __contains__(self, value):
-                if value in self.set:
-                    return True
-                elif not value in self.revs:
-                    return False
-                else:
-                    self.revs.discard(value)
-                    ctx = change(value)
-                    if allfiles:
-                        matches = list(ctx.manifest().walk(match))
-                    else:
-                        matches = [f for f in ctx.files() if match(f)]
-                    if matches:
-                        fncache[value] = matches
-                        self.set.add(value)
-                        return True
-                    return False
-
-            def discard(self, value):
-                self.revs.discard(value)
-                self.set.discard(value)
-
-        wanted = lazywantedset()
-
-    # it might be worthwhile to do this in the iterator if the rev range
-    # is descending and the prune args are all within that range
-    for rev in opts.get(b'prune', ()):
-        rev = repo[rev].rev()
-        ff = _followfilter(repo)
-        stop = min(revs[0], revs[-1])
-        for x in pycompat.xrange(rev, stop - 1, -1):
-            if ff.match(x):
-                wanted = wanted - [x]
-
-    # Now that wanted is correctly initialized, we can iterate over the
-    # revision range, yielding only revisions in wanted.
     def iterate():
-        if follow and match.always():
-            ff = _followfilter(repo, onlyfirst=opts.get(b'follow_first'))
-
-            def want(rev):
-                return ff.match(rev) and rev in wanted
-
-        else:
-
-            def want(rev):
-                return rev in wanted
-
         it = iter(revs)
         stopiteration = False
         for windowsize in increasingwindows():
@@ -2553,28 +2457,10 @@  def walkchangerevs(repo, match, opts, pr
                 if rev is None:
                     stopiteration = True
                     break
-                elif want(rev):
-                    nrevs.append(rev)
+                nrevs.append(rev)
             for rev in sorted(nrevs):
-                fns = fncache.get(rev)
                 ctx = change(rev)
-                if not fns:
-
-                    def fns_generator():
-                        if allfiles:
-
-                            def bad(f, msg):
-                                pass
-
-                            for f in ctx.matches(matchmod.badmatch(match, bad)):
-                                yield f
-                        else:
-                            for f in ctx.files():
-                                if match(f):
-                                    yield f
-
-                    fns = fns_generator()
-                prepare(ctx, scmutil.matchfiles(repo, fns))
+                prepare(ctx, makefilematcher(ctx))
             for rev in nrevs:
                 yield change(rev)
 
diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -3579,7 +3579,6 @@  def grep(ui, repo, pattern, *pats, **opt
 
     skip = set()
     revfiles = {}
-    match = scmutil.match(repo[None], pats, opts)
     found = False
     follow = opts.get(b'follow')
 
@@ -3654,9 +3653,21 @@  def grep(ui, repo, pattern, *pats, **opt
                     if pfn not in matches[parent] and pfn in pctx:
                         grepbody(pfn, parent, readfile(pctx, pfn))
 
+    wopts = logcmdutil.walkopts(
+        pats=pats,
+        opts=opts,
+        revspec=opts[b'rev'],
+        include_pats=opts[b'include'],
+        exclude_pats=opts[b'exclude'],
+        follow=follow,
+        force_changelog_traversal=all_files,
+        filter_revisions_by_pats=not all_files,
+    )
+    revs, makefilematcher = logcmdutil.makewalker(repo, wopts)
+
     ui.pager(b'grep')
     fm = ui.formatter(b'grep', opts)
-    for ctx in cmdutil.walkchangerevs(repo, match, opts, prep):
+    for ctx in cmdutil.walkchangerevs(repo, revs, makefilematcher, prep):
         rev = ctx.rev()
         parent = ctx.p1().rev()
         for fn in sorted(revfiles.get(rev, [])):
diff --git a/tests/test-grep.t b/tests/test-grep.t
--- a/tests/test-grep.t
+++ b/tests/test-grep.t
@@ -990,7 +990,6 @@  follow revision history from wdir:
   1: A add0-cp1, A add0-cp1-mod1, A add0-cp1-mod1-rm3, M add0-mod1, R add0-rm1
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history
  BROKEN: should not abort because of removed file
   $ hg grep --diff -fr'wdir()' data
   add0-cp4-mod4:2147483647:+:data4
@@ -1063,10 +1062,12 @@  follow revision history from multiple re
   1: A add0-cp1, A add0-cp1-mod1, A add0-cp1-mod1-rm3, M add0-mod1, R add0-rm1
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should include the revision 1
   $ hg grep --diff -fr'1+2' data
   add0-cp2-mod2:2:+:data2
   add0-mod2:2:+:data2
+  add0-cp1-mod1:1:+:data1
+  add0-cp1-mod1-rm3:1:+:data1
+  add0-mod1:1:+:data1
   add0:0:+:data0
   add0-mod1:0:+:data0
   add0-mod2:0:+:data0
@@ -1076,7 +1077,6 @@  follow revision history from multiple re
   add0-rm2:0:+:data0
   add0-rm4:0:+:data0
 
- BROKEN: should include the revision 1
   $ hg grep -fr'1+2' data
   add0:2:data0
   add0-cp2:2:data0
@@ -1089,6 +1089,19 @@  follow revision history from multiple re
   add0-mod4:2:data0
   add0-rm1:2:data0
   add0-rm4:2:data0
+  add0:1:data0
+  add0-cp1:1:data0
+  add0-cp1-mod1:1:data0
+  add0-cp1-mod1:1:data1
+  add0-cp1-mod1-rm3:1:data0
+  add0-cp1-mod1-rm3:1:data1
+  add0-mod1:1:data0
+  add0-mod1:1:data1
+  add0-mod2:1:data0
+  add0-mod3:1:data0
+  add0-mod4:1:data0
+  add0-rm2:1:data0
+  add0-rm4:1:data0
   add0:0:data0
   add0-mod1:0:data0
   add0-mod2:0:data0
@@ -1108,11 +1121,9 @@  follow file history from wdir parent, un
   add0-mod3:3:+:data3
   add0-mod3:0:+:data0
 
- BROKEN: should not include the revision 2
   $ hg grep -f data add0-mod3
   add0-mod3:3:data0
   add0-mod3:3:data3
-  add0-mod3:2:data0
   add0-mod3:1:data0
   add0-mod3:0:data0
 
@@ -1124,10 +1135,8 @@  follow file history from wdir parent, mo
   $ hg grep --diff -f data add0-mod4
   add0-mod4:0:+:data0
 
- BROKEN: should not include the revision 2
   $ hg grep -f data add0-mod4
   add0-mod4:3:data0
-  add0-mod4:2:data0
   add0-mod4:1:data0
   add0-mod4:0:data0
 
@@ -1170,7 +1179,7 @@  follow file history from wdir parent, co
   [255]
 
   $ hg grep --diff -f data add0-cp4
-  abort: cannot follow file not in parent revision: "add0-cp4"
+  abort: cannot follow nonexistent file: "add0-cp4"
   [255]
 
  BROKEN: maybe better to abort
@@ -1199,7 +1208,7 @@  follow file history from wdir parent (ex
   [255]
 
   $ hg grep --diff -fr. data add0-cp1-mod1-rm3
-  abort: cannot follow file not in parent revision: "add0-cp1-mod1-rm3"
+  abort: cannot follow file not in any of the specified revisions: "add0-cp1-mod1-rm3"
   [255]
 
  BROKEN: should abort
@@ -1213,14 +1222,13 @@  follow file history from wdir parent, re
   abort: cannot follow file not in parent revision: "add0-rm4"
   [255]
 
- BROKEN: may be okay, but different behavior from "hg log"
   $ hg grep --diff -f data add0-rm4
-  add0-rm4:0:+:data0
+  abort: cannot follow file not in parent revision: "add0-rm4"
+  [255]
 
- BROKEN: should not include the revision 2, and maybe better to abort
+ BROKEN: should abort
   $ hg grep -f data add0-rm4
   add0-rm4:3:data0
-  add0-rm4:2:data0
   add0-rm4:1:data0
   add0-rm4:0:data0
 
@@ -1250,14 +1258,12 @@  follow file history from wdir parent, mu
   add0:0:+:data0
   add0-mod3:0:+:data0
 
- BROKEN: should not include the revision 2
  BROKEN: should follow history across renames
   $ hg grep -f data add0-mod3 add0-cp1-mod1
   add0-cp1-mod1:3:data0
   add0-cp1-mod1:3:data1
   add0-mod3:3:data0
   add0-mod3:3:data3
-  add0-mod3:2:data0
   add0-cp1-mod1:1:data0
   add0-cp1-mod1:1:data1
   add0-mod3:1:data0
@@ -1269,8 +1275,8 @@  follow file history from specified revis
   2: A add0-cp2, A add0-cp2-mod2, M add0-mod2, R add0-rm2
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should include the revision 2
   $ hg grep --diff -fr2 data add0-mod2
+  add0-mod2:2:+:data2
   add0-mod2:0:+:data0
 
   $ hg grep -fr2 data add0-mod2
@@ -1284,10 +1290,8 @@  follow file history from specified revis
   2: A add0-cp2, A add0-cp2-mod2, M add0-mod2, R add0-rm2
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history from the specified revision
   $ hg grep --diff -fr2 data add0-cp2
-  abort: cannot follow file not in parent revision: "add0-cp2"
-  [255]
+  add0:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr2 data add0-cp2
@@ -1299,10 +1303,9 @@  follow file history from specified revis
   2: A add0-cp2, A add0-cp2-mod2, M add0-mod2, R add0-rm2
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history from the specified revision
   $ hg grep --diff -fr2 data add0-cp2-mod2
-  abort: cannot follow file not in parent revision: "add0-cp2-mod2"
-  [255]
+  add0-cp2-mod2:2:+:data2
+  add0:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr2 data add0-cp2-mod2
@@ -1315,9 +1318,9 @@  follow file history from specified revis
   abort: cannot follow file not in any of the specified revisions: "add0-rm2"
   [255]
 
- BROKEN: should abort
   $ hg grep --diff -fr2 data add0-rm2
-  add0-rm2:0:+:data0
+  abort: cannot follow file not in any of the specified revisions: "add0-rm2"
+  [255]
 
  BROKEN: should abort
   $ hg grep -fr2 data add0-rm2
@@ -1329,10 +1332,10 @@  follow file history from specified revis
   2: A add0-cp2, A add0-cp2-mod2, M add0-mod2, R add0-rm2
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history from the specified revision
   $ hg grep --diff -fr2 data add0-cp2 add0-mod2
-  abort: cannot follow file not in parent revision: "add0-cp2"
-  [255]
+  add0-mod2:2:+:data2
+  add0:0:+:data0
+  add0-mod2:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr2 data add0-cp2 add0-mod2
@@ -1366,8 +1369,8 @@  follow file history from wdir, modified:
   2147483647: A add0-cp4, A add0-cp4-mod4, M add0-mod4, R add0-rm4
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should include the changes in wdir
   $ hg grep --diff -fr'wdir()' data add0-mod4
+  add0-mod4:2147483647:+:data4
   add0-mod4:0:+:data0
 
   $ hg grep -fr'wdir()' data add0-mod4
@@ -1383,10 +1386,8 @@  follow file history from wdir, copied bu
   2147483647: A add0-cp4, A add0-cp4-mod4, M add0-mod4, R add0-rm4
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history
   $ hg grep --diff -fr'wdir()' data add0-cp4
-  abort: cannot follow file not in parent revision: "add0-cp4"
-  [255]
+  add0:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr'wdir()' data add0-cp4
@@ -1398,10 +1399,9 @@  follow file history from wdir, copied an
   2147483647: A add0-cp4, A add0-cp4-mod4, M add0-mod4, R add0-rm4
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history
   $ hg grep --diff -fr'wdir()' data add0-cp4-mod4
-  abort: cannot follow file not in parent revision: "add0-cp4-mod4"
-  [255]
+  add0-cp4-mod4:2147483647:+:data4
+  add0:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr'wdir()' data add0-cp4-mod4
@@ -1415,10 +1415,12 @@  follow file history from wdir, multiple 
   3: A add0-cp1-cp3, A add0-cp1-mod1-cp3-mod3, R add0-cp1-mod1-rm3, M add0-mod3
   0: A add0, A add0-mod1, A add0-mod2, A add0-mod3, A add0-mod4, A add0-rm1, A add0-rm2, A add0-rm4
 
- BROKEN: should follow history
   $ hg grep --diff -fr'wdir()' data add0-cp4 add0-mod4 add0-mod3
-  abort: cannot follow file not in parent revision: "add0-cp4"
-  [255]
+  add0-mod4:2147483647:+:data4
+  add0-mod3:3:+:data3
+  add0:0:+:data0
+  add0-mod3:0:+:data0
+  add0-mod4:0:+:data0
 
  BROKEN: should follow history across renames
   $ hg grep -fr'wdir()' data add0-cp4 add0-mod4 add0-mod3