Patchwork [6,of,6] grep: move prep() to grepsearcher class

login
register
mail settings
Submitter Yuya Nishihara
Date Oct. 7, 2020, 12:56 p.m.
Message ID <6026fbf740f499daabc0.1602075365@mimosa>
Download mbox | patch
Permalink /patch/47404/
State Accepted
Headers show

Comments

Yuya Nishihara - Oct. 7, 2020, 12:56 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1599635906 -32400
#      Wed Sep 09 16:18:26 2020 +0900
# Node ID 6026fbf740f499daabc0c97ae9b698d66ec4960a
# Parent  3bc6728f3ab93a4b2831c706052bbd6fe7090b23
grep: move prep() to grepsearcher class

Patch

diff --git a/mercurial/commands.py b/mercurial/commands.py
--- a/mercurial/commands.py
+++ b/mercurial/commands.py
@@ -45,7 +45,6 @@  from . import (
     help,
     hg,
     logcmdutil,
-    match as matchmod,
     merge as mergemod,
     mergestate as mergestatemod,
     narrowspec,
@@ -3371,6 +3370,7 @@  def grep(ui, repo, pattern, *pats, **opt
     """
     opts = pycompat.byteskwargs(opts)
     diff = opts.get(b'all') or opts.get(b'diff')
+    follow = opts.get(b'follow')
     if diff and opts.get(b'all_files'):
         raise error.Abort(_(b'--diff and --all-files are mutually exclusive'))
     if opts.get(b'all_files') is None and not diff:
@@ -3398,7 +3398,9 @@  def grep(ui, repo, pattern, *pats, **opt
     if opts.get(b'print0'):
         sep = eol = b'\0'
 
-    searcher = grepmod.grepsearcher(ui, repo, regexp)
+    searcher = grepmod.grepsearcher(
+        ui, repo, regexp, all_files=all_files, diff=diff, follow=follow
+    )
 
     getfile = searcher._getfile
     matches = searcher._matches
@@ -3515,58 +3517,6 @@  def grep(ui, repo, pattern, *pats, **opt
     skip = searcher._skip
     revfiles = searcher._revfiles
     found = False
-    follow = opts.get(b'follow')
-
-    getrenamed = searcher._getrenamed
-
-    def prep(ctx, fmatch):
-        rev = ctx.rev()
-        pctx = ctx.p1()
-        matches.setdefault(rev, {})
-        if diff:
-            parent = pctx.rev()
-            matches.setdefault(parent, {})
-        files = revfiles.setdefault(rev, [])
-        if rev is None:
-            # in `hg grep pattern`, 2/3 of the time is spent is spent in
-            # pathauditor checks without this in mozilla-central
-            contextmanager = repo.wvfs.audit.cached
-        else:
-            contextmanager = util.nullcontextmanager
-        with contextmanager():
-            # TODO: maybe better to warn missing files?
-            if all_files:
-                fmatch = matchmod.badmatch(fmatch, lambda f, msg: None)
-                filenames = ctx.matches(fmatch)
-            else:
-                filenames = (f for f in ctx.files() if fmatch(f))
-            for fn in filenames:
-                # fn might not exist in the revision (could be a file removed by
-                # the revision). We could check `fn not in ctx` even when rev is
-                # None, but it's less racy to protect againt that in readfile.
-                if rev is not None and fn not in ctx:
-                    continue
-
-                copy = None
-                if follow:
-                    copy = getrenamed(fn, rev)
-                    if copy:
-                        copies.setdefault(rev, {})[fn] = copy
-                        if fn in skip:
-                            skip.add(copy)
-                if fn in skip:
-                    continue
-                files.append(fn)
-
-                if fn not in matches[rev]:
-                    searcher._grepbody(fn, rev, searcher._readfile(ctx, fn))
-
-                if diff:
-                    pfn = copy or fn
-                    if pfn not in matches[parent] and pfn in pctx:
-                        searcher._grepbody(
-                            pfn, parent, searcher._readfile(pctx, pfn)
-                        )
 
     wopts = logcmdutil.walkopts(
         pats=pats,
@@ -3582,7 +3532,9 @@  def grep(ui, repo, pattern, *pats, **opt
 
     ui.pager(b'grep')
     fm = ui.formatter(b'grep', opts)
-    for ctx in cmdutil.walkchangerevs(repo, revs, makefilematcher, prep):
+    for ctx in cmdutil.walkchangerevs(
+        repo, revs, makefilematcher, searcher._prep
+    ):
         rev = ctx.rev()
         parent = ctx.p1().rev()
         for fn in sorted(revfiles.get(rev, [])):
diff --git a/mercurial/grep.py b/mercurial/grep.py
--- a/mercurial/grep.py
+++ b/mercurial/grep.py
@@ -14,6 +14,7 @@  from .i18n import _
 
 from . import (
     error,
+    match as matchmod,
     pycompat,
     scmutil,
     util,
@@ -80,12 +81,23 @@  def difflinestates(a, b):
 
 
 class grepsearcher(object):
-    """Search files and revisions for lines matching the given pattern"""
+    """Search files and revisions for lines matching the given pattern
 
-    def __init__(self, ui, repo, regexp):
+    Options:
+    - all_files to search unchanged files at that revision.
+    - diff to search files in the parent revision so diffs can be generated.
+    - follow to skip files across copies and renames.
+    """
+
+    def __init__(
+        self, ui, repo, regexp, all_files=False, diff=False, follow=False
+    ):
         self._ui = ui
         self._repo = repo
         self._regexp = regexp
+        self._all_files = all_files
+        self._diff = diff
+        self._follow = follow
 
         self._getfile = util.lrucachefunc(repo.file)
         self._getrenamed = scmutil.getrenamedfn(repo)
@@ -127,3 +139,50 @@  class grepsearcher(object):
                     )
                     % {b'filename': fn, b'revnum': pycompat.bytestr(rev)}
                 )
+
+    def _prep(self, ctx, fmatch):
+        rev = ctx.rev()
+        pctx = ctx.p1()
+        self._matches.setdefault(rev, {})
+        if self._diff:
+            parent = pctx.rev()
+            self._matches.setdefault(parent, {})
+        files = self._revfiles.setdefault(rev, [])
+        if rev is None:
+            # in `hg grep pattern`, 2/3 of the time is spent in
+            # pathauditor checks without this in mozilla-central
+            contextmanager = self._repo.wvfs.audit.cached
+        else:
+            contextmanager = util.nullcontextmanager
+        with contextmanager():
+            # TODO: maybe better to warn missing files?
+            if self._all_files:
+                fmatch = matchmod.badmatch(fmatch, lambda f, msg: None)
+                filenames = ctx.matches(fmatch)
+            else:
+                filenames = (f for f in ctx.files() if fmatch(f))
+            for fn in filenames:
+                # fn might not exist in the revision (could be a file removed by
+                # the revision). We could check `fn not in ctx` even when rev is
+                # None, but it's less racy to protect against that in readfile.
+                if rev is not None and fn not in ctx:
+                    continue
+
+                copy = None
+                if self._follow:
+                    copy = self._getrenamed(fn, rev)
+                    if copy:
+                        self._copies.setdefault(rev, {})[fn] = copy
+                        if fn in self._skip:
+                            self._skip.add(copy)
+                if fn in self._skip:
+                    continue
+                files.append(fn)
+
+                if fn not in self._matches[rev]:
+                    self._grepbody(fn, rev, self._readfile(ctx, fn))
+
+                if self._diff:
+                    pfn = copy or fn
+                    if pfn not in self._matches[parent] and pfn in pctx:
+                        self._grepbody(pfn, parent, self._readfile(pctx, pfn))