Patchwork [15,of,21,V2] speedy: add support for arbitrary file patterns in path query

login
register
mail settings
Submitter Tomasz Kleczek
Date Dec. 14, 2012, 2:52 a.m.
Message ID <2fd469f383645ddd6bdf.1355453547@dev408.prn1.facebook.com>
Download mbox | patch
Permalink /patch/97/
State Deferred, archived
Headers show

Comments

Tomasz Kleczek - Dec. 14, 2012, 2:52 a.m.
# HG changeset patch
# User Tomasz Kleczek <tkleczek at fb.com>
# Date 1355422376 28800
# Node ID 2fd469f383645ddd6bdf96b412f5b5f5bc5d6606
# Parent  fd47c0de8d33a0a9106d8bbdc71bd66e3db8764e
speedy: add support for arbitrary file patterns in path query

This change speeds up commands such as:

hg log "glob:*"
hg log "relglob:*.py"
hg log d2 --include "**.py"

Only the fileset patterns are still not supported server-side.

Patch

diff --git a/hgext/speedy/client.py b/hgext/speedy/client.py
--- a/hgext/speedy/client.py
+++ b/hgext/speedy/client.py
@@ -91,18 +91,26 @@ 
         `rev` is in fncache then fncache[rev] is a list with filenames
         this rev modifies that are matching according to `match`.
 
-        If match containts only 'path' or 'relpath' patterns the query is sent
+        If match doesn't contain any fileset expressions, the query is sent
         to the server, otherwise it is performed locally as server doesn't
-        support arbitrary patterns just yet.
+        support fileset expressions.
         """
-        anynonpaths = bool(filter(lambda (k, v): k != 'path', match._pats))
-        if match._includepats or match._excludepats or anynonpaths:
-            # For now, server supports only literal paths and not arbitrary
-            # patterns, fall back to the local query
+        allpats = match._pats + match._includepats + match._excludepats
+        anyfilesetexp = bool(filter(lambda (k, v): k == 'set', allpats))
+        if anyfilesetexp:
+            # Server doesn't support filesets, fall back to the local
+            # fileset query
             return cmdutil.filterrevs(self._repo, list(self._repo), match)
         else:
-            paths = [v for k, v in match._pats]
-            wanted = self._proxy.request('path', (paths,))
+            # Would like to send the match object over the network, but
+            # it is not possible as it contains functions and working
+            # dir context object. Send all parameters used to init the
+            # object in a dict instead and create the object server-side.
+            # match._ctx field is used only if one of the patterns to match is
+            # a 'set:' pattern. Therefore we do not lose any information here.
+            matchdict = dict(patterns=match._pats, include=match._includepats,
+                    exclude=match._excludepats)
+            wanted = self._proxy.request('path', (matchdict,))
             return set(nodestorevs(self._repo, wanted)), {}
 
 def _patchedauthor(metapeer, repo, subset, pats):
diff --git a/hgext/speedy/index.py b/hgext/speedy/index.py
--- a/hgext/speedy/index.py
+++ b/hgext/speedy/index.py
@@ -53,3 +53,13 @@ 
         for path in paths:
             filechgs.setdefault(path, []).append(ctx.node())
     return filechgs
+
+def makefiles(ctxs):
+    """Return the `files` index.
+
+    `files` is keyed by file name, with each value being an empty string
+    """
+    files = set()
+    for ctx in ctxs:
+        files.update(ctx.files())
+    return dict([(fn, '') for fn in files])
diff --git a/hgext/speedy/server.py b/hgext/speedy/server.py
--- a/hgext/speedy/server.py
+++ b/hgext/speedy/server.py
@@ -14,6 +14,7 @@ 
 from mercurial import cmdutil
 from mercurial.i18n import _
 from mercurial import util
+from mercurial import match as matchmod
 import index
 import protocol
 import tcptransport
@@ -59,10 +60,44 @@ 
         return [node for node, date in self.chgdate.iteritems()
             if matcher(date)]
 
-    def path(self, paths):
-        """Return a list of changesets that modify any of the paths.
+    def path(self, matchargs):
+        """Return a list of changesets that modify the specified paths.
 
-        Only the changes present in `subset` are returned.
+        matchargs: a dict that may contain the following parameters:
+            'patterns' - a pattern list
+            'include' - a pattern list describing additional paths
+                to include
+            'exclude' - a pattern list describing additional paths
+                to exclude
+
+        A pattern list is an iterable with pairs of (kind, pattern),
+                kind may be any pattern kind recognized by match.match
+                constructor except for 'relpath' and 'set'.
+
+        If all patterns are literal paths, we can compute answer very
+        fast using `filechgs` index (see _literalpath method). Otherwise,
+        we have to fall back to an exhaustive search (see _patternpath
+        method).
+        """
+        pats = matchargs.get('patterns', [])
+        include = matchargs.get('include', [])
+        exclude = matchargs.get('exclude', [])
+        kinds = [k for k, v in pats]
+        if not include and not exclude and kinds == ['path'] * len(kinds):
+            paths = [ v for k, v in pats ]
+            wanted = self._literalpath(paths)
+        else:
+            def patsconvert(pats):
+                return[':'.join(p) for p in pats]
+            patterns = patsconvert(pats)
+            match = matchmod.match(self.repo.root, self.repo.root,
+                    patterns, include=patsconvert(include),
+                    exclude=patsconvert(exclude))
+            wanted = self._patternpath(match)
+        return wanted
+
+    def _literalpath(self, paths):
+        """Return a list of changesets touching any of the paths.
 
         Uses `filechgs` index which provides the mapping from paths
         (files and directories) to a list of changes touching this path.
@@ -74,10 +109,23 @@ 
             nodes.update(newnodes)
         return list(nodes)
 
+    def _patternpath(self, match):
+        """Return a list of changesets matching given files.
+
+        match: a callable that defines which files are relevant.
+               File is relevant if match(filename) == True.
+
+        Slow compared to _literalpath since it iterates through all filenames
+        in the repository history.
+        """
+        matchingfiles = filter(match, self.files.keys())
+        return self._literalpath(matchingfiles)
+
 indicecfg = {
     'userchgs': index.makeuserchgs,
     'chgdate': index.makechgdate,
     'filechgs': index.makefilechgs,
+    'files': index.makefiles,
 }
 
 def makeserver(repo):
diff --git a/tests/test-speedy.t b/tests/test-speedy.t
--- a/tests/test-speedy.t
+++ b/tests/test-speedy.t
@@ -272,10 +272,10 @@ 
   $ hg log --rev "date(10/20/2012) & user(testuser2)"
   chg1
 
-  $ cat >> $TESTTMP/localrepo/.hg/hgrc <<EOF_END
-  > [speedy]
-  > client = False
-  > EOF_END
+  $ hg log "glob:d2/*" --exclude "**.py"
+  chgx
+  chgpushed
+  chg4
 
   $ cd $TESTTMP/serverrepo