Patchwork [14,of,21,V2] speedy: add support for log directory queries

login
register
mail settings
Submitter Tomasz Kleczek
Date Dec. 14, 2012, 2:52 a.m.
Message ID <fd47c0de8d33a0a9106d.1355453546@dev408.prn1.facebook.com>
Download mbox | patch
Permalink /patch/96/
State Deferred, archived
Headers show

Comments

Tomasz Kleczek - Dec. 14, 2012, 2:52 a.m.
# HG changeset patch
# User Tomasz Kleczek <tkleczek at fb.com>
# Date 1355334882 28800
# Node ID fd47c0de8d33a0a9106d8bbdc71bd66e3db8764e
# Parent  45188f6b0235c8840e2931c7ae3ec9939bcb172d
speedy: add support for log directory queries

Introduces a new `filechgs` index and a `path` query that uses it to speed
up log commands such as these:
* log .
* log dir1 dir2

`filechgs` is a table keyed by path (either a file or
a directory path) with each value being a list of all commits
touching this path (for directories it is a list of all commits
touching any of the files within this directory or any of its
subdirectories).

The `path` query takes a number of literal paths (support for other file
patterns will be added in subsequent patches) and returns a list of
node ids of the changes that modify any of these paths.

Patch

diff --git a/hgext/speedy/client.py b/hgext/speedy/client.py
--- a/hgext/speedy/client.py
+++ b/hgext/speedy/client.py
@@ -81,6 +81,30 @@ 
         resp = self._proxy.request('date', (x,))
         return nodestorevs(self._repo, resp)
 
+    def path(self, match):
+        """Perform a `path` query.
+
+        match: a match.match instance that defines which files are matching
+
+        Returns a pair of (wanted, fncache) where `wanted` is a set of
+        matching revisions and `fncache` is a dict such that if
+        `rev` is in fncache then fncache[rev] is a list with filenames
+        this rev modifies that are matching according to `match`.
+
+        If match contains only 'path' or 'relpath' patterns the query is sent
+        to the server, otherwise it is performed locally as server doesn't
+        support arbitrary patterns just yet.
+        """
+        anynonpaths = bool(filter(lambda (k, v): k != 'path', match._pats))
+        if match._includepats or match._excludepats or anynonpaths:
+            # For now, server supports only literal paths and not arbitrary
+            # patterns, fall back to the local query
+            return cmdutil.filterrevs(self._repo, list(self._repo), match)
+        else:
+            paths = [v for k, v in match._pats]
+            wanted = self._proxy.request('path', (paths,))
+            return set(nodestorevs(self._repo, wanted)), {}
+
 def _patchedauthor(metapeer, repo, subset, pats):
     """Return the revisions commited by matching users.
 
@@ -116,6 +140,15 @@ 
     ds = revset.getstring(d, _("date requires a string"))
     return _patcheddate(metapeer, repo, subset, ds)
 
+def patchedfilterrevs(metapeer, repo, revs, match):
+    """Used to monkey patch cmdutil.filterrevs function."""
+    wanted, fncache = metapeer.path(match)
+    lrevs = metapeer.localrevs()
+    lwanted, lfncache = cmdutil.filterrevs(repo, lrevs, match)
+    wanted.update(lwanted)
+    fncache.update(lfncache)
+    return (wanted & set(revs)), fncache
+
 def patchedfilterrevsopts(metapeer, repo, revs, opts):
     users = opts.get('user')
     date = opts.get('date')
@@ -159,6 +192,7 @@ 
     revset.symbols['author'] = wrapwithpeer(patchedauthor, mpeer)
     revset.symbols['date'] = wrapwithpeer(patcheddate, mpeer)
     cmdutil.walkchangerevshooks['filterrevsopts'] = wrapwithpeer(patchedfilterrevsopts, mpeer)
+    cmdutil.walkchangerevshooks['filterrevs'] = wrapwithpeer(patchedfilterrevs, mpeer)
 
 def uisetup(ui):
     # Perform patching and most of the initialization inside log wrapper,
diff --git a/hgext/speedy/index.py b/hgext/speedy/index.py
--- a/hgext/speedy/index.py
+++ b/hgext/speedy/index.py
@@ -32,3 +32,24 @@ 
         of that change.
     """
     return dict([(ctx.node(), ctx.date()[0]) for ctx in ctxs])
+
+
+def makefilechgs(ctxs):
+    """Return the `filechgs` index.
+
+    `filechgs` is keyed by paths (files or directories), with each value
+        being a list of commits touching this path (for directories it is
+        a list of all commits touching any of the files within this directory
+        and its subdirectories).
+    """
+    filechgs = {}
+    for ctx in ctxs:
+        paths = set()
+        paths.add('')
+        for fn in ctx.files():
+            dirs = fn.split('/')
+            for i in range(len(dirs)):
+                paths.add('/'.join(dirs[0:i+1]))
+        for path in paths:
+            filechgs.setdefault(path, []).append(ctx.node())
+    return filechgs
diff --git a/hgext/speedy/server.py b/hgext/speedy/server.py
--- a/hgext/speedy/server.py
+++ b/hgext/speedy/server.py
@@ -59,9 +59,25 @@ 
         return [node for node, date in self.chgdate.iteritems()
             if matcher(date)]
 
+    def path(self, paths):
+        """Return a list of changesets that modify any of the paths.
+
+        No subset filtering is done here; the caller restricts the result.
+
+        Uses `filechgs` index which provides the mapping from paths
+        (files and directories) to a list of changes touching this path.
+        All that is left to do is to merge the relevant lists.
+        """
+        nodes = set()
+        for path in paths:
+            newnodes = self.filechgs.get(path, [])
+            nodes.update(newnodes)
+        return list(nodes)
+
 indicecfg = {
     'userchgs': index.makeuserchgs,
     'chgdate': index.makechgdate,
+    'filechgs': index.makefilechgs,
 }
 
 def makeserver(repo):
diff --git a/tests/test-speedy.t b/tests/test-speedy.t
--- a/tests/test-speedy.t
+++ b/tests/test-speedy.t
@@ -154,6 +154,85 @@ 
   chg2
   chg0
 
+  $ hg log .
+  chg10
+  chg9
+  chg8
+  chgx
+  chgl6
+  chgpushed
+  chg5
+  chg4
+  chg3
+  chg2
+  chg1
+  chg0
+
+  $ hg log d1 d2
+  chg9
+  chg8
+  chgx
+  chgl6
+  chgpushed
+  chg4
+  chg3
+  chg2
+  chg1
+  chg0
+
+  $ hg log d1
+  chg8
+  chgl6
+  chg2
+  chg1
+  chg0
+
+  $ cd d2
+
+  $ hg log .
+  chg9
+  chgx
+  chgpushed
+  chg4
+  chg3
+
+  $ hg log "glob:*"
+  chg9
+  chgx
+  chgpushed
+  chg4
+  chg3
+
+  $ hg log "relglob:*.py"
+  chg9
+  chg5
+  chg3
+
+  $ cd ..
+
+  $ hg log "set:**.py - *.py"
+  chg9
+  chg3
+
+  $ hg log d2 --include "**.py"
+  chg9
+  chg3
+
+  $ hg log d3_s
+
+  $ hg log -u testuser1 d1
+  chg8
+  chgl6
+  chg2
+  chg0
+
+  $ hg log --rev "date(10/20/2012)" "glob:*"
+  chg5
+
+  $ hg log non-existent-dir
+
+  $ cd ..
+
 Testing socket server
 
 Writing server config file