Patchwork [05,of,21,V2] speedy: create an index used to speed up 'author' revset query

login
register
mail settings
Submitter Tomasz Kleczek
Date Dec. 14, 2012, 2:52 a.m.
Message ID <859126a36e9173e5f637.1355453537@dev408.prn1.facebook.com>
Download mbox | patch
Permalink /patch/87/
State Deferred, archived
Headers show

Comments

Tomasz Kleczek - Dec. 14, 2012, 2:52 a.m.
# HG changeset patch
# User Tomasz Kleczek <tkleczek at fb.com>
# Date 1355383081 28800
# Node ID 859126a36e9173e5f6370ddb7b8bc14fc82ef682
# Parent  81a3c4c0455396f66dc0bf5651b079a375c003fe
speedy: create an index used to speed up 'author' revset query

To accelerate history queries, the server builds data structures in the form
of key-value mappings, which are referred to as `indices`.

To simplify the design we assume that the indices are read-only during
server operation. The server is meant to be updated in these steps:
1. stop the serverrepo server
2. stop the history server
3. pull new changes into the serverrepo
4. run the explicit history server update command or let it be performed
   implicitly during the server startup
5. start the history server
6. start the serverrepo server

For now, indices are created from scratch on each server initialization
and are stored as python dicts in memory. Therefore no update mechanism
is needed at the moment. Persistent storage and update mechanisms will
be added in the subsequent patches.

This change introduces an index that will be used to speed up the
`author` query. It is a table keyed by username, with
each value being a list of every node committed by that user.

Patch

diff --git a/hgext/speedy/client.py b/hgext/speedy/client.py
--- a/hgext/speedy/client.py
+++ b/hgext/speedy/client.py
@@ -9,6 +9,7 @@ 
 from mercurial import localrepo
 from mercurial.i18n import _
 import server
+import localtransport
 
 def nodestorevs(repo, nodes):
     return [repo[n].rev() for n in nodes if repo.changelog.hasnode(n)]
@@ -22,7 +23,7 @@ 
     The tuple is: (last common node, list of local changesets' node ids).
     """
     # Temporary setting quiet to get rid of 'search for changes message'
-    # string printed to output
+    # string printed to stdout
     oldquiet = repo.ui.quiet
     repo.ui.quiet = True
     try:
@@ -47,14 +48,15 @@ 
     """Class that encapsulates communication details with the metadata server.
 
     Its responsibilities:
-        - delegating the query to the server
+        - delegating the query to the proxy object that encapsulates
+          low level client/server communication details
         - translating node ids from server to revision numbers
         - computing and caching a list of local revs
     """
 
-    def __init__(self, server, repo, serverrepopath):
+    def __init__(self, proxy, repo, serverrepopath):
         self.serverrepopath = serverrepopath
-        self._server = server
+        self._proxy = proxy
         self._repo = repo
 
     @util.propertycache
@@ -69,7 +71,7 @@ 
         return self._localrevs
 
     def author(self, x):
-        resp = self._server.author(x)
+        resp = self._proxy.request('author', (x,))
         return nodestorevs(self._repo, resp)
 
 def patchedauthor(metapeer, repo, subset, x):
@@ -88,11 +90,13 @@ 
 
 def _speedysetup(ui, repo):
     """Initialize speedy client."""
+
     serverrepopath = ui.config('speedy', 'serverrepo', repo.root)
     serverrepo = localrepo.localrepository(ui, path=serverrepopath)
     mserver = server.makeserver(serverrepo)
+    proxy = localtransport.localclient(mserver)
 
-    mpeer = metapeer(mserver, repo, serverrepopath)
+    mpeer = metapeer(proxy, repo, serverrepopath)
 
     def wrapwithpeer(fun, peer):
         def wrapper(*args, **kwargs):
diff --git a/hgext/speedy/index.py b/hgext/speedy/index.py
new file mode 100644
--- /dev/null
+++ b/hgext/speedy/index.py
@@ -0,0 +1,25 @@ 
+# Copyright 2012 Facebook
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+"""Index definitions.
+
+Indices are data structures in the form of key-value mappings used
+by the server to speed up computations.
+
+Each index has a corresponding maker function that takes as a parameter
+an iterable of change contexts from which the index is to be created
+and returns the index in the form of a dict.
+"""
+
+def makeuserchgs(ctxs):
+    """Return the `userchgs` index.
+
+    `userchgs` is keyed by username, with each value being a list
+        of node ids of the changes committed by that user.
+    """
+    newentries = {}
+    for ctx in ctxs:
+        newentries.setdefault(ctx.user(), []).append(ctx.node())
+    return newentries
diff --git a/hgext/speedy/server.py b/hgext/speedy/server.py
--- a/hgext/speedy/server.py
+++ b/hgext/speedy/server.py
@@ -9,12 +9,16 @@ 
 """
 
 from mercurial import revset
+from mercurial import encoding
+import index
 
 class metaserver(object):
     """Contains all the logic behind the query acceleration."""
 
-    def __init__(self, repo):
+    def __init__(self, repo, indices):
         self.repo = repo
+        for name, idx in indices.iteritems():
+            setattr(self, name, idx)
 
     def author(self, pat):
         """Return a list of changes commited by a user that matches pattern.
@@ -24,10 +28,24 @@ 
 
         Returns a list of node ids.
         """
-        # This is going to be accelerated in the subsequent patches
-        revs = revset.author(self.repo, list(self.repo), ('symbol', pat))
-        return [self.repo[r].node() for r in revs]
+        pat = encoding.lower(pat)
+        kind, pattern, matcher = revset._substringmatcher(pat)
+        nodecands = []
+        for user, l in self.userchgs.iteritems():
+            if matcher(encoding.lower(user)):
+                nodecands.extend(l)
+        return nodecands
+
+indicecfg = {
+    'userchgs': index.makeuserchgs,
+}
 
 def makeserver(repo):
-    """Return an initialized metaserver instance."""
-    return metaserver(repo)
+    """Return an initialized metaserver instance.
+
+    Update the indices to the most recent revision along the way.
+    """
+    ctxs = [repo[r] for r in xrange(0, len(repo))]
+    indices = dict([(name, create(ctxs)) for name, create in
+        indicecfg.iteritems()])
+    return metaserver(repo, indices)