Patchwork [05,of,15] speedy: create an index used to speed up 'author' revset query

login
register
mail settings
Submitter Tomasz Kleczek
Date Dec. 11, 2012, 6:38 p.m.
Message ID <6013a399d9b4cad8578b.1355251100@dev408.prn1.facebook.com>
Download mbox | patch
Permalink /patch/52/
State Superseded
Headers show

Comments

Tomasz Kleczek - Dec. 11, 2012, 6:38 p.m.
# HG changeset patch
# User Tomasz Kleczek <tkleczek at fb.com>
# Date 1355246587 28800
# Branch stable
# Node ID 6013a399d9b4cad8578b587084ed0eb6a8080f5b
# Parent  e9e1263a5d57cb07fb2f0176163325bd185a3666
speedy: create an index used to speed up 'author' revset query

To accelerate history queries, the server builds data structures in the form
of key-value mappings, which are referred to as `indices`.

To simplify the design we assume that the indices are read-only during
the server operation. The server is meant to be updated in these steps:
1. stop the serverrepo server
2. stop the history server
3. pull new changes into the serverrepo
4. run the explicit history server update command or let it be performed
   implicitly during the server startup
5. start the history server
6. start the serverrepo server

For now, indices are created from scratch on each server initialization
and are stored as python dicts in memory. Therefore no update mechanism
is needed at the moment. Persistent storage and update mechanisms will
be added in the subsequent patches.

This change introduces an index that will be used to speed up the
`author` query. It is a table keyed by username, with
each value being a list of every node committed by that user.
Augie Fackler - Dec. 13, 2012, 3:28 a.m.
On Dec 11, 2012, at 12:38 PM, Tomasz Kleczek <tkleczek at fb.com> wrote:

> # HG changeset patch
> # User Tomasz Kleczek <tkleczek at fb.com>
> # Date 1355246587 28800
> # Branch stable
> # Node ID 6013a399d9b4cad8578b587084ed0eb6a8080f5b
> # Parent  e9e1263a5d57cb07fb2f0176163325bd185a3666
> speedy: create an index used to speed up 'author' revset query
> 
> To accelerate history queries server builds data structures in form
> of key-value mappings which are referred to as `indices`.
> 
> To simplify the desin we assume that the indices are read-only during

typo: design

> the server operation. The server update is meant to be updated in these steps:
> 1. stop the serverrepo server
> 1. stop the history server
> 2. pull new changes into the serverrepo

[...]

> --- /dev/null
> +++ b/hgext/speedy/index.py
> @@ -0,0 +1,24 @@
> +# Copyright 2012 Facebook
> +#
> +# This software may be used and distributed according to the terms of the
> +# GNU General Public License version 2 or any later version.
> +
> +"""Indice definitions.

Index

> +
> +Indices are data structures in the form of key-value mappings used
> +by the server to speed up computations.
> +"""
> +
> +def makeuserchgs(ctxs):
> +    """Return the `userchgs` index in the form of a dict
> +
> +    `userchgs` is keyed by username, with each value being a list
> +        of changes commited by that user.
> +
> +    ctxs: an iterable with change contexts from which the index is to
> +        be created.
> +    """
> +    newentries = {}
> +    for ctx in ctxs:
> +        newentries.setdefault(ctx.user(), []).append(ctx.node())
> +    return newentries
> diff --git a/hgext/speedy/server.py b/hgext/speedy/server.py
> --- a/hgext/speedy/server.py
> +++ b/hgext/speedy/server.py
> @@ -9,12 +9,16 @@
> """
> 
> from mercurial import revset
> +from mercurial import encoding
> +import index
> 
> class metaserver(object):
>     """Contains all the logic behind the query acceleration."""
> 
> -    def __init__(self, repo):
> +    def __init__(self, repo, indices):
>         self.repo = repo
> +        for name, idx in indices.iteritems():
> +            setattr(self, name, idx)
> 
>     def author(self, pat):
>         """Return a list of changes commited by a user that matches pattern.
> @@ -24,10 +28,24 @@
> 
>         Returns a list of node ids.
>         """
> -        # This is going to be accelerated in the subsequent patches
> -        revs = revset.author(self.repo, list(self.repo), ('symbol', pat))
> -        return [self.repo[r].node() for r in revs]
> +        pat = encoding.lower(pat)
> +        kind, pattern, matcher = revset._substringmatcher(pat)
> +        nodecands = []
> +        for user, l in self.userchgs.iteritems():
> +            if matcher(encoding.lower(user)):
> +                nodecands.extend(l)
> +        return nodecands
> +
> +indicecfg = {
> +    'userchgs': index.makeuserchgs,
> +}
> 
> def makeserver(repo):
> -    """Return an initialized metaserver instance."""
> -    return metaserver(repo)
> +    """Return an initialized metaserver instance.
> +
> +    Update the indices to the most recent revision along the way.
> +    """
> +    ctxs = [repo[r] for r in xrange(0, len(repo))]
> +    indices = dict([(name, create(ctxs)) for name, create in
> +        indicecfg.iteritems()])
> +    return metaserver(repo, indices)
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel at selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel

Patch

diff --git a/hgext/speedy/client.py b/hgext/speedy/client.py
--- a/hgext/speedy/client.py
+++ b/hgext/speedy/client.py
@@ -9,6 +9,7 @@ 
 from mercurial import localrepo
 from mercurial.i18n import _
 import server
+import localtransport
 
 def nodestorevs(repo, nodes):
     return [repo[n].rev() for n in nodes if repo.changelog.hasnode(n)]
@@ -47,19 +48,20 @@ 
     """Class that encapsulates communication details with the metadata server.
 
     Its responsibilities:
-        - delegating the query to the server
+        - delegating the query to the proxy object that encapsulates
+          low level client/server communication details
         - translating node ids from server to revision numbers
         - computing and caching a list of local revs
     """
 
-    def __init__(self, server, repo, serverrepopath):
-        self.serverrepopath = serverrepopath
-        self._server = server
+    def __init__(self, proxy, repo, proxyrepopath):
+        self.proxyrepopath = proxyrepopath
+        self._proxy = proxy
         self._repo = repo
 
     @util.propertycache
     def _localrevs(self):
-        remotepeer = hg.peer(self._repo, {}, self.serverrepopath)
+        remotepeer = hg.peer(self._repo, {}, self.proxyrepopath)
         lastcommonnode, localnodes = exactlocalnodes(self._repo, remotepeer)
         localrevs = nodestorevs(self._repo, localnodes)
         return localrevs
@@ -69,7 +71,7 @@ 
         return self._localrevs
 
     def author(self, x):
-        resp = self._server.author(x)
+        resp = self._proxy.request('author', (x,))
         return nodestorevs(self._repo, resp)
 
 def patchedauthor(metapeer, repo, subset, x):
@@ -88,11 +90,13 @@ 
 
 def _speedysetup(ui, repo):
     """Initialize speedy client."""
+
     serverrepopath = ui.config('speedy', 'serverrepo', repo.root)
     serverrepo = localrepo.localrepository(ui, path=serverrepopath)
     mserver = server.makeserver(serverrepo)
+    proxy = localtransport.localclient(mserver)
 
-    mpeer = metapeer(mserver, repo, serverrepopath)
+    mpeer = metapeer(proxy, repo, serverrepopath)
 
     def wrapwithpeer(fun, peer):
         def wrapper(*args, **kwargs):
diff --git a/hgext/speedy/index.py b/hgext/speedy/index.py
new file mode 100644
--- /dev/null
+++ b/hgext/speedy/index.py
@@ -0,0 +1,24 @@ 
+# Copyright 2012 Facebook
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+"""Indice definitions.
+
+Indices are data structures in the form of key-value mappings used
+by the server to speed up computations.
+"""
+
+def makeuserchgs(ctxs):
+    """Return the `userchgs` index in the form of a dict
+
+    `userchgs` is keyed by username, with each value being a list
+        of changes commited by that user.
+
+    ctxs: an iterable with change contexts from which the index is to
+        be created.
+    """
+    newentries = {}
+    for ctx in ctxs:
+        newentries.setdefault(ctx.user(), []).append(ctx.node())
+    return newentries
diff --git a/hgext/speedy/server.py b/hgext/speedy/server.py
--- a/hgext/speedy/server.py
+++ b/hgext/speedy/server.py
@@ -9,12 +9,16 @@ 
 """
 
 from mercurial import revset
+from mercurial import encoding
+import index
 
 class metaserver(object):
     """Contains all the logic behind the query acceleration."""
 
-    def __init__(self, repo):
+    def __init__(self, repo, indices):
         self.repo = repo
+        for name, idx in indices.iteritems():
+            setattr(self, name, idx)
 
     def author(self, pat):
         """Return a list of changes commited by a user that matches pattern.
@@ -24,10 +28,24 @@ 
 
         Returns a list of node ids.
         """
-        # This is going to be accelerated in the subsequent patches
-        revs = revset.author(self.repo, list(self.repo), ('symbol', pat))
-        return [self.repo[r].node() for r in revs]
+        pat = encoding.lower(pat)
+        kind, pattern, matcher = revset._substringmatcher(pat)
+        nodecands = []
+        for user, l in self.userchgs.iteritems():
+            if matcher(encoding.lower(user)):
+                nodecands.extend(l)
+        return nodecands
+
+indicecfg = {
+    'userchgs': index.makeuserchgs,
+}
 
 def makeserver(repo):
-    """Return an initialized metaserver instance."""
-    return metaserver(repo)
+    """Return an initialized metaserver instance.
+
+    Update the indices to the most recent revision along the way.
+    """
+    ctxs = [repo[r] for r in xrange(0, len(repo))]
+    indices = dict([(name, create(ctxs)) for name, create in
+        indicecfg.iteritems()])
+    return metaserver(repo, indices)