Patchwork [2,of,9,changegroup-apis] changegroup: create a class to manage emitting changegroup data

login
register
mail settings
Submitter Gregory Szorc
Date Aug. 1, 2016, 6:18 p.m.
Message ID <c454ae28863fefa2a34c.1470075499@ubuntu-vm-main>
Download mbox | patch
Permalink /patch/16016/
State Changes Requested
Delegated to: Pierre-Yves David
Headers show

Comments

Gregory Szorc - Aug. 1, 2016, 6:18 p.m.
# HG changeset patch
# User Gregory Szorc <gregory.szorc@gmail.com>
# Date 1469901028 25200
#      Sat Jul 30 10:50:28 2016 -0700
# Node ID c454ae28863fefa2a34c331901a139b627279b7b
# Parent  18275f2b6d2b8fcb50ecaf331011e3d233ca1cfa
changegroup: create a class to manage emitting changegroup data

Currently, there are a handful of functions in changegroup.py that
deal with producing changegroup data. The role of each function
isn't very clear, and there is some duplicated code among them.
Furthermore, many of these functions are low-level: for example, many
are generators of raw data chunks. By the time consumers receive the
returned values, metadata such as the number of changesets in the
changegroup has been lost.

This patch introduces the "changegroupemitter" class. It holds
persistent state about what is in the changegroup. It provides
methods to get low-level representations of changegroup data.

The class isn't very feature-complete right now. It basically
implements what was in getsubsetraw().

To prove the class works, consumers of getsubsetraw() have been
rewritten to use the class. And since getsubsetraw() is no longer
used, it has been deleted.

Patch

diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py
--- a/mercurial/changegroup.py
+++ b/mercurial/changegroup.py
@@ -907,36 +907,24 @@  def getunbundler(version, fh, alg, extra
 def _changegroupinfo(repo, nodes, source):
     if repo.ui.verbose or source == 'bundle':
         repo.ui.status(_("%d changesets found\n") % len(nodes))
     if repo.ui.debugflag:
         repo.ui.debug("list of changesets:\n")
         for node in nodes:
             repo.ui.debug("%s\n" % hex(node))
 
-def getsubsetraw(repo, outgoing, bundler, source, fastpath=False):
-    repo = repo.unfiltered()
-    commonrevs = outgoing.common
-    csets = outgoing.missing
-    heads = outgoing.missingheads
-    # We go through the fast path if we get told to, or if all (unfiltered
-    # heads have been requested (since we then know there all linkrevs will
-    # be pulled by the client).
-    heads.sort()
-    fastpathlinkrev = fastpath or (
-            repo.filtername is None and heads == sorted(repo.heads()))
+def getsubset(repo, outgoing, bundler, source, fastpath=False):
+    emitter = changegroupemitter.fromoutgoing(repo, outgoing)
+    data = emitter.emitchangegroupdata(bundler.version, source,
+                                       bundler._bundlecaps,
+                                       fastpathlinkrev=fastpath)
 
-    repo.hook('preoutgoing', throw=True, source=source)
-    _changegroupinfo(repo, csets, source)
-    return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
-
-def getsubset(repo, outgoing, bundler, source, fastpath=False):
-    gengroup = getsubsetraw(repo, outgoing, bundler, source, fastpath)
-    return getunbundler(bundler.version, util.chunkbuffer(gengroup), None,
-                        {'clcount': len(outgoing.missing)})
+    return getunbundler(bundler.version, util.chunkbuffer(data), None,
+                        {'clcount': emitter.changesetcount})
 
 def changegroupsubset(repo, roots, heads, source, version='01'):
     """Compute a changegroup consisting of all the nodes that are
     descendants of any of the roots and ancestors of any of the heads.
     Return a chunkbuffer object whose read() method will return
     successive changegroup chunks.
 
     It is fairly complex as determining which filenodes and which
@@ -963,18 +951,19 @@  def changegroupsubset(repo, roots, heads
 def getlocalchangegroupraw(repo, source, outgoing, bundlecaps=None,
                            version='01'):
     """Like getbundle, but taking a discovery.outgoing as an argument.
 
     This is only implemented for local repos and reuses potentially
     precomputed sets in outgoing. Returns a raw changegroup generator."""
     if not outgoing.missing:
         return None
-    bundler = getbundler(version, repo, bundlecaps)
-    return getsubsetraw(repo, outgoing, bundler, source)
+
+    emitter = changegroupemitter.fromoutgoing(repo, outgoing)
+    return emitter.emitchangegroupdata(version, source, bundlecaps=bundlecaps)
 
 def getlocalchangegroup(repo, source, outgoing, bundlecaps=None,
                         version='01'):
     """Like getbundle, but taking a discovery.outgoing as an argument.
 
     This is only implemented for local repos and reuses potentially
     precomputed sets in outgoing."""
     if not outgoing.missing:
@@ -1058,8 +1047,70 @@  def _addchangegroupfiles(repo, source, r
             try:
                 fl.rev(n)
             except error.LookupError:
                 raise error.Abort(
                     _('missing file data for %s:%s - run hg verify') %
                     (f, hex(n)))
 
     return revisions, files
+
+class changegroupemitter(object):
+    """Object that can emit changegroups."""
+
+    def __init__(self, repo, commonrevs, nodes, heads):
+        """Create a new instance from low-level revision info.
+
+        Most consumers will want to create instances from one of the
+        ``from*`` classmethods instead.
+        """
+        self._repo = repo.unfiltered()
+        # Set or set-like of integer revisions.
+        self._commonrevs = commonrevs
+        # Sized collection of binary nodes (len() and truthiness are used).
+        self._nodes = nodes
+        # Iterable of binary nodes.
+        self._heads = heads
+
+    @classmethod
+    def fromoutgoing(cls, repo, outgoing):
+        """Construct an instance from a ``discovery.outgoing`` instance."""
+        return cls(repo, outgoing.common, outgoing.missing,
+                   outgoing.missingheads)
+
+    @property
+    def changesetcount(self):
+        """The number of changesets in this changegroup."""
+        return len(self._nodes)
+
+    def emitchangegroupdata(self, version, source, bundlecaps=None,
+                            fastpathlinkrev=False):
+        """Emit raw changegroup data.
+
+        This is a generator of byte strings that constitute the raw,
+        uncompressed changegroup data for the revisions specified
+        at instance construction time.
+
+        The emitted data chunks are of varying sizes and come straight from
+        a ``cgNpacker`` class's ``generate()``.
+
+        The ``preoutgoing`` and ``outgoing`` hooks will be fired during
+        execution.
+
+        May return ``None`` if no data would be emitted.
+        """
+        if not self._nodes:
+            return None
+
+        # We go through the fast path if told to or if all unfiltered heads
+        # have been requested (since we know that all linkrevs will be pulled
+        # by the client).
+        assert self._repo.filtername is None
+        fastpathlinkrev = fastpathlinkrev or (
+            set(self._repo.heads()) == set(self._heads))
+
+        bundler = getbundler(version, self._repo, bundlecaps=bundlecaps)
+
+        self._repo.hook('preoutgoing', throw=True, source=source)
+        _changegroupinfo(self._repo, self._nodes, source)
+
+        return bundler.generate(self._commonrevs, self._nodes,
+                                fastpathlinkrev, source)