From patchwork Wed May 8 23:24:20 2013 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [5, of, 5, V3] changegroup: refactor changegroup generation into a separate class From: Durham Goode X-Patchwork-Id: 1600 Message-Id: <697f00e33a525e2c8b00.1368055460@dev350.prn1.facebook.com> To: mercurial-devel@selenic.com Date: Wed, 08 May 2013 16:24:20 -0700 # HG changeset patch # User Durham Goode # Date 1367430936 25200 # Wed May 01 10:55:36 2013 -0700 # Node ID 697f00e33a525e2c8b00b8156f49469baea7da21 # Parent 62fe0920af1b247ea4df68398b92784f6efb6f25 changegroup: refactor changegroup generation into a separate class Previously changegroup generation was confined to _changegroup() and _changegroupsubset(). Both were massive functions and they had lots of duplicate code. I've moved all their logic into the changegroup module as new classes: changegroupgen and subsetchangegroupgen. This breaks up all the different logic into separate functions so extensions can modify them individually and allows us to share most of the logic with the subset version. This also helps make localrepo less of a god class (-156 lines from localrepo, +157 lines to changegroup). There should be no behavior changes introduced by this. diff --git a/mercurial/changegroup.py b/mercurial/changegroup.py --- a/mercurial/changegroup.py +++ b/mercurial/changegroup.py @@ -6,7 +6,7 @@ # GNU General Public License version 2 or any later version. 
from i18n import _ -from node import nullrev +from node import nullrev, hex import mdiff, util import struct, os, bz2, zlib, tempfile @@ -254,3 +254,160 @@ def builddeltaheader(self, node, p1n, p2n, basenode, linknode): # do nothing with basenode, it is implicitly the previous one in HG10 return struct.pack(self.deltaheader, node, p1n, p2n, linknode) + +_bundling = _('bundling') +_changesets = _('changesets') +_manifests = _('manifests') +_files = _('files') + +class changegroupgen(object): + def __init__(self, repo, csets, source, reorder): + self.repo = repo + self.cl = repo.changelog + self.mf = repo.manifest + self.csets = csets + self.changedfiles = set() + self.source = source + self.neededmfs = {} + self.reorder = reorder + self.bundler = bundle10(self.lookup) + self.fstate = ['', {}] + self.count = [0, 0] + self.progress = repo.ui.progress + + def gengroup(self): + for chunk in self.addcommitgroups(): + yield chunk + for chunk in self.addmanifestgroups(): + yield chunk + for chunk in self.addfilegroups(): + yield chunk + + # Signal that no more groups are left. + yield self.bundler.close() + self.progress(_bundling, None) + + if self.csets: + self.repo.hook('outgoing', + node = hex(self.csets[0]), + source = self.source) + + def addcommitgroups(self): + # Create a changenode group generator that will call our functions + # back to lookup the owning changenode and collect information. + self.count[:] = [0, len(self.csets)] + for chunk in self.cl.group(self.csets, + self.bundler, + reorder=self.reorder): + yield chunk + self.progress(_bundling, None) + + def addmanifestgroups(self): + # Create a generator for the manifestnodes that calls our lookup + # and data collection functions back. 
+ self.count[:] = [0, len(self.neededmfs)] + outgoingmfs = self.outgoingmanifests(self.mf, self.neededmfs) + + for chunk in self.mf.group(outgoingmfs, self.bundler, + reorder=self.reorder): + yield chunk + self.progress(_bundling, None) + + def addfilegroups(self): + # Go through all our files in order sorted by name. + repo = self.repo + fstate = self.fstate + self.count[:] = [0, len(self.changedfiles)] + for fname in sorted(self.changedfiles): + filerevlog = repo.file(fname) + + fstate[0] = fname + fstate[1] = self.outgoingfilemap(filerevlog, fname) + + nodelist = fstate[1] + if nodelist: + self.count[0] += 1 + yield self.bundler.fileheader(fname) + for chunk in filerevlog.group(nodelist, + self.bundler, + self.reorder): + yield chunk + + def outgoingmanifests(self, manifest, nodes): + # return the manifest nodes that are outgoing + rr, rl = manifest.rev, manifest.linkrev + clnode = self.cl.node + return [n for n in nodes if clnode(rl(rr(n))) in self.csets] + + def outgoingfilemap(self, filerevlog, fname): + # map of outgoing file nodes to changelog nodes + mapping = {} + for r in filerevlog: + clnode = self.cl.node(filerevlog.linkrev(r)) + if clnode in self.csets: + mapping[filerevlog.node(r)] = clnode + return mapping + + def lookup(self, revlog, node): + if revlog == self.cl: + return self.lookupcommit(node) + elif revlog == self.mf: + return self.lookupmanifest(node) + else: + return self.lookupfile(revlog, node) + + def lookupcommit(self, node): + c = self.cl.read(node) + self.changedfiles.update(c[3]) + self.neededmfs.setdefault(c[0], node) + self.count[0] += 1 + self.progress(_bundling, self.count[0], + unit=_changesets, total=self.count[1]) + return node + + def lookupmanifest(self, node): + self.count[0] += 1 + self.progress(_bundling, self.count[0], + unit=_manifests, total=self.count[1]) + + return self.cl.node(self.mf.linkrev(self.mf.rev(node))) + + def lookupfile(self, filelog, node): + self.progress(_bundling, self.count[0], item=self.fstate[0], + 
unit=_files, total=self.count[1]) + return self.fstate[1][node] + +class subsetchangegroupgen(changegroupgen): + def __init__(self, repo, csets, commonrevs, source, reorder): + super(subsetchangegroupgen, self).__init__(repo, csets, source, reorder) + self.commonrevs = commonrevs + self.fnodes = {} + + def lookupmanifest(self, node): + clnode = self.neededmfs[node] + mdata = self.mf.readfast(node) + changedfiles = self.changedfiles + fnodes = self.fnodes + for f, n in mdata.iteritems(): + if f in changedfiles: + nodes = fnodes.setdefault(f, {}) + nodes.setdefault(n, clnode) + + self.count[0] += 1 + self.progress(_bundling, self.count[0], + unit=_manifests, total=self.count[1]) + return clnode + + def outgoingmanifests(self, manifest, nodes): + rr, rl = manifest.rev, manifest.linkrev + return [n for n in nodes if rl(rr(n)) not in self.commonrevs] + + def outgoingfilemap(self, filerevlog, fname): + # map of outgoing file nodes to changelog nodes + mapping = {} + for fnode, clnode in self.fnodes.pop(fname, {}).iteritems(): + # filter any nodes that claim to be part of the known set + clrev = filerevlog.linkrev(filerevlog.rev(fnode)) + if clrev not in self.commonrevs: + mapping[fnode] = clnode + return mapping diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -2022,15 +2022,6 @@ @unfilteredmethod def _changegroupsubset(self, commonrevs, csets, heads, source): - - cl = self.changelog - mf = self.manifest - mfs = {} # needed manifests - fnodes = {} # needed file nodes - changedfiles = set() - fstate = ['', {}] - count = [0, 0] - # can we go through the fast path ? 
heads.sort() if heads == sorted(self.heads()): @@ -2040,93 +2031,16 @@ self.hook('preoutgoing', throw=True, source=source) self.changegroupinfo(csets, source) - # filter any nodes that claim to be part of the known set - def prune(revlog, missing): - rr, rl = revlog.rev, revlog.linkrev - return [n for n in missing - if rl(rr(n)) not in commonrevs] - - progress = self.ui.progress - _bundling = _('bundling') - _changesets = _('changesets') - _manifests = _('manifests') - _files = _('files') - - def lookup(revlog, x): - if revlog == cl: - c = cl.read(x) - changedfiles.update(c[3]) - mfs.setdefault(c[0], x) - count[0] += 1 - progress(_bundling, count[0], - unit=_changesets, total=count[1]) - return x - elif revlog == mf: - clnode = mfs[x] - mdata = mf.readfast(x) - for f, n in mdata.iteritems(): - if f in changedfiles: - fnodes[f].setdefault(n, clnode) - count[0] += 1 - progress(_bundling, count[0], - unit=_manifests, total=count[1]) - return clnode - else: - progress(_bundling, count[0], item=fstate[0], - unit=_files, total=count[1]) - return fstate[1][x] - - bundler = changegroup.bundle10(lookup) reorder = self.ui.config('bundle', 'reorder', 'auto') if reorder == 'auto': reorder = None else: reorder = util.parsebool(reorder) - def gengroup(): - # Create a changenode group generator that will call our functions - # back to lookup the owning changenode and collect information. - count[:] = [0, len(csets)] - for chunk in cl.group(csets, bundler, reorder=reorder): - yield chunk - progress(_bundling, None) + gen = changegroup.subsetchangegroupgen(self, csets, commonrevs, + source, reorder) - # Create a generator for the manifestnodes that calls our lookup - # and data collection functions back. - for f in changedfiles: - fnodes[f] = {} - count[:] = [0, len(mfs)] - for chunk in mf.group(prune(mf, mfs), bundler, reorder=reorder): - yield chunk - progress(_bundling, None) - - mfs.clear() - - # Go through all our files in order sorted by name. 
- count[:] = [0, len(changedfiles)] - for fname in sorted(changedfiles): - filerevlog = self.file(fname) - if not len(filerevlog): - raise util.Abort(_("empty or missing revlog for %s") - % fname) - fstate[0] = fname - fstate[1] = fnodes.pop(fname, {}) - - nodelist = prune(filerevlog, fstate[1]) - if nodelist: - count[0] += 1 - yield bundler.fileheader(fname) - for chunk in filerevlog.group(nodelist, bundler, reorder): - yield chunk - - # Signal that no more groups are left. - yield bundler.close() - progress(_bundling, None) - - if csets: - self.hook('outgoing', node=hex(csets[0]), source=source) - - return changegroup.unbundle10(util.chunkbuffer(gengroup()), 'UN') + return changegroup.unbundle10(util.chunkbuffer(gen.gengroup()), 'UN') def changegroup(self, basenodes, source): # to avoid a race we use changegroupsubset() (issue1320) @@ -2143,88 +2057,18 @@ nodes is the set of nodes to send""" - cl = self.changelog - mf = self.manifest - mfs = {} - changedfiles = set() - fstate = [''] - count = [0, 0] - self.hook('preoutgoing', throw=True, source=source) self.changegroupinfo(nodes, source) - revset = set([cl.rev(n) for n in nodes]) - - def gennodelst(log): - ln, llr = log.node, log.linkrev - return [ln(r) for r in log if llr(r) in revset] - - progress = self.ui.progress - _bundling = _('bundling') - _changesets = _('changesets') - _manifests = _('manifests') - _files = _('files') - - def lookup(revlog, x): - if revlog == cl: - c = cl.read(x) - changedfiles.update(c[3]) - mfs.setdefault(c[0], x) - count[0] += 1 - progress(_bundling, count[0], - unit=_changesets, total=count[1]) - return x - elif revlog == mf: - count[0] += 1 - progress(_bundling, count[0], - unit=_manifests, total=count[1]) - return cl.node(revlog.linkrev(revlog.rev(x))) - else: - progress(_bundling, count[0], item=fstate[0], - total=count[1], unit=_files) - return cl.node(revlog.linkrev(revlog.rev(x))) - - bundler = changegroup.bundle10(lookup) reorder = self.ui.config('bundle', 'reorder', 'auto') 
if reorder == 'auto': reorder = None else: reorder = util.parsebool(reorder) - def gengroup(): - '''yield a sequence of changegroup chunks (strings)''' - # construct a list of all changed files + gen = changegroup.changegroupgen(self, nodes, source, reorder) - count[:] = [0, len(nodes)] - for chunk in cl.group(nodes, bundler, reorder=reorder): - yield chunk - progress(_bundling, None) - - count[:] = [0, len(mfs)] - for chunk in mf.group(gennodelst(mf), bundler, reorder=reorder): - yield chunk - progress(_bundling, None) - - count[:] = [0, len(changedfiles)] - for fname in sorted(changedfiles): - filerevlog = self.file(fname) - if not len(filerevlog): - raise util.Abort(_("empty or missing revlog for %s") - % fname) - fstate[0] = fname - nodelist = gennodelst(filerevlog) - if nodelist: - count[0] += 1 - yield bundler.fileheader(fname) - for chunk in filerevlog.group(nodelist, bundler, reorder): - yield chunk - yield bundler.close() - progress(_bundling, None) - - if nodes: - self.hook('outgoing', node=hex(nodes[0]), source=source) - - return changegroup.unbundle10(util.chunkbuffer(gengroup()), 'UN') + return changegroup.unbundle10(util.chunkbuffer(gen.gengroup()), 'UN') @unfilteredmethod def addchangegroup(self, source, srctype, url, emptyok=False):