Submitter | Pierre-Yves David |
---|---|
Date | Dec. 24, 2013, 12:32 a.m. |
Message ID | <41a5810bb9cb062cd10f.1387845142@marginatus.fb.com> |
Download | mbox | patch |
Permalink | /patch/3235/ |
State | Accepted |
Commit | cd62532c62a1d780d7b431013a84786372481b75 |
Headers | show |
Comments
On Mon, Dec 23, 2013 at 04:32:22PM -0800, pierre-yves.david@ens-lyon.org wrote: > # HG changeset patch > # User Pierre-Yves David <pierre-yves.david@ens-lyon.org> > # Date 1387841391 28800 > # Mon Dec 23 15:29:51 2013 -0800 > # Node ID 41a5810bb9cb062cd10f6a12b261fdd66fab8aed > # Parent 1677c9118fd971197916b7f37177b2a5c03ad802 > obsolete: order of magnitude speedup in _computebumpedset queueing these, thanks. I may make some English tweaks depending on my motivation level. > > Reminder: a changeset is said "bumped" if it tries to obsolete a immutable > changeset. > > > The previous algorithm for computing bumped changeset was: > > 1) Get all public changesets > 2) Find all they successors > 3) Search for stuff that are eligible for being "bumped" > (mutable and non obsolete) > > The entry size of this algorithm is `O(len(public))` which is mostly the same as > `O(len(repo))`. Even this this approach mean fewer obsolescence marker are > traveled, this is not very scalable. > > The new algorithm is: > > 1) For each potential bumped changesets (non obsolete mutable) > 2) iterate over precursors > 3) if a precursors is public. changeset is bumped > > We travel more obsolescence marker, but the entry size is much smaller since > the amount of potential bumped should remains mostly stable with time `O(1)`. > > On some confidential gigantic repo this move bumped computation from 15.19s to > 0.46s (×33 speedup…). On "smaller" repo (mercurial, cubicweb's review) no > significant gain were seen. The additional traversal of obsolescence marker is > probably probably counter balance the advantage of it. > > Other optimisation could be done in the future (eg: sharing precursors cache > for divergence detection) > > diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py > --- a/mercurial/obsolete.py > +++ b/mercurial/obsolete.py > @@ -82,10 +82,11 @@ The header is followed by the markers. E > additional encoding. Keys cannot contain '\0' or ':' and values > cannot contain '\0'. > """ > import struct > import util, base85, node > +import phases > from i18n import _ > > _pack = struct.pack > _unpack = struct.unpack > > @@ -784,18 +785,30 @@ def _computeextinctset(repo): > > > @cachefor('bumped') > def _computebumpedset(repo): > """the set of revs trying to obsolete public revisions""" > - # get all possible bumped changesets > - tonode = repo.changelog.node > - publicnodes = (tonode(r) for r in repo.revs('public()')) > - successors = allsuccessors(repo.obsstore, publicnodes, > - ignoreflags=bumpedfix) > - # revision public or already obsolete don't count as bumped > - query = '%ld - obsolete() - public()' > - return set(repo.revs(query, _knownrevs(repo, successors))) > + bumped = set() > + # utils function (avoid attribut lookup in the loop) > + phase = repo._phasecache.phase # would be faster to grab the full list > + public = phases.public > + cl = repo.changelog > + torev = cl.nodemap.get > + obs = getrevs(repo, 'obsolete') > + for rev in repo: > + # We only evaluate mutable, non-obsolete revision > + if (public < phase(repo, rev)) and (rev not in obs): > + node = cl.node(rev) > + # (future) A cache of precursors may worth if split is very common > + for pnode in allprecursors(repo.obsstore, [node], > + ignoreflags=bumpedfix): > + prev = torev(pnode) # unfiltered! but so is phasecache > + if (prev is not None) and (phase(repo, prev) <= public): > + # we have a public precursors > + bumped.add(rev) > + break # Next draft! > + return bumped > > @cachefor('divergent') > def _computedivergentset(repo): > """the set of rev that compete to be the final successors of some revision. > """ > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel
Patch
diff --git a/mercurial/obsolete.py b/mercurial/obsolete.py --- a/mercurial/obsolete.py +++ b/mercurial/obsolete.py @@ -82,10 +82,11 @@ The header is followed by the markers. E additional encoding. Keys cannot contain '\0' or ':' and values cannot contain '\0'. """ import struct import util, base85, node +import phases from i18n import _ _pack = struct.pack _unpack = struct.unpack @@ -784,18 +785,30 @@ def _computeextinctset(repo): @cachefor('bumped') def _computebumpedset(repo): """the set of revs trying to obsolete public revisions""" - # get all possible bumped changesets - tonode = repo.changelog.node - publicnodes = (tonode(r) for r in repo.revs('public()')) - successors = allsuccessors(repo.obsstore, publicnodes, - ignoreflags=bumpedfix) - # revision public or already obsolete don't count as bumped - query = '%ld - obsolete() - public()' - return set(repo.revs(query, _knownrevs(repo, successors))) + bumped = set() + # utils function (avoid attribut lookup in the loop) + phase = repo._phasecache.phase # would be faster to grab the full list + public = phases.public + cl = repo.changelog + torev = cl.nodemap.get + obs = getrevs(repo, 'obsolete') + for rev in repo: + # We only evaluate mutable, non-obsolete revision + if (public < phase(repo, rev)) and (rev not in obs): + node = cl.node(rev) + # (future) A cache of precursors may worth if split is very common + for pnode in allprecursors(repo.obsstore, [node], + ignoreflags=bumpedfix): + prev = torev(pnode) # unfiltered! but so is phasecache + if (prev is not None) and (phase(repo, prev) <= public): + # we have a public precursors + bumped.add(rev) + break # Next draft! + return bumped @cachefor('divergent') def _computedivergentset(repo): """the set of rev that compete to be the final successors of some revision. """