From patchwork Mon Aug 27 10:06:37 2018 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [10, of, 11, V2] revlogdeltas: split candidate groups selection from the filtering logic From: Boris Feld X-Patchwork-Id: 34074 Message-Id: <2bf35ed4bc3d31d07b4d.1535364397@FB-lair> To: mercurial-devel@mercurial-scm.org Cc: gregory.szorc@gmail.com Date: Mon, 27 Aug 2018 12:06:37 +0200 # HG changeset patch # User Boris Feld # Date 1534573065 -7200 # Sat Aug 18 08:17:45 2018 +0200 # Node ID 2bf35ed4bc3d31d07b4dc97cf6f90e34128ca0e6 # Parent 9368d13b35f9d40ce74ec4f242ff13eef895d2a4 # EXP-Topic sparse-snapshot # Available At https://bitbucket.org/octobus/mercurial-devel/ # hg pull https://bitbucket.org/octobus/mercurial-devel/ -r 2bf35ed4bc3d revlogdeltas: split candidate groups selection from the filtering logic The group iteration has two main components: * walking candidates, the logic that we are about to extend to build intermediate snapshots, * Making sure we test diffs against interesting bases. No duplicated tests, skipping empty revisions, etc. We split `_candidate_groups` to separate the two components. This achieves two goals: * It will be simpler to update the walking logic for intermediate snapshots, * We can gather the filtering logic from `finddeltainfo` into `_candidate_groups` to centralize it. diff --git a/mercurial/revlogutils/deltas.py b/mercurial/revlogutils/deltas.py --- a/mercurial/revlogutils/deltas.py +++ b/mercurial/revlogutils/deltas.py @@ -569,9 +569,29 @@ def isgooddeltainfo(revlog, deltainfo, r return True def _candidategroups(revlog, p1, p2, cachedelta): + """Provides group of revision to be tested as delta base + + This top level function focus on emitting groups with unique and worthwhile + content. See _raw_candidate_groups for details about the group order. """ - Provides revisions that present an interest to be diffed against, - grouped by level of easiness. + # should we try to build a delta? + if not (len(revlog) and revlog._storedeltachains): + return + + tested = set([nullrev]) + for group in _rawgroups(revlog, p1, p2, cachedelta): + group = tuple(r for r in group if r not in tested) + tested.update(group) + if group: + yield group + +def _rawgroups(revlog, p1, p2, cachedelta): + """Provides group of revision to be tested as delta base + + This lower level function focus on emitting delta theorically interresting + without looking it any practical details. + + The group order aims at providing fast or small candidates first. """ gdelta = revlog._generaldelta curr = len(revlog) @@ -589,30 +609,33 @@ def _candidategroups(revlog, p1, p2, cac # build delta will reuse the cache yield (cachedelta[0],) tested.add(cachedelta[0]) + # This condition is true most of the time when processing + # changegroup data into a generaldelta repo. The only time it + # isn't true is if this is the first revision in a delta chain + # or if ``format.generaldelta=true`` disabled ``lazydeltabase``. + if cachedelta and gdelta and revlog._lazydeltabase: + # Assume what we received from the server is a good choice + # build delta will reuse the cache + yield (cachedelta[0],) - if gdelta: - # exclude already lazy tested base if any - parents = [p for p in (p1, p2) - if p != nullrev and p not in tested] + if gdelta: + # exclude already lazy tested base if any + parents = [p for p in (p1, p2) if p != nullrev] - if not revlog._deltabothparents and len(parents) == 2: - parents.sort() - # To minimize the chance of having to build a fulltext, - # pick first whichever parent is closest to us (max rev) - yield (parents[1],) - # then the other one (min rev) if the first did not fit - yield (parents[0],) - tested.update(parents) - elif len(parents) > 0: - # Test all parents (1 or 2), and keep the best candidate - yield parents - tested.update(parents) + if not revlog._deltabothparents and len(parents) == 2: + parents.sort() + # To minimize the chance of having to build a fulltext, + # pick first whichever parent is closest to us (max rev) + yield (parents[1],) + # then the other one (min rev) if the first did not fit + yield (parents[0],) + elif len(parents) > 0: + # Test all parents (1 or 2), and keep the best candidate + yield parents - if prev not in tested: - # other approach failed try against prev to hopefully save us a - # fulltext. - yield (prev,) - tested.add(prev) + # other approach failed try against prev to hopefully save us a + # fulltext. + yield (prev,) class deltacomputer(object): def __init__(self, revlog):