Submitter | Martijn Pieters |
---|---|

Date | June 8, 2016, 4:20 p.m. |

Message ID | <b573c3858edcd746dd08.1465402840@mjpieters-mbp> |

Download | mbox | patch |

Permalink | /patch/15441/ |

State | Superseded |

Commit | 98535ad46fc08e5adbd48d08a502a3ac7d15d56d |

Delegated to: | Augie Fackler |

Headers | show |

## Comments

## Patch

diff --git a/mercurial/graphmod.py b/mercurial/graphmod.py --- a/mercurial/graphmod.py +++ b/mercurial/graphmod.py @@ -19,8 +19,6 @@ from __future__ import absolute_import -import heapq - from .node import nullrev from . import ( revset, @@ -37,204 +35,6 @@ # (so making N negative) and all but the first N characters use that style. EDGES = {PARENT: '|', GRANDPARENT: ':', MISSINGPARENT: None} -def groupbranchiter(revs, parentsfunc, firstbranch=()): - """Yield revisions from heads to roots one (topo) branch at a time. - - This function aims to be used by a graph generator that wishes to minimize - the number of parallel branches and their interleaving. - - Example iteration order (numbers show the "true" order in a changelog): - - o 4 - | - o 1 - | - | o 3 - | | - | o 2 - |/ - o 0 - - Note that the ancestors of merges are understood by the current - algorithm to be on the same branch. This means no reordering will - occur behind a merge. - """ - - ### Quick summary of the algorithm - # - # This function is based around a "retention" principle. We keep revisions - # in memory until we are ready to emit a whole branch that immediately - # "merges" into an existing one. This reduces the number of parallel - # branches with interleaved revisions. - # - # During iteration revs are split into two groups: - # A) revision already emitted - # B) revision in "retention". They are stored as different subgroups. - # - # for each REV, we do the following logic: - # - # 1) if REV is a parent of (A), we will emit it. If there is a - # retention group ((B) above) that is blocked on REV being - # available, we emit all the revisions out of that retention - # group first. - # - # 2) else, we'll search for a subgroup in (B) awaiting for REV to be - # available, if such subgroup exist, we add REV to it and the subgroup is - # now awaiting for REV.parents() to be available. - # - # 3) finally if no such group existed in (B), we create a new subgroup. - # - # - # To bootstrap the algorithm, we emit the tipmost revision (which - # puts it in group (A) from above). - - revs.sort(reverse=True) - - # Set of parents of revision that have been emitted. They can be considered - # unblocked as the graph generator is already aware of them so there is no - # need to delay the revisions that reference them. - # - # If someone wants to prioritize a branch over the others, pre-filling this - # set will force all other branches to wait until this branch is ready to be - # emitted. - unblocked = set(firstbranch) - - # list of groups waiting to be displayed, each group is defined by: - # - # (revs: lists of revs waiting to be displayed, - # blocked: set of that cannot be displayed before those in 'revs') - # - # The second value ('blocked') correspond to parents of any revision in the - # group ('revs') that is not itself contained in the group. The main idea - # of this algorithm is to delay as much as possible the emission of any - # revision. This means waiting for the moment we are about to display - # these parents to display the revs in a group. - # - # This first implementation is smart until it encounters a merge: it will - # emit revs as soon as any parent is about to be emitted and can grow an - # arbitrary number of revs in 'blocked'. In practice this mean we properly - # retains new branches but gives up on any special ordering for ancestors - # of merges. The implementation can be improved to handle this better. - # - # The first subgroup is special. It corresponds to all the revision that - # were already emitted. The 'revs' lists is expected to be empty and the - # 'blocked' set contains the parents revisions of already emitted revision. - # - # You could pre-seed the <parents> set of groups[0] to a specific - # changesets to select what the first emitted branch should be. - groups = [([], unblocked)] - pendingheap = [] - pendingset = set() - - heapq.heapify(pendingheap) - heappop = heapq.heappop - heappush = heapq.heappush - for currentrev in revs: - # Heap works with smallest element, we want highest so we invert - if currentrev not in pendingset: - heappush(pendingheap, -currentrev) - pendingset.add(currentrev) - # iterates on pending rev until after the current rev have been - # processed. - rev = None - while rev != currentrev: - rev = -heappop(pendingheap) - pendingset.remove(rev) - - # Seek for a subgroup blocked, waiting for the current revision. - matching = [i for i, g in enumerate(groups) if rev in g[1]] - - if matching: - # The main idea is to gather together all sets that are blocked - # on the same revision. - # - # Groups are merged when a common blocking ancestor is - # observed. For example, given two groups: - # - # revs [5, 4] waiting for 1 - # revs [3, 2] waiting for 1 - # - # These two groups will be merged when we process - # 1. In theory, we could have merged the groups when - # we added 2 to the group it is now in (we could have - # noticed the groups were both blocked on 1 then), but - # the way it works now makes the algorithm simpler. - # - # We also always keep the oldest subgroup first. We can - # probably improve the behavior by having the longest set - # first. That way, graph algorithms could minimise the length - # of parallel lines their drawing. This is currently not done. - targetidx = matching.pop(0) - trevs, tparents = groups[targetidx] - for i in matching: - gr = groups[i] - trevs.extend(gr[0]) - tparents |= gr[1] - # delete all merged subgroups (except the one we kept) - # (starting from the last subgroup for performance and - # sanity reasons) - for i in reversed(matching): - del groups[i] - else: - # This is a new head. We create a new subgroup for it. - targetidx = len(groups) - groups.append(([], set([rev]))) - - gr = groups[targetidx] - - # We now add the current nodes to this subgroups. This is done - # after the subgroup merging because all elements from a subgroup - # that relied on this rev must precede it. - # - # we also update the <parents> set to include the parents of the - # new nodes. - if rev == currentrev: # only display stuff in rev - gr[0].append(rev) - gr[1].remove(rev) - parents = [p for p in parentsfunc(rev) if p > nullrev] - gr[1].update(parents) - for p in parents: - if p not in pendingset: - pendingset.add(p) - heappush(pendingheap, -p) - - # Look for a subgroup to display - # - # When unblocked is empty (if clause), we were not waiting for any - # revisions during the first iteration (if no priority was given) or - # if we emitted a whole disconnected set of the graph (reached a - # root). In that case we arbitrarily take the oldest known - # subgroup. The heuristic could probably be better. - # - # Otherwise (elif clause) if the subgroup is blocked on - # a revision we just emitted, we can safely emit it as - # well. - if not unblocked: - if len(groups) > 1: # display other subset - targetidx = 1 - gr = groups[1] - elif not gr[1] & unblocked: - gr = None - - if gr is not None: - # update the set of awaited revisions with the one from the - # subgroup - unblocked |= gr[1] - # output all revisions in the subgroup - for r in gr[0]: - yield r - # delete the subgroup that you just output - # unless it is groups[0] in which case you just empty it. - if targetidx: - del groups[targetidx] - else: - gr[0][:] = [] - # Check if we have some subgroup waiting for revisions we are not going to - # iterate over - for g in groups: - for r in g[0]: - yield r - def dagwalker(repo, revs): """cset DAG generator yielding (id, CHANGESET, ctx, [parentinfo]) tuples @@ -259,7 +59,7 @@ if firstbranchrevset: firstbranch = repo.revs(firstbranchrevset) parentrevs = repo.changelog.parentrevs - revs = groupbranchiter(revs, parentrevs, firstbranch) + revs = revset.groupbranchiter(revs, parentrevs, firstbranch) revs = revset.baseset(revs) for rev in revs: diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -1887,6 +1887,204 @@ raise error.ParseError(_("unknown sort key %r") % fk) return baseset([c.rev() for c in ctxs]) +def groupbranchiter(revs, parentsfunc, firstbranch=()): + """Yield revisions from heads to roots one (topo) branch at a time. + + This function aims to be used by a graph generator that wishes to minimize + the number of parallel branches and their interleaving. + + Example iteration order (numbers show the "true" order in a changelog): + + o 4 + | + o 1 + | + | o 3 + | | + | o 2 + |/ + o 0 + + Note that the ancestors of merges are understood by the current + algorithm to be on the same branch. This means no reordering will + occur behind a merge. + """ + + ### Quick summary of the algorithm + # + # This function is based around a "retention" principle. We keep revisions + # in memory until we are ready to emit a whole branch that immediately + # "merges" into an existing one. This reduces the number of parallel + # branches with interleaved revisions. + # + # During iteration revs are split into two groups: + # A) revision already emitted + # B) revision in "retention". They are stored as different subgroups. + # + # for each REV, we do the following logic: + # + # 1) if REV is a parent of (A), we will emit it. If there is a + # retention group ((B) above) that is blocked on REV being + # available, we emit all the revisions out of that retention + # group first. + # + # 2) else, we'll search for a subgroup in (B) awaiting for REV to be + # available, if such subgroup exist, we add REV to it and the subgroup is + # now awaiting for REV.parents() to be available. + # + # 3) finally if no such group existed in (B), we create a new subgroup. + # + # + # To bootstrap the algorithm, we emit the tipmost revision (which + # puts it in group (A) from above). + + revs.sort(reverse=True) + + # Set of parents of revision that have been emitted. They can be considered + # unblocked as the graph generator is already aware of them so there is no + # need to delay the revisions that reference them. + # + # If someone wants to prioritize a branch over the others, pre-filling this + # set will force all other branches to wait until this branch is ready to be + # emitted. + unblocked = set(firstbranch) + + # list of groups waiting to be displayed, each group is defined by: + # + # (revs: lists of revs waiting to be displayed, + # blocked: set of that cannot be displayed before those in 'revs') + # + # The second value ('blocked') correspond to parents of any revision in the + # group ('revs') that is not itself contained in the group. The main idea + # of this algorithm is to delay as much as possible the emission of any + # revision. This means waiting for the moment we are about to display + # these parents to display the revs in a group. + # + # This first implementation is smart until it encounters a merge: it will + # emit revs as soon as any parent is about to be emitted and can grow an + # arbitrary number of revs in 'blocked'. In practice this mean we properly + # retains new branches but gives up on any special ordering for ancestors + # of merges. The implementation can be improved to handle this better. + # + # The first subgroup is special. It corresponds to all the revision that + # were already emitted. The 'revs' lists is expected to be empty and the + # 'blocked' set contains the parents revisions of already emitted revision. + # + # You could pre-seed the <parents> set of groups[0] to a specific + # changesets to select what the first emitted branch should be. + groups = [([], unblocked)] + pendingheap = [] + pendingset = set() + + heapq.heapify(pendingheap) + heappop = heapq.heappop + heappush = heapq.heappush + for currentrev in revs: + # Heap works with smallest element, we want highest so we invert + if currentrev not in pendingset: + heappush(pendingheap, -currentrev) + pendingset.add(currentrev) + # iterates on pending rev until after the current rev have been + # processed. + rev = None + while rev != currentrev: + rev = -heappop(pendingheap) + pendingset.remove(rev) + + # Seek for a subgroup blocked, waiting for the current revision. + matching = [i for i, g in enumerate(groups) if rev in g[1]] + + if matching: + # The main idea is to gather together all sets that are blocked + # on the same revision. + # + # Groups are merged when a common blocking ancestor is + # observed. For example, given two groups: + # + # revs [5, 4] waiting for 1 + # revs [3, 2] waiting for 1 + # + # These two groups will be merged when we process + # 1. In theory, we could have merged the groups when + # we added 2 to the group it is now in (we could have + # noticed the groups were both blocked on 1 then), but + # the way it works now makes the algorithm simpler. + # + # We also always keep the oldest subgroup first. We can + # probably improve the behavior by having the longest set + # first. That way, graph algorithms could minimise the length + # of parallel lines their drawing. This is currently not done. + targetidx = matching.pop(0) + trevs, tparents = groups[targetidx] + for i in matching: + gr = groups[i] + trevs.extend(gr[0]) + tparents |= gr[1] + # delete all merged subgroups (except the one we kept) + # (starting from the last subgroup for performance and + # sanity reasons) + for i in reversed(matching): + del groups[i] + else: + # This is a new head. We create a new subgroup for it. + targetidx = len(groups) + groups.append(([], set([rev]))) + + gr = groups[targetidx] + + # We now add the current nodes to this subgroups. This is done + # after the subgroup merging because all elements from a subgroup + # that relied on this rev must precede it. + # + # we also update the <parents> set to include the parents of the + # new nodes. + if rev == currentrev: # only display stuff in rev + gr[0].append(rev) + gr[1].remove(rev) + parents = [p for p in parentsfunc(rev) if p > node.nullrev] + gr[1].update(parents) + for p in parents: + if p not in pendingset: + pendingset.add(p) + heappush(pendingheap, -p) + + # Look for a subgroup to display + # + # When unblocked is empty (if clause), we were not waiting for any + # revisions during the first iteration (if no priority was given) or + # if we emitted a whole disconnected set of the graph (reached a + # root). In that case we arbitrarily take the oldest known + # subgroup. The heuristic could probably be better. + # + # Otherwise (elif clause) if the subgroup is blocked on + # a revision we just emitted, we can safely emit it as + # well. + if not unblocked: + if len(groups) > 1: # display other subset + targetidx = 1 + gr = groups[1] + elif not gr[1] & unblocked: + gr = None + + if gr is not None: + # update the set of awaited revisions with the one from the + # subgroup + unblocked |= gr[1] + # output all revisions in the subgroup + for r in gr[0]: + yield r + # delete the subgroup that you just output + # unless it is groups[0] in which case you just empty it. + if targetidx: + del groups[targetidx] + else: + gr[0][:] = [] + # Check if we have some subgroup waiting for revisions we are not going to + # iterate over + for g in groups: + for r in g[0]: + yield r + @predicate('subrepo([pattern])') def subrepo(repo, subset, x): """Changesets that add, modify or remove the given subrepo. If no subrepo