Submitter | Pierre-Yves David |
---|---|

Date | Nov. 18, 2014, 11:56 p.m. |

Message ID | <026932aa1c4f05cf930c.1416354970@marginatus.alto.octopoid.net> |

Download | mbox | patch |

Permalink | /patch/6782/ |

State | Superseded |

Headers | show |

## Comments

Sorry, just some naive comments and questions below. On Tue Nov 18 2014 at 3:57:18 PM Pierre-Yves David < pierre-yves.david@ens-lyon.org> wrote: > # HG changeset patch > # User Pierre-Yves David <pierre-yves.david@fb.com> > # Date 1409850336 -7200 > # Thu Sep 04 19:05:36 2014 +0200 > # Node ID 026932aa1c4f05cf930c5853ae894636399de111 > # Parent ee131567469924b1642956ad5adfa9f6122ba6d3 > topoiter: support for non-contiguous revset > > The algorithm now work when some revision are skipped. We now use "first > included ancestors" instead of just "parent" to link changesets with each > other. > > diff --git a/mercurial/graphmod.py b/mercurial/graphmod.py > --- a/mercurial/graphmod.py > +++ b/mercurial/graphmod.py > @@ -18,17 +18,17 @@ Data depends on type. > """ > > from mercurial.node import nullrev > import util > > +import heapq > + > CHANGESET = 'C' > > def topoiter(revs, parentsfunc): > """topologically iter over a set of revision, one branch at a time. > > - Currently does not handle non contigious <revs> input. > - > Currently consider every changeset under a merge to be on the same > branch > using revision number to sort them. > > Could be easily extend to give priority to an initial branch. > > @@ -89,20 +89,31 @@ def topoiter(revs, parentsfunc): > # and the <blocked> set contains the parents revisions of already > emitted > # revision. > # > # You could pre-seed the <parents> set of groups[0] to a specific > # changesets to select what the first emitted branch should be. > - # > - # We do not support revisions will hole yet, but adding such support > - # would be easy. The iteration will have to be done using both input > - # revision and parents (see cl.ancestors function + a few tweaks) but > - # only revisions parts of the initial set should be emitted. > groups = [([], unblocked)] > - for current in revs: > - if True: > - # Look for a group awaiting on the current revision. > - matching = [i for i, g in enumerate(groups) if current in > g[1]] > + pendingheap = [] > + pendingset = set() > + > + heapq.heapify(pendingheap) > Not really necessary since pendingheap is already a valid heap, but I'm new to Python so I don't know what's considered best practice. > + heappop = heapq.heappop > + heappush = heapq.heappush > Is this functionally equivalent to "from heapq import heappop" etc? Faster? You want the smaller scope? > + for currentrev in revs: > + # heap works with smallest element, we want highest so we invert > + if currentrev not in pendingset: > + heappush(pendingheap, -currentrev) > + pendingset.add(currentrev) > + # iterates on pending rev until after the current rev have been > + # processeed. > + rev = None > + while rev != currentrev: > + rev = -heappop(pendingheap) > + pendingset.remove(rev) > + > + # seek for a group awaiting on the current revision. > Should 'Look' have been 'seek' in 1/5 instead? Or did you do it on purpose to make the patch easier to read? > + matching = [i for i, g in enumerate(groups) if rev in g[1]] > > if matching: > # The main idea is to gather together all set that await > on the > # same revision. > # > @@ -128,23 +139,29 @@ def topoiter(revs, parentsfunc): > for i in reversed(matching): > del groups[i] > else: > # his is a new head we create a new group for it. > targetidx = len(groups) > - groups.append(([], set([current]))) > + groups.append(([], set([rev]))) > Would s/current/rev/ had made sense in 1/5 to reduce this patch a bit? > > gr = groups[targetidx] > > # We now adds the current nodes to this groups. This is done > after > # the group merging because all elements from a group that > relied > # on this rev must preceed it. > # > # we also update the <parents> set to includes the parents on > the > # new nodes. > - gr[0].append(current) > - gr[1].remove(current) > - gr[1].update([p for p in parentsfunc(current) if p > nullrev]) > + if rev == currentrev: # only display stuff in rev > + gr[0].append(rev) > + gr[1].remove(rev) > + parents = [p for p in parentsfunc(rev) if p > nullrev] + gr[1].update(parents) > + for p in parents: > + if p not in pendingset: > + pendingset.add(p) > + heappush(pendingheap, -p) > > # Look for a group to display > # > # When unblocked is empty (if clause), We are not waiting > over any > # revision during the first iteration (if no priority was > given) or > @@ -173,10 +190,15 @@ def topoiter(revs, parentsfunc): > # unless it is group[0] in which case you just empty it. > if targetidx: > del groups[targetidx] > else: > gr[0][:] = [] > + # Check if we have some group waiting for revision we are not going to > + # iterate over > + for g in groups: > + for r in g[0]: > + yield r > > def dagwalker(repo, revs): > """cset DAG generator yielding (id, CHANGESET, ctx, [parentids]) > tuples > > This generator function walks through revisions (which should be > ordered > diff --git a/tests/test-glog-topological.t b/tests/test-glog-topological.t > --- a/tests/test-glog-topological.t > +++ b/tests/test-glog-topological.t > @@ -35,10 +35,13 @@ later. > | | > o | 1 > |/ > o 0 > > + > +(display all nodes) > + > $ hg --config experimental.graph-topological=1 log -G > o 8 > | > o 3 > | > @@ -54,5 +57,24 @@ later. > | | > | o 4 > |/ > o 0 > > + > +(revset skipping nodes) > + > + $ hg --config experimental.graph-topological=1 log -G --rev 'not (2+6)' > Sorry, but I didn't quite understand the algorithm. Would it also work well and efficiently with e.g. 'hg log hgext/'? I don't know enough about revsets, I suppose. > + o 8 > + | > + o 3 > + | > + o 1 > + | > + | o 7 > + | | > + | o 5 > + | | > + | o 4 > + |/ > + o 0 > + > + > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel >

On 11/19/2014 10:49 PM, Martin von Zweigbergk wrote: > Sorry, just some naive comments and questions below. > > On Tue Nov 18 2014 at 3:57:18 PM Pierre-Yves David > <pierre-yves.david@ens-lyon.org <mailto:pierre-yves.david@ens-lyon.org>> > wrote: > > # HG changeset patch > # User Pierre-Yves David <pierre-yves.david@fb.com > <mailto:pierre-yves.david@fb.com>> > # Date 1409850336 -7200 > # Thu Sep 04 19:05:36 2014 +0200 > # Node ID 026932aa1c4f05cf930c5853ae8946__36399de111 > # Parent ee131567469924b1642956ad5adfa9__f6122ba6d3 > topoiter: support for non-contiguous revset > > The algorithm now work when some revision are skipped. We now use "first > included ancestors" instead of just "parent" to link changesets with > each other. > > diff --git a/mercurial/graphmod.py b/mercurial/graphmod.py > --- a/mercurial/graphmod.py > +++ b/mercurial/graphmod.py > @@ -18,17 +18,17 @@ Data depends on type. > """ > > from mercurial.node import nullrev > import util > > +import heapq > + > CHANGESET = 'C' > > def topoiter(revs, parentsfunc): > """topologically iter over a set of revision, one branch at a > time. > > - Currently does not handle non contigious <revs> input. > - > Currently consider every changeset under a merge to be on the > same branch > using revision number to sort them. > > Could be easily extend to give priority to an initial branch. > > @@ -89,20 +89,31 @@ def topoiter(revs, parentsfunc): > # and the <blocked> set contains the parents revisions of > already emitted > # revision. > # > # You could pre-seed the <parents> set of groups[0] to a specific > # changesets to select what the first emitted branch should be. > - # > - # We do not support revisions will hole yet, but adding such > support > - # would be easy. The iteration will have to be done using both > input > - # revision and parents (see cl.ancestors function + a few > tweaks) but > - # only revisions parts of the initial set should be emitted. > groups = [([], unblocked)] > - for current in revs: > - if True: > - # Look for a group awaiting on the current revision. > - matching = [i for i, g in enumerate(groups) if current > in g[1]] > + pendingheap = [] > + pendingset = set() > + > + heapq.heapify(pendingheap) > > > Not really necessary since pendingheap is already a valid heap, but I'm > new to Python so I don't know what's considered best practice. Yeah but better safe than sorry. It does not hurt to be over explicit about this being a heap. > + heappop = heapq.heappop > + heappush = heapq.heappush > > > Is this functionally equivalent to "from heapq import heappop" etc? > Faster? You want the smaller scope? It equivalent. It is faster too because python is bad at lookup. > > + for currentrev in revs: > + # heap works with smallest element, we want highest so we > invert > + if currentrev not in pendingset: > + heappush(pendingheap, -currentrev) > + pendingset.add(currentrev) > + # iterates on pending rev until after the current rev have been > + # processeed. > + rev = None > + while rev != currentrev: > + rev = -heappop(pendingheap) > + pendingset.remove(rev) > + > + # seek for a group awaiting on the current revision. > > > Should 'Look' have been 'seek' in 1/5 instead? Or did you do it on > purpose to make the patch easier to read? It should have been seek. (Not super critical) > > + matching = [i for i, g in enumerate(groups) if rev in g[1]] > > if matching: > # The main idea is to gather together all set that > await on the > # same revision. > # > @@ -128,23 +139,29 @@ def topoiter(revs, parentsfunc): > for i in reversed(matching): > del groups[i] > else: > # his is a new head we create a new group for it. > targetidx = len(groups) > - groups.append(([], set([current]))) > + groups.append(([], set([rev]))) > > > Would s/current/rev/ had made sense in 1/5 to reduce this patch a bit? No, rev is something we want emitted, current is either rev or a skipped parent of rev. > > > gr = groups[targetidx] > > # We now adds the current nodes to this groups. This > is done after > # the group merging because all elements from a group > that relied > # on this rev must preceed it. > # > # we also update the <parents> set to includes the > parents on the > # new nodes. > - gr[0].append(current) > - gr[1].remove(current) > - gr[1].update([p for p in parentsfunc(current) if p > > nullrev]) > + if rev == currentrev: # only display stuff in rev > + gr[0].append(rev) > + gr[1].remove(rev) > + parents = [p for p in parentsfunc(rev) if p > nullrev] > > + gr[1].update(parents) > + for p in parents: > + if p not in pendingset: > + pendingset.add(p) > + heappush(pendingheap, -p) > > # Look for a group to display > # > # When unblocked is empty (if clause), We are not > waiting over any > # revision during the first iteration (if no priority > was given) or > @@ -173,10 +190,15 @@ def topoiter(revs, parentsfunc): > # unless it is group[0] in which case you just > empty it. > if targetidx: > del groups[targetidx] > else: > gr[0][:] = [] > + # Check if we have some group waiting for revision we are not > going to > + # iterate over > + for g in groups: > + for r in g[0]: > + yield r > > def dagwalker(repo, revs): > """cset DAG generator yielding (id, CHANGESET, ctx, > [parentids]) tuples > > This generator function walks through revisions (which should > be ordered > diff --git a/tests/test-glog-topological.__t > b/tests/test-glog-topological.__t > --- a/tests/test-glog-topological.__t > +++ b/tests/test-glog-topological.__t > @@ -35,10 +35,13 @@ later. > | | > o | 1 > |/ > o 0 > > + > +(display all nodes) > + > $ hg --config experimental.graph-__topological=1 log -G > o 8 > | > o 3 > | > @@ -54,5 +57,24 @@ later. > | | > | o 4 > |/ > o 0 > > + > +(revset skipping nodes) > + > + $ hg --config experimental.graph-__topological=1 log -G --rev > 'not (2+6)' > > > Sorry, but I didn't quite understand the algorithm. Would it also work > well and efficiently with e.g. 'hg log hgext/'? I don't know enough > about revsets, I suppose. Sure it just work with a list of revs and parentrev function. The revset used for 'hgext/' should be lazy so we are all good.

## Patch

diff --git a/mercurial/graphmod.py b/mercurial/graphmod.py --- a/mercurial/graphmod.py +++ b/mercurial/graphmod.py @@ -18,17 +18,17 @@ Data depends on type. """ from mercurial.node import nullrev import util +import heapq + CHANGESET = 'C' def topoiter(revs, parentsfunc): """topologically iter over a set of revision, one branch at a time. - Currently does not handle non contigious <revs> input. - Currently consider every changeset under a merge to be on the same branch using revision number to sort them. Could be easily extend to give priority to an initial branch. @@ -89,20 +89,31 @@ def topoiter(revs, parentsfunc): # and the <blocked> set contains the parents revisions of already emitted # revision. # # You could pre-seed the <parents> set of groups[0] to a specific # changesets to select what the first emitted branch should be. - # - # We do not support revisions will hole yet, but adding such support - # would be easy. The iteration will have to be done using both input - # revision and parents (see cl.ancestors function + a few tweaks) but - # only revisions parts of the initial set should be emitted. groups = [([], unblocked)] - for current in revs: - if True: - # Look for a group awaiting on the current revision. - matching = [i for i, g in enumerate(groups) if current in g[1]] + pendingheap = [] + pendingset = set() + + heapq.heapify(pendingheap) + heappop = heapq.heappop + heappush = heapq.heappush + for currentrev in revs: + # heap works with smallest element, we want highest so we invert + if currentrev not in pendingset: + heappush(pendingheap, -currentrev) + pendingset.add(currentrev) + # iterates on pending rev until after the current rev have been + # processeed. + rev = None + while rev != currentrev: + rev = -heappop(pendingheap) + pendingset.remove(rev) + + # seek for a group awaiting on the current revision. + matching = [i for i, g in enumerate(groups) if rev in g[1]] if matching: # The main idea is to gather together all set that await on the # same revision. # @@ -128,23 +139,29 @@ def topoiter(revs, parentsfunc): for i in reversed(matching): del groups[i] else: # his is a new head we create a new group for it. targetidx = len(groups) - groups.append(([], set([current]))) + groups.append(([], set([rev]))) gr = groups[targetidx] # We now adds the current nodes to this groups. This is done after # the group merging because all elements from a group that relied # on this rev must preceed it. # # we also update the <parents> set to includes the parents on the # new nodes. - gr[0].append(current) - gr[1].remove(current) - gr[1].update([p for p in parentsfunc(current) if p > nullrev]) + if rev == currentrev: # only display stuff in rev + gr[0].append(rev) + gr[1].remove(rev) + parents = [p for p in parentsfunc(rev) if p > nullrev] + gr[1].update(parents) + for p in parents: + if p not in pendingset: + pendingset.add(p) + heappush(pendingheap, -p) # Look for a group to display # # When unblocked is empty (if clause), We are not waiting over any # revision during the first iteration (if no priority was given) or @@ -173,10 +190,15 @@ def topoiter(revs, parentsfunc): # unless it is group[0] in which case you just empty it. if targetidx: del groups[targetidx] else: gr[0][:] = [] + # Check if we have some group waiting for revision we are not going to + # iterate over + for g in groups: + for r in g[0]: + yield r def dagwalker(repo, revs): """cset DAG generator yielding (id, CHANGESET, ctx, [parentids]) tuples This generator function walks through revisions (which should be ordered diff --git a/tests/test-glog-topological.t b/tests/test-glog-topological.t --- a/tests/test-glog-topological.t +++ b/tests/test-glog-topological.t @@ -35,10 +35,13 @@ later. | | o | 1 |/ o 0 + +(display all nodes) + $ hg --config experimental.graph-topological=1 log -G o 8 | o 3 | @@ -54,5 +57,24 @@ later. | | | o 4 |/ o 0 + +(revset skipping nodes) + + $ hg --config experimental.graph-topological=1 log -G --rev 'not (2+6)' + o 8 + | + o 3 + | + o 1 + | + | o 7 + | | + | o 5 + | | + | o 4 + |/ + o 0 + +