Submitter | Pierre-Yves David |
---|---|
Date | May 4, 2015, 10:25 p.m. |
Message ID | <dffaf3d15f9587d3f4e0.1430778354@marginatus.alto.octopoid.net> |
Download | mbox | patch |
Permalink | /patch/8881/ |
State | Accepted |
Headers | show |
Comments
On Mon, May 4, 2015 at 3:29 PM Pierre-Yves David < pierre-yves.david@ens-lyon.org> wrote: > # HG changeset patch > # User Pierre-Yves David <pierre-yves.david@fb.com> > # Date 1395876090 25200 > # Wed Mar 26 16:21:30 2014 -0700 > # Node ID dffaf3d15f9587d3f4e00a03cd288108c5cc78fe > # Parent 8aff772b686be3240fc04dc633dff18299ba7151 > revset-ancestors: use and iterator instead of a dequeu > > The dequeue was actually just used to be able to pop value one at a time. > Building the dequeue mean we are reading all the input value at once at the > beginning of the evaluation. This defeat the lazyness of revset. > > We replace the deque with iterator usage for the sake of simplicity and > lazyness. > > This provide massive speedup to get the first result if the input set is > big > > max(::all()) > before) wall 0.001917 comb 0.000000 user 0.000000 sys 0.000000 (best of > 1115) > after) wall 0.000107 comb 0.000000 user 0.000000 sys 0.000000 (best of > 22222) > > diff --git a/mercurial/revset.py b/mercurial/revset.py > --- a/mercurial/revset.py > +++ b/mercurial/revset.py > @@ -23,27 +23,30 @@ def _revancestors(repo, revs, followfirs > else: > cut = None > cl = repo.changelog > > def iterate(): > - revqueue, inputrev = None, None > h = [] > > revs.sort(reverse=True) > - revqueue = util.deque(revs) > - if revqueue: > - inputrev = revqueue.popleft() > + irevs = iter(revs) > + try: > + inputrev = irevs.next() > heapq.heappush(h, -inputrev) > + except StopIteration: > + irevs, inputrev = None, None > When revs is empty, h will also be empty. So before this patch we could have done "if not revqueue: return" instead, which would have made it clear that inputrev never needs to be initialized to None. Since "return" in a generator can also be written "raise StopIteration", you could simply let the above StopIteration through. Am I correct? Since this was similar before and after your patch, perhaps such a simplification should not be baked in to this patch. I do think it would be cleaner to get it in before rather than after, though. But I may very well be missing something here. > > seen = set() > while h: > current = -heapq.heappop(h) > if current not in seen: > - if inputrev and current == inputrev: > - if revqueue: > - inputrev = revqueue.popleft() > + if irevs is not None and current == inputrev: > + try: > + inputrev = irevs.next()inputrev > heapq.heappush(h, -inputrev) > + except StopIteration: > + irevs, inputrev = None, None > The straight-forwards translation would be "except StopIteration: pass", wouldn't it? > seen.add(current) > yield current > for parent in cl.parentrevs(current)[:cut]: > if parent != node.nullrev: > heapq.heappush(h, -parent) > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel >
For the record, Pierre-Yves and I talked about this more on IRC. I ended up pushing a "return early when revs is empty" patch to the clowncopter after Pierre-Yves reviewed it off the list (probably transient link: https://bitbucket.org/martinvonz/hg/commits/025000e6f13871a4eb3f6e49752d88c4785c009d). Pierre-Yves will rebase and send a V2, IIUC. On Wed, May 6, 2015 at 11:16 AM Martin von Zweigbergk <martinvonz@google.com> wrote: > On Mon, May 4, 2015 at 3:29 PM Pierre-Yves David < > pierre-yves.david@ens-lyon.org> wrote: > >> # HG changeset patch >> # User Pierre-Yves David <pierre-yves.david@fb.com> >> # Date 1395876090 25200 >> # Wed Mar 26 16:21:30 2014 -0700 >> # Node ID dffaf3d15f9587d3f4e00a03cd288108c5cc78fe >> # Parent 8aff772b686be3240fc04dc633dff18299ba7151 >> revset-ancestors: use and iterator instead of a dequeu >> >> The dequeue was actually just used to be able to pop value one at a time. >> Building the dequeue mean we are reading all the input value at once at >> the >> beginning of the evaluation. This defeat the lazyness of revset. >> >> We replace the deque with iterator usage for the sake of simplicity and >> lazyness. >> >> This provide massive speedup to get the first result if the input set is >> big >> >> max(::all()) >> before) wall 0.001917 comb 0.000000 user 0.000000 sys 0.000000 (best of >> 1115) >> after) wall 0.000107 comb 0.000000 user 0.000000 sys 0.000000 (best of >> 22222) >> >> diff --git a/mercurial/revset.py b/mercurial/revset.py >> --- a/mercurial/revset.py >> +++ b/mercurial/revset.py >> @@ -23,27 +23,30 @@ def _revancestors(repo, revs, followfirs >> else: >> cut = None >> cl = repo.changelog >> >> def iterate(): >> - revqueue, inputrev = None, None >> h = [] >> >> revs.sort(reverse=True) >> - revqueue = util.deque(revs) >> - if revqueue: >> - inputrev = revqueue.popleft() >> + irevs = iter(revs) >> + try: >> + inputrev = irevs.next() >> heapq.heappush(h, -inputrev) >> + except StopIteration: >> + irevs, inputrev = None, None >> > > When revs is empty, h will also be empty. So before this patch we could > have done "if not revqueue: return" instead, which would have made it clear > that inputrev never needs to be initialized to None. Since "return" in a > generator can also be written "raise StopIteration", you could simply let > the above StopIteration through. Am I correct? > > Since this was similar before and after your patch, perhaps such a > simplification should not be baked in to this patch. I do think it would be > cleaner to get it in before rather than after, though. But I may very well > be missing something here. > > >> >> seen = set() >> while h: >> current = -heapq.heappop(h) >> if current not in seen: >> - if inputrev and current == inputrev: >> - if revqueue: >> - inputrev = revqueue.popleft() >> + if irevs is not None and current == inputrev: >> + try: >> > + inputrev = irevs.next()inputrev > > >> heapq.heappush(h, -inputrev) >> + except StopIteration: >> + irevs, inputrev = None, None >> > > The straight-forwards translation would be "except StopIteration: pass", > wouldn't it? > > >> seen.add(current) >> yield current >> for parent in cl.parentrevs(current)[:cut]: >> if parent != node.nullrev: >> heapq.heappush(h, -parent) >> _______________________________________________ >> Mercurial-devel mailing list >> Mercurial-devel@selenic.com >> http://selenic.com/mailman/listinfo/mercurial-devel >> >
Patch
diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -23,27 +23,30 @@ def _revancestors(repo, revs, followfirs else: cut = None cl = repo.changelog def iterate(): - revqueue, inputrev = None, None h = [] revs.sort(reverse=True) - revqueue = util.deque(revs) - if revqueue: - inputrev = revqueue.popleft() + irevs = iter(revs) + try: + inputrev = irevs.next() heapq.heappush(h, -inputrev) + except StopIteration: + irevs, inputrev = None, None seen = set() while h: current = -heapq.heappop(h) if current not in seen: - if inputrev and current == inputrev: - if revqueue: - inputrev = revqueue.popleft() + if irevs is not None and current == inputrev: + try: + inputrev = irevs.next() heapq.heappush(h, -inputrev) + except StopIteration: + irevs, inputrev = None, None seen.add(current) yield current for parent in cl.parentrevs(current)[:cut]: if parent != node.nullrev: heapq.heappush(h, -parent)