Submitter | Pierre-Yves David |
---|---|
Date | Aug. 7, 2015, 9:06 p.m. |
Message ID | <b11862dc880057a56e59.1438981615@marginatus.alto.octopoid.net> |
Download | mbox | patch |
Permalink | /patch/10157/ |
State | Accepted |
Headers | show |
Comments
On Fri, Aug 07, 2015 at 02:06:55PM -0700, Pierre-Yves David wrote: > # HG changeset patch > # User Laurent Charignon <lcharignon@fb.com> > # Date 1438924280 25200 > # Thu Aug 06 22:11:20 2015 -0700 > # Node ID b11862dc880057a56e5979456bb953d057d10c91 > # Parent 62d61128bd7d35f668b3ff982567b2976601f616 > reachableroots: default to the C implementation This series is queued, but I'm working on followups that I think need to be done before this is pushed (the extension code has bugs which probably prevent exceptions from ever propagating, for example.) I'll follow up with you via IRC and we'll get this landed. > > This patch is part of a series of patches to speed up the computation of > revset.reachableroots by introducing a C implementation. The main motivation is to > speed up smartlog on big repositories. At the end of the series, on our big > repositories the computation of reachableroots is 10-50x faster and smartlog on is > 2x-5x faster. > > Before this patch, reachableroots was computed in pure Python by default. This > patch makes the C implementation the default and provides a speedup for > reachableroots. > > diff --git a/mercurial/revset.py b/mercurial/revset.py > --- a/mercurial/revset.py > +++ b/mercurial/revset.py > @@ -76,24 +76,20 @@ def _revdescendants(repo, revs, followfi > yield i > break > > return generatorset(iterate(), iterasc=True) > > -def reachableroots(repo, roots, heads, includepath=False): > +def reachablerootspure(repo, minroot, roots, heads, includepath): > """return (heads(::<roots> and ::<heads>)) > > If includepath is True, return (<roots>::<heads>).""" > if not roots: > return baseset() > parentrevs = repo.changelog.parentrevs > visit = list(heads) > reachable = set() > seen = {} > - # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset > - # (and if it is not, it should.) > - minroot = min(roots) > - roots = set(roots) > # prefetch all the things! (because python is slow) > reached = reachable.add > dovisit = visit.append > nextvisit = visit.pop > # open-code the post-order traversal due to the tiny size of > @@ -117,10 +113,26 @@ def reachableroots(repo, roots, heads, i > for parent in seen[rev]: > if parent in reachable: > reached(rev) > return baseset(sorted(reachable)) > > +def reachableroots(repo, roots, heads, includepath=False): > + """return (heads(::<roots> and ::<heads>)) > + > + If includepath is True, return (<roots>::<heads>).""" > + if not roots: > + return baseset() > + # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset > + # (and if it is not, it should.) > + minroot = min(roots) > + roots = set(roots) > + heads = list(heads) > + try: > + return repo.changelog.reachableroots(minroot, heads, roots, includepath) > + except AttributeError: > + return reachablerootspure(repo, minroot, roots, heads, includepath) > + > elements = { > # token-type: binding-strength, primary, prefix, infix, suffix > "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None), > "##": (20, None, None, ("_concat", 20), None), > "~": (18, None, None, ("ancestor", 18), None), > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > https://selenic.com/mailman/listinfo/mercurial-devel
On Fri, Aug 07, 2015 at 02:06:55PM -0700, Pierre-Yves David wrote: > # HG changeset patch > # User Laurent Charignon <lcharignon@fb.com> > # Date 1438924280 25200 > # Thu Aug 06 22:11:20 2015 -0700 > # Node ID b11862dc880057a56e5979456bb953d057d10c91 > # Parent 62d61128bd7d35f668b3ff982567b2976601f616 > reachableroots: default to the C implementation Update: my followups have revealed many cases in which this code is silently falling back to the Python implementation, so I'm fixing all the goofy bugs in followups and will have marmoute review via IRC. > > This patch is part of a series of patches to speed up the computation of > revset.reachableroots by introducing a C implementation. The main motivation is to > speed up smartlog on big repositories. At the end of the series, on our big > repositories the computation of reachableroots is 10-50x faster and smartlog on is > 2x-5x faster. > > Before this patch, reachableroots was computed in pure Python by default. This > patch makes the C implementation the default and provides a speedup for > reachableroots. > > diff --git a/mercurial/revset.py b/mercurial/revset.py > --- a/mercurial/revset.py > +++ b/mercurial/revset.py > @@ -76,24 +76,20 @@ def _revdescendants(repo, revs, followfi > yield i > break > > return generatorset(iterate(), iterasc=True) > > -def reachableroots(repo, roots, heads, includepath=False): > +def reachablerootspure(repo, minroot, roots, heads, includepath): > """return (heads(::<roots> and ::<heads>)) > > If includepath is True, return (<roots>::<heads>).""" > if not roots: > return baseset() > parentrevs = repo.changelog.parentrevs > visit = list(heads) > reachable = set() > seen = {} > - # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset > - # (and if it is not, it should.) > - minroot = min(roots) > - roots = set(roots) > # prefetch all the things! (because python is slow) > reached = reachable.add > dovisit = visit.append > nextvisit = visit.pop > # open-code the post-order traversal due to the tiny size of > @@ -117,10 +113,26 @@ def reachableroots(repo, roots, heads, i > for parent in seen[rev]: > if parent in reachable: > reached(rev) > return baseset(sorted(reachable)) > > +def reachableroots(repo, roots, heads, includepath=False): > + """return (heads(::<roots> and ::<heads>)) > + > + If includepath is True, return (<roots>::<heads>).""" > + if not roots: > + return baseset() > + # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset > + # (and if it is not, it should.) > + minroot = min(roots) > + roots = set(roots) > + heads = list(heads) > + try: > + return repo.changelog.reachableroots(minroot, heads, roots, includepath) > + except AttributeError: > + return reachablerootspure(repo, minroot, roots, heads, includepath) > + > elements = { > # token-type: binding-strength, primary, prefix, infix, suffix > "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None), > "##": (20, None, None, ("_concat", 20), None), > "~": (18, None, None, ("ancestor", 18), None), > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > https://selenic.com/mailman/listinfo/mercurial-devel
http://hg.durin42.com/hg-wip/graph/reachableroots has my latest patches ready to roll, but: I ran ./contrib/revsetbenchmarks.py -f contrib/all-revsets.txt -R ../crew-clean @ reachableroots --variants=plain and didn't see any revelatory performance wins like the log messages implied. What I do see are things like: revset #56: (not public() - obsolete()) plain 0) 0.000124 1) 0.000105 84% and revset #72: draft() plain 0) 0.000067 1) 0.000057 85% so I think it's a win, but maybe not as much as previously thought? Can you try redoing the benchmarks with the new code I've got and see where you end up? On Tue, Aug 11, 2015 at 3:26 PM, Augie Fackler <raf@durin42.com> wrote: > On Fri, Aug 07, 2015 at 02:06:55PM -0700, Pierre-Yves David wrote: >> # HG changeset patch >> # User Laurent Charignon <lcharignon@fb.com> >> # Date 1438924280 25200 >> # Thu Aug 06 22:11:20 2015 -0700 >> # Node ID b11862dc880057a56e5979456bb953d057d10c91 >> # Parent 62d61128bd7d35f668b3ff982567b2976601f616 >> reachableroots: default to the C implementation > > Update: my followups have revealed many cases in which this code is > silently falling back to the Python implementation, so I'm fixing all > the goofy bugs in followups and will have marmoute review via IRC. > >> >> This patch is part of a series of patches to speed up the computation of >> revset.reachableroots by introducing a C implementation. The main motivation is to >> speed up smartlog on big repositories. At the end of the series, on our big >> repositories the computation of reachableroots is 10-50x faster and smartlog on is >> 2x-5x faster. >> >> Before this patch, reachableroots was computed in pure Python by default. This >> patch makes the C implementation the default and provides a speedup for >> reachableroots. >> >> diff --git a/mercurial/revset.py b/mercurial/revset.py >> --- a/mercurial/revset.py >> +++ b/mercurial/revset.py >> @@ -76,24 +76,20 @@ def _revdescendants(repo, revs, followfi >> yield i >> break >> >> return generatorset(iterate(), iterasc=True) >> >> -def reachableroots(repo, roots, heads, includepath=False): >> +def reachablerootspure(repo, minroot, roots, heads, includepath): >> """return (heads(::<roots> and ::<heads>)) >> >> If includepath is True, return (<roots>::<heads>).""" >> if not roots: >> return baseset() >> parentrevs = repo.changelog.parentrevs >> visit = list(heads) >> reachable = set() >> seen = {} >> - # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset >> - # (and if it is not, it should.) >> - minroot = min(roots) >> - roots = set(roots) >> # prefetch all the things! (because python is slow) >> reached = reachable.add >> dovisit = visit.append >> nextvisit = visit.pop >> # open-code the post-order traversal due to the tiny size of >> @@ -117,10 +113,26 @@ def reachableroots(repo, roots, heads, i >> for parent in seen[rev]: >> if parent in reachable: >> reached(rev) >> return baseset(sorted(reachable)) >> >> +def reachableroots(repo, roots, heads, includepath=False): >> + """return (heads(::<roots> and ::<heads>)) >> + >> + If includepath is True, return (<roots>::<heads>).""" >> + if not roots: >> + return baseset() >> + # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset >> + # (and if it is not, it should.) >> + minroot = min(roots) >> + roots = set(roots) >> + heads = list(heads) >> + try: >> + return repo.changelog.reachableroots(minroot, heads, roots, includepath) >> + except AttributeError: >> + return reachablerootspure(repo, minroot, roots, heads, includepath) >> + >> elements = { >> # token-type: binding-strength, primary, prefix, infix, suffix >> "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None), >> "##": (20, None, None, ("_concat", 20), None), >> "~": (18, None, None, ("ancestor", 18), None), >> _______________________________________________ >> Mercurial-devel mailing list >> Mercurial-devel@selenic.com >> https://selenic.com/mailman/listinfo/mercurial-devel > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > https://selenic.com/mailman/listinfo/mercurial-devel
On 08/11/2015 01:41 PM, Augie Fackler wrote: > http://hg.durin42.com/hg-wip/graph/reachableroots has my latest > patches ready to roll, but: > > I ran ./contrib/revsetbenchmarks.py -f contrib/all-revsets.txt -R > ../crew-clean @ reachableroots --variants=plain > > and didn't see any revelatory performance wins like the log messages > implied. What I do see are things like: > > revset #56: (not public() - obsolete()) > plain > 0) 0.000124 > 1) 0.000105 84% > > and > > revset #72: draft() > plain > 0) 0.000067 > 1) 0.000057 85% > > so I think it's a win, but maybe not as much as previously thought? > Can you try redoing the benchmarks with the new code I've got and see > where you end up? revset: 0::tip plain 0) 0.032721 1) 0.003761 11% revset: tip~150::tip plain 0) 0.000426 1) 0.000249 58% This is a win ;-)
> On Aug 11, 2015, at 6:13 PM, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote: > > > > On 08/11/2015 01:41 PM, Augie Fackler wrote: >> http://hg.durin42.com/hg-wip/graph/reachableroots has my latest >> patches ready to roll, but: >> >> I ran ./contrib/revsetbenchmarks.py -f contrib/all-revsets.txt -R >> ../crew-clean @ reachableroots --variants=plain >> >> and didn't see any revelatory performance wins like the log messages >> implied. What I do see are things like: >> >> revset #56: (not public() - obsolete()) >> plain >> 0) 0.000124 >> 1) 0.000105 84% >> >> and >> >> revset #72: draft() >> plain >> 0) 0.000067 >> 1) 0.000057 85% >> >> so I think it's a win, but maybe not as much as previously thought? >> Can you try redoing the benchmarks with the new code I've got and see >> where you end up? > > revset: 0::tip > plain > 0) 0.032721 > 1) 0.003761 11% > > revset: tip~150::tip > plain > 0) 0.000426 > 1) 0.000249 58% > > This is a win ;-) I have reproduced the perf wins and documented them in a commit message. Should we add 0::tip to the list of revsets to test in contrib? > > -- > Pierre-Yves David
On 08/11/2015 05:04 PM, Augie Fackler wrote: > >> On Aug 11, 2015, at 6:13 PM, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote: >> >> >> >> On 08/11/2015 01:41 PM, Augie Fackler wrote: >>> http://hg.durin42.com/hg-wip/graph/reachableroots has my latest >>> patches ready to roll, but: >>> >>> I ran ./contrib/revsetbenchmarks.py -f contrib/all-revsets.txt -R >>> ../crew-clean @ reachableroots --variants=plain >>> >>> and didn't see any revelatory performance wins like the log messages >>> implied. What I do see are things like: >>> >>> revset #56: (not public() - obsolete()) >>> plain >>> 0) 0.000124 >>> 1) 0.000105 84% >>> >>> and >>> >>> revset #72: draft() >>> plain >>> 0) 0.000067 >>> 1) 0.000057 85% >>> >>> so I think it's a win, but maybe not as much as previously thought? >>> Can you try redoing the benchmarks with the new code I've got and see >>> where you end up? >> >> revset: 0::tip >> plain >> 0) 0.032721 >> 1) 0.003761 11% >> >> revset: tip~150::tip >> plain >> 0) 0.000426 >> 1) 0.000249 58% >> >> This is a win ;-) > > I have reproduced the perf wins and documented them in a commit message. Should we add 0::tip to the list of revsets to test in contrib? Yes. Certainly in the all-revset file (with a comment).
> On Aug 11, 2015, at 10:18 PM, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote: > > > > On 08/11/2015 05:04 PM, Augie Fackler wrote: >> >>> On Aug 11, 2015, at 6:13 PM, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote: >>> >>> >>> >>> On 08/11/2015 01:41 PM, Augie Fackler wrote: >>>> http://hg.durin42.com/hg-wip/graph/reachableroots has my latest >>>> patches ready to roll, but: >>>> >>>> I ran ./contrib/revsetbenchmarks.py -f contrib/all-revsets.txt -R >>>> ../crew-clean @ reachableroots --variants=plain >>>> >>>> and didn't see any revelatory performance wins like the log messages >>>> implied. What I do see are things like: >>>> >>>> revset #56: (not public() - obsolete()) >>>> plain >>>> 0) 0.000124 >>>> 1) 0.000105 84% >>>> >>>> and >>>> >>>> revset #72: draft() >>>> plain >>>> 0) 0.000067 >>>> 1) 0.000057 85% >>>> >>>> so I think it's a win, but maybe not as much as previously thought? >>>> Can you try redoing the benchmarks with the new code I've got and see >>>> where you end up? >>> >>> revset: 0::tip >>> plain >>> 0) 0.032721 >>> 1) 0.003761 11% >>> >>> revset: tip~150::tip >>> plain >>> 0) 0.000426 >>> 1) 0.000249 58% >>> >>> This is a win ;-) >> >> I have reproduced the perf wins and documented them in a commit message. Should we add 0::tip to the list of revsets to test in contrib? > > Yes. Certainly in the all-revset file (with a comment). D’oh. It’s already in all-revsets. I guess I ran the smaller test suite. Thanks! > > -- > Pierre-Yves David
Patch
diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -76,24 +76,20 @@ def _revdescendants(repo, revs, followfi yield i break return generatorset(iterate(), iterasc=True) -def reachableroots(repo, roots, heads, includepath=False): +def reachablerootspure(repo, minroot, roots, heads, includepath): """return (heads(::<roots> and ::<heads>)) If includepath is True, return (<roots>::<heads>).""" if not roots: return baseset() parentrevs = repo.changelog.parentrevs visit = list(heads) reachable = set() seen = {} - # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset - # (and if it is not, it should.) - minroot = min(roots) - roots = set(roots) # prefetch all the things! (because python is slow) reached = reachable.add dovisit = visit.append nextvisit = visit.pop # open-code the post-order traversal due to the tiny size of @@ -117,10 +113,26 @@ def reachableroots(repo, roots, heads, i for parent in seen[rev]: if parent in reachable: reached(rev) return baseset(sorted(reachable)) +def reachableroots(repo, roots, heads, includepath=False): + """return (heads(::<roots> and ::<heads>)) + + If includepath is True, return (<roots>::<heads>).""" + if not roots: + return baseset() + # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset + # (and if it is not, it should.) + minroot = min(roots) + roots = set(roots) + heads = list(heads) + try: + return repo.changelog.reachableroots(minroot, heads, roots, includepath) + except AttributeError: + return reachablerootspure(repo, minroot, roots, heads, includepath) + elements = { # token-type: binding-strength, primary, prefix, infix, suffix "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None), "##": (20, None, None, ("_concat", 20), None), "~": (18, None, None, ("ancestor", 18), None),