Submitter | Durham Goode |
---|---|
Date | Feb. 26, 2014, 7:35 p.m. |
Message ID | <9bcbd10a93137b4dbc83.1393443314@dev010.prn1.facebook.com> |
Download | mbox | patch |
Permalink | /patch/3772/ |
State | Superseded |
Commit | 10433163bf5768cddac7f36ad39244282710e786 |
Headers | show |
Comments
On 2/26/14, 11:35 AM, "Durham Goode" <durham@fb.com> wrote: ># HG changeset patch ># User Durham Goode <durham@fb.com> ># Date 1384621028 28800 ># Sat Nov 16 08:57:08 2013 -0800 ># Node ID 9bcbd10a93137b4dbc83bebd41fb4f00143a7615 ># Parent 0ad353831461516132f57ccda8e8e0515213ec60 >revset: add 'only' revset > >Adds a only() revset that has two forms: > >only(<set>) is equivalent to "::<set> - ::(heads() - heads(<set>::))" > >only(<include>,<exclude>) is equivalent to "::<include> - ::<exclude>" > >On a large repo, this implementation can process/traverse 50,000 revs in >0.7 >seconds, versus 4.2 seconds using "::<include> - ::<exclude>". > >This is useful for performing histedits on your branch: >hg histedit -r 'first(only(.))' > >Or lifting branch foo off of branch bar: >hg rebase -d @ -s 'only(foo, bar)' > >Or a variety of other uses. > >diff --git a/mercurial/revset.py b/mercurial/revset.py >--- a/mercurial/revset.py >+++ b/mercurial/revset.py >@@ -9,6 +9,7 @@ > import parser, util, error, discovery, hbisect, phases > import node > import match as matchmod >+import ancestor as ancestormod > from i18n import _ > import encoding > import obsolete as obsmod >@@ -351,6 +352,25 @@ > kind, pattern, matcher = _substringmatcher(n) > return lazyset(subset, lambda x: >matcher(encoding.lower(repo[x].user()))) > >+def only(repo, subset, x): >+ """``only(set, [set]) >+ Changesets that are only ancestors of the first set, but not the >second. >+ If no second set is specified, it is assumed to be the heads that are >+ not in or descendants of the first set. >+ """ >+ cl = repo.changelog >+ args = getargs(x, 1, 2, _('only takes one or two arguments')) >+ include = set(getset(repo, baseset(cl), args[0])) Here you should use getset(repo, baseset(cl), args[0]).set() since anything returned by getset should have that method at this point and in many cases it will be more efficient (when it¹s a spanset for example it¹ll just return itself since it¹s contains is fast instead of going through all the values to build a set) >+ if len(args) == 1: >+ descendants = set(_revdescendants(repo, include, False)) >+ exclude = [rev for rev in cl.headrevs() >+ if not rev in descendants and not rev in include] >+ else: >+ exclude = getset(repo, baseset(cl), args[1]) >+ >+ results = set(ancestormod.missingancestors(include, exclude, >cl.parentrevs)) >+ return lazyset(subset, lambda x: x in results) >+ > def bisect(repo, subset, x): > """``bisect(string)`` > Changesets marked in the specified bisect status: >@@ -1587,6 +1607,7 @@ > "ancestors": ancestors, > "_firstancestors": _firstancestors, > "author": author, >+ "only": only, > "bisect": bisect, > "bisected": bisected, > "bookmark": bookmark, >diff --git a/tests/test-revset.t b/tests/test-revset.t >--- a/tests/test-revset.t >+++ b/tests/test-revset.t >@@ -367,6 +367,22 @@ > 4 > $ log 'id(5)' > 2 >+ $ log 'only(9)' >+ 8 >+ 9 >+ $ log 'only(8)' >+ 8 >+ $ log 'only(9, 5)' >+ 2 >+ 4 >+ 8 >+ 9 >+ $ log 'only(7 + 9, 5 + 2)' >+ 4 >+ 6 >+ 7 >+ 8 >+ 9 > $ log 'outgoing()' > 8 > 9 >_______________________________________________ >Mercurial-devel mailing list >Mercurial-devel@selenic.com >https://urldefense.proofpoint.com/v1/url?u=http://selenic.com/mailman/list >info/mercurial-devel&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=OvJpSDyvbZ%2BdRIG >uE%2BQNXdEMu%2FMWX%2BVvreTVxvKUMnE%3D%0A&m=9HUK%2FsgHAMmXj9vn9d7PymqiQsJ4u >KxSQcakrrGrIvs%3D%0A&s=7c65d79c34da7f3b293578f7330bfbff9814de6fd83ee718c84 >a39913c4d6270
On 2/27/14, 10:37 AM, "Lucas Moscovicz" <lmoscovicz@fb.com> wrote: > > >On 2/26/14, 11:35 AM, "Durham Goode" <durham@fb.com> wrote: > >># HG changeset patch >># User Durham Goode <durham@fb.com> >># Date 1384621028 28800 >># Sat Nov 16 08:57:08 2013 -0800 >># Node ID 9bcbd10a93137b4dbc83bebd41fb4f00143a7615 >># Parent 0ad353831461516132f57ccda8e8e0515213ec60 >>revset: add 'only' revset >> >>Adds a only() revset that has two forms: >> >>only(<set>) is equivalent to "::<set> - ::(heads() - heads(<set>::))" >> >>only(<include>,<exclude>) is equivalent to "::<include> - ::<exclude>" >> >>On a large repo, this implementation can process/traverse 50,000 revs in >>0.7 >>seconds, versus 4.2 seconds using "::<include> - ::<exclude>". >> >>This is useful for performing histedits on your branch: >>hg histedit -r 'first(only(.))' >> >>Or lifting branch foo off of branch bar: >>hg rebase -d @ -s 'only(foo, bar)' >> >>Or a variety of other uses. >> >>diff --git a/mercurial/revset.py b/mercurial/revset.py >>--- a/mercurial/revset.py >>+++ b/mercurial/revset.py >>@@ -9,6 +9,7 @@ >> import parser, util, error, discovery, hbisect, phases >> import node >> import match as matchmod >>+import ancestor as ancestormod >> from i18n import _ >> import encoding >> import obsolete as obsmod >>@@ -351,6 +352,25 @@ >> kind, pattern, matcher = _substringmatcher(n) >> return lazyset(subset, lambda x: >>matcher(encoding.lower(repo[x].user()))) >> >>+def only(repo, subset, x): >>+ """``only(set, [set]) >>+ Changesets that are only ancestors of the first set, but not the >>second. >>+ If no second set is specified, it is assumed to be the heads that >>are >>+ not in or descendants of the first set. >>+ """ >>+ cl = repo.changelog >>+ args = getargs(x, 1, 2, _('only takes one or two arguments')) >>+ include = set(getset(repo, baseset(cl), args[0])) > >Here you should use getset(repo, baseset(cl), args[0]).set() since >anything returned by getset should have that method at this point and in >many cases it will be more efficient (when it¹s a spanset for example >it¹ll just return itself since it¹s contains is fast instead of going >through all the values to build a set) Also, I would consider using spanset(repo) instead of baseset(cl) > >>+ if len(args) == 1: >>+ descendants = set(_revdescendants(repo, include, False)) >>+ exclude = [rev for rev in cl.headrevs() >>+ if not rev in descendants and not rev in include] >>+ else: >>+ exclude = getset(repo, baseset(cl), args[1]) >>+ Here too >>+ results = set(ancestormod.missingancestors(include, exclude, >>cl.parentrevs)) >>+ return lazyset(subset, lambda x: x in results) >>+ >> def bisect(repo, subset, x): >> """``bisect(string)`` >> Changesets marked in the specified bisect status: >>@@ -1587,6 +1607,7 @@ >> "ancestors": ancestors, >> "_firstancestors": _firstancestors, >> "author": author, >>+ "only": only, >> "bisect": bisect, >> "bisected": bisected, >> "bookmark": bookmark, >>diff --git a/tests/test-revset.t b/tests/test-revset.t >>--- a/tests/test-revset.t >>+++ b/tests/test-revset.t >>@@ -367,6 +367,22 @@ >> 4 >> $ log 'id(5)' >> 2 >>+ $ log 'only(9)' >>+ 8 >>+ 9 >>+ $ log 'only(8)' >>+ 8 >>+ $ log 'only(9, 5)' >>+ 2 >>+ 4 >>+ 8 >>+ 9 >>+ $ log 'only(7 + 9, 5 + 2)' >>+ 4 >>+ 6 >>+ 7 >>+ 8 >>+ 9 >> $ log 'outgoing()' >> 8 >> 9 >>_______________________________________________ >>Mercurial-devel mailing list >>Mercurial-devel@selenic.com >>https://urldefense.proofpoint.com/v1/url?u=http://selenic.com/mailman/lis >>t >>info/mercurial-devel&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=OvJpSDyvbZ%2BdRI >>G >>uE%2BQNXdEMu%2FMWX%2BVvreTVxvKUMnE%3D%0A&m=9HUK%2FsgHAMmXj9vn9d7PymqiQsJ4 >>u >>KxSQcakrrGrIvs%3D%0A&s=7c65d79c34da7f3b293578f7330bfbff9814de6fd83ee718c8 >>4 >>a39913c4d6270 > >_______________________________________________ >Mercurial-devel mailing list >Mercurial-devel@selenic.com >https://urldefense.proofpoint.com/v1/url?u=http://selenic.com/mailman/list >info/mercurial-devel&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=OvJpSDyvbZ%2BdRIG >uE%2BQNXdEMu%2FMWX%2BVvreTVxvKUMnE%3D%0A&m=nMIWf8eyyaRvndoubomHZzQOYPdThGl >sBsCZiAFqMu4%3D%0A&s=a7edeaa8dc910c50c7c575af444a6b5aaf706e9dd813a110f5c5b >00592520bf8
On Wed, Feb 26, 2014 at 11:35:14AM -0800, Durham Goode wrote: > # HG changeset patch > # User Durham Goode <durham@fb.com> > # Date 1384621028 28800 > # Sat Nov 16 08:57:08 2013 -0800 > # Node ID 9bcbd10a93137b4dbc83bebd41fb4f00143a7615 > # Parent 0ad353831461516132f57ccda8e8e0515213ec60 > revset: add 'only' revset Not crazy about the name, but seems reasonable. I had to think about this for a /long/ time to figure out that it's not ancestors(<set>). Can you figure out some clearer descriptions of this operator? > > Adds a only() revset that has two forms: > > only(<set>) is equivalent to "::<set> - ::(heads() - heads(<set>::))" > > only(<include>,<exclude>) is equivalent to "::<include> - ::<exclude>" > > On a large repo, this implementation can process/traverse 50,000 revs in 0.7 > seconds, versus 4.2 seconds using "::<include> - ::<exclude>". Hm. I wonder if we could do something to make - smarter so that it could auto-optimize that case? Probably too ambitious. > > This is useful for performing histedits on your branch: > hg histedit -r 'first(only(.))' > > Or lifting branch foo off of branch bar: > hg rebase -d @ -s 'only(foo, bar)' > > Or a variety of other uses. > > diff --git a/mercurial/revset.py b/mercurial/revset.py > --- a/mercurial/revset.py > +++ b/mercurial/revset.py > @@ -9,6 +9,7 @@ > import parser, util, error, discovery, hbisect, phases > import node > import match as matchmod > +import ancestor as ancestormod > from i18n import _ > import encoding > import obsolete as obsmod > @@ -351,6 +352,25 @@ > kind, pattern, matcher = _substringmatcher(n) > return lazyset(subset, lambda x: matcher(encoding.lower(repo[x].user()))) > > +def only(repo, subset, x): > + """``only(set, [set]) > + Changesets that are only ancestors of the first set, but not the second. > + If no second set is specified, it is assumed to be the heads that are > + not in or descendants of the first set. > + """ > + cl = repo.changelog > + args = getargs(x, 1, 2, _('only takes one or two arguments')) > + include = set(getset(repo, baseset(cl), args[0])) > + if len(args) == 1: > + descendants = set(_revdescendants(repo, include, False)) > + exclude = [rev for rev in cl.headrevs() > + if not rev in descendants and not rev in include] > + else: > + exclude = getset(repo, baseset(cl), args[1]) > + > + results = set(ancestormod.missingancestors(include, exclude, cl.parentrevs)) > + return lazyset(subset, lambda x: x in results) > + > def bisect(repo, subset, x): > """``bisect(string)`` > Changesets marked in the specified bisect status: > @@ -1587,6 +1607,7 @@ > "ancestors": ancestors, > "_firstancestors": _firstancestors, > "author": author, > + "only": only, > "bisect": bisect, > "bisected": bisected, > "bookmark": bookmark, > diff --git a/tests/test-revset.t b/tests/test-revset.t > --- a/tests/test-revset.t > +++ b/tests/test-revset.t > @@ -367,6 +367,22 @@ > 4 > $ log 'id(5)' > 2 > + $ log 'only(9)' > + 8 > + 9 > + $ log 'only(8)' > + 8 > + $ log 'only(9, 5)' > + 2 > + 4 > + 8 > + 9 > + $ log 'only(7 + 9, 5 + 2)' > + 4 > + 6 > + 7 > + 8 > + 9 > $ log 'outgoing()' > 8 > 9 > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel
On 2/28/14 9:29 AM, "Augie Fackler" <raf@durin42.com> wrote: >On Wed, Feb 26, 2014 at 11:35:14AM -0800, Durham Goode wrote: >> # HG changeset patch >> # User Durham Goode <durham@fb.com> >> # Date 1384621028 28800 >> # Sat Nov 16 08:57:08 2013 -0800 >> # Node ID 9bcbd10a93137b4dbc83bebd41fb4f00143a7615 >> # Parent 0ad353831461516132f57ccda8e8e0515213ec60 >> revset: add 'only' revset > >Not crazy about the name, but seems reasonable. I'm also not 100% happy. Some other options I've considered: onlyin(...) exclusive(...) exclusiveto(...) reachable(...) inbranch(...) infork(...) The real appropriate name is 'branch()', but that's already taken for other purposes. > >I had to think about this for a /long/ time to figure out that it's >not ancestors(<set>). Can you figure out some clearer descriptions of >this operator? > >> >> Adds a only() revset that has two forms: >> >> only(<set>) is equivalent to "::<set> - ::(heads() - heads(<set>::))" >> >> only(<include>,<exclude>) is equivalent to "::<include> - ::<exclude>" >> >> On a large repo, this implementation can process/traverse 50,000 revs >>in 0.7 >> seconds, versus 4.2 seconds using "::<include> - ::<exclude>". > >Hm. I wonder if we could do something to make - smarter so that it >could auto-optimize that case? Probably too ambitious. Sid made a commit earlier this month that did optimize it. So ::<include> - ::<exclude> is actually pretty fast now. My commit message is just old :p
On Fri, 2014-02-28 at 12:29 -0500, Augie Fackler wrote: > On Wed, Feb 26, 2014 at 11:35:14AM -0800, Durham Goode wrote: > > # HG changeset patch > > # User Durham Goode <durham@fb.com> > > # Date 1384621028 28800 > > # Sat Nov 16 08:57:08 2013 -0800 > > # Node ID 9bcbd10a93137b4dbc83bebd41fb4f00143a7615 > > # Parent 0ad353831461516132f57ccda8e8e0515213ec60 > > revset: add 'only' revset > > Not crazy about the name, but seems reasonable. > > I had to think about this for a /long/ time to figure out that it's > not ancestors(<set>). Can you figure out some clearer descriptions of > this operator? > > > > > Adds a only() revset that has two forms: > > > > only(<set>) is equivalent to "::<set> - ::(heads() - heads(<set>::))" > > > > only(<include>,<exclude>) is equivalent to "::<include> - ::<exclude>" > > > > On a large repo, this implementation can process/traverse 50,000 revs in 0.7 > > seconds, versus 4.2 seconds using "::<include> - ::<exclude>". > > Hm. I wonder if we could do something to make - smarter so that it > could auto-optimize that case? Probably too ambitious. Nah, now that only exists, rewriting "::x - ::y" to only(x, y) in the optimizer is pretty easy. But actually, I think we already did this? changeset: 20635:2efd608473fb user: Siddharth Agarwal <sid0@fb.com> date: Thu Feb 13 14:04:47 2014 -0800 files: mercurial/revset.py tests/test-revset.t description: revset: optimize missing ancestor expressions A missing ancestor expression is any expression of the form (::x - ::y) or equivalent. Such expressions are remarkably common, and so far have involved multiple walks down the DAG, followed by a set difference operation. With this patch, such expressions will be transformed into uses of the fast algorithm at ancestor.missingancestor. For a repository with over 600,000 revisions, perfrevset for '::tip - ::-10000' returns: Before: ! wall 3.999575 comb 4.000000 user 3.910000 sys 0.090000 (best of 3) After: ! wall 0.132423 comb 0.130000 user 0.130000 sys 0.000000 (best of 75)
Patch
diff --git a/mercurial/revset.py b/mercurial/revset.py --- a/mercurial/revset.py +++ b/mercurial/revset.py @@ -9,6 +9,7 @@ import parser, util, error, discovery, hbisect, phases import node import match as matchmod +import ancestor as ancestormod from i18n import _ import encoding import obsolete as obsmod @@ -351,6 +352,25 @@ kind, pattern, matcher = _substringmatcher(n) return lazyset(subset, lambda x: matcher(encoding.lower(repo[x].user()))) +def only(repo, subset, x): + """``only(set, [set]) + Changesets that are only ancestors of the first set, but not the second. + If no second set is specified, it is assumed to be the heads that are + not in or descendants of the first set. + """ + cl = repo.changelog + args = getargs(x, 1, 2, _('only takes one or two arguments')) + include = set(getset(repo, baseset(cl), args[0])) + if len(args) == 1: + descendants = set(_revdescendants(repo, include, False)) + exclude = [rev for rev in cl.headrevs() + if not rev in descendants and not rev in include] + else: + exclude = getset(repo, baseset(cl), args[1]) + + results = set(ancestormod.missingancestors(include, exclude, cl.parentrevs)) + return lazyset(subset, lambda x: x in results) + def bisect(repo, subset, x): """``bisect(string)`` Changesets marked in the specified bisect status: @@ -1587,6 +1607,7 @@ "ancestors": ancestors, "_firstancestors": _firstancestors, "author": author, + "only": only, "bisect": bisect, "bisected": bisected, "bookmark": bookmark, diff --git a/tests/test-revset.t b/tests/test-revset.t --- a/tests/test-revset.t +++ b/tests/test-revset.t @@ -367,6 +367,22 @@ 4 $ log 'id(5)' 2 + $ log 'only(9)' + 8 + 9 + $ log 'only(8)' + 8 + $ log 'only(9, 5)' + 2 + 4 + 8 + 9 + $ log 'only(7 + 9, 5 + 2)' + 4 + 6 + 7 + 8 + 9 $ log 'outgoing()' 8 9