Patchwork revset: add 'only' revset

login
register
mail settings
Submitter Durham Goode
Date Feb. 26, 2014, 7:35 p.m.
Message ID <9bcbd10a93137b4dbc83.1393443314@dev010.prn1.facebook.com>
Download mbox | patch
Permalink /patch/3772/
State Superseded
Commit 10433163bf5768cddac7f36ad39244282710e786
Headers show

Comments

Durham Goode - Feb. 26, 2014, 7:35 p.m.
# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 1384621028 28800
#      Sat Nov 16 08:57:08 2013 -0800
# Node ID 9bcbd10a93137b4dbc83bebd41fb4f00143a7615
# Parent  0ad353831461516132f57ccda8e8e0515213ec60
revset: add 'only' revset

Adds a only() revset that has two forms:

only(<set>) is equivalent to "::<set> - ::(heads() - heads(<set>::))"

only(<include>,<exclude>) is equivalent to "::<include> - ::<exclude>"

On a large repo, this implementation can process/traverse 50,000 revs in 0.7
seconds, versus 4.2 seconds using "::<include> - ::<exclude>".

This is useful for performing histedits on your branch:
hg histedit -r 'first(only(.))'

Or lifting branch foo off of branch bar:
hg rebase -d @ -s 'only(foo, bar)'

Or a variety of other uses.
Lucas Moscovicz - Feb. 27, 2014, 6:37 p.m.
On 2/26/14, 11:35 AM, "Durham Goode" <durham@fb.com> wrote:

># HG changeset patch
># User Durham Goode <durham@fb.com>
># Date 1384621028 28800
>#      Sat Nov 16 08:57:08 2013 -0800
># Node ID 9bcbd10a93137b4dbc83bebd41fb4f00143a7615
># Parent  0ad353831461516132f57ccda8e8e0515213ec60
>revset: add 'only' revset
>
>Adds a only() revset that has two forms:
>
>only(<set>) is equivalent to "::<set> - ::(heads() - heads(<set>::))"
>
>only(<include>,<exclude>) is equivalent to "::<include> - ::<exclude>"
>
>On a large repo, this implementation can process/traverse 50,000 revs in
>0.7
>seconds, versus 4.2 seconds using "::<include> - ::<exclude>".
>
>This is useful for performing histedits on your branch:
>hg histedit -r 'first(only(.))'
>
>Or lifting branch foo off of branch bar:
>hg rebase -d @ -s 'only(foo, bar)'
>
>Or a variety of other uses.
>
>diff --git a/mercurial/revset.py b/mercurial/revset.py
>--- a/mercurial/revset.py
>+++ b/mercurial/revset.py
>@@ -9,6 +9,7 @@
> import parser, util, error, discovery, hbisect, phases
> import node
> import match as matchmod
>+import ancestor as ancestormod
> from i18n import _
> import encoding
> import obsolete as obsmod
>@@ -351,6 +352,25 @@
>     kind, pattern, matcher = _substringmatcher(n)
>     return lazyset(subset, lambda x:
>matcher(encoding.lower(repo[x].user())))
> 
>+def only(repo, subset, x):
>+    """``only(set, [set])
>+    Changesets that are only ancestors of the first set, but not the
>second.
>+    If no second set is specified, it is assumed to be the heads that are
>+    not in or descendants of the first set.
>+    """
>+    cl = repo.changelog
>+    args = getargs(x, 1, 2, _('only takes one or two arguments'))
>+    include = set(getset(repo, baseset(cl), args[0]))

Here you should use getset(repo, baseset(cl), args[0]).set() since
anything returned by getset should have that method at this point and in
many cases it will be more efficient (when it¹s a spanset for example
it¹ll just return itself since it¹s contains is fast instead of going
through all the values to build a set)

>+    if len(args) == 1:
>+        descendants = set(_revdescendants(repo, include, False))
>+        exclude = [rev for rev in cl.headrevs()
>+            if not rev in descendants and not rev in include]
>+    else:
>+        exclude = getset(repo, baseset(cl), args[1])
>+
>+    results = set(ancestormod.missingancestors(include, exclude,
>cl.parentrevs))
>+    return lazyset(subset, lambda x: x in results)
>+
> def bisect(repo, subset, x):
>     """``bisect(string)``
>     Changesets marked in the specified bisect status:
>@@ -1587,6 +1607,7 @@
>     "ancestors": ancestors,
>     "_firstancestors": _firstancestors,
>     "author": author,
>+    "only": only,
>     "bisect": bisect,
>     "bisected": bisected,
>     "bookmark": bookmark,
>diff --git a/tests/test-revset.t b/tests/test-revset.t
>--- a/tests/test-revset.t
>+++ b/tests/test-revset.t
>@@ -367,6 +367,22 @@
>   4
>   $ log 'id(5)'
>   2
>+  $ log 'only(9)'
>+  8
>+  9
>+  $ log 'only(8)'
>+  8
>+  $ log 'only(9, 5)'
>+  2
>+  4
>+  8
>+  9
>+  $ log 'only(7 + 9, 5 + 2)'
>+  4
>+  6
>+  7
>+  8
>+  9
>   $ log 'outgoing()'
>   8
>   9
>_______________________________________________
>Mercurial-devel mailing list
>Mercurial-devel@selenic.com
>https://urldefense.proofpoint.com/v1/url?u=http://selenic.com/mailman/list
>info/mercurial-devel&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=OvJpSDyvbZ%2BdRIG
>uE%2BQNXdEMu%2FMWX%2BVvreTVxvKUMnE%3D%0A&m=9HUK%2FsgHAMmXj9vn9d7PymqiQsJ4u
>KxSQcakrrGrIvs%3D%0A&s=7c65d79c34da7f3b293578f7330bfbff9814de6fd83ee718c84
>a39913c4d6270
Lucas Moscovicz - Feb. 27, 2014, 7:07 p.m.
On 2/27/14, 10:37 AM, "Lucas Moscovicz" <lmoscovicz@fb.com> wrote:

>

>

>On 2/26/14, 11:35 AM, "Durham Goode" <durham@fb.com> wrote:

>

>># HG changeset patch

>># User Durham Goode <durham@fb.com>

>># Date 1384621028 28800

>>#      Sat Nov 16 08:57:08 2013 -0800

>># Node ID 9bcbd10a93137b4dbc83bebd41fb4f00143a7615

>># Parent  0ad353831461516132f57ccda8e8e0515213ec60

>>revset: add 'only' revset

>>

>>Adds a only() revset that has two forms:

>>

>>only(<set>) is equivalent to "::<set> - ::(heads() - heads(<set>::))"

>>

>>only(<include>,<exclude>) is equivalent to "::<include> - ::<exclude>"

>>

>>On a large repo, this implementation can process/traverse 50,000 revs in

>>0.7

>>seconds, versus 4.2 seconds using "::<include> - ::<exclude>".

>>

>>This is useful for performing histedits on your branch:

>>hg histedit -r 'first(only(.))'

>>

>>Or lifting branch foo off of branch bar:

>>hg rebase -d @ -s 'only(foo, bar)'

>>

>>Or a variety of other uses.

>>

>>diff --git a/mercurial/revset.py b/mercurial/revset.py

>>--- a/mercurial/revset.py

>>+++ b/mercurial/revset.py

>>@@ -9,6 +9,7 @@

>> import parser, util, error, discovery, hbisect, phases

>> import node

>> import match as matchmod

>>+import ancestor as ancestormod

>> from i18n import _

>> import encoding

>> import obsolete as obsmod

>>@@ -351,6 +352,25 @@

>>     kind, pattern, matcher = _substringmatcher(n)

>>     return lazyset(subset, lambda x:

>>matcher(encoding.lower(repo[x].user())))

>> 

>>+def only(repo, subset, x):

>>+    """``only(set, [set])

>>+    Changesets that are only ancestors of the first set, but not the

>>second.

>>+    If no second set is specified, it is assumed to be the heads that

>>are

>>+    not in or descendants of the first set.

>>+    """

>>+    cl = repo.changelog

>>+    args = getargs(x, 1, 2, _('only takes one or two arguments'))

>>+    include = set(getset(repo, baseset(cl), args[0]))

>

>Here you should use getset(repo, baseset(cl), args[0]).set() since

>anything returned by getset should have that method at this point and in

>many cases it will be more efficient (when it¹s a spanset for example

>it¹ll just return itself since it¹s contains is fast instead of going

>through all the values to build a set)


Also, I would consider using spanset(repo) instead of baseset(cl)

>

>>+    if len(args) == 1:

>>+        descendants = set(_revdescendants(repo, include, False))

>>+        exclude = [rev for rev in cl.headrevs()

>>+            if not rev in descendants and not rev in include]

>>+    else:

>>+        exclude = getset(repo, baseset(cl), args[1])

>>+


Here too

>>+    results = set(ancestormod.missingancestors(include, exclude,

>>cl.parentrevs))

>>+    return lazyset(subset, lambda x: x in results)

>>+

>> def bisect(repo, subset, x):

>>     """``bisect(string)``

>>     Changesets marked in the specified bisect status:

>>@@ -1587,6 +1607,7 @@

>>     "ancestors": ancestors,

>>     "_firstancestors": _firstancestors,

>>     "author": author,

>>+    "only": only,

>>     "bisect": bisect,

>>     "bisected": bisected,

>>     "bookmark": bookmark,

>>diff --git a/tests/test-revset.t b/tests/test-revset.t

>>--- a/tests/test-revset.t

>>+++ b/tests/test-revset.t

>>@@ -367,6 +367,22 @@

>>   4

>>   $ log 'id(5)'

>>   2

>>+  $ log 'only(9)'

>>+  8

>>+  9

>>+  $ log 'only(8)'

>>+  8

>>+  $ log 'only(9, 5)'

>>+  2

>>+  4

>>+  8

>>+  9

>>+  $ log 'only(7 + 9, 5 + 2)'

>>+  4

>>+  6

>>+  7

>>+  8

>>+  9

>>   $ log 'outgoing()'

>>   8

>>   9

>>_______________________________________________

>>Mercurial-devel mailing list

>>Mercurial-devel@selenic.com

>>https://urldefense.proofpoint.com/v1/url?u=http://selenic.com/mailman/lis

>>t

>>info/mercurial-devel&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=OvJpSDyvbZ%2BdRI

>>G

>>uE%2BQNXdEMu%2FMWX%2BVvreTVxvKUMnE%3D%0A&m=9HUK%2FsgHAMmXj9vn9d7PymqiQsJ4

>>u

>>KxSQcakrrGrIvs%3D%0A&s=7c65d79c34da7f3b293578f7330bfbff9814de6fd83ee718c8

>>4

>>a39913c4d6270

>

>_______________________________________________

>Mercurial-devel mailing list

>Mercurial-devel@selenic.com

>https://urldefense.proofpoint.com/v1/url?u=http://selenic.com/mailman/list

>info/mercurial-devel&k=ZVNjlDMF0FElm4dQtryO4A%3D%3D%0A&r=OvJpSDyvbZ%2BdRIG

>uE%2BQNXdEMu%2FMWX%2BVvreTVxvKUMnE%3D%0A&m=nMIWf8eyyaRvndoubomHZzQOYPdThGl

>sBsCZiAFqMu4%3D%0A&s=a7edeaa8dc910c50c7c575af444a6b5aaf706e9dd813a110f5c5b

>00592520bf8
Augie Fackler - Feb. 28, 2014, 5:29 p.m.
On Wed, Feb 26, 2014 at 11:35:14AM -0800, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham@fb.com>
> # Date 1384621028 28800
> #      Sat Nov 16 08:57:08 2013 -0800
> # Node ID 9bcbd10a93137b4dbc83bebd41fb4f00143a7615
> # Parent  0ad353831461516132f57ccda8e8e0515213ec60
> revset: add 'only' revset

Not crazy about the name, but seems reasonable.

I had to think about this for a /long/ time to figure out that it's
not ancestors(<set>). Can you figure out some clearer descriptions of
this operator?

>
> Adds a only() revset that has two forms:
>
> only(<set>) is equivalent to "::<set> - ::(heads() - heads(<set>::))"
>
> only(<include>,<exclude>) is equivalent to "::<include> - ::<exclude>"
>
> On a large repo, this implementation can process/traverse 50,000 revs in 0.7
> seconds, versus 4.2 seconds using "::<include> - ::<exclude>".

Hm. I wonder if we could do something to make - smarter so that it
could auto-optimize that case? Probably too ambitious.


>
> This is useful for performing histedits on your branch:
> hg histedit -r 'first(only(.))'
>
> Or lifting branch foo off of branch bar:
> hg rebase -d @ -s 'only(foo, bar)'
>
> Or a variety of other uses.
>
> diff --git a/mercurial/revset.py b/mercurial/revset.py
> --- a/mercurial/revset.py
> +++ b/mercurial/revset.py
> @@ -9,6 +9,7 @@
>  import parser, util, error, discovery, hbisect, phases
>  import node
>  import match as matchmod
> +import ancestor as ancestormod
>  from i18n import _
>  import encoding
>  import obsolete as obsmod
> @@ -351,6 +352,25 @@
>      kind, pattern, matcher = _substringmatcher(n)
>      return lazyset(subset, lambda x: matcher(encoding.lower(repo[x].user())))
>
> +def only(repo, subset, x):
> +    """``only(set, [set])
> +    Changesets that are only ancestors of the first set, but not the second.
> +    If no second set is specified, it is assumed to be the heads that are
> +    not in or descendants of the first set.
> +    """
> +    cl = repo.changelog
> +    args = getargs(x, 1, 2, _('only takes one or two arguments'))
> +    include = set(getset(repo, baseset(cl), args[0]))
> +    if len(args) == 1:
> +        descendants = set(_revdescendants(repo, include, False))
> +        exclude = [rev for rev in cl.headrevs()
> +            if not rev in descendants and not rev in include]
> +    else:
> +        exclude = getset(repo, baseset(cl), args[1])
> +
> +    results = set(ancestormod.missingancestors(include, exclude, cl.parentrevs))
> +    return lazyset(subset, lambda x: x in results)
> +
>  def bisect(repo, subset, x):
>      """``bisect(string)``
>      Changesets marked in the specified bisect status:
> @@ -1587,6 +1607,7 @@
>      "ancestors": ancestors,
>      "_firstancestors": _firstancestors,
>      "author": author,
> +    "only": only,
>      "bisect": bisect,
>      "bisected": bisected,
>      "bookmark": bookmark,
> diff --git a/tests/test-revset.t b/tests/test-revset.t
> --- a/tests/test-revset.t
> +++ b/tests/test-revset.t
> @@ -367,6 +367,22 @@
>    4
>    $ log 'id(5)'
>    2
> +  $ log 'only(9)'
> +  8
> +  9
> +  $ log 'only(8)'
> +  8
> +  $ log 'only(9, 5)'
> +  2
> +  4
> +  8
> +  9
> +  $ log 'only(7 + 9, 5 + 2)'
> +  4
> +  6
> +  7
> +  8
> +  9
>    $ log 'outgoing()'
>    8
>    9
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> http://selenic.com/mailman/listinfo/mercurial-devel
Durham Goode - Feb. 28, 2014, 7:43 p.m.
On 2/28/14 9:29 AM, "Augie Fackler" <raf@durin42.com> wrote:

>On Wed, Feb 26, 2014 at 11:35:14AM -0800, Durham Goode wrote:
>> # HG changeset patch
>> # User Durham Goode <durham@fb.com>
>> # Date 1384621028 28800
>> #      Sat Nov 16 08:57:08 2013 -0800
>> # Node ID 9bcbd10a93137b4dbc83bebd41fb4f00143a7615
>> # Parent  0ad353831461516132f57ccda8e8e0515213ec60
>> revset: add 'only' revset
>
>Not crazy about the name, but seems reasonable.

I'm also not 100% happy.  Some other options I've considered:

onlyin(...)
exclusive(...)
exclusiveto(...)

reachable(...)
inbranch(...)
infork(...)


The real appropriate name is 'branch()', but that's already taken for
other purposes.


>
>I had to think about this for a /long/ time to figure out that it's
>not ancestors(<set>). Can you figure out some clearer descriptions of
>this operator?
>
>>
>> Adds a only() revset that has two forms:
>>
>> only(<set>) is equivalent to "::<set> - ::(heads() - heads(<set>::))"
>>
>> only(<include>,<exclude>) is equivalent to "::<include> - ::<exclude>"
>>
>> On a large repo, this implementation can process/traverse 50,000 revs
>>in 0.7
>> seconds, versus 4.2 seconds using "::<include> - ::<exclude>".
>
>Hm. I wonder if we could do something to make - smarter so that it
>could auto-optimize that case? Probably too ambitious.

Sid made a commit earlier this month that did optimize it.  So ::<include>
- ::<exclude> is actually pretty fast now.  My commit message is just old
:p
Matt Mackall - Feb. 28, 2014, 11:27 p.m.
On Fri, 2014-02-28 at 12:29 -0500, Augie Fackler wrote:
> On Wed, Feb 26, 2014 at 11:35:14AM -0800, Durham Goode wrote:
> > # HG changeset patch
> > # User Durham Goode <durham@fb.com>
> > # Date 1384621028 28800
> > #      Sat Nov 16 08:57:08 2013 -0800
> > # Node ID 9bcbd10a93137b4dbc83bebd41fb4f00143a7615
> > # Parent  0ad353831461516132f57ccda8e8e0515213ec60
> > revset: add 'only' revset
> 
> Not crazy about the name, but seems reasonable.
> 
> I had to think about this for a /long/ time to figure out that it's
> not ancestors(<set>). Can you figure out some clearer descriptions of
> this operator?
> 
> >
> > Adds a only() revset that has two forms:
> >
> > only(<set>) is equivalent to "::<set> - ::(heads() - heads(<set>::))"
> >
> > only(<include>,<exclude>) is equivalent to "::<include> - ::<exclude>"
> >
> > On a large repo, this implementation can process/traverse 50,000 revs in 0.7
> > seconds, versus 4.2 seconds using "::<include> - ::<exclude>".
> 
> Hm. I wonder if we could do something to make - smarter so that it
> could auto-optimize that case? Probably too ambitious.

Nah, now that only exists, rewriting "::x - ::y" to only(x, y) in the
optimizer is pretty easy. But actually, I think we already did this?

changeset:   20635:2efd608473fb
user:        Siddharth Agarwal <sid0@fb.com>
date:        Thu Feb 13 14:04:47 2014 -0800
files:       mercurial/revset.py tests/test-revset.t
description:
revset: optimize missing ancestor expressions

A missing ancestor expression is any expression of the form (::x - ::y) or
equivalent. Such expressions are remarkably common, and so far have involved
multiple walks down the DAG, followed by a set difference operation.

With this patch, such expressions will be transformed into uses of the fast
algorithm at ancestor.missingancestor.

For a repository with over 600,000 revisions, perfrevset for '::tip - ::-10000'
returns:

Before: ! wall 3.999575 comb 4.000000 user 3.910000 sys 0.090000 (best of 3)
After:  ! wall 0.132423 comb 0.130000 user 0.130000 sys 0.000000 (best of 75)

Patch

diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -9,6 +9,7 @@ 
 import parser, util, error, discovery, hbisect, phases
 import node
 import match as matchmod
+import ancestor as ancestormod
 from i18n import _
 import encoding
 import obsolete as obsmod
@@ -351,6 +352,25 @@ 
     kind, pattern, matcher = _substringmatcher(n)
     return lazyset(subset, lambda x: matcher(encoding.lower(repo[x].user())))
 
+def only(repo, subset, x):
+    """``only(set, [set])
+    Changesets that are only ancestors of the first set, but not the second.
+    If no second set is specified, it is assumed to be the heads that are
+    not in or descendants of the first set.
+    """
+    cl = repo.changelog
+    args = getargs(x, 1, 2, _('only takes one or two arguments'))
+    include = set(getset(repo, baseset(cl), args[0]))
+    if len(args) == 1:
+        descendants = set(_revdescendants(repo, include, False))
+        exclude = [rev for rev in cl.headrevs()
+            if not rev in descendants and not rev in include]
+    else:
+        exclude = getset(repo, baseset(cl), args[1])
+
+    results = set(ancestormod.missingancestors(include, exclude, cl.parentrevs))
+    return lazyset(subset, lambda x: x in results)
+
 def bisect(repo, subset, x):
     """``bisect(string)``
     Changesets marked in the specified bisect status:
@@ -1587,6 +1607,7 @@ 
     "ancestors": ancestors,
     "_firstancestors": _firstancestors,
     "author": author,
+    "only": only,
     "bisect": bisect,
     "bisected": bisected,
     "bookmark": bookmark,
diff --git a/tests/test-revset.t b/tests/test-revset.t
--- a/tests/test-revset.t
+++ b/tests/test-revset.t
@@ -367,6 +367,22 @@ 
   4
   $ log 'id(5)'
   2
+  $ log 'only(9)'
+  8
+  9
+  $ log 'only(8)'
+  8
+  $ log 'only(9, 5)'
+  2
+  4
+  8
+  9
+  $ log 'only(7 + 9, 5 + 2)'
+  4
+  6
+  7
+  8
+  9
   $ log 'outgoing()'
   8
   9