Patchwork [5,of,7] reachableroots: use internal "revstates" array to test if rev is a root

login
register
mail settings
Submitter Yuya Nishihara
Date Aug. 18, 2015, 2:42 p.m.
Message ID <93e5cd30d2ff9b63f116.1439908977@mimosa>
Download mbox | patch
Permalink /patch/10227/
State Accepted
Delegated to: Augie Fackler
Headers show

Comments

Yuya Nishihara - Aug. 18, 2015, 2:42 p.m.
# HG changeset patch
# User Yuya Nishihara <yuya@tcha.org>
# Date 1439534609 -32400
#      Fri Aug 14 15:43:29 2015 +0900
# Node ID 93e5cd30d2ff9b63f11616054cb647d6ac998053
# Parent  2ca0b48b6de1c79bc205e7a660a5531c125cad9e
reachableroots: use internal "revstates" array to test if rev is a root

The main goal of this patch series is to reduce the use of PyXxx() function
that is likely to require ugly error handling and inc/decref. Plus, this is
faster than using PySet_Contains().

  revset #0: 0::tip
  0) 0.004168
  1) 0.003678  88%

This patch ignores out-of-range roots as they are in the pure implementation.
Because reachable sets are calculated from heads, and out-of-range heads raise
IndexError, we can just take out-of-range roots as unreachable. Otherwise,
the test of "hg log -Gr '. + wdir()'" would fail.

"heads" argument is changed to a list. Should we have to rename the C function
as its signature is changed?
Augie Fackler - Aug. 18, 2015, 6:09 p.m.
On Tue, Aug 18, 2015 at 11:42:57PM +0900, Yuya Nishihara wrote:
> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1439534609 -32400
> #      Fri Aug 14 15:43:29 2015 +0900
> # Node ID 93e5cd30d2ff9b63f11616054cb647d6ac998053
> # Parent  2ca0b48b6de1c79bc205e7a660a5531c125cad9e
> reachableroots: use internal "revstates" array to test if rev is a root
>
> The main goal of this patch series is to reduce the use of PyXxx() function
> that is likely to require ugly error handling and inc/decref. Plus, this is
> faster than using PySet_Contains().
>
>   revset #0: 0::tip
>   0) 0.004168
>   1) 0.003678  88%
>
> This patch ignores out-of-range roots as they are in the pure implementation.
> Because reachable sets are calculated from heads, and out-of-range heads raise
> IndexError, we can just take out-of-range roots as unreachable. Otherwise,
> the test of "hg log -Gr '. + wdir()'" would fail.
>
> "heads" argument is changed to a list. Should we have to rename the C function
> as its signature is changed?

I think this should say "roots" rather than heads? In any case, yes,
we should likely change the function name now. Sigh.

>
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -1112,9 +1112,8 @@ static PyObject *reachableroots(indexObj
>       long minroot;
>       PyObject *includepatharg = NULL;
>       int includepath = 0;
> -	/* heads is a list */
> +	/* heads and roots are lists */
>       PyObject *heads = NULL;
> -	/* roots is a set */
>       PyObject *roots = NULL;
>       PyObject *reachable = NULL;
>
> @@ -1133,12 +1132,13 @@ static PyObject *reachableroots(indexObj
>        * revstates: array of length len+1 (all revs + nullrev) */
>       int *tovisit = NULL;
>       long lentovisit = 0;
> -	enum { RS_SEEN = 1 };
> +	enum { RS_SEEN = 1, RS_ROOT = 2 };
>       char *revstates = NULL;
>
>       /* Get arguments */
>       if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
> -				&PySet_Type, &roots, &PyBool_Type, &includepatharg))
> +                           &PyList_Type, &roots,
> +                           &PyBool_Type, &includepatharg))
>               goto bail;
>
>       if (includepatharg == Py_True)
> @@ -1164,6 +1164,18 @@ static PyObject *reachableroots(indexObj
>               goto bail;
>       }
>
> +	l = PyList_GET_SIZE(roots);
> +	for (i = 0; i < l; i++) {
> +		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
> +		if (revnum == -1 && PyErr_Occurred())
> +			goto bail;
> +		/* If root is out of range, e.g. wdir(), it must be unreachable
> +              * from heads. So we can just ignore it. */
> +		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
> +			continue;
> +		revstates[revnum + 1] |= RS_ROOT;
> +	}
> +
>       /* Populate tovisit with all the heads */
>       l = PyList_GET_SIZE(heads);
>       for (i = 0; i < l; i++) {
> @@ -1185,17 +1197,15 @@ static PyObject *reachableroots(indexObj
>       while (k < lentovisit) {
>               /* Add the node to reachable if it is a root*/
>               revnum = tovisit[k++];
> -		val = PyInt_FromLong(revnum);
> -		if (val == NULL)
> -			goto bail;
> -		if (PySet_Contains(roots, val) == 1) {
> +		if (revstates[revnum + 1] & RS_ROOT) {
> +			val = PyInt_FromLong(revnum);
> +			if (val == NULL)
> +				goto bail;
>                       PySet_Add(reachable, val);
> -			if (includepath == 0) {
> -				Py_DECREF(val);
> +			Py_DECREF(val);
> +			if (includepath == 0)
>                               continue;
> -			}
>               }
> -		Py_DECREF(val);
>
>               /* Add its parents to the list of nodes to visit */
>               if (revnum == -1)
> diff --git a/mercurial/revset.py b/mercurial/revset.py
> --- a/mercurial/revset.py
> +++ b/mercurial/revset.py
> @@ -94,6 +94,7 @@ def reachablerootspure(repo, minroot, ro
>      if not roots:
>          return baseset()
>      parentrevs = repo.changelog.parentrevs
> +    roots = set(roots)
>      visit = list(heads)
>      reachable = set()
>      seen = {}
> @@ -133,7 +134,7 @@ def reachableroots(repo, roots, heads, i
>      # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset
>      # (and if it is not, it should.)
>      minroot = min(roots)
> -    roots = set(roots)
> +    roots = list(roots)
>      heads = list(heads)
>      try:
>          return repo.changelog.reachableroots(minroot, heads, roots, includepath)
> diff --git a/tests/test-parseindex.t b/tests/test-parseindex.t
> --- a/tests/test-parseindex.t
> +++ b/tests/test-parseindex.t
> @@ -69,28 +69,53 @@ Test SEGV caused by bad revision passed
>    $ python <<EOF
>    > from mercurial import changelog, scmutil
>    > cl = changelog.changelog(scmutil.vfs('.hg/store'))
> -  > print 'goods:'
> +  > print 'good heads:'
>    > for head in [0, len(cl) - 1, -1]:
> -  >     print'%s: %r' % (head, cl.reachableroots(0, [head], set([0])))
> -  > print 'bads:'
> +  >     print'%s: %r' % (head, cl.reachableroots(0, [head], [0]))
> +  > print 'bad heads:'
>    > for head in [len(cl), 10000, -2, -10000, None]:
>    >     print '%s:' % head,
>    >     try:
> -  >         cl.reachableroots(0, [head], set([0]))
> +  >         cl.reachableroots(0, [head], [0])
>    >         print 'uncaught buffer overflow?'
>    >     except (IndexError, TypeError) as inst:
>    >         print inst
> +  > print 'good roots:'
> +  > for root in [0, len(cl) - 1, -1]:
> +  >     print '%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root]))
> +  > print 'out-of-range roots are ignored:'
> +  > for root in [len(cl), 10000, -2, -10000]:
> +  >     print '%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root]))
> +  > print 'bad roots:'
> +  > for root in [None]:
> +  >     print '%s:' % root,
> +  >     try:
> +  >         cl.reachableroots(root, [len(cl) - 1], [root])
> +  >         print 'uncaught error?'
> +  >     except TypeError as inst:
> +  >         print inst
>    > EOF
> -  goods:
> +  good heads:
>    0: <baseset [0]>
>    1: <baseset [0]>
>    -1: <baseset []>
> -  bads:
> +  bad heads:
>    2: head out of range
>    10000: head out of range
>    -2: head out of range
>    -10000: head out of range
>    None: an integer is required
> +  good roots:
> +  0: <baseset [0]>
> +  1: <baseset [1]>
> +  -1: <baseset [-1]>
> +  out-of-range roots are ignored:
> +  2: <baseset []>
> +  10000: <baseset []>
> +  -2: <baseset []>
> +  -10000: <baseset []>
> +  bad roots:
> +  None: an integer is required
>
>    $ cd ..
>
> @@ -127,7 +152,7 @@ Test corrupted p1/p2 fields that could c
>    > n0, n1 = cl.node(0), cl.node(1)
>    > ops = [
>    >     ('reachableroots',
> -  >      lambda: cl.index.reachableroots(0, [1], set([0]), False)),
> +  >      lambda: cl.index.reachableroots(0, [1], [0], False)),
>    >     ('compute_phases_map_sets', lambda: cl.computephases([[0], []])),
>    >     ('index_headrevs', lambda: cl.headrevs()),
>    >     ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)),
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> https://selenic.com/mailman/listinfo/mercurial-devel
Pierre-Yves David - Aug. 18, 2015, 7:26 p.m.
On 08/18/2015 07:42 AM, Yuya Nishihara wrote:
> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1439534609 -32400
> #      Fri Aug 14 15:43:29 2015 +0900
> # Node ID 93e5cd30d2ff9b63f11616054cb647d6ac998053
> # Parent  2ca0b48b6de1c79bc205e7a660a5531c125cad9e
> reachableroots: use internal "revstates" array to test if rev is a root
>
> The main goal of this patch series is to reduce the use of PyXxx() function
> that is likely to require ugly error handling and inc/decref. Plus, this is
> faster than using PySet_Contains().
>
>    revset #0: 0::tip
>    0) 0.004168
>    1) 0.003678  88%
>
> This patch ignores out-of-range roots as they are in the pure implementation.
> Because reachable sets are calculated from heads, and out-of-range heads raise
> IndexError, we can just take out-of-range roots as unreachable. Otherwise,
> the test of "hg log -Gr '. + wdir()'" would fail.
>
> "heads" argument is changed to a list. Should we have to rename the C function
> as its signature is changed?

Do we actually need (take advantage of) theses argument being a list? 
Could it take an arbitrary iterable? This would allow use to pass 
arbitrary smartset.
Pierre-Yves David - Aug. 18, 2015, 7:32 p.m.
On 08/18/2015 12:26 PM, Pierre-Yves David wrote:
>
>
> On 08/18/2015 07:42 AM, Yuya Nishihara wrote:
>> # HG changeset patch
>> # User Yuya Nishihara <yuya@tcha.org>
>> # Date 1439534609 -32400
>> #      Fri Aug 14 15:43:29 2015 +0900
>> # Node ID 93e5cd30d2ff9b63f11616054cb647d6ac998053
>> # Parent  2ca0b48b6de1c79bc205e7a660a5531c125cad9e
>> reachableroots: use internal "revstates" array to test if rev is a root
>>
>> The main goal of this patch series is to reduce the use of PyXxx()
>> function
>> that is likely to require ugly error handling and inc/decref. Plus,
>> this is
>> faster than using PySet_Contains().
>>
>>    revset #0: 0::tip
>>    0) 0.004168
>>    1) 0.003678  88%
>>
>> This patch ignores out-of-range roots as they are in the pure
>> implementation.
>> Because reachable sets are calculated from heads, and out-of-range
>> heads raise
>> IndexError, we can just take out-of-range roots as unreachable.
>> Otherwise,
>> the test of "hg log -Gr '. + wdir()'" would fail.
>>
>> "heads" argument is changed to a list. Should we have to rename the C
>> function
>> as its signature is changed?
>
> Do we actually need (take advantage of) theses argument being a list?
> Could it take an arbitrary iterable? This would allow use to pass
> arbitrary smartset.

So reading more of the current implementation, it seems we are not doing 
anything specific with the list (beside iterating over it) and nothing 
special with the set beside membership (removed in this series).

So, I think we should enforce the argument to be smartsets and use 
smartset available API in the C code.

that would probably justify a signature change, but we never released 
this code, so I'm unsure. If the old/new code raise proper exception 
when passed the new/argument and fallback to the pure implementation, I 
do not think we need to change the name
Augie Fackler - Aug. 18, 2015, 7:44 p.m.
On Tue, Aug 18, 2015 at 12:32:55PM -0700, Pierre-Yves David wrote:
>
>
> On 08/18/2015 12:26 PM, Pierre-Yves David wrote:
> >
> >
> >On 08/18/2015 07:42 AM, Yuya Nishihara wrote:
> >># HG changeset patch
> >># User Yuya Nishihara <yuya@tcha.org>
> >># Date 1439534609 -32400
> >>#      Fri Aug 14 15:43:29 2015 +0900
> >># Node ID 93e5cd30d2ff9b63f11616054cb647d6ac998053
> >># Parent  2ca0b48b6de1c79bc205e7a660a5531c125cad9e
> >>reachableroots: use internal "revstates" array to test if rev is a root
> >>
> >>The main goal of this patch series is to reduce the use of PyXxx()
> >>function
> >>that is likely to require ugly error handling and inc/decref. Plus,
> >>this is
> >>faster than using PySet_Contains().
> >>
> >>   revset #0: 0::tip
> >>   0) 0.004168
> >>   1) 0.003678  88%
> >>
> >>This patch ignores out-of-range roots as they are in the pure
> >>implementation.
> >>Because reachable sets are calculated from heads, and out-of-range
> >>heads raise
> >>IndexError, we can just take out-of-range roots as unreachable.
> >>Otherwise,
> >>the test of "hg log -Gr '. + wdir()'" would fail.
> >>
> >>"heads" argument is changed to a list. Should we have to rename the C
> >>function
> >>as its signature is changed?
> >
> >Do we actually need (take advantage of) theses argument being a list?
> >Could it take an arbitrary iterable? This would allow use to pass
> >arbitrary smartset.
>
> So reading more of the current implementation, it seems we are not doing
> anything specific with the list (beside iterating over it) and nothing
> special with the set beside membership (removed in this series).
>
> So, I think we should enforce the argument to be smartsets and use smartset
> available API in the C code.
>
> that would probably justify a signature change, but we never released this
> code, so I'm unsure. If the old/new code raise proper exception when passed
> the new/argument and fallback to the pure implementation, I do not think we
> need to change the name

It will not fall back to the old version - it will just explode
because of type mismatches. That's by design - a previous incarnation
of the patch series silently swallowed errors and it meant that it was
too easy to have a trivial programming error cause the C extension to
go unused silently.

>
> --
> Pierre-Yves David
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> https://selenic.com/mailman/listinfo/mercurial-devel
Yuya Nishihara - Aug. 19, 2015, 1 p.m.
On Tue, 18 Aug 2015 15:44:35 -0400, Augie Fackler wrote:
> On Tue, Aug 18, 2015 at 12:32:55PM -0700, Pierre-Yves David wrote:
> >
> >
> > On 08/18/2015 12:26 PM, Pierre-Yves David wrote:
> > >
> > >
> > >On 08/18/2015 07:42 AM, Yuya Nishihara wrote:
> > >># HG changeset patch
> > >># User Yuya Nishihara <yuya@tcha.org>
> > >># Date 1439534609 -32400
> > >>#      Fri Aug 14 15:43:29 2015 +0900
> > >># Node ID 93e5cd30d2ff9b63f11616054cb647d6ac998053
> > >># Parent  2ca0b48b6de1c79bc205e7a660a5531c125cad9e
> > >>reachableroots: use internal "revstates" array to test if rev is a root
> > >>
> > >>The main goal of this patch series is to reduce the use of PyXxx()
> > >>function
> > >>that is likely to require ugly error handling and inc/decref. Plus,
> > >>this is
> > >>faster than using PySet_Contains().
> > >>
> > >>   revset #0: 0::tip
> > >>   0) 0.004168
> > >>   1) 0.003678  88%
> > >>
> > >>This patch ignores out-of-range roots as they are in the pure
> > >>implementation.
> > >>Because reachable sets are calculated from heads, and out-of-range
> > >>heads raise
> > >>IndexError, we can just take out-of-range roots as unreachable.
> > >>Otherwise,
> > >>the test of "hg log -Gr '. + wdir()'" would fail.
> > >>
> > >>"heads" argument is changed to a list. Should we have to rename the C
> > >>function
> > >>as its signature is changed?
> > >
> > >Do we actually need (take advantage of) theses argument being a list?
> > >Could it take an arbitrary iterable? This would allow use to pass
> > >arbitrary smartset.
> >
> > So reading more of the current implementation, it seems we are not doing
> > anything specific with the list (beside iterating over it) and nothing
> > special with the set beside membership (removed in this series).
> >
> > So, I think we should enforce the argument to be smartsets and use smartset
> > available API in the C code.

Good point. Perhaps the only requirement is it should be an iterable.
Bad thing is PyIter API needs Py_DECREFs.

> > that would probably justify a signature change, but we never released this
> > code, so I'm unsure. If the old/new code raise proper exception when passed
> > the new/argument and fallback to the pure implementation, I do not think we
> > need to change the name
> 
> It will not fall back to the old version - it will just explode
> because of type mismatches. That's by design - a previous incarnation
> of the patch series silently swallowed errors and it meant that it was
> too easy to have a trivial programming error cause the C extension to
> go unused silently.

Do we have to care for the case of new .py + old .c?

New C function will be more permissive because sets and lists are iterable,
but if new .py passes a smartset to old C function, it would cause TypeError.
Augie Fackler - Aug. 21, 2015, 5:19 p.m.
On Wed, Aug 19, 2015 at 10:00:42PM +0900, Yuya Nishihara wrote:
> On Tue, 18 Aug 2015 15:44:35 -0400, Augie Fackler wrote:
> > On Tue, Aug 18, 2015 at 12:32:55PM -0700, Pierre-Yves David wrote:
> > >
> > >
> > > On 08/18/2015 12:26 PM, Pierre-Yves David wrote:
> > > >
> > > >
> > > >On 08/18/2015 07:42 AM, Yuya Nishihara wrote:
> > > >># HG changeset patch
> > > >># User Yuya Nishihara <yuya@tcha.org>
> > > >># Date 1439534609 -32400
> > > >>#      Fri Aug 14 15:43:29 2015 +0900
> > > >># Node ID 93e5cd30d2ff9b63f11616054cb647d6ac998053
> > > >># Parent  2ca0b48b6de1c79bc205e7a660a5531c125cad9e
> > > >>reachableroots: use internal "revstates" array to test if rev is a root
> > > >>
> > > >>The main goal of this patch series is to reduce the use of PyXxx()
> > > >>function
> > > >>that is likely to require ugly error handling and inc/decref. Plus,
> > > >>this is
> > > >>faster than using PySet_Contains().
> > > >>
> > > >>   revset #0: 0::tip
> > > >>   0) 0.004168
> > > >>   1) 0.003678  88%
> > > >>
> > > >>This patch ignores out-of-range roots as they are in the pure
> > > >>implementation.
> > > >>Because reachable sets are calculated from heads, and out-of-range
> > > >>heads raise
> > > >>IndexError, we can just take out-of-range roots as unreachable.
> > > >>Otherwise,
> > > >>the test of "hg log -Gr '. + wdir()'" would fail.
> > > >>
> > > >>"heads" argument is changed to a list. Should we have to rename the C
> > > >>function
> > > >>as its signature is changed?
> > > >
> > > >Do we actually need (take advantage of) theses argument being a list?
> > > >Could it take an arbitrary iterable? This would allow use to pass
> > > >arbitrary smartset.
> > >
> > > So reading more of the current implementation, it seems we are not doing
> > > anything specific with the list (beside iterating over it) and nothing
> > > special with the set beside membership (removed in this series).
> > >
> > > So, I think we should enforce the argument to be smartsets and use smartset
> > > available API in the C code.
>
> Good point. Perhaps the only requirement is it should be an iterable.
> Bad thing is PyIter API needs Py_DECREFs.
>
> > > that would probably justify a signature change, but we never released this
> > > code, so I'm unsure. If the old/new code raise proper exception when passed
> > > the new/argument and fallback to the pure implementation, I do not think we
> > > need to change the name
> >
> > It will not fall back to the old version - it will just explode
> > because of type mismatches. That's by design - a previous incarnation
> > of the patch series silently swallowed errors and it meant that it was
> > too easy to have a trivial programming error cause the C extension to
> > go unused silently.
>
> Do we have to care for the case of new .py + old .c?

Hm, no I don't think we need to worry about that especially.

>
> New C function will be more permissive because sets and lists are iterable,
> but if new .py passes a smartset to old C function, it would cause TypeError.

I thought this series moved from using PySet_ functions to PyList_
functions, so it'd still break old .py with new .c?
Augie Fackler - Aug. 21, 2015, 5:48 p.m.
On Tue, Aug 18, 2015 at 11:42:57PM +0900, Yuya Nishihara wrote:
> # HG changeset patch
> # User Yuya Nishihara <yuya@tcha.org>
> # Date 1439534609 -32400
> #      Fri Aug 14 15:43:29 2015 +0900
> # Node ID 93e5cd30d2ff9b63f11616054cb647d6ac998053
> # Parent  2ca0b48b6de1c79bc205e7a660a5531c125cad9e
> reachableroots: use internal "revstates" array to test if rev is a root

After squinting briefly with mpm, I'm going to rename the
reachableroots c func to reachableroots2 and queue this. Thanks!

>
>
> The main goal of this patch series is to reduce the use of PyXxx() function
> that is likely to require ugly error handling and inc/decref. Plus, this is
> faster than using PySet_Contains().
>
>   revset #0: 0::tip
>   0) 0.004168
>   1) 0.003678  88%
>
> This patch ignores out-of-range roots as they are in the pure implementation.
> Because reachable sets are calculated from heads, and out-of-range heads raise
> IndexError, we can just take out-of-range roots as unreachable. Otherwise,
> the test of "hg log -Gr '. + wdir()'" would fail.
>
> "heads" argument is changed to a list. Should we have to rename the C function
> as its signature is changed?
>
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -1112,9 +1112,8 @@ static PyObject *reachableroots(indexObj
>       long minroot;
>       PyObject *includepatharg = NULL;
>       int includepath = 0;
> -	/* heads is a list */
> +	/* heads and roots are lists */
>       PyObject *heads = NULL;
> -	/* roots is a set */
>       PyObject *roots = NULL;
>       PyObject *reachable = NULL;
>
> @@ -1133,12 +1132,13 @@ static PyObject *reachableroots(indexObj
>        * revstates: array of length len+1 (all revs + nullrev) */
>       int *tovisit = NULL;
>       long lentovisit = 0;
> -	enum { RS_SEEN = 1 };
> +	enum { RS_SEEN = 1, RS_ROOT = 2 };
>       char *revstates = NULL;
>
>       /* Get arguments */
>       if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
> -				&PySet_Type, &roots, &PyBool_Type, &includepatharg))
> +                           &PyList_Type, &roots,
> +                           &PyBool_Type, &includepatharg))
>               goto bail;
>
>       if (includepatharg == Py_True)
> @@ -1164,6 +1164,18 @@ static PyObject *reachableroots(indexObj
>               goto bail;
>       }
>
> +	l = PyList_GET_SIZE(roots);
> +	for (i = 0; i < l; i++) {
> +		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
> +		if (revnum == -1 && PyErr_Occurred())
> +			goto bail;
> +		/* If root is out of range, e.g. wdir(), it must be unreachable
> +              * from heads. So we can just ignore it. */
> +		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
> +			continue;
> +		revstates[revnum + 1] |= RS_ROOT;
> +	}
> +
>       /* Populate tovisit with all the heads */
>       l = PyList_GET_SIZE(heads);
>       for (i = 0; i < l; i++) {
> @@ -1185,17 +1197,15 @@ static PyObject *reachableroots(indexObj
>       while (k < lentovisit) {
>               /* Add the node to reachable if it is a root*/
>               revnum = tovisit[k++];
> -		val = PyInt_FromLong(revnum);
> -		if (val == NULL)
> -			goto bail;
> -		if (PySet_Contains(roots, val) == 1) {
> +		if (revstates[revnum + 1] & RS_ROOT) {
> +			val = PyInt_FromLong(revnum);
> +			if (val == NULL)
> +				goto bail;
>                       PySet_Add(reachable, val);
> -			if (includepath == 0) {
> -				Py_DECREF(val);
> +			Py_DECREF(val);
> +			if (includepath == 0)
>                               continue;
> -			}
>               }
> -		Py_DECREF(val);
>
>               /* Add its parents to the list of nodes to visit */
>               if (revnum == -1)
> diff --git a/mercurial/revset.py b/mercurial/revset.py
> --- a/mercurial/revset.py
> +++ b/mercurial/revset.py
> @@ -94,6 +94,7 @@ def reachablerootspure(repo, minroot, ro
>      if not roots:
>          return baseset()
>      parentrevs = repo.changelog.parentrevs
> +    roots = set(roots)
>      visit = list(heads)
>      reachable = set()
>      seen = {}
> @@ -133,7 +134,7 @@ def reachableroots(repo, roots, heads, i
>      # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset
>      # (and if it is not, it should.)
>      minroot = min(roots)
> -    roots = set(roots)
> +    roots = list(roots)
>      heads = list(heads)
>      try:
>          return repo.changelog.reachableroots(minroot, heads, roots, includepath)
> diff --git a/tests/test-parseindex.t b/tests/test-parseindex.t
> --- a/tests/test-parseindex.t
> +++ b/tests/test-parseindex.t
> @@ -69,28 +69,53 @@ Test SEGV caused by bad revision passed
>    $ python <<EOF
>    > from mercurial import changelog, scmutil
>    > cl = changelog.changelog(scmutil.vfs('.hg/store'))
> -  > print 'goods:'
> +  > print 'good heads:'
>    > for head in [0, len(cl) - 1, -1]:
> -  >     print'%s: %r' % (head, cl.reachableroots(0, [head], set([0])))
> -  > print 'bads:'
> +  >     print'%s: %r' % (head, cl.reachableroots(0, [head], [0]))
> +  > print 'bad heads:'
>    > for head in [len(cl), 10000, -2, -10000, None]:
>    >     print '%s:' % head,
>    >     try:
> -  >         cl.reachableroots(0, [head], set([0]))
> +  >         cl.reachableroots(0, [head], [0])
>    >         print 'uncaught buffer overflow?'
>    >     except (IndexError, TypeError) as inst:
>    >         print inst
> +  > print 'good roots:'
> +  > for root in [0, len(cl) - 1, -1]:
> +  >     print '%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root]))
> +  > print 'out-of-range roots are ignored:'
> +  > for root in [len(cl), 10000, -2, -10000]:
> +  >     print '%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root]))
> +  > print 'bad roots:'
> +  > for root in [None]:
> +  >     print '%s:' % root,
> +  >     try:
> +  >         cl.reachableroots(root, [len(cl) - 1], [root])
> +  >         print 'uncaught error?'
> +  >     except TypeError as inst:
> +  >         print inst
>    > EOF
> -  goods:
> +  good heads:
>    0: <baseset [0]>
>    1: <baseset [0]>
>    -1: <baseset []>
> -  bads:
> +  bad heads:
>    2: head out of range
>    10000: head out of range
>    -2: head out of range
>    -10000: head out of range
>    None: an integer is required
> +  good roots:
> +  0: <baseset [0]>
> +  1: <baseset [1]>
> +  -1: <baseset [-1]>
> +  out-of-range roots are ignored:
> +  2: <baseset []>
> +  10000: <baseset []>
> +  -2: <baseset []>
> +  -10000: <baseset []>
> +  bad roots:
> +  None: an integer is required
>
>    $ cd ..
>
> @@ -127,7 +152,7 @@ Test corrupted p1/p2 fields that could c
>    > n0, n1 = cl.node(0), cl.node(1)
>    > ops = [
>    >     ('reachableroots',
> -  >      lambda: cl.index.reachableroots(0, [1], set([0]), False)),
> +  >      lambda: cl.index.reachableroots(0, [1], [0], False)),
>    >     ('compute_phases_map_sets', lambda: cl.computephases([[0], []])),
>    >     ('index_headrevs', lambda: cl.headrevs()),
>    >     ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)),
> _______________________________________________
> Mercurial-devel mailing list
> Mercurial-devel@selenic.com
> https://selenic.com/mailman/listinfo/mercurial-devel
Pierre-Yves David - Aug. 21, 2015, 11:05 p.m.
On 08/21/2015 10:48 AM, Augie Fackler wrote:
> On Tue, Aug 18, 2015 at 11:42:57PM +0900, Yuya Nishihara wrote:
>> # HG changeset patch
>> # User Yuya Nishihara <yuya@tcha.org>
>> # Date 1439534609 -32400
>> #      Fri Aug 14 15:43:29 2015 +0900
>> # Node ID 93e5cd30d2ff9b63f11616054cb647d6ac998053
>> # Parent  2ca0b48b6de1c79bc205e7a660a5531c125cad9e
>> reachableroots: use internal "revstates" array to test if rev is a root
>
> After squinting briefly with mpm, I'm going to rename the
> reachableroots c func to reachableroots2 and queue this. Thanks!

If we break the function signature, can we do so with iterable instead 
of list ?
Augie Fackler - Aug. 21, 2015, 11:07 p.m.
> On Aug 21, 2015, at 7:05 PM, Pierre-Yves David <pierre-yves.david@ens-lyon.org> wrote:
> 
> On 08/21/2015 10:48 AM, Augie Fackler wrote:
>> On Tue, Aug 18, 2015 at 11:42:57PM +0900, Yuya Nishihara wrote:
>>> # HG changeset patch
>>> # User Yuya Nishihara <yuya@tcha.org>
>>> # Date 1439534609 -32400
>>> #      Fri Aug 14 15:43:29 2015 +0900
>>> # Node ID 93e5cd30d2ff9b63f11616054cb647d6ac998053
>>> # Parent  2ca0b48b6de1c79bc205e7a660a5531c125cad9e
>>> reachableroots: use internal "revstates" array to test if rev is a root
>> 
>> After squinting briefly with mpm, I'm going to rename the
>> reachableroots c func to reachableroots2 and queue this. Thanks!
> 
> If we break the function signature, can we do so with iterable instead of list ?

I believe that will require us to perform significantly more bookkeeping than doing the list thing. We can always break it again, it’s not like it’s expensive.
Yuya Nishihara - Aug. 22, 2015, 3:11 a.m.
On Fri, 21 Aug 2015 13:19:00 -0400, Augie Fackler wrote:
> On Wed, Aug 19, 2015 at 10:00:42PM +0900, Yuya Nishihara wrote:
> > On Tue, 18 Aug 2015 15:44:35 -0400, Augie Fackler wrote:
> > > On Tue, Aug 18, 2015 at 12:32:55PM -0700, Pierre-Yves David wrote:
> > > > that would probably justify a signature change, but we never released this
> > > > code, so I'm unsure. If the old/new code raise proper exception when passed
> > > > the new/argument and fallback to the pure implementation, I do not think we
> > > > need to change the name
> > >
> > > It will not fall back to the old version - it will just explode
> > > because of type mismatches. That's by design - a previous incarnation
> > > of the patch series silently swallowed errors and it meant that it was
> > > too easy to have a trivial programming error cause the C extension to
> > > go unused silently.
> >
> > Do we have to care for the case of new .py + old .c?
> 
> Hm, no I don't think we need to worry about that especially.
> 
> >
> > New C function will be more permissive because sets and lists are iterable,
> > but if new .py passes a smartset to old C function, it would cause TypeError.
> 
> I thought this series moved from using PySet_ functions to PyList_
> functions, so it'd still break old .py with new .c?

Yes. I'm thinking of new implementation that will use PyIter thing.

Patch

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -1112,9 +1112,8 @@  static PyObject *reachableroots(indexObj
 	long minroot;
 	PyObject *includepatharg = NULL;
 	int includepath = 0;
-	/* heads is a list */
+	/* heads and roots are lists */
 	PyObject *heads = NULL;
-	/* roots is a set */
 	PyObject *roots = NULL;
 	PyObject *reachable = NULL;
 
@@ -1133,12 +1132,13 @@  static PyObject *reachableroots(indexObj
 	 * revstates: array of length len+1 (all revs + nullrev) */
 	int *tovisit = NULL;
 	long lentovisit = 0;
-	enum { RS_SEEN = 1 };
+	enum { RS_SEEN = 1, RS_ROOT = 2 };
 	char *revstates = NULL;
 
 	/* Get arguments */
 	if (!PyArg_ParseTuple(args, "lO!O!O!", &minroot, &PyList_Type, &heads,
-				&PySet_Type, &roots, &PyBool_Type, &includepatharg))
+			      &PyList_Type, &roots,
+			      &PyBool_Type, &includepatharg))
 		goto bail;
 
 	if (includepatharg == Py_True)
@@ -1164,6 +1164,18 @@  static PyObject *reachableroots(indexObj
 		goto bail;
 	}
 
+	l = PyList_GET_SIZE(roots);
+	for (i = 0; i < l; i++) {
+		revnum = PyInt_AsLong(PyList_GET_ITEM(roots, i));
+		if (revnum == -1 && PyErr_Occurred())
+			goto bail;
+		/* If root is out of range, e.g. wdir(), it must be unreachable
+		 * from heads. So we can just ignore it. */
+		if (revnum + 1 < 0 || revnum + 1 >= len + 1)
+			continue;
+		revstates[revnum + 1] |= RS_ROOT;
+	}
+
 	/* Populate tovisit with all the heads */
 	l = PyList_GET_SIZE(heads);
 	for (i = 0; i < l; i++) {
@@ -1185,17 +1197,15 @@  static PyObject *reachableroots(indexObj
 	while (k < lentovisit) {
 		/* Add the node to reachable if it is a root*/
 		revnum = tovisit[k++];
-		val = PyInt_FromLong(revnum);
-		if (val == NULL)
-			goto bail;
-		if (PySet_Contains(roots, val) == 1) {
+		if (revstates[revnum + 1] & RS_ROOT) {
+			val = PyInt_FromLong(revnum);
+			if (val == NULL)
+				goto bail;
 			PySet_Add(reachable, val);
-			if (includepath == 0) {
-				Py_DECREF(val);
+			Py_DECREF(val);
+			if (includepath == 0)
 				continue;
-			}
 		}
-		Py_DECREF(val);
 
 		/* Add its parents to the list of nodes to visit */
 		if (revnum == -1)
diff --git a/mercurial/revset.py b/mercurial/revset.py
--- a/mercurial/revset.py
+++ b/mercurial/revset.py
@@ -94,6 +94,7 @@  def reachablerootspure(repo, minroot, ro
     if not roots:
         return baseset()
     parentrevs = repo.changelog.parentrevs
+    roots = set(roots)
     visit = list(heads)
     reachable = set()
     seen = {}
@@ -133,7 +134,7 @@  def reachableroots(repo, roots, heads, i
     # XXX this should be 'parentset.min()' assuming 'parentset' is a smartset
     # (and if it is not, it should.)
     minroot = min(roots)
-    roots = set(roots)
+    roots = list(roots)
     heads = list(heads)
     try:
         return repo.changelog.reachableroots(minroot, heads, roots, includepath)
diff --git a/tests/test-parseindex.t b/tests/test-parseindex.t
--- a/tests/test-parseindex.t
+++ b/tests/test-parseindex.t
@@ -69,28 +69,53 @@  Test SEGV caused by bad revision passed 
   $ python <<EOF
   > from mercurial import changelog, scmutil
   > cl = changelog.changelog(scmutil.vfs('.hg/store'))
-  > print 'goods:'
+  > print 'good heads:'
   > for head in [0, len(cl) - 1, -1]:
-  >     print'%s: %r' % (head, cl.reachableroots(0, [head], set([0])))
-  > print 'bads:'
+  >     print'%s: %r' % (head, cl.reachableroots(0, [head], [0]))
+  > print 'bad heads:'
   > for head in [len(cl), 10000, -2, -10000, None]:
   >     print '%s:' % head,
   >     try:
-  >         cl.reachableroots(0, [head], set([0]))
+  >         cl.reachableroots(0, [head], [0])
   >         print 'uncaught buffer overflow?'
   >     except (IndexError, TypeError) as inst:
   >         print inst
+  > print 'good roots:'
+  > for root in [0, len(cl) - 1, -1]:
+  >     print '%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root]))
+  > print 'out-of-range roots are ignored:'
+  > for root in [len(cl), 10000, -2, -10000]:
+  >     print '%s: %r' % (root, cl.reachableroots(root, [len(cl) - 1], [root]))
+  > print 'bad roots:'
+  > for root in [None]:
+  >     print '%s:' % root,
+  >     try:
+  >         cl.reachableroots(root, [len(cl) - 1], [root])
+  >         print 'uncaught error?'
+  >     except TypeError as inst:
+  >         print inst
   > EOF
-  goods:
+  good heads:
   0: <baseset [0]>
   1: <baseset [0]>
   -1: <baseset []>
-  bads:
+  bad heads:
   2: head out of range
   10000: head out of range
   -2: head out of range
   -10000: head out of range
   None: an integer is required
+  good roots:
+  0: <baseset [0]>
+  1: <baseset [1]>
+  -1: <baseset [-1]>
+  out-of-range roots are ignored:
+  2: <baseset []>
+  10000: <baseset []>
+  -2: <baseset []>
+  -10000: <baseset []>
+  bad roots:
+  None: an integer is required
 
   $ cd ..
 
@@ -127,7 +152,7 @@  Test corrupted p1/p2 fields that could c
   > n0, n1 = cl.node(0), cl.node(1)
   > ops = [
   >     ('reachableroots',
-  >      lambda: cl.index.reachableroots(0, [1], set([0]), False)),
+  >      lambda: cl.index.reachableroots(0, [1], [0], False)),
   >     ('compute_phases_map_sets', lambda: cl.computephases([[0], []])),
   >     ('index_headrevs', lambda: cl.headrevs()),
   >     ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)),