Patchwork [1,of,2] phase: compute phases in C

login
register
mail settings
Submitter Laurent Charignon
Date March 20, 2015, 9:08 p.m.
Message ID <1c9600e6ceab57b48827.1426885699@dev919.prn2.facebook.com>
Download mbox | patch
Permalink /patch/8206/
State Superseded
Headers show

Comments

Laurent Charignon - March 20, 2015, 9:08 p.m.
# HG changeset patch
# User Laurent Charignon <lcharignon@fb.com>
# Date 1426874708 25200
#      Fri Mar 20 11:05:08 2015 -0700
# Node ID 1c9600e6ceab57b48827292c7b01ed9a625da1fd
# Parent  b7f936f47f2b104a60840bae571e009742126afc
phase: compute phases in C

Previously, the phase computation would grow much slower as the oldest draft
commit in the repository grew older (which is very common in repos with evolve
on) and the number of commits increase.
By rewriting the computation in C we can speed it up from 700ms to 7ms on
a large repository whose oldest draft commit is a year old.
Laurent Charignon - March 20, 2015, 9:09 p.m.
I forgot to add V2 to this sequence of patch but it is a V2 with an update
of the commit messages.

On 3/20/15, 2:08 PM, "Laurent Charignon" <lcharignon@fb.com> wrote:

># HG changeset patch
># User Laurent Charignon <lcharignon@fb.com>
># Date 1426874708 25200
>#      Fri Mar 20 11:05:08 2015 -0700
># Node ID 1c9600e6ceab57b48827292c7b01ed9a625da1fd
># Parent  b7f936f47f2b104a60840bae571e009742126afc
>phase: compute phases in C
>
>Previously, the phase computation would grow much slower as the oldest
>draft
>commit in the repository grew older (which is very common in repos with
>evolve
>on) and the number of commits increase.
>By rewriting the computation in C we can speed it up from 700ms to 7ms on
>a large repository whose oldest draft commit is a year old.
>
>diff --git a/mercurial/parsers.c b/mercurial/parsers.c
>--- a/mercurial/parsers.c
>+++ b/mercurial/parsers.c
>@@ -13,6 +13,7 @@
> #include <string.h>
> 
> #include "util.h"
>+#define MIN(a, b) (((a)<(b))?(a):(b))
> 
> static char *versionerrortext = "Python minor version mismatch";
> 
>@@ -911,6 +912,143 @@
> 	}
> }
> 
>+static long add_roots_get_min(indexObject *self, PyObject *list, int
>marker,
>+                              char *phases)
>+{
>+	if (PyList_Size(list) != 0) {
>+		long min_idx = index_length(self) + 1;
>+		PyObject *iter = PyObject_GetIter(list);
>+		if (iter == NULL) {
>+			return -2;
>+		}
>+		PyObject *iter_item;
>+		long iter_item_long;
>+		while ((iter_item = PyIter_Next(iter)))
>+		{
>+			iter_item_long = PyInt_AS_LONG(iter_item);
>+			Py_DECREF(iter_item);
>+			if (iter_item_long < min_idx) {
>+				min_idx = iter_item_long;
>+			}
>+			phases[iter_item_long] = marker;
>+		}
>+		Py_DECREF(iter);
>+
>+		return min_idx;
>+	} else {
>+		return index_length(self);
>+	}
>+}
>+
>+static inline void set_phase_from_parents(char *phases, int parent_1,
>+                                          int parent_2, int i )
>+{
>+	if ((parent_1 >= 0 && phases[parent_1] > phases[i])) {
>+		phases[i] = phases[parent_1];
>+	}
>+	if ((parent_2 >= 0 && phases[parent_2] > phases[i])) {
>+		phases[i] = phases[parent_2];
>+	}
>+}
>+
>+static PyObject *compute_phases(indexObject *self, PyObject *args)
>+{
>+	char *phases = NULL;
>+	PyObject **phasemarkers = NULL;
>+	PyObject *roots = Py_None;
>+
>+	/* Argument validation */
>+	if (!PyArg_ParseTuple(args, "O", &roots)) {
>+		goto bail;
>+	}
>+	if (roots == NULL || !PyList_Check(roots)) {
>+		goto bail;
>+	}
>+
>+	Py_ssize_t addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
>+	Py_ssize_t len = index_length(self) - 1;
>+	phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2:
>secret} */
>+
>+	/* Put the phase information of all the roots in phases */
>+	Py_ssize_t numphase = PyList_Size(roots)+1;
>+	Py_ssize_t minrevallphases = len + 1;
>+	Py_ssize_t i = 0;
>+	for (i = 0; i < numphase-1; i++) {
>+		PyObject *phaseroots = PyList_GetItem(roots, i);
>+
>+		if (!PyList_Check(phaseroots)) {
>+			Py_XDECREF(phaseroots);
>+			goto bail;
>+		}
>+
>+		Py_ssize_t minrevphase = add_roots_get_min(self, phaseroots, i+1,
>phases);
>+		/* Error from add_roots_get_min */
>+		if (minrevphase == -2 ) {
>+			Py_XDECREF(phaseroots);
>+			goto bail;
>+		}
>+		minrevallphases = MIN(minrevallphases, minrevphase);
>+	}
>+
>+	/* Propagate the phase information from the roots to the revs */
>+	if (minrevallphases != -1) {
>+		for (i = minrevallphases; i < self->raw_length; i++) {
>+			const char *data = index_deref(self, i);
>+			set_phase_from_parents(phases, getbe32(data+24), getbe32(data+28), i);
>+		}
>+
>+		for (i = 0; i < addlen; i++) {
>+			PyObject *rev = PyList_GET_ITEM(self->added, i);
>+			PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
>+			PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
>+			long parent_1, parent_2;
>+
>+			if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
>+				PyErr_SetString(PyExc_TypeError,
>+						"revlog parents are invalid");
>+				goto bail;
>+			}
>+			parent_1 = PyInt_AS_LONG(p1);
>+			parent_2 = PyInt_AS_LONG(p2);
>+			set_phase_from_parents(phases, parent_1, parent_2,
>i+self->raw_length);
>+		}
>+	}
>+
>+	/* Build list of markers to avoid creating python objects repeatedly */
>+	phasemarkers = calloc(numphase, sizeof(PyObject *));
>+	if (phasemarkers == NULL) {
>+		goto bail;
>+	}
>+	for (i = 0; i < numphase; i++) {
>+		phasemarkers[i] = PyInt_FromLong(i);
>+	}
>+
>+	/* Transform phase list to a python list */
>+	PyObject *phaseslist = PyList_New(len);
>+	Py_INCREF(phaseslist);
>+	if (phaseslist == NULL) {
>+		Py_XDECREF(phaseslist);
>+		goto bail;
>+	}
>+	for (i = 0; i < len; i++) {
>+		PyList_SET_ITEM(phaseslist, i, phasemarkers[(int)phases[i]]);
>+	}
>+
>+	/* Cleanup and return */
>+	free(phases);
>+	for (i = 0; i < numphase; i++) {
>+		Py_XDECREF(phasemarkers[i]);
>+	}
>+	free(phasemarkers);
>+	return phaseslist;
>+
>+bail:
>+	if (phases != NULL) {
>+		free(phases);
>+	}
>+	return Py_None;
>+}
>+
> static PyObject *index_headrevs(indexObject *self, PyObject *args)
> {
> 	Py_ssize_t i, len, addlen;
>@@ -2043,6 +2181,8 @@
> 	 "clear the index caches"},
> 	{"get", (PyCFunction)index_m_get, METH_VARARGS,
> 	 "get an index entry"},
>+	{"computephases", compute_phases, METH_VARARGS,
>+		"compute phases"},
> 	{"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
> 	 "get head revisions"}, /* Can do filtering since 3.2 */
> 	{"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,
>_______________________________________________
>Mercurial-devel mailing list
>Mercurial-devel@selenic.com
>http://selenic.com/mailman/listinfo/mercurial-devel
Adrian Buehlmann - March 20, 2015, 9:30 p.m.
On 2015-03-20 22:08, Laurent Charignon wrote:
> # HG changeset patch
> # User Laurent Charignon <lcharignon@fb.com>
> # Date 1426874708 25200
> #      Fri Mar 20 11:05:08 2015 -0700
> # Node ID 1c9600e6ceab57b48827292c7b01ed9a625da1fd
> # Parent  b7f936f47f2b104a60840bae571e009742126afc
> phase: compute phases in C
> 
> Previously, the phase computation would grow much slower as the oldest draft
> commit in the repository grew older (which is very common in repos with evolve
> on) and the number of commits increase.
> By rewriting the computation in C we can speed it up from 700ms to 7ms on
> a large repository whose oldest draft commit is a year old.
> 
> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
> --- a/mercurial/parsers.c
> +++ b/mercurial/parsers.c
> @@ -13,6 +13,7 @@
>  #include <string.h>
>  
>  #include "util.h"
> +#define MIN(a, b) (((a)<(b))?(a):(b))
>  
>  static char *versionerrortext = "Python minor version mismatch";
>  
> @@ -911,6 +912,143 @@
>  	}
>  }
>  
> +static long add_roots_get_min(indexObject *self, PyObject *list, int marker,
> +                              char *phases)
> +{
> +	if (PyList_Size(list) != 0) {
> +		long min_idx = index_length(self) + 1;

In case anyone still cares about the Windows platform: C's 'long' type
is notoriously problematic, as Windows uses the LLP64 model for 64-bit
target, which means long is 32 bit - which may surprise Linux folks.
Matt Mackall - March 20, 2015, 10:28 p.m.
On Fri, 2015-03-20 at 14:08 -0700, Laurent Charignon wrote:
> # HG changeset patch
> # User Laurent Charignon <lcharignon@fb.com>
> # Date 1426874708 25200
> #      Fri Mar 20 11:05:08 2015 -0700
> # Node ID 1c9600e6ceab57b48827292c7b01ed9a625da1fd
> # Parent  b7f936f47f2b104a60840bae571e009742126afc
> phase: compute phases in C

>  #include "util.h"
> +#define MIN(a, b) (((a)<(b))?(a):(b))

If we want to have a line like the second line anywhere, it's probably
inside the firstline. 

> +	/* Build list of markers to avoid creating python objects repeatedly */
> +	phasemarkers = calloc(numphase, sizeof(PyObject *));

> +	if (phasemarkers == NULL) {
> +		goto bail;
> +	}
> +	for (i = 0; i < numphase; i++) {
> +		phasemarkers[i] = PyInt_FromLong(i);
> +	}
> +
> +	/* Transform phase list to a python list */
> +	PyObject *phaseslist = PyList_New(len);
> +	Py_INCREF(phaseslist);
> +	if (phaseslist == NULL) {
> +		Py_XDECREF(phaseslist);
> +		goto bail;
> +	}
> +	for (i = 0; i < len; i++) {
> +		PyList_SET_ITEM(phaseslist, i, phasemarkers[(int)phases[i]]);
> +	}
> +
> +	/* Cleanup and return */
> +	free(phases);
> +	for (i = 0; i < numphase; i++) {
> +		Py_XDECREF(phasemarkers[i]);
> +	}
> +	free(phasemarkers);
> +	return phaseslist;
> +
> +bail:
> +	if (phases != NULL) {
> +		free(phases);
> +	}
> +	return Py_None;

This is not quite right. This should illustrate the goto error-handling
pattern a bit more clearly:

int func(void)
{
	int ret = -1; /* default error */
	char *a, *b, *c;
	
	a = malloc();
	if (!a)
		goto release_none;
	...
	if (some_error) {
		ret = -2; /* special error code */
		goto release_a;
	...
	b = malloc();
	if (!b)
		goto release_a;
	...
	if (something()) {
		...
		c = malloc();
		if (!c)
			goto release_b;		
		...
		while(something_else()) {
			...
			if (we_need_to_exit_early)
				goto release_c;
			...
			}
		}
	}
	...
	ret = our_result;

release_c:
	free(c);
release_b:
	free(b);
release_a:
	free(a);
release_none:
	return ret;
}

In short:

- one exit point
- one allocation / one free per resource
- release resources in reverse order
- code statically knows what needs releasing
Laurent Charignon - March 25, 2015, 7:35 p.m.
Hi,

I switched to using Py_ssize_t in that case.

All the longs in this patch are indexing rev number and not file offset so
it should be good.

Anyway, we probably should change all the longs in our C code to
Py_ssize_t, we have 19 of them so it shouldn¹t be too hard.
Do you think that it is a good approach?

Thanks,

Laurent

On 3/20/15, 2:30 PM, "Adrian Buehlmann" <adrian@cadifra.com> wrote:

>On 2015-03-20 22:08, Laurent Charignon wrote:
>> # HG changeset patch
>> # User Laurent Charignon <lcharignon@fb.com>
>> # Date 1426874708 25200
>> #      Fri Mar 20 11:05:08 2015 -0700
>> # Node ID 1c9600e6ceab57b48827292c7b01ed9a625da1fd
>> # Parent  b7f936f47f2b104a60840bae571e009742126afc
>> phase: compute phases in C
>> 
>> Previously, the phase computation would grow much slower as the oldest
>>draft
>> commit in the repository grew older (which is very common in repos with
>>evolve
>> on) and the number of commits increase.
>> By rewriting the computation in C we can speed it up from 700ms to 7ms
>>on
>> a large repository whose oldest draft commit is a year old.
>> 
>> diff --git a/mercurial/parsers.c b/mercurial/parsers.c
>> --- a/mercurial/parsers.c
>> +++ b/mercurial/parsers.c
>> @@ -13,6 +13,7 @@
>>  #include <string.h>
>>  
>>  #include "util.h"
>> +#define MIN(a, b) (((a)<(b))?(a):(b))
>>  
>>  static char *versionerrortext = "Python minor version mismatch";
>>  
>> @@ -911,6 +912,143 @@
>>  	}
>>  }
>>  
>> +static long add_roots_get_min(indexObject *self, PyObject *list, int
>>marker,
>> +                              char *phases)
>> +{
>> +	if (PyList_Size(list) != 0) {
>> +		long min_idx = index_length(self) + 1;
>
>In case anyone still cares about the Windows platform: C's 'long' type
>is notoriously problematic, as Windows uses the LLP64 model for 64-bit
>target, which means long is 32 bit - which may surprise Linux folks.
Adrian Buehlmann - March 25, 2015, 8:30 p.m.
On 2015-03-25 20:35, Laurent Charignon wrote:
> Hi,
> 
> I switched to using Py_ssize_t in that case.
> 
> All the longs in this patch are indexing rev number and not file offset so
> it should be good.
> 
> Anyway, we probably should change all the longs in our C code to
> Py_ssize_t, we have 19 of them so it shouldn¹t be too hard.
> Do you think that it is a good approach?

  https://www.python.org/dev/peps/pep-0353/

explains the rationale for Py_ssize_t. It contains the statement:

  "A new type Py_ssize_t is introduced, which has the same size as
   the compiler's size_t type, but is signed."

Which is great, because size_t is guaranteed to be 64 bits on Windows x64.

The only problem I can think of is Python <2.5, which lacks Py_ssize_t.
We currently have a

  typedef int Py_ssize_t;

for that case in util.h. So, using Py_ssize_t instead of long might be a
problem for x64 non-Windows platforms using Python <2.5.

For Windows, Python 2.4 is totally irrelevant in practice, as I can't
think of anyone trying to be so insane to use this combination.
Matt Harbison - March 26, 2015, 12:59 a.m.
On Wed, 25 Mar 2015 16:30:54 -0400, Adrian Buehlmann <adrian@cadifra.com>  
wrote:

> On 2015-03-25 20:35, Laurent Charignon wrote:
>> Hi,
>>
>> I switched to using Py_ssize_t in that case.
>>
>> All the longs in this patch are indexing rev number and not file offset  
>> so
>> it should be good.
>>
>> Anyway, we probably should change all the longs in our C code to
>> Py_ssize_t, we have 19 of them so it shouldn¹t be too hard.
>> Do you think that it is a good approach?
>
>   https://www.python.org/dev/peps/pep-0353/
>
> explains the rationale for Py_ssize_t. It contains the statement:
>
>   "A new type Py_ssize_t is introduced, which has the same size as
>    the compiler's size_t type, but is signed."
>
> Which is great, because size_t is guaranteed to be 64 bits on Windows  
> x64.
>
> The only problem I can think of is Python <2.5, which lacks Py_ssize_t.
> We currently have a
>
>   typedef int Py_ssize_t;
>
> for that case in util.h. So, using Py_ssize_t instead of long might be a
> problem for x64 non-Windows platforms using Python <2.5.
>
> For Windows, Python 2.4 is totally irrelevant in practice, as I can't
> think of anyone trying to be so insane to use this combination.

I wonder why this wasn't cased out with the preprocessor like it is in the  
pyport.h header.

Related to Py_ssize_t, I get signed/unsigned comparison warnings in  
pathencode.c when compiling with MSVC 2008 in cmd.exe, but not with 'make  
local' in MinGW (on Win7 x64).  It looks like there are a handful of  
places where Py_ssize_t is passed to a size_t method arg, and then  
compared to another Py_ssize_t.  The typedef above shouldn't be in play,  
because I have 2.7.

I'll submit a patch to s/size_t/Py_ssize_t/g (for method params) once I  
trace each call, back to the beginning and make sure it originates with a  
Py_ssize_t.

--Matt

Patch

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -13,6 +13,7 @@ 
 #include <string.h>
 
 #include "util.h"
+#define MIN(a, b) (((a)<(b))?(a):(b))
 
 static char *versionerrortext = "Python minor version mismatch";
 
@@ -911,6 +912,143 @@ 
 	}
 }
 
+static long add_roots_get_min(indexObject *self, PyObject *list, int marker,
+                              char *phases)
+{
+	if (PyList_Size(list) != 0) {
+		long min_idx = index_length(self) + 1;
+		PyObject *iter = PyObject_GetIter(list);
+		if (iter == NULL) {
+			return -2;
+		}
+		PyObject *iter_item;
+		long iter_item_long;
+		while ((iter_item = PyIter_Next(iter)))
+		{
+			iter_item_long = PyInt_AS_LONG(iter_item);
+			Py_DECREF(iter_item);
+			if (iter_item_long < min_idx) {
+				min_idx = iter_item_long;
+			}
+			phases[iter_item_long] = marker;
+		}
+		Py_DECREF(iter);
+
+		return min_idx;
+	} else {
+		return index_length(self);
+	}
+}
+
+static inline void set_phase_from_parents(char *phases, int parent_1,
+                                          int parent_2, int i )
+{
+	if ((parent_1 >= 0 && phases[parent_1] > phases[i])) {
+		phases[i] = phases[parent_1];
+	}
+	if ((parent_2 >= 0 && phases[parent_2] > phases[i])) {
+		phases[i] = phases[parent_2];
+	}
+}
+
+static PyObject *compute_phases(indexObject *self, PyObject *args)
+{
+	char *phases = NULL;
+	PyObject **phasemarkers = NULL;
+	PyObject *roots = Py_None;
+
+	/* Argument validation */
+	if (!PyArg_ParseTuple(args, "O", &roots)) {
+		goto bail;
+	}
+	if (roots == NULL || !PyList_Check(roots)) {
+		goto bail;
+	}
+
+	Py_ssize_t addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
+	Py_ssize_t len = index_length(self) - 1;
+	phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
+
+	/* Put the phase information of all the roots in phases */
+	Py_ssize_t numphase = PyList_Size(roots)+1;
+	Py_ssize_t minrevallphases = len + 1;
+	Py_ssize_t i = 0;
+	for (i = 0; i < numphase-1; i++) {
+		PyObject *phaseroots = PyList_GetItem(roots, i);
+
+		if (!PyList_Check(phaseroots)) {
+			Py_XDECREF(phaseroots);
+			goto bail;
+		}
+
+		Py_ssize_t minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
+		/* Error from add_roots_get_min */
+		if (minrevphase == -2 ) {
+			Py_XDECREF(phaseroots);
+			goto bail;
+		}
+		minrevallphases = MIN(minrevallphases, minrevphase);
+	}
+
+	/* Propagate the phase information from the roots to the revs */
+	if (minrevallphases != -1) {
+		for (i = minrevallphases; i < self->raw_length; i++) {
+			const char *data = index_deref(self, i);
+			set_phase_from_parents(phases, getbe32(data+24), getbe32(data+28), i);
+		}
+
+		for (i = 0; i < addlen; i++) {
+			PyObject *rev = PyList_GET_ITEM(self->added, i);
+			PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
+			PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
+			long parent_1, parent_2;
+
+			if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
+				PyErr_SetString(PyExc_TypeError,
+						"revlog parents are invalid");
+				goto bail;
+			}
+			parent_1 = PyInt_AS_LONG(p1);
+			parent_2 = PyInt_AS_LONG(p2);
+			set_phase_from_parents(phases, parent_1, parent_2, i+self->raw_length);
+		}
+	}
+
+	/* Build list of markers to avoid creating python objects repeatedly */
+	phasemarkers = calloc(numphase, sizeof(PyObject *));
+	if (phasemarkers == NULL) {
+		goto bail;
+	}
+	for (i = 0; i < numphase; i++) {
+		phasemarkers[i] = PyInt_FromLong(i);
+	}
+
+	/* Transform phase list to a python list */
+	PyObject *phaseslist = PyList_New(len);
+	Py_INCREF(phaseslist);
+	if (phaseslist == NULL) {
+		Py_XDECREF(phaseslist);
+		goto bail;
+	}
+	for (i = 0; i < len; i++) {
+		PyList_SET_ITEM(phaseslist, i, phasemarkers[(int)phases[i]]);
+	}
+
+	/* Cleanup and return */
+	free(phases);
+	for (i = 0; i < numphase; i++) {
+		Py_XDECREF(phasemarkers[i]);
+	}
+	free(phasemarkers);
+	return phaseslist;
+
+bail:
+	if (phases != NULL) {
+		free(phases);
+	}
+	return Py_None;
+}
+
 static PyObject *index_headrevs(indexObject *self, PyObject *args)
 {
 	Py_ssize_t i, len, addlen;
@@ -2043,6 +2181,8 @@ 
 	 "clear the index caches"},
 	{"get", (PyCFunction)index_m_get, METH_VARARGS,
 	 "get an index entry"},
+	{"computephases", compute_phases, METH_VARARGS,
+		"compute phases"},
 	{"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
 	 "get head revisions"}, /* Can do filtering since 3.2 */
 	{"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,