Patchwork [1,of,2] phase: compute phases in C

login
register
mail settings
Submitter Laurent Charignon
Date March 20, 2015, 6:20 p.m.
Message ID <40030781f44b76e1096e.1426875615@dev919.prn2.facebook.com>
Download mbox | patch
Permalink /patch/8202/
State Superseded
Headers show

Comments

Laurent Charignon - March 20, 2015, 6:20 p.m.
# HG changeset patch
# User Laurent Charignon <lcharignon@fb.com>
# Date 1426874708 25200
#      Fri Mar 20 11:05:08 2015 -0700
# Node ID 40030781f44b76e1096eca4044498b9ba7c5aeab
# Parent  b7f936f47f2b104a60840bae571e009742126afc
phase: compute phases in C

C version of the algorithm to compute the phases, slightly modified from
the python version to be more efficient
Bryan O'Sullivan - March 20, 2015, 10:14 p.m.
On Fri, Mar 20, 2015 at 11:20 AM, Laurent Charignon <lcharignon@fb.com>
wrote:

> phase: compute phases in C
>

Some commentary below.


> +               if (iter == NULL) {
> +                       return -2;
> +               }
>

We generally don't wrap one-line blocks in curly braces.


> +                       if (iter_item_long < min_idx) {
> +                               min_idx = iter_item_long;
> +                       }
>

See above.


> +       } else {
> +               return index_length(self);
> +       }
>

Drop the enclosing else block and just use a plain return here.


> +       if ((parent_1 >= 0 && phases[parent_1] > phases[i])) {
> +               phases[i] = phases[parent_1];
> +       }
> +       if ((parent_2 >= 0 && phases[parent_2] > phases[i])) {
> +               phases[i] = phases[parent_2];
> +       }
>

See above about single-line curlies (I'm going to stop pointing this out).


> +       /* Argument validation */
>

Unnecessary comment :-)


> +       phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft,
> 2: secret} */
>

Unchecked memory allocation needs a check.


> +       /* Transform phase list to a python list */
> +       PyObject *phaseslist = PyList_New(len);
> +       Py_INCREF(phaseslist);
>

Why are you incrementing the refcount to 2 here? This doesn't make obvious
sense.

A general comment: the cleanup management in this function is quite
difficult to understand due to the multiple exits. I don't have time to
reason through it to trust that it's correct.

Patch

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -13,6 +13,7 @@ 
 #include <string.h>
 
 #include "util.h"
+#define MIN(a, b) (((a)<(b))?(a):(b))
 
 static char *versionerrortext = "Python minor version mismatch";
 
@@ -911,6 +912,143 @@ 
 	}
 }
 
+static long add_roots_get_min(indexObject *self, PyObject *list, int marker,
+                              char *phases)
+{
+	if (PyList_Size(list) != 0) {
+		long min_idx = index_length(self) + 1;
+		PyObject *iter = PyObject_GetIter(list);
+		if (iter == NULL) {
+			return -2;
+		}
+		PyObject *iter_item;
+		long iter_item_long;
+		while ((iter_item = PyIter_Next(iter)))
+		{
+			iter_item_long = PyInt_AS_LONG(iter_item);
+			Py_DECREF(iter_item);
+			if (iter_item_long < min_idx) {
+				min_idx = iter_item_long;
+			}
+			phases[iter_item_long] = marker;
+		}
+		Py_DECREF(iter);
+
+		return min_idx;
+	} else {
+		return index_length(self);
+	}
+}
+
+static inline void set_phase_from_parents(char *phases, int parent_1,
+                                          int parent_2, int i )
+{
+	if ((parent_1 >= 0 && phases[parent_1] > phases[i])) {
+		phases[i] = phases[parent_1];
+	}
+	if ((parent_2 >= 0 && phases[parent_2] > phases[i])) {
+		phases[i] = phases[parent_2];
+	}
+}
+
+static PyObject *compute_phases(indexObject *self, PyObject *args)
+{
+	char *phases = NULL;
+	PyObject **phasemarkers = NULL;
+	PyObject *roots = Py_None;
+
+	/* Argument validation */
+	if (!PyArg_ParseTuple(args, "O", &roots)) {
+		goto bail;
+	}
+	if (roots == NULL || !PyList_Check(roots)) {
+		goto bail;
+	}
+
+	Py_ssize_t addlen = self->added ? PyList_GET_SIZE(self->added) : 0;
+	Py_ssize_t len = index_length(self) - 1;
+	phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */
+
+	/* Put the phase information of all the roots in phases */
+	Py_ssize_t numphase = PyList_Size(roots)+1;
+	Py_ssize_t minrevallphases = len + 1;
+	Py_ssize_t i = 0;
+	for (i = 0; i < numphase-1; i++) {
+		PyObject *phaseroots = PyList_GetItem(roots, i);
+
+		if (!PyList_Check(phaseroots)) {
+			Py_XDECREF(phaseroots);
+			goto bail;
+		}
+
+		Py_ssize_t minrevphase = add_roots_get_min(self, phaseroots, i+1, phases);
+		/* Error from add_roots_get_min */
+		if (minrevphase == -2 ) {
+			Py_XDECREF(phaseroots);
+			goto bail;
+		}
+		minrevallphases = MIN(minrevallphases, minrevphase);
+	}
+
+	/* Propagate the phase information from the roots to the revs */
+	if (minrevallphases != -1) {
+		for (i = minrevallphases; i < self->raw_length; i++) {
+			const char *data = index_deref(self, i);
+			set_phase_from_parents(phases, getbe32(data+24), getbe32(data+28), i);
+		}
+
+		for (i = 0; i < addlen; i++) {
+			PyObject *rev = PyList_GET_ITEM(self->added, i);
+			PyObject *p1 = PyTuple_GET_ITEM(rev, 5);
+			PyObject *p2 = PyTuple_GET_ITEM(rev, 6);
+			long parent_1, parent_2;
+
+			if (!PyInt_Check(p1) || !PyInt_Check(p2)) {
+				PyErr_SetString(PyExc_TypeError,
+						"revlog parents are invalid");
+				goto bail;
+			}
+			parent_1 = PyInt_AS_LONG(p1);
+			parent_2 = PyInt_AS_LONG(p2);
+			set_phase_from_parents(phases, parent_1, parent_2, i+self->raw_length);
+		}
+	}
+
+	/* Build list of markers to avoid creating python objects repeatedly */
+	phasemarkers = calloc(numphase, sizeof(PyObject *));
+	if (phasemarkers == NULL) {
+		goto bail;
+	}
+	for (i = 0; i < numphase; i++) {
+		phasemarkers[i] = PyInt_FromLong(i);
+	}
+
+	/* Transform phase list to a python list */
+	PyObject *phaseslist = PyList_New(len);
+	Py_INCREF(phaseslist);
+	if (phaseslist == NULL) {
+		Py_XDECREF(phaseslist);
+		goto bail;
+	}
+	for (i = 0; i < len; i++) {
+		PyList_SET_ITEM(phaseslist, i, phasemarkers[(int)phases[i]]);
+	}
+
+	/* Cleanup and return */
+	free(phases);
+	for (i = 0; i < numphase; i++) {
+		Py_XDECREF(phasemarkers[i]);
+	}
+	free(phasemarkers);
+	return phaseslist;
+
+bail:
+	if (phases != NULL) {
+		free(phases);
+	}
+	return Py_None;
+}
+
 static PyObject *index_headrevs(indexObject *self, PyObject *args)
 {
 	Py_ssize_t i, len, addlen;
@@ -2043,6 +2181,8 @@ 
 	 "clear the index caches"},
 	{"get", (PyCFunction)index_m_get, METH_VARARGS,
 	 "get an index entry"},
+	{"computephases", compute_phases, METH_VARARGS,
+		"compute phases"},
 	{"headrevs", (PyCFunction)index_headrevs, METH_VARARGS,
 	 "get head revisions"}, /* Can do filtering since 3.2 */
 	{"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS,