Submitter | Laurent Charignon |
---|---|
Date | March 24, 2015, 6:25 p.m. |
Message ID | <3be72e200cf2e5e1d5ac.1427221540@dev919.prn2.facebook.com> |
Download | mbox | patch |
Permalink | /patch/8241/ |
State | Superseded |
Commit | 539b3c7eea4410b4da099e42d118464c43c8f842 |
Headers | show |
Comments
On Tue, 2015-03-24 at 11:25 -0700, Laurent Charignon wrote: > # HG changeset patch > # User Laurent Charignon <lcharignon@fb.com> > # Date 1427220009 25200 > # Tue Mar 24 11:00:09 2015 -0700 > # Node ID 3be72e200cf2e5e1d5aced8d5e952fa5084b7ac0 > # Parent b7f936f47f2b104a60840bae571e009742126afc > phase: compute phases in C > > Previously, the phase computation would grow much slower as the oldest draft > commit in the repository grew older (which is very common in repos with evolve > on) and the number of commits increase. > By rewriting the computation in C we can speed it up from 700ms to 7ms on > a large repository whose oldest draft commit is a year old. > +static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list, > + int marker, char *phases) > +{ > + Py_ssize_t min_idx = index_length(self) + 1; > + if (PyList_Size(list) != 0) { > + PyObject *iter = PyObject_GetIter(list); > + if (iter == NULL) > + return -2; > + PyObject *iter_item; > + long iter_item_long; The standard compiler to use with Python 2.x on Windows is VC 2.9, which doesn't support C99-era mixed code and declarations. All declarations need to be at the top of the function block. You might want to try manually compiling with gcc -std=c89 to see if any other issues pop up. We're also going to have a problem with Py_ssize_t, which doesn't exist in Py2.4. But you can ignore that as I'm going to try to add a compatibility typedef to util.h to fix that.
On Tue, Mar 24, 2015 at 02:25:29PM -0500, Matt Mackall wrote: > On Tue, 2015-03-24 at 11:25 -0700, Laurent Charignon wrote: > > # HG changeset patch > > # User Laurent Charignon <lcharignon@fb.com> > > # Date 1427220009 25200 > > # Tue Mar 24 11:00:09 2015 -0700 > > # Node ID 3be72e200cf2e5e1d5aced8d5e952fa5084b7ac0 > > # Parent b7f936f47f2b104a60840bae571e009742126afc > > phase: compute phases in C > > > > Previously, the phase computation would grow much slower as the oldest draft > > commit in the repository grew older (which is very common in repos with evolve > > on) and the number of commits increase. > > By rewriting the computation in C we can speed it up from 700ms to 7ms on > > a large repository whose oldest draft commit is a year old. > > > > +static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list, > > + int marker, char *phases) > > +{ > > + Py_ssize_t min_idx = index_length(self) + 1; > > + if (PyList_Size(list) != 0) { > > + PyObject *iter = PyObject_GetIter(list); > > + if (iter == NULL) > > + return -2; > > + PyObject *iter_item; > > + long iter_item_long; > > The standard compiler to use with Python 2.x on Windows is VC 2.9, which > doesn't support C99-era mixed code and declarations. All declarations > need to be at the top of the function block. You might want to try > manually compiling with gcc -std=c89 to see if any other issues pop up. I seem to recall trying this and it was failing all over the place on things that aren't actually problems. :( > > We're also going to have a problem with Py_ssize_t, which doesn't exist > in Py2.4. But you can ignore that as I'm going to try to add a > compatibility typedef to util.h to fix that. > > -- > Mathematics is the supreme nostalgia of our time. > > > _______________________________________________ > Mercurial-devel mailing list > Mercurial-devel@selenic.com > http://selenic.com/mailman/listinfo/mercurial-devel
A few minor comments below. On Tue, Mar 24, 2015 at 11:25 AM, Laurent Charignon <lcharignon@fb.com> wrote: > +static inline void set_phase_from_parents(char *phases, int parent_1, > + int parent_2, int i ) > +{ > + if ((parent_1 >= 0 && phases[parent_1] > phases[i])) > + phases[i] = phases[parent_1]; > + if ((parent_2 >= 0 && phases[parent_2] > phases[i])) > + phases[i] = phases[parent_2]; > +} > What's with the doubled parentheses above? > + > +static PyObject *compute_phases(indexObject *self, PyObject *args) > +{ > + /* Put the phase information of all the roots in phases */ > + Py_ssize_t numphase = PyList_Size(roots)+1; > You can use PyList_GET_SIZE here since you've already verified that roots is a list. > + for (i = 0; i < numphase-1; i++) { > + PyObject *phaseroots = PyList_GetItem(roots, i); > Likewise PyList_GET_ITEM. > + if (minrevphase == -2 ) /* Error from add_roots_get_min */ > Extra space hanging off the end of the expression.
Patch
diff --git a/mercurial/parsers.c b/mercurial/parsers.c --- a/mercurial/parsers.c +++ b/mercurial/parsers.c @@ -911,6 +911,102 @@ } } +static Py_ssize_t add_roots_get_min(indexObject *self, PyObject *list, + int marker, char *phases) +{ + Py_ssize_t min_idx = index_length(self) + 1; + if (PyList_Size(list) != 0) { + PyObject *iter = PyObject_GetIter(list); + if (iter == NULL) + return -2; + PyObject *iter_item; + long iter_item_long; + while ((iter_item = PyIter_Next(iter))) + { + iter_item_long = PyInt_AS_LONG(iter_item); + Py_DECREF(iter_item); + if (iter_item_long < min_idx) + min_idx = iter_item_long; + phases[iter_item_long] = marker; + } + Py_DECREF(iter); + } + + return min_idx; +} + +static inline void set_phase_from_parents(char *phases, int parent_1, + int parent_2, int i ) +{ + if ((parent_1 >= 0 && phases[parent_1] > phases[i])) + phases[i] = phases[parent_1]; + if ((parent_2 >= 0 && phases[parent_2] > phases[i])) + phases[i] = phases[parent_2]; +} + +static PyObject *compute_phases(indexObject *self, PyObject *args) +{ + char *phases = NULL; + PyObject **phasemarkers = NULL; + PyObject *roots = Py_None; + PyObject *phaseslist = NULL; + + if (!PyArg_ParseTuple(args, "O", &roots)) + goto release_none; + if (roots == NULL || !PyList_Check(roots)) + goto release_none; + + Py_ssize_t addlen = self->added ? PyList_GET_SIZE(self->added) : 0; + Py_ssize_t len = index_length(self) - 1; + phases = calloc(len, 1); /* phase per rev: {0: public, 1: draft, 2: secret} */ + if (phases == NULL) + goto release_none; + /* Put the phase information of all the roots in phases */ + Py_ssize_t numphase = PyList_Size(roots)+1; + Py_ssize_t minrevallphases = len + 1; + Py_ssize_t i = 0; + for (i = 0; i < numphase-1; i++) { + PyObject *phaseroots = PyList_GetItem(roots, i); + if (!PyList_Check(phaseroots)) + goto release_phases; + Py_ssize_t minrevphase = add_roots_get_min(self, phaseroots, i+1, phases); + if (minrevphase == -2 ) /* Error from add_roots_get_min */ + goto release_phases; + minrevallphases = MIN(minrevallphases, minrevphase); + } + /* Propagate the phase information from the roots to the revs */ + if (minrevallphases != -1) { + for (i = minrevallphases; i < self->raw_length; i++) { + const char *data = index_deref(self, i); + set_phase_from_parents(phases, getbe32(data+24), getbe32(data+28), i); + } + for (i = 0; i < addlen; i++) { + PyObject *rev = PyList_GET_ITEM(self->added, i); + PyObject *p1 = PyTuple_GET_ITEM(rev, 5); + PyObject *p2 = PyTuple_GET_ITEM(rev, 6); + long parent_1, parent_2; + if (!PyInt_Check(p1) || !PyInt_Check(p2)) { + PyErr_SetString(PyExc_TypeError, "revlog parents are invalid"); + goto release_phases; + } + parent_1 = PyInt_AS_LONG(p1); + parent_2 = PyInt_AS_LONG(p2); + set_phase_from_parents(phases, parent_1, parent_2, i+self->raw_length); + } + } + /* Transform phase list to a python list */ + phaseslist = PyList_New(len); + if (phaseslist == NULL) + goto release_phases; + for (i = 0; i < len; i++) + PyList_SET_ITEM(phaseslist, i, PyInt_FromLong(phases[i])); + +release_phases: + free(phases); +release_none: + return phaseslist; +} + static PyObject *index_headrevs(indexObject *self, PyObject *args) { Py_ssize_t i, len, addlen; @@ -2043,6 +2139,8 @@ "clear the index caches"}, {"get", (PyCFunction)index_m_get, METH_VARARGS, "get an index entry"}, + {"computephases", compute_phases, METH_VARARGS, + "compute phases"}, {"headrevs", (PyCFunction)index_headrevs, METH_VARARGS, "get head revisions"}, /* Can do filtering since 3.2 */ {"headrevsfiltered", (PyCFunction)index_headrevs, METH_VARARGS, diff --git a/mercurial/util.h b/mercurial/util.h --- a/mercurial/util.h +++ b/mercurial/util.h @@ -209,4 +209,5 @@ return ret; } +#define MIN(a, b) (((a)<(b))?(a):(b)) #endif /* _HG_UTIL_H_ */