Patchwork obsolete: use C code for headrevs calculation

login
register
mail settings
Submitter Durham Goode
Date Sept. 17, 2014, 2:26 a.m.
Message ID <6c5330a7df580468bb0c.1410920808@dev2000.prn2.facebook.com>
Download mbox | patch
Permalink /patch/5838/
State Superseded
Headers show

Comments

Durham Goode - Sept. 17, 2014, 2:26 a.m.
# HG changeset patch
# User Durham Goode <durham@fb.com>
# Date 1410908601 25200
#      Tue Sep 16 16:03:21 2014 -0700
# Node ID 6c5330a7df580468bb0ca85dda11f2a2d14eed89
# Parent  48791c2bea1ceda4e4f28bc11651e281d636ce1a
obsolete: use C code for headrevs calculation

Previously, if there were filtered revs the repository could not use the C fast
path for computing the head revs in the changelog. This slowed down many
operations in large repositories.

This adds the ability to filter revs to the C fast path. This speeds up histedit
on repositories with filtered revs by 30% (13s to 9s). This could be improved
further by sorting the filtered revs and walking the sorted list while we walk
the changelog, but even this initial version that just calls __contains__ is
still massively faster.
Durham Goode - Sept. 17, 2014, 3:19 a.m.
On 9/16/14, 7:26 PM, Durham Goode wrote:
> # HG changeset patch
> # User Durham Goode <durham@fb.com>
> # Date 1410908601 25200
> #      Tue Sep 16 16:03:21 2014 -0700
> # Node ID 6c5330a7df580468bb0ca85dda11f2a2d14eed89
> # Parent  48791c2bea1ceda4e4f28bc11651e281d636ce1a
> obsolete: use C code for headrevs calculation
>
> Previously, if there were filtered revs the repository could not use the C fast
> path for computing the head revs in the changelog. This slowed down many
> operations in large repositories.
>
> This adds the ability to filter revs to the C fast path. This speeds up histedit
> on repositories with filtered revs by 30% (13s to 9s). This could be improved
> further by sorting the filtered revs and walking the sorted list while we walk
> the changelog, but even this initial version that just calls __contains__ is
> still massively faster.
>
I’ve been told I need to make this backwards compatible with old 
Mercurial calls. So I’ll resend later.

Patch

diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -171,8 +171,12 @@ 
 
     def headrevs(self):
         if self.filteredrevs:
-            # XXX we should fix and use the C version
-            return self._headrevs()
+            try:
+                # C fast path; filteredrevs is passed so it can skip them
+                return self.index.headrevs(self.filteredrevs)
+            except AttributeError:
+                return self._headrevs()
+
         return super(changelog, self).headrevs()
 
     def strip(self, *args, **kwargs):
diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -508,6 +508,7 @@ 
 	Py_ssize_t length;     /* current number of elements */
 	PyObject *added;       /* populated on demand */
 	PyObject *headrevs;    /* cache, invalidated on changes */
+	PyObject *filteredrevs;/* filtered revs set */
 	nodetree *nt;          /* base-16 trie */
 	int ntlength;          /* # nodes in use */
 	int ntcapacity;        /* # nodes allocated */
@@ -823,15 +824,40 @@ 
 	return newlist;
 }
 
-static PyObject *index_headrevs(indexObject *self)
+static int check_filter(PyObject *filter, Py_ssize_t arg) { /* 1 if filter(arg) is truthy, else 0; a NULL filter always yields 0 */
+	if (filter) {
+		PyObject *arglist = Py_BuildValue("(i)", arg); /* NOTE(review): "i" is the C int format unit but arg is Py_ssize_t; result is not NULL-checked */
+		PyObject *isfiltered = PyEval_CallObject(filter, arglist); /* NOTE(review): presumably NULL if the call raises; not checked before use below */
+		Py_DECREF(arglist);
+		if (PyObject_IsTrue(isfiltered)) { /* NOTE(review): any non-zero result counts as filtered, including an error return of -1 */
+			Py_DECREF(isfiltered);
+			return 1;
+		}
+		Py_DECREF(isfiltered);
+	}
+	return 0;
+}
+
+static PyObject *index_headrevs(indexObject *self, PyObject *filteredrevs)
 {
 	Py_ssize_t i, len, addlen;
 	char *nothead = NULL;
 	PyObject *heads;
 
-	if (self->headrevs)
+	if (self->headrevs && filteredrevs == self->filteredrevs)
 		return list_copy(self->headrevs);
 
+	if (self->filteredrevs) {
+		Py_DECREF(self->filteredrevs);
+	}
+	self->filteredrevs = filteredrevs;
+	Py_INCREF(filteredrevs);
+
+	PyObject *filter = NULL;
+	if (filteredrevs != Py_None) {
+		filter = PyObject_GetAttrString(filteredrevs, "__contains__");
+	}
+
 	len = index_length(self) - 1;
 	heads = PyList_New(0);
 	if (heads == NULL)
@@ -850,6 +876,11 @@ 
 		goto bail;
 
 	for (i = 0; i < self->raw_length; i++) {
+		if (check_filter(filter, i)) {
+			nothead[i] = 1;
+			continue;
+		}
+
 		const char *data = index_deref(self, i);
 		int parent_1 = getbe32(data + 24);
 		int parent_2 = getbe32(data + 28);
@@ -872,6 +903,12 @@ 
 					"revlog parents are invalid");
 			goto bail;
 		}
+
+		if (check_filter(filter, i)) {
+			nothead[i] = 1;
+			continue;
+		}
+
 		parent_1 = PyInt_AS_LONG(p1);
 		parent_2 = PyInt_AS_LONG(p2);
 		if (parent_1 >= 0)
@@ -1896,6 +1933,7 @@ 
 	self->cache = NULL;
 	self->data = NULL;
 	self->headrevs = NULL;
+	self->filteredrevs = NULL;
 	self->nt = NULL;
 	self->offsets = NULL;
 
@@ -1945,6 +1983,7 @@ 
 static void index_dealloc(indexObject *self)
 {
 	_index_clearcaches(self);
+	Py_XDECREF(self->filteredrevs);
 	Py_XDECREF(self->data);
 	Py_XDECREF(self->added);
 	PyObject_Del(self);
@@ -1977,7 +2016,7 @@ 
 	 "clear the index caches"},
 	{"get", (PyCFunction)index_m_get, METH_VARARGS,
 	 "get an index entry"},
-	{"headrevs", (PyCFunction)index_headrevs, METH_NOARGS,
+	{"headrevs", (PyCFunction)index_headrevs, METH_O,
 	 "get head revisions"},
 	{"insert", (PyCFunction)index_insert, METH_VARARGS,
 	 "insert an index entry"},
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -661,7 +661,7 @@ 
 
     def headrevs(self):
         try:
-            return self.index.headrevs()
+            return self.index.headrevs(None)  # None: no filtered revs
         except AttributeError:
             return self._headrevs()