Patchwork [4,of,5] parsers: inline fields of dirstate values in C version

login
register
mail settings
Submitter Siddharth Agarwal
Date May 28, 2014, 5:05 a.m.
Message ID <9ed8b0fa79cfa5f22093.1401253520@dev1738.prn1.facebook.com>
Download mbox | patch
Permalink /patch/4875/
State Superseded
Headers show

Comments

Siddharth Agarwal - May 28, 2014, 5:05 a.m.
# HG changeset patch
# User Siddharth Agarwal <sid0@fb.com>
# Date 1401226061 25200
#      Tue May 27 14:27:41 2014 -0700
# Node ID 9ed8b0fa79cfa5f220930c92ac79b93f6d6bd8ee
# Parent  15a5e7f16a39f1991f2c510a8b8bccefbccfcc6c
parsers: inline fields of dirstate values in C version

Previously, while unpacking the dirstate we'd create 3-4 new CPython objects
for most dirstate values:

- the state is a single character string, which is pooled by CPython
- the mode is a new object if it isn't 0 due to being in the lookup set
- the size is a new object if it is greater than 255
- the mtime is a new object if it isn't -1 due to being in the lookup set
- the tuple to contain them all

In some cases such as regular hg status, we actually look at all the objects.
In other cases like hg add, hg status for a subdirectory, or hg status with the
third-party hgwatchman enabled, we look at almost none of the objects.

This patch eliminates most object creation in these cases by defining a custom
C struct that is exposed to Python with an interface similar to a tuple. Only
when tuple elements are actually requested are the respective objects created.

The gains, where they're expected, are significant. The following tests are run
against a working copy with over 270,000 files.

parse_dirstate becomes significantly faster:

$ hg perfdirstate
before: wall 0.186437 comb 0.180000 user 0.160000 sys 0.020000 (best of 35)
after:  wall 0.093158 comb 0.100000 user 0.090000 sys 0.010000 (best of 95)

and as a result, several commands benefit:

$ time hg status  # with hgwatchman enabled
before: 0.42s user 0.14s system 99% cpu 0.563 total
after:  0.34s user 0.12s system 99% cpu 0.471 total

$ time hg add new-file
before: 0.85s user 0.18s system 99% cpu 1.033 total
after:  0.76s user 0.17s system 99% cpu 0.931 total

There is a slight regression in regular status performance, but this is fixed
in an upcoming patch.

Patch

diff --git a/mercurial/dirs.c b/mercurial/dirs.c
--- a/mercurial/dirs.c
+++ b/mercurial/dirs.c
@@ -138,25 +138,12 @@ 
 			return -1;
 		}
 		if (skipchar) {
-			PyObject *st;
-
-			if (!PyTuple_Check(value) ||
-			    PyTuple_GET_SIZE(value) == 0) {
+			if (!dirstate_tuple_check(value)) {
 				PyErr_SetString(PyExc_TypeError,
-						"expected non-empty tuple");
+						"expected a dirstate tuple");
 				return -1;
 			}
-
-			st = PyTuple_GET_ITEM(value, 0);
-
-			if (!PyString_Check(st) || PyString_GET_SIZE(st) == 0) {
-				PyErr_SetString(PyExc_TypeError,
-						"expected non-empty string "
-						"at tuple index 0");
-				return -1;
-			}
-
-			if (PyString_AS_STRING(st)[0] == skipchar)
+			if (((dirstateTupleObject *)value)->state == skipchar)
 				continue;
 		}
 
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -14,9 +14,7 @@ 
 filecache = scmutil.filecache
 _rangemask = 0x7fffffff
 
-def dirstatetuple(*x):
-    # x is a tuple
-    return x
+dirstatetuple = parsers.dirstatetuple
 
 class repocache(filecache):
     """filecache for files in .hg/"""
diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -153,6 +153,122 @@ 
 	return NULL;
 }
 
+static inline dirstateTupleObject *make_dirstate_tuple(char state, int mode,
+						       int size, int mtime)
+{
+	dirstateTupleObject *t = PyObject_New(dirstateTupleObject,
+					      &dirstateTupleType);
+	if (!t)
+		return NULL;
+	t->state = state;
+	t->mode = mode;
+	t->size = size;
+	t->mtime = mtime;
+	return t;
+}
+
+static PyObject *dirstate_tuple_new(PyTypeObject *subtype, PyObject *args,
+				    PyObject *kwds)
+{
+	/* We do all the initialization here and not a tp_init function because
+	 * dirstate_tuple is immutable. */
+	dirstateTupleObject *t;
+	char state;
+	int size, mode, mtime;
+	if (!PyArg_ParseTuple(args, "ciii", &state, &mode, &size, &mtime))
+		return NULL;
+
+	t = (dirstateTupleObject *)subtype->tp_alloc(subtype, 1);
+	if (!t)
+		return NULL;
+	t->state = state;
+	t->mode = mode;
+	t->size = size;
+	t->mtime = mtime;
+
+	return (PyObject *)t;
+}
+
+static void dirstate_tuple_dealloc(PyObject *o)
+{
+	PyObject_Del(o);
+}
+
+static Py_ssize_t dirstate_tuple_length(PyObject *o)
+{
+	return 4;
+}
+
+static PyObject *dirstate_tuple_item(PyObject *o, Py_ssize_t i)
+{
+	dirstateTupleObject *t = (dirstateTupleObject *)o;
+	switch (i) {
+	case 0:
+		return PyBytes_FromStringAndSize(&t->state, 1);
+	case 1:
+		return PyInt_FromLong(t->mode);
+	case 2:
+		return PyInt_FromLong(t->size);
+	case 3:
+		return PyInt_FromLong(t->mtime);
+	default:
+		PyErr_SetString(PyExc_IndexError, "index out of range");
+		return NULL;
+	}
+}
+
+static PySequenceMethods dirstate_tuple_sq = {
+	dirstate_tuple_length,     /* sq_length */
+	0,                         /* sq_concat */
+	0,                         /* sq_repeat */
+	dirstate_tuple_item,       /* sq_item */
+	0,                         /* sq_ass_item */
+	0,                         /* sq_contains */
+	0,                         /* sq_inplace_concat */
+	0                          /* sq_inplace_repeat */
+};
+
+PyTypeObject dirstateTupleType = {
+	PyVarObject_HEAD_INIT(NULL, 0)
+	"dirstate_tuple",          /* tp_name */
+	sizeof(dirstateTupleObject),/* tp_basicsize */
+	0,                         /* tp_itemsize */
+	(destructor)dirstate_tuple_dealloc, /* tp_dealloc */
+	0,                         /* tp_print */
+	0,                         /* tp_getattr */
+	0,                         /* tp_setattr */
+	0,                         /* tp_compare */
+	0,                         /* tp_repr */
+	0,                         /* tp_as_number */
+	&dirstate_tuple_sq,        /* tp_as_sequence */
+	0,                         /* tp_as_mapping */
+	0,                         /* tp_hash  */
+	0,                         /* tp_call */
+	0,                         /* tp_str */
+	0,                         /* tp_getattro */
+	0,                         /* tp_setattro */
+	0,                         /* tp_as_buffer */
+	Py_TPFLAGS_DEFAULT,        /* tp_flags */
+	"dirstate tuple",          /* tp_doc */
+	0,                         /* tp_traverse */
+	0,                         /* tp_clear */
+	0,                         /* tp_richcompare */
+	0,                         /* tp_weaklistoffset */
+	0,                         /* tp_iter */
+	0,                         /* tp_iternext */
+	0,                         /* tp_methods */
+	0,                         /* tp_members */
+	0,                         /* tp_getset */
+	0,                         /* tp_base */
+	0,                         /* tp_dict */
+	0,                         /* tp_descr_get */
+	0,                         /* tp_descr_set */
+	0,                         /* tp_dictoffset */
+	0,                         /* tp_init */
+	0,                         /* tp_alloc */
+	dirstate_tuple_new,        /* tp_new */
+};
+
 static PyObject *parse_dirstate(PyObject *self, PyObject *args)
 {
 	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
@@ -192,11 +308,8 @@ 
 			goto quit;
 		}
 
-		entry = Py_BuildValue("ciii", state, mode, size, mtime);
-		if (!entry)
-			goto quit;
-		PyObject_GC_UnTrack(entry); /* don't waste time with this */
-
+		entry = (PyObject *)make_dirstate_tuple(state, mode, size,
+							mtime);
 		cpos = memchr(cur, 0, flen);
 		if (cpos) {
 			fname = PyBytes_FromStringAndSize(cur, cpos - cur);
@@ -316,33 +429,30 @@ 
 	p += 20;
 
 	for (pos = 0; PyDict_Next(map, &pos, &k, &v); ) {
+		dirstateTupleObject *tuple;
+		char state;
 		uint32_t mode, size, mtime;
 		Py_ssize_t len, l;
 		PyObject *o;
-		char *s, *t;
+		char *t;
 
-		if (!PyTuple_Check(v) || PyTuple_GET_SIZE(v) != 4) {
-			PyErr_SetString(PyExc_TypeError, "expected a 4-tuple");
+		if (!dirstate_tuple_check(v)) {
+			PyErr_SetString(PyExc_TypeError,
+					"expected a dirstate tuple");
 			goto bail;
 		}
-		o = PyTuple_GET_ITEM(v, 0);
-		if (PyString_AsStringAndSize(o, &s, &l) == -1 || l != 1) {
-			PyErr_SetString(PyExc_TypeError, "expected one byte");
-			goto bail;
-		}
-		*p++ = *s;
-		if (getintat(v, 1, &mode) == -1)
-			goto bail;
-		if (getintat(v, 2, &size) == -1)
-			goto bail;
-		if (getintat(v, 3, &mtime) == -1)
-			goto bail;
-		if (*s == 'n' && mtime == (uint32_t)now) {
+		tuple = (dirstateTupleObject *)v;
+
+		state = tuple->state;
+		mode = tuple->mode;
+		size = tuple->size;
+		mtime = tuple->mtime;
+		if (state == 'n' && mtime == (uint32_t)now) {
 			/* See pure/parsers.py:pack_dirstate for why we do
 			 * this. */
 			mtime = -1;
-			mtime_unset = Py_BuildValue(
-				"ciii", *s, mode, size, mtime);
+			mtime_unset = (PyObject *)make_dirstate_tuple(
+				state, mode, size, mtime);
 			if (!mtime_unset)
 				goto bail;
 			if (PyDict_SetItem(map, k, mtime_unset) == -1)
@@ -350,6 +460,7 @@ 
 			Py_DECREF(mtime_unset);
 			mtime_unset = NULL;
 		}
+		*p++ = state;
 		putbe32(mode, p);
 		putbe32(size, p + 4);
 		putbe32(mtime, p + 8);
@@ -2025,11 +2136,14 @@ 
 	dirs_module_init(mod);
 
 	indexType.tp_new = PyType_GenericNew;
-	if (PyType_Ready(&indexType) < 0)
+	if (PyType_Ready(&indexType) < 0 ||
+	    PyType_Ready(&dirstateTupleType) < 0)
 		return;
 	Py_INCREF(&indexType);
-
 	PyModule_AddObject(mod, "index", (PyObject *)&indexType);
+	Py_INCREF(&dirstateTupleType);
+	PyModule_AddObject(mod, "dirstatetuple",
+			   (PyObject *)&dirstateTupleType);
 
 	nullentry = Py_BuildValue("iiiiiiis#", 0, 0, 0,
 				  -1, -1, -1, -1, nullid, 20);
diff --git a/mercurial/pure/parsers.py b/mercurial/pure/parsers.py
--- a/mercurial/pure/parsers.py
+++ b/mercurial/pure/parsers.py
@@ -15,6 +15,12 @@ 
 _decompress = zlib.decompress
 _sha = util.sha1
 
+# Some code below makes tuples directly because it's more convenient. However,
+# code outside this module should always use dirstatetuple.
+def dirstatetuple(*x):
+    # x is a tuple
+    return x
+
 def parse_manifest(mfdict, fdict, lines):
     for l in lines.splitlines():
         f, n = l.split('\0')
@@ -104,7 +110,7 @@ 
             # dirstate, forcing future 'status' calls to compare the
             # contents of the file if the size is the same. This prevents
             # mistakenly treating such files as clean.
-            e = (e[0], e[1], e[2], -1)
+            e = dirstatetuple(e[0], e[1], e[2], -1)
             dmap[f] = e
 
         if f in copymap:
diff --git a/mercurial/util.h b/mercurial/util.h
--- a/mercurial/util.h
+++ b/mercurial/util.h
@@ -151,6 +151,17 @@ 
 #define inline __inline
 #endif
 
+typedef struct {
+	PyObject_HEAD
+	char state;
+	int mode;
+	int size;
+	int mtime;
+} dirstateTupleObject;
+
+PyTypeObject dirstateTupleType;
+#define dirstate_tuple_check(op) (Py_TYPE(op) == &dirstateTupleType)
+
 static inline uint32_t getbe32(const char *c)
 {
 	const unsigned char *d = (const unsigned char *)c;