Patchwork [4,of,6] dirstate: perform dirs calculation in C

login
register
mail settings
Submitter Bryan O'Sullivan
Date March 29, 2013, 1:22 a.m.
Message ID <492566bf24e94dcbb87b.1364520166@australite.local>
Download mbox | patch
Permalink /patch/1211/
State Superseded
Headers show

Comments

Bryan O'Sullivan - March 29, 2013, 1:22 a.m.
# HG changeset patch
# User Bryan O'Sullivan <bryano@fb.com>
# Date 1364520157 25200
#      Thu Mar 28 18:22:37 2013 -0700
# Node ID 492566bf24e94dcbb87bdfcfb10c102742c90ab5
# Parent  9479dece303d06808cce9e2e3d0a196511fe6ba8
dirstate: perform dirs calculation in C

This speeds up the computation substantially.

perfdirs performance in a working dir with 170,000 files:

  Python  650  msec
  C       249

This is a naive translation of the Python code.  Future commits will
nearly double the speed of this C code.

Patch

diff --git a/mercurial/dirs.c b/mercurial/dirs.c
new file mode 100644
--- /dev/null
+++ b/mercurial/dirs.c
@@ -0,0 +1,211 @@ 
+/*
+ dirs.c - dynamic directory diddling for dirstates
+
+ Copyright 2013 Facebook
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+*/
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "util.h"
+
+static inline Py_ssize_t _finddir(PyObject *path, Py_ssize_t pos)
+{
+	const char *s = PyString_AS_STRING(path);
+
+	while (pos != -1) {
+		if (s[pos] == '/')
+			break;
+		pos -= 1;
+	}
+
+	return pos;
+}
+
+static int _incdirs(PyObject *dirs, PyObject *path)
+{
+	Py_ssize_t pos = PyString_GET_SIZE(path);
+	PyObject *newval = NULL, *key = NULL;
+	int ret = -1;
+
+	while ((pos = _finddir(path, pos - 1)) != -1) {
+		PyObject *val;
+		long v = 0;
+
+		key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
+
+		if (key == NULL)
+			goto bail;
+
+		val = PyDict_GetItem(dirs, key);
+		if (val != NULL) {
+			if (!PyInt_Check(val)) {
+				PyErr_SetString(PyExc_TypeError,
+						"expected int value");
+				goto bail;
+			}
+			v = PyInt_AS_LONG(val);
+		}
+
+		newval = PyInt_FromLong(v + 1);
+
+		if (newval == NULL)
+			goto bail;
+
+		ret = PyDict_SetItem(dirs, key, newval);
+		if (ret == -1)
+			goto bail;
+		Py_CLEAR(key);
+		Py_CLEAR(newval);
+	}
+	ret = 0;
+
+bail:
+	Py_XDECREF(key);
+	Py_XDECREF(newval);
+
+	return ret;
+}
+
+static int _decdirs(PyObject *dirs, PyObject *path)
+{
+	Py_ssize_t pos = PyString_GET_SIZE(path);
+	PyObject *newval = NULL, *key = NULL;
+	int ret = -1;
+
+	while ((pos = _finddir(path, pos - 1)) != -1) {
+		PyObject *val;
+		long v;
+
+		key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
+
+		if (key == NULL)
+			goto bail;
+
+		val = PyDict_GetItem(dirs, key);
+		if (val == NULL) {
+			PyErr_SetString(PyExc_ValueError,
+					"expected a value, found none");
+			goto bail;
+		}
+		else if (!PyInt_Check(val)) {
+			PyErr_SetString(PyExc_TypeError,
+					"expected int value");
+			goto bail;
+		}
+		v = PyInt_AS_LONG(val);
+
+		if (v < 1) {
+			PyErr_SetString(PyExc_ValueError,
+					"expected a positive value");
+			goto bail;
+		}
+		else if (v == 1) {
+			if (PyDict_DelItem(dirs, key) == -1)
+				goto bail;
+			continue;
+		}
+		newval = PyInt_FromLong(v - 1);
+
+		if (newval == NULL)
+			goto bail;
+
+		ret = PyDict_SetItem(dirs, key, newval);
+		if (ret == -1)
+			goto bail;
+		Py_CLEAR(key);
+		Py_CLEAR(newval);
+	}
+	ret = 0;
+
+bail:
+	Py_XDECREF(key);
+	Py_XDECREF(newval);
+
+	return ret;
+}
+
+/*
+ * Calculate a refcounted set of directory names for the files in a
+ * dirstate. These refcounts are internal; client code must only look at
+ * the directory names.
+ */
+PyObject *calcdirs(PyObject *self, PyObject *args)
+{
+	PyObject *dirs = NULL, *map;
+	PyObject *key, *value;
+	Py_ssize_t pos = 0;
+
+	if (!PyArg_ParseTuple(args, "O!:calcdirs", &PyDict_Type, &map))
+		goto done;
+
+	dirs = PyDict_New();
+
+	if (dirs == NULL)
+		goto done;
+
+	while (PyDict_Next(map, &pos, &key, &value)) {
+		PyObject *st;
+
+		if (!PyString_Check(key)) {
+			PyErr_SetString(PyExc_TypeError, "expected string key");
+			goto bail;
+		}
+		if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) == 0) {
+			PyErr_SetString(PyExc_TypeError,
+					"expected non-empty tuple value");
+			goto bail;
+		}
+
+		st = PyTuple_GET_ITEM(value, 0);
+
+		if (!PyString_Check(st) || PyString_GET_SIZE(st) == 0) {
+			PyErr_SetString(PyExc_TypeError,
+					"expected non-empty string in slot 0");
+			goto bail;
+		}
+
+		if (PyString_AS_STRING(st)[0] == 'r')
+			continue;
+
+		if (_incdirs(dirs, key) == -1)
+			goto bail;
+	}
+	goto done;
+
+bail:
+	Py_CLEAR(dirs);
+
+done:
+	return dirs;
+}
+
+PyObject *incdirs(PyObject *self, PyObject *args)
+{
+	PyObject *dirs, *path;
+
+	if (!PyArg_ParseTuple(args, "O!O!:incdirs",
+			      &PyDict_Type, &dirs, &PyString_Type, &path))
+		return NULL;
+
+	if (_incdirs(dirs, path) == -1)
+		return NULL;
+
+	Py_RETURN_NONE;
+}
+
+PyObject *decdirs(PyObject *self, PyObject *args)
+{
+	PyObject *dirs, *path;
+
+	if (!PyArg_ParseTuple(args, "O!O!:decdirs",
+			      &PyDict_Type, &dirs, &PyString_Type, &path))
+		return NULL;
+
+	if (_decdirs(dirs, path) == -1)
+		return NULL;
+
+	Py_RETURN_NONE;
+}
diff --git a/mercurial/dirstate.py b/mercurial/dirstate.py
--- a/mercurial/dirstate.py
+++ b/mercurial/dirstate.py
@@ -52,6 +52,11 @@  def _calcdirs(m):
             _incdirs(dirs, f)
     return dirs
 
+if util.safehasattr(parsers, 'calcdirs'):
+    _calcdirs = parsers.calcdirs
+    _incdirs = parsers.incdirs
+    _decdirs = parsers.decdirs
+
 class dirstate(object):
 
     def __init__(self, opener, ui, root, validate):
diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -1878,6 +1878,9 @@  static char parsers_doc[] = "Efficient c
 PyObject *encodedir(PyObject *self, PyObject *args);
 PyObject *pathencode(PyObject *self, PyObject *args);
 PyObject *lowerencode(PyObject *self, PyObject *args);
+PyObject *calcdirs(PyObject *self, PyObject *args);
+PyObject *incdirs(PyObject *self, PyObject *args);
+PyObject *decdirs(PyObject *self, PyObject *args);
 
 static PyMethodDef methods[] = {
 	{"pack_dirstate", pack_dirstate, METH_VARARGS, "pack a dirstate\n"},
@@ -1887,6 +1890,11 @@  static PyMethodDef methods[] = {
 	{"encodedir", encodedir, METH_VARARGS, "encodedir a path\n"},
 	{"pathencode", pathencode, METH_VARARGS, "fncache-encode a path\n"},
 	{"lowerencode", lowerencode, METH_VARARGS, "lower-encode a path\n"},
+	{"calcdirs", calcdirs, METH_VARARGS, "build a dict of directories\n"},
+	{"incdirs", incdirs, METH_VARARGS,
+	 "increment paths in dict of directories\n"},
+	{"decdirs", decdirs, METH_VARARGS,
+	 "decrement paths in dict of directories\n"},
 	{NULL, NULL}
 };
 
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -427,7 +427,8 @@  extmodules = [
     Extension('mercurial.bdiff', ['mercurial/bdiff.c']),
     Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c']),
     Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
-    Extension('mercurial.parsers', ['mercurial/parsers.c',
+    Extension('mercurial.parsers', ['mercurial/dirs.c',
+                                    'mercurial/parsers.c',
                                     'mercurial/pathencode.c']),
     ]