Patchwork [4,of,6] scmutil: rewrite dirs in C, use if available

login
register
mail settings
Submitter Bryan O'Sullivan
Date April 1, 2013, 8:49 p.m.
Message ID <71ac3e4fd231239cf5c2.1364849357@australite.local>
Download mbox | patch
Permalink /patch/1234/
State Accepted, archived
Headers show

Comments

Bryan O'Sullivan - April 1, 2013, 8:49 p.m.
# HG changeset patch
# User Bryan O'Sullivan <bryano@fb.com>
# Date 1364849265 25200
#      Mon Apr 01 13:47:45 2013 -0700
# Node ID 71ac3e4fd231239cf5c281cf49ff2fb78b883ce8
# Parent  0f24cd329c3e1311dc472e23ab96bf7926ae7409
scmutil: rewrite dirs in C, use if available

This is over twice as fast as the Python dirs code. Upcoming changes
will nearly double its speed again.

perfdirs results for a working dir with 170,000 files:
  Python     638 msec
  C          244 msec

Patch

diff --git a/mercurial/dirs.c b/mercurial/dirs.c
new file mode 100644
--- /dev/null
+++ b/mercurial/dirs.c
@@ -0,0 +1,294 @@ 
+/*
+ dirs.c - dynamic directory diddling for dirstates
+
+ Copyright 2013 Facebook
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+*/
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "util.h"
+
+/*
+ * Instance layout of the dirs type: the standard object header
+ * followed by a single dict.  Within this file the dict is keyed by
+ * directory-name strings and its values are integer counts (see
+ * _addpath and _delpath).
+ */
+typedef struct {
+	PyObject_HEAD
+	PyObject *dict;	/* set by dirs_init, released by dirs_dealloc */
+} dirsObject;
+
+/*
+ * Scan backwards through the bytes of path, starting at index pos,
+ * looking for a '/' separator.
+ *
+ * Returns the index of the nearest '/' at or before pos, or -1 once
+ * the start of the string has been passed without finding one.  A pos
+ * of -1 returns -1 immediately.  pos is not range-checked.
+ */
+static inline Py_ssize_t _finddir(PyObject *path, Py_ssize_t pos)
+{
+	const char *bytes = PyString_AS_STRING(path);
+
+	for (; pos != -1; pos--) {
+		if (bytes[pos] == '/')
+			return pos;
+	}
+
+	return -1;
+}
+
+/*
+ * Count path against each of its ancestor directories.
+ *
+ * For every '/' in path, from the last to the first, the prefix that
+ * ends just before that '/' is used as a key in dirs and its integer
+ * count is incremented; a missing key is treated as a count of 0.  A
+ * path containing no '/' leaves dirs untouched.
+ *
+ * dirs must be a dict and path a string object; neither is checked
+ * here.  Returns 0 on success, or -1 with a Python exception set.
+ */
+static int _addpath(PyObject *dirs, PyObject *path)
+{
+	const char *cpath = PyString_AS_STRING(path);
+	Py_ssize_t pos = PyString_GET_SIZE(path);
+	PyObject *newval = NULL, *key = NULL;
+	/*
+	 * ret stays -1 until every prefix has been handled.  It must not
+	 * hold intermediate results: a successful PyDict_SetItem would
+	 * otherwise make a failure on a later iteration report success.
+	 */
+	int ret = -1;
+
+	while ((pos = _finddir(path, pos - 1)) != -1) {
+		PyObject *val;
+		long v = 0;
+
+		key = PyString_FromStringAndSize(cpath, pos);
+		if (key == NULL)
+			goto bail;
+
+		/* borrowed reference; NULL means the key is not present */
+		val = PyDict_GetItem(dirs, key);
+		if (val != NULL)
+			v = PyInt_AS_LONG(val);
+
+		newval = PyInt_FromLong(v + 1);
+		if (newval == NULL)
+			goto bail;
+
+		if (PyDict_SetItem(dirs, key, newval) == -1)
+			goto bail;
+
+		/* the dict holds its own references now; drop ours */
+		Py_CLEAR(key);
+		Py_CLEAR(newval);
+	}
+	ret = 0;
+
+bail:
+	Py_XDECREF(key);
+	Py_XDECREF(newval);
+
+	return ret;
+}
+
+/*
+ * Undo one _addpath() of path.
+ *
+ * For every '/' in path, from the last to the first, the prefix that
+ * ends just before that '/' is looked up in dirs.  A count of 1 or
+ * less removes the key; otherwise the count is decremented.
+ *
+ * dirs must be a dict and path a string object; neither is checked
+ * here.  Returns 0 on success, or -1 with a Python exception set.  A
+ * prefix missing from dirs raises ValueError.  Entries already
+ * processed before a failure are not rolled back.
+ */
+static int _delpath(PyObject *dirs, PyObject *path)
+{
+	const char *cpath = PyString_AS_STRING(path);
+	Py_ssize_t pos = PyString_GET_SIZE(path);
+	PyObject *newval = NULL, *key = NULL;
+	/*
+	 * ret stays -1 until every prefix has been handled, so that a
+	 * failure on a later iteration is never reported as success.
+	 */
+	int ret = -1;
+
+	while ((pos = _finddir(path, pos - 1)) != -1) {
+		PyObject *val;
+		long v;
+
+		key = PyString_FromStringAndSize(cpath, pos);
+		if (key == NULL)
+			goto bail;
+
+		/* borrowed reference */
+		val = PyDict_GetItem(dirs, key);
+		if (val == NULL) {
+			PyErr_SetString(PyExc_ValueError,
+					"expected a value, found none");
+			goto bail;
+		}
+		v = PyInt_AS_LONG(val);
+
+		if (v <= 1) {
+			/* last user of this directory: drop the entry */
+			if (PyDict_DelItem(dirs, key) == -1)
+				goto bail;
+		} else {
+			newval = PyInt_FromLong(v - 1);
+			if (newval == NULL)
+				goto bail;
+
+			if (PyDict_SetItem(dirs, key, newval) == -1)
+				goto bail;
+		}
+
+		/*
+		 * Release our references on both paths through the loop
+		 * body; key would otherwise leak when the entry is deleted.
+		 */
+		Py_CLEAR(key);
+		Py_CLEAR(newval);
+	}
+	ret = 0;
+
+bail:
+	Py_XDECREF(key);
+	Py_XDECREF(newval);
+
+	return ret;
+}
+
+/*
+ * Fill dirs from the keys of the dict source.
+ *
+ * Every key must be a string, otherwise TypeError is raised.  When
+ * skipchar is non-zero each value must be a non-empty tuple whose
+ * first item is a non-empty string; keys whose value's first item
+ * starts with skipchar are left out.  When skipchar is zero the
+ * values are not inspected at all.
+ *
+ * Returns 0 on success, or -1 with a Python exception set.
+ */
+static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
+{
+	PyObject *name, *entry;
+	Py_ssize_t cursor = 0;
+
+	while (PyDict_Next(source, &cursor, &name, &entry)) {
+		if (!PyString_Check(name)) {
+			PyErr_SetString(PyExc_TypeError, "expected string key");
+			return -1;
+		}
+
+		if (skipchar != 0) {
+			PyObject *state;
+
+			if (!(PyTuple_Check(entry) &&
+			      PyTuple_GET_SIZE(entry) != 0)) {
+				PyErr_SetString(PyExc_TypeError,
+						"expected non-empty tuple");
+				return -1;
+			}
+
+			state = PyTuple_GET_ITEM(entry, 0);
+
+			if (!(PyString_Check(state) &&
+			      PyString_GET_SIZE(state) != 0)) {
+				PyErr_SetString(PyExc_TypeError,
+						"expected non-empty string "
+						"at tuple index 0");
+				return -1;
+			}
+
+			/* entries flagged with skipchar are not counted */
+			if (PyString_AS_STRING(state)[0] == skipchar)
+				continue;
+		}
+
+		if (_addpath(dirs, name) == -1)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill dirs from an arbitrary iterable of path strings.
+ *
+ * Each item produced by source must be a string (TypeError otherwise)
+ * and is passed to _addpath().  Iteration stops at the first failure.
+ *
+ * Returns 0 on success, or -1 with a Python exception set.
+ */
+static int dirs_fromiter(PyObject *dirs, PyObject *source)
+{
+	PyObject *iter, *item = NULL;
+	int ret;
+
+	iter = PyObject_GetIter(source);
+	if (iter == NULL)
+		return -1;
+
+	while ((item = PyIter_Next(iter)) != NULL) {
+		if (!PyString_Check(item)) {
+			PyErr_SetString(PyExc_TypeError, "expected string");
+			break;
+		}
+
+		if (_addpath(dirs, item) == -1)
+			break;
+		Py_CLEAR(item);
+	}
+
+	/*
+	 * PyIter_Next returns NULL both at exhaustion and on error, so the
+	 * error indicator is what tells the two apart.  Sample it before
+	 * releasing anything.
+	 */
+	ret = PyErr_Occurred() ? -1 : 0;
+
+	/* PyObject_GetIter returned a new reference; it must be released */
+	Py_DECREF(iter);
+	Py_XDECREF(item);
+	return ret;
+}
+
+/*
+ * Calculate a refcounted set of directory names for the files in a
+ * dirstate.
+ *
+ * Python signature: dirs([source[, skipchar]]).
+ *   - no source: start out empty;
+ *   - dict source: see dirs_fromdict(), which honours skipchar;
+ *   - any other iterable: see dirs_fromiter(); passing a skipchar in
+ *     this case raises ValueError.
+ *
+ * Returns 0 on success, or -1 with a Python exception set.  The new
+ * dict is installed only on success, so a failing call leaves
+ * self->dict as it was, and a repeated __init__ releases the dict it
+ * replaces instead of leaking it.
+ */
+static int dirs_init(dirsObject *self, PyObject *args)
+{
+	PyObject *dirs = NULL, *source = NULL, *old;
+	char skipchar = 0;
+	int ret = -1;
+
+	if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
+		return -1;
+
+	dirs = PyDict_New();
+
+	if (dirs == NULL)
+		return -1;
+
+	if (source == NULL)
+		ret = 0;
+	else if (PyDict_Check(source))
+		ret = dirs_fromdict(dirs, source, skipchar);
+	else if (skipchar)
+		PyErr_SetString(PyExc_ValueError,
+				"skip character is only supported "
+				"with a dict source");
+	else
+		ret = dirs_fromiter(dirs, source);
+
+	if (ret == -1) {
+		Py_DECREF(dirs);
+		return -1;
+	}
+
+	/*
+	 * self->dict is NULL on the first call because the type is
+	 * allocated through PyType_GenericNew, which zero-fills the
+	 * instance.  Swap before releasing so the object never points
+	 * at a dict that is being torn down.
+	 */
+	old = self->dict;
+	self->dict = dirs;
+	Py_XDECREF(old);
+
+	return 0;
+}
+
+/*
+ * Python method dirs.addpath(path): count path against each of its
+ * ancestor directories via _addpath().
+ *
+ * path must be a string object.  Returns None, or NULL with a Python
+ * exception set.
+ *
+ * NOTE(review): unlike dirs_delpath this function is not static; the
+ * only reference visible here is the dirs_methods table -- confirm
+ * whether external linkage is intended.
+ */
+PyObject *dirs_addpath(dirsObject *self, PyObject *args)
+{
+	PyObject *target;
+
+	if (PyArg_ParseTuple(args, "O!:addpath", &PyString_Type, &target) &&
+	    _addpath(self->dict, target) != -1) {
+		Py_RETURN_NONE;
+	}
+
+	return NULL;
+}
+
+/*
+ * Python method dirs.delpath(path): release one count on each
+ * ancestor directory of path via _delpath().
+ *
+ * path must be a string object.  Returns None, or NULL with a Python
+ * exception set.
+ */
+static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
+{
+	PyObject *target;
+
+	if (PyArg_ParseTuple(args, "O!:delpath", &PyString_Type, &target) &&
+	    _delpath(self->dict, target) != -1) {
+		Py_RETURN_NONE;
+	}
+
+	return NULL;
+}
+
+/*
+ * sq_contains slot ("value in dirs").
+ *
+ * A value that is not a string is reported as absent without
+ * consulting the dict.  Otherwise the result of PyDict_Contains() is
+ * passed through unchanged.
+ */
+static int dirs_contains(dirsObject *self, PyObject *value)
+{
+	if (!PyString_Check(value))
+		return 0;
+
+	return PyDict_Contains(self->dict, value);
+}
+
+/*
+ * tp_dealloc slot: release the dict, if one was ever installed, and
+ * then free the instance itself.
+ */
+static void dirs_dealloc(dirsObject *self)
+{
+	PyObject *held = self->dict;
+
+	Py_XDECREF(held);
+	PyObject_Del(self);
+}
+
+/*
+ * tp_iter slot: hand back whatever iterator the underlying dict
+ * provides (NULL with an exception set on failure).
+ */
+static PyObject *dirs_iter(dirsObject *self)
+{
+	PyObject *it = PyObject_GetIter(self->dict);
+
+	return it;
+}
+
+/*
+ * Sequence-protocol slots for the dirs type.  No initializer is given
+ * here; sq_contains is assigned in dirs_module_init().
+ */
+static PySequenceMethods dirs_sequence_methods;
+
+/* Methods exposed on dirs instances. */
+static PyMethodDef dirs_methods[] = {
+	{"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
+	{"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
+	{NULL} /* Sentinel */
+};
+
+/*
+ * Type object for dirs.  Only the object header is initialized
+ * statically; the slots are assigned in dirs_module_init().
+ */
+static PyTypeObject dirsType = { PyObject_HEAD_INIT(NULL) };
+
+/*
+ * Finish setting up the dirs type and publish it on mod as "dirs".
+ *
+ * The type slots are filled in at run time rather than in a static
+ * initializer.  The function returns nothing: if PyType_Ready() or
+ * PyModule_AddObject() fails, it returns early and mod is left
+ * without a "dirs" attribute.
+ */
+void dirs_module_init(PyObject *mod)
+{
+	dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
+	dirsType.tp_name = "parsers.dirs";
+	dirsType.tp_new = PyType_GenericNew;
+	dirsType.tp_basicsize = sizeof(dirsObject);
+	dirsType.tp_dealloc = (destructor)dirs_dealloc;
+	dirsType.tp_as_sequence = &dirs_sequence_methods;
+	dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
+	dirsType.tp_doc = "dirs";
+	dirsType.tp_iter = (getiterfunc)dirs_iter;
+	dirsType.tp_methods = dirs_methods;
+	dirsType.tp_init = (initproc)dirs_init;
+
+	if (PyType_Ready(&dirsType) < 0)
+		return;
+	Py_INCREF(&dirsType);
+
+	/*
+	 * PyModule_AddObject steals the reference only when it succeeds,
+	 * so give it back ourselves on failure.
+	 */
+	if (PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType) < 0)
+		Py_DECREF(&dirsType);
+}
diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -1890,8 +1890,12 @@  static PyMethodDef methods[] = {
 	{NULL, NULL}
 };
 
+void dirs_module_init(PyObject *mod);
+
 static void module_init(PyObject *mod)
 {
+	dirs_module_init(mod);
+
 	indexType.tp_new = PyType_GenericNew;
 	if (PyType_Ready(&indexType) < 0)
 		return;
diff --git a/mercurial/scmutil.py b/mercurial/scmutil.py
--- a/mercurial/scmutil.py
+++ b/mercurial/scmutil.py
@@ -7,7 +7,7 @@ 
 
 from i18n import _
 from mercurial.node import nullrev
-import util, error, osutil, revset, similar, encoding, phases
+import util, error, osutil, revset, similar, encoding, phases, parsers
 import match as matchmod
 import os, errno, re, stat, glob
 
@@ -967,3 +967,6 @@  class dirs(object):
 
     def __contains__(self, d):
         return d in self._dirs
+
+# Rebind the name "dirs" to the implementation exported by the parsers
+# module when it provides one; otherwise the Python class defined
+# above stays in use.
+if util.safehasattr(parsers, 'dirs'):
+    dirs = parsers.dirs
diff --git a/setup.py b/setup.py
--- a/setup.py
+++ b/setup.py
@@ -427,7 +427,8 @@  extmodules = [
     Extension('mercurial.bdiff', ['mercurial/bdiff.c']),
     Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c']),
     Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
-    Extension('mercurial.parsers', ['mercurial/parsers.c',
+    Extension('mercurial.parsers', ['mercurial/dirs.c',
+                                    'mercurial/parsers.c',
                                     'mercurial/pathencode.c']),
     ]