Patchwork dirs.addpath: rework algorithm to search forward

login
register
mail settings
Submitter Siddharth Agarwal
Date March 27, 2015, 8:15 p.m.
Message ID <d9674bec56ff82499822.1427487359@devbig136.prn2.facebook.com>
Download mbox | patch
Permalink /patch/8316/
State Accepted
Headers show

Comments

Siddharth Agarwal - March 27, 2015, 8:15 p.m.
# HG changeset patch
# User Siddharth Agarwal <sid0@fb.com>
# Date 1427443386 25200
#      Fri Mar 27 01:03:06 2015 -0700
# Node ID d9674bec56ff82499822987f6fa79286b434038e
# Parent  95cbc77c0cad04b04bd690bb3ba91e919e11d3b1
dirs.addpath: rework algorithm to search forward

This improves performance because it uses strchr rather than a loop.

For LLVM/clang version "Apple LLVM version 6.0 (clang-600.0.56) (based on LLVM
3.5svn)" on OS X, for a repo with over 200,000 files, this improves perfdirs
from 0.248 seconds to 0.230 (7.3%)

For gcc 4.4.6 on Linux, for a test repo with over 500,000 files, this improves
perfdirs from 0.704 seconds to 0.658 (6.5%).
Matt Mackall - March 27, 2015, 8:42 p.m.
On Fri, 2015-03-27 at 13:15 -0700, Siddharth Agarwal wrote:
> # HG changeset patch
> # User Siddharth Agarwal <sid0@fb.com>
> # Date 1427443386 25200
> #      Fri Mar 27 01:03:06 2015 -0700
> # Node ID d9674bec56ff82499822987f6fa79286b434038e
> # Parent  95cbc77c0cad04b04bd690bb3ba91e919e11d3b1
> dirs.addpath: rework algorithm to search forward

Looks good, queued for default.

Patch

diff --git a/mercurial/dirs.c b/mercurial/dirs.c
--- a/mercurial/dirs.c
+++ b/mercurial/dirs.c
@@ -9,6 +9,7 @@ 
 
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
+#include <string.h>
 #include "util.h"
 
 /*
@@ -32,23 +33,19 @@ 
 {
 	const char *s = PyString_AS_STRING(path);
 
-	while (pos != -1) {
-		if (s[pos] == '/')
-			break;
-		pos -= 1;
-	}
-
-	return pos;
+	const char *ret = strchr(s + pos, '/');
+	return (ret != NULL) ? (ret - s) : -1;
 }
 
 static int _addpath(PyObject *dirs, PyObject *path)
 {
-	const char *cpath = PyString_AS_STRING(path);
-	Py_ssize_t pos = PyString_GET_SIZE(path);
+	char *cpath = PyString_AS_STRING(path);
+	Py_ssize_t len = PyString_GET_SIZE(path);
+	Py_ssize_t pos = -1;
 	PyObject *key = NULL;
 	int ret = -1;
 
-	while ((pos = _finddir(path, pos - 1)) != -1) {
+	while ((pos = _finddir(path, pos + 1)) != -1) {
 		PyObject *val;
 
 		/* It's likely that every prefix already has an entry
@@ -56,10 +53,18 @@ 
 		   deallocating a string for each prefix we check. */
 		if (key != NULL)
 			((PyStringObject *)key)->ob_shash = -1;
-		else {
-			/* Force Python to not reuse a small shared string. */
-			key = PyString_FromStringAndSize(cpath,
-							 pos < 2 ? 2 : pos);
+		else if (pos != 0) {
+			/* pos >= 1, which means that len >= 2. This is
+			   guaranteed to produce a non-interned string. */
+			key = PyString_FromStringAndSize(cpath, len);
+			if (key == NULL)
+				goto bail;
+		} else {
+			/* pos == 0, which means we need to increment the dir
+			   count for the empty string. We need to make sure we
+			   don't muck around with interned strings, so throw it
+			   away later. */
+			key = PyString_FromString("");
 			if (key == NULL)
 				goto bail;
 		}
@@ -69,6 +74,10 @@ 
 		val = PyDict_GetItem(dirs, key);
 		if (val != NULL) {
 			PyInt_AS_LONG(val) += 1;
+			if (pos != 0)
+				PyString_AS_STRING(key)[pos] = '/';
+			else
+				key = NULL;
 			continue;
 		}
 
@@ -83,6 +92,11 @@ 
 		Py_DECREF(val);
 		if (ret == -1)
 			goto bail;
+
+		if (pos != 0)
+			PyString_AS_STRING(key)[pos] = '/';
+		else
+			key = NULL;
 		Py_CLEAR(key);
 	}
 	ret = 0;
@@ -95,11 +109,11 @@ 
 
 static int _delpath(PyObject *dirs, PyObject *path)
 {
-	Py_ssize_t pos = PyString_GET_SIZE(path);
+	Py_ssize_t pos = -1;
 	PyObject *key = NULL;
 	int ret = -1;
 
-	while ((pos = _finddir(path, pos - 1)) != -1) {
+	while ((pos = _finddir(path, pos + 1)) != -1) {
 		PyObject *val;
 
 		key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);