Patchwork [V2] parse_manifest: rewrite to use memchr

login
register
mail settings
Submitter Siddharth Agarwal
Date Sept. 10, 2013, 10 p.m.
Message ID <9c95435a0eb4f0604c5e.1378850414@dev1091.prn1.facebook.com>
Download mbox | patch
Permalink /patch/2420/
State Accepted
Commit 3daabd2da78b335048268db78ac9df834427f148
Headers show

Comments

Siddharth Agarwal - Sept. 10, 2013, 10 p.m.
# HG changeset patch
# User Siddharth Agarwal <sid0@fb.com>
# Date 1378536479 25200
#      Fri Sep 06 23:47:59 2013 -0700
# Node ID 9c95435a0eb4f0604c5e04113e297375b6b3b527
# Parent  6638dd23999d66a95ce28816f035a1d051963290
parse_manifest: rewrite to use memchr

memchr is usually smarter than a simple for loop. With gcc 4.4.6 and glibc 2.12
on x86-64, for a 20 MB, 200,000 file manifest, parse_manifest goes from 0.116
seconds to 0.095 seconds.
Matt Mackall - Sept. 14, 2013, 11:25 p.m.
On Tue, 2013-09-10 at 15:00 -0700, Siddharth Agarwal wrote:
> # HG changeset patch
> # User Siddharth Agarwal <sid0@fb.com>
> # Date 1378536479 25200
> #      Fri Sep 06 23:47:59 2013 -0700
> # Node ID 9c95435a0eb4f0604c5e04113e297375b6b3b527
> # Parent  6638dd23999d66a95ce28816f035a1d051963290
> parse_manifest: rewrite to use memchr

Queued for default, thanks.

Patch

diff --git a/mercurial/parsers.c b/mercurial/parsers.c
--- a/mercurial/parsers.c
+++ b/mercurial/parsers.c
@@ -77,7 +77,7 @@ 
 static PyObject *parse_manifest(PyObject *self, PyObject *args)
 {
 	PyObject *mfdict, *fdict;
-	char *str, *cur, *start, *zero;
+	char *str, *start, *end;
 	int len;
 
 	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
@@ -86,30 +86,34 @@ 
 			      &str, &len))
 		goto quit;
 
-	for (start = cur = str, zero = NULL; cur < str + len; cur++) {
+	start = str;
+	end = str + len;
+	while (start < end) {
 		PyObject *file = NULL, *node = NULL;
 		PyObject *flags = NULL;
+		char *zero = NULL, *newline = NULL;
 		ptrdiff_t nlen;
 
-		if (!*cur) {
-			zero = cur;
-			continue;
-		}
-		else if (*cur != '\n')
-			continue;
-
+		zero = memchr(start, '\0', end - start);
 		if (!zero) {
 			PyErr_SetString(PyExc_ValueError,
 					"manifest entry has no separator");
 			goto quit;
 		}
 
+		newline = memchr(zero + 1, '\n', end - (zero + 1));
+		if (!newline) {
+			PyErr_SetString(PyExc_ValueError,
+					"manifest contains trailing garbage");
+			goto quit;
+		}
+
 		file = PyBytes_FromStringAndSize(start, zero - start);
 
 		if (!file)
 			goto bail;
 
-		nlen = cur - zero - 1;
+		nlen = newline - zero - 1;
 
 		node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
 		if (!node)
@@ -128,8 +132,7 @@ 
 		if (PyDict_SetItem(mfdict, file, node) == -1)
 			goto bail;
 
-		start = cur + 1;
-		zero = NULL;
+		start = newline + 1;
 
 		Py_XDECREF(flags);
 		Py_XDECREF(node);
@@ -142,12 +145,6 @@ 
 		goto quit;
 	}
 
-	if (len > 0 && *(cur - 1) != '\n') {
-		PyErr_SetString(PyExc_ValueError,
-				"manifest contains trailing garbage");
-		goto quit;
-	}
-
 	Py_INCREF(Py_None);
 	return Py_None;
 quit: