Patchwork [6,of,6,v6,lazy-manifest] lazymanifest: use a binary search to do an insertion

login
register
mail settings
Submitter Augie Fackler
Date March 7, 2015, 5:16 p.m.
Message ID <4df349ba93186f860144.1425748581@imladris.local>
Download mbox | patch
Permalink /patch/7931/
State Accepted
Headers show

Comments

Augie Fackler - March 7, 2015, 5:16 p.m.
# HG changeset patch
# User Augie Fackler <augie@google.com>
# Date 1422682240 28800
#      Fri Jan 30 21:30:40 2015 -0800
# Node ID 4df349ba93186f8601447b354273d411c71208b4
# Parent  6587f242aaa0732525ed3f8c2dc61c13dd3dc3ec
lazymanifest: use a binary search to do an insertion

This makes insertions log(n) plus some memmove() overhead, rather than
doing an append followed by an n*log(n) sort. There's probably a lot
of performance to be gained by adding a batch-add method, which could
be smarter about the memmove()s performed.

Includes a couple of extra tests that should help prevent bugs.

Thanks to Martin for some significant pre-mail cleanup of this change.
Matt Mackall - March 9, 2015, 6:44 p.m.
On Sat, 2015-03-07 at 12:16 -0500, Augie Fackler wrote:
> # HG changeset patch
> # User Augie Fackler <augie@google.com>
> # Date 1422682240 28800
> #      Fri Jan 30 21:30:40 2015 -0800
> # Node ID 4df349ba93186f8601447b354273d411c71208b4
> # Parent  6587f242aaa0732525ed3f8c2dc61c13dd3dc3ec
> lazymanifest: use a binary search to do an insertion

These are queued for default, thanks.

Patch

diff --git a/mercurial/manifest.c b/mercurial/manifest.c
--- a/mercurial/manifest.c
+++ b/mercurial/manifest.c
@@ -363,6 +363,39 @@  static int lazymanifest_delitem(lazymani
 	return 0;
 }
 
+/* Do a binary search for the insertion point for new, creating the
+ * new entry if needed. */
+static int internalsetitem(lazymanifest *self, line *new) {
+	int start = 0, end = self->numlines;
+	while (start < end) {
+		int pos = start + (end - start) / 2;
+		int c = linecmp(new, self->lines + pos);
+		if (c < 0)
+			end = pos;
+		else if (c > 0)
+			start = pos + 1;
+		else {
+			if (self->lines[pos].deleted)
+				self->livelines++;
+			start = pos;
+			goto finish;
+		}
+	}
+	/* being here means we need to do an insert */
+	if (!realloc_if_full(self)) {
+		PyErr_NoMemory();
+		return -1;
+	}
+	memmove(self->lines + start + 1, self->lines + start,
+		(self->numlines - start) * sizeof(line));
+	self->numlines++;
+	self->livelines++;
+finish:
+	self->lines[start] = *new;
+	self->dirty = true;
+	return 0;
+}
+
 static int lazymanifest_setitem(
 	lazymanifest *self, PyObject *key, PyObject *value)
 {
@@ -378,7 +411,6 @@  static int lazymanifest_setitem(
 	char *dest;
 	int i;
 	line new;
-	line *hit;
 	if (!PyString_Check(key)) {
 		PyErr_Format(PyExc_TypeError,
 			     "setitem: manifest keys must be a string.");
@@ -446,32 +478,8 @@  static int lazymanifest_setitem(
 	}
 	new.from_malloc = true;     /* is `start` a pointer we allocated? */
 	new.deleted = false;        /* is this entry deleted? */
-	hit = bsearch(&new, self->lines, self->numlines,
-		      sizeof(line), &linecmp);
-	self->dirty = true;
-	if (hit) {
-		/* updating a line we already had */
-		if (hit->from_malloc) {
-			free(hit->start);
-		}
-		if (hit->deleted) {
-			self->livelines++;
-		}
-		*hit = new;
-	} else {
-		/* new line */
-		if (!realloc_if_full(self)) {
-			PyErr_NoMemory();
-			return -1;
-		}
-		self->lines[self->numlines++] = new;
-		self->livelines++;
-		/* TODO: do a binary search and insert rather than
-		 * append and qsort. Also offer a batch-insert
-		 * interface, which should be a nice little
-		 * performance win.
-		 */
-		qsort(self->lines, self->numlines, sizeof(line), &linecmp);
+	if (internalsetitem(self, &new)) {
+		return -1;
 	}
 	return 0;
 }
diff --git a/tests/test-manifest.py b/tests/test-manifest.py
--- a/tests/test-manifest.py
+++ b/tests/test-manifest.py
@@ -133,6 +133,10 @@  class testmanifest(unittest.TestCase):
         self.assertRaises(KeyError, lambda : m['foo'])
         self.assertEqual(1, len(m))
         self.assertEqual(1, len(list(m)))
+        # now restore and make sure everything works right
+        m['foo'] = 'a' * 20, ''
+        self.assertEqual(2, len(m))
+        self.assertEqual(2, len(list(m)))
 
     def testManifestDiff(self):
         MISSING = (None, '')