lazymanifest: use a binary search to do an insertion
authorAugie Fackler <augie@google.com>
Fri, 30 Jan 2015 21:30:40 -0800
changeset 24228 542c891274b2
parent 24227 8ec2df32bd39
child 24229 f903689680e6
lazymanifest: use a binary search to do an insertion This makes insertions log(n) plus some memmove() overhead, rather than doing an append followed by an n*log(n) sort. There's probably a lot of performance to be gained by adding a batch-add method, which could be smarter about the memmove()s performed. Includes a couple of extra tests that should help prevent bugs. Thanks to Martin for some significant pre-mail cleanup of this change.
mercurial/manifest.c
tests/test-manifest.py
--- a/mercurial/manifest.c	Mon Nov 17 00:00:25 2014 -0500
+++ b/mercurial/manifest.c	Fri Jan 30 21:30:40 2015 -0800
@@ -363,6 +363,39 @@
 	return 0;
 }
 
+/* Do a binary search for the insertion point for new, creating the
+ * new entry if needed. */
+static int internalsetitem(lazymanifest *self, line *new) {
+	int start = 0, end = self->numlines;
+	while (start < end) {
+		int pos = start + (end - start) / 2;
+		int c = linecmp(new, self->lines + pos);
+		if (c < 0)
+			end = pos;
+		else if (c > 0)
+			start = pos + 1;
+		else {
+			if (self->lines[pos].deleted)
+				self->livelines++;
+			start = pos;
+			goto finish;
+		}
+	}
+	/* being here means we need to do an insert */
+	if (!realloc_if_full(self)) {
+		PyErr_NoMemory();
+		return -1;
+	}
+	memmove(self->lines + start + 1, self->lines + start,
+		(self->numlines - start) * sizeof(line));
+	self->numlines++;
+	self->livelines++;
+finish:
+	self->lines[start] = *new;
+	self->dirty = true;
+	return 0;
+}
+
 static int lazymanifest_setitem(
 	lazymanifest *self, PyObject *key, PyObject *value)
 {
@@ -378,7 +411,6 @@
 	char *dest;
 	int i;
 	line new;
-	line *hit;
 	if (!PyString_Check(key)) {
 		PyErr_Format(PyExc_TypeError,
 			     "setitem: manifest keys must be a string.");
@@ -446,32 +478,8 @@
 	}
 	new.from_malloc = true;     /* is `start` a pointer we allocated? */
 	new.deleted = false;        /* is this entry deleted? */
-	hit = bsearch(&new, self->lines, self->numlines,
-		      sizeof(line), &linecmp);
-	self->dirty = true;
-	if (hit) {
-		/* updating a line we already had */
-		if (hit->from_malloc) {
-			free(hit->start);
-		}
-		if (hit->deleted) {
-			self->livelines++;
-		}
-		*hit = new;
-	} else {
-		/* new line */
-		if (!realloc_if_full(self)) {
-			PyErr_NoMemory();
-			return -1;
-		}
-		self->lines[self->numlines++] = new;
-		self->livelines++;
-		/* TODO: do a binary search and insert rather than
-		 * append and qsort. Also offer a batch-insert
-		 * interface, which should be a nice little
-		 * performance win.
-		 */
-		qsort(self->lines, self->numlines, sizeof(line), &linecmp);
+	if (internalsetitem(self, &new)) {
+		return -1;
 	}
 	return 0;
 }
--- a/tests/test-manifest.py	Mon Nov 17 00:00:25 2014 -0500
+++ b/tests/test-manifest.py	Fri Jan 30 21:30:40 2015 -0800
@@ -133,6 +133,10 @@
         self.assertRaises(KeyError, lambda : m['foo'])
         self.assertEqual(1, len(m))
         self.assertEqual(1, len(list(m)))
+        # now restore and make sure everything works right
+        m['foo'] = 'a' * 20, ''
+        self.assertEqual(2, len(m))
+        self.assertEqual(2, len(list(m)))
 
     def testManifestDiff(self):
         MISSING = (None, '')