dirs.addpath: rework algorithm to search forward
authorSiddharth Agarwal <sid0@fb.com>
Fri, 27 Mar 2015 01:03:06 -0700
changeset 24486 1a9efc312700
parent 24485 914caae9a86a
child 24487 642d245ff537
dirs.addpath: rework algorithm to search forward This improves performance because it uses strchr rather than a loop. For LLVM/clang version "Apple LLVM version 6.0 (clang-600.0.56) (based on LLVM 3.5svn)" on OS X, for a repo with over 200,000 files, this improves perfdirs from 0.248 seconds to 0.230 (7.3%) For gcc 4.4.6 on Linux, for a test repo with over 500,000 files, this improves perfdirs from 0.704 seconds to 0.658 (6.5%).
mercurial/dirs.c
--- a/mercurial/dirs.c	Sat Mar 14 17:40:47 2015 +0900
+++ b/mercurial/dirs.c	Fri Mar 27 01:03:06 2015 -0700
@@ -9,6 +9,7 @@
 
 #define PY_SSIZE_T_CLEAN
 #include <Python.h>
+#include <string.h>
 #include "util.h"
 
 /*
@@ -32,23 +33,19 @@
 {
 	const char *s = PyString_AS_STRING(path);
 
-	while (pos != -1) {
-		if (s[pos] == '/')
-			break;
-		pos -= 1;
-	}
-
-	return pos;
+	const char *ret = strchr(s + pos, '/');
+	return (ret != NULL) ? (ret - s) : -1;
 }
 
 static int _addpath(PyObject *dirs, PyObject *path)
 {
-	const char *cpath = PyString_AS_STRING(path);
-	Py_ssize_t pos = PyString_GET_SIZE(path);
+	char *cpath = PyString_AS_STRING(path);
+	Py_ssize_t len = PyString_GET_SIZE(path);
+	Py_ssize_t pos = -1;
 	PyObject *key = NULL;
 	int ret = -1;
 
-	while ((pos = _finddir(path, pos - 1)) != -1) {
+	while ((pos = _finddir(path, pos + 1)) != -1) {
 		PyObject *val;
 
 		/* It's likely that every prefix already has an entry
@@ -56,10 +53,18 @@
 		   deallocating a string for each prefix we check. */
 		if (key != NULL)
 			((PyStringObject *)key)->ob_shash = -1;
-		else {
-			/* Force Python to not reuse a small shared string. */
-			key = PyString_FromStringAndSize(cpath,
-							 pos < 2 ? 2 : pos);
+		else if (pos != 0) {
+			/* pos >= 1, which means that len >= 2. This is
+			   guaranteed to produce a non-interned string. */
+			key = PyString_FromStringAndSize(cpath, len);
+			if (key == NULL)
+				goto bail;
+		} else {
+			/* pos == 0, which means we need to increment the dir
+			   count for the empty string. We need to make sure we
+			   don't muck around with interned strings, so throw it
+			   away later. */
+			key = PyString_FromString("");
 			if (key == NULL)
 				goto bail;
 		}
@@ -69,6 +74,10 @@
 		val = PyDict_GetItem(dirs, key);
 		if (val != NULL) {
 			PyInt_AS_LONG(val) += 1;
+			if (pos != 0)
+				PyString_AS_STRING(key)[pos] = '/';
+			else
+				key = NULL;
 			continue;
 		}
 
@@ -83,6 +92,11 @@
 		Py_DECREF(val);
 		if (ret == -1)
 			goto bail;
+
+		if (pos != 0)
+			PyString_AS_STRING(key)[pos] = '/';
+		else
+			key = NULL;
 		Py_CLEAR(key);
 	}
 	ret = 0;
@@ -95,11 +109,11 @@
 
 static int _delpath(PyObject *dirs, PyObject *path)
 {
-	Py_ssize_t pos = PyString_GET_SIZE(path);
+	Py_ssize_t pos = -1;
 	PyObject *key = NULL;
 	int ret = -1;
 
-	while ((pos = _finddir(path, pos - 1)) != -1) {
+	while ((pos = _finddir(path, pos + 1)) != -1) {
 		PyObject *val;
 
 		key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);