scmutil: rewrite dirs in C, use if available
authorBryan O'Sullivan <bryano@fb.com>
Wed, 10 Apr 2013 15:08:27 -0700
changeset 18900 02ee846b246a
parent 18899 d8ff607ef721
child 18901 66d3aebe2d95
scmutil: rewrite dirs in C, use if available This is over twice as fast as the Python dirs code. Upcoming changes will nearly double its speed again. perfdirs results for a working dir with 170,000 files: Python 638 msec C 244
mercurial/dirs.c
mercurial/parsers.c
mercurial/scmutil.py
setup.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/dirs.c	Wed Apr 10 15:08:27 2013 -0700
@@ -0,0 +1,298 @@
+/*
+ dirs.c - dynamic directory diddling for dirstates
+
+ Copyright 2013 Facebook
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+*/
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "util.h"
+
+/*
+ * This is a multiset of directory names, built from the files that
+ * appear in a dirstate or manifest.
+ */
+typedef struct {
+	PyObject_HEAD
+	PyObject *dict;
+} dirsObject;
+
+static inline Py_ssize_t _finddir(PyObject *path, Py_ssize_t pos)
+{
+	const char *s = PyString_AS_STRING(path);
+
+	while (pos != -1) {
+		if (s[pos] == '/')
+			break;
+		pos -= 1;
+	}
+
+	return pos;
+}
+
+static int _addpath(PyObject *dirs, PyObject *path)
+{
+	Py_ssize_t pos = PyString_GET_SIZE(path);
+	PyObject *newval = NULL, *key = NULL;
+	int ret = -1;
+
+	while ((pos = _finddir(path, pos - 1)) != -1) {
+		PyObject *val;
+		long v = 0;
+
+		key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
+
+		if (key == NULL)
+			goto bail;
+
+		val = PyDict_GetItem(dirs, key);
+		if (val != NULL)
+			v = PyInt_AS_LONG(val);
+
+		newval = PyInt_FromLong(v + 1);
+
+		if (newval == NULL)
+			goto bail;
+
+		ret = PyDict_SetItem(dirs, key, newval);
+		if (ret == -1)
+			goto bail;
+		Py_CLEAR(key);
+		Py_CLEAR(newval);
+	}
+	ret = 0;
+
+bail:
+	Py_XDECREF(key);
+	Py_XDECREF(newval);
+
+	return ret;
+}
+
+static int _delpath(PyObject *dirs, PyObject *path)
+{
+	Py_ssize_t pos = PyString_GET_SIZE(path);
+	PyObject *newval = NULL, *key = NULL;
+	int ret = -1;
+
+	while ((pos = _finddir(path, pos - 1)) != -1) {
+		PyObject *val;
+		long v;
+
+		key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
+
+		if (key == NULL)
+			goto bail;
+
+		val = PyDict_GetItem(dirs, key);
+		if (val == NULL) {
+			PyErr_SetString(PyExc_ValueError,
+					"expected a value, found none");
+			goto bail;
+		}
+		v = PyInt_AS_LONG(val);
+
+		if (v <= 1) {
+			if (PyDict_DelItem(dirs, key) == -1)
+				goto bail;
+			continue;
+		}
+		newval = PyInt_FromLong(v - 1);
+
+		if (newval == NULL)
+			goto bail;
+
+		ret = PyDict_SetItem(dirs, key, newval);
+		if (ret == -1)
+			goto bail;
+		Py_CLEAR(key);
+		Py_CLEAR(newval);
+	}
+	ret = 0;
+
+bail:
+	Py_XDECREF(key);
+	Py_XDECREF(newval);
+
+	return ret;
+}
+
+static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
+{
+	PyObject *key, *value;
+	Py_ssize_t pos = 0;
+
+	while (PyDict_Next(source, &pos, &key, &value)) {
+		if (!PyString_Check(key)) {
+			PyErr_SetString(PyExc_TypeError, "expected string key");
+			return -1;
+		}
+		if (skipchar) {
+			PyObject *st;
+
+			if (!PyTuple_Check(value) ||
+			    PyTuple_GET_SIZE(value) == 0) {
+				PyErr_SetString(PyExc_TypeError,
+						"expected non-empty tuple");
+				return -1;
+			}
+
+			st = PyTuple_GET_ITEM(value, 0);
+
+			if (!PyString_Check(st) || PyString_GET_SIZE(st) == 0) {
+				PyErr_SetString(PyExc_TypeError,
+						"expected non-empty string "
+						"at tuple index 0");
+				return -1;
+			}
+
+			if (PyString_AS_STRING(st)[0] == skipchar)
+				continue;
+		}
+
+		if (_addpath(dirs, key) == -1)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int dirs_fromiter(PyObject *dirs, PyObject *source)
+{
+	PyObject *iter, *item = NULL;
+	int ret;
+
+	iter = PyObject_GetIter(source);
+	if (iter == NULL)
+		return -1;
+
+	while ((item = PyIter_Next(iter)) != NULL) {
+		if (!PyString_Check(item)) {
+			PyErr_SetString(PyExc_TypeError, "expected string");
+			break;
+		}
+
+		if (_addpath(dirs, item) == -1)
+			break;
+		Py_CLEAR(item);
+	}
+
+	ret = PyErr_Occurred() ? -1 : 0;
+	Py_XDECREF(item);
+	return ret;
+}
+
+/*
+ * Calculate a refcounted set of directory names for the files in a
+ * dirstate.
+ */
+static int dirs_init(dirsObject *self, PyObject *args)
+{
+	PyObject *dirs = NULL, *source = NULL;
+	char skipchar = 0;
+	int ret = -1;
+
+	self->dict = NULL;
+
+	if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
+		return -1;
+
+	dirs = PyDict_New();
+
+	if (dirs == NULL)
+		return -1;
+
+	if (source == NULL)
+		ret = 0;
+	else if (PyDict_Check(source))
+		ret = dirs_fromdict(dirs, source, skipchar);
+	else if (skipchar)
+		PyErr_SetString(PyExc_ValueError,
+				"skip character is only supported "
+				"with a dict source");
+	else
+		ret = dirs_fromiter(dirs, source);
+
+	if (ret == -1)
+		Py_XDECREF(dirs);
+	else
+		self->dict = dirs;
+
+	return ret;
+}
+
+PyObject *dirs_addpath(dirsObject *self, PyObject *args)
+{
+	PyObject *path;
+
+	if (!PyArg_ParseTuple(args, "O!:addpath", &PyString_Type, &path))
+		return NULL;
+
+	if (_addpath(self->dict, path) == -1)
+		return NULL;
+
+	Py_RETURN_NONE;
+}
+
+static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
+{
+	PyObject *path;
+
+	if (!PyArg_ParseTuple(args, "O!:delpath", &PyString_Type, &path))
+		return NULL;
+
+	if (_delpath(self->dict, path) == -1)
+		return NULL;
+
+	Py_RETURN_NONE;
+}
+
+static int dirs_contains(dirsObject *self, PyObject *value)
+{
+	return PyString_Check(value) ? PyDict_Contains(self->dict, value) : 0;
+}
+
+static void dirs_dealloc(dirsObject *self)
+{
+	Py_XDECREF(self->dict);
+	PyObject_Del(self);
+}
+
+static PyObject *dirs_iter(dirsObject *self)
+{
+	return PyObject_GetIter(self->dict);
+}
+
+static PySequenceMethods dirs_sequence_methods;
+
+static PyMethodDef dirs_methods[] = {
+	{"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
+	{"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
+	{NULL} /* Sentinel */
+};
+
+static PyTypeObject dirsType = { PyObject_HEAD_INIT(NULL) };
+
+void dirs_module_init(PyObject *mod)
+{
+	dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
+	dirsType.tp_name = "parsers.dirs";
+	dirsType.tp_new = PyType_GenericNew;
+	dirsType.tp_basicsize = sizeof(dirsObject);
+	dirsType.tp_dealloc = (destructor)dirs_dealloc;
+	dirsType.tp_as_sequence = &dirs_sequence_methods;
+	dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
+	dirsType.tp_doc = "dirs";
+	dirsType.tp_iter = (getiterfunc)dirs_iter;
+	dirsType.tp_methods = dirs_methods;
+	dirsType.tp_init = (initproc)dirs_init;
+
+	if (PyType_Ready(&dirsType) < 0)
+		return;
+	Py_INCREF(&dirsType);
+
+	PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
+}
--- a/mercurial/parsers.c	Wed Apr 10 15:08:26 2013 -0700
+++ b/mercurial/parsers.c	Wed Apr 10 15:08:27 2013 -0700
@@ -1528,8 +1528,12 @@
 	{NULL, NULL}
 };
 
+void dirs_module_init(PyObject *mod);
+
 static void module_init(PyObject *mod)
 {
+	dirs_module_init(mod);
+
 	indexType.tp_new = PyType_GenericNew;
 	if (PyType_Ready(&indexType) < 0)
 		return;
--- a/mercurial/scmutil.py	Wed Apr 10 15:08:26 2013 -0700
+++ b/mercurial/scmutil.py	Wed Apr 10 15:08:27 2013 -0700
@@ -7,7 +7,7 @@
 
 from i18n import _
 from mercurial.node import nullrev
-import util, error, osutil, revset, similar, encoding, phases
+import util, error, osutil, revset, similar, encoding, phases, parsers
 import match as matchmod
 import os, errno, re, stat, glob
 
@@ -927,6 +927,9 @@
     def __contains__(self, d):
         return d in self._dirs
 
+if util.safehasattr(parsers, 'dirs'):
+    dirs = parsers.dirs
+
 def finddirs(path):
     pos = path.rfind('/')
     while pos != -1:
--- a/setup.py	Wed Apr 10 15:08:26 2013 -0700
+++ b/setup.py	Wed Apr 10 15:08:27 2013 -0700
@@ -427,7 +427,8 @@
     Extension('mercurial.bdiff', ['mercurial/bdiff.c']),
     Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c']),
     Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
-    Extension('mercurial.parsers', ['mercurial/parsers.c',
+    Extension('mercurial.parsers', ['mercurial/dirs.c',
+                                    'mercurial/parsers.c',
                                     'mercurial/pathencode.c']),
     ]