# HG changeset patch
# User Bryan O'Sullivan
# Date 1365631707 25200
# Node ID 02ee846b246a08cb4f09816b232fd0aa543394bc
# Parent d8ff607ef72135f2c1ede7b718cf57d304c041dc
scmutil: rewrite dirs in C, use if available

This is over twice as fast as the Python dirs code. Upcoming changes
will nearly double its speed again.

perfdirs results for a working dir with 170,000 files:

  Python     638 msec
  C          244

diff -r d8ff607ef721 -r 02ee846b246a mercurial/dirs.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/dirs.c	Wed Apr 10 15:08:27 2013 -0700
@@ -0,0 +1,359 @@
+/*
+ dirs.c - dynamic directory diddling for dirstates
+
+ Copyright 2013 Facebook
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+*/
+
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include "util.h"
+
+/*
+ * This is a multiset of directory names, built from the files that
+ * appear in a dirstate or manifest.
+ *
+ * "dict" maps each directory name to an int: the number of added
+ * paths that have that directory as an ancestor.
+ */
+typedef struct {
+	PyObject_HEAD
+	PyObject *dict;
+} dirsObject;
+
+/*
+ * Scan "path" backwards from index "pos" for a '/'.
+ *
+ * Returns the index of the first '/' found at or before "pos", or -1
+ * if there is none.
+ */
+static inline Py_ssize_t _finddir(PyObject *path, Py_ssize_t pos)
+{
+	const char *s = PyString_AS_STRING(path);
+
+	while (pos != -1) {
+		if (s[pos] == '/')
+			break;
+		pos -= 1;
+	}
+
+	return pos;
+}
+
+/*
+ * Increment the count of every ancestor directory of "path" in the
+ * dict "dirs", inserting directories not yet present with a count
+ * of 1.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int _addpath(PyObject *dirs, PyObject *path)
+{
+	Py_ssize_t pos = PyString_GET_SIZE(path);
+	PyObject *newval = NULL, *key = NULL;
+	int ret = -1;
+
+	/* each '/' found, right to left, ends one ancestor directory */
+	while ((pos = _finddir(path, pos - 1)) != -1) {
+		PyObject *val;
+		long v = 0;
+
+		key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
+
+		if (key == NULL)
+			goto bail;
+
+		val = PyDict_GetItem(dirs, key);
+		if (val != NULL)
+			v = PyInt_AS_LONG(val);
+
+		newval = PyInt_FromLong(v + 1);
+
+		if (newval == NULL)
+			goto bail;
+
+		/*
+		 * Do not store the result in ret: a success here must not
+		 * mask a failure in a later iteration.
+		 */
+		if (PyDict_SetItem(dirs, key, newval) == -1)
+			goto bail;
+		Py_CLEAR(key);
+		Py_CLEAR(newval);
+	}
+	ret = 0;
+
+bail:
+	Py_XDECREF(key);
+	Py_XDECREF(newval);
+
+	return ret;
+}
+
+/*
+ * Decrement the count of every ancestor directory of "path" in the
+ * dict "dirs", deleting directories whose count is 1 or less.
+ *
+ * Returns 0 on success, -1 on failure.  A ValueError is set if an
+ * ancestor directory is missing from "dirs".
+ */
+static int _delpath(PyObject *dirs, PyObject *path)
+{
+	Py_ssize_t pos = PyString_GET_SIZE(path);
+	PyObject *newval = NULL, *key = NULL;
+	int ret = -1;
+
+	while ((pos = _finddir(path, pos - 1)) != -1) {
+		PyObject *val;
+		long v;
+
+		key = PyString_FromStringAndSize(PyString_AS_STRING(path), pos);
+
+		if (key == NULL)
+			goto bail;
+
+		val = PyDict_GetItem(dirs, key);
+		if (val == NULL) {
+			PyErr_SetString(PyExc_ValueError,
+					"expected a value, found none");
+			goto bail;
+		}
+		v = PyInt_AS_LONG(val);
+
+		if (v <= 1) {
+			if (PyDict_DelItem(dirs, key) == -1)
+				goto bail;
+			/* drop our reference; key is reassigned next pass */
+			Py_CLEAR(key);
+			continue;
+		}
+		newval = PyInt_FromLong(v - 1);
+
+		if (newval == NULL)
+			goto bail;
+
+		/*
+		 * Do not store the result in ret: a success here must not
+		 * mask a failure in a later iteration.
+		 */
+		if (PyDict_SetItem(dirs, key, newval) == -1)
+			goto bail;
+		Py_CLEAR(key);
+		Py_CLEAR(newval);
+	}
+	ret = 0;
+
+bail:
+	Py_XDECREF(key);
+	Py_XDECREF(newval);
+
+	return ret;
+}
+
+/*
+ * Add every key of the dict "source" to "dirs".
+ *
+ * If "skipchar" is nonzero, each value must be a non-empty tuple
+ * whose first item is a non-empty string; entries whose string
+ * begins with "skipchar" are not added.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int dirs_fromdict(PyObject *dirs, PyObject *source, char skipchar)
+{
+	PyObject *key, *value;
+	Py_ssize_t pos = 0;
+
+	while (PyDict_Next(source, &pos, &key, &value)) {
+		if (!PyString_Check(key)) {
+			PyErr_SetString(PyExc_TypeError, "expected string key");
+			return -1;
+		}
+		if (skipchar) {
+			PyObject *st;
+
+			if (!PyTuple_Check(value) ||
+			    PyTuple_GET_SIZE(value) == 0) {
+				PyErr_SetString(PyExc_TypeError,
+						"expected non-empty tuple");
+				return -1;
+			}
+
+			st = PyTuple_GET_ITEM(value, 0);
+
+			if (!PyString_Check(st) || PyString_GET_SIZE(st) == 0) {
+				PyErr_SetString(PyExc_TypeError,
+						"expected non-empty string "
+						"at tuple index 0");
+				return -1;
+			}
+
+			if (PyString_AS_STRING(st)[0] == skipchar)
+				continue;
+		}
+
+		if (_addpath(dirs, key) == -1)
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Add every item produced by iterating over "source" to "dirs".
+ * Each item must be a string.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int dirs_fromiter(PyObject *dirs, PyObject *source)
+{
+	PyObject *iter, *item = NULL;
+	int ret;
+
+	iter = PyObject_GetIter(source);
+	if (iter == NULL)
+		return -1;
+
+	while ((item = PyIter_Next(iter)) != NULL) {
+		if (!PyString_Check(item)) {
+			PyErr_SetString(PyExc_TypeError, "expected string");
+			break;
+		}
+
+		if (_addpath(dirs, item) == -1)
+			break;
+		Py_CLEAR(item);
+	}
+
+	ret = PyErr_Occurred() ? -1 : 0;
+	Py_XDECREF(item);
+	/* PyObject_GetIter returned a new reference that we own */
+	Py_DECREF(iter);
+	return ret;
+}
+
+/*
+ * Calculate a refcounted set of directory names for the files in a
+ * dirstate.
+ *
+ * Accepts an optional source (a dict or any iterable of strings) and
+ * an optional skip character, which is only allowed with a dict.
+ */
+static int dirs_init(dirsObject *self, PyObject *args)
+{
+	PyObject *dirs = NULL, *source = NULL;
+	char skipchar = 0;
+	int ret = -1;
+
+	/* release any dict left over from an earlier __init__ call */
+	Py_CLEAR(self->dict);
+
+	if (!PyArg_ParseTuple(args, "|Oc:__init__", &source, &skipchar))
+		return -1;
+
+	dirs = PyDict_New();
+
+	if (dirs == NULL)
+		return -1;
+
+	if (source == NULL)
+		ret = 0;
+	else if (PyDict_Check(source))
+		ret = dirs_fromdict(dirs, source, skipchar);
+	else if (skipchar)
+		PyErr_SetString(PyExc_ValueError,
+				"skip character is only supported "
+				"with a dict source");
+	else
+		ret = dirs_fromiter(dirs, source);
+
+	if (ret == -1)
+		Py_XDECREF(dirs);
+	else
+		self->dict = dirs;
+
+	return ret;
+}
+
+/* dirs.addpath(path): increment the counts of path's directories. */
+PyObject *dirs_addpath(dirsObject *self, PyObject *args)
+{
+	PyObject *path;
+
+	if (!PyArg_ParseTuple(args, "O!:addpath", &PyString_Type, &path))
+		return NULL;
+
+	if (_addpath(self->dict, path) == -1)
+		return NULL;
+
+	Py_RETURN_NONE;
+}
+
+/* dirs.delpath(path): decrement the counts of path's directories. */
+static PyObject *dirs_delpath(dirsObject *self, PyObject *args)
+{
+	PyObject *path;
+
+	if (!PyArg_ParseTuple(args, "O!:delpath", &PyString_Type, &path))
+		return NULL;
+
+	if (_delpath(self->dict, path) == -1)
+		return NULL;
+
+	Py_RETURN_NONE;
+}
+
+/* "value in dirs": non-string values are never members. */
+static int dirs_contains(dirsObject *self, PyObject *value)
+{
+	return PyString_Check(value) ? PyDict_Contains(self->dict, value) : 0;
+}
+
+static void dirs_dealloc(dirsObject *self)
+{
+	Py_XDECREF(self->dict);
+	PyObject_Del(self);
+}
+
+/* iter(dirs): iterate over the directory names (the dict's keys). */
+static PyObject *dirs_iter(dirsObject *self)
+{
+	return PyObject_GetIter(self->dict);
+}
+
+static PySequenceMethods dirs_sequence_methods;
+
+static PyMethodDef dirs_methods[] = {
+	{"addpath", (PyCFunction)dirs_addpath, METH_VARARGS, "add a path"},
+	{"delpath", (PyCFunction)dirs_delpath, METH_VARARGS, "remove a path"},
+	{NULL} /* Sentinel */
+};
+
+static PyTypeObject dirsType = { PyObject_HEAD_INIT(NULL) };
+
+/*
+ * Fill in the "dirs" type at run time and add it to module "mod".
+ * Returns silently, without adding the type, if PyType_Ready fails.
+ */
+void dirs_module_init(PyObject *mod)
+{
+	dirs_sequence_methods.sq_contains = (objobjproc)dirs_contains;
+	dirsType.tp_name = "parsers.dirs";
+	dirsType.tp_new = PyType_GenericNew;
+	dirsType.tp_basicsize = sizeof(dirsObject);
+	dirsType.tp_dealloc = (destructor)dirs_dealloc;
+	dirsType.tp_as_sequence = &dirs_sequence_methods;
+	dirsType.tp_flags = Py_TPFLAGS_DEFAULT;
+	dirsType.tp_doc = "dirs";
+	dirsType.tp_iter = (getiterfunc)dirs_iter;
+	dirsType.tp_methods = dirs_methods;
+	dirsType.tp_init = (initproc)dirs_init;
+
+	if (PyType_Ready(&dirsType) < 0)
+		return;
+	Py_INCREF(&dirsType);
+
+	PyModule_AddObject(mod, "dirs", (PyObject *)&dirsType);
+}
diff -r d8ff607ef721 -r 02ee846b246a mercurial/parsers.c
--- a/mercurial/parsers.c	Wed Apr 10 15:08:26 2013 -0700
+++ b/mercurial/parsers.c	Wed Apr 10 15:08:27 2013 -0700
@@ -1528,8 +1528,12 @@
 	{NULL, NULL}
 };
 
+void dirs_module_init(PyObject *mod);
+
 static void module_init(PyObject *mod)
 {
+	dirs_module_init(mod);
+
 	indexType.tp_new = PyType_GenericNew;
 	if (PyType_Ready(&indexType) < 0)
 		return;
diff -r d8ff607ef721 -r 02ee846b246a mercurial/scmutil.py
--- a/mercurial/scmutil.py	Wed Apr 10 15:08:26 2013 -0700
+++ b/mercurial/scmutil.py	Wed Apr 10 15:08:27 2013 -0700
@@ -7,7 +7,7 @@
 
 from i18n import _
 from mercurial.node import nullrev
-import util, error, osutil, revset, similar, encoding, phases
+import util, error, osutil, revset, similar, encoding, phases, parsers
 import match as matchmod
 import os, errno, re, stat, glob
 
@@ -927,6 +927,9 @@
     def __contains__(self, d):
         return d in self._dirs
 
+if util.safehasattr(parsers, 'dirs'):
+    dirs = parsers.dirs
+
 def finddirs(path):
     pos = path.rfind('/')
     while pos != -1:
diff -r d8ff607ef721 -r 02ee846b246a setup.py
--- a/setup.py	Wed Apr 10 15:08:26 2013 -0700
+++ b/setup.py	Wed Apr 10 15:08:27 2013 -0700
@@ -427,7 +427,8 @@
     Extension('mercurial.bdiff', ['mercurial/bdiff.c']),
     Extension('mercurial.diffhelpers', ['mercurial/diffhelpers.c']),
     Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
-    Extension('mercurial.parsers', ['mercurial/parsers.c',
+    Extension('mercurial.parsers', ['mercurial/dirs.c',
+                                    'mercurial/parsers.c',
                                     'mercurial/pathencode.c']),
     ]
 