# HG changeset patch # User Matt Mackall # Date 1223842868 18000 # Node ID 16bafcebd3d1bf9f485f5eeafd57b138ab057369 # Parent fb3fc27617a2c71e8fbaa68d6389ae45ee4e3ace dirstate: C parsing extension diff -r fb3fc27617a2 -r 16bafcebd3d1 mercurial/dirstate.py --- a/mercurial/dirstate.py Sun Oct 12 15:21:08 2008 -0500 +++ b/mercurial/dirstate.py Sun Oct 12 15:21:08 2008 -0500 @@ -10,7 +10,7 @@ from node import nullid from i18n import _ import struct, os, stat, util, errno, ignore -import cStringIO, osutil, sys +import cStringIO, osutil, sys, parsers _unknown = ('?', 0, 0, 0) _format = ">cllll" @@ -190,8 +190,6 @@ def _read(self): self._map = {} self._copymap = {} - if not self._dirtypl: - self._pl = [nullid, nullid] try: st = self._opener("dirstate").read() except IOError, err: @@ -200,27 +198,9 @@ if not st: return + p = parsers.parse_dirstate(self._map, self._copymap, st); if not self._dirtypl: - self._pl = [st[:20], st[20: 40]] - - # deref fields so they will be local in loop - dmap = self._map - copymap = self._copymap - unpack = struct.unpack - e_size = struct.calcsize(_format) - pos1 = 40 - l = len(st) - - # the inner loop - while pos1 < l: - pos2 = pos1 + e_size - e = unpack(">cllll", st[pos1:pos2]) # a literal here is faster - pos1 = pos2 + e[4] - f = st[pos2:pos1] - if '\0' in f: - f, c = f.split('\0') - copymap[f] = c - dmap[f] = e # we hold onto e[4] because making a subtuple is slow + self._pl = p def invalidate(self): for a in "_map _copymap _foldmap _branch _pl _dirs _ignore".split(): @@ -274,7 +254,7 @@ self._dirty = True self._addpath(f) s = os.lstat(self._join(f)) - self._map[f] = ('n', s.st_mode, s.st_size, s.st_mtime, 0) + self._map[f] = ('n', s.st_mode, s.st_size, s.st_mtime) if f in self._copymap: del self._copymap[f] @@ -297,7 +277,7 @@ return self._dirty = True self._addpath(f) - self._map[f] = ('n', 0, -1, -1, 0) + self._map[f] = ('n', 0, -1, -1) if f in self._copymap: del self._copymap[f] @@ -305,7 +285,7 @@ 'mark a file normal, but dirty' 
self._dirty = True self._addpath(f) - self._map[f] = ('n', 0, -2, -1, 0) + self._map[f] = ('n', 0, -2, -1) if f in self._copymap: del self._copymap[f] @@ -313,7 +293,7 @@ 'mark a file added' self._dirty = True self._addpath(f, True) - self._map[f] = ('a', 0, -1, -1, 0) + self._map[f] = ('a', 0, -1, -1) if f in self._copymap: del self._copymap[f] @@ -328,7 +308,7 @@ size = -1 elif entry[0] == 'n' and entry[2] == -2: size = -2 - self._map[f] = ('r', 0, size, 0, 0) + self._map[f] = ('r', 0, size, 0) if size == 0 and f in self._copymap: del self._copymap[f] @@ -337,7 +317,7 @@ self._dirty = True s = os.lstat(self._join(f)) self._addpath(f) - self._map[f] = ('m', s.st_mode, s.st_size, s.st_mtime, 0) + self._map[f] = ('m', s.st_mode, s.st_size, s.st_mtime) if f in self._copymap: del self._copymap[f] @@ -373,9 +353,9 @@ self.clear() for f in files: if 'x' in files.flags(f): - self._map[f] = ('n', 0777, -1, 0, 0) + self._map[f] = ('n', 0777, -1, 0) else: - self._map[f] = ('n', 0666, -1, 0, 0) + self._map[f] = ('n', 0666, -1, 0) self._pl = (parent, nullid) self._dirty = True @@ -401,7 +381,7 @@ if f in copymap: f = "%s\0%s" % (f, copymap[f]) if e[3] > limit and e[0] == 'n': - e = (e[0], 0, -1, -1, 0) + e = (e[0], 0, -1, -1) e = pack(_format, e[0], e[1], e[2], e[3], len(f)) write(e) write(f) @@ -577,7 +557,7 @@ uadd(fn) continue - state, mode, size, time, foo = dmap[fn] + state, mode, size, time = dmap[fn] if not st and state in "nma": dadd(fn) diff -r fb3fc27617a2 -r 16bafcebd3d1 mercurial/parsers.c --- a/mercurial/parsers.c Sun Oct 12 15:21:08 2008 -0500 +++ b/mercurial/parsers.c Sun Oct 12 15:21:08 2008 -0500 @@ -128,15 +128,117 @@ Py_INCREF(Py_None); return Py_None; - quit: return NULL; } +#ifdef _WIN32 +# ifdef _MSC_VER +/* msvc 6.0 has problems */ +# define inline __inline +typedef unsigned long uint32_t; +# else +# include +# endif +static uint32_t ntohl(uint32_t x) +{ + return ((x & 0x000000ffUL) << 24) | + ((x & 0x0000ff00UL) << 8) | + ((x & 0x00ff0000UL) >> 8) | + 
((x & 0xff000000UL) >> 24);
+}
+#else
+/* not windows */
+# include <sys/types.h>
+# if defined __BEOS__ && !defined __HAIKU__
+#  include <ByteOrder.h>
+# else
+#  include <arpa/inet.h>
+# endif
+# include <inttypes.h>
+#endif
+
+/*
+ * parse_dirstate(dmap, cmap, data) -> (parent1, parent2)
+ *
+ * Parse the raw contents of a dirstate file.
+ *
+ * data starts with two 20-byte parent node ids, followed by a sequence
+ * of entries.  Each entry is a 17-byte header (a 1-byte state character
+ * and four big-endian 32-bit integers: mode, size, mtime and name
+ * length) followed by the name itself.  A name of the form
+ * "dest\0source" records that dest was copied from source.
+ *
+ * For every entry, dmap[name] is set to the tuple
+ * (state, mode, size, mtime); for copies, cmap[dest] is additionally
+ * set to source.  Parsing stops once fewer than 18 bytes remain.
+ *
+ * Returns a tuple holding the two parent ids, or NULL with an exception
+ * set: ValueError if data is shorter than 40 bytes or an entry's name
+ * length does not fit in the remaining data, otherwise whatever the
+ * failing Python C API call raised.  dmap and cmap may already have
+ * been partially filled when an error is reported.
+ */
+static PyObject *parse_dirstate(PyObject *self, PyObject *args)
+{
+	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
+	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
+	char *str, *cur, *end, *cpos;
+	int state, mode, size, mtime, flen;
+	int len;
+	uint32_t decode[4]; /* properly aligned copy of the header ints */
+
+	if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
+			      &PyDict_Type, &dmap,
+			      &PyDict_Type, &cmap,
+			      &str, &len))
+		goto quit;
+
+	/* read parents */
+	if (len < 40) {
+		/* returning NULL without an exception is a SystemError */
+		PyErr_SetString(PyExc_ValueError,
+				"too little data for parents");
+		goto quit;
+	}
+
+	parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
+	if (!parents)
+		goto quit;
+
+	/* read filenames */
+	cur = str + 40;
+	end = str + len;
+
+	while (cur < end - 17) {
+		/* unpack header */
+		state = *cur;
+		memcpy(decode, cur + 1, sizeof(decode));
+		mode = ntohl(decode[0]);
+		size = ntohl(decode[1]);
+		mtime = ntohl(decode[2]);
+		flen = ntohl(decode[3]);
+		cur += 17;
+		/*
+		 * Compare lengths rather than pointers so that a negative
+		 * or oversized length read from a corrupt file can never
+		 * be used to index outside the buffer.
+		 */
+		if (flen < 0 || flen > end - cur) {
+			PyErr_SetString(PyExc_ValueError,
+					"overflow in dirstate");
+			goto quit;
+		}
+
+		entry = Py_BuildValue("ciii", state, mode, size, mtime);
+		if (!entry)
+			goto quit;
+		/* only touch the tuple once we know it was created */
+		PyObject_GC_UnTrack(entry); /* don't waste time with this */
+
+		cpos = memchr(cur, 0, flen);
+		if (cpos) {
+			/* "dest\0source": record the copy as well */
+			fname = PyString_FromStringAndSize(cur, cpos - cur);
+			cname = PyString_FromStringAndSize(cpos + 1,
+							   flen - (cpos - cur) - 1);
+			if (!fname || !cname ||
+			    PyDict_SetItem(cmap, fname, cname) == -1 ||
+			    PyDict_SetItem(dmap, fname, entry) == -1)
+				goto quit;
+			Py_DECREF(cname);
+		} else {
+			fname = PyString_FromStringAndSize(cur, flen);
+			if (!fname ||
+			    PyDict_SetItem(dmap, fname, entry) == -1)
+				goto quit;
+		}
+		cur += flen;
+		Py_DECREF(fname);
+		Py_DECREF(entry);
+		/* the dicts own the references now; keep quit: from re-releasing */
+		fname = cname = entry = NULL;
+	}
+
+	ret = parents;
+	Py_INCREF(ret);
+quit:
+	Py_XDECREF(fname);
+	Py_XDECREF(cname);
+	Py_XDECREF(entry);
+	Py_XDECREF(parents);
+	return ret;
+}
+
+static char
parsers_doc[] = "Efficient content parsing."; static PyMethodDef methods[] = { {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, + {"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"}, {NULL, NULL} };