dirstate: C parsing extension
author Matt Mackall <mpm@selenic.com>
Sun, 12 Oct 2008 15:21:08 -0500
changeset 7093 16bafcebd3d1
parent 7092 fb3fc27617a2
child 7094 a4769dec7773
dirstate: C parsing extension
mercurial/dirstate.py
mercurial/parsers.c
--- a/mercurial/dirstate.py	Sun Oct 12 15:21:08 2008 -0500
+++ b/mercurial/dirstate.py	Sun Oct 12 15:21:08 2008 -0500
@@ -10,7 +10,7 @@
 from node import nullid
 from i18n import _
 import struct, os, stat, util, errno, ignore
-import cStringIO, osutil, sys
+import cStringIO, osutil, sys, parsers
 
 _unknown = ('?', 0, 0, 0)
 _format = ">cllll"
@@ -190,8 +190,6 @@
     def _read(self):
         self._map = {}
         self._copymap = {}
-        if not self._dirtypl:
-            self._pl = [nullid, nullid]
         try:
             st = self._opener("dirstate").read()
         except IOError, err:
@@ -200,27 +198,9 @@
         if not st:
             return
 
+        p = parsers.parse_dirstate(self._map, self._copymap, st);
         if not self._dirtypl:
-            self._pl = [st[:20], st[20: 40]]
-
-        # deref fields so they will be local in loop
-        dmap = self._map
-        copymap = self._copymap
-        unpack = struct.unpack
-        e_size = struct.calcsize(_format)
-        pos1 = 40
-        l = len(st)
-
-        # the inner loop
-        while pos1 < l:
-            pos2 = pos1 + e_size
-            e = unpack(">cllll", st[pos1:pos2]) # a literal here is faster
-            pos1 = pos2 + e[4]
-            f = st[pos2:pos1]
-            if '\0' in f:
-                f, c = f.split('\0')
-                copymap[f] = c
-            dmap[f] = e # we hold onto e[4] because making a subtuple is slow
+            self._pl = p
 
     def invalidate(self):
         for a in "_map _copymap _foldmap _branch _pl _dirs _ignore".split():
@@ -274,7 +254,7 @@
         self._dirty = True
         self._addpath(f)
         s = os.lstat(self._join(f))
-        self._map[f] = ('n', s.st_mode, s.st_size, s.st_mtime, 0)
+        self._map[f] = ('n', s.st_mode, s.st_size, s.st_mtime)
         if f in self._copymap:
             del self._copymap[f]
 
@@ -297,7 +277,7 @@
                 return
         self._dirty = True
         self._addpath(f)
-        self._map[f] = ('n', 0, -1, -1, 0)
+        self._map[f] = ('n', 0, -1, -1)
         if f in self._copymap:
             del self._copymap[f]
 
@@ -305,7 +285,7 @@
         'mark a file normal, but dirty'
         self._dirty = True
         self._addpath(f)
-        self._map[f] = ('n', 0, -2, -1, 0)
+        self._map[f] = ('n', 0, -2, -1)
         if f in self._copymap:
             del self._copymap[f]
 
@@ -313,7 +293,7 @@
         'mark a file added'
         self._dirty = True
         self._addpath(f, True)
-        self._map[f] = ('a', 0, -1, -1, 0)
+        self._map[f] = ('a', 0, -1, -1)
         if f in self._copymap:
             del self._copymap[f]
 
@@ -328,7 +308,7 @@
                 size = -1
             elif entry[0] == 'n' and entry[2] == -2:
                 size = -2
-        self._map[f] = ('r', 0, size, 0, 0)
+        self._map[f] = ('r', 0, size, 0)
         if size == 0 and f in self._copymap:
             del self._copymap[f]
 
@@ -337,7 +317,7 @@
         self._dirty = True
         s = os.lstat(self._join(f))
         self._addpath(f)
-        self._map[f] = ('m', s.st_mode, s.st_size, s.st_mtime, 0)
+        self._map[f] = ('m', s.st_mode, s.st_size, s.st_mtime)
         if f in self._copymap:
             del self._copymap[f]
 
@@ -373,9 +353,9 @@
         self.clear()
         for f in files:
             if 'x' in files.flags(f):
-                self._map[f] = ('n', 0777, -1, 0, 0)
+                self._map[f] = ('n', 0777, -1, 0)
             else:
-                self._map[f] = ('n', 0666, -1, 0, 0)
+                self._map[f] = ('n', 0666, -1, 0)
         self._pl = (parent, nullid)
         self._dirty = True
 
@@ -401,7 +381,7 @@
             if f in copymap:
                 f = "%s\0%s" % (f, copymap[f])
             if e[3] > limit and e[0] == 'n':
-                e = (e[0], 0, -1, -1, 0)
+                e = (e[0], 0, -1, -1)
             e = pack(_format, e[0], e[1], e[2], e[3], len(f))
             write(e)
             write(f)
@@ -577,7 +557,7 @@
                     uadd(fn)
                 continue
 
-            state, mode, size, time, foo = dmap[fn]
+            state, mode, size, time = dmap[fn]
 
             if not st and state in "nma":
                 dadd(fn)
--- a/mercurial/parsers.c	Sun Oct 12 15:21:08 2008 -0500
+++ b/mercurial/parsers.c	Sun Oct 12 15:21:08 2008 -0500
@@ -128,15 +128,117 @@
 
 	Py_INCREF(Py_None);
 	return Py_None;
-
 quit:
 	return NULL;
 }
 
+#ifdef _WIN32
+# ifdef _MSC_VER
+/* msvc 6.0 has problems */
+#  define inline __inline
+typedef unsigned long uint32_t;
+# else
+#  include <stdint.h>
+# endif
+static uint32_t ntohl(uint32_t x) /* local ntohl: reverse the 4 bytes of x */
+{ /* NOTE(review): swaps unconditionally -- assumes a little-endian host */
+	return ((x & 0x000000ffUL) << 24) | /* lowest byte moves to the top */
+		((x & 0x0000ff00UL) <<  8) |
+		((x & 0x00ff0000UL) >>  8) |
+		((x & 0xff000000UL) >> 24); /* highest byte moves to the bottom */
+}
+#else
+/* not windows */
+# include <sys/types.h>
+# if defined __BEOS__ && !defined __HAIKU__
+#  include <ByteOrder.h>
+# else
+#  include <arpa/inet.h>
+# endif
+# include <inttypes.h>
+#endif
+
+/* Parse dirstate data into dmap (file -> entry) and cmap (file -> copy source).
+ * Returns a tuple of the two 20-byte parents, or NULL with an exception set. */
+static PyObject *parse_dirstate(PyObject *self, PyObject *args)
+{
+	PyObject *dmap, *cmap, *parents = NULL, *ret = NULL;
+	PyObject *fname = NULL, *cname = NULL, *entry = NULL;
+	char *str, *cur, *end, *cpos;
+	int state, mode, size, mtime, flen, len;
+	uint32_t decode[4]; /* aligned copy of the four header ints */
+
+	if (!PyArg_ParseTuple(args, "O!O!s#:parse_dirstate",
+			      &PyDict_Type, &dmap, &PyDict_Type, &cmap, &str, &len))
+		goto quit;
+
+	/* read parents: the first 40 bytes hold two 20-byte ids */
+	if (len < 40) {
+		PyErr_SetString(PyExc_ValueError, "too little data for parents");
+		goto quit;
+	}
+	parents = Py_BuildValue("s#s#", str, 20, str + 20, 20);
+	if (!parents)
+		goto quit;
+
+	cur = str + 40; /* file entries follow the parents */
+	end = str + len;
+	while (cur < end - 17) {
+		/* unpack header: 1 state byte + four big-endian 32-bit ints */
+		state = *cur;
+		memcpy(decode, cur + 1, 16);
+		mode = ntohl(decode[0]);
+		size = ntohl(decode[1]);
+		mtime = ntohl(decode[2]);
+		flen = ntohl(decode[3]);
+		cur += 17;
+		if (flen < 0 || flen > end - cur) { /* name must fit in the data */
+			PyErr_SetString(PyExc_ValueError, "overflow in dirstate");
+			goto quit;
+		}
+		entry = Py_BuildValue("ciii", state, mode, size, mtime);
+		if (!entry)
+			goto quit;
+		PyObject_GC_UnTrack(entry); /* only a string and ints inside */
+
+		/* a NUL inside the name separates the file from its copy source */
+		cpos = memchr(cur, 0, flen);
+		if (cpos) {
+			fname = PyString_FromStringAndSize(cur, cpos - cur);
+			cname = PyString_FromStringAndSize(cpos + 1,
+							   flen - (cpos - cur) - 1);
+			if (!fname || !cname ||
+			    PyDict_SetItem(cmap, fname, cname) == -1 ||
+			    PyDict_SetItem(dmap, fname, entry) == -1)
+				goto quit;
+			Py_DECREF(cname);
+		} else {
+			fname = PyString_FromStringAndSize(cur, flen);
+			if (!fname ||
+			    PyDict_SetItem(dmap, fname, entry) == -1)
+				goto quit;
+		}
+		cur += flen;
+		Py_DECREF(fname);
+		Py_DECREF(entry);
+		fname = cname = entry = NULL;
+	}
+
+	ret = parents;
+	Py_INCREF(ret);
+quit:
+	Py_XDECREF(fname);
+	Py_XDECREF(cname);
+	Py_XDECREF(entry);
+	Py_XDECREF(parents);
+	return ret;
+}
+
 static char parsers_doc[] = "Efficient content parsing.";
 
 static PyMethodDef methods[] = {
 	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
+	{"parse_dirstate", parse_dirstate, METH_VARARGS, "parse a dirstate\n"},
 	{NULL, NULL}
 };