mercurial/parsers.c
changeset 6389 0231f763ebc8
child 6395 3f0294536b24
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/parsers.c	Wed Mar 26 10:12:10 2008 -0700
@@ -0,0 +1,168 @@
+/*
+ parsers.c - efficient content parsing
+
+ Copyright 2008 Matt Mackall <mpm@selenic.com> and others
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+*/
+
+#include <Python.h>
+#include <ctype.h>
+#include <string.h>
+
+static int hexdigit(char c)
+{
+	if (c >= '0' && c <= '9')
+		return c - '0';
+
+	if (c >= 'A' && c <= 'F')
+		return c - 'A' + 10;
+
+	if (c >= 'a' && c <= 'f')
+		return c - 'a' + 10;
+	
+	return -1;
+}
+
+/*
+ * Turn a hex-encoded string into binary.
+ */
+static PyObject *unhexlify(const char *str, int len)
+{
+	PyObject *ret = NULL;
+	char *c, *d;
+
+	if (len % 2) {
+		PyErr_SetString(PyExc_ValueError,
+				"input is not even in length");
+		goto bail;
+	}
+
+	ret = PyString_FromStringAndSize(NULL, len / 2);
+	if (!ret)
+		goto bail;
+
+	d = PyString_AsString(ret);
+	if (!d)
+		goto bail;
+
+	for (c = str; c < str + len;) {
+		int hi = hexdigit(*c++);
+		int lo = hexdigit(*c++);
+
+		if (hi == -1 || lo == -1) {
+			PyErr_SetString(PyExc_ValueError,
+					"input contains non-hex character");
+			goto bail;
+		}
+
+		*d++ = (hi << 4) | lo;
+	}
+	
+	goto done;
+	
+bail:
+	Py_XDECREF(ret);
+	ret = NULL;
+done:
+	return ret;
+}
+
+/*
+ * This code assumes that a manifest is stitched together with newline
+ * ('\n') characters.
+ */
+static PyObject *parse_manifest(PyObject *self, PyObject *args)
+{
+	PyObject *mfdict, *fdict;
+	char *str, *cur, *start, *zero;
+	int len;
+
+	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
+			      &PyDict_Type, &mfdict,
+			      &PyDict_Type, &fdict,
+			      &str, &len))
+		goto quit;
+
+	for (start = cur = str, zero = NULL; cur < str + len; cur++) {
+		PyObject *file = NULL, *node = NULL;
+		PyObject *flags = NULL;
+		int nlen;
+
+		if (!*cur) {
+			zero = cur;
+			continue;
+		}
+		else if (*cur != '\n')
+			continue;
+
+		if (!zero) {
+			PyErr_SetString(PyExc_ValueError,
+					"manifest entry has no separator");
+			goto quit;
+		}
+
+		file = PyString_FromStringAndSize(start, zero - start);
+		if (!file)
+			goto bail;
+
+		nlen = cur - zero - 1;
+
+		node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
+		if (!node)
+			goto bail;
+
+		if (nlen > 40) {
+			PyObject *flags;
+
+			flags = PyString_FromStringAndSize(zero + 41,
+							   nlen - 40);
+			if (!flags)
+				goto bail;
+
+			if (PyDict_SetItem(fdict, file, flags) == -1)
+				goto bail;
+		}
+
+		if (PyDict_SetItem(mfdict, file, node) == -1)
+			goto bail;
+
+		start = cur + 1;
+		zero = NULL;
+
+		Py_XDECREF(flags);
+		Py_XDECREF(node);
+		Py_XDECREF(file);
+		continue;
+	bail:
+		Py_XDECREF(flags);
+		Py_XDECREF(node);
+		Py_XDECREF(file);
+		goto quit;
+	}
+
+	if (len > 0 && *(cur - 1) != '\n') {
+		PyErr_SetString(PyExc_ValueError,
+				"manifest contains trailing garbage");
+		goto quit;
+	}
+
+	Py_INCREF(Py_None);
+	return Py_None;
+
+quit:
+	return NULL;
+}
+
+static char parsers_doc[] = "Efficient content parsing.";
+
+static PyMethodDef methods[] = {
+	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
+	{NULL, NULL}
+};
+
+PyMODINIT_FUNC initparsers(void)
+{
+	Py_InitModule3("parsers", methods, parsers_doc);
+}