Add a base85 codec
author Brendan Cully <brendan@kublai.com>
Fri, 06 Oct 2006 13:01:54 -0700
changeset 3283 1f2c3983a6c5
parent 3279 4b2d3c8a6195
child 3288 e93c926e069e
Add a base85 codec
mercurial/base85.c
setup.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/base85.c	Fri Oct 06 13:01:54 2006 -0700
@@ -0,0 +1,132 @@
+/*
+ base85 codec
+
+ Copyright 2006 Brendan Cully <brendan@kublai.com>
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+
+ Largely based on git's implementation
+*/
+
+#include <Python.h>
+
+/* The base85 alphabet: a symbol's index in this string is its digit value. */
+static const char b85chars[] =
+	"0123456789"
+	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+	"abcdefghijklmnopqrstuvwxyz"
+	"!#$%&()*+-;<=>?@^_`{|}~";
+/* Reverse lookup indexed by byte value, filled in by b85prep(), which
+ * stores digit value + 1 so that 0 can stand for "no entry". */
+static char b85dec[256];
+
+/*
+ * Build the reverse lookup table used by the decoder.
+ *
+ * Every slot of b85dec is cleared to 0, then the slot of each alphabet
+ * symbol receives that symbol's digit value plus one, so a 0 entry keeps
+ * meaning "not a base85 symbol".
+ */
+static void
+b85prep(void)
+{
+	size_t i;
+
+	memset(b85dec, 0, sizeof(b85dec));
+	/* sizeof counts the string's terminating NUL; stop one short of it so
+	 * that byte 0 is not registered as an 86th symbol. */
+	for (i = 0; i + 1 < sizeof(b85chars); i++)
+		b85dec[(unsigned char)b85chars[i]] = (char)(i + 1);
+}
+
+/*
+ * b85encode(text): encode a byte string as base85.
+ *
+ * Each group of up to 4 input bytes is packed big-endian into a 32-bit
+ * accumulator and emitted as 5 symbols from b85chars, most significant
+ * digit first.  A final group shorter than 4 bytes is zero-padded, so
+ * the result length is always a multiple of 5.
+ *
+ * Returns a new string object, or NULL with an exception set if the
+ * arguments cannot be parsed, the encoded size does not fit in an int,
+ * or the output string cannot be allocated.
+ */
+static PyObject *
+b85encode(PyObject *self, PyObject *args)
+{
+	const unsigned char *text;
+	PyObject *out;
+	char *dst;
+	int len, olen, i;
+	unsigned int acc, val, ch;
+
+	if (!PyArg_ParseTuple(args, "s#", &text, &len))
+		return NULL;
+
+	/* Every started 4-byte group yields 5 symbols.  Refuse inputs whose
+	 * encoded size cannot be represented instead of letting the signed
+	 * arithmetic below overflow. */
+	if (len > INT_MAX - 3 || (len + 3) / 4 > INT_MAX / 5)
+		return PyErr_NoMemory();
+
+	olen = (len + 3) / 4 * 5;
+	if (!(out = PyString_FromStringAndSize(NULL, olen)))
+		return NULL;
+
+	dst = PyString_AS_STRING(out);
+
+	while (len)
+	{
+		/* Pack up to 4 bytes, first byte in the top 8 bits; bytes that
+		 * are missing from a short final group stay zero. */
+		acc = 0;
+		for (i = 24; i >= 0; i -= 8) {
+			ch = *text++;
+			acc |= ch << i;
+			if (--len == 0)
+				break;
+		}
+		/* Peel off base85 digits from the least significant end,
+		 * filling the 5-symbol slot from right to left. */
+		for (i = 4; i >= 0; i--) {
+			val = acc % 85;
+			acc /= 85;
+			dst[i] = b85chars[val];
+		}
+		dst += 5;
+	}
+
+	return out;
+}
+
+/*
+ * b85decode(text): decode base85 text back into a byte string.
+ *
+ * The input is read in groups of 5 symbols; each group is converted to a
+ * 32-bit value and written out as 4 bytes, most significant byte first.
+ * The result is therefore len / 5 * 4 bytes long.
+ *
+ * Returns a new string object, or NULL with an exception set.  Raises
+ * ValueError when
+ *   - the input length is not a multiple of 5,
+ *   - a byte is not part of the base85 alphabet (the message gives the
+ *     0-based offset of that byte), or
+ *   - a group encodes a value larger than 0xffffffff (the message gives
+ *     the 0-based offset of the first symbol of the group).
+ *
+ * NOTE: the arithmetic relies on unsigned int being 32 bits wide.
+ */
+static PyObject *
+b85decode(PyObject *self, PyObject *args)
+{
+	PyObject *out;
+	const char *text;
+	char *dst;
+	int len, i, j, olen, c;
+	unsigned int acc;
+
+	if (!PyArg_ParseTuple(args, "s#", &text, &len))
+		return NULL;
+
+	/* Only whole groups can be decoded; checking up front keeps the loop
+	 * below from ever stepping past the end of either buffer. */
+	if (len % 5 != 0)
+		return PyErr_Format(PyExc_ValueError, "Bad base85 length %d: not a multiple of 5", len);
+
+	olen = len / 5 * 4;
+	if (!(out = PyString_FromStringAndSize(NULL, olen)))
+		return NULL;
+
+	dst = PyString_AS_STRING(out);
+
+	for (i = 0; i < len; i += 5)
+	{
+		acc = 0;
+		c = 0;
+		for (j = 0; j < 5; j++)
+		{
+			/* Index through unsigned char: a plain char may be
+			 * negative for bytes >= 0x80. */
+			c = b85dec[(unsigned char)text[i + j]] - 1;
+			if (c < 0)
+			{
+				Py_DECREF(out);
+				return PyErr_Format(PyExc_ValueError, "Bad base85 character at position %d", i + j);
+			}
+			/* The fifth digit is folded in after the overflow
+			 * check below. */
+			if (j < 4)
+				acc = acc * 85 + c;
+		}
+		/* overflow detection: 0xffffffff == "|NsC0",
+		 * "|NsC" == 0x03030303 */
+		if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c)
+		{
+			Py_DECREF(out);
+			return PyErr_Format(PyExc_ValueError, "Bad base85 sequence at position %d", i);
+		}
+
+		acc += c;
+
+		/* Rotate left by 8 bits each step so the next most significant
+		 * byte lands in the low 8 bits, then store it. */
+		for (j = 0; j < 4; j++)
+		{
+			acc = (acc << 8) | (acc >> 24);
+			*dst++ = (char)acc;
+		}
+	}
+
+	return out;
+}
+
+/* Docstring attached to the module when it is initialised. */
+static char base85_doc[] = "Base85 Data Encoding";
+
+/* Functions exported by the module; the all-NULL entry terminates the
+ * table. */
+static PyMethodDef methods[] = {
+	{ "b85encode", b85encode, METH_VARARGS, "encode text in base85\n" },
+	{ "b85decode", b85decode, METH_VARARGS, "decode base85 text\n" },
+	{ NULL, NULL, 0, NULL }
+};
+
+/*
+ * Initialisation function for the "base85" extension module: fills the
+ * decoder lookup table and registers the method table together with the
+ * module docstring.
+ */
+PyMODINIT_FUNC
+initbase85(void)
+{
+	b85prep();
+	(void)Py_InitModule3("base85", methods, base85_doc);
+}
--- a/setup.py	Fri Oct 06 12:58:50 2006 -0700
+++ b/setup.py	Fri Oct 06 13:01:54 2006 -0700
@@ -89,7 +89,8 @@
       license='GNU GPL',
       packages=['mercurial', 'mercurial.hgweb', 'hgext'],
       ext_modules=[Extension('mercurial.mpatch', ['mercurial/mpatch.c']),
-                   Extension('mercurial.bdiff', ['mercurial/bdiff.c'])],
+                   Extension('mercurial.bdiff', ['mercurial/bdiff.c']),
+                   Extension('mercurial.base85', ['mercurial/base85.c'])],
       data_files=[(os.path.join('mercurial', root),
                    [os.path.join(root, file_) for file_ in files])
                   for root, dirs, files in os.walk('templates')],