cext: split character encoding functions to new compilation unit
authorYuya Nishihara <yuya@tcha.org>
Mon, 31 Jul 2017 22:28:27 +0900
changeset 33757 e9996bd7203f
parent 33756 5866ba5e9c48
child 33758 0f4ac3b6dee4
cext: split character encoding functions to new compilation unit This extracts charencode.c from parsers.c, which seems big enough for me to hesitate to add new JSON functions. Still charencode.o is linked to parsers.so to avoid duplication of binary codes.
mercurial/cext/charencode.c
mercurial/cext/manifest.c
mercurial/cext/parsers.c
setup.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/cext/charencode.c	Mon Jul 31 22:28:27 2017 +0900
@@ -0,0 +1,209 @@
+/*
+ charencode.c - miscellaneous character encoding
+
+ Copyright 2008 Matt Mackall <mpm@selenic.com> and others
+
+ This software may be used and distributed according to the terms of
+ the GNU General Public License, incorporated herein by reference.
+*/
+
+#include <Python.h>
+
+#include "util.h"
+
+static const char lowertable[128] = {
+	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
+	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
+	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
+	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
+	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
+	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
+	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
+	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
+	'\x40',
+	        '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
+	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
+	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
+	'\x78', '\x79', '\x7a',                                         /* X-Z */
+	                        '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
+	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
+	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
+	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
+	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
+};
+
+static const char uppertable[128] = {
+	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
+	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
+	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
+	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
+	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
+	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
+	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
+	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
+	'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
+	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
+	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
+	'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
+	'\x60',
+		'\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
+	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
+	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
+	'\x58', '\x59', '\x5a', 					/* x-z */
+				'\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
+};
+
+/*
+ * Turn a hex-encoded string into binary.
+ */
+PyObject *unhexlify(const char *str, int len)
+{
+	PyObject *ret;
+	char *d;
+	int i;
+
+	ret = PyBytes_FromStringAndSize(NULL, len / 2);
+
+	if (!ret)
+		return NULL;
+
+	d = PyBytes_AsString(ret);
+
+	for (i = 0; i < len;) {
+		int hi = hexdigit(str, i++);
+		int lo = hexdigit(str, i++);
+		*d++ = (hi << 4) | lo;
+	}
+
+	return ret;
+}
+
+static inline PyObject *_asciitransform(PyObject *str_obj,
+					const char table[128],
+					PyObject *fallback_fn)
+{
+	char *str, *newstr;
+	Py_ssize_t i, len;
+	PyObject *newobj = NULL;
+	PyObject *ret = NULL;
+
+	str = PyBytes_AS_STRING(str_obj);
+	len = PyBytes_GET_SIZE(str_obj);
+
+	newobj = PyBytes_FromStringAndSize(NULL, len);
+	if (!newobj)
+		goto quit;
+
+	newstr = PyBytes_AS_STRING(newobj);
+
+	for (i = 0; i < len; i++) {
+		char c = str[i];
+		if (c & 0x80) {
+			if (fallback_fn != NULL) {
+				ret = PyObject_CallFunctionObjArgs(fallback_fn,
+					str_obj, NULL);
+			} else {
+				PyObject *err = PyUnicodeDecodeError_Create(
+					"ascii", str, len, i, (i + 1),
+					"unexpected code byte");
+				PyErr_SetObject(PyExc_UnicodeDecodeError, err);
+				Py_XDECREF(err);
+			}
+			goto quit;
+		}
+		newstr[i] = table[(unsigned char)c];
+	}
+
+	ret = newobj;
+	Py_INCREF(ret);
+quit:
+	Py_XDECREF(newobj);
+	return ret;
+}
+
+PyObject *asciilower(PyObject *self, PyObject *args)
+{
+	PyObject *str_obj;
+	if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
+		return NULL;
+	return _asciitransform(str_obj, lowertable, NULL);
+}
+
+PyObject *asciiupper(PyObject *self, PyObject *args)
+{
+	PyObject *str_obj;
+	if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
+		return NULL;
+	return _asciitransform(str_obj, uppertable, NULL);
+}
+
+PyObject *make_file_foldmap(PyObject *self, PyObject *args)
+{
+	PyObject *dmap, *spec_obj, *normcase_fallback;
+	PyObject *file_foldmap = NULL;
+	enum normcase_spec spec;
+	PyObject *k, *v;
+	dirstateTupleObject *tuple;
+	Py_ssize_t pos = 0;
+	const char *table;
+
+	if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
+			      &PyDict_Type, &dmap,
+			      &PyInt_Type, &spec_obj,
+			      &PyFunction_Type, &normcase_fallback))
+		goto quit;
+
+	spec = (int)PyInt_AS_LONG(spec_obj);
+	switch (spec) {
+	case NORMCASE_LOWER:
+		table = lowertable;
+		break;
+	case NORMCASE_UPPER:
+		table = uppertable;
+		break;
+	case NORMCASE_OTHER:
+		table = NULL;
+		break;
+	default:
+		PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
+		goto quit;
+	}
+
+	/* Add some more entries to deal with additions outside this
+	   function. */
+	file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
+	if (file_foldmap == NULL)
+		goto quit;
+
+	while (PyDict_Next(dmap, &pos, &k, &v)) {
+		if (!dirstate_tuple_check(v)) {
+			PyErr_SetString(PyExc_TypeError,
+					"expected a dirstate tuple");
+			goto quit;
+		}
+
+		tuple = (dirstateTupleObject *)v;
+		if (tuple->state != 'r') {
+			PyObject *normed;
+			if (table != NULL) {
+				normed = _asciitransform(k, table,
+					normcase_fallback);
+			} else {
+				normed = PyObject_CallFunctionObjArgs(
+					normcase_fallback, k, NULL);
+			}
+
+			if (normed == NULL)
+				goto quit;
+			if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
+				Py_DECREF(normed);
+				goto quit;
+			}
+			Py_DECREF(normed);
+		}
+	}
+	return file_foldmap;
+quit:
+	Py_XDECREF(file_foldmap);
+	return NULL;
+}
--- a/mercurial/cext/manifest.c	Mon Jul 31 22:12:24 2017 +0900
+++ b/mercurial/cext/manifest.c	Mon Jul 31 22:28:27 2017 +0900
@@ -38,7 +38,7 @@
 #define MANIFEST_NOT_SORTED -2
 #define MANIFEST_MALFORMED -3
 
-/* defined in parsers.c */
+/* defined in charencode.c */
 PyObject *unhexlify(const char *str, int len);
 
 /* get the length of the path for a line */
--- a/mercurial/cext/parsers.c	Mon Jul 31 22:12:24 2017 +0900
+++ b/mercurial/cext/parsers.c	Mon Jul 31 22:28:27 2017 +0900
@@ -29,131 +29,11 @@
 
 static const char *const versionerrortext = "Python minor version mismatch";
 
-static const char lowertable[128] = {
-	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
-	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
-	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
-	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
-	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
-	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
-	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
-	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
-	'\x40',
-	        '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
-	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
-	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
-	'\x78', '\x79', '\x7a',                                         /* X-Z */
-	                        '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
-	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
-	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
-	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
-	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
-};
-
-static const char uppertable[128] = {
-	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
-	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
-	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
-	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
-	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
-	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
-	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
-	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
-	'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
-	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
-	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
-	'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
-	'\x60',
-		'\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
-	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
-	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
-	'\x58', '\x59', '\x5a', 					/* x-z */
-				'\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
-};
-
-/*
- * Turn a hex-encoded string into binary.
- */
-PyObject *unhexlify(const char *str, int len)
-{
-	PyObject *ret;
-	char *d;
-	int i;
-
-	ret = PyBytes_FromStringAndSize(NULL, len / 2);
-
-	if (!ret)
-		return NULL;
-
-	d = PyBytes_AsString(ret);
-
-	for (i = 0; i < len;) {
-		int hi = hexdigit(str, i++);
-		int lo = hexdigit(str, i++);
-		*d++ = (hi << 4) | lo;
-	}
-
-	return ret;
-}
-
-static inline PyObject *_asciitransform(PyObject *str_obj,
-					const char table[128],
-					PyObject *fallback_fn)
-{
-	char *str, *newstr;
-	Py_ssize_t i, len;
-	PyObject *newobj = NULL;
-	PyObject *ret = NULL;
-
-	str = PyBytes_AS_STRING(str_obj);
-	len = PyBytes_GET_SIZE(str_obj);
-
-	newobj = PyBytes_FromStringAndSize(NULL, len);
-	if (!newobj)
-		goto quit;
-
-	newstr = PyBytes_AS_STRING(newobj);
-
-	for (i = 0; i < len; i++) {
-		char c = str[i];
-		if (c & 0x80) {
-			if (fallback_fn != NULL) {
-				ret = PyObject_CallFunctionObjArgs(fallback_fn,
-					str_obj, NULL);
-			} else {
-				PyObject *err = PyUnicodeDecodeError_Create(
-					"ascii", str, len, i, (i + 1),
-					"unexpected code byte");
-				PyErr_SetObject(PyExc_UnicodeDecodeError, err);
-				Py_XDECREF(err);
-			}
-			goto quit;
-		}
-		newstr[i] = table[(unsigned char)c];
-	}
-
-	ret = newobj;
-	Py_INCREF(ret);
-quit:
-	Py_XDECREF(newobj);
-	return ret;
-}
-
-static PyObject *asciilower(PyObject *self, PyObject *args)
-{
-	PyObject *str_obj;
-	if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj))
-		return NULL;
-	return _asciitransform(str_obj, lowertable, NULL);
-}
-
-static PyObject *asciiupper(PyObject *self, PyObject *args)
-{
-	PyObject *str_obj;
-	if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj))
-		return NULL;
-	return _asciitransform(str_obj, uppertable, NULL);
-}
+/* defined in charencode.c */
+PyObject *unhexlify(const char *str, int len);
+PyObject *asciilower(PyObject *self, PyObject *args);
+PyObject *asciiupper(PyObject *self, PyObject *args);
+PyObject *make_file_foldmap(PyObject *self, PyObject *args);
 
 static PyObject *dict_new_presized(PyObject *self, PyObject *args)
 {
@@ -165,77 +45,6 @@
 	return _dict_new_presized(expected_size);
 }
 
-static PyObject *make_file_foldmap(PyObject *self, PyObject *args)
-{
-	PyObject *dmap, *spec_obj, *normcase_fallback;
-	PyObject *file_foldmap = NULL;
-	enum normcase_spec spec;
-	PyObject *k, *v;
-	dirstateTupleObject *tuple;
-	Py_ssize_t pos = 0;
-	const char *table;
-
-	if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap",
-			      &PyDict_Type, &dmap,
-			      &PyInt_Type, &spec_obj,
-			      &PyFunction_Type, &normcase_fallback))
-		goto quit;
-
-	spec = (int)PyInt_AS_LONG(spec_obj);
-	switch (spec) {
-	case NORMCASE_LOWER:
-		table = lowertable;
-		break;
-	case NORMCASE_UPPER:
-		table = uppertable;
-		break;
-	case NORMCASE_OTHER:
-		table = NULL;
-		break;
-	default:
-		PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
-		goto quit;
-	}
-
-	/* Add some more entries to deal with additions outside this
-	   function. */
-	file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
-	if (file_foldmap == NULL)
-		goto quit;
-
-	while (PyDict_Next(dmap, &pos, &k, &v)) {
-		if (!dirstate_tuple_check(v)) {
-			PyErr_SetString(PyExc_TypeError,
-					"expected a dirstate tuple");
-			goto quit;
-		}
-
-		tuple = (dirstateTupleObject *)v;
-		if (tuple->state != 'r') {
-			PyObject *normed;
-			if (table != NULL) {
-				normed = _asciitransform(k, table,
-					normcase_fallback);
-			} else {
-				normed = PyObject_CallFunctionObjArgs(
-					normcase_fallback, k, NULL);
-			}
-
-			if (normed == NULL)
-				goto quit;
-			if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
-				Py_DECREF(normed);
-				goto quit;
-			}
-			Py_DECREF(normed);
-		}
-	}
-	return file_foldmap;
-quit:
-	Py_XDECREF(file_foldmap);
-	return NULL;
-}
-
 /*
  * This code assumes that a manifest is stitched together with newline
  * ('\n') characters.
--- a/setup.py	Mon Jul 31 22:12:24 2017 +0900
+++ b/setup.py	Mon Jul 31 22:28:27 2017 +0900
@@ -760,7 +760,8 @@
                                         'mercurial/cext/mpatch.c'],
               include_dirs=common_include_dirs,
               depends=common_depends),
-    Extension('mercurial.cext.parsers', ['mercurial/cext/dirs.c',
+    Extension('mercurial.cext.parsers', ['mercurial/cext/charencode.c',
+                                         'mercurial/cext/dirs.c',
                                          'mercurial/cext/manifest.c',
                                          'mercurial/cext/parsers.c',
                                          'mercurial/cext/pathencode.c',