mercurial/cext/charencode.h
author Yuya Nishihara <yuya@tcha.org>
Sun, 23 Apr 2017 14:47:52 +0900
changeset 33925 2c37f9dabc32
parent 33759 a22339d389d4
child 33926 f4433f2713d0
permissions -rw-r--r--
encoding: add fast path of jsonescape() (issue5533)

This isn't highly optimized as it copies characters one by one, but seems
reasonably simple and not slow.

(with mercurial repo)
$ export HGRCPATH=/dev/null HGPLAIN=
$ hg log --time --config experimental.stabilization=all -Tjson > /dev/null

(original)
time: real 6.830 secs (user 6.740+0.000 sys 0.080+0.000)
time: real 6.690 secs (user 6.650+0.000 sys 0.040+0.000)
time: real 6.700 secs (user 6.640+0.000 sys 0.060+0.000)

(this patch)
time: real 5.630 secs (user 5.550+0.000 sys 0.070+0.000)
time: real 5.700 secs (user 5.650+0.000 sys 0.050+0.000)
time: real 5.690 secs (user 5.640+0.000 sys 0.050+0.000)

/*
 charencode.h - miscellaneous character encoding

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/

#ifndef _HG_CHARENCODE_H_
#define _HG_CHARENCODE_H_

#include <Python.h>
#include "compat.h"

/*
 * Case-normalization specifiers, listed in ascending numeric order.
 * The numeric values must stay in sync with normcasespecs in encoding.py.
 */
enum normcase_spec {
	NORMCASE_LOWER = -1,
	NORMCASE_OTHER = 0,
	NORMCASE_UPPER = 1
};

/*
 * Prototypes only; the definitions live outside this header.
 *
 * NOTE(review): the functions taking (self, args) have the shape of Python
 * C-API method entry points -- confirm where they are registered.
 * NOTE(review): unhexlify() presumably decodes `len` bytes of hex text
 * starting at `str` -- confirm against its definition.
 */
PyObject *unhexlify(const char *str, Py_ssize_t len);
PyObject *asciilower(PyObject *self, PyObject *args);
PyObject *asciiupper(PyObject *self, PyObject *args);
PyObject *make_file_foldmap(PyObject *self, PyObject *args);
PyObject *jsonescapeu8fast(PyObject *self, PyObject *args);

/*
 * Lookup table indexed by byte value (0-255), 16 entries per row.
 * Entries for the characters '0'-'9', 'A'-'F' and 'a'-'f' hold the value
 * of that hex digit (0-15); every other entry is -1.
 */
static const int8_t hextable[256] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, /* '0'-'9' */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 'A'-'F' */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 'a'-'f' */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/*
 * Return the value (0-15) of the hex digit stored at p[off].
 *
 * If that byte is not a hex digit, a Python ValueError is set and 0 is
 * returned. Since 0 is also the value of the digit '0', the return value
 * alone does not signal failure; check the Python error indicator for that.
 */
static inline int hexdigit(const char *p, Py_ssize_t off)
{
	/* Index with an unsigned byte so bytes >= 0x80 stay inside the table. */
	const unsigned char byte = (unsigned char)p[off];
	const int nibble = hextable[byte];

	if (nibble < 0) {
		PyErr_SetString(PyExc_ValueError,
		                "input contains non-hex character");
		return 0;
	}

	return nibble;
}

#endif /* _HG_CHARENCODE_H_ */