mercurial/cext/charencode.h
author Yuya Nishihara <yuya@tcha.org>
Sun, 23 Apr 2017 12:59:42 +0900
changeset 33926 f4433f2713d0
parent 33925 2c37f9dabc32
child 36058 81199632fa42
permissions -rw-r--r--
encoding: add function to test if a str consists of ASCII characters Most strings are ASCII. Let's optimize for it. Using uint64_t is slightly faster than uint32_t on 64bit system, but there isn't huge difference.

/*
 charencode.h - miscellaneous character encoding

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/

#ifndef _HG_CHARENCODE_H_
#define _HG_CHARENCODE_H_

#include <Python.h>
#include "compat.h"

/* This should be kept in sync with normcasespecs in encoding.py. */
enum normcase_spec {
	NORMCASE_LOWER = -1,
	NORMCASE_UPPER = 1,
	NORMCASE_OTHER = 0
};

PyObject *unhexlify(const char *str, Py_ssize_t len);
PyObject *isasciistr(PyObject *self, PyObject *args);
PyObject *asciilower(PyObject *self, PyObject *args);
PyObject *asciiupper(PyObject *self, PyObject *args);
PyObject *make_file_foldmap(PyObject *self, PyObject *args);
PyObject *jsonescapeu8fast(PyObject *self, PyObject *args);

static const int8_t hextable[256] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, /* 0-9 */
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

static inline int hexdigit(const char *p, Py_ssize_t off)
{
	int8_t val = hextable[(unsigned char)p[off]];

	if (val >= 0) {
		return val;
	}

	PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
	return 0;
}

#endif /* _HG_CHARENCODE_H_ */