mercurial/parsers.c
changeset 6389 0231f763ebc8
child 6395 3f0294536b24
equal deleted inserted replaced
6388:1e4ddc9ac9f7 6389:0231f763ebc8
       
     1 /*
       
     2  parsers.c - efficient content parsing
       
     3 
       
     4  Copyright 2008 Matt Mackall <mpm@selenic.com> and others
       
     5 
       
     6  This software may be used and distributed according to the terms of
       
     7  the GNU General Public License, incorporated herein by reference.
       
     8 */
       
     9 
       
    10 #include <Python.h>
       
    11 #include <ctype.h>
       
    12 #include <string.h>
       
    13 
       
    14 static int hexdigit(char c)
       
    15 {
       
    16 	if (c >= '0' && c <= '9')
       
    17 		return c - '0';
       
    18 
       
    19 	if (c >= 'A' && c <= 'F')
       
    20 		return c - 'A' + 10;
       
    21 
       
    22 	if (c >= 'a' && c <= 'f')
       
    23 		return c - 'a' + 10;
       
    24 	
       
    25 	return -1;
       
    26 }
       
    27 
       
    28 /*
       
    29  * Turn a hex-encoded string into binary.
       
    30  */
       
    31 static PyObject *unhexlify(const char *str, int len)
       
    32 {
       
    33 	PyObject *ret = NULL;
       
    34 	char *c, *d;
       
    35 
       
    36 	if (len % 2) {
       
    37 		PyErr_SetString(PyExc_ValueError,
       
    38 				"input is not even in length");
       
    39 		goto bail;
       
    40 	}
       
    41 
       
    42 	ret = PyString_FromStringAndSize(NULL, len / 2);
       
    43 	if (!ret)
       
    44 		goto bail;
       
    45 
       
    46 	d = PyString_AsString(ret);
       
    47 	if (!d)
       
    48 		goto bail;
       
    49 
       
    50 	for (c = str; c < str + len;) {
       
    51 		int hi = hexdigit(*c++);
       
    52 		int lo = hexdigit(*c++);
       
    53 
       
    54 		if (hi == -1 || lo == -1) {
       
    55 			PyErr_SetString(PyExc_ValueError,
       
    56 					"input contains non-hex character");
       
    57 			goto bail;
       
    58 		}
       
    59 
       
    60 		*d++ = (hi << 4) | lo;
       
    61 	}
       
    62 	
       
    63 	goto done;
       
    64 	
       
    65 bail:
       
    66 	Py_XDECREF(ret);
       
    67 	ret = NULL;
       
    68 done:
       
    69 	return ret;
       
    70 }
       
    71 
       
    72 /*
       
    73  * This code assumes that a manifest is stitched together with newline
       
    74  * ('\n') characters.
       
    75  */
       
    76 static PyObject *parse_manifest(PyObject *self, PyObject *args)
       
    77 {
       
    78 	PyObject *mfdict, *fdict;
       
    79 	char *str, *cur, *start, *zero;
       
    80 	int len;
       
    81 
       
    82 	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
       
    83 			      &PyDict_Type, &mfdict,
       
    84 			      &PyDict_Type, &fdict,
       
    85 			      &str, &len))
       
    86 		goto quit;
       
    87 
       
    88 	for (start = cur = str, zero = NULL; cur < str + len; cur++) {
       
    89 		PyObject *file = NULL, *node = NULL;
       
    90 		PyObject *flags = NULL;
       
    91 		int nlen;
       
    92 
       
    93 		if (!*cur) {
       
    94 			zero = cur;
       
    95 			continue;
       
    96 		}
       
    97 		else if (*cur != '\n')
       
    98 			continue;
       
    99 
       
   100 		if (!zero) {
       
   101 			PyErr_SetString(PyExc_ValueError,
       
   102 					"manifest entry has no separator");
       
   103 			goto quit;
       
   104 		}
       
   105 
       
   106 		file = PyString_FromStringAndSize(start, zero - start);
       
   107 		if (!file)
       
   108 			goto bail;
       
   109 
       
   110 		nlen = cur - zero - 1;
       
   111 
       
   112 		node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
       
   113 		if (!node)
       
   114 			goto bail;
       
   115 
       
   116 		if (nlen > 40) {
       
   117 			PyObject *flags;
       
   118 
       
   119 			flags = PyString_FromStringAndSize(zero + 41,
       
   120 							   nlen - 40);
       
   121 			if (!flags)
       
   122 				goto bail;
       
   123 
       
   124 			if (PyDict_SetItem(fdict, file, flags) == -1)
       
   125 				goto bail;
       
   126 		}
       
   127 
       
   128 		if (PyDict_SetItem(mfdict, file, node) == -1)
       
   129 			goto bail;
       
   130 
       
   131 		start = cur + 1;
       
   132 		zero = NULL;
       
   133 
       
   134 		Py_XDECREF(flags);
       
   135 		Py_XDECREF(node);
       
   136 		Py_XDECREF(file);
       
   137 		continue;
       
   138 	bail:
       
   139 		Py_XDECREF(flags);
       
   140 		Py_XDECREF(node);
       
   141 		Py_XDECREF(file);
       
   142 		goto quit;
       
   143 	}
       
   144 
       
   145 	if (len > 0 && *(cur - 1) != '\n') {
       
   146 		PyErr_SetString(PyExc_ValueError,
       
   147 				"manifest contains trailing garbage");
       
   148 		goto quit;
       
   149 	}
       
   150 
       
   151 	Py_INCREF(Py_None);
       
   152 	return Py_None;
       
   153 
       
   154 quit:
       
   155 	return NULL;
       
   156 }
       
   157 
       
   158 static char parsers_doc[] = "Efficient content parsing.";
       
   159 
       
   160 static PyMethodDef methods[] = {
       
   161 	{"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"},
       
   162 	{NULL, NULL}
       
   163 };
       
   164 
       
   165 PyMODINIT_FUNC initparsers(void)
       
   166 {
       
   167 	Py_InitModule3("parsers", methods, parsers_doc);
       
   168 }