|
1 /* |
|
2 parsers.c - efficient content parsing |
|
3 |
|
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others |
|
5 |
|
6 This software may be used and distributed according to the terms of |
|
7 the GNU General Public License, incorporated herein by reference. |
|
8 */ |
|
9 |
|
10 #include <Python.h> |
|
11 #include <ctype.h> |
|
12 #include <string.h> |
|
13 |
|
/*
 * Map one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'A'-'F' and 'a'-'f'.  Returns the value in the
 * range 0..15, or -1 when c is not a hex digit.
 */
static int hexdigit(char c)
{
	switch (c) {
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
		return c - '0';
	case 'A': case 'B': case 'C':
	case 'D': case 'E': case 'F':
		return 10 + (c - 'A');
	case 'a': case 'b': case 'c':
	case 'd': case 'e': case 'f':
		return 10 + (c - 'a');
	default:
		return -1;
	}
}
|
27 |
|
28 /* |
|
29 * Turn a hex-encoded string into binary. |
|
30 */ |
|
31 static PyObject *unhexlify(const char *str, int len) |
|
32 { |
|
33 PyObject *ret = NULL; |
|
34 char *c, *d; |
|
35 |
|
36 if (len % 2) { |
|
37 PyErr_SetString(PyExc_ValueError, |
|
38 "input is not even in length"); |
|
39 goto bail; |
|
40 } |
|
41 |
|
42 ret = PyString_FromStringAndSize(NULL, len / 2); |
|
43 if (!ret) |
|
44 goto bail; |
|
45 |
|
46 d = PyString_AsString(ret); |
|
47 if (!d) |
|
48 goto bail; |
|
49 |
|
50 for (c = str; c < str + len;) { |
|
51 int hi = hexdigit(*c++); |
|
52 int lo = hexdigit(*c++); |
|
53 |
|
54 if (hi == -1 || lo == -1) { |
|
55 PyErr_SetString(PyExc_ValueError, |
|
56 "input contains non-hex character"); |
|
57 goto bail; |
|
58 } |
|
59 |
|
60 *d++ = (hi << 4) | lo; |
|
61 } |
|
62 |
|
63 goto done; |
|
64 |
|
65 bail: |
|
66 Py_XDECREF(ret); |
|
67 ret = NULL; |
|
68 done: |
|
69 return ret; |
|
70 } |
|
71 |
|
72 /* |
|
73 * This code assumes that a manifest is stitched together with newline |
|
74 * ('\n') characters. |
|
75 */ |
|
76 static PyObject *parse_manifest(PyObject *self, PyObject *args) |
|
77 { |
|
78 PyObject *mfdict, *fdict; |
|
79 char *str, *cur, *start, *zero; |
|
80 int len; |
|
81 |
|
82 if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest", |
|
83 &PyDict_Type, &mfdict, |
|
84 &PyDict_Type, &fdict, |
|
85 &str, &len)) |
|
86 goto quit; |
|
87 |
|
88 for (start = cur = str, zero = NULL; cur < str + len; cur++) { |
|
89 PyObject *file = NULL, *node = NULL; |
|
90 PyObject *flags = NULL; |
|
91 int nlen; |
|
92 |
|
93 if (!*cur) { |
|
94 zero = cur; |
|
95 continue; |
|
96 } |
|
97 else if (*cur != '\n') |
|
98 continue; |
|
99 |
|
100 if (!zero) { |
|
101 PyErr_SetString(PyExc_ValueError, |
|
102 "manifest entry has no separator"); |
|
103 goto quit; |
|
104 } |
|
105 |
|
106 file = PyString_FromStringAndSize(start, zero - start); |
|
107 if (!file) |
|
108 goto bail; |
|
109 |
|
110 nlen = cur - zero - 1; |
|
111 |
|
112 node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen); |
|
113 if (!node) |
|
114 goto bail; |
|
115 |
|
116 if (nlen > 40) { |
|
117 PyObject *flags; |
|
118 |
|
119 flags = PyString_FromStringAndSize(zero + 41, |
|
120 nlen - 40); |
|
121 if (!flags) |
|
122 goto bail; |
|
123 |
|
124 if (PyDict_SetItem(fdict, file, flags) == -1) |
|
125 goto bail; |
|
126 } |
|
127 |
|
128 if (PyDict_SetItem(mfdict, file, node) == -1) |
|
129 goto bail; |
|
130 |
|
131 start = cur + 1; |
|
132 zero = NULL; |
|
133 |
|
134 Py_XDECREF(flags); |
|
135 Py_XDECREF(node); |
|
136 Py_XDECREF(file); |
|
137 continue; |
|
138 bail: |
|
139 Py_XDECREF(flags); |
|
140 Py_XDECREF(node); |
|
141 Py_XDECREF(file); |
|
142 goto quit; |
|
143 } |
|
144 |
|
145 if (len > 0 && *(cur - 1) != '\n') { |
|
146 PyErr_SetString(PyExc_ValueError, |
|
147 "manifest contains trailing garbage"); |
|
148 goto quit; |
|
149 } |
|
150 |
|
151 Py_INCREF(Py_None); |
|
152 return Py_None; |
|
153 |
|
154 quit: |
|
155 return NULL; |
|
156 } |
|
157 |
|
158 static char parsers_doc[] = "Efficient content parsing."; |
|
159 |
|
160 static PyMethodDef methods[] = { |
|
161 {"parse_manifest", parse_manifest, METH_VARARGS, "parse a manifest\n"}, |
|
162 {NULL, NULL} |
|
163 }; |
|
164 |
|
165 PyMODINIT_FUNC initparsers(void) |
|
166 { |
|
167 Py_InitModule3("parsers", methods, parsers_doc); |
|
168 } |