changeset 30656 | 16f4b341288d |
parent 28528 | a011080fdb7b |
child 37594 | b1f62cd39b5c |
30655:f35397fe0c04 | 30656:16f4b341288d |
---|---|
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 */ |
29 */ |
30 |
30 |
31 #include <Python.h> |
31 #include <Python.h> |
32 #include <bytesobject.h> |
|
32 #ifdef _MSC_VER |
33 #ifdef _MSC_VER |
33 #define inline __inline |
34 #define inline __inline |
34 #include "msc_stdint.h" |
35 #if _MSC_VER >= 1800 |
36 #include <stdint.h> |
|
37 #else |
|
38 // The compiler associated with Python 2.7 on Windows doesn't ship |
|
39 // with stdint.h, so define the small subset that we use here. |
|
40 typedef __int8 int8_t; |
|
41 typedef __int16 int16_t; |
|
42 typedef __int32 int32_t; |
|
43 typedef __int64 int64_t; |
|
44 typedef unsigned __int8 uint8_t; |
|
45 typedef unsigned __int16 uint16_t; |
|
46 typedef unsigned __int32 uint32_t; |
|
47 typedef unsigned __int64 uint64_t; |
|
48 #define UINT32_MAX 4294967295U |
|
35 #endif |
49 #endif |
36 |
50 #endif |
51 |
|
52 // clang-format off |
|
37 /* Return the smallest size int that can store the value */ |
53 /* Return the smallest size int that can store the value */ |
38 #define INT_SIZE(x) (((x) == ((int8_t)x)) ? 1 : \ |
54 #define INT_SIZE(x) (((x) == ((int8_t)x)) ? 1 : \ |
39 ((x) == ((int16_t)x)) ? 2 : \ |
55 ((x) == ((int16_t)x)) ? 2 : \ |
40 ((x) == ((int32_t)x)) ? 4 : 8) |
56 ((x) == ((int32_t)x)) ? 4 : 8) |
41 |
57 |
42 #define BSER_ARRAY 0x00 |
58 #define BSER_ARRAY 0x00 |
43 #define BSER_OBJECT 0x01 |
59 #define BSER_OBJECT 0x01 |
44 #define BSER_STRING 0x02 |
60 #define BSER_BYTESTRING 0x02 |
45 #define BSER_INT8 0x03 |
61 #define BSER_INT8 0x03 |
46 #define BSER_INT16 0x04 |
62 #define BSER_INT16 0x04 |
47 #define BSER_INT32 0x05 |
63 #define BSER_INT32 0x05 |
48 #define BSER_INT64 0x06 |
64 #define BSER_INT64 0x06 |
49 #define BSER_REAL 0x07 |
65 #define BSER_REAL 0x07 |
50 #define BSER_TRUE 0x08 |
66 #define BSER_TRUE 0x08 |
51 #define BSER_FALSE 0x09 |
67 #define BSER_FALSE 0x09 |
52 #define BSER_NULL 0x0a |
68 #define BSER_NULL 0x0a |
53 #define BSER_TEMPLATE 0x0b |
69 #define BSER_TEMPLATE 0x0b |
54 #define BSER_SKIP 0x0c |
70 #define BSER_SKIP 0x0c |
71 #define BSER_UTF8STRING 0x0d |
|
72 // clang-format on |
|
55 |
73 |
56 // An immutable object representation of BSER_OBJECT. |
74 // An immutable object representation of BSER_OBJECT. |
57 // Rather than build a hash table, key -> value are obtained |
75 // Rather than build a hash table, key -> value are obtained |
58 // by walking the list of keys to determine the offset into |
76 // by walking the list of keys to determine the offset into |
59 // the values array. The assumption is that the number of |
77 // the values array. The assumption is that the number of |
62 // so that the time overhead for this is small compared to |
80 // so that the time overhead for this is small compared to |
63 // using a proper hash table. Even with this simplistic |
81 // using a proper hash table. Even with this simplistic |
64 // approach, this is still faster for the mercurial use case |
82 // approach, this is still faster for the mercurial use case |
65 // as it helps to eliminate creating N other objects to |
83 // as it helps to eliminate creating N other objects to |
66 // represent the stat information in the hgwatchman extension |
84 // represent the stat information in the hgwatchman extension |
85 // clang-format off |
|
67 typedef struct { |
86 typedef struct { |
68 PyObject_HEAD |
87 PyObject_HEAD |
69 PyObject *keys; // tuple of field names |
88 PyObject *keys; // tuple of field names |
70 PyObject *values; // tuple of values |
89 PyObject *values; // tuple of values |
71 } bserObject; |
90 } bserObject; |
72 |
91 // clang-format on |
73 static Py_ssize_t bserobj_tuple_length(PyObject *o) { |
92 |
74 bserObject *obj = (bserObject*)o; |
93 static Py_ssize_t bserobj_tuple_length(PyObject* o) { |
94 bserObject* obj = (bserObject*)o; |
|
75 |
95 |
76 return PySequence_Length(obj->keys); |
96 return PySequence_Length(obj->keys); |
77 } |
97 } |
78 |
98 |
79 static PyObject *bserobj_tuple_item(PyObject *o, Py_ssize_t i) { |
99 static PyObject* bserobj_tuple_item(PyObject* o, Py_ssize_t i) { |
80 bserObject *obj = (bserObject*)o; |
100 bserObject* obj = (bserObject*)o; |
81 |
101 |
82 return PySequence_GetItem(obj->values, i); |
102 return PySequence_GetItem(obj->values, i); |
83 } |
103 } |
84 |
104 |
105 // clang-format off |
|
85 static PySequenceMethods bserobj_sq = { |
106 static PySequenceMethods bserobj_sq = { |
86 bserobj_tuple_length, /* sq_length */ |
107 bserobj_tuple_length, /* sq_length */ |
87 0, /* sq_concat */ |
108 0, /* sq_concat */ |
88 0, /* sq_repeat */ |
109 0, /* sq_repeat */ |
89 bserobj_tuple_item, /* sq_item */ |
110 bserobj_tuple_item, /* sq_item */ |
90 0, /* sq_ass_item */ |
111 0, /* sq_ass_item */ |
91 0, /* sq_contains */ |
112 0, /* sq_contains */ |
92 0, /* sq_inplace_concat */ |
113 0, /* sq_inplace_concat */ |
93 0 /* sq_inplace_repeat */ |
114 0 /* sq_inplace_repeat */ |
94 }; |
115 }; |
95 |
116 // clang-format on |
96 static void bserobj_dealloc(PyObject *o) { |
117 |
97 bserObject *obj = (bserObject*)o; |
118 static void bserobj_dealloc(PyObject* o) { |
119 bserObject* obj = (bserObject*)o; |
|
98 |
120 |
99 Py_CLEAR(obj->keys); |
121 Py_CLEAR(obj->keys); |
100 Py_CLEAR(obj->values); |
122 Py_CLEAR(obj->values); |
101 PyObject_Del(o); |
123 PyObject_Del(o); |
102 } |
124 } |
103 |
125 |
104 static PyObject *bserobj_getattrro(PyObject *o, PyObject *name) { |
126 static PyObject* bserobj_getattrro(PyObject* o, PyObject* name) { |
105 bserObject *obj = (bserObject*)o; |
127 bserObject* obj = (bserObject*)o; |
106 Py_ssize_t i, n; |
128 Py_ssize_t i, n; |
107 const char *namestr; |
129 PyObject* name_bytes = NULL; |
130 PyObject* ret = NULL; |
|
131 const char* namestr; |
|
108 |
132 |
109 if (PyIndex_Check(name)) { |
133 if (PyIndex_Check(name)) { |
110 i = PyNumber_AsSsize_t(name, PyExc_IndexError); |
134 i = PyNumber_AsSsize_t(name, PyExc_IndexError); |
111 if (i == -1 && PyErr_Occurred()) { |
135 if (i == -1 && PyErr_Occurred()) { |
112 return NULL; |
136 goto bail; |
113 } |
137 } |
114 return PySequence_GetItem(obj->values, i); |
138 ret = PySequence_GetItem(obj->values, i); |
115 } |
139 goto bail; |
116 |
140 } |
141 |
|
142 // We can be passed in Unicode objects here -- we don't support anything other |
|
143 // than UTF-8 for keys. |
|
144 if (PyUnicode_Check(name)) { |
|
145 name_bytes = PyUnicode_AsUTF8String(name); |
|
146 if (name_bytes == NULL) { |
|
147 goto bail; |
|
148 } |
|
149 namestr = PyBytes_AsString(name_bytes); |
|
150 } else { |
|
151 namestr = PyBytes_AsString(name); |
|
152 } |
|
153 |
|
154 if (namestr == NULL) { |
|
155 goto bail; |
|
156 } |
|
117 // hack^Wfeature to allow mercurial to use "st_size" to reference "size" |
157 // hack^Wfeature to allow mercurial to use "st_size" to reference "size" |
118 namestr = PyString_AsString(name); |
|
119 if (!strncmp(namestr, "st_", 3)) { |
158 if (!strncmp(namestr, "st_", 3)) { |
120 namestr += 3; |
159 namestr += 3; |
121 } |
160 } |
122 |
161 |
123 n = PyTuple_GET_SIZE(obj->keys); |
162 n = PyTuple_GET_SIZE(obj->keys); |
124 for (i = 0; i < n; i++) { |
163 for (i = 0; i < n; i++) { |
125 const char *item_name = NULL; |
164 const char* item_name = NULL; |
126 PyObject *key = PyTuple_GET_ITEM(obj->keys, i); |
165 PyObject* key = PyTuple_GET_ITEM(obj->keys, i); |
127 |
166 |
128 item_name = PyString_AsString(key); |
167 item_name = PyBytes_AsString(key); |
129 if (!strcmp(item_name, namestr)) { |
168 if (!strcmp(item_name, namestr)) { |
130 return PySequence_GetItem(obj->values, i); |
169 ret = PySequence_GetItem(obj->values, i); |
131 } |
170 goto bail; |
132 } |
171 } |
133 PyErr_Format(PyExc_AttributeError, |
172 } |
134 "bserobject has no attribute '%.400s'", namestr); |
173 |
135 return NULL; |
174 PyErr_Format( |
136 } |
175 PyExc_AttributeError, "bserobject has no attribute '%.400s'", namestr); |
137 |
176 bail: |
177 Py_XDECREF(name_bytes); |
|
178 return ret; |
|
179 } |
|
180 |
|
181 // clang-format off |
|
138 static PyMappingMethods bserobj_map = { |
182 static PyMappingMethods bserobj_map = { |
139 bserobj_tuple_length, /* mp_length */ |
183 bserobj_tuple_length, /* mp_length */ |
140 bserobj_getattrro, /* mp_subscript */ |
184 bserobj_getattrro, /* mp_subscript */ |
141 0 /* mp_ass_subscript */ |
185 0 /* mp_ass_subscript */ |
142 }; |
186 }; |
179 0, /* tp_dictoffset */ |
223 0, /* tp_dictoffset */ |
180 0, /* tp_init */ |
224 0, /* tp_init */ |
181 0, /* tp_alloc */ |
225 0, /* tp_alloc */ |
182 0, /* tp_new */ |
226 0, /* tp_new */ |
183 }; |
227 }; |
184 |
228 // clang-format on |
185 |
229 |
186 static PyObject *bser_loads_recursive(const char **ptr, const char *end, |
230 typedef struct loads_ctx { |
187 int mutable); |
231 int mutable; |
232 const char* value_encoding; |
|
233 const char* value_errors; |
|
234 uint32_t bser_version; |
|
235 uint32_t bser_capabilities; |
|
236 } unser_ctx_t; |
|
237 |
|
238 static PyObject* |
|
239 bser_loads_recursive(const char** ptr, const char* end, const unser_ctx_t* ctx); |
|
188 |
240 |
189 static const char bser_true = BSER_TRUE; |
241 static const char bser_true = BSER_TRUE; |
190 static const char bser_false = BSER_FALSE; |
242 static const char bser_false = BSER_FALSE; |
191 static const char bser_null = BSER_NULL; |
243 static const char bser_null = BSER_NULL; |
192 static const char bser_string_hdr = BSER_STRING; |
244 static const char bser_bytestring_hdr = BSER_BYTESTRING; |
193 static const char bser_array_hdr = BSER_ARRAY; |
245 static const char bser_array_hdr = BSER_ARRAY; |
194 static const char bser_object_hdr = BSER_OBJECT; |
246 static const char bser_object_hdr = BSER_OBJECT; |
195 |
247 |
196 static inline uint32_t next_power_2(uint32_t n) |
248 static inline uint32_t next_power_2(uint32_t n) { |
197 { |
|
198 n |= (n >> 16); |
249 n |= (n >> 16); |
199 n |= (n >> 8); |
250 n |= (n >> 8); |
200 n |= (n >> 4); |
251 n |= (n >> 4); |
201 n |= (n >> 2); |
252 n |= (n >> 2); |
202 n |= (n >> 1); |
253 n |= (n >> 1); |
203 return n + 1; |
254 return n + 1; |
204 } |
255 } |
205 |
256 |
206 // A buffer we use for building up the serialized result |
257 // A buffer we use for building up the serialized result |
207 struct bser_buffer { |
258 struct bser_buffer { |
208 char *buf; |
259 char* buf; |
209 int wpos, allocd; |
260 int wpos, allocd; |
261 uint32_t bser_version; |
|
262 uint32_t capabilities; |
|
210 }; |
263 }; |
211 typedef struct bser_buffer bser_t; |
264 typedef struct bser_buffer bser_t; |
212 |
265 |
213 static int bser_append(bser_t *bser, const char *data, uint32_t len) |
266 static int bser_append(bser_t* bser, const char* data, uint32_t len) { |
214 { |
|
215 int newlen = next_power_2(bser->wpos + len); |
267 int newlen = next_power_2(bser->wpos + len); |
216 if (newlen > bser->allocd) { |
268 if (newlen > bser->allocd) { |
217 char *nbuf = realloc(bser->buf, newlen); |
269 char* nbuf = realloc(bser->buf, newlen); |
218 if (!nbuf) { |
270 if (!nbuf) { |
219 return 0; |
271 return 0; |
220 } |
272 } |
221 |
273 |
222 bser->buf = nbuf; |
274 bser->buf = nbuf; |
226 memcpy(bser->buf + bser->wpos, data, len); |
278 memcpy(bser->buf + bser->wpos, data, len); |
227 bser->wpos += len; |
279 bser->wpos += len; |
228 return 1; |
280 return 1; |
229 } |
281 } |
230 |
282 |
231 static int bser_init(bser_t *bser) |
283 static int bser_init(bser_t* bser, uint32_t version, uint32_t capabilities) { |
232 { |
|
233 bser->allocd = 8192; |
284 bser->allocd = 8192; |
234 bser->wpos = 0; |
285 bser->wpos = 0; |
235 bser->buf = malloc(bser->allocd); |
286 bser->buf = malloc(bser->allocd); |
236 |
287 bser->bser_version = version; |
288 bser->capabilities = capabilities; |
|
237 if (!bser->buf) { |
289 if (!bser->buf) { |
238 return 0; |
290 return 0; |
239 } |
291 } |
240 |
292 |
241 // Leave room for the serialization header, which includes |
293 // Leave room for the serialization header, which includes |
242 // our overall length. To make things simpler, we'll use an |
294 // our overall length. To make things simpler, we'll use an |
243 // int32 for the header |
295 // int32 for the header |
244 #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00" |
296 #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00" |
245 bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER)-1); |
297 |
298 // Version 2 also carries an integer indicating the capabilities. The |
|
299 // capabilities integer comes before the PDU size. |
|
300 #define EMPTY_HEADER_V2 "\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00" |
|
301 if (version == 2) { |
|
302 bser_append(bser, EMPTY_HEADER_V2, sizeof(EMPTY_HEADER_V2) - 1); |
|
303 } else { |
|
304 bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER) - 1); |
|
305 } |
|
246 |
306 |
247 return 1; |
307 return 1; |
248 } |
308 } |
249 |
309 |
250 static void bser_dtor(bser_t *bser) |
310 static void bser_dtor(bser_t* bser) { |
251 { |
|
252 free(bser->buf); |
311 free(bser->buf); |
253 bser->buf = NULL; |
312 bser->buf = NULL; |
254 } |
313 } |
255 |
314 |
256 static int bser_long(bser_t *bser, int64_t val) |
315 static int bser_long(bser_t* bser, int64_t val) { |
257 { |
|
258 int8_t i8; |
316 int8_t i8; |
259 int16_t i16; |
317 int16_t i16; |
260 int32_t i32; |
318 int32_t i32; |
261 int64_t i64; |
319 int64_t i64; |
262 char sz; |
320 char sz; |
263 int size = INT_SIZE(val); |
321 int size = INT_SIZE(val); |
264 char *iptr; |
322 char* iptr; |
265 |
323 |
266 switch (size) { |
324 switch (size) { |
267 case 1: |
325 case 1: |
268 sz = BSER_INT8; |
326 sz = BSER_INT8; |
269 i8 = (int8_t)val; |
327 i8 = (int8_t)val; |
283 sz = BSER_INT64; |
341 sz = BSER_INT64; |
284 i64 = (int64_t)val; |
342 i64 = (int64_t)val; |
285 iptr = (char*)&i64; |
343 iptr = (char*)&i64; |
286 break; |
344 break; |
287 default: |
345 default: |
288 PyErr_SetString(PyExc_RuntimeError, |
346 PyErr_SetString(PyExc_RuntimeError, "Cannot represent this long value!?"); |
289 "Cannot represent this long value!?"); |
|
290 return 0; |
347 return 0; |
291 } |
348 } |
292 |
349 |
293 if (!bser_append(bser, &sz, sizeof(sz))) { |
350 if (!bser_append(bser, &sz, sizeof(sz))) { |
294 return 0; |
351 return 0; |
295 } |
352 } |
296 |
353 |
297 return bser_append(bser, iptr, size); |
354 return bser_append(bser, iptr, size); |
298 } |
355 } |
299 |
356 |
300 static int bser_string(bser_t *bser, PyObject *sval) |
357 static int bser_bytestring(bser_t* bser, PyObject* sval) { |
301 { |
358 char* buf = NULL; |
302 char *buf = NULL; |
|
303 Py_ssize_t len; |
359 Py_ssize_t len; |
304 int res; |
360 int res; |
305 PyObject *utf = NULL; |
361 PyObject* utf = NULL; |
306 |
362 |
307 if (PyUnicode_Check(sval)) { |
363 if (PyUnicode_Check(sval)) { |
308 utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore"); |
364 utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore"); |
309 sval = utf; |
365 sval = utf; |
310 } |
366 } |
311 |
367 |
312 res = PyString_AsStringAndSize(sval, &buf, &len); |
368 res = PyBytes_AsStringAndSize(sval, &buf, &len); |
313 if (res == -1) { |
369 if (res == -1) { |
314 res = 0; |
370 res = 0; |
315 goto out; |
371 goto out; |
316 } |
372 } |
317 |
373 |
318 if (!bser_append(bser, &bser_string_hdr, sizeof(bser_string_hdr))) { |
374 if (!bser_append(bser, &bser_bytestring_hdr, sizeof(bser_bytestring_hdr))) { |
319 res = 0; |
375 res = 0; |
320 goto out; |
376 goto out; |
321 } |
377 } |
322 |
378 |
323 if (!bser_long(bser, len)) { |
379 if (!bser_long(bser, len)) { |
339 } |
395 } |
340 |
396 |
341 return res; |
397 return res; |
342 } |
398 } |
343 |
399 |
344 static int bser_recursive(bser_t *bser, PyObject *val) |
400 static int bser_recursive(bser_t* bser, PyObject* val) { |
345 { |
|
346 if (PyBool_Check(val)) { |
401 if (PyBool_Check(val)) { |
347 if (val == Py_True) { |
402 if (val == Py_True) { |
348 return bser_append(bser, &bser_true, sizeof(bser_true)); |
403 return bser_append(bser, &bser_true, sizeof(bser_true)); |
349 } |
404 } |
350 return bser_append(bser, &bser_false, sizeof(bser_false)); |
405 return bser_append(bser, &bser_false, sizeof(bser_false)); |
352 |
407 |
353 if (val == Py_None) { |
408 if (val == Py_None) { |
354 return bser_append(bser, &bser_null, sizeof(bser_null)); |
409 return bser_append(bser, &bser_null, sizeof(bser_null)); |
355 } |
410 } |
356 |
411 |
412 // Python 3 has one integer type. |
|
413 #if PY_MAJOR_VERSION < 3 |
|
357 if (PyInt_Check(val)) { |
414 if (PyInt_Check(val)) { |
358 return bser_long(bser, PyInt_AS_LONG(val)); |
415 return bser_long(bser, PyInt_AS_LONG(val)); |
359 } |
416 } |
417 #endif // PY_MAJOR_VERSION < 3 |
|
360 |
418 |
361 if (PyLong_Check(val)) { |
419 if (PyLong_Check(val)) { |
362 return bser_long(bser, PyLong_AsLongLong(val)); |
420 return bser_long(bser, PyLong_AsLongLong(val)); |
363 } |
421 } |
364 |
422 |
365 if (PyString_Check(val) || PyUnicode_Check(val)) { |
423 if (PyBytes_Check(val) || PyUnicode_Check(val)) { |
366 return bser_string(bser, val); |
424 return bser_bytestring(bser, val); |
367 } |
425 } |
368 |
|
369 |
426 |
370 if (PyFloat_Check(val)) { |
427 if (PyFloat_Check(val)) { |
371 double dval = PyFloat_AS_DOUBLE(val); |
428 double dval = PyFloat_AS_DOUBLE(val); |
372 char sz = BSER_REAL; |
429 char sz = BSER_REAL; |
373 |
430 |
388 if (!bser_long(bser, len)) { |
445 if (!bser_long(bser, len)) { |
389 return 0; |
446 return 0; |
390 } |
447 } |
391 |
448 |
392 for (i = 0; i < len; i++) { |
449 for (i = 0; i < len; i++) { |
393 PyObject *ele = PyList_GET_ITEM(val, i); |
450 PyObject* ele = PyList_GET_ITEM(val, i); |
394 |
451 |
395 if (!bser_recursive(bser, ele)) { |
452 if (!bser_recursive(bser, ele)) { |
396 return 0; |
453 return 0; |
397 } |
454 } |
398 } |
455 } |
410 if (!bser_long(bser, len)) { |
467 if (!bser_long(bser, len)) { |
411 return 0; |
468 return 0; |
412 } |
469 } |
413 |
470 |
414 for (i = 0; i < len; i++) { |
471 for (i = 0; i < len; i++) { |
415 PyObject *ele = PyTuple_GET_ITEM(val, i); |
472 PyObject* ele = PyTuple_GET_ITEM(val, i); |
416 |
473 |
417 if (!bser_recursive(bser, ele)) { |
474 if (!bser_recursive(bser, ele)) { |
418 return 0; |
475 return 0; |
419 } |
476 } |
420 } |
477 } |
434 if (!bser_long(bser, len)) { |
491 if (!bser_long(bser, len)) { |
435 return 0; |
492 return 0; |
436 } |
493 } |
437 |
494 |
438 while (PyDict_Next(val, &pos, &key, &ele)) { |
495 while (PyDict_Next(val, &pos, &key, &ele)) { |
439 if (!bser_string(bser, key)) { |
496 if (!bser_bytestring(bser, key)) { |
440 return 0; |
497 return 0; |
441 } |
498 } |
442 if (!bser_recursive(bser, ele)) { |
499 if (!bser_recursive(bser, ele)) { |
443 return 0; |
500 return 0; |
444 } |
501 } |
449 |
506 |
450 PyErr_SetString(PyExc_ValueError, "Unsupported value type"); |
507 PyErr_SetString(PyExc_ValueError, "Unsupported value type"); |
451 return 0; |
508 return 0; |
452 } |
509 } |
453 |
510 |
454 static PyObject *bser_dumps(PyObject *self, PyObject *args) |
511 static PyObject* bser_dumps(PyObject* self, PyObject* args, PyObject* kw) { |
455 { |
|
456 PyObject *val = NULL, *res; |
512 PyObject *val = NULL, *res; |
457 bser_t bser; |
513 bser_t bser; |
458 uint32_t len; |
514 uint32_t len, bser_version = 1, bser_capabilities = 0; |
459 |
515 |
460 if (!PyArg_ParseTuple(args, "O", &val)) { |
516 static char* kw_list[] = {"val", "version", "capabilities", NULL}; |
461 return NULL; |
517 |
462 } |
518 if (!PyArg_ParseTupleAndKeywords( |
463 |
519 args, |
464 if (!bser_init(&bser)) { |
520 kw, |
521 "O|ii:dumps", |
|
522 kw_list, |
|
523 &val, |
|
524 &bser_version, |
|
525 &bser_capabilities)) { |
|
526 return NULL; |
|
527 } |
|
528 |
|
529 if (!bser_init(&bser, bser_version, bser_capabilities)) { |
|
465 return PyErr_NoMemory(); |
530 return PyErr_NoMemory(); |
466 } |
531 } |
467 |
532 |
468 if (!bser_recursive(&bser, val)) { |
533 if (!bser_recursive(&bser, val)) { |
469 bser_dtor(&bser); |
534 bser_dtor(&bser); |
473 // otherwise, we've already set the error to something reasonable |
538 // otherwise, we've already set the error to something reasonable |
474 return NULL; |
539 return NULL; |
475 } |
540 } |
476 |
541 |
477 // Now fill in the overall length |
542 // Now fill in the overall length |
478 len = bser.wpos - (sizeof(EMPTY_HEADER) - 1); |
543 if (bser_version == 1) { |
479 memcpy(bser.buf + 3, &len, sizeof(len)); |
544 len = bser.wpos - (sizeof(EMPTY_HEADER) - 1); |
480 |
545 memcpy(bser.buf + 3, &len, sizeof(len)); |
481 res = PyString_FromStringAndSize(bser.buf, bser.wpos); |
546 } else { |
547 len = bser.wpos - (sizeof(EMPTY_HEADER_V2) - 1); |
|
548 // The BSER capabilities block comes before the PDU length |
|
549 memcpy(bser.buf + 2, &bser_capabilities, sizeof(bser_capabilities)); |
|
550 memcpy(bser.buf + 7, &len, sizeof(len)); |
|
551 } |
|
552 |
|
553 res = PyBytes_FromStringAndSize(bser.buf, bser.wpos); |
|
482 bser_dtor(&bser); |
554 bser_dtor(&bser); |
483 |
555 |
484 return res; |
556 return res; |
485 } |
557 } |
486 |
558 |
487 int bunser_int(const char **ptr, const char *end, int64_t *val) |
559 int bunser_int(const char** ptr, const char* end, int64_t* val) { |
488 { |
|
489 int needed; |
560 int needed; |
490 const char *buf = *ptr; |
561 const char* buf = *ptr; |
491 int8_t i8; |
562 int8_t i8; |
492 int16_t i16; |
563 int16_t i16; |
493 int32_t i32; |
564 int32_t i32; |
494 int64_t i64; |
565 int64_t i64; |
495 |
566 |
505 break; |
576 break; |
506 case BSER_INT64: |
577 case BSER_INT64: |
507 needed = 9; |
578 needed = 9; |
508 break; |
579 break; |
509 default: |
580 default: |
510 PyErr_Format(PyExc_ValueError, |
581 PyErr_Format( |
511 "invalid bser int encoding 0x%02x", buf[0]); |
582 PyExc_ValueError, "invalid bser int encoding 0x%02x", buf[0]); |
512 return 0; |
583 return 0; |
513 } |
584 } |
514 if (end - buf < needed) { |
585 if (end - buf < needed) { |
515 PyErr_SetString(PyExc_ValueError, "input buffer to small for int encoding"); |
586 PyErr_SetString(PyExc_ValueError, "input buffer to small for int encoding"); |
516 return 0; |
587 return 0; |
536 default: |
607 default: |
537 return 0; |
608 return 0; |
538 } |
609 } |
539 } |
610 } |
540 |
611 |
541 static int bunser_string(const char **ptr, const char *end, |
612 static int bunser_bytestring( |
542 const char **start, int64_t *len) |
613 const char** ptr, |
543 { |
614 const char* end, |
544 const char *buf = *ptr; |
615 const char** start, |
616 int64_t* len) { |
|
617 const char* buf = *ptr; |
|
545 |
618 |
546 // skip string marker |
619 // skip string marker |
547 buf++; |
620 buf++; |
548 if (!bunser_int(&buf, end, len)) { |
621 if (!bunser_int(&buf, end, len)) { |
549 return 0; |
622 return 0; |
557 *ptr = buf + *len; |
630 *ptr = buf + *len; |
558 *start = buf; |
631 *start = buf; |
559 return 1; |
632 return 1; |
560 } |
633 } |
561 |
634 |
562 static PyObject *bunser_array(const char **ptr, const char *end, int mutable) |
635 static PyObject* |
563 { |
636 bunser_array(const char** ptr, const char* end, const unser_ctx_t* ctx) { |
564 const char *buf = *ptr; |
637 const char* buf = *ptr; |
565 int64_t nitems, i; |
638 int64_t nitems, i; |
566 PyObject *res; |
639 int mutable = ctx->mutable; |
640 PyObject* res; |
|
567 |
641 |
568 // skip array header |
642 // skip array header |
569 buf++; |
643 buf++; |
570 if (!bunser_int(&buf, end, &nitems)) { |
644 if (!bunser_int(&buf, end, &nitems)) { |
571 return 0; |
645 return 0; |
582 } else { |
656 } else { |
583 res = PyTuple_New((Py_ssize_t)nitems); |
657 res = PyTuple_New((Py_ssize_t)nitems); |
584 } |
658 } |
585 |
659 |
586 for (i = 0; i < nitems; i++) { |
660 for (i = 0; i < nitems; i++) { |
587 PyObject *ele = bser_loads_recursive(ptr, end, mutable); |
661 PyObject* ele = bser_loads_recursive(ptr, end, ctx); |
588 |
662 |
589 if (!ele) { |
663 if (!ele) { |
590 Py_DECREF(res); |
664 Py_DECREF(res); |
591 return NULL; |
665 return NULL; |
592 } |
666 } |
600 } |
674 } |
601 |
675 |
602 return res; |
676 return res; |
603 } |
677 } |
604 |
678 |
605 static PyObject *bunser_object(const char **ptr, const char *end, |
679 static PyObject* |
606 int mutable) |
680 bunser_object(const char** ptr, const char* end, const unser_ctx_t* ctx) { |
607 { |
681 const char* buf = *ptr; |
608 const char *buf = *ptr; |
|
609 int64_t nitems, i; |
682 int64_t nitems, i; |
610 PyObject *res; |
683 int mutable = ctx->mutable; |
611 bserObject *obj; |
684 PyObject* res; |
685 bserObject* obj; |
|
612 |
686 |
613 // skip array header |
687 // skip array header |
614 buf++; |
688 buf++; |
615 if (!bunser_int(&buf, end, &nitems)) { |
689 if (!bunser_int(&buf, end, &nitems)) { |
616 return 0; |
690 return 0; |
625 obj->values = PyTuple_New((Py_ssize_t)nitems); |
699 obj->values = PyTuple_New((Py_ssize_t)nitems); |
626 res = (PyObject*)obj; |
700 res = (PyObject*)obj; |
627 } |
701 } |
628 |
702 |
629 for (i = 0; i < nitems; i++) { |
703 for (i = 0; i < nitems; i++) { |
630 const char *keystr; |
704 const char* keystr; |
631 int64_t keylen; |
705 int64_t keylen; |
632 PyObject *key; |
706 PyObject* key; |
633 PyObject *ele; |
707 PyObject* ele; |
634 |
708 |
635 if (!bunser_string(ptr, end, &keystr, &keylen)) { |
709 if (!bunser_bytestring(ptr, end, &keystr, &keylen)) { |
636 Py_DECREF(res); |
710 Py_DECREF(res); |
637 return NULL; |
711 return NULL; |
638 } |
712 } |
639 |
713 |
640 if (keylen > LONG_MAX) { |
714 if (keylen > LONG_MAX) { |
641 PyErr_Format(PyExc_ValueError, "string too big for python"); |
715 PyErr_Format(PyExc_ValueError, "string too big for python"); |
642 Py_DECREF(res); |
716 Py_DECREF(res); |
643 return NULL; |
717 return NULL; |
644 } |
718 } |
645 |
719 |
646 key = PyString_FromStringAndSize(keystr, (Py_ssize_t)keylen); |
720 if (mutable) { |
721 // This will interpret the key as UTF-8. |
|
722 key = PyUnicode_FromStringAndSize(keystr, (Py_ssize_t)keylen); |
|
723 } else { |
|
724 // For immutable objects we'll manage key lookups, so we can avoid going |
|
725 // through the Unicode APIs. This avoids a potentially expensive and |
|
726 // definitely unnecessary conversion to UTF-16 and back for Python 2. |
|
727 // TODO: On Python 3 the Unicode APIs are smarter: we might be able to use |
|
728 // Unicode keys there without an appreciable performance loss. |
|
729 key = PyBytes_FromStringAndSize(keystr, (Py_ssize_t)keylen); |
|
730 } |
|
731 |
|
647 if (!key) { |
732 if (!key) { |
648 Py_DECREF(res); |
733 Py_DECREF(res); |
649 return NULL; |
734 return NULL; |
650 } |
735 } |
651 |
736 |
652 ele = bser_loads_recursive(ptr, end, mutable); |
737 ele = bser_loads_recursive(ptr, end, ctx); |
653 |
738 |
654 if (!ele) { |
739 if (!ele) { |
655 Py_DECREF(key); |
740 Py_DECREF(key); |
656 Py_DECREF(res); |
741 Py_DECREF(res); |
657 return NULL; |
742 return NULL; |
669 } |
754 } |
670 |
755 |
671 return res; |
756 return res; |
672 } |
757 } |
673 |
758 |
674 static PyObject *bunser_template(const char **ptr, const char *end, |
759 static PyObject* |
675 int mutable) |
760 bunser_template(const char** ptr, const char* end, const unser_ctx_t* ctx) { |
676 { |
761 const char* buf = *ptr; |
677 const char *buf = *ptr; |
|
678 int64_t nitems, i; |
762 int64_t nitems, i; |
679 PyObject *arrval; |
763 int mutable = ctx->mutable; |
680 PyObject *keys; |
764 PyObject* arrval; |
765 PyObject* keys; |
|
681 Py_ssize_t numkeys, keyidx; |
766 Py_ssize_t numkeys, keyidx; |
767 unser_ctx_t keys_ctx = {0}; |
|
768 if (mutable) { |
|
769 keys_ctx.mutable = 1; |
|
770 // Decode keys as UTF-8 in this case. |
|
771 keys_ctx.value_encoding = "utf-8"; |
|
772 keys_ctx.value_errors = "strict"; |
|
773 } else { |
|
774 // Treat keys as bytestrings in this case -- we'll do Unicode conversions at |
|
775 // lookup time. |
|
776 } |
|
682 |
777 |
683 if (buf[1] != BSER_ARRAY) { |
778 if (buf[1] != BSER_ARRAY) { |
684 PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE"); |
779 PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE"); |
685 return NULL; |
780 return NULL; |
686 } |
781 } |
687 |
782 |
688 // skip header |
783 // skip header |
689 buf++; |
784 buf++; |
690 *ptr = buf; |
785 *ptr = buf; |
691 |
786 |
692 // Load template keys |
787 // Load template keys. |
693 keys = bunser_array(ptr, end, mutable); |
788 // For keys we don't want to do any decoding right now. |
789 keys = bunser_array(ptr, end, &keys_ctx); |
|
694 if (!keys) { |
790 if (!keys) { |
695 return NULL; |
791 return NULL; |
696 } |
792 } |
697 |
793 |
698 numkeys = PySequence_Length(keys); |
794 numkeys = PySequence_Length(keys); |
714 Py_DECREF(keys); |
810 Py_DECREF(keys); |
715 return NULL; |
811 return NULL; |
716 } |
812 } |
717 |
813 |
718 for (i = 0; i < nitems; i++) { |
814 for (i = 0; i < nitems; i++) { |
719 PyObject *dict = NULL; |
815 PyObject* dict = NULL; |
720 bserObject *obj = NULL; |
816 bserObject* obj = NULL; |
721 |
817 |
722 if (mutable) { |
818 if (mutable) { |
723 dict = PyDict_New(); |
819 dict = PyDict_New(); |
724 } else { |
820 } else { |
725 obj = PyObject_New(bserObject, &bserObjectType); |
821 obj = PyObject_New(bserObject, &bserObjectType); |
729 obj->values = PyTuple_New(numkeys); |
825 obj->values = PyTuple_New(numkeys); |
730 } |
826 } |
731 dict = (PyObject*)obj; |
827 dict = (PyObject*)obj; |
732 } |
828 } |
733 if (!dict) { |
829 if (!dict) { |
734 fail: |
830 fail: |
735 Py_DECREF(keys); |
831 Py_DECREF(keys); |
736 Py_DECREF(arrval); |
832 Py_DECREF(arrval); |
737 return NULL; |
833 return NULL; |
738 } |
834 } |
739 |
835 |
740 for (keyidx = 0; keyidx < numkeys; keyidx++) { |
836 for (keyidx = 0; keyidx < numkeys; keyidx++) { |
741 PyObject *key; |
837 PyObject* key; |
742 PyObject *ele; |
838 PyObject* ele; |
743 |
839 |
744 if (**ptr == BSER_SKIP) { |
840 if (**ptr == BSER_SKIP) { |
745 *ptr = *ptr + 1; |
841 *ptr = *ptr + 1; |
746 ele = Py_None; |
842 ele = Py_None; |
747 Py_INCREF(ele); |
843 Py_INCREF(ele); |
748 } else { |
844 } else { |
749 ele = bser_loads_recursive(ptr, end, mutable); |
845 ele = bser_loads_recursive(ptr, end, ctx); |
750 } |
846 } |
751 |
847 |
752 if (!ele) { |
848 if (!ele) { |
753 goto fail; |
849 goto fail; |
754 } |
850 } |
770 Py_DECREF(keys); |
866 Py_DECREF(keys); |
771 |
867 |
772 return arrval; |
868 return arrval; |
773 } |
869 } |
774 |
870 |
775 static PyObject *bser_loads_recursive(const char **ptr, const char *end, |
871 static PyObject* bser_loads_recursive( |
776 int mutable) |
872 const char** ptr, |
777 { |
873 const char* end, |
778 const char *buf = *ptr; |
874 const unser_ctx_t* ctx) { |
875 const char* buf = *ptr; |
|
779 |
876 |
780 switch (buf[0]) { |
877 switch (buf[0]) { |
781 case BSER_INT8: |
878 case BSER_INT8: |
782 case BSER_INT16: |
879 case BSER_INT16: |
783 case BSER_INT32: |
880 case BSER_INT32: |
784 case BSER_INT64: |
881 case BSER_INT64: { |
785 { |
882 int64_t ival; |
786 int64_t ival; |
883 if (!bunser_int(ptr, end, &ival)) { |
787 if (!bunser_int(ptr, end, &ival)) { |
884 return NULL; |
788 return NULL; |
885 } |
789 } |
886 // Python 3 has one integer type. |
790 if (ival < LONG_MIN || ival > LONG_MAX) { |
887 #if PY_MAJOR_VERSION >= 3 |
791 return PyLong_FromLongLong(ival); |
888 return PyLong_FromLongLong(ival); |
792 } |
889 #else |
793 return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t)); |
890 if (ival < LONG_MIN || ival > LONG_MAX) { |
794 } |
891 return PyLong_FromLongLong(ival); |
795 |
892 } |
796 case BSER_REAL: |
893 return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t)); |
797 { |
894 #endif // PY_MAJOR_VERSION >= 3 |
798 double dval; |
895 } |
799 memcpy(&dval, buf + 1, sizeof(dval)); |
896 |
800 *ptr = buf + 1 + sizeof(double); |
897 case BSER_REAL: { |
801 return PyFloat_FromDouble(dval); |
898 double dval; |
802 } |
899 memcpy(&dval, buf + 1, sizeof(dval)); |
900 *ptr = buf + 1 + sizeof(double); |
|
901 return PyFloat_FromDouble(dval); |
|
902 } |
|
803 |
903 |
804 case BSER_TRUE: |
904 case BSER_TRUE: |
805 *ptr = buf + 1; |
905 *ptr = buf + 1; |
806 Py_INCREF(Py_True); |
906 Py_INCREF(Py_True); |
807 return Py_True; |
907 return Py_True; |
814 case BSER_NULL: |
914 case BSER_NULL: |
815 *ptr = buf + 1; |
915 *ptr = buf + 1; |
816 Py_INCREF(Py_None); |
916 Py_INCREF(Py_None); |
817 return Py_None; |
917 return Py_None; |
818 |
918 |
819 case BSER_STRING: |
919 case BSER_BYTESTRING: { |
820 { |
920 const char* start; |
821 const char *start; |
921 int64_t len; |
822 int64_t len; |
922 |
823 |
923 if (!bunser_bytestring(ptr, end, &start, &len)) { |
824 if (!bunser_string(ptr, end, &start, &len)) { |
924 return NULL; |
825 return NULL; |
925 } |
826 } |
926 |
827 |
927 if (len > LONG_MAX) { |
828 if (len > LONG_MAX) { |
928 PyErr_Format(PyExc_ValueError, "string too long for python"); |
829 PyErr_Format(PyExc_ValueError, "string too long for python"); |
929 return NULL; |
830 return NULL; |
930 } |
831 } |
931 |
832 |
932 if (ctx->value_encoding != NULL) { |
833 return PyString_FromStringAndSize(start, (long)len); |
933 return PyUnicode_Decode( |
834 } |
934 start, (long)len, ctx->value_encoding, ctx->value_errors); |
935 } else { |
|
936 return PyBytes_FromStringAndSize(start, (long)len); |
|
937 } |
|
938 } |
|
939 |
|
940 case BSER_UTF8STRING: { |
|
941 const char* start; |
|
942 int64_t len; |
|
943 |
|
944 if (!bunser_bytestring(ptr, end, &start, &len)) { |
|
945 return NULL; |
|
946 } |
|
947 |
|
948 if (len > LONG_MAX) { |
|
949 PyErr_Format(PyExc_ValueError, "string too long for python"); |
|
950 return NULL; |
|
951 } |
|
952 |
|
953 return PyUnicode_Decode(start, (long)len, "utf-8", "strict"); |
|
954 } |
|
835 |
955 |
836 case BSER_ARRAY: |
956 case BSER_ARRAY: |
837 return bunser_array(ptr, end, mutable); |
957 return bunser_array(ptr, end, ctx); |
838 |
958 |
839 case BSER_OBJECT: |
959 case BSER_OBJECT: |
840 return bunser_object(ptr, end, mutable); |
960 return bunser_object(ptr, end, ctx); |
841 |
961 |
842 case BSER_TEMPLATE: |
962 case BSER_TEMPLATE: |
843 return bunser_template(ptr, end, mutable); |
963 return bunser_template(ptr, end, ctx); |
844 |
964 |
845 default: |
965 default: |
846 PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]); |
966 PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]); |
847 } |
967 } |
848 |
968 |
849 return NULL; |
969 return NULL; |
850 } |
970 } |
851 |
971 |
852 // Expected use case is to read a packet from the socket and |
972 static int _pdu_info_helper( |
853 // then call bser.pdu_len on the packet. It returns the total |
973 const char* data, |
854 // length of the entire response that the peer is sending, |
974 const char* end, |
855 // including the bytes already received. This allows the client |
975 uint32_t* bser_version_out, |
856 // to compute the data size it needs to read before it can |
976 uint32_t* bser_capabilities_out, |
857 // decode the data |
977 int64_t* expected_len_out, |
858 static PyObject *bser_pdu_len(PyObject *self, PyObject *args) |
978 off_t* position_out) { |
859 { |
979 uint32_t bser_version; |
860 const char *start = NULL; |
980 uint32_t bser_capabilities = 0; |
861 const char *data = NULL; |
981 int64_t expected_len; |
862 int datalen = 0; |
982 |
863 const char *end; |
983 const char* start; |
864 int64_t expected_len, total_len; |
984 start = data; |
865 |
|
866 if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) { |
|
867 return NULL; |
|
868 } |
|
869 data = start; |
|
870 end = data + datalen; |
|
871 |
|
872 // Validate the header and length |
985 // Validate the header and length |
873 if (memcmp(data, EMPTY_HEADER, 2) != 0) { |
986 if (memcmp(data, EMPTY_HEADER, 2) == 0) { |
987 bser_version = 1; |
|
988 } else if (memcmp(data, EMPTY_HEADER_V2, 2) == 0) { |
|
989 bser_version = 2; |
|
990 } else { |
|
874 PyErr_SetString(PyExc_ValueError, "invalid bser header"); |
991 PyErr_SetString(PyExc_ValueError, "invalid bser header"); |
875 return NULL; |
992 return 0; |
876 } |
993 } |
877 |
994 |
878 data += 2; |
995 data += 2; |
996 |
|
997 if (bser_version == 2) { |
|
998 // Expect an integer telling us what capabilities are supported by the |
|
999 // remote server (currently unused). |
|
1000 if (!memcpy(&bser_capabilities, &data, sizeof(bser_capabilities))) { |
|
1001 return 0; |
|
1002 } |
|
1003 data += sizeof(bser_capabilities); |
|
1004 } |
|
879 |
1005 |
880 // Expect an integer telling us how big the rest of the data |
1006 // Expect an integer telling us how big the rest of the data |
881 // should be |
1007 // should be |
882 if (!bunser_int(&data, end, &expected_len)) { |
1008 if (!bunser_int(&data, end, &expected_len)) { |
883 return NULL; |
1009 return 0; |
884 } |
1010 } |
885 |
1011 |
886 total_len = expected_len + (data - start); |
1012 *bser_version_out = bser_version; |
887 if (total_len > LONG_MAX) { |
1013 *bser_capabilities_out = (uint32_t)bser_capabilities; |
888 return PyLong_FromLongLong(total_len); |
1014 *expected_len_out = expected_len; |
889 } |
1015 *position_out = (off_t)(data - start); |
890 return PyInt_FromLong((long)total_len); |
1016 return 1; |
891 } |
1017 } |
892 |
1018 |
893 static PyObject *bser_loads(PyObject *self, PyObject *args) |
1019 // This function parses the PDU header and provides info about the packet |
894 { |
1020 // Returns false if unsuccessful |
895 const char *data = NULL; |
1021 static int pdu_info_helper( |
1022 PyObject* self, |
|
1023 PyObject* args, |
|
1024 uint32_t* bser_version_out, |
|
1025 uint32_t* bser_capabilities_out, |
|
1026 int64_t* total_len_out) { |
|
1027 const char* start = NULL; |
|
1028 const char* data = NULL; |
|
896 int datalen = 0; |
1029 int datalen = 0; |
897 const char *end; |
1030 const char* end; |
898 int64_t expected_len; |
1031 int64_t expected_len; |
899 int mutable = 1; |
1032 off_t position; |
900 PyObject *mutable_obj = NULL; |
1033 |
901 |
1034 if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) { |
902 if (!PyArg_ParseTuple(args, "s#|O:loads", &data, &datalen, &mutable_obj)) { |
1035 return 0; |
903 return NULL; |
1036 } |
904 } |
1037 data = start; |
1038 end = data + datalen; |
|
1039 |
|
1040 if (!_pdu_info_helper( |
|
1041 data, |
|
1042 end, |
|
1043 bser_version_out, |
|
1044 bser_capabilities_out, |
|
1045 &expected_len, |
|
1046 &position)) { |
|
1047 return 0; |
|
1048 } |
|
1049 *total_len_out = (int64_t)(expected_len + position); |
|
1050 return 1; |
|
1051 } |
|
1052 |
|
1053 // Expected use case is to read a packet from the socket and then call |
|
1054 // bser.pdu_info on the packet. It returns the BSER version, BSER capabilities, |
|
1055 // and the total length of the entire response that the peer is sending, |
|
1056 // including the bytes already received. This allows the client to compute the |
|
1057 // data size it needs to read before it can decode the data. |
|
1058 static PyObject* bser_pdu_info(PyObject* self, PyObject* args) { |
|
1059 uint32_t version, capabilities; |
|
1060 int64_t total_len; |
|
1061 if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) { |
|
1062 return NULL; |
|
1063 } |
|
1064 return Py_BuildValue("kkL", version, capabilities, total_len); |
|
1065 } |
|
1066 |
|
1067 static PyObject* bser_pdu_len(PyObject* self, PyObject* args) { |
|
1068 uint32_t version, capabilities; |
|
1069 int64_t total_len; |
|
1070 if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) { |
|
1071 return NULL; |
|
1072 } |
|
1073 return Py_BuildValue("L", total_len); |
|
1074 } |
|
1075 |
|
1076 static PyObject* bser_loads(PyObject* self, PyObject* args, PyObject* kw) { |
|
1077 const char* data = NULL; |
|
1078 int datalen = 0; |
|
1079 const char* start; |
|
1080 const char* end; |
|
1081 int64_t expected_len; |
|
1082 off_t position; |
|
1083 PyObject* mutable_obj = NULL; |
|
1084 const char* value_encoding = NULL; |
|
1085 const char* value_errors = NULL; |
|
1086 unser_ctx_t ctx = {1, 0}; |
|
1087 |
|
1088 static char* kw_list[] = { |
|
1089 "buf", "mutable", "value_encoding", "value_errors", NULL}; |
|
1090 |
|
1091 if (!PyArg_ParseTupleAndKeywords( |
|
1092 args, |
|
1093 kw, |
|
1094 "s#|Ozz:loads", |
|
1095 kw_list, |
|
1096 &start, |
|
1097 &datalen, |
|
1098 &mutable_obj, |
|
1099 &value_encoding, |
|
1100 &value_errors)) { |
|
1101 return NULL; |
|
1102 } |
|
1103 |
|
905 if (mutable_obj) { |
1104 if (mutable_obj) { |
906 mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0; |
1105 ctx.mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0; |
907 } |
1106 } |
908 |
1107 ctx.value_encoding = value_encoding; |
1108 if (value_encoding == NULL) { |
|
1109 ctx.value_errors = NULL; |
|
1110 } else if (value_errors == NULL) { |
|
1111 ctx.value_errors = "strict"; |
|
1112 } else { |
|
1113 ctx.value_errors = value_errors; |
|
1114 } |
|
1115 data = start; |
|
909 end = data + datalen; |
1116 end = data + datalen; |
910 |
1117 |
911 // Validate the header and length |
1118 if (!_pdu_info_helper( |
912 if (memcmp(data, EMPTY_HEADER, 2) != 0) { |
1119 data, |
913 PyErr_SetString(PyExc_ValueError, "invalid bser header"); |
1120 end, |
914 return NULL; |
1121 &ctx.bser_version, |
915 } |
1122 &ctx.bser_capabilities, |
916 |
1123 &expected_len, |
917 data += 2; |
1124 &position)) { |
918 |
1125 return NULL; |
919 // Expect an integer telling us how big the rest of the data |
1126 } |
920 // should be |
1127 |
921 if (!bunser_int(&data, end, &expected_len)) { |
1128 data = start + position; |
922 return NULL; |
|
923 } |
|
924 |
|
925 // Verify |
1129 // Verify |
926 if (expected_len + data != end) { |
1130 if (expected_len + data != end) { |
927 PyErr_SetString(PyExc_ValueError, "bser data len != header len"); |
1131 PyErr_SetString(PyExc_ValueError, "bser data len != header len"); |
928 return NULL; |
1132 return NULL; |
929 } |
1133 } |
930 |
1134 |
931 return bser_loads_recursive(&data, end, mutable); |
1135 return bser_loads_recursive(&data, end, &ctx); |
932 } |
1136 } |
933 |
1137 |
1138 static PyObject* bser_load(PyObject* self, PyObject* args, PyObject* kw) { |
|
1139 PyObject *load, *string; |
|
1140 PyObject* fp = NULL; |
|
1141 PyObject* mutable_obj = NULL; |
|
1142 const char* value_encoding = NULL; |
|
1143 const char* value_errors = NULL; |
|
1144 |
|
1145 static char* kw_list[] = { |
|
1146 "fp", "mutable", "value_encoding", "value_errors", NULL}; |
|
1147 |
|
1148 if (!PyArg_ParseTupleAndKeywords( |
|
1149 args, |
|
1150 kw, |
|
1151 "OOzz:load", |
|
1152 kw_list, |
|
1153 &fp, |
|
1154 &mutable_obj, |
|
1155 &value_encoding, |
|
1156 &value_errors)) { |
|
1157 return NULL; |
|
1158 } |
|
1159 |
|
1160 load = PyImport_ImportModule("pywatchman.load"); |
|
1161 if (load == NULL) { |
|
1162 return NULL; |
|
1163 } |
|
1164 string = PyObject_CallMethod( |
|
1165 load, "load", "OOzz", fp, mutable_obj, value_encoding, value_errors); |
|
1166 Py_DECREF(load); |
|
1167 return string; |
|
1168 } |
|
1169 |
|
1170 // clang-format off |
|
934 static PyMethodDef bser_methods[] = { |
1171 static PyMethodDef bser_methods[] = { |
935 {"loads", bser_loads, METH_VARARGS, "Deserialize string."}, |
1172 {"loads", (PyCFunction)bser_loads, METH_VARARGS | METH_KEYWORDS, |
936 {"pdu_len", bser_pdu_len, METH_VARARGS, "Extract PDU length."}, |
1173 "Deserialize string."}, |
937 {"dumps", bser_dumps, METH_VARARGS, "Serialize string."}, |
1174 {"load", (PyCFunction)bser_load, METH_VARARGS | METH_KEYWORDS, |
1175 "Deserialize a file object"}, |
|
1176 {"pdu_info", (PyCFunction)bser_pdu_info, METH_VARARGS, |
|
1177 "Extract PDU information."}, |
|
1178 {"pdu_len", (PyCFunction)bser_pdu_len, METH_VARARGS, |
|
1179 "Extract total PDU length."}, |
|
1180 {"dumps", (PyCFunction)bser_dumps, METH_VARARGS | METH_KEYWORDS, |
|
1181 "Serialize string."}, |
|
938 {NULL, NULL, 0, NULL} |
1182 {NULL, NULL, 0, NULL} |
939 }; |
1183 }; |
940 |
1184 |
941 PyMODINIT_FUNC initbser(void) |
1185 #if PY_MAJOR_VERSION >= 3 |
942 { |
1186 static struct PyModuleDef bser_module = { |
1187 PyModuleDef_HEAD_INIT, |
|
1188 "bser", |
|
1189 "Efficient encoding and decoding of BSER.", |
|
1190 -1, |
|
1191 bser_methods |
|
1192 }; |
|
1193 // clang-format on |
|
1194 |
|
1195 PyMODINIT_FUNC PyInit_bser(void) { |
|
1196 PyObject* mod; |
|
1197 |
|
1198 mod = PyModule_Create(&bser_module); |
|
1199 PyType_Ready(&bserObjectType); |
|
1200 |
|
1201 return mod; |
|
1202 } |
|
1203 #else |
|
1204 |
|
1205 PyMODINIT_FUNC initbser(void) { |
|
943 (void)Py_InitModule("bser", bser_methods); |
1206 (void)Py_InitModule("bser", bser_methods); |
944 PyType_Ready(&bserObjectType); |
1207 PyType_Ready(&bserObjectType); |
945 } |
1208 } |
1209 #endif // PY_MAJOR_VERSION >= 3 |
|
946 |
1210 |
947 /* vim:ts=2:sw=2:et: |
1211 /* vim:ts=2:sw=2:et: |
948 */ |
1212 */ |
949 |
|
950 // no-check-code -- this is a 3rd party library |