hgext/fsmonitor/pywatchman/bser.c
changeset 30656 16f4b341288d
parent 28528 a011080fdb7b
child 37594 b1f62cd39b5c
equal deleted inserted replaced
30655:f35397fe0c04 30656:16f4b341288d
    27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    29 */
    29 */
    30 
    30 
    31 #include <Python.h>
    31 #include <Python.h>
       
    32 #include <bytesobject.h>
    32 #ifdef _MSC_VER
    33 #ifdef _MSC_VER
    33 #define inline __inline
    34 #define inline __inline
    34 #include "msc_stdint.h"
    35 #if _MSC_VER >= 1800
       
    36 #include <stdint.h>
       
    37 #else
       
    38 // The compiler associated with Python 2.7 on Windows doesn't ship
       
    39 // with stdint.h, so define the small subset that we use here.
       
    40 typedef __int8 int8_t;
       
    41 typedef __int16 int16_t;
       
    42 typedef __int32 int32_t;
       
    43 typedef __int64 int64_t;
       
    44 typedef unsigned __int8 uint8_t;
       
    45 typedef unsigned __int16 uint16_t;
       
    46 typedef unsigned __int32 uint32_t;
       
    47 typedef unsigned __int64 uint64_t;
       
    48 #define UINT32_MAX 4294967295U
    35 #endif
    49 #endif
    36 
    50 #endif
       
    51 
       
    52 // clang-format off
    37 /* Return the smallest size int that can store the value */
    53 /* Return the smallest size int that can store the value */
    38 #define INT_SIZE(x) (((x) == ((int8_t)x))  ? 1 :    \
    54 #define INT_SIZE(x) (((x) == ((int8_t)x))  ? 1 :    \
    39                      ((x) == ((int16_t)x)) ? 2 :    \
    55                      ((x) == ((int16_t)x)) ? 2 :    \
    40                      ((x) == ((int32_t)x)) ? 4 : 8)
    56                      ((x) == ((int32_t)x)) ? 4 : 8)
    41 
    57 
    42 #define BSER_ARRAY     0x00
    58 #define BSER_ARRAY     0x00
    43 #define BSER_OBJECT    0x01
    59 #define BSER_OBJECT    0x01
    44 #define BSER_STRING    0x02
    60 #define BSER_BYTESTRING 0x02
    45 #define BSER_INT8      0x03
    61 #define BSER_INT8      0x03
    46 #define BSER_INT16     0x04
    62 #define BSER_INT16     0x04
    47 #define BSER_INT32     0x05
    63 #define BSER_INT32     0x05
    48 #define BSER_INT64     0x06
    64 #define BSER_INT64     0x06
    49 #define BSER_REAL      0x07
    65 #define BSER_REAL      0x07
    50 #define BSER_TRUE      0x08
    66 #define BSER_TRUE      0x08
    51 #define BSER_FALSE     0x09
    67 #define BSER_FALSE     0x09
    52 #define BSER_NULL      0x0a
    68 #define BSER_NULL      0x0a
    53 #define BSER_TEMPLATE  0x0b
    69 #define BSER_TEMPLATE  0x0b
    54 #define BSER_SKIP      0x0c
    70 #define BSER_SKIP      0x0c
       
    71 #define BSER_UTF8STRING 0x0d
       
    72 // clang-format on
    55 
    73 
    56 // An immutable object representation of BSER_OBJECT.
    74 // An immutable object representation of BSER_OBJECT.
    57 // Rather than build a hash table, key -> value are obtained
    75 // Rather than build a hash table, key -> value are obtained
    58 // by walking the list of keys to determine the offset into
    76 // by walking the list of keys to determine the offset into
    59 // the values array.  The assumption is that the number of
    77 // the values array.  The assumption is that the number of
    62 // so that the time overhead for this is small compared to
    80 // so that the time overhead for this is small compared to
    63 // using a proper hash table.  Even with this simplistic
    81 // using a proper hash table.  Even with this simplistic
    64 // approach, this is still faster for the mercurial use case
    82 // approach, this is still faster for the mercurial use case
    65 // as it helps to eliminate creating N other objects to
    83 // as it helps to eliminate creating N other objects to
    66 // represent the stat information in the hgwatchman extension
    84 // represent the stat information in the hgwatchman extension
       
    85 // clang-format off
    67 typedef struct {
    86 typedef struct {
    68   PyObject_HEAD
    87   PyObject_HEAD
    69   PyObject *keys;   // tuple of field names
    88   PyObject *keys;   // tuple of field names
    70   PyObject *values; // tuple of values
    89   PyObject *values; // tuple of values
    71 } bserObject;
    90 } bserObject;
    72 
    91 // clang-format on
    73 static Py_ssize_t bserobj_tuple_length(PyObject *o) {
    92 
    74   bserObject *obj = (bserObject*)o;
    93 static Py_ssize_t bserobj_tuple_length(PyObject* o) {
       
    94   bserObject* obj = (bserObject*)o;
    75 
    95 
    76   return PySequence_Length(obj->keys);
    96   return PySequence_Length(obj->keys);
    77 }
    97 }
    78 
    98 
    79 static PyObject *bserobj_tuple_item(PyObject *o, Py_ssize_t i) {
    99 static PyObject* bserobj_tuple_item(PyObject* o, Py_ssize_t i) {
    80   bserObject *obj = (bserObject*)o;
   100   bserObject* obj = (bserObject*)o;
    81 
   101 
    82   return PySequence_GetItem(obj->values, i);
   102   return PySequence_GetItem(obj->values, i);
    83 }
   103 }
    84 
   104 
       
   105 // clang-format off
    85 static PySequenceMethods bserobj_sq = {
   106 static PySequenceMethods bserobj_sq = {
    86   bserobj_tuple_length,      /* sq_length */
   107   bserobj_tuple_length,      /* sq_length */
    87   0,                         /* sq_concat */
   108   0,                         /* sq_concat */
    88   0,                         /* sq_repeat */
   109   0,                         /* sq_repeat */
    89   bserobj_tuple_item,        /* sq_item */
   110   bserobj_tuple_item,        /* sq_item */
    90   0,                         /* sq_ass_item */
   111   0,                         /* sq_ass_item */
    91   0,                         /* sq_contains */
   112   0,                         /* sq_contains */
    92   0,                         /* sq_inplace_concat */
   113   0,                         /* sq_inplace_concat */
    93   0                          /* sq_inplace_repeat */
   114   0                          /* sq_inplace_repeat */
    94 };
   115 };
    95 
   116 // clang-format on
    96 static void bserobj_dealloc(PyObject *o) {
   117 
    97   bserObject *obj = (bserObject*)o;
   118 static void bserobj_dealloc(PyObject* o) {
       
   119   bserObject* obj = (bserObject*)o;
    98 
   120 
    99   Py_CLEAR(obj->keys);
   121   Py_CLEAR(obj->keys);
   100   Py_CLEAR(obj->values);
   122   Py_CLEAR(obj->values);
   101   PyObject_Del(o);
   123   PyObject_Del(o);
   102 }
   124 }
   103 
   125 
   104 static PyObject *bserobj_getattrro(PyObject *o, PyObject *name) {
   126 static PyObject* bserobj_getattrro(PyObject* o, PyObject* name) {
   105   bserObject *obj = (bserObject*)o;
   127   bserObject* obj = (bserObject*)o;
   106   Py_ssize_t i, n;
   128   Py_ssize_t i, n;
   107   const char *namestr;
   129   PyObject* name_bytes = NULL;
       
   130   PyObject* ret = NULL;
       
   131   const char* namestr;
   108 
   132 
   109   if (PyIndex_Check(name)) {
   133   if (PyIndex_Check(name)) {
   110     i = PyNumber_AsSsize_t(name, PyExc_IndexError);
   134     i = PyNumber_AsSsize_t(name, PyExc_IndexError);
   111     if (i == -1 && PyErr_Occurred()) {
   135     if (i == -1 && PyErr_Occurred()) {
   112       return NULL;
   136       goto bail;
   113     }
   137     }
   114     return PySequence_GetItem(obj->values, i);
   138     ret = PySequence_GetItem(obj->values, i);
   115   }
   139     goto bail;
   116 
   140   }
       
   141 
       
   142   // We can be passed in Unicode objects here -- we don't support anything other
       
   143   // than UTF-8 for keys.
       
   144   if (PyUnicode_Check(name)) {
       
   145     name_bytes = PyUnicode_AsUTF8String(name);
       
   146     if (name_bytes == NULL) {
       
   147       goto bail;
       
   148     }
       
   149     namestr = PyBytes_AsString(name_bytes);
       
   150   } else {
       
   151     namestr = PyBytes_AsString(name);
       
   152   }
       
   153 
       
   154   if (namestr == NULL) {
       
   155     goto bail;
       
   156   }
   117   // hack^Wfeature to allow mercurial to use "st_size" to reference "size"
   157   // hack^Wfeature to allow mercurial to use "st_size" to reference "size"
   118   namestr = PyString_AsString(name);
       
   119   if (!strncmp(namestr, "st_", 3)) {
   158   if (!strncmp(namestr, "st_", 3)) {
   120     namestr += 3;
   159     namestr += 3;
   121   }
   160   }
   122 
   161 
   123   n = PyTuple_GET_SIZE(obj->keys);
   162   n = PyTuple_GET_SIZE(obj->keys);
   124   for (i = 0; i < n; i++) {
   163   for (i = 0; i < n; i++) {
   125     const char *item_name = NULL;
   164     const char* item_name = NULL;
   126     PyObject *key = PyTuple_GET_ITEM(obj->keys, i);
   165     PyObject* key = PyTuple_GET_ITEM(obj->keys, i);
   127 
   166 
   128     item_name = PyString_AsString(key);
   167     item_name = PyBytes_AsString(key);
   129     if (!strcmp(item_name, namestr)) {
   168     if (!strcmp(item_name, namestr)) {
   130       return PySequence_GetItem(obj->values, i);
   169       ret = PySequence_GetItem(obj->values, i);
   131     }
   170       goto bail;
   132   }
   171     }
   133   PyErr_Format(PyExc_AttributeError,
   172   }
   134               "bserobject has no attribute '%.400s'", namestr);
   173 
   135   return NULL;
   174   PyErr_Format(
   136 }
   175       PyExc_AttributeError, "bserobject has no attribute '%.400s'", namestr);
   137 
   176 bail:
       
   177   Py_XDECREF(name_bytes);
       
   178   return ret;
       
   179 }
       
   180 
       
   181 // clang-format off
   138 static PyMappingMethods bserobj_map = {
   182 static PyMappingMethods bserobj_map = {
   139   bserobj_tuple_length,     /* mp_length */
   183   bserobj_tuple_length,     /* mp_length */
   140   bserobj_getattrro,        /* mp_subscript */
   184   bserobj_getattrro,        /* mp_subscript */
   141   0                         /* mp_ass_subscript */
   185   0                         /* mp_ass_subscript */
   142 };
   186 };
   179   0,                         /* tp_dictoffset */
   223   0,                         /* tp_dictoffset */
   180   0,                         /* tp_init */
   224   0,                         /* tp_init */
   181   0,                         /* tp_alloc */
   225   0,                         /* tp_alloc */
   182   0,                         /* tp_new */
   226   0,                         /* tp_new */
   183 };
   227 };
   184 
   228 // clang-format on
   185 
   229 
   186 static PyObject *bser_loads_recursive(const char **ptr, const char *end,
   230 typedef struct loads_ctx {
   187     int mutable);
   231   int mutable;
       
   232   const char* value_encoding;
       
   233   const char* value_errors;
       
   234   uint32_t bser_version;
       
   235   uint32_t bser_capabilities;
       
   236 } unser_ctx_t;
       
   237 
       
   238 static PyObject*
       
   239 bser_loads_recursive(const char** ptr, const char* end, const unser_ctx_t* ctx);
   188 
   240 
   189 static const char bser_true = BSER_TRUE;
   241 static const char bser_true = BSER_TRUE;
   190 static const char bser_false = BSER_FALSE;
   242 static const char bser_false = BSER_FALSE;
   191 static const char bser_null = BSER_NULL;
   243 static const char bser_null = BSER_NULL;
   192 static const char bser_string_hdr = BSER_STRING;
   244 static const char bser_bytestring_hdr = BSER_BYTESTRING;
   193 static const char bser_array_hdr = BSER_ARRAY;
   245 static const char bser_array_hdr = BSER_ARRAY;
   194 static const char bser_object_hdr = BSER_OBJECT;
   246 static const char bser_object_hdr = BSER_OBJECT;
   195 
   247 
   196 static inline uint32_t next_power_2(uint32_t n)
   248 static inline uint32_t next_power_2(uint32_t n) {
   197 {
       
   198   n |= (n >> 16);
   249   n |= (n >> 16);
   199   n |= (n >> 8);
   250   n |= (n >> 8);
   200   n |= (n >> 4);
   251   n |= (n >> 4);
   201   n |= (n >> 2);
   252   n |= (n >> 2);
   202   n |= (n >> 1);
   253   n |= (n >> 1);
   203   return n + 1;
   254   return n + 1;
   204 }
   255 }
   205 
   256 
   206 // A buffer we use for building up the serialized result
   257 // A buffer we use for building up the serialized result
   207 struct bser_buffer {
   258 struct bser_buffer {
   208   char *buf;
   259   char* buf;
   209   int wpos, allocd;
   260   int wpos, allocd;
       
   261   uint32_t bser_version;
       
   262   uint32_t capabilities;
   210 };
   263 };
   211 typedef struct bser_buffer bser_t;
   264 typedef struct bser_buffer bser_t;
   212 
   265 
   213 static int bser_append(bser_t *bser, const char *data, uint32_t len)
   266 static int bser_append(bser_t* bser, const char* data, uint32_t len) {
   214 {
       
   215   int newlen = next_power_2(bser->wpos + len);
   267   int newlen = next_power_2(bser->wpos + len);
   216   if (newlen > bser->allocd) {
   268   if (newlen > bser->allocd) {
   217     char *nbuf = realloc(bser->buf, newlen);
   269     char* nbuf = realloc(bser->buf, newlen);
   218     if (!nbuf) {
   270     if (!nbuf) {
   219       return 0;
   271       return 0;
   220     }
   272     }
   221 
   273 
   222     bser->buf = nbuf;
   274     bser->buf = nbuf;
   226   memcpy(bser->buf + bser->wpos, data, len);
   278   memcpy(bser->buf + bser->wpos, data, len);
   227   bser->wpos += len;
   279   bser->wpos += len;
   228   return 1;
   280   return 1;
   229 }
   281 }
   230 
   282 
   231 static int bser_init(bser_t *bser)
   283 static int bser_init(bser_t* bser, uint32_t version, uint32_t capabilities) {
   232 {
       
   233   bser->allocd = 8192;
   284   bser->allocd = 8192;
   234   bser->wpos = 0;
   285   bser->wpos = 0;
   235   bser->buf = malloc(bser->allocd);
   286   bser->buf = malloc(bser->allocd);
   236 
   287   bser->bser_version = version;
       
   288   bser->capabilities = capabilities;
   237   if (!bser->buf) {
   289   if (!bser->buf) {
   238     return 0;
   290     return 0;
   239   }
   291   }
   240 
   292 
   241   // Leave room for the serialization header, which includes
   293 // Leave room for the serialization header, which includes
   242   // our overall length.  To make things simpler, we'll use an
   294 // our overall length.  To make things simpler, we'll use an
   243   // int32 for the header
   295 // int32 for the header
   244 #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00"
   296 #define EMPTY_HEADER "\x00\x01\x05\x00\x00\x00\x00"
   245   bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER)-1);
   297 
       
   298 // Version 2 also carries an integer indicating the capabilities. The
       
   299 // capabilities integer comes before the PDU size.
       
   300 #define EMPTY_HEADER_V2 "\x00\x02\x00\x00\x00\x00\x05\x00\x00\x00\x00"
       
   301   if (version == 2) {
       
   302     bser_append(bser, EMPTY_HEADER_V2, sizeof(EMPTY_HEADER_V2) - 1);
       
   303   } else {
       
   304     bser_append(bser, EMPTY_HEADER, sizeof(EMPTY_HEADER) - 1);
       
   305   }
   246 
   306 
   247   return 1;
   307   return 1;
   248 }
   308 }
   249 
   309 
   250 static void bser_dtor(bser_t *bser)
   310 static void bser_dtor(bser_t* bser) {
   251 {
       
   252   free(bser->buf);
   311   free(bser->buf);
   253   bser->buf = NULL;
   312   bser->buf = NULL;
   254 }
   313 }
   255 
   314 
   256 static int bser_long(bser_t *bser, int64_t val)
   315 static int bser_long(bser_t* bser, int64_t val) {
   257 {
       
   258   int8_t i8;
   316   int8_t i8;
   259   int16_t i16;
   317   int16_t i16;
   260   int32_t i32;
   318   int32_t i32;
   261   int64_t i64;
   319   int64_t i64;
   262   char sz;
   320   char sz;
   263   int size = INT_SIZE(val);
   321   int size = INT_SIZE(val);
   264   char *iptr;
   322   char* iptr;
   265 
   323 
   266   switch (size) {
   324   switch (size) {
   267     case 1:
   325     case 1:
   268       sz = BSER_INT8;
   326       sz = BSER_INT8;
   269       i8 = (int8_t)val;
   327       i8 = (int8_t)val;
   283       sz = BSER_INT64;
   341       sz = BSER_INT64;
   284       i64 = (int64_t)val;
   342       i64 = (int64_t)val;
   285       iptr = (char*)&i64;
   343       iptr = (char*)&i64;
   286       break;
   344       break;
   287     default:
   345     default:
   288       PyErr_SetString(PyExc_RuntimeError,
   346       PyErr_SetString(PyExc_RuntimeError, "Cannot represent this long value!?");
   289           "Cannot represent this long value!?");
       
   290       return 0;
   347       return 0;
   291   }
   348   }
   292 
   349 
   293   if (!bser_append(bser, &sz, sizeof(sz))) {
   350   if (!bser_append(bser, &sz, sizeof(sz))) {
   294     return 0;
   351     return 0;
   295   }
   352   }
   296 
   353 
   297   return bser_append(bser, iptr, size);
   354   return bser_append(bser, iptr, size);
   298 }
   355 }
   299 
   356 
   300 static int bser_string(bser_t *bser, PyObject *sval)
   357 static int bser_bytestring(bser_t* bser, PyObject* sval) {
   301 {
   358   char* buf = NULL;
   302   char *buf = NULL;
       
   303   Py_ssize_t len;
   359   Py_ssize_t len;
   304   int res;
   360   int res;
   305   PyObject *utf = NULL;
   361   PyObject* utf = NULL;
   306 
   362 
   307   if (PyUnicode_Check(sval)) {
   363   if (PyUnicode_Check(sval)) {
   308     utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore");
   364     utf = PyUnicode_AsEncodedString(sval, "utf-8", "ignore");
   309     sval = utf;
   365     sval = utf;
   310   }
   366   }
   311 
   367 
   312   res = PyString_AsStringAndSize(sval, &buf, &len);
   368   res = PyBytes_AsStringAndSize(sval, &buf, &len);
   313   if (res == -1) {
   369   if (res == -1) {
   314     res = 0;
   370     res = 0;
   315     goto out;
   371     goto out;
   316   }
   372   }
   317 
   373 
   318   if (!bser_append(bser, &bser_string_hdr, sizeof(bser_string_hdr))) {
   374   if (!bser_append(bser, &bser_bytestring_hdr, sizeof(bser_bytestring_hdr))) {
   319     res = 0;
   375     res = 0;
   320     goto out;
   376     goto out;
   321   }
   377   }
   322 
   378 
   323   if (!bser_long(bser, len)) {
   379   if (!bser_long(bser, len)) {
   339   }
   395   }
   340 
   396 
   341   return res;
   397   return res;
   342 }
   398 }
   343 
   399 
   344 static int bser_recursive(bser_t *bser, PyObject *val)
   400 static int bser_recursive(bser_t* bser, PyObject* val) {
   345 {
       
   346   if (PyBool_Check(val)) {
   401   if (PyBool_Check(val)) {
   347     if (val == Py_True) {
   402     if (val == Py_True) {
   348       return bser_append(bser, &bser_true, sizeof(bser_true));
   403       return bser_append(bser, &bser_true, sizeof(bser_true));
   349     }
   404     }
   350     return bser_append(bser, &bser_false, sizeof(bser_false));
   405     return bser_append(bser, &bser_false, sizeof(bser_false));
   352 
   407 
   353   if (val == Py_None) {
   408   if (val == Py_None) {
   354     return bser_append(bser, &bser_null, sizeof(bser_null));
   409     return bser_append(bser, &bser_null, sizeof(bser_null));
   355   }
   410   }
   356 
   411 
       
   412 // Python 3 has one integer type.
       
   413 #if PY_MAJOR_VERSION < 3
   357   if (PyInt_Check(val)) {
   414   if (PyInt_Check(val)) {
   358     return bser_long(bser, PyInt_AS_LONG(val));
   415     return bser_long(bser, PyInt_AS_LONG(val));
   359   }
   416   }
       
   417 #endif // PY_MAJOR_VERSION < 3
   360 
   418 
   361   if (PyLong_Check(val)) {
   419   if (PyLong_Check(val)) {
   362     return bser_long(bser, PyLong_AsLongLong(val));
   420     return bser_long(bser, PyLong_AsLongLong(val));
   363   }
   421   }
   364 
   422 
   365   if (PyString_Check(val) || PyUnicode_Check(val)) {
   423   if (PyBytes_Check(val) || PyUnicode_Check(val)) {
   366     return bser_string(bser, val);
   424     return bser_bytestring(bser, val);
   367   }
   425   }
   368 
       
   369 
   426 
   370   if (PyFloat_Check(val)) {
   427   if (PyFloat_Check(val)) {
   371     double dval = PyFloat_AS_DOUBLE(val);
   428     double dval = PyFloat_AS_DOUBLE(val);
   372     char sz = BSER_REAL;
   429     char sz = BSER_REAL;
   373 
   430 
   388     if (!bser_long(bser, len)) {
   445     if (!bser_long(bser, len)) {
   389       return 0;
   446       return 0;
   390     }
   447     }
   391 
   448 
   392     for (i = 0; i < len; i++) {
   449     for (i = 0; i < len; i++) {
   393       PyObject *ele = PyList_GET_ITEM(val, i);
   450       PyObject* ele = PyList_GET_ITEM(val, i);
   394 
   451 
   395       if (!bser_recursive(bser, ele)) {
   452       if (!bser_recursive(bser, ele)) {
   396         return 0;
   453         return 0;
   397       }
   454       }
   398     }
   455     }
   410     if (!bser_long(bser, len)) {
   467     if (!bser_long(bser, len)) {
   411       return 0;
   468       return 0;
   412     }
   469     }
   413 
   470 
   414     for (i = 0; i < len; i++) {
   471     for (i = 0; i < len; i++) {
   415       PyObject *ele = PyTuple_GET_ITEM(val, i);
   472       PyObject* ele = PyTuple_GET_ITEM(val, i);
   416 
   473 
   417       if (!bser_recursive(bser, ele)) {
   474       if (!bser_recursive(bser, ele)) {
   418         return 0;
   475         return 0;
   419       }
   476       }
   420     }
   477     }
   434     if (!bser_long(bser, len)) {
   491     if (!bser_long(bser, len)) {
   435       return 0;
   492       return 0;
   436     }
   493     }
   437 
   494 
   438     while (PyDict_Next(val, &pos, &key, &ele)) {
   495     while (PyDict_Next(val, &pos, &key, &ele)) {
   439       if (!bser_string(bser, key)) {
   496       if (!bser_bytestring(bser, key)) {
   440         return 0;
   497         return 0;
   441       }
   498       }
   442       if (!bser_recursive(bser, ele)) {
   499       if (!bser_recursive(bser, ele)) {
   443         return 0;
   500         return 0;
   444       }
   501       }
   449 
   506 
   450   PyErr_SetString(PyExc_ValueError, "Unsupported value type");
   507   PyErr_SetString(PyExc_ValueError, "Unsupported value type");
   451   return 0;
   508   return 0;
   452 }
   509 }
   453 
   510 
   454 static PyObject *bser_dumps(PyObject *self, PyObject *args)
   511 static PyObject* bser_dumps(PyObject* self, PyObject* args, PyObject* kw) {
   455 {
       
   456   PyObject *val = NULL, *res;
   512   PyObject *val = NULL, *res;
   457   bser_t bser;
   513   bser_t bser;
   458   uint32_t len;
   514   uint32_t len, bser_version = 1, bser_capabilities = 0;
   459 
   515 
   460   if (!PyArg_ParseTuple(args, "O", &val)) {
   516   static char* kw_list[] = {"val", "version", "capabilities", NULL};
   461     return NULL;
   517 
   462   }
   518   if (!PyArg_ParseTupleAndKeywords(
   463 
   519           args,
   464   if (!bser_init(&bser)) {
   520           kw,
       
   521           "O|ii:dumps",
       
   522           kw_list,
       
   523           &val,
       
   524           &bser_version,
       
   525           &bser_capabilities)) {
       
   526     return NULL;
       
   527   }
       
   528 
       
   529   if (!bser_init(&bser, bser_version, bser_capabilities)) {
   465     return PyErr_NoMemory();
   530     return PyErr_NoMemory();
   466   }
   531   }
   467 
   532 
   468   if (!bser_recursive(&bser, val)) {
   533   if (!bser_recursive(&bser, val)) {
   469     bser_dtor(&bser);
   534     bser_dtor(&bser);
   473     // otherwise, we've already set the error to something reasonable
   538     // otherwise, we've already set the error to something reasonable
   474     return NULL;
   539     return NULL;
   475   }
   540   }
   476 
   541 
   477   // Now fill in the overall length
   542   // Now fill in the overall length
   478   len = bser.wpos - (sizeof(EMPTY_HEADER) - 1);
   543   if (bser_version == 1) {
   479   memcpy(bser.buf + 3, &len, sizeof(len));
   544     len = bser.wpos - (sizeof(EMPTY_HEADER) - 1);
   480 
   545     memcpy(bser.buf + 3, &len, sizeof(len));
   481   res = PyString_FromStringAndSize(bser.buf, bser.wpos);
   546   } else {
       
   547     len = bser.wpos - (sizeof(EMPTY_HEADER_V2) - 1);
       
   548     // The BSER capabilities block comes before the PDU length
       
   549     memcpy(bser.buf + 2, &bser_capabilities, sizeof(bser_capabilities));
       
   550     memcpy(bser.buf + 7, &len, sizeof(len));
       
   551   }
       
   552 
       
   553   res = PyBytes_FromStringAndSize(bser.buf, bser.wpos);
   482   bser_dtor(&bser);
   554   bser_dtor(&bser);
   483 
   555 
   484   return res;
   556   return res;
   485 }
   557 }
   486 
   558 
   487 int bunser_int(const char **ptr, const char *end, int64_t *val)
   559 int bunser_int(const char** ptr, const char* end, int64_t* val) {
   488 {
       
   489   int needed;
   560   int needed;
   490   const char *buf = *ptr;
   561   const char* buf = *ptr;
   491   int8_t i8;
   562   int8_t i8;
   492   int16_t i16;
   563   int16_t i16;
   493   int32_t i32;
   564   int32_t i32;
   494   int64_t i64;
   565   int64_t i64;
   495 
   566 
   505       break;
   576       break;
   506     case BSER_INT64:
   577     case BSER_INT64:
   507       needed = 9;
   578       needed = 9;
   508       break;
   579       break;
   509     default:
   580     default:
   510       PyErr_Format(PyExc_ValueError,
   581       PyErr_Format(
   511           "invalid bser int encoding 0x%02x", buf[0]);
   582           PyExc_ValueError, "invalid bser int encoding 0x%02x", buf[0]);
   512       return 0;
   583       return 0;
   513   }
   584   }
   514   if (end - buf < needed) {
   585   if (end - buf < needed) {
   515     PyErr_SetString(PyExc_ValueError, "input buffer to small for int encoding");
   586     PyErr_SetString(PyExc_ValueError, "input buffer to small for int encoding");
   516     return 0;
   587     return 0;
   536     default:
   607     default:
   537       return 0;
   608       return 0;
   538   }
   609   }
   539 }
   610 }
   540 
   611 
   541 static int bunser_string(const char **ptr, const char *end,
   612 static int bunser_bytestring(
   542     const char **start, int64_t *len)
   613     const char** ptr,
   543 {
   614     const char* end,
   544   const char *buf = *ptr;
   615     const char** start,
       
   616     int64_t* len) {
       
   617   const char* buf = *ptr;
   545 
   618 
   546   // skip string marker
   619   // skip string marker
   547   buf++;
   620   buf++;
   548   if (!bunser_int(&buf, end, len)) {
   621   if (!bunser_int(&buf, end, len)) {
   549     return 0;
   622     return 0;
   557   *ptr = buf + *len;
   630   *ptr = buf + *len;
   558   *start = buf;
   631   *start = buf;
   559   return 1;
   632   return 1;
   560 }
   633 }
   561 
   634 
   562 static PyObject *bunser_array(const char **ptr, const char *end, int mutable)
   635 static PyObject*
   563 {
   636 bunser_array(const char** ptr, const char* end, const unser_ctx_t* ctx) {
   564   const char *buf = *ptr;
   637   const char* buf = *ptr;
   565   int64_t nitems, i;
   638   int64_t nitems, i;
   566   PyObject *res;
   639   int mutable = ctx->mutable;
       
   640   PyObject* res;
   567 
   641 
   568   // skip array header
   642   // skip array header
   569   buf++;
   643   buf++;
   570   if (!bunser_int(&buf, end, &nitems)) {
   644   if (!bunser_int(&buf, end, &nitems)) {
   571     return 0;
   645     return 0;
   582   } else {
   656   } else {
   583     res = PyTuple_New((Py_ssize_t)nitems);
   657     res = PyTuple_New((Py_ssize_t)nitems);
   584   }
   658   }
   585 
   659 
   586   for (i = 0; i < nitems; i++) {
   660   for (i = 0; i < nitems; i++) {
   587     PyObject *ele = bser_loads_recursive(ptr, end, mutable);
   661     PyObject* ele = bser_loads_recursive(ptr, end, ctx);
   588 
   662 
   589     if (!ele) {
   663     if (!ele) {
   590       Py_DECREF(res);
   664       Py_DECREF(res);
   591       return NULL;
   665       return NULL;
   592     }
   666     }
   600   }
   674   }
   601 
   675 
   602   return res;
   676   return res;
   603 }
   677 }
   604 
   678 
   605 static PyObject *bunser_object(const char **ptr, const char *end,
   679 static PyObject*
   606     int mutable)
   680 bunser_object(const char** ptr, const char* end, const unser_ctx_t* ctx) {
   607 {
   681   const char* buf = *ptr;
   608   const char *buf = *ptr;
       
   609   int64_t nitems, i;
   682   int64_t nitems, i;
   610   PyObject *res;
   683   int mutable = ctx->mutable;
   611   bserObject *obj;
   684   PyObject* res;
       
   685   bserObject* obj;
   612 
   686 
   613   // skip array header
   687   // skip array header
   614   buf++;
   688   buf++;
   615   if (!bunser_int(&buf, end, &nitems)) {
   689   if (!bunser_int(&buf, end, &nitems)) {
   616     return 0;
   690     return 0;
   625     obj->values = PyTuple_New((Py_ssize_t)nitems);
   699     obj->values = PyTuple_New((Py_ssize_t)nitems);
   626     res = (PyObject*)obj;
   700     res = (PyObject*)obj;
   627   }
   701   }
   628 
   702 
   629   for (i = 0; i < nitems; i++) {
   703   for (i = 0; i < nitems; i++) {
   630     const char *keystr;
   704     const char* keystr;
   631     int64_t keylen;
   705     int64_t keylen;
   632     PyObject *key;
   706     PyObject* key;
   633     PyObject *ele;
   707     PyObject* ele;
   634 
   708 
   635     if (!bunser_string(ptr, end, &keystr, &keylen)) {
   709     if (!bunser_bytestring(ptr, end, &keystr, &keylen)) {
   636       Py_DECREF(res);
   710       Py_DECREF(res);
   637       return NULL;
   711       return NULL;
   638     }
   712     }
   639 
   713 
   640     if (keylen > LONG_MAX) {
   714     if (keylen > LONG_MAX) {
   641       PyErr_Format(PyExc_ValueError, "string too big for python");
   715       PyErr_Format(PyExc_ValueError, "string too big for python");
   642       Py_DECREF(res);
   716       Py_DECREF(res);
   643       return NULL;
   717       return NULL;
   644     }
   718     }
   645 
   719 
   646     key = PyString_FromStringAndSize(keystr, (Py_ssize_t)keylen);
   720     if (mutable) {
       
   721       // This will interpret the key as UTF-8.
       
   722       key = PyUnicode_FromStringAndSize(keystr, (Py_ssize_t)keylen);
       
   723     } else {
       
   724       // For immutable objects we'll manage key lookups, so we can avoid going
       
   725       // through the Unicode APIs. This avoids a potentially expensive and
       
   726       // definitely unnecessary conversion to UTF-16 and back for Python 2.
       
   727       // TODO: On Python 3 the Unicode APIs are smarter: we might be able to use
       
   728       // Unicode keys there without an appreciable performance loss.
       
   729       key = PyBytes_FromStringAndSize(keystr, (Py_ssize_t)keylen);
       
   730     }
       
   731 
   647     if (!key) {
   732     if (!key) {
   648       Py_DECREF(res);
   733       Py_DECREF(res);
   649       return NULL;
   734       return NULL;
   650     }
   735     }
   651 
   736 
   652     ele = bser_loads_recursive(ptr, end, mutable);
   737     ele = bser_loads_recursive(ptr, end, ctx);
   653 
   738 
   654     if (!ele) {
   739     if (!ele) {
   655       Py_DECREF(key);
   740       Py_DECREF(key);
   656       Py_DECREF(res);
   741       Py_DECREF(res);
   657       return NULL;
   742       return NULL;
   669   }
   754   }
   670 
   755 
   671   return res;
   756   return res;
   672 }
   757 }
   673 
   758 
   674 static PyObject *bunser_template(const char **ptr, const char *end,
   759 static PyObject*
   675     int mutable)
   760 bunser_template(const char** ptr, const char* end, const unser_ctx_t* ctx) {
   676 {
   761   const char* buf = *ptr;
   677   const char *buf = *ptr;
       
   678   int64_t nitems, i;
   762   int64_t nitems, i;
   679   PyObject *arrval;
   763   int mutable = ctx->mutable;
   680   PyObject *keys;
   764   PyObject* arrval;
       
   765   PyObject* keys;
   681   Py_ssize_t numkeys, keyidx;
   766   Py_ssize_t numkeys, keyidx;
       
   767   unser_ctx_t keys_ctx = {0};
       
   768   if (mutable) {
       
   769     keys_ctx.mutable = 1;
       
   770     // Decode keys as UTF-8 in this case.
       
   771     keys_ctx.value_encoding = "utf-8";
       
   772     keys_ctx.value_errors = "strict";
       
   773   } else {
       
   774     // Treat keys as bytestrings in this case -- we'll do Unicode conversions at
       
   775     // lookup time.
       
   776   }
   682 
   777 
   683   if (buf[1] != BSER_ARRAY) {
   778   if (buf[1] != BSER_ARRAY) {
   684     PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE");
   779     PyErr_Format(PyExc_ValueError, "Expect ARRAY to follow TEMPLATE");
   685     return NULL;
   780     return NULL;
   686   }
   781   }
   687 
   782 
   688   // skip header
   783   // skip header
   689   buf++;
   784   buf++;
   690   *ptr = buf;
   785   *ptr = buf;
   691 
   786 
   692   // Load template keys
   787   // Load template keys.
   693   keys = bunser_array(ptr, end, mutable);
   788   // Keys are decoded as UTF-8 when mutable, else kept as bytestrings.
       
   789   keys = bunser_array(ptr, end, &keys_ctx);
   694   if (!keys) {
   790   if (!keys) {
   695     return NULL;
   791     return NULL;
   696   }
   792   }
   697 
   793 
   698   numkeys = PySequence_Length(keys);
   794   numkeys = PySequence_Length(keys);
   714     Py_DECREF(keys);
   810     Py_DECREF(keys);
   715     return NULL;
   811     return NULL;
   716   }
   812   }
   717 
   813 
   718   for (i = 0; i < nitems; i++) {
   814   for (i = 0; i < nitems; i++) {
   719     PyObject *dict = NULL;
   815     PyObject* dict = NULL;
   720     bserObject *obj = NULL;
   816     bserObject* obj = NULL;
   721 
   817 
   722     if (mutable) {
   818     if (mutable) {
   723       dict = PyDict_New();
   819       dict = PyDict_New();
   724     } else {
   820     } else {
   725       obj = PyObject_New(bserObject, &bserObjectType);
   821       obj = PyObject_New(bserObject, &bserObjectType);
   729         obj->values = PyTuple_New(numkeys);
   825         obj->values = PyTuple_New(numkeys);
   730       }
   826       }
   731       dict = (PyObject*)obj;
   827       dict = (PyObject*)obj;
   732     }
   828     }
   733     if (!dict) {
   829     if (!dict) {
   734 fail:
   830     fail:
   735       Py_DECREF(keys);
   831       Py_DECREF(keys);
   736       Py_DECREF(arrval);
   832       Py_DECREF(arrval);
   737       return NULL;
   833       return NULL;
   738     }
   834     }
   739 
   835 
   740     for (keyidx = 0; keyidx < numkeys; keyidx++) {
   836     for (keyidx = 0; keyidx < numkeys; keyidx++) {
   741       PyObject *key;
   837       PyObject* key;
   742       PyObject *ele;
   838       PyObject* ele;
   743 
   839 
   744       if (**ptr == BSER_SKIP) {
   840       if (**ptr == BSER_SKIP) {
   745         *ptr = *ptr + 1;
   841         *ptr = *ptr + 1;
   746         ele = Py_None;
   842         ele = Py_None;
   747         Py_INCREF(ele);
   843         Py_INCREF(ele);
   748       } else {
   844       } else {
   749         ele = bser_loads_recursive(ptr, end, mutable);
   845         ele = bser_loads_recursive(ptr, end, ctx);
   750       }
   846       }
   751 
   847 
   752       if (!ele) {
   848       if (!ele) {
   753         goto fail;
   849         goto fail;
   754       }
   850       }
   770   Py_DECREF(keys);
   866   Py_DECREF(keys);
   771 
   867 
   772   return arrval;
   868   return arrval;
   773 }
   869 }
   774 
   870 
   775 static PyObject *bser_loads_recursive(const char **ptr, const char *end,
   871 static PyObject* bser_loads_recursive(
   776     int mutable)
   872     const char** ptr,
   777 {
   873     const char* end,
   778   const char *buf = *ptr;
   874     const unser_ctx_t* ctx) {
       
   875   const char* buf = *ptr;
   779 
   876 
   780   switch (buf[0]) {
   877   switch (buf[0]) {
   781     case BSER_INT8:
   878     case BSER_INT8:
   782     case BSER_INT16:
   879     case BSER_INT16:
   783     case BSER_INT32:
   880     case BSER_INT32:
   784     case BSER_INT64:
   881     case BSER_INT64: {
   785       {
   882       int64_t ival;
   786         int64_t ival;
   883       if (!bunser_int(ptr, end, &ival)) {
   787         if (!bunser_int(ptr, end, &ival)) {
   884         return NULL;
   788           return NULL;
   885       }
   789         }
   886 // Python 3 has one integer type.
   790         if (ival < LONG_MIN || ival > LONG_MAX) {
   887 #if PY_MAJOR_VERSION >= 3
   791           return PyLong_FromLongLong(ival);
   888       return PyLong_FromLongLong(ival);
   792         }
   889 #else
   793         return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t));
   890       if (ival < LONG_MIN || ival > LONG_MAX) {
   794       }
   891         return PyLong_FromLongLong(ival);
   795 
   892       }
   796     case BSER_REAL:
   893       return PyInt_FromSsize_t(Py_SAFE_DOWNCAST(ival, int64_t, Py_ssize_t));
   797       {
   894 #endif // PY_MAJOR_VERSION >= 3
   798         double dval;
   895     }
   799         memcpy(&dval, buf + 1, sizeof(dval));
   896 
   800         *ptr = buf + 1 + sizeof(double);
   897     case BSER_REAL: {
   801         return PyFloat_FromDouble(dval);
   898       double dval;
   802       }
   899       memcpy(&dval, buf + 1, sizeof(dval));
       
   900       *ptr = buf + 1 + sizeof(double);
       
   901       return PyFloat_FromDouble(dval);
       
   902     }
   803 
   903 
   804     case BSER_TRUE:
   904     case BSER_TRUE:
   805       *ptr = buf + 1;
   905       *ptr = buf + 1;
   806       Py_INCREF(Py_True);
   906       Py_INCREF(Py_True);
   807       return Py_True;
   907       return Py_True;
   814     case BSER_NULL:
   914     case BSER_NULL:
   815       *ptr = buf + 1;
   915       *ptr = buf + 1;
   816       Py_INCREF(Py_None);
   916       Py_INCREF(Py_None);
   817       return Py_None;
   917       return Py_None;
   818 
   918 
   819     case BSER_STRING:
   919     case BSER_BYTESTRING: {
   820       {
   920       const char* start;
   821         const char *start;
   921       int64_t len;
   822         int64_t len;
   922 
   823 
   923       if (!bunser_bytestring(ptr, end, &start, &len)) {
   824         if (!bunser_string(ptr, end, &start, &len)) {
   924         return NULL;
   825           return NULL;
   925       }
   826         }
   926 
   827 
   927       if (len > LONG_MAX) {
   828         if (len > LONG_MAX) {
   928         PyErr_Format(PyExc_ValueError, "string too long for python");
   829           PyErr_Format(PyExc_ValueError, "string too long for python");
   929         return NULL;
   830           return NULL;
   930       }
   831         }
   931 
   832 
   932       if (ctx->value_encoding != NULL) {
   833         return PyString_FromStringAndSize(start, (long)len);
   933         return PyUnicode_Decode(
   834       }
   934             start, (long)len, ctx->value_encoding, ctx->value_errors);
       
   935       } else {
       
   936         return PyBytes_FromStringAndSize(start, (long)len);
       
   937       }
       
   938     }
       
   939 
       
   940     case BSER_UTF8STRING: {
       
   941       const char* start;
       
   942       int64_t len;
       
   943 
       
   944       if (!bunser_bytestring(ptr, end, &start, &len)) {
       
   945         return NULL;
       
   946       }
       
   947 
       
   948       if (len > LONG_MAX) {
       
   949         PyErr_Format(PyExc_ValueError, "string too long for python");
       
   950         return NULL;
       
   951       }
       
   952 
       
   953       return PyUnicode_Decode(start, (long)len, "utf-8", "strict");
       
   954     }
   835 
   955 
   836     case BSER_ARRAY:
   956     case BSER_ARRAY:
   837       return bunser_array(ptr, end, mutable);
   957       return bunser_array(ptr, end, ctx);
   838 
   958 
   839     case BSER_OBJECT:
   959     case BSER_OBJECT:
   840       return bunser_object(ptr, end, mutable);
   960       return bunser_object(ptr, end, ctx);
   841 
   961 
   842     case BSER_TEMPLATE:
   962     case BSER_TEMPLATE:
   843       return bunser_template(ptr, end, mutable);
   963       return bunser_template(ptr, end, ctx);
   844 
   964 
   845     default:
   965     default:
   846       PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]);
   966       PyErr_Format(PyExc_ValueError, "unhandled bser opcode 0x%02x", buf[0]);
   847   }
   967   }
   848 
   968 
   849   return NULL;
   969   return NULL;
   850 }
   970 }
   851 
   971 
   852 // Expected use case is to read a packet from the socket and
   972 static int _pdu_info_helper(
   853 // then call bser.pdu_len on the packet.  It returns the total
   973     const char* data,
   854 // length of the entire response that the peer is sending,
   974     const char* end,
   855 // including the bytes already received.  This allows the client
   975     uint32_t* bser_version_out,
   856 // to compute the data size it needs to read before it can
   976     uint32_t* bser_capabilities_out,
   857 // decode the data
   977     int64_t* expected_len_out,
   858 static PyObject *bser_pdu_len(PyObject *self, PyObject *args)
   978     off_t* position_out) {
   859 {
   979   uint32_t bser_version;
   860   const char *start = NULL;
   980   uint32_t bser_capabilities = 0;
   861   const char *data = NULL;
   981   int64_t expected_len;
   862   int datalen = 0;
   982 
   863   const char *end;
   983   const char* start;
   864   int64_t expected_len, total_len;
   984   start = data;
   865 
       
   866   if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) {
       
   867     return NULL;
       
   868   }
       
   869   data = start;
       
   870   end = data + datalen;
       
   871 
       
   872   // Validate the header and length
   985   // Validate the header and length
   873   if (memcmp(data, EMPTY_HEADER, 2) != 0) {
   986   if (memcmp(data, EMPTY_HEADER, 2) == 0) {
       
   987     bser_version = 1;
       
   988   } else if (memcmp(data, EMPTY_HEADER_V2, 2) == 0) {
       
   989     bser_version = 2;
       
   990   } else {
   874     PyErr_SetString(PyExc_ValueError, "invalid bser header");
   991     PyErr_SetString(PyExc_ValueError, "invalid bser header");
   875     return NULL;
   992     return 0;
   876   }
   993   }
   877 
   994 
   878   data += 2;
   995   data += 2;
       
   996 
       
   997   if (bser_version == 2) {
       
   998     // Expect an integer telling us what capabilities are supported by the
       
   999     // remote server (currently unused).
       
  1000     if (!memcpy(&bser_capabilities, &data, sizeof(bser_capabilities))) {
       
  1001       return 0;
       
  1002     }
       
  1003     data += sizeof(bser_capabilities);
       
  1004   }
   879 
  1005 
   880   // Expect an integer telling us how big the rest of the data
  1006   // Expect an integer telling us how big the rest of the data
   881   // should be
  1007   // should be
   882   if (!bunser_int(&data, end, &expected_len)) {
  1008   if (!bunser_int(&data, end, &expected_len)) {
   883     return NULL;
  1009     return 0;
   884   }
  1010   }
   885 
  1011 
   886   total_len = expected_len + (data - start);
  1012   *bser_version_out = bser_version;
   887   if (total_len > LONG_MAX) {
  1013   *bser_capabilities_out = (uint32_t)bser_capabilities;
   888     return PyLong_FromLongLong(total_len);
  1014   *expected_len_out = expected_len;
   889   }
  1015   *position_out = (off_t)(data - start);
   890   return PyInt_FromLong((long)total_len);
  1016   return 1;
   891 }
  1017 }
   892 
  1018 
   893 static PyObject *bser_loads(PyObject *self, PyObject *args)
  1019 // This function parses the PDU header and provides info about the packet
   894 {
  1020 // Returns false if unsuccessful
   895   const char *data = NULL;
  1021 static int pdu_info_helper(
       
  1022     PyObject* self,
       
  1023     PyObject* args,
       
  1024     uint32_t* bser_version_out,
       
  1025     uint32_t* bser_capabilities_out,
       
  1026     int64_t* total_len_out) {
       
  1027   const char* start = NULL;
       
  1028   const char* data = NULL;
   896   int datalen = 0;
  1029   int datalen = 0;
   897   const char *end;
  1030   const char* end;
   898   int64_t expected_len;
  1031   int64_t expected_len;
   899   int mutable = 1;
  1032   off_t position;
   900   PyObject *mutable_obj = NULL;
  1033 
   901 
  1034   if (!PyArg_ParseTuple(args, "s#", &start, &datalen)) {
   902   if (!PyArg_ParseTuple(args, "s#|O:loads", &data, &datalen, &mutable_obj)) {
  1035     return 0;
   903     return NULL;
  1036   }
   904   }
  1037   data = start;
       
  1038   end = data + datalen;
       
  1039 
       
  1040   if (!_pdu_info_helper(
       
  1041           data,
       
  1042           end,
       
  1043           bser_version_out,
       
  1044           bser_capabilities_out,
       
  1045           &expected_len,
       
  1046           &position)) {
       
  1047     return 0;
       
  1048   }
       
  1049   *total_len_out = (int64_t)(expected_len + position);
       
  1050   return 1;
       
  1051 }
       
  1052 
       
  1053 // Expected use case is to read a packet from the socket and then call
       
  1054 // bser.pdu_info on the packet.  It returns the BSER version, BSER capabilities,
       
  1055 // and the total length of the entire response that the peer is sending,
       
  1056 // including the bytes already received. This allows the client  to compute the
       
  1057 // data size it needs to read before it can decode the data.
       
  1058 static PyObject* bser_pdu_info(PyObject* self, PyObject* args) {
       
  1059   uint32_t version, capabilities;
       
  1060   int64_t total_len;
       
  1061   if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
       
  1062     return NULL;
       
  1063   }
       
  1064   return Py_BuildValue("kkL", version, capabilities, total_len);
       
  1065 }
       
  1066 
       
  1067 static PyObject* bser_pdu_len(PyObject* self, PyObject* args) {
       
  1068   uint32_t version, capabilities;
       
  1069   int64_t total_len;
       
  1070   if (!pdu_info_helper(self, args, &version, &capabilities, &total_len)) {
       
  1071     return NULL;
       
  1072   }
       
  1073   return Py_BuildValue("L", total_len);
       
  1074 }
       
  1075 
       
  1076 static PyObject* bser_loads(PyObject* self, PyObject* args, PyObject* kw) {
       
  1077   const char* data = NULL;
       
  1078   int datalen = 0;
       
  1079   const char* start;
       
  1080   const char* end;
       
  1081   int64_t expected_len;
       
  1082   off_t position;
       
  1083   PyObject* mutable_obj = NULL;
       
  1084   const char* value_encoding = NULL;
       
  1085   const char* value_errors = NULL;
       
  1086   unser_ctx_t ctx = {1, 0};
       
  1087 
       
  1088   static char* kw_list[] = {
       
  1089       "buf", "mutable", "value_encoding", "value_errors", NULL};
       
  1090 
       
  1091   if (!PyArg_ParseTupleAndKeywords(
       
  1092           args,
       
  1093           kw,
       
  1094           "s#|Ozz:loads",
       
  1095           kw_list,
       
  1096           &start,
       
  1097           &datalen,
       
  1098           &mutable_obj,
       
  1099           &value_encoding,
       
  1100           &value_errors)) {
       
  1101     return NULL;
       
  1102   }
       
  1103 
   905   if (mutable_obj) {
  1104   if (mutable_obj) {
   906     mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
  1105     ctx.mutable = PyObject_IsTrue(mutable_obj) > 0 ? 1 : 0;
   907   }
  1106   }
   908 
  1107   ctx.value_encoding = value_encoding;
       
  1108   if (value_encoding == NULL) {
       
  1109     ctx.value_errors = NULL;
       
  1110   } else if (value_errors == NULL) {
       
  1111     ctx.value_errors = "strict";
       
  1112   } else {
       
  1113     ctx.value_errors = value_errors;
       
  1114   }
       
  1115   data = start;
   909   end = data + datalen;
  1116   end = data + datalen;
   910 
  1117 
   911   // Validate the header and length
  1118   if (!_pdu_info_helper(
   912   if (memcmp(data, EMPTY_HEADER, 2) != 0) {
  1119           data,
   913     PyErr_SetString(PyExc_ValueError, "invalid bser header");
  1120           end,
   914     return NULL;
  1121           &ctx.bser_version,
   915   }
  1122           &ctx.bser_capabilities,
   916 
  1123           &expected_len,
   917   data += 2;
  1124           &position)) {
   918 
  1125     return NULL;
   919   // Expect an integer telling us how big the rest of the data
  1126   }
   920   // should be
  1127 
   921   if (!bunser_int(&data, end, &expected_len)) {
  1128   data = start + position;
   922     return NULL;
       
   923   }
       
   924 
       
   925   // Verify
  1129   // Verify
   926   if (expected_len + data != end) {
  1130   if (expected_len + data != end) {
   927     PyErr_SetString(PyExc_ValueError, "bser data len != header len");
  1131     PyErr_SetString(PyExc_ValueError, "bser data len != header len");
   928     return NULL;
  1132     return NULL;
   929   }
  1133   }
   930 
  1134 
   931   return bser_loads_recursive(&data, end, mutable);
  1135   return bser_loads_recursive(&data, end, &ctx);
   932 }
  1136 }
   933 
  1137 
       
  1138 static PyObject* bser_load(PyObject* self, PyObject* args, PyObject* kw) {
       
  1139   PyObject *load, *string;
       
  1140   PyObject* fp = NULL;
       
  1141   PyObject* mutable_obj = NULL;
       
  1142   const char* value_encoding = NULL;
       
  1143   const char* value_errors = NULL;
       
  1144 
       
  1145   static char* kw_list[] = {
       
  1146       "fp", "mutable", "value_encoding", "value_errors", NULL};
       
  1147 
       
  1148   if (!PyArg_ParseTupleAndKeywords(
       
  1149           args,
       
  1150           kw,
       
  1151           "OOzz:load",
       
  1152           kw_list,
       
  1153           &fp,
       
  1154           &mutable_obj,
       
  1155           &value_encoding,
       
  1156           &value_errors)) {
       
  1157     return NULL;
       
  1158   }
       
  1159 
       
  1160   load = PyImport_ImportModule("pywatchman.load");
       
  1161   if (load == NULL) {
       
  1162     return NULL;
       
  1163   }
       
  1164   string = PyObject_CallMethod(
       
  1165       load, "load", "OOzz", fp, mutable_obj, value_encoding, value_errors);
       
  1166   Py_DECREF(load);
       
  1167   return string;
       
  1168 }
       
  1169 
       
  1170 // clang-format off
   934 static PyMethodDef bser_methods[] = {
  1171 static PyMethodDef bser_methods[] = {
   935   {"loads",  bser_loads, METH_VARARGS, "Deserialize string."},
  1172   {"loads", (PyCFunction)bser_loads, METH_VARARGS | METH_KEYWORDS,
   936   {"pdu_len", bser_pdu_len, METH_VARARGS, "Extract PDU length."},
  1173    "Deserialize string."},
   937   {"dumps",  bser_dumps, METH_VARARGS, "Serialize string."},
  1174   {"load", (PyCFunction)bser_load, METH_VARARGS | METH_KEYWORDS,
       
  1175    "Deserialize a file object"},
       
  1176   {"pdu_info", (PyCFunction)bser_pdu_info, METH_VARARGS,
       
  1177    "Extract PDU information."},
       
  1178   {"pdu_len", (PyCFunction)bser_pdu_len, METH_VARARGS,
       
  1179    "Extract total PDU length."},
       
  1180   {"dumps",  (PyCFunction)bser_dumps, METH_VARARGS | METH_KEYWORDS,
       
  1181    "Serialize string."},
   938   {NULL, NULL, 0, NULL}
  1182   {NULL, NULL, 0, NULL}
   939 };
  1183 };
   940 
  1184 
   941 PyMODINIT_FUNC initbser(void)
  1185 #if PY_MAJOR_VERSION >= 3
   942 {
  1186 static struct PyModuleDef bser_module = {
       
  1187   PyModuleDef_HEAD_INIT,
       
  1188   "bser",
       
  1189   "Efficient encoding and decoding of BSER.",
       
  1190   -1,
       
  1191   bser_methods
       
  1192 };
       
  1193 // clang-format on
       
  1194 
       
  1195 PyMODINIT_FUNC PyInit_bser(void) {
       
  1196   PyObject* mod;
       
  1197 
       
  1198   mod = PyModule_Create(&bser_module);
       
  1199   PyType_Ready(&bserObjectType);
       
  1200 
       
  1201   return mod;
       
  1202 }
       
  1203 #else
       
  1204 
       
  1205 PyMODINIT_FUNC initbser(void) {
   943   (void)Py_InitModule("bser", bser_methods);
  1206   (void)Py_InitModule("bser", bser_methods);
   944   PyType_Ready(&bserObjectType);
  1207   PyType_Ready(&bserObjectType);
   945 }
  1208 }
       
  1209 #endif // PY_MAJOR_VERSION >= 3
   946 
  1210 
   947 /* vim:ts=2:sw=2:et:
  1211 /* vim:ts=2:sw=2:et:
   948  */
  1212  */
   949 
       
   950 // no-check-code -- this is a 3rd party library