--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/python-zstandard/c-ext/compressiondict.c Thu Nov 10 22:15:58 2016 -0800
@@ -0,0 +1,247 @@
+/**
+* Copyright (c) 2016-present, Gregory Szorc
+* All rights reserved.
+*
+* This software may be modified and distributed under the terms
+* of the BSD license. See the LICENSE file for details.
+*/
+
+#include "python-zstandard.h"
+
+extern PyObject* ZstdError;
+
+/**
+ * Train a compression dictionary from a list of bytes samples.
+ *
+ * Python arguments: dict_size (output buffer capacity in bytes), samples
+ * (list of bytes) and optional parameters (a DictParametersType instance).
+ *
+ * Returns a new ZstdCompressionDict owning the trained data, or NULL with a
+ * Python exception set. Scratch buffers are freed on every path.
+ */
+ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
+    static char *kwlist[] = { "dict_size", "samples", "parameters", NULL };
+    Py_ssize_t capacity;
+    PyObject* samples;
+    PyObject* parameters = NULL;
+    ZDICT_params_t zparams;
+    Py_ssize_t samplesLen;
+    Py_ssize_t sampleIndex;
+    Py_ssize_t sampleSize;
+    PyObject* sampleItem;
+    size_t samplesSize = 0;
+    size_t zresult;
+    char* sampleBuffer = NULL;
+    char* sampleOffset;
+    size_t* sampleSizes = NULL;
+    void* dict = NULL;
+    ZstdCompressionDict* result = NULL;
+
+    /* "n" stores a Py_ssize_t; negative sizes are rejected explicitly. */
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!", kwlist,
+        &capacity,
+        &PyList_Type, &samples,
+        &DictParametersType, &parameters)) {
+        return NULL;
+    }
+    if (capacity < 0) {
+        PyErr_SetString(PyExc_ValueError, "dict_size must not be negative");
+        return NULL;
+    }
+
+    memset(&zparams, 0, sizeof(zparams)); /* also zeroes the reserved fields */
+    if (parameters) {
+        /* TODO validate data ranges */
+        zparams.selectivityLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 0));
+        zparams.compressionLevel = PyLong_AsLong(PyTuple_GetItem(parameters, 1));
+        zparams.notificationLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 2));
+        zparams.dictID = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 3));
+        /* Any failed lookup or conversion above leaves an exception set. */
+        if (PyErr_Occurred()) {
+            return NULL;
+        }
+    }
+
+    /* Figure out the total size of the raw samples. */
+    samplesLen = PyList_Size(samples);
+    for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
+        /* Borrowed reference: nothing to DECREF on the error path. */
+        sampleItem = PyList_GetItem(samples, sampleIndex);
+        if (!PyBytes_Check(sampleItem)) {
+            PyErr_SetString(PyExc_ValueError, "samples must be bytes");
+            return NULL;
+        }
+        samplesSize += PyBytes_GET_SIZE(sampleItem);
+    }
+
+    /* malloc(0) may return NULL, so always request at least one byte. */
+    sampleBuffer = malloc(samplesSize ? samplesSize : 1);
+    sampleSizes = malloc(samplesLen ? samplesLen * sizeof(size_t) : 1);
+    dict = malloc(capacity ? (size_t)capacity : 1);
+    if (!sampleBuffer || !sampleSizes || !dict) {
+        PyErr_NoMemory();
+        goto finally;
+    }
+
+    /* Iterate again and pack the samples back to back in the buffer. */
+    sampleOffset = sampleBuffer;
+    for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) {
+        sampleItem = PyList_GetItem(samples, sampleIndex);
+        sampleSize = PyBytes_GET_SIZE(sampleItem);
+        sampleSizes[sampleIndex] = sampleSize;
+        memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
+        sampleOffset += sampleSize;
+    }
+
+    zresult = ZDICT_trainFromBuffer_advanced(dict, (size_t)capacity,
+        sampleBuffer, sampleSizes, (unsigned int)samplesLen,
+        zparams);
+    if (ZDICT_isError(zresult)) {
+        PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
+        goto finally;
+    }
+
+    result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
+    if (result) {
+        /* Ownership of the dictionary buffer moves to the new object. */
+        result->dictData = dict;
+        result->dictSize = zresult;
+        dict = NULL;
+    }
+
+finally:
+    free(dict);
+    free(sampleSizes);
+    free(sampleBuffer);
+    return result;
+}
+
+
+PyDoc_STRVAR(ZstdCompressionDict__doc__,
+"ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
+"\n"
+"This type holds the results of a computed Zstandard compression dictionary.\n"
+"Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n"
+"obtained from another source into the constructor.\n"
+); /* Docstring for the type; referenced from the tp_doc slot below. */
+
+/* tp_init: copy the constructor's bytes argument into a buffer owned by the
+   instance. Returns 0, or -1 with an exception set and the instance left
+   untouched. On success, data held from an earlier __init__ call is freed. */
+static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) {
+    const char* format = (PY_MAJOR_VERSION >= 3) ? "y#" : "s#";
+    const char* source;
+    Py_ssize_t sourceSize;
+    void* copy;
+
+    if (!PyArg_ParseTuple(args, format, &source, &sourceSize)) {
+        return -1;
+    }
+
+    /* malloc(0) may return NULL; ask for one byte when the input is empty. */
+    copy = malloc(sourceSize ? (size_t)sourceSize : 1);
+    if (!copy) {
+        PyErr_NoMemory();
+        return -1;
+    }
+    memcpy(copy, source, sourceSize);
+
+    free(self->dictData);
+    self->dictData = copy;
+    self->dictSize = sourceSize;
+    return 0;
+}
+
+/* tp_dealloc: release the dictionary buffer, then the object itself. The
+   type sets Py_TPFLAGS_BASETYPE, so the memory must go back through the
+   tp_free slot of the instance's actual type rather than PyObject_Del():
+   a subclass instance may have been obtained from a different allocator. */
+static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
+    free(self->dictData); /* free(NULL) is a no-op, so no guard is needed */
+    Py_TYPE(self)->tp_free((PyObject*)self);
+}
+
+/* dict_id(): return the dictionary ID reported by ZDICT_getDictID() as a
+   Python integer; the unsigned conversion keeps large IDs non-negative. */
+static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
+    return PyLong_FromUnsignedLong(ZDICT_getDictID(self->dictData, self->dictSize));
+}
+
+static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) { /* as_bytes(): dictionary data as a bytes object */
+ return PyBytes_FromStringAndSize(self->dictData, self->dictSize); /* built from dictData and dictSize */
+}
+
+static PyMethodDef ZstdCompressionDict_methods[] = { /* method table used for the type's tp_methods slot */
+ { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
+ PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
+ { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
+ PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
+ { NULL, NULL } /* sentinel entry terminating the table */
+};
+
+static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) { /* sq_length handler */
+ return self->dictSize; /* number of bytes held in dictData */
+}
+
+static PySequenceMethods ZstdCompressionDict_sq = { /* sequence slots: only the length slot is filled in */
+ (lenfunc)ZstdCompressionDict_length, /* sq_length */
+ 0, /* sq_concat */
+ 0, /* sq_repeat */
+ 0, /* sq_item */
+ 0, /* sq_ass_item */
+ 0, /* sq_contains */
+ 0, /* sq_inplace_concat */
+ 0 /* sq_inplace_repeat */
+};
+
+PyTypeObject ZstdCompressionDictType = { /* type object for ZstdCompressionDict; readied in compressiondict_module_init() */
+ PyVarObject_HEAD_INIT(NULL, 0)
+ "zstd.ZstdCompressionDict", /* tp_name */
+ sizeof(ZstdCompressionDict), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
+ 0, /* tp_print */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_compare */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ &ZstdCompressionDict_sq, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags: BASETYPE permits subclassing */
+ ZstdCompressionDict__doc__, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ 0, /* tp_iter */
+ 0, /* tp_iternext */
+ ZstdCompressionDict_methods, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ (initproc)ZstdCompressionDict_init, /* tp_init */
+ 0, /* tp_alloc */
+ PyType_GenericNew, /* tp_new */
+};
+
+/* Ready ZstdCompressionDictType and add it to mod as "ZstdCompressionDict"; returns silently on failure. */
+void compressiondict_module_init(PyObject* mod) {
+    Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
+    if (PyType_Ready(&ZstdCompressionDictType) >= 0) {
+        Py_INCREF((PyObject*)&ZstdCompressionDictType);
+        if (PyModule_AddObject(mod, "ZstdCompressionDict", (PyObject*)&ZstdCompressionDictType) < 0) {
+            Py_DECREF((PyObject*)&ZstdCompressionDictType); /* reference is stolen only on success */
+        }
+    }
+}