contrib/python-zstandard/c-ext/compressor.c
changeset 30822 b54a2984cdd4
parent 30435 b86a448a2965
child 30830 08fa3a76a080
--- a/contrib/python-zstandard/c-ext/compressor.c	Sat Jan 14 20:05:15 2017 +0530
+++ b/contrib/python-zstandard/c-ext/compressor.c	Sat Jan 14 19:41:43 2017 -0800
@@ -10,6 +10,23 @@
 
 extern PyObject* ZstdError;
 
+/* Create compressor->cdict from the raw dictionary bytes dictData/dictSize and
+   *zparams. Returns 0 on success; on failure sets ZstdError and returns 1. */
+int populate_cdict(ZstdCompressor* compressor, void* dictData, size_t dictSize, ZSTD_parameters* zparams) {
+	ZSTD_customMem zmem;
+	assert(!compressor->cdict);
+	Py_BEGIN_ALLOW_THREADS
+	memset(&zmem, 0, sizeof(zmem));
+	/* Honor the caller-supplied buffer instead of reaching into compressor->dict. */
+	compressor->cdict = ZSTD_createCDict_advanced(dictData, dictSize, *zparams, zmem);
+	Py_END_ALLOW_THREADS
+	if (!compressor->cdict) {
+		PyErr_SetString(ZstdError, "could not create compression dictionary");
+		return 1;
+	}
+	return 0;
+}
+
 /**
 * Initialize a zstd CStream from a ZstdCompressor instance.
 *
@@ -57,7 +74,6 @@
 	return cstream;
 }
 
-
 PyDoc_STRVAR(ZstdCompressor__doc__,
 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
 "\n"
@@ -107,6 +123,7 @@
 	PyObject* writeContentSize = NULL;
 	PyObject* writeDictID = NULL;
 
+	self->cctx = NULL;
 	self->dict = NULL;
 	self->cparams = NULL;
 	self->cdict = NULL;
@@ -129,6 +146,14 @@
 		return -1;
 	}
 
+	/* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
+	   overhead of each compression operation. */
+	self->cctx = ZSTD_createCCtx();
+	if (!self->cctx) {
+		PyErr_NoMemory();
+		return -1;
+	}
+
 	self->compressionLevel = level;
 
 	if (dict) {
@@ -165,6 +190,11 @@
 		self->cdict = NULL;
 	}
 
+	if (self->cctx) {
+		ZSTD_freeCCtx(self->cctx);
+		self->cctx = NULL;
+	}
+
 	PyObject_Del(self);
 }
 
@@ -339,7 +369,7 @@
 }
 
 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
-"compress(data)\n"
+"compress(data, allow_empty=False)\n"
 "\n"
 "Compress data in a single operation.\n"
 "\n"
@@ -350,24 +380,41 @@
 "streaming based APIs is preferred for larger values.\n"
 );
 
-static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args) {
+static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+	static char* kwlist[] = {
+		"data",
+		"allow_empty",
+		NULL
+	};
+
 	const char* source;
 	Py_ssize_t sourceSize;
+	PyObject* allowEmpty = NULL;
 	size_t destSize;
-	ZSTD_CCtx* cctx;
 	PyObject* output;
 	char* dest;
 	void* dictData = NULL;
 	size_t dictSize = 0;
 	size_t zresult;
 	ZSTD_parameters zparams;
-	ZSTD_customMem zmem;
 
 #if PY_MAJOR_VERSION >= 3
-	if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O",
 #else
-	if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O",
 #endif
+		kwlist, &source, &sourceSize, &allowEmpty)) {
+		return NULL;
+	}
+
+	/* Limitation in zstd C API doesn't let decompression side distinguish
+	   between content size of 0 and unknown content size. This can make round
+	   tripping via Python difficult. Until this is fixed, require a flag
+	   to fire the footgun.
+	   https://github.com/indygreg/python-zstandard/issues/11 */
+	if (0 == sourceSize && self->fparams.contentSizeFlag
+		&& (!allowEmpty || PyObject_Not(allowEmpty))) {
+		PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes");
 		return NULL;
 	}
 
@@ -379,13 +426,6 @@
 
 	dest = PyBytes_AsString(output);
 
-	cctx = ZSTD_createCCtx();
-	if (!cctx) {
-		Py_DECREF(output);
-		PyErr_SetString(ZstdError, "could not create CCtx");
-		return NULL;
-	}
-
 	if (self->dict) {
 		dictData = self->dict->dictData;
 		dictSize = self->dict->dictSize;
@@ -406,23 +446,16 @@
 	/* The raw dict data has to be processed before it can be used. Since this
 	adds overhead - especially if multiple dictionary compression operations
 	are performed on the same ZstdCompressor instance - we create a
-	ZSTD_CDict once and reuse it for all operations. */
+	ZSTD_CDict once and reuse it for all operations.
 
-	/* TODO the zparams (which can be derived from the source data size) used
-	on first invocation are effectively reused for subsequent operations. This
-	may not be appropriate if input sizes vary significantly and could affect
-	chosen compression parameters.
-	https://github.com/facebook/zstd/issues/358 tracks this issue. */
+	Note: the compression parameters used for the first invocation (possibly
+	derived from the source size) will be reused on all subsequent invocations.
+	https://github.com/facebook/zstd/issues/358 contains more info. We could
+	potentially add an argument somewhere to control this behavior.
+	*/
 	if (dictData && !self->cdict) {
-		Py_BEGIN_ALLOW_THREADS
-		memset(&zmem, 0, sizeof(zmem));
-		self->cdict = ZSTD_createCDict_advanced(dictData, dictSize, zparams, zmem);
-		Py_END_ALLOW_THREADS
-
-		if (!self->cdict) {
+		if (populate_cdict(self, dictData, dictSize, &zparams)) {
 			Py_DECREF(output);
-			ZSTD_freeCCtx(cctx);
-			PyErr_SetString(ZstdError, "could not create compression dictionary");
 			return NULL;
 		}
 	}
@@ -432,17 +465,15 @@
 	   size. This means the argument to ZstdCompressor to control frame
 	   parameters is honored. */
 	if (self->cdict) {
-		zresult = ZSTD_compress_usingCDict(cctx, dest, destSize,
+		zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
 			source, sourceSize, self->cdict);
 	}
 	else {
-		zresult = ZSTD_compress_advanced(cctx, dest, destSize,
+		zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
 			source, sourceSize, dictData, dictSize, zparams);
 	}
 	Py_END_ALLOW_THREADS
 
-	ZSTD_freeCCtx(cctx);
-
 	if (ZSTD_isError(zresult)) {
 		PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
 		Py_CLEAR(output);
@@ -500,7 +531,7 @@
 	result->compressor = self;
 	Py_INCREF(result->compressor);
 
-	result->flushed = 0;
+	result->finished = 0;
 
 	return result;
 }
@@ -691,8 +722,8 @@
 }
 
 static PyMethodDef ZstdCompressor_methods[] = {
-	{ "compress", (PyCFunction)ZstdCompressor_compress, METH_VARARGS,
-	ZstdCompressor_compress__doc__ },
+	{ "compress", (PyCFunction)ZstdCompressor_compress,
+	METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
 	{ "compressobj", (PyCFunction)ZstdCompressor_compressobj,
 	METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
 	{ "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,