9 #include "python-zstandard.h" |
9 #include "python-zstandard.h" |
10 #include "pool.h" |
10 #include "pool.h" |
11 |
11 |
12 extern PyObject* ZstdError; |
12 extern PyObject* ZstdError; |
13 |
13 |
14 int populate_cdict(ZstdCompressor* compressor, ZSTD_parameters* zparams) { |
14 int ensure_cctx(ZstdCompressor* compressor) { |
15 ZSTD_customMem zmem; |
15 size_t zresult; |
16 |
16 |
17 if (compressor->cdict || !compressor->dict || !compressor->dict->dictData) { |
17 assert(compressor); |
18 return 0; |
18 assert(compressor->cctx); |
19 } |
19 assert(compressor->params); |
20 |
20 |
21 Py_BEGIN_ALLOW_THREADS |
21 ZSTD_CCtx_reset(compressor->cctx); |
22 memset(&zmem, 0, sizeof(zmem)); |
22 |
23 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData, |
23 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params); |
24 compressor->dict->dictSize, 1, *zparams, zmem); |
24 if (ZSTD_isError(zresult)) { |
25 Py_END_ALLOW_THREADS |
25 PyErr_Format(ZstdError, "could not set compression parameters: %s", |
26 |
26 ZSTD_getErrorName(zresult)); |
27 if (!compressor->cdict) { |
|
28 PyErr_SetString(ZstdError, "could not create compression dictionary"); |
|
29 return 1; |
27 return 1; |
|
28 } |
|
29 |
|
30 if (compressor->dict) { |
|
31 if (compressor->dict->cdict) { |
|
32 zresult = ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict); |
|
33 } |
|
34 else { |
|
35 zresult = ZSTD_CCtx_loadDictionary_advanced(compressor->cctx, |
|
36 compressor->dict->dictData, compressor->dict->dictSize, |
|
37 ZSTD_dlm_byRef, compressor->dict->dictType); |
|
38 } |
|
39 if (ZSTD_isError(zresult)) { |
|
40 PyErr_Format(ZstdError, "could not load compression dictionary: %s", |
|
41 ZSTD_getErrorName(zresult)); |
|
42 return 1; |
|
43 } |
30 } |
44 } |
31 |
45 |
32 return 0; |
46 return 0; |
33 } |
47 } |
34 |
48 |
35 /** |
49 static PyObject* frame_progression(ZSTD_CCtx* cctx) { |
36 * Ensure the ZSTD_CStream on a ZstdCompressor instance is initialized. |
50 PyObject* result = NULL; |
37 * |
51 PyObject* value; |
38 * Returns 0 on success. Other value on failure. Will set a Python exception |
52 ZSTD_frameProgression progression; |
39 * on failure. |
53 |
40 */ |
54 result = PyTuple_New(3); |
41 int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize) { |
55 if (!result) { |
42 ZSTD_parameters zparams; |
56 return NULL; |
43 void* dictData = NULL; |
57 } |
44 size_t dictSize = 0; |
58 |
45 size_t zresult; |
59 progression = ZSTD_getFrameProgression(cctx); |
46 |
60 |
47 if (compressor->cstream) { |
61 value = PyLong_FromUnsignedLongLong(progression.ingested); |
48 zresult = ZSTD_resetCStream(compressor->cstream, sourceSize); |
62 if (!value) { |
49 if (ZSTD_isError(zresult)) { |
63 Py_DECREF(result); |
50 PyErr_Format(ZstdError, "could not reset CStream: %s", |
64 return NULL; |
51 ZSTD_getErrorName(zresult)); |
65 } |
52 return -1; |
66 |
53 } |
67 PyTuple_SET_ITEM(result, 0, value); |
54 |
68 |
55 return 0; |
69 value = PyLong_FromUnsignedLongLong(progression.consumed); |
56 } |
70 if (!value) { |
57 |
71 Py_DECREF(result); |
58 compressor->cstream = ZSTD_createCStream(); |
72 return NULL; |
59 if (!compressor->cstream) { |
73 } |
60 PyErr_SetString(ZstdError, "could not create CStream"); |
74 |
61 return -1; |
75 PyTuple_SET_ITEM(result, 1, value); |
62 } |
76 |
63 |
77 value = PyLong_FromUnsignedLongLong(progression.produced); |
64 if (compressor->dict) { |
78 if (!value) { |
65 dictData = compressor->dict->dictData; |
79 Py_DECREF(result); |
66 dictSize = compressor->dict->dictSize; |
80 return NULL; |
67 } |
81 } |
68 |
82 |
69 memset(&zparams, 0, sizeof(zparams)); |
83 PyTuple_SET_ITEM(result, 2, value); |
70 if (compressor->cparams) { |
84 |
71 ztopy_compression_parameters(compressor->cparams, &zparams.cParams); |
85 return result; |
72 /* Do NOT call ZSTD_adjustCParams() here because the compression params |
|
73 come from the user. */ |
|
74 } |
|
75 else { |
|
76 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize); |
|
77 } |
|
78 |
|
79 zparams.fParams = compressor->fparams; |
|
80 |
|
81 zresult = ZSTD_initCStream_advanced(compressor->cstream, dictData, dictSize, |
|
82 zparams, sourceSize); |
|
83 |
|
84 if (ZSTD_isError(zresult)) { |
|
85 ZSTD_freeCStream(compressor->cstream); |
|
86 compressor->cstream = NULL; |
|
87 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult)); |
|
88 return -1; |
|
89 } |
|
90 |
|
91 return 0;; |
|
92 } |
|
93 |
|
94 int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize) { |
|
95 size_t zresult; |
|
96 void* dictData = NULL; |
|
97 size_t dictSize = 0; |
|
98 ZSTD_parameters zparams; |
|
99 |
|
100 assert(compressor->mtcctx); |
|
101 |
|
102 if (compressor->dict) { |
|
103 dictData = compressor->dict->dictData; |
|
104 dictSize = compressor->dict->dictSize; |
|
105 } |
|
106 |
|
107 memset(&zparams, 0, sizeof(zparams)); |
|
108 if (compressor->cparams) { |
|
109 ztopy_compression_parameters(compressor->cparams, &zparams.cParams); |
|
110 } |
|
111 else { |
|
112 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize); |
|
113 } |
|
114 |
|
115 zparams.fParams = compressor->fparams; |
|
116 |
|
117 zresult = ZSTDMT_initCStream_advanced(compressor->mtcctx, dictData, dictSize, |
|
118 zparams, sourceSize); |
|
119 |
|
120 if (ZSTD_isError(zresult)) { |
|
121 PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult)); |
|
122 return -1; |
|
123 } |
|
124 |
|
125 return 0; |
|
126 } |
86 } |
127 |
87 |
128 PyDoc_STRVAR(ZstdCompressor__doc__, |
88 PyDoc_STRVAR(ZstdCompressor__doc__, |
129 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n" |
89 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n" |
130 "\n" |
90 "\n" |
145 " parameters. If defined, this will overwrite the ``level`` argument.\n" |
105 " parameters. If defined, this will overwrite the ``level`` argument.\n" |
146 "write_checksum\n" |
106 "write_checksum\n" |
147 " If True, a 4 byte content checksum will be written with the compressed\n" |
107 " If True, a 4 byte content checksum will be written with the compressed\n" |
148 " data, allowing the decompressor to perform content verification.\n" |
108 " data, allowing the decompressor to perform content verification.\n" |
149 "write_content_size\n" |
109 "write_content_size\n" |
150 " If True, the decompressed content size will be included in the header of\n" |
110 " If True (the default), the decompressed content size will be included in\n" |
151 " the compressed data. This data will only be written if the compressor\n" |
111 " the header of the compressed data. This data will only be written if the\n" |
152 " knows the size of the input data.\n" |
112 " compressor knows the size of the input data.\n" |
153 "write_dict_id\n" |
113 "write_dict_id\n" |
154 " Determines whether the dictionary ID will be written into the compressed\n" |
114 " Determines whether the dictionary ID will be written into the compressed\n" |
155 " data. Defaults to True. Only adds content to the compressed data if\n" |
115 " data. Defaults to True. Only adds content to the compressed data if\n" |
156 " a dictionary is being used.\n" |
116 " a dictionary is being used.\n" |
157 "threads\n" |
117 "threads\n" |
173 NULL |
133 NULL |
174 }; |
134 }; |
175 |
135 |
176 int level = 3; |
136 int level = 3; |
177 ZstdCompressionDict* dict = NULL; |
137 ZstdCompressionDict* dict = NULL; |
178 CompressionParametersObject* params = NULL; |
138 ZstdCompressionParametersObject* params = NULL; |
179 PyObject* writeChecksum = NULL; |
139 PyObject* writeChecksum = NULL; |
180 PyObject* writeContentSize = NULL; |
140 PyObject* writeContentSize = NULL; |
181 PyObject* writeDictID = NULL; |
141 PyObject* writeDictID = NULL; |
182 int threads = 0; |
142 int threads = 0; |
183 |
143 |
184 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor", |
144 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor", |
185 kwlist, &level, &ZstdCompressionDictType, &dict, |
145 kwlist, &level, &ZstdCompressionDictType, &dict, |
186 &CompressionParametersType, ¶ms, |
146 &ZstdCompressionParametersType, ¶ms, |
187 &writeChecksum, &writeContentSize, &writeDictID, &threads)) { |
147 &writeChecksum, &writeContentSize, &writeDictID, &threads)) { |
188 return -1; |
|
189 } |
|
190 |
|
191 if (level < 1) { |
|
192 PyErr_SetString(PyExc_ValueError, "level must be greater than 0"); |
|
193 return -1; |
148 return -1; |
194 } |
149 } |
195 |
150 |
196 if (level > ZSTD_maxCLevel()) { |
151 if (level > ZSTD_maxCLevel()) { |
197 PyErr_Format(PyExc_ValueError, "level must be less than %d", |
152 PyErr_Format(PyExc_ValueError, "level must be less than %d", |
201 |
156 |
202 if (threads < 0) { |
157 if (threads < 0) { |
203 threads = cpu_count(); |
158 threads = cpu_count(); |
204 } |
159 } |
205 |
160 |
206 self->threads = threads; |
|
207 |
|
208 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the |
161 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the |
209 overhead of each compression operation. */ |
162 overhead of each compression operation. */ |
210 if (threads) { |
163 self->cctx = ZSTD_createCCtx(); |
211 self->mtcctx = ZSTDMT_createCCtx(threads); |
164 if (!self->cctx) { |
212 if (!self->mtcctx) { |
165 PyErr_NoMemory(); |
213 PyErr_NoMemory(); |
166 return -1; |
|
167 } |
|
168 |
|
169 /* TODO stuff the original parameters away somewhere so we can reset later. This |
|
170 will allow us to do things like automatically adjust cparams based on input |
|
171 size (assuming zstd isn't doing that internally). */ |
|
172 |
|
173 self->params = ZSTD_createCCtxParams(); |
|
174 if (!self->params) { |
|
175 PyErr_NoMemory(); |
|
176 return -1; |
|
177 } |
|
178 |
|
179 if (params && writeChecksum) { |
|
180 PyErr_SetString(PyExc_ValueError, |
|
181 "cannot define compression_params and write_checksum"); |
|
182 return -1; |
|
183 } |
|
184 |
|
185 if (params && writeContentSize) { |
|
186 PyErr_SetString(PyExc_ValueError, |
|
187 "cannot define compression_params and write_content_size"); |
|
188 return -1; |
|
189 } |
|
190 |
|
191 if (params && writeDictID) { |
|
192 PyErr_SetString(PyExc_ValueError, |
|
193 "cannot define compression_params and write_dict_id"); |
|
194 return -1; |
|
195 } |
|
196 |
|
197 if (params && threads) { |
|
198 PyErr_SetString(PyExc_ValueError, |
|
199 "cannot define compression_params and threads"); |
|
200 return -1; |
|
201 } |
|
202 |
|
203 if (params) { |
|
204 if (set_parameters(self->params, params)) { |
214 return -1; |
205 return -1; |
215 } |
206 } |
216 } |
207 } |
217 else { |
208 else { |
218 self->cctx = ZSTD_createCCtx(); |
209 if (set_parameter(self->params, ZSTD_p_compressionLevel, level)) { |
219 if (!self->cctx) { |
|
220 PyErr_NoMemory(); |
|
221 return -1; |
210 return -1; |
222 } |
211 } |
223 } |
212 |
224 |
213 if (set_parameter(self->params, ZSTD_p_contentSizeFlag, |
225 self->compressionLevel = level; |
214 writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) { |
|
215 return -1; |
|
216 } |
|
217 |
|
218 if (set_parameter(self->params, ZSTD_p_checksumFlag, |
|
219 writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) { |
|
220 return -1; |
|
221 } |
|
222 |
|
223 if (set_parameter(self->params, ZSTD_p_dictIDFlag, |
|
224 writeDictID ? PyObject_IsTrue(writeDictID) : 1)) { |
|
225 return -1; |
|
226 } |
|
227 |
|
228 if (threads) { |
|
229 if (set_parameter(self->params, ZSTD_p_nbWorkers, threads)) { |
|
230 return -1; |
|
231 } |
|
232 } |
|
233 } |
226 |
234 |
227 if (dict) { |
235 if (dict) { |
228 self->dict = dict; |
236 self->dict = dict; |
229 Py_INCREF(dict); |
237 Py_INCREF(dict); |
230 } |
238 } |
231 |
239 |
232 if (params) { |
240 if (ensure_cctx(self)) { |
233 self->cparams = params; |
241 return -1; |
234 Py_INCREF(params); |
|
235 } |
|
236 |
|
237 memset(&self->fparams, 0, sizeof(self->fparams)); |
|
238 |
|
239 if (writeChecksum && PyObject_IsTrue(writeChecksum)) { |
|
240 self->fparams.checksumFlag = 1; |
|
241 } |
|
242 if (writeContentSize && PyObject_IsTrue(writeContentSize)) { |
|
243 self->fparams.contentSizeFlag = 1; |
|
244 } |
|
245 if (writeDictID && PyObject_Not(writeDictID)) { |
|
246 self->fparams.noDictIDFlag = 1; |
|
247 } |
242 } |
248 |
243 |
249 return 0; |
244 return 0; |
250 } |
245 } |
251 |
246 |
252 static void ZstdCompressor_dealloc(ZstdCompressor* self) { |
247 static void ZstdCompressor_dealloc(ZstdCompressor* self) { |
253 if (self->cstream) { |
|
254 ZSTD_freeCStream(self->cstream); |
|
255 self->cstream = NULL; |
|
256 } |
|
257 |
|
258 Py_XDECREF(self->cparams); |
|
259 Py_XDECREF(self->dict); |
|
260 |
|
261 if (self->cdict) { |
|
262 ZSTD_freeCDict(self->cdict); |
|
263 self->cdict = NULL; |
|
264 } |
|
265 |
|
266 if (self->cctx) { |
248 if (self->cctx) { |
267 ZSTD_freeCCtx(self->cctx); |
249 ZSTD_freeCCtx(self->cctx); |
268 self->cctx = NULL; |
250 self->cctx = NULL; |
269 } |
251 } |
270 |
252 |
271 if (self->mtcctx) { |
253 if (self->params) { |
272 ZSTDMT_freeCCtx(self->mtcctx); |
254 ZSTD_freeCCtxParams(self->params); |
273 self->mtcctx = NULL; |
255 self->params = NULL; |
274 } |
256 } |
275 |
257 |
|
258 Py_XDECREF(self->dict); |
276 PyObject_Del(self); |
259 PyObject_Del(self); |
|
260 } |
|
261 |
|
262 PyDoc_STRVAR(ZstdCompressor_memory_size__doc__, |
|
263 "memory_size()\n" |
|
264 "\n" |
|
265 "Obtain the memory usage of this compressor, in bytes.\n" |
|
266 ); |
|
267 |
|
268 static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) { |
|
269 if (self->cctx) { |
|
270 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx)); |
|
271 } |
|
272 else { |
|
273 PyErr_SetString(ZstdError, "no compressor context found; this should never happen"); |
|
274 return NULL; |
|
275 } |
|
276 } |
|
277 |
|
278 PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__, |
|
279 "frame_progression()\n" |
|
280 "\n" |
|
281 "Return information on how much work the compressor has done.\n" |
|
282 "\n" |
|
283 "Returns a 3-tuple of (ingested, consumed, produced).\n" |
|
284 ); |
|
285 |
|
286 static PyObject* ZstdCompressor_frame_progression(ZstdCompressor* self) { |
|
287 return frame_progression(self->cctx); |
277 } |
288 } |
278 |
289 |
279 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__, |
290 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__, |
280 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n" |
291 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n" |
281 "compress data between streams\n" |
292 "compress data between streams\n" |
302 NULL |
313 NULL |
303 }; |
314 }; |
304 |
315 |
305 PyObject* source; |
316 PyObject* source; |
306 PyObject* dest; |
317 PyObject* dest; |
307 Py_ssize_t sourceSize = 0; |
318 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; |
308 size_t inSize = ZSTD_CStreamInSize(); |
319 size_t inSize = ZSTD_CStreamInSize(); |
309 size_t outSize = ZSTD_CStreamOutSize(); |
320 size_t outSize = ZSTD_CStreamOutSize(); |
310 ZSTD_inBuffer input; |
321 ZSTD_inBuffer input; |
311 ZSTD_outBuffer output; |
322 ZSTD_outBuffer output; |
312 Py_ssize_t totalRead = 0; |
323 Py_ssize_t totalRead = 0; |
313 Py_ssize_t totalWrite = 0; |
324 Py_ssize_t totalWrite = 0; |
314 char* readBuffer; |
325 char* readBuffer; |
315 Py_ssize_t readSize; |
326 Py_ssize_t readSize; |
316 PyObject* readResult; |
327 PyObject* readResult = NULL; |
317 PyObject* res = NULL; |
328 PyObject* res = NULL; |
318 size_t zresult; |
329 size_t zresult; |
319 PyObject* writeResult; |
330 PyObject* writeResult; |
320 PyObject* totalReadPy; |
331 PyObject* totalReadPy; |
321 PyObject* totalWritePy; |
332 PyObject* totalWritePy; |
322 |
333 |
323 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist, |
334 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist, |
324 &source, &dest, &sourceSize, &inSize, &outSize)) { |
335 &source, &dest, &sourceSize, &inSize, &outSize)) { |
325 return NULL; |
336 return NULL; |
326 } |
337 } |
327 |
338 |
328 if (!PyObject_HasAttrString(source, "read")) { |
339 if (!PyObject_HasAttrString(source, "read")) { |
333 if (!PyObject_HasAttrString(dest, "write")) { |
344 if (!PyObject_HasAttrString(dest, "write")) { |
334 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method"); |
345 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method"); |
335 return NULL; |
346 return NULL; |
336 } |
347 } |
337 |
348 |
|
349 if (ensure_cctx(self)) { |
|
350 return NULL; |
|
351 } |
|
352 |
|
353 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); |
|
354 if (ZSTD_isError(zresult)) { |
|
355 PyErr_Format(ZstdError, "error setting source size: %s", |
|
356 ZSTD_getErrorName(zresult)); |
|
357 return NULL; |
|
358 } |
|
359 |
338 /* Prevent free on uninitialized memory in finally. */ |
360 /* Prevent free on uninitialized memory in finally. */ |
339 output.dst = NULL; |
|
340 |
|
341 if (self->mtcctx) { |
|
342 if (init_mtcstream(self, sourceSize)) { |
|
343 res = NULL; |
|
344 goto finally; |
|
345 } |
|
346 } |
|
347 else { |
|
348 if (0 != init_cstream(self, sourceSize)) { |
|
349 res = NULL; |
|
350 goto finally; |
|
351 } |
|
352 } |
|
353 |
|
354 output.dst = PyMem_Malloc(outSize); |
361 output.dst = PyMem_Malloc(outSize); |
355 if (!output.dst) { |
362 if (!output.dst) { |
356 PyErr_NoMemory(); |
363 PyErr_NoMemory(); |
357 res = NULL; |
364 res = NULL; |
358 goto finally; |
365 goto finally; |
359 } |
366 } |
360 output.size = outSize; |
367 output.size = outSize; |
361 output.pos = 0; |
368 output.pos = 0; |
|
369 |
|
370 input.src = NULL; |
|
371 input.size = 0; |
|
372 input.pos = 0; |
362 |
373 |
363 while (1) { |
374 while (1) { |
364 /* Try to read from source stream. */ |
375 /* Try to read from source stream. */ |
365 readResult = PyObject_CallMethod(source, "read", "n", inSize); |
376 readResult = PyObject_CallMethod(source, "read", "n", inSize); |
366 if (!readResult) { |
377 if (!readResult) { |
453 finally: |
461 finally: |
454 if (output.dst) { |
462 if (output.dst) { |
455 PyMem_Free(output.dst); |
463 PyMem_Free(output.dst); |
456 } |
464 } |
457 |
465 |
|
466 Py_XDECREF(readResult); |
|
467 |
458 return res; |
468 return res; |
459 } |
469 } |
460 |
470 |
|
471 PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__, |
|
472 "stream_reader(source, [size=0])\n" |
|
473 "\n" |
|
474 "Obtain an object that behaves like an I/O stream.\n" |
|
475 "\n" |
|
476 "The source object can be any object with a ``read(size)`` method\n" |
|
477 "or an object that conforms to the buffer protocol.\n" |
|
478 ); |
|
479 |
|
480 static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
481 static char* kwlist[] = { |
|
482 "source", |
|
483 "size", |
|
484 "read_size", |
|
485 NULL |
|
486 }; |
|
487 |
|
488 PyObject* source; |
|
489 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; |
|
490 size_t readSize = ZSTD_CStreamInSize(); |
|
491 ZstdCompressionReader* result = NULL; |
|
492 |
|
493 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist, |
|
494 &source, &sourceSize, &readSize)) { |
|
495 return NULL; |
|
496 } |
|
497 |
|
498 result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL); |
|
499 if (!result) { |
|
500 return NULL; |
|
501 } |
|
502 |
|
503 if (PyObject_HasAttrString(source, "read")) { |
|
504 result->reader = source; |
|
505 Py_INCREF(source); |
|
506 result->readSize = readSize; |
|
507 } |
|
508 else if (1 == PyObject_CheckBuffer(source)) { |
|
509 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) { |
|
510 goto except; |
|
511 } |
|
512 |
|
513 assert(result->buffer.len >= 0); |
|
514 |
|
515 sourceSize = result->buffer.len; |
|
516 } |
|
517 else { |
|
518 PyErr_SetString(PyExc_TypeError, |
|
519 "must pass an object with a read() method or that conforms to the buffer protocol"); |
|
520 goto except; |
|
521 } |
|
522 |
|
523 if (ensure_cctx(self)) { |
|
524 goto except; |
|
525 } |
|
526 |
|
527 result->compressor = self; |
|
528 Py_INCREF(self); |
|
529 result->sourceSize = sourceSize; |
|
530 |
|
531 return result; |
|
532 |
|
533 except: |
|
534 Py_CLEAR(result); |
|
535 |
|
536 return NULL; |
|
537 } |
|
538 |
461 PyDoc_STRVAR(ZstdCompressor_compress__doc__, |
539 PyDoc_STRVAR(ZstdCompressor_compress__doc__, |
462 "compress(data, allow_empty=False)\n" |
540 "compress(data)\n" |
463 "\n" |
541 "\n" |
464 "Compress data in a single operation.\n" |
542 "Compress data in a single operation.\n" |
465 "\n" |
543 "\n" |
466 "This is the simplest mechanism to perform compression: simply pass in a\n" |
544 "This is the simplest mechanism to perform compression: simply pass in a\n" |
467 "value and get a compressed value back. It is almost the most prone to abuse.\n" |
545 "value and get a compressed value back. It is almost the most prone to abuse.\n" |
471 ); |
549 ); |
472 |
550 |
473 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
551 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
474 static char* kwlist[] = { |
552 static char* kwlist[] = { |
475 "data", |
553 "data", |
476 "allow_empty", |
|
477 NULL |
554 NULL |
478 }; |
555 }; |
479 |
556 |
480 const char* source; |
557 Py_buffer source; |
481 Py_ssize_t sourceSize; |
|
482 PyObject* allowEmpty = NULL; |
|
483 size_t destSize; |
558 size_t destSize; |
484 PyObject* output; |
559 PyObject* output = NULL; |
485 char* dest; |
|
486 void* dictData = NULL; |
|
487 size_t dictSize = 0; |
|
488 size_t zresult; |
560 size_t zresult; |
489 ZSTD_parameters zparams; |
561 ZSTD_outBuffer outBuffer; |
|
562 ZSTD_inBuffer inBuffer; |
490 |
563 |
491 #if PY_MAJOR_VERSION >= 3 |
564 #if PY_MAJOR_VERSION >= 3 |
492 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress", |
565 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress", |
493 #else |
566 #else |
494 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress", |
567 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|O:compress", |
495 #endif |
568 #endif |
496 kwlist, &source, &sourceSize, &allowEmpty)) { |
569 kwlist, &source)) { |
497 return NULL; |
570 return NULL; |
498 } |
571 } |
499 |
572 |
500 if (self->threads && self->dict) { |
573 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { |
501 PyErr_SetString(ZstdError, |
574 PyErr_SetString(PyExc_ValueError, |
502 "compress() cannot be used with both dictionaries and multi-threaded compression"); |
575 "data buffer should be contiguous and have at most one dimension"); |
503 return NULL; |
576 goto finally; |
504 } |
577 } |
505 |
578 |
506 if (self->threads && self->cparams) { |
579 if (ensure_cctx(self)) { |
507 PyErr_SetString(ZstdError, |
580 goto finally; |
508 "compress() cannot be used with both compression parameters and multi-threaded compression"); |
581 } |
509 return NULL; |
582 |
510 } |
583 destSize = ZSTD_compressBound(source.len); |
511 |
|
512 /* Limitation in zstd C API doesn't let decompression side distinguish |
|
513 between content size of 0 and unknown content size. This can make round |
|
514 tripping via Python difficult. Until this is fixed, require a flag |
|
515 to fire the footgun. |
|
516 https://github.com/indygreg/python-zstandard/issues/11 */ |
|
517 if (0 == sourceSize && self->fparams.contentSizeFlag |
|
518 && (!allowEmpty || PyObject_Not(allowEmpty))) { |
|
519 PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes"); |
|
520 return NULL; |
|
521 } |
|
522 |
|
523 destSize = ZSTD_compressBound(sourceSize); |
|
524 output = PyBytes_FromStringAndSize(NULL, destSize); |
584 output = PyBytes_FromStringAndSize(NULL, destSize); |
525 if (!output) { |
585 if (!output) { |
526 return NULL; |
586 goto finally; |
527 } |
587 } |
528 |
588 |
529 dest = PyBytes_AsString(output); |
589 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len); |
530 |
590 if (ZSTD_isError(zresult)) { |
531 if (self->dict) { |
591 PyErr_Format(ZstdError, "error setting source size: %s", |
532 dictData = self->dict->dictData; |
592 ZSTD_getErrorName(zresult)); |
533 dictSize = self->dict->dictSize; |
593 Py_CLEAR(output); |
534 } |
594 goto finally; |
535 |
595 } |
536 memset(&zparams, 0, sizeof(zparams)); |
596 |
537 if (!self->cparams) { |
597 inBuffer.src = source.buf; |
538 zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize); |
598 inBuffer.size = source.len; |
539 } |
599 inBuffer.pos = 0; |
540 else { |
600 |
541 ztopy_compression_parameters(self->cparams, &zparams.cParams); |
601 outBuffer.dst = PyBytes_AsString(output); |
542 /* Do NOT call ZSTD_adjustCParams() here because the compression params |
602 outBuffer.size = destSize; |
543 come from the user. */ |
603 outBuffer.pos = 0; |
544 } |
|
545 |
|
546 zparams.fParams = self->fparams; |
|
547 |
|
548 /* The raw dict data has to be processed before it can be used. Since this |
|
549 adds overhead - especially if multiple dictionary compression operations |
|
550 are performed on the same ZstdCompressor instance - we create a |
|
551 ZSTD_CDict once and reuse it for all operations. |
|
552 |
|
553 Note: the compression parameters used for the first invocation (possibly |
|
554 derived from the source size) will be reused on all subsequent invocations. |
|
555 https://github.com/facebook/zstd/issues/358 contains more info. We could |
|
556 potentially add an argument somewhere to control this behavior. |
|
557 */ |
|
558 if (0 != populate_cdict(self, &zparams)) { |
|
559 Py_DECREF(output); |
|
560 return NULL; |
|
561 } |
|
562 |
604 |
563 Py_BEGIN_ALLOW_THREADS |
605 Py_BEGIN_ALLOW_THREADS |
564 if (self->mtcctx) { |
606 /* By avoiding ZSTD_compress(), we don't necessarily write out content |
565 zresult = ZSTDMT_compressCCtx(self->mtcctx, dest, destSize, |
607 size. This means the argument to ZstdCompressor to control frame |
566 source, sourceSize, self->compressionLevel); |
608 parameters is honored. */ |
567 } |
609 zresult = ZSTD_compress_generic(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end); |
568 else { |
|
569 /* By avoiding ZSTD_compress(), we don't necessarily write out content |
|
570 size. This means the argument to ZstdCompressor to control frame |
|
571 parameters is honored. */ |
|
572 if (self->cdict) { |
|
573 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize, |
|
574 source, sourceSize, self->cdict); |
|
575 } |
|
576 else { |
|
577 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize, |
|
578 source, sourceSize, dictData, dictSize, zparams); |
|
579 } |
|
580 } |
|
581 Py_END_ALLOW_THREADS |
610 Py_END_ALLOW_THREADS |
582 |
611 |
583 if (ZSTD_isError(zresult)) { |
612 if (ZSTD_isError(zresult)) { |
584 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); |
613 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); |
585 Py_CLEAR(output); |
614 Py_CLEAR(output); |
586 return NULL; |
615 goto finally; |
587 } |
616 } |
588 else { |
617 else if (zresult) { |
589 Py_SIZE(output) = zresult; |
618 PyErr_SetString(ZstdError, "unexpected partial frame flush"); |
590 } |
619 Py_CLEAR(output); |
591 |
620 goto finally; |
|
621 } |
|
622 |
|
623 Py_SIZE(output) = outBuffer.pos; |
|
624 |
|
625 finally: |
|
626 PyBuffer_Release(&source); |
592 return output; |
627 return output; |
593 } |
628 } |
594 |
629 |
595 PyDoc_STRVAR(ZstdCompressionObj__doc__, |
630 PyDoc_STRVAR(ZstdCompressionObj__doc__, |
596 "compressobj()\n" |
631 "compressobj()\n" |
606 static char* kwlist[] = { |
641 static char* kwlist[] = { |
607 "size", |
642 "size", |
608 NULL |
643 NULL |
609 }; |
644 }; |
610 |
645 |
611 Py_ssize_t inSize = 0; |
646 unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN; |
612 size_t outSize = ZSTD_CStreamOutSize(); |
647 size_t outSize = ZSTD_CStreamOutSize(); |
613 ZstdCompressionObj* result = NULL; |
648 ZstdCompressionObj* result = NULL; |
614 |
649 size_t zresult; |
615 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) { |
650 |
|
651 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) { |
|
652 return NULL; |
|
653 } |
|
654 |
|
655 if (ensure_cctx(self)) { |
|
656 return NULL; |
|
657 } |
|
658 |
|
659 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize); |
|
660 if (ZSTD_isError(zresult)) { |
|
661 PyErr_Format(ZstdError, "error setting source size: %s", |
|
662 ZSTD_getErrorName(zresult)); |
616 return NULL; |
663 return NULL; |
617 } |
664 } |
618 |
665 |
619 result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL); |
666 result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL); |
620 if (!result) { |
667 if (!result) { |
621 return NULL; |
668 return NULL; |
622 } |
|
623 |
|
624 if (self->mtcctx) { |
|
625 if (init_mtcstream(self, inSize)) { |
|
626 Py_DECREF(result); |
|
627 return NULL; |
|
628 } |
|
629 } |
|
630 else { |
|
631 if (0 != init_cstream(self, inSize)) { |
|
632 Py_DECREF(result); |
|
633 return NULL; |
|
634 } |
|
635 } |
669 } |
636 |
670 |
637 result->output.dst = PyMem_Malloc(outSize); |
671 result->output.dst = PyMem_Malloc(outSize); |
638 if (!result->output.dst) { |
672 if (!result->output.dst) { |
639 PyErr_NoMemory(); |
673 PyErr_NoMemory(); |
665 "The caller is partially in control of how fast data is fed into the\n" |
699 "The caller is partially in control of how fast data is fed into the\n" |
666 "compressor by how it consumes the returned iterator. The compressor will\n" |
700 "compressor by how it consumes the returned iterator. The compressor will\n" |
667 "not consume from the reader unless the caller consumes from the iterator.\n" |
701 "not consume from the reader unless the caller consumes from the iterator.\n" |
668 ); |
702 ); |
669 |
703 |
670 static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
704 static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
671 static char* kwlist[] = { |
705 static char* kwlist[] = { |
672 "reader", |
706 "reader", |
673 "size", |
707 "size", |
674 "read_size", |
708 "read_size", |
675 "write_size", |
709 "write_size", |
676 NULL |
710 NULL |
677 }; |
711 }; |
678 |
712 |
679 PyObject* reader; |
713 PyObject* reader; |
680 Py_ssize_t sourceSize = 0; |
714 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; |
681 size_t inSize = ZSTD_CStreamInSize(); |
715 size_t inSize = ZSTD_CStreamInSize(); |
682 size_t outSize = ZSTD_CStreamOutSize(); |
716 size_t outSize = ZSTD_CStreamOutSize(); |
683 ZstdCompressorIterator* result; |
717 ZstdCompressorIterator* result; |
684 |
718 size_t zresult; |
685 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist, |
719 |
|
720 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist, |
686 &reader, &sourceSize, &inSize, &outSize)) { |
721 &reader, &sourceSize, &inSize, &outSize)) { |
687 return NULL; |
722 return NULL; |
688 } |
723 } |
689 |
724 |
690 result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL); |
725 result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL); |
694 if (PyObject_HasAttrString(reader, "read")) { |
729 if (PyObject_HasAttrString(reader, "read")) { |
695 result->reader = reader; |
730 result->reader = reader; |
696 Py_INCREF(result->reader); |
731 Py_INCREF(result->reader); |
697 } |
732 } |
698 else if (1 == PyObject_CheckBuffer(reader)) { |
733 else if (1 == PyObject_CheckBuffer(reader)) { |
699 result->buffer = PyMem_Malloc(sizeof(Py_buffer)); |
734 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) { |
700 if (!result->buffer) { |
|
701 goto except; |
735 goto except; |
702 } |
736 } |
703 |
737 |
704 memset(result->buffer, 0, sizeof(Py_buffer)); |
738 sourceSize = result->buffer.len; |
705 |
|
706 if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) { |
|
707 goto except; |
|
708 } |
|
709 |
|
710 sourceSize = result->buffer->len; |
|
711 } |
739 } |
712 else { |
740 else { |
713 PyErr_SetString(PyExc_ValueError, |
741 PyErr_SetString(PyExc_ValueError, |
714 "must pass an object with a read() method or conforms to buffer protocol"); |
742 "must pass an object with a read() method or conforms to buffer protocol"); |
715 goto except; |
743 goto except; |
716 } |
744 } |
717 |
745 |
|
746 if (ensure_cctx(self)) { |
|
747 return NULL; |
|
748 } |
|
749 |
|
750 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); |
|
751 if (ZSTD_isError(zresult)) { |
|
752 PyErr_Format(ZstdError, "error setting source size: %s", |
|
753 ZSTD_getErrorName(zresult)); |
|
754 return NULL; |
|
755 } |
|
756 |
718 result->compressor = self; |
757 result->compressor = self; |
719 Py_INCREF(result->compressor); |
758 Py_INCREF(result->compressor); |
720 |
|
721 result->sourceSize = sourceSize; |
|
722 |
|
723 if (self->mtcctx) { |
|
724 if (init_mtcstream(self, sourceSize)) { |
|
725 goto except; |
|
726 } |
|
727 } |
|
728 else { |
|
729 if (0 != init_cstream(self, sourceSize)) { |
|
730 goto except; |
|
731 } |
|
732 } |
|
733 |
759 |
734 result->inSize = inSize; |
760 result->inSize = inSize; |
735 result->outSize = outSize; |
761 result->outSize = outSize; |
736 |
762 |
737 result->output.dst = PyMem_Malloc(outSize); |
763 result->output.dst = PyMem_Malloc(outSize); |
769 "An optional ``write_size`` argument is also accepted. It defines the maximum\n" |
792 "An optional ``write_size`` argument is also accepted. It defines the maximum\n" |
770 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n" |
793 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n" |
771 "for a compressor output stream.\n" |
794 "for a compressor output stream.\n" |
772 ); |
795 ); |
773 |
796 |
774 static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
797 static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
775 static char* kwlist[] = { |
798 static char* kwlist[] = { |
776 "writer", |
799 "writer", |
777 "size", |
800 "size", |
778 "write_size", |
801 "write_size", |
779 NULL |
802 NULL |
780 }; |
803 }; |
781 |
804 |
782 PyObject* writer; |
805 PyObject* writer; |
783 ZstdCompressionWriter* result; |
806 ZstdCompressionWriter* result; |
784 Py_ssize_t sourceSize = 0; |
807 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; |
785 size_t outSize = ZSTD_CStreamOutSize(); |
808 size_t outSize = ZSTD_CStreamOutSize(); |
786 |
809 |
787 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist, |
810 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_writer", kwlist, |
788 &writer, &sourceSize, &outSize)) { |
811 &writer, &sourceSize, &outSize)) { |
789 return NULL; |
812 return NULL; |
790 } |
813 } |
791 |
814 |
792 if (!PyObject_HasAttrString(writer, "write")) { |
815 if (!PyObject_HasAttrString(writer, "write")) { |
793 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method"); |
816 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method"); |
794 return NULL; |
817 return NULL; |
795 } |
818 } |
796 |
819 |
|
820 if (ensure_cctx(self)) { |
|
821 return NULL; |
|
822 } |
|
823 |
797 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL); |
824 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL); |
798 if (!result) { |
825 if (!result) { |
799 return NULL; |
826 return NULL; |
800 } |
827 } |
801 |
828 |
866 static void compress_worker(WorkerState* state) { |
891 static void compress_worker(WorkerState* state) { |
867 Py_ssize_t inputOffset = state->startOffset; |
892 Py_ssize_t inputOffset = state->startOffset; |
868 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1; |
893 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1; |
869 Py_ssize_t currentBufferStartOffset = state->startOffset; |
894 Py_ssize_t currentBufferStartOffset = state->startOffset; |
870 size_t zresult; |
895 size_t zresult; |
871 ZSTD_parameters zparams; |
|
872 void* newDest; |
896 void* newDest; |
873 size_t allocationSize; |
897 size_t allocationSize; |
874 size_t boundSize; |
898 size_t boundSize; |
875 Py_ssize_t destOffset = 0; |
899 Py_ssize_t destOffset = 0; |
876 DataSource* sources = state->sources; |
900 DataSource* sources = state->sources; |
877 DestBuffer* destBuffer; |
901 DestBuffer* destBuffer; |
878 |
902 |
879 assert(!state->destBuffers); |
903 assert(!state->destBuffers); |
880 assert(0 == state->destCount); |
904 assert(0 == state->destCount); |
881 |
905 |
882 if (state->cParams) { |
|
883 ztopy_compression_parameters(state->cParams, &zparams.cParams); |
|
884 } |
|
885 |
|
886 zparams.fParams = state->fParams; |
|
887 |
|
888 /* |
906 /* |
889 * The total size of the compressed data is unknown until we actually |
907 * The total size of the compressed data is unknown until we actually |
890 * compress data. That means we can't pre-allocate the exact size we need. |
908 * compress data. That means we can't pre-allocate the exact size we need. |
891 * |
909 * |
892 * There is a cost to every allocation and reallocation. So, it is in our |
910 * There is a cost to every allocation and reallocation. So, it is in our |
893 * interest to minimize the number of allocations. |
911 * interest to minimize the number of allocations. |
894 * |
912 * |
895 * There is also a cost to too few allocations. If allocations are too |
913 * There is also a cost to too few allocations. If allocations are too |
896 * large they may fail. If buffers are shared and all inputs become |
914 * large they may fail. If buffers are shared and all inputs become |
1030 currentBufferStartOffset = inputOffset; |
1052 currentBufferStartOffset = inputOffset; |
1031 } |
1053 } |
1032 |
1054 |
1033 dest = (char*)destBuffer->dest + destOffset; |
1055 dest = (char*)destBuffer->dest + destOffset; |
1034 |
1056 |
1035 if (state->cdict) { |
1057 opInBuffer.src = source; |
1036 zresult = ZSTD_compress_usingCDict(state->cctx, dest, destAvailable, |
1058 opInBuffer.size = sourceSize; |
1037 source, sourceSize, state->cdict); |
1059 opInBuffer.pos = 0; |
1038 } |
1060 |
1039 else { |
1061 opOutBuffer.dst = dest; |
1040 if (!state->cParams) { |
1062 opOutBuffer.size = destAvailable; |
1041 zparams.cParams = ZSTD_getCParams(state->cLevel, sourceSize, 0); |
1063 opOutBuffer.pos = 0; |
1042 } |
1064 |
1043 |
1065 zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize); |
1044 zresult = ZSTD_compress_advanced(state->cctx, dest, destAvailable, |
|
1045 source, sourceSize, NULL, 0, zparams); |
|
1046 } |
|
1047 |
|
1048 if (ZSTD_isError(zresult)) { |
1066 if (ZSTD_isError(zresult)) { |
1049 state->error = WorkerError_zstd; |
1067 state->error = WorkerError_zstd; |
1050 state->zresult = zresult; |
1068 state->zresult = zresult; |
1051 state->errorOffset = inputOffset; |
1069 state->errorOffset = inputOffset; |
1052 break; |
1070 break; |
1053 } |
1071 } |
1054 |
1072 |
|
1073 zresult = ZSTD_compress_generic(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end); |
|
1074 if (ZSTD_isError(zresult)) { |
|
1075 state->error = WorkerError_zstd; |
|
1076 state->zresult = zresult; |
|
1077 state->errorOffset = inputOffset; |
|
1078 break; |
|
1079 } |
|
1080 else if (zresult) { |
|
1081 state->error = WorkerError_nospace; |
|
1082 state->errorOffset = inputOffset; |
|
1083 break; |
|
1084 } |
|
1085 |
1055 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset; |
1086 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset; |
1056 destBuffer->segments[inputOffset - currentBufferStartOffset].length = zresult; |
1087 destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos; |
1057 |
1088 |
1058 destOffset += zresult; |
1089 destOffset += opOutBuffer.pos; |
1059 remainingItems--; |
1090 remainingItems--; |
1060 } |
1091 } |
1061 |
1092 |
1062 if (destBuffer->destSize > destOffset) { |
1093 if (destBuffer->destSize > destOffset) { |
1063 newDest = realloc(destBuffer->dest, destOffset); |
1094 newDest = realloc(destBuffer->dest, destOffset); |
1070 destBuffer->destSize = destOffset; |
1101 destBuffer->destSize = destOffset; |
1071 } |
1102 } |
1072 } |
1103 } |
1073 |
1104 |
1074 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor, |
1105 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor, |
1075 DataSources* sources, unsigned int threadCount) { |
1106 DataSources* sources, Py_ssize_t threadCount) { |
1076 ZSTD_parameters zparams; |
|
1077 unsigned long long bytesPerWorker; |
1107 unsigned long long bytesPerWorker; |
1078 POOL_ctx* pool = NULL; |
1108 POOL_ctx* pool = NULL; |
1079 WorkerState* workerStates = NULL; |
1109 WorkerState* workerStates = NULL; |
1080 Py_ssize_t i; |
1110 Py_ssize_t i; |
1081 unsigned long long workerBytes = 0; |
1111 unsigned long long workerBytes = 0; |
1082 Py_ssize_t workerStartOffset = 0; |
1112 Py_ssize_t workerStartOffset = 0; |
1083 size_t currentThread = 0; |
1113 Py_ssize_t currentThread = 0; |
1084 int errored = 0; |
1114 int errored = 0; |
1085 Py_ssize_t segmentsCount = 0; |
1115 Py_ssize_t segmentsCount = 0; |
1086 Py_ssize_t segmentIndex; |
1116 Py_ssize_t segmentIndex; |
1087 PyObject* segmentsArg = NULL; |
1117 PyObject* segmentsArg = NULL; |
1088 ZstdBufferWithSegments* buffer; |
1118 ZstdBufferWithSegments* buffer; |
1091 assert(sources->sourcesSize > 0); |
1121 assert(sources->sourcesSize > 0); |
1092 assert(sources->totalSourceSize > 0); |
1122 assert(sources->totalSourceSize > 0); |
1093 assert(threadCount >= 1); |
1123 assert(threadCount >= 1); |
1094 |
1124 |
1095 /* More threads than inputs makes no sense. */ |
1125 /* More threads than inputs makes no sense. */ |
1096 threadCount = sources->sourcesSize < threadCount ? (unsigned int)sources->sourcesSize |
1126 threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize |
1097 : threadCount; |
1127 : threadCount; |
1098 |
1128 |
1099 /* TODO lower thread count when input size is too small and threads would add |
1129 /* TODO lower thread count when input size is too small and threads would add |
1100 overhead. */ |
1130 overhead. */ |
1101 |
|
1102 /* |
|
1103 * When dictionaries are used, parameters are derived from the size of the |
|
1104 * first element. |
|
1105 * |
|
1106 * TODO come up with a better mechanism. |
|
1107 */ |
|
1108 memset(&zparams, 0, sizeof(zparams)); |
|
1109 if (compressor->cparams) { |
|
1110 ztopy_compression_parameters(compressor->cparams, &zparams.cParams); |
|
1111 } |
|
1112 else { |
|
1113 zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, |
|
1114 sources->sources[0].sourceSize, |
|
1115 compressor->dict ? compressor->dict->dictSize : 0); |
|
1116 } |
|
1117 |
|
1118 zparams.fParams = compressor->fparams; |
|
1119 |
|
1120 if (0 != populate_cdict(compressor, &zparams)) { |
|
1121 return NULL; |
|
1122 } |
|
1123 |
1131 |
1124 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState)); |
1132 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState)); |
1125 if (NULL == workerStates) { |
1133 if (NULL == workerStates) { |
1126 PyErr_NoMemory(); |
1134 PyErr_NoMemory(); |
1127 goto finally; |
1135 goto finally; |
1138 } |
1146 } |
1139 |
1147 |
1140 bytesPerWorker = sources->totalSourceSize / threadCount; |
1148 bytesPerWorker = sources->totalSourceSize / threadCount; |
1141 |
1149 |
1142 for (i = 0; i < threadCount; i++) { |
1150 for (i = 0; i < threadCount; i++) { |
|
1151 size_t zresult; |
|
1152 |
1143 workerStates[i].cctx = ZSTD_createCCtx(); |
1153 workerStates[i].cctx = ZSTD_createCCtx(); |
1144 if (!workerStates[i].cctx) { |
1154 if (!workerStates[i].cctx) { |
1145 PyErr_NoMemory(); |
1155 PyErr_NoMemory(); |
1146 goto finally; |
1156 goto finally; |
1147 } |
1157 } |
1148 |
1158 |
1149 workerStates[i].cdict = compressor->cdict; |
1159 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx, |
1150 workerStates[i].cLevel = compressor->compressionLevel; |
1160 compressor->params); |
1151 workerStates[i].cParams = compressor->cparams; |
1161 if (ZSTD_isError(zresult)) { |
1152 workerStates[i].fParams = compressor->fparams; |
1162 PyErr_Format(ZstdError, "could not set compression parameters: %s", |
|
1163 ZSTD_getErrorName(zresult)); |
|
1164 goto finally; |
|
1165 } |
|
1166 |
|
1167 if (compressor->dict) { |
|
1168 if (compressor->dict->cdict) { |
|
1169 zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, compressor->dict->cdict); |
|
1170 } |
|
1171 else { |
|
1172 zresult = ZSTD_CCtx_loadDictionary_advanced( |
|
1173 workerStates[i].cctx, |
|
1174 compressor->dict->dictData, |
|
1175 compressor->dict->dictSize, |
|
1176 ZSTD_dlm_byRef, |
|
1177 compressor->dict->dictType); |
|
1178 } |
|
1179 |
|
1180 if (ZSTD_isError(zresult)) { |
|
1181 PyErr_Format(ZstdError, "could not load compression dictionary: %s", |
|
1182 ZSTD_getErrorName(zresult)); |
|
1183 goto finally; |
|
1184 } |
|
1185 |
|
1186 } |
1153 |
1187 |
1154 workerStates[i].sources = sources->sources; |
1188 workerStates[i].sources = sources->sources; |
1155 workerStates[i].sourcesSize = sources->sourcesSize; |
1189 workerStates[i].sourcesSize = sources->sourcesSize; |
1156 } |
1190 } |
1157 |
1191 |
1480 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ }, |
1528 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ }, |
1481 { "compressobj", (PyCFunction)ZstdCompressor_compressobj, |
1529 { "compressobj", (PyCFunction)ZstdCompressor_compressobj, |
1482 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ }, |
1530 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ }, |
1483 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream, |
1531 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream, |
1484 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ }, |
1532 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ }, |
1485 { "read_from", (PyCFunction)ZstdCompressor_read_from, |
1533 { "stream_reader", (PyCFunction)ZstdCompressor_stream_reader, |
1486 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ }, |
1534 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ }, |
1487 { "write_to", (PyCFunction)ZstdCompressor_write_to, |
1535 { "stream_writer", (PyCFunction)ZstdCompressor_stream_writer, |
1488 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ }, |
1536 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ }, |
|
1537 { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter, |
|
1538 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ }, |
|
1539 /* TODO Remove deprecated API */ |
|
1540 { "read_from", (PyCFunction)ZstdCompressor_read_to_iter, |
|
1541 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ }, |
|
1542 /* TODO remove deprecated API */ |
|
1543 { "write_to", (PyCFunction)ZstdCompressor_stream_writer, |
|
1544 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ }, |
1489 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer, |
1545 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer, |
1490 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ }, |
1546 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ }, |
|
1547 { "memory_size", (PyCFunction)ZstdCompressor_memory_size, |
|
1548 METH_NOARGS, ZstdCompressor_memory_size__doc__ }, |
|
1549 { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression, |
|
1550 METH_NOARGS, ZstdCompressor_frame_progression__doc__ }, |
1491 { NULL, NULL } |
1551 { NULL, NULL } |
1492 }; |
1552 }; |
1493 |
1553 |
1494 PyTypeObject ZstdCompressorType = { |
1554 PyTypeObject ZstdCompressorType = { |
1495 PyVarObject_HEAD_INIT(NULL, 0) |
1555 PyVarObject_HEAD_INIT(NULL, 0) |