98 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) |
92 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) |
99 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) |
93 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) |
100 |
94 |
101 |
95 |
102 /* Function templates */ |
96 /* Function templates */ |
103 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) |
97 |
|
98 /* FSE_buildCTable_wksp() : |
|
99 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). |
|
100 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)` |
|
101 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements |
|
102 */ |
|
103 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) |
104 { |
104 { |
105 U32 const tableSize = 1 << tableLog; |
105 U32 const tableSize = 1 << tableLog; |
106 U32 const tableMask = tableSize - 1; |
106 U32 const tableMask = tableSize - 1; |
107 void* const ptr = ct; |
107 void* const ptr = ct; |
108 U16* const tableU16 = ( (U16*) ptr) + 2; |
108 U16* const tableU16 = ( (U16*) ptr) + 2; |
109 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ; |
109 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ; |
110 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); |
110 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); |
111 U32 const step = FSE_TABLESTEP(tableSize); |
111 U32 const step = FSE_TABLESTEP(tableSize); |
112 U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; |
112 U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; |
113 |
113 |
114 FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */ |
114 FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; |
115 U32 highThreshold = tableSize-1; |
115 U32 highThreshold = tableSize-1; |
116 |
116 |
117 /* CTable header */ |
117 /* CTable header */ |
|
118 if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); |
118 tableU16[-2] = (U16) tableLog; |
119 tableU16[-2] = (U16) tableLog; |
119 tableU16[-1] = (U16) maxSymbolValue; |
120 tableU16[-1] = (U16) maxSymbolValue; |
120 |
121 |
121 /* For explanations on how to distribute symbol values over the table : |
122 /* For explanations on how to distribute symbol values over the table : |
122 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ |
123 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ |
298 |
306 |
299 /*-************************************************************** |
307 /*-************************************************************** |
300 * Counting histogram |
308 * Counting histogram |
301 ****************************************************************/ |
309 ****************************************************************/ |
302 /*! FSE_count_simple |
310 /*! FSE_count_simple |
303 This function just counts byte values within `src`, |
311 This function counts byte values within `src`, and store the histogram into table `count`. |
304 and store the histogram into table `count`. |
312 It doesn't use any additional memory. |
305 This function is unsafe : it doesn't check that all values within `src` can fit into `count`. |
313 But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. |
306 For this reason, prefer using a table `count` with 256 elements. |
314 For this reason, prefer using a table `count` with 256 elements. |
307 @return : count of most numerous element |
315 @return : count of most numerous element |
308 */ |
316 */ |
309 static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, |
317 size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, |
310 const void* src, size_t srcSize) |
318 const void* src, size_t srcSize) |
311 { |
319 { |
312 const BYTE* ip = (const BYTE*)src; |
320 const BYTE* ip = (const BYTE*)src; |
313 const BYTE* const end = ip + srcSize; |
321 const BYTE* const end = ip + srcSize; |
314 unsigned maxSymbolValue = *maxSymbolValuePtr; |
322 unsigned maxSymbolValue = *maxSymbolValuePtr; |
315 unsigned max=0; |
323 unsigned max=0; |
316 |
324 |
317 |
|
318 memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); |
325 memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); |
319 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } |
326 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } |
320 |
327 |
321 while (ip<end) count[*ip++]++; |
328 while (ip<end) count[*ip++]++; |
322 |
329 |
327 |
334 |
328 return (size_t)max; |
335 return (size_t)max; |
329 } |
336 } |
330 |
337 |
331 |
338 |
332 static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, |
339 /* FSE_count_parallel_wksp() : |
|
340 * Same as FSE_count_parallel(), but using an externally provided scratch buffer. |
|
341 * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ |
|
342 static size_t FSE_count_parallel_wksp( |
|
343 unsigned* count, unsigned* maxSymbolValuePtr, |
333 const void* source, size_t sourceSize, |
344 const void* source, size_t sourceSize, |
334 unsigned checkMax) |
345 unsigned checkMax, unsigned* const workSpace) |
335 { |
346 { |
336 const BYTE* ip = (const BYTE*)source; |
347 const BYTE* ip = (const BYTE*)source; |
337 const BYTE* const iend = ip+sourceSize; |
348 const BYTE* const iend = ip+sourceSize; |
338 unsigned maxSymbolValue = *maxSymbolValuePtr; |
349 unsigned maxSymbolValue = *maxSymbolValuePtr; |
339 unsigned max=0; |
350 unsigned max=0; |
340 |
351 U32* const Counting1 = workSpace; |
341 |
352 U32* const Counting2 = Counting1 + 256; |
342 U32 Counting1[256] = { 0 }; |
353 U32* const Counting3 = Counting2 + 256; |
343 U32 Counting2[256] = { 0 }; |
354 U32* const Counting4 = Counting3 + 256; |
344 U32 Counting3[256] = { 0 }; |
355 |
345 U32 Counting4[256] = { 0 }; |
356 memset(Counting1, 0, 4*256*sizeof(unsigned)); |
346 |
357 |
347 /* safety checks */ |
358 /* safety checks */ |
348 if (!sourceSize) { |
359 if (!sourceSize) { |
349 memset(count, 0, maxSymbolValue + 1); |
360 memset(count, 0, maxSymbolValue + 1); |
350 *maxSymbolValuePtr = 0; |
361 *maxSymbolValuePtr = 0; |
386 U32 s; for (s=255; s>maxSymbolValue; s--) { |
397 U32 s; for (s=255; s>maxSymbolValue; s--) { |
387 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; |
398 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; |
388 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); |
399 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); |
389 } } |
400 } } |
390 |
401 |
391 { U32 s; for (s=0; s<=maxSymbolValue; s++) { |
402 { U32 s; for (s=0; s<=maxSymbolValue; s++) { |
392 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; |
403 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; |
393 if (count[s] > max) max = count[s]; |
404 if (count[s] > max) max = count[s]; |
394 }} |
405 } } |
395 |
406 |
396 while (!count[maxSymbolValue]) maxSymbolValue--; |
407 while (!count[maxSymbolValue]) maxSymbolValue--; |
397 *maxSymbolValuePtr = maxSymbolValue; |
408 *maxSymbolValuePtr = maxSymbolValue; |
398 return (size_t)max; |
409 return (size_t)max; |
399 } |
410 } |
400 |
411 |
|
412 /* FSE_countFast_wksp() : |
|
413 * Same as FSE_countFast(), but using an externally provided scratch buffer. |
|
414 * `workSpace` size must be table of >= `1024` unsigned */ |
|
415 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, |
|
416 const void* source, size_t sourceSize, unsigned* workSpace) |
|
417 { |
|
418 if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); |
|
419 return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); |
|
420 } |
|
421 |
401 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ |
422 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ |
402 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, |
423 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, |
403 const void* source, size_t sourceSize) |
424 const void* source, size_t sourceSize) |
404 { |
425 { |
405 if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); |
426 unsigned tmpCounters[1024]; |
406 return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0); |
427 return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); |
|
428 } |
|
429 |
|
430 /* FSE_count_wksp() : |
|
431 * Same as FSE_count(), but using an externally provided scratch buffer. |
|
432 * `workSpace` size must be table of >= `1024` unsigned */ |
|
433 size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, |
|
434 const void* source, size_t sourceSize, unsigned* workSpace) |
|
435 { |
|
436 if (*maxSymbolValuePtr < 255) |
|
437 return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); |
|
438 *maxSymbolValuePtr = 255; |
|
439 return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); |
407 } |
440 } |
408 |
441 |
409 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, |
442 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, |
410 const void* source, size_t sourceSize) |
443 const void* src, size_t srcSize) |
411 { |
444 { |
412 if (*maxSymbolValuePtr <255) |
445 unsigned tmpCounters[1024]; |
413 return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1); |
446 return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters); |
414 *maxSymbolValuePtr = 255; |
|
415 return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize); |
|
416 } |
447 } |
417 |
448 |
418 |
449 |
419 |
450 |
420 /*-************************************************************** |
451 /*-************************************************************** |
739 |
761 |
740 size_t FSE_compress_usingCTable (void* dst, size_t dstSize, |
762 size_t FSE_compress_usingCTable (void* dst, size_t dstSize, |
741 const void* src, size_t srcSize, |
763 const void* src, size_t srcSize, |
742 const FSE_CTable* ct) |
764 const FSE_CTable* ct) |
743 { |
765 { |
744 const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); |
766 unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); |
745 |
767 |
746 if (fast) |
768 if (fast) |
747 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); |
769 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); |
748 else |
770 else |
749 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); |
771 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); |
750 } |
772 } |
751 |
773 |
752 |
774 |
753 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } |
775 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } |
754 |
776 |
755 size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) |
777 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f |
756 { |
778 #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } |
757 const BYTE* const istart = (const BYTE*) src; |
779 |
758 const BYTE* ip = istart; |
780 /* FSE_compress_wksp() : |
759 |
781 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). |
|
782 * `wkspSize` size must be `(1<<tableLog)`. |
|
783 */ |
|
784 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) |
|
785 { |
760 BYTE* const ostart = (BYTE*) dst; |
786 BYTE* const ostart = (BYTE*) dst; |
761 BYTE* op = ostart; |
787 BYTE* op = ostart; |
762 BYTE* const oend = ostart + dstSize; |
788 BYTE* const oend = ostart + dstSize; |
763 |
789 |
764 U32 count[FSE_MAX_SYMBOL_VALUE+1]; |
790 U32 count[FSE_MAX_SYMBOL_VALUE+1]; |
765 S16 norm[FSE_MAX_SYMBOL_VALUE+1]; |
791 S16 norm[FSE_MAX_SYMBOL_VALUE+1]; |
766 CTable_max_t ct; |
792 FSE_CTable* CTable = (FSE_CTable*)workSpace; |
767 size_t errorCode; |
793 size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue); |
|
794 void* scratchBuffer = (void*)(CTable + CTableSize); |
|
795 size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable)); |
768 |
796 |
769 /* init conditions */ |
797 /* init conditions */ |
770 if (srcSize <= 1) return 0; /* Uncompressible */ |
798 if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge); |
|
799 if (srcSize <= 1) return 0; /* Not compressible */ |
771 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; |
800 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; |
772 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; |
801 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; |
773 |
802 |
774 /* Scan input and build symbol stats */ |
803 /* Scan input and build symbol stats */ |
775 errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize); |
804 { CHECK_V_F(maxCount, FSE_count(count, &maxSymbolValue, src, srcSize) ); |
776 if (FSE_isError(errorCode)) return errorCode; |
805 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */ |
777 if (errorCode == srcSize) return 1; |
806 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ |
778 if (errorCode == 1) return 0; /* each symbol only present once */ |
807 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ |
779 if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ |
808 } |
780 |
809 |
781 tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); |
810 tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); |
782 errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue); |
811 CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); |
783 if (FSE_isError(errorCode)) return errorCode; |
|
784 |
812 |
785 /* Write table description header */ |
813 /* Write table description header */ |
786 errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog); |
814 { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); |
787 if (FSE_isError(errorCode)) return errorCode; |
815 op += nc_err; |
788 op += errorCode; |
816 } |
789 |
817 |
790 /* Compress */ |
818 /* Compress */ |
791 errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog); |
819 CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); |
792 if (FSE_isError(errorCode)) return errorCode; |
820 { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); |
793 errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct); |
821 if (cSize == 0) return 0; /* not enough space for compressed data */ |
794 if (errorCode == 0) return 0; /* not enough space for compressed data */ |
822 op += cSize; |
795 op += errorCode; |
823 } |
796 |
824 |
797 /* check compressibility */ |
825 /* check compressibility */ |
798 if ( (size_t)(op-ostart) >= srcSize-1 ) |
826 if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; |
799 return 0; |
|
800 |
827 |
801 return op-ostart; |
828 return op-ostart; |
802 } |
829 } |
803 |
830 |
804 size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize) |
831 typedef struct { |
805 { |
832 FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; |
806 return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); |
833 BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; |
|
834 } fseWkspMax_t; |
|
835 |
|
836 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) |
|
837 { |
|
838 fseWkspMax_t scratchBuffer; |
|
839 FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */ |
|
840 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); |
|
841 return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); |
|
842 } |
|
843 |
|
844 size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
|
845 { |
|
846 return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); |
807 } |
847 } |
808 |
848 |
809 |
849 |
810 #endif /* FSE_COMMONDEFS_ONLY */ |
850 #endif /* FSE_COMMONDEFS_ONLY */ |