/*-*************************************
*  Constants
***************************************/
#define kSearchStrength      8
#define HASH_READ_SIZE       8
#define ZSTD_DUBT_UNSORTED_MARK 1   /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted".
                                       It could be confused for a real successor at index "1", if sorted as larger than its predecessor.
                                       It's not a big deal though : candidate will just be sorted again.
                                       Additionally, candidate position 1 will be lost.
                                       But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
                                       This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */


/*-*************************************
*  Context memory management
***************************************/
117 typedef struct { |
126 typedef struct { |
118 BYTE const* nextSrc; /* next block here to continue on current prefix */ |
127 BYTE const* nextSrc; /* next block here to continue on current prefix */ |
119 BYTE const* base; /* All regular indexes relative to this position */ |
128 BYTE const* base; /* All regular indexes relative to this position */ |
120 BYTE const* dictBase; /* extDict indexes relative to this position */ |
129 BYTE const* dictBase; /* extDict indexes relative to this position */ |
121 U32 dictLimit; /* below that point, need extDict */ |
130 U32 dictLimit; /* below that point, need extDict */ |
122 U32 lowLimit; /* below that point, no more data */ |
131 U32 lowLimit; /* below that point, no more valid data */ |
123 } ZSTD_window_t; |
132 } ZSTD_window_t; |
124 |
133 |
125 typedef struct ZSTD_matchState_t ZSTD_matchState_t; |
134 typedef struct ZSTD_matchState_t ZSTD_matchState_t; |
126 struct ZSTD_matchState_t { |
135 struct ZSTD_matchState_t { |
127 ZSTD_window_t window; /* State for window round buffer management */ |
136 ZSTD_window_t window; /* State for window round buffer management */ |
128 U32 loadedDictEnd; /* index of end of dictionary */ |
137 U32 loadedDictEnd; /* index of end of dictionary, within context's referential. |
|
138 * When loadedDictEnd != 0, a dictionary is in use, and still valid. |
|
139 * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. |
|
140 * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). |
|
141 * When dict referential is copied into active context (i.e. not attached), |
|
142 * loadedDictEnd == dictSize, since referential starts from zero. |
|
143 */ |
129 U32 nextToUpdate; /* index from which to continue table update */ |
144 U32 nextToUpdate; /* index from which to continue table update */ |
130 U32 nextToUpdate3; /* index from which to continue table update */ |
145 U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ |
131 U32 hashLog3; /* dispatch table : larger == faster, more memory */ |
|
132 U32* hashTable; |
146 U32* hashTable; |
133 U32* hashTable3; |
147 U32* hashTable3; |
134 U32* chainTable; |
148 U32* chainTable; |
135 optState_t opt; /* optimal parser state */ |
149 optState_t opt; /* optimal parser state */ |
136 const ZSTD_matchState_t * dictMatchState; |
150 const ZSTD_matchState_t* dictMatchState; |
137 ZSTD_compressionParameters cParams; |
151 ZSTD_compressionParameters cParams; |
138 }; |
152 }; |
139 |
153 |
140 typedef struct { |
154 typedef struct { |
141 ZSTD_compressedBlockState_t* prevCBlock; |
155 ZSTD_compressedBlockState_t* prevCBlock; |
291 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, |
309 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, |
292 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, |
310 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, |
293 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; |
311 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; |
294 static const U32 ML_deltaCode = 36; |
312 static const U32 ML_deltaCode = 36; |
295 return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; |
313 return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; |
|
314 } |
|
315 |
|
316 /* ZSTD_cParam_withinBounds: |
|
317 * @return 1 if value is within cParam bounds, |
|
318 * 0 otherwise */ |
|
319 MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) |
|
320 { |
|
321 ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); |
|
322 if (ZSTD_isError(bounds.error)) return 0; |
|
323 if (value < bounds.lowerBound) return 0; |
|
324 if (value > bounds.upperBound) return 0; |
|
325 return 1; |
|
326 } |
|
327 |
|
328 /* ZSTD_minGain() : |
|
329 * minimum compression required |
|
330 * to generate a compress block or a compressed literals section. |
|
331 * note : use same formula for both situations */ |
|
332 MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) |
|
333 { |
|
334 U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; |
|
335 ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); |
|
336 assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); |
|
337 return (srcSize >> minlog) + 2; |
296 } |
338 } |

/*! ZSTD_storeSeq() :
 *  Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
 *  `offsetCode` : distance to match + 3 (values 1-3 are repCodes).
663 /** |
708 /** |
664 * ZSTD_window_enforceMaxDist(): |
709 * ZSTD_window_enforceMaxDist(): |
665 * Updates lowLimit so that: |
710 * Updates lowLimit so that: |
666 * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd |
711 * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd |
667 * |
712 * |
668 * This allows a simple check that index >= lowLimit to see if index is valid. |
713 * It ensures index is valid as long as index >= lowLimit. |
669 * This must be called before a block compression call, with srcEnd as the block |
714 * This must be called before a block compression call. |
670 * source end. |
|
671 * |
715 * |
672 * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit. |
716 * loadedDictEnd is only defined if a dictionary is in use for current compression. |
673 * This is because dictionaries are allowed to be referenced as long as the last |
717 * As the name implies, loadedDictEnd represents the index at end of dictionary. |
674 * byte of the dictionary is in the window, but once they are out of range, |
718 * The value lies within context's referential, it can be directly compared to blockEndIdx. |
675 * they cannot be referenced. If loadedDictEndPtr is NULL, we use |
|
676 * loadedDictEnd == 0. |
|
677 * |
719 * |
678 * In normal dict mode, the dict is between lowLimit and dictLimit. In |
720 * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. |
679 * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary |
721 * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. |
680 * is below them. forceWindow and dictMatchState are therefore incompatible. |
722 * This is because dictionaries are allowed to be referenced fully |
|
723 * as long as the last byte of the dictionary is in the window. |
|
724 * Once input has progressed beyond window size, dictionary cannot be referenced anymore. |
|
725 * |
|
726 * In normal dict mode, the dictionary lies between lowLimit and dictLimit. |
|
727 * In dictMatchState mode, lowLimit and dictLimit are the same, |
|
728 * and the dictionary is below them. |
|
729 * forceWindow and dictMatchState are therefore incompatible. |
681 */ |
730 */ |
682 MEM_STATIC void |
731 MEM_STATIC void |
683 ZSTD_window_enforceMaxDist(ZSTD_window_t* window, |
732 ZSTD_window_enforceMaxDist(ZSTD_window_t* window, |
684 void const* srcEnd, |
733 const void* blockEnd, |
685 U32 maxDist, |
734 U32 maxDist, |
686 U32* loadedDictEndPtr, |
735 U32* loadedDictEndPtr, |
687 const ZSTD_matchState_t** dictMatchStatePtr) |
736 const ZSTD_matchState_t** dictMatchStatePtr) |
688 { |
737 { |
689 U32 const blockEndIdx = (U32)((BYTE const*)srcEnd - window->base); |
738 U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); |
690 U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; |
739 U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; |
691 DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u", |
740 DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", |
692 (unsigned)blockEndIdx, (unsigned)maxDist); |
741 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); |
|
742 |
|
743 /* - When there is no dictionary : loadedDictEnd == 0. |
|
744 In which case, the test (blockEndIdx > maxDist) is merely to avoid |
|
745 overflowing next operation `newLowLimit = blockEndIdx - maxDist`. |
|
746 - When there is a standard dictionary : |
|
747 Index referential is copied from the dictionary, |
|
748 which means it starts from 0. |
|
749 In which case, loadedDictEnd == dictSize, |
|
750 and it makes sense to compare `blockEndIdx > maxDist + dictSize` |
|
751 since `blockEndIdx` also starts from zero. |
|
752 - When there is an attached dictionary : |
|
753 loadedDictEnd is expressed within the referential of the context, |
|
754 so it can be directly compared against blockEndIdx. |
|
755 */ |
693 if (blockEndIdx > maxDist + loadedDictEnd) { |
756 if (blockEndIdx > maxDist + loadedDictEnd) { |
694 U32 const newLowLimit = blockEndIdx - maxDist; |
757 U32 const newLowLimit = blockEndIdx - maxDist; |
695 if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; |
758 if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; |
696 if (window->dictLimit < window->lowLimit) { |
759 if (window->dictLimit < window->lowLimit) { |
697 DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", |
760 DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", |
698 (unsigned)window->dictLimit, (unsigned)window->lowLimit); |
761 (unsigned)window->dictLimit, (unsigned)window->lowLimit); |
699 window->dictLimit = window->lowLimit; |
762 window->dictLimit = window->lowLimit; |
700 } |
763 } |
701 if (loadedDictEndPtr) |
764 /* On reaching window size, dictionaries are invalidated */ |
|
765 if (loadedDictEndPtr) *loadedDictEndPtr = 0; |
|
766 if (dictMatchStatePtr) *dictMatchStatePtr = NULL; |
|
767 } |
|
768 } |
|
769 |
|
770 /* Similar to ZSTD_window_enforceMaxDist(), |
|
771 * but only invalidates dictionary |
|
772 * when input progresses beyond window size. |
|
773 * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) |
|
774 * loadedDictEnd uses same referential as window->base |
|
775 * maxDist is the window size */ |
|
776 MEM_STATIC void |
|
777 ZSTD_checkDictValidity(const ZSTD_window_t* window, |
|
778 const void* blockEnd, |
|
779 U32 maxDist, |
|
780 U32* loadedDictEndPtr, |
|
781 const ZSTD_matchState_t** dictMatchStatePtr) |
|
782 { |
|
783 assert(loadedDictEndPtr != NULL); |
|
784 assert(dictMatchStatePtr != NULL); |
|
785 { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); |
|
786 U32 const loadedDictEnd = *loadedDictEndPtr; |
|
787 DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", |
|
788 (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); |
|
789 assert(blockEndIdx >= loadedDictEnd); |
|
790 |
|
791 if (blockEndIdx > loadedDictEnd + maxDist) { |
|
792 /* On reaching window size, dictionaries are invalidated. |
|
793 * For simplification, if window size is reached anywhere within next block, |
|
794 * the dictionary is invalidated for the full block. |
|
795 */ |
|
796 DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); |
702 *loadedDictEndPtr = 0; |
797 *loadedDictEndPtr = 0; |
703 if (dictMatchStatePtr) |
|
704 *dictMatchStatePtr = NULL; |
798 *dictMatchStatePtr = NULL; |
705 } |
799 } else { |
|
800 if (*loadedDictEndPtr != 0) { |
|
801 DEBUGLOG(6, "dictionary considered valid for current block"); |
|
802 } } } |
706 } |
803 } |

/**
 * ZSTD_window_update():
 * Updates the window by appending [src, src + srcSize) to the window.