13 |
13 |
14 #ifndef ZSTD_OPT_H_91842398743 |
14 #ifndef ZSTD_OPT_H_91842398743 |
15 #define ZSTD_OPT_H_91842398743 |
15 #define ZSTD_OPT_H_91842398743 |
16 |
16 |
17 |
17 |
18 #define ZSTD_FREQ_DIV 5 |
18 #define ZSTD_LITFREQ_ADD 2 |
19 #define ZSTD_MAX_PRICE (1<<30) |
19 #define ZSTD_FREQ_DIV 4 |
|
20 #define ZSTD_MAX_PRICE (1<<30) |
20 |
21 |
21 /*-************************************* |
22 /*-************************************* |
22 * Price functions for optimal parser |
23 * Price functions for optimal parser |
23 ***************************************/ |
24 ***************************************/ |
24 FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr) |
25 FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr) |
29 ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1); |
30 ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1); |
30 ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum)); |
31 ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum)); |
31 } |
32 } |
32 |
33 |
33 |
34 |
34 MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) |
35 MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize) |
35 { |
36 { |
36 unsigned u; |
37 unsigned u; |
37 |
38 |
38 ssPtr->cachedLiterals = NULL; |
39 ssPtr->cachedLiterals = NULL; |
39 ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; |
40 ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; |
|
41 ssPtr->staticPrices = 0; |
40 |
42 |
41 if (ssPtr->litLengthSum == 0) { |
43 if (ssPtr->litLengthSum == 0) { |
42 ssPtr->litSum = (2<<Litbits); |
44 if (srcSize <= 1024) ssPtr->staticPrices = 1; |
|
45 |
|
46 for (u=0; u<=MaxLit; u++) |
|
47 ssPtr->litFreq[u] = 0; |
|
48 for (u=0; u<srcSize; u++) |
|
49 ssPtr->litFreq[src[u]]++; |
|
50 |
|
51 ssPtr->litSum = 0; |
43 ssPtr->litLengthSum = MaxLL+1; |
52 ssPtr->litLengthSum = MaxLL+1; |
44 ssPtr->matchLengthSum = MaxML+1; |
53 ssPtr->matchLengthSum = MaxML+1; |
45 ssPtr->offCodeSum = (MaxOff+1); |
54 ssPtr->offCodeSum = (MaxOff+1); |
46 ssPtr->matchSum = (2<<Litbits); |
55 ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits); |
47 |
56 |
48 for (u=0; u<=MaxLit; u++) |
57 for (u=0; u<=MaxLit; u++) { |
49 ssPtr->litFreq[u] = 2; |
58 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); |
|
59 ssPtr->litSum += ssPtr->litFreq[u]; |
|
60 } |
50 for (u=0; u<=MaxLL; u++) |
61 for (u=0; u<=MaxLL; u++) |
51 ssPtr->litLengthFreq[u] = 1; |
62 ssPtr->litLengthFreq[u] = 1; |
52 for (u=0; u<=MaxML; u++) |
63 for (u=0; u<=MaxML; u++) |
53 ssPtr->matchLengthFreq[u] = 1; |
64 ssPtr->matchLengthFreq[u] = 1; |
54 for (u=0; u<=MaxOff; u++) |
65 for (u=0; u<=MaxOff; u++) |
59 ssPtr->offCodeSum = 0; |
70 ssPtr->offCodeSum = 0; |
60 ssPtr->matchSum = 0; |
71 ssPtr->matchSum = 0; |
61 ssPtr->litSum = 0; |
72 ssPtr->litSum = 0; |
62 |
73 |
63 for (u=0; u<=MaxLit; u++) { |
74 for (u=0; u<=MaxLit; u++) { |
64 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); |
75 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1)); |
65 ssPtr->litSum += ssPtr->litFreq[u]; |
76 ssPtr->litSum += ssPtr->litFreq[u]; |
66 } |
77 } |
67 for (u=0; u<=MaxLL; u++) { |
78 for (u=0; u<=MaxLL; u++) { |
68 ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); |
79 ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1)); |
69 ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; |
80 ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; |
70 } |
81 } |
71 for (u=0; u<=MaxML; u++) { |
82 for (u=0; u<=MaxML; u++) { |
72 ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); |
83 ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV); |
73 ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; |
84 ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; |
74 ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); |
85 ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); |
75 } |
86 } |
|
87 ssPtr->matchSum *= ZSTD_LITFREQ_ADD; |
76 for (u=0; u<=MaxOff; u++) { |
88 for (u=0; u<=MaxOff; u++) { |
77 ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); |
89 ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); |
78 ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; |
90 ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; |
79 } |
91 } |
80 } |
92 } |
122 |
137 |
123 |
138 |
124 FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) |
139 FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) |
125 { |
140 { |
126 /* offset */ |
141 /* offset */ |
|
142 U32 price; |
127 BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); |
143 BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); |
128 U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1); |
144 |
129 |
145 if (seqStorePtr->staticPrices) |
|
146 return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode; |
|
147 |
|
148 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1); |
130 if (!ultra && offCode >= 20) price += (offCode-19)*2; |
149 if (!ultra && offCode >= 20) price += (offCode-19)*2; |
131 |
150 |
132 /* match Length */ |
151 /* match Length */ |
133 { const BYTE ML_deltaCode = 36; |
152 { const BYTE ML_deltaCode = 36; |
134 const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; |
153 const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; |
142 MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) |
161 MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength) |
143 { |
162 { |
144 U32 u; |
163 U32 u; |
145 |
164 |
146 /* literals */ |
165 /* literals */ |
147 seqStorePtr->litSum += litLength; |
166 seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD; |
148 for (u=0; u < litLength; u++) |
167 for (u=0; u < litLength; u++) |
149 seqStorePtr->litFreq[literals[u]]++; |
168 seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; |
150 |
169 |
151 /* literal Length */ |
170 /* literal Length */ |
152 { const BYTE LL_deltaCode = 19; |
171 { const BYTE LL_deltaCode = 19; |
153 const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; |
172 const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; |
154 seqStorePtr->litLengthFreq[llCode]++; |
173 seqStorePtr->litLengthFreq[llCode]++; |
399 const BYTE* inr; |
418 const BYTE* inr; |
400 U32 offset, rep[ZSTD_REP_NUM]; |
419 U32 offset, rep[ZSTD_REP_NUM]; |
401 |
420 |
402 /* init */ |
421 /* init */ |
403 ctx->nextToUpdate3 = ctx->nextToUpdate; |
422 ctx->nextToUpdate3 = ctx->nextToUpdate; |
404 ZSTD_rescaleFreqs(seqStorePtr); |
423 ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize); |
405 ip += (ip==prefixStart); |
424 ip += (ip==prefixStart); |
406 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } |
425 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } |
407 |
426 |
408 /* Match Loop */ |
427 /* Match Loop */ |
409 while (ip < ilimit) { |
428 while (ip < ilimit) { |
414 litlen = (U32)(ip - anchor); |
433 litlen = (U32)(ip - anchor); |
415 |
434 |
416 /* check repCode */ |
435 /* check repCode */ |
417 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); |
436 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); |
418 for (i=(ip == anchor); i<last_i; i++) { |
437 for (i=(ip == anchor); i<last_i; i++) { |
419 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i]; |
438 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; |
420 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart)) |
439 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart)) |
421 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) { |
440 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) { |
422 mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; |
441 mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; |
423 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { |
442 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { |
424 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; |
443 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1; |
499 } |
518 } |
500 |
519 |
501 best_mlen = minMatch; |
520 best_mlen = minMatch; |
502 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); |
521 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); |
503 for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */ |
522 for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */ |
504 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; |
523 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; |
505 if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart)) |
524 if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart)) |
506 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) { |
525 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) { |
507 mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; |
526 mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; |
508 |
527 |
509 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { |
528 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { |
654 /* init */ |
673 /* init */ |
655 U32 offset, rep[ZSTD_REP_NUM]; |
674 U32 offset, rep[ZSTD_REP_NUM]; |
656 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } |
675 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } |
657 |
676 |
658 ctx->nextToUpdate3 = ctx->nextToUpdate; |
677 ctx->nextToUpdate3 = ctx->nextToUpdate; |
659 ZSTD_rescaleFreqs(seqStorePtr); |
678 ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize); |
660 ip += (ip==prefixStart); |
679 ip += (ip==prefixStart); |
661 |
680 |
662 /* Match Loop */ |
681 /* Match Loop */ |
663 while (ip < ilimit) { |
682 while (ip < ilimit) { |
664 U32 cur, match_num, last_pos, litlen, price; |
683 U32 cur, match_num, last_pos, litlen, price; |
669 opt[0].litlen = (U32)(ip - anchor); |
688 opt[0].litlen = (U32)(ip - anchor); |
670 |
689 |
671 /* check repCode */ |
690 /* check repCode */ |
672 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); |
691 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); |
673 for (i = (ip==anchor); i<last_i; i++) { |
692 for (i = (ip==anchor); i<last_i; i++) { |
674 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i]; |
693 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; |
675 const U32 repIndex = (U32)(current - repCur); |
694 const U32 repIndex = (U32)(current - repCur); |
676 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; |
695 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; |
677 const BYTE* const repMatch = repBase + repIndex; |
696 const BYTE* const repMatch = repBase + repIndex; |
678 if ( (repCur > 0 && repCur <= (S32)current) |
697 if ( (repCur > 0 && repCur <= (S32)current) |
679 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ |
698 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ |
765 } |
784 } |
766 |
785 |
767 best_mlen = minMatch; |
786 best_mlen = minMatch; |
768 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); |
787 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); |
769 for (i = (mlen != 1); i<last_i; i++) { |
788 for (i = (mlen != 1); i<last_i; i++) { |
770 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; |
789 const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; |
771 const U32 repIndex = (U32)(current+cur - repCur); |
790 const U32 repIndex = (U32)(current+cur - repCur); |
772 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; |
791 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; |
773 const BYTE* const repMatch = repBase + repIndex; |
792 const BYTE* const repMatch = repBase + repIndex; |
774 if ( (repCur > 0 && repCur <= (S32)(current+cur)) |
793 if ( (repCur > 0 && repCur <= (S32)(current+cur)) |
775 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ |
794 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */ |