Commit | Line | Data |
---|---|---|
e0c1b49f NT |
1 | /* ****************************************************************** |
2 | * FSE : Finite State Entropy decoder | |
3 | * Copyright (c) Yann Collet, Facebook, Inc. | |
4 | * | |
5 | * You can contact the author at : | |
6 | * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy | |
7 | * - Public forum : https://groups.google.com/forum/#!forum/lz4c | |
8 | * | |
9 | * This source code is licensed under both the BSD-style license (found in the | |
10 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |
11 | * in the COPYING file in the root directory of this source tree). | |
12 | * You may select, at your option, one of the above-listed licenses. | |
13 | ****************************************************************** */ | |
14 | ||
15 | ||
16 | /* ************************************************************** | |
17 | * Includes | |
18 | ****************************************************************/ | |
19 | #include "debug.h" /* assert */ | |
20 | #include "bitstream.h" | |
21 | #include "compiler.h" | |
22 | #define FSE_STATIC_LINKING_ONLY | |
23 | #include "fse.h" | |
24 | #include "error_private.h" | |
25 | #define ZSTD_DEPS_NEED_MALLOC | |
26 | #include "zstd_deps.h" | |
27 | ||
28 | ||
29 | /* ************************************************************** | |
30 | * Error Management | |
31 | ****************************************************************/ | |
32 | #define FSE_isError ERR_isError | |
33 | #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ | |
34 | ||
35 | ||
36 | /* ************************************************************** | |
37 | * Templates | |
38 | ****************************************************************/ | |
39 | /* | |
40 | designed to be included | |
41 | for type-specific functions (template emulation in C) | |
42 | Objective is to write these functions only once, for improved maintenance | |
43 | */ | |
44 | ||
45 | /* safety checks */ | |
46 | #ifndef FSE_FUNCTION_EXTENSION | |
47 | # error "FSE_FUNCTION_EXTENSION must be defined" | |
48 | #endif | |
49 | #ifndef FSE_FUNCTION_TYPE | |
50 | # error "FSE_FUNCTION_TYPE must be defined" | |
51 | #endif | |
52 | ||
53 | /* Function names */ | |
54 | #define FSE_CAT(X,Y) X##Y | |
55 | #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) | |
56 | #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) | |
57 | ||
58 | ||
59 | /* Function templates */ | |
60 | FSE_DTable* FSE_createDTable (unsigned tableLog) | |
61 | { | |
62 | if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; | |
63 | return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); | |
64 | } | |
65 | ||
66 | void FSE_freeDTable (FSE_DTable* dt) | |
67 | { | |
68 | ZSTD_free(dt); | |
69 | } | |
70 | ||
71 | static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) | |
72 | { | |
73 | void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ | |
74 | FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); | |
75 | U16* symbolNext = (U16*)workSpace; | |
76 | BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); | |
77 | ||
78 | U32 const maxSV1 = maxSymbolValue + 1; | |
79 | U32 const tableSize = 1 << tableLog; | |
80 | U32 highThreshold = tableSize-1; | |
81 | ||
82 | /* Sanity Checks */ | |
83 | if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); | |
84 | if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); | |
85 | if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); | |
86 | ||
87 | /* Init, lay down lowprob symbols */ | |
88 | { FSE_DTableHeader DTableH; | |
89 | DTableH.tableLog = (U16)tableLog; | |
90 | DTableH.fastMode = 1; | |
91 | { S16 const largeLimit= (S16)(1 << (tableLog-1)); | |
92 | U32 s; | |
93 | for (s=0; s<maxSV1; s++) { | |
94 | if (normalizedCounter[s]==-1) { | |
95 | tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; | |
96 | symbolNext[s] = 1; | |
97 | } else { | |
98 | if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; | |
99 | symbolNext[s] = normalizedCounter[s]; | |
100 | } } } | |
101 | ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); | |
102 | } | |
103 | ||
104 | /* Spread symbols */ | |
105 | if (highThreshold == tableSize - 1) { | |
106 | size_t const tableMask = tableSize-1; | |
107 | size_t const step = FSE_TABLESTEP(tableSize); | |
108 | /* First lay down the symbols in order. | |
109 | * We use a uint64_t to lay down 8 bytes at a time. This reduces branch | |
110 | * misses since small blocks generally have small table logs, so nearly | |
111 | * all symbols have counts <= 8. We ensure we have 8 bytes at the end of | |
112 | * our buffer to handle the over-write. | |
113 | */ | |
114 | { | |
115 | U64 const add = 0x0101010101010101ull; | |
116 | size_t pos = 0; | |
117 | U64 sv = 0; | |
118 | U32 s; | |
119 | for (s=0; s<maxSV1; ++s, sv += add) { | |
120 | int i; | |
121 | int const n = normalizedCounter[s]; | |
122 | MEM_write64(spread + pos, sv); | |
123 | for (i = 8; i < n; i += 8) { | |
124 | MEM_write64(spread + pos + i, sv); | |
125 | } | |
126 | pos += n; | |
127 | } | |
128 | } | |
129 | /* Now we spread those positions across the table. | |
130 | * The benefit of doing it in two stages is that we avoid the the | |
131 | * variable size inner loop, which caused lots of branch misses. | |
132 | * Now we can run through all the positions without any branch misses. | |
133 | * We unroll the loop twice, since that is what emperically worked best. | |
134 | */ | |
135 | { | |
136 | size_t position = 0; | |
137 | size_t s; | |
138 | size_t const unroll = 2; | |
139 | assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */ | |
140 | for (s = 0; s < (size_t)tableSize; s += unroll) { | |
141 | size_t u; | |
142 | for (u = 0; u < unroll; ++u) { | |
143 | size_t const uPosition = (position + (u * step)) & tableMask; | |
144 | tableDecode[uPosition].symbol = spread[s + u]; | |
145 | } | |
146 | position = (position + (unroll * step)) & tableMask; | |
147 | } | |
148 | assert(position == 0); | |
149 | } | |
150 | } else { | |
151 | U32 const tableMask = tableSize-1; | |
152 | U32 const step = FSE_TABLESTEP(tableSize); | |
153 | U32 s, position = 0; | |
154 | for (s=0; s<maxSV1; s++) { | |
155 | int i; | |
156 | for (i=0; i<normalizedCounter[s]; i++) { | |
157 | tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s; | |
158 | position = (position + step) & tableMask; | |
159 | while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */ | |
160 | } } | |
161 | if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ | |
162 | } | |
163 | ||
164 | /* Build Decoding table */ | |
165 | { U32 u; | |
166 | for (u=0; u<tableSize; u++) { | |
167 | FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); | |
168 | U32 const nextState = symbolNext[symbol]++; | |
169 | tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) ); | |
170 | tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize); | |
171 | } } | |
172 | ||
173 | return 0; | |
174 | } | |
175 | ||
176 | size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) | |
177 | { | |
178 | return FSE_buildDTable_internal(dt, normalizedCounter, maxSymbolValue, tableLog, workSpace, wkspSize); | |
179 | } | |
180 | ||
181 | ||
182 | #ifndef FSE_COMMONDEFS_ONLY | |
183 | ||
184 | /*-******************************************************* | |
185 | * Decompression (Byte symbols) | |
186 | *********************************************************/ | |
187 | size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue) | |
188 | { | |
189 | void* ptr = dt; | |
190 | FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; | |
191 | void* dPtr = dt + 1; | |
192 | FSE_decode_t* const cell = (FSE_decode_t*)dPtr; | |
193 | ||
194 | DTableH->tableLog = 0; | |
195 | DTableH->fastMode = 0; | |
196 | ||
197 | cell->newState = 0; | |
198 | cell->symbol = symbolValue; | |
199 | cell->nbBits = 0; | |
200 | ||
201 | return 0; | |
202 | } | |
203 | ||
204 | ||
205 | size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) | |
206 | { | |
207 | void* ptr = dt; | |
208 | FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; | |
209 | void* dPtr = dt + 1; | |
210 | FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; | |
211 | const unsigned tableSize = 1 << nbBits; | |
212 | const unsigned tableMask = tableSize - 1; | |
213 | const unsigned maxSV1 = tableMask+1; | |
214 | unsigned s; | |
215 | ||
216 | /* Sanity checks */ | |
217 | if (nbBits < 1) return ERROR(GENERIC); /* min size */ | |
218 | ||
219 | /* Build Decoding Table */ | |
220 | DTableH->tableLog = (U16)nbBits; | |
221 | DTableH->fastMode = 1; | |
222 | for (s=0; s<maxSV1; s++) { | |
223 | dinfo[s].newState = 0; | |
224 | dinfo[s].symbol = (BYTE)s; | |
225 | dinfo[s].nbBits = (BYTE)nbBits; | |
226 | } | |
227 | ||
228 | return 0; | |
229 | } | |
230 | ||
231 | FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic( | |
232 | void* dst, size_t maxDstSize, | |
233 | const void* cSrc, size_t cSrcSize, | |
234 | const FSE_DTable* dt, const unsigned fast) | |
235 | { | |
236 | BYTE* const ostart = (BYTE*) dst; | |
237 | BYTE* op = ostart; | |
238 | BYTE* const omax = op + maxDstSize; | |
239 | BYTE* const olimit = omax-3; | |
240 | ||
241 | BIT_DStream_t bitD; | |
242 | FSE_DState_t state1; | |
243 | FSE_DState_t state2; | |
244 | ||
245 | /* Init */ | |
246 | CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); | |
247 | ||
248 | FSE_initDState(&state1, &bitD, dt); | |
249 | FSE_initDState(&state2, &bitD, dt); | |
250 | ||
251 | #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) | |
252 | ||
253 | /* 4 symbols per loop */ | |
254 | for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) { | |
255 | op[0] = FSE_GETSYMBOL(&state1); | |
256 | ||
257 | if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ | |
258 | BIT_reloadDStream(&bitD); | |
259 | ||
260 | op[1] = FSE_GETSYMBOL(&state2); | |
261 | ||
262 | if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ | |
263 | { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } | |
264 | ||
265 | op[2] = FSE_GETSYMBOL(&state1); | |
266 | ||
267 | if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ | |
268 | BIT_reloadDStream(&bitD); | |
269 | ||
270 | op[3] = FSE_GETSYMBOL(&state2); | |
271 | } | |
272 | ||
273 | /* tail */ | |
274 | /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ | |
275 | while (1) { | |
276 | if (op>(omax-2)) return ERROR(dstSize_tooSmall); | |
277 | *op++ = FSE_GETSYMBOL(&state1); | |
278 | if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { | |
279 | *op++ = FSE_GETSYMBOL(&state2); | |
280 | break; | |
281 | } | |
282 | ||
283 | if (op>(omax-2)) return ERROR(dstSize_tooSmall); | |
284 | *op++ = FSE_GETSYMBOL(&state2); | |
285 | if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { | |
286 | *op++ = FSE_GETSYMBOL(&state1); | |
287 | break; | |
288 | } } | |
289 | ||
290 | return op-ostart; | |
291 | } | |
292 | ||
293 | ||
294 | size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, | |
295 | const void* cSrc, size_t cSrcSize, | |
296 | const FSE_DTable* dt) | |
297 | { | |
298 | const void* ptr = dt; | |
299 | const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; | |
300 | const U32 fastMode = DTableH->fastMode; | |
301 | ||
302 | /* select fast mode (static) */ | |
303 | if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); | |
304 | return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); | |
305 | } | |
306 | ||
307 | ||
308 | size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) | |
309 | { | |
310 | return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); | |
311 | } | |
312 | ||
313 | typedef struct { | |
314 | short ncount[FSE_MAX_SYMBOL_VALUE + 1]; | |
315 | FSE_DTable dtable[1]; /* Dynamically sized */ | |
316 | } FSE_DecompressWksp; | |
317 | ||
318 | ||
319 | FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( | |
320 | void* dst, size_t dstCapacity, | |
321 | const void* cSrc, size_t cSrcSize, | |
322 | unsigned maxLog, void* workSpace, size_t wkspSize, | |
323 | int bmi2) | |
324 | { | |
325 | const BYTE* const istart = (const BYTE*)cSrc; | |
326 | const BYTE* ip = istart; | |
327 | unsigned tableLog; | |
328 | unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; | |
329 | FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; | |
330 | ||
331 | DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); | |
332 | if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); | |
333 | ||
334 | /* normal FSE decoding mode */ | |
335 | { | |
336 | size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); | |
337 | if (FSE_isError(NCountLength)) return NCountLength; | |
338 | if (tableLog > maxLog) return ERROR(tableLog_tooLarge); | |
339 | assert(NCountLength <= cSrcSize); | |
340 | ip += NCountLength; | |
341 | cSrcSize -= NCountLength; | |
342 | } | |
343 | ||
344 | if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); | |
345 | workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); | |
346 | wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); | |
347 | ||
348 | CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); | |
349 | ||
350 | { | |
351 | const void* ptr = wksp->dtable; | |
352 | const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; | |
353 | const U32 fastMode = DTableH->fastMode; | |
354 | ||
355 | /* select fast mode (static) */ | |
356 | if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1); | |
357 | return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); | |
358 | } | |
359 | } | |
360 | ||
361 | /* Avoids the FORCE_INLINE of the _body() function. */ | |
362 | static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) | |
363 | { | |
364 | return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); | |
365 | } | |
366 | ||
367 | #if DYNAMIC_BMI2 | |
368 | TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) | |
369 | { | |
370 | return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); | |
371 | } | |
372 | #endif | |
373 | ||
374 | size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) | |
375 | { | |
376 | #if DYNAMIC_BMI2 | |
377 | if (bmi2) { | |
378 | return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); | |
379 | } | |
380 | #endif | |
381 | (void)bmi2; | |
382 | return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); | |
383 | } | |
384 | ||
385 | ||
386 | typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; | |
387 | ||
388 | ||
389 | ||
390 | #endif /* FSE_COMMONDEFS_ONLY */ |