Commit | Line | Data |
---|---|---|
09c434b8 | 1 | // SPDX-License-Identifier: GPL-2.0-only |
64c70b1c | 2 | /* |
8b975bd3 | 3 | * LZO1X Compressor from LZO |
64c70b1c | 4 | * |
8b975bd3 | 5 | * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com> |
64c70b1c RP |
6 | * |
7 | * The full LZO package can be found at: | |
8 | * http://www.oberhumer.com/opensource/lzo/ | |
9 | * | |
8b975bd3 | 10 | * Changed for Linux kernel use by: |
64c70b1c RP |
11 | * Nitin Gupta <nitingupta910@gmail.com> |
12 | * Richard Purdie <rpurdie@openedhand.com> | |
13 | */ | |
14 | ||
15 | #include <linux/module.h> | |
16 | #include <linux/kernel.h> | |
64c70b1c | 17 | #include <asm/unaligned.h> |
8b975bd3 | 18 | #include <linux/lzo.h> |
64c70b1c RP |
19 | #include "lzodefs.h" |
20 | ||
21 | static noinline size_t | |
8b975bd3 MO |
22 | lzo1x_1_do_compress(const unsigned char *in, size_t in_len, |
23 | unsigned char *out, size_t *out_len, | |
45ec975e DR |
24 | size_t ti, void *wrkmem, signed char *state_offset, |
25 | const unsigned char bitstream_version) | |
64c70b1c | 26 | { |
8b975bd3 MO |
27 | const unsigned char *ip; |
28 | unsigned char *op; | |
64c70b1c | 29 | const unsigned char * const in_end = in + in_len; |
8b975bd3 MO |
30 | const unsigned char * const ip_end = in + in_len - 20; |
31 | const unsigned char *ii; | |
32 | lzo_dict_t * const dict = (lzo_dict_t *) wrkmem; | |
64c70b1c | 33 | |
8b975bd3 MO |
34 | op = out; |
35 | ip = in; | |
36 | ii = ip; | |
37 | ip += ti < 4 ? 4 - ti : 0; | |
64c70b1c RP |
38 | |
39 | for (;;) { | |
5ee4014a | 40 | const unsigned char *m_pos = NULL; |
8b975bd3 MO |
41 | size_t t, m_len, m_off; |
42 | u32 dv; | |
5ee4014a | 43 | u32 run_length = 0; |
64c70b1c | 44 | literal: |
8b975bd3 MO |
45 | ip += 1 + ((ip - ii) >> 5); |
46 | next: | |
64c70b1c RP |
47 | if (unlikely(ip >= ip_end)) |
48 | break; | |
8b975bd3 | 49 | dv = get_unaligned_le32(ip); |
5ee4014a | 50 | |
45ec975e | 51 | if (dv == 0 && bitstream_version) { |
5ee4014a DR |
52 | const unsigned char *ir = ip + 4; |
53 | const unsigned char *limit = ip_end | |
54 | < (ip + MAX_ZERO_RUN_LENGTH + 1) | |
55 | ? ip_end : ip + MAX_ZERO_RUN_LENGTH + 1; | |
56 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && \ | |
57 | defined(LZO_FAST_64BIT_MEMORY_ACCESS) | |
58 | u64 dv64; | |
59 | ||
60 | for (; (ir + 32) <= limit; ir += 32) { | |
61 | dv64 = get_unaligned((u64 *)ir); | |
62 | dv64 |= get_unaligned((u64 *)ir + 1); | |
63 | dv64 |= get_unaligned((u64 *)ir + 2); | |
64 | dv64 |= get_unaligned((u64 *)ir + 3); | |
65 | if (dv64) | |
66 | break; | |
67 | } | |
68 | for (; (ir + 8) <= limit; ir += 8) { | |
69 | dv64 = get_unaligned((u64 *)ir); | |
70 | if (dv64) { | |
71 | # if defined(__LITTLE_ENDIAN) | |
72 | ir += __builtin_ctzll(dv64) >> 3; | |
73 | # elif defined(__BIG_ENDIAN) | |
74 | ir += __builtin_clzll(dv64) >> 3; | |
75 | # else | |
76 | # error "missing endian definition" | |
77 | # endif | |
78 | break; | |
79 | } | |
80 | } | |
81 | #else | |
82 | while ((ir < (const unsigned char *) | |
83 | ALIGN((uintptr_t)ir, 4)) && | |
84 | (ir < limit) && (*ir == 0)) | |
85 | ir++; | |
86 | for (; (ir + 4) <= limit; ir += 4) { | |
87 | dv = *((u32 *)ir); | |
88 | if (dv) { | |
89 | # if defined(__LITTLE_ENDIAN) | |
90 | ir += __builtin_ctz(dv) >> 3; | |
91 | # elif defined(__BIG_ENDIAN) | |
92 | ir += __builtin_clz(dv) >> 3; | |
93 | # else | |
94 | # error "missing endian definition" | |
95 | # endif | |
96 | break; | |
97 | } | |
98 | } | |
99 | #endif | |
100 | while (likely(ir < limit) && unlikely(*ir == 0)) | |
101 | ir++; | |
102 | run_length = ir - ip; | |
103 | if (run_length > MAX_ZERO_RUN_LENGTH) | |
104 | run_length = MAX_ZERO_RUN_LENGTH; | |
105 | } else { | |
106 | t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK; | |
107 | m_pos = in + dict[t]; | |
108 | dict[t] = (lzo_dict_t) (ip - in); | |
109 | if (unlikely(dv != get_unaligned_le32(m_pos))) | |
110 | goto literal; | |
111 | } | |
64c70b1c | 112 | |
8b975bd3 MO |
113 | ii -= ti; |
114 | ti = 0; | |
115 | t = ip - ii; | |
116 | if (t != 0) { | |
64c70b1c | 117 | if (t <= 3) { |
5ee4014a | 118 | op[*state_offset] |= t; |
8b975bd3 MO |
119 | COPY4(op, ii); |
120 | op += t; | |
121 | } else if (t <= 16) { | |
64c70b1c | 122 | *op++ = (t - 3); |
8b975bd3 MO |
123 | COPY8(op, ii); |
124 | COPY8(op + 8, ii + 8); | |
125 | op += t; | |
64c70b1c | 126 | } else { |
8b975bd3 MO |
127 | if (t <= 18) { |
128 | *op++ = (t - 3); | |
129 | } else { | |
130 | size_t tt = t - 18; | |
64c70b1c | 131 | *op++ = 0; |
8b975bd3 MO |
132 | while (unlikely(tt > 255)) { |
133 | tt -= 255; | |
134 | *op++ = 0; | |
135 | } | |
136 | *op++ = tt; | |
64c70b1c | 137 | } |
8b975bd3 MO |
138 | do { |
139 | COPY8(op, ii); | |
140 | COPY8(op + 8, ii + 8); | |
141 | op += 16; | |
142 | ii += 16; | |
143 | t -= 16; | |
144 | } while (t >= 16); | |
145 | if (t > 0) do { | |
146 | *op++ = *ii++; | |
147 | } while (--t > 0); | |
64c70b1c | 148 | } |
64c70b1c RP |
149 | } |
150 | ||
5ee4014a DR |
151 | if (unlikely(run_length)) { |
152 | ip += run_length; | |
153 | run_length -= MIN_ZERO_RUN_LENGTH; | |
154 | put_unaligned_le32((run_length << 21) | 0xfffc18 | |
155 | | (run_length & 0x7), op); | |
156 | op += 4; | |
157 | run_length = 0; | |
158 | *state_offset = -3; | |
159 | goto finished_writing_instruction; | |
160 | } | |
161 | ||
8b975bd3 MO |
162 | m_len = 4; |
163 | { | |
164 | #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64) | |
165 | u64 v; | |
166 | v = get_unaligned((const u64 *) (ip + m_len)) ^ | |
167 | get_unaligned((const u64 *) (m_pos + m_len)); | |
168 | if (unlikely(v == 0)) { | |
169 | do { | |
170 | m_len += 8; | |
171 | v = get_unaligned((const u64 *) (ip + m_len)) ^ | |
172 | get_unaligned((const u64 *) (m_pos + m_len)); | |
173 | if (unlikely(ip + m_len >= ip_end)) | |
174 | goto m_len_done; | |
175 | } while (v == 0); | |
176 | } | |
177 | # if defined(__LITTLE_ENDIAN) | |
178 | m_len += (unsigned) __builtin_ctzll(v) / 8; | |
179 | # elif defined(__BIG_ENDIAN) | |
180 | m_len += (unsigned) __builtin_clzll(v) / 8; | |
181 | # else | |
182 | # error "missing endian definition" | |
183 | # endif | |
184 | #elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32) | |
185 | u32 v; | |
186 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | |
187 | get_unaligned((const u32 *) (m_pos + m_len)); | |
188 | if (unlikely(v == 0)) { | |
189 | do { | |
190 | m_len += 4; | |
191 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | |
192 | get_unaligned((const u32 *) (m_pos + m_len)); | |
193 | if (v != 0) | |
194 | break; | |
195 | m_len += 4; | |
196 | v = get_unaligned((const u32 *) (ip + m_len)) ^ | |
197 | get_unaligned((const u32 *) (m_pos + m_len)); | |
198 | if (unlikely(ip + m_len >= ip_end)) | |
199 | goto m_len_done; | |
200 | } while (v == 0); | |
201 | } | |
202 | # if defined(__LITTLE_ENDIAN) | |
203 | m_len += (unsigned) __builtin_ctz(v) / 8; | |
204 | # elif defined(__BIG_ENDIAN) | |
205 | m_len += (unsigned) __builtin_clz(v) / 8; | |
206 | # else | |
207 | # error "missing endian definition" | |
208 | # endif | |
209 | #else | |
210 | if (unlikely(ip[m_len] == m_pos[m_len])) { | |
211 | do { | |
212 | m_len += 1; | |
213 | if (ip[m_len] != m_pos[m_len]) | |
214 | break; | |
215 | m_len += 1; | |
216 | if (ip[m_len] != m_pos[m_len]) | |
217 | break; | |
218 | m_len += 1; | |
219 | if (ip[m_len] != m_pos[m_len]) | |
220 | break; | |
221 | m_len += 1; | |
222 | if (ip[m_len] != m_pos[m_len]) | |
223 | break; | |
224 | m_len += 1; | |
225 | if (ip[m_len] != m_pos[m_len]) | |
226 | break; | |
227 | m_len += 1; | |
228 | if (ip[m_len] != m_pos[m_len]) | |
229 | break; | |
230 | m_len += 1; | |
231 | if (ip[m_len] != m_pos[m_len]) | |
232 | break; | |
233 | m_len += 1; | |
234 | if (unlikely(ip + m_len >= ip_end)) | |
235 | goto m_len_done; | |
236 | } while (ip[m_len] == m_pos[m_len]); | |
237 | } | |
238 | #endif | |
239 | } | |
240 | m_len_done: | |
64c70b1c | 241 | |
8b975bd3 MO |
242 | m_off = ip - m_pos; |
243 | ip += m_len; | |
8b975bd3 MO |
244 | if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { |
245 | m_off -= 1; | |
246 | *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2)); | |
247 | *op++ = (m_off >> 3); | |
248 | } else if (m_off <= M3_MAX_OFFSET) { | |
249 | m_off -= 1; | |
250 | if (m_len <= M3_MAX_LEN) | |
64c70b1c | 251 | *op++ = (M3_MARKER | (m_len - 2)); |
8b975bd3 MO |
252 | else { |
253 | m_len -= M3_MAX_LEN; | |
254 | *op++ = M3_MARKER | 0; | |
255 | while (unlikely(m_len > 255)) { | |
256 | m_len -= 255; | |
257 | *op++ = 0; | |
258 | } | |
259 | *op++ = (m_len); | |
64c70b1c | 260 | } |
8b975bd3 MO |
261 | *op++ = (m_off << 2); |
262 | *op++ = (m_off >> 6); | |
64c70b1c | 263 | } else { |
8b975bd3 MO |
264 | m_off -= 0x4000; |
265 | if (m_len <= M4_MAX_LEN) | |
266 | *op++ = (M4_MARKER | ((m_off >> 11) & 8) | |
64c70b1c | 267 | | (m_len - 2)); |
8b975bd3 MO |
268 | else { |
269 | m_len -= M4_MAX_LEN; | |
270 | *op++ = (M4_MARKER | ((m_off >> 11) & 8)); | |
271 | while (unlikely(m_len > 255)) { | |
272 | m_len -= 255; | |
273 | *op++ = 0; | |
64c70b1c | 274 | } |
8b975bd3 | 275 | *op++ = (m_len); |
64c70b1c | 276 | } |
8b975bd3 | 277 | *op++ = (m_off << 2); |
64c70b1c RP |
278 | *op++ = (m_off >> 6); |
279 | } | |
5ee4014a DR |
280 | *state_offset = -2; |
281 | finished_writing_instruction: | |
282 | ii = ip; | |
8b975bd3 | 283 | goto next; |
64c70b1c | 284 | } |
64c70b1c | 285 | *out_len = op - out; |
8b975bd3 | 286 | return in_end - (ii - ti); |
64c70b1c RP |
287 | } |
288 | ||
45ec975e | 289 | int lzogeneric1x_1_compress(const unsigned char *in, size_t in_len, |
8b975bd3 | 290 | unsigned char *out, size_t *out_len, |
45ec975e | 291 | void *wrkmem, const unsigned char bitstream_version) |
64c70b1c | 292 | { |
8b975bd3 | 293 | const unsigned char *ip = in; |
64c70b1c | 294 | unsigned char *op = out; |
b11ed18e | 295 | unsigned char *data_start; |
8b975bd3 MO |
296 | size_t l = in_len; |
297 | size_t t = 0; | |
5ee4014a | 298 | signed char state_offset = -2; |
45ec975e | 299 | unsigned int m4_max_offset; |
5ee4014a | 300 | |
b11ed18e DR |
301 | // LZO v0 will never write 17 as first byte (except for zero-length |
302 | // input), so this is used to version the bitstream | |
45ec975e DR |
303 | if (bitstream_version > 0) { |
304 | *op++ = 17; | |
305 | *op++ = bitstream_version; | |
306 | m4_max_offset = M4_MAX_OFFSET_V1; | |
307 | } else { | |
308 | m4_max_offset = M4_MAX_OFFSET_V0; | |
309 | } | |
64c70b1c | 310 | |
b11ed18e DR |
311 | data_start = op; |
312 | ||
8b975bd3 | 313 | while (l > 20) { |
45ec975e | 314 | size_t ll = l <= (m4_max_offset + 1) ? l : (m4_max_offset + 1); |
8b975bd3 MO |
315 | uintptr_t ll_end = (uintptr_t) ip + ll; |
316 | if ((ll_end + ((t + ll) >> 5)) <= ll_end) | |
317 | break; | |
318 | BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS); | |
319 | memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t)); | |
45ec975e DR |
320 | t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem, |
321 | &state_offset, bitstream_version); | |
8b975bd3 | 322 | ip += ll; |
64c70b1c | 323 | op += *out_len; |
8b975bd3 | 324 | l -= ll; |
64c70b1c | 325 | } |
8b975bd3 | 326 | t += l; |
64c70b1c RP |
327 | |
328 | if (t > 0) { | |
8b975bd3 | 329 | const unsigned char *ii = in + in_len - t; |
64c70b1c | 330 | |
b11ed18e | 331 | if (op == data_start && t <= 238) { |
64c70b1c RP |
332 | *op++ = (17 + t); |
333 | } else if (t <= 3) { | |
5ee4014a | 334 | op[state_offset] |= t; |
64c70b1c RP |
335 | } else if (t <= 18) { |
336 | *op++ = (t - 3); | |
337 | } else { | |
338 | size_t tt = t - 18; | |
64c70b1c RP |
339 | *op++ = 0; |
340 | while (tt > 255) { | |
341 | tt -= 255; | |
342 | *op++ = 0; | |
343 | } | |
64c70b1c RP |
344 | *op++ = tt; |
345 | } | |
8b975bd3 MO |
346 | if (t >= 16) do { |
347 | COPY8(op, ii); | |
348 | COPY8(op + 8, ii + 8); | |
349 | op += 16; | |
350 | ii += 16; | |
351 | t -= 16; | |
352 | } while (t >= 16); | |
353 | if (t > 0) do { | |
64c70b1c RP |
354 | *op++ = *ii++; |
355 | } while (--t > 0); | |
356 | } | |
357 | ||
358 | *op++ = M4_MARKER | 1; | |
359 | *op++ = 0; | |
360 | *op++ = 0; | |
361 | ||
362 | *out_len = op - out; | |
363 | return LZO_E_OK; | |
364 | } | |
45ec975e DR |
365 | |
366 | int lzo1x_1_compress(const unsigned char *in, size_t in_len, | |
367 | unsigned char *out, size_t *out_len, | |
368 | void *wrkmem) | |
369 | { | |
370 | return lzogeneric1x_1_compress(in, in_len, out, out_len, wrkmem, 0); | |
371 | } | |
372 | ||
373 | int lzorle1x_1_compress(const unsigned char *in, size_t in_len, | |
374 | unsigned char *out, size_t *out_len, | |
375 | void *wrkmem) | |
376 | { | |
377 | return lzogeneric1x_1_compress(in, in_len, out, out_len, | |
378 | wrkmem, LZO_VERSION); | |
379 | } | |
380 | ||
64c70b1c | 381 | EXPORT_SYMBOL_GPL(lzo1x_1_compress); |
45ec975e | 382 | EXPORT_SYMBOL_GPL(lzorle1x_1_compress); |
64c70b1c RP |
383 | |
384 | MODULE_LICENSE("GPL"); | |
385 | MODULE_DESCRIPTION("LZO1X-1 Compressor"); |