Commit | Line | Data |
---|---|---|
1a59d1b8 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
4d6d6a2c | 2 | /* |
a97673a1 | 3 | * Glue Code for the AVX assembler implementation of the Cast5 Cipher |
4d6d6a2c JG |
4 | * |
5 | * Copyright (C) 2012 Johannes Goetzfried | |
6 | * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de> | |
4d6d6a2c JG |
7 | */ |
8 | ||
1e63183a | 9 | #include <asm/crypto/glue_helper.h> |
4d6d6a2c JG |
10 | #include <crypto/algapi.h> |
11 | #include <crypto/cast5.h> | |
1e63183a EB |
12 | #include <crypto/internal/simd.h> |
13 | #include <linux/crypto.h> | |
14 | #include <linux/err.h> | |
15 | #include <linux/module.h> | |
16 | #include <linux/types.h> | |
4d6d6a2c JG |
17 | |
18 | #define CAST5_PARALLEL_BLOCKS 16 | |
19 | ||
c12ab20b | 20 | asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst, |
4d6d6a2c | 21 | const u8 *src); |
c12ab20b JK |
22 | asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst, |
23 | const u8 *src); | |
24 | asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst, | |
25 | const u8 *src); | |
26 | asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src, | |
27 | __be64 *iv); | |
4d6d6a2c | 28 | |
1e63183a EB |
29 | static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, |
30 | unsigned int keylen) | |
4d6d6a2c | 31 | { |
1e63183a EB |
32 | return cast5_setkey(&tfm->base, key, keylen); |
33 | } | |
34 | ||
/*
 * Lazily enter the FPU/SIMD region for this walk chunk.  Defers the
 * decision to glue_fpu_begin(), which only pays the FPU save cost when
 * at least CAST5_PARALLEL_BLOCKS blocks are available to process.
 * Returns the updated fpu_enabled state to thread through the walk loop.
 */
static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
				   unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      walk, fpu_enabled, nbytes);
}
41 | ||
/* Leave the FPU/SIMD region if cast5_fpu_begin() actually entered it. */
static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}
46 | ||
/*
 * ECB encrypt or decrypt a request: use the 16-way AVX assembler
 * routines for each full batch of CAST5_PARALLEL_BLOCKS blocks and fall
 * back to the generic one-block routines for the remainder.
 *
 * @req: skcipher request (walked with virtual addresses)
 * @enc: true to encrypt, false to decrypt
 *
 * Returns 0 on success or the error from the skcipher walk.
 */
static int ecb_crypt(struct skcipher_request *req, bool enc)
{
	bool fpu_enabled = false;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u8 *wsrc = walk.src.virt.addr;
		u8 *wdst = walk.dst.virt.addr;

		/* Enter the SIMD region only if this chunk is large enough. */
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			/*
			 * Re-select fn on every walk iteration: a previous
			 * (short) chunk may have left it pointing at the
			 * one-block routine.
			 */
			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}
99 | ||
1e63183a | 100 | static int ecb_encrypt(struct skcipher_request *req) |
4d6d6a2c | 101 | { |
1e63183a | 102 | return ecb_crypt(req, true); |
4d6d6a2c JG |
103 | } |
104 | ||
1e63183a | 105 | static int ecb_decrypt(struct skcipher_request *req) |
4d6d6a2c | 106 | { |
1e63183a | 107 | return ecb_crypt(req, false); |
4d6d6a2c JG |
108 | } |
109 | ||
/*
 * CBC encryption.  Chaining makes each block depend on the previous
 * ciphertext block, so this runs serially with the generic one-block
 * routine (no SIMD batch is possible on the encrypt side).
 *
 * Returns 0 on success or the error from the skcipher walk.
 */
static int cbc_encrypt(struct skcipher_request *req)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u64 *src = (u64 *)walk.src.virt.addr;
		u64 *dst = (u64 *)walk.dst.virt.addr;
		/* iv tracks the most recent ciphertext block (initially walk.iv). */
		u64 *iv = (u64 *)walk.iv;

		do {
			/* C_i = E(P_i ^ C_{i-1}), encrypted in place in dst. */
			*dst = *src ^ *iv;
			__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
			iv = dst;
			src++;
			dst++;
			nbytes -= bsize;
		} while (nbytes >= bsize);

		/* Save the last ciphertext block as the IV for the next chunk. */
		*(u64 *)walk.iv = *iv;
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
141 | ||
1e63183a EB |
/*
 * CBC-decrypt one walk chunk.  Works backwards from the last block so
 * the 16-way routine can decrypt a contiguous batch and each plaintext
 * block can then be XORed with the preceding ciphertext block, which is
 * still intact in src.  Caller must have entered the FPU region.
 *
 * Returns the number of bytes left unprocessed (the walk tail).
 */
static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
				  struct skcipher_walk *walk)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	/* The last ciphertext block becomes the IV for the next chunk. */
	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			/* Step back to the start of a 16-block batch. */
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			/* Un-chain against the previous ciphertext block. */
			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	/* First block of the chunk is chained against the incoming IV. */
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}
195 | ||
/*
 * CBC decryption entry point: walk the request and hand each chunk to
 * __cbc_decrypt() inside a (lazily entered) FPU region.
 *
 * Returns 0 on success or the error from the skcipher walk.
 */
static int cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __cbc_decrypt(ctx, &walk);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}
216 | ||
/*
 * Handle the final partial CTR block (walk->nbytes < CAST5_BLOCK_SIZE):
 * encrypt the counter block into a keystream buffer, XOR just the
 * remaining bytes into dst, and advance the counter.
 */
static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
{
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor_cpy(dst, keystream, src, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}
230 | ||
1e63183a EB |
/*
 * CTR-process the full blocks of one walk chunk: 16-way AVX batches
 * first, then the remaining whole blocks one at a time with the generic
 * routine.  The counter lives big-endian in walk->iv and is advanced by
 * the assembler routine / be64_add_cpu() as blocks are consumed.
 * Caller must have entered the FPU region.
 *
 * Returns the number of bytes left unprocessed (< CAST5_BLOCK_SIZE).
 */
static unsigned int __ctr_crypt(struct skcipher_walk *walk,
				struct cast5_ctx *ctx)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		/* CTR may be in-place or out-of-place; copy first if needed. */
		if (dst != src)
			*dst = *src;

		/* Snapshot the counter, then bump it for the next block. */
		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		/* Keystream = E(counter); XOR it onto the data block. */
		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}
275 | ||
/*
 * CTR entry point (used for both encrypt and decrypt).  Walks all full
 * blocks through __ctr_crypt() inside the FPU region, then handles a
 * trailing partial block, if any, with ctr_crypt_final().
 *
 * Returns 0 on success or the error from the skcipher walk.
 */
static int ctr_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	/* Full-block portion; stops once less than one block remains. */
	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __ctr_crypt(&walk, ctx);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	/* Trailing partial block, processed without SIMD. */
	if (walk.nbytes) {
		ctr_crypt_final(&walk, ctx);
		err = skcipher_walk_done(&walk, 0);
	}

	return err;
}
302 | ||
1e63183a EB |
/*
 * Internal (CRYPTO_ALG_INTERNAL) skcipher algorithms.  Userspace and
 * other kernel users reach them only through the simd wrappers
 * registered in cast5_init(), which guard FPU usage in all contexts.
 */
static struct skcipher_alg cast5_algs[] = {
	{
		.base.cra_name		= "__ecb(cast5)",
		.base.cra_driver_name	= "__ecb-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ecb_encrypt,
		.decrypt		= ecb_decrypt,
	}, {
		.base.cra_name		= "__cbc(cast5)",
		.base.cra_driver_name	= "__cbc-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= cbc_encrypt,
		.decrypt		= cbc_decrypt,
	}, {
		.base.cra_name		= "__ctr(cast5)",
		.base.cra_driver_name	= "__ctr-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		/* CTR is a stream mode: blocksize 1, chunksize = cipher block. */
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.chunksize		= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ctr_crypt,
		.decrypt		= ctr_crypt,
	}
};
4d6d6a2c | 348 | |
/* simd wrapper handles, one per cast5_algs entry, filled in at init time. */
static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];
4d6d6a2c JG |
350 | |
/*
 * Module init: refuse to load unless the CPU supports the SSE and YMM
 * (AVX) xsave features required by the assembler code, then register
 * the internal algorithms together with their simd wrappers.
 */
static int __init cast5_init(void)
{
	const char *feature_name;

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
				&feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return simd_register_skciphers_compat(cast5_algs,
					      ARRAY_SIZE(cast5_algs),
					      cast5_simd_algs);
}
365 | ||
/* Module exit: unregister the simd wrappers and internal algorithms. */
static void __exit cast5_exit(void)
{
	simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
				  cast5_simd_algs);
}
371 | ||
372 | module_init(cast5_init); | |
373 | module_exit(cast5_exit); | |
374 | ||
375 | MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized"); | |
376 | MODULE_LICENSE("GPL"); | |
5d26a105 | 377 | MODULE_ALIAS_CRYPTO("cast5"); |