Commit | Line | Data |
---|---|---|
b36d8c09 AB |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * ARM NEON accelerated ChaCha and XChaCha stream ciphers, | |
4 | * including ChaCha20 (RFC7539) | |
5 | * | |
6 | * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> | |
7 | * Copyright (C) 2015 Martin Willi | |
8 | */ | |
9 | ||
10 | #include <crypto/algapi.h> | |
11 | #include <crypto/internal/chacha.h> | |
12 | #include <crypto/internal/simd.h> | |
13 | #include <crypto/internal/skcipher.h> | |
a44a3430 | 14 | #include <linux/jump_label.h> |
b36d8c09 AB |
15 | #include <linux/kernel.h> |
16 | #include <linux/module.h> | |
17 | ||
18 | #include <asm/cputype.h> | |
19 | #include <asm/hwcap.h> | |
20 | #include <asm/neon.h> | |
21 | #include <asm/simd.h> | |
22 | ||
23 | asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src, | |
24 | int nrounds); | |
25 | asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src, | |
86cd97ec | 26 | int nrounds, unsigned int nbytes); |
b36d8c09 AB |
27 | asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds); |
28 | asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds); | |
29 | ||
30 | asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes, | |
31 | const u32 *state, int nrounds); | |
32 | ||
a44a3430 AB |
33 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon); |
34 | ||
b36d8c09 AB |
35 | static inline bool neon_usable(void) |
36 | { | |
a44a3430 | 37 | return static_branch_likely(&use_neon) && crypto_simd_usable(); |
b36d8c09 AB |
38 | } |
39 | ||
40 | static void chacha_doneon(u32 *state, u8 *dst, const u8 *src, | |
41 | unsigned int bytes, int nrounds) | |
42 | { | |
43 | u8 buf[CHACHA_BLOCK_SIZE]; | |
44 | ||
86cd97ec AB |
45 | while (bytes > CHACHA_BLOCK_SIZE) { |
46 | unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U); | |
47 | ||
48 | chacha_4block_xor_neon(state, dst, src, nrounds, l); | |
49 | bytes -= l; | |
50 | src += l; | |
51 | dst += l; | |
52 | state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE); | |
b36d8c09 AB |
53 | } |
54 | if (bytes) { | |
86cd97ec AB |
55 | const u8 *s = src; |
56 | u8 *d = dst; | |
57 | ||
58 | if (bytes != CHACHA_BLOCK_SIZE) | |
59 | s = d = memcpy(buf, src, bytes); | |
60 | chacha_block_xor_neon(state, d, s, nrounds); | |
61 | if (d != dst) | |
62 | memcpy(dst, buf, bytes); | |
fd16931a | 63 | state[12]++; |
b36d8c09 AB |
64 | } |
65 | } | |
66 | ||
a44a3430 AB |
67 | void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) |
68 | { | |
69 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) { | |
70 | hchacha_block_arm(state, stream, nrounds); | |
71 | } else { | |
72 | kernel_neon_begin(); | |
73 | hchacha_block_neon(state, stream, nrounds); | |
74 | kernel_neon_end(); | |
75 | } | |
76 | } | |
77 | EXPORT_SYMBOL(hchacha_block_arch); | |
78 | ||
79 | void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) | |
80 | { | |
81 | chacha_init_generic(state, key, iv); | |
82 | } | |
83 | EXPORT_SYMBOL(chacha_init_arch); | |
84 | ||
85 | void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, | |
86 | int nrounds) | |
87 | { | |
88 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() || | |
89 | bytes <= CHACHA_BLOCK_SIZE) { | |
90 | chacha_doarm(dst, src, bytes, state, nrounds); | |
91 | state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE); | |
92 | return; | |
93 | } | |
94 | ||
706024a5 JD |
95 | do { |
96 | unsigned int todo = min_t(unsigned int, bytes, SZ_4K); | |
97 | ||
98 | kernel_neon_begin(); | |
99 | chacha_doneon(state, dst, src, todo, nrounds); | |
100 | kernel_neon_end(); | |
101 | ||
102 | bytes -= todo; | |
103 | src += todo; | |
104 | dst += todo; | |
105 | } while (bytes); | |
a44a3430 AB |
106 | } |
107 | EXPORT_SYMBOL(chacha_crypt_arch); | |
108 | ||
b36d8c09 AB |
109 | static int chacha_stream_xor(struct skcipher_request *req, |
110 | const struct chacha_ctx *ctx, const u8 *iv, | |
111 | bool neon) | |
112 | { | |
113 | struct skcipher_walk walk; | |
114 | u32 state[16]; | |
115 | int err; | |
116 | ||
117 | err = skcipher_walk_virt(&walk, req, false); | |
118 | ||
119 | chacha_init_generic(state, ctx->key, iv); | |
120 | ||
121 | while (walk.nbytes > 0) { | |
122 | unsigned int nbytes = walk.nbytes; | |
123 | ||
124 | if (nbytes < walk.total) | |
125 | nbytes = round_down(nbytes, walk.stride); | |
126 | ||
0bc81767 | 127 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
b36d8c09 AB |
128 | chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr, |
129 | nbytes, state, ctx->nrounds); | |
130 | state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE); | |
131 | } else { | |
132 | kernel_neon_begin(); | |
133 | chacha_doneon(state, walk.dst.virt.addr, | |
134 | walk.src.virt.addr, nbytes, ctx->nrounds); | |
135 | kernel_neon_end(); | |
136 | } | |
137 | err = skcipher_walk_done(&walk, walk.nbytes - nbytes); | |
138 | } | |
139 | ||
140 | return err; | |
141 | } | |
142 | ||
143 | static int do_chacha(struct skcipher_request *req, bool neon) | |
144 | { | |
145 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); | |
146 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); | |
147 | ||
148 | return chacha_stream_xor(req, ctx, req->iv, neon); | |
149 | } | |
150 | ||
151 | static int chacha_arm(struct skcipher_request *req) | |
152 | { | |
153 | return do_chacha(req, false); | |
154 | } | |
155 | ||
156 | static int chacha_neon(struct skcipher_request *req) | |
157 | { | |
158 | return do_chacha(req, neon_usable()); | |
159 | } | |
160 | ||
161 | static int do_xchacha(struct skcipher_request *req, bool neon) | |
162 | { | |
163 | struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); | |
164 | struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); | |
165 | struct chacha_ctx subctx; | |
166 | u32 state[16]; | |
167 | u8 real_iv[16]; | |
168 | ||
169 | chacha_init_generic(state, ctx->key, req->iv); | |
170 | ||
0bc81767 | 171 | if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon) { |
b36d8c09 AB |
172 | hchacha_block_arm(state, subctx.key, ctx->nrounds); |
173 | } else { | |
174 | kernel_neon_begin(); | |
175 | hchacha_block_neon(state, subctx.key, ctx->nrounds); | |
176 | kernel_neon_end(); | |
177 | } | |
178 | subctx.nrounds = ctx->nrounds; | |
179 | ||
180 | memcpy(&real_iv[0], req->iv + 24, 8); | |
181 | memcpy(&real_iv[8], req->iv + 16, 8); | |
182 | return chacha_stream_xor(req, &subctx, real_iv, neon); | |
183 | } | |
184 | ||
185 | static int xchacha_arm(struct skcipher_request *req) | |
186 | { | |
187 | return do_xchacha(req, false); | |
188 | } | |
189 | ||
190 | static int xchacha_neon(struct skcipher_request *req) | |
191 | { | |
192 | return do_xchacha(req, neon_usable()); | |
193 | } | |
194 | ||
195 | static struct skcipher_alg arm_algs[] = { | |
196 | { | |
197 | .base.cra_name = "chacha20", | |
198 | .base.cra_driver_name = "chacha20-arm", | |
199 | .base.cra_priority = 200, | |
200 | .base.cra_blocksize = 1, | |
201 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
202 | .base.cra_module = THIS_MODULE, | |
203 | ||
204 | .min_keysize = CHACHA_KEY_SIZE, | |
205 | .max_keysize = CHACHA_KEY_SIZE, | |
206 | .ivsize = CHACHA_IV_SIZE, | |
207 | .chunksize = CHACHA_BLOCK_SIZE, | |
208 | .setkey = chacha20_setkey, | |
209 | .encrypt = chacha_arm, | |
210 | .decrypt = chacha_arm, | |
211 | }, { | |
212 | .base.cra_name = "xchacha20", | |
213 | .base.cra_driver_name = "xchacha20-arm", | |
214 | .base.cra_priority = 200, | |
215 | .base.cra_blocksize = 1, | |
216 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
217 | .base.cra_module = THIS_MODULE, | |
218 | ||
219 | .min_keysize = CHACHA_KEY_SIZE, | |
220 | .max_keysize = CHACHA_KEY_SIZE, | |
221 | .ivsize = XCHACHA_IV_SIZE, | |
222 | .chunksize = CHACHA_BLOCK_SIZE, | |
223 | .setkey = chacha20_setkey, | |
224 | .encrypt = xchacha_arm, | |
225 | .decrypt = xchacha_arm, | |
226 | }, { | |
227 | .base.cra_name = "xchacha12", | |
228 | .base.cra_driver_name = "xchacha12-arm", | |
229 | .base.cra_priority = 200, | |
230 | .base.cra_blocksize = 1, | |
231 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
232 | .base.cra_module = THIS_MODULE, | |
233 | ||
234 | .min_keysize = CHACHA_KEY_SIZE, | |
235 | .max_keysize = CHACHA_KEY_SIZE, | |
236 | .ivsize = XCHACHA_IV_SIZE, | |
237 | .chunksize = CHACHA_BLOCK_SIZE, | |
238 | .setkey = chacha12_setkey, | |
239 | .encrypt = xchacha_arm, | |
240 | .decrypt = xchacha_arm, | |
241 | }, | |
242 | }; | |
243 | ||
244 | static struct skcipher_alg neon_algs[] = { | |
245 | { | |
246 | .base.cra_name = "chacha20", | |
247 | .base.cra_driver_name = "chacha20-neon", | |
248 | .base.cra_priority = 300, | |
249 | .base.cra_blocksize = 1, | |
250 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
251 | .base.cra_module = THIS_MODULE, | |
252 | ||
253 | .min_keysize = CHACHA_KEY_SIZE, | |
254 | .max_keysize = CHACHA_KEY_SIZE, | |
255 | .ivsize = CHACHA_IV_SIZE, | |
256 | .chunksize = CHACHA_BLOCK_SIZE, | |
257 | .walksize = 4 * CHACHA_BLOCK_SIZE, | |
258 | .setkey = chacha20_setkey, | |
259 | .encrypt = chacha_neon, | |
260 | .decrypt = chacha_neon, | |
261 | }, { | |
262 | .base.cra_name = "xchacha20", | |
263 | .base.cra_driver_name = "xchacha20-neon", | |
264 | .base.cra_priority = 300, | |
265 | .base.cra_blocksize = 1, | |
266 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
267 | .base.cra_module = THIS_MODULE, | |
268 | ||
269 | .min_keysize = CHACHA_KEY_SIZE, | |
270 | .max_keysize = CHACHA_KEY_SIZE, | |
271 | .ivsize = XCHACHA_IV_SIZE, | |
272 | .chunksize = CHACHA_BLOCK_SIZE, | |
273 | .walksize = 4 * CHACHA_BLOCK_SIZE, | |
274 | .setkey = chacha20_setkey, | |
275 | .encrypt = xchacha_neon, | |
276 | .decrypt = xchacha_neon, | |
277 | }, { | |
278 | .base.cra_name = "xchacha12", | |
279 | .base.cra_driver_name = "xchacha12-neon", | |
280 | .base.cra_priority = 300, | |
281 | .base.cra_blocksize = 1, | |
282 | .base.cra_ctxsize = sizeof(struct chacha_ctx), | |
283 | .base.cra_module = THIS_MODULE, | |
284 | ||
285 | .min_keysize = CHACHA_KEY_SIZE, | |
286 | .max_keysize = CHACHA_KEY_SIZE, | |
287 | .ivsize = XCHACHA_IV_SIZE, | |
288 | .chunksize = CHACHA_BLOCK_SIZE, | |
289 | .walksize = 4 * CHACHA_BLOCK_SIZE, | |
290 | .setkey = chacha12_setkey, | |
291 | .encrypt = xchacha_neon, | |
292 | .decrypt = xchacha_neon, | |
293 | } | |
294 | }; | |
295 | ||
296 | static int __init chacha_simd_mod_init(void) | |
297 | { | |
8394bfec | 298 | int err = 0; |
b36d8c09 | 299 | |
8394bfec JD |
300 | if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { |
301 | err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); | |
302 | if (err) | |
303 | return err; | |
304 | } | |
b36d8c09 AB |
305 | |
306 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) { | |
307 | int i; | |
308 | ||
309 | switch (read_cpuid_part()) { | |
310 | case ARM_CPU_PART_CORTEX_A7: | |
311 | case ARM_CPU_PART_CORTEX_A5: | |
312 | /* | |
313 | * The Cortex-A7 and Cortex-A5 do not perform well with | |
314 | * the NEON implementation but do incredibly with the | |
315 | * scalar one and use less power. | |
316 | */ | |
317 | for (i = 0; i < ARRAY_SIZE(neon_algs); i++) | |
318 | neon_algs[i].base.cra_priority = 0; | |
319 | break; | |
a44a3430 AB |
320 | default: |
321 | static_branch_enable(&use_neon); | |
b36d8c09 AB |
322 | } |
323 | ||
8394bfec JD |
324 | if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { |
325 | err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); | |
326 | if (err) | |
327 | crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); | |
328 | } | |
b36d8c09 AB |
329 | } |
330 | return err; | |
331 | } | |
332 | ||
333 | static void __exit chacha_simd_mod_fini(void) | |
334 | { | |
8394bfec JD |
335 | if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) { |
336 | crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs)); | |
337 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) | |
338 | crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs)); | |
339 | } | |
b36d8c09 AB |
340 | } |
341 | ||
342 | module_init(chacha_simd_mod_init); | |
343 | module_exit(chacha_simd_mod_fini); | |
344 | ||
345 | MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)"); | |
346 | MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); | |
347 | MODULE_LICENSE("GPL v2"); | |
348 | MODULE_ALIAS_CRYPTO("chacha20"); | |
349 | MODULE_ALIAS_CRYPTO("chacha20-arm"); | |
350 | MODULE_ALIAS_CRYPTO("xchacha20"); | |
351 | MODULE_ALIAS_CRYPTO("xchacha20-arm"); | |
352 | MODULE_ALIAS_CRYPTO("xchacha12"); | |
353 | MODULE_ALIAS_CRYPTO("xchacha12-arm"); | |
354 | #ifdef CONFIG_KERNEL_MODE_NEON | |
355 | MODULE_ALIAS_CRYPTO("chacha20-neon"); | |
356 | MODULE_ALIAS_CRYPTO("xchacha20-neon"); | |
357 | MODULE_ALIAS_CRYPTO("xchacha12-neon"); | |
358 | #endif |