Commit | Line | Data |
---|---|---|
a6b803b3 AB |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* | |
3 | * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM | |
4 | * | |
5 | * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org> | |
6 | */ | |
7 | ||
8 | #include <asm/hwcap.h> | |
9 | #include <asm/neon.h> | |
10 | #include <asm/simd.h> | |
11 | #include <asm/unaligned.h> | |
12 | #include <crypto/algapi.h> | |
13 | #include <crypto/internal/hash.h> | |
14 | #include <crypto/internal/poly1305.h> | |
15 | #include <crypto/internal/simd.h> | |
16 | #include <linux/cpufeature.h> | |
17 | #include <linux/crypto.h> | |
18 | #include <linux/jump_label.h> | |
19 | #include <linux/module.h> | |
20 | ||
21 | void poly1305_init_arm(void *state, const u8 *key); | |
22 | void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit); | |
51982ea0 | 23 | void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit); |
31899908 | 24 | void poly1305_emit_arm(void *state, u8 *digest, const u32 *nonce); |
a6b803b3 AB |
25 | |
26 | void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit) | |
27 | { | |
28 | } | |
29 | ||
30 | static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); | |
31 | ||
8d195e7a | 32 | void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE]) |
a6b803b3 AB |
33 | { |
34 | poly1305_init_arm(&dctx->h, key); | |
35 | dctx->s[0] = get_unaligned_le32(key + 16); | |
36 | dctx->s[1] = get_unaligned_le32(key + 20); | |
37 | dctx->s[2] = get_unaligned_le32(key + 24); | |
38 | dctx->s[3] = get_unaligned_le32(key + 28); | |
39 | dctx->buflen = 0; | |
40 | } | |
41 | EXPORT_SYMBOL(poly1305_init_arch); | |
42 | ||
43 | static int arm_poly1305_init(struct shash_desc *desc) | |
44 | { | |
45 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); | |
46 | ||
47 | dctx->buflen = 0; | |
48 | dctx->rset = 0; | |
49 | dctx->sset = false; | |
50 | ||
51 | return 0; | |
52 | } | |
53 | ||
54 | static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, | |
55 | u32 len, u32 hibit, bool do_neon) | |
56 | { | |
57 | if (unlikely(!dctx->sset)) { | |
58 | if (!dctx->rset) { | |
59 | poly1305_init_arm(&dctx->h, src); | |
60 | src += POLY1305_BLOCK_SIZE; | |
61 | len -= POLY1305_BLOCK_SIZE; | |
62 | dctx->rset = 1; | |
63 | } | |
64 | if (len >= POLY1305_BLOCK_SIZE) { | |
65 | dctx->s[0] = get_unaligned_le32(src + 0); | |
66 | dctx->s[1] = get_unaligned_le32(src + 4); | |
67 | dctx->s[2] = get_unaligned_le32(src + 8); | |
68 | dctx->s[3] = get_unaligned_le32(src + 12); | |
69 | src += POLY1305_BLOCK_SIZE; | |
70 | len -= POLY1305_BLOCK_SIZE; | |
71 | dctx->sset = true; | |
72 | } | |
73 | if (len < POLY1305_BLOCK_SIZE) | |
74 | return; | |
75 | } | |
76 | ||
77 | len &= ~(POLY1305_BLOCK_SIZE - 1); | |
78 | ||
79 | if (static_branch_likely(&have_neon) && likely(do_neon)) | |
80 | poly1305_blocks_neon(&dctx->h, src, len, hibit); | |
81 | else | |
82 | poly1305_blocks_arm(&dctx->h, src, len, hibit); | |
83 | } | |
84 | ||
85 | static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx, | |
86 | const u8 *src, u32 len, bool do_neon) | |
87 | { | |
88 | if (unlikely(dctx->buflen)) { | |
89 | u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen); | |
90 | ||
91 | memcpy(dctx->buf + dctx->buflen, src, bytes); | |
92 | src += bytes; | |
93 | len -= bytes; | |
94 | dctx->buflen += bytes; | |
95 | ||
96 | if (dctx->buflen == POLY1305_BLOCK_SIZE) { | |
97 | arm_poly1305_blocks(dctx, dctx->buf, | |
98 | POLY1305_BLOCK_SIZE, 1, false); | |
99 | dctx->buflen = 0; | |
100 | } | |
101 | } | |
102 | ||
103 | if (likely(len >= POLY1305_BLOCK_SIZE)) { | |
104 | arm_poly1305_blocks(dctx, src, len, 1, do_neon); | |
105 | src += round_down(len, POLY1305_BLOCK_SIZE); | |
106 | len %= POLY1305_BLOCK_SIZE; | |
107 | } | |
108 | ||
109 | if (unlikely(len)) { | |
110 | dctx->buflen = len; | |
111 | memcpy(dctx->buf, src, len); | |
112 | } | |
113 | } | |
114 | ||
115 | static int arm_poly1305_update(struct shash_desc *desc, | |
116 | const u8 *src, unsigned int srclen) | |
117 | { | |
118 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); | |
119 | ||
120 | arm_poly1305_do_update(dctx, src, srclen, false); | |
121 | return 0; | |
122 | } | |
123 | ||
124 | static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc, | |
125 | const u8 *src, | |
126 | unsigned int srclen) | |
127 | { | |
128 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); | |
129 | bool do_neon = crypto_simd_usable() && srclen > 128; | |
130 | ||
131 | if (static_branch_likely(&have_neon) && do_neon) | |
132 | kernel_neon_begin(); | |
133 | arm_poly1305_do_update(dctx, src, srclen, do_neon); | |
134 | if (static_branch_likely(&have_neon) && do_neon) | |
135 | kernel_neon_end(); | |
136 | return 0; | |
137 | } | |
138 | ||
139 | void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, | |
140 | unsigned int nbytes) | |
141 | { | |
142 | bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && | |
143 | crypto_simd_usable(); | |
144 | ||
145 | if (unlikely(dctx->buflen)) { | |
146 | u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); | |
147 | ||
148 | memcpy(dctx->buf + dctx->buflen, src, bytes); | |
149 | src += bytes; | |
150 | nbytes -= bytes; | |
151 | dctx->buflen += bytes; | |
152 | ||
153 | if (dctx->buflen == POLY1305_BLOCK_SIZE) { | |
154 | poly1305_blocks_arm(&dctx->h, dctx->buf, | |
155 | POLY1305_BLOCK_SIZE, 1); | |
156 | dctx->buflen = 0; | |
157 | } | |
158 | } | |
159 | ||
160 | if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { | |
161 | unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); | |
162 | ||
163 | if (static_branch_likely(&have_neon) && do_neon) { | |
706024a5 JD |
164 | do { |
165 | unsigned int todo = min_t(unsigned int, len, SZ_4K); | |
166 | ||
167 | kernel_neon_begin(); | |
168 | poly1305_blocks_neon(&dctx->h, src, todo, 1); | |
169 | kernel_neon_end(); | |
170 | ||
171 | len -= todo; | |
172 | src += todo; | |
173 | } while (len); | |
a6b803b3 AB |
174 | } else { |
175 | poly1305_blocks_arm(&dctx->h, src, len, 1); | |
706024a5 | 176 | src += len; |
a6b803b3 | 177 | } |
a6b803b3 AB |
178 | nbytes %= POLY1305_BLOCK_SIZE; |
179 | } | |
180 | ||
181 | if (unlikely(nbytes)) { | |
182 | dctx->buflen = nbytes; | |
183 | memcpy(dctx->buf, src, nbytes); | |
184 | } | |
185 | } | |
186 | EXPORT_SYMBOL(poly1305_update_arch); | |
187 | ||
188 | void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) | |
189 | { | |
a6b803b3 AB |
190 | if (unlikely(dctx->buflen)) { |
191 | dctx->buf[dctx->buflen++] = 1; | |
192 | memset(dctx->buf + dctx->buflen, 0, | |
193 | POLY1305_BLOCK_SIZE - dctx->buflen); | |
194 | poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); | |
195 | } | |
196 | ||
31899908 | 197 | poly1305_emit_arm(&dctx->h, dst, dctx->s); |
a6b803b3 AB |
198 | *dctx = (struct poly1305_desc_ctx){}; |
199 | } | |
200 | EXPORT_SYMBOL(poly1305_final_arch); | |
201 | ||
202 | static int arm_poly1305_final(struct shash_desc *desc, u8 *dst) | |
203 | { | |
204 | struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); | |
205 | ||
206 | if (unlikely(!dctx->sset)) | |
207 | return -ENOKEY; | |
208 | ||
209 | poly1305_final_arch(dctx, dst); | |
210 | return 0; | |
211 | } | |
212 | ||
213 | static struct shash_alg arm_poly1305_algs[] = {{ | |
214 | .init = arm_poly1305_init, | |
215 | .update = arm_poly1305_update, | |
216 | .final = arm_poly1305_final, | |
217 | .digestsize = POLY1305_DIGEST_SIZE, | |
218 | .descsize = sizeof(struct poly1305_desc_ctx), | |
219 | ||
220 | .base.cra_name = "poly1305", | |
221 | .base.cra_driver_name = "poly1305-arm", | |
222 | .base.cra_priority = 150, | |
223 | .base.cra_blocksize = POLY1305_BLOCK_SIZE, | |
224 | .base.cra_module = THIS_MODULE, | |
225 | #ifdef CONFIG_KERNEL_MODE_NEON | |
226 | }, { | |
227 | .init = arm_poly1305_init, | |
228 | .update = arm_poly1305_update_neon, | |
229 | .final = arm_poly1305_final, | |
230 | .digestsize = POLY1305_DIGEST_SIZE, | |
231 | .descsize = sizeof(struct poly1305_desc_ctx), | |
232 | ||
233 | .base.cra_name = "poly1305", | |
234 | .base.cra_driver_name = "poly1305-neon", | |
235 | .base.cra_priority = 200, | |
236 | .base.cra_blocksize = POLY1305_BLOCK_SIZE, | |
237 | .base.cra_module = THIS_MODULE, | |
238 | #endif | |
239 | }}; | |
240 | ||
241 | static int __init arm_poly1305_mod_init(void) | |
242 | { | |
243 | if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && | |
244 | (elf_hwcap & HWCAP_NEON)) | |
245 | static_branch_enable(&have_neon); | |
8394bfec | 246 | else if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) |
a6b803b3 AB |
247 | /* register only the first entry */ |
248 | return crypto_register_shash(&arm_poly1305_algs[0]); | |
249 | ||
8394bfec JD |
250 | return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? |
251 | crypto_register_shashes(arm_poly1305_algs, | |
252 | ARRAY_SIZE(arm_poly1305_algs)) : 0; | |
a6b803b3 AB |
253 | } |
254 | ||
255 | static void __exit arm_poly1305_mod_exit(void) | |
256 | { | |
8394bfec JD |
257 | if (!IS_REACHABLE(CONFIG_CRYPTO_HASH)) |
258 | return; | |
a6b803b3 AB |
259 | if (!static_branch_likely(&have_neon)) { |
260 | crypto_unregister_shash(&arm_poly1305_algs[0]); | |
261 | return; | |
262 | } | |
263 | crypto_unregister_shashes(arm_poly1305_algs, | |
264 | ARRAY_SIZE(arm_poly1305_algs)); | |
265 | } | |
266 | ||
267 | module_init(arm_poly1305_mod_init); | |
268 | module_exit(arm_poly1305_mod_exit); | |
269 | ||
270 | MODULE_LICENSE("GPL v2"); | |
271 | MODULE_ALIAS_CRYPTO("poly1305"); | |
272 | MODULE_ALIAS_CRYPTO("poly1305-arm"); | |
273 | MODULE_ALIAS_CRYPTO("poly1305-neon"); |