Commit | Line | Data |
---|---|---|
1a59d1b8 | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
8280daad JK |
2 | /* |
3 | * Glue Code for 3-way parallel assembler optimized version of Twofish | |
4 | * | |
5 | * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> | |
8280daad JK |
6 | */ |
7 | ||
37992fa4 EB |
8 | #include <asm/crypto/glue_helper.h> |
9 | #include <asm/crypto/twofish.h> | |
10 | #include <crypto/algapi.h> | |
11 | #include <crypto/b128ops.h> | |
12 | #include <crypto/internal/skcipher.h> | |
13 | #include <crypto/twofish.h> | |
8280daad JK |
14 | #include <linux/crypto.h> |
15 | #include <linux/init.h> | |
16 | #include <linux/module.h> | |
17 | #include <linux/types.h> | |
81559f9a | 18 | |
/*
 * Export the 3-way encrypt/decrypt primitives to other GPL modules. Neither
 * symbol is defined in this file; presumably they come from the accompanying
 * assembler implementation - confirm against the build.
 */
EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way);
EXPORT_SYMBOL_GPL(twofish_dec_blk_3way);
8280daad | 21 | |
37992fa4 EB |
22 | static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, |
23 | const u8 *key, unsigned int keylen) | |
24 | { | |
25 | return twofish_setkey(&tfm->base, key, keylen); | |
26 | } | |
27 | ||
8280daad JK |
28 | static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, |
29 | const u8 *src) | |
30 | { | |
31 | __twofish_enc_blk_3way(ctx, dst, src, false); | |
32 | } | |
33 | ||
34 | static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, | |
35 | const u8 *src) | |
36 | { | |
37 | __twofish_enc_blk_3way(ctx, dst, src, true); | |
38 | } | |
39 | ||
a7378d4e | 40 | void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) |
8280daad | 41 | { |
414cb5e7 | 42 | u128 ivs[2]; |
8280daad | 43 | |
414cb5e7 JK |
44 | ivs[0] = src[0]; |
45 | ivs[1] = src[1]; | |
8280daad | 46 | |
414cb5e7 | 47 | twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); |
8280daad | 48 | |
414cb5e7 JK |
49 | u128_xor(&dst[1], &dst[1], &ivs[0]); |
50 | u128_xor(&dst[2], &dst[2], &ivs[1]); | |
8280daad | 51 | } |
a7378d4e | 52 | EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); |
8280daad | 53 | |
58990986 | 54 | void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) |
8280daad | 55 | { |
414cb5e7 | 56 | be128 ctrblk; |
8280daad | 57 | |
414cb5e7 JK |
58 | if (dst != src) |
59 | *dst = *src; | |
8280daad | 60 | |
58990986 JK |
61 | le128_to_be128(&ctrblk, iv); |
62 | le128_inc(iv); | |
8280daad | 63 | |
414cb5e7 JK |
64 | twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); |
65 | u128_xor(dst, dst, (u128 *)&ctrblk); | |
8280daad | 66 | } |
a7378d4e | 67 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); |
8280daad | 68 | |
a7378d4e | 69 | void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, |
58990986 | 70 | le128 *iv) |
8280daad | 71 | { |
414cb5e7 | 72 | be128 ctrblks[3]; |
8280daad | 73 | |
414cb5e7 JK |
74 | if (dst != src) { |
75 | dst[0] = src[0]; | |
76 | dst[1] = src[1]; | |
77 | dst[2] = src[2]; | |
8280daad JK |
78 | } |
79 | ||
58990986 JK |
80 | le128_to_be128(&ctrblks[0], iv); |
81 | le128_inc(iv); | |
82 | le128_to_be128(&ctrblks[1], iv); | |
83 | le128_inc(iv); | |
84 | le128_to_be128(&ctrblks[2], iv); | |
85 | le128_inc(iv); | |
414cb5e7 JK |
86 | |
87 | twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); | |
88 | } | |
a7378d4e | 89 | EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); |
414cb5e7 JK |
90 | |
91 | static const struct common_glue_ctx twofish_enc = { | |
92 | .num_funcs = 2, | |
93 | .fpu_blocks_limit = -1, | |
94 | ||
95 | .funcs = { { | |
96 | .num_blocks = 3, | |
97 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } | |
98 | }, { | |
99 | .num_blocks = 1, | |
100 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } | |
101 | } } | |
102 | }; | |
103 | ||
104 | static const struct common_glue_ctx twofish_ctr = { | |
105 | .num_funcs = 2, | |
106 | .fpu_blocks_limit = -1, | |
107 | ||
108 | .funcs = { { | |
109 | .num_blocks = 3, | |
110 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) } | |
111 | }, { | |
112 | .num_blocks = 1, | |
113 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) } | |
114 | } } | |
115 | }; | |
116 | ||
117 | static const struct common_glue_ctx twofish_dec = { | |
118 | .num_funcs = 2, | |
119 | .fpu_blocks_limit = -1, | |
120 | ||
121 | .funcs = { { | |
122 | .num_blocks = 3, | |
123 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } | |
124 | }, { | |
125 | .num_blocks = 1, | |
126 | .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } | |
127 | } } | |
128 | }; | |
129 | ||
130 | static const struct common_glue_ctx twofish_dec_cbc = { | |
131 | .num_funcs = 2, | |
132 | .fpu_blocks_limit = -1, | |
133 | ||
134 | .funcs = { { | |
135 | .num_blocks = 3, | |
136 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } | |
137 | }, { | |
138 | .num_blocks = 1, | |
139 | .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } | |
140 | } } | |
141 | }; | |
8280daad | 142 | |
37992fa4 | 143 | static int ecb_encrypt(struct skcipher_request *req) |
8280daad | 144 | { |
37992fa4 | 145 | return glue_ecb_req_128bit(&twofish_enc, req); |
8280daad JK |
146 | } |
147 | ||
37992fa4 | 148 | static int ecb_decrypt(struct skcipher_request *req) |
8280daad | 149 | { |
37992fa4 | 150 | return glue_ecb_req_128bit(&twofish_dec, req); |
8280daad JK |
151 | } |
152 | ||
37992fa4 | 153 | static int cbc_encrypt(struct skcipher_request *req) |
8280daad | 154 | { |
37992fa4 EB |
155 | return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), |
156 | req); | |
8280daad JK |
157 | } |
158 | ||
37992fa4 | 159 | static int cbc_decrypt(struct skcipher_request *req) |
8280daad | 160 | { |
37992fa4 | 161 | return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); |
8280daad JK |
162 | } |
163 | ||
37992fa4 | 164 | static int ctr_crypt(struct skcipher_request *req) |
8280daad | 165 | { |
37992fa4 | 166 | return glue_ctr_req_128bit(&twofish_ctr, req); |
8280daad JK |
167 | } |
168 | ||
37992fa4 EB |
169 | static struct skcipher_alg tf_skciphers[] = { |
170 | { | |
171 | .base.cra_name = "ecb(twofish)", | |
172 | .base.cra_driver_name = "ecb-twofish-3way", | |
173 | .base.cra_priority = 300, | |
174 | .base.cra_blocksize = TF_BLOCK_SIZE, | |
175 | .base.cra_ctxsize = sizeof(struct twofish_ctx), | |
176 | .base.cra_module = THIS_MODULE, | |
177 | .min_keysize = TF_MIN_KEY_SIZE, | |
178 | .max_keysize = TF_MAX_KEY_SIZE, | |
179 | .setkey = twofish_setkey_skcipher, | |
180 | .encrypt = ecb_encrypt, | |
181 | .decrypt = ecb_decrypt, | |
182 | }, { | |
183 | .base.cra_name = "cbc(twofish)", | |
184 | .base.cra_driver_name = "cbc-twofish-3way", | |
185 | .base.cra_priority = 300, | |
186 | .base.cra_blocksize = TF_BLOCK_SIZE, | |
187 | .base.cra_ctxsize = sizeof(struct twofish_ctx), | |
188 | .base.cra_module = THIS_MODULE, | |
189 | .min_keysize = TF_MIN_KEY_SIZE, | |
190 | .max_keysize = TF_MAX_KEY_SIZE, | |
191 | .ivsize = TF_BLOCK_SIZE, | |
192 | .setkey = twofish_setkey_skcipher, | |
193 | .encrypt = cbc_encrypt, | |
194 | .decrypt = cbc_decrypt, | |
195 | }, { | |
196 | .base.cra_name = "ctr(twofish)", | |
197 | .base.cra_driver_name = "ctr-twofish-3way", | |
198 | .base.cra_priority = 300, | |
199 | .base.cra_blocksize = 1, | |
200 | .base.cra_ctxsize = sizeof(struct twofish_ctx), | |
201 | .base.cra_module = THIS_MODULE, | |
202 | .min_keysize = TF_MIN_KEY_SIZE, | |
203 | .max_keysize = TF_MAX_KEY_SIZE, | |
204 | .ivsize = TF_BLOCK_SIZE, | |
205 | .chunksize = TF_BLOCK_SIZE, | |
206 | .setkey = twofish_setkey_skcipher, | |
207 | .encrypt = ctr_crypt, | |
208 | .decrypt = ctr_crypt, | |
53709dde | 209 | }, |
37992fa4 | 210 | }; |
bae6d303 | 211 | |
a522ee85 JK |
212 | static bool is_blacklisted_cpu(void) |
213 | { | |
214 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | |
215 | return false; | |
216 | ||
217 | if (boot_cpu_data.x86 == 0x06 && | |
218 | (boot_cpu_data.x86_model == 0x1c || | |
219 | boot_cpu_data.x86_model == 0x26 || | |
220 | boot_cpu_data.x86_model == 0x36)) { | |
221 | /* | |
222 | * On Atom, twofish-3way is slower than original assembler | |
223 | * implementation. Twofish-3way trades off some performance in | |
224 | * storing blocks in 64bit registers to allow three blocks to | |
225 | * be processed parallel. Parallel operation then allows gaining | |
226 | * more performance than was trade off, on out-of-order CPUs. | |
227 | * However Atom does not benefit from this parallellism and | |
228 | * should be blacklisted. | |
229 | */ | |
230 | return true; | |
231 | } | |
232 | ||
233 | if (boot_cpu_data.x86 == 0x0f) { | |
234 | /* | |
235 | * On Pentium 4, twofish-3way is slower than original assembler | |
236 | * implementation because excessive uses of 64bit rotate and | |
237 | * left-shifts (which are really slow on P4) needed to store and | |
238 | * handle 128bit block in two 64bit registers. | |
239 | */ | |
240 | return true; | |
241 | } | |
242 | ||
243 | return false; | |
244 | } | |
245 | ||
/*
 * Module parameter: when non-zero, init() skips the CPU blacklist check and
 * registers the algorithms regardless of the detected CPU.
 */
static int force;
module_param(force, int, 0);
MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist");
249 | ||
ff0a70fe | 250 | static int __init init(void) |
8280daad | 251 | { |
a522ee85 JK |
252 | if (!force && is_blacklisted_cpu()) { |
253 | printk(KERN_INFO | |
254 | "twofish-x86_64-3way: performance on this CPU " | |
255 | "would be suboptimal: disabling " | |
256 | "twofish-x86_64-3way.\n"); | |
257 | return -ENODEV; | |
258 | } | |
259 | ||
37992fa4 EB |
260 | return crypto_register_skciphers(tf_skciphers, |
261 | ARRAY_SIZE(tf_skciphers)); | |
8280daad JK |
262 | } |
263 | ||
ff0a70fe | 264 | static void __exit fini(void) |
8280daad | 265 | { |
37992fa4 | 266 | crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); |
8280daad JK |
267 | } |
268 | ||
/* Hook the load/unload handlers defined above into the module machinery. */
module_init(init);
module_exit(fini);

/* Module metadata and the crypto algorithm names this module is aliased to. */
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized");
MODULE_ALIAS_CRYPTO("twofish");
MODULE_ALIAS_CRYPTO("twofish-asm");