Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Cryptographic API. | |
3 | * | |
4 | * Support for VIA PadLock hardware crypto engine. | |
5 | * | |
6 | * Copyright (c) 2004 Michal Ludvig <michal@logix.cz> | |
7 | * | |
8 | * Key expansion routine taken from crypto/aes.c | |
9 | * | |
10 | * This program is free software; you can redistribute it and/or modify | |
11 | * it under the terms of the GNU General Public License as published by | |
12 | * the Free Software Foundation; either version 2 of the License, or | |
13 | * (at your option) any later version. | |
14 | * | |
15 | * --------------------------------------------------------------------------- | |
16 | * Copyright (c) 2002, Dr Brian Gladman <brg@gladman.me.uk>, Worcester, UK. | |
17 | * All rights reserved. | |
18 | * | |
19 | * LICENSE TERMS | |
20 | * | |
21 | * The free distribution and use of this software in both source and binary | |
22 | * form is allowed (with or without changes) provided that: | |
23 | * | |
24 | * 1. distributions of this source code include the above copyright | |
25 | * notice, this list of conditions and the following disclaimer; | |
26 | * | |
27 | * 2. distributions in binary form include the above copyright | |
28 | * notice, this list of conditions and the following disclaimer | |
29 | * in the documentation and/or other associated materials; | |
30 | * | |
31 | * 3. the copyright holder's name is not used to endorse products | |
32 | * built using this software without specific written permission. | |
33 | * | |
34 | * ALTERNATIVELY, provided that this notice is retained in full, this product | |
35 | * may be distributed under the terms of the GNU General Public License (GPL), | |
36 | * in which case the provisions of the GPL apply INSTEAD OF those given above. | |
37 | * | |
38 | * DISCLAIMER | |
39 | * | |
40 | * This software is provided 'as is' with no explicit or implied warranties | |
41 | * in respect of its properties, including, but not limited to, correctness | |
42 | * and/or fitness for purpose. | |
43 | * --------------------------------------------------------------------------- | |
44 | */ | |
45 | ||
46 | #include <linux/module.h> | |
47 | #include <linux/init.h> | |
48 | #include <linux/types.h> | |
49 | #include <linux/errno.h> | |
50 | #include <linux/crypto.h> | |
51 | #include <linux/interrupt.h> | |
52 | #include <asm/byteorder.h> | |
53 | #include "padlock.h" | |
54 | ||
/* AES size parameters used throughout this driver. */
#define AES_MIN_KEY_SIZE	16	/* in uint8_t units */
#define AES_MAX_KEY_SIZE	32	/* ditto */
#define AES_BLOCK_SIZE		16	/* ditto */
#define AES_EXTENDED_KEY_SIZE	64	/* in uint32_t units */
/* The extended key size above, expressed in bytes. */
#define AES_EXTENDED_KEY_SIZE_B	(AES_EXTENDED_KEY_SIZE * sizeof(uint32_t))
60 | ||
/*
 * Per-transform AES context.
 *
 * e_data and d_data are the raw storage for the encryption and
 * decryption key material.  Each has four spare words so that
 * aes_set_key() can point E and D at a 16-byte aligned position
 * inside them.
 */
struct aes_ctx {
	uint32_t e_data[AES_EXTENDED_KEY_SIZE+4];	/* backing store for E */
	uint32_t d_data[AES_EXTENDED_KEY_SIZE+4];	/* backing store for D */
	uint32_t *E;		/* aligned encryption key, set by aes_set_key() */
	uint32_t *D;		/* aligned decryption key, set by aes_set_key() */
	int key_length;		/* user key length in bytes, set by aes_set_key() */
};
68 | ||
69 | /* ====== Key management routines ====== */ | |
70 | ||
/*
 * Rotate the 32-bit value x right by 'bits' positions.
 *
 * The rotation count is taken modulo 32.  The complementary shift
 * count is reduced modulo 32 as well, so that a rotation by zero (or
 * by any multiple of 32) never shifts a 32-bit operand by its full
 * width, which C leaves undefined.
 */
static inline uint32_t
generic_rotr32 (const uint32_t x, const unsigned bits)
{
	const unsigned n = bits % 32;

	return (x >> n) | (x << ((32 - n) % 32));
}
77 | ||
/*
 * Rotate the 32-bit value x left by 'bits' positions.
 *
 * The rotation count is taken modulo 32.  The complementary shift
 * count is reduced modulo 32 as well, so that a rotation by zero (or
 * by any multiple of 32) never shifts a 32-bit operand by its full
 * width, which C leaves undefined.
 */
static inline uint32_t
generic_rotl32 (const uint32_t x, const unsigned bits)
{
	const unsigned n = bits % 32;

	return (x << n) | (x >> ((32 - n) % 32));
}
84 | ||
/* Short aliases for the rotate helpers, used by gen_tabs() and the key macros. */
#define rotl generic_rotl32
#define rotr generic_rotr32
87 | ||
/*
 * Extract byte number n of x, byte 0 being the least significant one.
 * Function form of:
 *   #define byte(x, nr) ((unsigned char)((x) >> (nr*8)))
 */
static inline uint8_t
byte(const uint32_t x, const unsigned n)
{
	const unsigned shift = 8 * n;

	return (uint8_t)(x >> shift);
}
96 | ||
/* Read / write a little-endian 32-bit word through an arbitrary pointer. */
#define uint32_t_in(x) le32_to_cpu(*(const uint32_t *)(x))
#define uint32_t_out(to, from) (*(uint32_t *)(to) = cpu_to_le32(from))

/* Shorthands for the key pointers; both need a local 'ctx' in scope. */
#define E_KEY ctx->E
#define D_KEY ctx->D
102 | ||
/* Lookup tables; every one of them is filled in at run time by gen_tabs(). */
static uint8_t pow_tab[256];	/* powers of 0x03 in GF(2**8) */
static uint8_t log_tab[256];	/* logarithms to base 0x03 in GF(2**8) */
static uint8_t sbx_tab[256];	/* substitution box */
static uint8_t isb_tab[256];	/* inverse mapping of sbx_tab */
static uint32_t rco_tab[10];	/* constants XORed in by loop4/loop6/loop8 */
static uint32_t ft_tab[4][256];	/* sbx_tab value times (2,1,1,3), four rotations */
static uint32_t it_tab[4][256];	/* isb_tab value times (14,9,13,11), four rotations */

static uint32_t fl_tab[4][256];	/* plain sbx_tab value in each byte position */
static uint32_t il_tab[4][256];	/* plain isb_tab value in each byte position */
113 | ||
114 | static inline uint8_t | |
115 | f_mult (uint8_t a, uint8_t b) | |
116 | { | |
117 | uint8_t aa = log_tab[a], cc = aa + log_tab[b]; | |
118 | ||
119 | return pow_tab[cc + (cc < aa ? 1 : 0)]; | |
120 | } | |
121 | ||
/* GF(2**8) multiplication that also copes with zero operands. */
#define ff_mult(a,b) (a && b ? f_mult(a, b) : 0)

/*
 * Table-driven round helpers.  Each computes output word n of the
 * state 'bo' from the input state 'bi' and the round key 'k':
 *   f_rn / i_rn use ft_tab / it_tab,
 *   f_rl / i_rl use fl_tab / il_tab.
 * NOTE(review): none of these four is referenced in the visible part
 * of this file - confirm before relying on them.
 */
#define f_rn(bo, bi, n, k)					\
	bo[n] = ft_tab[0][byte(bi[n],0)] ^			\
		ft_tab[1][byte(bi[(n + 1) & 3],1)] ^		\
		ft_tab[2][byte(bi[(n + 2) & 3],2)] ^		\
		ft_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n)

#define i_rn(bo, bi, n, k)					\
	bo[n] = it_tab[0][byte(bi[n],0)] ^			\
		it_tab[1][byte(bi[(n + 3) & 3],1)] ^		\
		it_tab[2][byte(bi[(n + 2) & 3],2)] ^		\
		it_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)

/* Pass each byte of x through the fl_tab lookup for its own position. */
#define ls_box(x)				\
	( fl_tab[0][byte(x, 0)] ^		\
	  fl_tab[1][byte(x, 1)] ^		\
	  fl_tab[2][byte(x, 2)] ^		\
	  fl_tab[3][byte(x, 3)] )

#define f_rl(bo, bi, n, k)					\
	bo[n] = fl_tab[0][byte(bi[n],0)] ^			\
		fl_tab[1][byte(bi[(n + 1) & 3],1)] ^		\
		fl_tab[2][byte(bi[(n + 2) & 3],2)] ^		\
		fl_tab[3][byte(bi[(n + 3) & 3],3)] ^ *(k + n)

#define i_rl(bo, bi, n, k)					\
	bo[n] = il_tab[0][byte(bi[n],0)] ^			\
		il_tab[1][byte(bi[(n + 3) & 3],1)] ^		\
		il_tab[2][byte(bi[(n + 2) & 3],2)] ^		\
		il_tab[3][byte(bi[(n + 1) & 3],3)] ^ *(k + n)
153 | ||
/*
 * Fill in every lookup table declared above: pow_tab, log_tab,
 * rco_tab, sbx_tab, isb_tab and the four 4x256 word tables.
 * Called from padlock_init_aes() before the algorithm is registered.
 */
static void
gen_tabs (void)
{
	uint32_t i, t;
	uint8_t p, q;

	/* log and power tables for GF(2**8) finite field with
	   0x011b as modular polynomial - the simplest primitive
	   root is 0x03, used here to generate the tables */

	for (i = 0, p = 1; i < 256; ++i) {
		pow_tab[i] = (uint8_t) p;
		log_tab[p] = (uint8_t) i;

		/* p *= 0x03: p ^ (2 * p), with 2 * p reduced by 0x1b on overflow */
		p ^= (p << 1) ^ (p & 0x80 ? 0x01b : 0);
	}

	/* the loop above may have overwritten the entry for 1; force it to 0 */
	log_tab[1] = 0;

	/* rco_tab[i] is 1 doubled i times in the field */
	for (i = 0, p = 1; i < 10; ++i) {
		rco_tab[i] = p;

		p = (p << 1) ^ (p & 0x80 ? 0x01b : 0);
	}

	/* substitution box and its inverse */
	for (i = 0; i < 256; ++i) {
		/* multiplicative inverse of i via the log table; 0 stays 0 */
		p = (i ? pow_tab[255 - log_tab[i]] : 0);
		/* XOR of bit-rotated copies of p plus the constant 0x63 */
		q = ((p >> 7) | (p << 1)) ^ ((p >> 6) | (p << 2));
		p ^= 0x63 ^ q ^ ((q >> 6) | (q << 2));
		sbx_tab[i] = p;
		isb_tab[p] = (uint8_t) i;
	}

	for (i = 0; i < 256; ++i) {
		p = sbx_tab[i];

		/* fl_tab: the substituted byte in each of the four positions */
		t = p;
		fl_tab[0][i] = t;
		fl_tab[1][i] = rotl (t, 8);
		fl_tab[2][i] = rotl (t, 16);
		fl_tab[3][i] = rotl (t, 24);

		/* ft_tab: the substituted byte times 2, 1, 1, 3 (low to high) */
		t = ((uint32_t) ff_mult (2, p)) |
		    ((uint32_t) p << 8) |
		    ((uint32_t) p << 16) | ((uint32_t) ff_mult (3, p) << 24);

		ft_tab[0][i] = t;
		ft_tab[1][i] = rotl (t, 8);
		ft_tab[2][i] = rotl (t, 16);
		ft_tab[3][i] = rotl (t, 24);

		p = isb_tab[i];

		/* il_tab: the inverse-substituted byte in each position */
		t = p;
		il_tab[0][i] = t;
		il_tab[1][i] = rotl (t, 8);
		il_tab[2][i] = rotl (t, 16);
		il_tab[3][i] = rotl (t, 24);

		/* it_tab: the inverse-substituted byte times 14, 9, 13, 11 */
		t = ((uint32_t) ff_mult (14, p)) |
		    ((uint32_t) ff_mult (9, p) << 8) |
		    ((uint32_t) ff_mult (13, p) << 16) |
		    ((uint32_t) ff_mult (11, p) << 24);

		it_tab[0][i] = t;
		it_tab[1][i] = rotl (t, 8);
		it_tab[2][i] = rotl (t, 16);
		it_tab[3][i] = rotl (t, 24);
	}
}
224 | ||
/*
 * Double each of the four bytes packed in x within GF(2**8): the low
 * seven bits of every byte are shifted left and 0x1b is folded in for
 * every byte whose top bit was set.
 * Note: the expansion is not wrapped in outer parentheses, so only use
 * it as the complete right-hand side of an assignment.
 */
#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)

/*
 * Compute y from key word x using u = 2x, v = 4x, w = 8x and t = 9x,
 * giving 14x in place plus rotated 11x, 13x and 9x terms (the same
 * multipliers gen_tabs() uses for it_tab).
 * Expands to several statements and needs uint32_t locals t, u, v, w
 * in the calling scope; always use it inside braces.
 */
#define imix_col(y,x)		\
	u   = star_x(x);	\
	v   = star_x(u);	\
	w   = star_x(v);	\
	t   = w ^ (x);		\
	(y) = u ^ v ^ w;	\
	(y) ^= rotr(u ^ t,  8) ^ \
	       rotr(v ^ t, 16) ^ \
	       rotr(t,24)
236 | ||
/* initialise the key schedule from the user supplied key */

/*
 * One key-expansion step for a key of 4, 6 or 8 words respectively.
 * Each macro needs 'ctx' (through E_KEY) and a uint32_t 't' holding the
 * last word generated so far; step i derives the next 4, 6 or 8 words
 * of E_KEY from the previous group and rco_tab[i].
 */
#define loop4(i)					\
{   t = rotr(t,  8); t = ls_box(t) ^ rco_tab[i];	\
    t ^= E_KEY[4 * i];     E_KEY[4 * i + 4] = t;	\
    t ^= E_KEY[4 * i + 1]; E_KEY[4 * i + 5] = t;	\
    t ^= E_KEY[4 * i + 2]; E_KEY[4 * i + 6] = t;	\
    t ^= E_KEY[4 * i + 3]; E_KEY[4 * i + 7] = t;	\
}

#define loop6(i)					\
{   t = rotr(t,  8); t = ls_box(t) ^ rco_tab[i];	\
    t ^= E_KEY[6 * i];     E_KEY[6 * i + 6] = t;	\
    t ^= E_KEY[6 * i + 1]; E_KEY[6 * i + 7] = t;	\
    t ^= E_KEY[6 * i + 2]; E_KEY[6 * i + 8] = t;	\
    t ^= E_KEY[6 * i + 3]; E_KEY[6 * i + 9] = t;	\
    t ^= E_KEY[6 * i + 4]; E_KEY[6 * i + 10] = t;	\
    t ^= E_KEY[6 * i + 5]; E_KEY[6 * i + 11] = t;	\
}

/* The 8-word variant applies ls_box() a second time half-way through. */
#define loop8(i)					\
{   t = rotr(t,  8); ; t = ls_box(t) ^ rco_tab[i];	\
    t ^= E_KEY[8 * i];     E_KEY[8 * i + 8] = t;	\
    t ^= E_KEY[8 * i + 1]; E_KEY[8 * i + 9] = t;	\
    t ^= E_KEY[8 * i + 2]; E_KEY[8 * i + 10] = t;	\
    t ^= E_KEY[8 * i + 3]; E_KEY[8 * i + 11] = t;	\
    t  = E_KEY[8 * i + 4] ^ ls_box(t);			\
    E_KEY[8 * i + 12] = t;				\
    t ^= E_KEY[8 * i + 5]; E_KEY[8 * i + 13] = t;	\
    t ^= E_KEY[8 * i + 6]; E_KEY[8 * i + 14] = t;	\
    t ^= E_KEY[8 * i + 7]; E_KEY[8 * i + 15] = t;	\
}
269 | ||
/*
 * Report whether the ACE can expand a key of key_len bytes by itself.
 * Returns 1 when it can, 0 when the extended key must be supplied.
 */
static inline int
aes_hw_extkey_available(uint8_t key_len)
{
	/* TODO: We should check the actual CPU model/stepping
	   as it's possible that the capability will be
	   added in the next CPU revisions. */
	return (key_len == 16) ? 1 : 0;
}
282 | ||
283 | static int | |
284 | aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t *flags) | |
285 | { | |
286 | struct aes_ctx *ctx = ctx_arg; | |
287 | uint32_t i, t, u, v, w; | |
288 | uint32_t P[AES_EXTENDED_KEY_SIZE]; | |
289 | uint32_t rounds; | |
290 | ||
291 | if (key_len != 16 && key_len != 24 && key_len != 32) { | |
292 | *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; | |
293 | return -EINVAL; | |
294 | } | |
295 | ||
296 | ctx->key_length = key_len; | |
297 | ||
298 | ctx->E = ctx->e_data; | |
299 | ctx->D = ctx->d_data; | |
300 | ||
301 | /* Ensure 16-Bytes alignmentation of keys for VIA PadLock. */ | |
302 | if ((int)(ctx->e_data) & 0x0F) | |
303 | ctx->E += 4 - (((int)(ctx->e_data) & 0x0F) / sizeof (ctx->e_data[0])); | |
304 | ||
305 | if ((int)(ctx->d_data) & 0x0F) | |
306 | ctx->D += 4 - (((int)(ctx->d_data) & 0x0F) / sizeof (ctx->d_data[0])); | |
307 | ||
308 | E_KEY[0] = uint32_t_in (in_key); | |
309 | E_KEY[1] = uint32_t_in (in_key + 4); | |
310 | E_KEY[2] = uint32_t_in (in_key + 8); | |
311 | E_KEY[3] = uint32_t_in (in_key + 12); | |
312 | ||
313 | /* Don't generate extended keys if the hardware can do it. */ | |
314 | if (aes_hw_extkey_available(key_len)) | |
315 | return 0; | |
316 | ||
317 | switch (key_len) { | |
318 | case 16: | |
319 | t = E_KEY[3]; | |
320 | for (i = 0; i < 10; ++i) | |
321 | loop4 (i); | |
322 | break; | |
323 | ||
324 | case 24: | |
325 | E_KEY[4] = uint32_t_in (in_key + 16); | |
326 | t = E_KEY[5] = uint32_t_in (in_key + 20); | |
327 | for (i = 0; i < 8; ++i) | |
328 | loop6 (i); | |
329 | break; | |
330 | ||
331 | case 32: | |
332 | E_KEY[4] = uint32_t_in (in_key + 16); | |
333 | E_KEY[5] = uint32_t_in (in_key + 20); | |
334 | E_KEY[6] = uint32_t_in (in_key + 24); | |
335 | t = E_KEY[7] = uint32_t_in (in_key + 28); | |
336 | for (i = 0; i < 7; ++i) | |
337 | loop8 (i); | |
338 | break; | |
339 | } | |
340 | ||
341 | D_KEY[0] = E_KEY[0]; | |
342 | D_KEY[1] = E_KEY[1]; | |
343 | D_KEY[2] = E_KEY[2]; | |
344 | D_KEY[3] = E_KEY[3]; | |
345 | ||
346 | for (i = 4; i < key_len + 24; ++i) { | |
347 | imix_col (D_KEY[i], E_KEY[i]); | |
348 | } | |
349 | ||
350 | /* PadLock needs a different format of the decryption key. */ | |
351 | rounds = 10 + (key_len - 16) / 4; | |
352 | ||
353 | for (i = 0; i < rounds; i++) { | |
354 | P[((i + 1) * 4) + 0] = D_KEY[((rounds - i - 1) * 4) + 0]; | |
355 | P[((i + 1) * 4) + 1] = D_KEY[((rounds - i - 1) * 4) + 1]; | |
356 | P[((i + 1) * 4) + 2] = D_KEY[((rounds - i - 1) * 4) + 2]; | |
357 | P[((i + 1) * 4) + 3] = D_KEY[((rounds - i - 1) * 4) + 3]; | |
358 | } | |
359 | ||
360 | P[0] = E_KEY[(rounds * 4) + 0]; | |
361 | P[1] = E_KEY[(rounds * 4) + 1]; | |
362 | P[2] = E_KEY[(rounds * 4) + 2]; | |
363 | P[3] = E_KEY[(rounds * 4) + 3]; | |
364 | ||
365 | memcpy(D_KEY, P, AES_EXTENDED_KEY_SIZE_B); | |
366 | ||
367 | return 0; | |
368 | } | |
369 | ||
370 | /* ====== Encryption/decryption routines ====== */ | |
371 | ||
372 | /* This is the real call to PadLock. */ | |
373 | static inline void | |
374 | padlock_xcrypt_ecb(uint8_t *input, uint8_t *output, uint8_t *key, | |
375 | void *control_word, uint32_t count) | |
376 | { | |
377 | asm volatile ("pushfl; popfl"); /* enforce key reload. */ | |
378 | asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ | |
379 | : "+S"(input), "+D"(output) | |
380 | : "d"(control_word), "b"(key), "c"(count)); | |
381 | } | |
382 | ||
383 | static void | |
384 | aes_padlock(void *ctx_arg, uint8_t *out_arg, const uint8_t *in_arg, int encdec) | |
385 | { | |
386 | /* Don't blindly modify this structure - the items must | |
387 | fit on 16-Bytes boundaries! */ | |
388 | struct padlock_xcrypt_data { | |
389 | uint8_t buf[AES_BLOCK_SIZE]; | |
390 | union cword cword; | |
391 | }; | |
392 | ||
393 | struct aes_ctx *ctx = ctx_arg; | |
394 | char bigbuf[sizeof(struct padlock_xcrypt_data) + 16]; | |
395 | struct padlock_xcrypt_data *data; | |
396 | void *key; | |
397 | ||
398 | /* Place 'data' at the first 16-Bytes aligned address in 'bigbuf'. */ | |
399 | if (((long)bigbuf) & 0x0F) | |
400 | data = (void*)(bigbuf + 16 - ((long)bigbuf & 0x0F)); | |
401 | else | |
402 | data = (void*)bigbuf; | |
403 | ||
404 | /* Prepare Control word. */ | |
405 | memset (data, 0, sizeof(struct padlock_xcrypt_data)); | |
406 | data->cword.b.encdec = !encdec; /* in the rest of cryptoapi ENC=1/DEC=0 */ | |
407 | data->cword.b.rounds = 10 + (ctx->key_length - 16) / 4; | |
408 | data->cword.b.ksize = (ctx->key_length - 16) / 8; | |
409 | ||
410 | /* Is the hardware capable to generate the extended key? */ | |
411 | if (!aes_hw_extkey_available(ctx->key_length)) | |
412 | data->cword.b.keygen = 1; | |
413 | ||
414 | /* ctx->E starts with a plain key - if the hardware is capable | |
415 | to generate the extended key itself we must supply | |
416 | the plain key for both Encryption and Decryption. */ | |
417 | if (encdec == CRYPTO_DIR_ENCRYPT || data->cword.b.keygen == 0) | |
418 | key = ctx->E; | |
419 | else | |
420 | key = ctx->D; | |
421 | ||
422 | memcpy(data->buf, in_arg, AES_BLOCK_SIZE); | |
423 | padlock_xcrypt_ecb(data->buf, data->buf, key, &data->cword, 1); | |
424 | memcpy(out_arg, data->buf, AES_BLOCK_SIZE); | |
425 | } | |
426 | ||
/* cia_encrypt hook: encrypt one block from 'in' into 'out' via aes_padlock(). */
static void
aes_encrypt(void *ctx_arg, uint8_t *out, const uint8_t *in)
{
	aes_padlock(ctx_arg, out, in, CRYPTO_DIR_ENCRYPT);
}
432 | ||
/* cia_decrypt hook: decrypt one block from 'in' into 'out' via aes_padlock(). */
static void
aes_decrypt(void *ctx_arg, uint8_t *out, const uint8_t *in)
{
	aes_padlock(ctx_arg, out, in, CRYPTO_DIR_DECRYPT);
}
438 | ||
/*
 * Algorithm descriptor handed to crypto_register_alg(): a cipher named
 * "aes" with the block size, key size limits and per-transform context
 * size defined above, wired to the PadLock-backed handlers.
 */
static struct crypto_alg aes_alg = {
	.cra_name		=	"aes",
	.cra_flags		=	CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize		=	AES_BLOCK_SIZE,
	.cra_ctxsize		=	sizeof(struct aes_ctx),
	.cra_module		=	THIS_MODULE,
	.cra_list		=	LIST_HEAD_INIT(aes_alg.cra_list),
	.cra_u			=	{
		.cipher = {
			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
			.cia_setkey		=	aes_set_key,
			.cia_encrypt		=	aes_encrypt,
			.cia_decrypt		=	aes_decrypt
		}
	}
};
456 | ||
/*
 * Announce the driver, build the lookup tables and register the
 * algorithm.  Returns whatever crypto_register_alg() returns.
 * Note that the notice is printed before registration is attempted.
 */
int __init padlock_init_aes(void)
{
	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for AES algorithm.\n");

	gen_tabs();
	return crypto_register_alg(&aes_alg);
}
464 | ||
/* Unregister the algorithm registered by padlock_init_aes(). */
void __exit padlock_fini_aes(void)
{
	crypto_unregister_alg(&aes_alg);
}