Commit | Line | Data |
---|---|---|
86464859 AB |
/*
 * aes-ce-core.S - AES in ECB/CBC/CTR/XTS mode using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
10 | ||
11 | #include <linux/linkage.h> | |
12 | #include <asm/assembler.h> | |
13 | ||
@ Everything below is code; enable the NEON + ARMv8 Crypto Extensions
@ instruction set and start on an 8-byte boundary.
        .text
        .fpu            crypto-neon-fp-armv8
        .align          3
17 | ||
@ enc_round: one full AES encryption round on \state with round key \key.
@ AESE performs AddRoundKey + SubBytes + ShiftRows, AESMC adds MixColumns
@ (per the ARMv8 Crypto Extensions instruction definitions).
        .macro          enc_round, state, key
        aese.8          \state, \key
        aesmc.8         \state, \state
        .endm
22 | ||
@ dec_round: one full AES decryption round on \state with round key \key.
@ AESD performs the inverse cipher step, AESIMC the Inverse MixColumns
@ (per the ARMv8 Crypto Extensions instruction definitions).
        .macro          dec_round, state, key
        aesd.8          \state, \key
        aesimc.8        \state, \state
        .endm
27 | ||
@ enc_dround: double round - two encryption rounds on the single block in
@ q0, using \key1 first and \key2 second.
        .macro          enc_dround, key1, key2
        .irp            rk, \key1, \key2
        enc_round       q0, \rk
        .endr
        .endm
32 | ||
@ dec_dround: double round - two decryption rounds on the single block in
@ q0, using \key1 first and \key2 second.
        .macro          dec_dround, key1, key2
        .irp            rk, \key1, \key2
        dec_round       q0, \rk
        .endr
        .endm
37 | ||
@ enc_fround: final rounds for the single block in q0.
@   \key1 : key for the last full round (with MixColumns)
@   \key2 : key for the last AESE step (no MixColumns afterwards)
@   \key3 : value xored into the result at the very end
        .macro          enc_fround, key1, key2, key3
        enc_round       q0, \key1
        aese.8          q0, \key2
        veor            q0, q0, \key3
        .endm
43 | ||
@ dec_fround: final rounds for the single block in q0.
@   \key1 : key for the last full round (with Inverse MixColumns)
@   \key2 : key for the last AESD step (no Inverse MixColumns afterwards)
@   \key3 : value xored into the result at the very end
        .macro          dec_fround, key1, key2, key3
        dec_round       q0, \key1
        aesd.8          q0, \key2
        veor            q0, q0, \key3
        .endm
49 | ||
@ enc_dround_3x: double encryption round on three blocks (q0, q1, q2).
@ All three blocks go through the \key1 round before any of them starts
@ the \key2 round, so the three streams are interleaved.
        .macro          enc_dround_3x, key1, key2
        .irp            blk, q0, q1, q2
        enc_round       \blk, \key1
        .endr
        .irp            blk, q0, q1, q2
        enc_round       \blk, \key2
        .endr
        .endm
58 | ||
@ dec_dround_3x: double decryption round on three blocks (q0, q1, q2).
@ All three blocks go through the \key1 round before any of them starts
@ the \key2 round, so the three streams are interleaved.
        .macro          dec_dround_3x, key1, key2
        .irp            blk, q0, q1, q2
        dec_round       \blk, \key1
        .endr
        .irp            blk, q0, q1, q2
        dec_round       \blk, \key2
        .endr
        .endm
67 | ||
@ enc_fround_3x: final encryption rounds on three blocks (q0, q1, q2).
@   \key1 : key for the last full round (with MixColumns)
@   \key2 : key for the last AESE step (no MixColumns afterwards)
@   \key3 : value xored into each result at the very end
@ Each step is applied to q0, q1, q2 in turn before the next step starts.
        .macro          enc_fround_3x, key1, key2, key3
        .irp            blk, q0, q1, q2
        enc_round       \blk, \key1
        .endr
        .irp            blk, q0, q1, q2
        aese.8          \blk, \key2
        .endr
        .irp            blk, q0, q1, q2
        veor            \blk, \blk, \key3
        .endr
        .endm
79 | ||
@ dec_fround_3x: final decryption rounds on three blocks (q0, q1, q2).
@   \key1 : key for the last full round (with Inverse MixColumns)
@   \key2 : key for the last AESD step (no Inverse MixColumns afterwards)
@   \key3 : value xored into each result at the very end
@ Each step is applied to q0, q1, q2 in turn before the next step starts.
        .macro          dec_fround_3x, key1, key2, key3
        .irp            blk, q0, q1, q2
        dec_round       \blk, \key1
        .endr
        .irp            blk, q0, q1, q2
        aesd.8          \blk, \key2
        .endr
        .irp            blk, q0, q1, q2
        veor            \blk, \blk, \key3
        .endr
        .endm
91 | ||
/*
 * do_block - body of the core AES transform, including the return.
 *
 *   \dround : macro performing two rounds (takes two round keys)
 *   \fround : macro performing the final rounds (takes three keys)
 *
 * On entry:
 *   q8, q9 : first two round keys
 *   q14    : value xored in by the final round
 *   ip     : address of the third round key
 *   r3     : number of rounds; compared against 12 to select the path
 *
 * The remaining round keys are streamed from [ip] into q10-q13, two at a
 * time, so that each load is issued one double round ahead of its use.
 * The flags set by the initial cmp are consumed by the blo/beq further
 * down; none of the instructions in between touch them.
 *
 * Clobbers q10-q13, ip and the condition flags. Returns via bx lr.
 */
        .macro          do_block, dround, fround
        cmp             r3, #12                 @ which key size?
        vld1.8          {q10-q11}, [ip]!
        \dround         q8, q9
        vld1.8          {q12-q13}, [ip]!
        \dround         q10, q11
        vld1.8          {q10-q11}, [ip]!
        \dround         q12, q13
        vld1.8          {q12-q13}, [ip]!
        \dround         q10, q11
        blo             0f                      @ AES-128: 10 rounds
        vld1.8          {q10-q11}, [ip]!
        \dround         q12, q13
        beq             1f                      @ AES-192: 12 rounds
        vld1.8          {q12-q13}, [ip]         @ otherwise two more keys
        \dround         q10, q11
0:      \fround         q12, q13, q14
        bx              lr

1:      \fround         q10, q11, q14
        bx              lr
        .endm
114 | ||
/*
 * Internal, non-AAPCS compliant functions that implement the core AES
 * transforms. These should preserve all registers except q0 - q2,
 * q10 - q13 (scratch for the remaining round keys), ip and the
 * condition flags.
 * Arguments:
 *   q0  : first in/output block
 *   q1  : second in/output block (_3x version only)
 *   q2  : third in/output block (_3x version only)
 *   q8  : first round key
 *   q9  : second round key
 *   q14 : final round key
 *   r2  : address of round key array
 *   r3  : number of rounds
 */
@ aes_encrypt: encrypt the single block in q0 in place.
@ Expects q8/q9/q14 preloaded, r2 = round key array, r3 = number of rounds.
@ .Laes_encrypt_tweak is a second entry point for callers that have already
@ set up ip themselves (it skips the add below).
        .align          6
aes_encrypt:
        add             ip, r2, #32             @ ip = 3rd round key (2 x 16 bytes in)
.Laes_encrypt_tweak:
        do_block        enc_dround, enc_fround
ENDPROC(aes_encrypt)
134 | ||
@ aes_decrypt: decrypt the single block in q0 in place.
@ Expects q8/q9/q14 preloaded, r2 = round key array, r3 = number of rounds.
        .align          6
aes_decrypt:
        add             ip, r2, #32             @ ip = 3rd round key (2 x 16 bytes in)
        do_block        dec_dround, dec_fround
ENDPROC(aes_decrypt)
140 | ||
@ aes_encrypt_3x: encrypt the three blocks in q0, q1 and q2 in place.
@ Expects q8/q9/q14 preloaded, r2 = round key array, r3 = number of rounds.
        .align          6
aes_encrypt_3x:
        add             ip, r2, #32             @ ip = 3rd round key (2 x 16 bytes in)
        do_block        enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)
146 | ||
@ aes_decrypt_3x: decrypt the three blocks in q0, q1 and q2 in place.
@ Expects q8/q9/q14 preloaded, r2 = round key array, r3 = number of rounds.
        .align          6
aes_decrypt_3x:
        add             ip, r2, #32             @ ip = 3rd round key (2 x 16 bytes in)
        do_block        dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
152 | ||
@ prepare_key: preload the round keys that stay resident in registers.
@   \rk     : register holding the address of the round key array
@   \rounds : register holding the number of rounds
@ Result: q8/q9 = first two round keys, q14 = round key at index \rounds
@ (16 bytes per key, hence the lsl by 4). Clobbers ip.
        .macro          prepare_key, rk, rounds
        add             ip, \rk, \rounds, lsl #4        @ ip = address of last round key
        vld1.8          {q8-q9}, [\rk]                  @ load first 2 round keys
        vld1.8          {q14}, [ip]                     @ load last round key
        .endm
158 | ||
/*
 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                    int blocks)
 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                    int blocks)
 */
@ ECB encryption. Register use:
@   r0 = out, r1 = in (both advanced as blocks are processed)
@   r2 = round keys, r3 = rounds, r4 = blocks still to do
@ Blocks are handled three at a time while possible, then one at a time.
ENTRY(ce_aes_ecb_encrypt)
        push            {r4, lr}
        ldr             r4, [sp, #8]            @ r4 = blocks (first stack argument)
        prepare_key     r2, r3

.Lecbencloop3x:
        subs            r4, r4, #3              @ at least three blocks left?
        bmi             .Lecbenc1x              @ no: go handle the remainder
        vld1.8          {q0-q1}, [r1]!
        vld1.8          {q2}, [r1]!
        bl              aes_encrypt_3x
        vst1.8          {q0-q1}, [r0]!
        vst1.8          {q2}, [r0]!
        b               .Lecbencloop3x

.Lecbenc1x:
        adds            r4, r4, #3              @ undo the subtraction above
        beq             .Lecbencout             @ nothing left over

.Lecbencloop:
        vld1.8          {q0}, [r1]!
        bl              aes_encrypt
        vst1.8          {q0}, [r0]!
        subs            r4, r4, #1
        bne             .Lecbencloop

.Lecbencout:
        pop             {r4, pc}
ENDPROC(ce_aes_ecb_encrypt)
190 | ||
@ ECB decryption. Register use:
@   r0 = out, r1 = in (both advanced as blocks are processed)
@   r2 = round keys, r3 = rounds, r4 = blocks still to do
@ Blocks are handled three at a time while possible, then one at a time.
ENTRY(ce_aes_ecb_decrypt)
        push            {r4, lr}
        ldr             r4, [sp, #8]            @ r4 = blocks (first stack argument)
        prepare_key     r2, r3

.Lecbdecloop3x:
        subs            r4, r4, #3              @ at least three blocks left?
        bmi             .Lecbdec1x              @ no: go handle the remainder
        vld1.8          {q0-q1}, [r1]!
        vld1.8          {q2}, [r1]!
        bl              aes_decrypt_3x
        vst1.8          {q0-q1}, [r0]!
        vst1.8          {q2}, [r0]!
        b               .Lecbdecloop3x

.Lecbdec1x:
        adds            r4, r4, #3              @ undo the subtraction above
        beq             .Lecbdecout             @ nothing left over

.Lecbdecloop:
        vld1.8          {q0}, [r1]!
        bl              aes_decrypt
        vst1.8          {q0}, [r0]!
        subs            r4, r4, #1
        bne             .Lecbdecloop

.Lecbdecout:
        pop             {r4, pc}
ENDPROC(ce_aes_ecb_decrypt)
216 | ||
/*
 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                    int blocks, u8 iv[])
 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                    int blocks, u8 iv[])
 */
@ CBC encryption. Register use:
@   r0 = out, r1 = in (both advanced as blocks are processed)
@   r2 = round keys, r3 = rounds
@   r4 = blocks still to do, r5 = iv buffer
@   q0 = running chaining value (iv, then each ciphertext block)
@ CBC encryption is inherently serial, so there is no 3x path here.
@ The final ciphertext block is written back to iv[] so a later call can
@ continue the chain. A block count <= 0 returns at once and leaves out[]
@ and iv[] untouched; the loop below tests its counter only after the
@ first block, so it must never be entered with nothing to do.
ENTRY(ce_aes_cbc_encrypt)
        push            {r4-r6, lr}
        ldrd            r4, r5, [sp, #16]       @ r4 = blocks, r5 = iv
        cmp             r4, #0
        ble             .Lcbcencout             @ nothing to do
        vld1.8          {q0}, [r5]
        prepare_key     r2, r3
.Lcbcencloop:
        vld1.8          {q1}, [r1]!             @ get next pt block
        veor            q0, q0, q1              @ ..and xor with iv
        bl              aes_encrypt
        vst1.8          {q0}, [r0]!
        subs            r4, r4, #1
        bne             .Lcbcencloop
        vst1.8          {q0}, [r5]              @ hand back the next iv
.Lcbcencout:
        pop             {r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)
238 | ||
@ CBC decryption. Register use:
@   r0 = out, r1 = in (both advanced as blocks are processed)
@   r2 = round keys, r3 = rounds
@   r4 = blocks still to do, r5 = iv buffer
@   q6 = chaining value (iv, then the most recent ciphertext block)
@ Three blocks at a time while possible, then one at a time. On exit the
@ last ciphertext block seen is written back to iv[].
ENTRY(ce_aes_cbc_decrypt)
        push            {r4-r6, lr}
        ldrd            r4, r5, [sp, #16]       @ r4 = blocks, r5 = iv
        vld1.8          {q6}, [r5]              @ keep iv in q6
        prepare_key     r2, r3

.Lcbcdecloop3x:
        subs            r4, r4, #3              @ at least three blocks left?
        bmi             .Lcbcdec1x
        vld1.8          {q0-q1}, [r1]!
        vld1.8          {q2}, [r1]!
        vmov            q3, q0                  @ save the ciphertext blocks: each
        vmov            q4, q1                  @ one is the chaining value for
        vmov            q5, q2                  @ the block that follows it
        bl              aes_decrypt_3x
        veor            q0, q0, q6              @ block 0 ^ previous chaining value
        veor            q1, q1, q3              @ block 1 ^ ct block 0
        veor            q2, q2, q4              @ block 2 ^ ct block 1
        vmov            q6, q5                  @ ct block 2 chains into the next pass
        vst1.8          {q0-q1}, [r0]!
        vst1.8          {q2}, [r0]!
        b               .Lcbcdecloop3x

.Lcbcdec1x:
        adds            r4, r4, #3              @ undo the subtraction above
        beq             .Lcbcdecout
        vmov            q15, q14                @ preserve last round key

.Lcbcdecloop:
        vld1.8          {q0}, [r1]!             @ get next ct block
        veor            q14, q15, q6            @ combine prev ct with last key
        vmov            q6, q0                  @ this ct block chains into the next
        bl              aes_decrypt             @ final xor with q14 also undoes the chaining
        vst1.8          {q0}, [r0]!
        subs            r4, r4, #1
        bne             .Lcbcdecloop

.Lcbcdecout:
        vst1.8          {q6}, [r5]              @ hand back the next iv
        pop             {r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
276 | ||
/*
 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *                    int blocks, u8 ctr[])
 *
 * Register use:
 *   r0 = out, r1 = in (both advanced as blocks are processed)
 *   r2 = round keys, r3 = rounds
 *   r4 = blocks still to do, r5 = ctr buffer
 *   q6 = current counter block as stored in memory
 *   r6 = last 32-bit word of the counter, byte-reversed so that it can be
 *        incremented with ordinary integer arithmetic
 *
 * If adding the block count to r6 would carry out of 32 bits, the whole
 * request goes through the one-block loop, which propagates the carry
 * into the other counter words; otherwise blocks are processed three at
 * a time and the remainder one at a time.
 *
 * A negative block count requests a tail block: one block of key stream
 * is stored to out[] (note the :64 alignment hint on that store), in[]
 * is not read, and ctr[] is not written back.
 */
ENTRY(ce_aes_ctr_encrypt)
        push            {r4-r6, lr}
        ldrd            r4, r5, [sp, #16]       @ r4 = blocks, r5 = ctr
        vld1.8          {q6}, [r5]              @ load ctr
        prepare_key     r2, r3
        vmov            r6, s27                 @ keep swabbed ctr in r6
        rev             r6, r6
        cmn             r6, r4                  @ 32 bit overflow?
        bcs             .Lctrloop               @ yes: use the carry-aware loop

.Lctrloop3x:
        subs            r4, r4, #3              @ at least three blocks left?
        bmi             .Lctr1x
        add             r6, r6, #1
        vmov            q0, q6                  @ q0 = ctr
        vmov            q1, q6
        rev             ip, r6
        add             r6, r6, #1
        vmov            q2, q6
        vmov            s7, ip                  @ q1 = ctr + 1
        rev             ip, r6
        add             r6, r6, #1
        vmov            s11, ip                 @ q2 = ctr + 2
        vld1.8          {q3-q4}, [r1]!
        vld1.8          {q5}, [r1]!
        bl              aes_encrypt_3x
        veor            q0, q0, q3              @ key stream ^ input
        veor            q1, q1, q4
        veor            q2, q2, q5
        rev             ip, r6
        vst1.8          {q0-q1}, [r0]!
        vst1.8          {q2}, [r0]!
        vmov            s27, ip                 @ q6 = ctr + 3
        b               .Lctrloop3x

.Lctr1x:
        adds            r4, r4, #3              @ undo the subtraction above
        beq             .Lctrout

.Lctrloop:
        vmov            q0, q6
        bl              aes_encrypt
        subs            r4, r4, #1
        bmi             .Lctrtailblock          @ blocks < 0 means tail block
        vld1.8          {q3}, [r1]!
        veor            q3, q0, q3
        vst1.8          {q3}, [r0]!

        adds            r6, r6, #1              @ increment BE ctr
        rev             ip, r6
        vmov            s27, ip
        bcs             .Lctrcarry              @ low word wrapped around
        teq             r4, #0
        bne             .Lctrloop

.Lctrout:
        vst1.8          {q6}, [r5]              @ hand back the updated counter
        pop             {r4-r6, pc}

.Lctrtailblock:
        vst1.8          {q0}, [r0, :64]         @ return just the key stream
        pop             {r4-r6, pc}

        @ Ripple the carry through the remaining counter words, stopping at
        @ the first word that does not itself wrap.
.Lctrcarry:
        .irp            sreg, s26, s25, s24
        vmov            ip, \sreg               @ load next word of ctr
        rev             ip, ip                  @ ... to handle the carry
        adds            ip, ip, #1
        rev             ip, ip
        vmov            \sreg, ip
        bcc             0f
        .endr
0:      teq             r4, #0
        bne             .Lctrloop
        b               .Lctrout
ENDPROC(ce_aes_ctr_encrypt)
353 | ||
/*
 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *                    int blocks, u8 iv[], u8 const rk2[], int first)
 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 *                    int blocks, u8 iv[], u8 const rk2[], int first)
 */
360 | ||
@ next_tweak: derive the next XTS tweak, i.e. shift the 128-bit value \in
@ left by one bit and fold the bit shifted out back in.
@   \out   : receives the new tweak (may be the same register as \in)
@   \in    : current tweak
@   \const : the two 64-bit constants from .Lxts_mul_x (1 and 0x87)
@   \tmp   : scratch q register, clobbered
@ The shift is done per 64-bit lane. The arithmetic right shift turns the
@ top bit of each lane into an all-ones/all-zeroes mask, which selects the
@ matching constant; the halves are then swapped so that each lane
@ receives the correction generated by the other lane.
        .macro          next_tweak, out, in, const, tmp
        vshr.s64        \tmp, \in, #63          @ per-lane mask from the top bit
        vand            \tmp, \tmp, \const      @ keep the constant where it was set
        vadd.u64        \out, \in, \in          @ shift each lane left by one
        vext.8          \tmp, \tmp, \tmp, #8    @ swap the two 64-bit halves
        veor            \out, \out, \tmp        @ apply the corrections
        .endm
368 | ||
@ Constants consumed by next_tweak (loaded into d14/d15, i.e. q7).
        .align          3
.Lxts_mul_x:
        .quad           1, 0x87

/*
 * ce_aes_xts_init - prologue shared by ce_aes_xts_encrypt/decrypt.
 *
 * Must be called with bl immediately after "push {r4-r6, lr}", because
 * the stacked arguments are addressed relative to that frame.
 *
 * On return:
 *   q7 = tweak constants
 *   r4 = blocks, r5 = iv buffer
 *   q0 = iv as loaded when first != 1, or the iv encrypted with the
 *        second key (rk2) when first == 1
 *   r6 = value of first when first != 1, or the rk2 pointer when
 *        first == 1; the callers test r6 against zero to decide whether
 *        the tweak still has to be advanced before the first block
 *
 * The first == 1 path also leaves q8, q9 and q14 holding rk2 keys and
 * clobbers whatever the core transform clobbers, so callers have to run
 * prepare_key for rk1 afterwards.
 */
ce_aes_xts_init:
        vldr            d14, .Lxts_mul_x
        vldr            d15, .Lxts_mul_x + 8

        ldrd            r4, r5, [sp, #16]       @ load args
        ldr             r6, [sp, #28]           @ r6 = first
        vld1.8          {q0}, [r5]              @ load iv
        teq             r6, #1                  @ start of a block?
        bxne            lr                      @ no: iv is already a tweak

        @ Encrypt the IV in q0 with the second AES key. This should only
        @ be done at the start of a block.
        ldr             r6, [sp, #24]           @ load AES key 2
        prepare_key     r6, r3
        add             ip, r6, #32             @ 3rd round key of key 2
        b               .Laes_encrypt_tweak     @ tail call
ENDPROC(ce_aes_xts_init)
390 | ||
@ XTS encryption. Register use after the shared prologue:
@   r0 = out, r1 = in (both advanced as blocks are processed)
@   r2 = round keys (key 1), r3 = rounds
@   r4 = blocks still to do, r5 = iv buffer
@   q3 = tweak for the next block, q4/q5 = tweaks for blocks 2 and 3
@   q6 = scratch for next_tweak, q7 = tweak constants
@ Each block is xored with its tweak before and after the AES transform.
@ On exit the tweak of the last block processed is stored to iv[].
ENTRY(ce_aes_xts_encrypt)
        push            {r4-r6, lr}

        bl              ce_aes_xts_init         @ run shared prologue
        prepare_key     r2, r3
        vmov            q3, q0                  @ q3 = initial tweak

        teq             r6, #0                  @ start of a block?
        bne             .Lxtsenc3x              @ yes: tweak is usable as is

.Lxtsencloop3x:
        next_tweak      q3, q3, q7, q6          @ step past the last tweak used
.Lxtsenc3x:
        subs            r4, r4, #3              @ at least three blocks left?
        bmi             .Lxtsenc1x
        vld1.8          {q0-q1}, [r1]!          @ get 3 pt blocks
        vld1.8          {q2}, [r1]!
        next_tweak      q4, q3, q7, q6
        veor            q0, q0, q3
        next_tweak      q5, q4, q7, q6
        veor            q1, q1, q4
        veor            q2, q2, q5
        bl              aes_encrypt_3x
        veor            q0, q0, q3
        veor            q1, q1, q4
        veor            q2, q2, q5
        vst1.8          {q0-q1}, [r0]!          @ write 3 ct blocks
        vst1.8          {q2}, [r0]!
        vmov            q3, q5                  @ remember the last tweak used
        teq             r4, #0
        bne             .Lxtsencloop3x          @ more input: carry on
        b               .Lxtsencout

.Lxtsenc1x:
        adds            r4, r4, #3              @ undo the subtraction above
        beq             .Lxtsencout

.Lxtsencloop:
        vld1.8          {q0}, [r1]!
        veor            q0, q0, q3
        bl              aes_encrypt
        veor            q0, q0, q3
        vst1.8          {q0}, [r0]!
        subs            r4, r4, #1
        beq             .Lxtsencout
        next_tweak      q3, q3, q7, q6
        b               .Lxtsencloop

.Lxtsencout:
        vst1.8          {q3}, [r5]              @ hand back the last tweak used
        pop             {r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)
440 | ||
441 | ||
@ XTS decryption. Register use after the shared prologue:
@   r0 = out, r1 = in (both advanced as blocks are processed)
@   r2 = round keys (key 1), r3 = rounds
@   r4 = blocks still to do, r5 = iv buffer
@   q3 = tweak for the next block, q4/q5 = tweaks for blocks 2 and 3
@   q6 = scratch for next_tweak, q7 = tweak constants
@ Each block is xored with its tweak before and after the AES transform.
@ On exit the tweak of the last block processed is stored to iv[].
@ ip is not set up before calling aes_decrypt: that routine computes it
@ itself as its first instruction.
ENTRY(ce_aes_xts_decrypt)
        push            {r4-r6, lr}

        bl              ce_aes_xts_init         @ run shared prologue
        prepare_key     r2, r3
        vmov            q3, q0                  @ q3 = initial tweak

        teq             r6, #0                  @ start of a block?
        bne             .Lxtsdec3x              @ yes: tweak is usable as is

.Lxtsdecloop3x:
        next_tweak      q3, q3, q7, q6          @ step past the last tweak used
.Lxtsdec3x:
        subs            r4, r4, #3              @ at least three blocks left?
        bmi             .Lxtsdec1x
        vld1.8          {q0-q1}, [r1]!          @ get 3 ct blocks
        vld1.8          {q2}, [r1]!
        next_tweak      q4, q3, q7, q6
        veor            q0, q0, q3
        next_tweak      q5, q4, q7, q6
        veor            q1, q1, q4
        veor            q2, q2, q5
        bl              aes_decrypt_3x
        veor            q0, q0, q3
        veor            q1, q1, q4
        veor            q2, q2, q5
        vst1.8          {q0-q1}, [r0]!          @ write 3 pt blocks
        vst1.8          {q2}, [r0]!
        vmov            q3, q5                  @ remember the last tweak used
        teq             r4, #0
        bne             .Lxtsdecloop3x          @ more input: carry on
        b               .Lxtsdecout

.Lxtsdec1x:
        adds            r4, r4, #3              @ undo the subtraction above
        beq             .Lxtsdecout

.Lxtsdecloop:
        vld1.8          {q0}, [r1]!
        veor            q0, q0, q3
        bl              aes_decrypt
        veor            q0, q0, q3
        vst1.8          {q0}, [r0]!
        subs            r4, r4, #1
        beq             .Lxtsdecout
        next_tweak      q3, q3, q7, q6
        b               .Lxtsdecloop

.Lxtsdecout:
        vst1.8          {q3}, [r5]              @ hand back the last tweak used
        pop             {r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)
492 | ||
/*
 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
 *                             AES sbox substitution on each byte in
 *                             'input'
 *
 * The state (q0) starts out as zero and the "key" (q1) holds 'input'
 * replicated into all four words, so the key addition inside aese just
 * yields the replicated input. With four identical words the row shift
 * has no visible effect, which leaves the substituted input in every
 * word; the first one (s0) is returned in r0.
 */
ENTRY(ce_aes_sub)
        veor            q0, q0, q0              @ state = 0
        vdup.32         q1, r0                  @ key = input in every word
        aese.8          q0, q1
        vmov            r0, s0                  @ return the first word
        bx              lr
ENDPROC(ce_aes_sub)
505 | ||
/*
 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
 *                                        operation on round key *src
 *
 * Reads one 16-byte round key from src (r1), applies aesimc to it and
 * writes the result to dst (r0).
 */
ENTRY(ce_aes_invert)
        vld1.8          {q0}, [r1]              @ q0 = *src
        aesimc.8        q0, q0
        vst1.8          {q0}, [r0]              @ *dst = q0
        bx              lr
ENDPROC(ce_aes_invert)