2 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
4 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 /* included by aes-ce.S and aes-neon.S */
/*
 * Internal helper: encrypt four AES blocks (v0-v3) in place.
 * w3 = number of rounds, x2 = round-key array; x8/w7 are scratch for
 * the expansion macro.  Reached via bl, so the caller must have saved
 * x30.  NOTE(review): the AES_ENTRY line and the ret for this routine
 * are elided from this chunk.
 */
17 encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
19 ENDPROC(aes_encrypt_block4x)
/*
 * Internal helper: decrypt four AES blocks (v0-v3) in place.
 * Same register contract as aes_encrypt_block4x above.
 * NOTE(review): the AES_ENTRY line and the ret are elided from this
 * chunk.
 */
22 decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
24 ENDPROC(aes_decrypt_block4x)
27 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
29 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
/*
 * ECB encryption: each block is encrypted independently, so blocks are
 * processed four at a time via the block4x helper, with a one-block
 * tail loop.  x0 = out, x1 = in, x2 = round keys, w3 = rounds,
 * w4 presumably = block count (per the prototype comment above) — the
 * count-decrement/branch lines are elided from this chunk.
 */
33 AES_ENTRY(aes_ecb_encrypt)
34 stp x29, x30, [sp, #-16]!		/* non-leaf: bl below clobbers x30 */
37 enc_prepare w3, x2, x5
42 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
43 bl aes_encrypt_block4x
44 st1 {v0.16b-v3.16b}, [x0], #64
/* tail: fewer than 4 blocks remain — handle one at a time */
50 ld1 {v0.16b}, [x1], #16 /* get next pt block */
51 encrypt_block v0, w3, x2, x5, w6
52 st1 {v0.16b}, [x0], #16
56 ldp x29, x30, [sp], #16
58 AES_ENDPROC(aes_ecb_encrypt)
/*
 * ECB decryption: mirror image of aes_ecb_encrypt — 4x main path plus
 * a one-block tail.  The loop labels and count handling are elided
 * from this chunk.
 */
61 AES_ENTRY(aes_ecb_decrypt)
62 stp x29, x30, [sp, #-16]!		/* non-leaf: bl below clobbers x30 */
65 dec_prepare w3, x2, x5
70 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
71 bl aes_decrypt_block4x
72 st1 {v0.16b-v3.16b}, [x0], #64
/* tail: decrypt remaining blocks singly */
78 ld1 {v0.16b}, [x1], #16 /* get next ct block */
79 decrypt_block v0, w3, x2, x5, w6
80 st1 {v0.16b}, [x0], #16
84 ldp x29, x30, [sp], #16
86 AES_ENDPROC(aes_ecb_decrypt)
90 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
91 * int blocks, u8 iv[])
92 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
93 * int blocks, u8 iv[])
/*
 * CBC encryption: inherently sequential (each block's ciphertext feeds
 * the next block's input), so even the "4 blocks" path runs four
 * dependent encrypt_block invocations rather than the interleaved 4x
 * helper.  v4 carries the chaining value; the final ciphertext is
 * written back through x5 as the next IV.  Leaf routine — no bl, no
 * frame.  Loop labels/branches are elided from this chunk.
 */
96 AES_ENTRY(aes_cbc_encrypt)
97 ld1 {v4.16b}, [x5] /* get iv */
98 enc_prepare w3, x2, x6
103 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
104 eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
105 encrypt_block v0, w3, x2, x6, w7
106 eor v1.16b, v1.16b, v0.16b	/* chain ct block 0 into pt block 1 */
107 encrypt_block v1, w3, x2, x6, w7
108 eor v2.16b, v2.16b, v1.16b
109 encrypt_block v2, w3, x2, x6, w7
110 eor v3.16b, v3.16b, v2.16b
111 encrypt_block v3, w3, x2, x6, w7
112 st1 {v0.16b-v3.16b}, [x0], #64
/* tail: one block at a time, chaining through v4 */
119 ld1 {v0.16b}, [x1], #16 /* get next pt block */
120 eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
121 encrypt_block v4, w3, x2, x6, w7
122 st1 {v4.16b}, [x0], #16
126 st1 {v4.16b}, [x5] /* return iv */
128 AES_ENDPROC(aes_cbc_encrypt)
/*
 * CBC decryption: decryption of the blocks is independent (only the
 * xor with the previous ciphertext is chained), so the 4x helper can
 * be used.  v7 holds the running IV/previous-ct value.
 * NOTE(review): v4-v6 presumably hold copies of ct blocks 0-2 made
 * before the bl (the copy instructions at original lines 142-144 are
 * elided from this chunk) — confirm against the full file.
 */
131 AES_ENTRY(aes_cbc_decrypt)
132 stp x29, x30, [sp, #-16]!		/* non-leaf: bl below clobbers x30 */
135 ld1 {v7.16b}, [x5] /* get iv */
136 dec_prepare w3, x2, x6
141 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
145 bl aes_decrypt_block4x
147 eor v0.16b, v0.16b, v7.16b	/* pt0 = D(ct0) ^ iv */
148 eor v1.16b, v1.16b, v4.16b
149 ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
150 eor v2.16b, v2.16b, v5.16b
151 eor v3.16b, v3.16b, v6.16b
152 st1 {v0.16b-v3.16b}, [x0], #64
/* tail: one block at a time; keep the raw ct as the next IV */
158 ld1 {v1.16b}, [x1], #16 /* get next ct block */
159 mov v0.16b, v1.16b /* ...and copy to v0 */
160 decrypt_block v0, w3, x2, x6, w7
161 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
162 mov v7.16b, v1.16b /* ct is next iv */
163 st1 {v0.16b}, [x0], #16
167 st1 {v7.16b}, [x5] /* return iv */
168 ldp x29, x30, [sp], #16
170 AES_ENDPROC(aes_cbc_decrypt)
174 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
175 * int rounds, int bytes, u8 const iv[])
176 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
177 * int rounds, int bytes, u8 const iv[])
/*
 * CBC with ciphertext stealing, encrypt side: handles the final
 * (possibly partial) two-block span.  Overlapping loads/stores and the
 * .Lcts_permute_table TBL shuffles realign the partial tail without
 * branching per byte.  NOTE(review): the permute-vector setup (loads
 * into v3/v4) and the load of v1 sit in elided lines — the table
 * offsets are computed from the byte count in x4; confirm against the
 * full file.
 */
180 AES_ENTRY(aes_cbc_cts_encrypt)
181 adr_l x8, .Lcts_permute_table
189 ld1 {v0.16b}, [x1], x4 /* overlapping loads */
192 ld1 {v5.16b}, [x5] /* get iv */
193 enc_prepare w3, x2, x6
195 eor v0.16b, v0.16b, v5.16b /* xor with iv */
196 tbl v1.16b, {v1.16b}, v4.16b	/* shift tail bytes into position */
197 encrypt_block v0, w3, x2, x6, w7
199 eor v1.16b, v1.16b, v0.16b	/* chain full-block ct into tail */
200 tbl v0.16b, {v0.16b}, v3.16b	/* steal: truncate/permute block n-1 */
201 encrypt_block v1, w3, x2, x6, w7
204 st1 {v0.16b}, [x4] /* overlapping stores */
207 AES_ENDPROC(aes_cbc_cts_encrypt)
/*
 * CBC with ciphertext stealing, decrypt side: inverse of the encrypt
 * routine above.  TBL/TBX against .Lcts_permute_table splice the
 * stolen bytes back before the second decryption.  NOTE(review): the
 * permute-vector setup and the v1 load are in elided lines; confirm
 * register roles against the full file.
 */
209 AES_ENTRY(aes_cbc_cts_decrypt)
210 adr_l x8, .Lcts_permute_table
218 ld1 {v0.16b}, [x1], x4 /* overlapping loads */
221 ld1 {v5.16b}, [x5] /* get iv */
222 dec_prepare w3, x2, x6
224 tbl v2.16b, {v1.16b}, v4.16b	/* align the partial ct tail */
225 decrypt_block v0, w3, x2, x6, w7
226 eor v2.16b, v2.16b, v0.16b	/* recover tail pt bytes */
228 tbx v0.16b, {v1.16b}, v4.16b	/* merge stolen ct bytes into block */
229 tbl v2.16b, {v2.16b}, v3.16b
230 decrypt_block v0, w3, x2, x6, w7
231 eor v0.16b, v0.16b, v5.16b /* xor with iv */
234 st1 {v2.16b}, [x4] /* overlapping stores */
237 AES_ENDPROC(aes_cbc_cts_decrypt)
/*
 * Permute table for the CTS routines: 16 bytes of 0xff (TBL yields
 * zero / TBX leaves dest unchanged for out-of-range indices), the
 * identity permutation 0x00-0x0f, then 16 more bytes of 0xff.
 * Indexing into this at an offset derived from the tail length selects
 * which bytes survive the shuffle.  NOTE(review): the
 * .Lcts_permute_table label line itself is elided from this chunk.
 */
239 .section ".rodata", "a"
242 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
243 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
244 .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
245 .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
246 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
247 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
252 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
253 * int blocks, u8 ctr[])
/*
 * CTR mode: encrypt the big-endian counter in v4, xor the keystream
 * with the input.  The low 64 bits of the counter are kept byte-
 * swapped in x6 so they can be incremented with plain integer adds;
 * cmn w6, w4 pre-checks whether processing w4 blocks will overflow the
 * low 32 bits, and .Lctrcarry propagates a carry into the high word
 * (rev x7 converts it back from BE).  NOTE(review): the per-block
 * counter insertion (mov v4.d[1], ...), loop labels and the slow-path
 * bodies are elided from this chunk.
 */
256 AES_ENTRY(aes_ctr_encrypt)
257 stp x29, x30, [sp, #-16]!		/* non-leaf: bl below clobbers x30 */
260 enc_prepare w3, x2, x6
263 umov x6, v4.d[1] /* keep swabbed ctr in reg */
265 cmn w6, w4 /* 32 bit overflow? */
/* fast path: four keystream blocks per iteration */
283 ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
284 bl aes_encrypt_block4x
285 eor v0.16b, v5.16b, v0.16b
286 ld1 {v5.16b}, [x1], #16 /* get 1 input block */
287 eor v1.16b, v6.16b, v1.16b
288 eor v2.16b, v7.16b, v2.16b
289 eor v3.16b, v5.16b, v3.16b
290 st1 {v0.16b-v3.16b}, [x0], #64
/* single-block path */
301 encrypt_block v0, w3, x2, x8, w7
303 adds x6, x6, #1 /* increment BE ctr */
306 bcs .Lctrcarry /* overflow? */
310 bmi .Lctrtailblock /* blocks <0 means tail block */
311 ld1 {v3.16b}, [x1], #16
312 eor v3.16b, v0.16b, v3.16b
313 st1 {v3.16b}, [x0], #16
317 st1 {v4.16b}, [x5] /* return next CTR value */
318 ldp x29, x30, [sp], #16
323 ldp x29, x30, [sp], #16
/* carry out of the low 64 counter bits: bump the high word */
327 umov x7, v4.d[0] /* load upper word of ctr */
328 rev x7, x7 /* ... to handle the carry */
333 AES_ENDPROC(aes_ctr_encrypt)
337 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
338 * int blocks, u8 const rk2[], u8 iv[], int first)
339 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
340 * int blocks, u8 const rk2[], u8 iv[], int first)
/*
 * next_tweak: \out = 2 * \in in GF(2^128).
 * add \in,\in shifts each 64-bit lane left by one; sshr #63 broadcasts
 * each lane's top bit, and the EXT #8 swap routes the high lane's
 * carry down to the low lane (and the low lane's carry up).  ANDing
 * with xtsmask folds in the reduction constant (0x87) when bit 127
 * overflows.  NOTE(review): the .endm line is elided from this chunk.
 */
343 .macro next_tweak, out, in, tmp
344 sshr \tmp\().2d, \in\().2d, #63
345 and \tmp\().16b, \tmp\().16b, xtsmask.16b
346 add \out\().2d, \in\().2d, \in\().2d
347 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
348 eor \out\().16b, \out\().16b, \tmp\().16b
/*
 * xts_load_mask: build the GF(2^128) reduction mask in xtsmask,
 * combining lanes of 0x1 (inter-lane carry bit) and 0x87 (the XTS
 * reduction polynomial constant) via UZP1, without a memory load.
 * NOTE(review): exact lane layout depends on how next_tweak consumes
 * it; the .endm line is elided from this chunk.
 */
351 .macro xts_load_mask, tmp
352 movi xtsmask.2s, #0x1
353 movi \tmp\().2s, #0x87
354 uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s
/*
 * XTS encryption: per-block xor-encrypt-xor with a tweak that is
 * doubled in GF(2^128) between blocks (next_tweak).  On the first call
 * (w7 != 0) the initial tweak in v4 is encrypted with the tweak key
 * rk2 (x5) before switching to the data key rk1 (x2); on later calls
 * the saved tweak is advanced instead.  NOTE(review): the iv load into
 * v4, loop labels/branches and the final iv store are elided from this
 * chunk.
 */
357 AES_ENTRY(aes_xts_encrypt)
358 stp x29, x30, [sp, #-16]!		/* non-leaf: bl below clobbers x30 */
363 cbz w7, .Lxtsencnotfirst
365 enc_prepare w3, x5, x8
366 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
367 enc_switch_key w3, x2, x8
371 enc_prepare w3, x2, x8
373 next_tweak v4, v4, v8
/* 4x path: tweaks v4-v7, one per block */
377 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
378 next_tweak v5, v4, v8
379 eor v0.16b, v0.16b, v4.16b
380 next_tweak v6, v5, v8
381 eor v1.16b, v1.16b, v5.16b
382 eor v2.16b, v2.16b, v6.16b
383 next_tweak v7, v6, v8
384 eor v3.16b, v3.16b, v7.16b
385 bl aes_encrypt_block4x
386 eor v3.16b, v3.16b, v7.16b	/* second xor of the xex construction */
387 eor v0.16b, v0.16b, v4.16b
388 eor v1.16b, v1.16b, v5.16b
389 eor v2.16b, v2.16b, v6.16b
390 st1 {v0.16b-v3.16b}, [x0], #64
/* tail: single blocks, tweak advanced each iteration */
399 ld1 {v1.16b}, [x1], #16
400 eor v0.16b, v1.16b, v4.16b
401 encrypt_block v0, w3, x2, x8, w7
402 eor v0.16b, v0.16b, v4.16b
403 st1 {v0.16b}, [x0], #16
406 next_tweak v4, v4, v8
410 ldp x29, x30, [sp], #16
412 AES_ENDPROC(aes_xts_encrypt)
/*
 * XTS decryption: same tweak schedule as aes_xts_encrypt — note the
 * first tweak is still ENcrypted with rk2 (the tweak key is always
 * used in the forward direction), only the data blocks use the decrypt
 * round keys.  NOTE(review): the iv load into v4, loop labels/branches
 * and the final iv store are elided from this chunk.
 */
415 AES_ENTRY(aes_xts_decrypt)
416 stp x29, x30, [sp, #-16]!		/* non-leaf: bl below clobbers x30 */
421 cbz w7, .Lxtsdecnotfirst
423 enc_prepare w3, x5, x8
424 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
425 dec_prepare w3, x2, x8
429 dec_prepare w3, x2, x8
431 next_tweak v4, v4, v8
/* 4x path: tweaks v4-v7, one per block */
435 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
436 next_tweak v5, v4, v8
437 eor v0.16b, v0.16b, v4.16b
438 next_tweak v6, v5, v8
439 eor v1.16b, v1.16b, v5.16b
440 eor v2.16b, v2.16b, v6.16b
441 next_tweak v7, v6, v8
442 eor v3.16b, v3.16b, v7.16b
443 bl aes_decrypt_block4x
444 eor v3.16b, v3.16b, v7.16b	/* second xor of the xex construction */
445 eor v0.16b, v0.16b, v4.16b
446 eor v1.16b, v1.16b, v5.16b
447 eor v2.16b, v2.16b, v6.16b
448 st1 {v0.16b-v3.16b}, [x0], #64
/* tail: single blocks, tweak advanced each iteration */
457 ld1 {v1.16b}, [x1], #16
458 eor v0.16b, v1.16b, v4.16b
459 decrypt_block v0, w3, x2, x8, w7
460 eor v0.16b, v0.16b, v4.16b
461 st1 {v0.16b}, [x0], #16
464 next_tweak v4, v4, v8
468 ldp x29, x30, [sp], #16
470 AES_ENDPROC(aes_xts_decrypt)
473 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
474 * int blocks, u8 dg[], int enc_before, int enc_after)
/*
 * CBC-MAC update: fold input blocks into the digest v0 by xor-then-
 * encrypt.  The 4x path xors/encrypts four blocks back to back (a MAC
 * is inherently sequential, so no block4x interleave is possible).
 * Uses callee-saved x19-x24 (x19 = in ptr, x20/w21 = rk/rounds,
 * x23 = dg ptr; x24 is presumably enc_after, selected via csinv —
 * confirm against the elided frame-setup lines).  cond_yield_neon
 * allows preemption between chunks, restarting at .Lmacrestart which
 * reloads the digest and re-expands the key.
 * NOTE(review): the frame push/pop, loop labels and branches are
 * elided from this chunk.
 */
476 AES_ENTRY(aes_mac_update)
486 ld1 {v0.16b}, [x23] /* get dg */
487 enc_prepare w2, x1, x7
490 encrypt_block v0, w2, x1, x7, w8	/* enc_before: whiten the digest */
/* 4x path: four dependent xor+encrypt rounds */
495 ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */
496 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
497 encrypt_block v0, w21, x20, x7, w8
498 eor v0.16b, v0.16b, v2.16b
499 encrypt_block v0, w21, x20, x7, w8
500 eor v0.16b, v0.16b, v3.16b
501 encrypt_block v0, w21, x20, x7, w8
502 eor v0.16b, v0.16b, v4.16b
504 csinv x5, x24, xzr, eq	/* select enc_after behaviour on last block */
506 encrypt_block v0, w21, x20, x7, w8
507 st1 {v0.16b}, [x23] /* return dg */
508 cond_yield_neon .Lmacrestart	/* yield point; state already saved */
/* single-block path */
514 ld1 {v1.16b}, [x19], #16 /* get next pt block */
515 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
518 csinv x5, x24, xzr, eq
522 encrypt_block v0, w21, x20, x7, w8
526 st1 {v0.16b}, [x23] /* return dg */
/* restart after a voluntary yield: reload digest, re-expand key */
531 ld1 {v0.16b}, [x23] /* get dg */
532 enc_prepare w21, x20, x0
534 AES_ENDPROC(aes_mac_update)