crypto: arm64/aes-cts-cbc-ce - performance tweak
[linux-2.6-block.git] / arch / arm64 / crypto / aes-modes.S
CommitLineData
d2912cb1 1/* SPDX-License-Identifier: GPL-2.0-only */
49788fe2
AB
2/*
3 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
4 *
4860620d 5 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
49788fe2
AB
6 */
7
8/* included by aes-ce.S and aes-neon.S */
9
/* Everything in this file is code; place it in .text, aligned to 2^4 bytes. */
	.text
	.align		4

/*
 * MAX_STRIDE is the number of blocks the interleaved "Nx" loops in this
 * file consume per iteration.  It defaults to 4 unless it was already
 * defined before this point.
 */
#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

/*
 * ST4(...) expands to its argument only when MAX_STRIDE == 4, and ST5(...)
 * only otherwise.  They let a single instruction stream carry both the
 * 4-way and the 5-way variant of a loop body.
 */
#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif
24
/*
 * aes_encrypt_block4x - out-of-line wrapper around the encrypt_block4x macro.
 *
 * Operates in place on the four blocks held in v0-v3.  w3 and x2 are passed
 * through to the macro; every caller in this file reaches this point with
 * w3 = rounds and x2 = round-key pointer.  x8 and w7 are handed to the macro
 * as its last two operands -- presumably scratch registers; confirm against
 * the macro definition in the including file.
 *
 * Reached with `bl`, so callers must have preserved x30.
 */
aes_encrypt_block4x:
	encrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block4x)
29
/*
 * aes_decrypt_block4x - out-of-line wrapper around the decrypt_block4x macro.
 *
 * Operates in place on the four blocks held in v0-v3.  w3 and x2 are passed
 * through to the macro; every caller in this file reaches this point with
 * w3 = rounds and x2 = round-key pointer.  x8 and w7 are handed to the macro
 * as its last two operands -- presumably scratch registers; confirm against
 * the macro definition in the including file.
 *
 * Reached with `bl`, so callers must have preserved x30.
 */
aes_decrypt_block4x:
	decrypt_block4x	v0, v1, v2, v3, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block4x)
34
e2174139
AB
/*
 * 5-way counterparts of the 4x wrappers, only assembled when the including
 * file selected MAX_STRIDE == 5.  They operate in place on v0-v4 and take
 * the same w3/x2/x8/w7 operands as the 4x wrappers.
 */
#if MAX_STRIDE == 5
aes_encrypt_block5x:
	encrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
ENDPROC(aes_encrypt_block5x)

aes_decrypt_block5x:
	decrypt_block5x	v0, v1, v2, v3, v4, w3, x2, x8, w7
	ret
ENDPROC(aes_decrypt_block5x)
#endif
46
49788fe2
AB
	/*
	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks)
	 */
53
/*
 * aes_ecb_encrypt - encrypt w4 consecutive 16-byte blocks in ECB mode.
 *
 * Registers on entry (AAPCS64 argument order):
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks
 *
 * Blocks are processed MAX_STRIDE at a time while enough remain, then one
 * at a time.  x0 and x1 are post-incremented past the data handled.
 * x5 and w6 are passed to the cipher macros as temporaries.
 */
AES_ENTRY(aes_ecb_encrypt)
	stp		x29, x30, [sp, #-16]!		/* x30 is clobbered by bl */
	mov		x29, sp

	enc_prepare	w3, x2, x5

.LecbencloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbenc1x			/* < MAX_STRIDE blocks left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get first 4 pt blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)	/* 5th pt block */
ST5(	bl		aes_encrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbencloopNx
.Lecbenc1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the last subtraction */
	beq		.Lecbencout			/* nothing left over */
.Lecbencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	encrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbencloop
.Lecbencout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_encrypt)
83
84
/*
 * aes_ecb_decrypt - decrypt w4 consecutive 16-byte blocks in ECB mode.
 *
 * Registers on entry (AAPCS64 argument order):
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks
 *
 * Blocks are processed MAX_STRIDE at a time while enough remain, then one
 * at a time.  x0 and x1 are post-incremented past the data handled.
 * x5 and w6 are passed to the cipher macros as temporaries.
 */
AES_ENTRY(aes_ecb_decrypt)
	stp		x29, x30, [sp, #-16]!		/* x30 is clobbered by bl */
	mov		x29, sp

	dec_prepare	w3, x2, x5

.LecbdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lecbdec1x			/* < MAX_STRIDE blocks left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get first 4 ct blocks */
ST4(	bl		aes_decrypt_block4x		)
ST5(	ld1		{v4.16b}, [x1], #16		)	/* 5th ct block */
ST5(	bl		aes_decrypt_block5x		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LecbdecloopNx
.Lecbdec1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the last subtraction */
	beq		.Lecbdecout			/* nothing left over */
.Lecbdecloop:
	ld1		{v0.16b}, [x1], #16		/* get next ct block */
	decrypt_block	v0, w3, x2, x5, w6
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lecbdecloop
.Lecbdecout:
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_ecb_decrypt)
114
115
	/*
	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 iv[])
	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
	 *			 int rounds, int blocks, u8 iv[],
	 *			 u32 const rk2[]);
	 */
128
735177ca
AB
/*
 * aes_essiv_cbc_encrypt / aes_cbc_encrypt - CBC encryption with two entry
 * points sharing one loop.
 *
 * Registers on entry (AAPCS64 argument order):
 *   x0 = out, x1 = in, x2 = rk (rk1), w3 = rounds, w4 = blocks, x5 = iv
 *   x6 = rk2 (aes_essiv_cbc_encrypt only)
 *
 * The ESSIV entry first encrypts the IV with the rk2 key using a fixed 14
 * rounds, switches the cipher over to the rk1 key and then joins the plain
 * CBC loop.  The plain entry only loads the IV and prepares the key.
 *
 * v4 holds the chaining value throughout and is stored back to [x5] on
 * exit.  CBC encryption is inherently serial (each block depends on the
 * previous ciphertext), so the 4x loop only batches loads and stores; the
 * four encrypt_block invocations still run back to back.  No `bl` is
 * issued, so no stack frame is set up.
 */
AES_ENTRY(aes_essiv_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	v4, w8, x6, x7, w9		/* iv := E(rk2, iv) */
	enc_switch_key	w3, x2, x6			/* continue with rk1 */
	b		.Lcbcencloop4x

AES_ENTRY(aes_cbc_encrypt)
	ld1		{v4.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

.Lcbcencloop4x:
	subs		w4, w4, #4
	bmi		.Lcbcenc1x			/* fewer than 4 blocks left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	eor		v0.16b, v0.16b, v4.16b		/* ..and xor with iv */
	encrypt_block	v0, w3, x2, x6, w7
	eor		v1.16b, v1.16b, v0.16b		/* chain: previous ct */
	encrypt_block	v1, w3, x2, x6, w7
	eor		v2.16b, v2.16b, v1.16b
	encrypt_block	v2, w3, x2, x6, w7
	eor		v3.16b, v3.16b, v2.16b
	encrypt_block	v3, w3, x2, x6, w7
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v3.16b			/* last ct is next iv */
	b		.Lcbcencloop4x
.Lcbcenc1x:
	adds		w4, w4, #4			/* undo the last subtraction */
	beq		.Lcbcencout
.Lcbcencloop:
	ld1		{v0.16b}, [x1], #16		/* get next pt block */
	eor		v4.16b, v4.16b, v0.16b		/* ..and xor with iv */
	encrypt_block	v4, w3, x2, x6, w7
	st1		{v4.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcencloop
.Lcbcencout:
	st1		{v4.16b}, [x5]			/* return iv */
	ret
AES_ENDPROC(aes_cbc_encrypt)
AES_ENDPROC(aes_essiv_cbc_encrypt)
172
/*
 * aes_essiv_cbc_decrypt / aes_cbc_decrypt - CBC decryption with two entry
 * points sharing one loop.
 *
 * Registers on entry (AAPCS64 argument order):
 *   x0 = out, x1 = in, x2 = rk (rk1), w3 = rounds, w4 = blocks, x5 = iv
 *   x6 = rk2 (aes_essiv_cbc_decrypt only)
 *
 * `cbciv` is a vector register alias defined outside this file section; it
 * holds the chaining value and is stored back to [x5] on exit.
 *
 * The ESSIV entry encrypts (not decrypts) the IV with the rk2 key using a
 * fixed 14 rounds before joining the common path at .Lessivcbcdecstart.
 *
 * In the Nx loop the ciphertext blocks are needed twice: as cipher input
 * and as the xor value for the following block.  Copies of the leading
 * blocks are kept in spare vector registers across the `bl`; the trailing
 * block(s) are re-read from memory afterwards by stepping x1 back.
 */
AES_ENTRY(aes_essiv_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!		/* x30 is clobbered by bl */
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */

	mov		w8, #14				/* AES-256: 14 rounds */
	enc_prepare	w8, x6, x7
	encrypt_block	cbciv, w8, x6, x7, w9		/* iv := E(rk2, iv) */
	b		.Lessivcbcdecstart

AES_ENTRY(aes_cbc_decrypt)
	stp		x29, x30, [sp, #-16]!		/* x30 is clobbered by bl */
	mov		x29, sp

	ld1		{cbciv.16b}, [x5]		/* get iv */
.Lessivcbcdecstart:
	dec_prepare	w3, x2, x6

.LcbcdecloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lcbcdec1x			/* < MAX_STRIDE blocks left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
#if MAX_STRIDE == 5
	ld1		{v4.16b}, [x1], #16		/* get 1 ct block */
	mov		v5.16b, v0.16b			/* keep ct0..ct2 for chaining */
	mov		v6.16b, v1.16b
	mov		v7.16b, v2.16b
	bl		aes_decrypt_block5x
	sub		x1, x1, #32			/* back up to ct3 */
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [x1], #16		/* reload 1 ct block */
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v5.16b
#else
	mov		v4.16b, v0.16b			/* keep ct0..ct2 for chaining */
	mov		v5.16b, v1.16b
	mov		v6.16b, v2.16b
	bl		aes_decrypt_block4x
	sub		x1, x1, #16			/* back up to ct3 */
	eor		v0.16b, v0.16b, cbciv.16b
	eor		v1.16b, v1.16b, v4.16b
	ld1		{cbciv.16b}, [x1], #16		/* reload 1 ct block */
	eor		v2.16b, v2.16b, v5.16b
	eor		v3.16b, v3.16b, v6.16b
#endif
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	b		.LcbcdecloopNx
.Lcbcdec1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the last subtraction */
	beq		.Lcbcdecout
.Lcbcdecloop:
	ld1		{v1.16b}, [x1], #16		/* get next ct block */
	mov		v0.16b, v1.16b			/* ...and copy to v0 */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, cbciv.16b	/* xor with iv => pt */
	mov		cbciv.16b, v1.16b		/* ct is next iv */
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	bne		.Lcbcdecloop
.Lcbcdecout:
	st1		{cbciv.16b}, [x5]		/* return iv */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_cbc_decrypt)
AES_ENDPROC(aes_essiv_cbc_decrypt)
49788fe2
AB
243
244
dd597fb3
AB
	/*
	 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
	 *		       int rounds, int bytes, u8 const iv[])
	 */
251
/*
 * aes_cbc_cts_encrypt - encrypt the final two (possibly overlapping) blocks
 * of a CBC message using ciphertext stealing.
 *
 * Registers on entry (AAPCS64 argument order):
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, x4 = bytes, x5 = iv
 *
 * With n = bytes - 16, the code reads 16 bytes at in[0] and 16 bytes at
 * in[n], and writes 16 bytes at out[0] and 16 bytes at out[n].  This
 * presumably requires 16 < bytes <= 32 -- confirm against the caller.
 *
 * Two permute vectors are taken from .Lcts_permute_table at offsets n and
 * 32 - n.  TBL writes zero for every 0xff index, so the vectors shift a
 * block by 16 - n bytes in either direction and zero-fill the rest.
 */
AES_ENTRY(aes_cbc_cts_encrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16			/* n = bytes - 16 */
	add		x9, x8, #32
	add		x8, x8, x4			/* table + n */
	sub		x9, x9, x4			/* table + 32 - n */
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	enc_prepare	w3, x2, x6

	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
	tbl		v1.16b, {v1.16b}, v4.16b	/* tail bytes, zero padded */
	encrypt_block	v0, w3, x2, x6, w7

	eor		v1.16b, v1.16b, v0.16b		/* chain with first ct */
	tbl		v0.16b, {v0.16b}, v3.16b	/* place first ct for out[n] */
	encrypt_block	v1, w3, x2, x6, w7

	add		x4, x0, x4			/* &out[n] */
	st1		{v0.16b}, [x4]			/* overlapping stores */
	st1		{v1.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_encrypt)
280
/*
 * aes_cbc_cts_decrypt - decrypt the final two (possibly overlapping) blocks
 * of a CBC message that was encrypted with ciphertext stealing.
 *
 * Registers on entry (AAPCS64 argument order):
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, x4 = bytes, x5 = iv
 *
 * With n = bytes - 16, the code reads 16 bytes at in[0] and 16 bytes at
 * in[n], and writes 16 bytes at out[0] and 16 bytes at out[n].  This
 * presumably requires 16 < bytes <= 32 -- confirm against the caller.
 *
 * Two permute vectors are taken from .Lcts_permute_table at offsets n and
 * 32 - n.  For a 0xff index TBL writes zero, while TBX leaves the
 * destination byte untouched; the TBX below relies on that to merge the
 * trailing ciphertext bytes into the decrypted first block.
 */
AES_ENTRY(aes_cbc_cts_decrypt)
	adr_l		x8, .Lcts_permute_table
	sub		x4, x4, #16			/* n = bytes - 16 */
	add		x9, x8, #32
	add		x8, x8, x4			/* table + n */
	sub		x9, x9, x4			/* table + 32 - n */
	ld1		{v3.16b}, [x8]
	ld1		{v4.16b}, [x9]

	ld1		{v0.16b}, [x1], x4		/* overlapping loads */
	ld1		{v1.16b}, [x1]

	ld1		{v5.16b}, [x5]			/* get iv */
	dec_prepare	w3, x2, x6

	decrypt_block	v0, w3, x2, x6, w7
	tbl		v2.16b, {v0.16b}, v3.16b
	eor		v2.16b, v2.16b, v1.16b		/* data stored at out[n] */

	tbx		v0.16b, {v1.16b}, v4.16b	/* merge in tail ct bytes */
	decrypt_block	v0, w3, x2, x6, w7
	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */

	add		x4, x0, x4			/* &out[n] */
	st1		{v2.16b}, [x4]			/* overlapping stores */
	st1		{v0.16b}, [x0]
	ret
AES_ENDPROC(aes_cbc_cts_decrypt)
309
/*
 * Permute table for the CTS routines: 16 bytes of 0xff, the identity
 * permutation 0..15, then another 16 bytes of 0xff (48 bytes in total).
 * A 16-byte window read at offset k (0 <= k <= 32) yields a TBL/TBX index
 * vector that shifts a block by 16 - k byte positions; the 0xff entries
 * are out-of-range indexes.  Aligned to 2^6 = 64 bytes.
 */
	.section	".rodata", "a"
	.align		6
.Lcts_permute_table:
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
	.previous
320
321
49788fe2
AB
	/*
	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		   int blocks, u8 ctr[])
	 */
326
/*
 * aes_ctr_encrypt - CTR mode: xor the input with the encrypted counter
 * stream.
 *
 * Registers on entry (AAPCS64 argument order):
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = ctr
 *
 * `vctr` is a vector register alias defined outside this file section; it
 * holds the 16-byte big-endian counter block.  x6 keeps a byte-swapped
 * (native-endian) copy of the counter's low 64 bits so that it can be
 * incremented with ordinary integer arithmetic.
 *
 * The interleaved Nx path only patches the low 32 bits of the counter
 * (lane s[3]), so it is skipped in favour of the one-block loop whenever
 * adding w4 to the low 32 counter bits would carry out of bit 31.
 *
 * In the one-block loop, a block count that goes negative selects
 * .Lctrtailblock, which stores the raw keystream block to [x0] without
 * reading any input -- presumably so the caller can xor a partial final
 * block itself; confirm against the caller.
 *
 * The updated counter is stored back to [x5] on exit.
 */
AES_ENTRY(aes_ctr_encrypt)
	stp		x29, x30, [sp, #-16]!		/* x30 is clobbered by bl */
	mov		x29, sp

	enc_prepare	w3, x2, x6
	ld1		{vctr.16b}, [x5]

	umov		x6, vctr.d[1]		/* keep swabbed ctr in reg */
	rev		x6, x6
	cmn		w6, w4			/* 32 bit overflow? */
	bcs		.Lctrloop
.LctrloopNx:
	subs		w4, w4, #MAX_STRIDE
	bmi		.Lctr1x
	add		w7, w6, #1
	mov		v0.16b, vctr.16b
	add		w8, w6, #2
	mov		v1.16b, vctr.16b
	add		w9, w6, #3
	mov		v2.16b, vctr.16b
	rev		w7, w7
	mov		v3.16b, vctr.16b
	rev		w8, w8
ST5(	mov		v4.16b, vctr.16b		)
	mov		v1.s[3], w7			/* ctr + 1 */
	rev		w9, w9
ST5(	add		w10, w6, #4			)
	mov		v2.s[3], w8			/* ctr + 2 */
ST5(	rev		w10, w10			)
	mov		v3.s[3], w9			/* ctr + 3 */
ST5(	mov		v4.s[3], w10			)	/* ctr + 4 */
	ld1		{v5.16b-v7.16b}, [x1], #48	/* get 3 input blocks */
ST4(	bl		aes_encrypt_block4x		)
ST5(	bl		aes_encrypt_block5x		)
	eor		v0.16b, v5.16b, v0.16b
ST4(	ld1		{v5.16b}, [x1], #16		)	/* 4th input block */
	eor		v1.16b, v6.16b, v1.16b
ST5(	ld1		{v5.16b-v6.16b}, [x1], #32	)	/* 4th + 5th block */
	eor		v2.16b, v7.16b, v2.16b
	eor		v3.16b, v5.16b, v3.16b
ST5(	eor		v4.16b, v6.16b, v4.16b		)
	st1		{v0.16b-v3.16b}, [x0], #64
ST5(	st1		{v4.16b}, [x0], #16		)
	add		x6, x6, #MAX_STRIDE		/* advance the counter */
	rev		x7, x6
	ins		vctr.d[1], x7
	cbz		w4, .Lctrout
	b		.LctrloopNx
.Lctr1x:
	adds		w4, w4, #MAX_STRIDE		/* undo the last subtraction */
	beq		.Lctrout
.Lctrloop:
	mov		v0.16b, vctr.16b
	encrypt_block	v0, w3, x2, x8, w7		/* v0 = keystream block */

	adds		x6, x6, #1		/* increment BE ctr */
	rev		x7, x6
	ins		vctr.d[1], x7
	bcs		.Lctrcarry		/* overflow? */

.Lctrcarrydone:
	subs		w4, w4, #1
	bmi		.Lctrtailblock		/* blocks <0 means tail block */
	ld1		{v3.16b}, [x1], #16
	eor		v3.16b, v0.16b, v3.16b
	st1		{v3.16b}, [x0], #16
	bne		.Lctrloop		/* flags still from the subs */

.Lctrout:
	st1		{vctr.16b}, [x5]	/* return next CTR value */
	ldp		x29, x30, [sp], #16
	ret

.Lctrtailblock:
	st1		{v0.16b}, [x0]		/* hand back raw keystream */
	b		.Lctrout

.Lctrcarry:
	umov		x7, vctr.d[0]		/* load upper 64 bits of ctr */
	rev		x7, x7			/* ... to handle the carry */
	add		x7, x7, #1
	rev		x7, x7
	ins		vctr.d[0], x7
	b		.Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
49788fe2
AB
413
414
	/*
	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
	 */
421
/*
 * next_tweak out, in, tmp - derive the next XTS tweak from the current one.
 *
 * Treating \in as two 64-bit lanes:
 *   1. \tmp  = per-lane mask, all ones where the lane's top bit is set
 *   2. \tmp &= xtsmask		(xtsmask is a register alias defined
 *				 outside this file section)
 *   3. \out  = \in + \in	(each lane shifted left by one bit)
 *   4. \tmp's two lanes are swapped, so each lane's carry lands on the
 *      other lane
 *   5. \out ^= \tmp
 *
 * \out and \in may name the same register; \tmp is clobbered.
 */
	.macro		next_tweak, out, in, tmp
	sshr		\tmp\().2d,  \in\().2d, #63
	and		\tmp\().16b, \tmp\().16b, xtsmask.16b
	add		\out\().2d,  \in\().2d, \in\().2d
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\out\().16b, \out\().16b, \tmp\().16b
	.endm
429
2e5d2f33
AB
/*
 * xts_load_mask tmp - build the constant used by next_tweak in xtsmask.
 *
 * After the uzp1 the 32-bit lanes of xtsmask are { 0x1, 0x0, 0x87, 0x0 },
 * i.e. 0x1 in the low 64-bit lane and 0x87 in the high 64-bit lane.
 * \tmp is clobbered.
 */
	.macro		xts_load_mask, tmp
	movi		xtsmask.2s, #0x1
	movi		\tmp\().2s, #0x87
	uzp1		xtsmask.4s, xtsmask.4s, \tmp\().4s
	.endm
49788fe2
AB
435
/*
 * aes_xts_encrypt - XTS-encrypt w4 consecutive 16-byte blocks.
 *
 * Registers on entry (AAPCS64 argument order):
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks, x5 = rk2,
 *   x6 = iv, w7 = first
 *
 * v4 holds the current tweak; it is loaded from [x6] and stored back there
 * on exit.  When `first` is non-zero the loaded value is first encrypted
 * with the rk2 key, after which the cipher is switched to rk1; otherwise
 * the cipher is prepared with rk1 directly and the loaded value is advanced
 * by one tweak step before use.
 *
 * Blocks are handled four at a time (tweaks in v4-v7) while enough remain,
 * then one at a time.  v8 serves as the next_tweak temporary.
 * xts_reload_mask is a macro supplied outside this file section; it is
 * presumably needed because the 4x cipher helper may clobber xtsmask --
 * confirm against the macro definition.
 */
AES_ENTRY(aes_xts_encrypt)
	stp		x29, x30, [sp, #-16]!		/* x30 is clobbered by bl */
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsencnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	enc_switch_key	w3, x2, x8
	b		.LxtsencNx

.Lxtsencnotfirst:
	enc_prepare	w3, x2, x8
.LxtsencloopNx:
	next_tweak	v4, v4, v8
.LxtsencNx:
	subs		w4, w4, #4
	bmi		.Lxtsenc1x			/* fewer than 4 blocks left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b		/* pre-whiten with tweaks */
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_encrypt_block4x
	eor		v3.16b, v3.16b, v7.16b		/* post-whiten with tweaks */
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* last tweak used */
	cbz		w4, .Lxtsencout
	xts_reload_mask	v8
	b		.LxtsencloopNx
.Lxtsenc1x:
	adds		w4, w4, #4			/* undo the last subtraction */
	beq		.Lxtsencout
.Lxtsencloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	encrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsencout
	next_tweak	v4, v4, v8
	b		.Lxtsencloop
.Lxtsencout:
	st1		{v4.16b}, [x6]			/* return last tweak used */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_encrypt)
492
493
/*
 * aes_xts_decrypt - XTS-decrypt w4 consecutive 16-byte blocks.
 *
 * Registers on entry (AAPCS64 argument order):
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks, x5 = rk2,
 *   x6 = iv, w7 = first
 *
 * v4 holds the current tweak; it is loaded from [x6] and stored back there
 * on exit.  When `first` is non-zero the loaded value is first *encrypted*
 * with the rk2 key (the tweak is always produced by encryption), after
 * which the cipher is prepared for decryption with rk1; otherwise the
 * loaded value is advanced by one tweak step before use.
 *
 * Blocks are handled four at a time (tweaks in v4-v7) while enough remain,
 * then one at a time.  v8 serves as the next_tweak temporary.
 * xts_reload_mask is a macro supplied outside this file section; it is
 * presumably needed because the 4x cipher helper may clobber xtsmask --
 * confirm against the macro definition.
 */
AES_ENTRY(aes_xts_decrypt)
	stp		x29, x30, [sp, #-16]!		/* x30 is clobbered by bl */
	mov		x29, sp

	ld1		{v4.16b}, [x6]
	xts_load_mask	v8
	cbz		w7, .Lxtsdecnotfirst

	enc_prepare	w3, x5, x8
	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
	dec_prepare	w3, x2, x8
	b		.LxtsdecNx

.Lxtsdecnotfirst:
	dec_prepare	w3, x2, x8
.LxtsdecloopNx:
	next_tweak	v4, v4, v8
.LxtsdecNx:
	subs		w4, w4, #4
	bmi		.Lxtsdec1x			/* fewer than 4 blocks left */
	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
	next_tweak	v5, v4, v8
	eor		v0.16b, v0.16b, v4.16b		/* pre-whiten with tweaks */
	next_tweak	v6, v5, v8
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	next_tweak	v7, v6, v8
	eor		v3.16b, v3.16b, v7.16b
	bl		aes_decrypt_block4x
	eor		v3.16b, v3.16b, v7.16b		/* post-whiten with tweaks */
	eor		v0.16b, v0.16b, v4.16b
	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	st1		{v0.16b-v3.16b}, [x0], #64
	mov		v4.16b, v7.16b			/* last tweak used */
	cbz		w4, .Lxtsdecout
	xts_reload_mask	v8
	b		.LxtsdecloopNx
.Lxtsdec1x:
	adds		w4, w4, #4			/* undo the last subtraction */
	beq		.Lxtsdecout
.Lxtsdecloop:
	ld1		{v1.16b}, [x1], #16
	eor		v0.16b, v1.16b, v4.16b
	decrypt_block	v0, w3, x2, x8, w7
	eor		v0.16b, v0.16b, v4.16b
	st1		{v0.16b}, [x0], #16
	subs		w4, w4, #1
	beq		.Lxtsdecout
	next_tweak	v4, v4, v8
	b		.Lxtsdecloop
.Lxtsdecout:
	st1		{v4.16b}, [x6]			/* return last tweak used */
	ldp		x29, x30, [sp], #16
	ret
AES_ENDPROC(aes_xts_decrypt)
4860620d
AB
550
	/*
	 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
	 *		  int blocks, u8 dg[], int enc_before, int enc_after)
	 */
/*
 * aes_mac_update - fold w3 input blocks into a CBC-MAC style digest.
 *
 * Registers on entry (AAPCS64 argument order):
 *   x0 = in, x1 = rk, w2 = rounds, w3 = blocks, x4 = dg,
 *   w5 = enc_before, w6 = enc_after
 *
 * The arguments are copied to x19-x24 (in, rk, rounds, blocks, dg,
 * enc_after) right after frame_push, and the loops work on those copies.
 * frame_push, frame_pop and cond_yield_neon are macros defined outside
 * this file section; cond_yield_neon presumably may give up the NEON unit
 * and resume at the given label, which is why .Lmacrestart reloads the
 * digest and re-prepares the key -- confirm against the macro definition.
 *
 * v0 holds the running digest.  If enc_before is non-zero the digest is
 * encrypted once before any input is absorbed.  Each input block is xored
 * into v0 and v0 is then encrypted, except that the encryption following
 * the very last block is performed only if enc_after is non-zero.  The
 * digest is stored back to [x23] on exit.
 */
AES_ENTRY(aes_mac_update)
	frame_push	6

	mov		x19, x0				/* in */
	mov		x20, x1				/* rk */
	mov		x21, x2				/* rounds */
	mov		x22, x3				/* blocks */
	mov		x23, x4				/* dg */
	mov		x24, x6				/* enc_after */

	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w2, x1, x7
	cbz		w5, .Lmacloop4x			/* !enc_before: skip */

	encrypt_block	v0, w2, x1, x7, w8

.Lmacloop4x:
	subs		w22, w22, #4
	bmi		.Lmac1x				/* fewer than 4 blocks left */
	ld1		{v1.16b-v4.16b}, [x19], #64	/* get next 4 pt blocks */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v2.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v3.16b
	encrypt_block	v0, w21, x20, x7, w8
	eor		v0.16b, v0.16b, v4.16b
	cmp		w22, wzr
	csinv		x5, x24, xzr, eq		/* last block ? enc_after : ~0 */
	cbz		w5, .Lmacout			/* last block && !enc_after */
	encrypt_block	v0, w21, x20, x7, w8
	st1		{v0.16b}, [x23]			/* return dg */
	cond_yield_neon	.Lmacrestart
	b		.Lmacloop4x
.Lmac1x:
	add		w22, w22, #4			/* undo the last subtraction */
.Lmacloop:
	cbz		w22, .Lmacout
	ld1		{v1.16b}, [x19], #16		/* get next pt block */
	eor		v0.16b, v0.16b, v1.16b		/* ..and xor with dg */

	subs		w22, w22, #1
	csinv		x5, x24, xzr, eq		/* last block ? enc_after : ~0 */
	cbz		w5, .Lmacout			/* last block && !enc_after */

.Lmacenc:
	encrypt_block	v0, w21, x20, x7, w8
	b		.Lmacloop

.Lmacout:
	st1		{v0.16b}, [x23]			/* return dg */
	frame_pop
	ret

.Lmacrestart:
	ld1		{v0.16b}, [x23]			/* get dg */
	enc_prepare	w21, x20, x0
	b		.Lmacloop4x
AES_ENDPROC(aes_mac_update)