Merge tag 'upstream-5.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw...
[linux-2.6-block.git] / arch / arm64 / crypto / aes-modes.S
CommitLineData
d2912cb1 1/* SPDX-License-Identifier: GPL-2.0-only */
49788fe2
AB
2/*
3 * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES
4 *
4860620d 5 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
49788fe2
AB
6 */
7
8/* included by aes-ce.S and aes-neon.S */
9
/*
 * Everything that follows is assembled into the .text section.
 * NOTE(review): ".align 4" is presumably a power-of-two alignment
 * (16 bytes) on this target - confirm against the assembler in use.
 */
10 .text
11 .align 4
12
e2174139
AB
/*
 * MAX_STRIDE is the number of blocks handled per iteration by the
 * "Nx" loops below. It defaults to 4 unless it was already defined
 * before this point.
 */
13#ifndef MAX_STRIDE
14#define MAX_STRIDE 4
15#endif
16
7367bfeb
AB
/*
 * ST4(...) / ST5(...) wrap statements that must only be emitted for one
 * of the two strides: ST4() expands to its argument when MAX_STRIDE is 4
 * and to nothing otherwise; ST5() is the exact opposite.
 */
17#if MAX_STRIDE == 4
18#define ST4(x...) x
19#define ST5(x...)
20#else
21#define ST4(x...)
22#define ST5(x...) x
23#endif
24
/*
 * aes_encrypt_block4x - out-of-line wrapper around the encrypt_block4x
 * macro so that callers can reach it with "bl".
 *
 * The macro is invoked on v0-v3 with w3 and x2 (which the callers in
 * this file use for the round count and the round-key pointer), plus x8
 * and w7. NOTE(review): x8/w7 are presumably temporaries clobbered by
 * the macro - confirm against its definition in the including file.
 */
49788fe2 25aes_encrypt_block4x:
6e7de6af 26 encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
49788fe2
AB
27 ret
28ENDPROC(aes_encrypt_block4x)
29
/*
 * aes_decrypt_block4x - out-of-line wrapper around the decrypt_block4x
 * macro so that callers can reach it with "bl".
 *
 * The macro is invoked on v0-v3 with w3 and x2 (which the callers in
 * this file use for the round count and the round-key pointer), plus x8
 * and w7. NOTE(review): x8/w7 are presumably temporaries clobbered by
 * the macro - confirm against its definition in the including file.
 */
30aes_decrypt_block4x:
6e7de6af 31 decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
49788fe2
AB
32 ret
33ENDPROC(aes_decrypt_block4x)
34
e2174139
AB
/*
 * Five-way variants of the helpers above; only assembled when
 * MAX_STRIDE is 5. They wrap the encrypt_block5x / decrypt_block5x
 * macros, which are invoked on v0-v4 with the same w3, x2, x8, w7
 * operands as the four-way helpers.
 */
35#if MAX_STRIDE == 5
36aes_encrypt_block5x:
37 encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
38 ret
39ENDPROC(aes_encrypt_block5x)
40
41aes_decrypt_block5x:
42 decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
43 ret
44ENDPROC(aes_decrypt_block5x)
45#endif
46
49788fe2
AB
47 /*
48 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
68338174 49 * int blocks)
49788fe2 50 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
68338174 51 * int blocks)
49788fe2
AB
52 */
53
/*
 * aes_ecb_encrypt - encrypt `blocks` 16-byte blocks independently.
 *
 * Register use, matching the C prototype above:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks
 * x5 and w6 are handed to enc_prepare / encrypt_block as extra operands
 * (presumably temporaries - confirm against the macro definitions).
 * v0-v3 (and v4 when MAX_STRIDE is 5) carry the data.
 */
54AES_ENTRY(aes_ecb_encrypt)
6e7de6af
AB
/* build a frame record; x30 must survive the "bl" in the bulk loop */
55 stp x29, x30, [sp, #-16]!
56 mov x29, sp
49788fe2 57
6e7de6af 58 enc_prepare w3, x2, x5
49788fe2
AB
59
/* bulk loop: MAX_STRIDE blocks per iteration */
60.LecbencloopNx:
7367bfeb 61 subs w4, w4, #MAX_STRIDE
/* negative result: fewer than MAX_STRIDE blocks remain */
49788fe2 62 bmi .Lecbenc1x
6e7de6af 63 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
7367bfeb
AB
64ST4( bl aes_encrypt_block4x )
65ST5( ld1 {v4.16b}, [x1], #16 )
66ST5( bl aes_encrypt_block5x )
6e7de6af 67 st1 {v0.16b-v3.16b}, [x0], #64
7367bfeb 68ST5( st1 {v4.16b}, [x0], #16 )
49788fe2
AB
69 b .LecbencloopNx
70.Lecbenc1x:
/* undo the last subtraction; zero means no leftover blocks */
7367bfeb 71 adds w4, w4, #MAX_STRIDE
49788fe2 72 beq .Lecbencout
/* tail loop: one block per iteration until w4 reaches zero */
49788fe2 73.Lecbencloop:
6e7de6af
AB
74 ld1 {v0.16b}, [x1], #16 /* get next pt block */
75 encrypt_block v0, w3, x2, x5, w6
76 st1 {v0.16b}, [x0], #16
77 subs w4, w4, #1
49788fe2
AB
78 bne .Lecbencloop
79.Lecbencout:
6e7de6af 80 ldp x29, x30, [sp], #16
49788fe2
AB
81 ret
82AES_ENDPROC(aes_ecb_encrypt)
83
84
/*
 * aes_ecb_decrypt - decrypt `blocks` 16-byte blocks independently.
 *
 * Register use, matching the C prototype above:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks
 * x5 and w6 are handed to dec_prepare / decrypt_block as extra operands
 * (presumably temporaries - confirm against the macro definitions).
 * v0-v3 (and v4 when MAX_STRIDE is 5) carry the data.
 */
85AES_ENTRY(aes_ecb_decrypt)
6e7de6af
AB
/* build a frame record; x30 must survive the "bl" in the bulk loop */
86 stp x29, x30, [sp, #-16]!
87 mov x29, sp
0c8f838a 88
6e7de6af 89 dec_prepare w3, x2, x5
49788fe2
AB
90
/* bulk loop: MAX_STRIDE blocks per iteration */
91.LecbdecloopNx:
7367bfeb 92 subs w4, w4, #MAX_STRIDE
/* negative result: fewer than MAX_STRIDE blocks remain */
49788fe2 93 bmi .Lecbdec1x
6e7de6af 94 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
7367bfeb
AB
95ST4( bl aes_decrypt_block4x )
96ST5( ld1 {v4.16b}, [x1], #16 )
97ST5( bl aes_decrypt_block5x )
6e7de6af 98 st1 {v0.16b-v3.16b}, [x0], #64
7367bfeb 99ST5( st1 {v4.16b}, [x0], #16 )
49788fe2
AB
100 b .LecbdecloopNx
101.Lecbdec1x:
/* undo the last subtraction; zero means no leftover blocks */
7367bfeb 102 adds w4, w4, #MAX_STRIDE
49788fe2 103 beq .Lecbdecout
/* tail loop: one block per iteration until w4 reaches zero */
49788fe2 104.Lecbdecloop:
6e7de6af
AB
105 ld1 {v0.16b}, [x1], #16 /* get next ct block */
106 decrypt_block v0, w3, x2, x5, w6
107 st1 {v0.16b}, [x0], #16
108 subs w4, w4, #1
49788fe2
AB
109 bne .Lecbdecloop
110.Lecbdecout:
6e7de6af 111 ldp x29, x30, [sp], #16
49788fe2
AB
112 ret
113AES_ENDPROC(aes_ecb_decrypt)
114
115
116 /*
117 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
68338174 118 * int blocks, u8 iv[])
49788fe2 119 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
68338174 120 * int blocks, u8 iv[])
49788fe2
AB
121 */
122
/*
 * aes_cbc_encrypt - CBC-encrypt `blocks` blocks and write the final
 * chaining value back through the iv pointer.
 *
 * Register use, matching the C prototype above:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv
 * v4 holds the chaining value (IV, then the most recent ciphertext
 * block). x6 / w7 are passed to the macros as extra operands.
 *
 * Every block depends on the previous ciphertext, so even the 4x loop
 * issues four sequential encrypt_block invocations; it only batches the
 * loads and stores. No "bl" is used, hence no stack frame is set up.
 */
123AES_ENTRY(aes_cbc_encrypt)
6e7de6af
AB
124 ld1 {v4.16b}, [x5] /* get iv */
125 enc_prepare w3, x2, x6
49788fe2 126
a8f8a69e 127.Lcbcencloop4x:
6e7de6af 128 subs w4, w4, #4
/* negative result: fewer than 4 blocks remain */
a8f8a69e 129 bmi .Lcbcenc1x
6e7de6af 130 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
a8f8a69e 131 eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */
6e7de6af 132 encrypt_block v0, w3, x2, x6, w7
/* each following block is xored with the ciphertext just produced */
a8f8a69e 133 eor v1.16b, v1.16b, v0.16b
6e7de6af 134 encrypt_block v1, w3, x2, x6, w7
a8f8a69e 135 eor v2.16b, v2.16b, v1.16b
6e7de6af 136 encrypt_block v2, w3, x2, x6, w7
a8f8a69e 137 eor v3.16b, v3.16b, v2.16b
6e7de6af
AB
138 encrypt_block v3, w3, x2, x6, w7
139 st1 {v0.16b-v3.16b}, [x0], #64
a8f8a69e
AB
/* last ciphertext block becomes the chaining value */
140 mov v4.16b, v3.16b
141 b .Lcbcencloop4x
142.Lcbcenc1x:
/* undo the last subtraction; zero means no leftover blocks */
6e7de6af 143 adds w4, w4, #4
a8f8a69e
AB
144 beq .Lcbcencout
145.Lcbcencloop:
6e7de6af 146 ld1 {v0.16b}, [x1], #16 /* get next pt block */
a8f8a69e 147 eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */
6e7de6af
AB
/* encrypt in place in v4 so the result is already the next chaining value */
148 encrypt_block v4, w3, x2, x6, w7
149 st1 {v4.16b}, [x0], #16
150 subs w4, w4, #1
49788fe2 151 bne .Lcbcencloop
a8f8a69e 152.Lcbcencout:
6e7de6af 153 st1 {v4.16b}, [x5] /* return iv */
49788fe2
AB
154 ret
155AES_ENDPROC(aes_cbc_encrypt)
156
157
/*
 * aes_cbc_decrypt - CBC-decrypt `blocks` blocks and write the final
 * chaining value back through the iv pointer.
 *
 * Register use, matching the C prototype above:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, w4 = blocks, x5 = iv
 * cbciv is a register alias defined outside this file; it holds the
 * chaining value (IV, then the last ciphertext block consumed).
 *
 * In the bulk loop the ciphertext blocks are needed twice (as decrypt
 * input and as the xor mask for the following block). Copies of the
 * first three are kept in spare vector registers; the remaining ones
 * are re-read from memory after the decrypt call by rewinding x1.
 */
158AES_ENTRY(aes_cbc_decrypt)
6e7de6af
AB
/* build a frame record; x30 must survive the "bl" in the bulk loop */
159 stp x29, x30, [sp, #-16]!
160 mov x29, sp
49788fe2 161
7367bfeb 162 ld1 {cbciv.16b}, [x5] /* get iv */
6e7de6af 163 dec_prepare w3, x2, x6
49788fe2
AB
164
165.LcbcdecloopNx:
7367bfeb 166 subs w4, w4, #MAX_STRIDE
/* negative result: fewer than MAX_STRIDE blocks remain */
49788fe2 167 bmi .Lcbcdec1x
6e7de6af 168 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
7367bfeb
AB
169#if MAX_STRIDE == 5
170 ld1 {v4.16b}, [x1], #16 /* get 1 ct block */
/* keep copies of ct blocks 0-2 for the post-decrypt xor */
171 mov v5.16b, v0.16b
172 mov v6.16b, v1.16b
173 mov v7.16b, v2.16b
174 bl aes_decrypt_block5x
/* rewind x1 by two blocks so ct blocks 3 and 4 can be re-read */
175 sub x1, x1, #32
176 eor v0.16b, v0.16b, cbciv.16b
177 eor v1.16b, v1.16b, v5.16b
/* v5 is free again: reload ct block 3; ct block 4 becomes the next iv */
178 ld1 {v5.16b}, [x1], #16 /* reload 1 ct block */
179 ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
180 eor v2.16b, v2.16b, v6.16b
181 eor v3.16b, v3.16b, v7.16b
182 eor v4.16b, v4.16b, v5.16b
183#else
49788fe2
AB
/* keep copies of ct blocks 0-2 for the post-decrypt xor */
184 mov v4.16b, v0.16b
185 mov v5.16b, v1.16b
186 mov v6.16b, v2.16b
55868b45 187 bl aes_decrypt_block4x
/* rewind x1 by one block so ct block 3 can be re-read as the next iv */
6e7de6af 188 sub x1, x1, #16
7367bfeb 189 eor v0.16b, v0.16b, cbciv.16b
49788fe2 190 eor v1.16b, v1.16b, v4.16b
7367bfeb 191 ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
49788fe2
AB
192 eor v2.16b, v2.16b, v5.16b
193 eor v3.16b, v3.16b, v6.16b
7367bfeb 194#endif
6e7de6af 195 st1 {v0.16b-v3.16b}, [x0], #64
7367bfeb 196ST5( st1 {v4.16b}, [x0], #16 )
49788fe2
AB
197 b .LcbcdecloopNx
198.Lcbcdec1x:
/* undo the last subtraction; zero means no leftover blocks */
7367bfeb 199 adds w4, w4, #MAX_STRIDE
49788fe2 200 beq .Lcbcdecout
49788fe2 201.Lcbcdecloop:
6e7de6af 202 ld1 {v1.16b}, [x1], #16 /* get next ct block */
49788fe2 203 mov v0.16b, v1.16b /* ...and copy to v0 */
6e7de6af 204 decrypt_block v0, w3, x2, x6, w7
7367bfeb
AB
205 eor v0.16b, v0.16b, cbciv.16b /* xor with iv => pt */
206 mov cbciv.16b, v1.16b /* ct is next iv */
6e7de6af
AB
207 st1 {v0.16b}, [x0], #16
208 subs w4, w4, #1
49788fe2
AB
209 bne .Lcbcdecloop
210.Lcbcdecout:
7367bfeb 211 st1 {cbciv.16b}, [x5] /* return iv */
6e7de6af 212 ldp x29, x30, [sp], #16
49788fe2
AB
213 ret
214AES_ENDPROC(aes_cbc_decrypt)
215
216
dd597fb3
AB
217 /*
218 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
219 * int rounds, int bytes, u8 const iv[])
220 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
221 * int rounds, int bytes, u8 const iv[])
222 */
223
/*
 * aes_cbc_cts_encrypt - encrypt one full block plus a trailing block
 * that is reached through overlapping 16-byte loads and stores.
 *
 * Register use, matching the C prototype above:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, x4 = bytes, x5 = iv
 * After the initial subtraction x4 = bytes - 16, i.e. the offset of the
 * second 16-byte window relative to the first.
 * NOTE(review): this looks like the final step of CBC with ciphertext
 * stealing and presumably expects 16 < bytes <= 32 - confirm with the
 * C caller. The iv buffer is only read, never written back.
 */
224AES_ENTRY(aes_cbc_cts_encrypt)
225 adr_l x8, .Lcts_permute_table
226 sub x4, x4, #16
/* x8 = table + (bytes - 16), x9 = table + 32 - (bytes - 16) */
227 add x9, x8, #32
228 add x8, x8, x4
229 sub x9, x9, x4
/* v3 / v4 = byte-permutation vectors used with tbl below */
230 ld1 {v3.16b}, [x8]
231 ld1 {v4.16b}, [x9]
232
/* v0 = in[0..15]; the post-index moves x1 to in + (bytes - 16) for v1 */
233 ld1 {v0.16b}, [x1], x4 /* overlapping loads */
234 ld1 {v1.16b}, [x1]
235
236 ld1 {v5.16b}, [x5] /* get iv */
237 enc_prepare w3, x2, x6
238
239 eor v0.16b, v0.16b, v5.16b /* xor with iv */
/* move the trailing bytes of v1 to the front; other lanes become zero */
240 tbl v1.16b, {v1.16b}, v4.16b
241 encrypt_block v0, w3, x2, x6, w7
242
/* chain: xor the rearranged tail with the first ciphertext block */
243 eor v1.16b, v1.16b, v0.16b
/* move the leading bytes of the first ciphertext block to the end */
244 tbl v0.16b, {v0.16b}, v3.16b
245 encrypt_block v1, w3, x2, x6, w7
246
/*
 * Store order matters: the window at out + (bytes - 16) is written
 * first, then the full block at out overwrites the overlapping part.
 */
247 add x4, x0, x4
248 st1 {v0.16b}, [x4] /* overlapping stores */
249 st1 {v1.16b}, [x0]
250 ret
251AES_ENDPROC(aes_cbc_cts_encrypt)
252
/*
 * aes_cbc_cts_decrypt - inverse of aes_cbc_cts_encrypt: decrypt one
 * full block plus a trailing block that is reached through overlapping
 * 16-byte loads and stores.
 *
 * Register use, matching the C prototype above:
 *   x0 = out, x1 = in, x2 = rk, w3 = rounds, x4 = bytes, x5 = iv
 * After the initial subtraction x4 = bytes - 16, i.e. the offset of the
 * second 16-byte window relative to the first.
 * NOTE(review): presumably expects 16 < bytes <= 32 - confirm with the
 * C caller. The iv buffer is only read, never written back.
 */
253AES_ENTRY(aes_cbc_cts_decrypt)
254 adr_l x8, .Lcts_permute_table
255 sub x4, x4, #16
/* x8 = table + (bytes - 16), x9 = table + 32 - (bytes - 16) */
256 add x9, x8, #32
257 add x8, x8, x4
258 sub x9, x9, x4
/* v3 / v4 = byte-permutation vectors used with tbl / tbx below */
259 ld1 {v3.16b}, [x8]
260 ld1 {v4.16b}, [x9]
261
/* v0 = in[0..15]; the post-index moves x1 to in + (bytes - 16) for v1 */
262 ld1 {v0.16b}, [x1], x4 /* overlapping loads */
263 ld1 {v1.16b}, [x1]
264
265 ld1 {v5.16b}, [x5] /* get iv */
266 dec_prepare w3, x2, x6
267
/* v2 = trailing bytes of v1 moved to the front, other lanes zero */
268 tbl v2.16b, {v1.16b}, v4.16b
269 decrypt_block v0, w3, x2, x6, w7
270 eor v2.16b, v2.16b, v0.16b
271
/*
 * tbx only replaces the lanes whose index is in range, so v0 keeps its
 * decrypted bytes in the remaining lanes.
 */
272 tbx v0.16b, {v1.16b}, v4.16b
273 tbl v2.16b, {v2.16b}, v3.16b
274 decrypt_block v0, w3, x2, x6, w7
275 eor v0.16b, v0.16b, v5.16b /* xor with iv */
276
/*
 * Store order matters: the window at out + (bytes - 16) is written
 * first, then the full block at out overwrites the overlapping part.
 */
277 add x4, x0, x4
278 st1 {v2.16b}, [x4] /* overlapping stores */
279 st1 {v0.16b}, [x0]
280 ret
281AES_ENDPROC(aes_cbc_cts_decrypt)
282
/*
 * Permutation table for the tbl/tbx instructions in the CBC-CTS
 * routines above: 16 bytes of 0xff, the identity indices 0x0-0xf, then
 * another 16 bytes of 0xff. A 16-byte window loaded at a byte offset
 * into this table therefore yields a shifted identity permutation
 * padded with 0xff; 0xff is out of range for a single-register table
 * lookup, so tbl writes zero to those lanes and tbx leaves them alone.
 * NOTE(review): ".align 6" is presumably a 64-byte alignment on this
 * target - confirm against the assembler in use.
 */
283 .section ".rodata", "a"
284 .align 6
285.Lcts_permute_table:
286 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
287 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
288 .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
289 .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
290 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
291 .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
292 .previous
293
294
49788fe2
AB
295 /*
296 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
68338174 297 * int blocks, u8 ctr[])
49788fe2
AB
298 */
299
300AES_ENTRY(aes_ctr_encrypt)
6e7de6af
AB
301 stp x29, x30, [sp, #-16]!
302 mov x29, sp
68338174 303
6e7de6af 304 enc_prepare w3, x2, x6
7367bfeb 305 ld1 {vctr.16b}, [x5]
11e3b725 306
7367bfeb 307 umov x6, vctr.d[1] /* keep swabbed ctr in reg */
68338174 308 rev x6, x6
6e7de6af
AB
309 cmn w6, w4 /* 32 bit overflow? */
310 bcs .Lctrloop
49788fe2 311.LctrloopNx:
7367bfeb 312 subs w4, w4, #MAX_STRIDE
49788fe2 313 bmi .Lctr1x
ed6ed118 314 add w7, w6, #1
7367bfeb 315 mov v0.16b, vctr.16b
ed6ed118 316 add w8, w6, #2
7367bfeb
AB
317 mov v1.16b, vctr.16b
318 add w9, w6, #3
319 mov v2.16b, vctr.16b
ed6ed118 320 add w9, w6, #3
ed6ed118 321 rev w7, w7
7367bfeb 322 mov v3.16b, vctr.16b
ed6ed118 323 rev w8, w8
7367bfeb 324ST5( mov v4.16b, vctr.16b )
ed6ed118
AB
325 mov v1.s[3], w7
326 rev w9, w9
7367bfeb 327ST5( add w10, w6, #4 )
ed6ed118 328 mov v2.s[3], w8
7367bfeb 329ST5( rev w10, w10 )
ed6ed118 330 mov v3.s[3], w9
7367bfeb 331ST5( mov v4.s[3], w10 )
6e7de6af 332 ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
7367bfeb
AB
333ST4( bl aes_encrypt_block4x )
334ST5( bl aes_encrypt_block5x )
49788fe2 335 eor v0.16b, v5.16b, v0.16b
7367bfeb 336ST4( ld1 {v5.16b}, [x1], #16 )
49788fe2 337 eor v1.16b, v6.16b, v1.16b
7367bfeb 338ST5( ld1 {v5.16b-v6.16b}, [x1], #32 )
49788fe2
AB
339 eor v2.16b, v7.16b, v2.16b
340 eor v3.16b, v5.16b, v3.16b
7367bfeb 341ST5( eor v4.16b, v6.16b, v4.16b )
6e7de6af 342 st1 {v0.16b-v3.16b}, [x0], #64
7367bfeb
AB
343ST5( st1 {v4.16b}, [x0], #16 )
344 add x6, x6, #MAX_STRIDE
68338174 345 rev x7, x6
7367bfeb 346 ins vctr.d[1], x7
6e7de6af 347 cbz w4, .Lctrout
49788fe2 348 b .LctrloopNx
49788fe2 349.Lctr1x:
7367bfeb 350 adds w4, w4, #MAX_STRIDE
49788fe2 351 beq .Lctrout
49788fe2 352.Lctrloop:
7367bfeb 353 mov v0.16b, vctr.16b
6e7de6af 354 encrypt_block v0, w3, x2, x8, w7
11e3b725 355
68338174
AB
356 adds x6, x6, #1 /* increment BE ctr */
357 rev x7, x6
7367bfeb 358 ins vctr.d[1], x7
11e3b725
AB
359 bcs .Lctrcarry /* overflow? */
360
361.Lctrcarrydone:
6e7de6af 362 subs w4, w4, #1
ccc5d51e 363 bmi .Lctrtailblock /* blocks <0 means tail block */
6e7de6af 364 ld1 {v3.16b}, [x1], #16
49788fe2 365 eor v3.16b, v0.16b, v3.16b
6e7de6af 366 st1 {v3.16b}, [x0], #16
11e3b725
AB
367 bne .Lctrloop
368
369.Lctrout:
7367bfeb 370 st1 {vctr.16b}, [x5] /* return next CTR value */
6e7de6af 371 ldp x29, x30, [sp], #16
11e3b725
AB
372 ret
373
ccc5d51e 374.Lctrtailblock:
6e7de6af 375 st1 {v0.16b}, [x0]
fa5fd3af 376 b .Lctrout
11e3b725
AB
377
378.Lctrcarry:
7367bfeb 379 umov x7, vctr.d[0] /* load upper word of ctr */
11e3b725
AB
380 rev x7, x7 /* ... to handle the carry */
381 add x7, x7, #1
382 rev x7, x7
7367bfeb 383 ins vctr.d[0], x7
11e3b725 384 b .Lctrcarrydone
49788fe2 385AES_ENDPROC(aes_ctr_encrypt)
49788fe2
AB
386
387
388 /*
389 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
390 * int blocks, u8 const rk2[], u8 iv[], int first)
391 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
392 * int blocks, u8 const rk2[], u8 iv[], int first)
393 */
394
/*
 * next_tweak out, in, tmp - derive the next XTS tweak from the current
 * one: out = in shifted left by one bit across the full 128-bit value,
 * with the bit shifted out of the top folded back in as 0x87.
 *
 *   \out  destination vector register (may be the same as \in)
 *   \in   current tweak
 *   \tmp  scratch vector register, clobbered
 *
 * Relies on xtsmask having been set up by xts_load_mask (low 64-bit
 * lane = 1, high 64-bit lane = 0x87).
 */
2e5d2f33 395 .macro next_tweak, out, in, tmp
/* tmp = all-ones in each 64-bit lane whose top bit is set, else zero */
49788fe2 396 sshr \tmp\().2d, \in\().2d, #63
/* reduce to 1 (low lane carry) / 0x87 (high lane carry) */
2e5d2f33 397 and \tmp\().16b, \tmp\().16b, xtsmask.16b
49788fe2
AB
/* shift each 64-bit lane left by one bit */
398 add \out\().2d, \in\().2d, \in\().2d
/* swap the lanes so each carry lands in the opposite half */
399 ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
400 eor \out\().16b, \out\().16b, \tmp\().16b
401 .endm
402
2e5d2f33
AB
/*
 * xts_load_mask tmp - initialise xtsmask for use by next_tweak.
 *
 * The two movi instructions fill the low two 32-bit lanes of xtsmask
 * with 1 and of \tmp with 0x87 (upper halves are zeroed by the 64-bit
 * vector write); uzp1 then picks the even lanes of both, leaving
 * xtsmask as the 32-bit lanes { 1, 0, 0x87, 0 }.
 * \tmp is clobbered.
 */
403 .macro xts_load_mask, tmp
404 movi xtsmask.2s, #0x1
405 movi \tmp\().2s, #0x87
406 uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s
407 .endm
49788fe2
AB
408
/*
 * aes_xts_encrypt - XTS-encrypt `blocks` blocks.
 *
 * Register use, matching the C prototype above:
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks,
 *   x5 = rk2, x6 = iv, w7 = first
 * v4 holds the current tweak; v5-v7 hold the following three tweaks in
 * the 4x loop; v8 is the scratch register for next_tweak.
 *
 * When `first` is non-zero the value loaded from iv is encrypted with
 * rk2 to form the initial tweak. Otherwise the loaded value is advanced
 * once with next_tweak before use. On exit the tweak of the last block
 * processed is stored back to iv.
 *
 * w7 is reused as a macro operand after `first` has been tested.
 */
409AES_ENTRY(aes_xts_encrypt)
6e7de6af
AB
/* build a frame record; x30 must survive the "bl" in the 4x loop */
410 stp x29, x30, [sp, #-16]!
411 mov x29, sp
55868b45 412
6e7de6af 413 ld1 {v4.16b}, [x6]
cc3cc489 414 xts_load_mask v8
68338174
AB
415 cbz w7, .Lxtsencnotfirst
416
417 enc_prepare w3, x5, x8
418 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
/* from here on the data key (rk1, in x2) is used */
419 enc_switch_key w3, x2, x8
49788fe2
AB
/* the freshly computed tweak is used as-is: skip the next_tweak below */
420 b .LxtsencNx
421
68338174 422.Lxtsencnotfirst:
6e7de6af 423 enc_prepare w3, x2, x8
49788fe2 424.LxtsencloopNx:
2e5d2f33 425 next_tweak v4, v4, v8
49788fe2 426.LxtsencNx:
6e7de6af 427 subs w4, w4, #4
/* negative result: fewer than 4 blocks remain */
49788fe2 428 bmi .Lxtsenc1x
6e7de6af 429 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
/* derive tweaks for blocks 1-3 while xoring each block with its tweak */
2e5d2f33 430 next_tweak v5, v4, v8
49788fe2 431 eor v0.16b, v0.16b, v4.16b
2e5d2f33 432 next_tweak v6, v5, v8
49788fe2
AB
433 eor v1.16b, v1.16b, v5.16b
434 eor v2.16b, v2.16b, v6.16b
2e5d2f33 435 next_tweak v7, v6, v8
49788fe2 436 eor v3.16b, v3.16b, v7.16b
55868b45 437 bl aes_encrypt_block4x
49788fe2
AB
/* second xor with the same tweaks */
438 eor v3.16b, v3.16b, v7.16b
439 eor v0.16b, v0.16b, v4.16b
440 eor v1.16b, v1.16b, v5.16b
441 eor v2.16b, v2.16b, v6.16b
6e7de6af 442 st1 {v0.16b-v3.16b}, [x0], #64
/* the last tweak used becomes the current tweak */
49788fe2 443 mov v4.16b, v7.16b
6e7de6af 444 cbz w4, .Lxtsencout
/*
 * NOTE(review): xts_reload_mask is defined outside this file; presumably
 * it restores xtsmask where the 4x helper may have clobbered it - confirm.
 */
cc3cc489 445 xts_reload_mask v8
49788fe2 446 b .LxtsencloopNx
49788fe2 447.Lxtsenc1x:
/* undo the last subtraction; zero means no leftover blocks */
6e7de6af 448 adds w4, w4, #4
49788fe2 449 beq .Lxtsencout
49788fe2 450.Lxtsencloop:
6e7de6af 451 ld1 {v1.16b}, [x1], #16
49788fe2 452 eor v0.16b, v1.16b, v4.16b
6e7de6af 453 encrypt_block v0, w3, x2, x8, w7
49788fe2 454 eor v0.16b, v0.16b, v4.16b
6e7de6af
AB
455 st1 {v0.16b}, [x0], #16
456 subs w4, w4, #1
/* leave before advancing so v4 still holds the last tweak used */
49788fe2 457 beq .Lxtsencout
2e5d2f33 458 next_tweak v4, v4, v8
49788fe2
AB
459 b .Lxtsencloop
460.Lxtsencout:
6e7de6af
AB
461 st1 {v4.16b}, [x6]
462 ldp x29, x30, [sp], #16
49788fe2
AB
463 ret
464AES_ENDPROC(aes_xts_encrypt)
465
466
/*
 * aes_xts_decrypt - XTS-decrypt `blocks` blocks.
 *
 * Register use, matching the C prototype above:
 *   x0 = out, x1 = in, x2 = rk1, w3 = rounds, w4 = blocks,
 *   x5 = rk2, x6 = iv, w7 = first
 * v4 holds the current tweak; v5-v7 hold the following three tweaks in
 * the 4x loop; v8 is the scratch register for next_tweak.
 *
 * When `first` is non-zero the value loaded from iv is *encrypted* with
 * rk2 to form the initial tweak (the tweak is always produced with the
 * encrypt primitive, even when decrypting data). Otherwise the loaded
 * value is advanced once with next_tweak before use. On exit the tweak
 * of the last block processed is stored back to iv.
 *
 * w7 is reused as a macro operand after `first` has been tested.
 */
467AES_ENTRY(aes_xts_decrypt)
6e7de6af
AB
/* build a frame record; x30 must survive the "bl" in the 4x loop */
468 stp x29, x30, [sp, #-16]!
469 mov x29, sp
55868b45 470
6e7de6af 471 ld1 {v4.16b}, [x6]
cc3cc489 472 xts_load_mask v8
68338174
AB
473 cbz w7, .Lxtsdecnotfirst
474
475 enc_prepare w3, x5, x8
476 encrypt_block v4, w3, x5, x8, w7 /* first tweak */
/* from here on the data key (rk1, in x2) is used for decryption */
477 dec_prepare w3, x2, x8
49788fe2
AB
/* the freshly computed tweak is used as-is: skip the next_tweak below */
478 b .LxtsdecNx
479
68338174 480.Lxtsdecnotfirst:
6e7de6af 481 dec_prepare w3, x2, x8
49788fe2 482.LxtsdecloopNx:
2e5d2f33 483 next_tweak v4, v4, v8
49788fe2 484.LxtsdecNx:
6e7de6af 485 subs w4, w4, #4
/* negative result: fewer than 4 blocks remain */
49788fe2 486 bmi .Lxtsdec1x
6e7de6af 487 ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
/* derive tweaks for blocks 1-3 while xoring each block with its tweak */
2e5d2f33 488 next_tweak v5, v4, v8
49788fe2 489 eor v0.16b, v0.16b, v4.16b
2e5d2f33 490 next_tweak v6, v5, v8
49788fe2
AB
491 eor v1.16b, v1.16b, v5.16b
492 eor v2.16b, v2.16b, v6.16b
2e5d2f33 493 next_tweak v7, v6, v8
49788fe2 494 eor v3.16b, v3.16b, v7.16b
55868b45 495 bl aes_decrypt_block4x
49788fe2
AB
/* second xor with the same tweaks */
496 eor v3.16b, v3.16b, v7.16b
497 eor v0.16b, v0.16b, v4.16b
498 eor v1.16b, v1.16b, v5.16b
499 eor v2.16b, v2.16b, v6.16b
6e7de6af 500 st1 {v0.16b-v3.16b}, [x0], #64
/* the last tweak used becomes the current tweak */
49788fe2 501 mov v4.16b, v7.16b
6e7de6af 502 cbz w4, .Lxtsdecout
/*
 * NOTE(review): xts_reload_mask is defined outside this file; presumably
 * it restores xtsmask where the 4x helper may have clobbered it - confirm.
 */
cc3cc489 503 xts_reload_mask v8
49788fe2 504 b .LxtsdecloopNx
49788fe2 505.Lxtsdec1x:
/* undo the last subtraction; zero means no leftover blocks */
6e7de6af 506 adds w4, w4, #4
49788fe2 507 beq .Lxtsdecout
49788fe2 508.Lxtsdecloop:
6e7de6af 509 ld1 {v1.16b}, [x1], #16
49788fe2 510 eor v0.16b, v1.16b, v4.16b
6e7de6af 511 decrypt_block v0, w3, x2, x8, w7
49788fe2 512 eor v0.16b, v0.16b, v4.16b
6e7de6af
AB
513 st1 {v0.16b}, [x0], #16
514 subs w4, w4, #1
/* leave before advancing so v4 still holds the last tweak used */
49788fe2 515 beq .Lxtsdecout
2e5d2f33 516 next_tweak v4, v4, v8
49788fe2
AB
517 b .Lxtsdecloop
518.Lxtsdecout:
6e7de6af
AB
519 st1 {v4.16b}, [x6]
520 ldp x29, x30, [sp], #16
49788fe2
AB
521 ret
522AES_ENDPROC(aes_xts_decrypt)
4860620d
AB
523
524 /*
525 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
526 * int blocks, u8 dg[], int enc_before, int enc_after)
527 */
/*
 * aes_mac_update - fold `blocks` input blocks into the running digest:
 * each block is xored into dg and dg is then encrypted.
 *
 * Arguments, matching the C prototype above:
 *   x0 = in, x1 = rk, w2 = rounds, w3 = blocks, x4 = dg,
 *   w5 = enc_before, w6 = enc_after
 * They are copied to x19-x24 right after entry so that they are still
 * available on the .Lmacrestart path:
 *   x19 = in, x20 = rk, w21 = rounds, w22 = blocks, x23 = dg,
 *   x24 = enc_after
 * v0 holds the running digest; x7 / w8 are passed to the macros as
 * extra operands.
 *
 * enc_before != 0: encrypt dg once before consuming any input.
 * enc_after  == 0: skip the encryption that would follow the very last
 *                  input block, leaving dg as "dg xor last block".
 *
 * NOTE(review): frame_push / frame_pop are defined outside this file;
 * "frame_push 6" presumably saves x19-x24 - confirm.
 */
528AES_ENTRY(aes_mac_update)
0c8f838a
AB
529 frame_push 6
530
531 mov x19, x0
532 mov x20, x1
533 mov x21, x2
534 mov x22, x3
535 mov x23, x4
536 mov x24, x6
537
538 ld1 {v0.16b}, [x23] /* get dg */
/* w2 / x1 still hold the same values as w21 / x20 at this point */
4860620d 539 enc_prepare w2, x1, x7
870c163a 540 cbz w5, .Lmacloop4x
4860620d 541
870c163a
AB
542 encrypt_block v0, w2, x1, x7, w8
543
544.Lmacloop4x:
0c8f838a 545 subs w22, w22, #4
/* negative result: fewer than 4 blocks remain */
870c163a 546 bmi .Lmac1x
0c8f838a 547 ld1 {v1.16b-v4.16b}, [x19], #64 /* get next 4 pt blocks */
870c163a 548 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
0c8f838a 549 encrypt_block v0, w21, x20, x7, w8
870c163a 550 eor v0.16b, v0.16b, v2.16b
0c8f838a 551 encrypt_block v0, w21, x20, x7, w8
870c163a 552 eor v0.16b, v0.16b, v3.16b
0c8f838a 553 encrypt_block v0, w21, x20, x7, w8
870c163a 554 eor v0.16b, v0.16b, v4.16b
0c8f838a
AB
/*
 * x5 = enc_after if these were the last blocks (w22 == 0), otherwise
 * all-ones; so the final encryption is only skipped at the very end of
 * the input and only when enc_after is zero.
 */
555 cmp w22, wzr
556 csinv x5, x24, xzr, eq
870c163a 557 cbz w5, .Lmacout
0c8f838a
AB
558 encrypt_block v0, w21, x20, x7, w8
/* dg is written out here so that .Lmacrestart can reload it */
559 st1 {v0.16b}, [x23] /* return dg */
/*
 * NOTE(review): cond_yield_neon is defined outside this file; presumably
 * it may give up the NEON unit and resume at .Lmacrestart - confirm.
 */
560 cond_yield_neon .Lmacrestart
870c163a
AB
561 b .Lmacloop4x
562.Lmac1x:
/* undo the last subtraction: w22 = number of leftover blocks (0-3) */
0c8f838a 563 add w22, w22, #4
4860620d 564.Lmacloop:
0c8f838a
AB
565 cbz w22, .Lmacout
566 ld1 {v1.16b}, [x19], #16 /* get next pt block */
4860620d
AB
567 eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */
568
0c8f838a
AB
/* same last-block / enc_after test as in the 4x loop */
569 subs w22, w22, #1
570 csinv x5, x24, xzr, eq
4860620d
AB
571 cbz w5, .Lmacout
572
0c8f838a
AB
573.Lmacenc:
574 encrypt_block v0, w21, x20, x7, w8
4860620d
AB
575 b .Lmacloop
576
577.Lmacout:
0c8f838a
AB
578 st1 {v0.16b}, [x23] /* return dg */
579 frame_pop
4860620d 580 ret
0c8f838a
AB
581
/* resume point after a yield: reload the digest and the key schedule */
582.Lmacrestart:
583 ld1 {v0.16b}, [x23] /* get dg */
584 enc_prepare w21, x20, x0
585 b .Lmacloop4x
4860620d 586AES_ENDPROC(aes_mac_update)