Commit | Line | Data |
---|---|---|
d2912cb1 | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
49788fe2 AB |
2 | /* |
3 | * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES | |
4 | * | |
4860620d | 5 | * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> |
49788fe2 AB |
6 | */ |
7 | ||
8 | /* included by aes-ce.S and aes-neon.S */ | |
9 | ||
10 | .text | |
11 | .align 4 | |
12 | ||
e2174139 AB |
13 | #ifndef MAX_STRIDE |
14 | #define MAX_STRIDE 4 | |
15 | #endif | |
16 | ||
7367bfeb AB |
17 | #if MAX_STRIDE == 4 |
18 | #define ST4(x...) x | |
19 | #define ST5(x...) | |
20 | #else | |
21 | #define ST4(x...) | |
22 | #define ST5(x...) x | |
23 | #endif | |
24 | ||
49788fe2 | 25 | aes_encrypt_block4x: /* local helper reached via bl: encrypt_block4x on v0-v3 */ |
6e7de6af | 26 | encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 /* callers hold rounds in w3, rk in x2; x8/w7 presumably scratch - TODO confirm */ |
49788fe2 AB |
27 | ret |
28 | ENDPROC(aes_encrypt_block4x) | |
29 | ||
30 | aes_decrypt_block4x: /* local helper reached via bl: decrypt_block4x on v0-v3 */ | |
6e7de6af | 31 | decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 /* callers hold rounds in w3, rk in x2; x8/w7 presumably scratch - TODO confirm */ |
49788fe2 AB |
32 | ret |
33 | ENDPROC(aes_decrypt_block4x) | |
34 | ||
e2174139 AB |
35 | #if MAX_STRIDE == 5 |
36 | aes_encrypt_block5x: /* 5-way variant of the helper above: operates on v0-v4 */ | |
37 | encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 /* x8/w7 presumably scratch - TODO confirm */ | |
38 | ret | |
39 | ENDPROC(aes_encrypt_block5x) | |
40 | ||
41 | aes_decrypt_block5x: /* 5-way decrypt helper: operates on v0-v4 */ | |
42 | decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 /* x8/w7 presumably scratch - TODO confirm */ | |
43 | ret | |
44 | ENDPROC(aes_decrypt_block5x) | |
45 | #endif | |
46 | ||
49788fe2 AB |
47 | /* |
48 | * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | |
68338174 | 49 | * int blocks) |
49788fe2 | 50 | * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, |
68338174 | 51 | * int blocks) |
49788fe2 AB |
52 | */ |
53 | ||
54 | AES_ENTRY(aes_ecb_encrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=blocks */ | |
6e7de6af AB |
55 | stp x29, x30, [sp, #-16]! /* frame record: the bl below overwrites x30 */ |
56 | mov x29, sp | |
49788fe2 | 57 | |
6e7de6af | 58 | enc_prepare w3, x2, x5 /* x5 presumably a temp for the macro - TODO confirm */ |
49788fe2 AB |
59 | |
60 | .LecbencloopNx: | |
7367bfeb | 61 | subs w4, w4, #MAX_STRIDE /* claim MAX_STRIDE blocks for this pass */ |
49788fe2 | 62 | bmi .Lecbenc1x /* fewer than MAX_STRIDE left: go one at a time */ |
6e7de6af | 63 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ |
7367bfeb AB |
64 | ST4( bl aes_encrypt_block4x ) |
65 | ST5( ld1 {v4.16b}, [x1], #16 ) | |
66 | ST5( bl aes_encrypt_block5x ) | |
6e7de6af | 67 | st1 {v0.16b-v3.16b}, [x0], #64 |
7367bfeb | 68 | ST5( st1 {v4.16b}, [x0], #16 ) |
49788fe2 AB |
69 | b .LecbencloopNx |
70 | .Lecbenc1x: | |
7367bfeb | 71 | adds w4, w4, #MAX_STRIDE /* undo the subtraction: w4 = blocks still pending */ |
49788fe2 | 72 | beq .Lecbencout /* nothing left */ |
49788fe2 | 73 | .Lecbencloop: |
6e7de6af AB |
74 | ld1 {v0.16b}, [x1], #16 /* get next pt block */ |
75 | encrypt_block v0, w3, x2, x5, w6 | |
76 | st1 {v0.16b}, [x0], #16 | |
77 | subs w4, w4, #1 | |
49788fe2 AB |
78 | bne .Lecbencloop |
79 | .Lecbencout: | |
6e7de6af | 80 | ldp x29, x30, [sp], #16 /* restore fp/lr pushed on entry */ |
49788fe2 AB |
81 | ret |
82 | AES_ENDPROC(aes_ecb_encrypt) | |
83 | ||
84 | ||
85 | AES_ENTRY(aes_ecb_decrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=blocks */ | |
6e7de6af AB |
86 | stp x29, x30, [sp, #-16]! /* frame record: the bl below overwrites x30 */ |
87 | mov x29, sp | |
0c8f838a | 88 | |
6e7de6af | 89 | dec_prepare w3, x2, x5 /* x5 presumably a temp for the macro - TODO confirm */ |
49788fe2 AB |
90 | |
91 | .LecbdecloopNx: | |
7367bfeb | 92 | subs w4, w4, #MAX_STRIDE /* claim MAX_STRIDE blocks for this pass */ |
49788fe2 | 93 | bmi .Lecbdec1x /* fewer than MAX_STRIDE left: go one at a time */ |
6e7de6af | 94 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ |
7367bfeb AB |
95 | ST4( bl aes_decrypt_block4x ) |
96 | ST5( ld1 {v4.16b}, [x1], #16 ) | |
97 | ST5( bl aes_decrypt_block5x ) | |
6e7de6af | 98 | st1 {v0.16b-v3.16b}, [x0], #64 |
7367bfeb | 99 | ST5( st1 {v4.16b}, [x0], #16 ) |
49788fe2 AB |
100 | b .LecbdecloopNx |
101 | .Lecbdec1x: | |
7367bfeb | 102 | adds w4, w4, #MAX_STRIDE /* undo the subtraction: w4 = blocks still pending */ |
49788fe2 | 103 | beq .Lecbdecout /* nothing left */ |
49788fe2 | 104 | .Lecbdecloop: |
6e7de6af AB |
105 | ld1 {v0.16b}, [x1], #16 /* get next ct block */ |
106 | decrypt_block v0, w3, x2, x5, w6 | |
107 | st1 {v0.16b}, [x0], #16 | |
108 | subs w4, w4, #1 | |
49788fe2 AB |
109 | bne .Lecbdecloop |
110 | .Lecbdecout: | |
6e7de6af | 111 | ldp x29, x30, [sp], #16 /* restore fp/lr pushed on entry */ |
49788fe2 AB |
112 | ret |
113 | AES_ENDPROC(aes_ecb_decrypt) | |
114 | ||
115 | ||
116 | /* | |
117 | * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | |
68338174 | 118 | * int blocks, u8 iv[]) |
49788fe2 | 119 | * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, |
68338174 | 120 | * int blocks, u8 iv[]) |
49788fe2 AB |
121 | */ |
122 | ||
123 | AES_ENTRY(aes_cbc_encrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=blocks, x5=iv; no bl here, so no frame record */ | |
6e7de6af AB |
124 | ld1 {v4.16b}, [x5] /* get iv */ |
125 | enc_prepare w3, x2, x6 | |
49788fe2 | 126 | |
a8f8a69e | 127 | .Lcbcencloop4x: |
6e7de6af | 128 | subs w4, w4, #4 |
a8f8a69e | 129 | bmi .Lcbcenc1x /* fewer than 4 blocks left */ |
6e7de6af | 130 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ |
a8f8a69e | 131 | eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */ |
6e7de6af | 132 | encrypt_block v0, w3, x2, x6, w7 |
a8f8a69e | 133 | eor v1.16b, v1.16b, v0.16b /* chain: each pt block is xored with the previous ct */ |
6e7de6af | 134 | encrypt_block v1, w3, x2, x6, w7 |
a8f8a69e | 135 | eor v2.16b, v2.16b, v1.16b |
6e7de6af | 136 | encrypt_block v2, w3, x2, x6, w7 |
a8f8a69e | 137 | eor v3.16b, v3.16b, v2.16b |
6e7de6af AB |
138 | encrypt_block v3, w3, x2, x6, w7 |
139 | st1 {v0.16b-v3.16b}, [x0], #64 | |
a8f8a69e AB |
140 | mov v4.16b, v3.16b /* last ct block becomes the next iv */ |
141 | b .Lcbcencloop4x | |
142 | .Lcbcenc1x: | |
6e7de6af | 143 | adds w4, w4, #4 /* undo the subtraction: w4 = blocks still pending */ |
a8f8a69e AB |
144 | beq .Lcbcencout |
145 | .Lcbcencloop: | |
6e7de6af | 146 | ld1 {v0.16b}, [x1], #16 /* get next pt block */ |
a8f8a69e | 147 | eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */ |
6e7de6af AB |
148 | encrypt_block v4, w3, x2, x6, w7 /* encrypted in place: v4 is both this ct and the next iv */ |
149 | st1 {v4.16b}, [x0], #16 | |
150 | subs w4, w4, #1 | |
49788fe2 | 151 | bne .Lcbcencloop |
a8f8a69e | 152 | .Lcbcencout: |
6e7de6af | 153 | st1 {v4.16b}, [x5] /* return iv */ |
49788fe2 AB |
154 | ret |
155 | AES_ENDPROC(aes_cbc_encrypt) | |
156 | ||
157 | ||
158 | AES_ENTRY(aes_cbc_decrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=blocks, x5=iv */ | |
6e7de6af AB |
159 | stp x29, x30, [sp, #-16]! /* frame record: the bl below overwrites x30 */ |
160 | mov x29, sp | |
49788fe2 | 161 | |
7367bfeb | 162 | ld1 {cbciv.16b}, [x5] /* get iv */ |
6e7de6af | 163 | dec_prepare w3, x2, x6 |
49788fe2 AB |
164 | |
165 | .LcbcdecloopNx: | |
7367bfeb | 166 | subs w4, w4, #MAX_STRIDE |
49788fe2 | 167 | bmi .Lcbcdec1x /* fewer than MAX_STRIDE blocks left */ |
6e7de6af | 168 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ |
7367bfeb AB |
169 | #if MAX_STRIDE == 5 |
170 | ld1 {v4.16b}, [x1], #16 /* get 1 ct block */ | |
171 | mov v5.16b, v0.16b /* keep ct0..ct2: they are the xor inputs for pt1..pt3 */ | |
172 | mov v6.16b, v1.16b | |
173 | mov v7.16b, v2.16b | |
174 | bl aes_decrypt_block5x | |
175 | sub x1, x1, #32 /* rewind to ct3 so ct3 and ct4 can be re-read */ | |
176 | eor v0.16b, v0.16b, cbciv.16b | |
177 | eor v1.16b, v1.16b, v5.16b /* last use of saved ct0 before v5 is reloaded */ | |
178 | ld1 {v5.16b}, [x1], #16 /* reload 1 ct block */ | |
179 | ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */ | |
180 | eor v2.16b, v2.16b, v6.16b | |
181 | eor v3.16b, v3.16b, v7.16b | |
182 | eor v4.16b, v4.16b, v5.16b /* v5 now holds ct3 */ | |
183 | #else | |
49788fe2 AB |
184 | mov v4.16b, v0.16b /* keep ct0..ct2: they are the xor inputs for pt1..pt3 */ |
185 | mov v5.16b, v1.16b | |
186 | mov v6.16b, v2.16b | |
55868b45 | 187 | bl aes_decrypt_block4x |
6e7de6af | 188 | sub x1, x1, #16 /* rewind to ct3 so it can be re-read as the next iv */ |
7367bfeb | 189 | eor v0.16b, v0.16b, cbciv.16b |
49788fe2 | 190 | eor v1.16b, v1.16b, v4.16b |
7367bfeb | 191 | ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */ |
49788fe2 AB |
192 | eor v2.16b, v2.16b, v5.16b |
193 | eor v3.16b, v3.16b, v6.16b | |
7367bfeb | 194 | #endif |
6e7de6af | 195 | st1 {v0.16b-v3.16b}, [x0], #64 |
7367bfeb | 196 | ST5( st1 {v4.16b}, [x0], #16 ) |
49788fe2 AB |
197 | b .LcbcdecloopNx |
198 | .Lcbcdec1x: | |
7367bfeb | 199 | adds w4, w4, #MAX_STRIDE /* undo the subtraction: w4 = blocks still pending */ |
49788fe2 | 200 | beq .Lcbcdecout |
49788fe2 | 201 | .Lcbcdecloop: |
6e7de6af | 202 | ld1 {v1.16b}, [x1], #16 /* get next ct block */ |
49788fe2 | 203 | mov v0.16b, v1.16b /* ...and copy to v0 */ |
6e7de6af | 204 | decrypt_block v0, w3, x2, x6, w7 |
7367bfeb AB |
205 | eor v0.16b, v0.16b, cbciv.16b /* xor with iv => pt */ |
206 | mov cbciv.16b, v1.16b /* ct is next iv */ | |
6e7de6af AB |
207 | st1 {v0.16b}, [x0], #16 |
208 | subs w4, w4, #1 | |
49788fe2 AB |
209 | bne .Lcbcdecloop |
210 | .Lcbcdecout: | |
7367bfeb | 211 | st1 {cbciv.16b}, [x5] /* return iv */ |
6e7de6af | 212 | ldp x29, x30, [sp], #16 |
49788fe2 AB |
213 | ret |
214 | AES_ENDPROC(aes_cbc_decrypt) | |
215 | ||
216 | ||
dd597fb3 AB |
217 | /* |
218 | * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[], | |
219 | * int rounds, int bytes, u8 const iv[]) | |
220 | * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], | |
221 | * int rounds, int bytes, u8 const iv[]) | |
222 | */ | |
223 | ||
224 | AES_ENTRY(aes_cbc_cts_encrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=bytes, x5=iv */ | |
225 | adr_l x8, .Lcts_permute_table | |
226 | sub x4, x4, #16 /* x4 = bytes - 16. NOTE(review): 64-bit x4 used for an int arg; assumes clean upper bits - confirm */ | |
227 | add x9, x8, #32 | |
228 | add x8, x8, x4 /* x8 = table + (bytes - 16) */ | |
229 | sub x9, x9, x4 /* x9 = table + 32 - (bytes - 16) */ | |
230 | ld1 {v3.16b}, [x8] /* permute vector applied to the first ct block below */ | |
231 | ld1 {v4.16b}, [x9] /* permute vector applied to the trailing input below */ | |
232 | ||
233 | ld1 {v0.16b}, [x1], x4 /* overlapping loads */ | |
234 | ld1 {v1.16b}, [x1] /* 16 bytes starting at in + (bytes - 16) */ | |
235 | ||
236 | ld1 {v5.16b}, [x5] /* get iv */ | |
237 | enc_prepare w3, x2, x6 | |
238 | ||
239 | eor v0.16b, v0.16b, v5.16b /* xor with iv */ | |
240 | tbl v1.16b, {v1.16b}, v4.16b /* tbl writes 0 for the 0xff (out of range) indexes */ | |
241 | encrypt_block v0, w3, x2, x6, w7 | |
242 | ||
243 | eor v1.16b, v1.16b, v0.16b | |
244 | tbl v0.16b, {v0.16b}, v3.16b | |
245 | encrypt_block v1, w3, x2, x6, w7 | |
246 | ||
247 | add x4, x0, x4 /* x4 = out + (bytes - 16) */ | |
248 | st1 {v0.16b}, [x4] /* overlapping stores */ | |
249 | st1 {v1.16b}, [x0] /* stored second, so it wins in any overlapping bytes */ | |
250 | ret | |
251 | AES_ENDPROC(aes_cbc_cts_encrypt) | |
252 | ||
253 | AES_ENTRY(aes_cbc_cts_decrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=bytes, x5=iv */ | |
254 | adr_l x8, .Lcts_permute_table | |
255 | sub x4, x4, #16 /* x4 = bytes - 16. NOTE(review): 64-bit x4 used for an int arg; assumes clean upper bits - confirm */ | |
256 | add x9, x8, #32 | |
257 | add x8, x8, x4 /* x8 = table + (bytes - 16) */ | |
258 | sub x9, x9, x4 /* x9 = table + 32 - (bytes - 16) */ | |
259 | ld1 {v3.16b}, [x8] | |
260 | ld1 {v4.16b}, [x9] | |
261 | ||
262 | ld1 {v0.16b}, [x1], x4 /* overlapping loads */ | |
263 | ld1 {v1.16b}, [x1] /* 16 bytes starting at in + (bytes - 16) */ | |
264 | ||
265 | ld1 {v5.16b}, [x5] /* get iv */ | |
266 | dec_prepare w3, x2, x6 | |
267 | ||
268 | tbl v2.16b, {v1.16b}, v4.16b /* tbl writes 0 for the 0xff (out of range) indexes */ | |
269 | decrypt_block v0, w3, x2, x6, w7 | |
270 | eor v2.16b, v2.16b, v0.16b | |
271 | ||
272 | tbx v0.16b, {v1.16b}, v4.16b /* tbx keeps the v0 byte wherever the index is 0xff */ | |
273 | tbl v2.16b, {v2.16b}, v3.16b | |
274 | decrypt_block v0, w3, x2, x6, w7 | |
275 | eor v0.16b, v0.16b, v5.16b /* xor with iv */ | |
276 | ||
277 | add x4, x0, x4 /* x4 = out + (bytes - 16) */ | |
278 | st1 {v2.16b}, [x4] /* overlapping stores */ | |
279 | st1 {v0.16b}, [x0] /* stored second, so it wins in any overlapping bytes */ | |
280 | ret | |
281 | AES_ENDPROC(aes_cbc_cts_decrypt) | |
282 | ||
283 | .section ".rodata", "a" | |
284 | .align 6 | |
285 | .Lcts_permute_table: /* 16 x 0xff, identity 0..15, 16 x 0xff; the CTS routines load 16-byte windows at varying offsets */ | |
286 | .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff | |
287 | .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff | |
288 | .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 | |
289 | .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf | |
290 | .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff | |
291 | .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff | |
292 | .previous | |
293 | ||
294 | ||
49788fe2 AB |
295 | /* |
296 | * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | |
68338174 | 297 | * int blocks, u8 ctr[]) |
49788fe2 AB |
298 | */ |
299 | ||
300 | AES_ENTRY(aes_ctr_encrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=blocks, x5=ctr */ | |
6e7de6af AB |
301 | stp x29, x30, [sp, #-16]! /* frame record: the bl below overwrites x30 */ |
302 | mov x29, sp | |
68338174 | 303 | |
6e7de6af | 304 | enc_prepare w3, x2, x6 |
7367bfeb | 305 | ld1 {vctr.16b}, [x5] |
11e3b725 | 306 | |
7367bfeb | 307 | umov x6, vctr.d[1] /* keep swabbed ctr in reg */ |
68338174 | 308 | rev x6, x6 |
6e7de6af AB |
309 | cmn w6, w4 /* 32 bit overflow? */ |
310 | bcs .Lctrloop /* low 32 bits would wrap: take the one-block-at-a-time path */ | |
49788fe2 | 311 | .LctrloopNx: |
7367bfeb | 312 | subs w4, w4, #MAX_STRIDE |
49788fe2 | 313 | bmi .Lctr1x /* fewer than MAX_STRIDE blocks left */ |
ed6ed118 | 314 | add w7, w6, #1 /* w7..w9 (and w10 for ST5) = counter + 1..3 (+4) */ |
7367bfeb | 315 | mov v0.16b, vctr.16b /* v0 carries the current counter unchanged */ |
ed6ed118 | 316 | add w8, w6, #2 |
7367bfeb AB |
317 | mov v1.16b, vctr.16b |
318 | add w9, w6, #3 | |
319 | mov v2.16b, vctr.16b | |
ed6ed118 | 321 | rev w7, w7 /* back to the byte order stored in the counter block */ |
7367bfeb | 322 | mov v3.16b, vctr.16b |
ed6ed118 | 323 | rev w8, w8 |
7367bfeb | 324 | ST5( mov v4.16b, vctr.16b ) |
ed6ed118 AB |
325 | mov v1.s[3], w7 /* patch only the last 32-bit lane of each copy */ |
326 | rev w9, w9 | |
7367bfeb | 327 | ST5( add w10, w6, #4 ) |
ed6ed118 | 328 | mov v2.s[3], w8 |
7367bfeb | 329 | ST5( rev w10, w10 ) |
ed6ed118 | 330 | mov v3.s[3], w9 |
7367bfeb | 331 | ST5( mov v4.s[3], w10 ) |
6e7de6af | 332 | ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ |
7367bfeb AB |
333 | ST4( bl aes_encrypt_block4x ) |
334 | ST5( bl aes_encrypt_block5x ) | |
49788fe2 | 335 | eor v0.16b, v5.16b, v0.16b /* consume v5 before it is reloaded below */ |
7367bfeb | 336 | ST4( ld1 {v5.16b}, [x1], #16 ) |
49788fe2 | 337 | eor v1.16b, v6.16b, v1.16b /* consume v6 before the ST5 reload below */ |
7367bfeb | 338 | ST5( ld1 {v5.16b-v6.16b}, [x1], #32 ) |
49788fe2 AB |
339 | eor v2.16b, v7.16b, v2.16b |
340 | eor v3.16b, v5.16b, v3.16b | |
7367bfeb | 341 | ST5( eor v4.16b, v6.16b, v4.16b ) |
6e7de6af | 342 | st1 {v0.16b-v3.16b}, [x0], #64 |
7367bfeb AB |
343 | ST5( st1 {v4.16b}, [x0], #16 ) |
344 | add x6, x6, #MAX_STRIDE /* advance the counter past the blocks just produced */ | |
68338174 | 345 | rev x7, x6 |
7367bfeb | 346 | ins vctr.d[1], x7 |
6e7de6af | 347 | cbz w4, .Lctrout |
49788fe2 | 348 | b .LctrloopNx |
49788fe2 | 349 | .Lctr1x: |
7367bfeb | 350 | adds w4, w4, #MAX_STRIDE /* undo the subtraction: w4 = blocks still pending */ |
49788fe2 | 351 | beq .Lctrout |
49788fe2 | 352 | .Lctrloop: |
7367bfeb | 353 | mov v0.16b, vctr.16b |
6e7de6af | 354 | encrypt_block v0, w3, x2, x8, w7 |
11e3b725 | 355 | |
68338174 AB |
356 | adds x6, x6, #1 /* increment BE ctr */ |
357 | rev x7, x6 | |
7367bfeb | 358 | ins vctr.d[1], x7 |
11e3b725 AB |
359 | bcs .Lctrcarry /* overflow? */ |
360 | ||
361 | .Lctrcarrydone: | |
6e7de6af | 362 | subs w4, w4, #1 |
ccc5d51e | 363 | bmi .Lctrtailblock /* blocks <0 means tail block */ |
6e7de6af | 364 | ld1 {v3.16b}, [x1], #16 |
49788fe2 | 365 | eor v3.16b, v0.16b, v3.16b |
6e7de6af | 366 | st1 {v3.16b}, [x0], #16 |
11e3b725 AB |
367 | bne .Lctrloop /* still tests the flags of the subs above */ |
368 | ||
369 | .Lctrout: | |
7367bfeb | 370 | st1 {vctr.16b}, [x5] /* return next CTR value */ |
6e7de6af | 371 | ldp x29, x30, [sp], #16 |
11e3b725 AB |
372 | ret |
373 | ||
ccc5d51e | 374 | .Lctrtailblock: |
6e7de6af | 375 | st1 {v0.16b}, [x0] /* hand back the raw keystream block, not xored with input */ |
fa5fd3af | 376 | b .Lctrout |
11e3b725 AB |
377 | |
378 | .Lctrcarry: | |
7367bfeb | 379 | umov x7, vctr.d[0] /* load upper word of ctr */ |
11e3b725 AB |
380 | rev x7, x7 /* ... to handle the carry */ |
381 | add x7, x7, #1 | |
382 | rev x7, x7 | |
7367bfeb | 383 | ins vctr.d[0], x7 |
11e3b725 | 384 | b .Lctrcarrydone |
49788fe2 | 385 | AES_ENDPROC(aes_ctr_encrypt) |
49788fe2 AB |
386 | |
387 | ||
388 | /* | |
389 | * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, | |
390 | * int blocks, u8 const rk2[], u8 iv[], int first) | |
391 | * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, | |
392 | * int blocks, u8 const rk2[], u8 iv[], int first) | |
393 | */ | |
394 | ||
2e5d2f33 | 395 | .macro next_tweak, out, in, tmp /* out = in doubled as a 128-bit value, with xtsmask folding the carries; tmp is clobbered */ |
49788fe2 | 396 | sshr \tmp\().2d, \in\().2d, #63 /* per 64-bit lane: all-ones if the top bit is set */ |
2e5d2f33 | 397 | and \tmp\().16b, \tmp\().16b, xtsmask.16b |
49788fe2 AB |
398 | add \out\().2d, \in\().2d, \in\().2d /* shift each 64-bit lane left by one */ |
399 | ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 /* swap halves so each carry lands in the other lane */ | |
400 | eor \out\().16b, \out\().16b, \tmp\().16b | |
401 | .endm | |
402 | ||
2e5d2f33 AB |
403 | .macro xts_load_mask, tmp /* sets xtsmask.2d = { 0x1, 0x87 }; tmp is clobbered; xtsmask is an alias defined outside this view */ |
404 | movi xtsmask.2s, #0x1 | |
405 | movi \tmp\().2s, #0x87 | |
406 | uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s /* even 32-bit lanes: { 1, 0, 0x87, 0 } */ | |
407 | .endm | |
49788fe2 AB |
408 | |
409 | AES_ENTRY(aes_xts_encrypt) /* x0=out, x1=in, x2=rk1, w3=rounds, w4=blocks, x5=rk2, x6=iv, w7=first */ | |
6e7de6af AB |
410 | stp x29, x30, [sp, #-16]! /* frame record: the bl below overwrites x30 */ |
411 | mov x29, sp | |
55868b45 | 412 | |
6e7de6af | 413 | ld1 {v4.16b}, [x6] /* v4 = current tweak */ |
cc3cc489 | 414 | xts_load_mask v8 |
68338174 AB |
415 | cbz w7, .Lxtsencnotfirst /* first == 0: iv is used as the tweak without encrypting it */ |
416 | ||
417 | enc_prepare w3, x5, x8 | |
418 | encrypt_block v4, w3, x5, x8, w7 /* first tweak */ | |
419 | enc_switch_key w3, x2, x8 /* continue with rk1 (x2) for the data blocks */ | |
49788fe2 AB |
420 | b .LxtsencNx |
421 | ||
68338174 | 422 | .Lxtsencnotfirst: |
6e7de6af | 423 | enc_prepare w3, x2, x8 |
49788fe2 | 424 | .LxtsencloopNx: |
2e5d2f33 | 425 | next_tweak v4, v4, v8 |
49788fe2 | 426 | .LxtsencNx: |
6e7de6af | 427 | subs w4, w4, #4 |
49788fe2 | 428 | bmi .Lxtsenc1x /* fewer than 4 blocks left */ |
6e7de6af | 429 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ |
2e5d2f33 | 430 | next_tweak v5, v4, v8 /* v4..v7 = tweaks for blocks 0..3 */ |
49788fe2 | 431 | eor v0.16b, v0.16b, v4.16b |
2e5d2f33 | 432 | next_tweak v6, v5, v8 |
49788fe2 AB |
433 | eor v1.16b, v1.16b, v5.16b |
434 | eor v2.16b, v2.16b, v6.16b | |
2e5d2f33 | 435 | next_tweak v7, v6, v8 |
49788fe2 | 436 | eor v3.16b, v3.16b, v7.16b |
55868b45 | 437 | bl aes_encrypt_block4x |
49788fe2 AB |
438 | eor v3.16b, v3.16b, v7.16b /* xor with the same tweaks again after the cipher */ |
439 | eor v0.16b, v0.16b, v4.16b | |
440 | eor v1.16b, v1.16b, v5.16b | |
441 | eor v2.16b, v2.16b, v6.16b | |
6e7de6af | 442 | st1 {v0.16b-v3.16b}, [x0], #64 |
49788fe2 | 443 | mov v4.16b, v7.16b /* carry the last tweak forward */ |
6e7de6af | 444 | cbz w4, .Lxtsencout |
cc3cc489 | 445 | xts_reload_mask v8 /* macro not defined in this view - presumably restores xtsmask; TODO confirm */ |
49788fe2 | 446 | b .LxtsencloopNx |
49788fe2 | 447 | .Lxtsenc1x: |
6e7de6af | 448 | adds w4, w4, #4 /* undo the subtraction: w4 = blocks still pending */ |
49788fe2 | 449 | beq .Lxtsencout |
49788fe2 | 450 | .Lxtsencloop: |
6e7de6af | 451 | ld1 {v1.16b}, [x1], #16 |
49788fe2 | 452 | eor v0.16b, v1.16b, v4.16b |
6e7de6af | 453 | encrypt_block v0, w3, x2, x8, w7 |
49788fe2 | 454 | eor v0.16b, v0.16b, v4.16b |
6e7de6af AB |
455 | st1 {v0.16b}, [x0], #16 |
456 | subs w4, w4, #1 | |
49788fe2 | 457 | beq .Lxtsencout /* exit before advancing: v4 stays the last tweak used */ |
2e5d2f33 | 458 | next_tweak v4, v4, v8 |
49788fe2 AB |
459 | b .Lxtsencloop |
460 | .Lxtsencout: | |
6e7de6af AB |
461 | st1 {v4.16b}, [x6] /* write the tweak back through the iv pointer */ |
462 | ldp x29, x30, [sp], #16 | |
49788fe2 AB |
463 | ret |
464 | AES_ENDPROC(aes_xts_encrypt) | |
465 | ||
466 | ||
467 | AES_ENTRY(aes_xts_decrypt) /* x0=out, x1=in, x2=rk1, w3=rounds, w4=blocks, x5=rk2, x6=iv, w7=first */ | |
6e7de6af AB |
468 | stp x29, x30, [sp, #-16]! /* frame record: the bl below overwrites x30 */ |
469 | mov x29, sp | |
55868b45 | 470 | |
6e7de6af | 471 | ld1 {v4.16b}, [x6] /* v4 = current tweak */ |
cc3cc489 | 472 | xts_load_mask v8 |
68338174 AB |
473 | cbz w7, .Lxtsdecnotfirst /* first == 0: iv is used as the tweak without encrypting it */ |
474 | ||
475 | enc_prepare w3, x5, x8 /* the tweak is encrypted (not decrypted) with rk2 */ | |
476 | encrypt_block v4, w3, x5, x8, w7 /* first tweak */ | |
477 | dec_prepare w3, x2, x8 /* continue with rk1 (x2) for the data blocks */ | |
49788fe2 AB |
478 | b .LxtsdecNx |
479 | ||
68338174 | 480 | .Lxtsdecnotfirst: |
6e7de6af | 481 | dec_prepare w3, x2, x8 |
49788fe2 | 482 | .LxtsdecloopNx: |
2e5d2f33 | 483 | next_tweak v4, v4, v8 |
49788fe2 | 484 | .LxtsdecNx: |
6e7de6af | 485 | subs w4, w4, #4 |
49788fe2 | 486 | bmi .Lxtsdec1x /* fewer than 4 blocks left */ |
6e7de6af | 487 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ |
2e5d2f33 | 488 | next_tweak v5, v4, v8 /* v4..v7 = tweaks for blocks 0..3 */ |
49788fe2 | 489 | eor v0.16b, v0.16b, v4.16b |
2e5d2f33 | 490 | next_tweak v6, v5, v8 |
49788fe2 AB |
491 | eor v1.16b, v1.16b, v5.16b |
492 | eor v2.16b, v2.16b, v6.16b | |
2e5d2f33 | 493 | next_tweak v7, v6, v8 |
49788fe2 | 494 | eor v3.16b, v3.16b, v7.16b |
55868b45 | 495 | bl aes_decrypt_block4x |
49788fe2 AB |
496 | eor v3.16b, v3.16b, v7.16b /* xor with the same tweaks again after the cipher */ |
497 | eor v0.16b, v0.16b, v4.16b | |
498 | eor v1.16b, v1.16b, v5.16b | |
499 | eor v2.16b, v2.16b, v6.16b | |
6e7de6af | 500 | st1 {v0.16b-v3.16b}, [x0], #64 |
49788fe2 | 501 | mov v4.16b, v7.16b /* carry the last tweak forward */ |
6e7de6af | 502 | cbz w4, .Lxtsdecout |
cc3cc489 | 503 | xts_reload_mask v8 /* macro not defined in this view - presumably restores xtsmask; TODO confirm */ |
49788fe2 | 504 | b .LxtsdecloopNx |
49788fe2 | 505 | .Lxtsdec1x: |
6e7de6af | 506 | adds w4, w4, #4 /* undo the subtraction: w4 = blocks still pending */ |
49788fe2 | 507 | beq .Lxtsdecout |
49788fe2 | 508 | .Lxtsdecloop: |
6e7de6af | 509 | ld1 {v1.16b}, [x1], #16 |
49788fe2 | 510 | eor v0.16b, v1.16b, v4.16b |
6e7de6af | 511 | decrypt_block v0, w3, x2, x8, w7 |
49788fe2 | 512 | eor v0.16b, v0.16b, v4.16b |
6e7de6af AB |
513 | st1 {v0.16b}, [x0], #16 |
514 | subs w4, w4, #1 | |
49788fe2 | 515 | beq .Lxtsdecout /* exit before advancing: v4 stays the last tweak used */ |
2e5d2f33 | 516 | next_tweak v4, v4, v8 |
49788fe2 AB |
517 | b .Lxtsdecloop |
518 | .Lxtsdecout: | |
6e7de6af AB |
519 | st1 {v4.16b}, [x6] /* write the tweak back through the iv pointer */ |
520 | ldp x29, x30, [sp], #16 | |
49788fe2 AB |
521 | ret |
522 | AES_ENDPROC(aes_xts_decrypt) | |
4860620d AB |
523 | |
524 | /* | |
525 | * aes_mac_update(u8 const in[], u32 const rk[], int rounds, | |
526 | * int blocks, u8 dg[], int enc_before, int enc_after) | |
527 | */ | |
528 | AES_ENTRY(aes_mac_update) /* x0=in, x1=rk, w2=rounds, w3=blocks, x4=dg, w5=enc_before, w6=enc_after */ | |
0c8f838a AB |
529 | frame_push 6 /* macro defined elsewhere - presumably saves x19-x24, which are written below; TODO confirm */ |
530 | ||
531 | mov x19, x0 /* x19 = in */ | |
532 | mov x20, x1 /* x20 = rk */ | |
533 | mov x21, x2 /* x21 = rounds */ | |
534 | mov x22, x3 /* x22 = blocks */ | |
535 | mov x23, x4 /* x23 = dg */ | |
536 | mov x24, x6 /* x24 = enc_after */ | |
537 | ||
538 | ld1 {v0.16b}, [x23] /* get dg */ | |
4860620d | 539 | enc_prepare w2, x1, x7 |
870c163a | 540 | cbz w5, .Lmacloop4x /* enc_before == 0: skip the initial encryption of dg */ |
4860620d | 541 | |
870c163a AB |
542 | encrypt_block v0, w2, x1, x7, w8 |
543 | ||
544 | .Lmacloop4x: | |
0c8f838a | 545 | subs w22, w22, #4 |
870c163a | 546 | bmi .Lmac1x /* fewer than 4 blocks left */ |
0c8f838a | 547 | ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */ |
870c163a | 548 | eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ |
0c8f838a | 549 | encrypt_block v0, w21, x20, x7, w8 |
870c163a | 550 | eor v0.16b, v0.16b, v2.16b |
0c8f838a | 551 | encrypt_block v0, w21, x20, x7, w8 |
870c163a | 552 | eor v0.16b, v0.16b, v3.16b |
0c8f838a | 553 | encrypt_block v0, w21, x20, x7, w8 |
870c163a | 554 | eor v0.16b, v0.16b, v4.16b |
0c8f838a AB |
555 | cmp w22, wzr |
556 | csinv x5, x24, xzr, eq /* x5 = enc_after if no blocks remain, else all-ones */ | |
870c163a | 557 | cbz w5, .Lmacout /* last block and enc_after == 0: leave dg unencrypted */ |
0c8f838a AB |
558 | encrypt_block v0, w21, x20, x7, w8 |
559 | st1 {v0.16b}, [x23] /* return dg */ | |
560 | cond_yield_neon .Lmacrestart /* macro defined elsewhere; .Lmacrestart reloads dg and redoes enc_prepare */ | |
870c163a AB |
561 | b .Lmacloop4x |
562 | .Lmac1x: | |
0c8f838a | 563 | add w22, w22, #4 /* undo the subtraction: w22 = blocks still pending */ |
4860620d | 564 | .Lmacloop: |
0c8f838a AB |
565 | cbz w22, .Lmacout |
566 | ld1 {v1.16b}, [x19], #16 /* get next pt block */ | |
4860620d AB |
567 | eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ |
568 | ||
0c8f838a AB |
569 | subs w22, w22, #1 |
570 | csinv x5, x24, xzr, eq /* x5 = enc_after if that was the last block, else all-ones */ | |
4860620d AB |
571 | cbz w5, .Lmacout |
572 | ||
0c8f838a AB |
573 | .Lmacenc: |
574 | encrypt_block v0, w21, x20, x7, w8 | |
4860620d AB |
575 | b .Lmacloop |
576 | ||
577 | .Lmacout: | |
0c8f838a AB |
578 | st1 {v0.16b}, [x23] /* return dg */ |
579 | frame_pop | |
4860620d | 580 | ret |
0c8f838a AB |
581 | |
582 | .Lmacrestart: | |
583 | ld1 {v0.16b}, [x23] /* get dg */ | |
584 | enc_prepare w21, x20, x0 /* x0 used as the temp here; in was copied to x19 on entry */ | |
585 | b .Lmacloop4x | |
4860620d | 586 | AES_ENDPROC(aes_mac_update) |