Commit | Line | Data |
---|---|---|
d2912cb1 | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
49788fe2 AB |
2 | /* |
3 | * linux/arch/arm64/crypto/aes-modes.S - chaining mode wrappers for AES | |
4 | * | |
4860620d | 5 | * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> |
49788fe2 AB |
6 | */ |
7 | ||
8 | /* included by aes-ce.S and aes-neon.S */ | |
9 | ||
10 | .text | |
11 | .align 4 | |
12 | ||
e2174139 AB |
13 | #ifndef MAX_STRIDE /* the including file may already have chosen a stride */ |
14 | #define MAX_STRIDE 4 /* default: blocks handled per pass of the ...Nx loops */ | |
15 | #endif | |
16 | ||
7367bfeb AB |
17 | #if MAX_STRIDE == 4 /* ST4()/ST5() keep or drop a line depending on the stride */ |
18 | #define ST4(x...) x /* 4-way build: emit the line */ | |
19 | #define ST5(x...) /* 4-way build: drop the line */ | |
20 | #else /* any other stride; the 5x helpers in this file are only built for 5 */ | |
21 | #define ST4(x...) /* drop the line */ | |
22 | #define ST5(x...) x /* emit the line */ | |
23 | #endif /* MAX_STRIDE == 4 */ | |
24 | ||
49788fe2 | 25 | aes_encrypt_block4x: /* reached via bl; callers pass 4 blocks in v0-v3 and store v0-v3 afterwards */
6e7de6af | 26 | encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 /* macro not defined in this file; callers hold rounds in w3 and rk in x2; x8/w7 presumably scratch - confirm */
49788fe2 AB |
27 | ret /* lr was set by the caller's bl */ |
28 | ENDPROC(aes_encrypt_block4x) | |
29 | ||
30 | aes_decrypt_block4x: /* reached via bl; callers pass 4 blocks in v0-v3 and store v0-v3 afterwards */ | |
6e7de6af | 31 | decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 /* macro not defined in this file; callers hold rounds in w3 and rk in x2; x8/w7 presumably scratch - confirm */
49788fe2 AB |
32 | ret /* lr was set by the caller's bl */ |
33 | ENDPROC(aes_decrypt_block4x) | |
34 | ||
e2174139 AB |
35 | #if MAX_STRIDE == 5 /* 5-way helpers exist only in the 5-way build */ |
36 | aes_encrypt_block5x: /* reached via bl; callers pass 5 blocks in v0-v4 */ | |
37 | encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 /* macro not defined in this file; x8/w7 presumably scratch - confirm */ | |
38 | ret | |
39 | ENDPROC(aes_encrypt_block5x) | |
40 | ||
41 | aes_decrypt_block5x: /* reached via bl; callers pass 5 blocks in v0-v4 */ | |
42 | decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7 /* macro not defined in this file; x8/w7 presumably scratch - confirm */ | |
43 | ret | |
44 | ENDPROC(aes_decrypt_block5x) | |
45 | #endif /* MAX_STRIDE == 5 */ | |
46 | ||
49788fe2 AB |
47 | /* |
48 | * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | |
68338174 | 49 | * int blocks) |
49788fe2 | 50 | * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, |
68338174 | 51 | * int blocks) |
49788fe2 AB |
52 | */ |
53 | ||
54 | AES_ENTRY(aes_ecb_encrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=blocks */ | |
6e7de6af AB |
55 | stp x29, x30, [sp, #-16]! /* save fp/lr: the Nx loop overwrites lr with bl */ |
56 | mov x29, sp | |
49788fe2 | 57 | |
6e7de6af | 58 | enc_prepare w3, x2, x5 /* macro from the including file; x5 presumably scratch - confirm */ |
49788fe2 AB |
59 | |
60 | .LecbencloopNx: /* MAX_STRIDE blocks per pass */ | |
7367bfeb | 61 | subs w4, w4, #MAX_STRIDE /* claim a full stride up front */ |
49788fe2 | 62 | bmi .Lecbenc1x /* fewer than MAX_STRIDE blocks left */ |
6e7de6af | 63 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ |
7367bfeb AB |
64 | ST4( bl aes_encrypt_block4x ) |
65 | ST5( ld1 {v4.16b}, [x1], #16 ) | |
66 | ST5( bl aes_encrypt_block5x ) | |
6e7de6af | 67 | st1 {v0.16b-v3.16b}, [x0], #64 /* write 4 ct blocks */ |
7367bfeb | 68 | ST5( st1 {v4.16b}, [x0], #16 ) |
49788fe2 AB |
69 | b .LecbencloopNx |
70 | .Lecbenc1x: | |
7367bfeb | 71 | adds w4, w4, #MAX_STRIDE /* undo the claim: w4 = blocks still to do */ |
49788fe2 | 72 | beq .Lecbencout /* nothing left */ |
49788fe2 | 73 | .Lecbencloop: |
6e7de6af AB |
74 | ld1 {v0.16b}, [x1], #16 /* get next pt block */ |
75 | encrypt_block v0, w3, x2, x5, w6 | |
76 | st1 {v0.16b}, [x0], #16 /* write ct block */ | |
77 | subs w4, w4, #1 | |
49788fe2 AB |
78 | bne .Lecbencloop /* until the remainder is consumed */ |
79 | .Lecbencout: | |
6e7de6af | 80 | ldp x29, x30, [sp], #16 /* restore fp/lr */ |
49788fe2 AB |
81 | ret |
82 | AES_ENDPROC(aes_ecb_encrypt) | |
83 | ||
84 | ||
85 | AES_ENTRY(aes_ecb_decrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=blocks */ | |
6e7de6af AB |
86 | stp x29, x30, [sp, #-16]! /* save fp/lr: the Nx loop overwrites lr with bl */ |
87 | mov x29, sp | |
0c8f838a | 88 | |
6e7de6af | 89 | dec_prepare w3, x2, x5 /* macro from the including file; x5 presumably scratch - confirm */ |
49788fe2 AB |
90 | |
91 | .LecbdecloopNx: /* MAX_STRIDE blocks per pass */ | |
7367bfeb | 92 | subs w4, w4, #MAX_STRIDE /* claim a full stride up front */ |
49788fe2 | 93 | bmi .Lecbdec1x /* fewer than MAX_STRIDE blocks left */ |
6e7de6af | 94 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ |
7367bfeb AB |
95 | ST4( bl aes_decrypt_block4x ) |
96 | ST5( ld1 {v4.16b}, [x1], #16 ) | |
97 | ST5( bl aes_decrypt_block5x ) | |
6e7de6af | 98 | st1 {v0.16b-v3.16b}, [x0], #64 /* write 4 pt blocks */ |
7367bfeb | 99 | ST5( st1 {v4.16b}, [x0], #16 ) |
49788fe2 AB |
100 | b .LecbdecloopNx |
101 | .Lecbdec1x: | |
7367bfeb | 102 | adds w4, w4, #MAX_STRIDE /* undo the claim: w4 = blocks still to do */ |
49788fe2 | 103 | beq .Lecbdecout /* nothing left */ |
49788fe2 | 104 | .Lecbdecloop: |
6e7de6af AB |
105 | ld1 {v0.16b}, [x1], #16 /* get next ct block */ |
106 | decrypt_block v0, w3, x2, x5, w6 | |
107 | st1 {v0.16b}, [x0], #16 /* write pt block */ | |
108 | subs w4, w4, #1 | |
49788fe2 AB |
109 | bne .Lecbdecloop /* until the remainder is consumed */ |
110 | .Lecbdecout: | |
6e7de6af | 111 | ldp x29, x30, [sp], #16 /* restore fp/lr */ |
49788fe2 AB |
112 | ret |
113 | AES_ENDPROC(aes_ecb_decrypt) | |
114 | ||
115 | ||
116 | /* | |
117 | * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | |
68338174 | 118 | * int blocks, u8 iv[]) |
49788fe2 | 119 | * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, |
68338174 | 120 | * int blocks, u8 iv[]) |
735177ca AB |
121 | * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[], |
122 | * int rounds, int blocks, u8 iv[], | |
123 | * u32 const rk2[]); | |
124 | * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[], | |
125 | * int rounds, int blocks, u8 iv[], | |
126 | * u32 const rk2[]); | |
49788fe2 AB |
127 | */ |
128 | ||
735177ca AB |
129 | AES_ENTRY(aes_essiv_cbc_encrypt) /* as aes_cbc_encrypt, plus x6=rk2 used to encrypt the iv first */ |
130 | ld1 {v4.16b}, [x5] /* get iv */ | |
131 | ||
132 | mov w8, #14 /* AES-256: 14 rounds */ | |
133 | enc_prepare w8, x6, x7 /* set up for rk2 (x6) */ | |
134 | encrypt_block v4, w8, x6, x7, w9 /* v4 = iv encrypted under rk2 */ | |
135 | enc_switch_key w3, x2, x6 /* macro from the including file; presumably switches to rk1 (x2) with w3 rounds - confirm */ | |
136 | b .Lcbcencloop4x /* continue in the shared CBC body */ | |
137 | ||
49788fe2 | 138 | AES_ENTRY(aes_cbc_encrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=blocks, x5=iv */ |
6e7de6af AB |
139 | ld1 {v4.16b}, [x5] /* get iv */ |
140 | enc_prepare w3, x2, x6 | |
49788fe2 | 141 | |
a8f8a69e | 142 | .Lcbcencloop4x: /* v4 holds the chaining value on entry */ |
6e7de6af | 143 | subs w4, w4, #4 /* claim 4 blocks up front */ |
a8f8a69e | 144 | bmi .Lcbcenc1x /* fewer than 4 left */ |
6e7de6af | 145 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ |
a8f8a69e | 146 | eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */ |
6e7de6af | 147 | encrypt_block v0, w3, x2, x6, w7 |
a8f8a69e | 148 | eor v1.16b, v1.16b, v0.16b /* chain: xor with the previous ct block */ |
6e7de6af | 149 | encrypt_block v1, w3, x2, x6, w7 |
a8f8a69e | 150 | eor v2.16b, v2.16b, v1.16b |
6e7de6af | 151 | encrypt_block v2, w3, x2, x6, w7 |
a8f8a69e | 152 | eor v3.16b, v3.16b, v2.16b |
6e7de6af AB |
153 | encrypt_block v3, w3, x2, x6, w7 |
154 | st1 {v0.16b-v3.16b}, [x0], #64 /* write 4 ct blocks */ | |
a8f8a69e AB |
155 | mov v4.16b, v3.16b /* last ct block is the next chaining value */ |
156 | b .Lcbcencloop4x | |
157 | .Lcbcenc1x: | |
6e7de6af | 158 | adds w4, w4, #4 /* undo the claim: w4 = blocks still to do */ |
a8f8a69e AB |
159 | beq .Lcbcencout |
160 | .Lcbcencloop: | |
6e7de6af | 161 | ld1 {v0.16b}, [x1], #16 /* get next pt block */ |
a8f8a69e | 162 | eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */ |
6e7de6af AB |
163 | encrypt_block v4, w3, x2, x6, w7 /* v4 is both the ct block and the next chaining value */ |
164 | st1 {v4.16b}, [x0], #16 | |
165 | subs w4, w4, #1 | |
49788fe2 | 166 | bne .Lcbcencloop |
a8f8a69e | 167 | .Lcbcencout: |
6e7de6af | 168 | st1 {v4.16b}, [x5] /* return iv */ |
49788fe2 AB |
169 | ret |
170 | AES_ENDPROC(aes_cbc_encrypt) | |
735177ca AB |
171 | AES_ENDPROC(aes_essiv_cbc_encrypt) |
172 | ||
173 | AES_ENTRY(aes_essiv_cbc_decrypt) /* as aes_cbc_decrypt, plus x6=rk2 used to encrypt the iv first */ | |
174 | stp x29, x30, [sp, #-16]! /* save fp/lr: the Nx loop overwrites lr with bl */ | |
175 | mov x29, sp | |
176 | ||
177 | ld1 {cbciv.16b}, [x5] /* get iv */ | |
49788fe2 | 178 | |
735177ca AB |
179 | mov w8, #14 /* AES-256: 14 rounds */ |
180 | enc_prepare w8, x6, x7 /* set up for rk2 (x6) */ | |
181 | encrypt_block cbciv, w8, x6, x7, w9 /* cbciv = iv encrypted under rk2 */ | |
182 | b .Lessivcbcdecstart /* skip the plain entry's prologue and iv load */ | |
49788fe2 AB |
183 | |
184 | AES_ENTRY(aes_cbc_decrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=blocks, x5=iv */ | |
6e7de6af AB |
185 | stp x29, x30, [sp, #-16]! /* save fp/lr: the Nx loop overwrites lr with bl */ |
186 | mov x29, sp | |
49788fe2 | 187 | |
7367bfeb | 188 | ld1 {cbciv.16b}, [x5] /* get iv */ |
735177ca | 189 | .Lessivcbcdecstart: |
6e7de6af | 190 | dec_prepare w3, x2, x6 |
49788fe2 AB |
191 | |
192 | .LcbcdecloopNx: /* cbciv holds the chaining value on entry */ | |
7367bfeb | 193 | subs w4, w4, #MAX_STRIDE /* claim a full stride up front */ |
49788fe2 | 194 | bmi .Lcbcdec1x /* fewer than MAX_STRIDE blocks left */ |
6e7de6af | 195 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ |
7367bfeb AB |
196 | #if MAX_STRIDE == 5 |
197 | ld1 {v4.16b}, [x1], #16 /* get 1 ct block */ | |
198 | mov v5.16b, v0.16b /* keep ct0-ct2: decryption overwrites v0-v4 */ | |
199 | mov v6.16b, v1.16b | |
200 | mov v7.16b, v2.16b | |
201 | bl aes_decrypt_block5x | |
202 | sub x1, x1, #32 /* step back to ct3 so ct3 and ct4 can be re-read */ | |
203 | eor v0.16b, v0.16b, cbciv.16b /* pt0 = D(ct0) ^ iv */ | |
204 | eor v1.16b, v1.16b, v5.16b /* pt1 = D(ct1) ^ ct0 */ | |
205 | ld1 {v5.16b}, [x1], #16 /* reload ct3 */ | |
206 | ld1 {cbciv.16b}, [x1], #16 /* reload ct4, the next chaining value */ | |
207 | eor v2.16b, v2.16b, v6.16b /* pt2 = D(ct2) ^ ct1 */ | |
208 | eor v3.16b, v3.16b, v7.16b /* pt3 = D(ct3) ^ ct2 */ | |
209 | eor v4.16b, v4.16b, v5.16b /* pt4 = D(ct4) ^ ct3 */ | |
210 | #else | |
49788fe2 AB |
211 | mov v4.16b, v0.16b /* keep ct0-ct2: decryption overwrites v0-v3 */ |
212 | mov v5.16b, v1.16b | |
213 | mov v6.16b, v2.16b | |
55868b45 | 214 | bl aes_decrypt_block4x |
6e7de6af | 215 | sub x1, x1, #16 /* step back to ct3 so it can be re-read */ |
7367bfeb | 216 | eor v0.16b, v0.16b, cbciv.16b /* pt0 = D(ct0) ^ iv */ |
49788fe2 | 217 | eor v1.16b, v1.16b, v4.16b /* pt1 = D(ct1) ^ ct0 */ |
7367bfeb | 218 | ld1 {cbciv.16b}, [x1], #16 /* reload ct3, the next chaining value */ |
49788fe2 AB |
219 | eor v2.16b, v2.16b, v5.16b /* pt2 = D(ct2) ^ ct1 */ |
220 | eor v3.16b, v3.16b, v6.16b /* pt3 = D(ct3) ^ ct2 */ | |
7367bfeb | 221 | #endif |
6e7de6af | 222 | st1 {v0.16b-v3.16b}, [x0], #64 /* write 4 pt blocks */ |
7367bfeb | 223 | ST5( st1 {v4.16b}, [x0], #16 ) |
49788fe2 AB |
224 | b .LcbcdecloopNx |
225 | .Lcbcdec1x: | |
7367bfeb | 226 | adds w4, w4, #MAX_STRIDE /* undo the claim: w4 = blocks still to do */ |
49788fe2 | 227 | beq .Lcbcdecout |
49788fe2 | 228 | .Lcbcdecloop: |
6e7de6af | 229 | ld1 {v1.16b}, [x1], #16 /* get next ct block */ |
49788fe2 | 230 | mov v0.16b, v1.16b /* ...and copy to v0 */ |
6e7de6af | 231 | decrypt_block v0, w3, x2, x6, w7 |
7367bfeb AB |
232 | eor v0.16b, v0.16b, cbciv.16b /* xor with iv => pt */ |
233 | mov cbciv.16b, v1.16b /* ct is next iv */ | |
6e7de6af AB |
234 | st1 {v0.16b}, [x0], #16 |
235 | subs w4, w4, #1 | |
49788fe2 AB |
236 | bne .Lcbcdecloop |
237 | .Lcbcdecout: | |
7367bfeb | 238 | st1 {cbciv.16b}, [x5] /* return iv */ |
6e7de6af | 239 | ldp x29, x30, [sp], #16 /* restore fp/lr */ |
49788fe2 AB |
240 | ret |
241 | AES_ENDPROC(aes_cbc_decrypt) | |
735177ca | 242 | AES_ENDPROC(aes_essiv_cbc_decrypt) |
49788fe2 AB |
243 | |
244 | ||
dd597fb3 AB |
245 | /* |
246 | * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[], | |
247 | * int rounds, int bytes, u8 const iv[]) | |
248 | * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], | |
249 | * int rounds, int bytes, u8 const iv[]) | |
250 | */ | |
251 | ||
252 | AES_ENTRY(aes_cbc_cts_encrypt) /* x0=out, x1=in, x2=rk, w3=rounds, x4=bytes, x5=iv */ | |
253 | adr_l x8, .Lcts_permute_table | |
254 | sub x4, x4, #16 /* x4 = bytes beyond the first 16 (tail length) */ | |
255 | add x9, x8, #32 | |
256 | add x8, x8, x4 /* x8 = table + tail */ | |
257 | sub x9, x9, x4 /* x9 = table + 32 - tail */ | |
258 | ld1 {v3.16b}, [x8] /* v3: 0xff x (16 - tail), then indices 0..tail-1 */ | |
259 | ld1 {v4.16b}, [x9] /* v4: indices 16-tail..15, then 0xff x (16 - tail) */ | |
260 | ||
261 | ld1 {v0.16b}, [x1], x4 /* overlapping loads */ | |
262 | ld1 {v1.16b}, [x1] /* v1 = the final 16 bytes of the input */ | |
263 | ||
264 | ld1 {v5.16b}, [x5] /* get iv */ | |
265 | enc_prepare w3, x2, x6 | |
266 | ||
267 | eor v0.16b, v0.16b, v5.16b /* xor with iv */ | |
268 | tbl v1.16b, {v1.16b}, v4.16b /* tail bytes moved to the front; tbl yields 0 for the 0xff indices */ | |
269 | encrypt_block v0, w3, x2, x6, w7 | |
270 | ||
271 | eor v1.16b, v1.16b, v0.16b /* chain the zero-padded tail with the first ct block */ | |
272 | tbl v0.16b, {v0.16b}, v3.16b /* first tail bytes of that ct block moved to the end of v0 */ | |
273 | encrypt_block v1, w3, x2, x6, w7 | |
274 | ||
275 | add x4, x0, x4 /* x4 = out + tail */ | |
276 | st1 {v0.16b}, [x4] /* overlapping stores */ | |
277 | st1 {v1.16b}, [x0] /* stored second: overwrites the zero bytes the previous store put in out[tail..15] */ | |
278 | ret | |
279 | AES_ENDPROC(aes_cbc_cts_encrypt) | |
280 | ||
281 | AES_ENTRY(aes_cbc_cts_decrypt) /* x0=out, x1=in, x2=rk, w3=rounds, x4=bytes, x5=iv */ | |
282 | adr_l x8, .Lcts_permute_table | |
283 | sub x4, x4, #16 /* x4 = bytes beyond the first 16 (tail length) */ | |
284 | add x9, x8, #32 | |
285 | add x8, x8, x4 /* x8 = table + tail */ | |
286 | sub x9, x9, x4 /* x9 = table + 32 - tail */ | |
287 | ld1 {v3.16b}, [x8] /* v3: 0xff x (16 - tail), then indices 0..tail-1 */ | |
288 | ld1 {v4.16b}, [x9] /* v4: indices 16-tail..15, then 0xff x (16 - tail) */ | |
289 | ||
290 | ld1 {v0.16b}, [x1], x4 /* overlapping loads */ | |
291 | ld1 {v1.16b}, [x1] /* v1 = the final 16 bytes of the input */ | |
292 | ||
293 | ld1 {v5.16b}, [x5] /* get iv */ | |
294 | dec_prepare w3, x2, x6 | |
295 | ||
dd597fb3 | 296 | decrypt_block v0, w3, x2, x6, w7 |
0cfd507c AB |
297 | tbl v2.16b, {v0.16b}, v3.16b /* first tail bytes of D(first block) moved to the end of v2 */ |
298 | eor v2.16b, v2.16b, v1.16b /* xor with the input tail; only the last tail bytes survive the stores */ | |
dd597fb3 AB |
299 | |
300 | tbx v0.16b, {v1.16b}, v4.16b /* front of v0 replaced by the input tail; tbx keeps v0 bytes for 0xff indices */ | |
dd597fb3 AB |
301 | decrypt_block v0, w3, x2, x6, w7 |
302 | eor v0.16b, v0.16b, v5.16b /* xor with iv */ | |
303 | ||
304 | add x4, x0, x4 /* x4 = out + tail */ | |
305 | st1 {v2.16b}, [x4] /* overlapping stores */ | |
306 | st1 {v0.16b}, [x0] /* stored second: overwrites out[tail..15] from the previous store */ | |
307 | ret | |
308 | AES_ENDPROC(aes_cbc_cts_decrypt) | |
309 | ||
310 | .section ".rodata", "a" | |
311 | .align 6 /* 2^6 = 64-byte alignment */ | |
312 | .Lcts_permute_table: /* 48 bytes; the CTS routines load 16-byte windows at offsets tail and 32 - tail */ | |
313 | .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff /* 0xff is out of range for a one-register tbl/tbx */ | |
314 | .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff | |
315 | .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 /* identity indices 0..15 */ | |
316 | .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf | |
317 | .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff | |
318 | .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff | |
319 | .previous /* back to the section that was active before .section */ | |
320 | ||
321 | ||
49788fe2 AB |
322 | /* |
323 | * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, | |
68338174 | 324 | * int blocks, u8 ctr[]) |
49788fe2 AB |
325 | */ |
326 | ||
327 | AES_ENTRY(aes_ctr_encrypt) /* x0=out, x1=in, x2=rk, w3=rounds, w4=blocks, x5=ctr */ | |
6e7de6af AB |
328 | stp x29, x30, [sp, #-16]! /* save fp/lr: the Nx loop overwrites lr with bl */ |
329 | mov x29, sp | |
68338174 | 330 | |
6e7de6af | 331 | enc_prepare w3, x2, x6 |
7367bfeb | 332 | ld1 {vctr.16b}, [x5] /* load the counter block */ |
11e3b725 | 333 | |
7367bfeb | 334 | umov x6, vctr.d[1] /* keep swabbed ctr in reg */ |
68338174 | 335 | rev x6, x6 |
6e7de6af AB |
336 | cmn w6, w4 /* 32 bit overflow? */ |
337 | bcs .Lctrloop /* yes: use the one-block loop, which handles carries */ | |
49788fe2 | 338 | .LctrloopNx: /* MAX_STRIDE counter blocks per pass, 32-bit increments only */ |
7367bfeb | 339 | subs w4, w4, #MAX_STRIDE /* claim a full stride up front */ |
49788fe2 | 340 | bmi .Lctr1x /* fewer than MAX_STRIDE blocks left */ |
ed6ed118 | 341 | add w7, w6, #1 /* w7-w9 (and w10 for 5-way) = ctr + 1.. */ |
7367bfeb | 342 | mov v0.16b, vctr.16b /* v0 uses the current counter unchanged */ |
ed6ed118 | 343 | add w8, w6, #2 |
7367bfeb AB |
344 | mov v1.16b, vctr.16b |
345 | add w9, w6, #3 | |
346 | mov v2.16b, vctr.16b | |
ed6ed118 | 348 | rev w7, w7 /* back to the byte order used in the counter block */ |
7367bfeb | 349 | mov v3.16b, vctr.16b |
ed6ed118 | 350 | rev w8, w8 |
7367bfeb | 351 | ST5( mov v4.16b, vctr.16b ) |
ed6ed118 AB |
352 | mov v1.s[3], w7 /* patch the last 32-bit word of each copy */ |
353 | rev w9, w9 | |
7367bfeb | 354 | ST5( add w10, w6, #4 ) |
ed6ed118 | 355 | mov v2.s[3], w8 |
7367bfeb | 356 | ST5( rev w10, w10 ) |
ed6ed118 | 357 | mov v3.s[3], w9 |
7367bfeb | 358 | ST5( mov v4.s[3], w10 ) |
6e7de6af | 359 | ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ |
7367bfeb AB |
360 | ST4( bl aes_encrypt_block4x ) |
361 | ST5( bl aes_encrypt_block5x ) | |
49788fe2 | 362 | eor v0.16b, v5.16b, v0.16b /* output = input ^ encrypted counter */ |
7367bfeb | 363 | ST4( ld1 {v5.16b}, [x1], #16 ) |
49788fe2 | 364 | eor v1.16b, v6.16b, v1.16b |
7367bfeb | 365 | ST5( ld1 {v5.16b-v6.16b}, [x1], #32 ) |
49788fe2 AB |
366 | eor v2.16b, v7.16b, v2.16b |
367 | eor v3.16b, v5.16b, v3.16b | |
7367bfeb | 368 | ST5( eor v4.16b, v6.16b, v4.16b ) |
6e7de6af | 369 | st1 {v0.16b-v3.16b}, [x0], #64 |
7367bfeb AB |
370 | ST5( st1 {v4.16b}, [x0], #16 ) |
371 | add x6, x6, #MAX_STRIDE /* advance the register copy of the counter */ | |
68338174 | 372 | rev x7, x6 |
7367bfeb | 373 | ins vctr.d[1], x7 /* ..and write it back into the counter block */ |
6e7de6af | 374 | cbz w4, .Lctrout |
49788fe2 | 375 | b .LctrloopNx |
49788fe2 | 376 | .Lctr1x: |
7367bfeb | 377 | adds w4, w4, #MAX_STRIDE /* undo the claim: w4 = blocks still to do */ |
49788fe2 | 378 | beq .Lctrout |
49788fe2 | 379 | .Lctrloop: /* one block per pass, with 64-bit carry handling */ |
7367bfeb | 380 | mov v0.16b, vctr.16b |
6e7de6af | 381 | encrypt_block v0, w3, x2, x8, w7 |
11e3b725 | 382 | |
68338174 AB |
383 | adds x6, x6, #1 /* increment BE ctr */ |
384 | rev x7, x6 | |
7367bfeb | 385 | ins vctr.d[1], x7 |
11e3b725 AB |
386 | bcs .Lctrcarry /* overflow? */ |
387 | ||
388 | .Lctrcarrydone: | |
6e7de6af | 389 | subs w4, w4, #1 |
ccc5d51e | 390 | bmi .Lctrtailblock /* blocks <0 means tail block */ |
6e7de6af | 391 | ld1 {v3.16b}, [x1], #16 |
49788fe2 | 392 | eor v3.16b, v0.16b, v3.16b |
6e7de6af | 393 | st1 {v3.16b}, [x0], #16 |
11e3b725 AB |
394 | bne .Lctrloop /* flags still come from the subs above */ |
395 | ||
396 | .Lctrout: | |
7367bfeb | 397 | st1 {vctr.16b}, [x5] /* return next CTR value */ |
6e7de6af | 398 | ldp x29, x30, [sp], #16 |
11e3b725 AB |
399 | ret |
400 | ||
ccc5d51e | 401 | .Lctrtailblock: |
6e7de6af | 402 | st1 {v0.16b}, [x0] /* stores the encrypted counter only; nothing is read from x1 here */ |
fa5fd3af | 403 | b .Lctrout |
11e3b725 AB |
404 | |
405 | .Lctrcarry: | |
7367bfeb | 406 | umov x7, vctr.d[0] /* load upper word of ctr */ |
11e3b725 AB |
407 | rev x7, x7 /* ... to handle the carry */ |
408 | add x7, x7, #1 | |
409 | rev x7, x7 | |
7367bfeb | 410 | ins vctr.d[0], x7 |
11e3b725 | 411 | b .Lctrcarrydone |
49788fe2 | 412 | AES_ENDPROC(aes_ctr_encrypt) |
49788fe2 AB |
413 | |
414 | ||
415 | /* | |
416 | * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, | |
417 | * int blocks, u8 const rk2[], u8 iv[], int first) | |
418 | * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds, | |
419 | * int blocks, u8 const rk2[], u8 iv[], int first) | |
420 | */ | |
421 | ||
2e5d2f33 | 422 | .macro next_tweak, out, in, tmp /* out = in doubled as a 128-bit value, folded with xtsmask; tmp is clobbered */ |
49788fe2 | 423 | sshr \tmp\().2d, \in\().2d, #63 /* each 64-bit lane becomes all-ones if its top bit was set */ |
2e5d2f33 | 424 | and \tmp\().16b, \tmp\().16b, xtsmask.16b /* keep the mask value for those lanes */ |
49788fe2 AB |
425 | add \out\().2d, \in\().2d, \in\().2d /* shift each lane left by one bit */ |
426 | ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8 /* swap the two lanes of tmp */ | |
427 | eor \out\().16b, \out\().16b, \tmp\().16b /* low-lane carry goes to the high lane, high-lane carry folds into the low lane */ | |
428 | .endm | |
429 | ||
2e5d2f33 AB |
430 | .macro xts_load_mask, tmp /* builds xtsmask for next_tweak; tmp is clobbered */ | |
431 | movi xtsmask.2s, #0x1 /* low 64 bits = two 32-bit 1s, upper 64 bits cleared */ | |
432 | movi \tmp\().2s, #0x87 /* same layout with 0x87 */ | |
433 | uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s /* even words only: xtsmask.4s = { 1, 0, 0x87, 0 } */ | |
434 | .endm | |
49788fe2 AB |
435 | |
436 | AES_ENTRY(aes_xts_encrypt) /* x0=out, x1=in, x2=rk1, w3=rounds, w4=blocks, x5=rk2, x6=iv, w7=first */ | |
6e7de6af AB |
437 | stp x29, x30, [sp, #-16]! /* save fp/lr: the Nx loop overwrites lr with bl */ |
438 | mov x29, sp | |
55868b45 | 439 | |
6e7de6af | 440 | ld1 {v4.16b}, [x6] /* v4 = tweak taken from iv[] */ |
cc3cc489 | 441 | xts_load_mask v8 |
68338174 AB |
442 | cbz w7, .Lxtsencnotfirst /* first == 0: iv[] already holds a tweak from an earlier call */ |
443 | ||
444 | enc_prepare w3, x5, x8 /* set up for rk2 (x5) */ | |
445 | encrypt_block v4, w3, x5, x8, w7 /* first tweak */ | |
446 | enc_switch_key w3, x2, x8 /* macro from the including file; presumably switches to rk1 (x2) - confirm */ | |
49788fe2 AB |
447 | b .LxtsencNx /* the fresh tweak is used as is */ |
448 | ||
68338174 | 449 | .Lxtsencnotfirst: |
6e7de6af | 450 | enc_prepare w3, x2, x8 |
49788fe2 | 451 | .LxtsencloopNx: |
2e5d2f33 | 452 | next_tweak v4, v4, v8 /* step past the tweak used for the previous block */ |
49788fe2 | 453 | .LxtsencNx: |
6e7de6af | 454 | subs w4, w4, #4 /* claim 4 blocks up front */ |
49788fe2 | 455 | bmi .Lxtsenc1x /* fewer than 4 left */ |
6e7de6af | 456 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ |
2e5d2f33 | 457 | next_tweak v5, v4, v8 /* v4-v7 = tweaks for blocks 0-3 */ |
49788fe2 | 458 | eor v0.16b, v0.16b, v4.16b /* xor each block with its tweak before encryption */ |
2e5d2f33 | 459 | next_tweak v6, v5, v8 |
49788fe2 AB |
460 | eor v1.16b, v1.16b, v5.16b |
461 | eor v2.16b, v2.16b, v6.16b | |
2e5d2f33 | 462 | next_tweak v7, v6, v8 |
49788fe2 | 463 | eor v3.16b, v3.16b, v7.16b |
55868b45 | 464 | bl aes_encrypt_block4x |
49788fe2 AB |
465 | eor v3.16b, v3.16b, v7.16b /* ..and again with the same tweak afterwards */ |
466 | eor v0.16b, v0.16b, v4.16b | |
467 | eor v1.16b, v1.16b, v5.16b | |
468 | eor v2.16b, v2.16b, v6.16b | |
6e7de6af | 469 | st1 {v0.16b-v3.16b}, [x0], #64 |
49788fe2 | 470 | mov v4.16b, v7.16b /* v4 = last tweak used */ |
6e7de6af | 471 | cbz w4, .Lxtsencout |
cc3cc489 | 472 | xts_reload_mask v8 /* macro not defined in this file; presumably restores xtsmask after the 4x helper - confirm */ |
49788fe2 | 473 | b .LxtsencloopNx |
49788fe2 | 474 | .Lxtsenc1x: |
6e7de6af | 475 | adds w4, w4, #4 /* undo the claim: w4 = blocks still to do */ |
49788fe2 | 476 | beq .Lxtsencout |
49788fe2 | 477 | .Lxtsencloop: |
6e7de6af | 478 | ld1 {v1.16b}, [x1], #16 /* get next pt block */ |
49788fe2 | 479 | eor v0.16b, v1.16b, v4.16b /* xor with the tweak */ |
6e7de6af | 480 | encrypt_block v0, w3, x2, x8, w7 |
49788fe2 | 481 | eor v0.16b, v0.16b, v4.16b /* ..and once more after encryption */ |
6e7de6af AB |
482 | st1 {v0.16b}, [x0], #16 |
483 | subs w4, w4, #1 | |
49788fe2 | 484 | beq .Lxtsencout /* leave v4 at the last tweak used */ |
2e5d2f33 | 485 | next_tweak v4, v4, v8 |
49788fe2 AB |
486 | b .Lxtsencloop |
487 | .Lxtsencout: | |
6e7de6af AB |
488 | st1 {v4.16b}, [x6] /* write the tweak back to iv[] */ |
489 | ldp x29, x30, [sp], #16 /* restore fp/lr */ | |
49788fe2 AB |
490 | ret |
491 | AES_ENDPROC(aes_xts_encrypt) | |
492 | ||
493 | ||
494 | AES_ENTRY(aes_xts_decrypt) /* x0=out, x1=in, x2=rk1, w3=rounds, w4=blocks, x5=rk2, x6=iv, w7=first */ | |
6e7de6af AB |
495 | stp x29, x30, [sp, #-16]! /* save fp/lr: the Nx loop overwrites lr with bl */ |
496 | mov x29, sp | |
55868b45 | 497 | |
6e7de6af | 498 | ld1 {v4.16b}, [x6] /* v4 = tweak taken from iv[] */ |
cc3cc489 | 499 | xts_load_mask v8 |
68338174 AB |
500 | cbz w7, .Lxtsdecnotfirst /* first == 0: iv[] already holds a tweak from an earlier call */ |
501 | ||
502 | enc_prepare w3, x5, x8 /* the tweak is encrypted, also when decrypting */ | |
503 | encrypt_block v4, w3, x5, x8, w7 /* first tweak */ | |
504 | dec_prepare w3, x2, x8 /* then set up decryption with rk1 (x2) */ | |
49788fe2 AB |
505 | b .LxtsdecNx /* the fresh tweak is used as is */ |
506 | ||
68338174 | 507 | .Lxtsdecnotfirst: |
6e7de6af | 508 | dec_prepare w3, x2, x8 |
49788fe2 | 509 | .LxtsdecloopNx: |
2e5d2f33 | 510 | next_tweak v4, v4, v8 /* step past the tweak used for the previous block */ |
49788fe2 | 511 | .LxtsdecNx: |
6e7de6af | 512 | subs w4, w4, #4 /* claim 4 blocks up front */ |
49788fe2 | 513 | bmi .Lxtsdec1x /* fewer than 4 left */ |
6e7de6af | 514 | ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ |
2e5d2f33 | 515 | next_tweak v5, v4, v8 /* v4-v7 = tweaks for blocks 0-3 */ |
49788fe2 | 516 | eor v0.16b, v0.16b, v4.16b /* xor each block with its tweak before decryption */ |
2e5d2f33 | 517 | next_tweak v6, v5, v8 |
49788fe2 AB |
518 | eor v1.16b, v1.16b, v5.16b |
519 | eor v2.16b, v2.16b, v6.16b | |
2e5d2f33 | 520 | next_tweak v7, v6, v8 |
49788fe2 | 521 | eor v3.16b, v3.16b, v7.16b |
55868b45 | 522 | bl aes_decrypt_block4x |
49788fe2 AB |
523 | eor v3.16b, v3.16b, v7.16b /* ..and again with the same tweak afterwards */ |
524 | eor v0.16b, v0.16b, v4.16b | |
525 | eor v1.16b, v1.16b, v5.16b | |
526 | eor v2.16b, v2.16b, v6.16b | |
6e7de6af | 527 | st1 {v0.16b-v3.16b}, [x0], #64 |
49788fe2 | 528 | mov v4.16b, v7.16b /* v4 = last tweak used */ |
6e7de6af | 529 | cbz w4, .Lxtsdecout |
cc3cc489 | 530 | xts_reload_mask v8 /* macro not defined in this file; presumably restores xtsmask after the 4x helper - confirm */ |
49788fe2 | 531 | b .LxtsdecloopNx |
49788fe2 | 532 | .Lxtsdec1x: |
6e7de6af | 533 | adds w4, w4, #4 /* undo the claim: w4 = blocks still to do */ |
49788fe2 | 534 | beq .Lxtsdecout |
49788fe2 | 535 | .Lxtsdecloop: |
6e7de6af | 536 | ld1 {v1.16b}, [x1], #16 /* get next ct block */ |
49788fe2 | 537 | eor v0.16b, v1.16b, v4.16b /* xor with the tweak */ |
6e7de6af | 538 | decrypt_block v0, w3, x2, x8, w7 |
49788fe2 | 539 | eor v0.16b, v0.16b, v4.16b /* ..and once more after decryption */ |
6e7de6af AB |
540 | st1 {v0.16b}, [x0], #16 |
541 | subs w4, w4, #1 | |
49788fe2 | 542 | beq .Lxtsdecout /* leave v4 at the last tweak used */ |
2e5d2f33 | 543 | next_tweak v4, v4, v8 |
49788fe2 AB |
544 | b .Lxtsdecloop |
545 | .Lxtsdecout: | |
6e7de6af AB |
546 | st1 {v4.16b}, [x6] /* write the tweak back to iv[] */ |
547 | ldp x29, x30, [sp], #16 /* restore fp/lr */ | |
49788fe2 AB |
548 | ret |
549 | AES_ENDPROC(aes_xts_decrypt) | |
4860620d AB |
550 | |
551 | /* | |
552 | * aes_mac_update(u8 const in[], u32 const rk[], int rounds, | |
553 | * int blocks, u8 dg[], int enc_before, int enc_after) | |
554 | */ | |
555 | AES_ENTRY(aes_mac_update) /* x0=in, x1=rk, w2=rounds, w3=blocks, x4=dg, w5=enc_before, w6=enc_after */ | |
0c8f838a AB |
556 | frame_push 6 /* macro not defined in this file; presumably saves x19-x24, which are overwritten below - confirm */ |
557 | ||
558 | mov x19, x0 /* x19 = in */ | |
559 | mov x20, x1 /* x20 = rk */ | |
560 | mov x21, x2 /* w21 = rounds */ | |
561 | mov x22, x3 /* w22 = blocks */ | |
562 | mov x23, x4 /* x23 = dg */ | |
563 | mov x24, x6 /* x24 = enc_after */ | |
564 | ||
565 | ld1 {v0.16b}, [x23] /* get dg */ | |
4860620d | 566 | enc_prepare w2, x1, x7 /* the argument registers are still intact here */ |
870c163a | 567 | cbz w5, .Lmacloop4x /* enc_before == 0: use dg as is */ |
4860620d | 568 | |
870c163a AB |
569 | encrypt_block v0, w2, x1, x7, w8 /* enc_before: encrypt dg first */ |
570 | ||
571 | .Lmacloop4x: | |
0c8f838a | 572 | subs w22, w22, #4 /* claim 4 blocks up front */ |
870c163a | 573 | bmi .Lmac1x /* fewer than 4 left */ |
0c8f838a | 574 | ld1 {v1.16b-v4.16b}, [x19], #64 /* get next 4 pt blocks */ |
870c163a | 575 | eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ |
0c8f838a | 576 | encrypt_block v0, w21, x20, x7, w8 |
870c163a | 577 | eor v0.16b, v0.16b, v2.16b |
0c8f838a | 578 | encrypt_block v0, w21, x20, x7, w8 |
870c163a | 579 | eor v0.16b, v0.16b, v3.16b |
0c8f838a | 580 | encrypt_block v0, w21, x20, x7, w8 |
870c163a | 581 | eor v0.16b, v0.16b, v4.16b |
0c8f838a AB |
582 | cmp w22, wzr /* was that the last block? */ |
583 | csinv x5, x24, xzr, eq /* x5 = enc_after if it was, all-ones otherwise */ | |
870c163a | 584 | cbz w5, .Lmacout /* last block and enc_after == 0: leave dg unencrypted */ |
0c8f838a AB |
585 | encrypt_block v0, w21, x20, x7, w8 |
586 | st1 {v0.16b}, [x23] /* return dg */ | |
587 | cond_yield_neon .Lmacrestart /* macro not defined in this file; presumably may yield and resume at the label - confirm */ | |
870c163a AB |
588 | b .Lmacloop4x |
589 | .Lmac1x: | |
0c8f838a | 590 | add w22, w22, #4 /* undo the claim: w22 = blocks still to do */ |
4860620d | 591 | .Lmacloop: |
0c8f838a AB |
592 | cbz w22, .Lmacout |
593 | ld1 {v1.16b}, [x19], #16 /* get next pt block */ | |
4860620d AB |
594 | eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ |
595 | ||
0c8f838a AB |
596 | subs w22, w22, #1 |
597 | csinv x5, x24, xzr, eq /* x5 = enc_after on the last block, all-ones otherwise */ | |
4860620d AB |
598 | cbz w5, .Lmacout /* last block and enc_after == 0: leave dg unencrypted */ |
599 | ||
0c8f838a AB |
600 | .Lmacenc: |
601 | encrypt_block v0, w21, x20, x7, w8 | |
4860620d AB |
602 | b .Lmacloop |
603 | ||
604 | .Lmacout: | |
0c8f838a AB |
605 | st1 {v0.16b}, [x23] /* return dg */ |
606 | frame_pop | |
4860620d | 607 | ret |
0c8f838a AB |
608 | |
609 | .Lmacrestart: /* target of cond_yield_neon: rebuild the state kept in NEON registers */ | |
610 | ld1 {v0.16b}, [x23] /* get dg */ | |
611 | enc_prepare w21, x20, x0 /* x0 is free for reuse: in was copied to x19 */ | |
612 | b .Lmacloop4x | |
4860620d | 613 | AES_ENDPROC(aes_mac_update) |