Merge tag 'for-4.18/dm-changes-v2' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-block.git] / arch / arm / crypto / aes-ce-core.S
CommitLineData
86464859
AB
1/*
2 * aes-ce-core.S - AES in CBC/CTR/XTS mode using ARMv8 Crypto Extensions
3 *
4 * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
10
11#include <linux/linkage.h>
12#include <asm/assembler.h>
13
14 .text
15 .fpu crypto-neon-fp-armv8
16 .align 3
17
/*
 * enc_round - run one full AES encryption round on \state using round
 * key \key: aese followed by aesmc, both operating in place on \state.
 */
	.macro		enc_round, state, key
	aese.8		\state, \key
	aesmc.8		\state, \state
	.endm
22
/*
 * dec_round - run one full AES decryption round on \state using round
 * key \key: aesd followed by aesimc, both operating in place on \state.
 */
	.macro		dec_round, state, key
	aesd.8		\state, \key
	aesimc.8	\state, \state
	.endm
27
/*
 * enc_dround - "double round": two consecutive encryption rounds on the
 * single block in q0, first with \key1 and then with \key2.
 */
	.macro		enc_dround, key1, key2
	enc_round	q0, \key1
	enc_round	q0, \key2
	.endm
32
/*
 * dec_dround - "double round": two consecutive decryption rounds on the
 * single block in q0, first with \key1 and then with \key2.
 */
	.macro		dec_dround, key1, key2
	dec_round	q0, \key1
	dec_round	q0, \key2
	.endm
37
/*
 * enc_fround - final encryption rounds for the block in q0: one full
 * round with \key1, then aese with \key2 (no aesmc), then the closing
 * XOR with \key3.
 */
	.macro		enc_fround, key1, key2, key3
	enc_round	q0, \key1
	aese.8		q0, \key2
	veor		q0, q0, \key3		@ fold in the last round key
	.endm
43
/*
 * dec_fround - final decryption rounds for the block in q0: one full
 * round with \key1, then aesd with \key2 (no aesimc), then the closing
 * XOR with \key3.
 */
	.macro		dec_fround, key1, key2, key3
	dec_round	q0, \key1
	aesd.8		q0, \key2
	veor		q0, q0, \key3		@ fold in the last round key
	.endm
49
/*
 * enc_dround_3x - three-block variant of enc_dround: apply an encryption
 * round with \key1 to q0, q1 and q2 in turn, then the same with \key2.
 * The .irp loops expand to the same instruction sequence as writing the
 * six enc_round invocations out by hand.
 */
	.macro		enc_dround_3x, key1, key2
	.irp		blk, q0, q1, q2
	enc_round	\blk, \key1
	.endr
	.irp		blk, q0, q1, q2
	enc_round	\blk, \key2
	.endr
	.endm
58
/*
 * dec_dround_3x - three-block variant of dec_dround: apply a decryption
 * round with \key1 to q0, q1 and q2 in turn, then the same with \key2.
 * The .irp loops expand to the same instruction sequence as writing the
 * six dec_round invocations out by hand.
 */
	.macro		dec_dround_3x, key1, key2
	.irp		blk, q0, q1, q2
	dec_round	\blk, \key1
	.endr
	.irp		blk, q0, q1, q2
	dec_round	\blk, \key2
	.endr
	.endm
67
/*
 * enc_fround_3x - three-block variant of enc_fround. For q0, q1 and q2:
 * a full round with \key1, then aese with \key2 (no aesmc), then the
 * closing XOR with \key3. Each stage is applied to all three blocks
 * before the next stage starts, exactly as in the unrolled form.
 */
	.macro		enc_fround_3x, key1, key2, key3
	.irp		blk, q0, q1, q2
	enc_round	\blk, \key1
	.endr
	.irp		blk, q0, q1, q2
	aese.8		\blk, \key2
	.endr
	.irp		blk, q0, q1, q2
	veor		\blk, \blk, \key3	@ fold in the last round key
	.endr
	.endm
79
/*
 * dec_fround_3x - three-block variant of dec_fround. For q0, q1 and q2:
 * a full round with \key1, then aesd with \key2 (no aesimc), then the
 * closing XOR with \key3. Each stage is applied to all three blocks
 * before the next stage starts, exactly as in the unrolled form.
 */
	.macro		dec_fround_3x, key1, key2, key3
	.irp		blk, q0, q1, q2
	dec_round	\blk, \key1
	.endr
	.irp		blk, q0, q1, q2
	aesd.8		\blk, \key2
	.endr
	.irp		blk, q0, q1, q2
	veor		\blk, \blk, \key3	@ fold in the last round key
	.endr
	.endm
91
/*
 * do_block - body of the core AES transform, ending in 'bx lr'.
 *
 * \dround is a double-round macro and \fround a final-round macro (the
 * single-block or _3x flavours defined above).
 *
 * On entry: q8/q9 hold the first two round keys, q14 the last one, ip
 * points at the third round key and r3 holds the number of rounds.
 * Round keys are streamed pairwise through q10/q11 and q12/q13, each
 * pair being loaded while the previously loaded pair is consumed.
 *
 * The flags set by the initial cmp stay live until the blo/beq below;
 * none of the NEON loads or AES instructions in between modify them.
 *
 * Clobbers: q10-q13, ip, condition flags (plus the data registers the
 * round macros operate on).
 */
	.macro		do_block, dround, fround
	cmp		r3, #12			@ which key size?
	vld1.8		{q10-q11}, [ip]!
	\dround		q8, q9
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	vld1.8		{q12-q13}, [ip]!
	\dround		q10, q11
	blo		1f			@ AES-128: 10 rounds
	vld1.8		{q10-q11}, [ip]!
	\dround		q12, q13
	beq		2f			@ AES-192: 12 rounds
	vld1.8		{q12-q13}, [ip]		@ remaining case: two more keys
	\dround		q10, q11
1:	\fround		q12, q13, q14		@ last keys are in q12/q13
	bx		lr

2:	\fround		q10, q11, q14		@ last keys are in q10/q11
	bx		lr
	.endm
114
	/*
	 * Internal, non-AAPCS compliant functions that implement the core AES
	 * transforms. These should preserve all registers except q0 - q2, the
	 * round key scratch registers q10 - q13, ip and the condition flags.
	 * Arguments:
	 *   q0        : first in/output block
	 *   q1        : second in/output block (_3x version only)
	 *   q2        : third in/output block (_3x version only)
	 *   q8        : first round key
	 *   q9        : second round key
	 *   q14       : final round key
	 *   r2        : address of round key array
	 *   r3        : number of rounds
	 */
/*
 * aes_encrypt - encrypt the block in q0 in place (internal calling
 * convention: q8/q9/q14 preloaded with round keys, r2 = round key
 * array, r3 = number of rounds). Returns via 'bx lr' inside do_block.
 *
 * .Laes_encrypt_tweak is a second entry point that skips the ip setup;
 * ce_aes_xts_init branches to it after pointing ip at the third round
 * key of a different key schedule.
 */
	.align		6
aes_encrypt:
	add		ip, r2, #32		@ ip = r2 + 2 * 16: 3rd round key
.Laes_encrypt_tweak:
	do_block	enc_dround, enc_fround
ENDPROC(aes_encrypt)
134
/*
 * aes_decrypt - decrypt the block in q0 in place (internal calling
 * convention: q8/q9/q14 preloaded with round keys, r2 = round key
 * array, r3 = number of rounds). Returns via 'bx lr' inside do_block.
 */
	.align		6
aes_decrypt:
	add		ip, r2, #32		@ ip = r2 + 2 * 16: 3rd round key
	do_block	dec_dround, dec_fround
ENDPROC(aes_decrypt)
140
/*
 * aes_encrypt_3x - encrypt the three blocks in q0, q1 and q2 in place
 * (internal calling convention: q8/q9/q14 preloaded with round keys,
 * r2 = round key array, r3 = number of rounds). Returns via 'bx lr'
 * inside do_block.
 */
	.align		6
aes_encrypt_3x:
	add		ip, r2, #32		@ ip = r2 + 2 * 16: 3rd round key
	do_block	enc_dround_3x, enc_fround_3x
ENDPROC(aes_encrypt_3x)
146
/*
 * aes_decrypt_3x - decrypt the three blocks in q0, q1 and q2 in place
 * (internal calling convention: q8/q9/q14 preloaded with round keys,
 * r2 = round key array, r3 = number of rounds). Returns via 'bx lr'
 * inside do_block.
 */
	.align		6
aes_decrypt_3x:
	add		ip, r2, #32		@ ip = r2 + 2 * 16: 3rd round key
	do_block	dec_dround_3x, dec_fround_3x
ENDPROC(aes_decrypt_3x)
152
/*
 * prepare_key - preload the round keys the core transforms expect in
 * registers.
 *   \rk     : register holding the address of the round key array
 *   \rounds : register holding the number of rounds
 * Loads q8/q9 with the first two round keys and q14 with the key at
 * byte offset \rounds * 16, i.e. the last one. Clobbers ip.
 */
	.macro		prepare_key, rk, rounds
	add		ip, \rk, \rounds, lsl #4	@ ip = \rk + 16 * \rounds
	vld1.8		{q8-q9}, [\rk]		@ first two round keys
	vld1.8		{q14}, [ip]		@ last round key
	.endm
158
	/*
	 * ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks)
	 * ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks)
	 */
/*
 * Register use:
 *   r0 = out (advanced as blocks are written)
 *   r1 = in  (advanced as blocks are read)
 *   r2 = round key array, r3 = number of rounds
 *   r4 = blocks remaining (fifth argument, fetched from the stack)
 * Blocks are processed three at a time while at least three remain,
 * then one at a time. blocks == 0 falls straight through to the exit.
 */
ENTRY(ce_aes_ecb_encrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ blocks: first stack arg, above the 2 pushed regs
	prepare_key	r2, r3
.Lecb_enc_triple:
	subs		r4, r4, #3		@ at least 3 blocks left?
	bmi		.Lecb_enc_tail
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_encrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecb_enc_triple
.Lecb_enc_tail:
	adds		r4, r4, #3		@ undo the subtraction: 0..2 blocks left
	beq		.Lecb_enc_done
.Lecb_enc_single:
	vld1.8		{q0}, [r1]!
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecb_enc_single
.Lecb_enc_done:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_encrypt)
190
/*
 * Register use:
 *   r0 = out (advanced as blocks are written)
 *   r1 = in  (advanced as blocks are read)
 *   r2 = round key array, r3 = number of rounds
 *   r4 = blocks remaining (fifth argument, fetched from the stack)
 * Blocks are processed three at a time while at least three remain,
 * then one at a time. blocks == 0 falls straight through to the exit.
 */
ENTRY(ce_aes_ecb_decrypt)
	push		{r4, lr}
	ldr		r4, [sp, #8]		@ blocks: first stack arg, above the 2 pushed regs
	prepare_key	r2, r3
.Lecb_dec_triple:
	subs		r4, r4, #3		@ at least 3 blocks left?
	bmi		.Lecb_dec_tail
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	bl		aes_decrypt_3x
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lecb_dec_triple
.Lecb_dec_tail:
	adds		r4, r4, #3		@ undo the subtraction: 0..2 blocks left
	beq		.Lecb_dec_done
.Lecb_dec_single:
	vld1.8		{q0}, [r1]!
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lecb_dec_single
.Lecb_dec_done:
	pop		{r4, pc}
ENDPROC(ce_aes_ecb_decrypt)
216
	/*
	 * ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 iv[])
	 * ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 iv[])
	 */
/*
 * Register use:
 *   r0 = out (advanced as blocks are written)
 *   r1 = in  (advanced as blocks are read)
 *   r2 = round key array, r3 = number of rounds
 *   r4 = blocks remaining, r5 = iv (both fetched from the stack)
 *   q0 = chaining value: the IV at first, then the previous ciphertext
 * On exit the last ciphertext block is written back to iv[].
 *
 * CBC encryption is inherently serial, so there is no 3x path here.
 * blocks == 0 returns immediately and leaves out[] and iv[] untouched;
 * without the early exit the bottom-tested loop would process a block
 * and then wrap the counter.
 */
ENTRY(ce_aes_cbc_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	teq		r4, #0			@ nothing to do?
	beq		.Lcbcencout
	vld1.8		{q0}, [r5]		@ q0 = iv
	prepare_key	r2, r3
.Lcbcencloop:
	vld1.8		{q1}, [r1]!		@ get next pt block
	veor		q0, q0, q1		@ ..and xor with iv
	bl		aes_encrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbcencloop
	vst1.8		{q0}, [r5]		@ return last ct block as next iv
.Lcbcencout:
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_encrypt)
238
/*
 * Register use:
 *   r0 = out (advanced as blocks are written)
 *   r1 = in  (advanced as blocks are read)
 *   r2 = round key array, r3 = number of rounds
 *   r4 = blocks remaining, r5 = iv (both fetched from the stack)
 *   q6 = chaining value: the IV at first, then the previous ciphertext
 *   q3-q5 = copies of the three ciphertext blocks in the 3x path
 *   q15 = saved last round key in the single-block path
 * On exit the last ciphertext block is written back to iv[].
 *
 * NOTE(review): q4-q6 are used without being saved; presumably the
 * caller owns the whole NEON register file - confirm against the glue
 * code.
 */
ENTRY(ce_aes_cbc_decrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = iv
	vld1.8		{q6}, [r5]		@ keep iv in q6
	prepare_key	r2, r3
.Lcbc_dec_triple:
	subs		r4, r4, #3		@ at least 3 blocks left?
	bmi		.Lcbc_dec_tail
	vld1.8		{q0-q1}, [r1]!
	vld1.8		{q2}, [r1]!
	vmov		q3, q0			@ keep the ciphertext around: each
	vmov		q4, q1			@ block is the chaining value for
	vmov		q5, q2			@ the block that follows it
	bl		aes_decrypt_3x
	veor		q0, q0, q6		@ xor with iv / previous ct
	veor		q1, q1, q3
	veor		q2, q2, q4
	vmov		q6, q5			@ last ct block chains onwards
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	b		.Lcbc_dec_triple
.Lcbc_dec_tail:
	adds		r4, r4, #3		@ undo the subtraction: 0..2 blocks left
	beq		.Lcbc_dec_done
	vmov		q15, q14		@ preserve last round key
.Lcbc_dec_single:
	vld1.8		{q0}, [r1]!		@ get next ct block
	veor		q14, q15, q6		@ combine prev ct with last key, so the
						@ final round's xor also undoes the chaining
	vmov		q6, q0			@ this ct block chains onwards
	bl		aes_decrypt
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	bne		.Lcbc_dec_single
.Lcbc_dec_done:
	vst1.8		{q6}, [r5]		@ return last ct block as next iv
	pop		{r4-r6, pc}
ENDPROC(ce_aes_cbc_decrypt)
276
	/*
	 * ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
	 *		      int blocks, u8 ctr[])
	 */
/*
 * Register use:
 *   r0 = out, r1 = in (both advanced as blocks are processed)
 *   r2 = round key array, r3 = number of rounds
 *   r4 = blocks remaining, r5 = ctr (both fetched from the stack)
 *   q6 = current counter block, as stored in memory
 *   r6 = last 32-bit word of the counter (s27), byte-swapped to CPU order
 *
 * The 3x path only ever increments that last word, so it is used only
 * when r6 + blocks does not carry out of 32 bits. Otherwise every block
 * takes the single-block path, which ripples a carry into s26, s25 and
 * s24 as needed.
 *
 * When the block count goes negative in the single-block path, one
 * block of raw key stream is stored to out[] and the function returns
 * without reading in[] or writing the counter back.
 *
 * NOTE(review): the tail store carries a :64 alignment hint, so out[]
 * is presumably 8-byte aligned in that case - confirm against the
 * caller.
 */
ENTRY(ce_aes_ctr_encrypt)
	push		{r4-r6, lr}
	ldrd		r4, r5, [sp, #16]	@ r4 = blocks, r5 = ctr
	vld1.8		{q6}, [r5]		@ load ctr
	prepare_key	r2, r3
	vmov		r6, s27			@ keep swabbed ctr in r6
	rev		r6, r6
	cmn		r6, r4			@ 32 bit overflow?
	bcs		.Lctr_single		@ yes: 1x path handles the carry
.Lctr_triple:
	subs		r4, r4, #3		@ at least 3 blocks left?
	bmi		.Lctr_tail
	add		r6, r6, #1
	vmov		q0, q6			@ block 0 uses the current counter
	vmov		q1, q6
	rev		ip, r6
	add		r6, r6, #1
	vmov		q2, q6
	vmov		s7, ip			@ block 1: counter + 1
	rev		ip, r6
	add		r6, r6, #1
	vmov		s11, ip			@ block 2: counter + 2
	vld1.8		{q3-q4}, [r1]!
	vld1.8		{q5}, [r1]!
	bl		aes_encrypt_3x
	veor		q0, q0, q3		@ key stream xor input
	veor		q1, q1, q4
	veor		q2, q2, q5
	rev		ip, r6			@ ip was clobbered by the call
	vst1.8		{q0-q1}, [r0]!
	vst1.8		{q2}, [r0]!
	vmov		s27, ip			@ q6 = counter + 3
	b		.Lctr_triple
.Lctr_tail:
	adds		r4, r4, #3		@ undo the subtraction
	beq		.Lctr_done
.Lctr_single:
	vmov		q0, q6
	bl		aes_encrypt
	subs		r4, r4, #1
	bmi		.Lctr_keystream		@ blocks < 0 means tail block
	vld1.8		{q3}, [r1]!
	veor		q3, q0, q3
	vst1.8		{q3}, [r0]!

	adds		r6, r6, #1		@ increment BE ctr
	rev		ip, r6			@ rev/vmov leave the carry flag
	vmov		s27, ip			@ from the adds intact
	bcs		.Lctr_carry
	teq		r4, #0
	bne		.Lctr_single
.Lctr_done:
	vst1.8		{q6}, [r5]		@ write the updated counter back
	pop		{r4-r6, pc}

.Lctr_keystream:
	vst1.8		{q0}, [r0, :64]		@ return just the key stream
	pop		{r4-r6, pc}

.Lctr_carry:
	.irp		sreg, s26, s25, s24
	vmov		ip, \sreg		@ load next word of ctr
	rev		ip, ip			@ ... to handle the carry
	adds		ip, ip, #1
	rev		ip, ip
	vmov		\sreg, ip
	bcc		0f			@ stop once a word does not wrap
	.endr
0:	teq		r4, #0
	beq		.Lctr_done
	b		.Lctr_single
ENDPROC(ce_aes_ctr_encrypt)
353
	/*
	 * ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		      int blocks, u8 iv[], u8 const rk2[], int first)
	 * ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
	 *		      int blocks, u8 iv[], u8 const rk2[], int first)
	 */
360
/*
 * next_tweak - derive the next XTS tweak from the current one.
 *   \out   : receives the new tweak (may be the same register as \in)
 *   \in    : current tweak
 *   \const : per-lane constants; the callers in this file pass q7,
 *            loaded from .Lxts_mul_x
 *   \tmp   : scratch register, clobbered
 *
 * Each 64-bit lane of \in is doubled. The sign bit of each lane is
 * spread into a mask, ANDed with \const, swapped into the other lane
 * and XORed into the result.
 *
 * The vshr must stay ahead of the vadd: when \out and \in are the same
 * register the vadd overwrites the value the vshr needs.
 */
	.macro		next_tweak, out, in, const, tmp
	vshr.s64	\tmp, \in, #63		@ per-lane mask from the top bit
	vand		\tmp, \tmp, \const
	vadd.u64	\out, \in, \in		@ double each 64-bit lane
	vext.8		\tmp, \tmp, \tmp, #8	@ swap the two 64-bit halves
	veor		\out, \out, \tmp
	.endm
368
/*
 * ce_aes_xts_init - prologue shared by ce_aes_xts_encrypt/_decrypt.
 *
 * Must be entered with 'bl' directly after the caller's
 * 'push {r4-r6, lr}'; the stack offsets below assume exactly those 16
 * bytes sit between sp and the caller's stack arguments.
 *
 * On return:
 *   q7 = tweak constants {1, 0x87} from .Lxts_mul_x
 *   r4 = blocks, r5 = iv
 *   q0 = iv[]; when first == 1 it has additionally been encrypted with
 *        the second key, rk2
 *   r6 = first if first != 1, otherwise the address of rk2
 * When first == 1 this also clobbers q8-q14 and ip through prepare_key
 * and the AES core. Any other value of first returns without
 * encrypting the IV.
 */
	.align		3
.Lxts_mul_x:
	.quad		1, 0x87

ce_aes_xts_init:
	ldrd		r4, r5, [sp, #16]	@ load args: blocks, iv
	ldr		r6, [sp, #28]		@ first
	vldr		d14, .Lxts_mul_x	@ q7 = {1, 0x87}
	vldr		d15, .Lxts_mul_x + 8
	vld1.8		{q0}, [r5]		@ load iv
	teq		r6, #1			@ start of a block?
	bxne		lr

	@ Encrypt the IV in q0 with the second AES key. This should only
	@ be done at the start of a block.
	ldr		r6, [sp, #24]		@ load AES key 2
	prepare_key	r6, r3
	add		ip, r6, #32		@ 3rd round key of key 2
	b		.Laes_encrypt_tweak	@ tail call
ENDPROC(ce_aes_xts_init)
390
/*
 * Register use:
 *   r0 = out, r1 = in (both advanced as blocks are processed)
 *   r2 = round key array of key 1, r3 = number of rounds
 *   r4 = blocks remaining, r5 = iv (set up by ce_aes_xts_init)
 *   r6 = zero iff the tweak read from iv[] must be advanced before use
 *   q3 = tweak of the current block, q4/q5 = tweaks of the next two
 *   q6 = scratch for next_tweak, q7 = tweak constants
 * On exit the tweak of the last processed block is written back to
 * iv[].
 */
ENTRY(ce_aes_xts_encrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3			@ switch to the round keys of key 1
	vmov		q3, q0			@ q3 = initial tweak

	teq		r6, #0			@ start of a block?
	bne		.Lxts_enc_triple	@ yes: use the tweak as it is

.Lxts_enc_advance:
	next_tweak	q3, q3, q7, q6
.Lxts_enc_triple:
	subs		r4, r4, #3		@ at least 3 blocks left?
	bmi		.Lxts_enc_tail
	vld1.8		{q0-q1}, [r1]!		@ get 3 pt blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3		@ pre-whiten with the tweaks
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_encrypt_3x
	veor		q0, q0, q3		@ post-whiten with the same tweaks
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ write 3 ct blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5			@ continue from the last tweak used
	teq		r4, #0
	bne		.Lxts_enc_advance
	b		.Lxts_enc_done
.Lxts_enc_tail:
	adds		r4, r4, #3		@ undo the subtraction: 0..2 blocks left
	beq		.Lxts_enc_done
.Lxts_enc_single:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	bl		aes_encrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxts_enc_done
	next_tweak	q3, q3, q7, q6
	b		.Lxts_enc_single
.Lxts_enc_done:
	vst1.8		{q3}, [r5]		@ hand the last tweak back via iv[]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_encrypt)
440
441
/*
 * Register use:
 *   r0 = out, r1 = in (both advanced as blocks are processed)
 *   r2 = round key array of key 1, r3 = number of rounds
 *   r4 = blocks remaining, r5 = iv (set up by ce_aes_xts_init)
 *   r6 = zero iff the tweak read from iv[] must be advanced before use
 *   q3 = tweak of the current block, q4/q5 = tweaks of the next two
 *   q6 = scratch for next_tweak, q7 = tweak constants
 * On exit the tweak of the last processed block is written back to
 * iv[].
 *
 * The single-block loop does not set up ip before calling aes_decrypt:
 * aes_decrypt computes ip from r2 as its first instruction.
 */
ENTRY(ce_aes_xts_decrypt)
	push		{r4-r6, lr}

	bl		ce_aes_xts_init		@ run shared prologue
	prepare_key	r2, r3			@ switch to the round keys of key 1
	vmov		q3, q0			@ q3 = initial tweak

	teq		r6, #0			@ start of a block?
	bne		.Lxtsdec3x		@ yes: use the tweak as it is

.Lxtsdecloop3x:
	next_tweak	q3, q3, q7, q6
.Lxtsdec3x:
	subs		r4, r4, #3		@ at least 3 blocks left?
	bmi		.Lxtsdec1x
	vld1.8		{q0-q1}, [r1]!		@ get 3 ct blocks
	vld1.8		{q2}, [r1]!
	next_tweak	q4, q3, q7, q6
	veor		q0, q0, q3		@ pre-whiten with the tweaks
	next_tweak	q5, q4, q7, q6
	veor		q1, q1, q4
	veor		q2, q2, q5
	bl		aes_decrypt_3x
	veor		q0, q0, q3		@ post-whiten with the same tweaks
	veor		q1, q1, q4
	veor		q2, q2, q5
	vst1.8		{q0-q1}, [r0]!		@ write 3 pt blocks
	vst1.8		{q2}, [r0]!
	vmov		q3, q5			@ continue from the last tweak used
	teq		r4, #0
	beq		.Lxtsdecout
	b		.Lxtsdecloop3x
.Lxtsdec1x:
	adds		r4, r4, #3		@ undo the subtraction: 0..2 blocks left
	beq		.Lxtsdecout
.Lxtsdecloop:
	vld1.8		{q0}, [r1]!
	veor		q0, q0, q3
	bl		aes_decrypt
	veor		q0, q0, q3
	vst1.8		{q0}, [r0]!
	subs		r4, r4, #1
	beq		.Lxtsdecout
	next_tweak	q3, q3, q7, q6
	b		.Lxtsdecloop
.Lxtsdecout:
	vst1.8		{q3}, [r5]		@ hand the last tweak back via iv[]
	pop		{r4-r6, pc}
ENDPROC(ce_aes_xts_decrypt)
492
493 /*
494 * u32 ce_aes_sub(u32 input) - use the aese instruction to perform the
495 * AES sbox substitution on each byte in
496 * 'input'
497 */
ENTRY(ce_aes_sub)
	veor		q0, q0, q0		@ all-zero state
	vdup.32		q1, r0			@ input word replicated into all 4 lanes
	aese.8		q0, q1			@ all lanes are equal, so lane 0 ends up
						@ as the byte-wise sbox of the input
	vmov		r0, s0			@ return lane 0
	bx		lr
ENDPROC(ce_aes_sub)
505
506 /*
507 * void ce_aes_invert(u8 *dst, u8 *src) - perform the Inverse MixColumns
508 * operation on round key *src
509 */
ENTRY(ce_aes_invert)
	vld1.8		{q0}, [r1]		@ q0 = 16-byte round key at src
	aesimc.8	q0, q0			@ Inverse MixColumns, in place
	vst1.8		{q0}, [r0]		@ store the result to dst
	bx		lr
ENDPROC(ce_aes_invert)
515ENDPROC(ce_aes_invert)