Merge tag 'pinctrl-v5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/linusw...
[linux-block.git] / arch / x86 / crypto / aegis128-aesni-asm.S
CommitLineData
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AES-NI + SSE2 implementation of AEGIS-128
 *
 * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
 * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
 */
8
9#include <linux/linkage.h>
10#include <asm/frame.h>
11
/* The five 128-bit AEGIS-128 state words live in %xmm0-%xmm4. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

/*
 * KEY and MSG are two names for the same register (%xmm5): loading one
 * overwrites the other.
 */
#define KEY	%xmm5
#define MSG	%xmm5

/* Scratch vector registers. */
#define T0	%xmm6
#define T1	%xmm7

/* General-purpose registers referred to by role in the code below. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
26
/*
 * The two 16-byte constants loaded by crypto_aegis128_aesni_init.
 * Taken together, the 32 bytes are the Fibonacci sequence modulo 256.
 */
.section	.rodata.cst16.aegis128_const, "aM", @progbits, 32
.p2align 4
.Laegis128_const_0:
	.byte 0x00, 0x01, 0x01, 0x02
	.byte 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59
	.byte 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55
	.byte 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42
	.byte 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte indices 0..15; crypto_aegis128_aesni_dec_tail compares the byte
 * count against these to build a per-byte mask.
 */
.section	.rodata.cst16.aegis128_counter, "aM", @progbits, 16
.p2align 4
.Laegis128_counter:
	.byte 0x00, 0x01, 0x02, 0x03
	.byte 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b
	.byte 0x0c, 0x0d, 0x0e, 0x0f

.text
43
/*
 * aegis128_update
 *
 * Runs one AES round (aesenc) over every state register, each one using
 * the not-yet-updated value of a neighbouring register as the round key.
 * The instruction order matters: every aesenc must read its source
 * register before that register is itself overwritten, which is why the
 * original STATE4 is parked in T0 first.
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state (shifted positions: the word that logically
 *                comes first is now in STATE4, followed by STATE0, STATE1,
 *                STATE2 and STATE3)
 * changed:
 *   T0
 */
.macro aegis128_update
	movdqa	STATE4, T0		/* keep the old STATE4 for the last step */
	aesenc	STATE0, STATE4		/* STATE4 = AESRound(STATE4) ^ STATE0 */
	aesenc	STATE1, STATE0		/* STATE0 = AESRound(STATE0) ^ STATE1 */
	aesenc	STATE2, STATE1		/* STATE1 = AESRound(STATE1) ^ STATE2 */
	aesenc	STATE3, STATE2		/* STATE2 = AESRound(STATE2) ^ STATE3 */
	aesenc	T0,     STATE3		/* STATE3 = AESRound(STATE3) ^ old STATE4 */
.endm
61
/*
 * __load_partial: internal ABI
 *
 * Assembles MSG from 1-, 2-, 4- and 8-byte pieces of SRC, one piece per
 * set bit among the low four bits of LEN.  For LEN < 16 the result is the
 * first LEN bytes of SRC followed by zero padding.
 *
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
__load_partial:
	/* %r9 collects the pieces that end up in the low 8 bytes of MSG */
	xor	%r9d, %r9d
	pxor	MSG, MSG

	/* bit 0: a single byte, located after all the larger pieces */
	test	$0x1, LEN
	jz	.Lld_partial_1

	mov	LEN, %r8
	and	$0x1E, %r8
	mov	(SRC, %r8), %r9b

.Lld_partial_1:
	/* bit 1: a 16-bit piece, slotted in below what was gathered so far */
	test	$0x2, LEN
	jz	.Lld_partial_2

	mov	LEN, %r8
	and	$0x1C, %r8
	shl	$0x10, %r9
	mov	(SRC, %r8), %r9w

.Lld_partial_2:
	/* bit 2: a 32-bit piece */
	test	$0x4, LEN
	jz	.Lld_partial_4

	mov	LEN, %r8
	and	$0x18, %r8
	shl	$32, %r9
	mov	(SRC, %r8), %r8d
	xor	%r8, %r9

.Lld_partial_4:
	movq	%r9, MSG

	/* bit 3: a 64-bit piece; what was gathered so far moves to the top half */
	test	$0x8, LEN
	jz	.Lld_partial_8

	mov	LEN, %r8
	and	$0x10, %r8
	pslldq	$8, MSG
	movq	(SRC, %r8), T0
	pxor	T0, MSG

.Lld_partial_8:
	ret
ENDPROC(__load_partial)
127
/*
 * __store_partial: internal ABI
 *
 * Writes the leading bytes of T0 to DST as an 8-, 4-, 2- and 1-byte store,
 * each performed only while at least that many bytes remain.
 *
 * input:
 *   LEN - bytes (only the low 32 bits are used, see below)
 *   DST - dst
 *   T0  - message block
 * changed:
 *   T0  (shifted right by 8 bytes when at least 8 bytes are stored)
 *   %r8
 *   %r9
 *   %r10
 */
__store_partial:
	/*
	 * The byte count reaches the callers as an 'unsigned int', so the
	 * upper half of the 64-bit register is not guaranteed to be zero.
	 * Keep only the low 32 bits; otherwise stray upper bits could make
	 * the comparisons below store up to 15 bytes regardless of LEN.
	 */
	mov	LEN, %r8
	mov	%r8d, %r8d
	mov	DST, %r9

	movq	T0, %r10

	/* the remaining count is unsigned, hence jb rather than jl */
	cmp	$8, %r8
	jb	.Lst_partial_8

	mov	%r10, (%r9)
	psrldq	$8, T0
	movq	T0, %r10

	sub	$8, %r8
	add	$8, %r9

.Lst_partial_8:
	cmp	$4, %r8
	jb	.Lst_partial_4

	mov	%r10d, (%r9)
	shr	$32, %r10

	sub	$4, %r8
	add	$4, %r9

.Lst_partial_4:
	cmp	$2, %r8
	jb	.Lst_partial_2

	mov	%r10w, (%r9)
	shr	$0x10, %r10

	sub	$2, %r8
	add	$2, %r9

.Lst_partial_2:
	cmp	$1, %r8
	jb	.Lst_partial_1

	mov	%r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
185
/*
 * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
 *
 * Builds the initial state from the key, the IV and the two constants,
 * runs ten update rounds and writes the 80-byte result to *state.
 *
 * %rdi (STATEP) - state, written with unaligned stores
 * %rsi          - key, read with an aligned load (movdqa)
 *                 NOTE(review): this requires a 16-byte aligned key;
 *                 confirm that callers guarantee it.
 * %rdx          - iv, read with an unaligned load
 */
ENTRY(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* load IV: */
	movdqu (%rdx), T1

	/* load key: */
	movdqa (%rsi), KEY
	pxor KEY, T1			/* T1 = KEY xor IV from here on */
	movdqa T1, STATE0
	movdqa KEY, STATE3
	movdqa KEY, STATE4

	/*
	 * load the constants (RIP-relative, so the references need no
	 * absolute relocation and work in position-independent images):
	 */
	movdqa .Laegis128_const_0(%rip), STATE2
	movdqa .Laegis128_const_1(%rip), STATE1
	pxor STATE2, STATE3
	pxor STATE1, STATE4

	/*
	 * update 10 times with KEY / KEY xor IV; the register receiving the
	 * xor moves on each round because aegis128_update shifts the state:
	 */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* store the state: */
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_init)
230
/*
 * aegis128_ad_block - absorb one 16-byte block of associated data
 *
 *   a    - 'a' for an aligned load (movdqa), 'u' for an unaligned one (movdqu)
 *   s    - register that receives the block after the state update
 *   i    - index of the block within the current 5-block group
 *   next - suffix of the .Lad_out_* label to leave through when fewer than
 *          16 bytes remain after this block
 *
 * changed: MSG, T0, LEN
 */
.macro aegis128_ad_block a s i next
	movdq\a (\i * 0x10)(SRC), MSG
	aegis128_update
	pxor MSG, \s
	sub $0x10, LEN
	cmp $0x10, LEN
	jb .Lad_out_\next
.endm

/*
 * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorbs every whole 16-byte block of 'data' into the state.  Trailing
 * bytes that do not fill a block are left unprocessed, and when length is
 * below 16 the state is not touched at all.
 */
ENTRY(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so the upper half of %rsi (LEN) is
	 * undefined on entry; zero-extend it before using LEN as 64-bit.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Lad_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* take the movdqa loop only when SRC is 16-byte aligned */
	test $0xF, SRC
	jnz .Lad_u_loop

	/*
	 * Five blocks per iteration: after five updates the state words are
	 * back in their original registers.
	 */
.align 8
.Lad_a_loop:
	aegis128_ad_block a, STATE4, 0, 1
	aegis128_ad_block a, STATE3, 1, 2
	aegis128_ad_block a, STATE2, 2, 3
	aegis128_ad_block a, STATE1, 3, 4
	aegis128_ad_block a, STATE0, 4, 0

	add $0x50, SRC
	jmp .Lad_a_loop

.align 8
.Lad_u_loop:
	aegis128_ad_block u, STATE4, 0, 1
	aegis128_ad_block u, STATE3, 1, 2
	aegis128_ad_block u, STATE2, 2, 3
	aegis128_ad_block u, STATE1, 3, 4
	aegis128_ad_block u, STATE0, 4, 0

	add $0x50, SRC
	jmp .Lad_u_loop

	/*
	 * store the state; .Lad_out_N is reached after N (mod 5) updates and
	 * undoes the corresponding register rotation:
	 */
.Lad_out_0:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_1:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_2:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_3:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_4:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	ret

	/* length < 16: nothing was loaded, nothing to store */
.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_ad)
382
/*
 * encrypt_block - encrypt block number \i of the current 5-block group
 *
 *   a         - 'a' for aligned (movdqa) or 'u' for unaligned (movdqu)
 *               access to SRC and DST
 *   s0 ... s4 - the registers currently holding the state words, in
 *               logical order (s0 is accepted but not used)
 *   i         - block index; selects the 16-byte slot at SRC/DST and the
 *               .Lenc_out_\i exit label
 *
 * The ciphertext is MSG ^ s1 ^ s4 ^ (s2 & s3).  After the state update the
 * plaintext in MSG is xored into \s4.
 *
 * changed: MSG, T0, T1, LEN
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	/* T1 = s1 ^ s4 ^ (s2 & s3) */
	movdqa	\s2, T1
	pand	\s3, T1
	pxor	\s1, T1
	pxor	\s4, T1

	/* ciphertext = plaintext ^ T1 */
	movdq\a	(\i * 0x10)(SRC), MSG
	movdqa	MSG, T0
	pxor	T1, T0
	movdq\a	T0, (\i * 0x10)(DST)

	/* absorb the plaintext block */
	aegis128_update
	pxor	MSG, \s4

	/* leave as soon as less than a whole block remains */
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Lenc_out_\i
.endm
400
/*
 * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypts every whole 16-byte block of 'src' into 'dst' and updates the
 * state.  Trailing bytes that do not fill a block are left unprocessed,
 * and when length is below 16 neither the state nor dst is touched.
 */
ENTRY(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so the upper half of %rsi (LEN) is
	 * undefined on entry; zero-extend it before the 64-bit compares.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Lenc_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* the movdqa loop needs both SRC and DST to be 16-byte aligned */
	mov SRC, %r8
	or DST, %r8
	and $0xF, %r8
	jnz .Lenc_u_loop

	/*
	 * Five blocks per iteration; the register arguments rotate by one
	 * position per block to follow the shift done by aegis128_update.
	 */
.align 8
.Lenc_a_loop:
	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_u_loop

	/*
	 * store the state; .Lenc_out_N is reached right after block N of a
	 * group and undoes the register rotation accumulated so far:
	 */
.Lenc_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	ret

.Lenc_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	ret

.Lenc_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	ret

.Lenc_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	ret

.Lenc_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	ret

	/* length < 16: nothing was loaded, nothing to store */
.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_enc)
497
/*
 * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypts a final partial block: the source bytes are gathered into a
 * zero-padded block by __load_partial, the ciphertext bytes are written by
 * __store_partial, and the padded plaintext is absorbed into the state.
 */
ENTRY(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so the upper half of %rsi (LEN) is
	 * undefined on entry; zero-extend it so the helpers see a clean
	 * 64-bit byte count.
	 */
	mov %esi, %esi

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* encrypt message: */
	call __load_partial

	/* T0 = MSG ^ STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	movdqa MSG, T0
	pxor STATE1, T0
	pxor STATE4, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0

	call __store_partial

	/* absorb the zero-padded plaintext block */
	aegis128_update
	pxor MSG, STATE4

	/* store the state (rotated by the single update above): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_enc_tail)
537
/*
 * decrypt_block - decrypt block number \i of the current 5-block group
 *
 *   a         - 'a' for aligned (movdqa) or 'u' for unaligned (movdqu)
 *               access to SRC and DST
 *   s0 ... s4 - the registers currently holding the state words, in
 *               logical order (s0 is accepted but not used)
 *   i         - block index; selects the 16-byte slot at SRC/DST and the
 *               .Ldec_out_\i exit label
 *
 * The plaintext is ciphertext ^ s1 ^ s4 ^ (s2 & s3).  After the state
 * update the recovered plaintext in MSG is xored into \s4.
 *
 * changed: MSG, T0, T1, LEN
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	/* T1 = s1 ^ s4 ^ (s2 & s3) */
	movdqa	\s2, T1
	pand	\s3, T1
	pxor	\s1, T1
	pxor	\s4, T1

	/* plaintext = ciphertext ^ T1 */
	movdq\a	(\i * 0x10)(SRC), MSG
	pxor	T1, MSG
	movdq\a	MSG, (\i * 0x10)(DST)

	/* absorb the plaintext block */
	aegis128_update
	pxor	MSG, \s4

	/* leave as soon as less than a whole block remains */
	sub	$0x10, LEN
	cmp	$0x10, LEN
	jl	.Ldec_out_\i
.endm
554
/*
 * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypts every whole 16-byte block of 'src' into 'dst' and updates the
 * state.  Trailing bytes that do not fill a block are left unprocessed,
 * and when length is below 16 neither the state nor dst is touched.
 */
ENTRY(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so the upper half of %rsi (LEN) is
	 * undefined on entry; zero-extend it before the 64-bit compares.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Ldec_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* the movdqa loop needs both SRC and DST to be 16-byte aligned */
	mov SRC, %r8
	or DST, %r8
	and $0xF, %r8
	jnz .Ldec_u_loop

	/*
	 * Five blocks per iteration; the register arguments rotate by one
	 * position per block to follow the shift done by aegis128_update.
	 */
.align 8
.Ldec_a_loop:
	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_u_loop

	/*
	 * store the state; .Ldec_out_N is reached right after block N of a
	 * group and undoes the register rotation accumulated so far:
	 */
.Ldec_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	ret

.Ldec_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	ret

.Ldec_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	ret

.Ldec_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	ret

.Ldec_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	ret

	/* length < 16: nothing was loaded, nothing to store */
.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_dec)
651
/*
 * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypts a final partial block: the source bytes are gathered into a
 * zero-padded block by __load_partial, the plaintext bytes are written by
 * __store_partial, and the plaintext, cut back to 'length' bytes, is
 * absorbed into the state.
 */
ENTRY(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/*
	 * 'length' is an unsigned int, so the upper half of %rsi (LEN) is
	 * undefined on entry; zero-extend it so the helpers see a clean
	 * 64-bit byte count.
	 */
	mov %esi, %esi

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* decrypt message: */
	call __load_partial

	/* MSG ^= STATE1 ^ STATE4 ^ (STATE2 & STATE3) */
	pxor STATE1, MSG
	pxor STATE4, MSG
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, MSG

	/* __store_partial takes the block in T0 and clobbers it */
	movdqa MSG, T0
	call __store_partial

	/*
	 * mask with byte count: the four punpcklbw steps replicate the low
	 * byte of LEN into all 16 bytes of T0; comparing against the indices
	 * 0..15 then yields 0xff in every byte position below LEN, which
	 * clears the keystream left in the padding bytes of MSG.
	 */
	movq LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	/* RIP-relative, so the reference needs no absolute relocation */
	movdqa .Laegis128_counter(%rip), T1
	pcmpgtb T1, T0
	pand T0, MSG

	/* absorb the masked plaintext block */
	aegis128_update
	pxor MSG, STATE4

	/* store the state (rotated by the single update above): */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_dec_tail)
701
/*
 * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Mixes the two lengths (converted to bit counts) into the state over
 * seven update rounds, then xors all five state words into the 16 bytes
 * at tag_xor.  The state itself is not written back.
 */
ENTRY(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* load the state: */
	movdqu	0x00(STATEP), STATE0
	movdqu	0x10(STATEP), STATE1
	movdqu	0x20(STATEP), STATE2
	movdqu	0x30(STATEP), STATE3
	movdqu	0x40(STATEP), STATE4

	/*
	 * prepare length block: assoclen (%rdx) in the low quadword,
	 * cryptlen (%rcx) in the high quadword
	 */
	movq	%rdx, MSG
	movq	%rcx, T0
	punpcklqdq T0, MSG
	psllq	$3, MSG		/* multiply by 8 (to get bit count) */

	pxor	STATE3, MSG

	/* update state: seven rounds, each followed by xoring in MSG */
	aegis128_update
	pxor	MSG, STATE4
	aegis128_update
	pxor	MSG, STATE3
	aegis128_update
	pxor	MSG, STATE2
	aegis128_update
	pxor	MSG, STATE1
	aegis128_update
	pxor	MSG, STATE0
	aegis128_update
	pxor	MSG, STATE4
	aegis128_update
	pxor	MSG, STATE3

	/* fold the five state words into STATE0 */
	pxor	STATE1, STATE0
	pxor	STATE2, STATE0
	pxor	STATE3, STATE0
	pxor	STATE4, STATE0

	/* xor tag: */
	movdqu	(%rsi), MSG
	pxor	STATE0, MSG
	movdqu	MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_final)