Commit | Line | Data |
---|---|---|
d2912cb1 | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
1d373d4e OM |
2 | /* |
3 | * AES-NI + SSE2 implementation of AEGIS-128 | |
4 | * | |
5 | * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> | |
6 | * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. | |
1d373d4e OM |
7 | */ |
8 | ||
9 | #include <linux/linkage.h> | |
10 | #include <asm/frame.h> | |
11 | ||
/* The five 128-bit AEGIS-128 state words, one XMM register each. */
#define STATE0	%xmm0
#define STATE1	%xmm1
#define STATE2	%xmm2
#define STATE3	%xmm3
#define STATE4	%xmm4

/* KEY and MSG name the same register (%xmm5). */
#define KEY	%xmm5
#define MSG	%xmm5

/* Vector scratch registers. */
#define T0	%xmm6
#define T1	%xmm7

/* Integer registers: state pointer, byte count, source, destination. */
#define STATEP	%rdi
#define LEN	%rsi
#define SRC	%rdx
#define DST	%rcx
26 | ||
/*
 * Initialisation constants, loaded by crypto_aegis128_aesni_init.
 * Read as one 32-byte run, every byte after the first two is the sum
 * (mod 256) of the two bytes preceding it.
 */
.section	.rodata.cst16.aegis128_const, "aM", @progbits, 32
.p2align 4
.Laegis128_const_0:
	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
.Laegis128_const_1:
	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd

/*
 * Byte i holds the value i.  crypto_aegis128_aesni_dec_tail compares
 * this against the byte count to build a per-byte mask.
 */
.section	.rodata.cst16.aegis128_counter, "aM", @progbits, 16
.p2align 4
.Laegis128_counter:
	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
41 | ||
42 | .text | |
43 | ||
/*
 * aegis128_update
 *
 * Runs one AES round (aesenc) on each of the five state registers, using
 * another state register as the round key.  No message word is mixed in
 * here; the users of this macro do that with a separate pxor.
 *
 * input:
 *   STATE[0-4] - input state
 * output:
 *   STATE[0-4] - output state (shifted positions: after one update the
 *                callers store STATE4, STATE0, STATE1, STATE2, STATE3 to
 *                offsets 0x00..0x40 of the state buffer, in that order)
 * changed:
 *   T0
 */
.macro aegis128_update
	movdqa	STATE4, T0		/* old STATE4 is needed by the last round */
	aesenc	STATE0, STATE4
	aesenc	STATE1, STATE0
	aesenc	STATE2, STATE1
	aesenc	STATE3, STATE2
	aesenc	T0,     STATE3		/* keyed with the saved old STATE4 */
.endm
61 | ||
/*
 * __load_partial: internal ABI
 *
 * Builds a zero-padded 16-byte block from the (LEN & 0xF) bytes that start
 * at offset (LEN & 0x10) of SRC.  The bytes are gathered from the end
 * towards the start: 1, then 2, then 4, then 8 bytes, one step per set bit
 * of LEN.
 *
 * input:
 *   LEN - bytes
 *   SRC - src
 * output:
 *   MSG - message block
 * changed:
 *   T0
 *   %r8
 *   %r9
 */
__load_partial:
	pxor MSG, MSG
	xor %r9d, %r9d

	/* bit 0 set: fetch the single trailing byte */
	test $0x1, LEN
	jz .Lld_partial_1

	mov LEN, %r8
	and $0x1E, %r8			/* offset of that byte */
	mov (SRC,%r8), %r9b

.Lld_partial_1:
	/* bit 1 set: make room and fetch the preceding 2 bytes */
	test $0x2, LEN
	jz .Lld_partial_2

	mov LEN, %r8
	and $0x1C, %r8
	shl $0x10, %r9
	mov (SRC,%r8), %r9w

.Lld_partial_2:
	/* bit 2 set: make room and fetch the preceding 4 bytes */
	test $0x4, LEN
	jz .Lld_partial_4

	mov LEN, %r8
	and $0x18, %r8
	shl $32, %r9
	mov (SRC,%r8), %r8d		/* 32-bit load zero-extends into %r8 */
	xor %r8, %r9

.Lld_partial_4:
	/* up to 7 bytes collected so far; move them into the vector register */
	movq %r9, MSG

	/* bit 3 set: shift them to the upper half and fetch the leading 8 bytes */
	test $0x8, LEN
	jz .Lld_partial_8

	mov LEN, %r8
	and $0x10, %r8
	pslldq $8, MSG
	movq (SRC,%r8), T0
	pxor T0, MSG

.Lld_partial_8:
	ret
ENDPROC(__load_partial)
127 | ||
/*
 * __store_partial: internal ABI
 *
 * Writes the first LEN bytes of T0 to DST in chunks of 8, 4, 2 and 1
 * bytes, so at most 15 bytes are written.
 *
 * input:
 *   LEN - bytes (only the low 32 bits are examined)
 *   DST - dst
 *   T0  - message block
 * changed:
 *   T0 (shifted right by 8 bytes when at least 8 bytes are stored)
 *   %r8
 *   %r9
 *   %r10
 */
__store_partial:
	mov LEN, %r8
	mov DST, %r9

	movq T0, %r10			/* low 8 bytes of the block */

	/*
	 * The callers receive the length as a 32-bit 'unsigned int', so the
	 * upper half of LEN is not guaranteed to be zero: compare the low
	 * 32 bits only, and compare them as unsigned.
	 */
	cmp $8, %r8d
	jb .Lst_partial_8

	mov %r10, (%r9)
	psrldq $8, T0
	movq T0, %r10			/* continue with the high 8 bytes */

	sub $8, %r8d
	add $8, %r9

.Lst_partial_8:
	cmp $4, %r8d
	jb .Lst_partial_4

	mov %r10d, (%r9)
	shr $32, %r10

	sub $4, %r8d
	add $4, %r9

.Lst_partial_4:
	cmp $2, %r8d
	jb .Lst_partial_2

	mov %r10w, (%r9)
	shr $0x10, %r10

	sub $2, %r8d
	add $2, %r9

.Lst_partial_2:
	cmp $1, %r8d
	jb .Lst_partial_1

	mov %r10b, (%r9)

.Lst_partial_1:
	ret
ENDPROC(__store_partial)
185 | ||
/*
 * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
 *
 * Builds the initial state from the key, the IV and the two constants,
 * runs ten state updates and writes the resulting five 16-byte words to
 * 'state'.
 *
 *   %rdi (STATEP) - state, written with unaligned stores
 *   %rsi          - key, read with an aligned load
 *   %rdx          - iv, read with an unaligned load
 */
ENTRY(crypto_aegis128_aesni_init)
	FRAME_BEGIN

	/* load IV: */
	movdqu (%rdx), T1

	/*
	 * load key:
	 * NOTE(review): movdqa faults on a misaligned address, so this relies
	 * on the caller passing a 16-byte aligned key - confirm in the glue code.
	 */
	movdqa (%rsi), KEY
	pxor KEY, T1			/* T1 = KEY xor IV */
	movdqa T1, STATE0
	movdqa KEY, STATE3
	movdqa KEY, STATE4

	/*
	 * load the constants:
	 * RIP-relative, so no absolute address is encoded in the instruction.
	 */
	movdqa .Laegis128_const_0(%rip), STATE2
	movdqa .Laegis128_const_1(%rip), STATE1
	pxor STATE2, STATE3
	pxor STATE1, STATE4

	/* update 10 times with KEY / KEY xor IV: */
	aegis128_update; pxor KEY, STATE4
	aegis128_update; pxor T1,  STATE3
	aegis128_update; pxor KEY, STATE2
	aegis128_update; pxor T1,  STATE1
	aegis128_update; pxor KEY, STATE0
	aegis128_update; pxor T1,  STATE4
	aegis128_update; pxor KEY, STATE3
	aegis128_update; pxor T1,  STATE2
	aegis128_update; pxor KEY, STATE1
	aegis128_update; pxor T1,  STATE0

	/* store the state: */
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_init)
230 | ||
/*
 * aegis128_ad_block - absorb one 16-byte block of associated data
 *
 *   a   - 'a' for aligned loads (movdqa), 'u' for unaligned (movdqu)
 *   i   - index of the block within the current group of five
 *   s   - register that receives the block after the state update
 *   out - label to leave through once fewer than 16 bytes remain
 *
 * changed: MSG, T0, LEN, flags
 */
.macro aegis128_ad_block a, i, s, out
	movdq\a (\i * 0x10)(SRC), MSG
	aegis128_update
	pxor MSG, \s
	sub $0x10, LEN
	cmp $0x10, LEN
	jb \out
.endm

/*
 * void crypto_aegis128_aesni_ad(void *state, unsigned int length,
 *                               const void *data);
 *
 * Absorbs every complete 16-byte block of 'data' into the state.  A
 * trailing partial block (length % 16 bytes) is not read.  When length is
 * below 16 the state is neither loaded nor stored.
 */
ENTRY(crypto_aegis128_aesni_ad)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of LEN (%rsi) is
	 * undefined on entry; zero-extend it before any 64-bit use.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Lad_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* use the aligned loop only when SRC is 16-byte aligned */
	mov SRC, %r8
	and $0xF, %r8
	jnz .Lad_u_loop

.align 8
.Lad_a_loop:
	aegis128_ad_block a, 0, STATE4, .Lad_out_1
	aegis128_ad_block a, 1, STATE3, .Lad_out_2
	aegis128_ad_block a, 2, STATE2, .Lad_out_3
	aegis128_ad_block a, 3, STATE1, .Lad_out_4
	aegis128_ad_block a, 4, STATE0, .Lad_out_0

	add $0x50, SRC
	jmp .Lad_a_loop

.align 8
.Lad_u_loop:
	aegis128_ad_block u, 0, STATE4, .Lad_out_1
	aegis128_ad_block u, 1, STATE3, .Lad_out_2
	aegis128_ad_block u, 2, STATE2, .Lad_out_3
	aegis128_ad_block u, 3, STATE1, .Lad_out_4
	aegis128_ad_block u, 4, STATE0, .Lad_out_0

	add $0x50, SRC
	jmp .Lad_u_loop

	/*
	 * store the state:
	 * .Lad_out_N is taken after N updates (modulo 5); each exit writes
	 * the registers back in the order matching that many updates.
	 */
.Lad_out_0:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_1:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_2:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_3:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out_4:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	ret

.Lad_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_ad)
382 | ||
/*
 * encrypt_block - encrypt one 16-byte block and absorb the plaintext
 *
 *   a     - 'a' for aligned (movdqa), 'u' for unaligned (movdqu) access
 *   s0-s4 - the registers currently holding state words 0..4
 *           (s0 is accepted for symmetry but not referenced)
 *   i     - block index; selects the SRC/DST offset and the exit label
 *
 * Stores MSG xor s1 xor s4 xor (s2 and s3) to DST, updates the state and
 * xors the plaintext into the register passed as s4.  Leaves through
 * .Lenc_out_\i once fewer than 16 bytes remain.
 *
 * changed: MSG, T0, T1, LEN, flags
 */
.macro encrypt_block a s0 s1 s2 s3 s4 i
	/* T1 = s2 and s3 */
	movdqa \s2, T1
	pand \s3, T1

	/* T0 = plaintext xor keystream */
	movdq\a (\i * 0x10)(SRC), MSG
	movdqa MSG, T0
	pxor T1, T0
	pxor \s4, T0
	pxor \s1, T0
	movdq\a T0, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	cmp $0x10, LEN
	jl .Lenc_out_\i
.endm
400 | ||
/*
 * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Encrypts every complete 16-byte block of 'src' into 'dst' and absorbs
 * the plaintext into the state.  A trailing partial block
 * (length % 16 bytes) is not processed here.  When length is below 16 the
 * state is neither loaded nor stored.
 */
ENTRY(crypto_aegis128_aesni_enc)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of LEN (%rsi) is
	 * undefined on entry; zero-extend it before any 64-bit use.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Lenc_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* the aligned loop needs both SRC and DST to be 16-byte aligned */
	mov SRC, %r8
	or DST, %r8
	and $0xF, %r8
	jnz .Lenc_u_loop

.align 8
.Lenc_a_loop:
	encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_a_loop

.align 8
.Lenc_u_loop:
	encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Lenc_u_loop

	/*
	 * store the state:
	 * .Lenc_out_N is taken right after block N of a group of five, i.e.
	 * after N + 1 updates; each exit writes the registers back in the
	 * matching order.
	 */
.Lenc_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	ret

.Lenc_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	ret

.Lenc_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	ret

.Lenc_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	ret

.Lenc_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	ret

.Lenc_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_enc)
497 | ||
/*
 * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Encrypts one final partial block: the bytes are gathered into a
 * zero-padded 16-byte block by __load_partial, encrypted, and the first
 * 'length' bytes of the result are written by __store_partial.  The
 * zero-padded plaintext block is then absorbed into the state.
 */
ENTRY(crypto_aegis128_aesni_enc_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of LEN (%rsi) is
	 * undefined on entry; zero-extend it because __store_partial compares
	 * LEN as a 64-bit value.
	 */
	mov %esi, %esi

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* encrypt message: */
	call __load_partial

	/* T0 = MSG xor STATE1 xor STATE4 xor (STATE2 and STATE3) */
	movdqa MSG, T0
	pxor STATE1, T0
	pxor STATE4, T0
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, T0

	call __store_partial

	/* absorb the (zero-padded) plaintext block */
	aegis128_update
	pxor MSG, STATE4

	/* store the state: */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_enc_tail)
537 | ||
/*
 * decrypt_block - decrypt one 16-byte block and absorb the plaintext
 *
 *   a     - 'a' for aligned (movdqa), 'u' for unaligned (movdqu) access
 *   s0-s4 - the registers currently holding state words 0..4
 *           (s0 is accepted for symmetry but not referenced)
 *   i     - block index; selects the SRC/DST offset and the exit label
 *
 * Stores MSG xor s1 xor s4 xor (s2 and s3) to DST, updates the state and
 * xors the recovered plaintext into the register passed as s4.  Leaves
 * through .Ldec_out_\i once fewer than 16 bytes remain.
 *
 * changed: MSG, T0, T1, LEN, flags
 */
.macro decrypt_block a s0 s1 s2 s3 s4 i
	/* T1 = s2 and s3 */
	movdqa \s2, T1
	pand \s3, T1

	/* MSG = ciphertext xor keystream */
	movdq\a (\i * 0x10)(SRC), MSG
	pxor T1, MSG
	pxor \s4, MSG
	pxor \s1, MSG
	movdq\a MSG, (\i * 0x10)(DST)

	aegis128_update
	pxor MSG, \s4

	sub $0x10, LEN
	cmp $0x10, LEN
	jl .Ldec_out_\i
.endm
554 | ||
/*
 * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
 *                                const void *src, void *dst);
 *
 * Decrypts every complete 16-byte block of 'src' into 'dst' and absorbs
 * the recovered plaintext into the state.  A trailing partial block
 * (length % 16 bytes) is not processed here.  When length is below 16 the
 * state is neither loaded nor stored.
 */
ENTRY(crypto_aegis128_aesni_dec)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of LEN (%rsi) is
	 * undefined on entry; zero-extend it before any 64-bit use.
	 */
	mov %esi, %esi

	cmp $0x10, LEN
	jb .Ldec_out

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* the aligned loop needs both SRC and DST to be 16-byte aligned */
	mov SRC, %r8
	or DST, %r8
	and $0xF, %r8
	jnz .Ldec_u_loop

.align 8
.Ldec_a_loop:
	decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_a_loop

.align 8
.Ldec_u_loop:
	decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0
	decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1
	decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2
	decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3
	decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4

	add $0x50, SRC
	add $0x50, DST
	jmp .Ldec_u_loop

	/*
	 * store the state:
	 * .Ldec_out_N is taken right after block N of a group of five, i.e.
	 * after N + 1 updates; each exit writes the registers back in the
	 * matching order.
	 */
.Ldec_out_0:
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)
	FRAME_END
	ret

.Ldec_out_1:
	movdqu STATE3, 0x00(STATEP)
	movdqu STATE4, 0x10(STATEP)
	movdqu STATE0, 0x20(STATEP)
	movdqu STATE1, 0x30(STATEP)
	movdqu STATE2, 0x40(STATEP)
	FRAME_END
	ret

.Ldec_out_2:
	movdqu STATE2, 0x00(STATEP)
	movdqu STATE3, 0x10(STATEP)
	movdqu STATE4, 0x20(STATEP)
	movdqu STATE0, 0x30(STATEP)
	movdqu STATE1, 0x40(STATEP)
	FRAME_END
	ret

.Ldec_out_3:
	movdqu STATE1, 0x00(STATEP)
	movdqu STATE2, 0x10(STATEP)
	movdqu STATE3, 0x20(STATEP)
	movdqu STATE4, 0x30(STATEP)
	movdqu STATE0, 0x40(STATEP)
	FRAME_END
	ret

.Ldec_out_4:
	movdqu STATE0, 0x00(STATEP)
	movdqu STATE1, 0x10(STATEP)
	movdqu STATE2, 0x20(STATEP)
	movdqu STATE3, 0x30(STATEP)
	movdqu STATE4, 0x40(STATEP)
	FRAME_END
	ret

.Ldec_out:
	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_dec)
651 | ||
/*
 * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
 *                                     const void *src, void *dst);
 *
 * Decrypts one final partial block: the bytes are gathered into a
 * zero-padded 16-byte block by __load_partial, decrypted, and the first
 * 'length' bytes of the result are written by __store_partial.  The
 * decrypted block is then masked down to its first 'length' bytes and
 * absorbed into the state.
 */
ENTRY(crypto_aegis128_aesni_dec_tail)
	FRAME_BEGIN

	/*
	 * 'length' is a 32-bit argument, so the upper half of LEN (%rsi) is
	 * undefined on entry; zero-extend it because __store_partial compares
	 * LEN as a 64-bit value.
	 */
	mov %esi, %esi

	/* load the state: */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* decrypt message: */
	call __load_partial

	/* MSG = MSG xor STATE1 xor STATE4 xor (STATE2 and STATE3) */
	pxor STATE1, MSG
	pxor STATE4, MSG
	movdqa STATE2, T1
	pand STATE3, T1
	pxor T1, MSG

	movdqa MSG, T0
	call __store_partial

	/*
	 * mask with byte count:
	 * four self-interleaves replicate the low byte of LEN into all 16
	 * bytes of T0; the signed byte compare against 0, 1, ..., 15 then
	 * yields 0xff exactly in the positions below the byte count.
	 */
	movq LEN, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	punpcklbw T0, T0
	movdqa .Laegis128_counter(%rip), T1
	pcmpgtb T1, T0
	pand T0, MSG

	aegis128_update
	pxor MSG, STATE4

	/* store the state: */
	movdqu STATE4, 0x00(STATEP)
	movdqu STATE0, 0x10(STATEP)
	movdqu STATE1, 0x20(STATEP)
	movdqu STATE2, 0x30(STATEP)
	movdqu STATE3, 0x40(STATEP)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_dec_tail)
701 | ||
/*
 * void crypto_aegis128_aesni_final(void *state, void *tag_xor,
 *                                  u64 assoclen, u64 cryptlen);
 *
 * Mixes the two lengths (as bit counts) into the state with seven
 * updates, then xors all five state words into the 16 bytes at tag_xor.
 * The state buffer itself is only read, never written back.
 *
 *   %rdi (STATEP) - state
 *   %rsi          - tag_xor, read and written with unaligned accesses
 *   %rdx          - assoclen in bytes
 *   %rcx          - cryptlen in bytes
 */
ENTRY(crypto_aegis128_aesni_final)
	FRAME_BEGIN

	/* bring the five state words into registers */
	movdqu 0x00(STATEP), STATE0
	movdqu 0x10(STATEP), STATE1
	movdqu 0x20(STATEP), STATE2
	movdqu 0x30(STATEP), STATE3
	movdqu 0x40(STATEP), STATE4

	/* length block: assoclen in the low qword, cryptlen in the high qword */
	movq %rcx, T0
	movq %rdx, MSG
	punpcklqdq T0, MSG
	psllq $3, MSG			/* bytes -> bits, per qword */

	pxor STATE3, MSG

	/* seven updates, each followed by mixing the length block in */
	aegis128_update
	pxor MSG, STATE4
	aegis128_update
	pxor MSG, STATE3
	aegis128_update
	pxor MSG, STATE2
	aegis128_update
	pxor MSG, STATE1
	aegis128_update
	pxor MSG, STATE0
	aegis128_update
	pxor MSG, STATE4
	aegis128_update
	pxor MSG, STATE3

	/* fold every state word into the caller's tag buffer */
	movdqu (%rsi), MSG

	pxor STATE4, MSG
	pxor STATE3, MSG
	pxor STATE2, MSG
	pxor STATE1, MSG
	pxor STATE0, MSG

	movdqu MSG, (%rsi)

	FRAME_END
	ret
ENDPROC(crypto_aegis128_aesni_final)