Commit | Line | Data |
---|---|---|
90be188b | 1 | /* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */ |
22cddcc7 | 2 | /* |
90be188b | 3 | * AES CTR mode by8 optimization with AVX instructions. (x86_64) |
22cddcc7 | 4 | * |
5 | * Copyright(c) 2014 Intel Corporation. | |
6 | * | |
22cddcc7 | 7 | * Contact Information: |
8 | * James Guilford <james.guilford@intel.com> | |
9 | * Sean Gulley <sean.m.gulley@intel.com> | |
10 | * Chandramouli Narayanan <mouli@linux.intel.com> | |
90be188b NH |
11 | */ |
12 | /* | |
13 | * This is AES128/192/256 CTR mode optimization implementation. It requires | |
14 | * the support of Intel(R) AESNI and AVX instructions. | |
22cddcc7 | 15 | * |
90be188b NH |
16 | * This work was inspired by the AES CTR mode optimization published |
17 | * in Intel Optimized IPSEC Cryptographic library. | |
18 | * Additional information on it can be found at: | |
19 | * https://github.com/intel/intel-ipsec-mb | |
22cddcc7 | 20 | */ |
21 | ||
22 | #include <linux/linkage.h> | |
22cddcc7 | 23 | |
/* Unaligned 128-bit move; used for the caller's input and output buffers. */
#define VMOVDQ		vmovdqu

/*
 * Note: the "x" prefix in these aliases means "this is an xmm register".  The
 * alias prefixes have no relation to XCTR where the "X" prefix means "XOR
 * counter".
 */

/* Up to eight blocks processed in parallel. */
#define xdata0		%xmm0
#define xdata1		%xmm1
#define xdata2		%xmm2
#define xdata3		%xmm3
#define xdata4		%xmm4
#define xdata5		%xmm5
#define xdata6		%xmm6
#define xdata7		%xmm7

/* %xmm8 and %xmm9 play a different role in each mode. */
#define xcounter	%xmm8	/* CTR mode only */
#define xiv		%xmm8	/* XCTR mode only */
#define xbyteswap	%xmm9	/* CTR mode only */
#define xtmp		%xmm9	/* XCTR mode only */

/* Round keys kept across calls of do_aes (xkey*) and scratch keys (xkeyA/B). */
#define xkey0		%xmm10
#define xkey4		%xmm11
#define xkey8		%xmm12
#define xkey12		%xmm13
#define xkeyA		%xmm14
#define xkeyB		%xmm15

/* Integer arguments, in the order listed in the function prototypes below. */
#define p_in		%rdi
#define p_iv		%rsi
#define p_keys		%rdx
#define p_out		%rcx
#define num_bytes	%r8
#define counter		%r9	/* XCTR mode only */

/* Scratch register. */
#define tmp		%r10

/*
 * Selector values for the "club" macro.  Only XDATA is acted upon there;
 * DDQ_DATA does not appear to be referenced in this file.
 */
#define DDQ_DATA	0
#define XDATA		1

/* Values accepted for the key_len macro argument. */
#define KEY_128		1
#define KEY_192		2
#define KEY_256		3
62 | ||
.section	.rodata
.balign 16

/* vpshufb mask that reverses the 16 bytes of an xmm register. */
byteswap_const:
	.octa 0x000102030405060708090A0B0C0D0E0F

/* Selects the low 64 bits; used with vptest to detect a wrapped low half. */
ddq_low_msk:
	.octa 0x0000000000000000FFFFFFFFFFFFFFFF

/* Adds 1 to the high 64 bits (manual carry out of the low half). */
ddq_high_add_1:
	.octa 0x00000000000000010000000000000000

/*
 * ddq_add_1 .. ddq_add_8: the 128-bit constants 1 .. 8, laid out back to
 * back so that entry N can also be addressed as ddq_add_1 + 16 * (N - 1).
 */
.irp idx, 1, 2, 3, 4, 5, 6, 7, 8
ddq_add_\idx:
	.octa \idx
.endr
88 | ||
89 | .text | |
90 | ||
/*
 * setxdata n
 *
 * Bind the assembler symbol var_xdata to register %xmm<n>, so that code
 * inside a .rept loop can name "the n-th data register".
 *
 * (No equivalent helper exists for the ddq_add_x constants; they are
 * addressed directly as ddq_add_1 + 16 * index.)
 */
.macro setxdata n
	var_xdata = %xmm\n
.endm
97 | ||
/*
 * club name, id
 *
 * Join the numeric value of 'id' to the register family selected by 'name'.
 * Only name == XDATA is handled: var_xdata becomes %xmm<id>.
 *
 * .altmacro is enabled just long enough for "%expr" to evaluate 'id' to a
 * number, and is switched off again before returning.
 */
.macro club name, id
.altmacro
	.if XDATA == \name
		setxdata %\id
	.endif
.noaltmacro
.endm
107 | ||
/*
 * aes_round_all insn, rkey
 *
 * Emit "\insn \rkey, xdataN, xdataN" for N = 0 .. by-1.
 * Relies on the assembler symbol 'by' set by do_aes; leaves 'i' equal to 'by'.
 */
.macro aes_round_all insn, rkey
	.set i, 0
	.rept by
		club XDATA, i
		\insn	\rkey, var_xdata, var_xdata
		.set i, (i + 1)
	.endr
.endm

/*
 * do_aes num_in_par load_keys key_len xctr
 *
 *   b        number of 16-byte blocks to process in parallel (1..8)
 *   k        non-zero: (re)load the cached round keys xkey0/4/8/12
 *   key_len  KEY_128, KEY_192 or KEY_256
 *   xctr     non-zero: XCTR mode, zero: CTR mode
 *
 * Builds 'b' counter blocks, encrypts them, XORs them with the input and
 * stores the result at p_out.
 *
 * This increments p_in, but not p_out.  The running counter (xcounter in
 * CTR mode, 'counter' in XCTR mode) is advanced by 'b'.
 * Clobbers xdata0..xdata<b-1>, xkeyA, xkeyB and, in XCTR mode, xtmp.
 */
.macro do_aes b, k, key_len, xctr
	.set by, \b
	.set load_keys, \k
	.set klen, \key_len

	.if (load_keys)
		vmovdqa	0*16(p_keys), xkey0
	.endif

	/* ---- generate the counter blocks ---- */
	.if \xctr
		/* block N = iv XOR (counter + 1 + N) */
		movq	counter, xtmp
		.set i, 0
		.rept (by)
			club XDATA, i
			vpaddq	(ddq_add_1 + 16 * i)(%rip), xtmp, var_xdata
			.set i, (i + 1)
		.endr
		aes_round_all vpxor, xiv
	.else
		/* block 0 is the current counter, byte-swapped back */
		vpshufb	xbyteswap, xcounter, xdata0
		.set i, 1
		.rept (by - 1)
			club XDATA, i
			vpaddq	(ddq_add_1 + 16 * (i - 1))(%rip), xcounter, var_xdata
			/* low half wrapped to zero: carry into the high half */
			vptest	ddq_low_msk(%rip), var_xdata
			jnz	1f
			vpaddq	ddq_high_add_1(%rip), var_xdata, var_xdata
			vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
			1:
			vpshufb	xbyteswap, var_xdata, var_xdata
			.set i, (i + 1)
		.endr
	.endif

	vmovdqa	1*16(p_keys), xkeyA

	/* ---- round 0 (whitening), interleaved with the counter update ---- */
	vpxor	xkey0, xdata0, xdata0
	.if \xctr
		add	$by, counter
	.else
		vpaddq	(ddq_add_1 + 16 * (by - 1))(%rip), xcounter, xcounter
		vptest	ddq_low_msk(%rip), xcounter
		jnz	1f
		vpaddq	ddq_high_add_1(%rip), xcounter, xcounter
		1:
	.endif

	.set i, 1
	.rept (by - 1)
		club XDATA, i
		vpxor	xkey0, var_xdata, var_xdata
		.set i, (i + 1)
	.endr

	vmovdqa	2*16(p_keys), xkeyB

	/* key 1 */
	aes_round_all vaesenc, xkeyA

	.if (klen == KEY_128)
		.if (load_keys)
			vmovdqa	3*16(p_keys), xkey4
		.endif
	.else
		vmovdqa	3*16(p_keys), xkeyA
	.endif

	/* key 2 */
	aes_round_all vaesenc, xkeyB

	add	$(16*by), p_in

	/* key 3 */
	.if (klen == KEY_128)
		vmovdqa	4*16(p_keys), xkeyB
		aes_round_all vaesenc, xkey4
	.else
		.if (load_keys)
			vmovdqa	4*16(p_keys), xkey4
		.endif
		aes_round_all vaesenc, xkeyA
	.endif

	vmovdqa	5*16(p_keys), xkeyA

	/* key 4 */
	.if (klen == KEY_128)
		aes_round_all vaesenc, xkeyB
		.if (load_keys)
			vmovdqa	6*16(p_keys), xkey8
		.endif
	.else
		aes_round_all vaesenc, xkey4
		vmovdqa	6*16(p_keys), xkeyB
	.endif

	/* key 5 */
	aes_round_all vaesenc, xkeyA

	vmovdqa	7*16(p_keys), xkeyA

	/* key 6 */
	.if (klen == KEY_128)
		aes_round_all vaesenc, xkey8
		vmovdqa	8*16(p_keys), xkeyB
	.else
		aes_round_all vaesenc, xkeyB
		.if (load_keys)
			vmovdqa	8*16(p_keys), xkey8
		.endif
	.endif

	/* key 7 */
	aes_round_all vaesenc, xkeyA

	/* key 8 */
	.if (klen == KEY_128)
		.if (load_keys)
			vmovdqa	9*16(p_keys), xkey12
		.endif
		aes_round_all vaesenc, xkeyB
	.else
		vmovdqa	9*16(p_keys), xkeyA
		aes_round_all vaesenc, xkey8
	.endif

	vmovdqa	10*16(p_keys), xkeyB

	.if (klen == KEY_128)
		/* key 9 */
		aes_round_all vaesenc, xkey12
		/* key 10 (last) */
		aes_round_all vaesenclast, xkeyB
	.else
		/* key 9 */
		aes_round_all vaesenc, xkeyA
		vmovdqa	11*16(p_keys), xkeyA
		/* key 10 */
		aes_round_all vaesenc, xkeyB

		.if (load_keys)
			vmovdqa	12*16(p_keys), xkey12
		.endif

		/* key 11 */
		aes_round_all vaesenc, xkeyA

		.if (klen == KEY_256)
			vmovdqa	13*16(p_keys), xkeyA
			/* key 12 */
			aes_round_all vaesenc, xkey12
			vmovdqa	14*16(p_keys), xkeyB
			/* key 13 */
			aes_round_all vaesenc, xkeyA
			/* key 14 (last) */
			aes_round_all vaesenclast, xkeyB
		.else
			/* key 12 (last) */
			aes_round_all vaesenclast, xkey12
		.endif
	.endif

	/*
	 * ---- XOR the keystream with the input, two blocks at a time ----
	 * p_in was already advanced, hence the negative offsets.
	 */
	.set i, 0
	.rept (by / 2)
		.set j, (i + 1)
		VMOVDQ	(i*16 - 16*by)(p_in), xkeyA
		VMOVDQ	(j*16 - 16*by)(p_in), xkeyB
		club XDATA, i
		vpxor	xkeyA, var_xdata, var_xdata
		club XDATA, j
		vpxor	xkeyB, var_xdata, var_xdata
		.set i, (i + 2)
	.endr

	/* odd block count: one block is left over */
	.if (i < by)
		VMOVDQ	(i*16 - 16*by)(p_in), xkeyA
		club XDATA, i
		vpxor	xkeyA, var_xdata, var_xdata
	.endif

	/* ---- store the result ---- */
	.set i, 0
	.rept by
		club XDATA, i
		VMOVDQ	var_xdata, i*16(p_out)
		.set i, (i + 1)
	.endr
.endm
399 | ||
fd94fcf0 NH |
/* do_aes_load val, key_len, xctr: do_aes on 'val' blocks, reloading the cached round keys. */
.macro do_aes_load val, key_len, xctr
	do_aes	b=\val, k=1, key_len=\key_len, xctr=\xctr
.endm
403 | ||
fd94fcf0 NH |
/* do_aes_noload val, key_len, xctr: do_aes on 'val' blocks, reusing the already cached round keys. */
.macro do_aes_noload val, key_len, xctr
	do_aes	b=\val, k=0, key_len=\key_len, xctr=\xctr
.endm
407 | ||
/*
 * do_aes_ctrmain key_len, xctr
 *
 * Main body of every aes_[x]ctr_enc_*_avx_by8 routine.
 *
 *   key_len  KEY_128, KEY_192 or KEY_256
 *   xctr     0 for CTR mode, 1 for XCTR mode
 *
 * Only whole 16-byte blocks are processed; num_bytes is rounded down to a
 * multiple of 16 on entry.  The (blocks mod 8) leading blocks are handled
 * first by one of the .Leq<N> cases, which also loads the cached round keys;
 * the remainder is processed eight blocks per iteration of .Lmain_loop2.
 *
 * In CTR mode the advanced counter is written back to (p_iv).  When there is
 * not even one whole block, nothing is read or written at all: the counter
 * registers were never loaded on that path, so the IV must not be stored.
 *
 * p_keys is read with vmovdqa and therefore has to be 16-byte aligned.
 *
 * NOTE(review): the prototypes declare num_bytes/byte_ctr as unsigned int
 * while the full 64-bit registers are used here - confirm that callers
 * zero-extend them.
 */
.macro do_aes_ctrmain key_len, xctr
	/* keep whole blocks only; bail out if there is none */
	and	$(~15), num_bytes
	jz	.Lno_full_blk\xctr\key_len

	.if \xctr
		/* byte offset -> block index */
		shr	$4, counter
		vmovdqu	(p_iv), xiv
	.else
		vmovdqa	byteswap_const(%rip), xbyteswap
		vmovdqu	(p_iv), xcounter
		vpshufb	xbyteswap, xcounter, xcounter
	.endif

	/* tmp = 16 * (number of blocks mod 8) */
	mov	num_bytes, tmp
	and	$(7*16), tmp
	jz	.Lmult_of_8_blks\xctr\key_len

	/* 1 <= tmp / 16 <= 7 */
	cmp	$(4*16), tmp
	ja	.Lgt4\xctr\key_len
	je	.Leq4\xctr\key_len

.Llt4\xctr\key_len:
	cmp	$(2*16), tmp
	ja	.Leq3\xctr\key_len
	je	.Leq2\xctr\key_len

.Leq1\xctr\key_len:
	do_aes_load	1, \key_len, \xctr
	add	$(1*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Leq2\xctr\key_len:
	do_aes_load	2, \key_len, \xctr
	add	$(2*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Leq3\xctr\key_len:
	do_aes_load	3, \key_len, \xctr
	add	$(3*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Leq4\xctr\key_len:
	do_aes_load	4, \key_len, \xctr
	add	$(4*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Lgt4\xctr\key_len:
	cmp	$(6*16), tmp
	ja	.Leq7\xctr\key_len
	je	.Leq6\xctr\key_len

.Leq5\xctr\key_len:
	do_aes_load	5, \key_len, \xctr
	add	$(5*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Leq6\xctr\key_len:
	do_aes_load	6, \key_len, \xctr
	add	$(6*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Leq7\xctr\key_len:
	do_aes_load	7, \key_len, \xctr
	add	$(7*16), p_out
	and	$(~7*16), num_bytes
	jz	.Ldo_return2\xctr\key_len
	jmp	.Lmain_loop2\xctr\key_len

.Lmult_of_8_blks\xctr\key_len:
	/* no .Leq<N> case ran, so the cached round keys are loaded here */
	.if (\key_len != KEY_128)
		vmovdqa	0*16(p_keys), xkey0
		vmovdqa	4*16(p_keys), xkey4
		vmovdqa	8*16(p_keys), xkey8
		vmovdqa	12*16(p_keys), xkey12
	.else
		vmovdqa	0*16(p_keys), xkey0
		vmovdqa	3*16(p_keys), xkey4
		vmovdqa	6*16(p_keys), xkey8
		vmovdqa	9*16(p_keys), xkey12
	.endif
.align 16
.Lmain_loop2\xctr\key_len:
	/* num_bytes is a multiple of 8 blocks and >0 */
	do_aes_noload	8, \key_len, \xctr
	add	$(8*16), p_out
	sub	$(8*16), num_bytes
	jne	.Lmain_loop2\xctr\key_len

.Ldo_return2\xctr\key_len:
	.if !\xctr
		/* return updated IV */
		vpshufb	xbyteswap, xcounter, xcounter
		vmovdqu	xcounter, (p_iv)
	.endif
.Lno_full_blk\xctr\key_len:
	RET
.endm
520 | ||
/*
 * aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out,
 *			unsigned int num_bytes)
 *
 * AES-128 CTR encryption/decryption, up to eight blocks at a time.
 * Arguments arrive in p_in, p_iv, p_keys, p_out and num_bytes; the advanced
 * counter is written back through iv.
 * XMM registers are clobbered; saving/restoring them must be done at a
 * higher level.
 */
SYM_FUNC_START(aes_ctr_enc_128_avx_by8)
	/* expand the shared main loop: 128-bit key, CTR mode */
	do_aes_ctrmain KEY_128, 0

SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
22cddcc7 | 533 | |
/*
 * aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out,
 *			unsigned int num_bytes)
 *
 * AES-192 CTR encryption/decryption, up to eight blocks at a time.
 * Arguments arrive in p_in, p_iv, p_keys, p_out and num_bytes; the advanced
 * counter is written back through iv.
 * XMM registers are clobbered; saving/restoring them must be done at a
 * higher level.
 */
SYM_FUNC_START(aes_ctr_enc_192_avx_by8)
	/* expand the shared main loop: 192-bit key, CTR mode */
	do_aes_ctrmain KEY_192, 0

SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
22cddcc7 | 546 | |
/*
 * aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out,
 *			unsigned int num_bytes)
 *
 * AES-256 CTR encryption/decryption, up to eight blocks at a time.
 * Arguments arrive in p_in, p_iv, p_keys, p_out and num_bytes; the advanced
 * counter is written back through iv.
 * XMM registers are clobbered; saving/restoring them must be done at a
 * higher level.
 */
SYM_FUNC_START(aes_ctr_enc_256_avx_by8)
	/* expand the shared main loop: 256-bit key, CTR mode */
	do_aes_ctrmain KEY_256, 0

SYM_FUNC_END(aes_ctr_enc_256_avx_by8)
fd94fcf0 NH |
559 | |
/*
 * aes_xctr_enc_128_avx_by8(const u8 *in, const u8 *iv, const void *keys,
 *	u8* out, unsigned int num_bytes, unsigned int byte_ctr)
 *
 * AES-128 XCTR encryption/decryption, up to eight blocks at a time.
 * byte_ctr arrives in 'counter' and is converted to a block index by the
 * main loop; iv is only read in this mode.
 * XMM registers are clobbered; saving/restoring them must be done at a
 * higher level.
 */
SYM_FUNC_START(aes_xctr_enc_128_avx_by8)
	/* expand the shared main loop: 128-bit key, XCTR mode */
	do_aes_ctrmain KEY_128, 1

SYM_FUNC_END(aes_xctr_enc_128_avx_by8)
572 | ||
/*
 * aes_xctr_enc_192_avx_by8(const u8 *in, const u8 *iv, const void *keys,
 *	u8* out, unsigned int num_bytes, unsigned int byte_ctr)
 *
 * AES-192 XCTR encryption/decryption, up to eight blocks at a time.
 * byte_ctr arrives in 'counter' and is converted to a block index by the
 * main loop; iv is only read in this mode.
 * XMM registers are clobbered; saving/restoring them must be done at a
 * higher level.
 */
SYM_FUNC_START(aes_xctr_enc_192_avx_by8)
	/* expand the shared main loop: 192-bit key, XCTR mode */
	do_aes_ctrmain KEY_192, 1

SYM_FUNC_END(aes_xctr_enc_192_avx_by8)
585 | ||
/*
 * aes_xctr_enc_256_avx_by8(const u8 *in, const u8 *iv, const void *keys,
 *	u8* out, unsigned int num_bytes, unsigned int byte_ctr)
 *
 * AES-256 XCTR encryption/decryption, up to eight blocks at a time.
 * byte_ctr arrives in 'counter' and is converted to a block index by the
 * main loop; iv is only read in this mode.
 * XMM registers are clobbered; saving/restoring them must be done at a
 * higher level.
 */
SYM_FUNC_START(aes_xctr_enc_256_avx_by8)
	/* expand the shared main loop: 256-bit key, XCTR mode */
	do_aes_ctrmain KEY_256, 1

SYM_FUNC_END(aes_xctr_enc_256_avx_by8)