2 # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
4 # Licensed under the OpenSSL license (the "License"). You may not use
5 # this file except in compliance with the License. You can obtain a copy
6 # in the file LICENSE in the source distribution or at
7 # https://www.openssl.org/source/license.html
10 # ====================================================================
11 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12 # project. The module is, however, dual licensed under OpenSSL and
13 # CRYPTOGAMS licenses depending on where you obtain it. For further
14 # details see http://www.openssl.org/~appro/cryptogams/.
15 # ====================================================================
17 # This module implements support for AES instructions as per PowerISA
18 # specification version 2.07, first implemented by POWER8 processor.
19 # The module is endian-agnostic in sense that it supports both big-
20 # and little-endian cases. Data alignment in parallelizable modes is
21 # handled with VSX loads and stores, which implies MSR.VSX flag being
22 # set. It should also be noted that ISA specification doesn't prohibit
23 # alignment exceptions for these instructions on page boundaries.
24 # Initially alignment was handled in pure AltiVec/VMX way [when data
25 # is aligned programmatically, which in turn guarantees exception-
26 # free execution], but it turned out to hamper performance when vcipher
27 # instructions are interleaved. It's reckoned that eventual
28 # misalignment penalties at page boundaries are in average lower
29 # than additional overhead in pure AltiVec approach.
33 # Add XTS subroutine: 9x improvement on little- and 12x improvement on
34 # big-endian systems was measured.
36 ######################################################################
37 # Current large-block performance in cycles per byte processed with
38 # 128-bit key (less is better).
40 # CBC en-/decrypt CTR XTS
41 # POWER8[le] 3.96/0.72 0.74 1.1
42 # POWER8[be] 3.75/0.65 0.66 1.0
46 if ($flavour =~ /64/) {
54 } elsif ($flavour =~ /32/) {
62 } else { die "nonsense $flavour"; }
64 $LITTLE_ENDIAN = ($flavour=~/le$/) ? $SIZE_T : 0;
66 $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
67 ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
68 ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
69 die "can't locate ppc-xlate.pl";
# Pipe our generated assembly through the ppc-xlate translator.  Use
# low-precedence "or" so that failure of open() itself is caught: with
# "||" the die would bind to shift's (always true) argument and a failed
# open would go unnoticed.
71 open STDOUT,"| $^X $xlate $flavour ".shift or die "can't call $xlate: $!";
79 #########################################################################
80 {{{ # Key setup procedures #
81 my ($inp,$bits,$out,$ptr,$cnt,$rounds)=map("r$_",(3..8));
82 my ($zero,$in0,$in1,$key,$rcon,$mask,$tmp)=map("v$_",(0..6));
83 my ($stage,$outperm,$outmask,$outhead,$outtail)=map("v$_",(7..11));
92 .long 0x01000000, 0x01000000, 0x01000000, 0x01000000 ?rev
93 .long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
94 .long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
99 mflr $ptr #vvvvv distance between . and rcon
104 .byte 0,12,0x14,0,0,0,0,0
105 .asciz "AES for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
107 .globl .${prefix}_set_encrypt_key
110 $PUSH r11,$LRSAVE($sp)
114 beq- Lenc_key_abort # if ($inp==0) return -1;
116 beq- Lenc_key_abort # if ($out==0) return -1;
134 addi $inp,$inp,15 # 15 is not typo
135 lvsr $key,0,r9 # borrow $key
139 le?vspltisb $mask,0x0f # borrow $mask
141 le?vxor $key,$key,$mask # adjust for byte swap
144 vperm $in0,$in0,$in1,$key # align [and byte swap in LE]
146 vxor $zero,$zero,$zero
149 ?lvsr $outperm,0,$out
152 ?vperm $outmask,$zero,$outmask,$outperm
162 vperm $key,$in0,$in0,$mask # rotate-n-splat
163 vsldoi $tmp,$zero,$in0,12 # >>32
164 vperm $outtail,$in0,$in0,$outperm # rotate
165 vsel $stage,$outhead,$outtail,$outmask
166 vmr $outhead,$outtail
167 vcipherlast $key,$key,$rcon
172 vsldoi $tmp,$zero,$tmp,12 # >>32
174 vsldoi $tmp,$zero,$tmp,12 # >>32
176 vadduwm $rcon,$rcon,$rcon
180 lvx $rcon,0,$ptr # last two round keys
182 vperm $key,$in0,$in0,$mask # rotate-n-splat
183 vsldoi $tmp,$zero,$in0,12 # >>32
184 vperm $outtail,$in0,$in0,$outperm # rotate
185 vsel $stage,$outhead,$outtail,$outmask
186 vmr $outhead,$outtail
187 vcipherlast $key,$key,$rcon
192 vsldoi $tmp,$zero,$tmp,12 # >>32
194 vsldoi $tmp,$zero,$tmp,12 # >>32
196 vadduwm $rcon,$rcon,$rcon
199 vperm $key,$in0,$in0,$mask # rotate-n-splat
200 vsldoi $tmp,$zero,$in0,12 # >>32
201 vperm $outtail,$in0,$in0,$outperm # rotate
202 vsel $stage,$outhead,$outtail,$outmask
203 vmr $outhead,$outtail
204 vcipherlast $key,$key,$rcon
209 vsldoi $tmp,$zero,$tmp,12 # >>32
211 vsldoi $tmp,$zero,$tmp,12 # >>32
214 vperm $outtail,$in0,$in0,$outperm # rotate
215 vsel $stage,$outhead,$outtail,$outmask
216 vmr $outhead,$outtail
219 addi $inp,$out,15 # 15 is not typo
229 vperm $outtail,$in0,$in0,$outperm # rotate
230 vsel $stage,$outhead,$outtail,$outmask
231 vmr $outhead,$outtail
234 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
235 vspltisb $key,8 # borrow $key
237 vsububm $mask,$mask,$key # adjust the mask
240 vperm $key,$in1,$in1,$mask # rotate-n-splat
241 vsldoi $tmp,$zero,$in0,12 # >>32
242 vcipherlast $key,$key,$rcon
245 vsldoi $tmp,$zero,$tmp,12 # >>32
247 vsldoi $tmp,$zero,$tmp,12 # >>32
250 vsldoi $stage,$zero,$in1,8
253 vsldoi $in1,$zero,$in1,12 # >>32
254 vadduwm $rcon,$rcon,$rcon
258 vsldoi $stage,$stage,$in0,8
260 vperm $key,$in1,$in1,$mask # rotate-n-splat
261 vsldoi $tmp,$zero,$in0,12 # >>32
262 vperm $outtail,$stage,$stage,$outperm # rotate
263 vsel $stage,$outhead,$outtail,$outmask
264 vmr $outhead,$outtail
265 vcipherlast $key,$key,$rcon
269 vsldoi $stage,$in0,$in1,8
271 vsldoi $tmp,$zero,$tmp,12 # >>32
272 vperm $outtail,$stage,$stage,$outperm # rotate
273 vsel $stage,$outhead,$outtail,$outmask
274 vmr $outhead,$outtail
276 vsldoi $tmp,$zero,$tmp,12 # >>32
283 vsldoi $in1,$zero,$in1,12 # >>32
284 vadduwm $rcon,$rcon,$rcon
288 vperm $outtail,$in0,$in0,$outperm # rotate
289 vsel $stage,$outhead,$outtail,$outmask
290 vmr $outhead,$outtail
292 addi $inp,$out,15 # 15 is not typo
305 vperm $outtail,$in0,$in0,$outperm # rotate
306 vsel $stage,$outhead,$outtail,$outmask
307 vmr $outhead,$outtail
310 vperm $in1,$in1,$tmp,$key # align [and byte swap in LE]
314 vperm $key,$in1,$in1,$mask # rotate-n-splat
315 vsldoi $tmp,$zero,$in0,12 # >>32
316 vperm $outtail,$in1,$in1,$outperm # rotate
317 vsel $stage,$outhead,$outtail,$outmask
318 vmr $outhead,$outtail
319 vcipherlast $key,$key,$rcon
324 vsldoi $tmp,$zero,$tmp,12 # >>32
326 vsldoi $tmp,$zero,$tmp,12 # >>32
328 vadduwm $rcon,$rcon,$rcon
330 vperm $outtail,$in0,$in0,$outperm # rotate
331 vsel $stage,$outhead,$outtail,$outmask
332 vmr $outhead,$outtail
334 addi $inp,$out,15 # 15 is not typo
338 vspltw $key,$in0,3 # just splat
339 vsldoi $tmp,$zero,$in1,12 # >>32
343 vsldoi $tmp,$zero,$tmp,12 # >>32
345 vsldoi $tmp,$zero,$tmp,12 # >>32
353 lvx $in1,0,$inp # redundant in aligned case
354 vsel $in1,$outhead,$in1,$outmask
364 .byte 0,12,0x14,1,0,0,3,0
366 .size .${prefix}_set_encrypt_key,.-.${prefix}_set_encrypt_key
368 .globl .${prefix}_set_decrypt_key
369 $STU $sp,-$FRAME($sp)
371 $PUSH r10,$FRAME+$LRSAVE($sp)
379 subi $inp,$out,240 # first round key
380 srwi $rounds,$rounds,1
381 add $out,$inp,$cnt # last round key
405 xor r3,r3,r3 # return value
410 .byte 0,12,4,1,0x80,0,3,0
412 .size .${prefix}_set_decrypt_key,.-.${prefix}_set_decrypt_key
415 #########################################################################
416 {{{ # Single block en- and decrypt procedures #
419 my $n = $dir eq "de" ? "n" : "";
420 my ($inp,$out,$key,$rounds,$idx)=map("r$_",(3..7));
423 .globl .${prefix}_${dir}crypt
424 lwz $rounds,240($key)
427 li $idx,15 # 15 is not typo
433 lvsl v2,0,$inp # inpperm
435 ?lvsl v3,0,r11 # outperm
438 vperm v0,v0,v1,v2 # align [and byte swap in LE]
440 ?lvsl v5,0,$key # keyperm
441 srwi $rounds,$rounds,1
444 subi $rounds,$rounds,1
445 ?vperm v1,v1,v2,v5 # align round key
467 v${n}cipherlast v0,v0,v1
471 li $idx,15 # 15 is not typo
472 ?vperm v2,v1,v2,v3 # outmask
474 lvx v1,0,$out # outhead
475 vperm v0,v0,v0,v3 # rotate [and byte swap in LE]
485 .byte 0,12,0x14,0,0,0,3,0
487 .size .${prefix}_${dir}crypt,.-.${prefix}_${dir}crypt
493 #########################################################################
494 {{{ # CBC en- and decrypt procedures #
495 my ($inp,$out,$len,$key,$ivp,$enc,$rounds,$idx)=map("r$_",(3..10));
496 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
497 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm)=
500 .globl .${prefix}_cbc_encrypt
504 cmpwi $enc,0 # test direction
510 vxor $rndkey0,$rndkey0,$rndkey0
511 le?vspltisb $tmp,0x0f
513 lvx $ivec,0,$ivp # load [unaligned] iv
515 lvx $inptail,$idx,$ivp
516 le?vxor $inpperm,$inpperm,$tmp
517 vperm $ivec,$ivec,$inptail,$inpperm
520 ?lvsl $keyperm,0,$key # prepare for unaligned key
521 lwz $rounds,240($key)
523 lvsr $inpperm,0,r11 # prepare for unaligned load
525 addi $inp,$inp,15 # 15 is not typo
526 le?vxor $inpperm,$inpperm,$tmp
528 ?lvsr $outperm,0,$out # prepare for unaligned store
531 ?vperm $outmask,$rndkey0,$outmask,$outperm
532 le?vxor $outperm,$outperm,$tmp
534 srwi $rounds,$rounds,1
536 subi $rounds,$rounds,1
544 subi $len,$len,16 # len-=16
547 vperm $inout,$inout,$inptail,$inpperm
548 lvx $rndkey1,$idx,$key
550 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
551 vxor $inout,$inout,$rndkey0
552 lvx $rndkey0,$idx,$key
554 vxor $inout,$inout,$ivec
557 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
558 vcipher $inout,$inout,$rndkey1
559 lvx $rndkey1,$idx,$key
561 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
562 vcipher $inout,$inout,$rndkey0
563 lvx $rndkey0,$idx,$key
567 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
568 vcipher $inout,$inout,$rndkey1
569 lvx $rndkey1,$idx,$key
571 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
572 vcipherlast $ivec,$inout,$rndkey0
575 vperm $tmp,$ivec,$ivec,$outperm
576 vsel $inout,$outhead,$tmp,$outmask
587 bge _aesp8_cbc_decrypt8x
592 subi $len,$len,16 # len-=16
595 vperm $tmp,$tmp,$inptail,$inpperm
596 lvx $rndkey1,$idx,$key
598 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
599 vxor $inout,$tmp,$rndkey0
600 lvx $rndkey0,$idx,$key
604 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
605 vncipher $inout,$inout,$rndkey1
606 lvx $rndkey1,$idx,$key
608 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
609 vncipher $inout,$inout,$rndkey0
610 lvx $rndkey0,$idx,$key
614 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
615 vncipher $inout,$inout,$rndkey1
616 lvx $rndkey1,$idx,$key
618 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
619 vncipherlast $inout,$inout,$rndkey0
622 vxor $inout,$inout,$ivec
624 vperm $tmp,$inout,$inout,$outperm
625 vsel $inout,$outhead,$tmp,$outmask
633 lvx $inout,0,$out # redundant in aligned case
634 vsel $inout,$outhead,$inout,$outmask
637 neg $enc,$ivp # write [unaligned] iv
638 li $idx,15 # 15 is not typo
639 vxor $rndkey0,$rndkey0,$rndkey0
641 le?vspltisb $tmp,0x0f
642 ?lvsl $outperm,0,$enc
643 ?vperm $outmask,$rndkey0,$outmask,$outperm
644 le?vxor $outperm,$outperm,$tmp
646 vperm $ivec,$ivec,$ivec,$outperm
647 vsel $inout,$outhead,$ivec,$outmask
648 lvx $inptail,$idx,$ivp
650 vsel $inout,$ivec,$inptail,$outmask
651 stvx $inout,$idx,$ivp
656 .byte 0,12,0x14,0,0,0,6,0
659 #########################################################################
660 {{ # Optimized CBC decrypt procedure #
662 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
663 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10..13));
664 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(14..21));
665 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
666 # v26-v31 last 6 round keys
667 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
671 _aesp8_cbc_decrypt8x:
672 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
673 li r10,`$FRAME+8*16+15`
674 li r11,`$FRAME+8*16+31`
675 stvx v20,r10,$sp # ABI says so
698 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
700 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
702 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
704 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
706 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
708 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
710 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
714 subi $rounds,$rounds,3 # -4 in total
715 subi $len,$len,128 # bias
717 lvx $rndkey0,$x00,$key # load key schedule
721 ?vperm $rndkey0,$rndkey0,v30,$keyperm
722 addi $key_,$sp,$FRAME+15
726 ?vperm v24,v30,v31,$keyperm
729 stvx v24,$x00,$key_ # off-load round[1]
730 ?vperm v25,v31,v30,$keyperm
732 stvx v25,$x10,$key_ # off-load round[2]
733 addi $key_,$key_,0x20
734 bdnz Load_cbc_dec_key
737 ?vperm v24,v30,v31,$keyperm
739 stvx v24,$x00,$key_ # off-load round[3]
740 ?vperm v25,v31,v26,$keyperm
742 stvx v25,$x10,$key_ # off-load round[4]
743 addi $key_,$sp,$FRAME+15 # rewind $key_
744 ?vperm v26,v26,v27,$keyperm
746 ?vperm v27,v27,v28,$keyperm
748 ?vperm v28,v28,v29,$keyperm
750 ?vperm v29,v29,v30,$keyperm
751 lvx $out0,$x70,$key # borrow $out0
752 ?vperm v30,v30,v31,$keyperm
753 lvx v24,$x00,$key_ # pre-load round[1]
754 ?vperm v31,v31,$out0,$keyperm
755 lvx v25,$x10,$key_ # pre-load round[2]
757 #lvx $inptail,0,$inp # "caller" already did this
758 #addi $inp,$inp,15 # 15 is not typo
759 subi $inp,$inp,15 # undo "caller"
762 lvx_u $in0,$x00,$inp # load first 8 "words"
763 le?lvsl $inpperm,0,$idx
764 le?vspltisb $tmp,0x0f
766 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
768 le?vperm $in0,$in0,$in0,$inpperm
770 le?vperm $in1,$in1,$in1,$inpperm
772 le?vperm $in2,$in2,$in2,$inpperm
773 vxor $out0,$in0,$rndkey0
775 le?vperm $in3,$in3,$in3,$inpperm
776 vxor $out1,$in1,$rndkey0
778 le?vperm $in4,$in4,$in4,$inpperm
779 vxor $out2,$in2,$rndkey0
782 le?vperm $in5,$in5,$in5,$inpperm
783 vxor $out3,$in3,$rndkey0
784 le?vperm $in6,$in6,$in6,$inpperm
785 vxor $out4,$in4,$rndkey0
786 le?vperm $in7,$in7,$in7,$inpperm
787 vxor $out5,$in5,$rndkey0
788 vxor $out6,$in6,$rndkey0
789 vxor $out7,$in7,$rndkey0
795 vncipher $out0,$out0,v24
796 vncipher $out1,$out1,v24
797 vncipher $out2,$out2,v24
798 vncipher $out3,$out3,v24
799 vncipher $out4,$out4,v24
800 vncipher $out5,$out5,v24
801 vncipher $out6,$out6,v24
802 vncipher $out7,$out7,v24
803 lvx v24,$x20,$key_ # round[3]
804 addi $key_,$key_,0x20
806 vncipher $out0,$out0,v25
807 vncipher $out1,$out1,v25
808 vncipher $out2,$out2,v25
809 vncipher $out3,$out3,v25
810 vncipher $out4,$out4,v25
811 vncipher $out5,$out5,v25
812 vncipher $out6,$out6,v25
813 vncipher $out7,$out7,v25
814 lvx v25,$x10,$key_ # round[4]
817 subic $len,$len,128 # $len-=128
818 vncipher $out0,$out0,v24
819 vncipher $out1,$out1,v24
820 vncipher $out2,$out2,v24
821 vncipher $out3,$out3,v24
822 vncipher $out4,$out4,v24
823 vncipher $out5,$out5,v24
824 vncipher $out6,$out6,v24
825 vncipher $out7,$out7,v24
827 subfe. r0,r0,r0 # borrow?-1:0
828 vncipher $out0,$out0,v25
829 vncipher $out1,$out1,v25
830 vncipher $out2,$out2,v25
831 vncipher $out3,$out3,v25
832 vncipher $out4,$out4,v25
833 vncipher $out5,$out5,v25
834 vncipher $out6,$out6,v25
835 vncipher $out7,$out7,v25
838 vncipher $out0,$out0,v26
839 vncipher $out1,$out1,v26
840 vncipher $out2,$out2,v26
841 vncipher $out3,$out3,v26
842 vncipher $out4,$out4,v26
843 vncipher $out5,$out5,v26
844 vncipher $out6,$out6,v26
845 vncipher $out7,$out7,v26
847 add $inp,$inp,r0 # $inp is adjusted in such
848 # way that at exit from the
849 # loop inX-in7 are loaded
851 vncipher $out0,$out0,v27
852 vncipher $out1,$out1,v27
853 vncipher $out2,$out2,v27
854 vncipher $out3,$out3,v27
855 vncipher $out4,$out4,v27
856 vncipher $out5,$out5,v27
857 vncipher $out6,$out6,v27
858 vncipher $out7,$out7,v27
860 addi $key_,$sp,$FRAME+15 # rewind $key_
861 vncipher $out0,$out0,v28
862 vncipher $out1,$out1,v28
863 vncipher $out2,$out2,v28
864 vncipher $out3,$out3,v28
865 vncipher $out4,$out4,v28
866 vncipher $out5,$out5,v28
867 vncipher $out6,$out6,v28
868 vncipher $out7,$out7,v28
869 lvx v24,$x00,$key_ # re-pre-load round[1]
871 vncipher $out0,$out0,v29
872 vncipher $out1,$out1,v29
873 vncipher $out2,$out2,v29
874 vncipher $out3,$out3,v29
875 vncipher $out4,$out4,v29
876 vncipher $out5,$out5,v29
877 vncipher $out6,$out6,v29
878 vncipher $out7,$out7,v29
879 lvx v25,$x10,$key_ # re-pre-load round[2]
881 vncipher $out0,$out0,v30
882 vxor $ivec,$ivec,v31 # xor with last round key
883 vncipher $out1,$out1,v30
885 vncipher $out2,$out2,v30
887 vncipher $out3,$out3,v30
889 vncipher $out4,$out4,v30
891 vncipher $out5,$out5,v30
893 vncipher $out6,$out6,v30
895 vncipher $out7,$out7,v30
898 vncipherlast $out0,$out0,$ivec
899 vncipherlast $out1,$out1,$in0
900 lvx_u $in0,$x00,$inp # load next input block
901 vncipherlast $out2,$out2,$in1
903 vncipherlast $out3,$out3,$in2
904 le?vperm $in0,$in0,$in0,$inpperm
906 vncipherlast $out4,$out4,$in3
907 le?vperm $in1,$in1,$in1,$inpperm
909 vncipherlast $out5,$out5,$in4
910 le?vperm $in2,$in2,$in2,$inpperm
912 vncipherlast $out6,$out6,$in5
913 le?vperm $in3,$in3,$in3,$inpperm
915 vncipherlast $out7,$out7,$in6
916 le?vperm $in4,$in4,$in4,$inpperm
919 le?vperm $in5,$in5,$in5,$inpperm
923 le?vperm $out0,$out0,$out0,$inpperm
924 le?vperm $out1,$out1,$out1,$inpperm
925 stvx_u $out0,$x00,$out
926 le?vperm $in6,$in6,$in6,$inpperm
927 vxor $out0,$in0,$rndkey0
928 le?vperm $out2,$out2,$out2,$inpperm
929 stvx_u $out1,$x10,$out
930 le?vperm $in7,$in7,$in7,$inpperm
931 vxor $out1,$in1,$rndkey0
932 le?vperm $out3,$out3,$out3,$inpperm
933 stvx_u $out2,$x20,$out
934 vxor $out2,$in2,$rndkey0
935 le?vperm $out4,$out4,$out4,$inpperm
936 stvx_u $out3,$x30,$out
937 vxor $out3,$in3,$rndkey0
938 le?vperm $out5,$out5,$out5,$inpperm
939 stvx_u $out4,$x40,$out
940 vxor $out4,$in4,$rndkey0
941 le?vperm $out6,$out6,$out6,$inpperm
942 stvx_u $out5,$x50,$out
943 vxor $out5,$in5,$rndkey0
944 le?vperm $out7,$out7,$out7,$inpperm
945 stvx_u $out6,$x60,$out
946 vxor $out6,$in6,$rndkey0
947 stvx_u $out7,$x70,$out
949 vxor $out7,$in7,$rndkey0
952 beq Loop_cbc_dec8x # did $len-=128 borrow?
959 Loop_cbc_dec8x_tail: # up to 7 "words" tail...
960 vncipher $out1,$out1,v24
961 vncipher $out2,$out2,v24
962 vncipher $out3,$out3,v24
963 vncipher $out4,$out4,v24
964 vncipher $out5,$out5,v24
965 vncipher $out6,$out6,v24
966 vncipher $out7,$out7,v24
967 lvx v24,$x20,$key_ # round[3]
968 addi $key_,$key_,0x20
970 vncipher $out1,$out1,v25
971 vncipher $out2,$out2,v25
972 vncipher $out3,$out3,v25
973 vncipher $out4,$out4,v25
974 vncipher $out5,$out5,v25
975 vncipher $out6,$out6,v25
976 vncipher $out7,$out7,v25
977 lvx v25,$x10,$key_ # round[4]
978 bdnz Loop_cbc_dec8x_tail
980 vncipher $out1,$out1,v24
981 vncipher $out2,$out2,v24
982 vncipher $out3,$out3,v24
983 vncipher $out4,$out4,v24
984 vncipher $out5,$out5,v24
985 vncipher $out6,$out6,v24
986 vncipher $out7,$out7,v24
988 vncipher $out1,$out1,v25
989 vncipher $out2,$out2,v25
990 vncipher $out3,$out3,v25
991 vncipher $out4,$out4,v25
992 vncipher $out5,$out5,v25
993 vncipher $out6,$out6,v25
994 vncipher $out7,$out7,v25
996 vncipher $out1,$out1,v26
997 vncipher $out2,$out2,v26
998 vncipher $out3,$out3,v26
999 vncipher $out4,$out4,v26
1000 vncipher $out5,$out5,v26
1001 vncipher $out6,$out6,v26
1002 vncipher $out7,$out7,v26
1004 vncipher $out1,$out1,v27
1005 vncipher $out2,$out2,v27
1006 vncipher $out3,$out3,v27
1007 vncipher $out4,$out4,v27
1008 vncipher $out5,$out5,v27
1009 vncipher $out6,$out6,v27
1010 vncipher $out7,$out7,v27
1012 vncipher $out1,$out1,v28
1013 vncipher $out2,$out2,v28
1014 vncipher $out3,$out3,v28
1015 vncipher $out4,$out4,v28
1016 vncipher $out5,$out5,v28
1017 vncipher $out6,$out6,v28
1018 vncipher $out7,$out7,v28
1020 vncipher $out1,$out1,v29
1021 vncipher $out2,$out2,v29
1022 vncipher $out3,$out3,v29
1023 vncipher $out4,$out4,v29
1024 vncipher $out5,$out5,v29
1025 vncipher $out6,$out6,v29
1026 vncipher $out7,$out7,v29
1028 vncipher $out1,$out1,v30
1029 vxor $ivec,$ivec,v31 # last round key
1030 vncipher $out2,$out2,v30
1032 vncipher $out3,$out3,v30
1034 vncipher $out4,$out4,v30
1036 vncipher $out5,$out5,v30
1038 vncipher $out6,$out6,v30
1040 vncipher $out7,$out7,v30
1043 cmplwi $len,32 # switch($len)
1048 blt Lcbc_dec8x_three
1057 vncipherlast $out1,$out1,$ivec
1058 vncipherlast $out2,$out2,$in1
1059 vncipherlast $out3,$out3,$in2
1060 vncipherlast $out4,$out4,$in3
1061 vncipherlast $out5,$out5,$in4
1062 vncipherlast $out6,$out6,$in5
1063 vncipherlast $out7,$out7,$in6
1066 le?vperm $out1,$out1,$out1,$inpperm
1067 le?vperm $out2,$out2,$out2,$inpperm
1068 stvx_u $out1,$x00,$out
1069 le?vperm $out3,$out3,$out3,$inpperm
1070 stvx_u $out2,$x10,$out
1071 le?vperm $out4,$out4,$out4,$inpperm
1072 stvx_u $out3,$x20,$out
1073 le?vperm $out5,$out5,$out5,$inpperm
1074 stvx_u $out4,$x30,$out
1075 le?vperm $out6,$out6,$out6,$inpperm
1076 stvx_u $out5,$x40,$out
1077 le?vperm $out7,$out7,$out7,$inpperm
1078 stvx_u $out6,$x50,$out
1079 stvx_u $out7,$x60,$out
1085 vncipherlast $out2,$out2,$ivec
1086 vncipherlast $out3,$out3,$in2
1087 vncipherlast $out4,$out4,$in3
1088 vncipherlast $out5,$out5,$in4
1089 vncipherlast $out6,$out6,$in5
1090 vncipherlast $out7,$out7,$in6
1093 le?vperm $out2,$out2,$out2,$inpperm
1094 le?vperm $out3,$out3,$out3,$inpperm
1095 stvx_u $out2,$x00,$out
1096 le?vperm $out4,$out4,$out4,$inpperm
1097 stvx_u $out3,$x10,$out
1098 le?vperm $out5,$out5,$out5,$inpperm
1099 stvx_u $out4,$x20,$out
1100 le?vperm $out6,$out6,$out6,$inpperm
1101 stvx_u $out5,$x30,$out
1102 le?vperm $out7,$out7,$out7,$inpperm
1103 stvx_u $out6,$x40,$out
1104 stvx_u $out7,$x50,$out
1110 vncipherlast $out3,$out3,$ivec
1111 vncipherlast $out4,$out4,$in3
1112 vncipherlast $out5,$out5,$in4
1113 vncipherlast $out6,$out6,$in5
1114 vncipherlast $out7,$out7,$in6
1117 le?vperm $out3,$out3,$out3,$inpperm
1118 le?vperm $out4,$out4,$out4,$inpperm
1119 stvx_u $out3,$x00,$out
1120 le?vperm $out5,$out5,$out5,$inpperm
1121 stvx_u $out4,$x10,$out
1122 le?vperm $out6,$out6,$out6,$inpperm
1123 stvx_u $out5,$x20,$out
1124 le?vperm $out7,$out7,$out7,$inpperm
1125 stvx_u $out6,$x30,$out
1126 stvx_u $out7,$x40,$out
1132 vncipherlast $out4,$out4,$ivec
1133 vncipherlast $out5,$out5,$in4
1134 vncipherlast $out6,$out6,$in5
1135 vncipherlast $out7,$out7,$in6
1138 le?vperm $out4,$out4,$out4,$inpperm
1139 le?vperm $out5,$out5,$out5,$inpperm
1140 stvx_u $out4,$x00,$out
1141 le?vperm $out6,$out6,$out6,$inpperm
1142 stvx_u $out5,$x10,$out
1143 le?vperm $out7,$out7,$out7,$inpperm
1144 stvx_u $out6,$x20,$out
1145 stvx_u $out7,$x30,$out
1151 vncipherlast $out5,$out5,$ivec
1152 vncipherlast $out6,$out6,$in5
1153 vncipherlast $out7,$out7,$in6
1156 le?vperm $out5,$out5,$out5,$inpperm
1157 le?vperm $out6,$out6,$out6,$inpperm
1158 stvx_u $out5,$x00,$out
1159 le?vperm $out7,$out7,$out7,$inpperm
1160 stvx_u $out6,$x10,$out
1161 stvx_u $out7,$x20,$out
1167 vncipherlast $out6,$out6,$ivec
1168 vncipherlast $out7,$out7,$in6
1171 le?vperm $out6,$out6,$out6,$inpperm
1172 le?vperm $out7,$out7,$out7,$inpperm
1173 stvx_u $out6,$x00,$out
1174 stvx_u $out7,$x10,$out
1180 vncipherlast $out7,$out7,$ivec
1183 le?vperm $out7,$out7,$out7,$inpperm
1188 le?vperm $ivec,$ivec,$ivec,$inpperm
1189 stvx_u $ivec,0,$ivp # write [unaligned] iv
1193 stvx $inpperm,r10,$sp # wipe copies of round keys
1195 stvx $inpperm,r11,$sp
1197 stvx $inpperm,r10,$sp
1199 stvx $inpperm,r11,$sp
1201 stvx $inpperm,r10,$sp
1203 stvx $inpperm,r11,$sp
1205 stvx $inpperm,r10,$sp
1207 stvx $inpperm,r11,$sp
1211 lvx v20,r10,$sp # ABI says so
1233 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1234 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1235 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1236 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1237 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1238 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1239 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1242 .byte 0,12,0x14,0,0x80,6,6,0
1244 .size .${prefix}_cbc_encrypt,.-.${prefix}_cbc_encrypt
1248 #########################################################################
1249 {{{ # CTR procedure[s] #
1250 my ($inp,$out,$len,$key,$ivp,$x10,$rounds,$idx)=map("r$_",(3..10));
1251 my ($rndkey0,$rndkey1,$inout,$tmp)= map("v$_",(0..3));
1252 my ($ivec,$inptail,$inpperm,$outhead,$outperm,$outmask,$keyperm,$one)=
1257 .globl .${prefix}_ctr32_encrypt_blocks
1266 vxor $rndkey0,$rndkey0,$rndkey0
1267 le?vspltisb $tmp,0x0f
1269 lvx $ivec,0,$ivp # load [unaligned] iv
1270 lvsl $inpperm,0,$ivp
1271 lvx $inptail,$idx,$ivp
1273 le?vxor $inpperm,$inpperm,$tmp
1274 vperm $ivec,$ivec,$inptail,$inpperm
1275 vsldoi $one,$rndkey0,$one,1
1278 ?lvsl $keyperm,0,$key # prepare for unaligned key
1279 lwz $rounds,240($key)
1281 lvsr $inpperm,0,r11 # prepare for unaligned load
1283 addi $inp,$inp,15 # 15 is not typo
1284 le?vxor $inpperm,$inpperm,$tmp
1286 srwi $rounds,$rounds,1
1288 subi $rounds,$rounds,1
1291 bge _aesp8_ctr32_encrypt8x
1293 ?lvsr $outperm,0,$out # prepare for unaligned store
1294 vspltisb $outmask,-1
1296 ?vperm $outmask,$rndkey0,$outmask,$outperm
1297 le?vxor $outperm,$outperm,$tmp
1301 lvx $rndkey1,$idx,$key
1303 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1304 vxor $inout,$ivec,$rndkey0
1305 lvx $rndkey0,$idx,$key
1311 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1312 vcipher $inout,$inout,$rndkey1
1313 lvx $rndkey1,$idx,$key
1315 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1316 vcipher $inout,$inout,$rndkey0
1317 lvx $rndkey0,$idx,$key
1321 vadduwm $ivec,$ivec,$one
1325 subic. $len,$len,1 # blocks--
1327 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1328 vcipher $inout,$inout,$rndkey1
1329 lvx $rndkey1,$idx,$key
1330 vperm $dat,$dat,$inptail,$inpperm
1332 ?vperm $rndkey1,$rndkey0,$rndkey1,$keyperm
1334 vxor $dat,$dat,$rndkey1 # last round key
1335 vcipherlast $inout,$inout,$dat
1337 lvx $rndkey1,$idx,$key
1339 vperm $inout,$inout,$inout,$outperm
1340 vsel $dat,$outhead,$inout,$outmask
1342 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1344 vxor $inout,$ivec,$rndkey0
1345 lvx $rndkey0,$idx,$key
1352 lvx $inout,0,$out # redundant in aligned case
1353 vsel $inout,$outhead,$inout,$outmask
1359 .byte 0,12,0x14,0,0,0,6,0
1362 #########################################################################
1363 {{ # Optimized CTR procedure #
1365 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,8,26..31));
1366 my ($in0, $in1, $in2, $in3, $in4, $in5, $in6, $in7 )=map("v$_",(0..3,10,12..14));
1367 my ($out0,$out1,$out2,$out3,$out4,$out5,$out6,$out7)=map("v$_",(15..22));
1368 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
1369 # v26-v31 last 6 round keys
1370 my ($tmp,$keyperm)=($in3,$in4); # aliases with "caller", redundant assignment
1371 my ($two,$three,$four)=($outhead,$outperm,$outmask);
1375 _aesp8_ctr32_encrypt8x:
1376 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
1377 li r10,`$FRAME+8*16+15`
1378 li r11,`$FRAME+8*16+31`
1379 stvx v20,r10,$sp # ABI says so
1402 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
1404 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1406 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1408 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1410 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1412 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1414 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1418 subi $rounds,$rounds,3 # -4 in total
1420 lvx $rndkey0,$x00,$key # load key schedule
1424 ?vperm $rndkey0,$rndkey0,v30,$keyperm
1425 addi $key_,$sp,$FRAME+15
1429 ?vperm v24,v30,v31,$keyperm
1432 stvx v24,$x00,$key_ # off-load round[1]
1433 ?vperm v25,v31,v30,$keyperm
1435 stvx v25,$x10,$key_ # off-load round[2]
1436 addi $key_,$key_,0x20
1437 bdnz Load_ctr32_enc_key
1440 ?vperm v24,v30,v31,$keyperm
1442 stvx v24,$x00,$key_ # off-load round[3]
1443 ?vperm v25,v31,v26,$keyperm
1445 stvx v25,$x10,$key_ # off-load round[4]
1446 addi $key_,$sp,$FRAME+15 # rewind $key_
1447 ?vperm v26,v26,v27,$keyperm
1449 ?vperm v27,v27,v28,$keyperm
1451 ?vperm v28,v28,v29,$keyperm
1453 ?vperm v29,v29,v30,$keyperm
1454 lvx $out0,$x70,$key # borrow $out0
1455 ?vperm v30,v30,v31,$keyperm
1456 lvx v24,$x00,$key_ # pre-load round[1]
1457 ?vperm v31,v31,$out0,$keyperm
1458 lvx v25,$x10,$key_ # pre-load round[2]
1460 vadduqm $two,$one,$one
1461 subi $inp,$inp,15 # undo "caller"
1464 vadduqm $out1,$ivec,$one # counter values ...
1465 vadduqm $out2,$ivec,$two
1466 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1468 vadduqm $out3,$out1,$two
1469 vxor $out1,$out1,$rndkey0
1470 le?lvsl $inpperm,0,$idx
1471 vadduqm $out4,$out2,$two
1472 vxor $out2,$out2,$rndkey0
1473 le?vspltisb $tmp,0x0f
1474 vadduqm $out5,$out3,$two
1475 vxor $out3,$out3,$rndkey0
1476 le?vxor $inpperm,$inpperm,$tmp # transform for lvx_u/stvx_u
1477 vadduqm $out6,$out4,$two
1478 vxor $out4,$out4,$rndkey0
1479 vadduqm $out7,$out5,$two
1480 vxor $out5,$out5,$rndkey0
1481 vadduqm $ivec,$out6,$two # next counter value
1482 vxor $out6,$out6,$rndkey0
1483 vxor $out7,$out7,$rndkey0
1489 vcipher $out0,$out0,v24
1490 vcipher $out1,$out1,v24
1491 vcipher $out2,$out2,v24
1492 vcipher $out3,$out3,v24
1493 vcipher $out4,$out4,v24
1494 vcipher $out5,$out5,v24
1495 vcipher $out6,$out6,v24
1496 vcipher $out7,$out7,v24
1497 Loop_ctr32_enc8x_middle:
1498 lvx v24,$x20,$key_ # round[3]
1499 addi $key_,$key_,0x20
1501 vcipher $out0,$out0,v25
1502 vcipher $out1,$out1,v25
1503 vcipher $out2,$out2,v25
1504 vcipher $out3,$out3,v25
1505 vcipher $out4,$out4,v25
1506 vcipher $out5,$out5,v25
1507 vcipher $out6,$out6,v25
1508 vcipher $out7,$out7,v25
1509 lvx v25,$x10,$key_ # round[4]
1510 bdnz Loop_ctr32_enc8x
1512 subic r11,$len,256 # $len-256, borrow $key_
1513 vcipher $out0,$out0,v24
1514 vcipher $out1,$out1,v24
1515 vcipher $out2,$out2,v24
1516 vcipher $out3,$out3,v24
1517 vcipher $out4,$out4,v24
1518 vcipher $out5,$out5,v24
1519 vcipher $out6,$out6,v24
1520 vcipher $out7,$out7,v24
1522 subfe r0,r0,r0 # borrow?-1:0
1523 vcipher $out0,$out0,v25
1524 vcipher $out1,$out1,v25
1525 vcipher $out2,$out2,v25
1526 vcipher $out3,$out3,v25
1527 vcipher $out4,$out4,v25
1528 vcipher $out5,$out5,v25
1529 vcipher $out6,$out6,v25
1530 vcipher $out7,$out7,v25
1533 addi $key_,$sp,$FRAME+15 # rewind $key_
1534 vcipher $out0,$out0,v26
1535 vcipher $out1,$out1,v26
1536 vcipher $out2,$out2,v26
1537 vcipher $out3,$out3,v26
1538 vcipher $out4,$out4,v26
1539 vcipher $out5,$out5,v26
1540 vcipher $out6,$out6,v26
1541 vcipher $out7,$out7,v26
1542 lvx v24,$x00,$key_ # re-pre-load round[1]
1544 subic $len,$len,129 # $len-=129
1545 vcipher $out0,$out0,v27
1546 addi $len,$len,1 # $len-=128 really
1547 vcipher $out1,$out1,v27
1548 vcipher $out2,$out2,v27
1549 vcipher $out3,$out3,v27
1550 vcipher $out4,$out4,v27
1551 vcipher $out5,$out5,v27
1552 vcipher $out6,$out6,v27
1553 vcipher $out7,$out7,v27
1554 lvx v25,$x10,$key_ # re-pre-load round[2]
1556 vcipher $out0,$out0,v28
1557 lvx_u $in0,$x00,$inp # load input
1558 vcipher $out1,$out1,v28
1559 lvx_u $in1,$x10,$inp
1560 vcipher $out2,$out2,v28
1561 lvx_u $in2,$x20,$inp
1562 vcipher $out3,$out3,v28
1563 lvx_u $in3,$x30,$inp
1564 vcipher $out4,$out4,v28
1565 lvx_u $in4,$x40,$inp
1566 vcipher $out5,$out5,v28
1567 lvx_u $in5,$x50,$inp
1568 vcipher $out6,$out6,v28
1569 lvx_u $in6,$x60,$inp
1570 vcipher $out7,$out7,v28
1571 lvx_u $in7,$x70,$inp
1574 vcipher $out0,$out0,v29
1575 le?vperm $in0,$in0,$in0,$inpperm
1576 vcipher $out1,$out1,v29
1577 le?vperm $in1,$in1,$in1,$inpperm
1578 vcipher $out2,$out2,v29
1579 le?vperm $in2,$in2,$in2,$inpperm
1580 vcipher $out3,$out3,v29
1581 le?vperm $in3,$in3,$in3,$inpperm
1582 vcipher $out4,$out4,v29
1583 le?vperm $in4,$in4,$in4,$inpperm
1584 vcipher $out5,$out5,v29
1585 le?vperm $in5,$in5,$in5,$inpperm
1586 vcipher $out6,$out6,v29
1587 le?vperm $in6,$in6,$in6,$inpperm
1588 vcipher $out7,$out7,v29
1589 le?vperm $in7,$in7,$in7,$inpperm
1591 add $inp,$inp,r0 # $inp is adjusted in such
1592 # way that at exit from the
1593 # loop inX-in7 are loaded
1595 subfe. r0,r0,r0 # borrow?-1:0
1596 vcipher $out0,$out0,v30
1597 vxor $in0,$in0,v31 # xor with last round key
1598 vcipher $out1,$out1,v30
1600 vcipher $out2,$out2,v30
1602 vcipher $out3,$out3,v30
1604 vcipher $out4,$out4,v30
1606 vcipher $out5,$out5,v30
1608 vcipher $out6,$out6,v30
1610 vcipher $out7,$out7,v30
1613 bne Lctr32_enc8x_break # did $len-129 borrow?
1615 vcipherlast $in0,$out0,$in0
1616 vcipherlast $in1,$out1,$in1
1617 vadduqm $out1,$ivec,$one # counter values ...
1618 vcipherlast $in2,$out2,$in2
1619 vadduqm $out2,$ivec,$two
1620 vxor $out0,$ivec,$rndkey0 # ... xored with rndkey[0]
1621 vcipherlast $in3,$out3,$in3
1622 vadduqm $out3,$out1,$two
1623 vxor $out1,$out1,$rndkey0
1624 vcipherlast $in4,$out4,$in4
1625 vadduqm $out4,$out2,$two
1626 vxor $out2,$out2,$rndkey0
1627 vcipherlast $in5,$out5,$in5
1628 vadduqm $out5,$out3,$two
1629 vxor $out3,$out3,$rndkey0
1630 vcipherlast $in6,$out6,$in6
1631 vadduqm $out6,$out4,$two
1632 vxor $out4,$out4,$rndkey0
1633 vcipherlast $in7,$out7,$in7
1634 vadduqm $out7,$out5,$two
1635 vxor $out5,$out5,$rndkey0
1636 le?vperm $in0,$in0,$in0,$inpperm
1637 vadduqm $ivec,$out6,$two # next counter value
1638 vxor $out6,$out6,$rndkey0
1639 le?vperm $in1,$in1,$in1,$inpperm
1640 vxor $out7,$out7,$rndkey0
1643 vcipher $out0,$out0,v24
1644 stvx_u $in0,$x00,$out
1645 le?vperm $in2,$in2,$in2,$inpperm
1646 vcipher $out1,$out1,v24
1647 stvx_u $in1,$x10,$out
1648 le?vperm $in3,$in3,$in3,$inpperm
1649 vcipher $out2,$out2,v24
1650 stvx_u $in2,$x20,$out
1651 le?vperm $in4,$in4,$in4,$inpperm
1652 vcipher $out3,$out3,v24
1653 stvx_u $in3,$x30,$out
1654 le?vperm $in5,$in5,$in5,$inpperm
1655 vcipher $out4,$out4,v24
1656 stvx_u $in4,$x40,$out
1657 le?vperm $in6,$in6,$in6,$inpperm
1658 vcipher $out5,$out5,v24
1659 stvx_u $in5,$x50,$out
1660 le?vperm $in7,$in7,$in7,$inpperm
1661 vcipher $out6,$out6,v24
1662 stvx_u $in6,$x60,$out
1663 vcipher $out7,$out7,v24
1664 stvx_u $in7,$x70,$out
1667 b Loop_ctr32_enc8x_middle
1672 blt Lctr32_enc8x_one
1674 beq Lctr32_enc8x_two
1676 blt Lctr32_enc8x_three
1678 beq Lctr32_enc8x_four
1680 blt Lctr32_enc8x_five
1682 beq Lctr32_enc8x_six
1684 blt Lctr32_enc8x_seven
1687 vcipherlast $out0,$out0,$in0
1688 vcipherlast $out1,$out1,$in1
1689 vcipherlast $out2,$out2,$in2
1690 vcipherlast $out3,$out3,$in3
1691 vcipherlast $out4,$out4,$in4
1692 vcipherlast $out5,$out5,$in5
1693 vcipherlast $out6,$out6,$in6
1694 vcipherlast $out7,$out7,$in7
1696 le?vperm $out0,$out0,$out0,$inpperm
1697 le?vperm $out1,$out1,$out1,$inpperm
1698 stvx_u $out0,$x00,$out
1699 le?vperm $out2,$out2,$out2,$inpperm
1700 stvx_u $out1,$x10,$out
1701 le?vperm $out3,$out3,$out3,$inpperm
1702 stvx_u $out2,$x20,$out
1703 le?vperm $out4,$out4,$out4,$inpperm
1704 stvx_u $out3,$x30,$out
1705 le?vperm $out5,$out5,$out5,$inpperm
1706 stvx_u $out4,$x40,$out
1707 le?vperm $out6,$out6,$out6,$inpperm
1708 stvx_u $out5,$x50,$out
1709 le?vperm $out7,$out7,$out7,$inpperm
1710 stvx_u $out6,$x60,$out
1711 stvx_u $out7,$x70,$out
1717 vcipherlast $out0,$out0,$in1
1718 vcipherlast $out1,$out1,$in2
1719 vcipherlast $out2,$out2,$in3
1720 vcipherlast $out3,$out3,$in4
1721 vcipherlast $out4,$out4,$in5
1722 vcipherlast $out5,$out5,$in6
1723 vcipherlast $out6,$out6,$in7
1725 le?vperm $out0,$out0,$out0,$inpperm
1726 le?vperm $out1,$out1,$out1,$inpperm
1727 stvx_u $out0,$x00,$out
1728 le?vperm $out2,$out2,$out2,$inpperm
1729 stvx_u $out1,$x10,$out
1730 le?vperm $out3,$out3,$out3,$inpperm
1731 stvx_u $out2,$x20,$out
1732 le?vperm $out4,$out4,$out4,$inpperm
1733 stvx_u $out3,$x30,$out
1734 le?vperm $out5,$out5,$out5,$inpperm
1735 stvx_u $out4,$x40,$out
1736 le?vperm $out6,$out6,$out6,$inpperm
1737 stvx_u $out5,$x50,$out
1738 stvx_u $out6,$x60,$out
1744 vcipherlast $out0,$out0,$in2
1745 vcipherlast $out1,$out1,$in3
1746 vcipherlast $out2,$out2,$in4
1747 vcipherlast $out3,$out3,$in5
1748 vcipherlast $out4,$out4,$in6
1749 vcipherlast $out5,$out5,$in7
1751 le?vperm $out0,$out0,$out0,$inpperm
1752 le?vperm $out1,$out1,$out1,$inpperm
1753 stvx_u $out0,$x00,$out
1754 le?vperm $out2,$out2,$out2,$inpperm
1755 stvx_u $out1,$x10,$out
1756 le?vperm $out3,$out3,$out3,$inpperm
1757 stvx_u $out2,$x20,$out
1758 le?vperm $out4,$out4,$out4,$inpperm
1759 stvx_u $out3,$x30,$out
1760 le?vperm $out5,$out5,$out5,$inpperm
1761 stvx_u $out4,$x40,$out
1762 stvx_u $out5,$x50,$out
1768 vcipherlast $out0,$out0,$in3
1769 vcipherlast $out1,$out1,$in4
1770 vcipherlast $out2,$out2,$in5
1771 vcipherlast $out3,$out3,$in6
1772 vcipherlast $out4,$out4,$in7
1774 le?vperm $out0,$out0,$out0,$inpperm
1775 le?vperm $out1,$out1,$out1,$inpperm
1776 stvx_u $out0,$x00,$out
1777 le?vperm $out2,$out2,$out2,$inpperm
1778 stvx_u $out1,$x10,$out
1779 le?vperm $out3,$out3,$out3,$inpperm
1780 stvx_u $out2,$x20,$out
1781 le?vperm $out4,$out4,$out4,$inpperm
1782 stvx_u $out3,$x30,$out
1783 stvx_u $out4,$x40,$out
1789 vcipherlast $out0,$out0,$in4
1790 vcipherlast $out1,$out1,$in5
1791 vcipherlast $out2,$out2,$in6
1792 vcipherlast $out3,$out3,$in7
1794 le?vperm $out0,$out0,$out0,$inpperm
1795 le?vperm $out1,$out1,$out1,$inpperm
1796 stvx_u $out0,$x00,$out
1797 le?vperm $out2,$out2,$out2,$inpperm
1798 stvx_u $out1,$x10,$out
1799 le?vperm $out3,$out3,$out3,$inpperm
1800 stvx_u $out2,$x20,$out
1801 stvx_u $out3,$x30,$out
1807 vcipherlast $out0,$out0,$in5
1808 vcipherlast $out1,$out1,$in6
1809 vcipherlast $out2,$out2,$in7
1811 le?vperm $out0,$out0,$out0,$inpperm
1812 le?vperm $out1,$out1,$out1,$inpperm
1813 stvx_u $out0,$x00,$out
1814 le?vperm $out2,$out2,$out2,$inpperm
1815 stvx_u $out1,$x10,$out
1816 stvx_u $out2,$x20,$out
1822 vcipherlast $out0,$out0,$in6
1823 vcipherlast $out1,$out1,$in7
1825 le?vperm $out0,$out0,$out0,$inpperm
1826 le?vperm $out1,$out1,$out1,$inpperm
1827 stvx_u $out0,$x00,$out
1828 stvx_u $out1,$x10,$out
1834 vcipherlast $out0,$out0,$in7
1836 le?vperm $out0,$out0,$out0,$inpperm
1843 stvx $inpperm,r10,$sp # wipe copies of round keys
1845 stvx $inpperm,r11,$sp
1847 stvx $inpperm,r10,$sp
1849 stvx $inpperm,r11,$sp
1851 stvx $inpperm,r10,$sp
1853 stvx $inpperm,r11,$sp
1855 stvx $inpperm,r10,$sp
1857 stvx $inpperm,r11,$sp
1861 lvx v20,r10,$sp # ABI says so
1883 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
1884 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
1885 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
1886 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
1887 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
1888 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
1889 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
1892 .byte 0,12,0x14,0,0x80,6,6,0
1894 .size .${prefix}_ctr32_encrypt_blocks,.-.${prefix}_ctr32_encrypt_blocks
1898 #########################################################################
1899 {{{ # XTS procedures #
1900 # int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, #
1901 # const AES_KEY *key1, const AES_KEY *key2, #
1902 # [const] unsigned char iv[16]); #
1903 # If $key2 is NULL, then a "tweak chaining" mode is engaged, in which #
1904 # input tweak value is assumed to be encrypted already, and last tweak #
1905 # value, one suitable for consecutive call on same chunk of data, is #
1906 # written back to original buffer. In addition, in "tweak chaining" #
1907 # mode only complete input blocks are processed. #
# Register assignment for the scalar XTS procedures.  GPRs r3..r10 carry
# the C-ABI arguments (inp, out, len, key1, key2, ivp) plus two scratch
# registers ($rounds, $idx).
1909 my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10));
# v0-v2: round-key staging and the block being processed.
1910 my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2));
# v3-v7: output block, tail of an unaligned load, and the vperm masks for
# unaligned input, little-endian byte-swap, and unaligned key material.
1911 my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7));
# v8-v12: current tweak, splat constants 0x07.. and 0x870101..01 used for
# the GF(2^128) tweak doubling, a scratch vector, and the look-ahead tweak
# needed by the decrypt ciphertext-stealing path.
1912 my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12));
# $key2 is consumed early (it only encrypts the input tweak), so its GPR
# is recycled to hold len%16 for the stealing tail.
1913 my $taillen = $key2;
1915 ($inp,$idx) = ($idx,$inp); # reassign
# int ${prefix}_xts_encrypt(const char *inp, char *out, size_t len,
#                           const AES_KEY *key1, const AES_KEY *key2,
#                           [const] unsigned char iv[16]);
# Scalar (one block per iteration) XTS encrypt.  Inputs large enough are
# diverted to the 6x-interleaved _aesp8_xts_encrypt6x below.  "le?"/"be?"
# prefixed instructions are emitted only for little-/big-endian builds;
# "?"-prefixed vperms are the endian-conditioned key-alignment shuffles
# (both resolved by the perlasm preprocessor).
1918 .globl .${prefix}_xts_encrypt
1919 mr $inp,r3 # reassign
1925 mfspr r12,256 # save vrsave
# Build the LE byte-reversal mask in $leperm (little-endian builds only).
1929 vspltisb $seven,0x07 # 0x070707..07
1930 le?lvsl $leperm,r11,r11
1931 le?vspltisb $tmp,0x0f
1932 le?vxor $leperm,$leperm,$seven
# Gather the possibly unaligned 16-byte IV/tweak via two lvx + vperm.
1935 lvx $tweak,0,$ivp # load [unaligned] iv
1936 lvsl $inpperm,0,$ivp
1937 lvx $inptail,$idx,$ivp
1938 le?vxor $inpperm,$inpperm,$tmp
1939 vperm $tweak,$tweak,$inptail,$inpperm
1942 lvsr $inpperm,0,r11 # prepare for unaligned load
1944 addi $inp,$inp,15 # 15 is not typo
1945 le?vxor $inpperm,$inpperm,$tmp
# Standard XTS: encrypt the input tweak with key2.  key2==NULL selects
# "tweak chaining" mode where the tweak arrives already encrypted.
1947 ${UCMP}i $key2,0 # key2==NULL?
1948 beq Lxts_enc_no_key2
1950 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
# AES_KEY->rounds lives at offset 240; halved because each loop pass
# below applies two rounds.
1951 lwz $rounds,240($key2)
1952 srwi $rounds,$rounds,1
1953 subi $rounds,$rounds,1
1956 lvx $rndkey0,0,$key2
1957 lvx $rndkey1,$idx,$key2
# One full AES pass over the tweak; round keys are realigned with
# $keyperm as they stream in.
1959 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1960 vxor $tweak,$tweak,$rndkey0
1961 lvx $rndkey0,$idx,$key2
1966 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1967 vcipher $tweak,$tweak,$rndkey1
1968 lvx $rndkey1,$idx,$key2
1970 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1971 vcipher $tweak,$tweak,$rndkey0
1972 lvx $rndkey0,$idx,$key2
1976 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
1977 vcipher $tweak,$tweak,$rndkey1
1978 lvx $rndkey1,$idx,$key2
1979 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
1980 vcipherlast $tweak,$tweak,$rndkey0
# With key2 supplied the tweak is never written back ($ivp cleared here).
1982 li $ivp,0 # don't chain the tweak
1987 and $len,$len,$idx # in "tweak chaining"
1988 # mode only complete
1989 # blocks are processed
# Set up key1 (the data key) the same way.
1994 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
1995 lwz $rounds,240($key1)
1996 srwi $rounds,$rounds,1
1997 subi $rounds,$rounds,1
# Constants for tweak doubling: 0x87 is the XTS reduction polynomial;
# vsldoi arranges it as 0x870101..01 to match the vector lane order used
# by the vsrab/vand/vxor sequence below.
2000 vslb $eighty7,$seven,$seven # 0x808080..80
2001 vor $eighty7,$eighty7,$seven # 0x878787..87
2002 vspltisb $tmp,1 # 0x010101..01
2003 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
# Long inputs (length compare elided in this listing) take the 6x path.
2006 bge _aesp8_xts_encrypt6x
2008 andi. $taillen,$len,15
2010 subi $taillen,$taillen,16
# Main loop body: tweak-xor, full AES encrypt, tweak-xor folded into the
# last round key, store.
2015 lvx $rndkey0,0,$key1
2016 lvx $rndkey1,$idx,$key1
2018 vperm $inout,$inout,$inptail,$inpperm
2019 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2020 vxor $inout,$inout,$tweak
2021 vxor $inout,$inout,$rndkey0
2022 lvx $rndkey0,$idx,$key1
2029 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2030 vcipher $inout,$inout,$rndkey1
2031 lvx $rndkey1,$idx,$key1
2033 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2034 vcipher $inout,$inout,$rndkey0
2035 lvx $rndkey0,$idx,$key1
2039 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2040 vcipher $inout,$inout,$rndkey1
2041 lvx $rndkey1,$idx,$key1
2043 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
# Folding the tweak into the last round key saves one vxor per block.
2044 vxor $rndkey0,$rndkey0,$tweak
2045 vcipherlast $output,$inout,$rndkey0
2047 le?vperm $tmp,$output,$output,$leperm
2049 le?stvx_u $tmp,0,$out
2050 be?stvx_u $output,0,$out
2059 lvx $rndkey0,0,$key1
2060 lvx $rndkey1,$idx,$key1
# tweak *= x in GF(2^128): vsrab replicates each byte's sign, vsldoi
# rotates the carry byte into position, mask with 0x870101..01, xor into
# the doubled tweak.
2068 vsrab $tmp,$tweak,$seven # next tweak value
2069 vaddubm $tweak,$tweak,$tweak
2070 vsldoi $tmp,$tmp,$tmp,15
2071 vand $tmp,$tmp,$eighty7
2072 vxor $tweak,$tweak,$tmp
2074 vperm $inout,$inout,$inptail,$inpperm
2075 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2076 vxor $inout,$inout,$tweak
2077 vxor $output,$output,$rndkey0 # just in case $len<16
2078 vxor $inout,$inout,$rndkey0
2079 lvx $rndkey0,$idx,$key1
# Ciphertext stealing: splice the partial tail over the previous
# ciphertext block, then loop once more to encrypt the merged block.
2086 vxor $output,$output,$tweak
2087 lvsr $inpperm,0,$len # $inpperm is no longer needed
2088 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2090 vperm $inptail,$inptail,$tmp,$inpperm
2091 vsel $inout,$inout,$output,$inptail
2100 bdnz Loop_xts_enc_steal
2103 b Loop_xts_enc # one more time...
# Exit: in tweak-chaining mode ($ivp still non-zero) compute and write
# back the next tweak for a consecutive call on the same data stream.
2109 vsrab $tmp,$tweak,$seven # next tweak value
2110 vaddubm $tweak,$tweak,$tweak
2111 vsldoi $tmp,$tmp,$tmp,15
2112 vand $tmp,$tmp,$eighty7
2113 vxor $tweak,$tweak,$tmp
2115 le?vperm $tweak,$tweak,$tweak,$leperm
2116 stvx_u $tweak,0,$ivp
2119 mtspr 256,r12 # restore vrsave
2123 .byte 0,12,0x04,0,0x80,6,6,0
2125 .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt
# int ${prefix}_xts_decrypt(const char *inp, char *out, size_t len,
#                           const AES_KEY *key1, const AES_KEY *key2,
#                           [const] unsigned char iv[16]);
# Scalar XTS decrypt, mirroring xts_encrypt above but using the inverse
# rounds (vncipher/vncipherlast).  The tweak is still *encrypted* with
# key2 — XTS uses the forward cipher for tweak generation in both
# directions.  Long inputs branch to _aesp8_xts_decrypt6x.
2127 .globl .${prefix}_xts_decrypt
2128 mr $inp,r3 # reassign
2134 mfspr r12,256 # save vrsave
# LE byte-reversal mask (little-endian builds only).
2143 vspltisb $seven,0x07 # 0x070707..07
2144 le?lvsl $leperm,r11,r11
2145 le?vspltisb $tmp,0x0f
2146 le?vxor $leperm,$leperm,$seven
# Gather the possibly unaligned IV/tweak.
2149 lvx $tweak,0,$ivp # load [unaligned] iv
2150 lvsl $inpperm,0,$ivp
2151 lvx $inptail,$idx,$ivp
2152 le?vxor $inpperm,$inpperm,$tmp
2153 vperm $tweak,$tweak,$inptail,$inpperm
2156 lvsr $inpperm,0,r11 # prepare for unaligned load
2158 addi $inp,$inp,15 # 15 is not typo
2159 le?vxor $inpperm,$inpperm,$tmp
# key2==NULL selects "tweak chaining" mode (tweak arrives pre-encrypted).
2161 ${UCMP}i $key2,0 # key2==NULL?
2162 beq Lxts_dec_no_key2
2164 ?lvsl $keyperm,0,$key2 # prepare for unaligned key
# AES_KEY->rounds at offset 240, pre-halved for two rounds per pass.
2165 lwz $rounds,240($key2)
2166 srwi $rounds,$rounds,1
2167 subi $rounds,$rounds,1
2170 lvx $rndkey0,0,$key2
2171 lvx $rndkey1,$idx,$key2
# Forward-encrypt the tweak with key2.
2173 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2174 vxor $tweak,$tweak,$rndkey0
2175 lvx $rndkey0,$idx,$key2
2180 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2181 vcipher $tweak,$tweak,$rndkey1
2182 lvx $rndkey1,$idx,$key2
2184 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2185 vcipher $tweak,$tweak,$rndkey0
2186 lvx $rndkey0,$idx,$key2
2190 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2191 vcipher $tweak,$tweak,$rndkey1
2192 lvx $rndkey1,$idx,$key2
2193 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2194 vcipherlast $tweak,$tweak,$rndkey0
2196 li $ivp,0 # don't chain the tweak
# NOTE(review): unlike encrypt's "and", decrypt rounds $len using an
# $idx computed on elided lines — verify against the full source.
2202 add $len,$len,$idx # in "tweak chaining"
2203 # mode only complete
2204 # blocks are processed
# Set up key1 (data key) and the 0x870101..01 tweak-doubling constant.
2209 ?lvsl $keyperm,0,$key1 # prepare for unaligned key
2210 lwz $rounds,240($key1)
2211 srwi $rounds,$rounds,1
2212 subi $rounds,$rounds,1
2215 vslb $eighty7,$seven,$seven # 0x808080..80
2216 vor $eighty7,$eighty7,$seven # 0x878787..87
2217 vspltisb $tmp,1 # 0x010101..01
2218 vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01
2221 bge _aesp8_xts_decrypt6x
# Main one-block loop: tweak-xor, inverse AES, tweak folded into the
# final round key, store.
2223 lvx $rndkey0,0,$key1
2224 lvx $rndkey1,$idx,$key1
2226 vperm $inout,$inout,$inptail,$inpperm
2227 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2228 vxor $inout,$inout,$tweak
2229 vxor $inout,$inout,$rndkey0
2230 lvx $rndkey0,$idx,$key1
2240 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2241 vncipher $inout,$inout,$rndkey1
2242 lvx $rndkey1,$idx,$key1
2244 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2245 vncipher $inout,$inout,$rndkey0
2246 lvx $rndkey0,$idx,$key1
2250 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2251 vncipher $inout,$inout,$rndkey1
2252 lvx $rndkey1,$idx,$key1
2254 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2255 vxor $rndkey0,$rndkey0,$tweak
2256 vncipherlast $output,$inout,$rndkey0
2258 le?vperm $tmp,$output,$output,$leperm
2260 le?stvx_u $tmp,0,$out
2261 be?stvx_u $output,0,$out
2270 lvx $rndkey0,0,$key1
2271 lvx $rndkey1,$idx,$key1
# tweak *= x in GF(2^128) (same vsrab/vaddubm/vsldoi/vand/vxor idiom).
2274 vsrab $tmp,$tweak,$seven # next tweak value
2275 vaddubm $tweak,$tweak,$tweak
2276 vsldoi $tmp,$tmp,$tmp,15
2277 vand $tmp,$tmp,$eighty7
2278 vxor $tweak,$tweak,$tmp
2280 vperm $inout,$inout,$inptail,$inpperm
2281 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2282 vxor $inout,$inout,$tweak
2283 vxor $inout,$inout,$rndkey0
2284 lvx $rndkey0,$idx,$key1
# Short/stealing path: the last *full* block must be decrypted under the
# look-ahead tweak ($tweak1) while the stolen tail uses the current one.
2292 vsrab $tmp,$tweak,$seven # next tweak value
2293 vaddubm $tweak1,$tweak,$tweak
2294 vsldoi $tmp,$tmp,$tmp,15
2295 vand $tmp,$tmp,$eighty7
2296 vxor $tweak1,$tweak1,$tmp
# Undo the $tweak xor applied above and re-xor with $tweak1 instead.
2301 vxor $inout,$inout,$tweak # :-(
2302 vxor $inout,$inout,$tweak1 # :-)
2305 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2306 vncipher $inout,$inout,$rndkey1
2307 lvx $rndkey1,$idx,$key1
2309 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2310 vncipher $inout,$inout,$rndkey0
2311 lvx $rndkey0,$idx,$key1
2313 bdnz Loop_xts_dec_short
2315 ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm
2316 vncipher $inout,$inout,$rndkey1
2317 lvx $rndkey1,$idx,$key1
2319 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2320 vxor $rndkey0,$rndkey0,$tweak1
2321 vncipherlast $output,$inout,$rndkey0
2323 le?vperm $tmp,$output,$output,$leperm
2325 le?stvx_u $tmp,0,$out
2326 be?stvx_u $output,0,$out
# Ciphertext stealing: splice the partial tail over the decrypted block
# and loop once more under the original tweak.
2331 lvx $rndkey0,0,$key1
2332 lvx $rndkey1,$idx,$key1
2334 vperm $inout,$inout,$inptail,$inpperm
2335 ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm
2337 lvsr $inpperm,0,$len # $inpperm is no longer needed
2338 vxor $inptail,$inptail,$inptail # $inptail is no longer needed
2340 vperm $inptail,$inptail,$tmp,$inpperm
2341 vsel $inout,$inout,$output,$inptail
2343 vxor $rndkey0,$rndkey0,$tweak
2344 vxor $inout,$inout,$rndkey0
2345 lvx $rndkey0,$idx,$key1
2354 bdnz Loop_xts_dec_steal
2357 b Loop_xts_dec # one more time...
# Exit: chain out the next tweak when $ivp was left non-zero.
2363 vsrab $tmp,$tweak,$seven # next tweak value
2364 vaddubm $tweak,$tweak,$tweak
2365 vsldoi $tmp,$tmp,$tmp,15
2366 vand $tmp,$tmp,$eighty7
2367 vxor $tweak,$tweak,$tmp
2369 le?vperm $tweak,$tweak,$tweak,$leperm
2370 stvx_u $tweak,0,$ivp
2373 mtspr 256,r12 # restore vrsave
2377 .byte 0,12,0x04,0,0x80,6,6,0
2379 .size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt
2381 #########################################################################
2382 {{ # Optimized XTS procedures #
# Register assignment for the 6x-interleaved XTS path.  $x00..$x70 are
# GPR offsets 0x00..0x70 used as lvx/stvx index registers.
2384 my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
# On OSX-style flavours r0 cannot be used as an index; fall back to the
# literal 0 so "lvx vr,0,base" encodes a zero offset.
2385 $x00=0 if ($flavour =~ /osx/);
# Six input blocks, six in-flight cipher states, six per-block tweaks.
2386 my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
2387 my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
2388 my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
2389 my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
2390 # v26-v31 last 6 round keys
2391 my ($keyperm)=($out0); # aliases with "caller", redundant assignment
# _aesp8_xts_encrypt6x: six-way interleaved XTS encrypt for large inputs.
# Entered from xts_encrypt with the tweak, key pointers, and constants
# already prepared.  Keeps six AES states in flight to hide vcipher
# latency; tweak doubling and next-block loads are woven between rounds.
2396 _aesp8_xts_encrypt6x:
# Prologue: allocate frame, save LR, the non-volatile VRs v20+ and GPRs
# r26-r31 that serve as load/store offsets.
2397 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
2399 li r7,`$FRAME+8*16+15`
2400 li r3,`$FRAME+8*16+31`
2401 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
2402 stvx v20,r7,$sp # ABI says so
2425 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
2427 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2429 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2431 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2433 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2435 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2437 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
# Stream the key schedule once: align each round key with $keyperm,
# off-load rounds [1..4] to the stack (refetched per 6x iteration) and
# keep the last six round keys resident in v26-v31.
2441 subi $rounds,$rounds,3 # -4 in total
2443 lvx $rndkey0,$x00,$key1 # load key schedule
2445 addi $key1,$key1,0x20
2447 ?vperm $rndkey0,$rndkey0,v30,$keyperm
2448 addi $key_,$sp,$FRAME+15
2452 ?vperm v24,v30,v31,$keyperm
2454 addi $key1,$key1,0x20
2455 stvx v24,$x00,$key_ # off-load round[1]
2456 ?vperm v25,v31,v30,$keyperm
2458 stvx v25,$x10,$key_ # off-load round[2]
2459 addi $key_,$key_,0x20
2460 bdnz Load_xts_enc_key
2463 ?vperm v24,v30,v31,$keyperm
2465 stvx v24,$x00,$key_ # off-load round[3]
2466 ?vperm v25,v31,v26,$keyperm
2468 stvx v25,$x10,$key_ # off-load round[4]
2469 addi $key_,$sp,$FRAME+15 # rewind $key_
2470 ?vperm v26,v26,v27,$keyperm
2472 ?vperm v27,v27,v28,$keyperm
2474 ?vperm v28,v28,v29,$keyperm
2476 ?vperm v29,v29,v30,$keyperm
2477 lvx $twk5,$x70,$key1 # borrow $twk5
2478 ?vperm v30,v30,v31,$keyperm
2479 lvx v24,$x00,$key_ # pre-load round[1]
2480 ?vperm v31,v31,$twk5,$keyperm
2481 lvx v25,$x10,$key_ # pre-load round[2]
# Precompute six tweaks (twk0-twk5, each pre-xored with round key 0) and
# the first six whitened states $out0-$out5; each step doubles $tweak in
# GF(2^128) via the vsrab/vaddubm/vsldoi/vand/vxor idiom.
2483 vperm $in0,$inout,$inptail,$inpperm
2484 subi $inp,$inp,31 # undo "caller"
2485 vxor $twk0,$tweak,$rndkey0
2486 vsrab $tmp,$tweak,$seven # next tweak value
2487 vaddubm $tweak,$tweak,$tweak
2488 vsldoi $tmp,$tmp,$tmp,15
2489 vand $tmp,$tmp,$eighty7
2490 vxor $out0,$in0,$twk0
2491 vxor $tweak,$tweak,$tmp
2493 lvx_u $in1,$x10,$inp
2494 vxor $twk1,$tweak,$rndkey0
2495 vsrab $tmp,$tweak,$seven # next tweak value
2496 vaddubm $tweak,$tweak,$tweak
2497 vsldoi $tmp,$tmp,$tmp,15
2498 le?vperm $in1,$in1,$in1,$leperm
2499 vand $tmp,$tmp,$eighty7
2500 vxor $out1,$in1,$twk1
2501 vxor $tweak,$tweak,$tmp
2503 lvx_u $in2,$x20,$inp
2504 andi. $taillen,$len,15
2505 vxor $twk2,$tweak,$rndkey0
2506 vsrab $tmp,$tweak,$seven # next tweak value
2507 vaddubm $tweak,$tweak,$tweak
2508 vsldoi $tmp,$tmp,$tmp,15
2509 le?vperm $in2,$in2,$in2,$leperm
2510 vand $tmp,$tmp,$eighty7
2511 vxor $out2,$in2,$twk2
2512 vxor $tweak,$tweak,$tmp
2514 lvx_u $in3,$x30,$inp
2515 sub $len,$len,$taillen
2516 vxor $twk3,$tweak,$rndkey0
2517 vsrab $tmp,$tweak,$seven # next tweak value
2518 vaddubm $tweak,$tweak,$tweak
2519 vsldoi $tmp,$tmp,$tmp,15
2520 le?vperm $in3,$in3,$in3,$leperm
2521 vand $tmp,$tmp,$eighty7
2522 vxor $out3,$in3,$twk3
2523 vxor $tweak,$tweak,$tmp
2525 lvx_u $in4,$x40,$inp
2527 vxor $twk4,$tweak,$rndkey0
2528 vsrab $tmp,$tweak,$seven # next tweak value
2529 vaddubm $tweak,$tweak,$tweak
2530 vsldoi $tmp,$tmp,$tmp,15
2531 le?vperm $in4,$in4,$in4,$leperm
2532 vand $tmp,$tmp,$eighty7
2533 vxor $out4,$in4,$twk4
2534 vxor $tweak,$tweak,$tmp
2536 lvx_u $in5,$x50,$inp
2538 vxor $twk5,$tweak,$rndkey0
2539 vsrab $tmp,$tweak,$seven # next tweak value
2540 vaddubm $tweak,$tweak,$tweak
2541 vsldoi $tmp,$tmp,$tmp,15
2542 le?vperm $in5,$in5,$in5,$leperm
2543 vand $tmp,$tmp,$eighty7
2544 vxor $out5,$in5,$twk5
2545 vxor $tweak,$tweak,$tmp
# Fold round key 0 into the last round key so vcipherlast below can also
# strip the twkN pre-xor in one shot.
2547 vxor v31,v31,$rndkey0
# Main 6x round loop: rounds [1..N-6] from the stack copy two at a time.
2553 vcipher $out0,$out0,v24
2554 vcipher $out1,$out1,v24
2555 vcipher $out2,$out2,v24
2556 vcipher $out3,$out3,v24
2557 vcipher $out4,$out4,v24
2558 vcipher $out5,$out5,v24
2559 lvx v24,$x20,$key_ # round[3]
2560 addi $key_,$key_,0x20
2562 vcipher $out0,$out0,v25
2563 vcipher $out1,$out1,v25
2564 vcipher $out2,$out2,v25
2565 vcipher $out3,$out3,v25
2566 vcipher $out4,$out4,v25
2567 vcipher $out5,$out5,v25
2568 lvx v25,$x10,$key_ # round[4]
# Loop tail: decrement $len (borrow flag drives both loop exit and the
# input-pointer trick below) while the remaining rounds run and the next
# six tweaks are generated in the gaps.
2571 subic $len,$len,96 # $len-=96
2572 vxor $in0,$twk0,v31 # xor with last round key
2573 vcipher $out0,$out0,v24
2574 vcipher $out1,$out1,v24
2575 vsrab $tmp,$tweak,$seven # next tweak value
2576 vxor $twk0,$tweak,$rndkey0
2577 vaddubm $tweak,$tweak,$tweak
2578 vcipher $out2,$out2,v24
2579 vcipher $out3,$out3,v24
2580 vsldoi $tmp,$tmp,$tmp,15
2581 vcipher $out4,$out4,v24
2582 vcipher $out5,$out5,v24
2584 subfe. r0,r0,r0 # borrow?-1:0
2585 vand $tmp,$tmp,$eighty7
2586 vcipher $out0,$out0,v25
2587 vcipher $out1,$out1,v25
2588 vxor $tweak,$tweak,$tmp
2589 vcipher $out2,$out2,v25
2590 vcipher $out3,$out3,v25
2592 vsrab $tmp,$tweak,$seven # next tweak value
2593 vxor $twk1,$tweak,$rndkey0
2594 vcipher $out4,$out4,v25
2595 vcipher $out5,$out5,v25
2598 vaddubm $tweak,$tweak,$tweak
2599 vsldoi $tmp,$tmp,$tmp,15
2600 vcipher $out0,$out0,v26
2601 vcipher $out1,$out1,v26
2602 vand $tmp,$tmp,$eighty7
2603 vcipher $out2,$out2,v26
2604 vcipher $out3,$out3,v26
2605 vxor $tweak,$tweak,$tmp
2606 vcipher $out4,$out4,v26
2607 vcipher $out5,$out5,v26
# r0 is 0 or -1 here: on the final pass $inp is frozen so the prefetch
# loads below re-read the current blocks instead of running off the end.
2609 add $inp,$inp,r0 # $inp is adjusted in such
2610 # way that at exit from the
2611 # loop inX-in5 are loaded
2614 vsrab $tmp,$tweak,$seven # next tweak value
2615 vxor $twk2,$tweak,$rndkey0
2616 vaddubm $tweak,$tweak,$tweak
2617 vcipher $out0,$out0,v27
2618 vcipher $out1,$out1,v27
2619 vsldoi $tmp,$tmp,$tmp,15
2620 vcipher $out2,$out2,v27
2621 vcipher $out3,$out3,v27
2622 vand $tmp,$tmp,$eighty7
2623 vcipher $out4,$out4,v27
2624 vcipher $out5,$out5,v27
2626 addi $key_,$sp,$FRAME+15 # rewind $key_
2627 vxor $tweak,$tweak,$tmp
2628 vcipher $out0,$out0,v28
2629 vcipher $out1,$out1,v28
2631 vsrab $tmp,$tweak,$seven # next tweak value
2632 vxor $twk3,$tweak,$rndkey0
2633 vcipher $out2,$out2,v28
2634 vcipher $out3,$out3,v28
2635 vaddubm $tweak,$tweak,$tweak
2636 vsldoi $tmp,$tmp,$tmp,15
2637 vcipher $out4,$out4,v28
2638 vcipher $out5,$out5,v28
2639 lvx v24,$x00,$key_ # re-pre-load round[1]
2640 vand $tmp,$tmp,$eighty7
2642 vcipher $out0,$out0,v29
2643 vcipher $out1,$out1,v29
2644 vxor $tweak,$tweak,$tmp
2645 vcipher $out2,$out2,v29
2646 vcipher $out3,$out3,v29
2648 vsrab $tmp,$tweak,$seven # next tweak value
2649 vxor $twk4,$tweak,$rndkey0
2650 vcipher $out4,$out4,v29
2651 vcipher $out5,$out5,v29
2652 lvx v25,$x10,$key_ # re-pre-load round[2]
2653 vaddubm $tweak,$tweak,$tweak
2654 vsldoi $tmp,$tmp,$tmp,15
2656 vcipher $out0,$out0,v30
2657 vcipher $out1,$out1,v30
2658 vand $tmp,$tmp,$eighty7
2659 vcipher $out2,$out2,v30
2660 vcipher $out3,$out3,v30
2661 vxor $tweak,$tweak,$tmp
2662 vcipher $out4,$out4,v30
2663 vcipher $out5,$out5,v30
2665 vsrab $tmp,$tweak,$seven # next tweak value
2666 vxor $twk5,$tweak,$rndkey0
# Final rounds: vcipherlast with the tweak-xored last round keys
# ($inN were set to twkN^v31 above) while the next six input blocks
# stream in.
2668 vcipherlast $out0,$out0,$in0
2669 lvx_u $in0,$x00,$inp # load next input block
2670 vaddubm $tweak,$tweak,$tweak
2671 vsldoi $tmp,$tmp,$tmp,15
2672 vcipherlast $out1,$out1,$in1
2673 lvx_u $in1,$x10,$inp
2674 vcipherlast $out2,$out2,$in2
2675 le?vperm $in0,$in0,$in0,$leperm
2676 lvx_u $in2,$x20,$inp
2677 vand $tmp,$tmp,$eighty7
2678 vcipherlast $out3,$out3,$in3
2679 le?vperm $in1,$in1,$in1,$leperm
2680 lvx_u $in3,$x30,$inp
2681 vcipherlast $out4,$out4,$in4
2682 le?vperm $in2,$in2,$in2,$leperm
2683 lvx_u $in4,$x40,$inp
2684 vxor $tweak,$tweak,$tmp
# Block 5 lands in $tmp, not $out5: it may be needed intact for
# ciphertext stealing after loop exit.
2685 vcipherlast $tmp,$out5,$in5 # last block might be needed
2687 le?vperm $in3,$in3,$in3,$leperm
2688 lvx_u $in5,$x50,$inp
2690 le?vperm $in4,$in4,$in4,$leperm
2691 le?vperm $in5,$in5,$in5,$leperm
# Store six ciphertext blocks while re-whitening the next six states.
2693 le?vperm $out0,$out0,$out0,$leperm
2694 le?vperm $out1,$out1,$out1,$leperm
2695 stvx_u $out0,$x00,$out # store output
2696 vxor $out0,$in0,$twk0
2697 le?vperm $out2,$out2,$out2,$leperm
2698 stvx_u $out1,$x10,$out
2699 vxor $out1,$in1,$twk1
2700 le?vperm $out3,$out3,$out3,$leperm
2701 stvx_u $out2,$x20,$out
2702 vxor $out2,$in2,$twk2
2703 le?vperm $out4,$out4,$out4,$leperm
2704 stvx_u $out3,$x30,$out
2705 vxor $out3,$in3,$twk3
2706 le?vperm $out5,$tmp,$tmp,$leperm
2707 stvx_u $out4,$x40,$out
2708 vxor $out4,$in4,$twk4
2709 le?stvx_u $out5,$x50,$out
2710 be?stvx_u $tmp, $x50,$out
2711 vxor $out5,$in5,$twk5
2715 beq Loop_xts_enc6x # did $len-=96 borrow?
# Tail dispatch: $len+0x60 selects how many of the already-loaded blocks
# (five down to one) remain; each case re-whitens the survivors with the
# lower tweaks, records the "unused tweak" for chain-out, and prepares
# $tmp for stealing.
2717 addic. $len,$len,0x60
2724 blt Lxts_enc6x_three
# --- five remaining blocks ---
2729 vxor $out0,$in1,$twk0
2730 vxor $out1,$in2,$twk1
2731 vxor $out2,$in3,$twk2
2732 vxor $out3,$in4,$twk3
2733 vxor $out4,$in5,$twk4
2737 le?vperm $out0,$out0,$out0,$leperm
2738 vmr $twk0,$twk5 # unused tweak
2739 le?vperm $out1,$out1,$out1,$leperm
2740 stvx_u $out0,$x00,$out # store output
2741 le?vperm $out2,$out2,$out2,$leperm
2742 stvx_u $out1,$x10,$out
2743 le?vperm $out3,$out3,$out3,$leperm
2744 stvx_u $out2,$x20,$out
2745 vxor $tmp,$out4,$twk5 # last block prep for stealing
2746 le?vperm $out4,$out4,$out4,$leperm
2747 stvx_u $out3,$x30,$out
2748 stvx_u $out4,$x40,$out
2750 bne Lxts_enc6x_steal
# --- four remaining blocks ---
2755 vxor $out0,$in2,$twk0
2756 vxor $out1,$in3,$twk1
2757 vxor $out2,$in4,$twk2
2758 vxor $out3,$in5,$twk3
2759 vxor $out4,$out4,$out4
2763 le?vperm $out0,$out0,$out0,$leperm
2764 vmr $twk0,$twk4 # unused tweak
2765 le?vperm $out1,$out1,$out1,$leperm
2766 stvx_u $out0,$x00,$out # store output
2767 le?vperm $out2,$out2,$out2,$leperm
2768 stvx_u $out1,$x10,$out
2769 vxor $tmp,$out3,$twk4 # last block prep for stealing
2770 le?vperm $out3,$out3,$out3,$leperm
2771 stvx_u $out2,$x20,$out
2772 stvx_u $out3,$x30,$out
2774 bne Lxts_enc6x_steal
# --- three remaining blocks ---
2779 vxor $out0,$in3,$twk0
2780 vxor $out1,$in4,$twk1
2781 vxor $out2,$in5,$twk2
2782 vxor $out3,$out3,$out3
2783 vxor $out4,$out4,$out4
2787 le?vperm $out0,$out0,$out0,$leperm
2788 vmr $twk0,$twk3 # unused tweak
2789 le?vperm $out1,$out1,$out1,$leperm
2790 stvx_u $out0,$x00,$out # store output
2791 vxor $tmp,$out2,$twk3 # last block prep for stealing
2792 le?vperm $out2,$out2,$out2,$leperm
2793 stvx_u $out1,$x10,$out
2794 stvx_u $out2,$x20,$out
2796 bne Lxts_enc6x_steal
# --- two remaining blocks ---
2801 vxor $out0,$in4,$twk0
2802 vxor $out1,$in5,$twk1
2803 vxor $out2,$out2,$out2
2804 vxor $out3,$out3,$out3
2805 vxor $out4,$out4,$out4
2809 le?vperm $out0,$out0,$out0,$leperm
2810 vmr $twk0,$twk2 # unused tweak
2811 vxor $tmp,$out1,$twk2 # last block prep for stealing
2812 le?vperm $out1,$out1,$out1,$leperm
2813 stvx_u $out0,$x00,$out # store output
2814 stvx_u $out1,$x10,$out
2816 bne Lxts_enc6x_steal
# --- one remaining block: scalar pipeline using the stack key copy ---
2821 vxor $out0,$in5,$twk0
2824 vcipher $out0,$out0,v24
2825 lvx v24,$x20,$key_ # round[3]
2826 addi $key_,$key_,0x20
2828 vcipher $out0,$out0,v25
2829 lvx v25,$x10,$key_ # round[4]
2832 add $inp,$inp,$taillen
2834 vcipher $out0,$out0,v24
2837 vcipher $out0,$out0,v25
2839 lvsr $inpperm,0,$taillen
2840 vcipher $out0,$out0,v26
2843 vcipher $out0,$out0,v27
2845 addi $key_,$sp,$FRAME+15 # rewind $key_
2846 vcipher $out0,$out0,v28
2847 lvx v24,$x00,$key_ # re-pre-load round[1]
2849 vcipher $out0,$out0,v29
2850 lvx v25,$x10,$key_ # re-pre-load round[2]
2851 vxor $twk0,$twk0,v31
2853 le?vperm $in0,$in0,$in0,$leperm
2854 vcipher $out0,$out0,v30
2856 vperm $in0,$in0,$in0,$inpperm
2857 vcipherlast $out0,$out0,$twk0
2859 vmr $twk0,$twk1 # unused tweak
2860 vxor $tmp,$out0,$twk1 # last block prep for stealing
2861 le?vperm $out0,$out0,$out0,$leperm
2862 stvx_u $out0,$x00,$out # store output
2864 bne Lxts_enc6x_steal
# Ciphertext stealing: merge the partial tail with the kept last
# ciphertext block ($tmp) via vperm/vsel, then run one more single-block
# encryption over the spliced block.
2872 add $inp,$inp,$taillen
2875 lvsr $inpperm,0,$taillen # $in5 is no more
2876 le?vperm $in0,$in0,$in0,$leperm
2877 vperm $in0,$in0,$in0,$inpperm
2878 vxor $tmp,$tmp,$twk0
2880 vxor $in0,$in0,$twk0
2881 vxor $out0,$out0,$out0
2883 vperm $out0,$out0,$out1,$inpperm
2884 vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember?
2889 Loop_xts_enc6x_steal:
2892 bdnz Loop_xts_enc6x_steal
2896 b Loop_xts_enc1x # one more time...
# Done: chain out the next tweak (strip the rndkey0 pre-xor first), wipe
# the stack copies of the round keys, restore callee-saved state.
2903 vxor $tweak,$twk0,$rndkey0
2904 le?vperm $tweak,$tweak,$tweak,$leperm
2905 stvx_u $tweak,0,$ivp
2911 stvx $seven,r10,$sp # wipe copies of round keys
2929 lvx v20,r10,$sp # ABI says so
2951 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
2952 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
2953 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
2954 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
2955 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
2956 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
2957 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
2960 .byte 0,12,0x04,1,0x80,6,6,0
# _aesp8_xts_enc5x (entry label on an elided line): shared tail that runs
# the remaining AES rounds over up to five prepared states $out0-$out4.
# Callers zero the unused slots; CTR register drives the round count.
# Round-pair loop over the stack-resident key copy.
2965 vcipher $out0,$out0,v24
2966 vcipher $out1,$out1,v24
2967 vcipher $out2,$out2,v24
2968 vcipher $out3,$out3,v24
2969 vcipher $out4,$out4,v24
2970 lvx v24,$x20,$key_ # round[3]
2971 addi $key_,$key_,0x20
2973 vcipher $out0,$out0,v25
2974 vcipher $out1,$out1,v25
2975 vcipher $out2,$out2,v25
2976 vcipher $out3,$out3,v25
2977 vcipher $out4,$out4,v25
2978 lvx v25,$x10,$key_ # round[4]
2979 bdnz _aesp8_xts_enc5x
# Final stretch: last resident rounds v26-v30, with the tail-permute mask
# and the next partial input prepared in the gaps for stealing.
2981 add $inp,$inp,$taillen
2983 vcipher $out0,$out0,v24
2984 vcipher $out1,$out1,v24
2985 vcipher $out2,$out2,v24
2986 vcipher $out3,$out3,v24
2987 vcipher $out4,$out4,v24
2990 vcipher $out0,$out0,v25
2991 vcipher $out1,$out1,v25
2992 vcipher $out2,$out2,v25
2993 vcipher $out3,$out3,v25
2994 vcipher $out4,$out4,v25
# Fold block 0's tweak into the combined last-round key v31.
2995 vxor $twk0,$twk0,v31
2997 vcipher $out0,$out0,v26
2998 lvsr $inpperm,r0,$taillen # $in5 is no more
2999 vcipher $out1,$out1,v26
3000 vcipher $out2,$out2,v26
3001 vcipher $out3,$out3,v26
3002 vcipher $out4,$out4,v26
3005 vcipher $out0,$out0,v27
3007 vcipher $out1,$out1,v27
3008 vcipher $out2,$out2,v27
3009 vcipher $out3,$out3,v27
3010 vcipher $out4,$out4,v27
3013 addi $key_,$sp,$FRAME+15 # rewind $key_
3014 vcipher $out0,$out0,v28
3015 vcipher $out1,$out1,v28
3016 vcipher $out2,$out2,v28
3017 vcipher $out3,$out3,v28
3018 vcipher $out4,$out4,v28
3019 lvx v24,$x00,$key_ # re-pre-load round[1]
3022 vcipher $out0,$out0,v29
3023 le?vperm $in0,$in0,$in0,$leperm
3024 vcipher $out1,$out1,v29
3025 vcipher $out2,$out2,v29
3026 vcipher $out3,$out3,v29
3027 vcipher $out4,$out4,v29
3028 lvx v25,$x10,$key_ # re-pre-load round[2]
3031 vcipher $out0,$out0,v30
3032 vperm $in0,$in0,$in0,$inpperm
3033 vcipher $out1,$out1,v30
3034 vcipher $out2,$out2,v30
3035 vcipher $out3,$out3,v30
3036 vcipher $out4,$out4,v30
# Last round: $twk0 and the caller-prepared $in1-$in4 already combine the
# last round key with each block's tweak.
3038 vcipherlast $out0,$out0,$twk0
3039 vcipherlast $out1,$out1,$in1
3040 vcipherlast $out2,$out2,$in2
3041 vcipherlast $out3,$out3,$in3
3042 vcipherlast $out4,$out4,$in4
3045 .byte 0,12,0x14,0,0,0,0,0
3048 _aesp8_xts_decrypt6x:
3049 $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp)
3051 li r7,`$FRAME+8*16+15`
3052 li r3,`$FRAME+8*16+31`
3053 $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp)
3054 stvx v20,r7,$sp # ABI says so
3077 stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave
3079 $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3081 $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3083 $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3085 $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3087 $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3089 $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3093 subi $rounds,$rounds,3 # -4 in total
3095 lvx $rndkey0,$x00,$key1 # load key schedule
3097 addi $key1,$key1,0x20
3099 ?vperm $rndkey0,$rndkey0,v30,$keyperm
3100 addi $key_,$sp,$FRAME+15
3104 ?vperm v24,v30,v31,$keyperm
3106 addi $key1,$key1,0x20
3107 stvx v24,$x00,$key_ # off-load round[1]
3108 ?vperm v25,v31,v30,$keyperm
3110 stvx v25,$x10,$key_ # off-load round[2]
3111 addi $key_,$key_,0x20
3112 bdnz Load_xts_dec_key
3115 ?vperm v24,v30,v31,$keyperm
3117 stvx v24,$x00,$key_ # off-load round[3]
3118 ?vperm v25,v31,v26,$keyperm
3120 stvx v25,$x10,$key_ # off-load round[4]
3121 addi $key_,$sp,$FRAME+15 # rewind $key_
3122 ?vperm v26,v26,v27,$keyperm
3124 ?vperm v27,v27,v28,$keyperm
3126 ?vperm v28,v28,v29,$keyperm
3128 ?vperm v29,v29,v30,$keyperm
3129 lvx $twk5,$x70,$key1 # borrow $twk5
3130 ?vperm v30,v30,v31,$keyperm
3131 lvx v24,$x00,$key_ # pre-load round[1]
3132 ?vperm v31,v31,$twk5,$keyperm
3133 lvx v25,$x10,$key_ # pre-load round[2]
3135 vperm $in0,$inout,$inptail,$inpperm
3136 subi $inp,$inp,31 # undo "caller"
3137 vxor $twk0,$tweak,$rndkey0
3138 vsrab $tmp,$tweak,$seven # next tweak value
3139 vaddubm $tweak,$tweak,$tweak
3140 vsldoi $tmp,$tmp,$tmp,15
3141 vand $tmp,$tmp,$eighty7
3142 vxor $out0,$in0,$twk0
3143 vxor $tweak,$tweak,$tmp
3145 lvx_u $in1,$x10,$inp
3146 vxor $twk1,$tweak,$rndkey0
3147 vsrab $tmp,$tweak,$seven # next tweak value
3148 vaddubm $tweak,$tweak,$tweak
3149 vsldoi $tmp,$tmp,$tmp,15
3150 le?vperm $in1,$in1,$in1,$leperm
3151 vand $tmp,$tmp,$eighty7
3152 vxor $out1,$in1,$twk1
3153 vxor $tweak,$tweak,$tmp
3155 lvx_u $in2,$x20,$inp
3156 andi. $taillen,$len,15
3157 vxor $twk2,$tweak,$rndkey0
3158 vsrab $tmp,$tweak,$seven # next tweak value
3159 vaddubm $tweak,$tweak,$tweak
3160 vsldoi $tmp,$tmp,$tmp,15
3161 le?vperm $in2,$in2,$in2,$leperm
3162 vand $tmp,$tmp,$eighty7
3163 vxor $out2,$in2,$twk2
3164 vxor $tweak,$tweak,$tmp
3166 lvx_u $in3,$x30,$inp
3167 sub $len,$len,$taillen
3168 vxor $twk3,$tweak,$rndkey0
3169 vsrab $tmp,$tweak,$seven # next tweak value
3170 vaddubm $tweak,$tweak,$tweak
3171 vsldoi $tmp,$tmp,$tmp,15
3172 le?vperm $in3,$in3,$in3,$leperm
3173 vand $tmp,$tmp,$eighty7
3174 vxor $out3,$in3,$twk3
3175 vxor $tweak,$tweak,$tmp
3177 lvx_u $in4,$x40,$inp
3179 vxor $twk4,$tweak,$rndkey0
3180 vsrab $tmp,$tweak,$seven # next tweak value
3181 vaddubm $tweak,$tweak,$tweak
3182 vsldoi $tmp,$tmp,$tmp,15
3183 le?vperm $in4,$in4,$in4,$leperm
3184 vand $tmp,$tmp,$eighty7
3185 vxor $out4,$in4,$twk4
3186 vxor $tweak,$tweak,$tmp
3188 lvx_u $in5,$x50,$inp
3190 vxor $twk5,$tweak,$rndkey0
3191 vsrab $tmp,$tweak,$seven # next tweak value
3192 vaddubm $tweak,$tweak,$tweak
3193 vsldoi $tmp,$tmp,$tmp,15
3194 le?vperm $in5,$in5,$in5,$leperm
3195 vand $tmp,$tmp,$eighty7
3196 vxor $out5,$in5,$twk5
3197 vxor $tweak,$tweak,$tmp
3199 vxor v31,v31,$rndkey0
3205 vncipher $out0,$out0,v24
3206 vncipher $out1,$out1,v24
3207 vncipher $out2,$out2,v24
3208 vncipher $out3,$out3,v24
3209 vncipher $out4,$out4,v24
3210 vncipher $out5,$out5,v24
3211 lvx v24,$x20,$key_ # round[3]
3212 addi $key_,$key_,0x20
3214 vncipher $out0,$out0,v25
3215 vncipher $out1,$out1,v25
3216 vncipher $out2,$out2,v25
3217 vncipher $out3,$out3,v25
3218 vncipher $out4,$out4,v25
3219 vncipher $out5,$out5,v25
3220 lvx v25,$x10,$key_ # round[4]
3223 subic $len,$len,96 # $len-=96
3224 vxor $in0,$twk0,v31 # xor with last round key
3225 vncipher $out0,$out0,v24
3226 vncipher $out1,$out1,v24
3227 vsrab $tmp,$tweak,$seven # next tweak value
3228 vxor $twk0,$tweak,$rndkey0
3229 vaddubm $tweak,$tweak,$tweak
3230 vncipher $out2,$out2,v24
3231 vncipher $out3,$out3,v24
3232 vsldoi $tmp,$tmp,$tmp,15
3233 vncipher $out4,$out4,v24
3234 vncipher $out5,$out5,v24
3236 subfe. r0,r0,r0 # borrow?-1:0
3237 vand $tmp,$tmp,$eighty7
3238 vncipher $out0,$out0,v25
3239 vncipher $out1,$out1,v25
3240 vxor $tweak,$tweak,$tmp
3241 vncipher $out2,$out2,v25
3242 vncipher $out3,$out3,v25
3244 vsrab $tmp,$tweak,$seven # next tweak value
3245 vxor $twk1,$tweak,$rndkey0
3246 vncipher $out4,$out4,v25
3247 vncipher $out5,$out5,v25
3250 vaddubm $tweak,$tweak,$tweak
3251 vsldoi $tmp,$tmp,$tmp,15
3252 vncipher $out0,$out0,v26
3253 vncipher $out1,$out1,v26
3254 vand $tmp,$tmp,$eighty7
3255 vncipher $out2,$out2,v26
3256 vncipher $out3,$out3,v26
3257 vxor $tweak,$tweak,$tmp
3258 vncipher $out4,$out4,v26
3259 vncipher $out5,$out5,v26
3261 add $inp,$inp,r0 # $inp is adjusted in such
3262 # way that at exit from the
3263 # loop inX-in5 are loaded
3266 vsrab $tmp,$tweak,$seven # next tweak value
3267 vxor $twk2,$tweak,$rndkey0
3268 vaddubm $tweak,$tweak,$tweak
3269 vncipher $out0,$out0,v27
3270 vncipher $out1,$out1,v27
3271 vsldoi $tmp,$tmp,$tmp,15
3272 vncipher $out2,$out2,v27
3273 vncipher $out3,$out3,v27
3274 vand $tmp,$tmp,$eighty7
3275 vncipher $out4,$out4,v27
3276 vncipher $out5,$out5,v27
3278 addi $key_,$sp,$FRAME+15 # rewind $key_
3279 vxor $tweak,$tweak,$tmp
3280 vncipher $out0,$out0,v28
3281 vncipher $out1,$out1,v28
3283 vsrab $tmp,$tweak,$seven # next tweak value
3284 vxor $twk3,$tweak,$rndkey0
3285 vncipher $out2,$out2,v28
3286 vncipher $out3,$out3,v28
3287 vaddubm $tweak,$tweak,$tweak
3288 vsldoi $tmp,$tmp,$tmp,15
3289 vncipher $out4,$out4,v28
3290 vncipher $out5,$out5,v28
3291 lvx v24,$x00,$key_ # re-pre-load round[1]
3292 vand $tmp,$tmp,$eighty7
3294 vncipher $out0,$out0,v29
3295 vncipher $out1,$out1,v29
3296 vxor $tweak,$tweak,$tmp
3297 vncipher $out2,$out2,v29
3298 vncipher $out3,$out3,v29
3300 vsrab $tmp,$tweak,$seven # next tweak value
3301 vxor $twk4,$tweak,$rndkey0
3302 vncipher $out4,$out4,v29
3303 vncipher $out5,$out5,v29
3304 lvx v25,$x10,$key_ # re-pre-load round[2]
3305 vaddubm $tweak,$tweak,$tweak
3306 vsldoi $tmp,$tmp,$tmp,15
3308 vncipher $out0,$out0,v30
3309 vncipher $out1,$out1,v30
3310 vand $tmp,$tmp,$eighty7
3311 vncipher $out2,$out2,v30
3312 vncipher $out3,$out3,v30
3313 vxor $tweak,$tweak,$tmp
3314 vncipher $out4,$out4,v30
3315 vncipher $out5,$out5,v30
3317 vsrab $tmp,$tweak,$seven # next tweak value
3318 vxor $twk5,$tweak,$rndkey0
3320 vncipherlast $out0,$out0,$in0
3321 lvx_u $in0,$x00,$inp # load next input block
3322 vaddubm $tweak,$tweak,$tweak
3323 vsldoi $tmp,$tmp,$tmp,15
3324 vncipherlast $out1,$out1,$in1
3325 lvx_u $in1,$x10,$inp
3326 vncipherlast $out2,$out2,$in2
3327 le?vperm $in0,$in0,$in0,$leperm
3328 lvx_u $in2,$x20,$inp
3329 vand $tmp,$tmp,$eighty7
3330 vncipherlast $out3,$out3,$in3
3331 le?vperm $in1,$in1,$in1,$leperm
3332 lvx_u $in3,$x30,$inp
3333 vncipherlast $out4,$out4,$in4
3334 le?vperm $in2,$in2,$in2,$leperm
3335 lvx_u $in4,$x40,$inp
3336 vxor $tweak,$tweak,$tmp
3337 vncipherlast $out5,$out5,$in5
3338 le?vperm $in3,$in3,$in3,$leperm
3339 lvx_u $in5,$x50,$inp
3341 le?vperm $in4,$in4,$in4,$leperm
3342 le?vperm $in5,$in5,$in5,$leperm
3344 le?vperm $out0,$out0,$out0,$leperm
3345 le?vperm $out1,$out1,$out1,$leperm
3346 stvx_u $out0,$x00,$out # store output
3347 vxor $out0,$in0,$twk0
3348 le?vperm $out2,$out2,$out2,$leperm
3349 stvx_u $out1,$x10,$out
3350 vxor $out1,$in1,$twk1
3351 le?vperm $out3,$out3,$out3,$leperm
3352 stvx_u $out2,$x20,$out
3353 vxor $out2,$in2,$twk2
3354 le?vperm $out4,$out4,$out4,$leperm
3355 stvx_u $out3,$x30,$out
3356 vxor $out3,$in3,$twk3
3357 le?vperm $out5,$out5,$out5,$leperm
3358 stvx_u $out4,$x40,$out
3359 vxor $out4,$in4,$twk4
3360 stvx_u $out5,$x50,$out
3361 vxor $out5,$in5,$twk5
3365 beq Loop_xts_dec6x # did $len-=96 borrow?
3367 addic. $len,$len,0x60
3374 blt Lxts_dec6x_three
3379 vxor $out0,$in1,$twk0
3380 vxor $out1,$in2,$twk1
3381 vxor $out2,$in3,$twk2
3382 vxor $out3,$in4,$twk3
3383 vxor $out4,$in5,$twk4
3387 le?vperm $out0,$out0,$out0,$leperm
3388 vmr $twk0,$twk5 # unused tweak
3389 vxor $twk1,$tweak,$rndkey0
3390 le?vperm $out1,$out1,$out1,$leperm
3391 stvx_u $out0,$x00,$out # store output
3392 vxor $out0,$in0,$twk1
3393 le?vperm $out2,$out2,$out2,$leperm
3394 stvx_u $out1,$x10,$out
3395 le?vperm $out3,$out3,$out3,$leperm
3396 stvx_u $out2,$x20,$out
3397 le?vperm $out4,$out4,$out4,$leperm
3398 stvx_u $out3,$x30,$out
3399 stvx_u $out4,$x40,$out
3401 bne Lxts_dec6x_steal
3406 vxor $out0,$in2,$twk0
3407 vxor $out1,$in3,$twk1
3408 vxor $out2,$in4,$twk2
3409 vxor $out3,$in5,$twk3
3410 vxor $out4,$out4,$out4
3414 le?vperm $out0,$out0,$out0,$leperm
3415 vmr $twk0,$twk4 # unused tweak
3417 le?vperm $out1,$out1,$out1,$leperm
3418 stvx_u $out0,$x00,$out # store output
3419 vxor $out0,$in0,$twk5
3420 le?vperm $out2,$out2,$out2,$leperm
3421 stvx_u $out1,$x10,$out
3422 le?vperm $out3,$out3,$out3,$leperm
3423 stvx_u $out2,$x20,$out
3424 stvx_u $out3,$x30,$out
3426 bne Lxts_dec6x_steal
3431 vxor $out0,$in3,$twk0
3432 vxor $out1,$in4,$twk1
3433 vxor $out2,$in5,$twk2
3434 vxor $out3,$out3,$out3
3435 vxor $out4,$out4,$out4
3439 le?vperm $out0,$out0,$out0,$leperm
3440 vmr $twk0,$twk3 # unused tweak
3442 le?vperm $out1,$out1,$out1,$leperm
3443 stvx_u $out0,$x00,$out # store output
3444 vxor $out0,$in0,$twk4
3445 le?vperm $out2,$out2,$out2,$leperm
3446 stvx_u $out1,$x10,$out
3447 stvx_u $out2,$x20,$out
3449 bne Lxts_dec6x_steal
3454 vxor $out0,$in4,$twk0
3455 vxor $out1,$in5,$twk1
3456 vxor $out2,$out2,$out2
3457 vxor $out3,$out3,$out3
3458 vxor $out4,$out4,$out4
3462 le?vperm $out0,$out0,$out0,$leperm
3463 vmr $twk0,$twk2 # unused tweak
3465 le?vperm $out1,$out1,$out1,$leperm
3466 stvx_u $out0,$x00,$out # store output
3467 vxor $out0,$in0,$twk3
3468 stvx_u $out1,$x10,$out
3470 bne Lxts_dec6x_steal
3475 vxor $out0,$in5,$twk0
3478 vncipher $out0,$out0,v24
3479 lvx v24,$x20,$key_ # round[3]
3480 addi $key_,$key_,0x20
3482 vncipher $out0,$out0,v25
3483 lvx v25,$x10,$key_ # round[4]
3487 vncipher $out0,$out0,v24
3491 vncipher $out0,$out0,v25
3494 vncipher $out0,$out0,v26
3497 vncipher $out0,$out0,v27
3499 addi $key_,$sp,$FRAME+15 # rewind $key_
3500 vncipher $out0,$out0,v28
3501 lvx v24,$x00,$key_ # re-pre-load round[1]
3503 vncipher $out0,$out0,v29
3504 lvx v25,$x10,$key_ # re-pre-load round[2]
3505 vxor $twk0,$twk0,v31
3507 le?vperm $in0,$in0,$in0,$leperm
3508 vncipher $out0,$out0,v30
3511 vncipherlast $out0,$out0,$twk0
3513 vmr $twk0,$twk1 # unused tweak
3515 le?vperm $out0,$out0,$out0,$leperm
3516 stvx_u $out0,$x00,$out # store output
3518 vxor $out0,$in0,$twk2
3519 bne Lxts_dec6x_steal
3528 le?vperm $in0,$in0,$in0,$leperm
3529 vxor $out0,$in0,$twk1
3531 vncipher $out0,$out0,v24
3532 lvx v24,$x20,$key_ # round[3]
3533 addi $key_,$key_,0x20
3535 vncipher $out0,$out0,v25
3536 lvx v25,$x10,$key_ # round[4]
3537 bdnz Lxts_dec6x_steal
3539 add $inp,$inp,$taillen
3540 vncipher $out0,$out0,v24
3543 vncipher $out0,$out0,v25
3546 vncipher $out0,$out0,v26
3548 lvsr $inpperm,0,$taillen # $in5 is no more
3549 vncipher $out0,$out0,v27
3551 addi $key_,$sp,$FRAME+15 # rewind $key_
3552 vncipher $out0,$out0,v28
3553 lvx v24,$x00,$key_ # re-pre-load round[1]
3555 vncipher $out0,$out0,v29
3556 lvx v25,$x10,$key_ # re-pre-load round[2]
3557 vxor $twk1,$twk1,v31
3559 le?vperm $in0,$in0,$in0,$leperm
3560 vncipher $out0,$out0,v30
3562 vperm $in0,$in0,$in0,$inpperm
3563 vncipherlast $tmp,$out0,$twk1
3565 le?vperm $out0,$tmp,$tmp,$leperm
3566 le?stvx_u $out0,0,$out
3567 be?stvx_u $tmp,0,$out
3569 vxor $out0,$out0,$out0
3571 vperm $out0,$out0,$out1,$inpperm
3572 vsel $out0,$in0,$tmp,$out0
3573 vxor $out0,$out0,$twk0
3577 Loop_xts_dec6x_steal:
3580 bdnz Loop_xts_dec6x_steal
3584 b Loop_xts_dec1x # one more time...
3591 vxor $tweak,$twk0,$rndkey0
3592 le?vperm $tweak,$tweak,$tweak,$leperm
3593 stvx_u $tweak,0,$ivp
3599 stvx $seven,r10,$sp # wipe copies of round keys
3617 lvx v20,r10,$sp # ABI says so
3639 $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp)
3640 $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp)
3641 $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp)
3642 $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp)
3643 $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp)
3644 $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp)
3645 addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T`
3648 .byte 0,12,0x04,1,0x80,6,6,0
3653 vncipher $out0,$out0,v24
3654 vncipher $out1,$out1,v24
3655 vncipher $out2,$out2,v24
3656 vncipher $out3,$out3,v24
3657 vncipher $out4,$out4,v24
3658 lvx v24,$x20,$key_ # round[3]
3659 addi $key_,$key_,0x20
3661 vncipher $out0,$out0,v25
3662 vncipher $out1,$out1,v25
3663 vncipher $out2,$out2,v25
3664 vncipher $out3,$out3,v25
3665 vncipher $out4,$out4,v25
3666 lvx v25,$x10,$key_ # round[4]
3667 bdnz _aesp8_xts_dec5x
3670 vncipher $out0,$out0,v24
3671 vncipher $out1,$out1,v24
3672 vncipher $out2,$out2,v24
3673 vncipher $out3,$out3,v24
3674 vncipher $out4,$out4,v24
3678 vncipher $out0,$out0,v25
3679 vncipher $out1,$out1,v25
3680 vncipher $out2,$out2,v25
3681 vncipher $out3,$out3,v25
3682 vncipher $out4,$out4,v25
3683 vxor $twk0,$twk0,v31
3686 vncipher $out0,$out0,v26
3687 vncipher $out1,$out1,v26
3688 vncipher $out2,$out2,v26
3689 vncipher $out3,$out3,v26
3690 vncipher $out4,$out4,v26
3693 vncipher $out0,$out0,v27
3695 vncipher $out1,$out1,v27
3696 vncipher $out2,$out2,v27
3697 vncipher $out3,$out3,v27
3698 vncipher $out4,$out4,v27
3701 addi $key_,$sp,$FRAME+15 # rewind $key_
3702 vncipher $out0,$out0,v28
3703 vncipher $out1,$out1,v28
3704 vncipher $out2,$out2,v28
3705 vncipher $out3,$out3,v28
3706 vncipher $out4,$out4,v28
3707 lvx v24,$x00,$key_ # re-pre-load round[1]
3710 vncipher $out0,$out0,v29
3711 le?vperm $in0,$in0,$in0,$leperm
3712 vncipher $out1,$out1,v29
3713 vncipher $out2,$out2,v29
3714 vncipher $out3,$out3,v29
3715 vncipher $out4,$out4,v29
3716 lvx v25,$x10,$key_ # re-pre-load round[2]
3719 vncipher $out0,$out0,v30
3720 vncipher $out1,$out1,v30
3721 vncipher $out2,$out2,v30
3722 vncipher $out3,$out3,v30
3723 vncipher $out4,$out4,v30
3725 vncipherlast $out0,$out0,$twk0
3726 vncipherlast $out1,$out1,$in1
3727 vncipherlast $out2,$out2,$in2
3728 vncipherlast $out3,$out3,$in3
3729 vncipherlast $out4,$out4,$in4
3733 .byte 0,12,0x14,0,0,0,0,0
3738 foreach(split("\n",$code)) {
3739 s/\`([^\`]*)\`/eval($1)/geo;
3741 # constants table endian-specific conversion
3742 if ($consts && m/\.(long|byte)\s+(.+)\s+(\?[a-z]*)$/o) {
3746 # convert to endian-agnostic format
3748 foreach (split(/,\s*/,$2)) {
3749 my $l = /^0/?oct:int;
3750 push @bytes,($l>>24)&0xff,($l>>16)&0xff,($l>>8)&0xff,$l&0xff;
3753 @bytes = map(/^0/?oct:int,split(/,\s*/,$2));
3756 # little-endian conversion
3757 if ($flavour =~ /le$/o) {
3758 SWITCH: for($conv) {
3759 /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; };
3760 /\?rev/ && do { @bytes=reverse(@bytes); last; };
3765 print ".byte\t",join(',',map (sprintf("0x%02x",$_),@bytes)),"\n";
3768 $consts=0 if (m/Lconsts:/o); # end of table
3770 # instructions prefixed with '?' are endian-specific and need
3771 # to be adjusted accordingly...
3772 if ($flavour =~ /le$/o) { # little-endian
3777 s/\?(vperm\s+v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+,\s*)(v[0-9]+)/$1$3$2$4/o or
3778 s/\?(vsldoi\s+v[0-9]+,\s*)(v[0-9]+,)\s*(v[0-9]+,\s*)([0-9]+)/$1$3$2 16-$4/o or
3779 s/\?(vspltw\s+v[0-9]+,\s*)(v[0-9]+,)\s*([0-9])/$1$2 3-$3/o;
3780 } else { # big-endian