X-Git-Url: https://git.kernel.dk/?a=blobdiff_plain;f=arch%2Fpowerpc%2Flib%2Fmemcpy_64.S;h=e178922b2c2129e808a427464ce8fccc34643309;hb=e423b9ecd6aa434ce9ba72a21fdc61079e620e0a;hp=7173ba98f427d293fc6f66727a4448be2da40ff2;hpb=115b384cf87249d76adb0b21aca11ee22128927d;p=linux-2.6-block.git

diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 7173ba98f427..e178922b2c21 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -12,17 +12,29 @@
 	.align	7
 _GLOBAL(memcpy)
 	std	r3,48(r1)	/* save destination pointer for return value */
-	mtcrf	0x01,r5
+	PPC_MTOCRF	0x01,r5
 	cmpldi	cr1,r5,16
 	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
 	andi.	r6,r6,7
 	dcbt	0,r4
 	blt	cr1,.Lshort_copy
+/* Below we want to nop out the bne if we're on a CPU that has the
+   CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
+   cleared.
+   At the time of writing the only CPU that has this combination of bits
+   set is Power6. */
+BEGIN_FTR_SECTION
+	nop
+FTR_SECTION_ELSE
 	bne	.Ldst_unaligned
+ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
+                    CPU_FTR_UNALIGNED_LD_STD)
 .Ldst_aligned:
-	andi.	r0,r4,7
 	addi	r3,r3,-16
+BEGIN_FTR_SECTION
+	andi.	r0,r4,7
 	bne	.Lsrc_unaligned
+END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
 	srdi	r7,r5,4
 	ld	r9,0(r4)
 	addi	r4,r4,-8
@@ -41,18 +53,19 @@ _GLOBAL(memcpy)
 3:	std	r8,8(r3)
 	beq	3f
 	addi	r3,r3,16
-	ld	r9,8(r4)
 .Ldo_tail:
 	bf	cr7*4+1,1f
-	rotldi	r9,r9,32
+	lwz	r9,8(r4)
+	addi	r4,r4,4
 	stw	r9,0(r3)
 	addi	r3,r3,4
 1:	bf	cr7*4+2,2f
-	rotldi	r9,r9,16
+	lhz	r9,8(r4)
+	addi	r4,r4,2
 	sth	r9,0(r3)
 	addi	r3,r3,2
 2:	bf	cr7*4+3,3f
-	rotldi	r9,r9,8
+	lbz	r9,8(r4)
 	stb	r9,0(r3)
 3:	ld	r3,48(r1)	/* return dest pointer */
 	blr
@@ -121,17 +134,30 @@ _GLOBAL(memcpy)
 	cmpwi	cr1,r5,8
 	addi	r3,r3,32
 	sld	r9,r9,r10
-	ble	cr1,.Ldo_tail
+	ble	cr1,6f
 	ld	r0,8(r4)
 	srd	r7,r0,r11
 	or	r9,r7,r9
-	b	.Ldo_tail
+6:
+	bf	cr7*4+1,1f
+	rotldi	r9,r9,32
+	stw	r9,0(r3)
+	addi	r3,r3,4
+1:	bf	cr7*4+2,2f
+	rotldi	r9,r9,16
+	sth	r9,0(r3)
+	addi	r3,r3,2
+2:	bf	cr7*4+3,3f
+	rotldi	r9,r9,8
+	stb	r9,0(r3)
+3:	ld	r3,48(r1)	/* return dest pointer */
+	blr
 
 .Ldst_unaligned:
-	mtcrf	0x01,r6		# put #bytes to 8B bdry into cr7
+	PPC_MTOCRF	0x01,r6		# put #bytes to 8B bdry into cr7
 	subf	r5,r6,r5
 	li	r7,0
-	cmpldi	r1,r5,16
+	cmpldi	cr1,r5,16
 	bf	cr7*4+3,1f
 	lbz	r0,0(r4)
 	stb	r0,0(r3)
@@ -143,7 +169,7 @@ _GLOBAL(memcpy)
 2:	bf	cr7*4+1,3f
 	lwzx	r0,r7,r4
 	stwx	r0,r7,r3
-3:	mtcrf	0x01,r5
+3:	PPC_MTOCRF	0x01,r5
 	add	r4,r6,r4
 	add	r3,r6,r3
 	b	.Ldst_aligned