/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>

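/*
 * 64-bit PowerPC memcpy: r3 = dest, r4 = src, r5 = length in bytes.
 *
 * mtcrf 0x01,r5 copies the low four bits of the length into cr7, so
 * bits cr7*4+0..3 flag an outstanding 8-, 4-, 2- or 1-byte remainder.
 */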
	.align	7
_GLOBAL(memcpy)
	mtcrf	0x01,r5
	cmpldi	cr1,r5,16
	neg	r6,r3		# LS 3 bits = # bytes to 8-byte dest bdry
	andi.	r6,r6,7
	dcbt	0,r4
	blt	cr1,.Lshort_copy
	bne	.Ldst_unaligned
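# Destination is now 8-byte aligned.  If the source is aligned too, the
# loop below moves 16 bytes per iteration with paired ld/std.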
.Ldst_aligned:
	andi.	r0,r4,7
	addi	r3,r3,-16
	bne	.Lsrc_unaligned
	srdi	r7,r5,4
	ld	r9,0(r4)
	addi	r4,r4,-8
	mtctr	r7
	andi.	r5,r5,7
	bf	cr7*4+0,2f
	addi	r3,r3,8
	addi	r4,r4,8
	mr	r8,r9
	blt	cr1,3f
1:	ld	r9,8(r4)
	std	r8,8(r3)
2:	ldu	r8,16(r4)
	stdu	r9,16(r3)
	bdnz	1b
3:	std	r8,8(r3)
	beqlr
	addi	r3,r3,16
	ld	r9,8(r4)
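# Store the trailing 1-7 bytes, which arrive left-justified in r9;
# cr7 bits 1..3 select the 4-, 2- and 1-byte stores.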
.Ldo_tail:
	bf	cr7*4+1,1f
	rotldi	r9,r9,32
	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
	rotldi	r9,r9,16
	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
	rotldi	r9,r9,8
	stb	r9,0(r3)
3:	blr

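# Source and destination are misaligned relative to each other: load
# aligned doublewords from the source and merge adjacent pairs with
# sld/srd (shift counts r10 = 8*offset, r11 = 64-r10) so the main loop
# still stores full, aligned doublewords.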
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpdi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0

	bt	cr7*4+0,0f

	ld	r9,0(r4)	# 3+2n loads, 2+2n stores
	ld	r0,8(r4)
	sld	r6,r9,r10
	ldu	r9,16(r4)
	srd	r7,r0,r11
	sld	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,4f
	ld	r0,8(r4)
	# s1<< in r8, d0=(s0<<|s1>>) in r7, s3 in r0, s2 in r9, nix in r6 & r12
	b	2f

0:	ld	r0,0(r4)	# 4+2n loads, 3+2n stores
	ldu	r9,8(r4)
	sld	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
	ld	r0,8(r4)
	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	srd	r7,r0,r11
	sld	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,3f

	# d0=(s0<<|s1>>) in r12, s1<< in r6, s2>> in r7, s2<< in r8, s3 in r9
1:	or	r7,r7,r6
	ld	r0,8(r4)
	std	r12,8(r3)
2:	srd	r12,r9,r11
	sld	r6,r9,r10
	ldu	r9,16(r4)
	or	r12,r8,r12
	stdu	r7,16(r3)
	srd	r7,r0,r11
	sld	r8,r0,r10
	bdnz	1b

3:	std	r12,8(r3)
	or	r7,r7,r6
4:	std	r7,16(r3)
5:	srd	r12,r9,r11
	or	r12,r8,r12
	std	r12,24(r3)
	beqlr
	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sld	r9,r9,r10
	ble	cr1,.Ldo_tail
	ld	r0,8(r4)
	srd	r7,r0,r11
	or	r9,r7,r9
	b	.Ldo_tail

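# Destination is not 8-byte aligned: copy the 1-7 leading bytes needed to
# reach an 8-byte boundary (count in r6, low bits mirrored in cr7), then
# rejoin the aligned path.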
.Ldst_unaligned:
	mtcrf	0x01,r6		# put #bytes to 8B bdry into cr7
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
	lbz	r0,0(r4)
	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
	lhzx	r0,r7,r4
	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
	lwzx	r0,r7,r4
	stwx	r0,r7,r3
3:	mtcrf	0x01,r5
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

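# Copies of fewer than 16 bytes: cr7 (set from the length at entry)
# selects the 8-, 4-, 2- and 1-byte moves.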
.Lshort_copy:
	bf	cr7*4+0,1f
	lwz	r0,0(r4)
	lwz	r9,4(r4)
	addi	r4,r4,8
	stw	r0,0(r3)
	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
	lwz	r0,0(r4)
	addi	r4,r4,4
	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
	lhz	r0,0(r4)
	addi	r4,r4,2
	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
	lbz	r0,0(r4)
	stb	r0,0(r3)
4:	blr