powerpc: Merge Kconfig.debug
[linux-2.6-block.git] / arch / powerpc / kernel / vector.S
CommitLineData
14cf11af
PM
1#include <linux/config.h>
2#include <asm/ppc_asm.h>
3#include <asm/processor.h>
4
5/*
6 * The routines below are in assembler so we can closely control the
7 * usage of floating-point registers. These routines must be called
8 * with preempt disabled.
9 */
/*
 * Floating-point constants (0.0, 1.0, 0.5) used by the routines in this
 * file, and LDCONST(fr, name), which loads one of them into FP register fr.
 *
 * 32-bit: the constants are single-precision words in .data; LDCONST
 *         builds the address with lis/@ha + @l and clobbers r11.
 * 64-bit: the constants are double-precision entries in the .toc section;
 *         LDCONST loads them with name@toc(r2) and clobbers no GPR.
 */
#ifdef CONFIG_PPC32

#define LDCONST(fr, name)	\
	lis	r11,name@ha;	\
	lfs	fr,name@l(r11)

	.data
fpzero:	.long	0		/* 0.0 in single-precision FP */
fpone:	.long	0x3f800000	/* 1.0 in single-precision FP */
fphalf:	.long	0x3f000000	/* 0.5 in single-precision FP */

#else	/* !CONFIG_PPC32 */

#define LDCONST(fr, name)	\
	lfd	fr,name@toc(r2)

	.section ".toc","aw"
fpzero:	.tc	FD_0_0[TC],0				/* 0.0 */
fpone:	.tc	FD_3ff00000_0[TC],0x3ff0000000000000	/* 1.0 */
fphalf:	.tc	FD_3fe00000_0[TC],0x3fe0000000000000	/* 0.5 */

#endif	/* CONFIG_PPC32 */
35
36 .text
/*
 * fpenable: shared prologue for the emulation routines in this file.
 * Turns on MSR_FP, saves the FP state it is about to disturb and sets
 * FPSCR to 0.  Don't call it from C; it doesn't use the normal calling
 * convention.
 *
 * Reached with `bl`, so LR is overwritten: callers copy their own return
 * address into r12 beforehand.
 *
 * On return:
 *   r1   -> new 64-byte stack frame
 *             8(r1)  caller's fr31
 *            16(r1)  caller's fr1
 *            24(r1)  caller's fr0
 *            32..56(r1) not written here; the routines below use these
 *                       slots for fr2-fr5
 *   r10  =  MSR as it was on entry (fpdisable writes it back)
 *   r11  =  scratch, clobbered
 *   fr31 =  FPSCR as it was on entry (fpdisable writes it back)
 *   fr1  =  0.0
 */
fpenable:
#ifdef CONFIG_PPC32
	stwu	r1,-64(r1)
#else
	stdu	r1,-64(r1)
#endif
	mfmsr	r10
	ori	r11,r10,MSR_FP
	mtmsr	r11
	isync				/* MSR change must be in effect before any FP insn */
	stfd	fr31,8(r1)
	mffs	fr31			/* keep the old FPSCR in fr31 for fpdisable */
	stfd	fr1,16(r1)
	stfd	fr0,24(r1)
	LDCONST(fr1, fpzero)		/* 32-bit variant reuses r11 as scratch */
	mtfsf	0xff,fr1		/* all eight FPSCR fields <- 0 */
	blr
58
/*
 * fpdisable: shared epilogue, entered with a plain branch (`b fpdisable`)
 * from each routine below; it returns straight to that routine's caller.
 *
 * Expects the state left by fpenable:
 *   r1   -> the 64-byte frame holding fr31/fr1/fr0 at 8/16/24(r1)
 *   r10  =  MSR value to restore
 *   r12  =  return address of the routine's caller
 *   fr31 =  FPSCR value to restore
 */
fpdisable:
	mtfsf	0xff,fr31		/* FPSCR back first: fr31 is reloaded next */
	lfd	fr0,24(r1)
	lfd	fr1,16(r1)
	lfd	fr31,8(r1)
	mtmsr	r10
	isync
	addi	r1,r1,64		/* drop the frame only after every reload */
	mtlr	r12
	blr
69
/*
 * vaddfp: vector add, floating point.
 * Adds two vectors of four single-precision elements.
 *
 *   r3 -> destination, r4 -> first source, r5 -> second source
 *   dst[i] = a[i] + b[i]
 *
 * Scratch: r0, r6, r10-r12, ctr, lr.  fr0/fr1 are preserved through
 * fpenable/fpdisable.
 */
_GLOBAL(vaddfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	li	r6,0			/* r6 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* four 4-byte elements */
.Lvaddfp_next:
	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fadds	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	.Lvaddfp_next
	b	fpdisable		/* restores FP state and returns for us */
86
/*
 * vsubfp: vector subtract, floating point.
 * Subtracts two vectors of four single-precision elements.
 *
 *   r3 -> destination, r4 -> minuend, r5 -> subtrahend
 *   dst[i] = a[i] - b[i]
 *
 * Scratch: r0, r6, r10-r12, ctr, lr.  fr0/fr1 are preserved through
 * fpenable/fpdisable.
 */
_GLOBAL(vsubfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	li	r6,0			/* r6 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* four 4-byte elements */
.Lvsubfp_next:
	lfsx	fr0,r4,r6
	lfsx	fr1,r5,r6
	fsubs	fr0,fr0,fr1
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	.Lvsubfp_next
	b	fpdisable		/* restores FP state and returns for us */
103
/*
 * vmaddfp: vector multiply and add, floating point.
 * Operates on vectors of four single-precision elements.
 *
 *   r3 -> destination, r4 -> a, r5 -> b, r6 -> c
 *   dst[i] = a[i] * c[i] + b[i]
 *
 * Scratch: r0, r7, r10-r12, ctr, lr.  fr0/fr1 are preserved through
 * fpenable/fpdisable; fr2 is saved and restored here at 32(r1).
 */
_GLOBAL(vmaddfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr2,32(r1)		/* fpenable's frame has room for fr2 */
	li	r7,0			/* r7 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* four 4-byte elements */
.Lvmaddfp_next:
	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fmadds	fr0,fr0,fr2,fr1		/* fr0 * fr2 + fr1 */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	.Lvmaddfp_next
	lfd	fr2,32(r1)
	b	fpdisable		/* restores FP state and returns for us */
123
/*
 * vnmsubfp: vector negative multiply and subtract, floating point.
 * Operates on vectors of four single-precision elements.
 *
 *   r3 -> destination, r4 -> a, r5 -> b, r6 -> c
 *   dst[i] = -(a[i] * c[i] - b[i])
 *
 * Scratch: r0, r7, r10-r12, ctr, lr.  fr0/fr1 are preserved through
 * fpenable/fpdisable; fr2 is saved and restored here at 32(r1).
 */
_GLOBAL(vnmsubfp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr2,32(r1)		/* fpenable's frame has room for fr2 */
	li	r7,0			/* r7 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* four 4-byte elements */
.Lvnmsubfp_next:
	lfsx	fr0,r4,r7
	lfsx	fr1,r5,r7
	lfsx	fr2,r6,r7
	fnmsubs	fr0,fr0,fr2,fr1		/* -(fr0 * fr2 - fr1) */
	stfsx	fr0,r3,r7
	addi	r7,r7,4
	bdnz	.Lvnmsubfp_next
	lfd	fr2,32(r1)
	b	fpdisable		/* restores FP state and returns for us */
143
/*
 * vrefp: vector reciprocal estimate.
 * No estimate instruction is used; each of the four single-precision
 * elements is computed with a real divide, 1.0 / x.
 *
 *   r3 -> destination, r4 -> source
 *   dst[i] = 1.0 / src[i]
 *
 * Scratch: r0, r6, r10-r12, ctr, lr.  fr0/fr1 are preserved through
 * fpenable/fpdisable.
 */
_GLOBAL(vrefp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	LDCONST(fr1, fpone)		/* fr1 = 1.0 for the whole loop */
	li	r6,0			/* r6 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* four 4-byte elements */
.Lvrefp_next:
	lfsx	fr0,r4,r6
	fdivs	fr0,fr1,fr0
	stfsx	fr0,r3,r6
	addi	r6,r6,4
	bdnz	.Lvrefp_next
	b	fpdisable		/* restores FP state and returns for us */
161
/*
 * vrsqrtefp: vector reciprocal square-root estimate, floating point.
 * For each of the four single-precision elements, frsqrte supplies the
 * initial estimate and two Newton-Raphson iterations then refine it to
 * sufficient accuracy.
 *
 *   r3 -> destination, r4 -> source
 *
 * Scratch: r0, r6, r10-r12, ctr, lr.  fr0/fr1 are preserved through
 * fpenable/fpdisable; fr2-fr5 are saved and restored here at 32..56(r1).
 */

/*
 * One Newton-Raphson step.  With s in fr0, r in fr1, 1.0 in fr4 and
 * 0.5 in fr5:
 *	fr3 = r * s
 *	fr2 = r * 0.5
 *	fr3 = 1 - s * r * r
 *	fr1 = r + 0.5 * r * (1 - s * r * r)
 */
#define RSQRT_REFINE	\
	fmuls	fr3,fr1,fr0;	\
	fmuls	fr2,fr1,fr5;	\
	fnmsubs	fr3,fr1,fr3,fr4;	\
	fmadds	fr1,fr2,fr3,fr1

_GLOBAL(vrsqrtefp)
	mflr	r12			/* fpdisable returns through r12 */
	bl	fpenable
	stfd	fr5,56(r1)
	stfd	fr4,48(r1)
	stfd	fr3,40(r1)
	stfd	fr2,32(r1)
	LDCONST(fr5, fphalf)		/* fr5 = 0.5 */
	LDCONST(fr4, fpone)		/* fr4 = 1.0 */
	li	r6,0			/* r6 = byte offset of the current element */
	li	r0,4
	mtctr	r0			/* four 4-byte elements */
.Lvrsqrtefp_next:
	lfsx	fr0,r4,r6		/* s */
	frsqrte	fr1,fr0			/* r = frsqrte(s) */
	RSQRT_REFINE
	RSQRT_REFINE
	stfsx	fr1,r3,r6
	addi	r6,r6,4
	bdnz	.Lvrsqrtefp_next
	lfd	fr2,32(r1)
	lfd	fr3,40(r1)
	lfd	fr4,48(r1)
	lfd	fr5,56(r1)
	b	fpdisable		/* restores FP state and returns for us */

#undef RSQRT_REFINE