/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
#include <asm/asm-offsets.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/export.h>
#include <asm/asm-compat.h>

/*
 * Load state from memory into VMX registers including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(load_vr_state)
        li      r4,VRSTATE_VSCR
        lvx     v0,r4,r3
        mtvscr  v0
        REST_32VRS(0,r4,r3)
        blr
EXPORT_SYMBOL(load_vr_state)
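
/*
 * A sketch of the memory layout consumed above, assuming it matches
 * struct thread_vr_state in asm/processor.h (the offsets are generated
 * by asm-offsets.c):
 *
 *	struct thread_vr_state {
 *		vector128 vr[32];	// v0..v31
 *		vector128 vscr;		// at offset VRSTATE_VSCR
 *	};
 *
 * v0 is only a scratch register here: it carries the saved VSCR image
 * into mtvscr, after which REST_32VRS reloads the real v0..v31.
 */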

/*
 * Store VMX state into memory, including VSCR.
 * Assumes the caller has enabled VMX in the MSR.
 */
_GLOBAL(store_vr_state)
        SAVE_32VRS(0, r4, r3)
        mfvscr  v0
        li      r4, VRSTATE_VSCR
        stvx    v0, r4, r3
        blr
EXPORT_SYMBOL(store_vr_state)
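
/*
 * C-level pairing, a sketch (not a verbatim kernel path): callers must
 * hold MSR_VEC while either helper runs, e.g. via msr_check_and_set():
 *
 *	msr_check_and_set(MSR_VEC);
 *	store_vr_state(&tsk->thread.vr_state);	// save on switch-out
 *	...
 *	load_vr_state(&tsk->thread.vr_state);	// restore on switch-in
 *	msr_check_and_clear(MSR_VEC);
 */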

/*
 * Disable VMX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Enables the VMX for use in the kernel on return.
 * On SMP we know the VMX is free, since we give it up every
 * switch (ie, no lazy save of the vector registers).
 *
 * Note that on 32-bit this can only use registers that will be
 * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
 */
_GLOBAL(load_up_altivec)
        mfmsr   r5                      /* grab the current MSR */
        oris    r5,r5,MSR_VEC@h
        MTMSRD(r5)                      /* enable use of AltiVec now */
        isync

        /*
         * While userspace in general ignores VRSAVE, glibc uses it as a boolean
         * to optimise userspace context save/restore. Whenever we take an
         * altivec unavailable exception we must set VRSAVE to something non
         * zero. Set it to all 1s. See also the programming note in the ISA.
         */
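        /*
         * Userspace side, a sketch (not glibc's literal code): its
         * setjmp/getcontext style paths test VRSAVE before touching
         * vector state, roughly:
         *
         *	if (mfspr(SPRN_VRSAVE) != 0)
         *		save_vector_registers();
         *
         * so a task left with VRSAVE == 0 could have live vector state
         * silently skipped. Hence the fixup below.
         */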
        mfspr   r4,SPRN_VRSAVE
        cmpwi   0,r4,0
        bne+    1f
        li      r4,-1
        mtspr   SPRN_VRSAVE,r4
1:
        /* enable use of VMX after return */
#ifdef CONFIG_PPC32
        mfspr   r5,SPRN_SPRG_THREAD     /* current task's THREAD (phys) */
        oris    r9,r9,MSR_VEC@h
#else
        ld      r4,PACACURRENT(r13)
        addi    r5,r4,THREAD            /* Get THREAD */
        oris    r12,r12,MSR_VEC@h
        std     r12,_MSR(r1)
#endif
        /* Don't care if r4 overflows, this is desired behaviour */
        lbz     r4,THREAD_LOAD_VEC(r5)
        addi    r4,r4,1
        stb     r4,THREAD_LOAD_VEC(r5)
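        /*
         * load_vec is a byte-sized usage counter consulted by
         * restore_math() on the exception-return path to decide whether
         * to restore VMX state eagerly; wrapping back to zero merely
         * drops the task back to the fault-on-first-use path, hence
         * "desired behaviour" above.
         */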
        addi    r6,r5,THREAD_VRSTATE
        li      r4,1
        li      r10,VRSTATE_VSCR
        stw     r4,THREAD_USED_VR(r5)
        lvx     v0,r10,r6
        mtvscr  v0
        REST_32VRS(0,r4,r6)
        /* restore registers and return */
        blr

/*
 * save_altivec(tsk)
 * Save the vector registers to its thread_struct
 */
_GLOBAL(save_altivec)
        addi    r3,r3,THREAD            /* want THREAD of task */
        PPC_LL  r7,THREAD_VRSAVEAREA(r3)
        PPC_LL  r5,PT_REGS(r3)
        PPC_LCMPI       0,r7,0
        bne     2f
        addi    r7,r3,THREAD_VRSTATE
2:      SAVE_32VRS(0,r4,r7)
        mfvscr  v0
        li      r4,VRSTATE_VSCR
        stvx    v0,r4,r7
        blr

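/*
 * Roughly, in C (a sketch, not the kernel's literal code):
 *
 *	void save_altivec(struct task_struct *tsk)
 *	{
 *		struct thread_vr_state *p = tsk->thread.vr_save_area;
 *		if (!p)
 *			p = &tsk->thread.vr_state;
 *		// store v0..v31 and VSCR into *p
 *	}
 *
 * i.e. vr_save_area, when non-NULL, redirects the save to an alternate
 * buffer; otherwise the task's normal vr_state is used.
 */
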
#ifdef CONFIG_VSX

#ifdef CONFIG_PPC32
#error This asm code isn't ready for 32-bit kernels
#endif

/*
 * load_up_vsx(unused, unused, tsk)
 * Disable VSX for the task which had it previously,
 * and save its vector registers in its thread_struct.
 * Reuse the fp and vsx saves, but first check to see if they have
 * been saved already.
 */
_GLOBAL(load_up_vsx)
/* Load FP and VSX registers if they haven't been done yet */
        andi.   r5,r12,MSR_FP
        beql+   load_up_fpu             /* skip if already loaded */
        andis.  r5,r12,MSR_VEC@h
        beql+   load_up_altivec         /* skip if already loaded */
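        /*
         * "beql" above is a conditional branch-and-link: it calls
         * load_up_fpu / load_up_altivec only when the corresponding MSR
         * bit is still clear (andi./andis. set CR0.eq for a zero
         * result), and each callee returns here via the link register.
         */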

        ld      r4,PACACURRENT(r13)
        addi    r4,r4,THREAD            /* Get THREAD */
        li      r6,1
        stw     r6,THREAD_USED_VSR(r4)  /* ... also set thread used vsr */
        /* enable use of VSX after return */
        oris    r12,r12,MSR_VSX@h
        std     r12,_MSR(r1)
        b       fast_exception_return

#endif /* CONFIG_VSX */

/*
 * The routines below are in assembler so we can closely control the
 * usage of floating-point registers. These routines must be called
 * with preempt disabled.
 */
#ifdef CONFIG_PPC32
        .data
fpzero:
        .long   0
fpone:
        .long   0x3f800000              /* 1.0 in single-precision FP */
fphalf:
        .long   0x3f000000              /* 0.5 in single-precision FP */

#define LDCONST(fr, name)       \
        lis     r11,name@ha;    \
        lfs     fr,name@l(r11)
#else

        .section ".toc","aw"
fpzero:
        .tc     FD_0_0[TC],0
fpone:
        .tc     FD_3ff00000_0[TC],0x3ff0000000000000    /* 1.0 */
fphalf:
        .tc     FD_3fe00000_0[TC],0x3fe0000000000000    /* 0.5 */

#define LDCONST(fr, name)       \
        lfd     fr,name@toc(r2)
#endif

        .text
/*
 * Internal routine to enable floating point and set FPSCR to 0.
 * Don't call it from C; it doesn't use the normal calling convention.
 */
fpenable:
#ifdef CONFIG_PPC32
        stwu    r1,-64(r1)
#else
        stdu    r1,-64(r1)
#endif
        mfmsr   r10
        ori     r11,r10,MSR_FP
        mtmsr   r11
        isync
        stfd    fr0,24(r1)
        stfd    fr1,16(r1)
        stfd    fr31,8(r1)
        LDCONST(fr1, fpzero)
        mffs    fr31
        MTFSF_L(fr1)
        blr

fpdisable:
        mtlr    r12
        MTFSF_L(fr31)
        lfd     fr31,8(r1)
        lfd     fr1,16(r1)
        lfd     fr0,24(r1)
        mtmsr   r10
        isync
        addi    r1,r1,64
        blr

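/*
 * Stack frame shared by fpenable/fpdisable and their callers, a sketch
 * derived from the offsets above (64 bytes, allocated in fpenable and
 * popped in fpdisable):
 *
 *	 0(r1)     back chain
 *	 8(r1)     saved fr31 (the caller's FPSCR is parked in fr31)
 *	16(r1)     saved fr1
 *	24(r1)     saved fr0
 *	32-56(r1)  scratch for fr2-fr5, used by vmaddfp, vnmsubfp
 *	           and vrsqrtefp
 *
 * Callers stash LR in r12 before "bl fpenable"; fpdisable returns
 * through it.
 */
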
/*
 * Vector add, floating point.
 */
_GLOBAL(vaddfp)
        mflr    r12
        bl      fpenable
        li      r0,4
        mtctr   r0
        li      r6,0
1:      lfsx    fr0,r4,r6
        lfsx    fr1,r5,r6
        fadds   fr0,fr0,fr1
        stfsx   fr0,r3,r6
        addi    r6,r6,4
        bdnz    1b
        b       fpdisable

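/*
 * The scalar-FP emulation loop above is, in C (a sketch; dst/a/b stand
 * for the four-float vectors passed in r3/r4/r5):
 *
 *	for (int i = 0; i < 4; i++)
 *		dst[i] = a[i] + b[i];
 *
 * vsubfp, vmaddfp and vnmsubfp below follow the same pattern with
 * fsubs, fmadds and fnmsubs as the per-element operation.
 */
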
/*
 * Vector subtract, floating point.
 */
_GLOBAL(vsubfp)
        mflr    r12
        bl      fpenable
        li      r0,4
        mtctr   r0
        li      r6,0
1:      lfsx    fr0,r4,r6
        lfsx    fr1,r5,r6
        fsubs   fr0,fr0,fr1
        stfsx   fr0,r3,r6
        addi    r6,r6,4
        bdnz    1b
        b       fpdisable

/*
 * Vector multiply and add, floating point.
 */
_GLOBAL(vmaddfp)
        mflr    r12
        bl      fpenable
        stfd    fr2,32(r1)
        li      r0,4
        mtctr   r0
        li      r7,0
1:      lfsx    fr0,r4,r7
        lfsx    fr1,r5,r7
        lfsx    fr2,r6,r7
        fmadds  fr0,fr0,fr2,fr1
        stfsx   fr0,r3,r7
        addi    r7,r7,4
        bdnz    1b
        lfd     fr2,32(r1)
        b       fpdisable

/*
 * Vector negative multiply and subtract, floating point.
 */
_GLOBAL(vnmsubfp)
        mflr    r12
        bl      fpenable
        stfd    fr2,32(r1)
        li      r0,4
        mtctr   r0
        li      r7,0
1:      lfsx    fr0,r4,r7
        lfsx    fr1,r5,r7
        lfsx    fr2,r6,r7
        fnmsubs fr0,fr0,fr2,fr1
        stfsx   fr0,r3,r7
        addi    r7,r7,4
        bdnz    1b
        lfd     fr2,32(r1)
        b       fpdisable

/*
 * Vector reciprocal estimate. We just compute 1.0/x.
 * r3 -> destination, r4 -> source.
 */
_GLOBAL(vrefp)
        mflr    r12
        bl      fpenable
        li      r0,4
        LDCONST(fr1, fpone)
        mtctr   r0
        li      r6,0
1:      lfsx    fr0,r4,r6
        fdivs   fr0,fr1,fr0
        stfsx   fr0,r3,r6
        addi    r6,r6,4
        bdnz    1b
        b       fpdisable

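/*
 * Note: fdivs produces a correctly rounded single-precision quotient,
 * which comfortably exceeds the (much looser) accuracy the VMX vrefp
 * estimate is required to deliver, so a full divide is a safe stand-in.
 */
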
/*
 * Vector reciprocal square-root estimate, floating point.
 * We use the frsqrte instruction for the initial estimate followed
 * by 2 iterations of Newton-Raphson to get sufficient accuracy.
 * r3 -> destination, r4 -> source.
 */
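/*
 * Derivation of the update used below: applying Newton-Raphson to
 * f(r) = 1/r^2 - s (whose positive root is r = 1/sqrt(s)) gives
 *
 *	r' = r - f(r)/f'(r) = r + 0.5 * r * (1 - s*r*r)
 *
 * which is exactly the fmuls/fnmsubs/fmadds sequence in the loop; each
 * iteration roughly doubles the number of correct bits in the estimate.
 */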
_GLOBAL(vrsqrtefp)
        mflr    r12
        bl      fpenable
        stfd    fr2,32(r1)
        stfd    fr3,40(r1)
        stfd    fr4,48(r1)
        stfd    fr5,56(r1)
        li      r0,4
        LDCONST(fr4, fpone)
        LDCONST(fr5, fphalf)
        mtctr   r0
        li      r6,0
1:      lfsx    fr0,r4,r6
        frsqrte fr1,fr0                 /* r = frsqrte(s) */
        fmuls   fr3,fr1,fr0             /* r * s */
        fmuls   fr2,fr1,fr5             /* r * 0.5 */
        fnmsubs fr3,fr1,fr3,fr4         /* 1 - s * r * r */
        fmadds  fr1,fr2,fr3,fr1         /* r = r + 0.5 * r * (1 - s * r * r) */
        fmuls   fr3,fr1,fr0             /* r * s */
        fmuls   fr2,fr1,fr5             /* r * 0.5 */
        fnmsubs fr3,fr1,fr3,fr4         /* 1 - s * r * r */
        fmadds  fr1,fr2,fr3,fr1         /* r = r + 0.5 * r * (1 - s * r * r) */
        stfsx   fr1,r3,r6
        addi    r6,r6,4
        bdnz    1b
        lfd     fr5,56(r1)
        lfd     fr4,48(r1)
        lfd     fr3,40(r1)
        lfd     fr2,32(r1)
        b       fpdisable