Commit | Line | Data |
---|---|---|
2874c5fd | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
6a800f36 LY |
2 | /* |
3 | * arch/powerpc/math-emu/math_efp.c | |
4 | * | |
ac6f1203 | 5 | * Copyright (C) 2006-2008, 2010 Freescale Semiconductor, Inc. |
6a800f36 LY |
6 | * |
7 | * Author: Ebony Zhu, <ebony.zhu@freescale.com> | |
8 | * Yu Liu, <yu.liu@freescale.com> | |
9 | * | |
10 | * Derived from arch/alpha/math-emu/math.c | |
11 | * arch/powerpc/math-emu/math.c | |
12 | * | |
13 | * Description: | |
14 | * This file is the exception handler to make E500 SPE instructions | |
15 | * fully comply with IEEE-754 floating point standard. | |
6a800f36 LY |
16 | */ |
17 | ||
18 | #include <linux/types.h> | |
01c9ccee | 19 | #include <linux/prctl.h> |
cfe0d370 | 20 | #include <linux/module.h> |
6a800f36 | 21 | |
7c0f6ba6 | 22 | #include <linux/uaccess.h> |
6a800f36 LY |
23 | #include <asm/reg.h> |
24 | ||
25 | #define FP_EX_BOOKE_E500_SPE | |
26 | #include <asm/sfp-machine.h> | |
27 | ||
28 | #include <math-emu/soft-fp.h> | |
29 | #include <math-emu/single.h> | |
30 | #include <math-emu/double.h> | |
31 | ||
/* Primary opcode (speinsn >> 26) of the instructions emulated in this file. */
#define EFAPU		0x4

/*
 * Values of the 3-bit field (speinsn >> 5) & 0x7.  The handlers switch on
 * it to choose the vector single, scalar single or scalar double code path.
 */
#define VCT		0x4
#define SPFP		0x6
#define DPFP		0x7

/*
 * Function codes (speinsn & 0x7ff) handled in the SPFP (scalar single)
 * code path.
 */
#define EFSADD		0x2c0
#define EFSSUB		0x2c1
#define EFSABS		0x2c4
#define EFSNABS		0x2c5
#define EFSNEG		0x2c6
#define EFSMUL		0x2c8
#define EFSDIV		0x2c9
#define EFSCMPGT	0x2cc
#define EFSCMPLT	0x2cd
#define EFSCMPEQ	0x2ce
#define EFSCFD		0x2cf
#define EFSCFSI		0x2d1
#define EFSCTUI		0x2d4
#define EFSCTSI		0x2d5
#define EFSCTUF		0x2d6
#define EFSCTSF		0x2d7
#define EFSCTUIZ	0x2d8
#define EFSCTSIZ	0x2da

/*
 * Function codes handled in the VCT (vector single) code path; each one
 * operates on both 32-bit words of the operands.
 */
#define EVFSADD		0x280
#define EVFSSUB		0x281
#define EVFSABS		0x284
#define EVFSNABS	0x285
#define EVFSNEG		0x286
#define EVFSMUL		0x288
#define EVFSDIV		0x289
#define EVFSCMPGT	0x28c
#define EVFSCMPLT	0x28d
#define EVFSCMPEQ	0x28e
#define EVFSCTUI	0x294
#define EVFSCTSI	0x295
#define EVFSCTUF	0x296
#define EVFSCTSF	0x297
#define EVFSCTUIZ	0x298
#define EVFSCTSIZ	0x29a

/* Function codes handled in the DPFP (scalar double) code path. */
#define EFDADD		0x2e0
#define EFDSUB		0x2e1
#define EFDABS		0x2e4
#define EFDNABS		0x2e5
#define EFDNEG		0x2e6
#define EFDMUL		0x2e8
#define EFDDIV		0x2e9
#define EFDCTUIDZ	0x2ea
#define EFDCTSIDZ	0x2eb
#define EFDCMPGT	0x2ec
#define EFDCMPLT	0x2ed
#define EFDCMPEQ	0x2ee
#define EFDCFS		0x2ef
#define EFDCTUI		0x2f4
#define EFDCTSI		0x2f5
#define EFDCTUF		0x2f6
#define EFDCTSF		0x2f7
#define EFDCTUIZ	0x2f8
#define EFDCTSIZ	0x2fa

/*
 * Operand classes returned by insn_type().  do_spe_mathemu() uses them to
 * decide which source operands to unpack:
 *   AB     - operands A and B
 *   XA     - operand A only
 *   XB     - operand B only
 *   XCR    - operands A and B; used for the compare instructions, whose
 *            result is written to a condition register field
 *   NOTYPE - function code not recognised
 */
#define AB	2
#define XA	3
#define XB	4
#define XCR	5
#define NOTYPE	0

/* Bit 31 of a 32-bit word and bit 63 of a 64-bit doubleword. */
#define SIGN_BIT_S	(1UL << 31)
#define SIGN_BIT_D	(1ULL << 63)
/*
 * Exception flags that do_spe_mathemu() copies from FP_CUR_EXCEPTIONS into
 * the value it writes back to SPEFSCR.
 */
#define FP_EX_MASK	(FP_EX_INEXACT | FP_EX_INVALID | FP_EX_DIVZERO | \
			FP_EX_UNDERFLOW | FP_EX_OVERFLOW)
104 | ||
ac6f1203 LY |
/*
 * Set to 1 by spe_mathemu_init() on affected E500 revisions.  When set,
 * do_spe_mathemu() re-issues an unrecognised instruction instead of
 * reporting it as unsupported.
 */
static int have_e500_cpu_a005_erratum;

/*
 * One 64-bit operand viewed either as a single doubleword (dp[0]) or as two
 * 32-bit words.  The handlers load wp[0] from current->thread.evr[] and
 * wp[1] from regs->gpr[].
 */
union dw_union {
	u64 dp[1];
	u32 wp[2];
};
111 | ||
112 | static unsigned long insn_type(unsigned long speinsn) | |
113 | { | |
114 | unsigned long ret = NOTYPE; | |
115 | ||
116 | switch (speinsn & 0x7ff) { | |
117 | case EFSABS: ret = XA; break; | |
118 | case EFSADD: ret = AB; break; | |
119 | case EFSCFD: ret = XB; break; | |
120 | case EFSCMPEQ: ret = XCR; break; | |
121 | case EFSCMPGT: ret = XCR; break; | |
122 | case EFSCMPLT: ret = XCR; break; | |
123 | case EFSCTSF: ret = XB; break; | |
124 | case EFSCTSI: ret = XB; break; | |
125 | case EFSCTSIZ: ret = XB; break; | |
126 | case EFSCTUF: ret = XB; break; | |
127 | case EFSCTUI: ret = XB; break; | |
128 | case EFSCTUIZ: ret = XB; break; | |
129 | case EFSDIV: ret = AB; break; | |
130 | case EFSMUL: ret = AB; break; | |
131 | case EFSNABS: ret = XA; break; | |
132 | case EFSNEG: ret = XA; break; | |
133 | case EFSSUB: ret = AB; break; | |
134 | case EFSCFSI: ret = XB; break; | |
135 | ||
136 | case EVFSABS: ret = XA; break; | |
137 | case EVFSADD: ret = AB; break; | |
138 | case EVFSCMPEQ: ret = XCR; break; | |
139 | case EVFSCMPGT: ret = XCR; break; | |
140 | case EVFSCMPLT: ret = XCR; break; | |
141 | case EVFSCTSF: ret = XB; break; | |
142 | case EVFSCTSI: ret = XB; break; | |
143 | case EVFSCTSIZ: ret = XB; break; | |
144 | case EVFSCTUF: ret = XB; break; | |
145 | case EVFSCTUI: ret = XB; break; | |
146 | case EVFSCTUIZ: ret = XB; break; | |
147 | case EVFSDIV: ret = AB; break; | |
148 | case EVFSMUL: ret = AB; break; | |
149 | case EVFSNABS: ret = XA; break; | |
150 | case EVFSNEG: ret = XA; break; | |
151 | case EVFSSUB: ret = AB; break; | |
152 | ||
153 | case EFDABS: ret = XA; break; | |
154 | case EFDADD: ret = AB; break; | |
155 | case EFDCFS: ret = XB; break; | |
156 | case EFDCMPEQ: ret = XCR; break; | |
157 | case EFDCMPGT: ret = XCR; break; | |
158 | case EFDCMPLT: ret = XCR; break; | |
159 | case EFDCTSF: ret = XB; break; | |
160 | case EFDCTSI: ret = XB; break; | |
161 | case EFDCTSIDZ: ret = XB; break; | |
162 | case EFDCTSIZ: ret = XB; break; | |
163 | case EFDCTUF: ret = XB; break; | |
164 | case EFDCTUI: ret = XB; break; | |
165 | case EFDCTUIDZ: ret = XB; break; | |
166 | case EFDCTUIZ: ret = XB; break; | |
167 | case EFDDIV: ret = AB; break; | |
168 | case EFDMUL: ret = AB; break; | |
169 | case EFDNABS: ret = XA; break; | |
170 | case EFDNEG: ret = XA; break; | |
171 | case EFDSUB: ret = AB; break; | |
6a800f36 LY |
172 | } |
173 | ||
174 | return ret; | |
175 | } | |
176 | ||
177 | int do_spe_mathemu(struct pt_regs *regs) | |
178 | { | |
179 | FP_DECL_EX; | |
180 | int IR, cmp; | |
181 | ||
182 | unsigned long type, func, fc, fa, fb, src, speinsn; | |
183 | union dw_union vc, va, vb; | |
184 | ||
185 | if (get_user(speinsn, (unsigned int __user *) regs->nip)) | |
186 | return -EFAULT; | |
187 | if ((speinsn >> 26) != EFAPU) | |
188 | return -EINVAL; /* not an spe instruction */ | |
189 | ||
190 | type = insn_type(speinsn); | |
191 | if (type == NOTYPE) | |
09af52f7 | 192 | goto illegal; |
6a800f36 LY |
193 | |
194 | func = speinsn & 0x7ff; | |
195 | fc = (speinsn >> 21) & 0x1f; | |
196 | fa = (speinsn >> 16) & 0x1f; | |
197 | fb = (speinsn >> 11) & 0x1f; | |
198 | src = (speinsn >> 5) & 0x7; | |
199 | ||
200 | vc.wp[0] = current->thread.evr[fc]; | |
201 | vc.wp[1] = regs->gpr[fc]; | |
202 | va.wp[0] = current->thread.evr[fa]; | |
203 | va.wp[1] = regs->gpr[fa]; | |
204 | vb.wp[0] = current->thread.evr[fb]; | |
205 | vb.wp[1] = regs->gpr[fb]; | |
206 | ||
207 | __FPU_FPSCR = mfspr(SPRN_SPEFSCR); | |
208 | ||
b430abc4 LY |
209 | pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR); |
210 | pr_debug("vc: %08x %08x\n", vc.wp[0], vc.wp[1]); | |
211 | pr_debug("va: %08x %08x\n", va.wp[0], va.wp[1]); | |
212 | pr_debug("vb: %08x %08x\n", vb.wp[0], vb.wp[1]); | |
6a800f36 LY |
213 | |
214 | switch (src) { | |
215 | case SPFP: { | |
216 | FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); | |
217 | ||
218 | switch (type) { | |
219 | case AB: | |
220 | case XCR: | |
221 | FP_UNPACK_SP(SA, va.wp + 1); | |
7245fc5b | 222 | fallthrough; |
6a800f36 LY |
223 | case XB: |
224 | FP_UNPACK_SP(SB, vb.wp + 1); | |
225 | break; | |
226 | case XA: | |
227 | FP_UNPACK_SP(SA, va.wp + 1); | |
228 | break; | |
229 | } | |
230 | ||
7245fc5b CL |
231 | pr_debug("SA: %d %08x %d (%d)\n", SA_s, SA_f, SA_e, SA_c); |
232 | pr_debug("SB: %d %08x %d (%d)\n", SB_s, SB_f, SB_e, SB_c); | |
6a800f36 LY |
233 | |
234 | switch (func) { | |
235 | case EFSABS: | |
236 | vc.wp[1] = va.wp[1] & ~SIGN_BIT_S; | |
237 | goto update_regs; | |
238 | ||
239 | case EFSNABS: | |
240 | vc.wp[1] = va.wp[1] | SIGN_BIT_S; | |
241 | goto update_regs; | |
242 | ||
243 | case EFSNEG: | |
244 | vc.wp[1] = va.wp[1] ^ SIGN_BIT_S; | |
245 | goto update_regs; | |
246 | ||
247 | case EFSADD: | |
248 | FP_ADD_S(SR, SA, SB); | |
249 | goto pack_s; | |
250 | ||
251 | case EFSSUB: | |
252 | FP_SUB_S(SR, SA, SB); | |
253 | goto pack_s; | |
254 | ||
255 | case EFSMUL: | |
256 | FP_MUL_S(SR, SA, SB); | |
257 | goto pack_s; | |
258 | ||
259 | case EFSDIV: | |
260 | FP_DIV_S(SR, SA, SB); | |
261 | goto pack_s; | |
262 | ||
263 | case EFSCMPEQ: | |
264 | cmp = 0; | |
265 | goto cmp_s; | |
266 | ||
267 | case EFSCMPGT: | |
268 | cmp = 1; | |
269 | goto cmp_s; | |
270 | ||
271 | case EFSCMPLT: | |
272 | cmp = -1; | |
273 | goto cmp_s; | |
274 | ||
275 | case EFSCTSF: | |
276 | case EFSCTUF: | |
28fbf1d5 JM |
277 | if (SB_c == FP_CLS_NAN) { |
278 | vc.wp[1] = 0; | |
279 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
280 | } else { | |
281 | SB_e += (func == EFSCTSF ? 31 : 32); | |
282 | FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, | |
7245fc5b | 283 | (func == EFSCTSF) ? 1 : 0); |
6a800f36 LY |
284 | } |
285 | goto update_regs; | |
286 | ||
287 | case EFSCFD: { | |
288 | FP_DECL_D(DB); | |
289 | FP_CLEAR_EXCEPTIONS; | |
290 | FP_UNPACK_DP(DB, vb.dp); | |
b430abc4 | 291 | |
7245fc5b | 292 | pr_debug("DB: %d %08x %08x %d (%d)\n", |
6a800f36 | 293 | DB_s, DB_f1, DB_f0, DB_e, DB_c); |
b430abc4 | 294 | |
6a800f36 LY |
295 | FP_CONV(S, D, 1, 2, SR, DB); |
296 | goto pack_s; | |
297 | } | |
298 | ||
299 | case EFSCTSI: | |
6a800f36 | 300 | case EFSCTUI: |
28fbf1d5 JM |
301 | if (SB_c == FP_CLS_NAN) { |
302 | vc.wp[1] = 0; | |
303 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
304 | } else { | |
305 | FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, | |
7245fc5b | 306 | ((func & 0x3) != 0) ? 1 : 0); |
28fbf1d5 JM |
307 | } |
308 | goto update_regs; | |
309 | ||
310 | case EFSCTSIZ: | |
6a800f36 | 311 | case EFSCTUIZ: |
28fbf1d5 JM |
312 | if (SB_c == FP_CLS_NAN) { |
313 | vc.wp[1] = 0; | |
314 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
6a800f36 | 315 | } else { |
28fbf1d5 | 316 | FP_TO_INT_S(vc.wp[1], SB, 32, |
7245fc5b | 317 | ((func & 0x3) != 0) ? 1 : 0); |
6a800f36 | 318 | } |
6a800f36 LY |
319 | goto update_regs; |
320 | ||
321 | default: | |
322 | goto illegal; | |
323 | } | |
324 | break; | |
325 | ||
326 | pack_s: | |
7245fc5b | 327 | pr_debug("SR: %d %08x %d (%d)\n", SR_s, SR_f, SR_e, SR_c); |
b430abc4 | 328 | |
6a800f36 LY |
329 | FP_PACK_SP(vc.wp + 1, SR); |
330 | goto update_regs; | |
331 | ||
332 | cmp_s: | |
333 | FP_CMP_S(IR, SA, SB, 3); | |
334 | if (IR == 3 && (FP_ISSIGNAN_S(SA) || FP_ISSIGNAN_S(SB))) | |
335 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
336 | if (IR == cmp) { | |
337 | IR = 0x4; | |
338 | } else { | |
339 | IR = 0; | |
340 | } | |
341 | goto update_ccr; | |
342 | } | |
343 | ||
344 | case DPFP: { | |
345 | FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); | |
346 | ||
347 | switch (type) { | |
348 | case AB: | |
349 | case XCR: | |
350 | FP_UNPACK_DP(DA, va.dp); | |
7245fc5b | 351 | fallthrough; |
6a800f36 LY |
352 | case XB: |
353 | FP_UNPACK_DP(DB, vb.dp); | |
354 | break; | |
355 | case XA: | |
356 | FP_UNPACK_DP(DA, va.dp); | |
357 | break; | |
358 | } | |
359 | ||
7245fc5b | 360 | pr_debug("DA: %d %08x %08x %d (%d)\n", |
6a800f36 | 361 | DA_s, DA_f1, DA_f0, DA_e, DA_c); |
7245fc5b | 362 | pr_debug("DB: %d %08x %08x %d (%d)\n", |
6a800f36 | 363 | DB_s, DB_f1, DB_f0, DB_e, DB_c); |
6a800f36 LY |
364 | |
365 | switch (func) { | |
366 | case EFDABS: | |
367 | vc.dp[0] = va.dp[0] & ~SIGN_BIT_D; | |
368 | goto update_regs; | |
369 | ||
370 | case EFDNABS: | |
371 | vc.dp[0] = va.dp[0] | SIGN_BIT_D; | |
372 | goto update_regs; | |
373 | ||
374 | case EFDNEG: | |
375 | vc.dp[0] = va.dp[0] ^ SIGN_BIT_D; | |
376 | goto update_regs; | |
377 | ||
378 | case EFDADD: | |
379 | FP_ADD_D(DR, DA, DB); | |
380 | goto pack_d; | |
381 | ||
382 | case EFDSUB: | |
383 | FP_SUB_D(DR, DA, DB); | |
384 | goto pack_d; | |
385 | ||
386 | case EFDMUL: | |
387 | FP_MUL_D(DR, DA, DB); | |
388 | goto pack_d; | |
389 | ||
390 | case EFDDIV: | |
391 | FP_DIV_D(DR, DA, DB); | |
392 | goto pack_d; | |
393 | ||
394 | case EFDCMPEQ: | |
395 | cmp = 0; | |
396 | goto cmp_d; | |
397 | ||
398 | case EFDCMPGT: | |
399 | cmp = 1; | |
400 | goto cmp_d; | |
401 | ||
402 | case EFDCMPLT: | |
403 | cmp = -1; | |
404 | goto cmp_d; | |
405 | ||
406 | case EFDCTSF: | |
407 | case EFDCTUF: | |
28fbf1d5 JM |
408 | if (DB_c == FP_CLS_NAN) { |
409 | vc.wp[1] = 0; | |
410 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
411 | } else { | |
412 | DB_e += (func == EFDCTSF ? 31 : 32); | |
413 | FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, | |
7245fc5b | 414 | (func == EFDCTSF) ? 1 : 0); |
6a800f36 LY |
415 | } |
416 | goto update_regs; | |
417 | ||
418 | case EFDCFS: { | |
419 | FP_DECL_S(SB); | |
420 | FP_CLEAR_EXCEPTIONS; | |
421 | FP_UNPACK_SP(SB, vb.wp + 1); | |
b430abc4 | 422 | |
7245fc5b | 423 | pr_debug("SB: %d %08x %d (%d)\n", |
6a800f36 | 424 | SB_s, SB_f, SB_e, SB_c); |
b430abc4 | 425 | |
6a800f36 LY |
426 | FP_CONV(D, S, 2, 1, DR, SB); |
427 | goto pack_d; | |
428 | } | |
429 | ||
430 | case EFDCTUIDZ: | |
431 | case EFDCTSIDZ: | |
28fbf1d5 JM |
432 | if (DB_c == FP_CLS_NAN) { |
433 | vc.dp[0] = 0; | |
434 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
435 | } else { | |
436 | FP_TO_INT_D(vc.dp[0], DB, 64, | |
7245fc5b | 437 | ((func & 0x1) == 0) ? 1 : 0); |
28fbf1d5 | 438 | } |
6a800f36 LY |
439 | goto update_regs; |
440 | ||
441 | case EFDCTUI: | |
442 | case EFDCTSI: | |
28fbf1d5 JM |
443 | if (DB_c == FP_CLS_NAN) { |
444 | vc.wp[1] = 0; | |
445 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
446 | } else { | |
447 | FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, | |
7245fc5b | 448 | ((func & 0x3) != 0) ? 1 : 0); |
28fbf1d5 JM |
449 | } |
450 | goto update_regs; | |
451 | ||
6a800f36 LY |
452 | case EFDCTUIZ: |
453 | case EFDCTSIZ: | |
28fbf1d5 JM |
454 | if (DB_c == FP_CLS_NAN) { |
455 | vc.wp[1] = 0; | |
456 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
6a800f36 | 457 | } else { |
28fbf1d5 | 458 | FP_TO_INT_D(vc.wp[1], DB, 32, |
7245fc5b | 459 | ((func & 0x3) != 0) ? 1 : 0); |
6a800f36 | 460 | } |
6a800f36 LY |
461 | goto update_regs; |
462 | ||
463 | default: | |
464 | goto illegal; | |
465 | } | |
466 | break; | |
467 | ||
468 | pack_d: | |
7245fc5b | 469 | pr_debug("DR: %d %08x %08x %d (%d)\n", |
6a800f36 | 470 | DR_s, DR_f1, DR_f0, DR_e, DR_c); |
b430abc4 | 471 | |
6a800f36 LY |
472 | FP_PACK_DP(vc.dp, DR); |
473 | goto update_regs; | |
474 | ||
475 | cmp_d: | |
476 | FP_CMP_D(IR, DA, DB, 3); | |
477 | if (IR == 3 && (FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB))) | |
478 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
479 | if (IR == cmp) { | |
480 | IR = 0x4; | |
481 | } else { | |
482 | IR = 0; | |
483 | } | |
484 | goto update_ccr; | |
485 | ||
486 | } | |
487 | ||
488 | case VCT: { | |
489 | FP_DECL_S(SA0); FP_DECL_S(SB0); FP_DECL_S(SR0); | |
490 | FP_DECL_S(SA1); FP_DECL_S(SB1); FP_DECL_S(SR1); | |
491 | int IR0, IR1; | |
492 | ||
493 | switch (type) { | |
494 | case AB: | |
495 | case XCR: | |
496 | FP_UNPACK_SP(SA0, va.wp); | |
497 | FP_UNPACK_SP(SA1, va.wp + 1); | |
7245fc5b | 498 | fallthrough; |
6a800f36 LY |
499 | case XB: |
500 | FP_UNPACK_SP(SB0, vb.wp); | |
501 | FP_UNPACK_SP(SB1, vb.wp + 1); | |
502 | break; | |
503 | case XA: | |
504 | FP_UNPACK_SP(SA0, va.wp); | |
505 | FP_UNPACK_SP(SA1, va.wp + 1); | |
506 | break; | |
507 | } | |
508 | ||
7245fc5b | 509 | pr_debug("SA0: %d %08x %d (%d)\n", |
b430abc4 | 510 | SA0_s, SA0_f, SA0_e, SA0_c); |
7245fc5b | 511 | pr_debug("SA1: %d %08x %d (%d)\n", |
b430abc4 | 512 | SA1_s, SA1_f, SA1_e, SA1_c); |
7245fc5b | 513 | pr_debug("SB0: %d %08x %d (%d)\n", |
b430abc4 | 514 | SB0_s, SB0_f, SB0_e, SB0_c); |
7245fc5b | 515 | pr_debug("SB1: %d %08x %d (%d)\n", |
b430abc4 | 516 | SB1_s, SB1_f, SB1_e, SB1_c); |
6a800f36 LY |
517 | |
518 | switch (func) { | |
519 | case EVFSABS: | |
520 | vc.wp[0] = va.wp[0] & ~SIGN_BIT_S; | |
521 | vc.wp[1] = va.wp[1] & ~SIGN_BIT_S; | |
522 | goto update_regs; | |
523 | ||
524 | case EVFSNABS: | |
525 | vc.wp[0] = va.wp[0] | SIGN_BIT_S; | |
526 | vc.wp[1] = va.wp[1] | SIGN_BIT_S; | |
527 | goto update_regs; | |
528 | ||
529 | case EVFSNEG: | |
530 | vc.wp[0] = va.wp[0] ^ SIGN_BIT_S; | |
531 | vc.wp[1] = va.wp[1] ^ SIGN_BIT_S; | |
532 | goto update_regs; | |
533 | ||
534 | case EVFSADD: | |
535 | FP_ADD_S(SR0, SA0, SB0); | |
536 | FP_ADD_S(SR1, SA1, SB1); | |
537 | goto pack_vs; | |
538 | ||
539 | case EVFSSUB: | |
540 | FP_SUB_S(SR0, SA0, SB0); | |
541 | FP_SUB_S(SR1, SA1, SB1); | |
542 | goto pack_vs; | |
543 | ||
544 | case EVFSMUL: | |
545 | FP_MUL_S(SR0, SA0, SB0); | |
546 | FP_MUL_S(SR1, SA1, SB1); | |
547 | goto pack_vs; | |
548 | ||
549 | case EVFSDIV: | |
550 | FP_DIV_S(SR0, SA0, SB0); | |
551 | FP_DIV_S(SR1, SA1, SB1); | |
552 | goto pack_vs; | |
553 | ||
554 | case EVFSCMPEQ: | |
555 | cmp = 0; | |
556 | goto cmp_vs; | |
557 | ||
558 | case EVFSCMPGT: | |
559 | cmp = 1; | |
560 | goto cmp_vs; | |
561 | ||
562 | case EVFSCMPLT: | |
563 | cmp = -1; | |
564 | goto cmp_vs; | |
565 | ||
6a800f36 | 566 | case EVFSCTUF: |
28fbf1d5 JM |
567 | case EVFSCTSF: |
568 | if (SB0_c == FP_CLS_NAN) { | |
569 | vc.wp[0] = 0; | |
570 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
571 | } else { | |
572 | SB0_e += (func == EVFSCTSF ? 31 : 32); | |
573 | FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, | |
7245fc5b | 574 | (func == EVFSCTSF) ? 1 : 0); |
28fbf1d5 JM |
575 | } |
576 | if (SB1_c == FP_CLS_NAN) { | |
577 | vc.wp[1] = 0; | |
578 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
579 | } else { | |
580 | SB1_e += (func == EVFSCTSF ? 31 : 32); | |
581 | FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, | |
7245fc5b | 582 | (func == EVFSCTSF) ? 1 : 0); |
28fbf1d5 | 583 | } |
6a800f36 LY |
584 | goto update_regs; |
585 | ||
586 | case EVFSCTUI: | |
587 | case EVFSCTSI: | |
28fbf1d5 JM |
588 | if (SB0_c == FP_CLS_NAN) { |
589 | vc.wp[0] = 0; | |
590 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
591 | } else { | |
592 | FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, | |
7245fc5b | 593 | ((func & 0x3) != 0) ? 1 : 0); |
28fbf1d5 JM |
594 | } |
595 | if (SB1_c == FP_CLS_NAN) { | |
596 | vc.wp[1] = 0; | |
597 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
598 | } else { | |
599 | FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, | |
7245fc5b | 600 | ((func & 0x3) != 0) ? 1 : 0); |
28fbf1d5 JM |
601 | } |
602 | goto update_regs; | |
603 | ||
6a800f36 LY |
604 | case EVFSCTUIZ: |
605 | case EVFSCTSIZ: | |
28fbf1d5 JM |
606 | if (SB0_c == FP_CLS_NAN) { |
607 | vc.wp[0] = 0; | |
608 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
6a800f36 | 609 | } else { |
28fbf1d5 | 610 | FP_TO_INT_S(vc.wp[0], SB0, 32, |
7245fc5b | 611 | ((func & 0x3) != 0) ? 1 : 0); |
28fbf1d5 JM |
612 | } |
613 | if (SB1_c == FP_CLS_NAN) { | |
614 | vc.wp[1] = 0; | |
615 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
616 | } else { | |
617 | FP_TO_INT_S(vc.wp[1], SB1, 32, | |
7245fc5b | 618 | ((func & 0x3) != 0) ? 1 : 0); |
6a800f36 | 619 | } |
6a800f36 LY |
620 | goto update_regs; |
621 | ||
622 | default: | |
623 | goto illegal; | |
624 | } | |
625 | break; | |
626 | ||
627 | pack_vs: | |
7245fc5b | 628 | pr_debug("SR0: %d %08x %d (%d)\n", |
b430abc4 | 629 | SR0_s, SR0_f, SR0_e, SR0_c); |
7245fc5b | 630 | pr_debug("SR1: %d %08x %d (%d)\n", |
b430abc4 LY |
631 | SR1_s, SR1_f, SR1_e, SR1_c); |
632 | ||
6a800f36 LY |
633 | FP_PACK_SP(vc.wp, SR0); |
634 | FP_PACK_SP(vc.wp + 1, SR1); | |
635 | goto update_regs; | |
636 | ||
637 | cmp_vs: | |
638 | { | |
639 | int ch, cl; | |
640 | ||
641 | FP_CMP_S(IR0, SA0, SB0, 3); | |
642 | FP_CMP_S(IR1, SA1, SB1, 3); | |
643 | if (IR0 == 3 && (FP_ISSIGNAN_S(SA0) || FP_ISSIGNAN_S(SB0))) | |
644 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
645 | if (IR1 == 3 && (FP_ISSIGNAN_S(SA1) || FP_ISSIGNAN_S(SB1))) | |
646 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
647 | ch = (IR0 == cmp) ? 1 : 0; | |
648 | cl = (IR1 == cmp) ? 1 : 0; | |
649 | IR = (ch << 3) | (cl << 2) | ((ch | cl) << 1) | | |
650 | ((ch & cl) << 0); | |
651 | goto update_ccr; | |
652 | } | |
653 | } | |
654 | default: | |
655 | return -EINVAL; | |
656 | } | |
657 | ||
658 | update_ccr: | |
659 | regs->ccr &= ~(15 << ((7 - ((speinsn >> 23) & 0x7)) << 2)); | |
660 | regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2)); | |
661 | ||
662 | update_regs: | |
640e9225 JM |
663 | /* |
664 | * If the "invalid" exception sticky bit was set by the | |
665 | * processor for non-finite input, but was not set before the | |
666 | * instruction being emulated, clear it. Likewise for the | |
667 | * "underflow" bit, which may have been set by the processor | |
668 | * for exact underflow, not just inexact underflow when the | |
669 | * flag should be set for IEEE 754 semantics. Other sticky | |
670 | * exceptions will only be set by the processor when they are | |
671 | * correct according to IEEE 754 semantics, and we must not | |
672 | * clear sticky bits that were already set before the emulated | |
673 | * instruction as they represent the user-visible sticky | |
674 | * exception status. "inexact" traps to kernel are not | |
675 | * required for IEEE semantics and are not enabled by default, | |
676 | * so the "inexact" sticky bit may have been set by a previous | |
677 | * instruction without the kernel being aware of it. | |
678 | */ | |
679 | __FPU_FPSCR | |
680 | &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last; | |
6a800f36 LY |
681 | __FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK); |
682 | mtspr(SPRN_SPEFSCR, __FPU_FPSCR); | |
640e9225 | 683 | current->thread.spefscr_last = __FPU_FPSCR; |
6a800f36 LY |
684 | |
685 | current->thread.evr[fc] = vc.wp[0]; | |
686 | regs->gpr[fc] = vc.wp[1]; | |
687 | ||
b430abc4 LY |
688 | pr_debug("ccr = %08lx\n", regs->ccr); |
689 | pr_debug("cur exceptions = %08x spefscr = %08lx\n", | |
6a800f36 | 690 | FP_CUR_EXCEPTIONS, __FPU_FPSCR); |
b430abc4 LY |
691 | pr_debug("vc: %08x %08x\n", vc.wp[0], vc.wp[1]); |
692 | pr_debug("va: %08x %08x\n", va.wp[0], va.wp[1]); | |
693 | pr_debug("vb: %08x %08x\n", vb.wp[0], vb.wp[1]); | |
6a800f36 | 694 | |
01c9ccee JM |
695 | if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) { |
696 | if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO) | |
697 | && (current->thread.fpexc_mode & PR_FP_EXC_DIV)) | |
698 | return 1; | |
699 | if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW) | |
700 | && (current->thread.fpexc_mode & PR_FP_EXC_OVF)) | |
701 | return 1; | |
702 | if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW) | |
703 | && (current->thread.fpexc_mode & PR_FP_EXC_UND)) | |
704 | return 1; | |
705 | if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT) | |
706 | && (current->thread.fpexc_mode & PR_FP_EXC_RES)) | |
707 | return 1; | |
708 | if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID) | |
709 | && (current->thread.fpexc_mode & PR_FP_EXC_INV)) | |
710 | return 1; | |
711 | } | |
6a800f36 LY |
712 | return 0; |
713 | ||
714 | illegal: | |
ac6f1203 LY |
715 | if (have_e500_cpu_a005_erratum) { |
716 | /* according to e500 cpu a005 erratum, reissue efp inst */ | |
59dc5bfc | 717 | regs_add_return_ip(regs, -4); |
b430abc4 | 718 | pr_debug("re-issue efp inst: %08lx\n", speinsn); |
ac6f1203 LY |
719 | return 0; |
720 | } | |
721 | ||
6a800f36 LY |
722 | printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered un-supported instruction.\ninst code: %08lx\n", speinsn); |
723 | return -ENOSYS; | |
724 | } | |
725 | ||
726 | int speround_handler(struct pt_regs *regs) | |
727 | { | |
728 | union dw_union fgpr; | |
729 | int s_lo, s_hi; | |
28414a6d | 730 | int lo_inexact, hi_inexact; |
28fbf1d5 JM |
731 | int fp_result; |
732 | unsigned long speinsn, type, fb, fc, fptype, func; | |
6a800f36 LY |
733 | |
734 | if (get_user(speinsn, (unsigned int __user *) regs->nip)) | |
735 | return -EFAULT; | |
736 | if ((speinsn >> 26) != 4) | |
737 | return -EINVAL; /* not an spe instruction */ | |
738 | ||
28fbf1d5 JM |
739 | func = speinsn & 0x7ff; |
740 | type = insn_type(func); | |
6a800f36 LY |
741 | if (type == XCR) return -ENOSYS; |
742 | ||
d5755e6f LY |
743 | __FPU_FPSCR = mfspr(SPRN_SPEFSCR); |
744 | pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR); | |
745 | ||
28414a6d JM |
746 | fptype = (speinsn >> 5) & 0x7; |
747 | ||
d5755e6f | 748 | /* No need to round if the result is exact */ |
28414a6d JM |
749 | lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX); |
750 | hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH); | |
751 | if (!(lo_inexact || (hi_inexact && fptype == VCT))) | |
d5755e6f LY |
752 | return 0; |
753 | ||
6a800f36 LY |
754 | fc = (speinsn >> 21) & 0x1f; |
755 | s_lo = regs->gpr[fc] & SIGN_BIT_S; | |
756 | s_hi = current->thread.evr[fc] & SIGN_BIT_S; | |
757 | fgpr.wp[0] = current->thread.evr[fc]; | |
758 | fgpr.wp[1] = regs->gpr[fc]; | |
759 | ||
28fbf1d5 JM |
760 | fb = (speinsn >> 11) & 0x1f; |
761 | switch (func) { | |
762 | case EFSCTUIZ: | |
763 | case EFSCTSIZ: | |
764 | case EVFSCTUIZ: | |
765 | case EVFSCTSIZ: | |
766 | case EFDCTUIDZ: | |
767 | case EFDCTSIDZ: | |
768 | case EFDCTUIZ: | |
769 | case EFDCTSIZ: | |
770 | /* | |
771 | * These instructions always round to zero, | |
772 | * independent of the rounding mode. | |
773 | */ | |
774 | return 0; | |
775 | ||
776 | case EFSCTUI: | |
777 | case EFSCTUF: | |
778 | case EVFSCTUI: | |
779 | case EVFSCTUF: | |
780 | case EFDCTUI: | |
781 | case EFDCTUF: | |
782 | fp_result = 0; | |
783 | s_lo = 0; | |
784 | s_hi = 0; | |
785 | break; | |
786 | ||
787 | case EFSCTSI: | |
788 | case EFSCTSF: | |
789 | fp_result = 0; | |
790 | /* Recover the sign of a zero result if possible. */ | |
791 | if (fgpr.wp[1] == 0) | |
792 | s_lo = regs->gpr[fb] & SIGN_BIT_S; | |
793 | break; | |
794 | ||
795 | case EVFSCTSI: | |
796 | case EVFSCTSF: | |
797 | fp_result = 0; | |
798 | /* Recover the sign of a zero result if possible. */ | |
799 | if (fgpr.wp[1] == 0) | |
800 | s_lo = regs->gpr[fb] & SIGN_BIT_S; | |
801 | if (fgpr.wp[0] == 0) | |
802 | s_hi = current->thread.evr[fb] & SIGN_BIT_S; | |
803 | break; | |
804 | ||
805 | case EFDCTSI: | |
806 | case EFDCTSF: | |
807 | fp_result = 0; | |
808 | s_hi = s_lo; | |
809 | /* Recover the sign of a zero result if possible. */ | |
810 | if (fgpr.wp[1] == 0) | |
811 | s_hi = current->thread.evr[fb] & SIGN_BIT_S; | |
812 | break; | |
813 | ||
814 | default: | |
815 | fp_result = 1; | |
816 | break; | |
817 | } | |
818 | ||
d5755e6f | 819 | pr_debug("round fgpr: %08x %08x\n", fgpr.wp[0], fgpr.wp[1]); |
6a800f36 | 820 | |
28414a6d | 821 | switch (fptype) { |
6a800f36 LY |
822 | /* Since SPE instructions on E500 core can handle round to nearest |
823 | * and round toward zero with IEEE-754 complied, we just need | |
824 | * to handle round toward +Inf and round toward -Inf by software. | |
825 | */ | |
826 | case SPFP: | |
827 | if ((FP_ROUNDMODE) == FP_RND_PINF) { | |
828 | if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */ | |
829 | } else { /* round to -Inf */ | |
28fbf1d5 JM |
830 | if (s_lo) { |
831 | if (fp_result) | |
832 | fgpr.wp[1]++; /* Z < 0, choose Z2 */ | |
833 | else | |
834 | fgpr.wp[1]--; /* Z < 0, choose Z2 */ | |
835 | } | |
6a800f36 LY |
836 | } |
837 | break; | |
838 | ||
839 | case DPFP: | |
840 | if (FP_ROUNDMODE == FP_RND_PINF) { | |
28fbf1d5 JM |
841 | if (!s_hi) { |
842 | if (fp_result) | |
843 | fgpr.dp[0]++; /* Z > 0, choose Z1 */ | |
844 | else | |
845 | fgpr.wp[1]++; /* Z > 0, choose Z1 */ | |
846 | } | |
6a800f36 | 847 | } else { /* round to -Inf */ |
28fbf1d5 JM |
848 | if (s_hi) { |
849 | if (fp_result) | |
850 | fgpr.dp[0]++; /* Z < 0, choose Z2 */ | |
851 | else | |
852 | fgpr.wp[1]--; /* Z < 0, choose Z2 */ | |
853 | } | |
6a800f36 LY |
854 | } |
855 | break; | |
856 | ||
857 | case VCT: | |
858 | if (FP_ROUNDMODE == FP_RND_PINF) { | |
28414a6d JM |
859 | if (lo_inexact && !s_lo) |
860 | fgpr.wp[1]++; /* Z_low > 0, choose Z1 */ | |
861 | if (hi_inexact && !s_hi) | |
862 | fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */ | |
6a800f36 | 863 | } else { /* round to -Inf */ |
28fbf1d5 JM |
864 | if (lo_inexact && s_lo) { |
865 | if (fp_result) | |
866 | fgpr.wp[1]++; /* Z_low < 0, choose Z2 */ | |
867 | else | |
868 | fgpr.wp[1]--; /* Z_low < 0, choose Z2 */ | |
869 | } | |
870 | if (hi_inexact && s_hi) { | |
871 | if (fp_result) | |
872 | fgpr.wp[0]++; /* Z_high < 0, choose Z2 */ | |
873 | else | |
874 | fgpr.wp[0]--; /* Z_high < 0, choose Z2 */ | |
875 | } | |
6a800f36 LY |
876 | } |
877 | break; | |
878 | ||
879 | default: | |
880 | return -EINVAL; | |
881 | } | |
882 | ||
883 | current->thread.evr[fc] = fgpr.wp[0]; | |
884 | regs->gpr[fc] = fgpr.wp[1]; | |
885 | ||
d5755e6f LY |
886 | pr_debug(" to fgpr: %08x %08x\n", fgpr.wp[0], fgpr.wp[1]); |
887 | ||
01c9ccee JM |
888 | if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) |
889 | return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0; | |
6a800f36 LY |
890 | return 0; |
891 | } | |
ac6f1203 | 892 | |
78c73c80 | 893 | static int __init spe_mathemu_init(void) |
ac6f1203 LY |
894 | { |
895 | u32 pvr, maj, min; | |
896 | ||
897 | pvr = mfspr(SPRN_PVR); | |
898 | ||
899 | if ((PVR_VER(pvr) == PVR_VER_E500V1) || | |
900 | (PVR_VER(pvr) == PVR_VER_E500V2)) { | |
901 | maj = PVR_MAJ(pvr); | |
902 | min = PVR_MIN(pvr); | |
903 | ||
904 | /* | |
905 | * E500 revision below 1.1, 2.3, 3.1, 4.1, 5.1 | |
906 | * need cpu a005 errata workaround | |
907 | */ | |
908 | switch (maj) { | |
909 | case 1: | |
910 | if (min < 1) | |
911 | have_e500_cpu_a005_erratum = 1; | |
912 | break; | |
913 | case 2: | |
914 | if (min < 3) | |
915 | have_e500_cpu_a005_erratum = 1; | |
916 | break; | |
917 | case 3: | |
918 | case 4: | |
919 | case 5: | |
920 | if (min < 1) | |
921 | have_e500_cpu_a005_erratum = 1; | |
922 | break; | |
923 | default: | |
924 | break; | |
925 | } | |
926 | } | |
927 | ||
928 | return 0; | |
929 | } | |
930 | ||
931 | module_init(spe_mathemu_init); |