Commit | Line | Data |
---|---|---|
6a800f36 LY |
1 | /* |
2 | * arch/powerpc/math-emu/math_efp.c | |
3 | * | |
ac6f1203 | 4 | * Copyright (C) 2006-2008, 2010 Freescale Semiconductor, Inc. |
6a800f36 LY |
5 | * |
6 | * Author: Ebony Zhu, <ebony.zhu@freescale.com> | |
7 | * Yu Liu, <yu.liu@freescale.com> | |
8 | * | |
9 | * Derived from arch/alpha/math-emu/math.c | |
10 | * arch/powerpc/math-emu/math.c | |
11 | * | |
12 | * Description: | |
13 | * This file is the exception handler to make E500 SPE instructions | |
14 | * fully comply with IEEE-754 floating point standard. | |
15 | * | |
16 | * This program is free software; you can redistribute it and/or | |
17 | * modify it under the terms of the GNU General Public License | |
18 | * as published by the Free Software Foundation; either version | |
19 | * 2 of the License, or (at your option) any later version. | |
20 | */ | |
21 | ||
22 | #include <linux/types.h> | |
01c9ccee | 23 | #include <linux/prctl.h> |
6a800f36 | 24 | |
7c0f6ba6 | 25 | #include <linux/uaccess.h> |
6a800f36 LY |
26 | #include <asm/reg.h> |
27 | ||
28 | #define FP_EX_BOOKE_E500_SPE | |
29 | #include <asm/sfp-machine.h> | |
30 | ||
31 | #include <math-emu/soft-fp.h> | |
32 | #include <math-emu/single.h> | |
33 | #include <math-emu/double.h> | |
34 | ||
/* Primary opcode (instruction bits >> 26) shared by all SPE FP instructions. */
#define EFAPU		0x4

/*
 * Values of the 3-bit field (insn >> 5) & 0x7, which selects the operand
 * format: vector single, scalar single or scalar double.
 */
#define VCT		0x4
#define SPFP		0x6
#define DPFP		0x7

/* Scalar single-precision opcodes (insn & 0x7ff). */
#define EFSADD		0x2c0
#define EFSSUB		0x2c1
#define EFSABS		0x2c4
#define EFSNABS		0x2c5
#define EFSNEG		0x2c6
#define EFSMUL		0x2c8
#define EFSDIV		0x2c9
#define EFSCMPGT	0x2cc
#define EFSCMPLT	0x2cd
#define EFSCMPEQ	0x2ce
#define EFSCFD		0x2cf
#define EFSCFSI		0x2d1
#define EFSCTUI		0x2d4
#define EFSCTSI		0x2d5
#define EFSCTUF		0x2d6
#define EFSCTSF		0x2d7
#define EFSCTUIZ	0x2d8
#define EFSCTSIZ	0x2da

/* Vector (two-element) single-precision opcodes (insn & 0x7ff). */
#define EVFSADD		0x280
#define EVFSSUB		0x281
#define EVFSABS		0x284
#define EVFSNABS	0x285
#define EVFSNEG		0x286
#define EVFSMUL		0x288
#define EVFSDIV		0x289
#define EVFSCMPGT	0x28c
#define EVFSCMPLT	0x28d
#define EVFSCMPEQ	0x28e
#define EVFSCTUI	0x294
#define EVFSCTSI	0x295
#define EVFSCTUF	0x296
#define EVFSCTSF	0x297
#define EVFSCTUIZ	0x298
#define EVFSCTSIZ	0x29a

/* Scalar double-precision opcodes (insn & 0x7ff). */
#define EFDADD		0x2e0
#define EFDSUB		0x2e1
#define EFDABS		0x2e4
#define EFDNABS		0x2e5
#define EFDNEG		0x2e6
#define EFDMUL		0x2e8
#define EFDDIV		0x2e9
#define EFDCTUIDZ	0x2ea
#define EFDCTSIDZ	0x2eb
#define EFDCMPGT	0x2ec
#define EFDCMPLT	0x2ed
#define EFDCMPEQ	0x2ee
#define EFDCFS		0x2ef
#define EFDCTUI		0x2f4
#define EFDCTSI		0x2f5
#define EFDCTUF		0x2f6
#define EFDCTSF		0x2f7
#define EFDCTUIZ	0x2f8
#define EFDCTSIZ	0x2fa

/*
 * Operand-usage classes returned by insn_type():
 *   AB     - both source operands A and B are unpacked
 *   XA     - only source operand A is unpacked
 *   XB     - only source operand B is unpacked
 *   XCR    - compare: A and B are unpacked, the result goes to a CR field
 *   NOTYPE - opcode not recognised
 */
#define AB		2
#define XA		3
#define XB		4
#define XCR		5
#define NOTYPE		0

/* Sign bit of a 32-bit single and of a 64-bit double. */
#define SIGN_BIT_S	(1UL << 31)
#define SIGN_BIT_D	(1ULL << 63)

/* All exception bits that do_spe_mathemu() may merge into SPEFSCR. */
#define FP_EX_MASK	(FP_EX_INEXACT | FP_EX_INVALID | FP_EX_DIVZERO | \
			FP_EX_UNDERFLOW | FP_EX_OVERFLOW)
107 | ||
ac6f1203 LY |
/*
 * Non-zero when spe_mathemu_init() detected a core revision affected by the
 * e500 "CPU A005" erratum; do_spe_mathemu() then re-issues instructions it
 * cannot decode instead of reporting them as unsupported.
 */
static int have_e500_cpu_a005_erratum;
109 | ||
6a800f36 LY |
110 | union dw_union { |
111 | u64 dp[1]; | |
112 | u32 wp[2]; | |
113 | }; | |
114 | ||
115 | static unsigned long insn_type(unsigned long speinsn) | |
116 | { | |
117 | unsigned long ret = NOTYPE; | |
118 | ||
119 | switch (speinsn & 0x7ff) { | |
120 | case EFSABS: ret = XA; break; | |
121 | case EFSADD: ret = AB; break; | |
122 | case EFSCFD: ret = XB; break; | |
123 | case EFSCMPEQ: ret = XCR; break; | |
124 | case EFSCMPGT: ret = XCR; break; | |
125 | case EFSCMPLT: ret = XCR; break; | |
126 | case EFSCTSF: ret = XB; break; | |
127 | case EFSCTSI: ret = XB; break; | |
128 | case EFSCTSIZ: ret = XB; break; | |
129 | case EFSCTUF: ret = XB; break; | |
130 | case EFSCTUI: ret = XB; break; | |
131 | case EFSCTUIZ: ret = XB; break; | |
132 | case EFSDIV: ret = AB; break; | |
133 | case EFSMUL: ret = AB; break; | |
134 | case EFSNABS: ret = XA; break; | |
135 | case EFSNEG: ret = XA; break; | |
136 | case EFSSUB: ret = AB; break; | |
137 | case EFSCFSI: ret = XB; break; | |
138 | ||
139 | case EVFSABS: ret = XA; break; | |
140 | case EVFSADD: ret = AB; break; | |
141 | case EVFSCMPEQ: ret = XCR; break; | |
142 | case EVFSCMPGT: ret = XCR; break; | |
143 | case EVFSCMPLT: ret = XCR; break; | |
144 | case EVFSCTSF: ret = XB; break; | |
145 | case EVFSCTSI: ret = XB; break; | |
146 | case EVFSCTSIZ: ret = XB; break; | |
147 | case EVFSCTUF: ret = XB; break; | |
148 | case EVFSCTUI: ret = XB; break; | |
149 | case EVFSCTUIZ: ret = XB; break; | |
150 | case EVFSDIV: ret = AB; break; | |
151 | case EVFSMUL: ret = AB; break; | |
152 | case EVFSNABS: ret = XA; break; | |
153 | case EVFSNEG: ret = XA; break; | |
154 | case EVFSSUB: ret = AB; break; | |
155 | ||
156 | case EFDABS: ret = XA; break; | |
157 | case EFDADD: ret = AB; break; | |
158 | case EFDCFS: ret = XB; break; | |
159 | case EFDCMPEQ: ret = XCR; break; | |
160 | case EFDCMPGT: ret = XCR; break; | |
161 | case EFDCMPLT: ret = XCR; break; | |
162 | case EFDCTSF: ret = XB; break; | |
163 | case EFDCTSI: ret = XB; break; | |
164 | case EFDCTSIDZ: ret = XB; break; | |
165 | case EFDCTSIZ: ret = XB; break; | |
166 | case EFDCTUF: ret = XB; break; | |
167 | case EFDCTUI: ret = XB; break; | |
168 | case EFDCTUIDZ: ret = XB; break; | |
169 | case EFDCTUIZ: ret = XB; break; | |
170 | case EFDDIV: ret = AB; break; | |
171 | case EFDMUL: ret = AB; break; | |
172 | case EFDNABS: ret = XA; break; | |
173 | case EFDNEG: ret = XA; break; | |
174 | case EFDSUB: ret = AB; break; | |
6a800f36 LY |
175 | } |
176 | ||
177 | return ret; | |
178 | } | |
179 | ||
180 | int do_spe_mathemu(struct pt_regs *regs) | |
181 | { | |
182 | FP_DECL_EX; | |
183 | int IR, cmp; | |
184 | ||
185 | unsigned long type, func, fc, fa, fb, src, speinsn; | |
186 | union dw_union vc, va, vb; | |
187 | ||
188 | if (get_user(speinsn, (unsigned int __user *) regs->nip)) | |
189 | return -EFAULT; | |
190 | if ((speinsn >> 26) != EFAPU) | |
191 | return -EINVAL; /* not an spe instruction */ | |
192 | ||
193 | type = insn_type(speinsn); | |
194 | if (type == NOTYPE) | |
09af52f7 | 195 | goto illegal; |
6a800f36 LY |
196 | |
197 | func = speinsn & 0x7ff; | |
198 | fc = (speinsn >> 21) & 0x1f; | |
199 | fa = (speinsn >> 16) & 0x1f; | |
200 | fb = (speinsn >> 11) & 0x1f; | |
201 | src = (speinsn >> 5) & 0x7; | |
202 | ||
203 | vc.wp[0] = current->thread.evr[fc]; | |
204 | vc.wp[1] = regs->gpr[fc]; | |
205 | va.wp[0] = current->thread.evr[fa]; | |
206 | va.wp[1] = regs->gpr[fa]; | |
207 | vb.wp[0] = current->thread.evr[fb]; | |
208 | vb.wp[1] = regs->gpr[fb]; | |
209 | ||
210 | __FPU_FPSCR = mfspr(SPRN_SPEFSCR); | |
211 | ||
b430abc4 LY |
212 | pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR); |
213 | pr_debug("vc: %08x %08x\n", vc.wp[0], vc.wp[1]); | |
214 | pr_debug("va: %08x %08x\n", va.wp[0], va.wp[1]); | |
215 | pr_debug("vb: %08x %08x\n", vb.wp[0], vb.wp[1]); | |
6a800f36 LY |
216 | |
217 | switch (src) { | |
218 | case SPFP: { | |
219 | FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR); | |
220 | ||
221 | switch (type) { | |
222 | case AB: | |
223 | case XCR: | |
224 | FP_UNPACK_SP(SA, va.wp + 1); | |
225 | case XB: | |
226 | FP_UNPACK_SP(SB, vb.wp + 1); | |
227 | break; | |
228 | case XA: | |
229 | FP_UNPACK_SP(SA, va.wp + 1); | |
230 | break; | |
231 | } | |
232 | ||
b430abc4 LY |
233 | pr_debug("SA: %ld %08lx %ld (%ld)\n", SA_s, SA_f, SA_e, SA_c); |
234 | pr_debug("SB: %ld %08lx %ld (%ld)\n", SB_s, SB_f, SB_e, SB_c); | |
6a800f36 LY |
235 | |
236 | switch (func) { | |
237 | case EFSABS: | |
238 | vc.wp[1] = va.wp[1] & ~SIGN_BIT_S; | |
239 | goto update_regs; | |
240 | ||
241 | case EFSNABS: | |
242 | vc.wp[1] = va.wp[1] | SIGN_BIT_S; | |
243 | goto update_regs; | |
244 | ||
245 | case EFSNEG: | |
246 | vc.wp[1] = va.wp[1] ^ SIGN_BIT_S; | |
247 | goto update_regs; | |
248 | ||
249 | case EFSADD: | |
250 | FP_ADD_S(SR, SA, SB); | |
251 | goto pack_s; | |
252 | ||
253 | case EFSSUB: | |
254 | FP_SUB_S(SR, SA, SB); | |
255 | goto pack_s; | |
256 | ||
257 | case EFSMUL: | |
258 | FP_MUL_S(SR, SA, SB); | |
259 | goto pack_s; | |
260 | ||
261 | case EFSDIV: | |
262 | FP_DIV_S(SR, SA, SB); | |
263 | goto pack_s; | |
264 | ||
265 | case EFSCMPEQ: | |
266 | cmp = 0; | |
267 | goto cmp_s; | |
268 | ||
269 | case EFSCMPGT: | |
270 | cmp = 1; | |
271 | goto cmp_s; | |
272 | ||
273 | case EFSCMPLT: | |
274 | cmp = -1; | |
275 | goto cmp_s; | |
276 | ||
277 | case EFSCTSF: | |
278 | case EFSCTUF: | |
28fbf1d5 JM |
279 | if (SB_c == FP_CLS_NAN) { |
280 | vc.wp[1] = 0; | |
281 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
282 | } else { | |
283 | SB_e += (func == EFSCTSF ? 31 : 32); | |
284 | FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, | |
285 | (func == EFSCTSF)); | |
6a800f36 LY |
286 | } |
287 | goto update_regs; | |
288 | ||
289 | case EFSCFD: { | |
290 | FP_DECL_D(DB); | |
291 | FP_CLEAR_EXCEPTIONS; | |
292 | FP_UNPACK_DP(DB, vb.dp); | |
b430abc4 LY |
293 | |
294 | pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n", | |
6a800f36 | 295 | DB_s, DB_f1, DB_f0, DB_e, DB_c); |
b430abc4 | 296 | |
6a800f36 LY |
297 | FP_CONV(S, D, 1, 2, SR, DB); |
298 | goto pack_s; | |
299 | } | |
300 | ||
301 | case EFSCTSI: | |
6a800f36 | 302 | case EFSCTUI: |
28fbf1d5 JM |
303 | if (SB_c == FP_CLS_NAN) { |
304 | vc.wp[1] = 0; | |
305 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
306 | } else { | |
307 | FP_TO_INT_ROUND_S(vc.wp[1], SB, 32, | |
308 | ((func & 0x3) != 0)); | |
309 | } | |
310 | goto update_regs; | |
311 | ||
312 | case EFSCTSIZ: | |
6a800f36 | 313 | case EFSCTUIZ: |
28fbf1d5 JM |
314 | if (SB_c == FP_CLS_NAN) { |
315 | vc.wp[1] = 0; | |
316 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
6a800f36 | 317 | } else { |
28fbf1d5 JM |
318 | FP_TO_INT_S(vc.wp[1], SB, 32, |
319 | ((func & 0x3) != 0)); | |
6a800f36 | 320 | } |
6a800f36 LY |
321 | goto update_regs; |
322 | ||
323 | default: | |
324 | goto illegal; | |
325 | } | |
326 | break; | |
327 | ||
328 | pack_s: | |
b430abc4 LY |
329 | pr_debug("SR: %ld %08lx %ld (%ld)\n", SR_s, SR_f, SR_e, SR_c); |
330 | ||
6a800f36 LY |
331 | FP_PACK_SP(vc.wp + 1, SR); |
332 | goto update_regs; | |
333 | ||
334 | cmp_s: | |
335 | FP_CMP_S(IR, SA, SB, 3); | |
336 | if (IR == 3 && (FP_ISSIGNAN_S(SA) || FP_ISSIGNAN_S(SB))) | |
337 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
338 | if (IR == cmp) { | |
339 | IR = 0x4; | |
340 | } else { | |
341 | IR = 0; | |
342 | } | |
343 | goto update_ccr; | |
344 | } | |
345 | ||
346 | case DPFP: { | |
347 | FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR); | |
348 | ||
349 | switch (type) { | |
350 | case AB: | |
351 | case XCR: | |
352 | FP_UNPACK_DP(DA, va.dp); | |
353 | case XB: | |
354 | FP_UNPACK_DP(DB, vb.dp); | |
355 | break; | |
356 | case XA: | |
357 | FP_UNPACK_DP(DA, va.dp); | |
358 | break; | |
359 | } | |
360 | ||
b430abc4 | 361 | pr_debug("DA: %ld %08lx %08lx %ld (%ld)\n", |
6a800f36 | 362 | DA_s, DA_f1, DA_f0, DA_e, DA_c); |
b430abc4 | 363 | pr_debug("DB: %ld %08lx %08lx %ld (%ld)\n", |
6a800f36 | 364 | DB_s, DB_f1, DB_f0, DB_e, DB_c); |
6a800f36 LY |
365 | |
366 | switch (func) { | |
367 | case EFDABS: | |
368 | vc.dp[0] = va.dp[0] & ~SIGN_BIT_D; | |
369 | goto update_regs; | |
370 | ||
371 | case EFDNABS: | |
372 | vc.dp[0] = va.dp[0] | SIGN_BIT_D; | |
373 | goto update_regs; | |
374 | ||
375 | case EFDNEG: | |
376 | vc.dp[0] = va.dp[0] ^ SIGN_BIT_D; | |
377 | goto update_regs; | |
378 | ||
379 | case EFDADD: | |
380 | FP_ADD_D(DR, DA, DB); | |
381 | goto pack_d; | |
382 | ||
383 | case EFDSUB: | |
384 | FP_SUB_D(DR, DA, DB); | |
385 | goto pack_d; | |
386 | ||
387 | case EFDMUL: | |
388 | FP_MUL_D(DR, DA, DB); | |
389 | goto pack_d; | |
390 | ||
391 | case EFDDIV: | |
392 | FP_DIV_D(DR, DA, DB); | |
393 | goto pack_d; | |
394 | ||
395 | case EFDCMPEQ: | |
396 | cmp = 0; | |
397 | goto cmp_d; | |
398 | ||
399 | case EFDCMPGT: | |
400 | cmp = 1; | |
401 | goto cmp_d; | |
402 | ||
403 | case EFDCMPLT: | |
404 | cmp = -1; | |
405 | goto cmp_d; | |
406 | ||
407 | case EFDCTSF: | |
408 | case EFDCTUF: | |
28fbf1d5 JM |
409 | if (DB_c == FP_CLS_NAN) { |
410 | vc.wp[1] = 0; | |
411 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
412 | } else { | |
413 | DB_e += (func == EFDCTSF ? 31 : 32); | |
414 | FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, | |
415 | (func == EFDCTSF)); | |
6a800f36 LY |
416 | } |
417 | goto update_regs; | |
418 | ||
419 | case EFDCFS: { | |
420 | FP_DECL_S(SB); | |
421 | FP_CLEAR_EXCEPTIONS; | |
422 | FP_UNPACK_SP(SB, vb.wp + 1); | |
b430abc4 LY |
423 | |
424 | pr_debug("SB: %ld %08lx %ld (%ld)\n", | |
6a800f36 | 425 | SB_s, SB_f, SB_e, SB_c); |
b430abc4 | 426 | |
6a800f36 LY |
427 | FP_CONV(D, S, 2, 1, DR, SB); |
428 | goto pack_d; | |
429 | } | |
430 | ||
431 | case EFDCTUIDZ: | |
432 | case EFDCTSIDZ: | |
28fbf1d5 JM |
433 | if (DB_c == FP_CLS_NAN) { |
434 | vc.dp[0] = 0; | |
435 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
436 | } else { | |
437 | FP_TO_INT_D(vc.dp[0], DB, 64, | |
438 | ((func & 0x1) == 0)); | |
439 | } | |
6a800f36 LY |
440 | goto update_regs; |
441 | ||
442 | case EFDCTUI: | |
443 | case EFDCTSI: | |
28fbf1d5 JM |
444 | if (DB_c == FP_CLS_NAN) { |
445 | vc.wp[1] = 0; | |
446 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
447 | } else { | |
448 | FP_TO_INT_ROUND_D(vc.wp[1], DB, 32, | |
449 | ((func & 0x3) != 0)); | |
450 | } | |
451 | goto update_regs; | |
452 | ||
6a800f36 LY |
453 | case EFDCTUIZ: |
454 | case EFDCTSIZ: | |
28fbf1d5 JM |
455 | if (DB_c == FP_CLS_NAN) { |
456 | vc.wp[1] = 0; | |
457 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
6a800f36 | 458 | } else { |
28fbf1d5 JM |
459 | FP_TO_INT_D(vc.wp[1], DB, 32, |
460 | ((func & 0x3) != 0)); | |
6a800f36 | 461 | } |
6a800f36 LY |
462 | goto update_regs; |
463 | ||
464 | default: | |
465 | goto illegal; | |
466 | } | |
467 | break; | |
468 | ||
469 | pack_d: | |
b430abc4 | 470 | pr_debug("DR: %ld %08lx %08lx %ld (%ld)\n", |
6a800f36 | 471 | DR_s, DR_f1, DR_f0, DR_e, DR_c); |
b430abc4 | 472 | |
6a800f36 LY |
473 | FP_PACK_DP(vc.dp, DR); |
474 | goto update_regs; | |
475 | ||
476 | cmp_d: | |
477 | FP_CMP_D(IR, DA, DB, 3); | |
478 | if (IR == 3 && (FP_ISSIGNAN_D(DA) || FP_ISSIGNAN_D(DB))) | |
479 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
480 | if (IR == cmp) { | |
481 | IR = 0x4; | |
482 | } else { | |
483 | IR = 0; | |
484 | } | |
485 | goto update_ccr; | |
486 | ||
487 | } | |
488 | ||
489 | case VCT: { | |
490 | FP_DECL_S(SA0); FP_DECL_S(SB0); FP_DECL_S(SR0); | |
491 | FP_DECL_S(SA1); FP_DECL_S(SB1); FP_DECL_S(SR1); | |
492 | int IR0, IR1; | |
493 | ||
494 | switch (type) { | |
495 | case AB: | |
496 | case XCR: | |
497 | FP_UNPACK_SP(SA0, va.wp); | |
498 | FP_UNPACK_SP(SA1, va.wp + 1); | |
499 | case XB: | |
500 | FP_UNPACK_SP(SB0, vb.wp); | |
501 | FP_UNPACK_SP(SB1, vb.wp + 1); | |
502 | break; | |
503 | case XA: | |
504 | FP_UNPACK_SP(SA0, va.wp); | |
505 | FP_UNPACK_SP(SA1, va.wp + 1); | |
506 | break; | |
507 | } | |
508 | ||
b430abc4 LY |
509 | pr_debug("SA0: %ld %08lx %ld (%ld)\n", |
510 | SA0_s, SA0_f, SA0_e, SA0_c); | |
511 | pr_debug("SA1: %ld %08lx %ld (%ld)\n", | |
512 | SA1_s, SA1_f, SA1_e, SA1_c); | |
513 | pr_debug("SB0: %ld %08lx %ld (%ld)\n", | |
514 | SB0_s, SB0_f, SB0_e, SB0_c); | |
515 | pr_debug("SB1: %ld %08lx %ld (%ld)\n", | |
516 | SB1_s, SB1_f, SB1_e, SB1_c); | |
6a800f36 LY |
517 | |
518 | switch (func) { | |
519 | case EVFSABS: | |
520 | vc.wp[0] = va.wp[0] & ~SIGN_BIT_S; | |
521 | vc.wp[1] = va.wp[1] & ~SIGN_BIT_S; | |
522 | goto update_regs; | |
523 | ||
524 | case EVFSNABS: | |
525 | vc.wp[0] = va.wp[0] | SIGN_BIT_S; | |
526 | vc.wp[1] = va.wp[1] | SIGN_BIT_S; | |
527 | goto update_regs; | |
528 | ||
529 | case EVFSNEG: | |
530 | vc.wp[0] = va.wp[0] ^ SIGN_BIT_S; | |
531 | vc.wp[1] = va.wp[1] ^ SIGN_BIT_S; | |
532 | goto update_regs; | |
533 | ||
534 | case EVFSADD: | |
535 | FP_ADD_S(SR0, SA0, SB0); | |
536 | FP_ADD_S(SR1, SA1, SB1); | |
537 | goto pack_vs; | |
538 | ||
539 | case EVFSSUB: | |
540 | FP_SUB_S(SR0, SA0, SB0); | |
541 | FP_SUB_S(SR1, SA1, SB1); | |
542 | goto pack_vs; | |
543 | ||
544 | case EVFSMUL: | |
545 | FP_MUL_S(SR0, SA0, SB0); | |
546 | FP_MUL_S(SR1, SA1, SB1); | |
547 | goto pack_vs; | |
548 | ||
549 | case EVFSDIV: | |
550 | FP_DIV_S(SR0, SA0, SB0); | |
551 | FP_DIV_S(SR1, SA1, SB1); | |
552 | goto pack_vs; | |
553 | ||
554 | case EVFSCMPEQ: | |
555 | cmp = 0; | |
556 | goto cmp_vs; | |
557 | ||
558 | case EVFSCMPGT: | |
559 | cmp = 1; | |
560 | goto cmp_vs; | |
561 | ||
562 | case EVFSCMPLT: | |
563 | cmp = -1; | |
564 | goto cmp_vs; | |
565 | ||
6a800f36 | 566 | case EVFSCTUF: |
28fbf1d5 JM |
567 | case EVFSCTSF: |
568 | if (SB0_c == FP_CLS_NAN) { | |
569 | vc.wp[0] = 0; | |
570 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
571 | } else { | |
572 | SB0_e += (func == EVFSCTSF ? 31 : 32); | |
573 | FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, | |
574 | (func == EVFSCTSF)); | |
575 | } | |
576 | if (SB1_c == FP_CLS_NAN) { | |
577 | vc.wp[1] = 0; | |
578 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
579 | } else { | |
580 | SB1_e += (func == EVFSCTSF ? 31 : 32); | |
581 | FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, | |
582 | (func == EVFSCTSF)); | |
583 | } | |
6a800f36 LY |
584 | goto update_regs; |
585 | ||
586 | case EVFSCTUI: | |
587 | case EVFSCTSI: | |
28fbf1d5 JM |
588 | if (SB0_c == FP_CLS_NAN) { |
589 | vc.wp[0] = 0; | |
590 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
591 | } else { | |
592 | FP_TO_INT_ROUND_S(vc.wp[0], SB0, 32, | |
593 | ((func & 0x3) != 0)); | |
594 | } | |
595 | if (SB1_c == FP_CLS_NAN) { | |
596 | vc.wp[1] = 0; | |
597 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
598 | } else { | |
599 | FP_TO_INT_ROUND_S(vc.wp[1], SB1, 32, | |
600 | ((func & 0x3) != 0)); | |
601 | } | |
602 | goto update_regs; | |
603 | ||
6a800f36 LY |
604 | case EVFSCTUIZ: |
605 | case EVFSCTSIZ: | |
28fbf1d5 JM |
606 | if (SB0_c == FP_CLS_NAN) { |
607 | vc.wp[0] = 0; | |
608 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
6a800f36 | 609 | } else { |
28fbf1d5 JM |
610 | FP_TO_INT_S(vc.wp[0], SB0, 32, |
611 | ((func & 0x3) != 0)); | |
612 | } | |
613 | if (SB1_c == FP_CLS_NAN) { | |
614 | vc.wp[1] = 0; | |
615 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
616 | } else { | |
617 | FP_TO_INT_S(vc.wp[1], SB1, 32, | |
618 | ((func & 0x3) != 0)); | |
6a800f36 | 619 | } |
6a800f36 LY |
620 | goto update_regs; |
621 | ||
622 | default: | |
623 | goto illegal; | |
624 | } | |
625 | break; | |
626 | ||
627 | pack_vs: | |
b430abc4 LY |
628 | pr_debug("SR0: %ld %08lx %ld (%ld)\n", |
629 | SR0_s, SR0_f, SR0_e, SR0_c); | |
630 | pr_debug("SR1: %ld %08lx %ld (%ld)\n", | |
631 | SR1_s, SR1_f, SR1_e, SR1_c); | |
632 | ||
6a800f36 LY |
633 | FP_PACK_SP(vc.wp, SR0); |
634 | FP_PACK_SP(vc.wp + 1, SR1); | |
635 | goto update_regs; | |
636 | ||
637 | cmp_vs: | |
638 | { | |
639 | int ch, cl; | |
640 | ||
641 | FP_CMP_S(IR0, SA0, SB0, 3); | |
642 | FP_CMP_S(IR1, SA1, SB1, 3); | |
643 | if (IR0 == 3 && (FP_ISSIGNAN_S(SA0) || FP_ISSIGNAN_S(SB0))) | |
644 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
645 | if (IR1 == 3 && (FP_ISSIGNAN_S(SA1) || FP_ISSIGNAN_S(SB1))) | |
646 | FP_SET_EXCEPTION(FP_EX_INVALID); | |
647 | ch = (IR0 == cmp) ? 1 : 0; | |
648 | cl = (IR1 == cmp) ? 1 : 0; | |
649 | IR = (ch << 3) | (cl << 2) | ((ch | cl) << 1) | | |
650 | ((ch & cl) << 0); | |
651 | goto update_ccr; | |
652 | } | |
653 | } | |
654 | default: | |
655 | return -EINVAL; | |
656 | } | |
657 | ||
658 | update_ccr: | |
659 | regs->ccr &= ~(15 << ((7 - ((speinsn >> 23) & 0x7)) << 2)); | |
660 | regs->ccr |= (IR << ((7 - ((speinsn >> 23) & 0x7)) << 2)); | |
661 | ||
662 | update_regs: | |
640e9225 JM |
663 | /* |
664 | * If the "invalid" exception sticky bit was set by the | |
665 | * processor for non-finite input, but was not set before the | |
666 | * instruction being emulated, clear it. Likewise for the | |
667 | * "underflow" bit, which may have been set by the processor | |
668 | * for exact underflow, not just inexact underflow when the | |
669 | * flag should be set for IEEE 754 semantics. Other sticky | |
670 | * exceptions will only be set by the processor when they are | |
671 | * correct according to IEEE 754 semantics, and we must not | |
672 | * clear sticky bits that were already set before the emulated | |
673 | * instruction as they represent the user-visible sticky | |
674 | * exception status. "inexact" traps to kernel are not | |
675 | * required for IEEE semantics and are not enabled by default, | |
676 | * so the "inexact" sticky bit may have been set by a previous | |
677 | * instruction without the kernel being aware of it. | |
678 | */ | |
679 | __FPU_FPSCR | |
680 | &= ~(FP_EX_INVALID | FP_EX_UNDERFLOW) | current->thread.spefscr_last; | |
6a800f36 LY |
681 | __FPU_FPSCR |= (FP_CUR_EXCEPTIONS & FP_EX_MASK); |
682 | mtspr(SPRN_SPEFSCR, __FPU_FPSCR); | |
640e9225 | 683 | current->thread.spefscr_last = __FPU_FPSCR; |
6a800f36 LY |
684 | |
685 | current->thread.evr[fc] = vc.wp[0]; | |
686 | regs->gpr[fc] = vc.wp[1]; | |
687 | ||
b430abc4 LY |
688 | pr_debug("ccr = %08lx\n", regs->ccr); |
689 | pr_debug("cur exceptions = %08x spefscr = %08lx\n", | |
6a800f36 | 690 | FP_CUR_EXCEPTIONS, __FPU_FPSCR); |
b430abc4 LY |
691 | pr_debug("vc: %08x %08x\n", vc.wp[0], vc.wp[1]); |
692 | pr_debug("va: %08x %08x\n", va.wp[0], va.wp[1]); | |
693 | pr_debug("vb: %08x %08x\n", vb.wp[0], vb.wp[1]); | |
6a800f36 | 694 | |
01c9ccee JM |
695 | if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) { |
696 | if ((FP_CUR_EXCEPTIONS & FP_EX_DIVZERO) | |
697 | && (current->thread.fpexc_mode & PR_FP_EXC_DIV)) | |
698 | return 1; | |
699 | if ((FP_CUR_EXCEPTIONS & FP_EX_OVERFLOW) | |
700 | && (current->thread.fpexc_mode & PR_FP_EXC_OVF)) | |
701 | return 1; | |
702 | if ((FP_CUR_EXCEPTIONS & FP_EX_UNDERFLOW) | |
703 | && (current->thread.fpexc_mode & PR_FP_EXC_UND)) | |
704 | return 1; | |
705 | if ((FP_CUR_EXCEPTIONS & FP_EX_INEXACT) | |
706 | && (current->thread.fpexc_mode & PR_FP_EXC_RES)) | |
707 | return 1; | |
708 | if ((FP_CUR_EXCEPTIONS & FP_EX_INVALID) | |
709 | && (current->thread.fpexc_mode & PR_FP_EXC_INV)) | |
710 | return 1; | |
711 | } | |
6a800f36 LY |
712 | return 0; |
713 | ||
714 | illegal: | |
ac6f1203 LY |
715 | if (have_e500_cpu_a005_erratum) { |
716 | /* according to e500 cpu a005 erratum, reissue efp inst */ | |
717 | regs->nip -= 4; | |
b430abc4 | 718 | pr_debug("re-issue efp inst: %08lx\n", speinsn); |
ac6f1203 LY |
719 | return 0; |
720 | } | |
721 | ||
6a800f36 LY |
722 | printk(KERN_ERR "\nOoops! IEEE-754 compliance handler encountered un-supported instruction.\ninst code: %08lx\n", speinsn); |
723 | return -ENOSYS; | |
724 | } | |
725 | ||
726 | int speround_handler(struct pt_regs *regs) | |
727 | { | |
728 | union dw_union fgpr; | |
729 | int s_lo, s_hi; | |
28414a6d | 730 | int lo_inexact, hi_inexact; |
28fbf1d5 JM |
731 | int fp_result; |
732 | unsigned long speinsn, type, fb, fc, fptype, func; | |
6a800f36 LY |
733 | |
734 | if (get_user(speinsn, (unsigned int __user *) regs->nip)) | |
735 | return -EFAULT; | |
736 | if ((speinsn >> 26) != 4) | |
737 | return -EINVAL; /* not an spe instruction */ | |
738 | ||
28fbf1d5 JM |
739 | func = speinsn & 0x7ff; |
740 | type = insn_type(func); | |
6a800f36 LY |
741 | if (type == XCR) return -ENOSYS; |
742 | ||
d5755e6f LY |
743 | __FPU_FPSCR = mfspr(SPRN_SPEFSCR); |
744 | pr_debug("speinsn:%08lx spefscr:%08lx\n", speinsn, __FPU_FPSCR); | |
745 | ||
28414a6d JM |
746 | fptype = (speinsn >> 5) & 0x7; |
747 | ||
d5755e6f | 748 | /* No need to round if the result is exact */ |
28414a6d JM |
749 | lo_inexact = __FPU_FPSCR & (SPEFSCR_FG | SPEFSCR_FX); |
750 | hi_inexact = __FPU_FPSCR & (SPEFSCR_FGH | SPEFSCR_FXH); | |
751 | if (!(lo_inexact || (hi_inexact && fptype == VCT))) | |
d5755e6f LY |
752 | return 0; |
753 | ||
6a800f36 LY |
754 | fc = (speinsn >> 21) & 0x1f; |
755 | s_lo = regs->gpr[fc] & SIGN_BIT_S; | |
756 | s_hi = current->thread.evr[fc] & SIGN_BIT_S; | |
757 | fgpr.wp[0] = current->thread.evr[fc]; | |
758 | fgpr.wp[1] = regs->gpr[fc]; | |
759 | ||
28fbf1d5 JM |
760 | fb = (speinsn >> 11) & 0x1f; |
761 | switch (func) { | |
762 | case EFSCTUIZ: | |
763 | case EFSCTSIZ: | |
764 | case EVFSCTUIZ: | |
765 | case EVFSCTSIZ: | |
766 | case EFDCTUIDZ: | |
767 | case EFDCTSIDZ: | |
768 | case EFDCTUIZ: | |
769 | case EFDCTSIZ: | |
770 | /* | |
771 | * These instructions always round to zero, | |
772 | * independent of the rounding mode. | |
773 | */ | |
774 | return 0; | |
775 | ||
776 | case EFSCTUI: | |
777 | case EFSCTUF: | |
778 | case EVFSCTUI: | |
779 | case EVFSCTUF: | |
780 | case EFDCTUI: | |
781 | case EFDCTUF: | |
782 | fp_result = 0; | |
783 | s_lo = 0; | |
784 | s_hi = 0; | |
785 | break; | |
786 | ||
787 | case EFSCTSI: | |
788 | case EFSCTSF: | |
789 | fp_result = 0; | |
790 | /* Recover the sign of a zero result if possible. */ | |
791 | if (fgpr.wp[1] == 0) | |
792 | s_lo = regs->gpr[fb] & SIGN_BIT_S; | |
793 | break; | |
794 | ||
795 | case EVFSCTSI: | |
796 | case EVFSCTSF: | |
797 | fp_result = 0; | |
798 | /* Recover the sign of a zero result if possible. */ | |
799 | if (fgpr.wp[1] == 0) | |
800 | s_lo = regs->gpr[fb] & SIGN_BIT_S; | |
801 | if (fgpr.wp[0] == 0) | |
802 | s_hi = current->thread.evr[fb] & SIGN_BIT_S; | |
803 | break; | |
804 | ||
805 | case EFDCTSI: | |
806 | case EFDCTSF: | |
807 | fp_result = 0; | |
808 | s_hi = s_lo; | |
809 | /* Recover the sign of a zero result if possible. */ | |
810 | if (fgpr.wp[1] == 0) | |
811 | s_hi = current->thread.evr[fb] & SIGN_BIT_S; | |
812 | break; | |
813 | ||
814 | default: | |
815 | fp_result = 1; | |
816 | break; | |
817 | } | |
818 | ||
d5755e6f | 819 | pr_debug("round fgpr: %08x %08x\n", fgpr.wp[0], fgpr.wp[1]); |
6a800f36 | 820 | |
28414a6d | 821 | switch (fptype) { |
6a800f36 LY |
822 | /* Since SPE instructions on E500 core can handle round to nearest |
823 | * and round toward zero with IEEE-754 complied, we just need | |
824 | * to handle round toward +Inf and round toward -Inf by software. | |
825 | */ | |
826 | case SPFP: | |
827 | if ((FP_ROUNDMODE) == FP_RND_PINF) { | |
828 | if (!s_lo) fgpr.wp[1]++; /* Z > 0, choose Z1 */ | |
829 | } else { /* round to -Inf */ | |
28fbf1d5 JM |
830 | if (s_lo) { |
831 | if (fp_result) | |
832 | fgpr.wp[1]++; /* Z < 0, choose Z2 */ | |
833 | else | |
834 | fgpr.wp[1]--; /* Z < 0, choose Z2 */ | |
835 | } | |
6a800f36 LY |
836 | } |
837 | break; | |
838 | ||
839 | case DPFP: | |
840 | if (FP_ROUNDMODE == FP_RND_PINF) { | |
28fbf1d5 JM |
841 | if (!s_hi) { |
842 | if (fp_result) | |
843 | fgpr.dp[0]++; /* Z > 0, choose Z1 */ | |
844 | else | |
845 | fgpr.wp[1]++; /* Z > 0, choose Z1 */ | |
846 | } | |
6a800f36 | 847 | } else { /* round to -Inf */ |
28fbf1d5 JM |
848 | if (s_hi) { |
849 | if (fp_result) | |
850 | fgpr.dp[0]++; /* Z < 0, choose Z2 */ | |
851 | else | |
852 | fgpr.wp[1]--; /* Z < 0, choose Z2 */ | |
853 | } | |
6a800f36 LY |
854 | } |
855 | break; | |
856 | ||
857 | case VCT: | |
858 | if (FP_ROUNDMODE == FP_RND_PINF) { | |
28414a6d JM |
859 | if (lo_inexact && !s_lo) |
860 | fgpr.wp[1]++; /* Z_low > 0, choose Z1 */ | |
861 | if (hi_inexact && !s_hi) | |
862 | fgpr.wp[0]++; /* Z_high word > 0, choose Z1 */ | |
6a800f36 | 863 | } else { /* round to -Inf */ |
28fbf1d5 JM |
864 | if (lo_inexact && s_lo) { |
865 | if (fp_result) | |
866 | fgpr.wp[1]++; /* Z_low < 0, choose Z2 */ | |
867 | else | |
868 | fgpr.wp[1]--; /* Z_low < 0, choose Z2 */ | |
869 | } | |
870 | if (hi_inexact && s_hi) { | |
871 | if (fp_result) | |
872 | fgpr.wp[0]++; /* Z_high < 0, choose Z2 */ | |
873 | else | |
874 | fgpr.wp[0]--; /* Z_high < 0, choose Z2 */ | |
875 | } | |
6a800f36 LY |
876 | } |
877 | break; | |
878 | ||
879 | default: | |
880 | return -EINVAL; | |
881 | } | |
882 | ||
883 | current->thread.evr[fc] = fgpr.wp[0]; | |
884 | regs->gpr[fc] = fgpr.wp[1]; | |
885 | ||
d5755e6f LY |
886 | pr_debug(" to fgpr: %08x %08x\n", fgpr.wp[0], fgpr.wp[1]); |
887 | ||
01c9ccee JM |
888 | if (current->thread.fpexc_mode & PR_FP_EXC_SW_ENABLE) |
889 | return (current->thread.fpexc_mode & PR_FP_EXC_RES) ? 1 : 0; | |
6a800f36 LY |
890 | return 0; |
891 | } | |
ac6f1203 LY |
892 | |
893 | int __init spe_mathemu_init(void) | |
894 | { | |
895 | u32 pvr, maj, min; | |
896 | ||
897 | pvr = mfspr(SPRN_PVR); | |
898 | ||
899 | if ((PVR_VER(pvr) == PVR_VER_E500V1) || | |
900 | (PVR_VER(pvr) == PVR_VER_E500V2)) { | |
901 | maj = PVR_MAJ(pvr); | |
902 | min = PVR_MIN(pvr); | |
903 | ||
904 | /* | |
905 | * E500 revision below 1.1, 2.3, 3.1, 4.1, 5.1 | |
906 | * need cpu a005 errata workaround | |
907 | */ | |
908 | switch (maj) { | |
909 | case 1: | |
910 | if (min < 1) | |
911 | have_e500_cpu_a005_erratum = 1; | |
912 | break; | |
913 | case 2: | |
914 | if (min < 3) | |
915 | have_e500_cpu_a005_erratum = 1; | |
916 | break; | |
917 | case 3: | |
918 | case 4: | |
919 | case 5: | |
920 | if (min < 1) | |
921 | have_e500_cpu_a005_erratum = 1; | |
922 | break; | |
923 | default: | |
924 | break; | |
925 | } | |
926 | } | |
927 | ||
928 | return 0; | |
929 | } | |
930 | ||
931 | module_init(spe_mathemu_init); |