sh: Fix occasional FPU register corruption under preempt.
[linux-2.6-block.git] / arch / sh / kernel / cpu / sh2a / fpu.c
CommitLineData
74d99a5e
PM
1/*
2 * Save/restore floating point context for signal handlers.
3 *
4 * Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka
5 *
6 * This file is subject to the terms and conditions of the GNU General Public
7 * License. See the file "COPYING" in the main directory of this archive
8 * for more details.
9 *
10 * FIXME! These routines can be optimized in big endian case.
11 */
12#include <linux/sched.h>
13#include <linux/signal.h>
14#include <asm/processor.h>
15#include <asm/io.h>
9bbafce2 16#include <asm/fpu.h>
74d99a5e
PM
17
18/* The PR (precision) bit in the FP Status Register must be clear when
19 * an frchg instruction is executed, otherwise the instruction is undefined.
20 * Executing frchg with PR set causes a trap on some SH4 implementations.
21 */
22
23#define FPSCR_RCHG 0x00000000
24
25
26/*
27 * Save FPU registers onto task structure.
28 * Assume called with FPU enabled (SR.FD=0).
29 */
30void
31save_fpu(struct task_struct *tsk, struct pt_regs *regs)
32{
33 unsigned long dummy;
34
35 clear_tsk_thread_flag(tsk, TIF_USEDFPU);
36 enable_fpu();
37 asm volatile("sts.l fpul, @-%0\n\t"
38 "sts.l fpscr, @-%0\n\t"
39 "fmov.s fr15, @-%0\n\t"
40 "fmov.s fr14, @-%0\n\t"
41 "fmov.s fr13, @-%0\n\t"
42 "fmov.s fr12, @-%0\n\t"
43 "fmov.s fr11, @-%0\n\t"
44 "fmov.s fr10, @-%0\n\t"
45 "fmov.s fr9, @-%0\n\t"
46 "fmov.s fr8, @-%0\n\t"
47 "fmov.s fr7, @-%0\n\t"
48 "fmov.s fr6, @-%0\n\t"
49 "fmov.s fr5, @-%0\n\t"
50 "fmov.s fr4, @-%0\n\t"
51 "fmov.s fr3, @-%0\n\t"
52 "fmov.s fr2, @-%0\n\t"
53 "fmov.s fr1, @-%0\n\t"
54 "fmov.s fr0, @-%0\n\t"
55 "lds %3, fpscr\n\t"
56 : "=r" (dummy)
57 : "0" ((char *)(&tsk->thread.fpu.hard.status)),
58 "r" (FPSCR_RCHG),
59 "r" (FPSCR_INIT)
60 : "memory");
61
62 disable_fpu();
63 release_fpu(regs);
64}
65
66static void
67restore_fpu(struct task_struct *tsk)
68{
69 unsigned long dummy;
70
71 enable_fpu();
72 asm volatile("fmov.s @%0+, fr0\n\t"
73 "fmov.s @%0+, fr1\n\t"
74 "fmov.s @%0+, fr2\n\t"
75 "fmov.s @%0+, fr3\n\t"
76 "fmov.s @%0+, fr4\n\t"
77 "fmov.s @%0+, fr5\n\t"
78 "fmov.s @%0+, fr6\n\t"
79 "fmov.s @%0+, fr7\n\t"
80 "fmov.s @%0+, fr8\n\t"
81 "fmov.s @%0+, fr9\n\t"
82 "fmov.s @%0+, fr10\n\t"
83 "fmov.s @%0+, fr11\n\t"
84 "fmov.s @%0+, fr12\n\t"
85 "fmov.s @%0+, fr13\n\t"
86 "fmov.s @%0+, fr14\n\t"
87 "fmov.s @%0+, fr15\n\t"
88 "lds.l @%0+, fpscr\n\t"
89 "lds.l @%0+, fpul\n\t"
90 : "=r" (dummy)
91 : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
92 : "memory");
93 disable_fpu();
94}
95
96/*
97 * Load the FPU with signalling NANS. This bit pattern we're using
98 * has the property that no matter wether considered as single or as
99 * double precission represents signaling NANS.
100 */
101
102static void
103fpu_init(void)
104{
105 enable_fpu();
106 asm volatile("lds %0, fpul\n\t"
107 "fsts fpul, fr0\n\t"
108 "fsts fpul, fr1\n\t"
109 "fsts fpul, fr2\n\t"
110 "fsts fpul, fr3\n\t"
111 "fsts fpul, fr4\n\t"
112 "fsts fpul, fr5\n\t"
113 "fsts fpul, fr6\n\t"
114 "fsts fpul, fr7\n\t"
115 "fsts fpul, fr8\n\t"
116 "fsts fpul, fr9\n\t"
117 "fsts fpul, fr10\n\t"
118 "fsts fpul, fr11\n\t"
119 "fsts fpul, fr12\n\t"
120 "fsts fpul, fr13\n\t"
121 "fsts fpul, fr14\n\t"
122 "fsts fpul, fr15\n\t"
123 "lds %2, fpscr\n\t"
124 : /* no output */
125 : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
126 disable_fpu();
127}
128
129/*
130 * Emulate arithmetic ops on denormalized number for some FPU insns.
131 */
132
/*
 * denormalized float * float
 *
 * @hx: raw bits of the denormal operand (only its fraction field is used)
 * @hy: raw bits of the other operand
 *
 * Returns the raw bits of the product, truncated rather than rounded.
 * A signed zero is returned when @hy is itself below the smallest normal
 * number or when @hx is zero.
 */
static int denormal_mulf(int hx, int hy)
{
	unsigned int sign = (hx ^ hy) & 0x80000000;
	unsigned int mag_x = hx & 0x7fffffff;
	unsigned int mag_y = hy & 0x7fffffff;
	unsigned long long prod, probe;
	unsigned int result;
	int exp, top;

	if (mag_y < 0x00800000 || mag_x == 0)
		return sign;

	exp = (mag_y & 0x7f800000) >> 23;
	mag_x &= 0x007fffff;
	/* the normal operand gets its implicit leading one back */
	mag_y = (mag_y & 0x007fffff) | 0x00800000;
	prod = (unsigned long long)mag_x * mag_y;

	/* top = index of the most significant set bit of the product */
	top = -1;
	for (probe = prod; probe; probe >>= 1)
		top++;

	/* FIXME: use guard bits */
	exp += top - 126 - 46;
	if (exp > 0)
		/* normal result */
		result = ((int) (prod >> (top - 23)) & 0x007fffff) | (exp << 23);
	else if (exp + 22 >= 0)
		/* result is still denormal */
		result = (int) (prod >> (top - 22 - exp)) & 0x007fffff;
	else
		/* too small to represent */
		result = 0;

	return result | sign;
}
165
/*
 * 64 x 64 -> 128 bit unsigned multiply, used by the denormalized
 * double * double emulation.
 *
 * @x, @y: factors
 * @highp: receives the upper 64 bits of the product
 * @lowp:  receives the lower 64 bits of the product
 */
static void mult64(unsigned long long x, unsigned long long y,
		   unsigned long long *highp, unsigned long long *lowp)
{
	const unsigned long long x_hi = x >> 32;
	const unsigned long long x_lo = x & 0xffffffffLL;
	const unsigned long y_hi = (unsigned long) (y >> 32);
	const unsigned long y_lo = (unsigned long) (y & 0xffffffffLL);

	/* the four 32 x 32 partial products */
	const unsigned long long hi_hi = x_hi * y_hi;
	const unsigned long long lo_hi = x_lo * y_hi;
	const unsigned long long hi_lo = x_hi * y_lo;
	const unsigned long long lo_lo = x_lo * y_lo;

	unsigned long long carries = 0LL;
	unsigned long long acc, prev;

	/* fold both cross terms into the low word, counting wrap-arounds */
	prev = lo_lo;
	acc = prev + (lo_hi << 32);
	if (acc < prev)
		carries++;

	prev = acc;
	acc = prev + (hi_lo << 32);
	if (acc < prev)
		carries++;

	*lowp = acc;
	*highp = carries + (lo_hi >> 32) + (hi_lo >> 32) + hi_hi;
}
192
/*
 * Shift the 128-bit value (mh:ml) right by @n bits and return the low
 * 64 bits of the result.
 *
 * @mh: upper 64 bits
 * @ml: lower 64 bits
 * @n:  shift count, expected to be non-negative
 *
 * n == 0 and n >= 128 are handled explicitly: the generic expressions
 * would shift a 64-bit value by 64 or more, which is undefined in C.
 */
static inline long long rshift64(unsigned long long mh,
		unsigned long long ml, int n)
{
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	if (n == 0)
		return ml;
	return (mh << (64 - n)) | (ml >> n);
}
200
/*
 * denormalized double * double
 *
 * @hx: raw bits of the denormal operand (only its fraction field is used)
 * @hy: raw bits of the other operand
 *
 * Returns the raw bits of the product, truncated rather than rounded.
 * A signed zero is returned when @hy is itself below the smallest normal
 * number or when @hx is zero.
 */
static long long denormal_muld(long long hx, long long hy)
{
	unsigned long long sign = (hx ^ hy) & 0x8000000000000000LL;
	unsigned long long mag_x = hx & 0x7fffffffffffffffLL;
	unsigned long long mag_y = hy & 0x7fffffffffffffffLL;
	unsigned long long prod_hi, prod_lo, probe;
	unsigned long long result;
	int exp, top;

	if (mag_y < 0x0010000000000000LL || mag_x == 0)
		return sign;

	exp = (mag_y & 0x7ff0000000000000LL) >> 52;
	mag_x &= 0x000fffffffffffffLL;
	/* the normal operand gets its implicit leading one back */
	mag_y = (mag_y & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mult64(mag_x, mag_y, &prod_hi, &prod_lo);

	/* top = index of the most significant set bit of the 128-bit product */
	top = -1;
	if (prod_hi) {
		for (probe = prod_hi; probe; probe >>= 1)
			top++;
		top += 64;
	} else {
		for (probe = prod_lo; probe; probe >>= 1)
			top++;
	}

	/* FIXME: use guard bits */
	exp += top - 1022 - 52 * 2;
	if (exp > 0)
		/* normal result */
		result = (rshift64(prod_hi, prod_lo, top - 52)
			  & 0x000fffffffffffffLL)
			| ((long long)exp << 52);
	else if (exp + 51 >= 0)
		/* result is still denormal */
		result = rshift64(prod_hi, prod_lo, top - 51 - exp)
			& 0x000fffffffffffffLL;
	else
		/* too small to represent */
		result = 0;

	return result | sign;
}
238
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are raw single-precision bit patterns with the sign bit
 * clear.  When @ix is denormal too, the plain integer difference is
 * returned and may be negative; the caller has to deal with that.
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	int mant;
	int exp;
	int shift;

	/* two denormals share one scale: subtract the raw patterns */
	if (ix < 0x00800000)
		return ix - iy;

	exp = (ix & 0x7f800000) >> 23;
	shift = exp - 1;
	if (shift > 31)
		return ix;

	/* bring iy to the scale of ix; nothing left means ix is unchanged */
	iy >>= shift;
	if (iy == 0)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) - iy;

	/* renormalise; once the exponent hits zero the result is denormal */
	for (; mant < 0x00800000; mant <<= 1)
		if (--exp == 0)
			return mant;

	return (exp << 23) | (mant & 0x007fffff);
}
265
/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are raw single-precision bit patterns with the sign bit
 * clear; the raw pattern of the (truncated) sum is returned.
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
	int mant;
	int exp;
	int shift;

	/* two denormals share one scale: add the raw patterns */
	if (ix < 0x00800000)
		return ix + iy;

	exp = (ix & 0x7f800000) >> 23;
	shift = exp - 1;
	if (shift > 31)
		return ix;

	/* bring iy to the scale of ix; nothing left means ix is unchanged */
	iy >>= shift;
	if (iy == 0)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) + iy;

	/* a carry out of the fraction bumps the exponent */
	if (mant >= 0x01000000) {
		mant >>= 1;
		++exp;
	}

	return (exp << 23) | (mant & 0x007fffff);
}
291
/*
 * Add two single-precision values of which at least one is denormal.
 *
 * @hx, @hy: raw bit patterns of the operands
 *
 * Returns the raw bit pattern of hx + hy.  The denormal operand is always
 * handed to the helpers as their second argument.
 */
static int denormal_addf(int hx, int hy)
{
	unsigned int ix, iy;
	int sign;

	sign = hx & 0x80000000;
	ix = hx & 0x7fffffff;
	iy = hy & 0x7fffffff;

	if ((hx ^ hy) & 0x80000000) {
		/* opposite signs: subtract the magnitudes */
		if (iy < 0x00800000) {
			/*
			 * The difference is negative when both operands are
			 * denormal and |hy| > |hx|.  It must be tested as a
			 * signed value: on an unsigned variable the "< 0"
			 * check can never be true.
			 */
			int diff = denormal_subf1(ix, iy);

			if (diff < 0) {
				diff = -diff;
				sign ^= 0x80000000;
			}
			ix = diff;
		} else {
			/* hy is the normal operand, so the result takes its sign */
			ix = denormal_subf1(iy, ix);
			sign ^= 0x80000000;
		}
	} else {
		/* same sign: add the magnitudes */
		if (iy < 0x00800000)
			ix = denormal_addf1(ix, iy);
		else
			ix = denormal_addf1(iy, ix);
	}

	return sign | ix;
}
323
/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are raw double-precision bit patterns with the sign bit
 * clear.  When @ix is denormal too, the plain integer difference is
 * returned and may be negative; the caller has to deal with that.
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	int exp;
	int shift;

	/* two denormals share one scale: subtract the raw patterns */
	if (ix < 0x0010000000000000LL)
		return ix - iy;

	exp = (ix & 0x7ff0000000000000LL) >> 52;
	shift = exp - 1;
	if (shift > 63)
		return ix;

	/* bring iy to the scale of ix; nothing left means ix is unchanged */
	iy >>= shift;
	if (iy == 0)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) - iy;

	/* renormalise; once the exponent hits zero the result is denormal */
	for (; mant < 0x0010000000000000LL; mant <<= 1)
		if (--exp == 0)
			return mant;

	return ((long long)exp << 52) | (mant & 0x000fffffffffffffLL);
}
350
/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are raw double-precision bit patterns with the sign bit
 * clear; the raw pattern of the (truncated) sum is returned.
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	long long exp;
	long long shift;

	/* two denormals share one scale: add the raw patterns */
	if (ix < 0x0010000000000000LL)
		return ix + iy;

	exp = (ix & 0x7ff0000000000000LL) >> 52;
	shift = exp - 1;
	if (shift > 63)
		return ix;

	/* bring iy to the scale of ix; nothing left means ix is unchanged */
	iy >>= shift;
	if (iy == 0)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) + iy;

	/* a carry out of the fraction bumps the exponent */
	if (mant >= 0x0020000000000000LL) {
		mant >>= 1;
		++exp;
	}

	return (exp << 52) | (mant & 0x000fffffffffffffLL);
}
376
/*
 * Add two double-precision values of which at least one is denormal.
 *
 * @hx, @hy: raw bit patterns of the operands
 *
 * Returns the raw bit pattern of hx + hy.  The denormal operand is always
 * handed to the helpers as their second argument.
 */
static long long denormal_addd(long long hx, long long hy)
{
	unsigned long long ix, iy;
	long long sign;

	sign = hx & 0x8000000000000000LL;
	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;

	if ((hx ^ hy) & 0x8000000000000000LL) {
		/* opposite signs: subtract the magnitudes */
		if (iy < 0x0010000000000000LL) {
			/*
			 * The difference is negative when both operands are
			 * denormal and |hy| > |hx|.  It must be tested as a
			 * signed value: on an unsigned variable the "< 0"
			 * check can never be true.
			 */
			long long diff = denormal_subd1(ix, iy);

			if (diff < 0) {
				diff = -diff;
				sign ^= 0x8000000000000000LL;
			}
			ix = diff;
		} else {
			/* hy is the normal operand, so the result takes its sign */
			ix = denormal_subd1(iy, ix);
			sign ^= 0x8000000000000000LL;
		}
	} else {
		/* same sign: add the magnitudes */
		if (iy < 0x0010000000000000LL)
			ix = denormal_addd1(ix, iy);
		else
			ix = denormal_addd1(iy, ix);
	}

	return sign | ix;
}
408
409/**
410 * denormal_to_double - Given denormalized float number,
411 * store double float
412 *
413 * @fpu: Pointer to sh_fpu_hard structure
414 * @n: Index to FP register
415 */
416static void
417denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
418{
419 unsigned long du, dl;
420 unsigned long x = fpu->fpul;
421 int exp = 1023 - 126;
422
423 if (x != 0 && (x & 0x7f800000) == 0) {
424 du = (x & 0x80000000);
425 while ((x & 0x00800000) == 0) {
426 x <<= 1;
427 exp--;
428 }
429 x &= 0x007fffff;
430 du |= (exp << 20) | (x >> 3);
431 dl = x << 29;
432
433 fpu->fp_regs[n] = du;
434 fpu->fp_regs[n+1] = dl;
435 }
436}
437
438/**
439 * ieee_fpe_handler - Handle denormalized number exception
440 *
441 * @regs: Pointer to register structure
442 *
443 * Returns 1 when it's handled (should not cause exception).
444 */
445static int
446ieee_fpe_handler (struct pt_regs *regs)
447{
448 unsigned short insn = *(unsigned short *) regs->pc;
449 unsigned short finsn;
450 unsigned long nextpc;
451 int nib[4] = {
452 (insn >> 12) & 0xf,
453 (insn >> 8) & 0xf,
454 (insn >> 4) & 0xf,
455 insn & 0xf};
456
457 if (nib[0] == 0xb ||
458 (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
459 regs->pr = regs->pc + 4;
460 if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
461 nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
462 finsn = *(unsigned short *) (regs->pc + 2);
463 } else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
464 if (regs->sr & 1)
465 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
466 else
467 nextpc = regs->pc + 4;
468 finsn = *(unsigned short *) (regs->pc + 2);
469 } else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
470 if (regs->sr & 1)
471 nextpc = regs->pc + 4;
472 else
473 nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
474 finsn = *(unsigned short *) (regs->pc + 2);
475 } else if (nib[0] == 0x4 && nib[3] == 0xb &&
476 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
477 nextpc = regs->regs[nib[1]];
478 finsn = *(unsigned short *) (regs->pc + 2);
479 } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
480 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
481 nextpc = regs->pc + 4 + regs->regs[nib[1]];
482 finsn = *(unsigned short *) (regs->pc + 2);
483 } else if (insn == 0x000b) { /* rts */
484 nextpc = regs->pr;
485 finsn = *(unsigned short *) (regs->pc + 2);
486 } else {
487 nextpc = regs->pc + 2;
488 finsn = insn;
489 }
490
491#define FPSCR_FPU_ERROR (1 << 17)
492
493 if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
494 struct task_struct *tsk = current;
495
496 if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
497 /* FPU error */
498 denormal_to_double (&tsk->thread.fpu.hard,
499 (finsn >> 8) & 0xf);
500 } else
501 return 0;
502
503 regs->pc = nextpc;
504 return 1;
505 } else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
506 struct task_struct *tsk = current;
507 int fpscr;
508 int n, m, prec;
509 unsigned int hx, hy;
510
511 n = (finsn >> 8) & 0xf;
512 m = (finsn >> 4) & 0xf;
513 hx = tsk->thread.fpu.hard.fp_regs[n];
514 hy = tsk->thread.fpu.hard.fp_regs[m];
515 fpscr = tsk->thread.fpu.hard.fpscr;
516 prec = fpscr & (1 << 19);
517
518 if ((fpscr & FPSCR_FPU_ERROR)
519 && (prec && ((hx & 0x7fffffff) < 0x00100000
520 || (hy & 0x7fffffff) < 0x00100000))) {
521 long long llx, lly;
522
523 /* FPU error because of denormal */
524 llx = ((long long) hx << 32)
525 | tsk->thread.fpu.hard.fp_regs[n+1];
526 lly = ((long long) hy << 32)
527 | tsk->thread.fpu.hard.fp_regs[m+1];
528 if ((hx & 0x7fffffff) >= 0x00100000)
529 llx = denormal_muld(lly, llx);
530 else
531 llx = denormal_muld(llx, lly);
532 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
533 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
534 } else if ((fpscr & FPSCR_FPU_ERROR)
535 && (!prec && ((hx & 0x7fffffff) < 0x00800000
536 || (hy & 0x7fffffff) < 0x00800000))) {
537 /* FPU error because of denormal */
538 if ((hx & 0x7fffffff) >= 0x00800000)
539 hx = denormal_mulf(hy, hx);
540 else
541 hx = denormal_mulf(hx, hy);
542 tsk->thread.fpu.hard.fp_regs[n] = hx;
543 } else
544 return 0;
545
546 regs->pc = nextpc;
547 return 1;
548 } else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
549 struct task_struct *tsk = current;
550 int fpscr;
551 int n, m, prec;
552 unsigned int hx, hy;
553
554 n = (finsn >> 8) & 0xf;
555 m = (finsn >> 4) & 0xf;
556 hx = tsk->thread.fpu.hard.fp_regs[n];
557 hy = tsk->thread.fpu.hard.fp_regs[m];
558 fpscr = tsk->thread.fpu.hard.fpscr;
559 prec = fpscr & (1 << 19);
560
561 if ((fpscr & FPSCR_FPU_ERROR)
562 && (prec && ((hx & 0x7fffffff) < 0x00100000
563 || (hy & 0x7fffffff) < 0x00100000))) {
564 long long llx, lly;
565
566 /* FPU error because of denormal */
567 llx = ((long long) hx << 32)
568 | tsk->thread.fpu.hard.fp_regs[n+1];
569 lly = ((long long) hy << 32)
570 | tsk->thread.fpu.hard.fp_regs[m+1];
571 if ((finsn & 0xf00f) == 0xf000)
572 llx = denormal_addd(llx, lly);
573 else
574 llx = denormal_addd(llx, lly ^ (1LL << 63));
575 tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
576 tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
577 } else if ((fpscr & FPSCR_FPU_ERROR)
578 && (!prec && ((hx & 0x7fffffff) < 0x00800000
579 || (hy & 0x7fffffff) < 0x00800000))) {
580 /* FPU error because of denormal */
581 if ((finsn & 0xf00f) == 0xf000)
582 hx = denormal_addf(hx, hy);
583 else
584 hx = denormal_addf(hx, hy ^ 0x80000000);
585 tsk->thread.fpu.hard.fp_regs[n] = hx;
586 } else
587 return 0;
588
589 regs->pc = nextpc;
590 return 1;
591 }
592
593 return 0;
594}
595
596BUILD_TRAP_HANDLER(fpu_error)
597{
598 struct task_struct *tsk = current;
599 TRAP_HANDLER_DECL;
600
601 save_fpu(tsk, regs);
602 if (ieee_fpe_handler(regs)) {
603 tsk->thread.fpu.hard.fpscr &=
604 ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
605 grab_fpu(regs);
606 restore_fpu(tsk);
607 set_tsk_thread_flag(tsk, TIF_USEDFPU);
608 return;
609 }
610
611 force_sig(SIGFPE, tsk);
612}
613
614BUILD_TRAP_HANDLER(fpu_state_restore)
615{
616 struct task_struct *tsk = current;
617 TRAP_HANDLER_DECL;
618
619 grab_fpu(regs);
620 if (!user_mode(regs)) {
621 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
622 return;
623 }
624
625 if (used_math()) {
626 /* Using the FPU again. */
627 restore_fpu(tsk);
628 } else {
629 /* First time FPU user. */
630 fpu_init();
631 set_used_math();
632 }
633 set_tsk_thread_flag(tsk, TIF_USEDFPU);
634}