[linux-2.6-block.git] / arch / sh / kernel / cpu / sh2a / fpu.c

/*
 * Save/restore floating point context for signal handlers.
 *
 * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * FIXME! These routines can be optimized in big endian case.
 */
#include <linux/sched.h>
#include <linux/signal.h>
#include <asm/processor.h>
#include <asm/io.h>
#include <asm/fpu.h>

/* The PR (precision) bit in the FP Status Register must be clear when
 * an frchg instruction is executed, otherwise the instruction is undefined.
 * Executing frchg with PR set causes a trap on some SH4 implementations.
 *
 * NOTE(review): none of the asm statements in this file issue frchg.
 * FPSCR_RCHG is only passed to them as an input operand, and none of their
 * templates reference that operand, so the value (0) is effectively unused
 * here.  Presumably a leftover from the SH4 variant of this code - confirm
 * before relying on it.
 */

#define FPSCR_RCHG 0x00000000


/*
 * Save FPU registers onto task structure.
 * Assume called with FPU enabled (SR.FD=0).
 *
 * @tsk:  task whose thread.fpu.hard area receives the register dump; its
 *        TIF_USEDFPU flag is cleared before anything is stored.
 * @regs: handed unchanged to release_fpu() after the dump.
 *
 * After the registers have been stored, FPSCR is reloaded with FPSCR_INIT,
 * then disable_fpu() and release_fpu(regs) are called.
 */
void
save_fpu(struct task_struct *tsk, struct pt_regs *regs)
{
	/* Output operand for the moving store pointer; its value is not used. */
	unsigned long dummy;

	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
	enable_fpu();
	/*
	 * %0 starts at the address of thread.fpu.hard.status and is
	 * pre-decremented before every store, so the words are written
	 * downwards from just below `status` in this order:
	 * fpul, fpscr, fr15 ... fr0.
	 * (Assumes the fpul, fpscr and FP register slots sit directly below
	 * `status` in the structure - the layout is not visible here,
	 * TODO confirm.)
	 *
	 * Operand %2 (FPSCR_RCHG) is not referenced by the template; the
	 * final "lds %3, fpscr" loads FPSCR_INIT.
	 */
	asm volatile("sts.l	fpul, @-%0\n\t"
		     "sts.l	fpscr, @-%0\n\t"
		     "fmov.s	fr15, @-%0\n\t"
		     "fmov.s	fr14, @-%0\n\t"
		     "fmov.s	fr13, @-%0\n\t"
		     "fmov.s	fr12, @-%0\n\t"
		     "fmov.s	fr11, @-%0\n\t"
		     "fmov.s	fr10, @-%0\n\t"
		     "fmov.s	fr9, @-%0\n\t"
		     "fmov.s	fr8, @-%0\n\t"
		     "fmov.s	fr7, @-%0\n\t"
		     "fmov.s	fr6, @-%0\n\t"
		     "fmov.s	fr5, @-%0\n\t"
		     "fmov.s	fr4, @-%0\n\t"
		     "fmov.s	fr3, @-%0\n\t"
		     "fmov.s	fr2, @-%0\n\t"
		     "fmov.s	fr1, @-%0\n\t"
		     "fmov.s	fr0, @-%0\n\t"
		     "lds	%3, fpscr\n\t"
		     : "=r" (dummy)
		     : "0" ((char *)(&tsk->thread.fpu.hard.status)),
		       "r" (FPSCR_RCHG),
		       "r" (FPSCR_INIT)
		     : "memory");

	disable_fpu();
	release_fpu(regs);
}

/*
 * Reload the FPU registers from the save area of @tsk.
 *
 * The load pointer starts at &tsk->thread.fpu and is post-incremented after
 * every load, so the words are consumed upwards in this order:
 * fr0 ... fr15, fpscr, fpul - the mirror image of the order in which
 * save_fpu() stores them.  The FPU is enabled for the duration of the
 * reload and disabled again afterwards.
 */
static void
restore_fpu(struct task_struct *tsk)
{
	/* Output operand for the moving load pointer; its value is not used. */
	unsigned long dummy;

	enable_fpu();
	/* Operand %2 (FPSCR_RCHG) is not referenced by the template. */
	asm volatile("fmov.s	@%0+, fr0\n\t"
		     "fmov.s	@%0+, fr1\n\t"
		     "fmov.s	@%0+, fr2\n\t"
		     "fmov.s	@%0+, fr3\n\t"
		     "fmov.s	@%0+, fr4\n\t"
		     "fmov.s	@%0+, fr5\n\t"
		     "fmov.s	@%0+, fr6\n\t"
		     "fmov.s	@%0+, fr7\n\t"
		     "fmov.s	@%0+, fr8\n\t"
		     "fmov.s	@%0+, fr9\n\t"
		     "fmov.s	@%0+, fr10\n\t"
		     "fmov.s	@%0+, fr11\n\t"
		     "fmov.s	@%0+, fr12\n\t"
		     "fmov.s	@%0+, fr13\n\t"
		     "fmov.s	@%0+, fr14\n\t"
		     "fmov.s	@%0+, fr15\n\t"
		     "lds.l	@%0+, fpscr\n\t"
		     "lds.l	@%0+, fpul\n\t"
		     : "=r" (dummy)
		     : "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
		     : "memory");
	disable_fpu();
}

/*
 * Load the FPU with signalling NaNs.  The bit pattern we're using
 * has the property that, no matter whether it is considered as single or
 * as double precision, it represents signalling NaNs.
 *
 * NOTE(review): the value actually passed as operand %0 below is 0, not a
 * NaN pattern, so as written every register is loaded with 0 - confirm
 * which of the two is intended.
 */

/*
 * Give a first-time FPU user a defined register state: the operand-%0
 * constant is moved into FPUL and copied from there into fr0 ... fr15,
 * then FPSCR is loaded with FPSCR_INIT.  The FPU is enabled for the
 * duration and disabled again afterwards.
 */
static void
fpu_init(void)
{
	enable_fpu();
	/* Operand %1 (FPSCR_RCHG) is not referenced by the template. */
	asm volatile("lds	%0, fpul\n\t"
		     "fsts	fpul, fr0\n\t"
		     "fsts	fpul, fr1\n\t"
		     "fsts	fpul, fr2\n\t"
		     "fsts	fpul, fr3\n\t"
		     "fsts	fpul, fr4\n\t"
		     "fsts	fpul, fr5\n\t"
		     "fsts	fpul, fr6\n\t"
		     "fsts	fpul, fr7\n\t"
		     "fsts	fpul, fr8\n\t"
		     "fsts	fpul, fr9\n\t"
		     "fsts	fpul, fr10\n\t"
		     "fsts	fpul, fr11\n\t"
		     "fsts	fpul, fr12\n\t"
		     "fsts	fpul, fr13\n\t"
		     "fsts	fpul, fr14\n\t"
		     "fsts	fpul, fr15\n\t"
		     "lds	%2, fpscr\n\t"
		     : /* no output */
		     : "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
	disable_fpu();
}

/*
 *	Emulate arithmetic ops on denormalized number for some FPU insns.
 */

/*
 * denormalized float * float
 *
 * Multiply two single-precision bit patterns where hx is the denormal
 * (or zero) operand and hy the normal one.  Returns the bit pattern of
 * the product; the result is truncated, not rounded.  When hx is zero or
 * hy is itself denormal the result is a correctly signed zero.
 */
static int denormal_mulf(int hx, int hy)
{
	const unsigned int sign = (hx ^ hy) & 0x80000000;
	unsigned int small = hx & 0x7fffffff;
	unsigned int big = hy & 0x7fffffff;
	unsigned long long prod, rest;
	unsigned int result;
	int e, top;

	if (small == 0 || big < 0x00800000)
		return sign;

	/* biased exponent of the normal operand, mantissas as integers */
	e = (big & 0x7f800000) >> 23;
	small &= 0x007fffff;
	big = (big & 0x007fffff) | 0x00800000;
	prod = (unsigned long long)small * big;

	/* top = index of the most significant set bit of the product */
	top = -1;
	for (rest = prod; rest != 0; rest >>= 1)
		top++;

	/* FIXME: use guard bits */
	e += top - 126 - 46;
	if (e > 0)		/* normal result: drop the leading one */
		result = ((int) (prod >> (top - 23)) & 0x007fffff) | (e << 23);
	else if (e >= -22)	/* denormal result: keep the leading one */
		result = (int) (prod >> (top - 22 - e)) & 0x007fffff;
	else			/* too small to represent */
		result = 0;

	return sign | result;
}

/*
 * Full 64 x 64 -> 128 bit unsigned multiply, used by the
 * denormalized double * double emulation.
 *
 * @x, @y:  factors
 * @highp:  receives the upper 64 bits of the product
 * @lowp:   receives the lower 64 bits of the product
 */
static void mult64(unsigned long long x, unsigned long long y,
		unsigned long long *highp, unsigned long long *lowp)
{
	const unsigned long long x_hi = x >> 32;
	const unsigned long long x_lo = x & 0xffffffffLL;
	const unsigned long y_hi = (unsigned long) (y >> 32);
	const unsigned long y_lo = (unsigned long) (y & 0xffffffffLL);

	/* the four 32 x 32 partial products */
	const unsigned long long hh = x_hi * y_hi;
	const unsigned long long lh = x_lo * y_hi;
	const unsigned long long hl = x_hi * y_lo;
	unsigned long long acc = x_lo * y_lo;
	unsigned long long carries = 0LL;
	unsigned long long before;

	/* fold the two cross terms into the low word, counting wrap-arounds */
	before = acc;
	acc += lh << 32;
	if (acc < before)
		carries++;

	before = acc;
	acc += hl << 32;
	if (acc < before)
		carries++;

	*lowp = acc;
	*highp = carries + (lh >> 32) + (hl >> 32) + hh;
}

/*
 * Shift the 128-bit value (mh:ml) right by n bits and return the low
 * 64 bits of the result.
 *
 * @mh: upper 64 bits
 * @ml: lower 64 bits
 * @n:  shift count, expected to be >= 0
 *
 * n == 0 and n >= 128 are handled explicitly: the general expressions
 * below would otherwise shift a 64-bit value by 64 or more, which is
 * undefined in C.  n == 0 is reachable from denormal_muld() when the most
 * significant bit of the product is bit 52.
 */
static inline long long rshift64(unsigned long long mh,
		unsigned long long ml, int n)
{
	if (n == 0)
		return ml;
	if (n >= 128)
		return 0;
	if (n >= 64)
		return mh >> (n - 64);
	return (mh << (64 - n)) | (ml >> n);
}

/*
 * denormalized double * double
 *
 * Multiply two double-precision bit patterns where hx is the denormal
 * (or zero) operand and hy the normal one.  Returns the bit pattern of
 * the product; the result is truncated, not rounded.  When hx is zero or
 * hy is itself denormal the result is a correctly signed zero.
 */
static long long denormal_muld(long long hx, long long hy)
{
	const unsigned long long sign = (hx ^ hy) & 0x8000000000000000LL;
	unsigned long long small = hx & 0x7fffffffffffffffLL;
	unsigned long long big = hy & 0x7fffffffffffffffLL;
	unsigned long long prod_hi, prod_lo, rest, result;
	int e, top;

	if (small == 0 || big < 0x0010000000000000LL)
		return sign;

	/* biased exponent of the normal operand, mantissas as integers */
	e = (big & 0x7ff0000000000000LL) >> 52;
	small &= 0x000fffffffffffffLL;
	big = (big & 0x000fffffffffffffLL) | 0x0010000000000000LL;
	mult64(small, big, &prod_hi, &prod_lo);

	/* top = index of the most significant set bit of the 128-bit product */
	top = -1;
	if (prod_hi) {
		for (rest = prod_hi; rest != 0; rest >>= 1)
			top++;
		top += 64;
	} else {
		for (rest = prod_lo; rest != 0; rest >>= 1)
			top++;
	}

	/* FIXME: use guard bits */
	e += top - 1022 - 52 * 2;
	if (e > 0)		/* normal result: drop the leading one */
		result = (rshift64(prod_hi, prod_lo, top - 52)
			  & 0x000fffffffffffffLL)
			| ((long long)e << 52);
	else if (e >= -51)	/* denormal result: keep the leading one */
		result = rshift64(prod_hi, prod_lo, top - 51 - e)
			& 0x000fffffffffffffLL;
	else			/* too small to represent */
		result = 0;

	return sign | result;
}

/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are single-precision magnitudes (sign bit clear).
 * Returns the bit pattern of the difference; it is negative only when ix
 * is denormal too and smaller than iy.
 */
static int denormal_subf1(unsigned int ix, unsigned int iy)
{
	int mant;
	int e;

	/* two denormals share one scale: plain integer subtraction */
	if (ix < 0x00800000)
		return ix - iy;

	e = (ix & 0x7f800000) >> 23;
	if (e > 32)
		return ix;	/* iy is far below the precision of ix */

	/* bring iy to the scale of ix */
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) - iy;

	/* renormalise; stop early if the result turns denormal */
	for (; mant < 0x00800000; mant <<= 1) {
		if (--e == 0)
			return mant;
	}

	return (e << 23) | (mant & 0x007fffff);
}

/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are single-precision magnitudes (sign bit clear).
 * Returns the bit pattern of the sum, truncated rather than rounded.
 */
static int denormal_addf1(unsigned int ix, unsigned int iy)
{
	int mant;
	int e;

	/* two denormals share one scale: plain integer addition */
	if (ix < 0x00800000)
		return ix + iy;

	e = (ix & 0x7f800000) >> 23;
	if (e > 32)
		return ix;	/* iy is far below the precision of ix */

	/* bring iy to the scale of ix */
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x007fffff) | 0x00800000) + iy;
	if (mant >= 0x01000000) {
		/* carry out of the mantissa: step up one exponent */
		mant >>= 1;
		e++;
	}

	return (e << 23) | (mant & 0x007fffff);
}

/*
 * hx + hy for single-precision bit patterns where at least one operand is
 * denormal.  Returns the bit pattern of the sum.
 *
 * The magnitudes are combined by denormal_addf1()/denormal_subf1(), which
 * expect the denormal operand as their second argument, so the operands
 * are swapped when hy is the normal one.
 */
static int denormal_addf(int hx, int hy)
{
	unsigned int ix, iy;
	int sign;
	int diff;

	sign = hx & 0x80000000;
	ix = hx & 0x7fffffff;
	iy = hy & 0x7fffffff;

	if ((hx ^ hy) & 0x80000000) {
		/* opposite signs: subtract the magnitudes */
		if (iy < 0x00800000) {
			/*
			 * |hx| - |hy| is negative when both are denormal and
			 * |hx| < |hy|.  The test has to be done on a signed
			 * value; comparing the unsigned ix against 0 can
			 * never be true.
			 */
			diff = denormal_subf1(ix, iy);
			if (diff < 0) {
				diff = -diff;
				sign ^= 0x80000000;
			}
			ix = diff;
		} else {
			/* hy is the normal operand: result takes its sign */
			ix = denormal_subf1(iy, ix);
			sign ^= 0x80000000;
		}
	} else {
		/* same sign: add the magnitudes */
		if (iy < 0x00800000)
			ix = denormal_addf1(ix, iy);
		else
			ix = denormal_addf1(iy, ix);
	}

	return sign | ix;
}

/*
 * ix - iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are double-precision magnitudes (sign bit clear).
 * Returns the bit pattern of the difference; it is negative only when ix
 * is denormal too and smaller than iy.
 */
static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	int e;

	/* two denormals share one scale: plain integer subtraction */
	if (ix < 0x0010000000000000LL)
		return ix - iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	if (e > 64)
		return ix;	/* iy is far below the precision of ix */

	/* bring iy to the scale of ix */
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) - iy;

	/* renormalise; stop early if the result turns denormal */
	for (; mant < 0x0010000000000000LL; mant <<= 1) {
		if (--e == 0)
			return mant;
	}

	return ((long long)e << 52) | (mant & 0x000fffffffffffffLL);
}

/*
 * ix + iy where iy: denormal and ix, iy >= 0
 *
 * Both arguments are double-precision magnitudes (sign bit clear).
 * Returns the bit pattern of the sum, truncated rather than rounded.
 */
static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
{
	long long mant;
	long long e;

	/* two denormals share one scale: plain integer addition */
	if (ix < 0x0010000000000000LL)
		return ix + iy;

	e = (ix & 0x7ff0000000000000LL) >> 52;
	if (e > 64)
		return ix;	/* iy is far below the precision of ix */

	/* bring iy to the scale of ix */
	iy >>= e - 1;
	if (!iy)
		return ix;

	mant = ((ix & 0x000fffffffffffffLL) | 0x0010000000000000LL) + iy;
	if (mant >= 0x0020000000000000LL) {
		/* carry out of the mantissa: step up one exponent */
		mant >>= 1;
		e++;
	}

	return (e << 52) | (mant & 0x000fffffffffffffLL);
}

/*
 * hx + hy for double-precision bit patterns where at least one operand is
 * denormal.  Returns the bit pattern of the sum.
 *
 * The magnitudes are combined by denormal_addd1()/denormal_subd1(), which
 * expect the denormal operand as their second argument, so the operands
 * are swapped when hy is the normal one.
 */
static long long denormal_addd(long long hx, long long hy)
{
	unsigned long long ix, iy;
	long long sign;
	long long diff;

	sign = hx & 0x8000000000000000LL;
	ix = hx & 0x7fffffffffffffffLL;
	iy = hy & 0x7fffffffffffffffLL;

	if ((hx ^ hy) & 0x8000000000000000LL) {
		/* opposite signs: subtract the magnitudes */
		if (iy < 0x0010000000000000LL) {
			/*
			 * |hx| - |hy| is negative when both are denormal and
			 * |hx| < |hy|.  The test has to be done on a signed
			 * value; comparing the unsigned ix against 0 can
			 * never be true.
			 */
			diff = denormal_subd1(ix, iy);
			if (diff < 0) {
				diff = -diff;
				sign ^= 0x8000000000000000LL;
			}
			ix = diff;
		} else {
			/* hy is the normal operand: result takes its sign */
			ix = denormal_subd1(iy, ix);
			sign ^= 0x8000000000000000LL;
		}
	} else {
		/* same sign: add the magnitudes */
		if (iy < 0x0010000000000000LL)
			ix = denormal_addd1(ix, iy);
		else
			ix = denormal_addd1(iy, ix);
	}

	return sign | ix;
}

/**
 *	denormal_to_double - Given denormalized float number,
 *	                     store double float
 *
 *	@fpu: Pointer to sh_fpu_hard structure
 *	@n: Index to FP register
 *
 *	The single-precision pattern is read from @fpu->fpul.  If it is a
 *	denormal (zero exponent field, non-zero mantissa) it is normalised
 *	and the equivalent double is written to @fpu->fp_regs[@n] (upper
 *	word) and @fpu->fp_regs[@n + 1] (lower word).  Any other value,
 *	including +0 and -0, leaves the registers untouched.
 */
static void
denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
{
	unsigned long du, dl;
	unsigned long x = fpu->fpul;
	int exp = 1023 - 126;

	/*
	 * Test the mantissa rather than the whole word: a sign-only value
	 * (-0.0) is non-zero but has no mantissa bit set, so the
	 * normalisation loop below would never terminate on it.
	 */
	if ((x & 0x7f800000) != 0 || (x & 0x007fffff) == 0)
		return;

	du = (x & 0x80000000);

	/* shift the leading mantissa bit up to the implicit-one position */
	while ((x & 0x00800000) == 0) {
		x <<= 1;
		exp--;
	}
	x &= 0x007fffff;

	/* 23 mantissa bits: top 20 go into the upper word, low 3 below */
	du |= (exp << 20) | (x >> 3);
	dl = x << 29;

	fpu->fp_regs[n] = du;
	fpu->fp_regs[n+1] = dl;
}

/**
 *	ieee_fpe_handler - Handle denormalized number exception
 *
 *	@regs: Pointer to register structure
 *
 *	Decodes the 16-bit instruction at @regs->pc.  If it is a delayed
 *	branch, the instruction to emulate is taken from the delay slot
 *	(pc + 2) and the resume address is the branch target; otherwise the
 *	instruction itself is emulated and execution resumes at pc + 2.
 *
 *	fcnvsd, fmul, fadd and fsub are emulated on the register image in
 *	current->thread.fpu.hard, and only when bit 17 of the saved FPSCR
 *	(FPSCR_FPU_ERROR) is set; for fmul/fadd/fsub one operand must also
 *	be denormal.  Bit 19 of the saved FPSCR selects the double-precision
 *	path.
 *
 *	Returns 1 when it's handled (should not cause exception); in that
 *	case @regs->pc has been advanced to the resume address.  Returns 0
 *	otherwise.
 */
static int
ieee_fpe_handler (struct pt_regs *regs)
{
	unsigned short insn = *(unsigned short *) regs->pc;
	unsigned short finsn;	/* the FPU instruction to emulate */
	unsigned long nextpc;	/* where to resume after emulation */
	/* the four 4-bit fields of insn, most significant first */
	int nib[4] = {
		(insn >> 12) & 0xf,
		(insn >> 8) & 0xf,
		(insn >> 4) & 0xf,
		insn & 0xf};

	/*
	 * NOTE(review): PR is written here before it is known whether the
	 * delay-slot instruction will actually be emulated, so it stays
	 * modified on the "return 0" paths too; bsrf is not covered by this
	 * test although it is decoded below - confirm both are intended.
	 */
	if (nib[0] == 0xb ||
	    (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
		regs->pr = regs->pc + 4;
	if (nib[0] == 0xa || nib[0] == 0xb) { /* bra & bsr */
		/* sign-extend the 12-bit displacement and double it */
		nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
		finsn = *(unsigned short *) (regs->pc + 2);
	} else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
		/* taken when bit 0 of SR is set */
		if (regs->sr & 1)
			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
		else
			nextpc = regs->pc + 4;
		finsn = *(unsigned short *) (regs->pc + 2);
	} else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
		/* taken when bit 0 of SR is clear */
		if (regs->sr & 1)
			nextpc = regs->pc + 4;
		else
			nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
		finsn = *(unsigned short *) (regs->pc + 2);
	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* jmp & jsr */
		nextpc = regs->regs[nib[1]];
		finsn = *(unsigned short *) (regs->pc + 2);
	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
		 (nib[2] == 0x0 || nib[2] == 0x2)) { /* braf & bsrf */
		nextpc = regs->pc + 4 + regs->regs[nib[1]];
		finsn = *(unsigned short *) (regs->pc + 2);
	} else if (insn == 0x000b) { /* rts */
		nextpc = regs->pr;
		finsn = *(unsigned short *) (regs->pc + 2);
	} else {
		/* not a delayed branch: the instruction itself is the one */
		nextpc = regs->pc + 2;
		finsn = insn;
	}

/* bit 17 of FPSCR, tested below as the "FPU error" indication */
#define FPSCR_FPU_ERROR (1 << 17)

	if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
		struct task_struct *tsk = current;

		if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
			/* FPU error */
			/* source is the saved FPUL, destination register pair n */
			denormal_to_double (&tsk->thread.fpu.hard,
					    (finsn >> 8) & 0xf);
		} else
			return 0;

		regs->pc = nextpc;
		return 1;
	} else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
		struct task_struct *tsk = current;
		int fpscr;
		int n, m, prec;
		unsigned int hx, hy;

		/* n: destination/first operand register, m: second operand */
		n = (finsn >> 8) & 0xf;
		m = (finsn >> 4) & 0xf;
		hx = tsk->thread.fpu.hard.fp_regs[n];
		hy = tsk->thread.fpu.hard.fp_regs[m];
		fpscr = tsk->thread.fpu.hard.fpscr;
		prec = fpscr & (1 << 19);

		if ((fpscr & FPSCR_FPU_ERROR)
		     && (prec && ((hx & 0x7fffffff) < 0x00100000
				   || (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal */
			/* doubles live in register pairs: [r] upper, [r+1] lower */
			llx = ((long long) hx << 32)
			       | tsk->thread.fpu.hard.fp_regs[n+1];
			lly = ((long long) hy << 32)
			       | tsk->thread.fpu.hard.fp_regs[m+1];
			/* denormal_muld() wants the denormal operand first */
			if ((hx & 0x7fffffff) >= 0x00100000)
				llx = denormal_muld(lly, llx);
			else
				llx = denormal_muld(llx, lly);
			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
		} else if ((fpscr & FPSCR_FPU_ERROR)
		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
				   || (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal */
			/* denormal_mulf() wants the denormal operand first */
			if ((hx & 0x7fffffff) >= 0x00800000)
				hx = denormal_mulf(hy, hx);
			else
				hx = denormal_mulf(hx, hy);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
		} else
			return 0;

		regs->pc = nextpc;
		return 1;
	} else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
		struct task_struct *tsk = current;
		int fpscr;
		int n, m, prec;
		unsigned int hx, hy;

		/* n: destination/first operand register, m: second operand */
		n = (finsn >> 8) & 0xf;
		m = (finsn >> 4) & 0xf;
		hx = tsk->thread.fpu.hard.fp_regs[n];
		hy = tsk->thread.fpu.hard.fp_regs[m];
		fpscr = tsk->thread.fpu.hard.fpscr;
		prec = fpscr & (1 << 19);

		if ((fpscr & FPSCR_FPU_ERROR)
		     && (prec && ((hx & 0x7fffffff) < 0x00100000
				   || (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal */
			llx = ((long long) hx << 32)
			       | tsk->thread.fpu.hard.fp_regs[n+1];
			lly = ((long long) hy << 32)
			       | tsk->thread.fpu.hard.fp_regs[m+1];
			/* low nibble 0 is fadd; fsub adds the negated operand */
			if ((finsn & 0xf00f) == 0xf000)
				llx = denormal_addd(llx, lly);
			else
				llx = denormal_addd(llx, lly ^ (1LL << 63));
			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
		} else if ((fpscr & FPSCR_FPU_ERROR)
		     && (!prec && ((hx & 0x7fffffff) < 0x00800000
				   || (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal */
			/* low nibble 0 is fadd; fsub adds the negated operand */
			if ((finsn & 0xf00f) == 0xf000)
				hx = denormal_addf(hx, hy);
			else
				hx = denormal_addf(hx, hy ^ 0x80000000);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
		} else
			return 0;

		regs->pc = nextpc;
		return 1;
	}

	/* any other instruction is not emulated */
	return 0;
}

/*
 * FPU error trap: dump the FPU state into the current task, then try to
 * emulate the faulting instruction.  If emulation succeeds the cause and
 * flag bits are cleared in the saved FPSCR and the state is loaded back;
 * otherwise the task receives SIGFPE.
 */
BUILD_TRAP_HANDLER(fpu_error)
{
	struct task_struct *tsk = current;
	TRAP_HANDLER_DECL;

	save_fpu(tsk, regs);

	if (!ieee_fpe_handler(regs)) {
		/* could not be emulated */
		force_sig(SIGFPE, tsk);
		return;
	}

	/* emulated: drop the recorded exception and resume with the FPU */
	tsk->thread.fpu.hard.fpscr &= ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
	grab_fpu(regs);
	restore_fpu(tsk);
	set_tsk_thread_flag(tsk, TIF_USEDFPU);
}

/*
 * FPU state restore trap: hand the FPU to the current task.  A task that
 * has used the FPU before gets its saved registers back; a first-time
 * user gets a freshly initialised FPU and is marked as having used math.
 * A trap taken from kernel mode is only reported.
 */
BUILD_TRAP_HANDLER(fpu_state_restore)
{
	struct task_struct *tsk = current;
	TRAP_HANDLER_DECL;

	grab_fpu(regs);
	if (!user_mode(regs)) {
		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
		return;
	}

	if (!used_math()) {
		/* first FPU use by this task */
		fpu_init();
		set_used_math();
	} else {
		/* returning FPU user: reload the saved context */
		restore_fpu(tsk);
	}

	set_tsk_thread_flag(tsk, TIF_USEDFPU);
}
Commit	Line	Data
74d99a5e PM	1	/*
	2	* Save/restore floating point context for signal handlers.
	3	*
	4	* Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka
	5	*
	6	* This file is subject to the terms and conditions of the GNU General Public
	7	* License. See the file "COPYING" in the main directory of this archive
	8	* for more details.
	9	*
	10	* FIXME! These routines can be optimized in big endian case.
	11	*/
	12	#include <linux/sched.h>
	13	#include <linux/signal.h>
	14	#include <asm/processor.h>
	15	#include <asm/io.h>
9bbafce2	16	#include <asm/fpu.h>
74d99a5e PM	17
	18	/* The PR (precision) bit in the FP Status Register must be clear when
	19	* an frchg instruction is executed, otherwise the instruction is undefined.
	20	* Executing frchg with PR set causes a trap on some SH4 implementations.
	21	*/
	22
	23	#define FPSCR_RCHG 0x00000000
	24
	25
	26	/*
	27	* Save FPU registers onto task structure.
	28	* Assume called with FPU enabled (SR.FD=0).
	29	*/
	30	void
	31	save_fpu(struct task_struct tsk, struct pt_regs regs)
	32	{
	33	unsigned long dummy;
	34
	35	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
	36	enable_fpu();
	37	asm volatile("sts.l fpul, @-%0\n\t"
	38	"sts.l fpscr, @-%0\n\t"
	39	"fmov.s fr15, @-%0\n\t"
	40	"fmov.s fr14, @-%0\n\t"
	41	"fmov.s fr13, @-%0\n\t"
	42	"fmov.s fr12, @-%0\n\t"
	43	"fmov.s fr11, @-%0\n\t"
	44	"fmov.s fr10, @-%0\n\t"
	45	"fmov.s fr9, @-%0\n\t"
	46	"fmov.s fr8, @-%0\n\t"
	47	"fmov.s fr7, @-%0\n\t"
	48	"fmov.s fr6, @-%0\n\t"
	49	"fmov.s fr5, @-%0\n\t"
	50	"fmov.s fr4, @-%0\n\t"
	51	"fmov.s fr3, @-%0\n\t"
	52	"fmov.s fr2, @-%0\n\t"
	53	"fmov.s fr1, @-%0\n\t"
	54	"fmov.s fr0, @-%0\n\t"
	55	"lds %3, fpscr\n\t"
	56	: "=r" (dummy)
	57	: "0" ((char *)(&tsk->thread.fpu.hard.status)),
	58	"r" (FPSCR_RCHG),
	59	"r" (FPSCR_INIT)
	60	: "memory");
	61
	62	disable_fpu();
	63	release_fpu(regs);
	64	}
	65
	66	static void
	67	restore_fpu(struct task_struct *tsk)
	68	{
	69	unsigned long dummy;
	70
	71	enable_fpu();
	72	asm volatile("fmov.s @%0+, fr0\n\t"
	73	"fmov.s @%0+, fr1\n\t"
	74	"fmov.s @%0+, fr2\n\t"
	75	"fmov.s @%0+, fr3\n\t"
	76	"fmov.s @%0+, fr4\n\t"
	77	"fmov.s @%0+, fr5\n\t"
	78	"fmov.s @%0+, fr6\n\t"
	79	"fmov.s @%0+, fr7\n\t"
	80	"fmov.s @%0+, fr8\n\t"
81	"fmov.s @%0+, fr9\n\t"
82	"fmov.s @%0+, fr10\n\t"
83	"fmov.s @%0+, fr11\n\t"
84	"fmov.s @%0+, fr12\n\t"
85	"fmov.s @%0+, fr13\n\t"
86	"fmov.s @%0+, fr14\n\t"
87	"fmov.s @%0+, fr15\n\t"
88	"lds.l @%0+, fpscr\n\t"
89	"lds.l @%0+, fpul\n\t"
90	: "=r" (dummy)
91	: "0" (&tsk->thread.fpu), "r" (FPSCR_RCHG)
92	: "memory");
93	disable_fpu();
94	}
95
96	/*
97	* Load the FPU with signalling NANS. This bit pattern we're using
98	* has the property that no matter wether considered as single or as
99	* double precission represents signaling NANS.
100	*/
101
102	static void
103	fpu_init(void)
104	{
105	enable_fpu();
106	asm volatile("lds %0, fpul\n\t"
107	"fsts fpul, fr0\n\t"
108	"fsts fpul, fr1\n\t"
109	"fsts fpul, fr2\n\t"
110	"fsts fpul, fr3\n\t"
111	"fsts fpul, fr4\n\t"
112	"fsts fpul, fr5\n\t"
113	"fsts fpul, fr6\n\t"
114	"fsts fpul, fr7\n\t"
115	"fsts fpul, fr8\n\t"
116	"fsts fpul, fr9\n\t"
117	"fsts fpul, fr10\n\t"
118	"fsts fpul, fr11\n\t"
119	"fsts fpul, fr12\n\t"
120	"fsts fpul, fr13\n\t"
121	"fsts fpul, fr14\n\t"
122	"fsts fpul, fr15\n\t"
123	"lds %2, fpscr\n\t"
124	: /* no output */
125	: "r" (0), "r" (FPSCR_RCHG), "r" (FPSCR_INIT));
126	disable_fpu();
127	}
128
129	/*
130	* Emulate arithmetic ops on denormalized number for some FPU insns.
131	*/
132
133	/* denormalized float * float */
134	static int denormal_mulf(int hx, int hy)
135	{
136	unsigned int ix, iy;
137	unsigned long long m, n;
138	int exp, w;
139
140	ix = hx & 0x7fffffff;
141	iy = hy & 0x7fffffff;
142	if (iy < 0x00800000 \|\| ix == 0)
143	return ((hx ^ hy) & 0x80000000);
144
145	exp = (iy & 0x7f800000) >> 23;
146	ix &= 0x007fffff;
147	iy = (iy & 0x007fffff) \| 0x00800000;
148	m = (unsigned long long)ix * iy;
149	n = m;
150	w = -1;
151	while (n) { n >>= 1; w++; }
152
153	/* FIXME: use guard bits */
154	exp += w - 126 - 46;
155	if (exp > 0)
156	ix = ((int) (m >> (w - 23)) & 0x007fffff) \| (exp << 23);
157	else if (exp + 22 >= 0)
158	ix = (int) (m >> (w - 22 - exp)) & 0x007fffff;
159	else
160	ix = 0;
161
162	ix \|= (hx ^ hy) & 0x80000000;
163	return ix;
164	}
165
166	/* denormalized double * double */
167	static void mult64(unsigned long long x, unsigned long long y,
168	unsigned long long highp, unsigned long long lowp)
169	{
170	unsigned long long sub0, sub1, sub2, sub3;
171	unsigned long long high, low;
172
173	sub0 = (x >> 32) * (unsigned long) (y >> 32);
174	sub1 = (x & 0xffffffffLL) * (unsigned long) (y >> 32);
175	sub2 = (x >> 32) * (unsigned long) (y & 0xffffffffLL);
176	sub3 = (x & 0xffffffffLL) * (unsigned long) (y & 0xffffffffLL);
177	low = sub3;
178	high = 0LL;
179	sub3 += (sub1 << 32);
180	if (low > sub3)
181	high++;
182	low = sub3;
183	sub3 += (sub2 << 32);
184	if (low > sub3)
185	high++;
186	low = sub3;
187	high += (sub1 >> 32) + (sub2 >> 32);
188	high += sub0;
189	*lowp = low;
190	*highp = high;
191	}
192
193	static inline long long rshift64(unsigned long long mh,
194	unsigned long long ml, int n)
195	{
196	if (n >= 64)
197	return mh >> (n - 64);
198	return (mh << (64 - n)) \| (ml >> n);
199	}
200
201	static long long denormal_muld(long long hx, long long hy)
202	{
203	unsigned long long ix, iy;
204	unsigned long long mh, ml, nh, nl;
205	int exp, w;
206
207	ix = hx & 0x7fffffffffffffffLL;
208	iy = hy & 0x7fffffffffffffffLL;
209	if (iy < 0x0010000000000000LL \|\| ix == 0)
210	return ((hx ^ hy) & 0x8000000000000000LL);
211
212	exp = (iy & 0x7ff0000000000000LL) >> 52;
213	ix &= 0x000fffffffffffffLL;
214	iy = (iy & 0x000fffffffffffffLL) \| 0x0010000000000000LL;
215	mult64(ix, iy, &mh, &ml);
216	nh = mh;
217	nl = ml;
218	w = -1;
219	if (nh) {
220	while (nh) { nh >>= 1; w++;}
221	w += 64;
222	} else
223	while (nl) { nl >>= 1; w++;}
224
225	/* FIXME: use guard bits */
226	exp += w - 1022 - 52 * 2;
227	if (exp > 0)
228	ix = (rshift64(mh, ml, w - 52) & 0x000fffffffffffffLL)
229	\| ((long long)exp << 52);
230	else if (exp + 51 >= 0)
231	ix = rshift64(mh, ml, w - 51 - exp) & 0x000fffffffffffffLL;
232	else
233	ix = 0;
234
235	ix \|= (hx ^ hy) & 0x8000000000000000LL;
236	return ix;
237	}
238
239	/* ix - iy where iy: denormal and ix, iy >= 0 */
240	static int denormal_subf1(unsigned int ix, unsigned int iy)
241	{
242	int frac;
243	int exp;
244
245	if (ix < 0x00800000)
246	return ix - iy;
247
248	exp = (ix & 0x7f800000) >> 23;
249	if (exp - 1 > 31)
250	return ix;
251	iy >>= exp - 1;
252	if (iy == 0)
253	return ix;
254
255	frac = (ix & 0x007fffff) \| 0x00800000;
256	frac -= iy;
257	while (frac < 0x00800000) {
258	if (--exp == 0)
259	return frac;
260	frac <<= 1;
261	}
262
263	return (exp << 23) \| (frac & 0x007fffff);
264	}
265
266	/* ix + iy where iy: denormal and ix, iy >= 0 */
267	static int denormal_addf1(unsigned int ix, unsigned int iy)
268	{
269	int frac;
270	int exp;
271
272	if (ix < 0x00800000)
273	return ix + iy;
274
275	exp = (ix & 0x7f800000) >> 23;
276	if (exp - 1 > 31)
277	return ix;
278	iy >>= exp - 1;
279	if (iy == 0)
280	return ix;
281
282	frac = (ix & 0x007fffff) \| 0x00800000;
283	frac += iy;
284	if (frac >= 0x01000000) {
285	frac >>= 1;
286	++exp;
287	}
288
289	return (exp << 23) \| (frac & 0x007fffff);
290	}
291
292	static int denormal_addf(int hx, int hy)
293	{
294	unsigned int ix, iy;
295	int sign;
296
297	if ((hx ^ hy) & 0x80000000) {
298	sign = hx & 0x80000000;
299	ix = hx & 0x7fffffff;
300	iy = hy & 0x7fffffff;
301	if (iy < 0x00800000) {
302	ix = denormal_subf1(ix, iy);
303	if (ix < 0) {
304	ix = -ix;
305	sign ^= 0x80000000;
306	}
307	} else {
308	ix = denormal_subf1(iy, ix);
309	sign ^= 0x80000000;
310	}
311	} else {
312	sign = hx & 0x80000000;
313	ix = hx & 0x7fffffff;
314	iy = hy & 0x7fffffff;
315	if (iy < 0x00800000)
316	ix = denormal_addf1(ix, iy);
317	else
318	ix = denormal_addf1(iy, ix);
319	}
320
321	return sign \| ix;
322	}
323
324	/* ix - iy where iy: denormal and ix, iy >= 0 */
325	static long long denormal_subd1(unsigned long long ix, unsigned long long iy)
326	{
327	long long frac;
328	int exp;
329
330	if (ix < 0x0010000000000000LL)
331	return ix - iy;
332
333	exp = (ix & 0x7ff0000000000000LL) >> 52;
334	if (exp - 1 > 63)
335	return ix;
336	iy >>= exp - 1;
337	if (iy == 0)
338	return ix;
339
340	frac = (ix & 0x000fffffffffffffLL) \| 0x0010000000000000LL;
341	frac -= iy;
342	while (frac < 0x0010000000000000LL) {
343	if (--exp == 0)
344	return frac;
345	frac <<= 1;
346	}
347
348	return ((long long)exp << 52) \| (frac & 0x000fffffffffffffLL);
349	}
350
351	/* ix + iy where iy: denormal and ix, iy >= 0 */
352	static long long denormal_addd1(unsigned long long ix, unsigned long long iy)
353	{
354	long long frac;
355	long long exp;
356
357	if (ix < 0x0010000000000000LL)
358	return ix + iy;
359
360	exp = (ix & 0x7ff0000000000000LL) >> 52;
361	if (exp - 1 > 63)
362	return ix;
363	iy >>= exp - 1;
364	if (iy == 0)
365	return ix;
366
367	frac = (ix & 0x000fffffffffffffLL) \| 0x0010000000000000LL;
368	frac += iy;
369	if (frac >= 0x0020000000000000LL) {
370	frac >>= 1;
371	++exp;
372	}
373
374	return (exp << 52) \| (frac & 0x000fffffffffffffLL);
375	}
376
377	static long long denormal_addd(long long hx, long long hy)
378	{
379	unsigned long long ix, iy;
380	long long sign;
381
382	if ((hx ^ hy) & 0x8000000000000000LL) {
383	sign = hx & 0x8000000000000000LL;
384	ix = hx & 0x7fffffffffffffffLL;
385	iy = hy & 0x7fffffffffffffffLL;
386	if (iy < 0x0010000000000000LL) {
387	ix = denormal_subd1(ix, iy);
388	if (ix < 0) {
389	ix = -ix;
390	sign ^= 0x8000000000000000LL;
391	}
392	} else {
393	ix = denormal_subd1(iy, ix);
394	sign ^= 0x8000000000000000LL;
395	}
396	} else {
397	sign = hx & 0x8000000000000000LL;
398	ix = hx & 0x7fffffffffffffffLL;
399	iy = hy & 0x7fffffffffffffffLL;
400	if (iy < 0x0010000000000000LL)
401	ix = denormal_addd1(ix, iy);
402	else
403	ix = denormal_addd1(iy, ix);
404	}
405
406	return sign \| ix;
407	}
408
409	/**
410	* denormal_to_double - Given denormalized float number,
411	* store double float
412	*
413	* @fpu: Pointer to sh_fpu_hard structure
414	* @n: Index to FP register
415	*/
416	static void
417	denormal_to_double (struct sh_fpu_hard_struct *fpu, int n)
418	{
419	unsigned long du, dl;
420	unsigned long x = fpu->fpul;
421	int exp = 1023 - 126;
422
423	if (x != 0 && (x & 0x7f800000) == 0) {
424	du = (x & 0x80000000);
425	while ((x & 0x00800000) == 0) {
426	x <<= 1;
427	exp--;
428	}
429	x &= 0x007fffff;
430	du \|= (exp << 20) \| (x >> 3);
431	dl = x << 29;
432
433	fpu->fp_regs[n] = du;
434	fpu->fp_regs[n+1] = dl;
435	}
436	}
437
438	/**
439	* ieee_fpe_handler - Handle denormalized number exception
440	*
441	* @regs: Pointer to register structure
442	*
443	* Returns 1 when it's handled (should not cause exception).
444	*/
445	static int
446	ieee_fpe_handler (struct pt_regs *regs)
447	{
448	unsigned short insn = (unsigned short ) regs->pc;
449	unsigned short finsn;
450	unsigned long nextpc;
451	int nib[4] = {
452	(insn >> 12) & 0xf,
453	(insn >> 8) & 0xf,
454	(insn >> 4) & 0xf,
455	insn & 0xf};
456
457	if (nib[0] == 0xb \|\|
458	(nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb)) /* bsr & jsr */
459	regs->pr = regs->pc + 4;
460	if (nib[0] == 0xa \|\| nib[0] == 0xb) { /* bra & bsr */
461	nextpc = regs->pc + 4 + ((short) ((insn & 0xfff) << 4) >> 3);
462	finsn = (unsigned short ) (regs->pc + 2);
463	} else if (nib[0] == 0x8 && nib[1] == 0xd) { /* bt/s */
464	if (regs->sr & 1)
465	nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
466	else
467	nextpc = regs->pc + 4;
468	finsn = (unsigned short ) (regs->pc + 2);
469	} else if (nib[0] == 0x8 && nib[1] == 0xf) { /* bf/s */
470	if (regs->sr & 1)
471	nextpc = regs->pc + 4;
472	else
473	nextpc = regs->pc + 4 + ((char) (insn & 0xff) << 1);
474	finsn = (unsigned short ) (regs->pc + 2);
475	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
476	(nib[2] == 0x0 \|\| nib[2] == 0x2)) { /* jmp & jsr */
477	nextpc = regs->regs[nib[1]];
478	finsn = (unsigned short ) (regs->pc + 2);
479	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
480	(nib[2] == 0x0 \|\| nib[2] == 0x2)) { /* braf & bsrf */
481	nextpc = regs->pc + 4 + regs->regs[nib[1]];
482	finsn = (unsigned short ) (regs->pc + 2);
483	} else if (insn == 0x000b) { /* rts */
484	nextpc = regs->pr;
485	finsn = (unsigned short ) (regs->pc + 2);
486	} else {
487	nextpc = regs->pc + 2;
488	finsn = insn;
489	}
490
491	#define FPSCR_FPU_ERROR (1 << 17)
492
493	if ((finsn & 0xf1ff) == 0xf0ad) { /* fcnvsd */
494	struct task_struct *tsk = current;
495
496	if ((tsk->thread.fpu.hard.fpscr & FPSCR_FPU_ERROR)) {
497	/* FPU error */
498	denormal_to_double (&tsk->thread.fpu.hard,
499	(finsn >> 8) & 0xf);
500	} else
501	return 0;
502
503	regs->pc = nextpc;
504	return 1;
505	} else if ((finsn & 0xf00f) == 0xf002) { /* fmul */
506	struct task_struct *tsk = current;
507	int fpscr;
508	int n, m, prec;
509	unsigned int hx, hy;
510
511	n = (finsn >> 8) & 0xf;
512	m = (finsn >> 4) & 0xf;
513	hx = tsk->thread.fpu.hard.fp_regs[n];
514	hy = tsk->thread.fpu.hard.fp_regs[m];
515	fpscr = tsk->thread.fpu.hard.fpscr;
516	prec = fpscr & (1 << 19);
517
518	if ((fpscr & FPSCR_FPU_ERROR)
519	&& (prec && ((hx & 0x7fffffff) < 0x00100000
520	\|\| (hy & 0x7fffffff) < 0x00100000))) {
521	long long llx, lly;
522
523	/* FPU error because of denormal */
524	llx = ((long long) hx << 32)
525	\| tsk->thread.fpu.hard.fp_regs[n+1];
526	lly = ((long long) hy << 32)
527	\| tsk->thread.fpu.hard.fp_regs[m+1];
528	if ((hx & 0x7fffffff) >= 0x00100000)
529	llx = denormal_muld(lly, llx);
530	else
531	llx = denormal_muld(llx, lly);
532	tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
533	tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
534	} else if ((fpscr & FPSCR_FPU_ERROR)
535	&& (!prec && ((hx & 0x7fffffff) < 0x00800000
536	\|\| (hy & 0x7fffffff) < 0x00800000))) {
537	/* FPU error because of denormal */
538	if ((hx & 0x7fffffff) >= 0x00800000)
539	hx = denormal_mulf(hy, hx);
540	else
541	hx = denormal_mulf(hx, hy);
542	tsk->thread.fpu.hard.fp_regs[n] = hx;
543	} else
544	return 0;
545
546	regs->pc = nextpc;
547	return 1;
548	} else if ((finsn & 0xf00e) == 0xf000) { /* fadd, fsub */
549	struct task_struct *tsk = current;
550	int fpscr;
551	int n, m, prec;
552	unsigned int hx, hy;
553
554	n = (finsn >> 8) & 0xf;
555	m = (finsn >> 4) & 0xf;
556	hx = tsk->thread.fpu.hard.fp_regs[n];
557	hy = tsk->thread.fpu.hard.fp_regs[m];
558	fpscr = tsk->thread.fpu.hard.fpscr;
559	prec = fpscr & (1 << 19);
560
561	if ((fpscr & FPSCR_FPU_ERROR)
562	&& (prec && ((hx & 0x7fffffff) < 0x00100000
563	\|\| (hy & 0x7fffffff) < 0x00100000))) {
564	long long llx, lly;
565
566	/* FPU error because of denormal */
567	llx = ((long long) hx << 32)
568	\| tsk->thread.fpu.hard.fp_regs[n+1];
569	lly = ((long long) hy << 32)
570	\| tsk->thread.fpu.hard.fp_regs[m+1];
571	if ((finsn & 0xf00f) == 0xf000)
572	llx = denormal_addd(llx, lly);
573	else
574	llx = denormal_addd(llx, lly ^ (1LL << 63));
575	tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
576	tsk->thread.fpu.hard.fp_regs[n+1] = llx & 0xffffffff;
577	} else if ((fpscr & FPSCR_FPU_ERROR)
578	&& (!prec && ((hx & 0x7fffffff) < 0x00800000
579	\|\| (hy & 0x7fffffff) < 0x00800000))) {
580	/* FPU error because of denormal */
581	if ((finsn & 0xf00f) == 0xf000)
582	hx = denormal_addf(hx, hy);
583	else
584	hx = denormal_addf(hx, hy ^ 0x80000000);
585	tsk->thread.fpu.hard.fp_regs[n] = hx;
586	} else
587	return 0;
588
589	regs->pc = nextpc;
590	return 1;
591	}
592
593	return 0;
594	}
595
596	BUILD_TRAP_HANDLER(fpu_error)
597	{
598	struct task_struct *tsk = current;
599	TRAP_HANDLER_DECL;
600
601	save_fpu(tsk, regs);
602	if (ieee_fpe_handler(regs)) {
603	tsk->thread.fpu.hard.fpscr &=
604	~(FPSCR_CAUSE_MASK \| FPSCR_FLAG_MASK);
605	grab_fpu(regs);
606	restore_fpu(tsk);
607	set_tsk_thread_flag(tsk, TIF_USEDFPU);
608	return;
609	}
610
611	force_sig(SIGFPE, tsk);
612	}
613
614	BUILD_TRAP_HANDLER(fpu_state_restore)
615	{
616	struct task_struct *tsk = current;
617	TRAP_HANDLER_DECL;
618
619	grab_fpu(regs);
620	if (!user_mode(regs)) {
621	printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
622	return;
623	}
624
625	if (used_math()) {
626	/* Using the FPU again. */
627	restore_fpu(tsk);
628	} else {
629	/* First time FPU user. */
630	fpu_init();
631	set_used_math();
632	}
633	set_tsk_thread_flag(tsk, TIF_USEDFPU);
634	}