Merge branch 'efi-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6-block.git] / arch / powerpc / kernel / vecemu.c
CommitLineData
b2441318 1// SPDX-License-Identifier: GPL-2.0
1da177e4
LT
2/*
3 * Routines to emulate some Altivec/VMX instructions, specifically
4 * those that can trap when given denormalized operands in Java mode.
5 */
6#include <linux/kernel.h>
7#include <linux/errno.h>
8#include <linux/sched.h>
9#include <asm/ptrace.h>
10#include <asm/processor.h>
d647b210 11#include <asm/switch_to.h>
7c0f6ba6 12#include <linux/uaccess.h>
1da177e4
LT
13
14/* Functions in vector.S */
15extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
16extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
17extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
18extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
19extern void vrefp(vector128 *dst, vector128 *src);
20extern void vrsqrtefp(vector128 *dst, vector128 *src);
21extern void vexptep(vector128 *dst, vector128 *src);
22
/*
 * Mantissas of 2^(i/8) for i = 0..7, scaled by 2^23 so that the
 * implicit leading one sits in bit 23.
 */
static const unsigned int exp2s[8] = {
	0x800000,
	0x8b95c2,
	0x9837f0,
	0xa5fed7,
	0xb504f3,
	0xc5672a,
	0xd744fd,
	0xeac0c7
};

/*
 * Computes an estimate of 2^x.  The `s' argument is the 32-bit
 * single-precision floating-point representation of x, and the
 * return value is the single-precision representation of the result.
 *
 * Special cases: a NaN input is returned as a QNaN, large positive
 * inputs give +Inf, large negative inputs give +0, and inputs with
 * |x| < 2^-23 give 1.0.
 *
 * The high-word multiplies are written in plain C (64-bit product,
 * upper 32 bits) instead of inline assembly, and the shift that
 * isolates the low fraction bits is done on an unsigned value so that
 * a negative `pwr' is never left-shifted.
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = (int)((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000) ? 0 : 0x7f800000;
	}
	if (exp < -23)
		return 0x3f800000;	/* 1.0 */

	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;		/* at most 24 + 7 bits: no overflow */
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;

	/*
	 * Extract integer part, which becomes exponent part of result.
	 * This relies on >> of a negative int being an arithmetic shift.
	 */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;	/* overflow: +Inf */
	if (exp < -23)
		return 0;		/* underflow: +0 */

	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];

	/*
	 * Linear interpolation using remaining 20 bits of fraction:
	 * frac = mant * fraction * 0x172b83ff / 2^32, where each step
	 * keeps the high 32 bits of a 32x32 -> 64 bit unsigned product.
	 */
	frac = (unsigned int)(((unsigned long long)((unsigned int)pwr << 12) *
			       0x172b83ffULL) >> 32);
	frac = (unsigned int)(((unsigned long long)frac * mant) >> 32);
	mant += frac;

	/* bit 23 of mant carries into the exponent field: exp + 1 */
	if (exp >= 0)
		return mant + ((unsigned int)exp << 23);

	/* denormalized result: shift right with rounding */
	exp = -exp;
	mant += 1U << (exp - 1);
	return mant >> exp;
}
88
/*
 * Computes an estimate of log_2(x).  The `s' argument is the 32-bit
 * single-precision floating-point representation of x, and the
 * return value is the single-precision representation of the result.
 *
 * Special cases:
 *	NaN		-> QNaN (input with the quiet bit set)
 *	+Inf		-> +Inf
 *	+0 or -0	-> -Inf
 *	-Inf or x < 0	-> default QNaN (0x7fc00000), since the
 *			   logarithm of a negative number is invalid
 *
 * Count-leading-zeros and the high-word multiplies are expressed with
 * __builtin_clz() and 64-bit C arithmetic rather than inline assembly;
 * both __builtin_clz() calls are made on values known to be non-zero.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;
	unsigned int mag;

	exp = s & 0x7f800000;
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			return s | 0x400000;	/* turn NaN into QNaN */
		/* log2(+Inf) = +Inf, log2(-Inf) is invalid */
		return (s & 0x80000000) ? 0x7fc00000 : s;
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */
	if (s & 0x80000000)		/* negative, non-zero */
		return 0x7fc00000;	/* return QNaN */

	if (exp == 0) {
		/* denormalized: mant != 0 here, so clz is well defined */
		lz = __builtin_clz((unsigned int)mant);
		mant <<= lz - 8;	/* move leading one to bit 23 */
		/* multiply instead of left-shifting a negative value */
		exp = (-118 - lz) * (1 << 23);
	} else {
		mant |= 0x800000;
		exp -= 127 << 23;
	}

	/*
	 * exp is now the integer part of the result in 9.23 fixed point
	 * (low 23 bits zero) and mant is in [2^23, 2^24).  Peel off the
	 * 0.5, 0.25 and 0.125 fraction bits by successive division.
	 */
	if (mant >= 0xb504f3) {				/* 2^0.5 * 2^23 */
		exp |= 0x400000;			/* 0.5 * 2^23 */
		/* multiply by 2^-0.5 * 2^32, keep the high word */
		mant = (int)(((unsigned long long)mant * 0xb504f334ULL) >> 32);
	}
	if (mant >= 0x9837f0) {				/* 2^0.25 * 2^23 */
		exp |= 0x200000;			/* 0.25 * 2^23 */
		/* multiply by 2^-0.25 * 2^32, keep the high word */
		mant = (int)(((unsigned long long)mant * 0xd744fccbULL) >> 32);
	}
	if (mant >= 0x8b95c2) {				/* 2^0.125 * 2^23 */
		exp |= 0x100000;			/* 0.125 * 2^23 */
		/* multiply by 2^-0.125 * 2^32, keep the high word */
		mant = (int)(((unsigned long long)mant * 0xeac0c6e8ULL) >> 32);
	}
	if (mant > 0x800000) {				/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		frac = (int)(((unsigned long long)
			      ((unsigned int)(mant - 0x800000) << 1) *
			      0xb0c7cd3aULL) >> 32);
		exp += frac;
	}

	/* convert the signed 9.23 fixed-point value to floating point */
	s = exp & 0x80000000;
	if (exp != 0) {
		mag = s ? 0U - (unsigned int)exp : (unsigned int)exp;
		/* distance of the leading one from bit 23 */
		lz = 8 - __builtin_clz(mag);
		if (lz > 0)
			mag >>= lz;
		else if (lz < 0)
			mag <<= -lz;
		/* bit 23 of mag carries into the exponent: lz + 127 */
		s += ((unsigned int)(lz + 126) << 23) + mag;
	}
	return s;
}
153
#define VSCR_SAT	1

/*
 * Convert the single-precision value `x', multiplied by 2^scale, to a
 * signed 32-bit integer, truncating towards zero.
 *
 * NaN converts to 0.  Values whose magnitude is 2^31 or more saturate
 * to the most negative or most positive integer and set VSCR_SAT in
 * *vscrp, except when the scaled value is exactly -2^31, which is
 * representable and therefore does not set the flag.
 */
static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
{
	const unsigned int negative = x & 0x80000000;
	const int biased = (x >> 23) & 0xff;
	int fraction = x & 0x7fffff;
	int e;

	if (biased == 255 && fraction != 0)
		return 0;		/* NaN -> 0 */

	e = biased - 127 + scale;
	if (e < 0)
		return 0;		/* |value| < 1: round towards zero */

	if (e >= 31) {
		/* saturate, unless the result would be -2^31 */
		if (x + (scale << 23) != 0xcf000000)
			*vscrp |= VSCR_SAT;
		return negative ? 0x80000000 : 0x7fffffff;
	}

	/* put the leading one at bit 30, then shift down to its place */
	fraction = ((fraction | 0x800000) << 7) >> (30 - e);
	return negative ? -fraction : fraction;
}
177
178static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
179{
180 int exp;
181 unsigned int mant;
182
183 exp = (x >> 23) & 0xff;
184 mant = x & 0x7fffff;
185 if (exp == 255 && mant != 0)
186 return 0; /* NaN -> 0 */
187 exp = exp - 127 + scale;
188 if (exp < 0)
189 return 0; /* round towards zero */
190 if (x & 0x80000000) {
191 /* negative => saturate to 0 */
192 *vscrp |= VSCR_SAT;
193 return 0;
194 }
195 if (exp >= 32) {
196 /* saturate */
197 *vscrp |= VSCR_SAT;
198 return 0xffffffff;
199 }
200 mant |= 0x800000;
201 mant = (mant << 8) >> (31 - exp);
202 return mant;
203}
204
/*
 * Round to floating integer, towards 0.
 *
 * Takes and returns a single-precision bit pattern.  NaNs come back
 * as QNaNs; Inf and values that are already integral are unchanged.
 */
static unsigned int rfiz(unsigned int x)
{
	const unsigned int biased = (x >> 23) & 0xff;
	int e;

	if (biased == 255 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */

	e = (int)biased - 127;
	if (e >= 23)
		return x;		/* no fraction bits left (or Inf) */
	if (e < 0)
		return x & 0x80000000;	/* |x| < 1.0 rounds to signed 0 */

	/* drop the mantissa bits that lie below the binary point */
	return x & ~(0x7fffffU >> e);
}
219
/*
 * Round to floating integer, towards +/- Inf (away from zero).
 *
 * Takes and returns a single-precision bit pattern.  NaNs come back
 * as QNaNs; Inf, +/-0 and values that are already integral are
 * unchanged.
 */
static unsigned int rfii(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	const unsigned int biased = (x >> 23) & 0xff;
	unsigned int below;
	int e;

	if (biased == 255 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */

	e = (int)biased - 127;
	if (e >= 23)
		return x;		/* no fraction bits left (or Inf) */
	if ((x & 0x7fffffff) == 0)
		return x;		/* +/-0 stays +/-0 */
	if (e < 0)
		return sign | 0x3f800000; /* 0 < |x| < 1.0 -> +/- 1.0 */

	/* mantissa bits that lie below the binary point */
	below = 0x7fffffU >> e;
	/*
	 * Bump the magnitude past the next integer and truncate.  A carry
	 * out of the mantissa increments the exponent, which is the right
	 * answer, and cannot reach the sign bit.
	 */
	return (x + below) & ~below;
}
240
/*
 * Round to floating integer, to nearest, with exact ties going to the
 * even integer (IEEE round-to-nearest).
 *
 * Takes and returns a single-precision bit pattern.  NaNs come back
 * as QNaNs; Inf and values that are already integral are unchanged.
 */
static unsigned int rfin(unsigned int x)
{
	const unsigned int sign = x & 0x80000000;
	unsigned int below, half, rounded;
	int exp;

	exp = (int)((x >> 23) & 0xff) - 127;
	if (exp == 128 && (x & 0x7fffff) != 0)
		return x | 0x400000;	/* NaN -> make it a QNaN */
	if (exp >= 23)
		return x;		/* it's an integer already (or Inf) */
	if (exp < -1)
		return sign;		/* |x| < 0.5 -> +/-0 */
	if (exp == -1) {
		/* exactly 0.5 is a tie between 0 and 1: pick the even 0 */
		if ((x & 0x7fffff) == 0)
			return sign;
		/* 0.5 < |x| < 1.0 rounds to +/- 1.0 */
		return sign | 0x3f800000;
	}

	below = 0x7fffffU >> exp;	/* mantissa bits below the point */
	half = 0x400000U >> exp;	/* the bit worth exactly 0.5 */

	/* add 0.5 to the magnitude and chop off the fraction bits */
	rounded = (x + half) & ~below;

	/*
	 * On an exact tie the step above rounded away from zero.  Clearing
	 * the lowest integer bit (below + 1) undoes that when the result
	 * came out odd and is a no-op when it is already even; after a
	 * carry into the exponent that bit is already zero.
	 */
	if ((x & below) == half)
		rounded &= ~(below + 1);

	return rounded;
}
260
261int emulate_altivec(struct pt_regs *regs)
262{
263 unsigned int instr, i;
264 unsigned int va, vb, vc, vd;
265 vector128 *vrs;
266
267 if (get_user(instr, (unsigned int __user *) regs->nip))
268 return -EFAULT;
269 if ((instr >> 26) != 4)
270 return -EINVAL; /* not an altivec instruction */
271 vd = (instr >> 21) & 0x1f;
272 va = (instr >> 16) & 0x1f;
273 vb = (instr >> 11) & 0x1f;
274 vc = (instr >> 6) & 0x1f;
275
de79f7b9 276 vrs = current->thread.vr_state.vr;
1da177e4
LT
277 switch (instr & 0x3f) {
278 case 10:
279 switch (vc) {
280 case 0: /* vaddfp */
281 vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
282 break;
283 case 1: /* vsubfp */
284 vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
285 break;
286 case 4: /* vrefp */
287 vrefp(&vrs[vd], &vrs[vb]);
288 break;
289 case 5: /* vrsqrtefp */
290 vrsqrtefp(&vrs[vd], &vrs[vb]);
291 break;
292 case 6: /* vexptefp */
293 for (i = 0; i < 4; ++i)
294 vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
295 break;
296 case 7: /* vlogefp */
297 for (i = 0; i < 4; ++i)
298 vrs[vd].u[i] = elog2(vrs[vb].u[i]);
299 break;
300 case 8: /* vrfin */
301 for (i = 0; i < 4; ++i)
302 vrs[vd].u[i] = rfin(vrs[vb].u[i]);
303 break;
304 case 9: /* vrfiz */
305 for (i = 0; i < 4; ++i)
306 vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
307 break;
308 case 10: /* vrfip */
309 for (i = 0; i < 4; ++i) {
310 u32 x = vrs[vb].u[i];
311 x = (x & 0x80000000)? rfiz(x): rfii(x);
312 vrs[vd].u[i] = x;
313 }
314 break;
315 case 11: /* vrfim */
316 for (i = 0; i < 4; ++i) {
317 u32 x = vrs[vb].u[i];
318 x = (x & 0x80000000)? rfii(x): rfiz(x);
319 vrs[vd].u[i] = x;
320 }
321 break;
322 case 14: /* vctuxs */
323 for (i = 0; i < 4; ++i)
324 vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
de79f7b9 325 &current->thread.vr_state.vscr.u[3]);
1da177e4
LT
326 break;
327 case 15: /* vctsxs */
328 for (i = 0; i < 4; ++i)
329 vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
de79f7b9 330 &current->thread.vr_state.vscr.u[3]);
1da177e4
LT
331 break;
332 default:
333 return -EINVAL;
334 }
335 break;
336 case 46: /* vmaddfp */
337 vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
338 break;
339 case 47: /* vnmsubfp */
340 vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
341 break;
342 default:
343 return -EINVAL;
344 }
345
346 return 0;
347}