sparc64: Fix VIS emulation bugs
[linux-2.6-block.git] / arch / sparc64 / kernel / visemul.c
CommitLineData
0c51ed93
DM
1/* visemul.c: Emulation of VIS instructions.
2 *
3 * Copyright (C) 2006 David S. Miller (davem@davemloft.net)
4 */
5#include <linux/kernel.h>
6#include <linux/errno.h>
7#include <linux/thread_info.h>
8
9#include <asm/ptrace.h>
10#include <asm/pstate.h>
11#include <asm/system.h>
12#include <asm/fpumacro.h>
13#include <asm/uaccess.h>
14
/*
 * OPF field values of the VIS instructions handled in this file.
 * The trailing comment on each line starts with the nine-bit binary
 * encoding of the value.
 */

/* Pixel formatting */
#define FPACK16_OPF	0x03b	/* 000111011: four 16-bit packs */
#define FPACK32_OPF	0x03a	/* 000111010: two 32-bit packs */
#define FPACKFIX_OPF	0x03d	/* 000111101: two 32-bit to 16-bit packs */
#define FEXPAND_OPF	0x04d	/* 001001101: four 16-bit expands */
#define FPMERGE_OPF	0x04b	/* 001001011: two 32-bit merges */

/* 8-by-16-bit partitioned products */
#define FMUL8x16_OPF	0x031	/* 000110001: plain */
#define FMUL8x16AU_OPF	0x033	/* 000110011: upper alpha */
#define FMUL8x16AL_OPF	0x035	/* 000110101: lower alpha */
#define FMUL8SUx16_OPF	0x036	/* 000110110: upper 8 bits */
#define FMUL8ULx16_OPF	0x037	/* 000110111: lower 8 bits */
#define FMULD8SUx16_OPF	0x038	/* 000111000: upper 8 bits */
#define FMULD8ULx16_OPF	0x039	/* 000111001: lower unsigned 8 bits */

/* Partitioned compares; the rd bit is set when the relation holds */
#define FCMPGT16_OPF	0x028	/* 000101000: four 16-bit, src1 > src2 */
#define FCMPGT32_OPF	0x02c	/* 000101100: two 32-bit, src1 > src2 */
#define FCMPLE16_OPF	0x020	/* 000100000: four 16-bit, src1 <= src2 */
#define FCMPLE32_OPF	0x024	/* 000100100: two 32-bit, src1 <= src2 */
#define FCMPNE16_OPF	0x022	/* 000100010: four 16-bit, src1 != src2 */
#define FCMPNE32_OPF	0x026	/* 000100110: two 32-bit, src1 != src2 */
#define FCMPEQ16_OPF	0x02a	/* 000101010: four 16-bit, src1 == src2 */
#define FCMPEQ32_OPF	0x02e	/* 000101110: two 32-bit, src1 == src2 */

/* Edge boundary processing */
#define EDGE8_OPF	0x000	/* 000000000: eight 8-bit */
#define EDGE8N_OPF	0x001	/* 000000001: eight 8-bit, no CC */
#define EDGE8L_OPF	0x002	/* 000000010: eight 8-bit, little-endian */
#define EDGE8LN_OPF	0x003	/* 000000011: eight 8-bit, LE, no CC */
#define EDGE16_OPF	0x004	/* 000000100: four 16-bit */
#define EDGE16N_OPF	0x005	/* 000000101: four 16-bit, no CC */
#define EDGE16L_OPF	0x006	/* 000000110: four 16-bit, little-endian */
#define EDGE16LN_OPF	0x007	/* 000000111: four 16-bit, LE, no CC */
#define EDGE32_OPF	0x008	/* 000001000: two 32-bit */
#define EDGE32N_OPF	0x009	/* 000001001: two 32-bit, no CC */
#define EDGE32L_OPF	0x00a	/* 000001010: two 32-bit, little-endian */
#define EDGE32LN_OPF	0x00b	/* 000001011: two 32-bit, LE, no CC */

/* Pixel component distance */
#define PDIST_OPF	0x03e	/* 000111110: distance between 8 8-bit components */

/* Convert a 3-D address to a blocked byte address */
#define ARRAY8_OPF	0x010	/* 000010000: 8-bit elements */
#define ARRAY16_OPF	0x012	/* 000010010: 16-bit elements */
#define ARRAY32_OPF	0x014	/* 000010100: 32-bit elements */

/* Byte mask and shuffle */
#define BMASK_OPF	0x019	/* 000011001: set GSR.MASK for a BSHUFFLE */
#define BSHUFFLE_OPF	0x04c	/* 001001100: permute bytes per GSR.MASK */
130
0c51ed93
DM
/* The OPF field is nine bits wide and starts at bit 5 of the instruction. */
#define VIS_OPF_SHIFT	5
#define VIS_OPF_MASK	(0x1ff << VIS_OPF_SHIFT)

/* Five-bit register-number fields of the instruction word. */
#define RS1(INSN)	(((INSN) >> 14) & 0x1f)
#define RS2(INSN)	((INSN) & 0x1f)
#define RD(INSN)	(((INSN) >> 25) & 0x1f)
137
/*
 * Flush the register windows when any of the three register numbers is
 * 16 or above.  A non-zero FROM_KERNEL issues the "flushw" instruction
 * directly, otherwise flushw_user() is called.
 */
static inline void maybe_flush_windows(unsigned int rs1, unsigned int rs2,
				       unsigned int rd, int from_kernel)
{
	if (rs1 < 16 && rs2 < 16 && rd < 16)
		return;

	if (from_kernel)
		__asm__ __volatile__("flushw");
	else
		flushw_user();
}
148
149static unsigned long fetch_reg(unsigned int reg, struct pt_regs *regs)
150{
151 unsigned long value;
152
153 if (reg < 16)
154 return (!reg ? 0 : regs->u_regs[reg]);
155 if (regs->tstate & TSTATE_PRIV) {
156 struct reg_window *win;
157 win = (struct reg_window *)(regs->u_regs[UREG_FP] + STACK_BIAS);
158 value = win->locals[reg - 16];
159 } else if (test_thread_flag(TIF_32BIT)) {
160 struct reg_window32 __user *win32;
161 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
162 get_user(value, &win32->locals[reg - 16]);
163 } else {
164 struct reg_window __user *win;
165 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
166 get_user(value, &win->locals[reg - 16]);
167 }
168 return value;
169}
170
171static inline unsigned long __user *__fetch_reg_addr_user(unsigned int reg,
172 struct pt_regs *regs)
173{
174 BUG_ON(reg < 16);
175 BUG_ON(regs->tstate & TSTATE_PRIV);
176
177 if (test_thread_flag(TIF_32BIT)) {
178 struct reg_window32 __user *win32;
179 win32 = (struct reg_window32 __user *)((unsigned long)((u32)regs->u_regs[UREG_FP]));
180 return (unsigned long __user *)&win32->locals[reg - 16];
181 } else {
182 struct reg_window __user *win;
183 win = (struct reg_window __user *)(regs->u_regs[UREG_FP] + STACK_BIAS);
184 return &win->locals[reg - 16];
185 }
186}
187
188static inline unsigned long *__fetch_reg_addr_kern(unsigned int reg,
189 struct pt_regs *regs)
190{
191 BUG_ON(reg >= 16);
192 BUG_ON(regs->tstate & TSTATE_PRIV);
193
194 return &regs->u_regs[reg];
195}
196
197static void store_reg(struct pt_regs *regs, unsigned long val, unsigned long rd)
198{
199 if (rd < 16) {
200 unsigned long *rd_kern = __fetch_reg_addr_kern(rd, regs);
201
202 *rd_kern = val;
203 } else {
204 unsigned long __user *rd_user = __fetch_reg_addr_user(rd, regs);
205
206 if (test_thread_flag(TIF_32BIT))
207 __put_user((u32)val, (u32 __user *)rd_user);
208 else
209 __put_user(val, rd_user);
210 }
211}
212
213static inline unsigned long fpd_regval(struct fpustate *f,
214 unsigned int insn_regnum)
215{
216 insn_regnum = (((insn_regnum & 1) << 5) |
217 (insn_regnum & 0x1e));
218
219 return *(unsigned long *) &f->regs[insn_regnum];
220}
221
222static inline unsigned long *fpd_regaddr(struct fpustate *f,
223 unsigned int insn_regnum)
224{
225 insn_regnum = (((insn_regnum & 1) << 5) |
226 (insn_regnum & 0x1e));
227
228 return (unsigned long *) &f->regs[insn_regnum];
229}
230
231static inline unsigned int fps_regval(struct fpustate *f,
232 unsigned int insn_regnum)
233{
234 return f->regs[insn_regnum];
235}
236
237static inline unsigned int *fps_regaddr(struct fpustate *f,
238 unsigned int insn_regnum)
239{
240 return &f->regs[insn_regnum];
241}
242
243struct edge_tab {
244 u16 left, right;
245};
7e0b1e61 246static struct edge_tab edge8_tab[8] = {
0c51ed93
DM
247 { 0xff, 0x80 },
248 { 0x7f, 0xc0 },
249 { 0x3f, 0xe0 },
250 { 0x1f, 0xf0 },
251 { 0x0f, 0xf8 },
252 { 0x07, 0xfc },
253 { 0x03, 0xfe },
254 { 0x01, 0xff },
255};
7e0b1e61 256static struct edge_tab edge8_tab_l[8] = {
0c51ed93
DM
257 { 0xff, 0x01 },
258 { 0xfe, 0x03 },
259 { 0xfc, 0x07 },
260 { 0xf8, 0x0f },
261 { 0xf0, 0x1f },
262 { 0xe0, 0x3f },
263 { 0xc0, 0x7f },
264 { 0x80, 0xff },
265};
7e0b1e61 266static struct edge_tab edge16_tab[4] = {
0c51ed93
DM
267 { 0xf, 0x8 },
268 { 0x7, 0xc },
269 { 0x3, 0xe },
270 { 0x1, 0xf },
271};
7e0b1e61 272static struct edge_tab edge16_tab_l[4] = {
0c51ed93
DM
273 { 0xf, 0x1 },
274 { 0xe, 0x3 },
275 { 0xc, 0x7 },
276 { 0x8, 0xf },
277};
7e0b1e61 278static struct edge_tab edge32_tab[2] = {
0c51ed93
DM
279 { 0x3, 0x2 },
280 { 0x1, 0x3 },
281};
7e0b1e61 282static struct edge_tab edge32_tab_l[2] = {
0c51ed93
DM
283 { 0x3, 0x1 },
284 { 0x2, 0x3 },
285};
286
287static void edge(struct pt_regs *regs, unsigned int insn, unsigned int opf)
288{
289 unsigned long orig_rs1, rs1, orig_rs2, rs2, rd_val;
290 u16 left, right;
291
292 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
293 orig_rs1 = rs1 = fetch_reg(RS1(insn), regs);
294 orig_rs2 = rs2 = fetch_reg(RS2(insn), regs);
295
296 if (test_thread_flag(TIF_32BIT)) {
297 rs1 = rs1 & 0xffffffff;
298 rs2 = rs2 & 0xffffffff;
299 }
300 switch (opf) {
301 default:
302 case EDGE8_OPF:
303 case EDGE8N_OPF:
304 left = edge8_tab[rs1 & 0x7].left;
305 right = edge8_tab[rs2 & 0x7].right;
306 break;
307 case EDGE8L_OPF:
308 case EDGE8LN_OPF:
309 left = edge8_tab_l[rs1 & 0x7].left;
310 right = edge8_tab_l[rs2 & 0x7].right;
311 break;
312
313 case EDGE16_OPF:
314 case EDGE16N_OPF:
315 left = edge16_tab[(rs1 >> 1) & 0x3].left;
316 right = edge16_tab[(rs2 >> 1) & 0x3].right;
317 break;
318
319 case EDGE16L_OPF:
320 case EDGE16LN_OPF:
321 left = edge16_tab_l[(rs1 >> 1) & 0x3].left;
322 right = edge16_tab_l[(rs2 >> 1) & 0x3].right;
323 break;
324
325 case EDGE32_OPF:
326 case EDGE32N_OPF:
327 left = edge32_tab[(rs1 >> 2) & 0x1].left;
328 right = edge32_tab[(rs2 >> 2) & 0x1].right;
329 break;
330
331 case EDGE32L_OPF:
332 case EDGE32LN_OPF:
333 left = edge32_tab_l[(rs1 >> 2) & 0x1].left;
334 right = edge32_tab_l[(rs2 >> 2) & 0x1].right;
335 break;
336 };
337
338 if ((rs1 & ~0x7UL) == (rs2 & ~0x7UL))
339 rd_val = right & left;
340 else
341 rd_val = left;
342
343 store_reg(regs, rd_val, RD(insn));
344
345 switch (opf) {
346 case EDGE8_OPF:
347 case EDGE8L_OPF:
348 case EDGE16_OPF:
349 case EDGE16L_OPF:
350 case EDGE32_OPF:
351 case EDGE32L_OPF: {
352 unsigned long ccr, tstate;
353
354 __asm__ __volatile__("subcc %1, %2, %%g0\n\t"
355 "rd %%ccr, %0"
356 : "=r" (ccr)
357 : "r" (orig_rs1), "r" (orig_rs2)
358 : "cc");
359 tstate = regs->tstate & ~(TSTATE_XCC | TSTATE_ICC);
360 regs->tstate = tstate | (ccr << 32UL);
361 }
362 };
363}
364
365static void array(struct pt_regs *regs, unsigned int insn, unsigned int opf)
366{
367 unsigned long rs1, rs2, rd_val;
368 unsigned int bits, bits_mask;
369
370 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
371 rs1 = fetch_reg(RS1(insn), regs);
372 rs2 = fetch_reg(RS2(insn), regs);
373
374 bits = (rs2 > 5 ? 5 : rs2);
375 bits_mask = (1UL << bits) - 1UL;
376
377 rd_val = ((((rs1 >> 11) & 0x3) << 0) |
378 (((rs1 >> 33) & 0x3) << 2) |
379 (((rs1 >> 55) & 0x1) << 4) |
380 (((rs1 >> 13) & 0xf) << 5) |
381 (((rs1 >> 35) & 0xf) << 9) |
382 (((rs1 >> 56) & 0xf) << 13) |
383 (((rs1 >> 17) & bits_mask) << 17) |
384 (((rs1 >> 39) & bits_mask) << (17 + bits)) |
385 (((rs1 >> 60) & 0xf) << (17 + (2*bits))));
386
387 switch (opf) {
388 case ARRAY16_OPF:
389 rd_val <<= 1;
390 break;
391
392 case ARRAY32_OPF:
393 rd_val <<= 2;
394 };
395
396 store_reg(regs, rd_val, RD(insn));
397}
398
399static void bmask(struct pt_regs *regs, unsigned int insn)
400{
401 unsigned long rs1, rs2, rd_val, gsr;
402
403 maybe_flush_windows(RS1(insn), RS2(insn), RD(insn), 0);
404 rs1 = fetch_reg(RS1(insn), regs);
405 rs2 = fetch_reg(RS2(insn), regs);
406 rd_val = rs1 + rs2;
407
408 store_reg(regs, rd_val, RD(insn));
409
410 gsr = current_thread_info()->gsr[0] & 0xffffffff;
411 gsr |= rd_val << 32UL;
412 current_thread_info()->gsr[0] = gsr;
413}
414
415static void bshuffle(struct pt_regs *regs, unsigned int insn)
416{
417 struct fpustate *f = FPUSTATE;
418 unsigned long rs1, rs2, rd_val;
419 unsigned long bmask, i;
420
421 bmask = current_thread_info()->gsr[0] >> 32UL;
422
423 rs1 = fpd_regval(f, RS1(insn));
424 rs2 = fpd_regval(f, RS2(insn));
425
426 rd_val = 0UL;
427 for (i = 0; i < 8; i++) {
428 unsigned long which = (bmask >> (i * 4)) & 0xf;
429 unsigned long byte;
430
431 if (which < 8)
432 byte = (rs1 >> (which * 8)) & 0xff;
433 else
434 byte = (rs2 >> ((which-8)*8)) & 0xff;
435 rd_val |= (byte << (i * 8));
436 }
437
438 *fpd_regaddr(f, RD(insn)) = rd_val;
439}
440
441static void pdist(struct pt_regs *regs, unsigned int insn)
442{
443 struct fpustate *f = FPUSTATE;
444 unsigned long rs1, rs2, *rd, rd_val;
445 unsigned long i;
446
447 rs1 = fpd_regval(f, RS1(insn));
726c12f5 448 rs2 = fpd_regval(f, RS2(insn));
0c51ed93
DM
449 rd = fpd_regaddr(f, RD(insn));
450
451 rd_val = *rd;
452
453 for (i = 0; i < 8; i++) {
454 s16 s1, s2;
455
456 s1 = (rs1 >> (56 - (i * 8))) & 0xff;
457 s2 = (rs2 >> (56 - (i * 8))) & 0xff;
458
459 /* Absolute value of difference. */
460 s1 -= s2;
461 if (s1 < 0)
462 s1 = ~s1 + 1;
463
464 rd_val += s1;
465 }
466
467 *rd = rd_val;
468}
469
470static void pformat(struct pt_regs *regs, unsigned int insn, unsigned int opf)
471{
472 struct fpustate *f = FPUSTATE;
473 unsigned long rs1, rs2, gsr, scale, rd_val;
474
475 gsr = current_thread_info()->gsr[0];
476 scale = (gsr >> 3) & (opf == FPACK16_OPF ? 0xf : 0x1f);
477 switch (opf) {
478 case FPACK16_OPF: {
479 unsigned long byte;
480
481 rs2 = fpd_regval(f, RS2(insn));
482 rd_val = 0;
483 for (byte = 0; byte < 4; byte++) {
484 unsigned int val;
485 s16 src = (rs2 >> (byte * 16UL)) & 0xffffUL;
486 int scaled = src << scale;
487 int from_fixed = scaled >> 7;
488
489 val = ((from_fixed < 0) ?
490 0 :
491 (from_fixed > 255) ?
492 255 : from_fixed);
493
494 rd_val |= (val << (8 * byte));
495 }
496 *fps_regaddr(f, RD(insn)) = rd_val;
497 break;
498 }
499
500 case FPACK32_OPF: {
501 unsigned long word;
502
503 rs1 = fpd_regval(f, RS1(insn));
504 rs2 = fpd_regval(f, RS2(insn));
505 rd_val = (rs1 << 8) & ~(0x000000ff000000ffUL);
506 for (word = 0; word < 2; word++) {
507 unsigned long val;
508 s32 src = (rs2 >> (word * 32UL));
509 s64 scaled = src << scale;
510 s64 from_fixed = scaled >> 23;
511
512 val = ((from_fixed < 0) ?
513 0 :
514 (from_fixed > 255) ?
515 255 : from_fixed);
516
517 rd_val |= (val << (32 * word));
518 }
519 *fpd_regaddr(f, RD(insn)) = rd_val;
520 break;
521 }
522
523 case FPACKFIX_OPF: {
524 unsigned long word;
525
526 rs2 = fpd_regval(f, RS2(insn));
527
528 rd_val = 0;
529 for (word = 0; word < 2; word++) {
530 long val;
531 s32 src = (rs2 >> (word * 32UL));
532 s64 scaled = src << scale;
533 s64 from_fixed = scaled >> 16;
534
535 val = ((from_fixed < -32768) ?
536 -32768 :
537 (from_fixed > 32767) ?
538 32767 : from_fixed);
539
540 rd_val |= ((val & 0xffff) << (word * 16));
541 }
542 *fps_regaddr(f, RD(insn)) = rd_val;
543 break;
544 }
545
546 case FEXPAND_OPF: {
547 unsigned long byte;
548
549 rs2 = fps_regval(f, RS2(insn));
550
551 rd_val = 0;
552 for (byte = 0; byte < 4; byte++) {
553 unsigned long val;
554 u8 src = (rs2 >> (byte * 8)) & 0xff;
555
556 val = src << 4;
557
558 rd_val |= (val << (byte * 16));
559 }
560 *fpd_regaddr(f, RD(insn)) = rd_val;
561 break;
562 }
563
564 case FPMERGE_OPF: {
565 rs1 = fps_regval(f, RS1(insn));
566 rs2 = fps_regval(f, RS2(insn));
567
568 rd_val = (((rs2 & 0x000000ff) << 0) |
569 ((rs1 & 0x000000ff) << 8) |
570 ((rs2 & 0x0000ff00) << 8) |
571 ((rs1 & 0x0000ff00) << 16) |
572 ((rs2 & 0x00ff0000) << 16) |
573 ((rs1 & 0x00ff0000) << 24) |
574 ((rs2 & 0xff000000) << 24) |
575 ((rs1 & 0xff000000) << 32));
576 *fpd_regaddr(f, RD(insn)) = rd_val;
577 break;
578 }
579 };
580}
581
582static void pmul(struct pt_regs *regs, unsigned int insn, unsigned int opf)
583{
584 struct fpustate *f = FPUSTATE;
585 unsigned long rs1, rs2, rd_val;
586
587 switch (opf) {
588 case FMUL8x16_OPF: {
589 unsigned long byte;
590
591 rs1 = fps_regval(f, RS1(insn));
592 rs2 = fpd_regval(f, RS2(insn));
593
594 rd_val = 0;
595 for (byte = 0; byte < 4; byte++) {
596 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
597 s16 src2 = (rs2 >> (byte * 16)) & 0xffff;
598 u32 prod = src1 * src2;
599 u16 scaled = ((prod & 0x00ffff00) >> 8);
600
601 /* Round up. */
602 if (prod & 0x80)
603 scaled++;
604 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
605 }
606
607 *fpd_regaddr(f, RD(insn)) = rd_val;
608 break;
609 }
610
611 case FMUL8x16AU_OPF:
612 case FMUL8x16AL_OPF: {
613 unsigned long byte;
614 s16 src2;
615
616 rs1 = fps_regval(f, RS1(insn));
617 rs2 = fps_regval(f, RS2(insn));
618
619 rd_val = 0;
620 src2 = (rs2 >> (opf == FMUL8x16AU_OPF) ? 16 : 0);
621 for (byte = 0; byte < 4; byte++) {
622 u16 src1 = (rs1 >> (byte * 8)) & 0x00ff;
623 u32 prod = src1 * src2;
624 u16 scaled = ((prod & 0x00ffff00) >> 8);
625
626 /* Round up. */
627 if (prod & 0x80)
628 scaled++;
629 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
630 }
631
632 *fpd_regaddr(f, RD(insn)) = rd_val;
633 break;
634 }
635
636 case FMUL8SUx16_OPF:
637 case FMUL8ULx16_OPF: {
638 unsigned long byte, ushift;
639
640 rs1 = fpd_regval(f, RS1(insn));
641 rs2 = fpd_regval(f, RS2(insn));
642
643 rd_val = 0;
644 ushift = (opf == FMUL8SUx16_OPF) ? 8 : 0;
645 for (byte = 0; byte < 4; byte++) {
646 u16 src1;
647 s16 src2;
648 u32 prod;
649 u16 scaled;
650
651 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
652 src2 = ((rs2 >> (16 * byte)) & 0xffff);
653 prod = src1 * src2;
654 scaled = ((prod & 0x00ffff00) >> 8);
655
656 /* Round up. */
657 if (prod & 0x80)
658 scaled++;
659 rd_val |= ((scaled & 0xffffUL) << (byte * 16UL));
660 }
661
662 *fpd_regaddr(f, RD(insn)) = rd_val;
663 break;
664 }
665
666 case FMULD8SUx16_OPF:
667 case FMULD8ULx16_OPF: {
668 unsigned long byte, ushift;
669
670 rs1 = fps_regval(f, RS1(insn));
671 rs2 = fps_regval(f, RS2(insn));
672
673 rd_val = 0;
674 ushift = (opf == FMULD8SUx16_OPF) ? 8 : 0;
675 for (byte = 0; byte < 2; byte++) {
676 u16 src1;
677 s16 src2;
678 u32 prod;
679 u16 scaled;
680
681 src1 = ((rs1 >> ((16 * byte) + ushift)) & 0x00ff);
682 src2 = ((rs2 >> (16 * byte)) & 0xffff);
683 prod = src1 * src2;
684 scaled = ((prod & 0x00ffff00) >> 8);
685
686 /* Round up. */
687 if (prod & 0x80)
688 scaled++;
689 rd_val |= ((scaled & 0xffffUL) <<
690 ((byte * 32UL) + 7UL));
691 }
692 *fpd_regaddr(f, RD(insn)) = rd_val;
693 break;
694 }
695 };
696}
697
698static void pcmp(struct pt_regs *regs, unsigned int insn, unsigned int opf)
699{
700 struct fpustate *f = FPUSTATE;
701 unsigned long rs1, rs2, rd_val, i;
702
703 rs1 = fpd_regval(f, RS1(insn));
704 rs2 = fpd_regval(f, RS2(insn));
705
706 rd_val = 0;
707
708 switch (opf) {
709 case FCMPGT16_OPF:
710 for (i = 0; i < 4; i++) {
711 s16 a = (rs1 >> (i * 16)) & 0xffff;
712 s16 b = (rs2 >> (i * 16)) & 0xffff;
713
714 if (a > b)
715 rd_val |= 1 << i;
716 }
717 break;
718
719 case FCMPGT32_OPF:
720 for (i = 0; i < 2; i++) {
721 s32 a = (rs1 >> (i * 32)) & 0xffff;
722 s32 b = (rs2 >> (i * 32)) & 0xffff;
723
724 if (a > b)
725 rd_val |= 1 << i;
726 }
727 break;
728
729 case FCMPLE16_OPF:
730 for (i = 0; i < 4; i++) {
731 s16 a = (rs1 >> (i * 16)) & 0xffff;
732 s16 b = (rs2 >> (i * 16)) & 0xffff;
733
734 if (a <= b)
735 rd_val |= 1 << i;
736 }
737 break;
738
739 case FCMPLE32_OPF:
740 for (i = 0; i < 2; i++) {
741 s32 a = (rs1 >> (i * 32)) & 0xffff;
742 s32 b = (rs2 >> (i * 32)) & 0xffff;
743
744 if (a <= b)
745 rd_val |= 1 << i;
746 }
747 break;
748
749 case FCMPNE16_OPF:
750 for (i = 0; i < 4; i++) {
751 s16 a = (rs1 >> (i * 16)) & 0xffff;
752 s16 b = (rs2 >> (i * 16)) & 0xffff;
753
754 if (a != b)
755 rd_val |= 1 << i;
756 }
757 break;
758
759 case FCMPNE32_OPF:
760 for (i = 0; i < 2; i++) {
761 s32 a = (rs1 >> (i * 32)) & 0xffff;
762 s32 b = (rs2 >> (i * 32)) & 0xffff;
763
764 if (a != b)
765 rd_val |= 1 << i;
766 }
767 break;
768
769 case FCMPEQ16_OPF:
770 for (i = 0; i < 4; i++) {
771 s16 a = (rs1 >> (i * 16)) & 0xffff;
772 s16 b = (rs2 >> (i * 16)) & 0xffff;
773
774 if (a == b)
775 rd_val |= 1 << i;
776 }
777 break;
778
779 case FCMPEQ32_OPF:
780 for (i = 0; i < 2; i++) {
781 s32 a = (rs1 >> (i * 32)) & 0xffff;
782 s32 b = (rs2 >> (i * 32)) & 0xffff;
783
784 if (a == b)
785 rd_val |= 1 << i;
786 }
787 break;
788 };
789
790 maybe_flush_windows(0, 0, RD(insn), 0);
791 store_reg(regs, rd_val, RD(insn));
792}
793
794/* Emulate the VIS instructions which are not implemented in
795 * hardware on Niagara.
796 */
797int vis_emul(struct pt_regs *regs, unsigned int insn)
798{
799 unsigned long pc = regs->tpc;
800 unsigned int opf;
801
802 BUG_ON(regs->tstate & TSTATE_PRIV);
803
804 if (test_thread_flag(TIF_32BIT))
805 pc = (u32)pc;
806
807 if (get_user(insn, (u32 __user *) pc))
808 return -EFAULT;
809
0c51ed93
DM
810 opf = (insn & VIS_OPF_MASK) >> VIS_OPF_SHIFT;
811 switch (opf) {
812 default:
813 return -EINVAL;
814
815 /* Pixel Formatting Instructions. */
816 case FPACK16_OPF:
817 case FPACK32_OPF:
818 case FPACKFIX_OPF:
819 case FEXPAND_OPF:
820 case FPMERGE_OPF:
821 pformat(regs, insn, opf);
822 break;
823
824 /* Partitioned Multiply Instructions */
825 case FMUL8x16_OPF:
826 case FMUL8x16AU_OPF:
827 case FMUL8x16AL_OPF:
828 case FMUL8SUx16_OPF:
829 case FMUL8ULx16_OPF:
830 case FMULD8SUx16_OPF:
831 case FMULD8ULx16_OPF:
832 pmul(regs, insn, opf);
833 break;
834
835 /* Pixel Compare Instructions */
836 case FCMPGT16_OPF:
837 case FCMPGT32_OPF:
838 case FCMPLE16_OPF:
839 case FCMPLE32_OPF:
840 case FCMPNE16_OPF:
841 case FCMPNE32_OPF:
842 case FCMPEQ16_OPF:
843 case FCMPEQ32_OPF:
844 pcmp(regs, insn, opf);
845 break;
846
847 /* Edge Handling Instructions */
848 case EDGE8_OPF:
849 case EDGE8N_OPF:
850 case EDGE8L_OPF:
851 case EDGE8LN_OPF:
852 case EDGE16_OPF:
853 case EDGE16N_OPF:
854 case EDGE16L_OPF:
855 case EDGE16LN_OPF:
856 case EDGE32_OPF:
857 case EDGE32N_OPF:
858 case EDGE32L_OPF:
859 case EDGE32LN_OPF:
860 edge(regs, insn, opf);
861 break;
862
863 /* Pixel Component Distance */
864 case PDIST_OPF:
865 pdist(regs, insn);
866 break;
867
868 /* Three-Dimensional Array Addressing Instructions */
869 case ARRAY8_OPF:
870 case ARRAY16_OPF:
871 case ARRAY32_OPF:
872 array(regs, insn, opf);
873 break;
874
875 /* Byte Mask and Shuffle Instructions */
876 case BMASK_OPF:
877 bmask(regs, insn);
878 break;
879
880 case BSHUFFLE_OPF:
881 bshuffle(regs, insn);
882 break;
883 };
884
885 regs->tpc = regs->tnpc;
886 regs->tnpc += 4;
887 return 0;
888}