Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* Machine-dependent software floating-point definitions. PPC version. |
2 | Copyright (C) 1997 Free Software Foundation, Inc. | |
3 | This file is part of the GNU C Library. | |
4 | ||
5 | The GNU C Library is free software; you can redistribute it and/or | |
6 | modify it under the terms of the GNU Library General Public License as | |
7 | published by the Free Software Foundation; either version 2 of the | |
8 | License, or (at your option) any later version. | |
9 | ||
10 | The GNU C Library is distributed in the hope that it will be useful, | |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | Library General Public License for more details. | |
14 | ||
15 | You should have received a copy of the GNU Library General Public | |
16 | License along with the GNU C Library; see the file COPYING.LIB. If | |
17 | not, write to the Free Software Foundation, Inc., | |
18 | 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
19 | ||
20 | Actually, this is a PPC (32bit) version, written based on the | |
21 | i386, sparc, and sparc64 versions, by me, | |
22 | Peter Maydell (pmaydell@chiark.greenend.org.uk). | |
23 | Comments are by and large also mine, although they may be inaccurate. | |
24 | ||
25 | In picking out asm fragments I've gone with the lowest common | |
26 | denominator, which also happens to be the hardware I have :-> | |
27 | That is, a SPARC without hardware multiply and divide. | |
28 | */ | |
29 | ||
30 | /* basic word size definitions */ | |
31 | #define _FP_W_TYPE_SIZE 32 /* word size in bits used by the soft-fp code; assumes the unsigned int below is 32 bits wide */ | |
e60f57f5 KG |
32 | #define _FP_W_TYPE unsigned int /* unsigned word type */ |
33 | #define _FP_WS_TYPE signed int /* signed word type */ | |
34 | #define _FP_I_TYPE int /* plain integer type; how it is used is decided by the soft-fp headers, not shown here */ | |
1da177e4 LT |
35 | |
36 | #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) /* 2^(W_TYPE_SIZE/2), i.e. one past the largest half-word value; UWtype and W_TYPE_SIZE must be supplied by the includer */ | |
37 | #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) /* low half of t */ | |
38 | #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) /* high half of t, moved down to the low bits */ | |
39 | ||
40 | /* You can optionally code some things like addition in asm. For | |
41 | * example, i386 defines __FP_FRAC_ADD_2 as asm. If you don't | |
42 | * then you get a fragment of C code [if you change an #ifdef 0 | |
43 | * in op-2.h] or a call to add_ssaaaa (see below). | |
44 | * Good places to look for asm fragments to use are gcc and glibc. | |
45 | * gcc's longlong.h is useful. | |
46 | */ | |
47 | ||
48 | /* We need to know how to multiply and divide. If the host word size | |
49 | * is >= 2*fracbits you can use _FP_MUL_MEAT_n_imm(t,R,X,Y) which | |
50 | * codes the multiply with whatever gcc does to 'a * b'. | |
51 | * _FP_MUL_MEAT_n_wide(t,R,X,Y,f) is used when you have an asm | |
52 | * function that can multiply two 1W values and get a 2W result. | |
53 | * Otherwise you're stuck with _FP_MUL_MEAT_n_hard(t,R,X,Y) which | |
54 | * does bitshifting to avoid overflow. | |
55 | * For division there is _FP_DIV_MEAT_n_imm(t,R,X,Y,f) for word size | |
56 | * >= 2*fracbits, where f is either _FP_DIV_HELP_imm or | |
57 | * _FP_DIV_HELP_ldiv (see op-1.h). | |
58 | * _FP_DIV_MEAT_n_udiv() is for when you have asm to do 2W/1W => (1W, 1W). | |
59 | * [GCC and glibc have longlong.h which has the asm macro udiv_qrnnd | |
60 | * to do this.] | |
61 | * In general, 'n' is the number of words required to hold the type, | |
62 | * and 't' is either S, D or Q for single/double/quad. | |
63 | * -- PMM | |
64 | */ | |
65 | /* Example: SPARC64: | |
66 | * #define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_imm(S,R,X,Y) | |
67 | * #define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_1_wide(D,R,X,Y,umul_ppmm) | |
68 | * #define _FP_MUL_MEAT_Q(R,X,Y) _FP_MUL_MEAT_2_wide(Q,R,X,Y,umul_ppmm) | |
69 | * | |
70 | * #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm) | |
71 | * #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_1_udiv(D,R,X,Y) | |
72 | * #define _FP_DIV_MEAT_Q(R,X,Y) _FP_DIV_MEAT_2_udiv_64(Q,R,X,Y) | |
73 | * | |
74 | * Example: i386: | |
75 | * #define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_wide(S,R,X,Y,_i386_mul_32_64) | |
76 | * #define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_2_wide(D,R,X,Y,_i386_mul_32_64) | |
77 | * | |
78 | * #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv(S,R,X,Y,_i386_div_64_32) | |
79 | * #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv_64(D,R,X,Y) | |
80 | */ | |
81 | ||
d2b194ed KG |
82 | #define _FP_MUL_MEAT_S(R,X,Y) _FP_MUL_MEAT_1_wide(_FP_WFRACBITS_S,R,X,Y,umul_ppmm) /* single: 1-word fraction, multiplied via umul_ppmm; note the first argument is _FP_WFRACBITS_S, not the S/D/Q letter described in the explanatory comment earlier in this file */ |
83 | #define _FP_MUL_MEAT_D(R,X,Y) _FP_MUL_MEAT_2_wide(_FP_WFRACBITS_D,R,X,Y,umul_ppmm) /* double: 2-word fraction, multiplied via umul_ppmm */ | |
1da177e4 | 84 | |
cf030336 | 85 | #define _FP_DIV_MEAT_S(R,X,Y) _FP_DIV_MEAT_1_udiv_norm(S,R,X,Y) /* single: 1-word division, "_norm" udiv variant */ |
d2b194ed | 86 | #define _FP_DIV_MEAT_D(R,X,Y) _FP_DIV_MEAT_2_udiv(D,R,X,Y) /* double: 2-word division, udiv variant */ |
1da177e4 LT |
87 | |
88 | /* These macros define what NaN looks like. They're supposed to expand to | |
89 | * a comma-separated set of 32bit unsigned ints that encode NaN. | |
90 | */ | |
d2b194ed KG |
91 | #define _FP_NANFRAC_S ((_FP_QNANBIT_S << 1) - 1) /* one word: assuming _FP_QNANBIT_S is a single set bit, this sets that bit and every bit below it */ |
92 | #define _FP_NANFRAC_D ((_FP_QNANBIT_D << 1) - 1), -1 /* two words: first built as above, second all ones */ | |
93 | #define _FP_NANFRAC_Q ((_FP_QNANBIT_Q << 1) - 1), -1, -1, -1 /* four words: first built as above, remaining three all ones */ | |
94 | #define _FP_NANSIGN_S 0 /* sign value that goes with _FP_NANFRAC_S */ | |
95 | #define _FP_NANSIGN_D 0 /* sign value that goes with _FP_NANFRAC_D */ | |
96 | #define _FP_NANSIGN_Q 0 /* sign value that goes with _FP_NANFRAC_Q */ | |
1da177e4 LT |
97 | |
98 | #define _FP_KEEPNANFRACP 1 /* NOTE(review): presumably asks soft-fp to keep an input NaN's fraction bits -- confirm against the soft-fp headers */ | |
99 | ||
6a800f36 LY |
100 | #ifdef FP_EX_BOOKE_E500_SPE /* SPE variant: exception flags and status word are taken from thread.spefscr */ |
101 | #define FP_EX_INEXACT (1 << 21) | |
102 | #define FP_EX_INVALID (1 << 20) | |
103 | #define FP_EX_DIVZERO (1 << 19) | |
104 | #define FP_EX_UNDERFLOW (1 << 18) | |
105 | #define FP_EX_OVERFLOW (1 << 17) | |
106 | #define FP_INHIBIT_RESULTS 0 /* only defined in this branch; NOTE(review): its consumer is not visible here -- confirm meaning */ | |
107 | ||
108 | #define __FPU_FPSCR (current->thread.spefscr) /* status/control word of the current task */ | |
109 | #define __FPU_ENABLED_EXC /* bits 2..6 of __FPU_FPSCR, moved down to bits 0..4 */ \ | |
110 | ({ \ | |
111 | (__FPU_FPSCR >> 2) & 0x1f; \ | |
112 | }) | |
113 | #else | |
d2b194ed KG |
114 | /* Exception flags. We use the bit positions of the appropriate bits |
115 | in the FPSCR, which also correspond to the FE_* bits. This makes | |
116 | everything easier ;-). */ | |
117 | #define FP_EX_INVALID (1 << (31 - 2)) | |
118 | #define FP_EX_INVALID_SNAN EFLAG_VXSNAN /* the five FP_EX_INVALID_* names alias EFLAG_VX* values that are defined further down in this file */ | |
119 | #define FP_EX_INVALID_ISI EFLAG_VXISI | |
120 | #define FP_EX_INVALID_IDI EFLAG_VXIDI | |
121 | #define FP_EX_INVALID_ZDZ EFLAG_VXZDZ | |
122 | #define FP_EX_INVALID_IMZ EFLAG_VXIMZ | |
123 | #define FP_EX_OVERFLOW (1 << (31 - 3)) | |
124 | #define FP_EX_UNDERFLOW (1 << (31 - 4)) | |
125 | #define FP_EX_DIVZERO (1 << (31 - 5)) | |
126 | #define FP_EX_INEXACT (1 << (31 - 6)) | |
127 | ||
de79f7b9 | 128 | #define __FPU_FPSCR (current->thread.fp_state.fpscr) /* status/control word of the current task */ |
6a800f36 LY |
129 | |
130 | /* We only actually write to the destination register | |
131 | * if exceptions signalled (if any) will not trap. | |
132 | */ | |
133 | #define __FPU_ENABLED_EXC /* bits 3..7 of __FPU_FPSCR, moved down to bits 0..4 */ \ | |
134 | ({ \ | |
135 | (__FPU_FPSCR >> 3) & 0x1f; \ | |
136 | }) | |
137 | ||
138 | #endif | |
139 | ||
463a8c01 LY |
140 | /* |
141 | * Pick which of two NaN operands becomes the result R. If Y has _FP_QNANBIT set and X does not (i.e. X is the only signaling one), R takes X's sign and fraction; | |
142 | * in every other case R takes Y's sign and fraction. R's class is always set to FP_CLS_NAN; OP is not used. NOTE(review): earlier wording said the fallback was X, but the else branch copies Y -- confirm which is intended. | |
1da177e4 | 143 | */ |
463a8c01 LY |
144 | #define _FP_CHOOSENAN(fs, wc, R, X, Y, OP) \ |
145 | do { \ | |
146 | if ((_FP_FRAC_HIGH_RAW_##fs(Y) & _FP_QNANBIT_##fs) \ | |
147 | && !(_FP_FRAC_HIGH_RAW_##fs(X) & _FP_QNANBIT_##fs)) \ | |
148 | { \ | |
149 | R##_s = X##_s; \ | |
150 | _FP_FRAC_COPY_##wc(R,X); \ | |
151 | } \ | |
152 | else \ | |
153 | { \ | |
154 | R##_s = Y##_s; \ | |
155 | _FP_FRAC_COPY_##wc(R,Y); \ | |
156 | } \ | |
157 | R##_c = FP_CLS_NAN; \ | |
1da177e4 LT |
158 | } while (0) |
159 | ||
160 | ||
1da177e4 LT |
161 | #include <linux/kernel.h> |
162 | #include <linux/sched.h> | |
163 | ||
1da177e4 LT |
164 | #define __FPU_TRAP_P(bits) /* 1 if any bit of `bits` is also set in __FPU_ENABLED_EXC, else 0. NOTE(review): __FPU_ENABLED_EXC is confined to bits 0..4, while the FP_EX_* flags in this file sit at bits 17..21 (SPE) or 25..29; if callers pass FP_EX_* masks the AND is always zero -- confirm whether a shift is missing */ \ |
165 | ((__FPU_ENABLED_EXC & (bits)) != 0) | |
166 | ||
167 | #define __FP_PACK_S(val,X) /* pack single-precision X into val unless a raised exception would trap; the statement expression evaluates to __exc */ \ | |
168 | ({ int __exc = _FP_PACK_CANONICAL(S,1,X); /* NOTE(review): relies on _FP_PACK_CANONICAL yielding a value, whereas __FP_PACK_D uses it as a plain statement and reads FP_CUR_EXCEPTIONS -- confirm this form is still valid */ \ | |
169 | if(!__exc || !__FPU_TRAP_P(__exc)) /* store only when nothing was raised, or nothing raised is trap-enabled */ \ | |
d2b194ed | 170 | _FP_PACK_RAW_1_P(S,val,X); \ |
1da177e4 LT |
171 | __exc; \ |
172 | }) | |
173 | ||
174 | #define __FP_PACK_D(val,X) /* pack double-precision X into val unless a raised exception would trap */ \ | |
d2b194ed KG |
175 | do { \ |
176 | _FP_PACK_CANONICAL(D, 2, X); \ | |
177 | if (!FP_CUR_EXCEPTIONS || !__FPU_TRAP_P(FP_CUR_EXCEPTIONS)) /* store only when nothing was raised, or nothing raised is trap-enabled */ \ | |
178 | _FP_PACK_RAW_2_P(D, val, X); \ | |
179 | } while (0) | |
180 | ||
181 | #define __FP_PACK_DS(val,X) /* pass X through a single-format temporary, then store it into val in double format; each stage is skipped if a raised exception would trap */ \ | |
182 | do { \ | |
183 | FP_DECL_S(__X); \ | |
184 | FP_CONV(S, D, 1, 2, __X, X); /* X -> single-format temporary __X */ \ | |
185 | _FP_PACK_CANONICAL(S, 1, __X); \ | |
186 | if (!FP_CUR_EXCEPTIONS || !__FPU_TRAP_P(FP_CUR_EXCEPTIONS)) { \ | |
187 | _FP_UNPACK_CANONICAL(S, 1, __X); \ | |
188 | FP_CONV(D, S, 2, 1, X, __X); /* __X -> back into X */ \ | |
189 | _FP_PACK_CANONICAL(D, 2, X); \ | |
190 | if (!FP_CUR_EXCEPTIONS || !__FPU_TRAP_P(FP_CUR_EXCEPTIONS)) /* same trap check again before the final store */ \ | |
191 | _FP_PACK_RAW_2_P(D, val, X); \ | |
192 | } \ | |
193 | } while (0) | |
1da177e4 LT |
194 | |
195 | /* Obtain the current rounding mode: the two lowest bits of __FPU_FPSCR. */ | |
196 | #define FP_ROUNDMODE \ | |
197 | ({ \ | |
198 | __FPU_FPSCR & 0x3; \ | |
199 | }) | |
200 | ||
201 | /* the asm fragments go here: all these are taken from glibc-2.0.5's | |
202 | * stdlib/longlong.h | |
203 | */ | |
204 | ||
205 | #include <linux/types.h> | |
206 | #include <asm/byteorder.h> | |
207 | ||
208 | /* add_ssaaaa is used in op-2.h and should be equivalent to | |
209 | * #define add_ssaaaa(sh,sl,ah,al,bh,bl) (sh = ah+bh+ (( sl = al+bl) < al)) | |
210 | * add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, | |
211 | * high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by | |
212 | * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 | |
213 | * respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow | |
214 | * (i.e. carry out) is not stored anywhere, and is lost. When bh is a compile-time constant 0 or ~0, the high word is formed with addze / addme from ah and the carry alone, so bh is not passed to the asm; otherwise adde adds ah and bh with the carry. | |
215 | */ | |
b682c869 | 216 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |
1da177e4 LT |
217 | do { \ |
218 | if (__builtin_constant_p (bh) && (bh) == 0) \ | |
b682c869 JS |
219 | __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ |
220 | : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ | |
221 | else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ | |
222 | __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ | |
223 | : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ | |
1da177e4 | 224 | else \ |
b682c869 JS |
225 | __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ |
226 | : "=r" (sh), "=&r" (sl) \ | |
227 | : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ | |
1da177e4 LT |
228 | } while (0) |
229 | ||
230 | /* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to | |
231 | * #define sub_ddmmss(sh, sl, ah, al, bh, bl) (sh = ah-bh - ((sl = al-bl) > al)) | |
232 | * sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, | |
233 | * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, | |
234 | * composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and | |
235 | * LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE | |
236 | * and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, | |
237 | * and is lost. Four special cases handle a compile-time constant ah or bh equal to 0 or ~0, in which case that constant high word is not passed to the asm; the final branch is the general form. | |
238 | */ | |
b682c869 | 239 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |
1da177e4 LT |
240 | do { \ |
241 | if (__builtin_constant_p (ah) && (ah) == 0) \ | |
b682c869 JS |
242 | __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ |
243 | : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ | |
244 | else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ | |
245 | __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ | |
246 | : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ | |
1da177e4 | 247 | else if (__builtin_constant_p (bh) && (bh) == 0) \ |
b682c869 JS |
248 | __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ |
249 | : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ | |
250 | else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ | |
251 | __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ | |
252 | : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ | |
1da177e4 | 253 | else \ |
b682c869 JS |
254 | __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ |
255 | : "=r" (sh), "=&r" (sl) \ | |
256 | : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ | |
1da177e4 LT |
257 | } while (0) |
258 | ||
259 | /* asm fragments for mul and div */ | |
260 | ||
261 | /* umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two | |
262 | * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype | |
263 | * word product in HIGH_PROD and LOW_PROD. m0 and m1 are each evaluated exactly once, into the USItype locals __m0/__m1; mulhwu produces the high word from those locals and a plain C multiply produces the low word. | |
264 | */ | |
b682c869 | 265 | #define umul_ppmm(ph, pl, m0, m1) \ |
1da177e4 LT |
266 | do { \ |
267 | USItype __m0 = (m0), __m1 = (m1); \ | |
b682c869 | 268 | __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); \ |
1da177e4 LT |
269 | (pl) = __m0 * __m1; \ |
270 | } while (0) | |
271 | ||
272 | /* udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, | |
273 | * denominator) divides a UDWtype, composed by the UWtype integers | |
274 | * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient | |
275 | * in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less | |
276 | * than DENOMINATOR for correct operation. If, in addition, the most | |
277 | * significant bit of DENOMINATOR must be 1, then the pre-processor symbol | |
278 | * UDIV_NEEDS_NORMALIZATION is defined to 1. | |
279 | * This version is plain C: it works in two steps, each producing one half-word of the quotient (__q1, then __q0), using __ll_highpart/__ll_lowpart/__ll_B. n1, n0 and d are each expanded more than once, so pass expressions without side effects. */ | |
b682c869 | 280 | #define udiv_qrnnd(q, r, n1, n0, d) \ |
1da177e4 | 281 | do { \ |
b682c869 JS |
282 | UWtype __d1, __d0, __q1, __q0; \ |
283 | UWtype __r1, __r0, __m; \ | |
1da177e4 LT |
284 | __d1 = __ll_highpart (d); \ |
285 | __d0 = __ll_lowpart (d); \ | |
286 | \ | |
287 | __r1 = (n1) % __d1; \ | |
288 | __q1 = (n1) / __d1; \ | |
289 | __m = (UWtype) __q1 * __d0; \ | |
290 | __r1 = __r1 * __ll_B | __ll_highpart (n0); \ | |
291 | if (__r1 < __m) /* quotient estimate too large: step it down, at most twice */ \ | |
292 | { \ | |
293 | __q1--, __r1 += (d); \ | |
b682c869 | 294 | if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\ |
1da177e4 LT |
295 | if (__r1 < __m) \ |
296 | __q1--, __r1 += (d); \ | |
297 | } \ | |
298 | __r1 -= __m; \ | |
299 | \ | |
300 | __r0 = __r1 % __d1; \ | |
301 | __q0 = __r1 / __d1; \ | |
302 | __m = (UWtype) __q0 * __d0; \ | |
303 | __r0 = __r0 * __ll_B | __ll_lowpart (n0); \ | |
304 | if (__r0 < __m) /* same correction for the low half-word of the quotient */ \ | |
305 | { \ | |
306 | __q0--, __r0 += (d); \ | |
307 | if (__r0 >= (d)) \ | |
308 | if (__r0 < __m) \ | |
309 | __q0--, __r0 += (d); \ | |
310 | } \ | |
311 | __r0 -= __m; \ | |
312 | \ | |
313 | (q) = (UWtype) __q1 * __ll_B | __q0; /* join the two quotient halves */ \ | |
314 | (r) = __r0; \ | |
315 | } while (0) | |
316 | ||
317 | #define UDIV_NEEDS_NORMALIZATION 1 /* per the udiv_qrnnd comment: callers must pass a denominator whose most significant bit is 1 */ | |
318 | ||
319 | #define abort() /* any abort() in code that includes this header expands to `return 0`, so it only works inside functions that can return 0 */ \ | |
320 | return 0 | |
321 | ||
13da9e20 LT |
322 | #ifdef __BIG_ENDIAN /* choose __BYTE_ORDER from whether __BIG_ENDIAN is defined (presumably by <asm/byteorder.h>, included above -- confirm) */ |
323 | #define __BYTE_ORDER __BIG_ENDIAN | |
324 | #else | |
325 | #define __BYTE_ORDER __LITTLE_ENDIAN | |
326 | #endif | |
327 | ||
1da177e4 LT |
328 | /* Exception flags. Each is written as (1 << (31 - n)). The first five have the same values as the non-SPE FP_EX_INVALID/OVERFLOW/UNDERFLOW/DIVZERO/INEXACT macros earlier in this file. */ |
329 | #define EFLAG_INVALID (1 << (31 - 2)) | |
330 | #define EFLAG_OVERFLOW (1 << (31 - 3)) | |
331 | #define EFLAG_UNDERFLOW (1 << (31 - 4)) | |
332 | #define EFLAG_DIVZERO (1 << (31 - 5)) | |
333 | #define EFLAG_INEXACT (1 << (31 - 6)) | |
334 | ||
335 | #define EFLAG_VXSNAN (1 << (31 - 7)) /* VXSNAN, VXISI, VXIDI, VXZDZ and VXIMZ are the values behind FP_EX_INVALID_SNAN/_ISI/_IDI/_ZDZ/_IMZ */ | |
336 | #define EFLAG_VXISI (1 << (31 - 8)) | |
337 | #define EFLAG_VXIDI (1 << (31 - 9)) | |
338 | #define EFLAG_VXZDZ (1 << (31 - 10)) | |
339 | #define EFLAG_VXIMZ (1 << (31 - 11)) | |
340 | #define EFLAG_VXVC (1 << (31 - 12)) | |
341 | #define EFLAG_VXSOFT (1 << (31 - 21)) | |
342 | #define EFLAG_VXSQRT (1 << (31 - 22)) | |
343 | #define EFLAG_VXCVI (1 << (31 - 23)) |