Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /*---------------------------------------------------------------------------+ |
2 | | poly_sin.c | | |
3 | | | | |
4 | | Computation of an approximation of the sin function and the cosine | | |
5 | | function by a polynomial. | | |
6 | | | | |
7 | | Copyright (C) 1992,1993,1994,1997,1999 | | |
8 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, Australia | | |
9 | | E-mail billm@melbpc.org.au | | |
10 | | | | |
11 | | | | |
12 | +---------------------------------------------------------------------------*/ | |
13 | ||
14 | ||
15 | #include "exception.h" | |
16 | #include "reg_constant.h" | |
17 | #include "fpu_emu.h" | |
18 | #include "fpu_system.h" | |
19 | #include "control_w.h" | |
20 | #include "poly.h" | |
21 | ||
22 | ||
23 | #define N_COEFF_P 4 | |
24 | #define N_COEFF_N 4 | |
25 | ||
26 | static const unsigned long long pos_terms_l[N_COEFF_P] = | |
27 | { | |
28 | 0xaaaaaaaaaaaaaaabLL, | |
29 | 0x00d00d00d00cf906LL, | |
30 | 0x000006b99159a8bbLL, | |
31 | 0x000000000d7392e6LL | |
32 | }; | |
33 | ||
34 | static const unsigned long long neg_terms_l[N_COEFF_N] = | |
35 | { | |
36 | 0x2222222222222167LL, | |
37 | 0x0002e3bc74aab624LL, | |
38 | 0x0000000b09229062LL, | |
39 | 0x00000000000c7973LL | |
40 | }; | |
41 | ||
42 | ||
43 | ||
44 | #define N_COEFF_PH 4 | |
45 | #define N_COEFF_NH 4 | |
46 | static const unsigned long long pos_terms_h[N_COEFF_PH] = | |
47 | { | |
48 | 0x0000000000000000LL, | |
49 | 0x05b05b05b05b0406LL, | |
50 | 0x000049f93edd91a9LL, | |
51 | 0x00000000c9c9ed62LL | |
52 | }; | |
53 | ||
54 | static const unsigned long long neg_terms_h[N_COEFF_NH] = | |
55 | { | |
56 | 0xaaaaaaaaaaaaaa98LL, | |
57 | 0x001a01a01a019064LL, | |
58 | 0x0000008f76c68a77LL, | |
59 | 0x0000000000d58f5eLL | |
60 | }; | |
61 | ||
62 | ||
63 | /*--- poly_sine() -----------------------------------------------------------+ | |
64 | | | | |
65 | +---------------------------------------------------------------------------*/ | |
66 | void poly_sine(FPU_REG *st0_ptr) | |
67 | { | |
68 | int exponent, echange; | |
69 | Xsig accumulator, argSqrd, argTo4; | |
70 | unsigned long fix_up, adj; | |
71 | unsigned long long fixed_arg; | |
72 | FPU_REG result; | |
73 | ||
74 | exponent = exponent(st0_ptr); | |
75 | ||
76 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; | |
77 | ||
78 | /* Split into two ranges, for arguments below and above 1.0 */ | |
79 | /* The boundary between upper and lower is approx 0.88309101259 */ | |
80 | if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xe21240aa)) ) | |
81 | { | |
82 | /* The argument is <= 0.88309101259 */ | |
83 | ||
84 | argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; argSqrd.lsw = 0; | |
85 | mul64_Xsig(&argSqrd, &significand(st0_ptr)); | |
86 | shr_Xsig(&argSqrd, 2*(-1-exponent)); | |
87 | argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; | |
88 | argTo4.lsw = argSqrd.lsw; | |
89 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
90 | ||
91 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, | |
92 | N_COEFF_N-1); | |
93 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
94 | negate_Xsig(&accumulator); | |
95 | ||
96 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, | |
97 | N_COEFF_P-1); | |
98 | ||
99 | shr_Xsig(&accumulator, 2); /* Divide by four */ | |
100 | accumulator.msw |= 0x80000000; /* Add 1.0 */ | |
101 | ||
102 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
103 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
104 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
105 | ||
106 | /* Divide by four, FPU_REG compatible, etc */ | |
107 | exponent = 3*exponent; | |
108 | ||
109 | /* The minimum exponent difference is 3 */ | |
110 | shr_Xsig(&accumulator, exponent(st0_ptr) - exponent); | |
111 | ||
112 | negate_Xsig(&accumulator); | |
113 | XSIG_LL(accumulator) += significand(st0_ptr); | |
114 | ||
115 | echange = round_Xsig(&accumulator); | |
116 | ||
117 | setexponentpos(&result, exponent(st0_ptr) + echange); | |
118 | } | |
119 | else | |
120 | { | |
121 | /* The argument is > 0.88309101259 */ | |
122 | /* We use sin(st(0)) = cos(pi/2-st(0)) */ | |
123 | ||
124 | fixed_arg = significand(st0_ptr); | |
125 | ||
126 | if ( exponent == 0 ) | |
127 | { | |
128 | /* The argument is >= 1.0 */ | |
129 | ||
130 | /* Put the binary point at the left. */ | |
131 | fixed_arg <<= 1; | |
132 | } | |
133 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ | |
134 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; | |
135 | /* There is a special case which arises due to rounding, to fix here. */ | |
136 | if ( fixed_arg == 0xffffffffffffffffLL ) | |
137 | fixed_arg = 0; | |
138 | ||
139 | XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; | |
140 | mul64_Xsig(&argSqrd, &fixed_arg); | |
141 | ||
142 | XSIG_LL(argTo4) = XSIG_LL(argSqrd); argTo4.lsw = argSqrd.lsw; | |
143 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
144 | ||
145 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, | |
146 | N_COEFF_NH-1); | |
147 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
148 | negate_Xsig(&accumulator); | |
149 | ||
150 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, | |
151 | N_COEFF_PH-1); | |
152 | negate_Xsig(&accumulator); | |
153 | ||
154 | mul64_Xsig(&accumulator, &fixed_arg); | |
155 | mul64_Xsig(&accumulator, &fixed_arg); | |
156 | ||
157 | shr_Xsig(&accumulator, 3); | |
158 | negate_Xsig(&accumulator); | |
159 | ||
160 | add_Xsig_Xsig(&accumulator, &argSqrd); | |
161 | ||
162 | shr_Xsig(&accumulator, 1); | |
163 | ||
164 | accumulator.lsw |= 1; /* A zero accumulator here would cause problems */ | |
165 | negate_Xsig(&accumulator); | |
166 | ||
167 | /* The basic computation is complete. Now fix the answer to | |
168 | compensate for the error due to the approximation used for | |
169 | pi/2 | |
170 | */ | |
171 | ||
172 | /* This has an exponent of -65 */ | |
173 | fix_up = 0x898cc517; | |
174 | /* The fix-up needs to be improved for larger args */ | |
175 | if ( argSqrd.msw & 0xffc00000 ) | |
176 | { | |
177 | /* Get about 32 bit precision in these: */ | |
178 | fix_up -= mul_32_32(0x898cc517, argSqrd.msw) / 6; | |
179 | } | |
180 | fix_up = mul_32_32(fix_up, LL_MSW(fixed_arg)); | |
181 | ||
182 | adj = accumulator.lsw; /* temp save */ | |
183 | accumulator.lsw -= fix_up; | |
184 | if ( accumulator.lsw > adj ) | |
185 | XSIG_LL(accumulator) --; | |
186 | ||
187 | echange = round_Xsig(&accumulator); | |
188 | ||
189 | setexponentpos(&result, echange - 1); | |
190 | } | |
191 | ||
192 | significand(&result) = XSIG_LL(accumulator); | |
193 | setsign(&result, getsign(st0_ptr)); | |
194 | FPU_copy_to_reg0(&result, TAG_Valid); | |
195 | ||
196 | #ifdef PARANOID | |
197 | if ( (exponent(&result) >= 0) | |
198 | && (significand(&result) > 0x8000000000000000LL) ) | |
199 | { | |
200 | EXCEPTION(EX_INTERNAL|0x150); | |
201 | } | |
202 | #endif /* PARANOID */ | |
203 | ||
204 | } | |
205 | ||
206 | ||
207 | ||
208 | /*--- poly_cos() ------------------------------------------------------------+ | |
209 | | | | |
210 | +---------------------------------------------------------------------------*/ | |
211 | void poly_cos(FPU_REG *st0_ptr) | |
212 | { | |
213 | FPU_REG result; | |
214 | long int exponent, exp2, echange; | |
215 | Xsig accumulator, argSqrd, fix_up, argTo4; | |
216 | unsigned long long fixed_arg; | |
217 | ||
218 | #ifdef PARANOID | |
219 | if ( (exponent(st0_ptr) > 0) | |
220 | || ((exponent(st0_ptr) == 0) | |
221 | && (significand(st0_ptr) > 0xc90fdaa22168c234LL)) ) | |
222 | { | |
223 | EXCEPTION(EX_Invalid); | |
224 | FPU_copy_to_reg0(&CONST_QNaN, TAG_Special); | |
225 | return; | |
226 | } | |
227 | #endif /* PARANOID */ | |
228 | ||
229 | exponent = exponent(st0_ptr); | |
230 | ||
231 | accumulator.lsw = accumulator.midw = accumulator.msw = 0; | |
232 | ||
233 | if ( (exponent < -1) || ((exponent == -1) && (st0_ptr->sigh <= 0xb00d6f54)) ) | |
234 | { | |
235 | /* arg is < 0.687705 */ | |
236 | ||
237 | argSqrd.msw = st0_ptr->sigh; argSqrd.midw = st0_ptr->sigl; | |
238 | argSqrd.lsw = 0; | |
239 | mul64_Xsig(&argSqrd, &significand(st0_ptr)); | |
240 | ||
241 | if ( exponent < -1 ) | |
242 | { | |
243 | /* shift the argument right by the required places */ | |
244 | shr_Xsig(&argSqrd, 2*(-1-exponent)); | |
245 | } | |
246 | ||
247 | argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; | |
248 | argTo4.lsw = argSqrd.lsw; | |
249 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
250 | ||
251 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_h, | |
252 | N_COEFF_NH-1); | |
253 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
254 | negate_Xsig(&accumulator); | |
255 | ||
256 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_h, | |
257 | N_COEFF_PH-1); | |
258 | negate_Xsig(&accumulator); | |
259 | ||
260 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
261 | mul64_Xsig(&accumulator, &significand(st0_ptr)); | |
262 | shr_Xsig(&accumulator, -2*(1+exponent)); | |
263 | ||
264 | shr_Xsig(&accumulator, 3); | |
265 | negate_Xsig(&accumulator); | |
266 | ||
267 | add_Xsig_Xsig(&accumulator, &argSqrd); | |
268 | ||
269 | shr_Xsig(&accumulator, 1); | |
270 | ||
271 | /* It doesn't matter if accumulator is all zero here, the | |
272 | following code will work ok */ | |
273 | negate_Xsig(&accumulator); | |
274 | ||
275 | if ( accumulator.lsw & 0x80000000 ) | |
276 | XSIG_LL(accumulator) ++; | |
277 | if ( accumulator.msw == 0 ) | |
278 | { | |
279 | /* The result is 1.0 */ | |
280 | FPU_copy_to_reg0(&CONST_1, TAG_Valid); | |
281 | return; | |
282 | } | |
283 | else | |
284 | { | |
285 | significand(&result) = XSIG_LL(accumulator); | |
286 | ||
287 | /* will be a valid positive nr with expon = -1 */ | |
288 | setexponentpos(&result, -1); | |
289 | } | |
290 | } | |
291 | else | |
292 | { | |
293 | fixed_arg = significand(st0_ptr); | |
294 | ||
295 | if ( exponent == 0 ) | |
296 | { | |
297 | /* The argument is >= 1.0 */ | |
298 | ||
299 | /* Put the binary point at the left. */ | |
300 | fixed_arg <<= 1; | |
301 | } | |
302 | /* pi/2 in hex is: 1.921fb54442d18469 898CC51701B839A2 52049C1 */ | |
303 | fixed_arg = 0x921fb54442d18469LL - fixed_arg; | |
304 | /* There is a special case which arises due to rounding, to fix here. */ | |
305 | if ( fixed_arg == 0xffffffffffffffffLL ) | |
306 | fixed_arg = 0; | |
307 | ||
308 | exponent = -1; | |
309 | exp2 = -1; | |
310 | ||
311 | /* A shift is needed here only for a narrow range of arguments, | |
312 | i.e. for fixed_arg approx 2^-32, but we pick up more... */ | |
313 | if ( !(LL_MSW(fixed_arg) & 0xffff0000) ) | |
314 | { | |
315 | fixed_arg <<= 16; | |
316 | exponent -= 16; | |
317 | exp2 -= 16; | |
318 | } | |
319 | ||
320 | XSIG_LL(argSqrd) = fixed_arg; argSqrd.lsw = 0; | |
321 | mul64_Xsig(&argSqrd, &fixed_arg); | |
322 | ||
323 | if ( exponent < -1 ) | |
324 | { | |
325 | /* shift the argument right by the required places */ | |
326 | shr_Xsig(&argSqrd, 2*(-1-exponent)); | |
327 | } | |
328 | ||
329 | argTo4.msw = argSqrd.msw; argTo4.midw = argSqrd.midw; | |
330 | argTo4.lsw = argSqrd.lsw; | |
331 | mul_Xsig_Xsig(&argTo4, &argTo4); | |
332 | ||
333 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), neg_terms_l, | |
334 | N_COEFF_N-1); | |
335 | mul_Xsig_Xsig(&accumulator, &argSqrd); | |
336 | negate_Xsig(&accumulator); | |
337 | ||
338 | polynomial_Xsig(&accumulator, &XSIG_LL(argTo4), pos_terms_l, | |
339 | N_COEFF_P-1); | |
340 | ||
341 | shr_Xsig(&accumulator, 2); /* Divide by four */ | |
342 | accumulator.msw |= 0x80000000; /* Add 1.0 */ | |
343 | ||
344 | mul64_Xsig(&accumulator, &fixed_arg); | |
345 | mul64_Xsig(&accumulator, &fixed_arg); | |
346 | mul64_Xsig(&accumulator, &fixed_arg); | |
347 | ||
348 | /* Divide by four, FPU_REG compatible, etc */ | |
349 | exponent = 3*exponent; | |
350 | ||
351 | /* The minimum exponent difference is 3 */ | |
352 | shr_Xsig(&accumulator, exp2 - exponent); | |
353 | ||
354 | negate_Xsig(&accumulator); | |
355 | XSIG_LL(accumulator) += fixed_arg; | |
356 | ||
357 | /* The basic computation is complete. Now fix the answer to | |
358 | compensate for the error due to the approximation used for | |
359 | pi/2 | |
360 | */ | |
361 | ||
362 | /* This has an exponent of -65 */ | |
363 | XSIG_LL(fix_up) = 0x898cc51701b839a2ll; | |
364 | fix_up.lsw = 0; | |
365 | ||
366 | /* The fix-up needs to be improved for larger args */ | |
367 | if ( argSqrd.msw & 0xffc00000 ) | |
368 | { | |
369 | /* Get about 32 bit precision in these: */ | |
370 | fix_up.msw -= mul_32_32(0x898cc517, argSqrd.msw) / 2; | |
371 | fix_up.msw += mul_32_32(0x898cc517, argTo4.msw) / 24; | |
372 | } | |
373 | ||
374 | exp2 += norm_Xsig(&accumulator); | |
375 | shr_Xsig(&accumulator, 1); /* Prevent overflow */ | |
376 | exp2++; | |
377 | shr_Xsig(&fix_up, 65 + exp2); | |
378 | ||
379 | add_Xsig_Xsig(&accumulator, &fix_up); | |
380 | ||
381 | echange = round_Xsig(&accumulator); | |
382 | ||
383 | setexponentpos(&result, exp2 + echange); | |
384 | significand(&result) = XSIG_LL(accumulator); | |
385 | } | |
386 | ||
387 | FPU_copy_to_reg0(&result, TAG_Valid); | |
388 | ||
389 | #ifdef PARANOID | |
390 | if ( (exponent(&result) >= 0) | |
391 | && (significand(&result) > 0x8000000000000000LL) ) | |
392 | { | |
393 | EXCEPTION(EX_INTERNAL|0x151); | |
394 | } | |
395 | #endif /* PARANOID */ | |
396 | ||
397 | } |