Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
2 | MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP | |
3 | M68000 Hi-Performance Microprocessor Division | |
4 | M68060 Software Package | |
5 | Production Release P1.00 -- October 10, 1994 | |
6 | ||
96de0e25 | 7 | M68060 Software Package Copyright © 1993, 1994 Motorola Inc. All rights reserved. |
1da177e4 LT |
8 | |
9 | THE SOFTWARE is provided on an "AS IS" basis and without warranty. | |
10 | To the maximum extent permitted by applicable law, | |
11 | MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED, | |
12 | INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE | |
13 | and any warranty against infringement with regard to the SOFTWARE | |
14 | (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials. | |
15 | ||
16 | To the maximum extent permitted by applicable law, | |
17 | IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER | |
18 | (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS, | |
19 | BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS) | |
20 | ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE. | |
21 | Motorola assumes no responsibility for the maintenance and support of the SOFTWARE. | |
22 | ||
23 | You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE | |
24 | so long as this entire notice is retained without alteration in any modified and/or | |
25 | redistributed versions, and that such modified versions are clearly identified as such. | |
26 | No licenses are granted by implication, estoppel or otherwise under any patents | |
27 | or trademarks of Motorola, Inc. | |
28 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
29 | # |
30 | # lfptop.s: |
31 | # This file is appended to the top of the 060ILSP package |
32 | # and contains the entry points into the package. The user, in |
33 | # effect, branches to one of the branch table entries located here. |
# |
# Layout of the table below: every entry is a long branch (bra.l) to the |
# implementing routine followed by one zero word (short 0x0000), and the |
# entries come in groups of three whose targets end in "s", "d" and "x". |
# Going by the operand loads in the _fsin*_/_fcos*_/_fsinh*_ routines |
# further down, those suffixes select a single-, double- or |
# extended-precision input operand. |
# NOTE(review): bra.l is presumably the 6-byte form with a 32-bit |
# displacement, which would make every slot 8 bytes wide -- confirm against |
# the assembler output before relying on fixed slot offsets. |
34 | # |
35 | ||
36 | bra.l _facoss_ |
37 | short 0x0000 |
38 | bra.l _facosd_ |
39 | short 0x0000 |
40 | bra.l _facosx_ |
41 | short 0x0000 |
42 | ||
43 | bra.l _fasins_ |
44 | short 0x0000 |
45 | bra.l _fasind_ |
46 | short 0x0000 |
47 | bra.l _fasinx_ |
48 | short 0x0000 |
49 | ||
50 | bra.l _fatans_ |
51 | short 0x0000 |
52 | bra.l _fatand_ |
53 | short 0x0000 |
54 | bra.l _fatanx_ |
55 | short 0x0000 |
56 | ||
57 | bra.l _fatanhs_ |
58 | short 0x0000 |
59 | bra.l _fatanhd_ |
60 | short 0x0000 |
61 | bra.l _fatanhx_ |
62 | short 0x0000 |
63 | ||
64 | bra.l _fcoss_ |
65 | short 0x0000 |
66 | bra.l _fcosd_ |
67 | short 0x0000 |
68 | bra.l _fcosx_ |
69 | short 0x0000 |
70 | ||
71 | bra.l _fcoshs_ |
72 | short 0x0000 |
73 | bra.l _fcoshd_ |
74 | short 0x0000 |
75 | bra.l _fcoshx_ |
76 | short 0x0000 |
77 | ||
78 | bra.l _fetoxs_ |
79 | short 0x0000 |
80 | bra.l _fetoxd_ |
81 | short 0x0000 |
82 | bra.l _fetoxx_ |
83 | short 0x0000 |
84 | ||
85 | bra.l _fetoxm1s_ |
86 | short 0x0000 |
87 | bra.l _fetoxm1d_ |
88 | short 0x0000 |
89 | bra.l _fetoxm1x_ |
90 | short 0x0000 |
91 | ||
92 | bra.l _fgetexps_ |
93 | short 0x0000 |
94 | bra.l _fgetexpd_ |
95 | short 0x0000 |
96 | bra.l _fgetexpx_ |
97 | short 0x0000 |
98 | ||
99 | bra.l _fgetmans_ |
100 | short 0x0000 |
101 | bra.l _fgetmand_ |
102 | short 0x0000 |
103 | bra.l _fgetmanx_ |
104 | short 0x0000 |
105 | ||
106 | bra.l _flog10s_ |
107 | short 0x0000 |
108 | bra.l _flog10d_ |
109 | short 0x0000 |
110 | bra.l _flog10x_ |
111 | short 0x0000 |
112 | ||
113 | bra.l _flog2s_ |
114 | short 0x0000 |
115 | bra.l _flog2d_ |
116 | short 0x0000 |
117 | bra.l _flog2x_ |
118 | short 0x0000 |
119 | ||
120 | bra.l _flogns_ |
121 | short 0x0000 |
122 | bra.l _flognd_ |
123 | short 0x0000 |
124 | bra.l _flognx_ |
125 | short 0x0000 |
126 | ||
127 | bra.l _flognp1s_ |
128 | short 0x0000 |
129 | bra.l _flognp1d_ |
130 | short 0x0000 |
131 | bra.l _flognp1x_ |
132 | short 0x0000 |
133 | ||
134 | bra.l _fmods_ |
135 | short 0x0000 |
136 | bra.l _fmodd_ |
137 | short 0x0000 |
138 | bra.l _fmodx_ |
139 | short 0x0000 |
140 | ||
141 | bra.l _frems_ |
142 | short 0x0000 |
143 | bra.l _fremd_ |
144 | short 0x0000 |
145 | bra.l _fremx_ |
146 | short 0x0000 |
147 | ||
148 | bra.l _fscales_ |
149 | short 0x0000 |
150 | bra.l _fscaled_ |
151 | short 0x0000 |
152 | bra.l _fscalex_ |
153 | short 0x0000 |
154 | ||
155 | bra.l _fsins_ |
156 | short 0x0000 |
157 | bra.l _fsind_ |
158 | short 0x0000 |
159 | bra.l _fsinx_ |
160 | short 0x0000 |
161 | ||
162 | bra.l _fsincoss_ |
163 | short 0x0000 |
164 | bra.l _fsincosd_ |
165 | short 0x0000 |
166 | bra.l _fsincosx_ |
167 | short 0x0000 |
168 | ||
169 | bra.l _fsinhs_ |
170 | short 0x0000 |
171 | bra.l _fsinhd_ |
172 | short 0x0000 |
173 | bra.l _fsinhx_ |
174 | short 0x0000 |
175 | ||
176 | bra.l _ftans_ |
177 | short 0x0000 |
178 | bra.l _ftand_ |
179 | short 0x0000 |
180 | bra.l _ftanx_ |
181 | short 0x0000 |
182 | ||
183 | bra.l _ftanhs_ |
184 | short 0x0000 |
185 | bra.l _ftanhd_ |
186 | short 0x0000 |
187 | bra.l _ftanhx_ |
188 | short 0x0000 |
189 | ||
190 | bra.l _ftentoxs_ |
191 | short 0x0000 |
192 | bra.l _ftentoxd_ |
193 | short 0x0000 |
194 | bra.l _ftentoxx_ |
195 | short 0x0000 |
196 | ||
197 | bra.l _ftwotoxs_ |
198 | short 0x0000 |
199 | bra.l _ftwotoxd_ |
200 | short 0x0000 |
201 | bra.l _ftwotoxx_ |
202 | short 0x0000 |
203 | ||
204 | bra.l _fabss_ |
205 | short 0x0000 |
206 | bra.l _fabsd_ |
207 | short 0x0000 |
208 | bra.l _fabsx_ |
209 | short 0x0000 |
210 | ||
211 | bra.l _fadds_ |
212 | short 0x0000 |
213 | bra.l _faddd_ |
214 | short 0x0000 |
215 | bra.l _faddx_ |
216 | short 0x0000 |
217 | ||
218 | bra.l _fdivs_ |
219 | short 0x0000 |
220 | bra.l _fdivd_ |
221 | short 0x0000 |
222 | bra.l _fdivx_ |
223 | short 0x0000 |
224 | ||
225 | bra.l _fints_ |
226 | short 0x0000 |
227 | bra.l _fintd_ |
228 | short 0x0000 |
229 | bra.l _fintx_ |
230 | short 0x0000 |
231 | ||
232 | bra.l _fintrzs_ |
233 | short 0x0000 |
234 | bra.l _fintrzd_ |
235 | short 0x0000 |
236 | bra.l _fintrzx_ |
237 | short 0x0000 |
238 | ||
239 | bra.l _fmuls_ |
240 | short 0x0000 |
241 | bra.l _fmuld_ |
242 | short 0x0000 |
243 | bra.l _fmulx_ |
244 | short 0x0000 |
245 | ||
246 | bra.l _fnegs_ |
247 | short 0x0000 |
248 | bra.l _fnegd_ |
249 | short 0x0000 |
250 | bra.l _fnegx_ |
251 | short 0x0000 |
252 | ||
253 | bra.l _fsqrts_ |
254 | short 0x0000 |
255 | bra.l _fsqrtd_ |
256 | short 0x0000 |
257 | bra.l _fsqrtx_ |
258 | short 0x0000 |
259 | ||
260 | bra.l _fsubs_ |
261 | short 0x0000 |
262 | bra.l _fsubd_ |
263 | short 0x0000 |
264 | bra.l _fsubx_ |
265 | short 0x0000 |
266 | ||
267 | # leave room for future possible additions (the align below presumably pads the table out to a 0x400 boundary) |
268 | align 0x400 |
269 | ||
270 | # |
271 | # This file contains a set of define statements for constants |
272 | # in order to promote readability within the corecode itself. |
# |
# The negative and LV-relative symbols in this first group are |
# displacements from the frame pointer: the entry routines below use them |
# as EXC_DREGS(%a6), USER_FPCR(%a6), FP_SRC(%a6), STAG(%a6) and so on, |
# after a "link %a6,&-LOCAL_SIZE". |
273 | # |
274 | ||
275 | set LOCAL_SIZE, 192 # stack frame size(bytes) |
276 | set LV, -LOCAL_SIZE # stack offset: lowest address of the frame, relative to %a6 |
277 | ||
278 | set EXC_SR, 0x4 # stack status register |
279 | set EXC_PC, 0x6 # stack pc |
280 | set EXC_VOFF, 0xa # stacked vector offset |
281 | set EXC_EA, 0xc # stacked <ea> |
282 | ||
283 | set EXC_FP, 0x0 # frame pointer |
284 | ||
285 | set EXC_AREGS, -68 # offset of all address regs |
286 | set EXC_DREGS, -100 # offset of all data regs |
287 | set EXC_FPREGS, -36 # offset of all fp regs |
288 | ||
289 | set EXC_A7, EXC_AREGS+(7*4) # offset of saved a7 |
290 | set OLD_A7, EXC_AREGS+(6*4) # extra copy of saved a7 -- NOTE(review): evaluates to the same offset as EXC_A6 below; confirm the alias is intended |
291 | set EXC_A6, EXC_AREGS+(6*4) # offset of saved a6 |
292 | set EXC_A5, EXC_AREGS+(5*4) |
293 | set EXC_A4, EXC_AREGS+(4*4) |
294 | set EXC_A3, EXC_AREGS+(3*4) |
295 | set EXC_A2, EXC_AREGS+(2*4) |
296 | set EXC_A1, EXC_AREGS+(1*4) |
297 | set EXC_A0, EXC_AREGS+(0*4) |
298 | set EXC_D7, EXC_DREGS+(7*4) |
299 | set EXC_D6, EXC_DREGS+(6*4) |
300 | set EXC_D5, EXC_DREGS+(5*4) |
301 | set EXC_D4, EXC_DREGS+(4*4) |
302 | set EXC_D3, EXC_DREGS+(3*4) |
303 | set EXC_D2, EXC_DREGS+(2*4) |
304 | set EXC_D1, EXC_DREGS+(1*4) |
305 | set EXC_D0, EXC_DREGS+(0*4) |
306 | ||
307 | set EXC_FP0, EXC_FPREGS+(0*12) # offset of saved fp0 (12 bytes per saved fp register) |
308 | set EXC_FP1, EXC_FPREGS+(1*12) # offset of saved fp1 |
309 | set EXC_FP2, EXC_FPREGS+(2*12) # offset of saved fp2 (not used) |
310 | ||
311 | set FP_SCR1, LV+80 # fp scratch 1 (12-byte slot; _EX/_SGN/_HI/_LO are offsets 0/2/4/8 within it) |
312 | set FP_SCR1_EX, FP_SCR1+0 |
313 | set FP_SCR1_SGN, FP_SCR1+2 |
314 | set FP_SCR1_HI, FP_SCR1+4 |
315 | set FP_SCR1_LO, FP_SCR1+8 |
316 | ||
317 | set FP_SCR0, LV+68 # fp scratch 0 |
318 | set FP_SCR0_EX, FP_SCR0+0 |
319 | set FP_SCR0_SGN, FP_SCR0+2 |
320 | set FP_SCR0_HI, FP_SCR0+4 |
321 | set FP_SCR0_LO, FP_SCR0+8 |
322 | ||
323 | set FP_DST, LV+56 # fp destination operand |
324 | set FP_DST_EX, FP_DST+0 |
325 | set FP_DST_SGN, FP_DST+2 |
326 | set FP_DST_HI, FP_DST+4 |
327 | set FP_DST_LO, FP_DST+8 |
328 | ||
329 | set FP_SRC, LV+44 # fp source operand |
330 | set FP_SRC_EX, FP_SRC+0 |
331 | set FP_SRC_SGN, FP_SRC+2 |
332 | set FP_SRC_HI, FP_SRC+4 |
333 | set FP_SRC_LO, FP_SRC+8 |
334 | ||
335 | set USER_FPIAR, LV+40 # FP instr address register |
336 | ||
337 | set USER_FPSR, LV+36 # FP status register |
338 | set FPSR_CC, USER_FPSR+0 # FPSR condition codes |
339 | set FPSR_QBYTE, USER_FPSR+1 # FPSR quotient byte |
340 | set FPSR_EXCEPT, USER_FPSR+2 # FPSR exception status byte |
341 | set FPSR_AEXCEPT, USER_FPSR+3 # FPSR accrued exception byte |
342 | ||
343 | set USER_FPCR, LV+32 # FP control register |
344 | set FPCR_ENABLE, USER_FPCR+2 # FPCR exception enable |
345 | set FPCR_MODE, USER_FPCR+3 # FPCR rounding mode control |
346 | ||
347 | set L_SCR3, LV+28 # integer scratch 3 |
348 | set L_SCR2, LV+24 # integer scratch 2 (same offset as EXC_TEMP2 below) |
349 | set L_SCR1, LV+20 # integer scratch 1 |
350 | ||
351 | set STORE_FLG, LV+19 # flag: operand store (ie. not fcmp/ftst) |
352 | ||
353 | set EXC_TEMP2, LV+24 # temporary space (same offset as L_SCR2 above) |
354 | set EXC_TEMP, LV+16 # temporary space |
355 | ||
356 | set DTAG, LV+15 # destination operand type |
357 | set STAG, LV+14 # source operand type |
358 | ||
359 | set SPCOND_FLG, LV+10 # flag: special case (see below) |
360 | ||
361 | set EXC_CC, LV+8 # saved condition codes |
362 | set EXC_EXTWPTR, LV+4 # saved current PC (active) |
363 | set EXC_EXTWORD, LV+2 # saved extension word |
364 | set EXC_CMDREG, LV+2 # saved extension word (second name for the EXC_EXTWORD offset) |
365 | set EXC_OPWORD, LV+0 # saved operation word |
366 | ||
367 | ################################ |
368 | ||
369 | # Helpful macros: field offsets within a stored operand, exponent limits, biases and operand tags |
370 | ||
371 | set FTEMP, 0 # offsets within an |
372 | set FTEMP_EX, 0 # extended precision |
373 | set FTEMP_SGN, 2 # value saved in memory. |
374 | set FTEMP_HI, 4 |
375 | set FTEMP_LO, 8 |
376 | set FTEMP_GRS, 12 |
377 | ||
378 | set LOCAL, 0 # offsets within an |
379 | set LOCAL_EX, 0 # extended precision |
380 | set LOCAL_SGN, 2 # value saved in memory. |
381 | set LOCAL_HI, 4 |
382 | set LOCAL_LO, 8 |
383 | set LOCAL_GRS, 12 |
384 | ||
385 | set DST, 0 # offsets within an |
386 | set DST_EX, 0 # extended precision |
387 | set DST_HI, 4 # value saved in memory. |
388 | set DST_LO, 8 |
389 | ||
390 | set SRC, 0 # offsets within an |
391 | set SRC_EX, 0 # extended precision |
392 | set SRC_HI, 4 # value saved in memory. |
393 | set SRC_LO, 8 |
394 | ||
395 | set SGL_LO, 0x3f81 # min sgl prec exponent |
396 | set SGL_HI, 0x407e # max sgl prec exponent |
397 | set DBL_LO, 0x3c01 # min dbl prec exponent |
398 | set DBL_HI, 0x43fe # max dbl prec exponent |
399 | set EXT_LO, 0x0 # min ext prec exponent |
400 | set EXT_HI, 0x7ffe # max ext prec exponent |
401 | ||
402 | set EXT_BIAS, 0x3fff # extended precision bias |
403 | set SGL_BIAS, 0x007f # single precision bias |
404 | set DBL_BIAS, 0x03ff # double precision bias |
405 | ||
406 | set NORM, 0x00 # operand type for STAG/DTAG (zero: the entry routines test for it with tst.b) |
407 | set ZERO, 0x01 # operand type for STAG/DTAG |
408 | set INF, 0x02 # operand type for STAG/DTAG |
409 | set QNAN, 0x03 # operand type for STAG/DTAG |
410 | set DENORM, 0x04 # operand type for STAG/DTAG |
411 | set SNAN, 0x05 # operand type for STAG/DTAG |
412 | set UNNORM, 0x06 # operand type for STAG/DTAG |
413 | ||
414 | ################## |
415 | # FPSR/FPCR bits # |
416 | ################## |
# The *_bit symbols are bit numbers within a single FPSR byte; the *_mask |
# symbols further down give the same flags as longword (or byte) masks. |
417 | set neg_bit, 0x3 # negative result |
418 | set z_bit, 0x2 # zero result |
419 | set inf_bit, 0x1 # infinite result |
420 | set nan_bit, 0x0 # NAN result |
421 | ||
422 | set q_sn_bit, 0x7 # sign bit of quotient byte |
423 | ||
424 | set bsun_bit, 7 # branch on unordered |
425 | set snan_bit, 6 # signalling NAN |
426 | set operr_bit, 5 # operand error |
427 | set ovfl_bit, 4 # overflow |
428 | set unfl_bit, 3 # underflow |
429 | set dz_bit, 2 # divide by zero |
430 | set inex2_bit, 1 # inexact result 2 |
431 | set inex1_bit, 0 # inexact result 1 |
432 | ||
433 | set aiop_bit, 7 # accrued invalid operation bit |
434 | set aovfl_bit, 6 # accrued overflow bit |
435 | set aunfl_bit, 5 # accrued underflow bit |
436 | set adz_bit, 4 # accrued dz bit |
437 | set ainex_bit, 3 # accrued inexact bit |
438 | ||
439 | ############################# |
440 | # FPSR individual bit masks # |
441 | ############################# |
442 | set neg_mask, 0x08000000 # negative bit mask (lw) |
443 | set inf_mask, 0x02000000 # infinity bit mask (lw) |
444 | set z_mask, 0x04000000 # zero bit mask (lw) |
445 | set nan_mask, 0x01000000 # nan bit mask (lw) |
446 | ||
447 | set neg_bmask, 0x08 # negative bit mask (byte) |
448 | set inf_bmask, 0x02 # infinity bit mask (byte) |
449 | set z_bmask, 0x04 # zero bit mask (byte) |
450 | set nan_bmask, 0x01 # nan bit mask (byte) |
451 | ||
452 | set bsun_mask, 0x00008000 # bsun exception mask |
453 | set snan_mask, 0x00004000 # snan exception mask |
454 | set operr_mask, 0x00002000 # operr exception mask |
455 | set ovfl_mask, 0x00001000 # overflow exception mask |
456 | set unfl_mask, 0x00000800 # underflow exception mask |
457 | set dz_mask, 0x00000400 # dz exception mask |
458 | set inex2_mask, 0x00000200 # inex2 exception mask |
459 | set inex1_mask, 0x00000100 # inex1 exception mask |
460 | ||
461 | set aiop_mask, 0x00000080 # accrued invalid operation |
462 | set aovfl_mask, 0x00000040 # accrued overflow |
463 | set aunfl_mask, 0x00000020 # accrued underflow |
464 | set adz_mask, 0x00000010 # accrued divide by zero |
465 | set ainex_mask, 0x00000008 # accrued inexact |
466 | ||
467 | ###################################### |
468 | # FPSR combinations used in the FPSP # |
469 | ###################################### |
470 | set dzinf_mask, inf_mask+dz_mask+adz_mask |
471 | set opnan_mask, nan_mask+operr_mask+aiop_mask |
472 | set nzi_mask, 0x01ffffff # clears N, Z, and I when ANDed in (the NAN bit and everything below it are kept) |
473 | set unfinx_mask, unfl_mask+inex2_mask+aunfl_mask+ainex_mask |
474 | set unf2inx_mask, unfl_mask+inex2_mask+ainex_mask |
475 | set ovfinx_mask, ovfl_mask+inex2_mask+aovfl_mask+ainex_mask |
476 | set inx1a_mask, inex1_mask+ainex_mask |
477 | set inx2a_mask, inex2_mask+ainex_mask |
478 | set snaniop_mask, nan_mask+snan_mask+aiop_mask |
479 | set snaniop2_mask, snan_mask+aiop_mask |
480 | set naniop_mask, nan_mask+aiop_mask |
481 | set neginf_mask, neg_mask+inf_mask |
482 | set infaiop_mask, inf_mask+aiop_mask |
483 | set negz_mask, neg_mask+z_mask |
484 | set opaop_mask, operr_mask+aiop_mask |
485 | set unfl_inx_mask, unfl_mask+aunfl_mask+ainex_mask |
486 | set ovfl_inx_mask, ovfl_mask+aovfl_mask+ainex_mask |
487 | ||
488 | ######### |
489 | # misc. # |
490 | ######### |
491 | set rnd_stky_bit, 29 # stky bit pos in longword |
492 | ||
493 | set sign_bit, 0x7 # sign bit |
494 | set signan_bit, 0x6 # signalling nan bit |
495 | ||
496 | set sgl_thresh, 0x3f81 # minimum sgl exponent (same value as SGL_LO) |
497 | set dbl_thresh, 0x3c01 # minimum dbl exponent (same value as DBL_LO) |
498 | ||
499 | set x_mode, 0x0 # extended precision |
500 | set s_mode, 0x4 # single precision |
501 | set d_mode, 0x8 # double precision |
502 | ||
503 | set rn_mode, 0x0 # round-to-nearest |
504 | set rz_mode, 0x1 # round-to-zero |
505 | set rm_mode, 0x2 # round-to-minus-infinity |
506 | set rp_mode, 0x3 # round-to-plus-infinity |
507 | ||
508 | set mantissalen, 64 # length of mantissa in bits |
509 | ||
510 | set BYTE, 1 # len(byte) == 1 byte |
511 | set WORD, 2 # len(word) == 2 bytes |
512 | set LONG, 4 # len(longword) == 4 bytes |
513 | ||
514 | set BSUN_VEC, 0xc0 # bsun vector offset |
515 | set INEX_VEC, 0xc4 # inexact vector offset |
516 | set DZ_VEC, 0xc8 # dz vector offset |
517 | set UNFL_VEC, 0xcc # unfl vector offset |
518 | set OPERR_VEC, 0xd0 # operr vector offset |
519 | set OVFL_VEC, 0xd4 # ovfl vector offset |
520 | set SNAN_VEC, 0xd8 # snan vector offset |
521 | ||
522 | ########################### |
523 | # SPecial CONDition FLaGs # |
524 | ########################### |
# *_flg are byte masks; the *_bit symbols below give the matching bit |
# numbers (there is no *_bit counterpart for fmovm_flg in this list). |
525 | set ftrapcc_flg, 0x01 # flag bit: ftrapcc exception |
526 | set fbsun_flg, 0x02 # flag bit: bsun exception |
527 | set mia7_flg, 0x04 # flag bit: (a7)+ <ea> |
528 | set mda7_flg, 0x08 # flag bit: -(a7) <ea> |
529 | set fmovm_flg, 0x40 # flag bit: fmovm instruction |
530 | set immed_flg, 0x80 # flag bit: &<data> <ea> |
531 | ||
532 | set ftrapcc_bit, 0x0 |
533 | set fbsun_bit, 0x1 |
534 | set mia7_bit, 0x2 |
535 | set mda7_bit, 0x3 |
536 | set immed_bit, 0x7 |
537 | ||
538 | ################################## |
539 | # TRANSCENDENTAL "LAST-OP" FLAGS # |
540 | ################################## |
541 | set FMUL_OP, 0x0 # fmul instr performed last |
542 | set FDIV_OP, 0x1 # fdiv performed last |
543 | set FADD_OP, 0x2 # fadd performed last |
544 | set FMOV_OP, 0x3 # fmov performed last |
545 | ||
546 | ############# |
547 | # CONSTANTS # |
548 | ############# |
# Encodings, decoded from the literal values below: |
#   T1, T2   two longwords each; read as IEEE doubles T1 is about 11354.44 |
#            (16381 times the natural log of 2) and T2 is a much smaller |
#            trailing correction term. |
#   PI       sign/exponent word 0x4000, mantissa 0xC90FDAA2_2168C235: pi in |
#            extended precision, with a fourth all-zero longword after it. |
#   PIBY2    same mantissa with exponent word 0x3FFF: pi/2. |
#   TWOBYPI  two longwords; read as an IEEE double it is about 0.63662 (2/pi). |
# Where these are used is not visible in this part of the file. |
549 | T1: long 0x40C62D38,0xD3D64634 # 16381 LOG2 LEAD |
550 | T2: long 0x3D6F90AE,0xB1E75CC7 # 16381 LOG2 TRAIL |
551 | ||
552 | PI: long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000 |
553 | PIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 |
554 | ||
555 | TWOBYPI: |
556 | long 0x3FE45F30,0x6DC9C883 |
557 | ||
558 | ######################################################################### |
559 | # MONADIC TEMPLATE # |
560 | ######################################################################### |
# _fsins_: entry point taking one single-precision operand on the stack. |
#   In:   0x8(%a6) after the link holds the sgl operand (with the usual |
#         link frame that is the first argument above the saved %a6 and |
#         the return address). |
#   Out:  result in %fp0 (see "Result is now in FP0" below). |
#   d0-d1/a0-a1 and fp1 are reloaded from the frame before returning. FPCR |
#   and FPSR are reloaded from their frame copies; the FPSR copy has its |
#   FPSR_CC and FPSR_EXCEPT bytes cleared below, and whether the handlers |
#   update it further is not visible in this routine. |
#   Flow: widen the operand to extended precision in FP_SRC, classify it |
#   with tag, then call ssin / src_zero / t_operr / src_qnan / ssind |
#   according to the tag. |
561 | global _fsins_ |
562 | _fsins_: |
563 | link %a6,&-LOCAL_SIZE |
564 | ||
565 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 |
566 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR at USER_FPCR, FPSR at USER_FPSR) |
567 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 |
568 | ||
569 | fmov.l &0x0,%fpcr # zero FPCR |
570 | ||
571 | # |
572 | # copy, convert, and tag input argument |
# (the value is stored in extended precision at FP_SRC; a0 = &FP_SRC for tag) |
573 | # |
574 | fmov.s 0x8(%a6),%fp0 # load sgl input |
575 | fmov.x %fp0,FP_SRC(%a6) |
576 | lea FP_SRC(%a6),%a0 |
577 | bsr.l tag # fetch operand type (returned in d0) |
578 | mov.b %d0,STAG(%a6) |
579 | mov.b %d0,%d1 |
580 | ||
# keep only the quotient and accrued-exception bytes of the saved FPSR |
# (the FPSR_CC and FPSR_EXCEPT bytes are cleared) |
581 | andi.l &0x00ff00ff,USER_FPSR(%a6) |
582 | ||
583 | clr.l %d0 |
584 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec (saved FPCR mode byte, upper bits of d0 zero) |
585 | ||
586 | tst.b %d1 |
587 | bne.b _L0_2s |
588 | bsr.l ssin # operand is a NORM (tag byte is zero) |
589 | bra.b _L0_6s |
590 | _L0_2s: |
591 | cmpi.b %d1,&ZERO # is operand a ZERO? |
592 | bne.b _L0_3s # no |
593 | bsr.l src_zero # yes |
594 | bra.b _L0_6s |
595 | _L0_3s: |
596 | cmpi.b %d1,&INF # is operand an INF? |
597 | bne.b _L0_4s # no |
598 | bsr.l t_operr # yes |
599 | bra.b _L0_6s |
600 | _L0_4s: |
601 | cmpi.b %d1,&QNAN # is operand a QNAN? |
602 | bne.b _L0_5s # no |
603 | bsr.l src_qnan # yes |
604 | bra.b _L0_6s |
605 | _L0_5s: |
606 | bsr.l ssind # operand is a DENORM (any tag not matched above also lands here) |
607 | _L0_6s: |
608 | ||
609 | # |
610 | # Result is now in FP0 |
611 | # |
612 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 |
613 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame copies |
614 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 is left holding the result |
615 | unlk %a6 |
616 | rts |
617 | ||
# _fsind_: entry point taking one double-precision operand on the stack. |
#   In:   0x8(%a6) after the link holds the dbl operand (with the usual |
#         link frame that is the first argument above the saved %a6 and |
#         the return address). |
#   Out:  result in %fp0 (see "Result is now in FP0" below). |
#   d0-d1/a0-a1 and fp1 are reloaded from the frame before returning. FPCR |
#   and FPSR are reloaded from their frame copies; the FPSR copy has its |
#   FPSR_CC and FPSR_EXCEPT bytes cleared below, and whether the handlers |
#   update it further is not visible in this routine. |
#   Flow: widen the operand to extended precision in FP_SRC, classify it |
#   with tag, then call ssin / src_zero / t_operr / src_qnan / ssind |
#   according to the tag. |
#   Modified from the original: a duplicate store of the tag into STAG |
#   was dropped (see the note before the dispatch). |
618 | global _fsind_ |
619 | _fsind_: |
620 | link %a6,&-LOCAL_SIZE |
621 | ||
622 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 |
623 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR at USER_FPCR, FPSR at USER_FPSR) |
624 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 |
625 | ||
626 | fmov.l &0x0,%fpcr # zero FPCR |
627 | ||
628 | # |
629 | # copy, convert, and tag input argument |
# (the value is stored in extended precision at FP_SRC; a0 = &FP_SRC for tag) |
630 | # |
631 | fmov.d 0x8(%a6),%fp0 # load dbl input |
632 | fmov.x %fp0,FP_SRC(%a6) |
633 | lea FP_SRC(%a6),%a0 |
634 | bsr.l tag # fetch operand type (returned in d0) |
635 | mov.b %d0,STAG(%a6) |
636 | mov.b %d0,%d1 |
637 | ||
# keep only the quotient and accrued-exception bytes of the saved FPSR |
# (the FPSR_CC and FPSR_EXCEPT bytes are cleared) |
638 | andi.l &0x00ff00ff,USER_FPSR(%a6) |
639 | ||
640 | clr.l %d0 |
641 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec (saved FPCR mode byte, upper bits of d0 zero) |
642 | ||
643 | # (a second "mov.b %d1,STAG(%a6)" was dropped here: STAG already holds this tag and d1 has not changed since) |
644 | tst.b %d1 |
645 | bne.b _L0_2d |
646 | bsr.l ssin # operand is a NORM (tag byte is zero) |
647 | bra.b _L0_6d |
648 | _L0_2d: |
649 | cmpi.b %d1,&ZERO # is operand a ZERO? |
650 | bne.b _L0_3d # no |
651 | bsr.l src_zero # yes |
652 | bra.b _L0_6d |
653 | _L0_3d: |
654 | cmpi.b %d1,&INF # is operand an INF? |
655 | bne.b _L0_4d # no |
656 | bsr.l t_operr # yes |
657 | bra.b _L0_6d |
658 | _L0_4d: |
659 | cmpi.b %d1,&QNAN # is operand a QNAN? |
660 | bne.b _L0_5d # no |
661 | bsr.l src_qnan # yes |
662 | bra.b _L0_6d |
663 | _L0_5d: |
664 | bsr.l ssind # operand is a DENORM (any tag not matched above also lands here) |
665 | _L0_6d: |
666 | ||
667 | # |
668 | # Result is now in FP0 |
669 | # |
670 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 |
671 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame copies |
672 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 is left holding the result |
673 | unlk %a6 |
674 | rts |
675 | ||
# _fsinx_: entry point taking one extended-precision operand on the stack. |
#   In:   the three longwords at 0x8(%a6), 0xc(%a6) and 0x10(%a6) after |
#         the link hold the operand; they are copied unchanged into FP_SRC. |
#   Out:  result in %fp0 (see "Result is now in FP0" below). |
#   d0-d1/a0-a1 and fp1 are reloaded from the frame before returning. FPCR |
#   and FPSR are reloaded from their frame copies; the FPSR copy has its |
#   FPSR_CC and FPSR_EXCEPT bytes cleared below, and whether the handlers |
#   update it further is not visible in this routine. |
#   Flow: copy the operand to FP_SRC, classify it with tag, then call |
#   ssin / src_zero / t_operr / src_qnan / ssind according to the tag. |
676 | global _fsinx_ |
677 | _fsinx_: |
678 | link %a6,&-LOCAL_SIZE |
679 | ||
680 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 |
681 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR at USER_FPCR, FPSR at USER_FPSR) |
682 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 |
683 | ||
684 | fmov.l &0x0,%fpcr # zero FPCR |
685 | ||
686 | # |
687 | # copy, convert, and tag input argument |
# (no conversion is needed here: a0 = &FP_SRC is set up first and then used |
# both as the copy destination and as the argument for tag) |
688 | # |
689 | lea FP_SRC(%a6),%a0 |
690 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input (first of three longwords) |
691 | mov.l 0x8+0x4(%a6),0x4(%a0) |
692 | mov.l 0x8+0x8(%a6),0x8(%a0) |
693 | bsr.l tag # fetch operand type (returned in d0) |
694 | mov.b %d0,STAG(%a6) |
695 | mov.b %d0,%d1 |
696 | ||
# keep only the quotient and accrued-exception bytes of the saved FPSR |
# (the FPSR_CC and FPSR_EXCEPT bytes are cleared) |
697 | andi.l &0x00ff00ff,USER_FPSR(%a6) |
698 | ||
699 | clr.l %d0 |
700 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec (saved FPCR mode byte, upper bits of d0 zero) |
701 | ||
702 | tst.b %d1 |
703 | bne.b _L0_2x |
704 | bsr.l ssin # operand is a NORM (tag byte is zero) |
705 | bra.b _L0_6x |
706 | _L0_2x: |
707 | cmpi.b %d1,&ZERO # is operand a ZERO? |
708 | bne.b _L0_3x # no |
709 | bsr.l src_zero # yes |
710 | bra.b _L0_6x |
711 | _L0_3x: |
712 | cmpi.b %d1,&INF # is operand an INF? |
713 | bne.b _L0_4x # no |
714 | bsr.l t_operr # yes |
715 | bra.b _L0_6x |
716 | _L0_4x: |
717 | cmpi.b %d1,&QNAN # is operand a QNAN? |
718 | bne.b _L0_5x # no |
719 | bsr.l src_qnan # yes |
720 | bra.b _L0_6x |
721 | _L0_5x: |
722 | bsr.l ssind # operand is a DENORM (any tag not matched above also lands here) |
723 | _L0_6x: |
724 | ||
725 | # |
726 | # Result is now in FP0 |
727 | # |
728 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 |
729 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame copies |
730 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 is left holding the result |
731 | unlk %a6 |
732 | rts |
733 | ||
734 | ||
735 | ######################################################################### |
736 | # MONADIC TEMPLATE # |
737 | ######################################################################### |
# _fcoss_: entry point taking one single-precision operand on the stack. |
#   In:   0x8(%a6) after the link holds the sgl operand (with the usual |
#         link frame that is the first argument above the saved %a6 and |
#         the return address). |
#   Out:  result in %fp0 (see "Result is now in FP0" below). |
#   d0-d1/a0-a1 and fp1 are reloaded from the frame before returning. FPCR |
#   and FPSR are reloaded from their frame copies; the FPSR copy has its |
#   FPSR_CC and FPSR_EXCEPT bytes cleared below, and whether the handlers |
#   update it further is not visible in this routine. |
#   Flow: widen the operand to extended precision in FP_SRC, classify it |
#   with tag, then call scos / ld_pone / t_operr / src_qnan / scosd |
#   according to the tag. |
738 | global _fcoss_ |
739 | _fcoss_: |
740 | link %a6,&-LOCAL_SIZE |
741 | ||
742 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 |
743 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR at USER_FPCR, FPSR at USER_FPSR) |
744 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 |
745 | ||
746 | fmov.l &0x0,%fpcr # zero FPCR |
747 | ||
748 | # |
749 | # copy, convert, and tag input argument |
# (the value is stored in extended precision at FP_SRC; a0 = &FP_SRC for tag) |
750 | # |
751 | fmov.s 0x8(%a6),%fp0 # load sgl input |
752 | fmov.x %fp0,FP_SRC(%a6) |
753 | lea FP_SRC(%a6),%a0 |
754 | bsr.l tag # fetch operand type (returned in d0) |
755 | mov.b %d0,STAG(%a6) |
756 | mov.b %d0,%d1 |
757 | ||
# keep only the quotient and accrued-exception bytes of the saved FPSR |
# (the FPSR_CC and FPSR_EXCEPT bytes are cleared) |
758 | andi.l &0x00ff00ff,USER_FPSR(%a6) |
759 | ||
760 | clr.l %d0 |
761 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec (saved FPCR mode byte, upper bits of d0 zero) |
762 | ||
763 | tst.b %d1 |
764 | bne.b _L1_2s |
765 | bsr.l scos # operand is a NORM (tag byte is zero) |
766 | bra.b _L1_6s |
767 | _L1_2s: |
768 | cmpi.b %d1,&ZERO # is operand a ZERO? |
769 | bne.b _L1_3s # no |
770 | bsr.l ld_pone # yes |
771 | bra.b _L1_6s |
772 | _L1_3s: |
773 | cmpi.b %d1,&INF # is operand an INF? |
774 | bne.b _L1_4s # no |
775 | bsr.l t_operr # yes |
776 | bra.b _L1_6s |
777 | _L1_4s: |
778 | cmpi.b %d1,&QNAN # is operand a QNAN? |
779 | bne.b _L1_5s # no |
780 | bsr.l src_qnan # yes |
781 | bra.b _L1_6s |
782 | _L1_5s: |
783 | bsr.l scosd # operand is a DENORM (any tag not matched above also lands here) |
784 | _L1_6s: |
785 | ||
786 | # |
787 | # Result is now in FP0 |
788 | # |
789 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 |
790 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame copies |
791 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 is left holding the result |
792 | unlk %a6 |
793 | rts |
794 | ||
# _fcosd_: entry point taking one double-precision operand on the stack. |
#   In:   0x8(%a6) after the link holds the dbl operand (with the usual |
#         link frame that is the first argument above the saved %a6 and |
#         the return address). |
#   Out:  result in %fp0 (see "Result is now in FP0" below). |
#   d0-d1/a0-a1 and fp1 are reloaded from the frame before returning. FPCR |
#   and FPSR are reloaded from their frame copies; the FPSR copy has its |
#   FPSR_CC and FPSR_EXCEPT bytes cleared below, and whether the handlers |
#   update it further is not visible in this routine. |
#   Flow: widen the operand to extended precision in FP_SRC, classify it |
#   with tag, then call scos / ld_pone / t_operr / src_qnan / scosd |
#   according to the tag. |
#   Modified from the original: a duplicate store of the tag into STAG |
#   was dropped (see the note before the dispatch). |
795 | global _fcosd_ |
796 | _fcosd_: |
797 | link %a6,&-LOCAL_SIZE |
798 | ||
799 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 |
800 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR at USER_FPCR, FPSR at USER_FPSR) |
801 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 |
802 | ||
803 | fmov.l &0x0,%fpcr # zero FPCR |
804 | ||
805 | # |
806 | # copy, convert, and tag input argument |
# (the value is stored in extended precision at FP_SRC; a0 = &FP_SRC for tag) |
807 | # |
808 | fmov.d 0x8(%a6),%fp0 # load dbl input |
809 | fmov.x %fp0,FP_SRC(%a6) |
810 | lea FP_SRC(%a6),%a0 |
811 | bsr.l tag # fetch operand type (returned in d0) |
812 | mov.b %d0,STAG(%a6) |
813 | mov.b %d0,%d1 |
814 | ||
# keep only the quotient and accrued-exception bytes of the saved FPSR |
# (the FPSR_CC and FPSR_EXCEPT bytes are cleared) |
815 | andi.l &0x00ff00ff,USER_FPSR(%a6) |
816 | ||
817 | clr.l %d0 |
818 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec (saved FPCR mode byte, upper bits of d0 zero) |
819 | ||
820 | # (a second "mov.b %d1,STAG(%a6)" was dropped here: STAG already holds this tag and d1 has not changed since) |
821 | tst.b %d1 |
822 | bne.b _L1_2d |
823 | bsr.l scos # operand is a NORM (tag byte is zero) |
824 | bra.b _L1_6d |
825 | _L1_2d: |
826 | cmpi.b %d1,&ZERO # is operand a ZERO? |
827 | bne.b _L1_3d # no |
828 | bsr.l ld_pone # yes |
829 | bra.b _L1_6d |
830 | _L1_3d: |
831 | cmpi.b %d1,&INF # is operand an INF? |
832 | bne.b _L1_4d # no |
833 | bsr.l t_operr # yes |
834 | bra.b _L1_6d |
835 | _L1_4d: |
836 | cmpi.b %d1,&QNAN # is operand a QNAN? |
837 | bne.b _L1_5d # no |
838 | bsr.l src_qnan # yes |
839 | bra.b _L1_6d |
840 | _L1_5d: |
841 | bsr.l scosd # operand is a DENORM (any tag not matched above also lands here) |
842 | _L1_6d: |
843 | ||
844 | # |
845 | # Result is now in FP0 |
846 | # |
847 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 |
848 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame copies |
849 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 is left holding the result |
850 | unlk %a6 |
851 | rts |
852 | ||
# _fcosx_: entry point taking one extended-precision operand on the stack. |
#   In:   the three longwords at 0x8(%a6), 0xc(%a6) and 0x10(%a6) after |
#         the link hold the operand; they are copied unchanged into FP_SRC. |
#   Out:  result in %fp0 (see "Result is now in FP0" below). |
#   d0-d1/a0-a1 and fp1 are reloaded from the frame before returning. FPCR |
#   and FPSR are reloaded from their frame copies; the FPSR copy has its |
#   FPSR_CC and FPSR_EXCEPT bytes cleared below, and whether the handlers |
#   update it further is not visible in this routine. |
#   Flow: copy the operand to FP_SRC, classify it with tag, then call |
#   scos / ld_pone / t_operr / src_qnan / scosd according to the tag. |
853 | global _fcosx_ |
854 | _fcosx_: |
855 | link %a6,&-LOCAL_SIZE |
856 | ||
857 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 |
858 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR at USER_FPCR, FPSR at USER_FPSR) |
859 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 |
860 | ||
861 | fmov.l &0x0,%fpcr # zero FPCR |
862 | ||
863 | # |
864 | # copy, convert, and tag input argument |
# (no conversion is needed here: a0 = &FP_SRC is set up first and then used |
# both as the copy destination and as the argument for tag) |
865 | # |
866 | lea FP_SRC(%a6),%a0 |
867 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input (first of three longwords) |
868 | mov.l 0x8+0x4(%a6),0x4(%a0) |
869 | mov.l 0x8+0x8(%a6),0x8(%a0) |
870 | bsr.l tag # fetch operand type (returned in d0) |
871 | mov.b %d0,STAG(%a6) |
872 | mov.b %d0,%d1 |
873 | ||
# keep only the quotient and accrued-exception bytes of the saved FPSR |
# (the FPSR_CC and FPSR_EXCEPT bytes are cleared) |
874 | andi.l &0x00ff00ff,USER_FPSR(%a6) |
875 | ||
876 | clr.l %d0 |
877 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec (saved FPCR mode byte, upper bits of d0 zero) |
878 | ||
879 | tst.b %d1 |
880 | bne.b _L1_2x |
881 | bsr.l scos # operand is a NORM (tag byte is zero) |
882 | bra.b _L1_6x |
883 | _L1_2x: |
884 | cmpi.b %d1,&ZERO # is operand a ZERO? |
885 | bne.b _L1_3x # no |
886 | bsr.l ld_pone # yes |
887 | bra.b _L1_6x |
888 | _L1_3x: |
889 | cmpi.b %d1,&INF # is operand an INF? |
890 | bne.b _L1_4x # no |
891 | bsr.l t_operr # yes |
892 | bra.b _L1_6x |
893 | _L1_4x: |
894 | cmpi.b %d1,&QNAN # is operand a QNAN? |
895 | bne.b _L1_5x # no |
896 | bsr.l src_qnan # yes |
897 | bra.b _L1_6x |
898 | _L1_5x: |
899 | bsr.l scosd # operand is a DENORM (any tag not matched above also lands here) |
900 | _L1_6x: |
901 | ||
902 | # |
903 | # Result is now in FP0 |
904 | # |
905 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 |
906 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame copies |
907 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 is left holding the result |
908 | unlk %a6 |
909 | rts |
910 | ||
911 | ||
912 | ######################################################################### |
913 | # MONADIC TEMPLATE # |
914 | ######################################################################### |
# _fsinhs_: entry point taking one single-precision operand on the stack. |
#   In:   0x8(%a6) after the link holds the sgl operand (with the usual |
#         link frame that is the first argument above the saved %a6 and |
#         the return address). |
#   Out:  result in %fp0 (see "Result is now in FP0" below). |
#   d0-d1/a0-a1 and fp1 are reloaded from the frame before returning. FPCR |
#   and FPSR are reloaded from their frame copies; the FPSR copy has its |
#   FPSR_CC and FPSR_EXCEPT bytes cleared below, and whether the handlers |
#   update it further is not visible in this routine. |
#   Flow: widen the operand to extended precision in FP_SRC, classify it |
#   with tag, then call ssinh / src_zero / src_inf / src_qnan / ssinhd |
#   according to the tag. |
915 | global _fsinhs_ |
916 | _fsinhs_: |
917 | link %a6,&-LOCAL_SIZE |
918 | ||
919 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 |
920 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR at USER_FPCR, FPSR at USER_FPSR) |
921 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 |
922 | ||
923 | fmov.l &0x0,%fpcr # zero FPCR |
924 | ||
925 | # |
926 | # copy, convert, and tag input argument |
# (the value is stored in extended precision at FP_SRC; a0 = &FP_SRC for tag) |
927 | # |
928 | fmov.s 0x8(%a6),%fp0 # load sgl input |
929 | fmov.x %fp0,FP_SRC(%a6) |
930 | lea FP_SRC(%a6),%a0 |
931 | bsr.l tag # fetch operand type (returned in d0) |
932 | mov.b %d0,STAG(%a6) |
933 | mov.b %d0,%d1 |
934 | ||
# keep only the quotient and accrued-exception bytes of the saved FPSR |
# (the FPSR_CC and FPSR_EXCEPT bytes are cleared) |
935 | andi.l &0x00ff00ff,USER_FPSR(%a6) |
936 | ||
937 | clr.l %d0 |
938 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec (saved FPCR mode byte, upper bits of d0 zero) |
939 | ||
940 | tst.b %d1 |
941 | bne.b _L2_2s |
942 | bsr.l ssinh # operand is a NORM (tag byte is zero) |
943 | bra.b _L2_6s |
944 | _L2_2s: |
945 | cmpi.b %d1,&ZERO # is operand a ZERO? |
946 | bne.b _L2_3s # no |
947 | bsr.l src_zero # yes |
948 | bra.b _L2_6s |
949 | _L2_3s: |
950 | cmpi.b %d1,&INF # is operand an INF? |
951 | bne.b _L2_4s # no |
952 | bsr.l src_inf # yes |
953 | bra.b _L2_6s |
954 | _L2_4s: |
955 | cmpi.b %d1,&QNAN # is operand a QNAN? |
956 | bne.b _L2_5s # no |
957 | bsr.l src_qnan # yes |
958 | bra.b _L2_6s |
959 | _L2_5s: |
960 | bsr.l ssinhd # operand is a DENORM (any tag not matched above also lands here) |
961 | _L2_6s: |
962 | ||
963 | # |
964 | # Result is now in FP0 |
965 | # |
966 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 |
967 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame copies |
968 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 is left holding the result |
969 | unlk %a6 |
970 | rts |
971 | ||
# _fsinhd_: entry point taking one double-precision operand on the stack. |
#   In:   0x8(%a6) after the link holds the dbl operand (with the usual |
#         link frame that is the first argument above the saved %a6 and |
#         the return address). |
#   Out:  result in %fp0 (see "Result is now in FP0" below). |
#   d0-d1/a0-a1 and fp1 are reloaded from the frame before returning. FPCR |
#   and FPSR are reloaded from their frame copies; the FPSR copy has its |
#   FPSR_CC and FPSR_EXCEPT bytes cleared below, and whether the handlers |
#   update it further is not visible in this routine. |
#   Flow: widen the operand to extended precision in FP_SRC, classify it |
#   with tag, then call ssinh / src_zero / src_inf / src_qnan / ssinhd |
#   according to the tag. |
#   Modified from the original: a duplicate store of the tag into STAG |
#   was dropped (see the note before the dispatch). |
972 | global _fsinhd_ |
973 | _fsinhd_: |
974 | link %a6,&-LOCAL_SIZE |
975 | ||
976 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 |
977 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs (FPCR at USER_FPCR, FPSR at USER_FPSR) |
978 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 |
979 | ||
980 | fmov.l &0x0,%fpcr # zero FPCR |
981 | ||
982 | # |
983 | # copy, convert, and tag input argument |
# (the value is stored in extended precision at FP_SRC; a0 = &FP_SRC for tag) |
984 | # |
985 | fmov.d 0x8(%a6),%fp0 # load dbl input |
986 | fmov.x %fp0,FP_SRC(%a6) |
987 | lea FP_SRC(%a6),%a0 |
988 | bsr.l tag # fetch operand type (returned in d0) |
989 | mov.b %d0,STAG(%a6) |
990 | mov.b %d0,%d1 |
991 | ||
# keep only the quotient and accrued-exception bytes of the saved FPSR |
# (the FPSR_CC and FPSR_EXCEPT bytes are cleared) |
992 | andi.l &0x00ff00ff,USER_FPSR(%a6) |
993 | ||
994 | clr.l %d0 |
995 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec (saved FPCR mode byte, upper bits of d0 zero) |
996 | ||
997 | # (a second "mov.b %d1,STAG(%a6)" was dropped here: STAG already holds this tag and d1 has not changed since) |
998 | tst.b %d1 |
999 | bne.b _L2_2d |
1000 | bsr.l ssinh # operand is a NORM (tag byte is zero) |
1001 | bra.b _L2_6d |
1002 | _L2_2d: |
1003 | cmpi.b %d1,&ZERO # is operand a ZERO? |
1004 | bne.b _L2_3d # no |
1005 | bsr.l src_zero # yes |
1006 | bra.b _L2_6d |
1007 | _L2_3d: |
1008 | cmpi.b %d1,&INF # is operand an INF? |
1009 | bne.b _L2_4d # no |
1010 | bsr.l src_inf # yes |
1011 | bra.b _L2_6d |
1012 | _L2_4d: |
1013 | cmpi.b %d1,&QNAN # is operand a QNAN? |
1014 | bne.b _L2_5d # no |
1015 | bsr.l src_qnan # yes |
1016 | bra.b _L2_6d |
1017 | _L2_5d: |
1018 | bsr.l ssinhd # operand is a DENORM (any tag not matched above also lands here) |
1019 | _L2_6d: |
1020 | ||
1021 | # |
1022 | # Result is now in FP0 |
1023 | # |
1024 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 |
1025 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs from the frame copies |
1026 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 is left holding the result |
1027 | unlk %a6 |
1028 | rts |
1029 | ||
# _fsinhx_ -- entry point for an extended-precision operand at 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is copied as three longwords into FP_SRC
#   and classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> ssinh       ZERO -> src_zero     INF -> src_inf
#     QNAN     -> src_qnan    any other tag (DENORM) -> ssinhd
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest sinh; the math itself is in the handlers.)
1030 | global _fsinhx_ | |
1031 | _fsinhx_: | |
1032 | link %a6,&-LOCAL_SIZE | |
1033 | ||
1034 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1035 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1036 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1037 | ||
1038 | fmov.l &0x0,%fpcr # zero FPCR | |
1039 | ||
1040 | # | |
1041 | # copy, convert, and tag input argument | |
1042 | # | |
1043 | lea FP_SRC(%a6),%a0 | |
1044 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
1045 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
1046 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
1047 | bsr.l tag # fetch operand type | |
1048 | mov.b %d0,STAG(%a6) | |
1049 | mov.b %d0,%d1 | |
1050 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1051 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1052 | ||
1053 | clr.l %d0 | |
1054 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1055 | ||
# Dispatch on the operand tag held in d1.
1056 | tst.b %d1 | |
1057 | bne.b _L2_2x | |
1058 | bsr.l ssinh # operand is a NORM | |
1059 | bra.b _L2_6x | |
1060 | _L2_2x: | |
1061 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1062 | bne.b _L2_3x # no | |
1063 | bsr.l src_zero # yes | |
1064 | bra.b _L2_6x | |
1065 | _L2_3x: | |
1066 | cmpi.b %d1,&INF # is operand an INF? | |
1067 | bne.b _L2_4x # no | |
1068 | bsr.l src_inf # yes | |
1069 | bra.b _L2_6x | |
1070 | _L2_4x: | |
1071 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1072 | bne.b _L2_5x # no | |
1073 | bsr.l src_qnan # yes | |
1074 | bra.b _L2_6x | |
1075 | _L2_5x: | |
1076 | bsr.l ssinhd # operand is a DENORM | |
1077 | _L2_6x: | |
1078 | ||
1079 | # | |
1080 | # Result is now in FP0 | |
1081 | # | |
1082 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1083 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1084 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1085 | unlk %a6 | |
1086 | rts | |
1087 | ||
1088 | ||
1089 | ######################################################################### | |
1090 | # MONADIC TEMPLATE # | |
1091 | ######################################################################### | |
# _flognp1s_ -- entry point for a single-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> slognp1     ZERO -> src_zero     INF -> sopr_inf
#     QNAN     -> src_qnan    any other tag (DENORM) -> slognp1d
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest log(1+x); the math itself is in the handlers.)
1092 | global _flognp1s_ | |
1093 | _flognp1s_: | |
1094 | link %a6,&-LOCAL_SIZE | |
1095 | ||
1096 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1097 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1098 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1099 | ||
1100 | fmov.l &0x0,%fpcr # zero FPCR | |
1101 | ||
1102 | # | |
1103 | # copy, convert, and tag input argument | |
1104 | # | |
1105 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
1106 | fmov.x %fp0,FP_SRC(%a6) | |
1107 | lea FP_SRC(%a6),%a0 | |
1108 | bsr.l tag # fetch operand type | |
1109 | mov.b %d0,STAG(%a6) | |
1110 | mov.b %d0,%d1 | |
1111 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1112 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1113 | ||
1114 | clr.l %d0 | |
1115 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1116 | ||
# Dispatch on the operand tag held in d1.
1117 | tst.b %d1 | |
1118 | bne.b _L3_2s | |
1119 | bsr.l slognp1 # operand is a NORM | |
1120 | bra.b _L3_6s | |
1121 | _L3_2s: | |
1122 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1123 | bne.b _L3_3s # no | |
1124 | bsr.l src_zero # yes | |
1125 | bra.b _L3_6s | |
1126 | _L3_3s: | |
1127 | cmpi.b %d1,&INF # is operand an INF? | |
1128 | bne.b _L3_4s # no | |
1129 | bsr.l sopr_inf # yes | |
1130 | bra.b _L3_6s | |
1131 | _L3_4s: | |
1132 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1133 | bne.b _L3_5s # no | |
1134 | bsr.l src_qnan # yes | |
1135 | bra.b _L3_6s | |
1136 | _L3_5s: | |
1137 | bsr.l slognp1d # operand is a DENORM | |
1138 | _L3_6s: | |
1139 | ||
1140 | # | |
1141 | # Result is now in FP0 | |
1142 | # | |
1143 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1144 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1145 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1146 | unlk %a6 | |
1147 | rts | |
1148 | ||
# _flognp1d_ -- entry point for a double-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> slognp1     ZERO -> src_zero     INF -> sopr_inf
#     QNAN     -> src_qnan    any other tag (DENORM) -> slognp1d
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest log(1+x); the math itself is in the handlers.)
1149 | global _flognp1d_ | |
1150 | _flognp1d_: | |
1151 | link %a6,&-LOCAL_SIZE | |
1152 | ||
1153 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1154 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1155 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1156 | ||
1157 | fmov.l &0x0,%fpcr # zero FPCR | |
1158 | ||
1159 | # | |
1160 | # copy, convert, and tag input argument | |
1161 | # | |
1162 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
1163 | fmov.x %fp0,FP_SRC(%a6) | |
1164 | lea FP_SRC(%a6),%a0 | |
1165 | bsr.l tag # fetch operand type | |
1166 | mov.b %d0,STAG(%a6) | |
1167 | mov.b %d0,%d1 | |
1168 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1169 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1170 | ||
1171 | clr.l %d0 | |
1172 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1173 | ||
# NOTE(review): STAG was already written from d0 above and d1 is a copy of
# that value, so this second store looks redundant (the s/x entry points do
# not have it). Left untouched.
1174 | mov.b %d1,STAG(%a6) | |
# Dispatch on the operand tag held in d1.
1175 | tst.b %d1 | |
1176 | bne.b _L3_2d | |
1177 | bsr.l slognp1 # operand is a NORM | |
1178 | bra.b _L3_6d | |
1179 | _L3_2d: | |
1180 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1181 | bne.b _L3_3d # no | |
1182 | bsr.l src_zero # yes | |
1183 | bra.b _L3_6d | |
1184 | _L3_3d: | |
1185 | cmpi.b %d1,&INF # is operand an INF? | |
1186 | bne.b _L3_4d # no | |
1187 | bsr.l sopr_inf # yes | |
1188 | bra.b _L3_6d | |
1189 | _L3_4d: | |
1190 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1191 | bne.b _L3_5d # no | |
1192 | bsr.l src_qnan # yes | |
1193 | bra.b _L3_6d | |
1194 | _L3_5d: | |
1195 | bsr.l slognp1d # operand is a DENORM | |
1196 | _L3_6d: | |
1197 | ||
1198 | # | |
1199 | # Result is now in FP0 | |
1200 | # | |
1201 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1202 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1203 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1204 | unlk %a6 | |
1205 | rts | |
1206 | ||
# _flognp1x_ -- entry point for an extended-precision operand at 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is copied as three longwords into FP_SRC
#   and classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> slognp1     ZERO -> src_zero     INF -> sopr_inf
#     QNAN     -> src_qnan    any other tag (DENORM) -> slognp1d
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest log(1+x); the math itself is in the handlers.)
1207 | global _flognp1x_ | |
1208 | _flognp1x_: | |
1209 | link %a6,&-LOCAL_SIZE | |
1210 | ||
1211 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1212 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1213 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1214 | ||
1215 | fmov.l &0x0,%fpcr # zero FPCR | |
1216 | ||
1217 | # | |
1218 | # copy, convert, and tag input argument | |
1219 | # | |
1220 | lea FP_SRC(%a6),%a0 | |
1221 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
1222 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
1223 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
1224 | bsr.l tag # fetch operand type | |
1225 | mov.b %d0,STAG(%a6) | |
1226 | mov.b %d0,%d1 | |
1227 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1228 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1229 | ||
1230 | clr.l %d0 | |
1231 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1232 | ||
# Dispatch on the operand tag held in d1.
1233 | tst.b %d1 | |
1234 | bne.b _L3_2x | |
1235 | bsr.l slognp1 # operand is a NORM | |
1236 | bra.b _L3_6x | |
1237 | _L3_2x: | |
1238 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1239 | bne.b _L3_3x # no | |
1240 | bsr.l src_zero # yes | |
1241 | bra.b _L3_6x | |
1242 | _L3_3x: | |
1243 | cmpi.b %d1,&INF # is operand an INF? | |
1244 | bne.b _L3_4x # no | |
1245 | bsr.l sopr_inf # yes | |
1246 | bra.b _L3_6x | |
1247 | _L3_4x: | |
1248 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1249 | bne.b _L3_5x # no | |
1250 | bsr.l src_qnan # yes | |
1251 | bra.b _L3_6x | |
1252 | _L3_5x: | |
1253 | bsr.l slognp1d # operand is a DENORM | |
1254 | _L3_6x: | |
1255 | ||
1256 | # | |
1257 | # Result is now in FP0 | |
1258 | # | |
1259 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1260 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1261 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1262 | unlk %a6 | |
1263 | rts | |
1264 | ||
1265 | ||
1266 | ######################################################################### | |
1267 | # MONADIC TEMPLATE # | |
1268 | ######################################################################### | |
# _fetoxm1s_ -- entry point for a single-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> setoxm1     ZERO -> src_zero     INF -> setoxm1i
#     QNAN     -> src_qnan    any other tag (DENORM) -> setoxm1d
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest e^x - 1; the math itself is in the handlers.)
1269 | global _fetoxm1s_ | |
1270 | _fetoxm1s_: | |
1271 | link %a6,&-LOCAL_SIZE | |
1272 | ||
1273 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1274 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1275 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1276 | ||
1277 | fmov.l &0x0,%fpcr # zero FPCR | |
1278 | ||
1279 | # | |
1280 | # copy, convert, and tag input argument | |
1281 | # | |
1282 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
1283 | fmov.x %fp0,FP_SRC(%a6) | |
1284 | lea FP_SRC(%a6),%a0 | |
1285 | bsr.l tag # fetch operand type | |
1286 | mov.b %d0,STAG(%a6) | |
1287 | mov.b %d0,%d1 | |
1288 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1289 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1290 | ||
1291 | clr.l %d0 | |
1292 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1293 | ||
# Dispatch on the operand tag held in d1.
1294 | tst.b %d1 | |
1295 | bne.b _L4_2s | |
1296 | bsr.l setoxm1 # operand is a NORM | |
1297 | bra.b _L4_6s | |
1298 | _L4_2s: | |
1299 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1300 | bne.b _L4_3s # no | |
1301 | bsr.l src_zero # yes | |
1302 | bra.b _L4_6s | |
1303 | _L4_3s: | |
1304 | cmpi.b %d1,&INF # is operand an INF? | |
1305 | bne.b _L4_4s # no | |
1306 | bsr.l setoxm1i # yes | |
1307 | bra.b _L4_6s | |
1308 | _L4_4s: | |
1309 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1310 | bne.b _L4_5s # no | |
1311 | bsr.l src_qnan # yes | |
1312 | bra.b _L4_6s | |
1313 | _L4_5s: | |
1314 | bsr.l setoxm1d # operand is a DENORM | |
1315 | _L4_6s: | |
1316 | ||
1317 | # | |
1318 | # Result is now in FP0 | |
1319 | # | |
1320 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1321 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1322 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1323 | unlk %a6 | |
1324 | rts | |
1325 | ||
# _fetoxm1d_ -- entry point for a double-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> setoxm1     ZERO -> src_zero     INF -> setoxm1i
#     QNAN     -> src_qnan    any other tag (DENORM) -> setoxm1d
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest e^x - 1; the math itself is in the handlers.)
1326 | global _fetoxm1d_ | |
1327 | _fetoxm1d_: | |
1328 | link %a6,&-LOCAL_SIZE | |
1329 | ||
1330 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1331 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1332 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1333 | ||
1334 | fmov.l &0x0,%fpcr # zero FPCR | |
1335 | ||
1336 | # | |
1337 | # copy, convert, and tag input argument | |
1338 | # | |
1339 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
1340 | fmov.x %fp0,FP_SRC(%a6) | |
1341 | lea FP_SRC(%a6),%a0 | |
1342 | bsr.l tag # fetch operand type | |
1343 | mov.b %d0,STAG(%a6) | |
1344 | mov.b %d0,%d1 | |
1345 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1346 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1347 | ||
1348 | clr.l %d0 | |
1349 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1350 | ||
# NOTE(review): STAG was already written from d0 above and d1 is a copy of
# that value, so this second store looks redundant (the s/x entry points do
# not have it). Left untouched.
1351 | mov.b %d1,STAG(%a6) | |
# Dispatch on the operand tag held in d1.
1352 | tst.b %d1 | |
1353 | bne.b _L4_2d | |
1354 | bsr.l setoxm1 # operand is a NORM | |
1355 | bra.b _L4_6d | |
1356 | _L4_2d: | |
1357 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1358 | bne.b _L4_3d # no | |
1359 | bsr.l src_zero # yes | |
1360 | bra.b _L4_6d | |
1361 | _L4_3d: | |
1362 | cmpi.b %d1,&INF # is operand an INF? | |
1363 | bne.b _L4_4d # no | |
1364 | bsr.l setoxm1i # yes | |
1365 | bra.b _L4_6d | |
1366 | _L4_4d: | |
1367 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1368 | bne.b _L4_5d # no | |
1369 | bsr.l src_qnan # yes | |
1370 | bra.b _L4_6d | |
1371 | _L4_5d: | |
1372 | bsr.l setoxm1d # operand is a DENORM | |
1373 | _L4_6d: | |
1374 | ||
1375 | # | |
1376 | # Result is now in FP0 | |
1377 | # | |
1378 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1379 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1380 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1381 | unlk %a6 | |
1382 | rts | |
1383 | ||
# _fetoxm1x_ -- entry point for an extended-precision operand at 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is copied as three longwords into FP_SRC
#   and classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> setoxm1     ZERO -> src_zero     INF -> setoxm1i
#     QNAN     -> src_qnan    any other tag (DENORM) -> setoxm1d
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest e^x - 1; the math itself is in the handlers.)
1384 | global _fetoxm1x_ | |
1385 | _fetoxm1x_: | |
1386 | link %a6,&-LOCAL_SIZE | |
1387 | ||
1388 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1389 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1390 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1391 | ||
1392 | fmov.l &0x0,%fpcr # zero FPCR | |
1393 | ||
1394 | # | |
1395 | # copy, convert, and tag input argument | |
1396 | # | |
1397 | lea FP_SRC(%a6),%a0 | |
1398 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
1399 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
1400 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
1401 | bsr.l tag # fetch operand type | |
1402 | mov.b %d0,STAG(%a6) | |
1403 | mov.b %d0,%d1 | |
1404 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1405 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1406 | ||
1407 | clr.l %d0 | |
1408 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1409 | ||
# Dispatch on the operand tag held in d1.
1410 | tst.b %d1 | |
1411 | bne.b _L4_2x | |
1412 | bsr.l setoxm1 # operand is a NORM | |
1413 | bra.b _L4_6x | |
1414 | _L4_2x: | |
1415 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1416 | bne.b _L4_3x # no | |
1417 | bsr.l src_zero # yes | |
1418 | bra.b _L4_6x | |
1419 | _L4_3x: | |
1420 | cmpi.b %d1,&INF # is operand an INF? | |
1421 | bne.b _L4_4x # no | |
1422 | bsr.l setoxm1i # yes | |
1423 | bra.b _L4_6x | |
1424 | _L4_4x: | |
1425 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1426 | bne.b _L4_5x # no | |
1427 | bsr.l src_qnan # yes | |
1428 | bra.b _L4_6x | |
1429 | _L4_5x: | |
1430 | bsr.l setoxm1d # operand is a DENORM | |
1431 | _L4_6x: | |
1432 | ||
1433 | # | |
1434 | # Result is now in FP0 | |
1435 | # | |
1436 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1437 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1438 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1439 | unlk %a6 | |
1440 | rts | |
1441 | ||
1442 | ||
1443 | ######################################################################### | |
1444 | # MONADIC TEMPLATE # | |
1445 | ######################################################################### | |
# _ftanhs_ -- entry point for a single-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> stanh       ZERO -> src_zero     INF -> src_one
#     QNAN     -> src_qnan    any other tag (DENORM) -> stanhd
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest tanh; the math itself is in the handlers.)
1446 | global _ftanhs_ | |
1447 | _ftanhs_: | |
1448 | link %a6,&-LOCAL_SIZE | |
1449 | ||
1450 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1451 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1452 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1453 | ||
1454 | fmov.l &0x0,%fpcr # zero FPCR | |
1455 | ||
1456 | # | |
1457 | # copy, convert, and tag input argument | |
1458 | # | |
1459 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
1460 | fmov.x %fp0,FP_SRC(%a6) | |
1461 | lea FP_SRC(%a6),%a0 | |
1462 | bsr.l tag # fetch operand type | |
1463 | mov.b %d0,STAG(%a6) | |
1464 | mov.b %d0,%d1 | |
1465 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1466 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1467 | ||
1468 | clr.l %d0 | |
1469 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1470 | ||
# Dispatch on the operand tag held in d1.
1471 | tst.b %d1 | |
1472 | bne.b _L5_2s | |
1473 | bsr.l stanh # operand is a NORM | |
1474 | bra.b _L5_6s | |
1475 | _L5_2s: | |
1476 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1477 | bne.b _L5_3s # no | |
1478 | bsr.l src_zero # yes | |
1479 | bra.b _L5_6s | |
1480 | _L5_3s: | |
1481 | cmpi.b %d1,&INF # is operand an INF? | |
1482 | bne.b _L5_4s # no | |
1483 | bsr.l src_one # yes | |
1484 | bra.b _L5_6s | |
1485 | _L5_4s: | |
1486 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1487 | bne.b _L5_5s # no | |
1488 | bsr.l src_qnan # yes | |
1489 | bra.b _L5_6s | |
1490 | _L5_5s: | |
1491 | bsr.l stanhd # operand is a DENORM | |
1492 | _L5_6s: | |
1493 | ||
1494 | # | |
1495 | # Result is now in FP0 | |
1496 | # | |
1497 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1498 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1499 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1500 | unlk %a6 | |
1501 | rts | |
1502 | ||
# _ftanhd_ -- entry point for a double-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> stanh       ZERO -> src_zero     INF -> src_one
#     QNAN     -> src_qnan    any other tag (DENORM) -> stanhd
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest tanh; the math itself is in the handlers.)
1503 | global _ftanhd_ | |
1504 | _ftanhd_: | |
1505 | link %a6,&-LOCAL_SIZE | |
1506 | ||
1507 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1508 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1509 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1510 | ||
1511 | fmov.l &0x0,%fpcr # zero FPCR | |
1512 | ||
1513 | # | |
1514 | # copy, convert, and tag input argument | |
1515 | # | |
1516 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
1517 | fmov.x %fp0,FP_SRC(%a6) | |
1518 | lea FP_SRC(%a6),%a0 | |
1519 | bsr.l tag # fetch operand type | |
1520 | mov.b %d0,STAG(%a6) | |
1521 | mov.b %d0,%d1 | |
1522 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1523 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1524 | ||
1525 | clr.l %d0 | |
1526 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1527 | ||
# NOTE(review): STAG was already written from d0 above and d1 is a copy of
# that value, so this second store looks redundant (the s/x entry points do
# not have it). Left untouched.
1528 | mov.b %d1,STAG(%a6) | |
# Dispatch on the operand tag held in d1.
1529 | tst.b %d1 | |
1530 | bne.b _L5_2d | |
1531 | bsr.l stanh # operand is a NORM | |
1532 | bra.b _L5_6d | |
1533 | _L5_2d: | |
1534 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1535 | bne.b _L5_3d # no | |
1536 | bsr.l src_zero # yes | |
1537 | bra.b _L5_6d | |
1538 | _L5_3d: | |
1539 | cmpi.b %d1,&INF # is operand an INF? | |
1540 | bne.b _L5_4d # no | |
1541 | bsr.l src_one # yes | |
1542 | bra.b _L5_6d | |
1543 | _L5_4d: | |
1544 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1545 | bne.b _L5_5d # no | |
1546 | bsr.l src_qnan # yes | |
1547 | bra.b _L5_6d | |
1548 | _L5_5d: | |
1549 | bsr.l stanhd # operand is a DENORM | |
1550 | _L5_6d: | |
1551 | ||
1552 | # | |
1553 | # Result is now in FP0 | |
1554 | # | |
1555 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1556 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1557 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1558 | unlk %a6 | |
1559 | rts | |
1560 | ||
# _ftanhx_ -- entry point for an extended-precision operand at 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is copied as three longwords into FP_SRC
#   and classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> stanh       ZERO -> src_zero     INF -> src_one
#     QNAN     -> src_qnan    any other tag (DENORM) -> stanhd
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest tanh; the math itself is in the handlers.)
1561 | global _ftanhx_ | |
1562 | _ftanhx_: | |
1563 | link %a6,&-LOCAL_SIZE | |
1564 | ||
1565 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1566 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1567 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1568 | ||
1569 | fmov.l &0x0,%fpcr # zero FPCR | |
1570 | ||
1571 | # | |
1572 | # copy, convert, and tag input argument | |
1573 | # | |
1574 | lea FP_SRC(%a6),%a0 | |
1575 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
1576 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
1577 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
1578 | bsr.l tag # fetch operand type | |
1579 | mov.b %d0,STAG(%a6) | |
1580 | mov.b %d0,%d1 | |
1581 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1582 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1583 | ||
1584 | clr.l %d0 | |
1585 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1586 | ||
# Dispatch on the operand tag held in d1.
1587 | tst.b %d1 | |
1588 | bne.b _L5_2x | |
1589 | bsr.l stanh # operand is a NORM | |
1590 | bra.b _L5_6x | |
1591 | _L5_2x: | |
1592 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1593 | bne.b _L5_3x # no | |
1594 | bsr.l src_zero # yes | |
1595 | bra.b _L5_6x | |
1596 | _L5_3x: | |
1597 | cmpi.b %d1,&INF # is operand an INF? | |
1598 | bne.b _L5_4x # no | |
1599 | bsr.l src_one # yes | |
1600 | bra.b _L5_6x | |
1601 | _L5_4x: | |
1602 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1603 | bne.b _L5_5x # no | |
1604 | bsr.l src_qnan # yes | |
1605 | bra.b _L5_6x | |
1606 | _L5_5x: | |
1607 | bsr.l stanhd # operand is a DENORM | |
1608 | _L5_6x: | |
1609 | ||
1610 | # | |
1611 | # Result is now in FP0 | |
1612 | # | |
1613 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1614 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1615 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1616 | unlk %a6 | |
1617 | rts | |
1618 | ||
1619 | ||
1620 | ######################################################################### | |
1621 | # MONADIC TEMPLATE # | |
1622 | ######################################################################### | |
# _fatans_ -- entry point for a single-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> satan       ZERO -> src_zero     INF -> spi_2
#     QNAN     -> src_qnan    any other tag (DENORM) -> satand
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest arctangent; the math itself is in the handlers.)
1623 | global _fatans_ | |
1624 | _fatans_: | |
1625 | link %a6,&-LOCAL_SIZE | |
1626 | ||
1627 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1628 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1629 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1630 | ||
1631 | fmov.l &0x0,%fpcr # zero FPCR | |
1632 | ||
1633 | # | |
1634 | # copy, convert, and tag input argument | |
1635 | # | |
1636 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
1637 | fmov.x %fp0,FP_SRC(%a6) | |
1638 | lea FP_SRC(%a6),%a0 | |
1639 | bsr.l tag # fetch operand type | |
1640 | mov.b %d0,STAG(%a6) | |
1641 | mov.b %d0,%d1 | |
1642 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1643 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1644 | ||
1645 | clr.l %d0 | |
1646 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1647 | ||
# Dispatch on the operand tag held in d1.
1648 | tst.b %d1 | |
1649 | bne.b _L6_2s | |
1650 | bsr.l satan # operand is a NORM | |
1651 | bra.b _L6_6s | |
1652 | _L6_2s: | |
1653 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1654 | bne.b _L6_3s # no | |
1655 | bsr.l src_zero # yes | |
1656 | bra.b _L6_6s | |
1657 | _L6_3s: | |
1658 | cmpi.b %d1,&INF # is operand an INF? | |
1659 | bne.b _L6_4s # no | |
1660 | bsr.l spi_2 # yes | |
1661 | bra.b _L6_6s | |
1662 | _L6_4s: | |
1663 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1664 | bne.b _L6_5s # no | |
1665 | bsr.l src_qnan # yes | |
1666 | bra.b _L6_6s | |
1667 | _L6_5s: | |
1668 | bsr.l satand # operand is a DENORM | |
1669 | _L6_6s: | |
1670 | ||
1671 | # | |
1672 | # Result is now in FP0 | |
1673 | # | |
1674 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1675 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1676 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1677 | unlk %a6 | |
1678 | rts | |
1679 | ||
# _fatand_ -- entry point for a double-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> satan       ZERO -> src_zero     INF -> spi_2
#     QNAN     -> src_qnan    any other tag (DENORM) -> satand
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest arctangent; the math itself is in the handlers.)
1680 | global _fatand_ | |
1681 | _fatand_: | |
1682 | link %a6,&-LOCAL_SIZE | |
1683 | ||
1684 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1685 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1686 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1687 | ||
1688 | fmov.l &0x0,%fpcr # zero FPCR | |
1689 | ||
1690 | # | |
1691 | # copy, convert, and tag input argument | |
1692 | # | |
1693 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
1694 | fmov.x %fp0,FP_SRC(%a6) | |
1695 | lea FP_SRC(%a6),%a0 | |
1696 | bsr.l tag # fetch operand type | |
1697 | mov.b %d0,STAG(%a6) | |
1698 | mov.b %d0,%d1 | |
1699 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1700 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1701 | ||
1702 | clr.l %d0 | |
1703 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1704 | ||
# NOTE(review): STAG was already written from d0 above and d1 is a copy of
# that value, so this second store looks redundant (the s/x entry points do
# not have it). Left untouched.
1705 | mov.b %d1,STAG(%a6) | |
# Dispatch on the operand tag held in d1.
1706 | tst.b %d1 | |
1707 | bne.b _L6_2d | |
1708 | bsr.l satan # operand is a NORM | |
1709 | bra.b _L6_6d | |
1710 | _L6_2d: | |
1711 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1712 | bne.b _L6_3d # no | |
1713 | bsr.l src_zero # yes | |
1714 | bra.b _L6_6d | |
1715 | _L6_3d: | |
1716 | cmpi.b %d1,&INF # is operand an INF? | |
1717 | bne.b _L6_4d # no | |
1718 | bsr.l spi_2 # yes | |
1719 | bra.b _L6_6d | |
1720 | _L6_4d: | |
1721 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1722 | bne.b _L6_5d # no | |
1723 | bsr.l src_qnan # yes | |
1724 | bra.b _L6_6d | |
1725 | _L6_5d: | |
1726 | bsr.l satand # operand is a DENORM | |
1727 | _L6_6d: | |
1728 | ||
1729 | # | |
1730 | # Result is now in FP0 | |
1731 | # | |
1732 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1733 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1734 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1735 | unlk %a6 | |
1736 | rts | |
1737 | ||
# _fatanx_ -- entry point for an extended-precision operand at 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is copied as three longwords into FP_SRC
#   and classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> satan       ZERO -> src_zero     INF -> spi_2
#     QNAN     -> src_qnan    any other tag (DENORM) -> satand
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest arctangent; the math itself is in the handlers.)
1738 | global _fatanx_ | |
1739 | _fatanx_: | |
1740 | link %a6,&-LOCAL_SIZE | |
1741 | ||
1742 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1743 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1744 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1745 | ||
1746 | fmov.l &0x0,%fpcr # zero FPCR | |
1747 | ||
1748 | # | |
1749 | # copy, convert, and tag input argument | |
1750 | # | |
1751 | lea FP_SRC(%a6),%a0 | |
1752 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
1753 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
1754 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
1755 | bsr.l tag # fetch operand type | |
1756 | mov.b %d0,STAG(%a6) | |
1757 | mov.b %d0,%d1 | |
1758 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1759 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1760 | ||
1761 | clr.l %d0 | |
1762 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1763 | ||
# Dispatch on the operand tag held in d1.
1764 | tst.b %d1 | |
1765 | bne.b _L6_2x | |
1766 | bsr.l satan # operand is a NORM | |
1767 | bra.b _L6_6x | |
1768 | _L6_2x: | |
1769 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1770 | bne.b _L6_3x # no | |
1771 | bsr.l src_zero # yes | |
1772 | bra.b _L6_6x | |
1773 | _L6_3x: | |
1774 | cmpi.b %d1,&INF # is operand an INF? | |
1775 | bne.b _L6_4x # no | |
1776 | bsr.l spi_2 # yes | |
1777 | bra.b _L6_6x | |
1778 | _L6_4x: | |
1779 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1780 | bne.b _L6_5x # no | |
1781 | bsr.l src_qnan # yes | |
1782 | bra.b _L6_6x | |
1783 | _L6_5x: | |
1784 | bsr.l satand # operand is a DENORM | |
1785 | _L6_6x: | |
1786 | ||
1787 | # | |
1788 | # Result is now in FP0 | |
1789 | # | |
1790 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1791 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1792 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1793 | unlk %a6 | |
1794 | rts | |
1795 | ||
1796 | ||
1797 | ######################################################################### | |
1798 | # MONADIC TEMPLATE # | |
1799 | ######################################################################### | |
# _fasins_ -- entry point for a single-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> sasin       ZERO -> src_zero     INF -> t_operr
#     QNAN     -> src_qnan    any other tag (DENORM) -> sasind
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest arcsine; the math itself is in the handlers.)
1800 | global _fasins_ | |
1801 | _fasins_: | |
1802 | link %a6,&-LOCAL_SIZE | |
1803 | ||
1804 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1805 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1806 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1807 | ||
1808 | fmov.l &0x0,%fpcr # zero FPCR | |
1809 | ||
1810 | # | |
1811 | # copy, convert, and tag input argument | |
1812 | # | |
1813 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
1814 | fmov.x %fp0,FP_SRC(%a6) | |
1815 | lea FP_SRC(%a6),%a0 | |
1816 | bsr.l tag # fetch operand type | |
1817 | mov.b %d0,STAG(%a6) | |
1818 | mov.b %d0,%d1 | |
1819 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1820 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1821 | ||
1822 | clr.l %d0 | |
1823 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1824 | ||
# Dispatch on the operand tag held in d1.
1825 | tst.b %d1 | |
1826 | bne.b _L7_2s | |
1827 | bsr.l sasin # operand is a NORM | |
1828 | bra.b _L7_6s | |
1829 | _L7_2s: | |
1830 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1831 | bne.b _L7_3s # no | |
1832 | bsr.l src_zero # yes | |
1833 | bra.b _L7_6s | |
1834 | _L7_3s: | |
1835 | cmpi.b %d1,&INF # is operand an INF? | |
1836 | bne.b _L7_4s # no | |
1837 | bsr.l t_operr # yes | |
1838 | bra.b _L7_6s | |
1839 | _L7_4s: | |
1840 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1841 | bne.b _L7_5s # no | |
1842 | bsr.l src_qnan # yes | |
1843 | bra.b _L7_6s | |
1844 | _L7_5s: | |
1845 | bsr.l sasind # operand is a DENORM | |
1846 | _L7_6s: | |
1847 | ||
1848 | # | |
1849 | # Result is now in FP0 | |
1850 | # | |
1851 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1852 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1853 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1854 | unlk %a6 | |
1855 | rts | |
1856 | ||
# _fasind_ -- entry point for a double-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> sasin       ZERO -> src_zero     INF -> t_operr
#     QNAN     -> src_qnan    any other tag (DENORM) -> sasind
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest arcsine; the math itself is in the handlers.)
1857 | global _fasind_ | |
1858 | _fasind_: | |
1859 | link %a6,&-LOCAL_SIZE | |
1860 | ||
1861 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1862 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1863 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1864 | ||
1865 | fmov.l &0x0,%fpcr # zero FPCR | |
1866 | ||
1867 | # | |
1868 | # copy, convert, and tag input argument | |
1869 | # | |
1870 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
1871 | fmov.x %fp0,FP_SRC(%a6) | |
1872 | lea FP_SRC(%a6),%a0 | |
1873 | bsr.l tag # fetch operand type | |
1874 | mov.b %d0,STAG(%a6) | |
1875 | mov.b %d0,%d1 | |
1876 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1877 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1878 | ||
1879 | clr.l %d0 | |
1880 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1881 | ||
# NOTE(review): STAG was already written from d0 above and d1 is a copy of
# that value, so this second store looks redundant (the s/x entry points do
# not have it). Left untouched.
1882 | mov.b %d1,STAG(%a6) | |
# Dispatch on the operand tag held in d1.
1883 | tst.b %d1 | |
1884 | bne.b _L7_2d | |
1885 | bsr.l sasin # operand is a NORM | |
1886 | bra.b _L7_6d | |
1887 | _L7_2d: | |
1888 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1889 | bne.b _L7_3d # no | |
1890 | bsr.l src_zero # yes | |
1891 | bra.b _L7_6d | |
1892 | _L7_3d: | |
1893 | cmpi.b %d1,&INF # is operand an INF? | |
1894 | bne.b _L7_4d # no | |
1895 | bsr.l t_operr # yes | |
1896 | bra.b _L7_6d | |
1897 | _L7_4d: | |
1898 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1899 | bne.b _L7_5d # no | |
1900 | bsr.l src_qnan # yes | |
1901 | bra.b _L7_6d | |
1902 | _L7_5d: | |
1903 | bsr.l sasind # operand is a DENORM | |
1904 | _L7_6d: | |
1905 | ||
1906 | # | |
1907 | # Result is now in FP0 | |
1908 | # | |
1909 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1910 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1911 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1912 | unlk %a6 | |
1913 | rts | |
1914 | ||
# _fasinx_ -- entry point for an extended-precision operand at 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is copied as three longwords into FP_SRC
#   and classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> sasin       ZERO -> src_zero     INF -> t_operr
#     QNAN     -> src_qnan    any other tag (DENORM) -> sasind
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest arcsine; the math itself is in the handlers.)
1915 | global _fasinx_ | |
1916 | _fasinx_: | |
1917 | link %a6,&-LOCAL_SIZE | |
1918 | ||
1919 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1920 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1921 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1922 | ||
1923 | fmov.l &0x0,%fpcr # zero FPCR | |
1924 | ||
1925 | # | |
1926 | # copy, convert, and tag input argument | |
1927 | # | |
1928 | lea FP_SRC(%a6),%a0 | |
1929 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
1930 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
1931 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
1932 | bsr.l tag # fetch operand type | |
1933 | mov.b %d0,STAG(%a6) | |
1934 | mov.b %d0,%d1 | |
1935 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1936 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1937 | ||
1938 | clr.l %d0 | |
1939 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
1940 | ||
# Dispatch on the operand tag held in d1.
1941 | tst.b %d1 | |
1942 | bne.b _L7_2x | |
1943 | bsr.l sasin # operand is a NORM | |
1944 | bra.b _L7_6x | |
1945 | _L7_2x: | |
1946 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
1947 | bne.b _L7_3x # no | |
1948 | bsr.l src_zero # yes | |
1949 | bra.b _L7_6x | |
1950 | _L7_3x: | |
1951 | cmpi.b %d1,&INF # is operand an INF? | |
1952 | bne.b _L7_4x # no | |
1953 | bsr.l t_operr # yes | |
1954 | bra.b _L7_6x | |
1955 | _L7_4x: | |
1956 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
1957 | bne.b _L7_5x # no | |
1958 | bsr.l src_qnan # yes | |
1959 | bra.b _L7_6x | |
1960 | _L7_5x: | |
1961 | bsr.l sasind # operand is a DENORM | |
1962 | _L7_6x: | |
1963 | ||
1964 | # | |
1965 | # Result is now in FP0 | |
1966 | # | |
1967 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
1968 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
1969 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
1970 | unlk %a6 | |
1971 | rts | |
1972 | ||
1973 | ||
1974 | ######################################################################### | |
1975 | # MONADIC TEMPLATE # | |
1976 | ######################################################################### | |
# _fatanhs_ -- entry point for a single-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> satanh      ZERO -> src_zero     INF -> t_operr
#     QNAN     -> src_qnan    any other tag (DENORM) -> satanhd
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest atanh; the math itself is in the handlers.)
1977 | global _fatanhs_ | |
1978 | _fatanhs_: | |
1979 | link %a6,&-LOCAL_SIZE | |
1980 | ||
1981 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
1982 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
1983 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
1984 | ||
1985 | fmov.l &0x0,%fpcr # zero FPCR | |
1986 | ||
1987 | # | |
1988 | # copy, convert, and tag input argument | |
1989 | # | |
1990 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
1991 | fmov.x %fp0,FP_SRC(%a6) | |
1992 | lea FP_SRC(%a6),%a0 | |
1993 | bsr.l tag # fetch operand type | |
1994 | mov.b %d0,STAG(%a6) | |
1995 | mov.b %d0,%d1 | |
1996 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
1997 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
1998 | ||
1999 | clr.l %d0 | |
2000 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2001 | ||
# Dispatch on the operand tag held in d1.
2002 | tst.b %d1 | |
2003 | bne.b _L8_2s | |
2004 | bsr.l satanh # operand is a NORM | |
2005 | bra.b _L8_6s | |
2006 | _L8_2s: | |
2007 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2008 | bne.b _L8_3s # no | |
2009 | bsr.l src_zero # yes | |
2010 | bra.b _L8_6s | |
2011 | _L8_3s: | |
2012 | cmpi.b %d1,&INF # is operand an INF? | |
2013 | bne.b _L8_4s # no | |
2014 | bsr.l t_operr # yes | |
2015 | bra.b _L8_6s | |
2016 | _L8_4s: | |
2017 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2018 | bne.b _L8_5s # no | |
2019 | bsr.l src_qnan # yes | |
2020 | bra.b _L8_6s | |
2021 | _L8_5s: | |
2022 | bsr.l satanhd # operand is a DENORM | |
2023 | _L8_6s: | |
2024 | ||
2025 | # | |
2026 | # Result is now in FP0 | |
2027 | # | |
2028 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2029 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
2030 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
2031 | unlk %a6 | |
2032 | rts | |
2033 | ||
# _fatanhd_ -- entry point for a double-precision operand read from 0x8(%a6).
#   Builds a LOCAL_SIZE frame, saves d0-d1/a0-a1, FPCR/FPSR and fp0/fp1,
#   and zeroes FPCR. The operand is stored in extended format in FP_SRC and
#   classified by tag(); the returned tag byte selects the handler:
#     0 (NORM) -> satanh      ZERO -> src_zero     INF -> t_operr
#     QNAN     -> src_qnan    any other tag (DENORM) -> satanhd
#   Before the handler call d0 holds the zero-extended FPCR_MODE byte; a0 was
#   pointed at FP_SRC before tag() (assumes tag() preserves a0 -- TODO confirm).
#   On exit fp0 holds the result; d0-d1/a0-a1, FPCR/FPSR and fp1 are restored.
#   (Handler names suggest atanh; the math itself is in the handlers.)
2034 | global _fatanhd_ | |
2035 | _fatanhd_: | |
2036 | link %a6,&-LOCAL_SIZE | |
2037 | ||
2038 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2039 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2040 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2041 | ||
2042 | fmov.l &0x0,%fpcr # zero FPCR | |
2043 | ||
2044 | # | |
2045 | # copy, convert, and tag input argument | |
2046 | # | |
2047 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
2048 | fmov.x %fp0,FP_SRC(%a6) | |
2049 | lea FP_SRC(%a6),%a0 | |
2050 | bsr.l tag # fetch operand type | |
2051 | mov.b %d0,STAG(%a6) | |
2052 | mov.b %d0,%d1 | |
2053 | ||
# Keep only bits 23-16 and 7-0 of the longword at USER_FPSR; bits 31-24 and
# 15-8 are cleared (presumably the condition-code and exception-status bytes
# of the saved FPSR -- confirm against the USER_FPSR definition).
2054 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2055 | ||
2056 | clr.l %d0 | |
2057 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2058 | ||
# NOTE(review): STAG was already written from d0 above and d1 is a copy of
# that value, so this second store looks redundant (the s/x entry points do
# not have it). Left untouched.
2059 | mov.b %d1,STAG(%a6) | |
# Dispatch on the operand tag held in d1.
2060 | tst.b %d1 | |
2061 | bne.b _L8_2d | |
2062 | bsr.l satanh # operand is a NORM | |
2063 | bra.b _L8_6d | |
2064 | _L8_2d: | |
2065 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2066 | bne.b _L8_3d # no | |
2067 | bsr.l src_zero # yes | |
2068 | bra.b _L8_6d | |
2069 | _L8_3d: | |
2070 | cmpi.b %d1,&INF # is operand an INF? | |
2071 | bne.b _L8_4d # no | |
2072 | bsr.l t_operr # yes | |
2073 | bra.b _L8_6d | |
2074 | _L8_4d: | |
2075 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2076 | bne.b _L8_5d # no | |
2077 | bsr.l src_qnan # yes | |
2078 | bra.b _L8_6d | |
2079 | _L8_5d: | |
2080 | bsr.l satanhd # operand is a DENORM | |
2081 | _L8_6d: | |
2082 | ||
2083 | # | |
2084 | # Result is now in FP0 | |
2085 | # | |
2086 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2087 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# Only fp1 is reloaded from its save slot; fp0 keeps the result.
2088 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
2089 | unlk %a6 | |
2090 | rts | |
2091 | ||
# _fatanhx_ -- extended-precision entry point of the fatanh group.
#   In:  ext operand (three longwords) starting at 0x8(%a6) once the frame
#        is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> satanh, ZERO -> src_zero,
#        INF -> t_operr, QNAN -> src_qnan, any other tag -> satanhd.
2092 | global _fatanhx_ | |
2093 | _fatanhx_: | |
2094 | link %a6,&-LOCAL_SIZE | |
2095 | |
2096 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2097 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2098 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2099 | |
2100 | fmov.l &0x0,%fpcr # zero FPCR | |
2101 | |
2102 | # | |
2103 | # copy the ext argument (three longwords) into FP_SRC, then tag it | |
2104 | # | |
2105 | lea FP_SRC(%a6),%a0 | |
2106 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
2107 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
2108 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
2109 | bsr.l tag # fetch operand type | |
2110 | mov.b %d0,STAG(%a6) | |
2111 | mov.b %d0,%d1 | |
2112 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2113 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2114 | |
2115 | clr.l %d0 | |
2116 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2117 | |
# a zero tag selects the NORM path
2118 | tst.b %d1 | |
2119 | bne.b _L8_2x | |
2120 | bsr.l satanh # operand is a NORM | |
2121 | bra.b _L8_6x | |
2122 | _L8_2x: | |
2123 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2124 | bne.b _L8_3x # no | |
2125 | bsr.l src_zero # yes | |
2126 | bra.b _L8_6x | |
2127 | _L8_3x: | |
2128 | cmpi.b %d1,&INF # is operand an INF? | |
2129 | bne.b _L8_4x # no | |
2130 | bsr.l t_operr # yes | |
2131 | bra.b _L8_6x | |
2132 | _L8_4x: | |
2133 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2134 | bne.b _L8_5x # no | |
2135 | bsr.l src_qnan # yes | |
2136 | bra.b _L8_6x | |
2137 | _L8_5x: | |
2138 | bsr.l satanhd # any other tag: operand is a DENORM | |
2139 | _L8_6x: | |
2140 | |
2141 | # | |
2142 | # Result is now in FP0 | |
2143 | # | |
2144 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2145 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2146 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2147 | unlk %a6 | |
2148 | rts | |
2149 | ||
2150 | ||
2151 | ######################################################################### | |
2152 | # MONADIC TEMPLATE # | |
2153 | ######################################################################### | |
# _ftans_ -- single-precision entry point of the ftan group.
#   In:  sgl operand at 0x8(%a6) once the frame is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> stan, ZERO -> src_zero,
#        INF -> t_operr, QNAN -> src_qnan, any other tag -> stand.
2154 | global _ftans_ | |
2155 | _ftans_: | |
2156 | link %a6,&-LOCAL_SIZE | |
2157 | |
2158 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2159 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2160 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2161 | |
2162 | fmov.l &0x0,%fpcr # zero FPCR | |
2163 | |
2164 | # | |
2165 | # convert the sgl argument to extended in FP_SRC, then tag it | |
2166 | # | |
2167 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
2168 | fmov.x %fp0,FP_SRC(%a6) | |
2169 | lea FP_SRC(%a6),%a0 | |
2170 | bsr.l tag # fetch operand type | |
2171 | mov.b %d0,STAG(%a6) | |
2172 | mov.b %d0,%d1 | |
2173 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2174 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2175 | |
2176 | clr.l %d0 | |
2177 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2178 | |
# a zero tag selects the NORM path
2179 | tst.b %d1 | |
2180 | bne.b _L9_2s | |
2181 | bsr.l stan # operand is a NORM | |
2182 | bra.b _L9_6s | |
2183 | _L9_2s: | |
2184 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2185 | bne.b _L9_3s # no | |
2186 | bsr.l src_zero # yes | |
2187 | bra.b _L9_6s | |
2188 | _L9_3s: | |
2189 | cmpi.b %d1,&INF # is operand an INF? | |
2190 | bne.b _L9_4s # no | |
2191 | bsr.l t_operr # yes | |
2192 | bra.b _L9_6s | |
2193 | _L9_4s: | |
2194 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2195 | bne.b _L9_5s # no | |
2196 | bsr.l src_qnan # yes | |
2197 | bra.b _L9_6s | |
2198 | _L9_5s: | |
2199 | bsr.l stand # any other tag: operand is a DENORM | |
2200 | _L9_6s: | |
2201 | |
2202 | # | |
2203 | # Result is now in FP0 | |
2204 | # | |
2205 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2206 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2207 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2208 | unlk %a6 | |
2209 | rts | |
2210 | ||
# _ftand_ -- double-precision entry point of the ftan group.
#   In:  dbl operand at 0x8(%a6) once the frame is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> stan, ZERO -> src_zero,
#        INF -> t_operr, QNAN -> src_qnan, any other tag -> stand.
2211 | global _ftand_ | |
2212 | _ftand_: | |
2213 | link %a6,&-LOCAL_SIZE | |
2214 | |
2215 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2216 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2217 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2218 | |
2219 | fmov.l &0x0,%fpcr # zero FPCR | |
2220 | |
2221 | # | |
2222 | # convert the dbl argument to extended in FP_SRC, then tag it | |
2223 | # | |
2224 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
2225 | fmov.x %fp0,FP_SRC(%a6) | |
2226 | lea FP_SRC(%a6),%a0 | |
2227 | bsr.l tag # fetch operand type | |
2228 | mov.b %d0,STAG(%a6) | |
2229 | mov.b %d0,%d1 | |
2230 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2231 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2232 | |
2233 | clr.l %d0 | |
2234 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2235 | |
# NOTE(review): STAG already received this same tag value right after tag()
2236 | mov.b %d1,STAG(%a6) | |
# a zero tag selects the NORM path
2237 | tst.b %d1 | |
2238 | bne.b _L9_2d | |
2239 | bsr.l stan # operand is a NORM | |
2240 | bra.b _L9_6d | |
2241 | _L9_2d: | |
2242 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2243 | bne.b _L9_3d # no | |
2244 | bsr.l src_zero # yes | |
2245 | bra.b _L9_6d | |
2246 | _L9_3d: | |
2247 | cmpi.b %d1,&INF # is operand an INF? | |
2248 | bne.b _L9_4d # no | |
2249 | bsr.l t_operr # yes | |
2250 | bra.b _L9_6d | |
2251 | _L9_4d: | |
2252 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2253 | bne.b _L9_5d # no | |
2254 | bsr.l src_qnan # yes | |
2255 | bra.b _L9_6d | |
2256 | _L9_5d: | |
2257 | bsr.l stand # any other tag: operand is a DENORM | |
2258 | _L9_6d: | |
2259 | |
2260 | # | |
2261 | # Result is now in FP0 | |
2262 | # | |
2263 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2264 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2265 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2266 | unlk %a6 | |
2267 | rts | |
2268 | ||
# _ftanx_ -- extended-precision entry point of the ftan group.
#   In:  ext operand (three longwords) starting at 0x8(%a6) once the frame
#        is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> stan, ZERO -> src_zero,
#        INF -> t_operr, QNAN -> src_qnan, any other tag -> stand.
2269 | global _ftanx_ | |
2270 | _ftanx_: | |
2271 | link %a6,&-LOCAL_SIZE | |
2272 | |
2273 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2274 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2275 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2276 | |
2277 | fmov.l &0x0,%fpcr # zero FPCR | |
2278 | |
2279 | # | |
2280 | # copy the ext argument (three longwords) into FP_SRC, then tag it | |
2281 | # | |
2282 | lea FP_SRC(%a6),%a0 | |
2283 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
2284 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
2285 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
2286 | bsr.l tag # fetch operand type | |
2287 | mov.b %d0,STAG(%a6) | |
2288 | mov.b %d0,%d1 | |
2289 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2290 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2291 | |
2292 | clr.l %d0 | |
2293 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2294 | |
# a zero tag selects the NORM path
2295 | tst.b %d1 | |
2296 | bne.b _L9_2x | |
2297 | bsr.l stan # operand is a NORM | |
2298 | bra.b _L9_6x | |
2299 | _L9_2x: | |
2300 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2301 | bne.b _L9_3x # no | |
2302 | bsr.l src_zero # yes | |
2303 | bra.b _L9_6x | |
2304 | _L9_3x: | |
2305 | cmpi.b %d1,&INF # is operand an INF? | |
2306 | bne.b _L9_4x # no | |
2307 | bsr.l t_operr # yes | |
2308 | bra.b _L9_6x | |
2309 | _L9_4x: | |
2310 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2311 | bne.b _L9_5x # no | |
2312 | bsr.l src_qnan # yes | |
2313 | bra.b _L9_6x | |
2314 | _L9_5x: | |
2315 | bsr.l stand # any other tag: operand is a DENORM | |
2316 | _L9_6x: | |
2317 | |
2318 | # | |
2319 | # Result is now in FP0 | |
2320 | # | |
2321 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2322 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2323 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2324 | unlk %a6 | |
2325 | rts | |
2326 | ||
2327 | ||
2328 | ######################################################################### | |
2329 | # MONADIC TEMPLATE # | |
2330 | ######################################################################### | |
# _fetoxs_ -- single-precision entry point of the fetox group.
#   In:  sgl operand at 0x8(%a6) once the frame is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> setox, ZERO -> ld_pone,
#        INF -> szr_inf, QNAN -> src_qnan, any other tag -> setoxd.
2331 | global _fetoxs_ | |
2332 | _fetoxs_: | |
2333 | link %a6,&-LOCAL_SIZE | |
2334 | |
2335 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2336 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2337 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2338 | |
2339 | fmov.l &0x0,%fpcr # zero FPCR | |
2340 | |
2341 | # | |
2342 | # convert the sgl argument to extended in FP_SRC, then tag it | |
2343 | # | |
2344 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
2345 | fmov.x %fp0,FP_SRC(%a6) | |
2346 | lea FP_SRC(%a6),%a0 | |
2347 | bsr.l tag # fetch operand type | |
2348 | mov.b %d0,STAG(%a6) | |
2349 | mov.b %d0,%d1 | |
2350 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2351 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2352 | |
2353 | clr.l %d0 | |
2354 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2355 | |
# a zero tag selects the NORM path
2356 | tst.b %d1 | |
2357 | bne.b _L10_2s | |
2358 | bsr.l setox # operand is a NORM | |
2359 | bra.b _L10_6s | |
2360 | _L10_2s: | |
2361 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2362 | bne.b _L10_3s # no | |
2363 | bsr.l ld_pone # yes | |
2364 | bra.b _L10_6s | |
2365 | _L10_3s: | |
2366 | cmpi.b %d1,&INF # is operand an INF? | |
2367 | bne.b _L10_4s # no | |
2368 | bsr.l szr_inf # yes | |
2369 | bra.b _L10_6s | |
2370 | _L10_4s: | |
2371 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2372 | bne.b _L10_5s # no | |
2373 | bsr.l src_qnan # yes | |
2374 | bra.b _L10_6s | |
2375 | _L10_5s: | |
2376 | bsr.l setoxd # any other tag: operand is a DENORM | |
2377 | _L10_6s: | |
2378 | |
2379 | # | |
2380 | # Result is now in FP0 | |
2381 | # | |
2382 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2383 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2384 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2385 | unlk %a6 | |
2386 | rts | |
2387 | ||
# _fetoxd_ -- double-precision entry point of the fetox group.
#   In:  dbl operand at 0x8(%a6) once the frame is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> setox, ZERO -> ld_pone,
#        INF -> szr_inf, QNAN -> src_qnan, any other tag -> setoxd.
2388 | global _fetoxd_ | |
2389 | _fetoxd_: | |
2390 | link %a6,&-LOCAL_SIZE | |
2391 | |
2392 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2393 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2394 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2395 | |
2396 | fmov.l &0x0,%fpcr # zero FPCR | |
2397 | |
2398 | # | |
2399 | # convert the dbl argument to extended in FP_SRC, then tag it | |
2400 | # | |
2401 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
2402 | fmov.x %fp0,FP_SRC(%a6) | |
2403 | lea FP_SRC(%a6),%a0 | |
2404 | bsr.l tag # fetch operand type | |
2405 | mov.b %d0,STAG(%a6) | |
2406 | mov.b %d0,%d1 | |
2407 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2408 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2409 | |
2410 | clr.l %d0 | |
2411 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2412 | |
# NOTE(review): STAG already received this same tag value right after tag()
2413 | mov.b %d1,STAG(%a6) | |
# a zero tag selects the NORM path
2414 | tst.b %d1 | |
2415 | bne.b _L10_2d | |
2416 | bsr.l setox # operand is a NORM | |
2417 | bra.b _L10_6d | |
2418 | _L10_2d: | |
2419 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2420 | bne.b _L10_3d # no | |
2421 | bsr.l ld_pone # yes | |
2422 | bra.b _L10_6d | |
2423 | _L10_3d: | |
2424 | cmpi.b %d1,&INF # is operand an INF? | |
2425 | bne.b _L10_4d # no | |
2426 | bsr.l szr_inf # yes | |
2427 | bra.b _L10_6d | |
2428 | _L10_4d: | |
2429 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2430 | bne.b _L10_5d # no | |
2431 | bsr.l src_qnan # yes | |
2432 | bra.b _L10_6d | |
2433 | _L10_5d: | |
2434 | bsr.l setoxd # any other tag: operand is a DENORM | |
2435 | _L10_6d: | |
2436 | |
2437 | # | |
2438 | # Result is now in FP0 | |
2439 | # | |
2440 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2441 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2442 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2443 | unlk %a6 | |
2444 | rts | |
2445 | ||
# _fetoxx_ -- extended-precision entry point of the fetox group.
#   In:  ext operand (three longwords) starting at 0x8(%a6) once the frame
#        is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> setox, ZERO -> ld_pone,
#        INF -> szr_inf, QNAN -> src_qnan, any other tag -> setoxd.
2446 | global _fetoxx_ | |
2447 | _fetoxx_: | |
2448 | link %a6,&-LOCAL_SIZE | |
2449 | |
2450 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2451 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2452 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2453 | |
2454 | fmov.l &0x0,%fpcr # zero FPCR | |
2455 | |
2456 | # | |
2457 | # copy the ext argument (three longwords) into FP_SRC, then tag it | |
2458 | # | |
2459 | lea FP_SRC(%a6),%a0 | |
2460 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
2461 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
2462 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
2463 | bsr.l tag # fetch operand type | |
2464 | mov.b %d0,STAG(%a6) | |
2465 | mov.b %d0,%d1 | |
2466 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2467 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2468 | |
2469 | clr.l %d0 | |
2470 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2471 | |
# a zero tag selects the NORM path
2472 | tst.b %d1 | |
2473 | bne.b _L10_2x | |
2474 | bsr.l setox # operand is a NORM | |
2475 | bra.b _L10_6x | |
2476 | _L10_2x: | |
2477 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2478 | bne.b _L10_3x # no | |
2479 | bsr.l ld_pone # yes | |
2480 | bra.b _L10_6x | |
2481 | _L10_3x: | |
2482 | cmpi.b %d1,&INF # is operand an INF? | |
2483 | bne.b _L10_4x # no | |
2484 | bsr.l szr_inf # yes | |
2485 | bra.b _L10_6x | |
2486 | _L10_4x: | |
2487 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2488 | bne.b _L10_5x # no | |
2489 | bsr.l src_qnan # yes | |
2490 | bra.b _L10_6x | |
2491 | _L10_5x: | |
2492 | bsr.l setoxd # any other tag: operand is a DENORM | |
2493 | _L10_6x: | |
2494 | |
2495 | # | |
2496 | # Result is now in FP0 | |
2497 | # | |
2498 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2499 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2500 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2501 | unlk %a6 | |
2502 | rts | |
2503 | ||
2504 | ||
2505 | ######################################################################### | |
2506 | # MONADIC TEMPLATE # | |
2507 | ######################################################################### | |
# _ftwotoxs_ -- single-precision entry point of the ftwotox group.
#   In:  sgl operand at 0x8(%a6) once the frame is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> stwotox, ZERO -> ld_pone,
#        INF -> szr_inf, QNAN -> src_qnan, any other tag -> stwotoxd.
2508 | global _ftwotoxs_ | |
2509 | _ftwotoxs_: | |
2510 | link %a6,&-LOCAL_SIZE | |
2511 | |
2512 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2513 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2514 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2515 | |
2516 | fmov.l &0x0,%fpcr # zero FPCR | |
2517 | |
2518 | # | |
2519 | # convert the sgl argument to extended in FP_SRC, then tag it | |
2520 | # | |
2521 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
2522 | fmov.x %fp0,FP_SRC(%a6) | |
2523 | lea FP_SRC(%a6),%a0 | |
2524 | bsr.l tag # fetch operand type | |
2525 | mov.b %d0,STAG(%a6) | |
2526 | mov.b %d0,%d1 | |
2527 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2528 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2529 | |
2530 | clr.l %d0 | |
2531 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2532 | |
# a zero tag selects the NORM path
2533 | tst.b %d1 | |
2534 | bne.b _L11_2s | |
2535 | bsr.l stwotox # operand is a NORM | |
2536 | bra.b _L11_6s | |
2537 | _L11_2s: | |
2538 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2539 | bne.b _L11_3s # no | |
2540 | bsr.l ld_pone # yes | |
2541 | bra.b _L11_6s | |
2542 | _L11_3s: | |
2543 | cmpi.b %d1,&INF # is operand an INF? | |
2544 | bne.b _L11_4s # no | |
2545 | bsr.l szr_inf # yes | |
2546 | bra.b _L11_6s | |
2547 | _L11_4s: | |
2548 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2549 | bne.b _L11_5s # no | |
2550 | bsr.l src_qnan # yes | |
2551 | bra.b _L11_6s | |
2552 | _L11_5s: | |
2553 | bsr.l stwotoxd # any other tag: operand is a DENORM | |
2554 | _L11_6s: | |
2555 | |
2556 | # | |
2557 | # Result is now in FP0 | |
2558 | # | |
2559 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2560 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2561 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2562 | unlk %a6 | |
2563 | rts | |
2564 | ||
# _ftwotoxd_ -- double-precision entry point of the ftwotox group.
#   In:  dbl operand at 0x8(%a6) once the frame is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> stwotox, ZERO -> ld_pone,
#        INF -> szr_inf, QNAN -> src_qnan, any other tag -> stwotoxd.
2565 | global _ftwotoxd_ | |
2566 | _ftwotoxd_: | |
2567 | link %a6,&-LOCAL_SIZE | |
2568 | |
2569 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2570 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2571 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2572 | |
2573 | fmov.l &0x0,%fpcr # zero FPCR | |
2574 | |
2575 | # | |
2576 | # convert the dbl argument to extended in FP_SRC, then tag it | |
2577 | # | |
2578 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
2579 | fmov.x %fp0,FP_SRC(%a6) | |
2580 | lea FP_SRC(%a6),%a0 | |
2581 | bsr.l tag # fetch operand type | |
2582 | mov.b %d0,STAG(%a6) | |
2583 | mov.b %d0,%d1 | |
2584 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2585 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2586 | |
2587 | clr.l %d0 | |
2588 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2589 | |
# NOTE(review): STAG already received this same tag value right after tag()
2590 | mov.b %d1,STAG(%a6) | |
# a zero tag selects the NORM path
2591 | tst.b %d1 | |
2592 | bne.b _L11_2d | |
2593 | bsr.l stwotox # operand is a NORM | |
2594 | bra.b _L11_6d | |
2595 | _L11_2d: | |
2596 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2597 | bne.b _L11_3d # no | |
2598 | bsr.l ld_pone # yes | |
2599 | bra.b _L11_6d | |
2600 | _L11_3d: | |
2601 | cmpi.b %d1,&INF # is operand an INF? | |
2602 | bne.b _L11_4d # no | |
2603 | bsr.l szr_inf # yes | |
2604 | bra.b _L11_6d | |
2605 | _L11_4d: | |
2606 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2607 | bne.b _L11_5d # no | |
2608 | bsr.l src_qnan # yes | |
2609 | bra.b _L11_6d | |
2610 | _L11_5d: | |
2611 | bsr.l stwotoxd # any other tag: operand is a DENORM | |
2612 | _L11_6d: | |
2613 | |
2614 | # | |
2615 | # Result is now in FP0 | |
2616 | # | |
2617 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2618 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2619 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2620 | unlk %a6 | |
2621 | rts | |
2622 | ||
# _ftwotoxx_ -- extended-precision entry point of the ftwotox group.
#   In:  ext operand (three longwords) starting at 0x8(%a6) once the frame
#        is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> stwotox, ZERO -> ld_pone,
#        INF -> szr_inf, QNAN -> src_qnan, any other tag -> stwotoxd.
2623 | global _ftwotoxx_ | |
2624 | _ftwotoxx_: | |
2625 | link %a6,&-LOCAL_SIZE | |
2626 | |
2627 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2628 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2629 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2630 | |
2631 | fmov.l &0x0,%fpcr # zero FPCR | |
2632 | |
2633 | # | |
2634 | # copy the ext argument (three longwords) into FP_SRC, then tag it | |
2635 | # | |
2636 | lea FP_SRC(%a6),%a0 | |
2637 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
2638 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
2639 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
2640 | bsr.l tag # fetch operand type | |
2641 | mov.b %d0,STAG(%a6) | |
2642 | mov.b %d0,%d1 | |
2643 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2644 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2645 | |
2646 | clr.l %d0 | |
2647 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2648 | |
# a zero tag selects the NORM path
2649 | tst.b %d1 | |
2650 | bne.b _L11_2x | |
2651 | bsr.l stwotox # operand is a NORM | |
2652 | bra.b _L11_6x | |
2653 | _L11_2x: | |
2654 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2655 | bne.b _L11_3x # no | |
2656 | bsr.l ld_pone # yes | |
2657 | bra.b _L11_6x | |
2658 | _L11_3x: | |
2659 | cmpi.b %d1,&INF # is operand an INF? | |
2660 | bne.b _L11_4x # no | |
2661 | bsr.l szr_inf # yes | |
2662 | bra.b _L11_6x | |
2663 | _L11_4x: | |
2664 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2665 | bne.b _L11_5x # no | |
2666 | bsr.l src_qnan # yes | |
2667 | bra.b _L11_6x | |
2668 | _L11_5x: | |
2669 | bsr.l stwotoxd # any other tag: operand is a DENORM | |
2670 | _L11_6x: | |
2671 | |
2672 | # | |
2673 | # Result is now in FP0 | |
2674 | # | |
2675 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2676 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2677 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2678 | unlk %a6 | |
2679 | rts | |
2680 | ||
2681 | ||
2682 | ######################################################################### | |
2683 | # MONADIC TEMPLATE # | |
2684 | ######################################################################### | |
# _ftentoxs_ -- single-precision entry point of the ftentox group.
#   In:  sgl operand at 0x8(%a6) once the frame is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> stentox, ZERO -> ld_pone,
#        INF -> szr_inf, QNAN -> src_qnan, any other tag -> stentoxd.
2685 | global _ftentoxs_ | |
2686 | _ftentoxs_: | |
2687 | link %a6,&-LOCAL_SIZE | |
2688 | |
2689 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2690 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2691 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2692 | |
2693 | fmov.l &0x0,%fpcr # zero FPCR | |
2694 | |
2695 | # | |
2696 | # convert the sgl argument to extended in FP_SRC, then tag it | |
2697 | # | |
2698 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
2699 | fmov.x %fp0,FP_SRC(%a6) | |
2700 | lea FP_SRC(%a6),%a0 | |
2701 | bsr.l tag # fetch operand type | |
2702 | mov.b %d0,STAG(%a6) | |
2703 | mov.b %d0,%d1 | |
2704 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2705 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2706 | |
2707 | clr.l %d0 | |
2708 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2709 | |
# a zero tag selects the NORM path
2710 | tst.b %d1 | |
2711 | bne.b _L12_2s | |
2712 | bsr.l stentox # operand is a NORM | |
2713 | bra.b _L12_6s | |
2714 | _L12_2s: | |
2715 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2716 | bne.b _L12_3s # no | |
2717 | bsr.l ld_pone # yes | |
2718 | bra.b _L12_6s | |
2719 | _L12_3s: | |
2720 | cmpi.b %d1,&INF # is operand an INF? | |
2721 | bne.b _L12_4s # no | |
2722 | bsr.l szr_inf # yes | |
2723 | bra.b _L12_6s | |
2724 | _L12_4s: | |
2725 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2726 | bne.b _L12_5s # no | |
2727 | bsr.l src_qnan # yes | |
2728 | bra.b _L12_6s | |
2729 | _L12_5s: | |
2730 | bsr.l stentoxd # any other tag: operand is a DENORM | |
2731 | _L12_6s: | |
2732 | |
2733 | # | |
2734 | # Result is now in FP0 | |
2735 | # | |
2736 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2737 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2738 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2739 | unlk %a6 | |
2740 | rts | |
2741 | ||
# _ftentoxd_ -- double-precision entry point of the ftentox group.
#   In:  dbl operand at 0x8(%a6) once the frame is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> stentox, ZERO -> ld_pone,
#        INF -> szr_inf, QNAN -> src_qnan, any other tag -> stentoxd.
2742 | global _ftentoxd_ | |
2743 | _ftentoxd_: | |
2744 | link %a6,&-LOCAL_SIZE | |
2745 | |
2746 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2747 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2748 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2749 | |
2750 | fmov.l &0x0,%fpcr # zero FPCR | |
2751 | |
2752 | # | |
2753 | # convert the dbl argument to extended in FP_SRC, then tag it | |
2754 | # | |
2755 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
2756 | fmov.x %fp0,FP_SRC(%a6) | |
2757 | lea FP_SRC(%a6),%a0 | |
2758 | bsr.l tag # fetch operand type | |
2759 | mov.b %d0,STAG(%a6) | |
2760 | mov.b %d0,%d1 | |
2761 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2762 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2763 | |
2764 | clr.l %d0 | |
2765 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2766 | |
# NOTE(review): STAG already received this same tag value right after tag()
2767 | mov.b %d1,STAG(%a6) | |
# a zero tag selects the NORM path
2768 | tst.b %d1 | |
2769 | bne.b _L12_2d | |
2770 | bsr.l stentox # operand is a NORM | |
2771 | bra.b _L12_6d | |
2772 | _L12_2d: | |
2773 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2774 | bne.b _L12_3d # no | |
2775 | bsr.l ld_pone # yes | |
2776 | bra.b _L12_6d | |
2777 | _L12_3d: | |
2778 | cmpi.b %d1,&INF # is operand an INF? | |
2779 | bne.b _L12_4d # no | |
2780 | bsr.l szr_inf # yes | |
2781 | bra.b _L12_6d | |
2782 | _L12_4d: | |
2783 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2784 | bne.b _L12_5d # no | |
2785 | bsr.l src_qnan # yes | |
2786 | bra.b _L12_6d | |
2787 | _L12_5d: | |
2788 | bsr.l stentoxd # any other tag: operand is a DENORM | |
2789 | _L12_6d: | |
2790 | |
2791 | # | |
2792 | # Result is now in FP0 | |
2793 | # | |
2794 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2795 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2796 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2797 | unlk %a6 | |
2798 | rts | |
2799 | ||
# _ftentoxx_ -- extended-precision entry point of the ftentox group.
#   In:  ext operand (three longwords) starting at 0x8(%a6) once the frame
#        is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> stentox, ZERO -> ld_pone,
#        INF -> szr_inf, QNAN -> src_qnan, any other tag -> stentoxd.
2800 | global _ftentoxx_ | |
2801 | _ftentoxx_: | |
2802 | link %a6,&-LOCAL_SIZE | |
2803 | |
2804 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2805 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2806 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2807 | |
2808 | fmov.l &0x0,%fpcr # zero FPCR | |
2809 | |
2810 | # | |
2811 | # copy the ext argument (three longwords) into FP_SRC, then tag it | |
2812 | # | |
2813 | lea FP_SRC(%a6),%a0 | |
2814 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
2815 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
2816 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
2817 | bsr.l tag # fetch operand type | |
2818 | mov.b %d0,STAG(%a6) | |
2819 | mov.b %d0,%d1 | |
2820 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2821 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2822 | |
2823 | clr.l %d0 | |
2824 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2825 | |
# a zero tag selects the NORM path
2826 | tst.b %d1 | |
2827 | bne.b _L12_2x | |
2828 | bsr.l stentox # operand is a NORM | |
2829 | bra.b _L12_6x | |
2830 | _L12_2x: | |
2831 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2832 | bne.b _L12_3x # no | |
2833 | bsr.l ld_pone # yes | |
2834 | bra.b _L12_6x | |
2835 | _L12_3x: | |
2836 | cmpi.b %d1,&INF # is operand an INF? | |
2837 | bne.b _L12_4x # no | |
2838 | bsr.l szr_inf # yes | |
2839 | bra.b _L12_6x | |
2840 | _L12_4x: | |
2841 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2842 | bne.b _L12_5x # no | |
2843 | bsr.l src_qnan # yes | |
2844 | bra.b _L12_6x | |
2845 | _L12_5x: | |
2846 | bsr.l stentoxd # any other tag: operand is a DENORM | |
2847 | _L12_6x: | |
2848 | |
2849 | # | |
2850 | # Result is now in FP0 | |
2851 | # | |
2852 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2853 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2854 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2855 | unlk %a6 | |
2856 | rts | |
2857 | ||
2858 | ||
2859 | ######################################################################### | |
2860 | # MONADIC TEMPLATE # | |
2861 | ######################################################################### | |
# _flogns_ -- single-precision entry point of the flogn group.
#   In:  sgl operand at 0x8(%a6) once the frame is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> slogn, ZERO -> t_dz2,
#        INF -> sopr_inf, QNAN -> src_qnan, any other tag -> slognd.
2862 | global _flogns_ | |
2863 | _flogns_: | |
2864 | link %a6,&-LOCAL_SIZE | |
2865 | |
2866 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2867 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2868 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2869 | |
2870 | fmov.l &0x0,%fpcr # zero FPCR | |
2871 | |
2872 | # | |
2873 | # convert the sgl argument to extended in FP_SRC, then tag it | |
2874 | # | |
2875 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
2876 | fmov.x %fp0,FP_SRC(%a6) | |
2877 | lea FP_SRC(%a6),%a0 | |
2878 | bsr.l tag # fetch operand type | |
2879 | mov.b %d0,STAG(%a6) | |
2880 | mov.b %d0,%d1 | |
2881 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2882 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2883 | |
2884 | clr.l %d0 | |
2885 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2886 | |
# a zero tag selects the NORM path
2887 | tst.b %d1 | |
2888 | bne.b _L13_2s | |
2889 | bsr.l slogn # operand is a NORM | |
2890 | bra.b _L13_6s | |
2891 | _L13_2s: | |
2892 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2893 | bne.b _L13_3s # no | |
2894 | bsr.l t_dz2 # yes | |
2895 | bra.b _L13_6s | |
2896 | _L13_3s: | |
2897 | cmpi.b %d1,&INF # is operand an INF? | |
2898 | bne.b _L13_4s # no | |
2899 | bsr.l sopr_inf # yes | |
2900 | bra.b _L13_6s | |
2901 | _L13_4s: | |
2902 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2903 | bne.b _L13_5s # no | |
2904 | bsr.l src_qnan # yes | |
2905 | bra.b _L13_6s | |
2906 | _L13_5s: | |
2907 | bsr.l slognd # any other tag: operand is a DENORM | |
2908 | _L13_6s: | |
2909 | |
2910 | # | |
2911 | # Result is now in FP0 | |
2912 | # | |
2913 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2914 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2915 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2916 | unlk %a6 | |
2917 | rts | |
2918 | ||
# _flognd_ -- double-precision entry point of the flogn group.
#   In:  dbl operand at 0x8(%a6) once the frame is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> slogn, ZERO -> t_dz2,
#        INF -> sopr_inf, QNAN -> src_qnan, any other tag -> slognd.
2919 | global _flognd_ | |
2920 | _flognd_: | |
2921 | link %a6,&-LOCAL_SIZE | |
2922 | |
2923 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2924 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2925 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2926 | |
2927 | fmov.l &0x0,%fpcr # zero FPCR | |
2928 | |
2929 | # | |
2930 | # convert the dbl argument to extended in FP_SRC, then tag it | |
2931 | # | |
2932 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
2933 | fmov.x %fp0,FP_SRC(%a6) | |
2934 | lea FP_SRC(%a6),%a0 | |
2935 | bsr.l tag # fetch operand type | |
2936 | mov.b %d0,STAG(%a6) | |
2937 | mov.b %d0,%d1 | |
2938 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2939 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2940 | |
2941 | clr.l %d0 | |
2942 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
2943 | |
# NOTE(review): STAG already received this same tag value right after tag()
2944 | mov.b %d1,STAG(%a6) | |
# a zero tag selects the NORM path
2945 | tst.b %d1 | |
2946 | bne.b _L13_2d | |
2947 | bsr.l slogn # operand is a NORM | |
2948 | bra.b _L13_6d | |
2949 | _L13_2d: | |
2950 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
2951 | bne.b _L13_3d # no | |
2952 | bsr.l t_dz2 # yes | |
2953 | bra.b _L13_6d | |
2954 | _L13_3d: | |
2955 | cmpi.b %d1,&INF # is operand an INF? | |
2956 | bne.b _L13_4d # no | |
2957 | bsr.l sopr_inf # yes | |
2958 | bra.b _L13_6d | |
2959 | _L13_4d: | |
2960 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
2961 | bne.b _L13_5d # no | |
2962 | bsr.l src_qnan # yes | |
2963 | bra.b _L13_6d | |
2964 | _L13_5d: | |
2965 | bsr.l slognd # any other tag: operand is a DENORM | |
2966 | _L13_6d: | |
2967 | |
2968 | # | |
2969 | # Result is now in FP0 | |
2970 | # | |
2971 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
2972 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
2973 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
2974 | unlk %a6 | |
2975 | rts | |
2976 | ||
# _flognx_ -- extended-precision entry point of the flogn group.
#   In:  ext operand (three longwords) starting at 0x8(%a6) once the frame
#        is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> slogn, ZERO -> t_dz2,
#        INF -> sopr_inf, QNAN -> src_qnan, any other tag -> slognd.
2977 | global _flognx_ | |
2978 | _flognx_: | |
2979 | link %a6,&-LOCAL_SIZE | |
2980 | |
2981 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
2982 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
2983 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
2984 | |
2985 | fmov.l &0x0,%fpcr # zero FPCR | |
2986 | |
2987 | # | |
2988 | # copy the ext argument (three longwords) into FP_SRC, then tag it | |
2989 | # | |
2990 | lea FP_SRC(%a6),%a0 | |
2991 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
2992 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
2993 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
2994 | bsr.l tag # fetch operand type | |
2995 | mov.b %d0,STAG(%a6) | |
2996 | mov.b %d0,%d1 | |
2997 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
2998 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
2999 | |
3000 | clr.l %d0 | |
3001 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3002 | |
# a zero tag selects the NORM path
3003 | tst.b %d1 | |
3004 | bne.b _L13_2x | |
3005 | bsr.l slogn # operand is a NORM | |
3006 | bra.b _L13_6x | |
3007 | _L13_2x: | |
3008 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3009 | bne.b _L13_3x # no | |
3010 | bsr.l t_dz2 # yes | |
3011 | bra.b _L13_6x | |
3012 | _L13_3x: | |
3013 | cmpi.b %d1,&INF # is operand an INF? | |
3014 | bne.b _L13_4x # no | |
3015 | bsr.l sopr_inf # yes | |
3016 | bra.b _L13_6x | |
3017 | _L13_4x: | |
3018 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3019 | bne.b _L13_5x # no | |
3020 | bsr.l src_qnan # yes | |
3021 | bra.b _L13_6x | |
3022 | _L13_5x: | |
3023 | bsr.l slognd # any other tag: operand is a DENORM | |
3024 | _L13_6x: | |
3025 | |
3026 | # | |
3027 | # Result is now in FP0 | |
3028 | # | |
3029 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3030 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
3031 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
3032 | unlk %a6 | |
3033 | rts | |
3034 | ||
3035 | ||
3036 | ######################################################################### | |
3037 | # MONADIC TEMPLATE # | |
3038 | ######################################################################### | |
# _flog10s_ -- single-precision entry point of the flog10 group.
#   In:  sgl operand at 0x8(%a6) once the frame is linked.
#   Out: fp0 holds the result (see the epilogue comment); d0-d1/a0-a1,
#        fpcr/fpsr and fp1 are reloaded from the frame.
#   Dispatch on the tag returned by tag(): NORM -> slog10, ZERO -> t_dz2,
#        INF -> sopr_inf, QNAN -> src_qnan, any other tag -> slog10d.
3039 | global _flog10s_ | |
3040 | _flog10s_: | |
3041 | link %a6,&-LOCAL_SIZE | |
3042 | |
3043 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3044 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3045 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3046 | |
3047 | fmov.l &0x0,%fpcr # zero FPCR | |
3048 | |
3049 | # | |
3050 | # convert the sgl argument to extended in FP_SRC, then tag it | |
3051 | # | |
3052 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
3053 | fmov.x %fp0,FP_SRC(%a6) | |
3054 | lea FP_SRC(%a6),%a0 | |
3055 | bsr.l tag # fetch operand type | |
3056 | mov.b %d0,STAG(%a6) | |
3057 | mov.b %d0,%d1 | |
3058 | |
# USER_FPSR slot: clear bits 31-24 and 15-8, keep bits 23-16 and 7-0
3059 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3060 | |
3061 | clr.l %d0 | |
3062 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3063 | |
# a zero tag selects the NORM path
3064 | tst.b %d1 | |
3065 | bne.b _L14_2s | |
3066 | bsr.l slog10 # operand is a NORM | |
3067 | bra.b _L14_6s | |
3068 | _L14_2s: | |
3069 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3070 | bne.b _L14_3s # no | |
3071 | bsr.l t_dz2 # yes | |
3072 | bra.b _L14_6s | |
3073 | _L14_3s: | |
3074 | cmpi.b %d1,&INF # is operand an INF? | |
3075 | bne.b _L14_4s # no | |
3076 | bsr.l sopr_inf # yes | |
3077 | bra.b _L14_6s | |
3078 | _L14_4s: | |
3079 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3080 | bne.b _L14_5s # no | |
3081 | bsr.l src_qnan # yes | |
3082 | bra.b _L14_6s | |
3083 | _L14_5s: | |
3084 | bsr.l slog10d # any other tag: operand is a DENORM | |
3085 | _L14_6s: | |
3086 | |
3087 | # | |
3088 | # Result is now in FP0 | |
3089 | # | |
3090 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3091 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
3092 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only; fp0 keeps the result | |
3093 | unlk %a6 | |
3094 | rts | |
3095 | ||
# _flog10d_: double-precision entry point of the flog10 group.
# Loads the dbl operand at 0x8(%a6) into fp0, stores it in extended format at
# FP_SRC(%a6), classifies it with tag() and calls exactly one handler:
#   NORM -> slog10, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slog10d (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3096 | global _flog10d_ | |
3097 | _flog10d_: | |
3098 | link %a6,&-LOCAL_SIZE | |
3099 | ||
3100 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3101 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3102 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3103 | ||
3104 | fmov.l &0x0,%fpcr # zero FPCR | |
3105 | ||
3106 | # | |
3107 | # copy, convert, and tag input argument | |
3108 | # | |
3109 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
3110 | fmov.x %fp0,FP_SRC(%a6) | |
3111 | lea FP_SRC(%a6),%a0 | |
3112 | bsr.l tag # fetch operand type | |
3113 | mov.b %d0,STAG(%a6) | |
3114 | mov.b %d0,%d1 | |
3115 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3116 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3117 | ||
3118 | clr.l %d0 | |
3119 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3120 | ||
# NOTE(review): STAG(%a6) was already written with this tag right after tag();
# this second store looks redundant (the s/x entry points do not have it).
3121 | mov.b %d1,STAG(%a6) | |
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3122 | tst.b %d1 | |
3123 | bne.b _L14_2d | |
3124 | bsr.l slog10 # operand is a NORM | |
3125 | bra.b _L14_6d | |
3126 | _L14_2d: | |
3127 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3128 | bne.b _L14_3d # no | |
3129 | bsr.l t_dz2 # yes | |
3130 | bra.b _L14_6d | |
3131 | _L14_3d: | |
3132 | cmpi.b %d1,&INF # is operand an INF? | |
3133 | bne.b _L14_4d # no | |
3134 | bsr.l sopr_inf # yes | |
3135 | bra.b _L14_6d | |
3136 | _L14_4d: | |
3137 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3138 | bne.b _L14_5d # no | |
3139 | bsr.l src_qnan # yes | |
3140 | bra.b _L14_6d | |
3141 | _L14_5d: | |
3142 | bsr.l slog10d # operand is a DENORM | |
3143 | _L14_6d: | |
3144 | ||
3145 | # | |
3146 | # Result is now in FP0 | |
3147 | # | |
3148 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3149 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3150 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3151 | unlk %a6 | |
3152 | rts | |
3153 | ||
# _flog10x_: extended-precision entry point of the flog10 group.
# Copies the 12-byte ext operand at 0x8(%a6) to FP_SRC(%a6) as three
# longwords, classifies it with tag() and calls exactly one handler:
#   NORM -> slog10, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slog10d (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3154 | global _flog10x_ | |
3155 | _flog10x_: | |
3156 | link %a6,&-LOCAL_SIZE | |
3157 | ||
3158 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3159 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3160 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3161 | ||
3162 | fmov.l &0x0,%fpcr # zero FPCR | |
3163 | ||
3164 | # | |
3165 | # copy, convert, and tag input argument | |
3166 | # | |
3167 | lea FP_SRC(%a6),%a0 | |
3168 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
3169 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
3170 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
3171 | bsr.l tag # fetch operand type | |
3172 | mov.b %d0,STAG(%a6) | |
3173 | mov.b %d0,%d1 | |
3174 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3175 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3176 | ||
3177 | clr.l %d0 | |
3178 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3179 | ||
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3180 | tst.b %d1 | |
3181 | bne.b _L14_2x | |
3182 | bsr.l slog10 # operand is a NORM | |
3183 | bra.b _L14_6x | |
3184 | _L14_2x: | |
3185 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3186 | bne.b _L14_3x # no | |
3187 | bsr.l t_dz2 # yes | |
3188 | bra.b _L14_6x | |
3189 | _L14_3x: | |
3190 | cmpi.b %d1,&INF # is operand an INF? | |
3191 | bne.b _L14_4x # no | |
3192 | bsr.l sopr_inf # yes | |
3193 | bra.b _L14_6x | |
3194 | _L14_4x: | |
3195 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3196 | bne.b _L14_5x # no | |
3197 | bsr.l src_qnan # yes | |
3198 | bra.b _L14_6x | |
3199 | _L14_5x: | |
3200 | bsr.l slog10d # operand is a DENORM | |
3201 | _L14_6x: | |
3202 | ||
3203 | # | |
3204 | # Result is now in FP0 | |
3205 | # | |
3206 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3207 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3208 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3209 | unlk %a6 | |
3210 | rts | |
3211 | ||
3212 | ||
3213 | ######################################################################### | |
3214 | # MONADIC TEMPLATE # | |
3215 | ######################################################################### | |
# _flog2s_: single-precision entry point of the flog2 group.
# Loads the sgl operand at 0x8(%a6) into fp0, stores it in extended format at
# FP_SRC(%a6), classifies it with tag() and calls exactly one handler:
#   NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slog2d (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3216 | global _flog2s_ | |
3217 | _flog2s_: | |
3218 | link %a6,&-LOCAL_SIZE | |
3219 | ||
3220 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3221 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3222 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3223 | ||
3224 | fmov.l &0x0,%fpcr # zero FPCR | |
3225 | ||
3226 | # | |
3227 | # copy, convert, and tag input argument | |
3228 | # | |
3229 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
3230 | fmov.x %fp0,FP_SRC(%a6) | |
3231 | lea FP_SRC(%a6),%a0 | |
3232 | bsr.l tag # fetch operand type | |
3233 | mov.b %d0,STAG(%a6) | |
3234 | mov.b %d0,%d1 | |
3235 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3236 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3237 | ||
3238 | clr.l %d0 | |
3239 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3240 | ||
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3241 | tst.b %d1 | |
3242 | bne.b _L15_2s | |
3243 | bsr.l slog2 # operand is a NORM | |
3244 | bra.b _L15_6s | |
3245 | _L15_2s: | |
3246 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3247 | bne.b _L15_3s # no | |
3248 | bsr.l t_dz2 # yes | |
3249 | bra.b _L15_6s | |
3250 | _L15_3s: | |
3251 | cmpi.b %d1,&INF # is operand an INF? | |
3252 | bne.b _L15_4s # no | |
3253 | bsr.l sopr_inf # yes | |
3254 | bra.b _L15_6s | |
3255 | _L15_4s: | |
3256 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3257 | bne.b _L15_5s # no | |
3258 | bsr.l src_qnan # yes | |
3259 | bra.b _L15_6s | |
3260 | _L15_5s: | |
3261 | bsr.l slog2d # operand is a DENORM | |
3262 | _L15_6s: | |
3263 | ||
3264 | # | |
3265 | # Result is now in FP0 | |
3266 | # | |
3267 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3268 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3269 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3270 | unlk %a6 | |
3271 | rts | |
3272 | ||
# _flog2d_: double-precision entry point of the flog2 group.
# Loads the dbl operand at 0x8(%a6) into fp0, stores it in extended format at
# FP_SRC(%a6), classifies it with tag() and calls exactly one handler:
#   NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slog2d (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3273 | global _flog2d_ | |
3274 | _flog2d_: | |
3275 | link %a6,&-LOCAL_SIZE | |
3276 | ||
3277 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3278 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3279 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3280 | ||
3281 | fmov.l &0x0,%fpcr # zero FPCR | |
3282 | ||
3283 | # | |
3284 | # copy, convert, and tag input argument | |
3285 | # | |
3286 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
3287 | fmov.x %fp0,FP_SRC(%a6) | |
3288 | lea FP_SRC(%a6),%a0 | |
3289 | bsr.l tag # fetch operand type | |
3290 | mov.b %d0,STAG(%a6) | |
3291 | mov.b %d0,%d1 | |
3292 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3293 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3294 | ||
3295 | clr.l %d0 | |
3296 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3297 | ||
# NOTE(review): STAG(%a6) was already written with this tag right after tag();
# this second store looks redundant (the s/x entry points do not have it).
3298 | mov.b %d1,STAG(%a6) | |
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3299 | tst.b %d1 | |
3300 | bne.b _L15_2d | |
3301 | bsr.l slog2 # operand is a NORM | |
3302 | bra.b _L15_6d | |
3303 | _L15_2d: | |
3304 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3305 | bne.b _L15_3d # no | |
3306 | bsr.l t_dz2 # yes | |
3307 | bra.b _L15_6d | |
3308 | _L15_3d: | |
3309 | cmpi.b %d1,&INF # is operand an INF? | |
3310 | bne.b _L15_4d # no | |
3311 | bsr.l sopr_inf # yes | |
3312 | bra.b _L15_6d | |
3313 | _L15_4d: | |
3314 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3315 | bne.b _L15_5d # no | |
3316 | bsr.l src_qnan # yes | |
3317 | bra.b _L15_6d | |
3318 | _L15_5d: | |
3319 | bsr.l slog2d # operand is a DENORM | |
3320 | _L15_6d: | |
3321 | ||
3322 | # | |
3323 | # Result is now in FP0 | |
3324 | # | |
3325 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3326 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3327 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3328 | unlk %a6 | |
3329 | rts | |
3330 | ||
# _flog2x_: extended-precision entry point of the flog2 group.
# Copies the 12-byte ext operand at 0x8(%a6) to FP_SRC(%a6) as three
# longwords, classifies it with tag() and calls exactly one handler:
#   NORM -> slog2, ZERO -> t_dz2, INF -> sopr_inf, QNAN -> src_qnan,
#   any other tag -> slog2d (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3331 | global _flog2x_ | |
3332 | _flog2x_: | |
3333 | link %a6,&-LOCAL_SIZE | |
3334 | ||
3335 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3336 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3337 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3338 | ||
3339 | fmov.l &0x0,%fpcr # zero FPCR | |
3340 | ||
3341 | # | |
3342 | # copy, convert, and tag input argument | |
3343 | # | |
3344 | lea FP_SRC(%a6),%a0 | |
3345 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
3346 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
3347 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
3348 | bsr.l tag # fetch operand type | |
3349 | mov.b %d0,STAG(%a6) | |
3350 | mov.b %d0,%d1 | |
3351 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3352 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3353 | ||
3354 | clr.l %d0 | |
3355 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3356 | ||
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3357 | tst.b %d1 | |
3358 | bne.b _L15_2x | |
3359 | bsr.l slog2 # operand is a NORM | |
3360 | bra.b _L15_6x | |
3361 | _L15_2x: | |
3362 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3363 | bne.b _L15_3x # no | |
3364 | bsr.l t_dz2 # yes | |
3365 | bra.b _L15_6x | |
3366 | _L15_3x: | |
3367 | cmpi.b %d1,&INF # is operand an INF? | |
3368 | bne.b _L15_4x # no | |
3369 | bsr.l sopr_inf # yes | |
3370 | bra.b _L15_6x | |
3371 | _L15_4x: | |
3372 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3373 | bne.b _L15_5x # no | |
3374 | bsr.l src_qnan # yes | |
3375 | bra.b _L15_6x | |
3376 | _L15_5x: | |
3377 | bsr.l slog2d # operand is a DENORM | |
3378 | _L15_6x: | |
3379 | ||
3380 | # | |
3381 | # Result is now in FP0 | |
3382 | # | |
3383 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3384 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3385 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3386 | unlk %a6 | |
3387 | rts | |
3388 | ||
3389 | ||
3390 | ######################################################################### | |
3391 | # MONADIC TEMPLATE # | |
3392 | ######################################################################### | |
# _fcoshs_: single-precision entry point of the fcosh group.
# Loads the sgl operand at 0x8(%a6) into fp0, stores it in extended format at
# FP_SRC(%a6), classifies it with tag() and calls exactly one handler:
#   NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf, QNAN -> src_qnan,
#   any other tag -> scoshd (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3393 | global _fcoshs_ | |
3394 | _fcoshs_: | |
3395 | link %a6,&-LOCAL_SIZE | |
3396 | ||
3397 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3398 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3399 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3400 | ||
3401 | fmov.l &0x0,%fpcr # zero FPCR | |
3402 | ||
3403 | # | |
3404 | # copy, convert, and tag input argument | |
3405 | # | |
3406 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
3407 | fmov.x %fp0,FP_SRC(%a6) | |
3408 | lea FP_SRC(%a6),%a0 | |
3409 | bsr.l tag # fetch operand type | |
3410 | mov.b %d0,STAG(%a6) | |
3411 | mov.b %d0,%d1 | |
3412 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3413 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3414 | ||
3415 | clr.l %d0 | |
3416 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3417 | ||
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3418 | tst.b %d1 | |
3419 | bne.b _L16_2s | |
3420 | bsr.l scosh # operand is a NORM | |
3421 | bra.b _L16_6s | |
3422 | _L16_2s: | |
3423 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3424 | bne.b _L16_3s # no | |
3425 | bsr.l ld_pone # yes | |
3426 | bra.b _L16_6s | |
3427 | _L16_3s: | |
3428 | cmpi.b %d1,&INF # is operand an INF? | |
3429 | bne.b _L16_4s # no | |
3430 | bsr.l ld_pinf # yes | |
3431 | bra.b _L16_6s | |
3432 | _L16_4s: | |
3433 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3434 | bne.b _L16_5s # no | |
3435 | bsr.l src_qnan # yes | |
3436 | bra.b _L16_6s | |
3437 | _L16_5s: | |
3438 | bsr.l scoshd # operand is a DENORM | |
3439 | _L16_6s: | |
3440 | ||
3441 | # | |
3442 | # Result is now in FP0 | |
3443 | # | |
3444 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3445 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3446 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3447 | unlk %a6 | |
3448 | rts | |
3449 | ||
# _fcoshd_: double-precision entry point of the fcosh group.
# Loads the dbl operand at 0x8(%a6) into fp0, stores it in extended format at
# FP_SRC(%a6), classifies it with tag() and calls exactly one handler:
#   NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf, QNAN -> src_qnan,
#   any other tag -> scoshd (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3450 | global _fcoshd_ | |
3451 | _fcoshd_: | |
3452 | link %a6,&-LOCAL_SIZE | |
3453 | ||
3454 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3455 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3456 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3457 | ||
3458 | fmov.l &0x0,%fpcr # zero FPCR | |
3459 | ||
3460 | # | |
3461 | # copy, convert, and tag input argument | |
3462 | # | |
3463 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
3464 | fmov.x %fp0,FP_SRC(%a6) | |
3465 | lea FP_SRC(%a6),%a0 | |
3466 | bsr.l tag # fetch operand type | |
3467 | mov.b %d0,STAG(%a6) | |
3468 | mov.b %d0,%d1 | |
3469 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3470 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3471 | ||
3472 | clr.l %d0 | |
3473 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3474 | ||
# NOTE(review): STAG(%a6) was already written with this tag right after tag();
# this second store looks redundant (the s/x entry points do not have it).
3475 | mov.b %d1,STAG(%a6) | |
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3476 | tst.b %d1 | |
3477 | bne.b _L16_2d | |
3478 | bsr.l scosh # operand is a NORM | |
3479 | bra.b _L16_6d | |
3480 | _L16_2d: | |
3481 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3482 | bne.b _L16_3d # no | |
3483 | bsr.l ld_pone # yes | |
3484 | bra.b _L16_6d | |
3485 | _L16_3d: | |
3486 | cmpi.b %d1,&INF # is operand an INF? | |
3487 | bne.b _L16_4d # no | |
3488 | bsr.l ld_pinf # yes | |
3489 | bra.b _L16_6d | |
3490 | _L16_4d: | |
3491 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3492 | bne.b _L16_5d # no | |
3493 | bsr.l src_qnan # yes | |
3494 | bra.b _L16_6d | |
3495 | _L16_5d: | |
3496 | bsr.l scoshd # operand is a DENORM | |
3497 | _L16_6d: | |
3498 | ||
3499 | # | |
3500 | # Result is now in FP0 | |
3501 | # | |
3502 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3503 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3504 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3505 | unlk %a6 | |
3506 | rts | |
3507 | ||
# _fcoshx_: extended-precision entry point of the fcosh group.
# Copies the 12-byte ext operand at 0x8(%a6) to FP_SRC(%a6) as three
# longwords, classifies it with tag() and calls exactly one handler:
#   NORM -> scosh, ZERO -> ld_pone, INF -> ld_pinf, QNAN -> src_qnan,
#   any other tag -> scoshd (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3508 | global _fcoshx_ | |
3509 | _fcoshx_: | |
3510 | link %a6,&-LOCAL_SIZE | |
3511 | ||
3512 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3513 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3514 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3515 | ||
3516 | fmov.l &0x0,%fpcr # zero FPCR | |
3517 | ||
3518 | # | |
3519 | # copy, convert, and tag input argument | |
3520 | # | |
3521 | lea FP_SRC(%a6),%a0 | |
3522 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
3523 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
3524 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
3525 | bsr.l tag # fetch operand type | |
3526 | mov.b %d0,STAG(%a6) | |
3527 | mov.b %d0,%d1 | |
3528 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3529 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3530 | ||
3531 | clr.l %d0 | |
3532 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3533 | ||
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3534 | tst.b %d1 | |
3535 | bne.b _L16_2x | |
3536 | bsr.l scosh # operand is a NORM | |
3537 | bra.b _L16_6x | |
3538 | _L16_2x: | |
3539 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3540 | bne.b _L16_3x # no | |
3541 | bsr.l ld_pone # yes | |
3542 | bra.b _L16_6x | |
3543 | _L16_3x: | |
3544 | cmpi.b %d1,&INF # is operand an INF? | |
3545 | bne.b _L16_4x # no | |
3546 | bsr.l ld_pinf # yes | |
3547 | bra.b _L16_6x | |
3548 | _L16_4x: | |
3549 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3550 | bne.b _L16_5x # no | |
3551 | bsr.l src_qnan # yes | |
3552 | bra.b _L16_6x | |
3553 | _L16_5x: | |
3554 | bsr.l scoshd # operand is a DENORM | |
3555 | _L16_6x: | |
3556 | ||
3557 | # | |
3558 | # Result is now in FP0 | |
3559 | # | |
3560 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3561 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3562 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3563 | unlk %a6 | |
3564 | rts | |
3565 | ||
3566 | ||
3567 | ######################################################################### | |
3568 | # MONADIC TEMPLATE # | |
3569 | ######################################################################### | |
# _facoss_: single-precision entry point of the facos group.
# Loads the sgl operand at 0x8(%a6) into fp0, stores it in extended format at
# FP_SRC(%a6), classifies it with tag() and calls exactly one handler:
#   NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sacosd (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3570 | global _facoss_ | |
3571 | _facoss_: | |
3572 | link %a6,&-LOCAL_SIZE | |
3573 | ||
3574 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3575 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3576 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3577 | ||
3578 | fmov.l &0x0,%fpcr # zero FPCR | |
3579 | ||
3580 | # | |
3581 | # copy, convert, and tag input argument | |
3582 | # | |
3583 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
3584 | fmov.x %fp0,FP_SRC(%a6) | |
3585 | lea FP_SRC(%a6),%a0 | |
3586 | bsr.l tag # fetch operand type | |
3587 | mov.b %d0,STAG(%a6) | |
3588 | mov.b %d0,%d1 | |
3589 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3590 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3591 | ||
3592 | clr.l %d0 | |
3593 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3594 | ||
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3595 | tst.b %d1 | |
3596 | bne.b _L17_2s | |
3597 | bsr.l sacos # operand is a NORM | |
3598 | bra.b _L17_6s | |
3599 | _L17_2s: | |
3600 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3601 | bne.b _L17_3s # no | |
3602 | bsr.l ld_ppi2 # yes | |
3603 | bra.b _L17_6s | |
3604 | _L17_3s: | |
3605 | cmpi.b %d1,&INF # is operand an INF? | |
3606 | bne.b _L17_4s # no | |
3607 | bsr.l t_operr # yes | |
3608 | bra.b _L17_6s | |
3609 | _L17_4s: | |
3610 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3611 | bne.b _L17_5s # no | |
3612 | bsr.l src_qnan # yes | |
3613 | bra.b _L17_6s | |
3614 | _L17_5s: | |
3615 | bsr.l sacosd # operand is a DENORM | |
3616 | _L17_6s: | |
3617 | ||
3618 | # | |
3619 | # Result is now in FP0 | |
3620 | # | |
3621 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3622 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3623 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3624 | unlk %a6 | |
3625 | rts | |
3626 | ||
# _facosd_: double-precision entry point of the facos group.
# Loads the dbl operand at 0x8(%a6) into fp0, stores it in extended format at
# FP_SRC(%a6), classifies it with tag() and calls exactly one handler:
#   NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sacosd (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3627 | global _facosd_ | |
3628 | _facosd_: | |
3629 | link %a6,&-LOCAL_SIZE | |
3630 | ||
3631 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3632 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3633 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3634 | ||
3635 | fmov.l &0x0,%fpcr # zero FPCR | |
3636 | ||
3637 | # | |
3638 | # copy, convert, and tag input argument | |
3639 | # | |
3640 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
3641 | fmov.x %fp0,FP_SRC(%a6) | |
3642 | lea FP_SRC(%a6),%a0 | |
3643 | bsr.l tag # fetch operand type | |
3644 | mov.b %d0,STAG(%a6) | |
3645 | mov.b %d0,%d1 | |
3646 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3647 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3648 | ||
3649 | clr.l %d0 | |
3650 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3651 | ||
# NOTE(review): STAG(%a6) was already written with this tag right after tag();
# this second store looks redundant (the s/x entry points do not have it).
3652 | mov.b %d1,STAG(%a6) | |
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3653 | tst.b %d1 | |
3654 | bne.b _L17_2d | |
3655 | bsr.l sacos # operand is a NORM | |
3656 | bra.b _L17_6d | |
3657 | _L17_2d: | |
3658 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3659 | bne.b _L17_3d # no | |
3660 | bsr.l ld_ppi2 # yes | |
3661 | bra.b _L17_6d | |
3662 | _L17_3d: | |
3663 | cmpi.b %d1,&INF # is operand an INF? | |
3664 | bne.b _L17_4d # no | |
3665 | bsr.l t_operr # yes | |
3666 | bra.b _L17_6d | |
3667 | _L17_4d: | |
3668 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3669 | bne.b _L17_5d # no | |
3670 | bsr.l src_qnan # yes | |
3671 | bra.b _L17_6d | |
3672 | _L17_5d: | |
3673 | bsr.l sacosd # operand is a DENORM | |
3674 | _L17_6d: | |
3675 | ||
3676 | # | |
3677 | # Result is now in FP0 | |
3678 | # | |
3679 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3680 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3681 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3682 | unlk %a6 | |
3683 | rts | |
3684 | ||
# _facosx_: extended-precision entry point of the facos group.
# Copies the 12-byte ext operand at 0x8(%a6) to FP_SRC(%a6) as three
# longwords, classifies it with tag() and calls exactly one handler:
#   NORM -> sacos, ZERO -> ld_ppi2, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sacosd (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3685 | global _facosx_ | |
3686 | _facosx_: | |
3687 | link %a6,&-LOCAL_SIZE | |
3688 | ||
3689 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3690 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3691 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3692 | ||
3693 | fmov.l &0x0,%fpcr # zero FPCR | |
3694 | ||
3695 | # | |
3696 | # copy, convert, and tag input argument | |
3697 | # | |
3698 | lea FP_SRC(%a6),%a0 | |
3699 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
3700 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
3701 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
3702 | bsr.l tag # fetch operand type | |
3703 | mov.b %d0,STAG(%a6) | |
3704 | mov.b %d0,%d1 | |
3705 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3706 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3707 | ||
3708 | clr.l %d0 | |
3709 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3710 | ||
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3711 | tst.b %d1 | |
3712 | bne.b _L17_2x | |
3713 | bsr.l sacos # operand is a NORM | |
3714 | bra.b _L17_6x | |
3715 | _L17_2x: | |
3716 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3717 | bne.b _L17_3x # no | |
3718 | bsr.l ld_ppi2 # yes | |
3719 | bra.b _L17_6x | |
3720 | _L17_3x: | |
3721 | cmpi.b %d1,&INF # is operand an INF? | |
3722 | bne.b _L17_4x # no | |
3723 | bsr.l t_operr # yes | |
3724 | bra.b _L17_6x | |
3725 | _L17_4x: | |
3726 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3727 | bne.b _L17_5x # no | |
3728 | bsr.l src_qnan # yes | |
3729 | bra.b _L17_6x | |
3730 | _L17_5x: | |
3731 | bsr.l sacosd # operand is a DENORM | |
3732 | _L17_6x: | |
3733 | ||
3734 | # | |
3735 | # Result is now in FP0 | |
3736 | # | |
3737 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3738 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3739 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3740 | unlk %a6 | |
3741 | rts | |
3742 | ||
3743 | ||
3744 | ######################################################################### | |
3745 | # MONADIC TEMPLATE # | |
3746 | ######################################################################### | |
# _fgetexps_: single-precision entry point of the fgetexp group.
# Loads the sgl operand at 0x8(%a6) into fp0, stores it in extended format at
# FP_SRC(%a6), classifies it with tag() and calls exactly one handler:
#   NORM -> sgetexp, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sgetexpd (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3747 | global _fgetexps_ | |
3748 | _fgetexps_: | |
3749 | link %a6,&-LOCAL_SIZE | |
3750 | ||
3751 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3752 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3753 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3754 | ||
3755 | fmov.l &0x0,%fpcr # zero FPCR | |
3756 | ||
3757 | # | |
3758 | # copy, convert, and tag input argument | |
3759 | # | |
3760 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
3761 | fmov.x %fp0,FP_SRC(%a6) | |
3762 | lea FP_SRC(%a6),%a0 | |
3763 | bsr.l tag # fetch operand type | |
3764 | mov.b %d0,STAG(%a6) | |
3765 | mov.b %d0,%d1 | |
3766 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3767 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3768 | ||
3769 | clr.l %d0 | |
3770 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3771 | ||
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3772 | tst.b %d1 | |
3773 | bne.b _L18_2s | |
3774 | bsr.l sgetexp # operand is a NORM | |
3775 | bra.b _L18_6s | |
3776 | _L18_2s: | |
3777 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3778 | bne.b _L18_3s # no | |
3779 | bsr.l src_zero # yes | |
3780 | bra.b _L18_6s | |
3781 | _L18_3s: | |
3782 | cmpi.b %d1,&INF # is operand an INF? | |
3783 | bne.b _L18_4s # no | |
3784 | bsr.l t_operr # yes | |
3785 | bra.b _L18_6s | |
3786 | _L18_4s: | |
3787 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3788 | bne.b _L18_5s # no | |
3789 | bsr.l src_qnan # yes | |
3790 | bra.b _L18_6s | |
3791 | _L18_5s: | |
3792 | bsr.l sgetexpd # operand is a DENORM | |
3793 | _L18_6s: | |
3794 | ||
3795 | # | |
3796 | # Result is now in FP0 | |
3797 | # | |
3798 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3799 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3800 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3801 | unlk %a6 | |
3802 | rts | |
3803 | ||
# _fgetexpd_: double-precision entry point of the fgetexp group.
# Loads the dbl operand at 0x8(%a6) into fp0, stores it in extended format at
# FP_SRC(%a6), classifies it with tag() and calls exactly one handler:
#   NORM -> sgetexp, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sgetexpd (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3804 | global _fgetexpd_ | |
3805 | _fgetexpd_: | |
3806 | link %a6,&-LOCAL_SIZE | |
3807 | ||
3808 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3809 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3810 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3811 | ||
3812 | fmov.l &0x0,%fpcr # zero FPCR | |
3813 | ||
3814 | # | |
3815 | # copy, convert, and tag input argument | |
3816 | # | |
3817 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
3818 | fmov.x %fp0,FP_SRC(%a6) | |
3819 | lea FP_SRC(%a6),%a0 | |
3820 | bsr.l tag # fetch operand type | |
3821 | mov.b %d0,STAG(%a6) | |
3822 | mov.b %d0,%d1 | |
3823 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3824 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3825 | ||
3826 | clr.l %d0 | |
3827 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3828 | ||
# NOTE(review): STAG(%a6) was already written with this tag right after tag();
# this second store looks redundant (the s/x entry points do not have it).
3829 | mov.b %d1,STAG(%a6) | |
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3830 | tst.b %d1 | |
3831 | bne.b _L18_2d | |
3832 | bsr.l sgetexp # operand is a NORM | |
3833 | bra.b _L18_6d | |
3834 | _L18_2d: | |
3835 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3836 | bne.b _L18_3d # no | |
3837 | bsr.l src_zero # yes | |
3838 | bra.b _L18_6d | |
3839 | _L18_3d: | |
3840 | cmpi.b %d1,&INF # is operand an INF? | |
3841 | bne.b _L18_4d # no | |
3842 | bsr.l t_operr # yes | |
3843 | bra.b _L18_6d | |
3844 | _L18_4d: | |
3845 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3846 | bne.b _L18_5d # no | |
3847 | bsr.l src_qnan # yes | |
3848 | bra.b _L18_6d | |
3849 | _L18_5d: | |
3850 | bsr.l sgetexpd # operand is a DENORM | |
3851 | _L18_6d: | |
3852 | ||
3853 | # | |
3854 | # Result is now in FP0 | |
3855 | # | |
3856 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3857 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3858 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3859 | unlk %a6 | |
3860 | rts | |
3861 | ||
# _fgetexpx_: extended-precision entry point of the fgetexp group.
# Copies the 12-byte ext operand at 0x8(%a6) to FP_SRC(%a6) as three
# longwords, classifies it with tag() and calls exactly one handler:
#   NORM -> sgetexp, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sgetexpd (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3862 | global _fgetexpx_ | |
3863 | _fgetexpx_: | |
3864 | link %a6,&-LOCAL_SIZE | |
3865 | ||
3866 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3867 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3868 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3869 | ||
3870 | fmov.l &0x0,%fpcr # zero FPCR | |
3871 | ||
3872 | # | |
3873 | # copy, convert, and tag input argument | |
3874 | # | |
3875 | lea FP_SRC(%a6),%a0 | |
3876 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
3877 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
3878 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
3879 | bsr.l tag # fetch operand type | |
3880 | mov.b %d0,STAG(%a6) | |
3881 | mov.b %d0,%d1 | |
3882 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3883 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3884 | ||
3885 | clr.l %d0 | |
3886 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3887 | ||
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3888 | tst.b %d1 | |
3889 | bne.b _L18_2x | |
3890 | bsr.l sgetexp # operand is a NORM | |
3891 | bra.b _L18_6x | |
3892 | _L18_2x: | |
3893 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3894 | bne.b _L18_3x # no | |
3895 | bsr.l src_zero # yes | |
3896 | bra.b _L18_6x | |
3897 | _L18_3x: | |
3898 | cmpi.b %d1,&INF # is operand an INF? | |
3899 | bne.b _L18_4x # no | |
3900 | bsr.l t_operr # yes | |
3901 | bra.b _L18_6x | |
3902 | _L18_4x: | |
3903 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3904 | bne.b _L18_5x # no | |
3905 | bsr.l src_qnan # yes | |
3906 | bra.b _L18_6x | |
3907 | _L18_5x: | |
3908 | bsr.l sgetexpd # operand is a DENORM | |
3909 | _L18_6x: | |
3910 | ||
3911 | # | |
3912 | # Result is now in FP0 | |
3913 | # | |
3914 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3915 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3916 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3917 | unlk %a6 | |
3918 | rts | |
3919 | ||
3920 | ||
3921 | ######################################################################### | |
3922 | # MONADIC TEMPLATE # | |
3923 | ######################################################################### | |
# _fgetmans_: single-precision entry point of the fgetman group.
# Loads the sgl operand at 0x8(%a6) into fp0, stores it in extended format at
# FP_SRC(%a6), classifies it with tag() and calls exactly one handler:
#   NORM -> sgetman, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sgetmand (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3924 | global _fgetmans_ | |
3925 | _fgetmans_: | |
3926 | link %a6,&-LOCAL_SIZE | |
3927 | ||
3928 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3929 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3930 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3931 | ||
3932 | fmov.l &0x0,%fpcr # zero FPCR | |
3933 | ||
3934 | # | |
3935 | # copy, convert, and tag input argument | |
3936 | # | |
3937 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
3938 | fmov.x %fp0,FP_SRC(%a6) | |
3939 | lea FP_SRC(%a6),%a0 | |
3940 | bsr.l tag # fetch operand type | |
3941 | mov.b %d0,STAG(%a6) | |
3942 | mov.b %d0,%d1 | |
3943 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
3944 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
3945 | ||
3946 | clr.l %d0 | |
3947 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
3948 | ||
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
3949 | tst.b %d1 | |
3950 | bne.b _L19_2s | |
3951 | bsr.l sgetman # operand is a NORM | |
3952 | bra.b _L19_6s | |
3953 | _L19_2s: | |
3954 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
3955 | bne.b _L19_3s # no | |
3956 | bsr.l src_zero # yes | |
3957 | bra.b _L19_6s | |
3958 | _L19_3s: | |
3959 | cmpi.b %d1,&INF # is operand an INF? | |
3960 | bne.b _L19_4s # no | |
3961 | bsr.l t_operr # yes | |
3962 | bra.b _L19_6s | |
3963 | _L19_4s: | |
3964 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
3965 | bne.b _L19_5s # no | |
3966 | bsr.l src_qnan # yes | |
3967 | bra.b _L19_6s | |
3968 | _L19_5s: | |
3969 | bsr.l sgetmand # operand is a DENORM | |
3970 | _L19_6s: | |
3971 | ||
3972 | # | |
3973 | # Result is now in FP0 | |
3974 | # | |
3975 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
3976 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
3977 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
3978 | unlk %a6 | |
3979 | rts | |
3980 | ||
# _fgetmand_: double-precision entry point of the fgetman group.
# Loads the dbl operand at 0x8(%a6) into fp0, stores it in extended format at
# FP_SRC(%a6), classifies it with tag() and calls exactly one handler:
#   NORM -> sgetman, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sgetmand (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
3981 | global _fgetmand_ | |
3982 | _fgetmand_: | |
3983 | link %a6,&-LOCAL_SIZE | |
3984 | ||
3985 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
3986 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
3987 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
3988 | ||
3989 | fmov.l &0x0,%fpcr # zero FPCR | |
3990 | ||
3991 | # | |
3992 | # copy, convert, and tag input argument | |
3993 | # | |
3994 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
3995 | fmov.x %fp0,FP_SRC(%a6) | |
3996 | lea FP_SRC(%a6),%a0 | |
3997 | bsr.l tag # fetch operand type | |
3998 | mov.b %d0,STAG(%a6) | |
3999 | mov.b %d0,%d1 | |
4000 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
4001 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4002 | ||
4003 | clr.l %d0 | |
4004 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
4005 | ||
# NOTE(review): STAG(%a6) was already written with this tag right after tag();
# this second store looks redundant (the s/x entry points do not have it).
4006 | mov.b %d1,STAG(%a6) | |
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
4007 | tst.b %d1 | |
4008 | bne.b _L19_2d | |
4009 | bsr.l sgetman # operand is a NORM | |
4010 | bra.b _L19_6d | |
4011 | _L19_2d: | |
4012 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
4013 | bne.b _L19_3d # no | |
4014 | bsr.l src_zero # yes | |
4015 | bra.b _L19_6d | |
4016 | _L19_3d: | |
4017 | cmpi.b %d1,&INF # is operand an INF? | |
4018 | bne.b _L19_4d # no | |
4019 | bsr.l t_operr # yes | |
4020 | bra.b _L19_6d | |
4021 | _L19_4d: | |
4022 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
4023 | bne.b _L19_5d # no | |
4024 | bsr.l src_qnan # yes | |
4025 | bra.b _L19_6d | |
4026 | _L19_5d: | |
4027 | bsr.l sgetmand # operand is a DENORM | |
4028 | _L19_6d: | |
4029 | ||
4030 | # | |
4031 | # Result is now in FP0 | |
4032 | # | |
4033 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4034 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
4035 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
4036 | unlk %a6 | |
4037 | rts | |
4038 | ||
# _fgetmanx_: extended-precision entry point of the fgetman group.
# Copies the 12-byte ext operand at 0x8(%a6) to FP_SRC(%a6) as three
# longwords, classifies it with tag() and calls exactly one handler:
#   NORM -> sgetman, ZERO -> src_zero, INF -> t_operr, QNAN -> src_qnan,
#   any other tag -> sgetmand (the DENORM path).
# At the handler call d0 holds the zero-extended FPCR_MODE(%a6) byte; a0 was
# pointed at FP_SRC(%a6) before tag() (assumes tag() preserves a0 - TODO confirm).
# Exit: result in fp0; d0-d1/a0-a1, fpcr/fpsr and fp1 are reloaded from the frame.
4039 | global _fgetmanx_ | |
4040 | _fgetmanx_: | |
4041 | link %a6,&-LOCAL_SIZE | |
4042 | ||
4043 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 | |
4044 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save ctrl regs | |
4045 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 | |
4046 | ||
4047 | fmov.l &0x0,%fpcr # zero FPCR | |
4048 | ||
4049 | # | |
4050 | # copy, convert, and tag input argument | |
4051 | # | |
4052 | lea FP_SRC(%a6),%a0 | |
4053 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input | |
4054 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
4055 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
4056 | bsr.l tag # fetch operand type | |
4057 | mov.b %d0,STAG(%a6) | |
4058 | mov.b %d0,%d1 | |
4059 | ||
# keep only bits 23-16 and 7-0 of the saved FPSR image
4060 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4061 | ||
4062 | clr.l %d0 | |
4063 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec | |
4064 | ||
# dispatch on the operand tag in d1; a zero tag selects the NORM handler
4065 | tst.b %d1 | |
4066 | bne.b _L19_2x | |
4067 | bsr.l sgetman # operand is a NORM | |
4068 | bra.b _L19_6x | |
4069 | _L19_2x: | |
4070 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
4071 | bne.b _L19_3x # no | |
4072 | bsr.l src_zero # yes | |
4073 | bra.b _L19_6x | |
4074 | _L19_3x: | |
4075 | cmpi.b %d1,&INF # is operand an INF? | |
4076 | bne.b _L19_4x # no | |
4077 | bsr.l t_operr # yes | |
4078 | bra.b _L19_6x | |
4079 | _L19_4x: | |
4080 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
4081 | bne.b _L19_5x # no | |
4082 | bsr.l src_qnan # yes | |
4083 | bra.b _L19_6x | |
4084 | _L19_5x: | |
4085 | bsr.l sgetmand # operand is a DENORM | |
4086 | _L19_6x: | |
4087 | ||
4088 | # | |
4089 | # Result is now in FP0 | |
4090 | # | |
4091 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4092 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # restore ctrl regs | |
# only fp1 is reloaded; fp0 keeps the handler's result
4093 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 | |
4094 | unlk %a6 | |
4095 | rts | |
4096 | ||
4097 | ||
4098 | ######################################################################### | |
4099 | # MONADIC TEMPLATE # | |
4100 | ######################################################################### | |
4101 | global _fsincoss_ | |
4102 | _fsincoss_: | |
4103 | link %a6,&-LOCAL_SIZE | |
4104 | ||
4105 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4106 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4107 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4108 | ||
4109 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4110 | ||
4111 | # _fsincoss_: wrapper for one single-precision operand found at 0x8(%a6). | |
4112 | # copy, convert, and tag input argument | |
4113 | # The operand is loaded into fp0, stored as extended in FP_SRC; tag's d0 result is kept in STAG and d1. | |
4114 | fmov.s 0x8(%a6),%fp0 # load sgl input | |
4115 | fmov.x %fp0,FP_SRC(%a6) | |
4116 | lea FP_SRC(%a6),%a0 | |
4117 | bsr.l tag # fetch operand type (a0 -> FP_SRC) | |
4118 | mov.b %d0,STAG(%a6) | |
4119 | mov.b %d0,%d1 | |
4120 | ||
4121 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4122 | ||
4123 | clr.l %d0 | |
4124 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0 | |
4125 | ||
4126 | tst.b %d1 | |
4127 | bne.b _L20_2s | |
4128 | bsr.l ssincos # tag is zero: operand is a NORM | |
4129 | bra.b _L20_6s | |
4130 | _L20_2s: | |
4131 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
4132 | bne.b _L20_3s # no; try INF next | |
4133 | bsr.l ssincosz # yes; call ssincosz | |
4134 | bra.b _L20_6s | |
4135 | _L20_3s: | |
4136 | cmpi.b %d1,&INF # is operand an INF? | |
4137 | bne.b _L20_4s # no; try QNAN next | |
4138 | bsr.l ssincosi # yes; call ssincosi | |
4139 | bra.b _L20_6s | |
4140 | _L20_4s: | |
4141 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
4142 | bne.b _L20_5s # no; use the DENORM handler | |
4143 | bsr.l ssincosqnan # yes; call ssincosqnan | |
4144 | bra.b _L20_6s | |
4145 | _L20_5s: | |
4146 | bsr.l ssincosd # every other tag: operand is treated as a DENORM | |
4147 | _L20_6s: | |
4148 | ||
4149 | # All dispatch paths rejoin here. | |
4150 | # Result is now in FP0 | |
4151 | # ssincos is documented below as returning fp0 = sin(X), fp1 = cos(X); the epilogue exchanges fp0 and fp1 and does not reload fp1 from the frame. | |
4152 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4153 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4154 | fmovm.x &0x03,-(%sp) # store off fp0/fp1 on the stack | |
4155 | fmovm.x (%sp)+,&0x40 # fp0 now in fp1 | |
4156 | fmovm.x (%sp)+,&0x80 # fp1 now in fp0; the pair has been swapped | |
4157 | unlk %a6 | |
4158 | rts | |
4159 | ||
4160 | global _fsincosd_ | |
4161 | _fsincosd_: | |
4162 | link %a6,&-LOCAL_SIZE | |
4163 | ||
4164 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4165 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4166 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4167 | ||
4168 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4169 | ||
4170 | # _fsincosd_: wrapper for one double-precision operand found at 0x8(%a6). | |
4171 | # copy, convert, and tag input argument | |
4172 | # The operand is loaded into fp0, stored as extended in FP_SRC; tag's d0 result is kept in STAG and d1. | |
4173 | fmov.d 0x8(%a6),%fp0 # load dbl input | |
4174 | fmov.x %fp0,FP_SRC(%a6) | |
4175 | lea FP_SRC(%a6),%a0 | |
4176 | bsr.l tag # fetch operand type (a0 -> FP_SRC) | |
4177 | mov.b %d0,STAG(%a6) | |
4178 | mov.b %d0,%d1 | |
4179 | ||
4180 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4181 | ||
4182 | clr.l %d0 | |
4183 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0; STAG already holds the tag, so it is not stored again | |
4184 | ||
4186 | tst.b %d1 | |
4187 | bne.b _L20_2d | |
4188 | bsr.l ssincos # tag is zero: operand is a NORM | |
4189 | bra.b _L20_6d | |
4190 | _L20_2d: | |
4191 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
4192 | bne.b _L20_3d # no; try INF next | |
4193 | bsr.l ssincosz # yes; call ssincosz | |
4194 | bra.b _L20_6d | |
4195 | _L20_3d: | |
4196 | cmpi.b %d1,&INF # is operand an INF? | |
4197 | bne.b _L20_4d # no; try QNAN next | |
4198 | bsr.l ssincosi # yes; call ssincosi | |
4199 | bra.b _L20_6d | |
4200 | _L20_4d: | |
4201 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
4202 | bne.b _L20_5d # no; use the DENORM handler | |
4203 | bsr.l ssincosqnan # yes; call ssincosqnan | |
4204 | bra.b _L20_6d | |
4205 | _L20_5d: | |
4206 | bsr.l ssincosd # every other tag: operand is treated as a DENORM | |
4207 | _L20_6d: | |
4208 | ||
4209 | # All dispatch paths rejoin here. | |
4210 | # Result is now in FP0 | |
4211 | # ssincos is documented below as returning fp0 = sin(X), fp1 = cos(X); the epilogue exchanges fp0 and fp1 and does not reload fp1 from the frame. | |
4212 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4213 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4214 | fmovm.x &0x03,-(%sp) # store off fp0/fp1 on the stack | |
4215 | fmovm.x (%sp)+,&0x40 # fp0 now in fp1 | |
4216 | fmovm.x (%sp)+,&0x80 # fp1 now in fp0; the pair has been swapped | |
4217 | unlk %a6 | |
4218 | rts | |
4219 | ||
4220 | global _fsincosx_ | |
4221 | _fsincosx_: | |
4222 | link %a6,&-LOCAL_SIZE | |
4223 | ||
4224 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4225 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4226 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4227 | ||
4228 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4229 | ||
4230 | # _fsincosx_: wrapper for one extended-precision operand found at 0x8(%a6). | |
4231 | # copy, convert, and tag input argument | |
4232 | # The 12 bytes are copied unchanged to FP_SRC; tag's d0 result is kept in STAG and d1. | |
4233 | lea FP_SRC(%a6),%a0 | |
4234 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext input, first of three longwords | |
4235 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
4236 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
4237 | bsr.l tag # fetch operand type (a0 -> FP_SRC) | |
4238 | mov.b %d0,STAG(%a6) | |
4239 | mov.b %d0,%d1 | |
4240 | ||
4241 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4242 | ||
4243 | clr.l %d0 | |
4244 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0 | |
4245 | ||
4246 | tst.b %d1 | |
4247 | bne.b _L20_2x | |
4248 | bsr.l ssincos # tag is zero: operand is a NORM | |
4249 | bra.b _L20_6x | |
4250 | _L20_2x: | |
4251 | cmpi.b %d1,&ZERO # is operand a ZERO? | |
4252 | bne.b _L20_3x # no; try INF next | |
4253 | bsr.l ssincosz # yes; call ssincosz | |
4254 | bra.b _L20_6x | |
4255 | _L20_3x: | |
4256 | cmpi.b %d1,&INF # is operand an INF? | |
4257 | bne.b _L20_4x # no; try QNAN next | |
4258 | bsr.l ssincosi # yes; call ssincosi | |
4259 | bra.b _L20_6x | |
4260 | _L20_4x: | |
4261 | cmpi.b %d1,&QNAN # is operand a QNAN? | |
4262 | bne.b _L20_5x # no; use the DENORM handler | |
4263 | bsr.l ssincosqnan # yes; call ssincosqnan | |
4264 | bra.b _L20_6x | |
4265 | _L20_5x: | |
4266 | bsr.l ssincosd # every other tag: operand is treated as a DENORM | |
4267 | _L20_6x: | |
4268 | ||
4269 | # All dispatch paths rejoin here. | |
4270 | # Result is now in FP0 | |
4271 | # ssincos is documented below as returning fp0 = sin(X), fp1 = cos(X); the epilogue exchanges fp0 and fp1 and does not reload fp1 from the frame. | |
4272 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4273 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4274 | fmovm.x &0x03,-(%sp) # store off fp0/fp1 on the stack | |
4275 | fmovm.x (%sp)+,&0x40 # fp0 now in fp1 | |
4276 | fmovm.x (%sp)+,&0x80 # fp1 now in fp0; the pair has been swapped | |
4277 | unlk %a6 | |
4278 | rts | |
4279 | ||
4280 | ||
4281 | ######################################################################### | |
4282 | # DYADIC TEMPLATE # | |
4283 | ######################################################################### | |
4284 | global _frems_ | |
4285 | _frems_: | |
4286 | link %a6,&-LOCAL_SIZE | |
4287 | ||
4288 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4289 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4290 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4291 | ||
4292 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4293 | ||
4294 | # _frems_: wrapper for two single-precision operands: dst at 0x8(%a6), src at 0xc(%a6). | |
4295 | # copy, convert, and tag input argument | |
4296 | # Each operand is loaded into fp0, stored as extended, and tagged; the dispatch below tests the src tag only. | |
4297 | fmov.s 0x8(%a6),%fp0 # load sgl dst | |
4298 | fmov.x %fp0,FP_DST(%a6) | |
4299 | lea FP_DST(%a6),%a0 | |
4300 | bsr.l tag # fetch dst operand type (kept in DTAG) | |
4301 | mov.b %d0,DTAG(%a6) | |
4302 | ||
4303 | fmov.s 0xc(%a6),%fp0 # load sgl src | |
4304 | fmov.x %fp0,FP_SRC(%a6) | |
4305 | lea FP_SRC(%a6),%a0 | |
4306 | bsr.l tag # fetch src operand type (kept in STAG and d1) | |
4307 | mov.b %d0,STAG(%a6) | |
4308 | mov.l %d0,%d1 | |
4309 | ||
4310 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4311 | ||
4312 | clr.l %d0 | |
4313 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0 | |
4314 | ||
4315 | lea FP_SRC(%a6),%a0 # pass ptr to src in a0 | |
4316 | lea FP_DST(%a6),%a1 # pass ptr to dst in a1 | |
4317 | ||
4318 | tst.b %d1 | |
4319 | bne.b _L21_2s | |
4320 | bsr.l srem_snorm # src tag is zero: src operand is a NORM | |
4321 | bra.b _L21_6s | |
4322 | _L21_2s: | |
4323 | cmpi.b %d1,&ZERO # is src operand a ZERO? | |
4324 | bne.b _L21_3s # no; try INF next | |
4325 | bsr.l srem_szero # yes; call srem_szero | |
4326 | bra.b _L21_6s | |
4327 | _L21_3s: | |
4328 | cmpi.b %d1,&INF # is src operand an INF? | |
4329 | bne.b _L21_4s # no; try QNAN next | |
4330 | bsr.l srem_sinf # yes; call srem_sinf | |
4331 | bra.b _L21_6s | |
4332 | _L21_4s: | |
4333 | cmpi.b %d1,&QNAN # is src operand a QNAN? | |
4334 | bne.b _L21_5s # no; use the DENORM handler | |
4335 | bsr.l sop_sqnan # yes; call sop_sqnan | |
4336 | bra.b _L21_6s | |
4337 | _L21_5s: | |
4338 | bsr.l srem_sdnrm # every other src tag: src operand is treated as a DENORM | |
4339 | _L21_6s: | |
4340 | ||
4341 | # All dispatch paths rejoin here. | |
4342 | # Result is now in FP0 | |
4343 | # The saved FPSR image was masked with 0x00ff00ff above before being reloaded; fp0 is not restored. | |
4344 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4345 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4346 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only | |
4347 | unlk %a6 | |
4348 | rts | |
4349 | ||
4350 | global _fremd_ | |
4351 | _fremd_: | |
4352 | link %a6,&-LOCAL_SIZE | |
4353 | ||
4354 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4355 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4356 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4357 | ||
4358 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4359 | ||
4360 | # _fremd_: wrapper for two double-precision operands: dst at 0x8(%a6), src at 0x10(%a6). | |
4361 | # copy, convert, and tag input argument | |
4362 | # Each operand is loaded into fp0, stored as extended, and tagged; the dispatch below tests the src tag only. | |
4363 | fmov.d 0x8(%a6),%fp0 # load dbl dst | |
4364 | fmov.x %fp0,FP_DST(%a6) | |
4365 | lea FP_DST(%a6),%a0 | |
4366 | bsr.l tag # fetch dst operand type (kept in DTAG) | |
4367 | mov.b %d0,DTAG(%a6) | |
4368 | ||
4369 | fmov.d 0x10(%a6),%fp0 # load dbl src | |
4370 | fmov.x %fp0,FP_SRC(%a6) | |
4371 | lea FP_SRC(%a6),%a0 | |
4372 | bsr.l tag # fetch src operand type (kept in STAG and d1) | |
4373 | mov.b %d0,STAG(%a6) | |
4374 | mov.l %d0,%d1 | |
4375 | ||
4376 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4377 | ||
4378 | clr.l %d0 | |
4379 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0 | |
4380 | ||
4381 | lea FP_SRC(%a6),%a0 # pass ptr to src in a0 | |
4382 | lea FP_DST(%a6),%a1 # pass ptr to dst in a1 | |
4383 | ||
4384 | tst.b %d1 | |
4385 | bne.b _L21_2d | |
4386 | bsr.l srem_snorm # src tag is zero: src operand is a NORM | |
4387 | bra.b _L21_6d | |
4388 | _L21_2d: | |
4389 | cmpi.b %d1,&ZERO # is src operand a ZERO? | |
4390 | bne.b _L21_3d # no; try INF next | |
4391 | bsr.l srem_szero # yes; call srem_szero | |
4392 | bra.b _L21_6d | |
4393 | _L21_3d: | |
4394 | cmpi.b %d1,&INF # is src operand an INF? | |
4395 | bne.b _L21_4d # no; try QNAN next | |
4396 | bsr.l srem_sinf # yes; call srem_sinf | |
4397 | bra.b _L21_6d | |
4398 | _L21_4d: | |
4399 | cmpi.b %d1,&QNAN # is src operand a QNAN? | |
4400 | bne.b _L21_5d # no; use the DENORM handler | |
4401 | bsr.l sop_sqnan # yes; call sop_sqnan | |
4402 | bra.b _L21_6d | |
4403 | _L21_5d: | |
4404 | bsr.l srem_sdnrm # every other src tag: src operand is treated as a DENORM | |
4405 | _L21_6d: | |
4406 | ||
4407 | # All dispatch paths rejoin here. | |
4408 | # Result is now in FP0 | |
4409 | # The saved FPSR image was masked with 0x00ff00ff above before being reloaded; fp0 is not restored. | |
4410 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4411 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4412 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only | |
4413 | unlk %a6 | |
4414 | rts | |
4415 | ||
4416 | global _fremx_ | |
4417 | _fremx_: | |
4418 | link %a6,&-LOCAL_SIZE | |
4419 | ||
4420 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4421 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4422 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4423 | ||
4424 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4425 | ||
4426 | # _fremx_: wrapper for two extended-precision operands: dst at 0x8(%a6), src at 0x14(%a6). | |
4427 | # copy, convert, and tag input argument | |
4428 | # Each 12-byte operand is copied unchanged and tagged; the dispatch below tests the src tag only. | |
4429 | lea FP_DST(%a6),%a0 | |
4430 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst, first of three longwords | |
4431 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
4432 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
4433 | bsr.l tag # fetch dst operand type (kept in DTAG) | |
4434 | mov.b %d0,DTAG(%a6) | |
4435 | ||
4436 | lea FP_SRC(%a6),%a0 | |
4437 | mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src, first of three longwords | |
4438 | mov.l 0x14+0x4(%a6),0x4(%a0) | |
4439 | mov.l 0x14+0x8(%a6),0x8(%a0) | |
4440 | bsr.l tag # fetch src operand type (kept in STAG and d1) | |
4441 | mov.b %d0,STAG(%a6) | |
4442 | mov.l %d0,%d1 | |
4443 | ||
4444 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4445 | ||
4446 | clr.l %d0 | |
4447 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0 | |
4448 | ||
4449 | lea FP_SRC(%a6),%a0 # pass ptr to src in a0 | |
4450 | lea FP_DST(%a6),%a1 # pass ptr to dst in a1 | |
4451 | ||
4452 | tst.b %d1 | |
4453 | bne.b _L21_2x | |
4454 | bsr.l srem_snorm # src tag is zero: src operand is a NORM | |
4455 | bra.b _L21_6x | |
4456 | _L21_2x: | |
4457 | cmpi.b %d1,&ZERO # is src operand a ZERO? | |
4458 | bne.b _L21_3x # no; try INF next | |
4459 | bsr.l srem_szero # yes; call srem_szero | |
4460 | bra.b _L21_6x | |
4461 | _L21_3x: | |
4462 | cmpi.b %d1,&INF # is src operand an INF? | |
4463 | bne.b _L21_4x # no; try QNAN next | |
4464 | bsr.l srem_sinf # yes; call srem_sinf | |
4465 | bra.b _L21_6x | |
4466 | _L21_4x: | |
4467 | cmpi.b %d1,&QNAN # is src operand a QNAN? | |
4468 | bne.b _L21_5x # no; use the DENORM handler | |
4469 | bsr.l sop_sqnan # yes; call sop_sqnan | |
4470 | bra.b _L21_6x | |
4471 | _L21_5x: | |
4472 | bsr.l srem_sdnrm # every other src tag: src operand is treated as a DENORM | |
4473 | _L21_6x: | |
4474 | ||
4475 | # All dispatch paths rejoin here. | |
4476 | # Result is now in FP0 | |
4477 | # The saved FPSR image was masked with 0x00ff00ff above before being reloaded; fp0 is not restored. | |
4478 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4479 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4480 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only | |
4481 | unlk %a6 | |
4482 | rts | |
4483 | ||
4484 | ||
4485 | ######################################################################### | |
4486 | # DYADIC TEMPLATE # | |
4487 | ######################################################################### | |
4488 | global _fmods_ | |
4489 | _fmods_: | |
4490 | link %a6,&-LOCAL_SIZE | |
4491 | ||
4492 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4493 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4494 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4495 | ||
4496 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4497 | ||
4498 | # _fmods_: wrapper for two single-precision operands: dst at 0x8(%a6), src at 0xc(%a6). | |
4499 | # copy, convert, and tag input argument | |
4500 | # Each operand is loaded into fp0, stored as extended, and tagged; the dispatch below tests the src tag only. | |
4501 | fmov.s 0x8(%a6),%fp0 # load sgl dst | |
4502 | fmov.x %fp0,FP_DST(%a6) | |
4503 | lea FP_DST(%a6),%a0 | |
4504 | bsr.l tag # fetch dst operand type (kept in DTAG) | |
4505 | mov.b %d0,DTAG(%a6) | |
4506 | ||
4507 | fmov.s 0xc(%a6),%fp0 # load sgl src | |
4508 | fmov.x %fp0,FP_SRC(%a6) | |
4509 | lea FP_SRC(%a6),%a0 | |
4510 | bsr.l tag # fetch src operand type (kept in STAG and d1) | |
4511 | mov.b %d0,STAG(%a6) | |
4512 | mov.l %d0,%d1 | |
4513 | ||
4514 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4515 | ||
4516 | clr.l %d0 | |
4517 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0 | |
4518 | ||
4519 | lea FP_SRC(%a6),%a0 # pass ptr to src in a0 | |
4520 | lea FP_DST(%a6),%a1 # pass ptr to dst in a1 | |
4521 | ||
4522 | tst.b %d1 | |
4523 | bne.b _L22_2s | |
4524 | bsr.l smod_snorm # src tag is zero: src operand is a NORM | |
4525 | bra.b _L22_6s | |
4526 | _L22_2s: | |
4527 | cmpi.b %d1,&ZERO # is src operand a ZERO? | |
4528 | bne.b _L22_3s # no; try INF next | |
4529 | bsr.l smod_szero # yes; call smod_szero | |
4530 | bra.b _L22_6s | |
4531 | _L22_3s: | |
4532 | cmpi.b %d1,&INF # is src operand an INF? | |
4533 | bne.b _L22_4s # no; try QNAN next | |
4534 | bsr.l smod_sinf # yes; call smod_sinf | |
4535 | bra.b _L22_6s | |
4536 | _L22_4s: | |
4537 | cmpi.b %d1,&QNAN # is src operand a QNAN? | |
4538 | bne.b _L22_5s # no; use the DENORM handler | |
4539 | bsr.l sop_sqnan # yes; call sop_sqnan | |
4540 | bra.b _L22_6s | |
4541 | _L22_5s: | |
4542 | bsr.l smod_sdnrm # every other src tag: src operand is treated as a DENORM | |
4543 | _L22_6s: | |
4544 | ||
4545 | # All dispatch paths rejoin here. | |
4546 | # Result is now in FP0 | |
4547 | # The saved FPSR image was masked with 0x00ff00ff above before being reloaded; fp0 is not restored. | |
4548 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4549 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4550 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only | |
4551 | unlk %a6 | |
4552 | rts | |
4553 | ||
4554 | global _fmodd_ | |
4555 | _fmodd_: | |
4556 | link %a6,&-LOCAL_SIZE | |
4557 | ||
4558 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4559 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4560 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4561 | ||
4562 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4563 | ||
4564 | # _fmodd_: wrapper for two double-precision operands: dst at 0x8(%a6), src at 0x10(%a6). | |
4565 | # copy, convert, and tag input argument | |
4566 | # Each operand is loaded into fp0, stored as extended, and tagged; the dispatch below tests the src tag only. | |
4567 | fmov.d 0x8(%a6),%fp0 # load dbl dst | |
4568 | fmov.x %fp0,FP_DST(%a6) | |
4569 | lea FP_DST(%a6),%a0 | |
4570 | bsr.l tag # fetch dst operand type (kept in DTAG) | |
4571 | mov.b %d0,DTAG(%a6) | |
4572 | ||
4573 | fmov.d 0x10(%a6),%fp0 # load dbl src | |
4574 | fmov.x %fp0,FP_SRC(%a6) | |
4575 | lea FP_SRC(%a6),%a0 | |
4576 | bsr.l tag # fetch src operand type (kept in STAG and d1) | |
4577 | mov.b %d0,STAG(%a6) | |
4578 | mov.l %d0,%d1 | |
4579 | ||
4580 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4581 | ||
4582 | clr.l %d0 | |
4583 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0 | |
4584 | ||
4585 | lea FP_SRC(%a6),%a0 # pass ptr to src in a0 | |
4586 | lea FP_DST(%a6),%a1 # pass ptr to dst in a1 | |
4587 | ||
4588 | tst.b %d1 | |
4589 | bne.b _L22_2d | |
4590 | bsr.l smod_snorm # src tag is zero: src operand is a NORM | |
4591 | bra.b _L22_6d | |
4592 | _L22_2d: | |
4593 | cmpi.b %d1,&ZERO # is src operand a ZERO? | |
4594 | bne.b _L22_3d # no; try INF next | |
4595 | bsr.l smod_szero # yes; call smod_szero | |
4596 | bra.b _L22_6d | |
4597 | _L22_3d: | |
4598 | cmpi.b %d1,&INF # is src operand an INF? | |
4599 | bne.b _L22_4d # no; try QNAN next | |
4600 | bsr.l smod_sinf # yes; call smod_sinf | |
4601 | bra.b _L22_6d | |
4602 | _L22_4d: | |
4603 | cmpi.b %d1,&QNAN # is src operand a QNAN? | |
4604 | bne.b _L22_5d # no; use the DENORM handler | |
4605 | bsr.l sop_sqnan # yes; call sop_sqnan | |
4606 | bra.b _L22_6d | |
4607 | _L22_5d: | |
4608 | bsr.l smod_sdnrm # every other src tag: src operand is treated as a DENORM | |
4609 | _L22_6d: | |
4610 | ||
4611 | # All dispatch paths rejoin here. | |
4612 | # Result is now in FP0 | |
4613 | # The saved FPSR image was masked with 0x00ff00ff above before being reloaded; fp0 is not restored. | |
4614 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4615 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4616 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only | |
4617 | unlk %a6 | |
4618 | rts | |
4619 | ||
4620 | global _fmodx_ | |
4621 | _fmodx_: | |
4622 | link %a6,&-LOCAL_SIZE | |
4623 | ||
4624 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4625 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4626 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4627 | ||
4628 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4629 | ||
4630 | # _fmodx_: wrapper for two extended-precision operands: dst at 0x8(%a6), src at 0x14(%a6). | |
4631 | # copy, convert, and tag input argument | |
4632 | # Each 12-byte operand is copied unchanged and tagged; the dispatch below tests the src tag only. | |
4633 | lea FP_DST(%a6),%a0 | |
4634 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst, first of three longwords | |
4635 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
4636 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
4637 | bsr.l tag # fetch dst operand type (kept in DTAG) | |
4638 | mov.b %d0,DTAG(%a6) | |
4639 | ||
4640 | lea FP_SRC(%a6),%a0 | |
4641 | mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src, first of three longwords | |
4642 | mov.l 0x14+0x4(%a6),0x4(%a0) | |
4643 | mov.l 0x14+0x8(%a6),0x8(%a0) | |
4644 | bsr.l tag # fetch src operand type (kept in STAG and d1) | |
4645 | mov.b %d0,STAG(%a6) | |
4646 | mov.l %d0,%d1 | |
4647 | ||
4648 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4649 | ||
4650 | clr.l %d0 | |
4651 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0 | |
4652 | ||
4653 | lea FP_SRC(%a6),%a0 # pass ptr to src in a0 | |
4654 | lea FP_DST(%a6),%a1 # pass ptr to dst in a1 | |
4655 | ||
4656 | tst.b %d1 | |
4657 | bne.b _L22_2x | |
4658 | bsr.l smod_snorm # src tag is zero: src operand is a NORM | |
4659 | bra.b _L22_6x | |
4660 | _L22_2x: | |
4661 | cmpi.b %d1,&ZERO # is src operand a ZERO? | |
4662 | bne.b _L22_3x # no; try INF next | |
4663 | bsr.l smod_szero # yes; call smod_szero | |
4664 | bra.b _L22_6x | |
4665 | _L22_3x: | |
4666 | cmpi.b %d1,&INF # is src operand an INF? | |
4667 | bne.b _L22_4x # no; try QNAN next | |
4668 | bsr.l smod_sinf # yes; call smod_sinf | |
4669 | bra.b _L22_6x | |
4670 | _L22_4x: | |
4671 | cmpi.b %d1,&QNAN # is src operand a QNAN? | |
4672 | bne.b _L22_5x # no; use the DENORM handler | |
4673 | bsr.l sop_sqnan # yes; call sop_sqnan | |
4674 | bra.b _L22_6x | |
4675 | _L22_5x: | |
4676 | bsr.l smod_sdnrm # every other src tag: src operand is treated as a DENORM | |
4677 | _L22_6x: | |
4678 | ||
4679 | # All dispatch paths rejoin here. | |
4680 | # Result is now in FP0 | |
4681 | # The saved FPSR image was masked with 0x00ff00ff above before being reloaded; fp0 is not restored. | |
4682 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4683 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4684 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only | |
4685 | unlk %a6 | |
4686 | rts | |
4687 | ||
4688 | ||
4689 | ######################################################################### | |
4690 | # DYADIC TEMPLATE # | |
4691 | ######################################################################### | |
4692 | global _fscales_ | |
4693 | _fscales_: | |
4694 | link %a6,&-LOCAL_SIZE | |
4695 | ||
4696 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4697 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4698 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4699 | ||
4700 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4701 | ||
4702 | # _fscales_: wrapper for two single-precision operands: dst at 0x8(%a6), src at 0xc(%a6). | |
4703 | # copy, convert, and tag input argument | |
4704 | # Each operand is loaded into fp0, stored as extended, and tagged; the dispatch below tests the src tag only. | |
4705 | fmov.s 0x8(%a6),%fp0 # load sgl dst | |
4706 | fmov.x %fp0,FP_DST(%a6) | |
4707 | lea FP_DST(%a6),%a0 | |
4708 | bsr.l tag # fetch dst operand type (kept in DTAG) | |
4709 | mov.b %d0,DTAG(%a6) | |
4710 | ||
4711 | fmov.s 0xc(%a6),%fp0 # load sgl src | |
4712 | fmov.x %fp0,FP_SRC(%a6) | |
4713 | lea FP_SRC(%a6),%a0 | |
4714 | bsr.l tag # fetch src operand type (kept in STAG and d1) | |
4715 | mov.b %d0,STAG(%a6) | |
4716 | mov.l %d0,%d1 | |
4717 | ||
4718 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4719 | ||
4720 | clr.l %d0 | |
4721 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0 | |
4722 | ||
4723 | lea FP_SRC(%a6),%a0 # pass ptr to src in a0 | |
4724 | lea FP_DST(%a6),%a1 # pass ptr to dst in a1 | |
4725 | ||
4726 | tst.b %d1 | |
4727 | bne.b _L23_2s | |
4728 | bsr.l sscale_snorm # src tag is zero: src operand is a NORM | |
4729 | bra.b _L23_6s | |
4730 | _L23_2s: | |
4731 | cmpi.b %d1,&ZERO # is src operand a ZERO? | |
4732 | bne.b _L23_3s # no; try INF next | |
4733 | bsr.l sscale_szero # yes; call sscale_szero | |
4734 | bra.b _L23_6s | |
4735 | _L23_3s: | |
4736 | cmpi.b %d1,&INF # is src operand an INF? | |
4737 | bne.b _L23_4s # no; try QNAN next | |
4738 | bsr.l sscale_sinf # yes; call sscale_sinf | |
4739 | bra.b _L23_6s | |
4740 | _L23_4s: | |
4741 | cmpi.b %d1,&QNAN # is src operand a QNAN? | |
4742 | bne.b _L23_5s # no; use the DENORM handler | |
4743 | bsr.l sop_sqnan # yes; call sop_sqnan | |
4744 | bra.b _L23_6s | |
4745 | _L23_5s: | |
4746 | bsr.l sscale_sdnrm # every other src tag: src operand is treated as a DENORM | |
4747 | _L23_6s: | |
4748 | ||
4749 | # All dispatch paths rejoin here. | |
4750 | # Result is now in FP0 | |
4751 | # The saved FPSR image was masked with 0x00ff00ff above before being reloaded; fp0 is not restored. | |
4752 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4753 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4754 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only | |
4755 | unlk %a6 | |
4756 | rts | |
4757 | ||
4758 | global _fscaled_ | |
4759 | _fscaled_: | |
4760 | link %a6,&-LOCAL_SIZE | |
4761 | ||
4762 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4763 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4764 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4765 | ||
4766 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4767 | ||
4768 | # _fscaled_: wrapper for two double-precision operands: dst at 0x8(%a6), src at 0x10(%a6). | |
4769 | # copy, convert, and tag input argument | |
4770 | # Each operand is loaded into fp0, stored as extended, and tagged; the dispatch below tests the src tag only. | |
4771 | fmov.d 0x8(%a6),%fp0 # load dbl dst | |
4772 | fmov.x %fp0,FP_DST(%a6) | |
4773 | lea FP_DST(%a6),%a0 | |
4774 | bsr.l tag # fetch dst operand type (kept in DTAG) | |
4775 | mov.b %d0,DTAG(%a6) | |
4776 | ||
4777 | fmov.d 0x10(%a6),%fp0 # load dbl src | |
4778 | fmov.x %fp0,FP_SRC(%a6) | |
4779 | lea FP_SRC(%a6),%a0 | |
4780 | bsr.l tag # fetch src operand type (kept in STAG and d1) | |
4781 | mov.b %d0,STAG(%a6) | |
4782 | mov.l %d0,%d1 | |
4783 | ||
4784 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4785 | ||
4786 | clr.l %d0 | |
4787 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0 | |
4788 | ||
4789 | lea FP_SRC(%a6),%a0 # pass ptr to src in a0 | |
4790 | lea FP_DST(%a6),%a1 # pass ptr to dst in a1 | |
4791 | ||
4792 | tst.b %d1 | |
4793 | bne.b _L23_2d | |
4794 | bsr.l sscale_snorm # src tag is zero: src operand is a NORM | |
4795 | bra.b _L23_6d | |
4796 | _L23_2d: | |
4797 | cmpi.b %d1,&ZERO # is src operand a ZERO? | |
4798 | bne.b _L23_3d # no; try INF next | |
4799 | bsr.l sscale_szero # yes; call sscale_szero | |
4800 | bra.b _L23_6d | |
4801 | _L23_3d: | |
4802 | cmpi.b %d1,&INF # is src operand an INF? | |
4803 | bne.b _L23_4d # no; try QNAN next | |
4804 | bsr.l sscale_sinf # yes; call sscale_sinf | |
4805 | bra.b _L23_6d | |
4806 | _L23_4d: | |
4807 | cmpi.b %d1,&QNAN # is src operand a QNAN? | |
4808 | bne.b _L23_5d # no; use the DENORM handler | |
4809 | bsr.l sop_sqnan # yes; call sop_sqnan | |
4810 | bra.b _L23_6d | |
4811 | _L23_5d: | |
4812 | bsr.l sscale_sdnrm # every other src tag: src operand is treated as a DENORM | |
4813 | _L23_6d: | |
4814 | ||
4815 | # All dispatch paths rejoin here. | |
4816 | # Result is now in FP0 | |
4817 | # The saved FPSR image was masked with 0x00ff00ff above before being reloaded; fp0 is not restored. | |
4818 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4819 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4820 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only | |
4821 | unlk %a6 | |
4822 | rts | |
4823 | ||
4824 | global _fscalex_ | |
4825 | _fscalex_: | |
4826 | link %a6,&-LOCAL_SIZE | |
4827 | ||
4828 | movm.l &0x0303,EXC_DREGS(%a6) # save d0-d1/a0-a1 in the local frame | |
4829 | fmovm.l %fpcr,%fpsr,USER_FPCR(%a6) # save caller's FPCR/FPSR in the frame | |
4830 | fmovm.x &0xc0,EXC_FP0(%a6) # save fp0/fp1 in the frame | |
4831 | ||
4832 | fmov.l &0x0,%fpcr # zero FPCR (caller's value stays in the frame) | |
4833 | ||
4834 | # _fscalex_: wrapper for two extended-precision operands: dst at 0x8(%a6), src at 0x14(%a6). | |
4835 | # copy, convert, and tag input argument | |
4836 | # Each 12-byte operand is copied unchanged and tagged; the dispatch below tests the src tag only. | |
4837 | lea FP_DST(%a6),%a0 | |
4838 | mov.l 0x8+0x0(%a6),0x0(%a0) # load ext dst, first of three longwords | |
4839 | mov.l 0x8+0x4(%a6),0x4(%a0) | |
4840 | mov.l 0x8+0x8(%a6),0x8(%a0) | |
4841 | bsr.l tag # fetch dst operand type (kept in DTAG) | |
4842 | mov.b %d0,DTAG(%a6) | |
4843 | ||
4844 | lea FP_SRC(%a6),%a0 | |
4845 | mov.l 0x14+0x0(%a6),0x0(%a0) # load ext src, first of three longwords | |
4846 | mov.l 0x14+0x4(%a6),0x4(%a0) | |
4847 | mov.l 0x14+0x8(%a6),0x8(%a0) | |
4848 | bsr.l tag # fetch src operand type (kept in STAG and d1) | |
4849 | mov.b %d0,STAG(%a6) | |
4850 | mov.l %d0,%d1 | |
4851 | ||
4852 | andi.l &0x00ff00ff,USER_FPSR(%a6) | |
4853 | ||
4854 | clr.l %d0 | |
4855 | mov.b FPCR_MODE(%a6),%d0 # pass rnd mode,prec byte in d0 | |
4856 | ||
4857 | lea FP_SRC(%a6),%a0 # pass ptr to src in a0 | |
4858 | lea FP_DST(%a6),%a1 # pass ptr to dst in a1 | |
4859 | ||
4860 | tst.b %d1 | |
4861 | bne.b _L23_2x | |
4862 | bsr.l sscale_snorm # src tag is zero: src operand is a NORM | |
4863 | bra.b _L23_6x | |
4864 | _L23_2x: | |
4865 | cmpi.b %d1,&ZERO # is src operand a ZERO? | |
4866 | bne.b _L23_3x # no; try INF next | |
4867 | bsr.l sscale_szero # yes; call sscale_szero | |
4868 | bra.b _L23_6x | |
4869 | _L23_3x: | |
4870 | cmpi.b %d1,&INF # is src operand an INF? | |
4871 | bne.b _L23_4x # no; try QNAN next | |
4872 | bsr.l sscale_sinf # yes; call sscale_sinf | |
4873 | bra.b _L23_6x | |
4874 | _L23_4x: | |
4875 | cmpi.b %d1,&QNAN # is src operand a QNAN? | |
4876 | bne.b _L23_5x # no; use the DENORM handler | |
4877 | bsr.l sop_sqnan # yes; call sop_sqnan | |
4878 | bra.b _L23_6x | |
4879 | _L23_5x: | |
4880 | bsr.l sscale_sdnrm # every other src tag: src operand is treated as a DENORM | |
4881 | _L23_6x: | |
4882 | ||
4883 | # All dispatch paths rejoin here. | |
4884 | # Result is now in FP0 | |
4885 | # The saved FPSR image was masked with 0x00ff00ff above before being reloaded; fp0 is not restored. | |
4886 | movm.l EXC_DREGS(%a6),&0x0303 # restore d0-d1/a0-a1 | |
4887 | fmovm.l USER_FPCR(%a6),%fpcr,%fpsr # reload ctrl regs from the frame | |
4888 | fmovm.x EXC_FP1(%a6),&0x40 # restore fp1 only | |
4889 | unlk %a6 | |
4890 | rts | |
4891 | ||
4892 | ||
4893 | ######################################################################### | |
4894 | # ssin(): computes the sine of a normalized input # | |
4895 | # ssind(): computes the sine of a denormalized input # | |
4896 | # scos(): computes the cosine of a normalized input # | |
4897 | # scosd(): computes the cosine of a denormalized input # | |
4898 | # ssincos(): computes the sine and cosine of a normalized input # | |
4899 | # ssincosd(): computes the sine and cosine of a denormalized input # | |
4900 | # # | |
4901 | # INPUT *************************************************************** # | |
4902 | # a0 = pointer to extended precision input # | |
4903 | # d0 = round precision,mode # | |
4904 | # # | |
4905 | # OUTPUT ************************************************************** # | |
4906 | # fp0 = sin(X) or cos(X) # | |
4907 | # # | |
4908 | # For ssincos(X): # | |
4909 | # fp0 = sin(X) # | |
4910 | # fp1 = cos(X) # | |
4911 | # # | |
4912 | # ACCURACY and MONOTONICITY ******************************************* # | |
4913 | # The returned result is within 1 ulp in 64 significant bits, i.e. # | |
4914 | # within 0.5001 ulp to 53 bits if the result is subsequently # | |
4915 | # rounded to double precision. The result is provably monotonic # | |
4916 | # in double precision. # | |
4917 | # # | |
4918 | # ALGORITHM *********************************************************** # | |
4919 | # # | |
4920 | # SIN and COS: # | |
4921 | # 1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1. # | |
4922 | # # | |
4923 | # 2. If |X| >= 15Pi or |X| < 2**(-40), go to 7. # | |
4924 | # # | |
4925 | # 3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # | |
4926 | # k = N mod 4, so in particular, k = 0,1,2,or 3. # | |
4927 | # Overwrite k by k := k + AdjN. # | |
4928 | # # | |
4929 | # 4. If k is even, go to 6. # | |
4930 | # # | |
4931 | # 5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. # | |
4932 | # Return sgn*cos(r) where cos(r) is approximated by an # | |
4933 | # even polynomial in r, 1 + r*r*(B1+s*(B2+ ... + s*B8)), # | |
4934 | # s = r*r. # | |
4935 | # Exit. # | |
4936 | # # | |
4937 | # 6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r) # | |
4938 | # where sin(r) is approximated by an odd polynomial in r # | |
4939 | # r + r*s*(A1+s*(A2+ ... + s*A7)), s = r*r. # | |
4940 | # Exit. # | |
4941 | # # | |
4942 | # 7. If |X| > 1, go to 9. # | |
4943 | # # | |
4944 | # 8. (|X|<2**(-40)) If SIN is invoked, return X; # | |
4945 | # otherwise return 1. # | |
4946 | # # | |
4947 | # 9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, # | |
4948 | # go back to 3. # | |
4949 | # # | |
4950 | # SINCOS: # | |
4951 | # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. # | |
4952 | # # | |
4953 | # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # | |
4954 | # k = N mod 4, so in particular, k = 0,1,2,or 3. # | |
4955 | # # | |
4956 | # 3. If k is even, go to 5. # | |
4957 | # # | |
4958 | # 4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), ie. # | |
4959 | # j1 exclusive or with the l.s.b. of k. # | |
4960 | # sgn1 := (-1)**j1, sgn2 := (-1)**j2. # | |
4961 | # SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where # | |
4962 | # sin(r) and cos(r) are computed as odd and even # | |
4963 | # polynomials in r, respectively. Exit # | |
4964 | # # | |
4965 | # 5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1. # | |
4966 | # SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where # | |
4967 | # sin(r) and cos(r) are computed as odd and even # | |
4968 | # polynomials in r, respectively. Exit # | |
4969 | # # | |
4970 | # 6. If |X| > 1, go to 8. # | |
4971 | # # | |
4972 | # 7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit. # | |
4973 | # # | |
4974 | # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, # | |
4975 | # go back to 2. # | |
4976 | # # | |
4977 | ######################################################################### | |
4978 | ||
4979 | SINA7: long 0xBD6AAA77,0xCCC994F5 | |
4980 | SINA6: long 0x3DE61209,0x7AAE8DA1 | |
4981 | SINA5: long 0xBE5AE645,0x2A118AE4 | |
4982 | SINA4: long 0x3EC71DE3,0xA5341531 | |
4983 | SINA3: long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000 | |
4984 | SINA2: long 0x3FF80000,0x88888888,0x888859AF,0x00000000 | |
4985 | SINA1: long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000 | |
4986 | ||
4987 | COSB8: long 0x3D2AC4D0,0xD6011EE3 | |
4988 | COSB7: long 0xBDA9396F,0x9F45AC19 | |
4989 | COSB6: long 0x3E21EED9,0x0612C972 | |
4990 | COSB5: long 0xBE927E4F,0xB79D9FCF | |
4991 | COSB4: long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000 | |
4992 | COSB3: long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000 | |
4993 | COSB2: long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E | |
4994 | COSB1: long 0xBF000000 | |
4995 | ||
4996 | set INARG,FP_SCR0 | |
4997 | ||
4998 | set X,FP_SCR0 | |
4999 | # set XDCARE,X+2 (disabled). Note the aliasing in this group: INARG, X and RPRIME all name FP_SCR0; POSNEG1 and TWOTO63 share L_SCR1; ENDFLAG and INT share L_SCR2. | |
5000 | set XFRAC,X+4 | |
5001 | ||
5002 | set RPRIME,FP_SCR0 | |
5003 | set SPRIME,FP_SCR1 | |
5004 | ||
5005 | set POSNEG1,L_SCR1 | |
5006 | set TWOTO63,L_SCR1 | |
5007 | ||
5008 | set ENDFLAG,L_SCR2 | |
5009 | set INT,L_SCR2 | |
5010 | ||
5011 | set ADJN,L_SCR3 | |
5012 | ||
5013 | ############################################ | |
5014 | global ssin | |
5015 | ssin: | |
5016 | mov.l &0,ADJN(%a6) # sine entry: SET ADJN TO 0 (algorithm step 1) | |
5017 | bra.b SINBGN | |
5018 | ||
5019 | ############################################ | |
5020 | global scos | |
5021 | scos: | |
5022 | mov.l &1,ADJN(%a6) # cosine entry: SET ADJN TO 1, then fall through to SINBGN | |
5023 | ||
5024 | ############################################ | |
5025 | SINBGN: | |
5026 | #--LOAD X AND KEEP A COPY. CHECK IF |X| IS TOO SMALL OR LARGE | |
5027 | ||
5028 | fmov.x (%a0),%fp0 # LOAD INPUT | |
5029 | fmov.x %fp0,X(%a6) # save input at X | |
5030 | ||
5031 | # "COMPACTIFY" X: d1 = exponent word in the high half, top 16 mantissa bits in the low half, sign cleared | |
5032 | mov.l (%a0),%d1 # put exp in hi word | |
5033 | mov.w 4(%a0),%d1 # fetch hi(man) into the low word | |
5034 | and.l &0x7FFFFFFF,%d1 # strip sign | |
5035 | ||
5036 | cmpi.l %d1,&0x3FD78000 # is |X| >= 2**(-40)? | |
5037 | bge.b SOK1 # yes; go check the upper bound | |
5038 | bra.w SINSM # no; input is very small | |
5039 | ||
5040 | SOK1: | |
5041 | cmp.l %d1,&0x4004BC7E # is |X| < 15 PI? | |
5042 | blt.b SINMAIN # yes; usual case | |
5043 | bra.w SREDUCEX # no; input is very large | |
5044 | ||
5045 | #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI. | |
5046 | #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP. | |
5047 | SINMAIN: | |
5048 | fmov.x %fp0,%fp1 | |
5049 | fmul.d TWOBYPI(%pc),%fp1 # X*2/PI | |
5050 | ||
5051 | lea PITBL+0x200(%pc),%a1 # TABLE OF N*PI/2, N = -32,...,32; a1 starts at the N = 0 entry | |
5052 | ||
5053 | fmov.l %fp1,INT(%a6) # CONVERT X*2/PI TO INTEGER N, kept in INT | |
5054 | ||
5055 | mov.l INT(%a6),%d1 # make a copy of N | |
5056 | asl.l &4,%d1 # N *= 16 (table entries are indexed in 16-byte steps) | |
5057 | add.l %d1,%a1 # tbl_addr = a1 + (N*16) | |
5058 | ||
5059 | # A1 IS THE ADDRESS OF N*PIBY2 | |
5060 | # ...WHICH IS IN TWO PIECES Y1 & Y2 (Y1 read as extended, Y2 read as single) | |
5061 | fsub.x (%a1)+,%fp0 # X-Y1; a1 advances past Y1 | |
5062 | fsub.s (%a1),%fp0 # fp0 = R = (X-Y1)-Y2 | |
5063 | ||
5064 | SINCONT: | |
5065 | #--continuation from REDUCEX | |
5066 | ||
5067 | #--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED (ODD -> COSPOLY, EVEN -> FALL THROUGH) | |
5068 | mov.l INT(%a6),%d1 | |
5069 | add.l ADJN(%a6),%d1 # D1 = N+ADJN; SEE IF D1 IS ODD OR EVEN | |
5070 | ror.l &1,%d1 # BIT 0 ROTATES INTO THE SIGN BIT: D1 WAS ODD IFF D1 IS NOW NEGATIVE | |
5071 | cmp.l %d1,&0 | |
5072 | blt.w COSPOLY | |
5073 | ||
5074 | #--N+ADJN IS EVEN. LET J BE BIT 1 OF N+ADJN (THE LEAST SIG. BIT OF D1 | |
5075 | #--AFTER THE ROTATE IN SINCONT), LET SGN := (-1)**J. | |
5076 | #--THEN WE RETURN SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY | |
5077 | #--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE R' = SGN*R, S=R*R. | |
5078 | #--THIS IS EVALUATED AS R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))]) | |
5079 | #--WHERE T=S*S. | |
5080 | #--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION | |
5081 | #--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT. | |
5082 | SINPOLY: | |
5083 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | |
5084 | ||
5085 | fmov.x %fp0,X(%a6) # X IS R | |
5086 | fmul.x %fp0,%fp0 # FP0 IS S | |
5087 | ||
5088 | fmov.d SINA7(%pc),%fp3 | |
5089 | fmov.d SINA6(%pc),%fp2 | |
5090 | ||
5091 | fmov.x %fp0,%fp1 | |
5092 | fmul.x %fp1,%fp1 # FP1 IS T | |
5093 | ||
5094 | ror.l &1,%d1 | |
5095 | and.l &0x80000000,%d1 | |
5096 | # ...J (BIT 1 OF N+ADJN) IS NOW IN THE SIGN POSITION OF D1 | |
5097 | eor.l %d1,X(%a6) # X IS NOW R'= SGN*R | |
5098 | ||
5099 | fmul.x %fp1,%fp3 # TA7 | |
5100 | fmul.x %fp1,%fp2 # TA6 | |
5101 | ||
5102 | fadd.d SINA5(%pc),%fp3 # A5+TA7 | |
5103 | fadd.d SINA4(%pc),%fp2 # A4+TA6 | |
5104 | ||
5105 | fmul.x %fp1,%fp3 # T(A5+TA7) | |
5106 | fmul.x %fp1,%fp2 # T(A4+TA6) | |
5107 | ||
5108 | fadd.d SINA3(%pc),%fp3 # A3+T(A5+TA7) | |
5109 | fadd.x SINA2(%pc),%fp2 # A2+T(A4+TA6) | |
5110 | ||
5111 | fmul.x %fp3,%fp1 # T(A3+T(A5+TA7)) | |
5112 | ||
5113 | fmul.x %fp0,%fp2 # S(A2+T(A4+TA6)) | |
5114 | fadd.x SINA1(%pc),%fp1 # A1+T(A3+T(A5+TA7)) | |
5115 | fmul.x X(%a6),%fp0 # R'*S | |
5116 | ||
5117 | fadd.x %fp2,%fp1 # [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] | |
5118 | ||
5119 | fmul.x %fp1,%fp0 # SIN(R')-R' | |
5120 | ||
5121 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 | |
5122 | ||
5123 | fmov.l %d0,%fpcr # restore users round mode,prec | |
5124 | fadd.x X(%a6),%fp0 # fp0 = SIN(R'); last inst - possible exception set | |
5125 | bra t_inx2 | |
5126 | ||
5127 | #--N+ADJN IS ODD. LET J BE BIT 1 OF N+ADJN, LET SGN := (-1)**J. | |
5128 | #--THEN WE RETURN SGN*COS(R). SGN*COS(R) IS COMPUTED BY | |
5129 | #--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE | |
5130 | #--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS | |
5131 | #--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))]) | |
5132 | #--WHERE T=S*S. | |
5133 | #--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION | |
5134 | #--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2 | |
5135 | #--AND IS THEREFORE STORED AS SINGLE PRECISION. | |
5136 | COSPOLY: | |
5137 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | |
5138 | ||
5139 | fmul.x %fp0,%fp0 # FP0 IS S | |
5140 | ||
5141 | fmov.d COSB8(%pc),%fp2 | |
5142 | fmov.d COSB7(%pc),%fp3 | |
5143 | ||
5144 | fmov.x %fp0,%fp1 | |
5145 | fmul.x %fp1,%fp1 # FP1 IS T | |
5146 | ||
5147 | fmov.x %fp0,X(%a6) # X IS S | |
5148 | ror.l &1,%d1 | |
5149 | and.l &0x80000000,%d1 | |
5150 | # ...J (BIT 1 OF N+ADJN) IS NOW IN THE SIGN POSITION OF D1 | |
5151 | ||
5152 | fmul.x %fp1,%fp2 # TB8 | |
5153 | ||
5154 | eor.l %d1,X(%a6) # X IS NOW S'= SGN*S | |
5155 | and.l &0x80000000,%d1 | |
5156 | ||
5157 | fmul.x %fp1,%fp3 # TB7 | |
5158 | ||
5159 | or.l &0x3F800000,%d1 # D1 IS SGN (+-1.0) IN SINGLE | |
5160 | mov.l %d1,POSNEG1(%a6) | |
5161 | ||
5162 | fadd.d COSB6(%pc),%fp2 # B6+TB8 | |
5163 | fadd.d COSB5(%pc),%fp3 # B5+TB7 | |
5164 | ||
5165 | fmul.x %fp1,%fp2 # T(B6+TB8) | |
5166 | fmul.x %fp1,%fp3 # T(B5+TB7) | |
5167 | ||
5168 | fadd.d COSB4(%pc),%fp2 # B4+T(B6+TB8) | |
5169 | fadd.x COSB3(%pc),%fp3 # B3+T(B5+TB7) | |
5170 | ||
5171 | fmul.x %fp1,%fp2 # T(B4+T(B6+TB8)) | |
5172 | fmul.x %fp3,%fp1 # T(B3+T(B5+TB7)) | |
5173 | ||
5174 | fadd.x COSB2(%pc),%fp2 # B2+T(B4+T(B6+TB8)) | |
5175 | fadd.s COSB1(%pc),%fp1 # B1+T(B3+T(B5+TB7)) | |
5176 | ||
5177 | fmul.x %fp2,%fp0 # S(B2+T(B4+T(B6+TB8))) | |
5178 | ||
5179 | fadd.x %fp1,%fp0 | |
5180 | ||
5181 | fmul.x X(%a6),%fp0 | |
5182 | ||
5183 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 | |
5184 | ||
5185 | fmov.l %d0,%fpcr # restore users round mode,prec | |
5186 | fadd.s POSNEG1(%a6),%fp0 # fp0 = SGN + S'*(...); last inst - possible exception set | |
5187 | bra t_inx2 | |
5188 | ||
5189 | ############################################## | |
5190 | ||
5191 | # SINe: Big OR Small? NOTE(review): no branch to SINBORS is visible in this part of the file. | |
5192 | #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. | |
5193 | #--IF |X| < 2**(-40), RETURN X (ADJN = 0, SINE) OR 1 (ADJN > 0, COSINE). | |
5194 | SINBORS: | |
5195 | cmp.l %d1,&0x3FFF8000 | |
5196 | bgt.l SREDUCEX | |
5197 | ||
5198 | SINSM: | |
5199 | mov.l ADJN(%a6),%d1 | |
5200 | cmp.l %d1,&0 | |
5201 | bgt.b COSTINY | |
5202 | ||
5203 | # here, the operation may underflow iff the precision is sgl or dbl. | |
5204 | # extended denorms are handled through another entry point. | |
5205 | SINTINY: | |
5206 | # mov.w &0x0000,XDCARE(%a6) # JUST IN CASE | |
5207 | ||
5208 | fmov.l %d0,%fpcr # restore users round mode,prec | |
5209 | mov.b &FMOV_OP,%d1 # last inst is MOVE | |
5210 | fmov.x X(%a6),%fp0 # fp0 = X; last inst - possible exception set | |
5211 | bra t_catch | |
5212 | ||
5213 | COSTINY: | |
5214 | fmov.s &0x3F800000,%fp0 # fp0 = 1.0 | |
5215 | fmov.l %d0,%fpcr # restore users round mode,prec | |
5216 | fadd.s &0x80800000,%fp0 # fp0 = 1.0 - 2**(-126); last inst - possible exception set | |
5217 | bra t_pinx2 | |
5218 | ||
5219 | ################################################ | |
5220 | global ssind | |
5221 | #--ssind: SIN(X) = X FOR DENORMALIZED X; THE RESULT IS PRODUCED BY t_extdnrm | |
5222 | ssind: | |
5223 | bra t_extdnrm | |
5224 | ||
5225 | ############################################ | |
5226 | global scosd | |
5227 | #--scosd: COS(X) = 1 FOR DENORMALIZED X; EXITS THROUGH t_pinx2 | |
5228 | scosd: | |
5229 | fmov.s &0x3F800000,%fp0 # fp0 = 1.0 | |
5230 | bra t_pinx2 | |
5231 | ||
5232 | ################################################## | |
5233 | ||
5234 | global ssincos | |
5235 | ssincos: | |
5236 | #--SET ADJN TO 4 SO THAT SRESTORE RETURNS TO SCCONT INSTEAD OF SINCONT | |
5237 | mov.l &4,ADJN(%a6) | |
5238 | ||
5239 | fmov.x (%a0),%fp0 # LOAD INPUT | |
5240 | fmov.x %fp0,X(%a6) | |
5241 | ||
5242 | mov.l (%a0),%d1 | |
5243 | mov.w 4(%a0),%d1 | |
5244 | and.l &0x7FFFFFFF,%d1 # COMPACTIFY X: D1 = EXP : HI(MAN), SIGN STRIPPED | |
5245 | ||
5246 | cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)? (ELSE SCSM HANDLES TINY X) | |
5247 | bge.b SCOK1 | |
5248 | bra.w SCSM | |
5249 | ||
5250 | SCOK1: | |
5251 | cmp.l %d1,&0x4004BC7E # |X| < 15 PI? (ELSE GENERAL REDUCTION IN SREDUCEX) | |
5252 | blt.b SCMAIN | |
5253 | bra.w SREDUCEX | |
5254 | ||
5255 | ||
5256 | #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI. | |
5257 | #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP: R = X - N*PI/2. | |
5258 | SCMAIN: | |
5259 | fmov.x %fp0,%fp1 | |
5260 | ||
5261 | fmul.d TWOBYPI(%pc),%fp1 # fp1 = X*2/PI | |
5262 | ||
5263 | lea PITBL+0x200(%pc),%a1 # a1 -> N = 0 ENTRY OF THE N*PI/2 TABLE, N = -32,...,32 | |
5264 | ||
5265 | fmov.l %fp1,INT(%a6) # CONVERT TO INTEGER: INT = N | |
5266 | ||
5267 | mov.l INT(%a6),%d1 | |
5268 | asl.l &4,%d1 | |
5269 | add.l %d1,%a1 # ADDRESS OF N*PIBY2 (16 BYTES PER ENTRY), IN Y1, Y2 | |
5270 | ||
5271 | fsub.x (%a1)+,%fp0 # X-Y1 | |
5272 | fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 | |
5273 | ||
5274 | SCCONT: | |
5275 | #--continuation point from SREDUCEX (via SRESTORE); fp0 = R, INT = N | |
5276 | ||
5277 | mov.l INT(%a6),%d1 | |
5278 | ror.l &1,%d1 | |
5279 | cmp.l %d1,&0 # D1 < 0 IFF N IS ODD | |
5280 | bge.w NEVEN | |
5281 | ||
5282 | SNODD: | |
5283 | #--N IS ODD: FP1 BUILDS +-SIN(R) = COS(X), FP0 BUILDS +-COS(R) = SIN(X). FP2 SAVED HERE, D2 BELOW. | |
5284 | fmovm.x &0x04,-(%sp) # save fp2 | |
5285 | ||
5286 | fmov.x %fp0,RPRIME(%a6) | |
5287 | fmul.x %fp0,%fp0 # FP0 IS S = R*R | |
5288 | fmov.d SINA7(%pc),%fp1 # A7 | |
5289 | fmov.d COSB8(%pc),%fp2 # B8 | |
5290 | fmul.x %fp0,%fp1 # SA7 | |
5291 | fmul.x %fp0,%fp2 # SB8 | |
5292 | ||
5293 | mov.l %d2,-(%sp) | |
5294 | mov.l %d1,%d2 | |
5295 | ror.l &1,%d2 | |
5296 | and.l &0x80000000,%d2 | |
5297 | eor.l %d1,%d2 | |
5298 | and.l &0x80000000,%d2 | |
5299 | ||
5300 | fadd.d SINA6(%pc),%fp1 # A6+SA7 | |
5301 | fadd.d COSB7(%pc),%fp2 # B7+SB8 | |
5302 | ||
5303 | fmul.x %fp0,%fp1 # S(A6+SA7) | |
5304 | eor.l %d2,RPRIME(%a6) | |
5305 | mov.l (%sp)+,%d2 | |
5306 | fmul.x %fp0,%fp2 # S(B7+SB8) | |
5307 | ror.l &1,%d1 | |
5308 | and.l &0x80000000,%d1 | |
5309 | mov.l &0x3F800000,POSNEG1(%a6) | |
5310 | eor.l %d1,POSNEG1(%a6) | |
5311 | ||
5312 | fadd.d SINA5(%pc),%fp1 # A5+S(A6+SA7) | |
5313 | fadd.d COSB6(%pc),%fp2 # B6+S(B7+SB8) | |
5314 | ||
5315 | fmul.x %fp0,%fp1 # S(A5+S(A6+SA7)) | |
5316 | fmul.x %fp0,%fp2 # S(B6+S(B7+SB8)) | |
5317 | fmov.x %fp0,SPRIME(%a6) | |
5318 | ||
5319 | fadd.d SINA4(%pc),%fp1 # A4+S(A5+S(A6+SA7)) | |
5320 | eor.l %d1,SPRIME(%a6) | |
5321 | fadd.d COSB5(%pc),%fp2 # B5+S(B6+S(B7+SB8)) | |
5322 | ||
5323 | fmul.x %fp0,%fp1 # S(A4+...) | |
5324 | fmul.x %fp0,%fp2 # S(B5+...) | |
5325 | ||
5326 | fadd.d SINA3(%pc),%fp1 # A3+S(A4+...) | |
5327 | fadd.d COSB4(%pc),%fp2 # B4+S(B5+...) | |
5328 | ||
5329 | fmul.x %fp0,%fp1 # S(A3+...) | |
5330 | fmul.x %fp0,%fp2 # S(B4+...) | |
5331 | ||
5332 | fadd.x SINA2(%pc),%fp1 # A2+S(A3+...) | |
5333 | fadd.x COSB3(%pc),%fp2 # B3+S(B4+...) | |
5334 | ||
5335 | fmul.x %fp0,%fp1 # S(A2+...) | |
5336 | fmul.x %fp0,%fp2 # S(B3+...) | |
5337 | ||
5338 | fadd.x SINA1(%pc),%fp1 # A1+S(A2+...) | |
5339 | fadd.x COSB2(%pc),%fp2 # B2+S(B3+...) | |
5340 | ||
5341 | fmul.x %fp0,%fp1 # S(A1+...) | |
5342 | fmul.x %fp2,%fp0 # S(B2+...) | |
5343 | ||
5344 | fmul.x RPRIME(%a6),%fp1 # R'S(A1+...) | |
5345 | fadd.s COSB1(%pc),%fp0 # B1+S(B2...) | |
5346 | fmul.x SPRIME(%a6),%fp0 # S'(B1+S(B2+...)) | |
5347 | ||
5348 | fmovm.x (%sp)+,&0x20 # restore fp2 | |
5349 | ||
5350 | fmov.l %d0,%fpcr | |
5351 | fadd.x RPRIME(%a6),%fp1 # COS(X) = R' + R'S(A1+...) | |
5352 | bsr sto_cos # store cosine result | |
5353 | fadd.s POSNEG1(%a6),%fp0 # SIN(X) = +-1 + S'(B1+...) | |
5354 | bra t_inx2 | |
5355 | ||
5356 | NEVEN: | |
5357 | #--N IS EVEN: FP0 BUILDS +-SIN(R) = SIN(X), FP1 BUILDS +-COS(R) = COS(X). FP2 IS SAVED HERE. | |
5358 | fmovm.x &0x04,-(%sp) # save fp2 | |
5359 | ||
5360 | fmov.x %fp0,RPRIME(%a6) | |
5361 | fmul.x %fp0,%fp0 # FP0 IS S = R*R | |
5362 | ||
5363 | fmov.d COSB8(%pc),%fp1 # B8 | |
5364 | fmov.d SINA7(%pc),%fp2 # A7 | |
5365 | ||
5366 | fmul.x %fp0,%fp1 # SB8 | |
5367 | fmov.x %fp0,SPRIME(%a6) | |
5368 | fmul.x %fp0,%fp2 # SA7 | |
5369 | ||
5370 | ror.l &1,%d1 | |
5371 | and.l &0x80000000,%d1 | |
5372 | ||
5373 | fadd.d COSB7(%pc),%fp1 # B7+SB8 | |
5374 | fadd.d SINA6(%pc),%fp2 # A6+SA7 | |
5375 | ||
5376 | eor.l %d1,RPRIME(%a6) | |
5377 | eor.l %d1,SPRIME(%a6) | |
5378 | ||
5379 | fmul.x %fp0,%fp1 # S(B7+SB8) | |
5380 | ||
5381 | or.l &0x3F800000,%d1 | |
5382 | mov.l %d1,POSNEG1(%a6) | |
5383 | ||
5384 | fmul.x %fp0,%fp2 # S(A6+SA7) | |
5385 | ||
5386 | fadd.d COSB6(%pc),%fp1 # B6+S(B7+SB8) | |
5387 | fadd.d SINA5(%pc),%fp2 # A5+S(A6+SA7) | |
5388 | ||
5389 | fmul.x %fp0,%fp1 # S(B6+S(B7+SB8)) | |
5390 | fmul.x %fp0,%fp2 # S(A5+S(A6+SA7)) | |
5391 | ||
5392 | fadd.d COSB5(%pc),%fp1 # B5+S(B6+S(B7+SB8)) | |
5393 | fadd.d SINA4(%pc),%fp2 # A4+S(A5+S(A6+SA7)) | |
5394 | ||
5395 | fmul.x %fp0,%fp1 # S(B5+...) | |
5396 | fmul.x %fp0,%fp2 # S(A4+...) | |
5397 | ||
5398 | fadd.d COSB4(%pc),%fp1 # B4+S(B5+...) | |
5399 | fadd.d SINA3(%pc),%fp2 # A3+S(A4+...) | |
5400 | ||
5401 | fmul.x %fp0,%fp1 # S(B4+...) | |
5402 | fmul.x %fp0,%fp2 # S(A3+...) | |
5403 | ||
5404 | fadd.x COSB3(%pc),%fp1 # B3+S(B4+...) | |
5405 | fadd.x SINA2(%pc),%fp2 # A2+S(A3+...) | |
5406 | ||
5407 | fmul.x %fp0,%fp1 # S(B3+...) | |
5408 | fmul.x %fp0,%fp2 # S(A2+...) | |
5409 | ||
5410 | fadd.x COSB2(%pc),%fp1 # B2+S(B3+...) | |
5411 | fadd.x SINA1(%pc),%fp2 # A1+S(A2+...) | |
5412 | ||
5413 | fmul.x %fp0,%fp1 # S(B2+...) | |
5414 | fmul.x %fp2,%fp0 # S(A1+...) | |
5415 | ||
5416 | ||
5417 | fadd.s COSB1(%pc),%fp1 # B1+S(B2...) | |
5418 | fmul.x RPRIME(%a6),%fp0 # R'S(A1+...) | |
5419 | fmul.x SPRIME(%a6),%fp1 # S'(B1+S(B2+...)) | |
5420 | ||
5421 | fmovm.x (%sp)+,&0x20 # restore fp2 | |
5422 | ||
5423 | fmov.l %d0,%fpcr | |
5424 | fadd.s POSNEG1(%a6),%fp1 # COS(X) = +-1 + S'(B1+...) | |
5425 | bsr sto_cos # store cosine result | |
5426 | fadd.x RPRIME(%a6),%fp0 # SIN(X) = R' + R'S(A1+...) | |
5427 | bra t_inx2 | |
5428 | ||
5429 | ################################################ | |
5430 | ||
5431 | SCBORS: | |
5432 | cmp.l %d1,&0x3FFF8000 | |
5433 | bgt.w SREDUCEX | |
5434 | ||
5435 | ################################################ | |
5436 | ||
5437 | SCSM: | |
5438 | # mov.w &0x0000,XDCARE(%a6) | |
5439 | fmov.s &0x3F800000,%fp1 | |
5440 | ||
5441 | fmov.l %d0,%fpcr | |
5442 | fsub.s &0x00800000,%fp1 | |
5443 | bsr sto_cos # store cosine result (fp1 = 1.0 - 2**(-126) in user mode) | |
5444 | fmov.l %fpcr,%d0 # d0 must have fpcr,too | |
5445 | mov.b &FMOV_OP,%d1 # last inst is MOVE | |
5446 | fmov.x X(%a6),%fp0 | |
5447 | bra t_catch | |
5448 | ||
5449 | ############################################## | |
5450 | ||
5451 | global ssincosd | |
5452 | #--ssincosd: FOR DENORMALIZED X, COS(X) = 1.0 IS STORED VIA sto_cos; SIN(X) IS LEFT TO t_extdnrm | |
5453 | ssincosd: | |
5454 | mov.l %d0,-(%sp) # save d0 across the sto_cos call | |
5455 | fmov.s &0x3F800000,%fp1 | |
5456 | bsr sto_cos # store cosine result | |
5457 | mov.l (%sp)+,%d0 # restore d0 | |
5458 | bra t_extdnrm | |
5459 | ||
5460 | ############################################ | |
5461 | ||
5462 | #--WHEN SREDUCEX IS USED (|X| >= 15 PI), THE CODE WILL INEVITABLY BE SLOW. | |
5463 | #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING | |
5464 | #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. | |
5465 | SREDUCEX: | |
5466 | fmovm.x &0x3c,-(%sp) # save {fp2-fp5} | |
5467 | mov.l %d2,-(%sp) # save d2 | |
5468 | fmov.s &0x00000000,%fp1 # fp1 = 0 (low part r of the remainder) | |
5469 | ||
5470 | #--If compact form of abs(arg) in d1=$7ffeffff, argument is so large that | |
5471 | #--there is a danger of unwanted overflow in first LOOP iteration. In this | |
5472 | #--case, reduce argument by one remainder step to make subsequent reduction | |
5473 | #--safe. | |
5474 | cmp.l %d1,&0x7ffeffff # is arg dangerously large? | |
5475 | bne.b SLOOP # no | |
5476 | ||
5477 | # yes; create 2**16383*PI/2 | |
5478 | mov.w &0x7ffe,FP_SCR0_EX(%a6) | |
5479 | mov.l &0xc90fdaa2,FP_SCR0_HI(%a6) | |
5480 | clr.l FP_SCR0_LO(%a6) | |
5481 | ||
5482 | # create low half of 2**16383*PI/2 at FP_SCR1 | |
5483 | mov.w &0x7fdc,FP_SCR1_EX(%a6) | |
5484 | mov.l &0x85a308d3,FP_SCR1_HI(%a6) | |
5485 | clr.l FP_SCR1_LO(%a6) | |
5486 | ||
5487 | ftest.x %fp0 # test sign of argument | |
5488 | fblt.w sred_neg | |
5489 | ||
5490 | or.b &0x80,FP_SCR0_EX(%a6) # positive arg: negate both pieces so the adds below subtract | |
5491 | or.b &0x80,FP_SCR1_EX(%a6) | |
5492 | sred_neg: | |
5493 | fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact | |
5494 | fmov.x %fp0,%fp1 # save high result in fp1 | |
5495 | fadd.x FP_SCR1(%a6),%fp0 # low part of reduction | |
5496 | fsub.x %fp0,%fp1 # determine low component of result | |
5497 | fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument. | |
5498 | ||
5499 | #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. | |
5500 | #--integer quotient will be stored in N | |
5501 | #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) | |
5502 | SLOOP: | |
5503 | fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2 | |
5504 | mov.w INARG(%a6),%d1 | |
5505 | mov.l %d1,%a1 # save a copy of D1 (low word = sign/exponent of INARG) | |
5506 | and.l &0x00007FFF,%d1 | |
5507 | sub.l &0x00003FFF,%d1 # d1 = K | |
5508 | cmp.l %d1,&28 | |
5509 | ble.b SLASTLOOP | |
5510 | SCONTLOOP: | |
5511 | sub.l &27,%d1 # d1 = L := K-27 | |
5512 | mov.b &0,ENDFLAG(%a6) | |
5513 | bra.b SWORK | |
5514 | SLASTLOOP: | |
5515 | clr.l %d1 # d1 = L := 0 | |
5516 | mov.b &1,ENDFLAG(%a6) | |
5517 | ||
5518 | SWORK: | |
5519 | #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN | |
5520 | #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. | |
5521 | ||
5522 | #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), | |
5523 | #--2**L * (PIby2_1), 2**L * (PIby2_2) | |
5524 | ||
5525 | mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI | |
5526 | sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI) | |
5527 | ||
5528 | mov.l &0xA2F9836E,FP_SCR0_HI(%a6) | |
5529 | mov.l &0x4E44152A,FP_SCR0_LO(%a6) | |
5530 | mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI) | |
5531 | ||
5532 | fmov.x %fp0,%fp2 | |
5533 | fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI) | |
5534 | ||
5535 | #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN | |
5536 | #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N | |
5537 | #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT | |
5538 | #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE | |
5539 | #--US THE DESIRED VALUE IN FLOATING POINT. | |
5540 | mov.l %a1,%d2 | |
5541 | swap %d2 | |
5542 | and.l &0x80000000,%d2 | |
5543 | or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL | |
5544 | mov.l %d2,TWOTO63(%a6) | |
5545 | fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED | |
5546 | fsub.s TWOTO63(%a6),%fp2 # fp2 = N | |
5547 | # fint.x %fp2 | |
5548 | ||
5549 | #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2 | |
5550 | mov.l %d1,%d2 # d2 = L | |
5551 | ||
5552 | add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2) | |
5553 | mov.w %d2,FP_SCR0_EX(%a6) | |
5554 | mov.l &0xC90FDAA2,FP_SCR0_HI(%a6) | |
5555 | clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1 | |
5556 | ||
5557 | add.l &0x00003FDD,%d1 | |
5558 | mov.w %d1,FP_SCR1_EX(%a6) | |
5559 | mov.l &0x85A308D3,FP_SCR1_HI(%a6) | |
5560 | clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2 | |
5561 | ||
5562 | mov.b ENDFLAG(%a6),%d1 | |
5563 | ||
5564 | #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and | |
5565 | #--P2 = 2**(L) * Piby2_2 | |
5566 | fmov.x %fp2,%fp4 # fp4 = N | |
5567 | fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1 | |
5568 | fmov.x %fp2,%fp5 # fp5 = N | |
5569 | fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2 | |
5570 | fmov.x %fp4,%fp3 # fp3 = W = N*P1 | |
5571 | ||
5572 | #--we want P+p = W+w but |p| <= half ulp of P | |
5573 | #--Then, we need to compute A := R-P and a := r-p | |
5574 | fadd.x %fp5,%fp3 # fp3 = P | |
5575 | fsub.x %fp3,%fp4 # fp4 = W-P | |
5576 | ||
5577 | fsub.x %fp3,%fp0 # fp0 = A := R - P | |
5578 | fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w | |
5579 | ||
5580 | fmov.x %fp0,%fp3 # fp3 = A | |
5581 | fsub.x %fp4,%fp1 # fp1 = a := r - p | |
5582 | ||
5583 | #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but | |
5584 | #--|r| <= half ulp of R. | |
5585 | fadd.x %fp1,%fp0 # fp0 = R := A+a | |
5586 | #--No need to calculate r if this is the last loop (ENDFLAG, now in d1, is 1) | |
5587 | cmp.b %d1,&0 | |
5588 | bgt.w SRESTORE | |
5589 | ||
5590 | #--Need to calculate r | |
5591 | fsub.x %fp0,%fp3 # fp3 = A-R | |
5592 | fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a | |
5593 | bra.w SLOOP | |
5594 | ||
5595 | SRESTORE: | |
5596 | fmov.l %fp2,INT(%a6) | |
5597 | mov.l (%sp)+,%d2 # restore d2 | |
5598 | fmovm.x (%sp)+,&0x3c # restore {fp2-fp5} | |
5599 | ||
5600 | mov.l ADJN(%a6),%d1 | |
5601 | cmp.l %d1,&4 | |
5602 | ||
5603 | blt.w SINCONT | |
5604 | bra.w SCCONT | |
5605 | ||
5606 | ######################################################################### | |
5607 | # stan(): computes the tangent of a normalized input # | |
5608 | # stand(): computes the tangent of a denormalized input # | |
5609 | # # | |
5610 | # INPUT *************************************************************** # | |
5611 | # a0 = pointer to extended precision input # | |
5612 | # d0 = round precision,mode # | |
5613 | # # | |
5614 | # OUTPUT ************************************************************** # | |
5615 | # fp0 = tan(X) # | |
5616 | # # | |
5617 | # ACCURACY and MONOTONICITY ******************************************* # | |
5618 | # The returned result is within 3 ulp in 64 significant bit, i.e. # | |
5619 | # within 0.5001 ulp to 53 bits if the result is subsequently # | |
5620 | # rounded to double precision. The result is provably monotonic # | |
5621 | # in double precision. # | |
5622 | # # | |
5623 | # ALGORITHM *********************************************************** # | |
5624 | # # | |
5625 | # 1. If |X| >= 15Pi or |X| < 2**(-40), go to 6. # | |
5626 | # # | |
5627 | # 2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let # | |
5628 | # k = N mod 2, so in particular, k = 0 or 1. # | |
5629 | # # | |
5630 | # 3. If k is odd, go to 5. # | |
5631 | # # | |
5632 | # 4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a # | |
5633 | # rational function U/V where # | |
5634 | # U = r + r*s*(P1 + s*(P2 + s*P3)), and # | |
5635 | # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r. # | |
5636 | # Exit. # | |
5637 | # # | |
5638 | # 5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by # | |
5639 | # a rational function U/V where # | |
5640 | # U = r + r*s*(P1 + s*(P2 + s*P3)), and # | |
5641 | # V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r, # | |
5642 | # -Cot(r) = -V/U. Exit. # | |
5643 | # # | |
5644 | # 6. If |X| > 1, go to 8. # | |
5645 | # # | |
5646 | # 7. (|X|<2**(-40)) Tan(X) = X. Exit. # | |
5647 | # # | |
5648 | # 8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back # | |
5649 | # to 2. # | |
5650 | # # | |
5651 | ######################################################################### | |
5652 | ||
5653 | TANQ4: | |
5654 | long 0x3EA0B759,0xF50F8688 | |
5655 | TANP3: | |
5656 | long 0xBEF2BAA5,0xA8924F04 | |
5657 | ||
5658 | TANQ3: | |
5659 | long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000 | |
5660 | ||
5661 | TANP2: | |
5662 | long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000 | |
5663 | ||
5664 | TANQ2: | |
5665 | long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000 | |
5666 | ||
5667 | TANP1: | |
5668 | long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000 | |
5669 | ||
5670 | TANQ1: | |
5671 | long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000 | |
5672 | ||
5673 | INVTWOPI: | |
5674 | long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000 | |
5675 | ||
5676 | TWOPI1: | |
5677 | long 0x40010000,0xC90FDAA2,0x00000000,0x00000000 | |
5678 | TWOPI2: | |
5679 | long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000 | |
5680 | ||
5681 | #--N*PI/2, -32 <= N <= 32, ONE 16-BYTE ENTRY PER N: A LEADING TERM IN EXT. | |
5682 | #--(12 BYTES) AND A TRAILING TERM IN SGL. (4 BYTES); N = 0 IS AT PITBL+0x200. | |
5683 | #--NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT MOST 69 BITS LONG. | |
5684 | # global PITBL | |
5685 | PITBL: | |
5686 | long 0xC0040000,0xC90FDAA2,0x2168C235,0x21800000 | |
5687 | long 0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000 | |
5688 | long 0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000 | |
5689 | long 0xC0040000,0xB6365E22,0xEE46F000,0x21480000 | |
5690 | long 0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000 | |
5691 | long 0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000 | |
5692 | long 0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000 | |
5693 | long 0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000 | |
5694 | long 0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000 | |
5695 | long 0xC0040000,0x90836524,0x88034B96,0x20B00000 | |
5696 | long 0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000 | |
5697 | long 0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000 | |
5698 | long 0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000 | |
5699 | long 0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000 | |
5700 | long 0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000 | |
5701 | long 0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000 | |
5702 | long 0xC0030000,0xC90FDAA2,0x2168C235,0x21000000 | |
5703 | long 0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000 | |
5704 | long 0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000 | |
5705 | long 0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000 | |
5706 | long 0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000 | |
5707 | long 0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000 | |
5708 | long 0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000 | |
5709 | long 0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000 | |
5710 | long 0xC0020000,0xC90FDAA2,0x2168C235,0x20800000 | |
5711 | long 0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000 | |
5712 | long 0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000 | |
5713 | long 0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000 | |
5714 | long 0xC0010000,0xC90FDAA2,0x2168C235,0x20000000 | |
5715 | long 0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000 | |
5716 | long 0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000 | |
5717 | long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000 | |
5718 | long 0x00000000,0x00000000,0x00000000,0x00000000 | |
5719 | long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000 | |
5720 | long 0x40000000,0xC90FDAA2,0x2168C235,0x9F800000 | |
5721 | long 0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000 | |
5722 | long 0x40010000,0xC90FDAA2,0x2168C235,0xA0000000 | |
5723 | long 0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000 | |
5724 | long 0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000 | |
5725 | long 0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000 | |
5726 | long 0x40020000,0xC90FDAA2,0x2168C235,0xA0800000 | |
5727 | long 0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000 | |
5728 | long 0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000 | |
5729 | long 0x40030000,0x8A3AE64F,0x76F80584,0x21080000 | |
5730 | long 0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000 | |
5731 | long 0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000 | |
5732 | long 0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000 | |
5733 | long 0x40030000,0xBC7EDCF7,0xFF523611,0x21680000 | |
5734 | long 0x40030000,0xC90FDAA2,0x2168C235,0xA1000000 | |
5735 | long 0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000 | |
5736 | long 0x40030000,0xE231D5F6,0x6595DA7B,0x21300000 | |
5737 | long 0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000 | |
5738 | long 0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000 | |
5739 | long 0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000 | |
5740 | long 0x40040000,0x8A3AE64F,0x76F80584,0x21880000 | |
5741 | long 0x40040000,0x90836524,0x88034B96,0xA0B00000 | |
5742 | long 0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000 | |
5743 | long 0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000 | |
5744 | long 0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000 | |
5745 | long 0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000 | |
5746 | long 0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000 | |
5747 | long 0x40040000,0xB6365E22,0xEE46F000,0xA1480000 | |
5748 | long 0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000 | |
5749 | long 0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000 | |
5750 | long 0x40040000,0xC90FDAA2,0x2168C235,0xA1800000 | |
5751 | ||
5752 | set INARG,FP_SCR0 | |
5753 | ||
5754 | set TWOTO63,L_SCR1 | |
5755 | set INT,L_SCR1 | |
5756 | set ENDFLAG,L_SCR2 | |
5757 | ||
5758 | global stan | |
5759 | stan: | |
5760 | fmov.x (%a0),%fp0 # LOAD INPUT: fp0 = X | |
5761 | ||
5762 | mov.l (%a0),%d1 | |
5763 | mov.w 4(%a0),%d1 | |
5764 | and.l &0x7FFFFFFF,%d1 | |
5765 | ||
5766 | cmp.l %d1,&0x3FD78000 # |X| >= 2**(-40)? (ELSE TANSM RETURNS X) | |
5767 | bge.b TANOK1 | |
5768 | bra.w TANSM | |
5769 | TANOK1: | |
5770 | cmp.l %d1,&0x4004BC7E # |X| < 15 PI? (ELSE GENERAL REDUCTION IN REDUCEX) | |
5771 | blt.b TANMAIN | |
5772 | bra.w REDUCEX | |
5773 | ||
5774 | TANMAIN: | |
5775 | #--THIS IS THE USUAL CASE, 2**(-40) <= |X| < 15 PI. | |
5776 | #--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP: R = X - N*PI/2. | |
5777 | fmov.x %fp0,%fp1 | |
5778 | fmul.d TWOBYPI(%pc),%fp1 # fp1 = X*2/PI | |
5779 | ||
5780 | lea.l PITBL+0x200(%pc),%a1 # a1 -> N = 0 ENTRY OF THE N*PI/2 TABLE, N = -32,...,32 | |
5781 | ||
5782 | fmov.l %fp1,%d1 # CONVERT TO INTEGER: d1 = N | |
5783 | ||
5784 | asl.l &4,%d1 | |
5785 | add.l %d1,%a1 # ADDRESS N*PIBY2 (16 BYTES PER ENTRY) IN Y1, Y2 | |
5786 | ||
5787 | fsub.x (%a1)+,%fp0 # X-Y1 | |
5788 | ||
5789 | fsub.s (%a1),%fp0 # FP0 IS R = (X-Y1)-Y2 | |
5790 | ||
5791 | ror.l &5,%d1 | |
5792 | and.l &0x80000000,%d1 # D1 < 0 IFF N WAS ODD (D1 HELD N*16 BEFORE THE ROTATE) | |
5793 | ||
5794 | TANCONT: | |
5795 | fmovm.x &0x0c,-(%sp) # save fp2,fp3 (restored on both the even path and in NODD) | |
5796 | ||
5797 | cmp.l %d1,&0 | |
5798 | blt.w NODD | |
5799 | ||
5800 | fmov.x %fp0,%fp1 | |
5801 | fmul.x %fp1,%fp1 # S = R*R | |
5802 | ||
5803 | fmov.d TANQ4(%pc),%fp3 | |
5804 | fmov.d TANP3(%pc),%fp2 | |
5805 | ||
5806 | fmul.x %fp1,%fp3 # SQ4 | |
5807 | fmul.x %fp1,%fp2 # SP3 | |
5808 | ||
5809 | fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 | |
5810 | fadd.x TANP2(%pc),%fp2 # P2+SP3 | |
5811 | ||
5812 | fmul.x %fp1,%fp3 # S(Q3+SQ4) | |
5813 | fmul.x %fp1,%fp2 # S(P2+SP3) | |
5814 | ||
5815 | fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) | |
5816 | fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) | |
5817 | ||
5818 | fmul.x %fp1,%fp3 # S(Q2+S(Q3+SQ4)) | |
5819 | fmul.x %fp1,%fp2 # S(P1+S(P2+SP3)) | |
5820 | ||
5821 | fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) | |
5822 | fmul.x %fp0,%fp2 # RS(P1+S(P2+SP3)) | |
5823 | ||
5824 | fmul.x %fp3,%fp1 # S(Q1+S(Q2+S(Q3+SQ4))) | |
5825 | ||
5826 | fadd.x %fp2,%fp0 # U = R+RS(P1+S(P2+SP3)) | |
5827 | ||
5828 | fadd.s &0x3F800000,%fp1 # V = 1+S(Q1+...) | |
5829 | ||
5830 | fmovm.x (%sp)+,&0x30 # restore fp2,fp3 | |
5831 | ||
5832 | fmov.l %d0,%fpcr # restore users round mode,prec | |
5833 | fdiv.x %fp1,%fp0 # fp0 = U/V; last inst - possible exception set | |
5834 | bra t_inx2 | |
5835 | ||
5836 | NODD: | |
5837 | fmov.x %fp0,%fp1 | |
5838 | fmul.x %fp0,%fp0 # S = R*R | |
5839 | ||
5840 | fmov.d TANQ4(%pc),%fp3 | |
5841 | fmov.d TANP3(%pc),%fp2 | |
5842 | ||
5843 | fmul.x %fp0,%fp3 # SQ4 | |
5844 | fmul.x %fp0,%fp2 # SP3 | |
5845 | ||
5846 | fadd.d TANQ3(%pc),%fp3 # Q3+SQ4 | |
5847 | fadd.x TANP2(%pc),%fp2 # P2+SP3 | |
5848 | ||
5849 | fmul.x %fp0,%fp3 # S(Q3+SQ4) | |
5850 | fmul.x %fp0,%fp2 # S(P2+SP3) | |
5851 | ||
5852 | fadd.x TANQ2(%pc),%fp3 # Q2+S(Q3+SQ4) | |
5853 | fadd.x TANP1(%pc),%fp2 # P1+S(P2+SP3) | |
5854 | ||
5855 | fmul.x %fp0,%fp3 # S(Q2+S(Q3+SQ4)) | |
5856 | fmul.x %fp0,%fp2 # S(P1+S(P2+SP3)) | |
5857 | ||
5858 | fadd.x TANQ1(%pc),%fp3 # Q1+S(Q2+S(Q3+SQ4)) | |
5859 | fmul.x %fp1,%fp2 # RS(P1+S(P2+SP3)) | |
5860 | ||
5861 | fmul.x %fp3,%fp0 # S(Q1+S(Q2+S(Q3+SQ4))) | |
5862 | ||
5863 | fadd.x %fp2,%fp1 # U = R+RS(P1+S(P2+SP3)) | |
5864 | fadd.s &0x3F800000,%fp0 # V = 1+S(Q1+...) | |
5865 | ||
5866 | fmovm.x (%sp)+,&0x30 # restore fp2,fp3 (saved in TANCONT) | |
5867 | ||
5868 | fmov.x %fp1,-(%sp) | |
5869 | eor.l &0x80000000,(%sp) | |
5870 | ||
5871 | fmov.l %d0,%fpcr # restore users round mode,prec | |
5872 | fdiv.x (%sp)+,%fp0 # fp0 = V/(-U); last inst - possible exception set | |
5873 | bra t_inx2 | |
5874 | ||
5875 | TANBORS: | |
5876 | #--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION. | |
5877 | #--IF |X| < 2**(-40), RETURN X. | |
5878 | cmp.l %d1,&0x3FFF8000 | |
5879 | bgt.b REDUCEX | |
5880 | ||
5881 | TANSM: | |
5882 | fmov.x %fp0,-(%sp) | |
5883 | fmov.l %d0,%fpcr # restore users round mode,prec | |
5884 | mov.b &FMOV_OP,%d1 # last inst is MOVE | |
5885 | fmov.x (%sp)+,%fp0 # last inst - possible exception set | |
5886 | bra t_catch | |
5887 | ||
5888 | global stand | |
5889 | #--stand: TAN(X) = X FOR DENORMALIZED X; THE RESULT IS PRODUCED BY t_extdnrm | |
5890 | stand: | |
5891 | bra t_extdnrm | |
5892 | ||
5893 | #--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW. | |
5894 | #--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING | |
5895 | #--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE. | |
5896 | REDUCEX: | |
5897 | fmovm.x &0x3c,-(%sp) # save {fp2-fp5} | |
5898 | mov.l %d2,-(%sp) # save d2 | |
5899 | fmov.s &0x00000000,%fp1 # fp1 = 0 | |
5900 | ||
5901 | #--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that | |
5902 | #--there is a danger of unwanted overflow in first LOOP iteration. In this | |
5903 | #--case, reduce argument by one remainder step to make subsequent reduction | |
5904 | #--safe. | |
5905 | cmp.l %d1,&0x7ffeffff # is arg dangerously large? | |
5906 | bne.b LOOP # no | |
5907 | ||
5908 | # yes; create 2**16383*PI/2 | |
5909 | mov.w &0x7ffe,FP_SCR0_EX(%a6) | |
5910 | mov.l &0xc90fdaa2,FP_SCR0_HI(%a6) | |
5911 | clr.l FP_SCR0_LO(%a6) | |
5912 | ||
5913 | # create low half of 2**16383*PI/2 at FP_SCR1 | |
5914 | mov.w &0x7fdc,FP_SCR1_EX(%a6) | |
5915 | mov.l &0x85a308d3,FP_SCR1_HI(%a6) | |
5916 | clr.l FP_SCR1_LO(%a6) | |
5917 | ||
5918 | ftest.x %fp0 # test sign of argument | |
5919 | fblt.w red_neg | |
5920 | ||
5921 | or.b &0x80,FP_SCR0_EX(%a6) # positive arg | |
5922 | or.b &0x80,FP_SCR1_EX(%a6) | |
5923 | red_neg: | |
5924 | fadd.x FP_SCR0(%a6),%fp0 # high part of reduction is exact | |
5925 | fmov.x %fp0,%fp1 # save high result in fp1 | |
5926 | fadd.x FP_SCR1(%a6),%fp0 # low part of reduction | |
5927 | fsub.x %fp0,%fp1 # determine low component of result | |
5928 | fadd.x FP_SCR1(%a6),%fp1 # fp0/fp1 are reduced argument. | |
5929 | ||
5930 | #--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4. | |
5931 | #--integer quotient will be stored in N | |
5932 | #--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1) | |
5933 | LOOP: | |
5934 | fmov.x %fp0,INARG(%a6) # +-2**K * F, 1 <= F < 2 | |
5935 | mov.w INARG(%a6),%d1 | |
5936 | mov.l %d1,%a1 # save a copy of d1 (low word = sign/exponent word of INARG) | |
5937 | and.l &0x00007FFF,%d1 | |
5938 | sub.l &0x00003FFF,%d1 # d1 = K | |
5939 | cmp.l %d1,&28 | |
5940 | ble.b LASTLOOP | |
5941 | CONTLOOP: | |
5942 | sub.l &27,%d1 # d1 = L := K-27 | |
5943 | mov.b &0,ENDFLAG(%a6) | |
5944 | bra.b WORK | |
5945 | LASTLOOP: | |
5946 | clr.l %d1 # d1 = L := 0 | |
5947 | mov.b &1,ENDFLAG(%a6) | |
5948 | ||
5949 | WORK: | |
5950 | #--FIND THE REMAINDER OF (R,r) W.R.T. 2**L * (PI/2). L IS SO CHOSEN | |
5951 | #--THAT INT( X * (2/PI) / 2**(L) ) < 2**29. | |
5952 | ||
5953 | #--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63), | |
5954 | #--2**L * (PIby2_1), 2**L * (PIby2_2) | |
5955 | ||
5956 | mov.l &0x00003FFE,%d2 # BIASED EXP OF 2/PI | |
5957 | sub.l %d1,%d2 # BIASED EXP OF 2**(-L)*(2/PI) | |
5958 | ||
5959 | mov.l &0xA2F9836E,FP_SCR0_HI(%a6) | |
5960 | mov.l &0x4E44152A,FP_SCR0_LO(%a6) | |
5961 | mov.w %d2,FP_SCR0_EX(%a6) # FP_SCR0 = 2**(-L)*(2/PI) | |
5962 | ||
5963 | fmov.x %fp0,%fp2 | |
5964 | fmul.x FP_SCR0(%a6),%fp2 # fp2 = X * 2**(-L)*(2/PI) | |
5965 | ||
5966 | #--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN | |
5967 | #--FLOATING POINT FORMAT, THE TWO FMOVE'S FMOVE.L FP <--> N | |
5968 | #--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT | |
5969 | #--(SIGN(INARG)*2**63 + FP2) - SIGN(INARG)*2**63 WILL GIVE | |
5970 | #--US THE DESIRED VALUE IN FLOATING POINT. | |
5971 | mov.l %a1,%d2 | |
5972 | swap %d2 | |
5973 | and.l &0x80000000,%d2 | |
5974 | or.l &0x5F000000,%d2 # d2 = SIGN(INARG)*2**63 IN SGL | |
5975 | mov.l %d2,TWOTO63(%a6) | |
5976 | fadd.s TWOTO63(%a6),%fp2 # THE FRACTIONAL PART OF FP2 IS ROUNDED | |
5977 | fsub.s TWOTO63(%a6),%fp2 # fp2 = N | |
5978 | # fintrz.x %fp2,%fp2 | |
5979 | ||
5980 | #--CREATING 2**(L)*Piby2_1 and 2**(L)*Piby2_2 | |
5981 | mov.l %d1,%d2 # d2 = L | |
5982 | ||
5983 | add.l &0x00003FFF,%d2 # BIASED EXP OF 2**L * (PI/2) | |
5984 | mov.w %d2,FP_SCR0_EX(%a6) | |
5985 | mov.l &0xC90FDAA2,FP_SCR0_HI(%a6) | |
5986 | clr.l FP_SCR0_LO(%a6) # FP_SCR0 = 2**(L) * Piby2_1 | |
5987 | ||
5988 | add.l &0x00003FDD,%d1 | |
5989 | mov.w %d1,FP_SCR1_EX(%a6) | |
5990 | mov.l &0x85A308D3,FP_SCR1_HI(%a6) | |
5991 | clr.l FP_SCR1_LO(%a6) # FP_SCR1 = 2**(L) * Piby2_2 | |
5992 | ||
5993 | mov.b ENDFLAG(%a6),%d1 | |
5994 | ||
5995 | #--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and | |
5996 | #--P2 = 2**(L) * Piby2_2 | |
5997 | fmov.x %fp2,%fp4 # fp4 = N | |
5998 | fmul.x FP_SCR0(%a6),%fp4 # fp4 = W = N*P1 | |
5999 | fmov.x %fp2,%fp5 # fp5 = N | |
6000 | fmul.x FP_SCR1(%a6),%fp5 # fp5 = w = N*P2 | |
6001 | fmov.x %fp4,%fp3 # fp3 = W = N*P1 | |
6002 | ||
6003 | #--we want P+p = W+w but |p| <= half ulp of P | |
6004 | #--Then, we need to compute A := R-P and a := r-p | |
6005 | fadd.x %fp5,%fp3 # fp3 = P | |
6006 | fsub.x %fp3,%fp4 # fp4 = W-P | |
6007 | ||
6008 | fsub.x %fp3,%fp0 # fp0 = A := R - P | |
6009 | fadd.x %fp5,%fp4 # fp4 = p = (W-P)+w | |
6010 | ||
6011 | fmov.x %fp0,%fp3 # fp3 = A | |
6012 | fsub.x %fp4,%fp1 # fp1 = a := r - p | |
6013 | ||
6014 | #--Now we need to normalize (A,a) to "new (R,r)" where R+r = A+a but | |
6015 | #--|r| <= half ulp of R. | |
6016 | fadd.x %fp1,%fp0 # fp0 = R := A+a | |
6017 | #--No need to calculate r if this is the last loop (d1 now holds ENDFLAG: 1 on the last pass, 0 otherwise) | |
6018 | cmp.b %d1,&0 | |
6019 | bgt.w RESTORE | |
6020 | ||
6021 | #--Need to calculate r | |
6022 | fsub.x %fp0,%fp3 # fp3 = A-R | |
6023 | fadd.x %fp3,%fp1 # fp1 = r := (A-R)+a | |
6024 | bra.w LOOP | |
6025 | ||
6026 | RESTORE: | |
6027 | fmov.l %fp2,INT(%a6) | |
6028 | mov.l (%sp)+,%d2 # restore d2 (pushed on entry to REDUCEX) | |
6029 | fmovm.x (%sp)+,&0x3c # restore {fp2-fp5} (pushed on entry to REDUCEX) | |
6030 | ||
6031 | mov.l INT(%a6),%d1 | |
6032 | ror.l &1,%d1 | |
6033 | ||
6034 | bra.w TANCONT | |
6035 | ||
6036 | ######################################################################### | |
6037 | # satan(): computes the arctangent of a normalized number # | |
6038 | # satand(): computes the arctangent of a denormalized number # | |
6039 | # # | |
6040 | # INPUT *************************************************************** # | |
6041 | # a0 = pointer to extended precision input # | |
6042 | # d0 = round precision,mode # | |
6043 | # # | |
6044 | # OUTPUT ************************************************************** # | |
6045 | # fp0 = arctan(X) # | |
6046 | # # | |
6047 | # ACCURACY and MONOTONICITY ******************************************* # | |
6048 | # The returned result is within 2 ulps in 64 significant bits, # | |
6049 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | |
6050 | # rounded to double precision. The result is provably monotonic # | |
6051 | # in double precision. # | |
6052 | # # | |
6053 | # ALGORITHM *********************************************************** # | |
6054 | # Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5. # | |
6055 | # # | |
6056 | # Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. # | |
6057 | # Note that k = -4, -3,..., or 3. # | |
6058 | # Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 # | |
6059 | # significant bits of X with a bit-1 attached at the 6-th # | |
6060 | # bit position. Define u to be u = (X-F) / (1 + X*F). # | |
6061 | # # | |
6062 | # Step 3. Approximate arctan(u) by a polynomial poly. # | |
6063 | # # | |
6064 | # Step 4. Return arctan(F) + poly, arctan(F) is fetched from a # | |
6065 | # table of values calculated beforehand. Exit. # | |
6066 | # # | |
6067 | # Step 5. If |X| >= 16, go to Step 7. # | |
6068 | # # | |
6069 | # Step 6. Approximate arctan(X) by an odd polynomial in X. Exit. # | |
6070 | # # | |
6071 | # Step 7. Define X' = -1/X. Approximate arctan(X') by an odd # | |
6072 | # polynomial in X'. # | |
6073 | # Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit. # | |
6074 | # # | |
6075 | ######################################################################### | |
6076 | ||
6077 | ATANA3: long 0xBFF6687E,0x314987D8 # A3..A1: DBL coefficients read by ATANMAIN for U + A1*U*V*(A2 + V*(A3 + V)) | |
6078 | ATANA2: long 0x4002AC69,0x34A26DB3 | |
6079 | ATANA1: long 0xBFC2476F,0x4E1DA28E | |
6080 | ||
6081 | ATANB6: long 0x3FB34444,0x7F876989 # B6..B1: DBL coefficients read by ATANSM (small |X| polynomial) | |
6082 | ATANB5: long 0xBFB744EE,0x7FAF45DB | |
6083 | ATANB4: long 0x3FBC71C6,0x46940220 | |
6084 | ATANB3: long 0xBFC24924,0x921872F9 | |
6085 | ATANB2: long 0x3FC99999,0x99998FA9 | |
6086 | ATANB1: long 0xBFD55555,0x55555555 | |
6087 | ||
6088 | ATANC5: long 0xBFB70BF3,0x98539E6A # C5..C1: DBL coefficients read by ATANBIG (polynomial in X' = -1/X) | |
6089 | ATANC4: long 0x3FBC7187,0x962D1D7D | |
6090 | ATANC3: long 0xBFC24924,0x827107B8 | |
6091 | ATANC2: long 0x3FC99999,0x9996263E | |
6092 | ATANC1: long 0xBFD55555,0x55555536 | |
6093 | ||
6094 | PPIBY2: long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000 # +PI/2, extended precision (added in pos_big, loaded in pos_huge) | |
6095 | NPIBY2: long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000 # -PI/2, extended precision (added in neg_big, loaded in neg_huge) | |
6096 | ||
6097 | PTINY: long 0x00010000,0x80000000,0x00000000,0x00000000 # +2**(-16382), extended precision; added to -PI/2 in neg_huge | |
6098 | NTINY: long 0x80010000,0x80000000,0x00000000,0x00000000 # -2**(-16382), extended precision; added to +PI/2 in pos_huge | |
6099 | ||
6100 | ATANTBL: | |
6101 | long 0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000 # ATAN(|F|) table: 128 entries of 16 bytes (ext. precision + pad long); ATANMAIN byte offset = 256*(K+4) + 16*(4 fraction bits of F) | |
6102 | long 0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000 | |
6103 | long 0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000 | |
6104 | long 0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000 | |
6105 | long 0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000 | |
6106 | long 0x3FFB0000,0xAB98E943,0x62765619,0x00000000 | |
6107 | long 0x3FFB0000,0xB389E502,0xF9C59862,0x00000000 | |
6108 | long 0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000 | |
6109 | long 0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000 | |
6110 | long 0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000 | |
6111 | long 0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000 | |
6112 | long 0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000 | |
6113 | long 0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000 | |
6114 | long 0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000 | |
6115 | long 0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000 | |
6116 | long 0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000 | |
6117 | long 0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000 | |
6118 | long 0x3FFC0000,0x8B232A08,0x304282D8,0x00000000 | |
6119 | long 0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000 | |
6120 | long 0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000 | |
6121 | long 0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000 | |
6122 | long 0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000 | |
6123 | long 0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000 | |
6124 | long 0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000 | |
6125 | long 0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000 | |
6126 | long 0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000 | |
6127 | long 0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000 | |
6128 | long 0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000 | |
6129 | long 0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000 | |
6130 | long 0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000 | |
6131 | long 0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000 | |
6132 | long 0x3FFC0000,0xF7170A28,0xECC06666,0x00000000 | |
6133 | long 0x3FFD0000,0x812FD288,0x332DAD32,0x00000000 | |
6134 | long 0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000 | |
6135 | long 0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000 | |
6136 | long 0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000 | |
6137 | long 0x3FFD0000,0x9EB68949,0x3889A227,0x00000000 | |
6138 | long 0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000 | |
6139 | long 0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000 | |
6140 | long 0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000 | |
6141 | long 0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000 | |
6142 | long 0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000 | |
6143 | long 0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000 | |
6144 | long 0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000 | |
6145 | long 0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000 | |
6146 | long 0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000 | |
6147 | long 0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000 | |
6148 | long 0x3FFD0000,0xEA2D764F,0x64315989,0x00000000 | |
6149 | long 0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000 | |
6150 | long 0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000 | |
6151 | long 0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000 | |
6152 | long 0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000 | |
6153 | long 0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000 | |
6154 | long 0x3FFE0000,0x97731420,0x365E538C,0x00000000 | |
6155 | long 0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000 | |
6156 | long 0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000 | |
6157 | long 0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000 | |
6158 | long 0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000 | |
6159 | long 0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000 | |
6160 | long 0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000 | |
6161 | long 0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000 | |
6162 | long 0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000 | |
6163 | long 0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000 | |
6164 | long 0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000 | |
6165 | long 0x3FFE0000,0xCD000549,0xADEC7159,0x00000000 | |
6166 | long 0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000 | |
6167 | long 0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000 | |
6168 | long 0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000 | |
6169 | long 0x3FFE0000,0xE8771129,0xC4353259,0x00000000 | |
6170 | long 0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000 | |
6171 | long 0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000 | |
6172 | long 0x3FFE0000,0xF919039D,0x758B8D41,0x00000000 | |
6173 | long 0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000 | |
6174 | long 0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000 | |
6175 | long 0x3FFF0000,0x83889E35,0x49D108E1,0x00000000 | |
6176 | long 0x3FFF0000,0x859CFA76,0x511D724B,0x00000000 | |
6177 | long 0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000 | |
6178 | long 0x3FFF0000,0x89732FD1,0x9557641B,0x00000000 | |
6179 | long 0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000 | |
6180 | long 0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000 | |
6181 | long 0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000 | |
6182 | long 0x3FFF0000,0x922DA7D7,0x91888487,0x00000000 | |
6183 | long 0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000 | |
6184 | long 0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000 | |
6185 | long 0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000 | |
6186 | long 0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000 | |
6187 | long 0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000 | |
6188 | long 0x3FFF0000,0x9F100575,0x006CC571,0x00000000 | |
6189 | long 0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000 | |
6190 | long 0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000 | |
6191 | long 0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000 | |
6192 | long 0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000 | |
6193 | long 0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000 | |
6194 | long 0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000 | |
6195 | long 0x3FFF0000,0xA83A5153,0x0956168F,0x00000000 | |
6196 | long 0x3FFF0000,0xA93A2007,0x7539546E,0x00000000 | |
6197 | long 0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000 | |
6198 | long 0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000 | |
6199 | long 0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000 | |
6200 | long 0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000 | |
6201 | long 0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000 | |
6202 | long 0x3FFF0000,0xB1846515,0x0F71496A,0x00000000 | |
6203 | long 0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000 | |
6204 | long 0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000 | |
6205 | long 0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000 | |
6206 | long 0x3FFF0000,0xB525529D,0x562246BD,0x00000000 | |
6207 | long 0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000 | |
6208 | long 0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000 | |
6209 | long 0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000 | |
6210 | long 0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000 | |
6211 | long 0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000 | |
6212 | long 0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000 | |
6213 | long 0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000 | |
6214 | long 0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000 | |
6215 | long 0x3FFF0000,0xBB471285,0x7637E17D,0x00000000 | |
6216 | long 0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000 | |
6217 | long 0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000 | |
6218 | long 0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000 | |
6219 | long 0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000 | |
6220 | long 0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000 | |
6221 | long 0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000 | |
6222 | long 0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000 | |
6223 | long 0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000 | |
6224 | long 0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000 | |
6225 | long 0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000 | |
6226 | long 0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000 | |
6227 | long 0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000 | |
6228 | long 0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000 | |
6229 | ||
6230 | set X,FP_SCR0 # working copy of the argument (overwritten with F in ATANMAIN and with X' = -1/X in ATANBIG) | |
6231 | set XDCARE,X+2 # word at offset 2 of X (not referenced in the atan code below) | |
6232 | set XFRAC,X+4 # upper fraction long of X; ATANMAIN masks it down to the leading bits of F | |
6233 | set XFRACLO,X+8 # lower fraction long of X; cleared by ATANMAIN when forming F | |
6234 | ||
6235 | set ATANF,FP_SCR1 # receives SIGN(F)*ATAN(|F|) copied from ATANTBL | |
6236 | set ATANFHI,ATANF+4 # second long of ATANF | |
6237 | set ATANFLO,ATANF+8 # third long of ATANF | |
6238 | ||
6239 | global satan | |
6240 | #--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S. d1 IS SET TO |X| IN COMPACT FORM: EXPONENT WORD (SIGN CLEARED) IN THE UPPER HALF, TOP 16 FRACTION BITS IN THE LOWER HALF | |
6241 | satan: | |
6242 | fmov.x (%a0),%fp0 # LOAD INPUT | |
6243 | ||
6244 | mov.l (%a0),%d1 | |
6245 | mov.w 4(%a0),%d1 | |
6246 | fmov.x %fp0,X(%a6) | |
6247 | and.l &0x7FFFFFFF,%d1 | |
6248 | ||
6249 | cmp.l %d1,&0x3FFB8000 # |X| >= 1/16? (IF NOT, GO TO ATANSM) | |
6250 | bge.b ATANOK1 | |
6251 | bra.w ATANSM | |
6252 | ||
6253 | ATANOK1: | |
6254 | cmp.l %d1,&0x4002FFFF # |X| < 16 ? (IF NOT, GO TO ATANBIG) | |
6255 | ble.b ATANMAIN | |
6256 | bra.w ATANBIG | |
6257 | ||
6258 | #--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE TABLE TECHNIQUE | |
6259 | #--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ). | |
6260 | #--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN | |
6261 | #--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE | |
6262 | #--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS | |
6263 | #--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR | |
6264 | #--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO | |
6265 | #--FETCH F AND SAVING OF REGISTERS CAN BE ALL HIDDEN UNDER THE | |
6266 | #--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL | |
6267 | #--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES | |
6268 | #--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION | |
6269 | #--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION | |
6270 | #--WILL INVOLVE A VERY LONG POLYNOMIAL. | |
6271 | ||
6272 | #--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS | |
6273 | #--WE CHOOSE F TO BE +-2^K * 1.BBBB1 | |
6274 | #--THAT IS IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, THE | |
6275 | #--SIXTH BIT IS SET TO BE 1. SINCE K = -4, -3, ..., 3, THERE | |
6276 | #--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS | |
6277 | #-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|). | |
6278 | ||
6279 | ATANMAIN: | |
6280 | ||
6281 | and.l &0xF8000000,XFRAC(%a6) # FIRST 5 BITS | |
6282 | or.l &0x04000000,XFRAC(%a6) # SET 6-TH BIT TO 1 | |
6283 | mov.l &0x00000000,XFRACLO(%a6) # LOCATION OF X IS NOW F | |
6284 | ||
6285 | fmov.x %fp0,%fp1 # FP1 IS X | |
6286 | fmul.x X(%a6),%fp1 # FP1 IS X*F, NOTE THAT X*F > 0 | |
6287 | fsub.x X(%a6),%fp0 # FP0 IS X-F | |
6288 | fadd.s &0x3F800000,%fp1 # FP1 IS 1 + X*F | |
6289 | fdiv.x %fp1,%fp0 # FP0 IS U = (X-F)/(1+X*F) | |
6290 | ||
6291 | #--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|) | |
6292 | #--CREATE ATAN(F) AND STORE IT IN ATANF, AND | |
6293 | #--SAVE REGISTERS FP2. | |
6294 | ||
6295 | mov.l %d2,-(%sp) # SAVE d2 TEMPORARILY | |
6296 | mov.l %d1,%d2 # THE EXP AND 16 BITS OF X | |
6297 | and.l &0x00007800,%d1 # 4 VARYING BITS OF F'S FRACTION | |
6298 | and.l &0x7FFF0000,%d2 # EXPONENT OF F | |
6299 | sub.l &0x3FFB0000,%d2 # K+4 | |
6300 | asr.l &1,%d2 | |
6301 | add.l %d2,%d1 # THE 7 BITS IDENTIFYING F | |
6302 | asr.l &7,%d1 # INDEX INTO TBL OF ATAN(|F|) | |
6303 | lea ATANTBL(%pc),%a1 | |
6304 | add.l %d1,%a1 # ADDRESS OF ATAN(|F|) | |
6305 | mov.l (%a1)+,ATANF(%a6) | |
6306 | mov.l (%a1)+,ATANFHI(%a6) | |
6307 | mov.l (%a1)+,ATANFLO(%a6) # ATANF IS NOW ATAN(|F|) | |
6308 | mov.l X(%a6),%d1 # LOAD SIGN AND EXPO. AGAIN | |
6309 | and.l &0x80000000,%d1 # SIGN(F) | |
6310 | or.l %d1,ATANF(%a6) # ATANF IS NOW SIGN(F)*ATAN(|F|) | |
6311 | mov.l (%sp)+,%d2 # RESTORE d2 | |
6312 | ||
6313 | #--THAT'S ALL I HAVE TO DO FOR NOW, | |
6314 | #--BUT ALAS, THE DIVIDE IS STILL CRANKING! | |
6315 | ||
6316 | #--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS | |
6317 | #--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U | |
6318 | #--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT. | |
6319 | #--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3)) | |
6320 | #--WHAT WE HAVE HERE IS MERELY A1 = A3, A2 = A1/A3, A3 = A2/A3. | |
6321 | #--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT | |
6322 | #--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED | |
6323 | ||
6324 | fmovm.x &0x04,-(%sp) # save fp2 | |
6325 | ||
6326 | fmov.x %fp0,%fp1 | |
6327 | fmul.x %fp1,%fp1 | |
6328 | fmov.d ATANA3(%pc),%fp2 | |
6329 | fadd.x %fp1,%fp2 # A3+V | |
6330 | fmul.x %fp1,%fp2 # V*(A3+V) | |
6331 | fmul.x %fp0,%fp1 # U*V | |
6332 | fadd.d ATANA2(%pc),%fp2 # A2+V*(A3+V) | |
6333 | fmul.d ATANA1(%pc),%fp1 # A1*U*V | |
6334 | fmul.x %fp2,%fp1 # A1*U*V*(A2+V*(A3+V)) | |
6335 | fadd.x %fp1,%fp0 # ATAN(U), FP1 RELEASED | |
6336 | ||
6337 | fmovm.x (%sp)+,&0x20 # restore fp2 | |
6338 | ||
6339 | fmov.l %d0,%fpcr # restore users rnd mode,prec | |
6340 | fadd.x ATANF(%a6),%fp0 # ATAN(X) | |
6341 | bra t_inx2 | |
6342 | ||
6343 | ATANBORS: | |
6344 | #--|X| IS IN d1 IN COMPACT FORM (d0 HOLDS THE USER ROUND PRECISION,MODE). | |
6345 | #--FP0 IS X AND |X| <= 1/16 OR |X| >= 16. | |
6346 | cmp.l %d1,&0x3FFF8000 | |
6347 | bgt.w ATANBIG # |X| > 1, I.E. |X| >= 16 GIVEN THE ENTRY CONDITION ABOVE | |
6348 | ||
6349 | ATANSM: | |
6350 | #--|X| <= 1/16 | |
6351 | #--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE | |
6352 | #--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6))))) | |
6353 | #--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] ) | |
6354 | #--WHERE Y = X*X, AND Z = Y*Y. | |
6355 | ||
6356 | cmp.l %d1,&0x3FD78000 | |
6357 | blt.w ATANTINY | |
6358 | ||
6359 | #--COMPUTE POLYNOMIAL | |
6360 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | |
6361 | ||
6362 | fmul.x %fp0,%fp0 # FP0 IS Y = X*X | |
6363 | ||
6364 | fmov.x %fp0,%fp1 | |
6365 | fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y | |
6366 | ||
6367 | fmov.d ATANB6(%pc),%fp2 | |
6368 | fmov.d ATANB5(%pc),%fp3 | |
6369 | ||
6370 | fmul.x %fp1,%fp2 # Z*B6 | |
6371 | fmul.x %fp1,%fp3 # Z*B5 | |
6372 | ||
6373 | fadd.d ATANB4(%pc),%fp2 # B4+Z*B6 | |
6374 | fadd.d ATANB3(%pc),%fp3 # B3+Z*B5 | |
6375 | ||
6376 | fmul.x %fp1,%fp2 # Z*(B4+Z*B6) | |
6377 | fmul.x %fp3,%fp1 # Z*(B3+Z*B5) | |
6378 | ||
6379 | fadd.d ATANB2(%pc),%fp2 # B2+Z*(B4+Z*B6) | |
6380 | fadd.d ATANB1(%pc),%fp1 # B1+Z*(B3+Z*B5) | |
6381 | ||
6382 | fmul.x %fp0,%fp2 # Y*(B2+Z*(B4+Z*B6)) | |
6383 | fmul.x X(%a6),%fp0 # X*Y | |
6384 | ||
6385 | fadd.x %fp2,%fp1 # [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] | |
6386 | ||
6387 | fmul.x %fp1,%fp0 # X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) | |
6388 | ||
6389 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 | |
6390 | ||
6391 | fmov.l %d0,%fpcr # restore users rnd mode,prec | |
6392 | fadd.x X(%a6),%fp0 | |
6393 | bra t_inx2 | |
6394 | ||
6395 | ATANTINY: | |
6396 | #--|X| < 2^(-40), ATAN(X) = X; X IS RE-LOADED UNDER THE USER'S FPCR AND t_catch IS ENTERED WITH d1 = FMOV_OP | |
6397 | ||
6398 | fmov.l %d0,%fpcr # restore users rnd mode,prec | |
6399 | mov.b &FMOV_OP,%d1 # last inst is MOVE | |
6400 | fmov.x X(%a6),%fp0 # last inst - possible exception set | |
6401 | ||
6402 | bra t_catch | |
6403 | ||
6404 | ATANBIG: | |
6405 | #--IF |X| > 2^(100), RETURN SIGN(X)*(PI/2 - TINY). OTHERWISE, | |
6406 | #--RETURN SIGN(X)*PI/2 + ATAN(-1/X). | |
6407 | cmp.l %d1,&0x40638000 | |
6408 | bgt.w ATANHUGE | |
6409 | ||
6410 | #--APPROXIMATE ATAN(-1/X) BY | |
6411 | #--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X' | |
6412 | #--THIS CAN BE RE-WRITTEN AS | |
6413 | #--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y. | |
6414 | ||
6415 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | |
6416 | ||
6417 | fmov.s &0xBF800000,%fp1 # LOAD -1 | |
6418 | fdiv.x %fp0,%fp1 # FP1 IS -1/X | |
6419 | ||
6420 | #--DIVIDE IS STILL CRANKING | |
6421 | ||
6422 | fmov.x %fp1,%fp0 # FP0 IS X' | |
6423 | fmul.x %fp0,%fp0 # FP0 IS Y = X'*X' | |
6424 | fmov.x %fp1,X(%a6) # X IS REALLY X' | |
6425 | ||
6426 | fmov.x %fp0,%fp1 | |
6427 | fmul.x %fp1,%fp1 # FP1 IS Z = Y*Y | |
6428 | ||
6429 | fmov.d ATANC5(%pc),%fp3 | |
6430 | fmov.d ATANC4(%pc),%fp2 | |
6431 | ||
6432 | fmul.x %fp1,%fp3 # Z*C5 | |
6433 | fmul.x %fp1,%fp2 # Z*C4 | |
6434 | ||
6435 | fadd.d ATANC3(%pc),%fp3 # C3+Z*C5 | |
6436 | fadd.d ATANC2(%pc),%fp2 # C2+Z*C4 | |
6437 | ||
6438 | fmul.x %fp3,%fp1 # Z*(C3+Z*C5), FP3 RELEASED | |
6439 | fmul.x %fp0,%fp2 # Y*(C2+Z*C4) | |
6440 | ||
6441 | fadd.d ATANC1(%pc),%fp1 # C1+Z*(C3+Z*C5) | |
6442 | fmul.x X(%a6),%fp0 # X'*Y | |
6443 | ||
6444 | fadd.x %fp2,%fp1 # [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] | |
6445 | ||
6446 | fmul.x %fp1,%fp0 # X'*Y*([C1+Z*(C3+Z*C5)] | |
6447 | # ... +[Y*(C2+Z*C4)]) | |
6448 | fadd.x X(%a6),%fp0 | |
6449 | ||
6450 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 | |
6451 | ||
6452 | fmov.l %d0,%fpcr # restore users rnd mode,prec | |
6453 | tst.b (%a0) | |
6454 | bpl.b pos_big | |
6455 | ||
6456 | neg_big: | |
6457 | fadd.x NPIBY2(%pc),%fp0 | |
6458 | bra t_minx2 | |
6459 | ||
6460 | pos_big: | |
6461 | fadd.x PPIBY2(%pc),%fp0 | |
6462 | bra t_pinx2 | |
6463 | ||
6464 | ATANHUGE: | |
6465 | #--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY (THE TINY TERM IS ADDED AFTER THE USER'S FPCR IS LOADED) | |
6466 | tst.b (%a0) | |
6467 | bpl.b pos_huge | |
6468 | ||
6469 | neg_huge: | |
6470 | fmov.x NPIBY2(%pc),%fp0 | |
6471 | fmov.l %d0,%fpcr | |
6472 | fadd.x PTINY(%pc),%fp0 | |
6473 | bra t_minx2 | |
6474 | ||
6475 | pos_huge: | |
6476 | fmov.x PPIBY2(%pc),%fp0 | |
6477 | fmov.l %d0,%fpcr | |
6478 | fadd.x NTINY(%pc),%fp0 | |
6479 | bra t_pinx2 | |
6480 | ||
6481 | global satand | |
6482 | #--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT; BRANCHES STRAIGHT TO t_extdnrm | |
6483 | satand: | |
6484 | bra t_extdnrm | |
6485 | ||
6486 | ######################################################################### | |
6487 | # sasin(): computes the inverse sine of a normalized input # | |
6488 | # sasind(): computes the inverse sine of a denormalized input # | |
6489 | # # | |
6490 | # INPUT *************************************************************** # | |
6491 | # a0 = pointer to extended precision input # | |
6492 | # d0 = round precision,mode # | |
6493 | # # | |
6494 | # OUTPUT ************************************************************** # | |
6495 | # fp0 = arcsin(X) # | |
6496 | # # | |
6497 | # ACCURACY and MONOTONICITY ******************************************* # | |
6498 | # The returned result is within 3 ulps in 64 significant bits, # | |
6499 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | |
6500 | # rounded to double precision. The result is provably monotonic # | |
6501 | # in double precision. # | |
6502 | # # | |
6503 | # ALGORITHM *********************************************************** # | |
6504 | # # | |
6505 | # ASIN # | |
6506 | # 1. If |X| >= 1, go to 3. # | |
6507 | # # | |
6508 | # 2. (|X| < 1) Calculate asin(X) by # | |
6509 | # z := sqrt( [1-X][1+X] ) # | |
6510 | # asin(X) = atan( x / z ). # | |
6511 | # Exit. # | |
6512 | # # | |
6513 | # 3. If |X| > 1, go to 5. # | |
6514 | # # | |
6515 | # 4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.# | |
6516 | # # | |
6517 | # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # | |
6518 | # Exit. # | |
6519 | # # | |
6520 | ######################################################################### | |
6521 | ||
6522 | global sasin | |
6523 | sasin: | |
6524 | fmov.x (%a0),%fp0 # LOAD INPUT | |
6525 | ||
6526 | mov.l (%a0),%d1 | |
6527 | mov.w 4(%a0),%d1 | |
6528 | and.l &0x7FFFFFFF,%d1 | |
6529 | cmp.l %d1,&0x3FFF8000 | |
6530 | bge.b ASINBIG | |
6531 | ||
6532 | # This catch is added here for the '060 QSP. Originally, the call to | |
6533 | # satan() would handle this case by causing the exception which would | |
6534 | # not be caught until gen_except(). Now, with the exceptions being | |
6535 | # detected inside of satan(), the exception would have been handled there | |
6536 | # instead of inside sasin() as expected. | |
6537 | cmp.l %d1,&0x3FD78000 | |
6538 | blt.w ASINTINY | |
6539 | ||
6540 | #--THIS IS THE USUAL CASE, |X| < 1 | |
6541 | #--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) ) | |
6542 | ||
6543 | ASINMAIN: | |
6544 | fmov.s &0x3F800000,%fp1 | |
6545 | fsub.x %fp0,%fp1 # 1-X | |
6546 | fmovm.x &0x4,-(%sp) # save {fp2} | |
6547 | fmov.s &0x3F800000,%fp2 | |
6548 | fadd.x %fp0,%fp2 # 1+X | |
6549 | fmul.x %fp2,%fp1 # (1+X)(1-X) | |
6550 | fmovm.x (%sp)+,&0x20 # restore {fp2} | |
6551 | fsqrt.x %fp1 # SQRT([1-X][1+X]) | |
6552 | fdiv.x %fp1,%fp0 # X/SQRT([1-X][1+X]) | |
6553 | fmovm.x &0x01,-(%sp) # save X/SQRT(...) | |
6554 | lea (%sp),%a0 # pass ptr to X/SQRT(...) | |
6555 | bsr satan | |
6556 | add.l &0xc,%sp # clear X/SQRT(...) from stack | |
6557 | bra t_inx2 | |
6558 | ||
6559 | ASINBIG: | |
6560 | fabs.x %fp0 # |X| | |
6561 | fcmp.s %fp0,&0x3F800000 | |
6562 | fbgt t_operr # |X| > 1: cause an operr exception | |
6563 | ||
6564 | #--|X| = 1, ASIN(X) = +- PI/2. | |
6565 | ASINONE: | |
6566 | fmov.x PIBY2(%pc),%fp0 | |
6567 | mov.l (%a0),%d1 | |
6568 | and.l &0x80000000,%d1 # SIGN BIT OF X | |
6569 | or.l &0x3F800000,%d1 # +-1 IN SGL FORMAT | |
6570 | mov.l %d1,-(%sp) # push SIGN(X) IN SGL-FMT | |
6571 | fmov.l %d0,%fpcr | |
6572 | fmul.s (%sp)+,%fp0 | |
6573 | bra t_inx2 | |
6574 | ||
6575 | #--|X| < 2^(-40), ASIN(X) = X | |
6576 | ASINTINY: | |
6577 | fmov.l %d0,%fpcr # restore users rnd mode,prec | |
6578 | mov.b &FMOV_OP,%d1 # last inst is MOVE | |
6579 | fmov.x (%a0),%fp0 # last inst - possible exception | |
6580 | bra t_catch | |
6581 | ||
6582 | global sasind | |
6583 | #--ASIN(X) = X FOR DENORMALIZED X; THE DENORMALIZED CASE IS HANDED TO t_extdnrm | |
6584 | sasind: | |
6585 | bra t_extdnrm | |
6586 | ||
6587 | ######################################################################### | |
6588 | # sacos(): computes the inverse cosine of a normalized input # | |
6589 | # sacosd(): computes the inverse cosine of a denormalized input # | |
6590 | # # | |
6591 | # INPUT *************************************************************** # | |
6592 | # a0 = pointer to extended precision input # | |
6593 | # d0 = round precision,mode # | |
6594 | # # | |
6595 | # OUTPUT ************************************************************** # | |
6596 | # fp0 = arccos(X) # | |
6597 | # # | |
6598 | # ACCURACY and MONOTONICITY ******************************************* # | |
6599 | # The returned result is within 3 ulps in 64 significant bits, # | |
6600 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | |
6601 | # rounded to double precision. The result is provably monotonic # | |
6602 | # in double precision. # | |
6603 | # # | |
6604 | # ALGORITHM *********************************************************** # | |
6605 | # # | |
6606 | # ACOS # | |
6607 | # 1. If |X| >= 1, go to 3. # | |
6608 | # # | |
6609 | # 2. (|X| < 1) Calculate acos(X) by # | |
6610 | # z := (1-X) / (1+X) # | |
6611 | # acos(X) = 2 * atan( sqrt(z) ). # | |
6612 | # Exit. # | |
6613 | # # | |
6614 | # 3. If |X| > 1, go to 5. # | |
6615 | # # | |
6616 | # 4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit. # | |
6617 | # # | |
6618 | # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # | |
6619 | # Exit. # | |
6620 | # # | |
6621 | ######################################################################### | |
6622 | ||
6623 | global sacos | |
6624 | sacos: | |
6625 | fmov.x (%a0),%fp0 # LOAD INPUT | |
6626 | ||
6627 | mov.l (%a0),%d1 # pack exp w/ upper 16 fraction | |
6628 | mov.w 4(%a0),%d1 | |
6629 | and.l &0x7FFFFFFF,%d1 | |
6630 | cmp.l %d1,&0x3FFF8000 | |
6631 | bge.b ACOSBIG | |
6632 | ||
6633 | #--THIS IS THE USUAL CASE, |X| < 1 | |
6634 | #--ACOS(X) = 2 * ATAN( SQRT( (1-X)/(1+X) ) ) | |
6635 | ||
6636 | ACOSMAIN: | |
6637 | fmov.s &0x3F800000,%fp1 | |
6638 | fadd.x %fp0,%fp1 # 1+X | |
6639 | fneg.x %fp0 # -X | |
6640 | fadd.s &0x3F800000,%fp0 # 1-X | |
6641 | fdiv.x %fp1,%fp0 # (1-X)/(1+X) | |
6642 | fsqrt.x %fp0 # SQRT((1-X)/(1+X)) | |
6643 | mov.l %d0,-(%sp) # save original users fpcr (d0 is cleared next, so satan is entered with d0 = 0) | |
6644 | clr.l %d0 | |
6645 | fmovm.x &0x01,-(%sp) # save SQRT(...) to stack | |
6646 | lea (%sp),%a0 # pass ptr to sqrt | |
6647 | bsr satan # ATAN(SQRT([1-X]/[1+X])) | |
6648 | add.l &0xc,%sp # clear SQRT(...) from stack | |
6649 | ||
6650 | fmov.l (%sp)+,%fpcr # restore users round prec,mode | |
6651 | fadd.x %fp0,%fp0 # 2 * ATAN( STUFF ) | |
6652 | bra t_pinx2 | |
6653 | ||
6654 | ACOSBIG: | |
6655 | fabs.x %fp0 | |
6656 | fcmp.s %fp0,&0x3F800000 | |
6657 | fbgt t_operr # |X| > 1: cause an operr exception | |
6658 | ||
6659 | #--|X| = 1, ACOS(X) = 0 OR PI | |
6660 | tst.b (%a0) # is X positive or negative? | |
6661 | bpl.b ACOSP1 | |
6662 | ||
6663 | #--X = -1 | |
6664 | #Returns PI and inexact exception | |
6665 | ACOSM1: | |
6666 | fmov.x PI(%pc),%fp0 # load PI | |
6667 | fmov.l %d0,%fpcr # load round mode,prec | |
6668 | fadd.s &0x00800000,%fp0 # add a small value (2**(-126) in SGL format) so the result is inexact | |
6669 | bra t_pinx2 | |
6670 | ||
6671 | ACOSP1: | |
6672 | bra ld_pzero # answer is positive zero | |
6673 | ||
6674 | global sacosd | |
6675 | #--ACOS(X) = PI/2 FOR DENORMALIZED X; PIBY2 IS LOADED UNDER THE USER'S FPCR AND RETURNED VIA t_pinx2 | |
6676 | sacosd: | |
6677 | fmov.l %d0,%fpcr # load user's rnd mode/prec | |
6678 | fmov.x PIBY2(%pc),%fp0 | |
6679 | bra t_pinx2 | |
6680 | ||
6681 | ######################################################################### | |
6682 | # setox(): computes the exponential for a normalized input # | |
6683 | # setoxd(): computes the exponential for a denormalized input # | |
6684 | # setoxm1(): computes the exponential minus 1 for a normalized input # | |
6685 | # setoxm1d(): computes the exponential minus 1 for a denormalized input # | |
6686 | # # | |
6687 | # INPUT *************************************************************** # | |
6688 | # a0 = pointer to extended precision input # | |
6689 | # d0 = round precision,mode # | |
6690 | # # | |
6691 | # OUTPUT ************************************************************** # | |
6692 | # fp0 = exp(X) or exp(X)-1 # | |
6693 | # # | |
6694 | # ACCURACY and MONOTONICITY ******************************************* # | |
6695 | # The returned result is within 0.85 ulps in 64 significant bits, # | |
6696 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | |
6697 | # rounded to double precision. The result is provably monotonic # | |
6698 | # in double precision. # | |
6699 | # # | |
6700 | # ALGORITHM and IMPLEMENTATION **************************************** # | |
6701 | # # | |
6702 | # setoxd # | |
6703 | # ------ # | |
6704 | # Step 1. Set ans := 1.0 # | |
6705 | # # | |
6706 | # Step 2. Return ans := ans + sign(X)*2^(-126). Exit. # | |
6707 | # Notes: This will always generate one exception -- inexact. # | |
6708 | # # | |
6709 | # # | |
6710 | # setox # | |
6711 | # ----- # | |
6712 | # # | |
6713 | # Step 1. Filter out extreme cases of input argument. # | |
6714 | # 1.1 If |X| >= 2^(-65), go to Step 1.3. # | |
6715 | # 1.2 Go to Step 7. # | |
6716 | # 1.3 If |X| < 16380 log(2), go to Step 2. # | |
6717 | # 1.4 Go to Step 8. # | |
6718 | # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# | |
6719 | # To avoid the use of floating-point comparisons, a # | |
6720 | # compact representation of |X| is used. This format is a # | |
6721 | # 32-bit integer, the upper (more significant) 16 bits # | |
6722 | # are the sign and biased exponent field of |X|; the # | |
6723 | # lower 16 bits are the 16 most significant fraction # | |
6724 | # (including the explicit bit) bits of |X|. Consequently, # | |
6725 | # the comparisons in Steps 1.1 and 1.3 can be performed # | |
6726 | # by integer comparison. Note also that the constant # | |
6727 | # 16380 log(2) used in Step 1.3 is also in the compact # | |
6728 | # form. Thus taking the branch to Step 2 guarantees # | |
6729 | # |X| < 16380 log(2). There is no harm to have a small # | |
6730 | # number of cases where |X| is less than, but close to, # | |
6731 | # 16380 log(2) and the branch to Step 8 is taken. # | |
6732 | # # | |
6733 | # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # | |
6734 | # 2.1 Set AdjFlag := 0 (indicates the branch 1.3 -> 2 # | |
6735 | # was taken) # | |
6736 | # 2.2 N := round-to-nearest-integer( X * 64/log2 ). # | |
6737 | # 2.3 Calculate J = N mod 64; so J = 0,1,2,..., # | |
6738 | # or 63. # | |
6739 | # 2.4 Calculate M = (N - J)/64; so N = 64M + J. # | |
6740 | # 2.5 Calculate the address of the stored value of # | |
6741 | # 2^(J/64). # | |
6742 | # 2.6 Create the value Scale = 2^M. # | |
6743 | # Notes: The calculation in 2.2 is really performed by # | |
6744 | # Z := X * constant # | |
6745 | # N := round-to-nearest-integer(Z) # | |
6746 | # where # | |
6747 | # constant := single-precision( 64/log 2 ). # | |
6748 | # # | |
6749 | # Using a single-precision constant avoids memory # | |
6750 | # access. Another effect of using a single-precision # | |
6751 | # "constant" is that the calculated value Z is # | |
6752 | # # | |
6753 | # Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24). # | |
6754 | # # | |
6755 | # This error has to be considered later in Steps 3 and 4. # | |
6756 | # # | |
6757 | # Step 3. Calculate X - N*log2/64. # | |
6758 | # 3.1 R := X + N*L1, # | |
6759 | # where L1 := single-precision(-log2/64). # | |
6760 | # 3.2 R := R + N*L2, # | |
6761 | # L2 := extended-precision(-log2/64 - L1).# | |
6762 | # Notes: a) The way L1 and L2 are chosen ensures L1+L2 # | |
6763 | # approximate the value -log2/64 to 88 bits of accuracy. # | |
6764 | # b) N*L1 is exact because N is no longer than 22 bits # | |
6765 | # and L1 is no longer than 24 bits. # | |
6766 | # c) The calculation X+N*L1 is also exact due to # | |
6767 | # cancellation. Thus, R is practically X+N(L1+L2) to full # | |
6768 | # 64 bits. # | |
6769 | # d) It is important to estimate how large can |R| be # | |
6770 | # after Step 3.2. # | |
6771 | # # | |
6772 | # N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24) # | |
6773 | # X*64/log2 (1+eps) = N + f, |f| <= 0.5 # | |
6774 | # X*64/log2 - N = f - eps*X 64/log2 # | |
6775 | # X - N*log2/64 = f*log2/64 - eps*X # | |
6776 | # # | |
6777 | # # | |
6778 | # Now |X| <= 16446 log2, thus # | |
6779 | # # | |
6780 | # |X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64 # | |
6781 | # <= 0.57 log2/64. # | |
6782 | # This bound will be used in Step 4. # | |
6783 | # # | |
6784 | # Step 4. Approximate exp(R)-1 by a polynomial # | |
6785 | # p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) # | |
6786 | # Notes: a) In order to reduce memory access, the coefficients # | |
6787 | # are made as "short" as possible: A1 (which is 1/2), A4 # | |
6788 | # and A5 are single precision; A2 and A3 are double # | |
6789 | # precision. # | |
6790 | # b) Even with the restrictions above, # | |
6791 | # |p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062. # | |
6792 | # Note that 0.0062 is slightly bigger than 0.57 log2/64. # | |
6793 | # c) To fully utilize the pipeline, p is separated into # | |
6794 | # two independent pieces of roughly equal complexities # | |
6795 | # p = [ R + R*S*(A2 + S*A4) ] + # | |
6796 | # [ S*(A1 + S*(A3 + S*A5)) ] # | |
6797 | # where S = R*R. # | |
6798 | # # | |
6799 | # Step 5. Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by # | |
6800 | # ans := T + ( T*p + t) # | |
6801 | # where T and t are the stored values for 2^(J/64). # | |
6802 | # Notes: 2^(J/64) is stored as T and t where T+t approximates # | |
6803 | # 2^(J/64) to roughly 85 bits; T is in extended precision # | |
6804 | # and t is in single precision. Note also that T is # | |
6805 | # rounded to 62 bits so that the last two bits of T are # | |
6806 | # zero. The reason for such a special form is that T-1, # | |
6807 | # T-2, and T-8 will all be exact --- a property that will # | |
6808 | # give much more accurate computation of the function # | |
6809 | # EXPM1. # | |
6810 | # # | |
6811 | # Step 6. Reconstruction of exp(X) # | |
6812 | # exp(X) = 2^M * 2^(J/64) * exp(R). # | |
6813 | # 6.1 If AdjFlag = 0, go to 6.3 # | |
6814 | # 6.2 ans := ans * AdjScale # | |
6815 | # 6.3 Restore the user FPCR # | |
6816 | # 6.4 Return ans := ans * Scale. Exit. # | |
6817 | # Notes: If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R, # | |
6818 | # |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will # | |
6819 | # neither overflow nor underflow. If AdjFlag = 1, that # | |
6820 | # means that # | |
6821 | # X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380. # | |
6822 | # Hence, exp(X) may overflow or underflow or neither. # | |
6823 | # When that is the case, AdjScale = 2^(M1) where M1 is # | |
6824 | # approximately M. Thus 6.2 will never cause # | |
6825 | # over/underflow. Possible exception in 6.4 is overflow # | |
6826 | # or underflow. The inexact exception is not generated in # | |
6827 | # 6.4. Although one can argue that the inexact flag # | |
6828 | # should always be raised, to simulate that exception # | |
6829 | # costs much more than the flag is worth in practical uses. # | |
6830 | # # | |
6831 | # Step 7. Return 1 + X. # | |
6832 | # 7.1 ans := X # | |
6833 | # 7.2 Restore user FPCR. # | |
6834 | # 7.3 Return ans := 1 + ans. Exit # | |
6835 | # Notes: For non-zero X, the inexact exception will always be # | |
6836 | # raised by 7.3. That is the only exception raised by 7.3.# | |
6837 | # Note also that we use the FMOVEM instruction to move X # | |
6838 | # in Step 7.1 to avoid unnecessary trapping. (Although # | |
6839 | # the FMOVEM may not seem relevant since X is normalized, # | |
6840 | # the precaution will be useful in the library version of # | |
6841 | # this code where the separate entry for denormalized # | |
6842 | # inputs will be done away with.) # | |
6843 | # # | |
6844 | # Step 8. Handle exp(X) where |X| >= 16380log2. # | |
6845 | # 8.1 If |X| > 16480 log2, go to Step 9. # | |
6846 | # (mimic 2.2 - 2.6) # | |
6847 | # 8.2 N := round-to-integer( X * 64/log2 ) # | |
6848 | # 8.3 Calculate J = N mod 64, J = 0,1,...,63 # | |
6849 | # 8.4 K := (N-J)/64, M1 := truncate(K/2), M = K-M1, # | |
6850 | # AdjFlag := 1. # | |
6851 | # 8.5 Calculate the address of the stored value # | |
6852 | # 2^(J/64). # | |
6853 | # 8.6 Create the values Scale = 2^M, AdjScale = 2^M1. # | |
6854 | # 8.7 Go to Step 3. # | |
6855 | # Notes: Refer to notes for 2.2 - 2.6. # | |
6856 | # # | |
6857 | # Step 9. Handle exp(X), |X| > 16480 log2. # | |
6858 | # 9.1 If X < 0, go to 9.3 # | |
6859 | # 9.2 ans := Huge, go to 9.4 # | |
6860 | # 9.3 ans := Tiny. # | |
6861 | # 9.4 Restore user FPCR. # | |
6862 | # 9.5 Return ans := ans * ans. Exit. # | |
6863 | # Notes: Exp(X) will surely overflow or underflow, depending on # | |
6864 | # X's sign. "Huge" and "Tiny" are respectively large/tiny # | |
6865 | # extended-precision numbers whose square over/underflow # | |
6866 | # with an inexact result. Thus, 9.5 always raises the # | |
6867 | # inexact together with either overflow or underflow. # | |
6868 | # # | |
6869 | # setoxm1d # | |
6870 | # -------- # | |
6871 | # # | |
6872 | # Step 1. Set ans := 0 # | |
6873 | # # | |
6874 | # Step 2. Return ans := X + ans. Exit. # | |
6875 | # Notes: This will return X with the appropriate rounding # | |
6876 | # precision prescribed by the user FPCR. # | |
6877 | # # | |
6878 | # setoxm1 # | |
6879 | # ------- # | |
6880 | # # | |
6881 | # Step 1. Check |X| # | |
6882 | # 1.1 If |X| >= 1/4, go to Step 1.3. # | |
6883 | # 1.2 Go to Step 7. # | |
6884 | # 1.3 If |X| < 70 log(2), go to Step 2. # | |
6885 | # 1.4 Go to Step 10. # | |
6886 | # Notes: The usual case should take the branches 1.1 -> 1.3 -> 2.# | |
6887 | # However, it is conceivable |X| can be small very often # | |
6888 | # because EXPM1 is intended to evaluate exp(X)-1 # | |
6889 | # accurately when |X| is small. For further details on # | |
6890 | # the comparisons, see the notes on Step 1 of setox. # | |
6891 | # # | |
6892 | # Step 2. Calculate N = round-to-nearest-int( X * 64/log2 ). # | |
6893 | # 2.1 N := round-to-nearest-integer( X * 64/log2 ). # | |
6894 | # 2.2 Calculate J = N mod 64; so J = 0,1,2,..., # | |
6895 | # or 63. # | |
6896 | # 2.3 Calculate M = (N - J)/64; so N = 64M + J. # | |
6897 | # 2.4 Calculate the address of the stored value of # | |
6898 | # 2^(J/64). # | |
6899 | # 2.5 Create the values Sc = 2^M and # | |
6900 | # OnebySc := -2^(-M). # | |
6901 | # Notes: See the notes on Step 2 of setox. # | |
6902 | # # | |
6903 | # Step 3. Calculate X - N*log2/64. # | |
6904 | # 3.1 R := X + N*L1, # | |
6905 | # where L1 := single-precision(-log2/64). # | |
6906 | # 3.2 R := R + N*L2, # | |
6907 | # L2 := extended-precision(-log2/64 - L1).# | |
6908 | # Notes: Applying the analysis of Step 3 of setox in this case # | |
6909 | # shows that |R| <= 0.0055 (note that |X| <= 70 log2 in # | |
6910 | # this case). # | |
6911 | # # | |
6912 | # Step 4. Approximate exp(R)-1 by a polynomial # | |
6913 | # p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6))))) # | |
6914 | # Notes: a) In order to reduce memory access, the coefficients # | |
6915 | # are made as "short" as possible: A1 (which is 1/2), A5 # | |
6916 | # and A6 are single precision; A2, A3 and A4 are double # | |
6917 | # precision. # | |
6918 | # b) Even with the restriction above, # | |
6919 | # |p - (exp(R)-1)| < |R| * 2^(-72.7) # | |
6920 | # for all |R| <= 0.0055. # | |
6921 | # c) To fully utilize the pipeline, p is separated into # | |
6922 | # two independent pieces of roughly equal complexity # | |
6923 | # p = [ R*S*(A2 + S*(A4 + S*A6)) ] + # | |
6924 | # [ R + S*(A1 + S*(A3 + S*A5)) ] # | |
6925 | # where S = R*R. # | |
6926 | # # | |
6927 | # Step 5. Compute 2^(J/64)*p by # | |
6928 | # p := T*p # | |
6929 | # where T and t are the stored values for 2^(J/64). # | |
6930 | # Notes: 2^(J/64) is stored as T and t where T+t approximates # | |
6931 | # 2^(J/64) to roughly 85 bits; T is in extended precision # | |
6932 | # and t is in single precision. Note also that T is # | |
6933 | # rounded to 62 bits so that the last two bits of T are # | |
6934 | # zero. The reason for such a special form is that T-1, # | |
6935 | # T-2, and T-8 will all be exact --- a property that will # | |
6936 | # be exploited in Step 6 below. The total relative error # | |
6937 | # in p is no bigger than 2^(-67.7) compared to the final # | |
6938 | # result. # | |
6939 | # # | |
6940 | # Step 6. Reconstruction of exp(X)-1 # | |
6941 | # exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ). # | |
6942 | # 6.1 If M <= 63, go to Step 6.3. # | |
6943 | # 6.2 ans := T + (p + (t + OnebySc)). Go to 6.6 # | |
6944 | # 6.3 If M >= -3, go to 6.5. # | |
6945 | # 6.4 ans := (T + (p + t)) + OnebySc. Go to 6.6 # | |
6946 | # 6.5 ans := (T + OnebySc) + (p + t). # | |
6947 | # 6.6 Restore user FPCR. # | |
6948 | # 6.7 Return ans := Sc * ans. Exit. # | |
6949 | # Notes: The various arrangements of the expressions give # | |
6950 | # accurate evaluations. # | |
6951 | # # | |
6952 | # Step 7. exp(X)-1 for |X| < 1/4. # | |
6953 | # 7.1 If |X| >= 2^(-65), go to Step 9. # | |
6954 | # 7.2 Go to Step 8. # | |
6955 | # # | |
6956 | # Step 8. Calculate exp(X)-1, |X| < 2^(-65). # | |
6957 | # 8.1 If |X| < 2^(-16312), goto 8.3 # | |
6958 | # 8.2 Restore FPCR; return ans := X - 2^(-16382). # | |
6959 | # Exit. # | |
6960 | # 8.3 X := X * 2^(140). # | |
6961 | # 8.4 Restore FPCR; ans := ans - 2^(-16382). # | |
6962 | # Return ans := ans*2^(-140). Exit # | |
6963 | # Notes: The idea is to return "X - tiny" under the user # | |
6964 | # precision and rounding modes. To avoid unnecessary # | |
6965 | # inefficiency, we stay away from denormalized numbers # | |
6966 | # the best we can. For |X| >= 2^(-16312), the # | |
6967 | # straightforward 8.2 generates the inexact exception as # | |
6968 | # the case warrants. # | |
6969 | # # | |
6970 | # Step 9. Calculate exp(X)-1, |X| < 1/4, by a polynomial # | |
6971 | # p = X + X*X*(B1 + X*(B2 + ... + X*B12)) # | |
6972 | # Notes: a) In order to reduce memory access, the coefficients # | |
6973 | # are made as "short" as possible: B1 (which is 1/2), B9 # | |
6974 | # to B12 are single precision; B3 to B8 are double # | |
6975 | # precision; and B2 is double extended. # | |
6976 | # b) Even with the restriction above, # | |
6977 | # |p - (exp(X)-1)| < |X| 2^(-70.6) # | |
6978 | # for all |X| <= 0.251. # | |
6979 | # Note that 0.251 is slightly bigger than 1/4. # | |
6980 | # c) To fully preserve accuracy, the polynomial is # | |
6981 | # computed as # | |
6982 | # X + ( S*B1 + Q ) where S = X*X and # | |
6983 | # Q = X*S*(B2 + X*(B3 + ... + X*B12)) # | |
6984 | # d) To fully utilize the pipeline, Q is separated into # | |
6985 | # two independent pieces of roughly equal complexity # | |
6986 | # Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] + # | |
6987 | # [ S*S*(B3 + S*(B5 + ... + S*B11)) ] # | |
6988 | # # | |
6989 | # Step 10. Calculate exp(X)-1 for |X| >= 70 log 2. # | |
6990 | # 10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all # | |
6991 | # practical purposes. Therefore, go to Step 1 of setox. # | |
6992 | # 10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical # | |
6993 | # purposes. # | |
6994 | # ans := -1 # | |
6995 | # Restore user FPCR # | |
6996 | # Return ans := ans + 2^(-126). Exit. # | |
6997 | # Notes: 10.2 will always create an inexact and return -1 + tiny # | |
6998 | # in the user rounding precision and mode. # | |
6999 | # # | |
7000 | ######################################################################### | |
7001 | ||
7002 | L2: long 0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000 | |
7003 | ||
7004 | EEXPA3: long 0x3FA55555,0x55554CC1 | |
7005 | EEXPA2: long 0x3FC55555,0x55554A54 | |
7006 | ||
7007 | EM1A4: long 0x3F811111,0x11174385 | |
7008 | EM1A3: long 0x3FA55555,0x55554F5A | |
7009 | ||
7010 | EM1A2: long 0x3FC55555,0x55555555,0x00000000,0x00000000 | |
7011 | ||
7012 | EM1B8: long 0x3EC71DE3,0xA5774682 | |
7013 | EM1B7: long 0x3EFA01A0,0x19D7CB68 | |
7014 | ||
7015 | EM1B6: long 0x3F2A01A0,0x1A019DF3 | |
7016 | EM1B5: long 0x3F56C16C,0x16C170E2 | |
7017 | ||
7018 | EM1B4: long 0x3F811111,0x11111111 | |
7019 | EM1B3: long 0x3FA55555,0x55555555 | |
7020 | ||
7021 | EM1B2: long 0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB | |
7022 | long 0x00000000 | |
7023 | ||
7024 | TWO140: long 0x48B00000,0x00000000 | |
7025 | TWON140: | |
7026 | long 0x37300000,0x00000000 | |
7027 | ||
7028 | EEXPTBL: | |
7029 | long 0x3FFF0000,0x80000000,0x00000000,0x00000000 | |
7030 | long 0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B | |
7031 | long 0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9 | |
7032 | long 0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369 | |
7033 | long 0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C | |
7034 | long 0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F | |
7035 | long 0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729 | |
7036 | long 0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF | |
7037 | long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF | |
7038 | long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA | |
7039 | long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051 | |
7040 | long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029 | |
7041 | long 0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494 | |
7042 | long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0 | |
7043 | long 0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D | |
7044 | long 0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537 | |
7045 | long 0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD | |
7046 | long 0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087 | |
7047 | long 0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818 | |
7048 | long 0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D | |
7049 | long 0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890 | |
7050 | long 0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C | |
7051 | long 0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05 | |
7052 | long 0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126 | |
7053 | long 0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140 | |
7054 | long 0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA | |
7055 | long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A | |
7056 | long 0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC | |
7057 | long 0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC | |
7058 | long 0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610 | |
7059 | long 0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90 | |
7060 | long 0x3FFF0000,0xB311C412,0xA9112488,0x201F678A | |
7061 | long 0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13 | |
7062 | long 0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30 | |
7063 | long 0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC | |
7064 | long 0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6 | |
7065 | long 0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70 | |
7066 | long 0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518 | |
7067 | long 0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41 | |
7068 | long 0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B | |
7069 | long 0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568 | |
7070 | long 0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E | |
7071 | long 0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03 | |
7072 | long 0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D | |
7073 | long 0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4 | |
7074 | long 0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C | |
7075 | long 0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9 | |
7076 | long 0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21 | |
7077 | long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F | |
7078 | long 0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F | |
7079 | long 0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207 | |
7080 | long 0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175 | |
7081 | long 0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B | |
7082 | long 0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5 | |
7083 | long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A | |
7084 | long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22 | |
7085 | long 0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945 | |
7086 | long 0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B | |
7087 | long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3 | |
7088 | long 0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05 | |
7089 | long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19 | |
7090 | long 0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5 | |
7091 | long 0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22 | |
7092 | long 0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A | |
7093 | ||
7094 | set ADJFLAG,L_SCR2 | |
7095 | set SCALE,FP_SCR0 | |
7096 | set ADJSCALE,FP_SCR1 | |
7097 | set SC,FP_SCR0 | |
7098 | set ONEBYSC,FP_SCR1 | |
7099 | ||
7100 | global setox | |
7101 | setox: | |
7102 | #--entry point for EXP(X); here X is finite, non-zero, and not a NaN (a0 -> X, d0 = user rnd prec,mode) | |
7103 | ||
7104 | #--Step 1. Filter out tiny (|X| < 2^(-65)) and huge (|X| >= 16380 log2) arguments | |
7105 | mov.l (%a0),%d1 # d1 = first longword of X (sign/expo. in upper word) | |
7106 | and.l &0x7FFF0000,%d1 # keep biased expo. of X, drop the sign bit | |
7107 | cmp.l %d1,&0x3FBE0000 # expo. field of 2^(-65) | |
7108 | bge.b EXPC1 # |X| >= 2^(-65): normal case | |
7109 | bra EXPSM | |
7110 | ||
7111 | EXPC1: | |
7112 | #--The case |X| >= 2^(-65): range check on the compact form of |X| | |
7113 | mov.w 4(%a0),%d1 # d1 low word := top 16 sig. bits, so d1 = expo. and partial sig. of |X| | |
7114 | cmp.l %d1,&0x400CB167 # 16380 log2 in compact form, trunc. 16 bits | |
7115 | blt.b EXPMAIN # |X| < 16380 log2: normal case | |
7116 | bra EEXPBIG | |
7117 | ||
7118 | EXPMAIN: | |
7119 | #--Step 2. Compute N, J, M and the table address of 2^(J/64); AdjFlag := 0 | |
7120 | #--This is the normal branch: 2^(-65) <= |X| < 16380 log2. | |
7121 | fmov.x (%a0),%fp0 # load input from (a0) | |
7122 | ||
7123 | fmov.x %fp0,%fp1 | |
7124 | fmul.s &0x42B8AA3B,%fp0 # 64/log2 (single-prec. constant) * X | |
7125 | fmovm.x &0xc,-(%sp) # save fp2 and fp3 {%fp2/%fp3} | |
7126 | mov.l &0,ADJFLAG(%a6) | |
7127 | fmov.l %fp0,%d1 # d1 = N = int( X * 64/log2 ) | |
7128 | lea EEXPTBL(%pc),%a1 | |
7129 | fmov.l %d1,%fp0 # fp0 = N converted to floating-format | |
7130 | ||
7131 | mov.l %d1,L_SCR1(%a6) # save N temporarily | |
7132 | and.l &0x3F,%d1 # d1 is J = N mod 64 | |
7133 | lsl.l &4,%d1 | |
7134 | add.l %d1,%a1 # a1 = address of 2^(J/64) (16 bytes per table entry) | |
7135 | mov.l L_SCR1(%a6),%d1 | |
7136 | asr.l &6,%d1 # d1 is M | |
7137 | add.w &0x3FFF,%d1 # d1.w = biased expo. of 2^(M) | |
7138 | mov.w L2(%pc),L_SCR1(%a6) # prefetch L2, no need in CB | |
7139 | ||
7140 | EXPCONT1: | |
7141 | #--Step 3. Argument reduction: R = X + N*L1 + N*L2 | |
7142 | #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X, | |
7143 | #--a1 points to 2^(J/64), d1 is biased expo. of 2^(M) | |
7144 | fmov.x %fp0,%fp2 | |
7145 | fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) | |
7146 | fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 | |
7147 | fadd.x %fp1,%fp0 # X + N*L1 | |
7148 | fadd.x %fp2,%fp0 # fp0 is R, reduced arg. | |
7149 | ||
7150 | #--Step 4. | |
7151 | #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL | |
7152 | #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5)))) | |
7153 | #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R | |
7154 | #--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))] | |
7155 | ||
7156 | fmov.x %fp0,%fp1 | |
7157 | fmul.x %fp1,%fp1 # fp1 IS S = R*R | |
7158 | ||
7159 | fmov.s &0x3AB60B70,%fp2 # fp2 IS A5 (single prec.) | |
7160 | ||
7161 | fmul.x %fp1,%fp2 # fp2 IS S*A5 | |
7162 | fmov.x %fp1,%fp3 | |
7163 | fmul.s &0x3C088895,%fp3 # fp3 IS S*A4 (A4 single prec.) | |
7164 | ||
7165 | fadd.d EEXPA3(%pc),%fp2 # fp2 IS A3+S*A5 | |
7166 | fadd.d EEXPA2(%pc),%fp3 # fp3 IS A2+S*A4 | |
7167 | ||
7168 | fmul.x %fp1,%fp2 # fp2 IS S*(A3+S*A5) | |
7169 | mov.w %d1,SCALE(%a6) # SCALE is 2^(M) in extended: sign/expo. word from d1 | |
7170 | mov.l &0x80000000,SCALE+4(%a6) | |
7171 | clr.l SCALE+8(%a6) | |
7172 | ||
7173 | fmul.x %fp1,%fp3 # fp3 IS S*(A2+S*A4) | |
7174 | ||
7175 | fadd.s &0x3F000000,%fp2 # fp2 IS A1+S*(A3+S*A5), A1 = 1/2 | |
7176 | fmul.x %fp0,%fp3 # fp3 IS R*S*(A2+S*A4) | |
7177 | ||
7178 | fmul.x %fp1,%fp2 # fp2 IS S*(A1+S*(A3+S*A5)) | |
7179 | fadd.x %fp3,%fp0 # fp0 IS R+R*S*(A2+S*A4) | |
7180 | ||
7181 | fmov.x (%a1)+,%fp1 # fp1 is T, lead. pt. of 2^(J/64); a1 now points at t | |
7182 | fadd.x %fp2,%fp0 # fp0 is EXP(R) - 1 | |
7183 | ||
7184 | #--Step 5 | |
7185 | #--final reconstruction process | |
7186 | #--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) ) | |
7187 | ||
7188 | fmul.x %fp1,%fp0 # 2^(J/64)*(Exp(R)-1) | |
7189 | fmovm.x (%sp)+,&0x30 # fp2 and fp3 restored {%fp2/%fp3} | |
7190 | fadd.s (%a1),%fp0 # add t, the single-prec. tail, for an accurate 2^(J/64) | |
7191 | ||
7192 | fadd.x %fp1,%fp0 # 2^(J/64) + 2^(J/64)*... | |
7193 | mov.l ADJFLAG(%a6),%d1 | |
7194 | ||
7195 | #--Step 6. Multiply by AdjScale only when AdjFlag is set, then by Scale | |
7196 | tst.l %d1 | |
7197 | beq.b NORMAL | |
7198 | ADJUST: | |
7199 | fmul.x ADJSCALE(%a6),%fp0 | |
7200 | NORMAL: | |
7201 | fmov.l %d0,%fpcr # restore user FPCR | |
7202 | mov.b &FMUL_OP,%d1 # last inst is MUL | |
7203 | fmul.x SCALE(%a6),%fp0 # multiply 2^(M) | |
7204 | bra t_catch | |
7205 | ||
7206 | EXPSM: | |
7207 | #--Step 7. |X| < 2^(-65): return 1 + X | |
7208 | fmovm.x (%a0),&0x80 # load X into fp0; fmovm is used to avoid unnecessary trapping | |
7209 | fmov.l %d0,%fpcr | |
7210 | fadd.s &0x3F800000,%fp0 # 1+X in user rnd mode/prec | |
7211 | bra t_pinx2 | |
7212 | ||
7213 | EEXPBIG: | |
7214 | #--Step 8. |X| >= 16380 log2 (d1 still holds the compact form of |X|) | |
7215 | cmp.l %d1,&0x400CB27C # 16480 log2 in compact form | |
7216 | bgt.b EXP2BIG | |
7217 | #--Steps 8.2 -- 8.6 (mimic Step 2, but split the scale into 2^(M) and 2^(M1)) | |
7218 | fmov.x (%a0),%fp0 # load input from (a0) | |
7219 | ||
7220 | fmov.x %fp0,%fp1 | |
7221 | fmul.s &0x42B8AA3B,%fp0 # 64/log2 (single-prec. constant) * X | |
7222 | fmovm.x &0xc,-(%sp) # save fp2 and fp3 {%fp2/%fp3} | |
7223 | mov.l &1,ADJFLAG(%a6) | |
7224 | fmov.l %fp0,%d1 # d1 = N = int( X * 64/log2 ) | |
7225 | lea EEXPTBL(%pc),%a1 | |
7226 | fmov.l %d1,%fp0 # fp0 = N converted to floating-format | |
7227 | mov.l %d1,L_SCR1(%a6) # save N temporarily | |
7228 | and.l &0x3F,%d1 # d1 is J = N mod 64 | |
7229 | lsl.l &4,%d1 | |
7230 | add.l %d1,%a1 # a1 = address of 2^(J/64) (16 bytes per table entry) | |
7231 | mov.l L_SCR1(%a6),%d1 | |
7232 | asr.l &6,%d1 # d1 is K | |
7233 | mov.l %d1,L_SCR1(%a6) # save K temporarily | |
7234 | asr.l &1,%d1 # d1 is M1 (K arithmetically shifted right by one) | |
7235 | sub.l %d1,L_SCR1(%a6) # L_SCR1 is M = K - M1 | |
7236 | add.w &0x3FFF,%d1 # biased expo. of 2^(M1) | |
7237 | mov.w %d1,ADJSCALE(%a6) # ADJSCALE := 2^(M1) | |
7238 | mov.l &0x80000000,ADJSCALE+4(%a6) | |
7239 | clr.l ADJSCALE+8(%a6) | |
7240 | mov.l L_SCR1(%a6),%d1 # d1 is M | |
7241 | add.w &0x3FFF,%d1 # d1.w = biased expo. of 2^(M) | |
7242 | bra.w EXPCONT1 # go back to Step 3 | |
7243 | ||
7244 | EXP2BIG: | |
7245 | #--Step 9. |X| > 16480 log2: negative X goes to t_unfl2, positive X to t_ovfl2 | |
7246 | tst.b (%a0) # sign byte of X: is X positive or negative? | |
7247 | bmi t_unfl2 | |
7248 | bra t_ovfl2 | |
7249 | ||
7250 | global setoxd | |
7251 | setoxd: | |
7252 | #--entry point for EXP(X), X is denormalized: return 1 + sign(X)*2^(-126) in user rnd mode/prec | |
7253 | mov.l (%a0),-(%sp) | |
7254 | andi.l &0x80000000,(%sp) | |
7255 | ori.l &0x00800000,(%sp) # (sp) = single-prec. sign(X)*2^(-126) | |
7256 | ||
7257 | fmov.s &0x3F800000,%fp0 | |
7258 | ||
7259 | fmov.l %d0,%fpcr | |
7260 | fadd.s (%sp)+,%fp0 | |
7261 | bra t_pinx2 | |
7262 | ||
7263 | global setoxm1 | |
7264 | setoxm1: | |
7265 | #--entry point for EXPM1(X); here X is finite, non-zero, and not a NaN (a0 -> X, d0 = user rnd prec,mode) | |
7266 | ||
7267 | #--Step 1. Classify |X| against 1/4 and 70 log2 | |
7268 | #--Step 1.1 | |
7269 | mov.l (%a0),%d1 # d1 = first longword of X (sign/expo. in upper word) | |
7270 | and.l &0x7FFF0000,%d1 # keep biased expo. of X, drop the sign bit | |
7271 | cmp.l %d1,&0x3FFD0000 # expo. field of 1/4 | |
7272 | bge.b EM1CON1 # |X| >= 1/4 | |
7273 | bra EM1SM | |
7274 | ||
7275 | EM1CON1: | |
7276 | #--Step 1.3 | |
7277 | #--The case |X| >= 1/4: range check on the compact form of |X| | |
7278 | mov.w 4(%a0),%d1 # d1 low word := top 16 sig. bits, so d1 = expo. and partial sig. of |X| | |
7279 | cmp.l %d1,&0x4004C215 # 70log2 in compact form, rounded up to 16 bits | |
7280 | ble.b EM1MAIN # 1/4 <= |X| <= 70log2 | |
7281 | bra EM1BIG | |
7282 | ||
7283 | EM1MAIN: | |
7284 | #--Step 2. Compute N, J, M and the table address of 2^(J/64) | |
7285 | #--This is the case: 1/4 <= |X| <= 70 log2. | |
7286 | fmov.x (%a0),%fp0 # load input from (a0) | |
7287 | ||
7288 | fmov.x %fp0,%fp1 | |
7289 | fmul.s &0x42B8AA3B,%fp0 # 64/log2 (single-prec. constant) * X | |
7290 | fmovm.x &0xc,-(%sp) # save fp2 and fp3 {%fp2/%fp3} | |
7291 | fmov.l %fp0,%d1 # d1 = N = int( X * 64/log2 ) | |
7292 | lea EEXPTBL(%pc),%a1 | |
7293 | fmov.l %d1,%fp0 # fp0 = N converted to floating-format | |
7294 | ||
7295 | mov.l %d1,L_SCR1(%a6) # save N temporarily | |
7296 | and.l &0x3F,%d1 # d1 is J = N mod 64 | |
7297 | lsl.l &4,%d1 | |
7298 | add.l %d1,%a1 # a1 = address of 2^(J/64) (16 bytes per table entry) | |
7299 | mov.l L_SCR1(%a6),%d1 | |
7300 | asr.l &6,%d1 # d1 is M | |
7301 | mov.l %d1,L_SCR1(%a6) # save a copy of M in L_SCR1 | |
7302 | ||
7303 | #--Step 3. Argument reduction: R = X + N*L1 + N*L2 | |
7304 | #--fp2,fp3 saved on the stack. fp0 is N, fp1 is X, | |
7305 | #--a1 points to 2^(J/64), d1 and L_SCR1 both contain M | |
7306 | fmov.x %fp0,%fp2 | |
7307 | fmul.s &0xBC317218,%fp0 # N * L1, L1 = lead(-log2/64) | |
7308 | fmul.x L2(%pc),%fp2 # N * L2, L1+L2 = -log2/64 | |
7309 | fadd.x %fp1,%fp0 # X + N*L1 | |
7310 | fadd.x %fp2,%fp0 # fp0 is R, reduced arg. | |
7311 | add.w &0x3FFF,%d1 # d1.w is biased expo. of 2^M | |
7312 | ||
7313 | #--Step 4. | |
7314 | #--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL | |
7315 | #-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6))))) | |
7316 | #--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R | |
7317 | #--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))] | |
7318 | ||
7319 | fmov.x %fp0,%fp1 | |
7320 | fmul.x %fp1,%fp1 # fp1 IS S = R*R | |
7321 | ||
7322 | fmov.s &0x3950097B,%fp2 # fp2 IS A6 (single prec.) | |
7323 | ||
7324 | fmul.x %fp1,%fp2 # fp2 IS S*A6 | |
7325 | fmov.x %fp1,%fp3 | |
7326 | fmul.s &0x3AB60B6A,%fp3 # fp3 IS S*A5 (A5 single prec.) | |
7327 | ||
7328 | fadd.d EM1A4(%pc),%fp2 # fp2 IS A4+S*A6 | |
7329 | fadd.d EM1A3(%pc),%fp3 # fp3 IS A3+S*A5 | |
7330 | mov.w %d1,SC(%a6) # SC is 2^(M) in extended: sign/expo. word from d1 | |
7331 | mov.l &0x80000000,SC+4(%a6) | |
7332 | clr.l SC+8(%a6) | |
7333 | ||
7334 | fmul.x %fp1,%fp2 # fp2 IS S*(A4+S*A6) | |
7335 | mov.l L_SCR1(%a6),%d1 # d1 is M | |
7336 | neg.w %d1 # d1.w is -M | |
7337 | fmul.x %fp1,%fp3 # fp3 IS S*(A3+S*A5) | |
7338 | add.w &0x3FFF,%d1 # d1.w = biased expo. of 2^(-M) | |
7339 | fadd.d EM1A2(%pc),%fp2 # fp2 IS A2+S*(A4+S*A6) | |
7340 | fadd.s &0x3F000000,%fp3 # fp3 IS A1+S*(A3+S*A5), A1 = 1/2 | |
7341 | ||
7342 | fmul.x %fp1,%fp2 # fp2 IS S*(A2+S*(A4+S*A6)) | |
7343 | or.w &0x8000,%d1 # set sign bit: sign/expo. of -2^(-M) | |
7344 | mov.w %d1,ONEBYSC(%a6) # OnebySc is -2^(-M) in extended | |
7345 | mov.l &0x80000000,ONEBYSC+4(%a6) | |
7346 | clr.l ONEBYSC+8(%a6) | |
7347 | fmul.x %fp3,%fp1 # fp1 IS S*(A1+S*(A3+S*A5)) | |
7348 | ||
7349 | fmul.x %fp0,%fp2 # fp2 IS R*S*(A2+S*(A4+S*A6)) | |
7350 | fadd.x %fp1,%fp0 # fp0 IS R+S*(A1+S*(A3+S*A5)) | |
7351 | ||
7352 | fadd.x %fp2,%fp0 # fp0 IS p = EXP(R)-1 | |
7353 | ||
7354 | fmovm.x (%sp)+,&0x30 # fp2 and fp3 restored {%fp2/%fp3} | |
7355 | ||
7356 | #--Step 5 | |
7357 | #--Compute 2^(J/64)*p | |
7358 | ||
7359 | fmul.x (%a1),%fp0 # p := T*p = 2^(J/64)*(Exp(R)-1) | |
7360 | ||
7361 | #--Step 6. Reconstruction; the order of the additions depends on M | |
7362 | #--Step 6.1 | |
7363 | mov.l L_SCR1(%a6),%d1 # retrieve M | |
7364 | cmp.l %d1,&63 | |
7365 | ble.b MLE63 | |
7366 | #--Step 6.2 M >= 64 | |
7367 | fmov.s 12(%a1),%fp1 # fp1 is t (single-prec. tail at offset 12 of the entry) | |
7368 | fadd.x ONEBYSC(%a6),%fp1 # fp1 is t+OnebySc | |
7369 | fadd.x %fp1,%fp0 # p+(t+OnebySc), fp1 released | |
7370 | fadd.x (%a1),%fp0 # T+(p+(t+OnebySc)) | |
7371 | bra EM1SCALE | |
7372 | MLE63: | |
7373 | #--Step 6.3 M <= 63 (d1 holds M) | |
7374 | cmp.l %d1,&-3 | |
7375 | bge.b MGEN3 | |
7376 | MLTN3: | |
7377 | #--Step 6.4 M <= -4: ans := (T + (p + t)) + OnebySc | |
7378 | fadd.s 12(%a1),%fp0 # p+t (t is the single-prec. tail at offset 12) | |
7379 | fadd.x (%a1),%fp0 # T+(p+t) | |
7380 | fadd.x ONEBYSC(%a6),%fp0 # OnebySc + (T+(p+t)) | |
7381 | bra EM1SCALE | |
7382 | MGEN3: | |
7383 | #--Step 6.5 -3 <= M <= 63: ans := (T + OnebySc) + (p + t) | |
7384 | fmov.x (%a1)+,%fp1 # fp1 is T; a1 now points at t | |
7385 | fadd.s (%a1),%fp0 # fp0 is p+t | |
7386 | fadd.x ONEBYSC(%a6),%fp1 # fp1 is T+OnebySc | |
7387 | fadd.x %fp1,%fp0 # (T+OnebySc)+(p+t) | |
7388 | ||
7389 | EM1SCALE: | |
7390 | #--Steps 6.6 and 6.7: restore user FPCR from d0, then ans := Sc * ans | |
7391 | fmov.l %d0,%fpcr | |
7392 | fmul.x SC(%a6),%fp0 | |
7393 | bra t_inx2 | |
7394 | ||
7395 | EM1SM: | |
7396 | #--Step 7 |X| < 1/4 (d1 = biased expo. of X in the upper word). | |
7397 | cmp.l %d1,&0x3FBE0000 # expo. field of 2^(-65) | |
7398 | bge.b EM1POLY | |
7399 | ||
7400 | EM1TINY: | |
7401 | #--Step 8 |X| < 2^(-65): return X - 2^(-16382) | |
7402 | cmp.l %d1,&0x00330000 # 2^(-16312) per the algorithm notes; NOTE(review): expo. field 0x0033 is 2^(-16332) -- confirm | |
7403 | blt.b EM12TINY | |
7404 | #--Step 8.2 | |
7405 | mov.l &0x80010000,SC(%a6) # SC is -2^(-16382): sign set, biased expo. 0x0001 | |
7406 | mov.l &0x80000000,SC+4(%a6) | |
7407 | clr.l SC+8(%a6) | |
7408 | fmov.x (%a0),%fp0 | |
7409 | fmov.l %d0,%fpcr | |
7410 | mov.b &FADD_OP,%d1 # last inst is ADD | |
7411 | fadd.x SC(%a6),%fp0 | |
7412 | bra t_catch | |
7413 | ||
7414 | EM12TINY: | |
7415 | #--Steps 8.3 and 8.4: ans := (X*2^(140) - 2^(-16382)) * 2^(-140); only the final multiply runs under the user FPCR | |
7416 | fmov.x (%a0),%fp0 | |
7417 | fmul.d TWO140(%pc),%fp0 | |
7418 | mov.l &0x80010000,SC(%a6) | |
7419 | mov.l &0x80000000,SC+4(%a6) | |
7420 | clr.l SC+8(%a6) | |
7421 | fadd.x SC(%a6),%fp0 | |
7422 | fmov.l %d0,%fpcr | |
7423 | mov.b &FMUL_OP,%d1 # last inst is MUL | |
7424 | fmul.d TWON140(%pc),%fp0 | |
7425 | bra t_catch | |
7426 | ||
7427 | EM1POLY: | |
7428 | #--Step 9 exp(X)-1 for 2^(-65) <= |X| < 1/4 by a simple polynomial: X + (S*B1 + Q) | |
7429 | fmov.x (%a0),%fp0 # fp0 is X | |
7430 | fmul.x %fp0,%fp0 # fp0 is S := X*X | |
7431 | fmovm.x &0xc,-(%sp) # save fp2 and fp3 {%fp2/%fp3} | |
7432 | fmov.s &0x2F30CAA8,%fp1 # fp1 is B12 | |
7433 | fmul.x %fp0,%fp1 # fp1 is S*B12 | |
7434 | fmov.s &0x310F8290,%fp2 # fp2 is B11 | |
7435 | fadd.s &0x32D73220,%fp1 # fp1 is B10+S*B12 | |
7436 | ||
7437 | fmul.x %fp0,%fp2 # fp2 is S*B11 | |
7438 | fmul.x %fp0,%fp1 # fp1 is S*(B10+...) | |
7439 | ||
7440 | fadd.s &0x3493F281,%fp2 # fp2 is B9+S*... | |
7441 | fadd.d EM1B8(%pc),%fp1 # fp1 is B8+S*... | |
7442 | ||
7443 | fmul.x %fp0,%fp2 # fp2 is S*(B9+...) | |
7444 | fmul.x %fp0,%fp1 # fp1 is S*(B8+...) | |
7445 | ||
7446 | fadd.d EM1B7(%pc),%fp2 # fp2 is B7+S*... | |
7447 | fadd.d EM1B6(%pc),%fp1 # fp1 is B6+S*... | |
7448 | ||
7449 | fmul.x %fp0,%fp2 # fp2 is S*(B7+...) | |
7450 | fmul.x %fp0,%fp1 # fp1 is S*(B6+...) | |
7451 | ||
7452 | fadd.d EM1B5(%pc),%fp2 # fp2 is B5+S*... | |
7453 | fadd.d EM1B4(%pc),%fp1 # fp1 is B4+S*... | |
7454 | ||
7455 | fmul.x %fp0,%fp2 # fp2 is S*(B5+...) | |
7456 | fmul.x %fp0,%fp1 # fp1 is S*(B4+...) | |
7457 | ||
7458 | fadd.d EM1B3(%pc),%fp2 # fp2 is B3+S*... | |
7459 | fadd.x EM1B2(%pc),%fp1 # fp1 is B2+S*... (B2 is extended prec.) | |
7460 | ||
7461 | fmul.x %fp0,%fp2 # fp2 is S*(B3+...) | |
7462 | fmul.x %fp0,%fp1 # fp1 is S*(B2+...) | |
7463 | ||
7464 | fmul.x %fp0,%fp2 # fp2 is S*S*(B3+...) | |
7465 | fmul.x (%a0),%fp1 # fp1 is X*S*(B2+...) | |
7466 | ||
7467 | fmul.s &0x3F000000,%fp0 # fp0 is S*B1, B1 = 1/2 | |
7468 | fadd.x %fp2,%fp1 # fp1 is Q | |
7469 | ||
7470 | fmovm.x (%sp)+,&0x30 # fp2 and fp3 restored {%fp2/%fp3} | |
7471 | ||
7472 | fadd.x %fp1,%fp0 # fp0 is S*B1+Q | |
7473 | ||
7474 | fmov.l %d0,%fpcr | |
7475 | fadd.x (%a0),%fp0 | |
7476 | bra t_inx2 | |
7477 | ||
7478 | EM1BIG: | |
7479 | #--Step 10 |X| > 70 log2 | |
7480 | mov.l (%a0),%d1 | |
7481 | cmp.l %d1,&0 | |
7482 | bgt.w EXPC1 | |
7483 | #--Step 10.2 | |
7484 | fmov.s &0xBF800000,%fp0 # fp0 is -1 | |
7485 | fmov.l %d0,%fpcr | |
7486 | fadd.s &0x00800000,%fp0 # -1 + 2^(-126) | |
7487 | bra t_minx2 | |
7488 | ||
7489 | global setoxm1d | |
7490 | setoxm1d: | |
7491 | #--Entry point for EXPM1(X) when X is a denormalized number. | |
7492 | #--Step 0. No arithmetic is done here; control goes straight to t_extdnrm (defined outside this section). | |
7493 | bra t_extdnrm | |
7494 | ||
7495 | ######################################################################### | |
7496 | # sgetexp(): returns the exponent portion of the input argument. # | |
7497 | # The exponent bias is removed and the exponent value is # | |
7498 | # returned as an extended precision number in fp0. # | |
7499 | # sgetexpd(): handles denormalized numbers. # | |
7500 | # # | |
7501 | # sgetman(): extracts the mantissa of the input argument. The # | |
7502 | # mantissa is converted to an extended precision number w/ # | |
7503 | # an exponent of $3fff and is returned in fp0. The range of # | |
7504 | # the result is [1.0 - 2.0). # | |
7505 | # sgetmand(): handles denormalized numbers. # | |
7506 | # # | |
7507 | # INPUT *************************************************************** # | |
7508 | # a0 = pointer to extended precision input # | |
7509 | # # | |
7510 | # OUTPUT ************************************************************** # | |
7511 | # fp0 = exponent(X) or mantissa(X) # | |
7512 | # # | |
7513 | ######################################################################### | |
7514 | ||
7515 | global sgetexp | |
7516 | sgetexp: | |
7517 | mov.w SRC_EX(%a0),%d0 # d0.w = sign/exponent word of the input at (a0) | |
7518 | bclr &0xf,%d0 # clear the sign bit, leaving the biased exponent | |
7519 | subi.w &0x3fff,%d0 # subtract off the bias ($3fff); this sets the condition codes tested by blt below | |
7520 | fmov.w %d0,%fp0 # return the unbiased exponent in fp0 (word integer converted to FP) | |
7521 | blt.b sgetexpn # exponent is negative (tests the subi.w result; relies on fmov.w leaving the integer ccodes alone) | |
7522 | rts | |
7523 | |
7524 | sgetexpn: | |
7525 | mov.b &neg_bmask,FPSR_CC(%a6) # negative result: store neg_bmask in FPSR_CC(%a6) to set the 'N' ccode bit | |
7526 | rts | |
7527 | ||
7528 | global sgetexpd | |
7529 | sgetexpd: | |
7530 | bsr.l norm # normalize the denormalized input; d0 is taken to hold the shift amount on return (norm is defined elsewhere - confirm) | |
7531 | neg.w %d0 # new exp = -(shft amt) | |
7532 | subi.w &0x3fff,%d0 # subtract off the bias ($3fff) | |
7533 | fmov.w %d0,%fp0 # return the exponent in fp0 (word integer converted to FP) | |
7534 | mov.b &neg_bmask,FPSR_CC(%a6) # unconditionally set the 'N' ccode bit on this path | |
7535 | rts | |
7536 | ||
7537 | global sgetman | |
7538 | sgetman: | |
7539 | mov.w SRC_EX(%a0),%d0 # d0.w = sign/exponent word of the input at (a0) | |
7540 | ori.w &0x7fff,%d0 # overwrite the old exponent: set all 15 exponent bits, keep the sign bit | |
7541 | bclr &0xe,%d0 # clear bit 14 so the exponent field is $3fff; the word is now +-3fff | |
7542 | |
7543 | # here, we build the result in a tmp location (FP_SCR0, addressed off a6) so as not to disturb the input | |
7544 | mov.l SRC_HI(%a0),FP_SCR0_HI(%a6) # copy SRC_HI to tmp loc | |
7545 | mov.l SRC_LO(%a0),FP_SCR0_LO(%a6) # copy SRC_LO to tmp loc | |
7546 | mov.w %d0,FP_SCR0_EX(%a6) # insert new exponent; this move sets 'N' from the sign bit of d0.w | |
7547 | fmov.x FP_SCR0(%a6),%fp0 # fp0 = the rebuilt value (input mantissa, exponent $3fff, original sign) | |
7548 | bmi.b sgetmann # it's negative (tests the mov.w result; relies on fmov.x leaving the integer ccodes alone) | |
7549 | rts | |
7550 | |
7551 | sgetmann: | |
7552 | mov.b &neg_bmask,FPSR_CC(%a6) # negative result: store neg_bmask in FPSR_CC(%a6) to set the 'N' ccode bit | |
7553 | rts | |
7554 | ||
7555 | # | |
7556 | # For denormalized numbers, shift the mantissa until the j-bit = 1, | |
7557 | # then load the exponent with +/- $3fff (done by falling into sgetman). | |
7558 | # | |
7559 | global sgetmand | |
7560 | sgetmand: | |
7561 | bsr.l norm # normalize the input first (norm is defined elsewhere) | |
7562 | bra.b sgetman | |
7563 | ||
7564 | ######################################################################### | |
7565 | # scosh(): computes the hyperbolic cosine of a normalized input # | |
7566 | # scoshd(): computes the hyperbolic cosine of a denormalized input # | |
7567 | # # | |
7568 | # INPUT *************************************************************** # | |
7569 | # a0 = pointer to extended precision input # | |
7570 | # d0 = round precision,mode # | |
7571 | # # | |
7572 | # OUTPUT ************************************************************** # | |
7573 | # fp0 = cosh(X) # | |
7574 | # # | |
7575 | # ACCURACY and MONOTONICITY ******************************************* # | |
7576 | # The returned result is within 3 ulps in 64 significant bit, # | |
7577 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | |
7578 | # rounded to double precision. The result is provably monotonic # | |
7579 | # in double precision. # | |
7580 | # # | |
7581 | # ALGORITHM *********************************************************** # | |
7582 | # # | |
7583 | # COSH # | |
7584 | # 1. If |X| > 16380 log2, go to 3. # | |
7585 | # # | |
7586 | # 2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae # | |
7587 | # Y = |X|, z = exp(Y), and # | |
7588 | # cosh(X) = (1/2)*( z + 1/z ). # | |
7589 | # Exit. # | |
7590 | # # | |
7591 | # 3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5. # | |
7592 | # # | |
7593 | # 4. (16380 log2 < |X| <= 16480 log2) # | |
7594 | # cosh(X) = exp(|X|)/2 (cosh is even: no sign factor). # | |
7595 | # However, invoking exp(|X|) may cause premature # | |
7596 | # overflow. Thus, we calculate cosh(X) as follows: # | |
7597 | # Y := |X| # | |
7598 | # Fact := 2**(16380) # | |
7599 | # Y' := Y - 16381 log2 # | |
7600 | # cosh(X) := Fact * exp(Y'). # | |
7601 | # Exit. # | |
7602 | # # | |
7603 | # 5. (|X| > 16480 log2) cosh(X) must overflow. Return # | |
7604 | # Huge*Huge to generate overflow and an infinity with # | |
7605 | # the appropriate sign. Huge is the largest finite number # | |
7606 | # in extended format. Exit. # | |
7607 | # # | |
7608 | ######################################################################### | |
7609 | ||
7610 | TWO16380: | |
7611 | long 0x7FFB0000,0x80000000,0x00000000,0x00000000 | |
7612 | |
7613 | global scosh | |
7614 | scosh: | |
7615 | fmov.x (%a0),%fp0 # fp0 = X, loaded from the extended-precision input at (a0) | |
7616 | |
7617 | mov.l (%a0),%d1 | |
7618 | mov.w 4(%a0),%d1 | |
7619 | and.l &0x7FFFFFFF,%d1 | |
7620 | cmp.l %d1,&0x400CB167 | |
7621 | bgt.b COSHBIG | |
7622 | |
7623 | #--THIS IS THE USUAL CASE, |X| < 16380 LOG2. d1 HOLDS THE COMPACTED |X| (EXPONENT WORD IN THE UPPER HALF, WORD AT 4(a0) IN THE LOWER HALF, SIGN MASKED OFF) AND DID NOT EXCEED 0x400CB167. | |
7624 | #--COSH(X) = (1/2) * ( EXP(|X|) + 1/EXP(|X|) ), EVALUATED BELOW AS EXP(|X|)/2 + (1/4)/(EXP(|X|)/2) | |
7625 | |
7626 | fabs.x %fp0 # fp0 = |X| | |
7627 | |
7628 | mov.l %d0,-(%sp) | |
7629 | clr.l %d0 | |
7630 | fmovm.x &0x01,-(%sp) # save |X| to stack (12 bytes, removed again by the add.l below) | |
7631 | lea (%sp),%a0 # pass ptr to |X| as the setox argument | |
7632 | bsr setox # FP0 IS EXP(|X|); setox is entered with d0 cleared, the caller's d0 is on the stack | |
7633 | add.l &0xc,%sp # erase |X| from stack | |
7634 | fmul.s &0x3F000000,%fp0 # fp0 = (1/2)EXP(|X|) | |
7635 | mov.l (%sp)+,%d0 | |
7636 | |
7637 | fmov.s &0x3E800000,%fp1 # fp1 = (1/4) | |
7638 | fdiv.x %fp0,%fp1 # fp1 = (1/4)/fp0 = 1/(2 EXP(|X|)) | |
7639 | |
7640 | fmov.l %d0,%fpcr | |
7641 | mov.b &FADD_OP,%d1 # last inst is ADD (d1 is presumably read by t_catch - confirm) | |
7642 | fadd.x %fp1,%fp0 | |
7643 | bra t_catch | |
7644 | |
7645 | COSHBIG: | |
7646 | cmp.l %d1,&0x400CB2B3 | |
7647 | bgt.b COSHHUGE | |
7648 | |
7649 | fabs.x %fp0 | |
7650 | fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD); T1 is defined elsewhere | |
7651 | fsub.d T2(%pc),%fp0 # Y' = |X| - 16381 LOG2, ACCURATE; T2 is defined elsewhere | |
7652 | |
7653 | mov.l %d0,-(%sp) | |
7654 | clr.l %d0 | |
7655 | fmovm.x &0x01,-(%sp) # save fp0 (Y') to stack | |
7656 | lea (%sp),%a0 # pass ptr to Y' as the setox argument | |
7657 | bsr setox | |
7658 | add.l &0xc,%sp # clear the saved fp0 from stack | |
7659 | mov.l (%sp)+,%d0 | |
7660 | |
7661 | fmov.l %d0,%fpcr | |
7662 | mov.b &FMUL_OP,%d1 # last inst is MUL: the fmul.x below scales EXP(Y') by TWO16380, the extended-precision 2^16380 defined above | |
7663 | fmul.x TWO16380(%pc),%fp0 | |
7664 | bra t_catch | |
7665 | |
7666 | COSHHUGE: | |
7667 | bra t_ovfl2 | |
7668 | ||
7669 | global scoshd | |
7670 | #--COSH(X) = 1 FOR DENORMALIZED X. THE CODE LOADS 1.0 (0x3F800000), INSTALLS THE CALLER'S FPCR FROM d0, THEN ADDS 2^(-126) (0x00800000) AS ITS LAST FP OPERATION BEFORE EXITING THROUGH t_pinx2. | |
7671 | scoshd: | |
7672 | fmov.s &0x3F800000,%fp0 | |
7673 | |
7674 | fmov.l %d0,%fpcr | |
7675 | fadd.s &0x00800000,%fp0 | |
7676 | bra t_pinx2 | |
7677 | ||
7678 | ######################################################################### | |
7679 | # ssinh(): computes the hyperbolic sine of a normalized input # | |
7680 | # ssinhd(): computes the hyperbolic sine of a denormalized input # | |
7681 | # # | |
7682 | # INPUT *************************************************************** # | |
7683 | # a0 = pointer to extended precision input # | |
7684 | # d0 = round precision,mode # | |
7685 | # # | |
7686 | # OUTPUT ************************************************************** # | |
7687 | # fp0 = sinh(X) # | |
7688 | # # | |
7689 | # ACCURACY and MONOTONICITY ******************************************* # | |
7690 | # The returned result is within 3 ulps in 64 significant bit, # | |
7691 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | |
7692 | # rounded to double precision. The result is provably monotonic # | |
7693 | # in double precision. # | |
7694 | # # | |
7695 | # ALGORITHM *********************************************************** # | |
7696 | # # | |
7697 | # SINH # | |
7698 | # 1. If |X| > 16380 log2, go to 3. # | |
7699 | # # | |
7700 | # 2. (|X| <= 16380 log2) Sinh(X) is obtained by the formula # | |
7701 | # Y = |X|, sgn = sign(X), and z = expm1(Y), # | |
7702 | # sinh(X) = sgn*(1/2)*( z + z/(1+z) ). # | |
7703 | # Exit. # | |
7704 | # # | |
7705 | # 3. If |X| > 16480 log2, go to 5. # | |
7706 | # # | |
7707 | # 4. (16380 log2 < |X| <= 16480 log2) # | |
7708 | # sinh(X) = sign(X) * exp(|X|)/2. # | |
7709 | # However, invoking exp(|X|) may cause premature overflow. # | |
7710 | # Thus, we calculate sinh(X) as follows: # | |
7711 | # Y := |X| # | |
7712 | # sgn := sign(X) # | |
7713 | # sgnFact := sgn * 2**(16380) # | |
7714 | # Y' := Y - 16381 log2 # | |
7715 | # sinh(X) := sgnFact * exp(Y'). # | |
7716 | # Exit. # | |
7717 | # # | |
7718 | # 5. (|X| > 16480 log2) sinh(X) must overflow. Return # | |
7719 | # sign(X)*Huge*Huge to generate overflow and an infinity with # | |
7720 | # the appropriate sign. Huge is the largest finite number in # | |
7721 | # extended format. Exit. # | |
7722 | # # | |
7723 | ######################################################################### | |
7724 | ||
7725 | global ssinh | |
7726 | ssinh: | |
7727 | fmov.x (%a0),%fp0 # fp0 = X, loaded from the extended-precision input at (a0) | |
7728 | |
7729 | mov.l (%a0),%d1 | |
7730 | mov.w 4(%a0),%d1 | |
7731 | mov.l %d1,%a1 # save (compacted) operand in a1; only its sign bit is used later | |
7732 | and.l &0x7FFFFFFF,%d1 | |
7733 | cmp.l %d1,&0x400CB167 | |
7734 | bgt.b SINHBIG | |
7735 | |
7736 | #--THIS IS THE USUAL CASE, |X| < 16380 LOG2 (COMPACTED |X| IN d1 DID NOT EXCEED 0x400CB167) | |
7737 | #--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) ) | |
7738 | |
7739 | fabs.x %fp0 # fp0 = Y = |X| | |
7740 | |
7741 | movm.l &0x8040,-(%sp) # push {a1/d0} so both survive the setoxm1 call | |
7742 | fmovm.x &0x01,-(%sp) # save Y on stack (12 bytes) | |
7743 | lea (%sp),%a0 # pass ptr to Y as the setoxm1 argument | |
7744 | clr.l %d0 | |
7745 | bsr setoxm1 # FP0 IS Z = EXPM1(Y); entered with d0 cleared | |
7746 | add.l &0xc,%sp # clear Y from stack | |
7747 | fmov.l &0,%fpcr | |
7748 | movm.l (%sp)+,&0x0201 # pop {a1/d0} | |
7749 | |
7750 | fmov.x %fp0,%fp1 | |
7751 | fadd.s &0x3F800000,%fp1 # fp1 = 1+Z | |
7752 | fmov.x %fp0,-(%sp) | |
7753 | fdiv.x %fp1,%fp0 # fp0 = Z/(1+Z); the copy of Z pushed just above is added back below | |
7754 | mov.l %a1,%d1 | |
7755 | and.l &0x80000000,%d1 | |
7756 | or.l &0x3F000000,%d1 | |
7757 | fadd.x (%sp)+,%fp0 | |
7758 | mov.l %d1,-(%sp) | |
7759 | |
7760 | fmov.l %d0,%fpcr | |
7761 | mov.b &FMUL_OP,%d1 # last inst is MUL (the signed 1/2 built in d1 is already on the stack) | |
7762 | fmul.s (%sp)+,%fp0 # multiply by SIGN(X)*(1/2) popped from the stack; last fp inst - possible exceptions set | |
7763 | bra t_catch | |
7764 | |
7765 | SINHBIG: | |
7766 | cmp.l %d1,&0x400CB2B3 | |
7767 | bgt t_ovfl | |
7768 | fabs.x %fp0 | |
7769 | fsub.d T1(%pc),%fp0 # (|X|-16381LOG2_LEAD); T1 is defined elsewhere | |
7770 | mov.l &0,-(%sp) | |
7771 | mov.l &0x80000000,-(%sp) | |
7772 | mov.l %a1,%d1 | |
7773 | and.l &0x80000000,%d1 | |
7774 | or.l &0x7FFB0000,%d1 | |
7775 | mov.l %d1,-(%sp) # EXTENDED FMT: with the two longs pushed above this is sgnFact = SIGN(X)*2^16380 | |
7776 | fsub.d T2(%pc),%fp0 # Y' = |X| - 16381 LOG2, ACCURATE; T2 is defined elsewhere | |
7777 | |
7778 | mov.l %d0,-(%sp) | |
7779 | clr.l %d0 | |
7780 | fmovm.x &0x01,-(%sp) # save fp0 (Y') on stack | |
7781 | lea (%sp),%a0 # pass ptr to Y' as the setox argument | |
7782 | bsr setox | |
7783 | add.l &0xc,%sp # clear the saved fp0 from stack | |
7784 | |
7785 | mov.l (%sp)+,%d0 | |
7786 | fmov.l %d0,%fpcr | |
7787 | mov.b &FMUL_OP,%d1 # last inst is MUL | |
7788 | fmul.x (%sp)+,%fp0 # fp0 = sgnFact * EXP(Y'), popping sgnFact; possible exception | |
7789 | bra t_catch | |
7790 | ||
7791 | global ssinhd | |
7792 | #--SINH(X) = X FOR DENORMALIZED X; THE RESULT IS PRODUCED BY t_extdnrm (DEFINED OUTSIDE THIS SECTION) | |
7793 | ssinhd: | |
7794 | bra t_extdnrm | |
7795 | ||
7796 | ######################################################################### | |
7797 | # stanh(): computes the hyperbolic tangent of a normalized input # | |
7798 | # stanhd(): computes the hyperbolic tangent of a denormalized input # | |
7799 | # # | |
7800 | # INPUT *************************************************************** # | |
7801 | # a0 = pointer to extended precision input # | |
7802 | # d0 = round precision,mode # | |
7803 | # # | |
7804 | # OUTPUT ************************************************************** # | |
7805 | # fp0 = tanh(X) # | |
7806 | # # | |
7807 | # ACCURACY and MONOTONICITY ******************************************* # | |
7808 | # The returned result is within 3 ulps in 64 significant bit, # | |
7809 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | |
7810 | # rounded to double precision. The result is provably monotonic # | |
7811 | # in double precision. # | |
7812 | # # | |
7813 | # ALGORITHM *********************************************************** # | |
7814 | # # | |
7815 | # TANH # | |
7816 | # 1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3. # | |
7817 | # # | |
7818 | # 2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by # | |
7819 | # sgn := sign(X), Y := 2|X|, z := expm1(Y), and # | |
7820 | # tanh(X) = sgn*( z/(2+z) ). # | |
7821 | # Exit. # | |
7822 | # # | |
7823 | # 3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1, # | |
7824 | # go to 7. # | |
7825 | # # | |
7826 | # 4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6. # | |
7827 | # # | |
7828 | # 5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by # | |
7829 | # sgn := sign(X), Y := 2|X|, z := exp(Y), # | |
7830 | # tanh(X) = sgn - [ sgn*2/(1+z) ]. # | |
7831 | # Exit. # | |
7832 | # # | |
7833 | # 6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we # | |
7834 | # calculate Tanh(X) by # | |
7835 | # sgn := sign(X), Tiny := 2**(-126), # | |
7836 | # tanh(X) := sgn - sgn*Tiny. # | |
7837 | # Exit. # | |
7838 | # # | |
7839 | # 7. (|X| < 2**(-40)). Tanh(X) = X. Exit. # | |
7840 | # # | |
7841 | ######################################################################### | |
7842 | ||
7843 | set X,FP_SCR0 | |
7844 | set XFRAC,X+4 | |
7845 | |
7846 | set SGN,L_SCR3 | |
7847 | |
7848 | set V,FP_SCR0 | |
7849 | |
7850 | global stanh | |
7851 | stanh: | |
7852 | fmov.x (%a0),%fp0 # fp0 = X, loaded from the extended-precision input at (a0) | |
7853 | |
7854 | fmov.x %fp0,X(%a6) | |
7855 | mov.l (%a0),%d1 | |
7856 | mov.w 4(%a0),%d1 | |
7857 | mov.l %d1,X(%a6) | |
7858 | and.l &0x7FFFFFFF,%d1 | |
7859 | cmp.l %d1, &0x3fd78000 # is |X| < 2^(-40)? (d1 is the compacted |X|: exponent word : word at 4(a0)) | |
7860 | blt.w TANHBORS # yes | |
7861 | cmp.l %d1, &0x3fffddce # is |X| > (5/2)LOG2? | |
7862 | bgt.w TANHBORS # yes | |
7863 | |
7864 | #--THIS IS THE USUAL CASE. X, SGN AND V ARE SCRATCH SLOTS ADDRESSED OFF a6; V ALIASES THE SAME FP_SCR0 SLOT AS X. | |
7865 | #--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2). | |
7866 | |
7867 | mov.l X(%a6),%d1 | |
7868 | mov.l %d1,SGN(%a6) | |
7869 | and.l &0x7FFF0000,%d1 | |
7870 | add.l &0x00010000,%d1 # EXPONENT OF 2|X|: sign cleared, biased exponent bumped by one | |
7871 | mov.l %d1,X(%a6) | |
7872 | and.l &0x80000000,SGN(%a6) | |
7873 | fmov.x X(%a6),%fp0 # FP0 IS Y = 2|X| | |
7874 | |
7875 | mov.l %d0,-(%sp) | |
7876 | clr.l %d0 | |
7877 | fmovm.x &0x1,-(%sp) # save Y on stack (12 bytes) | |
7878 | lea (%sp),%a0 # pass ptr to Y as the setoxm1 argument | |
7879 | bsr setoxm1 # FP0 IS Z = EXPM1(Y); entered with d0 cleared | |
7880 | add.l &0xc,%sp # clear Y from stack | |
7881 | mov.l (%sp)+,%d0 | |
7882 | |
7883 | fmov.x %fp0,%fp1 | |
7884 | fadd.s &0x40000000,%fp1 # fp1 = Z+2; stored to V below, where the eor.l folds SIGN(X) into its sign bit | |
7885 | mov.l SGN(%a6),%d1 | |
7886 | fmov.x %fp1,V(%a6) | |
7887 | eor.l %d1,V(%a6) | |
7888 | |
7889 | fmov.l %d0,%fpcr # restore users round prec,mode before the final divide Z / (SIGN(X)*(Z+2)) | |
7890 | fdiv.x V(%a6),%fp0 | |
7891 | bra t_inx2 | |
7892 | |
7893 | TANHBORS: | |
7894 | cmp.l %d1,&0x3FFF8000 | |
7895 | blt.w TANHSM | |
7896 | |
7897 | cmp.l %d1,&0x40048AA1 | |
7898 | bgt.w TANHHUGE | |
7899 | |
7900 | #-- (5/2) LOG2 < |X| < 50 LOG2 (NEITHER THE TANHSM NOR THE TANHHUGE BRANCH ABOVE WAS TAKEN), | |
7901 | #--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X), | |
7902 | #--TANH(X) = SGN - SGN*2/[EXP(Y)+1]. | |
7903 | |
7904 | mov.l X(%a6),%d1 | |
7905 | mov.l %d1,SGN(%a6) | |
7906 | and.l &0x7FFF0000,%d1 | |
7907 | add.l &0x00010000,%d1 # EXPO OF 2|X|: sign cleared, biased exponent bumped by one | |
7908 | mov.l %d1,X(%a6) # X slot now holds Y = 2|X| | |
7909 | and.l &0x80000000,SGN(%a6) | |
7910 | mov.l SGN(%a6),%d1 | |
7911 | fmov.x X(%a6),%fp0 # fp0 = Y = 2|X| | |
7912 | |
7913 | mov.l %d0,-(%sp) | |
7914 | clr.l %d0 | |
7915 | fmovm.x &0x01,-(%sp) # save Y on stack (12 bytes) | |
7916 | lea (%sp),%a0 # pass ptr to Y as the setox argument | |
7917 | bsr setox # FP0 IS EXP(Y); entered with d0 cleared | |
7918 | add.l &0xc,%sp # clear Y from stack | |
7919 | mov.l (%sp)+,%d0 | |
7920 | mov.l SGN(%a6),%d1 | |
7921 | fadd.s &0x3F800000,%fp0 # fp0 = EXP(Y)+1 | |
7922 | |
7923 | eor.l &0xC0000000,%d1 # d1 = -SIGN(X)*2 as single-precision bits (0xC0000000 for X>0, 0x40000000 for X<0) | |
7924 | fmov.s %d1,%fp1 # fp1 = -SIGN(X)*2 IN SGL FMT | |
7925 | fdiv.x %fp0,%fp1 # fp1 = -SIGN(X)*2 / [EXP(Y)+1] | |
7926 | |
7927 | mov.l SGN(%a6),%d1 | |
7928 | or.l &0x3F800000,%d1 # d1 = SGN, i.e. +-1.0 as single-precision bits | |
7929 | fmov.s %d1,%fp0 # fp0 = SGN IN SGL FMT | |
7930 | |
7931 | fmov.l %d0,%fpcr # restore users round prec,mode | |
7932 | mov.b &FADD_OP,%d1 # last inst is ADD | |
7933 | fadd.x %fp1,%fp0 | |
7934 | bra t_inx2 | |
7935 | |
7936 | TANHSM: | |
7937 | fmov.l %d0,%fpcr # restore users round prec,mode | |
7938 | mov.b &FMOV_OP,%d1 # last inst is MOVE | |
7939 | fmov.x X(%a6),%fp0 # TANH(X) = X: reload the saved input; last inst - possible exception set | |
7940 | bra t_catch | |
7941 | |
7942 | #---RETURN SGN(X) - SGN(X)EPS, WITH EPS = 2^(-126) IN SINGLE FORMAT | |
7943 | TANHHUGE: | |
7944 | mov.l X(%a6),%d1 | |
7945 | and.l &0x80000000,%d1 | |
7946 | or.l &0x3F800000,%d1 | |
7947 | fmov.s %d1,%fp0 | |
7948 | and.l &0x80000000,%d1 | |
7949 | eor.l &0x80800000,%d1 # d1 = -SIGN(X)*EPS as single-precision bits (sign flipped, exponent field 0x008) | |
7950 | |
7951 | fmov.l %d0,%fpcr # restore users round prec,mode | |
7952 | fadd.s %d1,%fp0 | |
7953 | bra t_inx2 | |
7954 | ||
7955 | global stanhd | |
7956 | #--TANH(X) = X FOR DENORMALIZED X; THE RESULT IS PRODUCED BY t_extdnrm (DEFINED OUTSIDE THIS SECTION) | |
7957 | stanhd: | |
7958 | bra t_extdnrm | |
7959 | ||
7960 | ######################################################################### | |
7961 | # slogn(): computes the natural logarithm of a normalized input # | |
7962 | # slognd(): computes the natural logarithm of a denormalized input # | |
7963 | # slognp1(): computes the log(1+X) of a normalized input # | |
7964 | # slognp1d(): computes the log(1+X) of a denormalized input # | |
7965 | # # | |
7966 | # INPUT *************************************************************** # | |
7967 | # a0 = pointer to extended precision input # | |
7968 | # d0 = round precision,mode # | |
7969 | # # | |
7970 | # OUTPUT ************************************************************** # | |
7971 | # fp0 = log(X) or log(1+X) # | |
7972 | # # | |
7973 | # ACCURACY and MONOTONICITY ******************************************* # | |
7974 | # The returned result is within 2 ulps in 64 significant bit, # | |
7975 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | |
7976 | # rounded to double precision. The result is provably monotonic # | |
7977 | # in double precision. # | |
7978 | # # | |
7979 | # ALGORITHM *********************************************************** # | |
7980 | # LOGN: # | |
7981 | # Step 1. If |X-1| < 1/16, approximate log(X) by an odd # | |
7982 | # polynomial in u, where u = 2(X-1)/(X+1). Otherwise, # | |
7983 | # move on to Step 2. # | |
7984 | # # | |
7985 | # Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first # | |
7986 | # seven significant bits of Y plus 2**(-7), i.e. # | |
7987 | # F = 1.xxxxxx1 in base 2 where the six "x" match those # | |
7988 | # of Y. Note that |Y-F| <= 2**(-7). # | |
7989 | # # | |
7990 | # Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a # | |
7991 | # polynomial in u, log(1+u) = poly. # | |
7992 | # # | |
7993 | # Step 4. Reconstruct # | |
7994 | # log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u) # | |
7995 | # by k*log(2) + (log(F) + poly). The values of log(F) are # | |
7996 | # calculated beforehand and stored in the program. # | |
7997 | # # | |
7998 | # lognp1: # | |
7999 | # Step 1: If |X| < 1/16, approximate log(1+X) by an odd # | |
8000 | # polynomial in u where u = 2X/(2+X). Otherwise, move on # | |
8001 | # to Step 2. # | |
8002 | # # | |
8003 | # Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done # | |
8004 | # in Step 2 of the algorithm for LOGN and compute # | |
8005 | # log(1+X) as k*log(2) + log(F) + poly where poly # | |
8006 | # approximates log(1+u), u = (Y-F)/F. # | |
8007 | # # | |
8008 | # Implementation Notes: # | |
8009 | # Note 1. There are 64 different possible values for F, thus 64 # | |
8010 | # log(F)'s need to be tabulated. Moreover, the values of # | |
8011 | # 1/F are also tabulated so that the division in (Y-F)/F # | |
8012 | # can be performed by a multiplication. # | |
8013 | # # | |
8014 | # Note 2. In Step 2 of lognp1, in order to preserve accuracy, # | |
8015 | # the value Y-F has to be calculated carefully when # | |
8016 | # 1/2 <= X < 3/2. # | |
8017 | # # | |
8018 | # Note 3. To fully exploit the pipeline, polynomials are usually # | |
8019 | # separated into two parts evaluated independently before # | |
8020 | # being added up. # | |
8021 | # # | |
8022 | ######################################################################### | |
# LOGOF2: log(2) as an extended-precision constant (exponent word 0x3FFE, mantissa 0xB17217F7D1CF79AC), padded to 16 bytes.
8023 | LOGOF2: | |
8024 | long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 | |
8025 | |
# one / zero / infty / negone: single-precision bit patterns for 1.0, 0.0, +infinity and -1.0.
8026 | one: | |
8027 | long 0x3F800000 | |
8028 | zero: | |
8029 | long 0x00000000 | |
8030 | infty: | |
8031 | long 0x7F800000 | |
8032 | negone: | |
8033 | long 0xBF800000 | |
8034 | |
# LOGA6..LOGA1, LOGB5..LOGB1 and TWO: 8-byte constants stored as two longs each (double-precision bit patterns; TWO is 2.0).
# NOTE(review): presumably the polynomial coefficients mentioned in the LOGN/lognp1 header; their use sites are outside this section - confirm.
8035 | LOGA6: | |
8036 | long 0x3FC2499A,0xB5E4040B | |
8037 | LOGA5: | |
8038 | long 0xBFC555B5,0x848CB7DB | |
8039 | |
8040 | LOGA4: | |
8041 | long 0x3FC99999,0x987D8730 | |
8042 | LOGA3: | |
8043 | long 0xBFCFFFFF,0xFF6F7E97 | |
8044 | |
8045 | LOGA2: | |
8046 | long 0x3FD55555,0x555555A4 | |
8047 | LOGA1: | |
8048 | long 0xBFE00000,0x00000008 | |
8049 | |
8050 | LOGB5: | |
8051 | long 0x3F175496,0xADD7DAD6 | |
8052 | LOGB4: | |
8053 | long 0x3F3C71C2,0xFE80C7E0 | |
8054 | |
8055 | LOGB3: | |
8056 | long 0x3F624924,0x928BCCFF | |
8057 | LOGB2: | |
8058 | long 0x3F899999,0x999995EC | |
8059 | |
8060 | LOGB1: | |
8061 | long 0x3FB55555,0x55555555 | |
8062 | TWO: | |
8063 | long 0x40000000,0x00000000 | |
8064 | |
# LTHOLD: extended-precision 2^(-102) (exponent word 0x3f99, mantissa 0x80000000 00000000); its use site is outside this section.
8065 | LTHOLD: | |
8066 | long 0x3f990000,0x80000000,0x00000000,0x00000000 | |
8067 | ||
# LOGTBL: 64 pairs of extended-precision entries, each entry padded to 16 bytes with a zero long.
# Every pair is 1/F followed by log(F), for F = 1.xxxxxx1 (binary) as defined in Step 2 of the
# LOGN algorithm header: the first pair is F = 129/128 and the last is F = 255/128.
# slogn loads this label as the "BASE ADDRESS OF 1/F AND LOG(F)".
8068 | LOGTBL: | |
8069 | long 0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000 | |
8070 | long 0x3FF70000,0xFF015358,0x833C47E2,0x00000000 | |
8071 | long 0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000 | |
8072 | long 0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000 | |
8073 | long 0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000 | |
8074 | long 0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000 | |
8075 | long 0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000 | |
8076 | long 0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000 | |
8077 | long 0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000 | |
8078 | long 0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000 | |
8079 | long 0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000 | |
8080 | long 0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000 | |
8081 | long 0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000 | |
8082 | long 0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000 | |
8083 | long 0x3FFE0000,0xE525982A,0xF70C880E,0x00000000 | |
8084 | long 0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000 | |
8085 | long 0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000 | |
8086 | long 0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000 | |
8087 | long 0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000 | |
8088 | long 0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000 | |
8089 | long 0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000 | |
8090 | long 0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000 | |
8091 | long 0x3FFE0000,0xD901B203,0x6406C80E,0x00000000 | |
8092 | long 0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000 | |
8093 | long 0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000 | |
8094 | long 0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000 | |
8095 | long 0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000 | |
8096 | long 0x3FFC0000,0xC3FD0329,0x06488481,0x00000000 | |
8097 | long 0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000 | |
8098 | long 0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000 | |
8099 | long 0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000 | |
8100 | long 0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000 | |
8101 | long 0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000 | |
8102 | long 0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000 | |
8103 | long 0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000 | |
8104 | long 0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000 | |
8105 | long 0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000 | |
8106 | long 0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000 | |
8107 | long 0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000 | |
8108 | long 0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000 | |
8109 | long 0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000 | |
8110 | long 0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000 | |
8111 | long 0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000 | |
8112 | long 0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000 | |
8113 | long 0x3FFE0000,0xBD691047,0x07661AA3,0x00000000 | |
8114 | long 0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000 | |
8115 | long 0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000 | |
8116 | long 0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000 | |
8117 | long 0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000 | |
8118 | long 0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000 | |
8119 | long 0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000 | |
8120 | long 0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000 | |
8121 | long 0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000 | |
8122 | long 0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000 | |
8123 | long 0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000 | |
8124 | long 0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000 | |
8125 | long 0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000 | |
8126 | long 0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000 | |
8127 | long 0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000 | |
8128 | long 0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000 | |
8129 | long 0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000 | |
8130 | long 0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000 | |
8131 | long 0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000 | |
8132 | long 0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000 | |
8133 | long 0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000 | |
8134 | long 0x3FFD0000,0xD2420487,0x2DD85160,0x00000000 | |
8135 | long 0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000 | |
8136 | long 0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000 | |
8137 | long 0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000 | |
8138 | long 0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000 | |
8139 | long 0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000 | |
8140 | long 0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000 | |
8141 | long 0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000 | |
8142 | long 0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000 | |
8143 | long 0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000 | |
8144 | long 0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000 | |
8145 | long 0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000 | |
8146 | long 0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000 | |
8147 | long 0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000 | |
8148 | long 0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000 | |
8149 | long 0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000 | |
8150 | long 0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000 | |
8151 | long 0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000 | |
8152 | long 0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000 | |
8153 | long 0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000 | |
8154 | long 0x3FFE0000,0x825EFCED,0x49369330,0x00000000 | |
8155 | long 0x3FFE0000,0x9868C809,0x868C8098,0x00000000 | |
8156 | long 0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000 | |
8157 | long 0x3FFE0000,0x97012E02,0x5C04B809,0x00000000 | |
8158 | long 0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000 | |
8159 | long 0x3FFE0000,0x95A02568,0x095A0257,0x00000000 | |
8160 | long 0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000 | |
8161 | long 0x3FFE0000,0x94458094,0x45809446,0x00000000 | |
8162 | long 0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000 | |
8163 | long 0x3FFE0000,0x92F11384,0x0497889C,0x00000000 | |
8164 | long 0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000 | |
8165 | long 0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000 | |
8166 | long 0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000 | |
8167 | long 0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000 | |
8168 | long 0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000 | |
8169 | long 0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000 | |
8170 | long 0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000 | |
8171 | long 0x3FFE0000,0x8DDA5202,0x37694809,0x00000000 | |
8172 | long 0x3FFE0000,0x9723A1B7,0x20134203,0x00000000 | |
8173 | long 0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000 | |
8174 | long 0x3FFE0000,0x995899C8,0x90EB8990,0x00000000 | |
8175 | long 0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000 | |
8176 | long 0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000 | |
8177 | long 0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000 | |
8178 | long 0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000 | |
8179 | long 0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000 | |
8180 | long 0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000 | |
8181 | long 0x3FFE0000,0x87F78087,0xF78087F8,0x00000000 | |
8182 | long 0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000 | |
8183 | long 0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000 | |
8184 | long 0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000 | |
8185 | long 0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000 | |
8186 | long 0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000 | |
8187 | long 0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000 | |
8188 | long 0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000 | |
8189 | long 0x3FFE0000,0x83993052,0x3FBE3368,0x00000000 | |
8190 | long 0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000 | |
8191 | long 0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000 | |
8192 | long 0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000 | |
8193 | long 0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000 | |
8194 | long 0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000 | |
8195 | long 0x3FFE0000,0x80808080,0x80808081,0x00000000 | |
8196 | long 0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000 | |
8197 | ||
# Symbolic names for the scratch locations used by the log routines; slogn addresses them
# relative to a6 (ADJK(%a6), X(%a6), XFRAC(%a6), FFRAC(%a6)).
# X, KLOG2 and SAVEU all alias FP_SCR0; XFRAC and FFRAC are the longs at offset 4 inside X and F.
8198 | set ADJK,L_SCR1 | |
8199 | |
8200 | set X,FP_SCR0 | |
8201 | set XDCARE,X+2 | |
8202 | set XFRAC,X+4 | |
8203 | |
8204 | set F,FP_SCR1 | |
8205 | set FFRAC,F+4 | |
8206 | |
8207 | set KLOG2,FP_SCR0 | |
8208 | |
8209 | set SAVEU,FP_SCR0 | |
8210 | ||
8211 | global slogn | |
8212 | #--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S. A0 POINTS TO THE EXTENDED-PRECISION INPUT; THE RESULT IS LEFT IN FP0. | |
8213 | slogn: | |
8214 | fmov.x (%a0),%fp0 # LOAD INPUT | |
8215 | mov.l &0x00000000,ADJK(%a6) | |
8216 | ||
8217 | LOGBGN: | |
8218 | #--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS | |
8219 | #--A FINITE, NON-ZERO, NORMALIZED NUMBER. | |
8220 | ||
8221 | mov.l (%a0),%d1 | |
8222 | mov.w 4(%a0),%d1 | |
8223 | ||
8224 | mov.l (%a0),X(%a6) | |
8225 | mov.l 4(%a0),X+4(%a6) | |
8226 | mov.l 8(%a0),X+8(%a6) | |
8227 | ||
8228 | cmp.l %d1,&0 # CHECK IF X IS NEGATIVE | |
8229 | blt.w LOGNEG # LOG OF NEGATIVE ARGUMENT IS INVALID | |
8230 | # X IS POSITIVE, CHECK IF X IS NEAR 1 | |
8231 | cmp.l %d1,&0x3ffef07d # IS X < EXP(-1/16) (ABOUT 15/16)? | |
8232 | blt.b LOGMAIN # YES | |
8233 | cmp.l %d1,&0x3fff8841 # IS X > EXP(1/16) (ABOUT 17/16)? | |
8234 | ble.w LOGNEAR1 # NO | |
8235 | ||
8236 | LOGMAIN: | |
8237 | #--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1 | |
8238 | ||
8239 | #--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY. | |
8240 | #--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1. | |
8241 | #--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y) | |
8242 | #-- = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F). | |
8243 | #--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING | |
8244 | #--LOG(1+U) CAN BE VERY EFFICIENT. | |
8245 | #--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO | |
8246 | #--DIVISION IS NEEDED TO CALCULATE (Y-F)/F. | |
8247 | ||
8248 | #--GET K, Y, F, AND ADDRESS OF 1/F. | |
8249 | asr.l &8,%d1 | |
8250 | asr.l &8,%d1 # SHIFTED 16 BITS, BIASED EXPO. OF X | |
8251 | sub.l &0x3FFF,%d1 # THIS IS K | |
8252 | add.l ADJK(%a6),%d1 # ADJUST K, ORIGINAL INPUT MAY BE DENORM. | |
8253 | lea LOGTBL(%pc),%a0 # BASE ADDRESS OF 1/F AND LOG(F) | |
8254 | fmov.l %d1,%fp1 # CONVERT K TO FLOATING-POINT FORMAT | |
8255 | ||
8256 | #--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F | |
8257 | mov.l &0x3FFF0000,X(%a6) # X IS NOW Y, I.E. 2^(-K)*X | |
8258 | mov.l XFRAC(%a6),FFRAC(%a6) | |
8259 | and.l &0xFE000000,FFRAC(%a6) # FIRST 7 BITS OF Y | |
8260 | or.l &0x01000000,FFRAC(%a6) # GET F: ATTACH A 1 AT THE EIGHTH BIT | |
8261 | mov.l FFRAC(%a6),%d1 # READY TO GET ADDRESS OF 1/F | |
8262 | and.l &0x7E000000,%d1 | |
8263 | asr.l &8,%d1 | |
8264 | asr.l &8,%d1 | |
8265 | asr.l &4,%d1 # SHIFTED 20, D1 IS THE DISPLACEMENT | |
8266 | add.l %d1,%a0 # A0 IS THE ADDRESS FOR 1/F | |
8267 | ||
8268 | fmov.x X(%a6),%fp0 | |
8269 | mov.l &0x3fff0000,F(%a6) | |
8270 | clr.l F+8(%a6) | |
8271 | fsub.x F(%a6),%fp0 # Y-F | |
8272 | fmovm.x &0xc,-(%sp) # SAVE FP2-3 WHILE FP0 IS NOT READY | |
8273 | #--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K | |
8274 | #--FP2 AND FP3 ARE SAVED ON THE STACK (SEE FMOVM ABOVE) | |
8275 | ||
8276 | LP1CONT1: | |
8277 | #--A RE-ENTRY POINT FOR LOGNP1 | |
8278 | fmul.x (%a0),%fp0 # FP0 IS U = (Y-F)/F | |
8279 | fmul.x LOGOF2(%pc),%fp1 # GET K*LOG2 WHILE FP0 IS NOT READY | |
8280 | fmov.x %fp0,%fp2 | |
8281 | fmul.x %fp2,%fp2 # FP2 IS V=U*U | |
8282 | fmov.x %fp1,KLOG2(%a6) # PUT K*LOG2 IN MEMORY, FREE FP1 | |
8283 | ||
8284 | #--LOG(1+U) IS APPROXIMATED BY | |
8285 | #--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS | |
8286 | #--[U + V*(A1+V*(A3+V*A5))] + [U*V*(A2+V*(A4+V*A6))] | |
8287 | ||
8288 | fmov.x %fp2,%fp3 | |
8289 | fmov.x %fp2,%fp1 | |
8290 | ||
8291 | fmul.d LOGA6(%pc),%fp1 # V*A6 | |
8292 | fmul.d LOGA5(%pc),%fp2 # V*A5 | |
8293 | ||
8294 | fadd.d LOGA4(%pc),%fp1 # A4+V*A6 | |
8295 | fadd.d LOGA3(%pc),%fp2 # A3+V*A5 | |
8296 | ||
8297 | fmul.x %fp3,%fp1 # V*(A4+V*A6) | |
8298 | fmul.x %fp3,%fp2 # V*(A3+V*A5) | |
8299 | ||
8300 | fadd.d LOGA2(%pc),%fp1 # A2+V*(A4+V*A6) | |
8301 | fadd.d LOGA1(%pc),%fp2 # A1+V*(A3+V*A5) | |
8302 | ||
8303 | fmul.x %fp3,%fp1 # V*(A2+V*(A4+V*A6)) | |
8304 | add.l &16,%a0 # ADDRESS OF LOG(F) | |
8305 | fmul.x %fp3,%fp2 # V*(A1+V*(A3+V*A5)) | |
8306 | ||
8307 | fmul.x %fp0,%fp1 # U*V*(A2+V*(A4+V*A6)) | |
8308 | fadd.x %fp2,%fp0 # U+V*(A1+V*(A3+V*A5)) | |
8309 | ||
8310 | fadd.x (%a0),%fp1 # LOG(F)+U*V*(A2+V*(A4+V*A6)) | |
8311 | fmovm.x (%sp)+,&0x30 # RESTORE FP2-3 | |
8312 | fadd.x %fp1,%fp0 # FP0 IS LOG(F) + LOG(1+U) | |
8313 | ||
8314 | fmov.l %d0,%fpcr | |
8315 | fadd.x KLOG2(%a6),%fp0 # FINAL ADD | |
8316 | bra t_inx2 | |
8317 | ||
8318 | ||
8319 | LOGNEAR1: | |
8320 | ||
8321 | # if the input is exactly equal to one, then exit through ld_pzero. | |
8322 | # if these 2 lines weren't here, the correct answer would be returned | |
8323 | # but the INEX2 bit would be set. | |
8324 | fcmp.b %fp0,&0x1 # is it equal to one? | |
8325 | fbeq.l ld_pzero # yes | |
8326 | ||
8327 | #--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT. | |
8328 | fmov.x %fp0,%fp1 | |
8329 | fsub.s one(%pc),%fp1 # FP1 IS X-1 | |
8330 | fadd.s one(%pc),%fp0 # FP0 IS X+1 | |
8331 | fadd.x %fp1,%fp1 # FP1 IS 2(X-1) | |
8332 | #--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL | |
8333 | #--IN U, U = 2(X-1)/(X+1) = FP1/FP0 | |
8334 | ||
8335 | LP1CONT2: | |
8336 | #--THIS IS A RE-ENTRY POINT FOR LOGNP1 | |
8337 | fdiv.x %fp0,%fp1 # FP1 IS U | |
8338 | fmovm.x &0xc,-(%sp) # SAVE FP2-3 | |
8339 | #--FP2 AND FP3 ARE NOW SAVED ON THE STACK | |
8340 | #--LET V=U*U, W=V*V, CALCULATE | |
8341 | #--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY | |
8342 | #--U + U*V*( [B1 + W*(B3 + W*B5)] + [V*(B2 + W*B4)] ) | |
8343 | fmov.x %fp1,%fp0 | |
8344 | fmul.x %fp0,%fp0 # FP0 IS V | |
8345 | fmov.x %fp1,SAVEU(%a6) # STORE U IN MEMORY, FREE FP1 | |
8346 | fmov.x %fp0,%fp1 | |
8347 | fmul.x %fp1,%fp1 # FP1 IS W | |
8348 | ||
8349 | fmov.d LOGB5(%pc),%fp3 | |
8350 | fmov.d LOGB4(%pc),%fp2 | |
8351 | ||
8352 | fmul.x %fp1,%fp3 # W*B5 | |
8353 | fmul.x %fp1,%fp2 # W*B4 | |
8354 | ||
8355 | fadd.d LOGB3(%pc),%fp3 # B3+W*B5 | |
8356 | fadd.d LOGB2(%pc),%fp2 # B2+W*B4 | |
8357 | ||
8358 | fmul.x %fp3,%fp1 # W*(B3+W*B5), FP3 RELEASED | |
8359 | ||
8360 | fmul.x %fp0,%fp2 # V*(B2+W*B4) | |
8361 | ||
8362 | fadd.d LOGB1(%pc),%fp1 # B1+W*(B3+W*B5) | |
8363 | fmul.x SAVEU(%a6),%fp0 # FP0 IS U*V | |
8364 | ||
8365 | fadd.x %fp2,%fp1 # B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED | |
8366 | fmovm.x (%sp)+,&0x30 # FP2-3 RESTORED | |
8367 | ||
8368 | fmul.x %fp1,%fp0 # U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) | |
8369 | ||
8370 | fmov.l %d0,%fpcr | |
8371 | fadd.x SAVEU(%a6),%fp0 | |
8372 | bra t_inx2 | |
8373 | ||
8374 | #--REGISTERS SAVED FPCR. LOG(-VE) IS INVALID | |
8375 | LOGNEG: | |
8376 | bra t_operr | |
8377 | ||
8378 | global slognd | |
8379 | slognd: | |
8380 | #--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT; NORMALIZES X, STORES -K IN ADJK, THEN JOINS LOGBGN | |
8381 | ||
8382 | mov.l &-100,ADJK(%a6) # INPUT = 2^(ADJK) * FP0; THIS INITIAL VALUE IS OVERWRITTEN WITH -K BELOW | |
8383 | ||
8384 | #----normalize the input value by left shifting k bits (k to be determined | |
8385 | #----below), adjusting exponent and storing -k to ADJK | |
8386 | #----the value TWOTO100 is no longer needed. | |
8387 | #----Note that this code assumes the denormalized input is NON-ZERO. | |
8388 | ||
8389 | movm.l &0x3f00,-(%sp) # save some registers {d2-d7} | |
8390 | mov.l (%a0),%d3 # D3 is the leading longword of X (exponent of smallest norm.) | |
8391 | mov.l 4(%a0),%d4 | |
8392 | mov.l 8(%a0),%d5 # (D4,D5) is (Hi_X,Lo_X) | |
8393 | clr.l %d2 # D2 used for holding K | |
8394 | ||
8395 | tst.l %d4 | |
8396 | bne.b Hi_not0 | |
8397 | ||
8398 | Hi_0: | |
8399 | mov.l %d5,%d4 | |
8400 | clr.l %d5 | |
8401 | mov.l &32,%d2 | |
8402 | clr.l %d6 | |
8403 | bfffo %d4{&0:&32},%d6 | |
8404 | lsl.l %d6,%d4 | |
8405 | add.l %d6,%d2 # (D3,D4,D5) is normalized, D2 is K = 32 + bit offset | |
8406 | ||
8407 | mov.l %d3,X(%a6) | |
8408 | mov.l %d4,XFRAC(%a6) | |
8409 | mov.l %d5,XFRAC+4(%a6) | |
8410 | neg.l %d2 | |
8411 | mov.l %d2,ADJK(%a6) | |
8412 | fmov.x X(%a6),%fp0 | |
8413 | movm.l (%sp)+,&0xfc # restore registers {d2-d7} | |
8414 | lea X(%a6),%a0 | |
8415 | bra.w LOGBGN # begin regular log(X) | |
8416 | ||
8417 | Hi_not0: | |
8418 | clr.l %d6 | |
8419 | bfffo %d4{&0:&32},%d6 # find first 1 | |
8420 | mov.l %d6,%d2 # get k | |
8421 | lsl.l %d6,%d4 | |
8422 | mov.l %d5,%d7 # a copy of D5 | |
8423 | lsl.l %d6,%d5 | |
8424 | neg.l %d6 | |
8425 | add.l &32,%d6 | |
8426 | lsr.l %d6,%d7 | |
8427 | or.l %d7,%d4 # (D3,D4,D5) normalized; bits shifted out of D5 are merged into D4 | |
8428 | ||
8429 | mov.l %d3,X(%a6) | |
8430 | mov.l %d4,XFRAC(%a6) | |
8431 | mov.l %d5,XFRAC+4(%a6) | |
8432 | neg.l %d2 | |
8433 | mov.l %d2,ADJK(%a6) | |
8434 | fmov.x X(%a6),%fp0 | |
8435 | movm.l (%sp)+,&0xfc # restore registers {d2-d7} | |
8436 | lea X(%a6),%a0 | |
8437 | bra.w LOGBGN # begin regular log(X) | |
8438 | ||
8439 | global slognp1 | |
8440 | #--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S. A0 POINTS TO THE EXTENDED-PRECISION INPUT. | |
8441 | slognp1: | |
8442 | fmov.x (%a0),%fp0 # LOAD INPUT | |
8443 | fabs.x %fp0 # test magnitude | |
8444 | fcmp.x %fp0,LTHOLD(%pc) # compare with min threshold | |
8445 | fbgt.w LP1REAL # if greater, continue | |
8446 | fmov.l %d0,%fpcr | |
8447 | mov.b &FMOV_OP,%d1 # last inst is MOVE | |
8448 | fmov.x (%a0),%fp0 # return signed argument | |
8449 | bra t_catch | |
8450 | ||
8451 | LP1REAL: | |
8452 | fmov.x (%a0),%fp0 # LOAD INPUT | |
8453 | mov.l &0x00000000,ADJK(%a6) | |
8454 | fmov.x %fp0,%fp1 # FP1 IS INPUT Z | |
8455 | fadd.s one(%pc),%fp0 # X := ROUND(1+Z) | |
8456 | fmov.x %fp0,X(%a6) | |
8457 | mov.w XFRAC(%a6),XDCARE(%a6) | |
8458 | mov.l X(%a6),%d1 | |
8459 | cmp.l %d1,&0 | |
8460 | ble.w LP1NEG0 # LOG OF ZERO OR -VE | |
8461 | cmp.l %d1,&0x3ffe8000 # IS X WITHIN BOUNDS [1/2,3/2]? | |
8462 | blt.w LOGMAIN | |
8463 | cmp.l %d1,&0x3fffc000 | |
8464 | bgt.w LOGMAIN | |
8465 | #--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z, | |
8466 | #--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE, | |
8467 | #--SIMPLY INVOKE LOG(X) FOR LOG(1+Z). | |
8468 | ||
8469 | LP1NEAR1: | |
8470 | #--NEXT SEE IF EXP(-1/16) < X < EXP(1/16) | |
8471 | cmp.l %d1,&0x3ffef07d | |
8472 | blt.w LP1CARE | |
8473 | cmp.l %d1,&0x3fff8841 | |
8474 | bgt.w LP1CARE | |
8475 | ||
8476 | LP1ONE16: | |
8477 | #--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2) | |
8478 | #--WHERE U = 2Z/(2+Z) = 2Z/(1+X). | |
8479 | fadd.x %fp1,%fp1 # FP1 IS 2Z | |
8480 | fadd.s one(%pc),%fp0 # FP0 IS 1+X | |
8481 | #--U = FP1/FP0 | |
8482 | bra.w LP1CONT2 | |
8483 | ||
8484 | LP1CARE: | |
8485 | #--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE | |
8486 | #--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST | |
8487 | #--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2], | |
8488 | #--THERE ARE ONLY TWO CASES. | |
8489 | #--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z | |
8490 | #--CASE 2: 1+Z > 1, THEN K = 0 AND Y-F = (1-F) + Z | |
8491 | #--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF | |
8492 | #--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED. | |
8493 | ||
8494 | mov.l XFRAC(%a6),FFRAC(%a6) | |
8495 | and.l &0xFE000000,FFRAC(%a6) | |
8496 | or.l &0x01000000,FFRAC(%a6) # F OBTAINED | |
8497 | cmp.l %d1,&0x3FFF8000 # SEE IF 1+Z >= 1 | |
8498 | bge.b KISZERO | |
8499 | ||
8500 | KISNEG1: | |
8501 | fmov.s TWO(%pc),%fp0 | |
8502 | mov.l &0x3fff0000,F(%a6) | |
8503 | clr.l F+8(%a6) | |
8504 | fsub.x F(%a6),%fp0 # 2-F | |
8505 | mov.l FFRAC(%a6),%d1 | |
8506 | and.l &0x7E000000,%d1 | |
8507 | asr.l &8,%d1 | |
8508 | asr.l &8,%d1 | |
8509 | asr.l &4,%d1 # D1 CONTAINS DISPLACEMENT FOR 1/F | |
8510 | fadd.x %fp1,%fp1 # GET 2Z | |
8511 | fmovm.x &0xc,-(%sp) # SAVE FP2-3 {%fp2/%fp3} | |
8512 | fadd.x %fp1,%fp0 # FP0 IS Y-F = (2-F)+2Z | |
8513 | lea LOGTBL(%pc),%a0 # A0 IS ADDRESS OF 1/F | |
8514 | add.l %d1,%a0 | |
8515 | fmov.s negone(%pc),%fp1 # FP1 IS K = -1 | |
8516 | bra.w LP1CONT1 | |
8517 | ||
8518 | KISZERO: | |
8519 | fmov.s one(%pc),%fp0 | |
8520 | mov.l &0x3fff0000,F(%a6) | |
8521 | clr.l F+8(%a6) | |
8522 | fsub.x F(%a6),%fp0 # 1-F | |
8523 | mov.l FFRAC(%a6),%d1 | |
8524 | and.l &0x7E000000,%d1 | |
8525 | asr.l &8,%d1 | |
8526 | asr.l &8,%d1 | |
8527 | asr.l &4,%d1 | |
8528 | fadd.x %fp1,%fp0 # FP0 IS Y-F = (1-F)+Z | |
8529 | fmovm.x &0xc,-(%sp) # FP2-3 SAVED {%fp2/%fp3} | |
8530 | lea LOGTBL(%pc),%a0 | |
8531 | add.l %d1,%a0 # A0 IS ADDRESS OF 1/F | |
8532 | fmov.s zero(%pc),%fp1 # FP1 IS K = 0 | |
8533 | bra.w LP1CONT1 | |
8534 | ||
8535 | LP1NEG0: | |
8536 | #--FPCR SAVED. D1 IS X IN COMPACT FORM. | |
8537 | cmp.l %d1,&0 | |
8538 | blt.b LP1NEG | |
8539 | LP1ZERO: | |
8540 | fmov.s negone(%pc),%fp0 | |
8541 | ||
8542 | fmov.l %d0,%fpcr | |
8543 | bra t_dz | |
8544 | ||
8545 | LP1NEG: | |
8546 | fmov.s zero(%pc),%fp0 | |
8547 | ||
8548 | fmov.l %d0,%fpcr | |
8549 | bra t_operr | |
8550 | ||
8551 | global slognp1d | |
8552 | #--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT | |
8553 | # Simply return the denorm by branching to t_extdnrm | |
8554 | slognp1d: | |
8555 | bra t_extdnrm | |
8556 | ||
8557 | ######################################################################### | |
8558 | # satanh(): computes the inverse hyperbolic tangent of a norm input # | |
8559 | # satanhd(): computes the inverse hyperbolic tangent of a denorm input # | |
8560 | # # | |
8561 | # INPUT *************************************************************** # | |
8562 | # a0 = pointer to extended precision input # | |
8563 | # d0 = round precision,mode # | |
8564 | # # | |
8565 | # OUTPUT ************************************************************** # | |
8566 | # fp0 = arctanh(X) # | |
8567 | # # | |
8568 | # ACCURACY and MONOTONICITY ******************************************* # | |
8569 | # The returned result is within 3 ulps in 64 significant bit, # | |
8570 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | |
8571 | # rounded to double precision. The result is provably monotonic # | |
8572 | # in double precision. # | |
8573 | # # | |
8574 | # ALGORITHM *********************************************************** # | |
8575 | # # | |
8576 | # ATANH # | |
8577 | # 1. If |X| >= 1, go to 3. # | |
8578 | # # | |
8579 | # 2. (|X| < 1) Calculate atanh(X) by # | |
8580 | # sgn := sign(X) # | |
8581 | # y := |X| # | |
8582 | # z := 2y/(1-y) # | |
8583 | # atanh(X) := sgn * (1/2) * logp1(z) # | |
8584 | # Exit. # | |
8585 | # # | |
8586 | # 3. If |X| > 1, go to 5. # | |
8587 | # # | |
8588 | # 4. (|X| = 1) Generate infinity with an appropriate sign and # | |
8589 | # divide-by-zero by # | |
8590 | # sgn := sign(X) # | |
8591 | # atanh(X) := sgn / (+0). # | |
8592 | # Exit. # | |
8593 | # # | |
8594 | # 5. (|X| > 1) Generate an invalid operation by 0 * infinity. # | |
8595 | # Exit. # | |
8596 | # # | |
8597 | ######################################################################### | |
8598 | ||
8599 | global satanh | |
8600 | satanh: | |
8601 | mov.l (%a0),%d1 | |
8602 | mov.w 4(%a0),%d1 | |
8603 | and.l &0x7FFFFFFF,%d1 | |
8604 | cmp.l %d1,&0x3FFF8000 | |
8605 | bge.b ATANHBIG | |
8606 | ||
8607 | #--THIS IS THE USUAL CASE, |X| < 1 | |
8608 | #--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z). | |
8609 | ||
8610 | fabs.x (%a0),%fp0 # Y = |X| | |
8611 | fmov.x %fp0,%fp1 | |
8612 | fneg.x %fp1 # -Y | |
8613 | fadd.x %fp0,%fp0 # 2Y | |
8614 | fadd.s &0x3F800000,%fp1 # 1-Y | |
8615 | fdiv.x %fp1,%fp0 # Z = 2Y/(1-Y) | |
8616 | mov.l (%a0),%d1 | |
8617 | and.l &0x80000000,%d1 | |
8618 | or.l &0x3F000000,%d1 # SIGN(X)*HALF AS A SINGLE-PRECISION VALUE | |
8619 | mov.l %d1,-(%sp) | |
8620 | ||
8621 | mov.l %d0,-(%sp) # save rnd prec,mode | |
8622 | clr.l %d0 # pass ext prec,RN | |
8623 | fmovm.x &0x01,-(%sp) # save Z on stack | |
8624 | lea (%sp),%a0 # pass ptr to Z | |
8625 | bsr slognp1 # LOG1P(Z) | |
8626 | add.l &0xc,%sp # clear Z from stack | |
8627 | ||
8628 | mov.l (%sp)+,%d0 # fetch old prec,mode | |
8629 | fmov.l %d0,%fpcr # load it | |
8630 | mov.b &FMUL_OP,%d1 # last inst is MUL | |
8631 | fmul.s (%sp)+,%fp0 | |
8632 | bra t_catch | |
8633 | ||
8634 | ATANHBIG: | |
8635 | fabs.x (%a0),%fp0 # |X|; > 1 goes to t_operr, otherwise (|X| = 1) to t_dz | |
8636 | fcmp.s %fp0,&0x3F800000 | |
8637 | fbgt t_operr | |
8638 | bra t_dz | |
8639 | ||
8640 | global satanhd | |
8641 | #--ATANH(X) = X FOR DENORMALIZED X; HANDLED BY BRANCHING TO t_extdnrm | |
8642 | satanhd: | |
8643 | bra t_extdnrm | |
8644 | ||
8645 | ######################################################################### | |
8646 | # slog10(): computes the base-10 logarithm of a normalized input # | |
8647 | # slog10d(): computes the base-10 logarithm of a denormalized input # | |
8648 | # slog2(): computes the base-2 logarithm of a normalized input # | |
8649 | # slog2d(): computes the base-2 logarithm of a denormalized input # | |
8650 | # # | |
8651 | # INPUT *************************************************************** # | |
8652 | # a0 = pointer to extended precision input # | |
8653 | # d0 = round precision,mode # | |
8654 | # # | |
8655 | # OUTPUT ************************************************************** # | |
8656 | # fp0 = log_10(X) or log_2(X) # | |
8657 | # # | |
8658 | # ACCURACY and MONOTONICITY ******************************************* # | |
8659 | # The returned result is within 1.7 ulps in 64 significant bit, # | |
8660 | # i.e. within 0.5003 ulp to 53 bits if the result is subsequently # | |
8661 | # rounded to double precision. The result is provably monotonic # | |
8662 | # in double precision. # | |
8663 | # # | |
8664 | # ALGORITHM *********************************************************** # | |
8665 | # # | |
8666 | # slog10d: # | |
8667 | # # | |
8668 | # Step 0. If X < 0, create a NaN and raise the invalid operation # | |
8669 | # flag. Otherwise, save FPCR in D1; set FpCR to default. # | |
8670 | # Notes: Default means round-to-nearest mode, no floating-point # | |
8671 | # traps, and precision control = double extended. # | |
8672 | # # | |
8673 | # Step 1. Call slognd to obtain Y = log(X), the natural log of X. # | |
8674 | # Notes: Even if X is denormalized, log(X) is always normalized. # | |
8675 | # # | |
8676 | # Step 2. Compute log_10(X) = log(X) * (1/log(10)). # | |
8677 | # 2.1 Restore the user FPCR # | |
8678 | # 2.2 Return ans := Y * INV_L10. # | |
8679 | # # | |
8680 | # slog10: # | |
8681 | # # | |
8682 | # Step 0. If X < 0, create a NaN and raise the invalid operation # | |
8683 | # flag. Otherwise, save FPCR in D1; set FpCR to default. # | |
8684 | # Notes: Default means round-to-nearest mode, no floating-point # | |
8685 | # traps, and precision control = double extended. # | |
8686 | # # | |
8687 | # Step 1. Call sLogN to obtain Y = log(X), the natural log of X. # | |
8688 | # # | |
8689 | # Step 2. Compute log_10(X) = log(X) * (1/log(10)). # | |
8690 | # 2.1 Restore the user FPCR # | |
8691 | # 2.2 Return ans := Y * INV_L10. # | |
8692 | # # | |
8693 | # sLog2d: # | |
8694 | # # | |
8695 | # Step 0. If X < 0, create a NaN and raise the invalid operation # | |
8696 | # flag. Otherwise, save FPCR in D1; set FpCR to default. # | |
8697 | # Notes: Default means round-to-nearest mode, no floating-point # | |
8698 | # traps, and precision control = double extended. # | |
8699 | # # | |
8700 | # Step 1. Call slognd to obtain Y = log(X), the natural log of X. # | |
8701 | # Notes: Even if X is denormalized, log(X) is always normalized. # | |
8702 | # # | |
8703 | # Step 2. Compute log_2(X) = log(X) * (1/log(2)). # | |
8704 | # 2.1 Restore the user FPCR # | |
8705 | # 2.2 Return ans := Y * INV_L2. # | |
8706 | # # | |
8707 | # sLog2: # | |
8708 | # # | |
8709 | # Step 0. If X < 0, create a NaN and raise the invalid operation # | |
8710 | # flag. Otherwise, save FPCR in D1; set FpCR to default. # | |
8711 | # Notes: Default means round-to-nearest mode, no floating-point # | |
8712 | # traps, and precision control = double extended. # | |
8713 | # # | |
8714 | # Step 1. If X is not an integer power of two, i.e., X != 2^k, # | |
8715 | # go to Step 3. # | |
8716 | # # | |
8717 | # Step 2. Return k. # | |
8718 | # 2.1 Get integer k, X = 2^k. # | |
8719 | # 2.2 Restore the user FPCR. # | |
8720 | # 2.3 Return ans := convert-to-double-extended(k). # | |
8721 | # # | |
8722 | # Step 3. Call sLogN to obtain Y = log(X), the natural log of X. # | |
8723 | # # | |
8724 | # Step 4. Compute log_2(X) = log(X) * (1/log(2)). # | |
8725 | # 4.1 Restore the user FPCR # | |
8726 | # 4.2 Return ans := Y * INV_L2. # | |
8727 | # # | |
8728 | ######################################################################### | |
8729 | ||
8730 | INV_L10: | |
8731 | long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000 | |
8732 | ||
8733 | INV_L2: | |
8734 | long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000 | |
8735 | ||
8736 | global slog10 | |
8737 | #--entry point for Log10(X), X is normalized; computes log(X) via slogn, then multiplies by INV_L10 | |
8738 | slog10: | |
8739 | fmov.b &0x1,%fp0 | |
8740 | fcmp.x %fp0,(%a0) # if operand == 1, | |
8741 | fbeq.l ld_pzero # return an EXACT zero | |
8742 | ||
8743 | mov.l (%a0),%d1 | |
8744 | blt.w invalid | |
8745 | mov.l %d0,-(%sp) | |
8746 | clr.l %d0 | |
8747 | bsr slogn # log(X), X normal. | |
8748 | fmov.l (%sp)+,%fpcr | |
8749 | fmul.x INV_L10(%pc),%fp0 | |
8750 | bra t_inx2 | |
8751 | ||
8752 | global slog10d | |
8753 | #--entry point for Log10(X), X is denormalized; computes log(X) via slognd, then multiplies by INV_L10 | |
8754 | slog10d: | |
8755 | mov.l (%a0),%d1 | |
8756 | blt.w invalid | |
8757 | mov.l %d0,-(%sp) | |
8758 | clr.l %d0 | |
8759 | bsr slognd # log(X), X denorm. | |
8760 | fmov.l (%sp)+,%fpcr | |
8761 | fmul.x INV_L10(%pc),%fp0 | |
8762 | bra t_minx2 | |
8763 | ||
8764 | global slog2 | |
8765 | #--entry point for Log2(X), X is normalized; exact powers of two return k directly, otherwise log(X) * INV_L2 | |
8766 | slog2: | |
8767 | mov.l (%a0),%d1 | |
8768 | blt.w invalid | |
8769 | ||
8770 | mov.l 8(%a0),%d1 | |
8771 | bne.b continue # X is not 2^k (low mantissa longword is non-zero) | |
8772 | ||
8773 | mov.l 4(%a0),%d1 | |
8774 | and.l &0x7FFFFFFF,%d1 | |
8775 | bne.b continue | |
8776 | ||
8777 | #--X = 2^k. | |
8778 | mov.w (%a0),%d1 | |
8779 | and.l &0x00007FFF,%d1 | |
8780 | sub.l &0x3FFF,%d1 | |
8781 | beq.l ld_pzero | |
8782 | fmov.l %d0,%fpcr | |
8783 | fmov.l %d1,%fp0 | |
8784 | bra t_inx2 | |
8785 | ||
8786 | continue: | |
8787 | mov.l %d0,-(%sp) | |
8788 | clr.l %d0 | |
8789 | bsr slogn # log(X), X normal. | |
8790 | fmov.l (%sp)+,%fpcr | |
8791 | fmul.x INV_L2(%pc),%fp0 | |
8792 | bra t_inx2 | |
8793 | ||
8794 | invalid: | |
8795 | bra t_operr | |
8796 | ||
8797 | global slog2d | |
8798 | #--entry point for Log2(X), X is denormalized; computes log(X) via slognd, then multiplies by INV_L2 | |
8799 | slog2d: | |
8800 | mov.l (%a0),%d1 | |
8801 | blt.w invalid | |
8802 | mov.l %d0,-(%sp) | |
8803 | clr.l %d0 | |
8804 | bsr slognd # log(X), X denorm. | |
8805 | fmov.l (%sp)+,%fpcr | |
8806 | fmul.x INV_L2(%pc),%fp0 | |
8807 | bra t_minx2 | |
8808 | ||
8809 | ######################################################################### | |
8810 | # stwotox(): computes 2**X for a normalized input # | |
8811 | # stwotoxd(): computes 2**X for a denormalized input # | |
8812 | # stentox(): computes 10**X for a normalized input # | |
8813 | # stentoxd(): computes 10**X for a denormalized input # | |
8814 | # # | |
8815 | # INPUT *************************************************************** # | |
8816 | # a0 = pointer to extended precision input # | |
8817 | # d0 = round precision,mode # | |
8818 | # # | |
8819 | # OUTPUT ************************************************************** # | |
8820 | # fp0 = 2**X or 10**X # | |
8821 | # # | |
8822 | # ACCURACY and MONOTONICITY ******************************************* # | |
8823 | # The returned result is within 2 ulps in 64 significant bit, # | |
8824 | # i.e. within 0.5001 ulp to 53 bits if the result is subsequently # | |
8825 | # rounded to double precision. The result is provably monotonic # | |
8826 | # in double precision. # | |
8827 | # # | |
8828 | # ALGORITHM *********************************************************** # | |
8829 | # # | |
8830 | # twotox # | |
8831 | # 1. If |X| > 16480, go to ExpBig. # | |
8832 | # # | |
8833 | # 2. If |X| < 2**(-70), go to ExpSm. # | |
8834 | # # | |
8835 | # 3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore # | |
8836 | # decompose N as # | |
8837 | # N = 64(M + M') + j, j = 0,1,2,...,63. # | |
8838 | # # | |
8839 | # 4. Overwrite r := r * log2. Then # | |
8840 | # 2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # | |
8841 | # Go to expr to compute that expression. # | |
8842 | # # | |
8843 | # tentox # | |
8844 | # 1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig. # | |
8845 | # # | |
8846 | # 2. If |X| < 2**(-70), go to ExpSm. # | |
8847 | # # | |
8848 | # 3. Set y := X*log_2(10)*64 (base 2 log of 10). Set # | |
8849 | # N := round-to-int(y). Decompose N as # | |
8850 | # N = 64(M + M') + j, j = 0,1,2,...,63. # | |
8851 | # # | |
8852 | # 4. Define r as # | |
8853 | # r := ((X - N*L1)-N*L2) * L10 # | |
8854 | # where L1, L2 are the leading and trailing parts of # | |
8855 | # log_10(2)/64 and L10 is the natural log of 10. Then # | |
8856 | # 10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r). # | |
8857 | # Go to expr to compute that expression. # | |
8858 | # # | |
8859 | # expr # | |
8860 | # 1. Fetch 2**(j/64) from table as Fact1 and Fact2. # | |
8861 | # # | |
8862 | # 2. Overwrite Fact1 and Fact2 by # | |
8863 | # Fact1 := 2**(M) * Fact1 # | |
8864 | # Fact2 := 2**(M) * Fact2 # | |
8865 | # Thus Fact1 + Fact2 = 2**(M) * 2**(j/64). # | |
8866 | # # | |
8867 | # 3. Calculate P where 1 + P approximates exp(r): # | |
8868 | # P = r + r*r*(A1+r*(A2+...+r*A5)). # | |
8869 | # # | |
8870 | # 4. Let AdjFact := 2**(M'). Return # | |
8871 | # AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ). # | |
8872 | # Exit. # | |
8873 | # # | |
8874 | # ExpBig # | |
8875 | # 1. Generate overflow by Huge * Huge if X > 0; otherwise, # | |
8876 | # generate underflow by Tiny * Tiny. # | |
8877 | # # | |
8878 | # ExpSm # | |
8879 | # 1. Return 1 + X. # | |
8880 | # # | |
8881 | ######################################################################### | |
8882 | ||
8883 | L2TEN64: | |
8884 | long 0x406A934F,0x0979A371 # 64LOG10/LOG2 | |
8885 | L10TWO1: | |
8886 | long 0x3F734413,0x509F8000 # LOG2/64LOG10 | |
8887 | ||
8888 | L10TWO2: | |
8889 | long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000 | |
8890 | ||
8891 | LOG10: long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000 | |
8892 | ||
8893 | LOG2: long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000 | |
8894 | ||
8895 | EXPA5: long 0x3F56C16D,0x6F7BD0B2 | |
8896 | EXPA4: long 0x3F811112,0x302C712C | |
8897 | EXPA3: long 0x3FA55555,0x55554CC1 | |
8898 | EXPA2: long 0x3FC55555,0x55554A54 | |
8899 | EXPA1: long 0x3FE00000,0x00000000,0x00000000,0x00000000 | |
8900 | ||
8901 | TEXPTBL: | |
8902 | long 0x3FFF0000,0x80000000,0x00000000,0x3F738000 | |
8903 | long 0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA | |
8904 | long 0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9 | |
8905 | long 0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9 | |
8906 | long 0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA | |
8907 | long 0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C | |
8908 | long 0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1 | |
8909 | long 0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA | |
8910 | long 0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373 | |
8911 | long 0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670 | |
8912 | long 0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700 | |
8913 | long 0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0 | |
8914 | long 0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D | |
8915 | long 0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319 | |
8916 | long 0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B | |
8917 | long 0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5 | |
8918 | long 0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A | |
8919 | long 0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B | |
8920 | long 0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF | |
8921 | long 0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA | |
8922 | long 0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD | |
8923 | long 0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E | |
8924 | long 0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B | |
8925 | long 0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB | |
8926 | long 0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB | |
8927 | long 0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274 | |
8928 | long 0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C | |
8929 | long 0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00 | |
8930 | long 0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301 | |
8931 | long 0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367 | |
8932 | long 0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F | |
8933 | long 0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C | |
8934 | long 0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB | |
8935 | long 0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB | |
8936 | long 0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C | |
8937 | long 0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA | |
8938 | long 0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD | |
8939 | long 0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51 | |
8940 | long 0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A | |
8941 | long 0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2 | |
8942 | long 0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB | |
8943 | long 0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17 | |
8944 | long 0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C | |
8945 | long 0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8 | |
8946 | long 0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53 | |
8947 | long 0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE | |
8948 | long 0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124 | |
8949 | long 0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243 | |
8950 | long 0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A | |
8951 | long 0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61 | |
8952 | long 0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610 | |
8953 | long 0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1 | |
8954 | long 0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12 | |
8955 | long 0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE | |
8956 | long 0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4 | |
8957 | long 0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F | |
8958 | long 0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A | |
8959 | long 0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A | |
8960 | long 0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC | |
8961 | long 0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F | |
8962 | long 0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A | |
8963 | long 0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795 | |
8964 | long 0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B | |
8965 | long 0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581 | |
8966 | ||
8967 | set INT,L_SCR1 | |
8968 | ||
8969 | set X,FP_SCR0 | |
8970 | set XDCARE,X+2 | |
8971 | set XFRAC,X+4 | |
8972 | ||
8973 | set ADJFACT,FP_SCR0 | |
8974 | ||
8975 | set FACT1,FP_SCR0 | |
8976 | set FACT1HI,FACT1+4 | |
8977 | set FACT1LOW,FACT1+8 | |
8978 | ||
8979 | set FACT2,FP_SCR1 | |
8980 | set FACT2HI,FACT2+4 | |
8981 | set FACT2LOW,FACT2+8 | |
8982 | ||
8983 | global stwotox | |
8984 | #--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S. A0 POINTS TO THE EXTENDED-PRECISION INPUT. | |
8985 | stwotox: | |
8986 | fmovm.x (%a0),&0x80 # LOAD INPUT | |
8987 | ||
8988 | mov.l (%a0),%d1 | |
8989 | mov.w 4(%a0),%d1 | |
8990 | fmov.x %fp0,X(%a6) | |
8991 | and.l &0x7FFFFFFF,%d1 | |
8992 | ||
8993 | cmp.l %d1,&0x3FB98000 # |X| >= 2**(-70)? | |
8994 | bge.b TWOOK1 | |
8995 | bra.w EXPBORS | |
8996 | ||
8997 | TWOOK1: | |
8998 | cmp.l %d1,&0x400D80C0 # |X| > 16480? | |
8999 | ble.b TWOMAIN | |
9000 | bra.w EXPBORS | |
9001 | ||
9002 | TWOMAIN: | |
9003 | #--USUAL CASE, 2^(-70) <= |X| <= 16480 | |
9004 | ||
9005 | fmov.x %fp0,%fp1 | |
9006 | fmul.s &0x42800000,%fp1 # 64 * X | |
9007 | fmov.l %fp1,INT(%a6) # N = ROUND-TO-INT(64 X) | |
9008 | mov.l %d2,-(%sp) | |
9009 | lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) | |
9010 | fmov.l INT(%a6),%fp1 # N --> FLOATING FMT | |
9011 | mov.l INT(%a6),%d1 | |
9012 | mov.l %d1,%d2 | |
9013 | and.l &0x3F,%d1 # D1 IS J | |
9014 | asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64) | |
9015 | add.l %d1,%a1 # ADDRESS FOR 2^(J/64) | |
9016 | asr.l &6,%d2 # d2 IS L, N = 64L + J | |
9017 | mov.l %d2,%d1 | |
9018 | asr.l &1,%d1 # D1 IS M | |
9019 | sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J | |
9020 | add.l &0x3FFF,%d2 | |
9021 | ||
9022 | #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), | |
9023 | #--D1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. | |
9024 | #--ADJFACT = 2^(M'). | |
9025 | #--D2 HAS BEEN SAVED ON THE STACK ABOVE; FP2/FP3 ARE SAVED NEXT. | |
9026 | ||
9027 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | |
9028 | ||
9029 | fmul.s &0x3C800000,%fp1 # (1/64)*N | |
9030 | mov.l (%a1)+,FACT1(%a6) | |
9031 | mov.l (%a1)+,FACT1HI(%a6) | |
9032 | mov.l (%a1)+,FACT1LOW(%a6) | |
9033 | mov.w (%a1)+,FACT2(%a6) | |
9034 | ||
9035 | fsub.x %fp1,%fp0 # X - (1/64)*INT(64 X) | |
9036 | ||
9037 | mov.w (%a1)+,FACT2HI(%a6) | |
9038 | clr.w FACT2HI+2(%a6) | |
9039 | clr.l FACT2LOW(%a6) | |
9040 | add.w %d1,FACT1(%a6) | |
9041 | fmul.x LOG2(%pc),%fp0 # FP0 IS R | |
9042 | add.w %d1,FACT2(%a6) | |
9043 | ||
9044 | bra.w expr | |
9045 | ||
9046 | EXPBORS: | |
9047 | #--FPCR, D0 SAVED | |
9048 | cmp.l %d1,&0x3FFF8000 | |
9049 | bgt.b TEXPBIG | |
9050 | ||
9051 | #--|X| IS SMALL, RETURN 1 + X | |
9052 | ||
9053 | fmov.l %d0,%fpcr # restore users round prec,mode | |
9054 | fadd.s &0x3F800000,%fp0 # RETURN 1 + X | |
9055 | bra t_pinx2 | |
9056 | ||
9057 | TEXPBIG: | |
9058 | #--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW | |
9059 | #--REGISTERS SAVE SO FAR ARE FPCR AND D0 | |
9060 | mov.l X(%a6),%d1 | |
9061 | cmp.l %d1,&0 | |
9062 | blt.b EXPNEG | |
9063 | ||
9064 | bra t_ovfl2 # t_ovfl expects positive value | |
9065 | ||
9066 | EXPNEG: | |
9067 | bra t_unfl2 # t_unfl expects positive value | |
9068 | ||
9069 | global stwotoxd | |
9070 | stwotoxd: | |
9071 | #--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT. RETURNS 1.0 PLUS A SINGLE-PRECISION VALUE BUILT FROM THE FIRST LONGWORD OF X OR'ED WITH 0x00800001 (presumably a tiny stand-in carrying the sign of X -- confirm) | |
9072 | ||
9073 | fmov.l %d0,%fpcr # set user's rounding mode/precision | |
9074 | fmov.s &0x3F800000,%fp0 # fp0 = 1.0, THE "1" OF 1 + X | |
9075 | mov.l (%a0),%d1 | |
9076 | or.l &0x00800001,%d1 | |
9077 | fadd.s %d1,%fp0 | |
9078 | bra t_pinx2 | |
9079 | ||
9080 | global stentox | |
9081 | #--ENTRY POINT FOR 10**(X); X IS FINITE, NON-ZERO, AND NOT A NAN. a0 POINTS TO X, d0 HOLDS THE USER'S ROUND PREC,MODE. | |
9082 | stentox: | |
9083 | fmovm.x (%a0),&0x80 # LOAD INPUT X INTO fp0 | |
9084 | ||
9085 | mov.l (%a0),%d1 | |
9086 | mov.w 4(%a0),%d1 | |
9087 | fmov.x %fp0,X(%a6) | |
9088 | and.l &0x7FFFFFFF,%d1 | |
9089 | ||
9090 | cmp.l %d1,&0x3FB98000 # d1 = COMPACT |X| (EXPONENT WORD : TOP 16 MANTISSA BITS); |X| >= 2**(-70)? | |
9091 | bge.b TENOK1 | |
9092 | bra.w EXPBORS | |
9093 | ||
9094 | TENOK1: | |
9095 | cmp.l %d1,&0x400B9B07 # |X| <= 16480*log2/log10 ? | |
9096 | ble.b TENMAIN | |
9097 | bra.w EXPBORS | |
9098 | ||
9099 | TENMAIN: | |
9100 | #--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 | |
9101 | ||
9102 | fmov.x %fp0,%fp1 | |
9103 | fmul.d L2TEN64(%pc),%fp1 # X*64*LOG10/LOG2 | |
9104 | fmov.l %fp1,INT(%a6) # N=INT(X*64*LOG10/LOG2) | |
9105 | mov.l %d2,-(%sp) | |
9106 | lea TEXPTBL(%pc),%a1 # LOAD ADDRESS OF TABLE OF 2^(J/64) | |
9107 | fmov.l INT(%a6),%fp1 # N --> FLOATING FMT | |
9108 | mov.l INT(%a6),%d1 | |
9109 | mov.l %d1,%d2 | |
9110 | and.l &0x3F,%d1 # d1 IS J (LOW 6 BITS OF N) | |
9111 | asl.l &4,%d1 # DISPLACEMENT FOR 2^(J/64); 16 BYTES PER TABLE ENTRY | |
9112 | add.l %d1,%a1 # ADDRESS FOR 2^(J/64) | |
9113 | asr.l &6,%d2 # d2 IS L, N = 64L + J | |
9114 | mov.l %d2,%d1 | |
9115 | asr.l &1,%d1 # d1 IS M (L ARITHMETICALLY SHIFTED RIGHT BY 1) | |
9116 | sub.l %d1,%d2 # d2 IS M', N = 64(M+M') + J | |
9117 | add.l &0x3FFF,%d2 | |
9118 | ||
9119 | #--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64), | |
9120 | #--d1 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN. | |
9121 | #--d2 IS M' + 0x3FFF, LATER STORED AS THE EXPONENT WORD OF ADJFACT = 2^(M'). | |
9122 | #--SAVED ON THE STACK SO FAR: d2. fp2/fp3 ARE PUSHED NEXT. | |
9123 | fmovm.x &0x0c,-(%sp) # save fp2/fp3 | |
9124 | ||
9125 | fmov.x %fp1,%fp2 | |
9126 | ||
9127 | fmul.d L10TWO1(%pc),%fp1 # N*(LOG2/64LOG10)_LEAD | |
9128 | mov.l (%a1)+,FACT1(%a6) | |
9129 | ||
9130 | fmul.x L10TWO2(%pc),%fp2 # N*(LOG2/64LOG10)_TRAIL | |
9131 | ||
9132 | mov.l (%a1)+,FACT1HI(%a6) | |
9133 | mov.l (%a1)+,FACT1LOW(%a6) | |
9134 | fsub.x %fp1,%fp0 # X - N L_LEAD | |
9135 | mov.w (%a1)+,FACT2(%a6) | |
9136 | ||
9137 | fsub.x %fp2,%fp0 # X - N L_TRAIL | |
9138 | ||
9139 | mov.w (%a1)+,FACT2HI(%a6) | |
9140 | clr.w FACT2HI+2(%a6) | |
9141 | clr.l FACT2LOW(%a6) | |
9142 | ||
9143 | fmul.x LOG10(%pc),%fp0 # FP0 IS R | |
9144 | add.w %d1,FACT1(%a6) | |
9145 | add.w %d1,FACT2(%a6) | |
9146 | ||
9147 | expr: | |
9148 | #--SHARED TAIL OF stwotox AND stentox. THE STACK HOLDS (TOP FIRST) fp2/fp3, THEN d2; d0 = USER'S ROUND PREC,MODE. | |
9149 | #--d2 = M' + 0x3FFF (EXPONENT WORD FOR ADJFACT = 2**(M')), FACT1 + FACT2 = 2**(M) * 2**(J/64). | |
9150 | #--FP0 IS R. THE FOLLOWING CODE COMPUTES | |
9151 | #-- 2**(M'+M) * 2**(J/64) * EXP(R) | |
9152 | ||
9153 | fmov.x %fp0,%fp1 | |
9154 | fmul.x %fp1,%fp1 # FP1 IS S = R*R | |
9155 | ||
9156 | fmov.d EXPA5(%pc),%fp2 # FP2 IS A5 | |
9157 | fmov.d EXPA4(%pc),%fp3 # FP3 IS A4 | |
9158 | ||
9159 | fmul.x %fp1,%fp2 # FP2 IS S*A5 | |
9160 | fmul.x %fp1,%fp3 # FP3 IS S*A4 | |
9161 | ||
9162 | fadd.d EXPA3(%pc),%fp2 # FP2 IS A3+S*A5 | |
9163 | fadd.d EXPA2(%pc),%fp3 # FP3 IS A2+S*A4 | |
9164 | ||
9165 | fmul.x %fp1,%fp2 # FP2 IS S*(A3+S*A5) | |
9166 | fmul.x %fp1,%fp3 # FP3 IS S*(A2+S*A4) | |
9167 | ||
9168 | fadd.d EXPA1(%pc),%fp2 # FP2 IS A1+S*(A3+S*A5) | |
9169 | fmul.x %fp0,%fp3 # FP3 IS R*S*(A2+S*A4) | |
9170 | ||
9171 | fmul.x %fp1,%fp2 # FP2 IS S*(A1+S*(A3+S*A5)) | |
9172 | fadd.x %fp3,%fp0 # FP0 IS R+R*S*(A2+S*A4) | |
9173 | fadd.x %fp2,%fp0 # FP0 IS EXP(R) - 1 | |
9174 | ||
9175 | fmovm.x (%sp)+,&0x30 # restore fp2/fp3 | |
9176 | ||
9177 | #--FINAL RECONSTRUCTION PROCESS | |
9178 | #--RESULT = ADJFACT * ( FACT1 + (FACT2 + FACT1*(EXP(R)-1)) ), WITH ADJFACT = 2^(M') | |
9179 | ||
9180 | fmul.x FACT1(%a6),%fp0 | |
9181 | fadd.x FACT2(%a6),%fp0 | |
9182 | fadd.x FACT1(%a6),%fp0 | |
9183 | ||
9184 | fmov.l %d0,%fpcr # restore users round prec,mode | |
9185 | mov.w %d2,ADJFACT(%a6) # INSERT EXPONENT WORD M' + 0x3FFF | |
9186 | mov.l (%sp)+,%d2 | |
9187 | mov.l &0x80000000,ADJFACT+4(%a6) | |
9188 | clr.l ADJFACT+8(%a6) | |
9189 | mov.b &FMUL_OP,%d1 # last inst is MUL | |
9190 | fmul.x ADJFACT(%a6),%fp0 # FINAL ADJUSTMENT | |
9191 | bra t_catch | |
9192 | ||
9193 | global stentoxd | |
9194 | stentoxd: | |
9195 | #--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT. RETURNS 1.0 PLUS A SINGLE-PRECISION VALUE BUILT FROM THE FIRST LONGWORD OF X OR'ED WITH 0x00800001 (presumably a tiny stand-in carrying the sign of X -- confirm) | |
9196 | ||
9197 | fmov.l %d0,%fpcr # set user's rounding mode/precision | |
9198 | fmov.s &0x3F800000,%fp0 # fp0 = 1.0, THE "1" OF 1 + X | |
9199 | mov.l (%a0),%d1 | |
9200 | or.l &0x00800001,%d1 | |
9201 | fadd.s %d1,%fp0 | |
9202 | bra t_pinx2 | |
9203 | ||
9204 | ######################################################################### | |
9205 | # sscale(): computes the destination operand scaled by the source # | |
9206 | # operand. If the absolute value of the source operand is # | |
9207 | # >= 2^14, an overflow or underflow is returned. # | |
9208 | # # | |
9209 | # INPUT *************************************************************** # | |
9210 | # a0 = pointer to double-extended source operand X # | |
9211 | # a1 = pointer to double-extended destination operand Y # | |
9212 | # # | |
9213 | # OUTPUT ************************************************************** # | |
9214 | # fp0 = scale(X,Y) # | |
9215 | # # | |
9216 | ######################################################################### | |
9217 | ||
9218 | set SIGN, L_SCR1 | |
9219 | ||
9220 | global sscale | |
9221 | sscale: | |
9222 | mov.l %d0,-(%sp) # store off ctrl bits for now | |
9223 | ||
9224 | mov.w DST_EX(%a1),%d1 # get dst exponent | |
9225 | smi.b SIGN(%a6) # use SIGN to hold dst sign | |
9226 | andi.l &0x00007fff,%d1 # strip sign from dst exp | |
9227 | ||
9228 | mov.w SRC_EX(%a0),%d0 # check src bounds | |
9229 | andi.w &0x7fff,%d0 # clr src sign bit | |
9230 | cmpi.w %d0,&0x3fff # is |src| < 1 (biased exp below 0x3fff)? | |
9231 | blt.w src_small # yes | |
9232 | cmpi.w %d0,&0x400c # no; is |src| >= 2^14 (biased exp above 0x400c)? | |
9233 | bgt.w src_out # yes | |
9234 | ||
9235 | # | |
9236 | # Source is within 2^14 range. | |
9237 | # | |
9238 | src_ok: | |
9239 | fintrz.x SRC(%a0),%fp0 # calc int of src | |
9240 | fmov.l %fp0,%d0 # int src to d0 | |
9241 | # don't want any accrued bits from the fintrz showing up later since | |
9242 | # we may need to read the fpsr for the last fp op in t_catch2(). | |
9243 | fmov.l &0x0,%fpsr | |
9244 | ||
9245 | tst.b DST_HI(%a1) # is dst denormalized (top mantissa bit clear)? | |
9246 | bmi.b sok_norm | |
9247 | ||
9248 | # the dst is a DENORM. normalize the DENORM and add the adjustment to | |
9249 | # the src value. then, jump to the norm part of the routine. | |
9250 | sok_dnrm: | |
9251 | mov.l %d0,-(%sp) # save src for now | |
9252 | ||
9253 | mov.w DST_EX(%a1),FP_SCR0_EX(%a6) # make a copy | |
9254 | mov.l DST_HI(%a1),FP_SCR0_HI(%a6) | |
9255 | mov.l DST_LO(%a1),FP_SCR0_LO(%a6) | |
9256 | ||
9257 | lea FP_SCR0(%a6),%a0 # pass ptr to DENORM | |
9258 | bsr.l norm # normalize the DENORM | |
9259 | neg.l %d0 | |
9260 | add.l (%sp)+,%d0 # add adjustment to src | |
9261 | ||
9262 | fmovm.x FP_SCR0(%a6),&0x80 # load normalized DENORM | |
9263 | ||
9264 | cmpi.w %d0,&-0x3fff # is the shft amt really low? | |
9265 | bge.b sok_norm2 # thank goodness no | |
9266 | ||
9267 | # the multiply factor that we're trying to create should be a denorm | |
9268 | # for the multiply to work. therefore, we're going to actually do a | |
9269 | # multiply with a denorm which will cause an unimplemented data type | |
9270 | # exception to be put into the machine which will be caught and corrected | |
9271 | # later. we don't do this with the DENORMs above because this method | |
9272 | # is slower. but, don't fret, I don't see it being used much either. | |
9273 | fmov.l (%sp)+,%fpcr # restore user fpcr | |
9274 | mov.l &0x80000000,%d1 # load normalized mantissa | |
9275 | subi.l &-0x3fff,%d0 # how many should we shift? | |
9276 | neg.l %d0 # make it positive | |
9277 | cmpi.b %d0,&0x20 # is it >= 32? | |
9278 | bge.b sok_dnrm_32 # yes | |
9279 | lsr.l %d0,%d1 # no; bit stays in upper lw | |
9280 | clr.l -(%sp) # insert zero low mantissa | |
9281 | mov.l %d1,-(%sp) # insert new high mantissa | |
9282 | clr.l -(%sp) # make zero exponent | |
9283 | bra.b sok_norm_cont | |
9284 | sok_dnrm_32: | |
9285 | subi.b &0x20,%d0 # get shift count | |
9286 | lsr.l %d0,%d1 # make low mantissa longword | |
9287 | mov.l %d1,-(%sp) # insert new low mantissa | |
9288 | clr.l -(%sp) # insert zero high mantissa | |
9289 | clr.l -(%sp) # make zero exponent | |
9290 | bra.b sok_norm_cont | |
9291 | ||
9292 | # the src will force the dst to a DENORM value or worse. so, let's | |
9293 | # create an fp multiply that will create the result. | |
9294 | sok_norm: | |
9295 | fmovm.x DST(%a1),&0x80 # load fp0 with normalized dst | |
9296 | sok_norm2: | |
9297 | fmov.l (%sp)+,%fpcr # restore user fpcr | |
9298 | ||
9299 | addi.w &0x3fff,%d0 # turn src amt into exp value | |
9300 | swap %d0 # put exponent in high word | |
9301 | clr.l -(%sp) # insert zero low mantissa (pushed first = highest address) | |
9302 | mov.l &0x80000000,-(%sp) # insert new high mantissa | |
9303 | mov.l %d0,-(%sp) # insert new exponent longword (pushed last = lowest address) | |
9304 | ||
9305 | sok_norm_cont: | |
9306 | fmov.l %fpcr,%d0 # d0 needs fpcr for t_catch2 | |
9307 | mov.b &FMUL_OP,%d1 # last inst is MUL | |
9308 | fmul.x (%sp)+,%fp0 # do the multiply | |
9309 | bra t_catch2 # catch any exceptions | |
9310 | ||
9311 | # | |
9312 | # Source is outside of 2^14 range. Test the sign and branch | |
9313 | # to the appropriate exception handler. | |
9314 | # | |
9315 | src_out: | |
9316 | mov.l (%sp)+,%d0 # restore ctrl bits | |
9317 | exg %a0,%a1 # swap src,dst ptrs | |
9318 | tst.b SRC_EX(%a1) # is src negative? (a1 now points to the src) | |
9319 | bmi t_unfl # yes; underflow | |
9320 | bra t_ovfl_sc # no; overflow | |
9321 | ||
9322 | # | |
9323 | # The source input is below 1, so we check for denormalized numbers | |
9324 | # and set unfl. | |
9325 | # | |
9326 | src_small: | |
9327 | tst.b DST_HI(%a1) # is dst denormalized? | |
9328 | bpl.b ssmall_done # yes | |
9329 | ||
9330 | mov.l (%sp)+,%d0 | |
9331 | fmov.l %d0,%fpcr # no; load control bits | |
9332 | mov.b &FMOV_OP,%d1 # last inst is MOVE | |
9333 | fmov.x DST(%a1),%fp0 # simply return dest | |
9334 | bra t_catch2 | |
9335 | ssmall_done: | |
9336 | mov.l (%sp)+,%d0 # load control bits into d0 | |
9337 | mov.l %a1,%a0 # pass ptr to dst | |
9338 | bra t_resdnrm | |
9339 | ||
9340 | ######################################################################### | |
9341 | # smod(): computes the fp MOD of the input values X,Y. # | |
9342 | # srem(): computes the fp (IEEE) REM of the input values X,Y. # | |
9343 | # # | |
9344 | # INPUT *************************************************************** # | |
9345 | # a0 = pointer to extended precision input Y (source) # | |
9346 | # a1 = pointer to extended precision input X (destination) # | |
9347 | # d0 = round precision,mode # | |
9348 | # # | |
9349 | # The input operands X and Y can be either normalized or # | |
9350 | # denormalized. # | |
9351 | # # | |
9352 | # OUTPUT ************************************************************** # | |
9353 | # fp0 = FREM(X,Y) or FMOD(X,Y) # | |
9354 | # # | |
9355 | # ALGORITHM *********************************************************** # | |
9356 | # # | |
9357 | # Step 1. Save and strip signs of X and Y: signX := sign(X), # | |
9358 | # signY := sign(Y), X := |X|, Y := |Y|, # | |
9359 | # signQ := signX EOR signY. Record whether MOD or REM # | |
9360 | # is requested. # | |
9361 | # # | |
9362 | # Step 2. Set L := expo(X)-expo(Y), k := 0, Q := 0. # | |
9363 | # If (L < 0) then # | |
9364 | # R := X, go to Step 4. # | |
9365 | # else # | |
9366 | # R := 2^(-L)X, j := L. # | |
9367 | # endif # | |
9368 | # # | |
9369 | # Step 3. Perform MOD(X,Y) # | |
9370 | # 3.1 If R = Y, go to Step 9. # | |
9371 | # 3.2 If R > Y, then { R := R - Y, Q := Q + 1} # | |
9372 | # 3.3 If j = 0, go to Step 4. # | |
9373 | # 3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to # | |
9374 | # Step 3.1. # | |
9375 | # # | |
9376 | # Step 4. At this point, R = X - QY = MOD(X,Y). Set # | |
9377 | # Last_Subtract := false (used in Step 7 below). If # | |
9378 | # MOD is requested, go to Step 6. # | |
9379 | # # | |
9380 | # Step 5. R = MOD(X,Y), but REM(X,Y) is requested. # | |
9381 | # 5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to # | |
9382 | # Step 6. # | |
9383 | # 5.2 If R > Y/2, then { set Last_Subtract := true, # | |
9384 | # Q := Q + 1, Y := signY*Y }. Go to Step 6. # | |
9385 | # 5.3 This is the tricky case of R = Y/2. If Q is odd, # | |
9386 | # then { Q := Q + 1, signX := -signX }. # | |
9387 | # # | |
9388 | # Step 6. R := signX*R. # | |
9389 | # # | |
9390 | # Step 7. If Last_Subtract = true, R := R - Y. # | |
9391 | # # | |
9392 | # Step 8. Return signQ, last 7 bits of Q, and R as required. # | |
9393 | # # | |
9394 | # Step 9. At this point, R = 2^(-j)*X - Q Y = Y. Thus, # | |
9395 | # X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1), # | |
9396 | # R := 0. Return signQ, last 7 bits of Q, and R. # | |
9397 | # # | |
9398 | ######################################################################### | |
9399 | ||
9400 | set Mod_Flag,L_SCR3 | |
9401 | set Sc_Flag,L_SCR3+1 | |
9402 | ||
9403 | set SignY,L_SCR2 | |
9404 | set SignX,L_SCR2+2 | |
9405 | set SignQ,L_SCR3+2 | |
9406 | ||
9407 | set Y,FP_SCR0 | |
9408 | set Y_Hi,Y+4 | |
9409 | set Y_Lo,Y+8 | |
9410 | ||
9411 | set R,FP_SCR1 | |
9412 | set R_Hi,R+4 | |
9413 | set R_Lo,R+8 | |
9414 | ||
9415 | Scale: | |
9416 | long 0x00010000,0x80000000,0x00000000,0x00000000 | |
9417 | ||
9418 | global smod | |
9419 | smod: | |
9420 | clr.b FPSR_QBYTE(%a6) | |
9421 | mov.l %d0,-(%sp) # save ctrl bits | |
9422 | clr.b Mod_Flag(%a6) | |
9423 | bra.b Mod_Rem | |
9424 | ||
9425 | global srem | |
9426 | srem: | |
9427 | clr.b FPSR_QBYTE(%a6) | |
9428 | mov.l %d0,-(%sp) # save ctrl bits | |
9429 | mov.b &0x1,Mod_Flag(%a6) | |
9430 | ||
9431 | Mod_Rem: | |
9432 | #..Common body of smod/srem. Save d2-d7, then record the sign/exponent word of Y (read via a0) | |
9433 | movm.l &0x3f00,-(%sp) # save data registers d2-d7 | |
9434 | mov.w SRC_EX(%a0),%d3 | |
9435 | mov.w %d3,SignY(%a6) | |
9436 | and.l &0x00007FFF,%d3 # Y := |Y|; d3 = exponent field of Y | |
9437 | ||
9438 | # load the 64-bit mantissa of Y | |
9439 | mov.l SRC_HI(%a0),%d4 | |
9440 | mov.l SRC_LO(%a0),%d5 # (D3,D4,D5) is |Y| | |
9441 | ||
9442 | tst.l %d3 | |
9443 | bne.b Y_Normal | |
9444 | ||
9445 | mov.l &0x00003FFE,%d3 # $3FFD + 1 | |
9446 | tst.l %d4 | |
9447 | bne.b HiY_not0 | |
9448 | ||
9449 | HiY_0: | |
9450 | mov.l %d5,%d4 | |
9451 | clr.l %d5 | |
9452 | sub.l &32,%d3 | |
9453 | clr.l %d6 | |
9454 | bfffo %d4{&0:&32},%d6 | |
9455 | lsl.l %d6,%d4 | |
9456 | sub.l %d6,%d3 # (D3,D4,D5) is normalized | |
9457 | # ...with bias $7FFD | |
9458 | bra.b Chk_X | |
9459 | ||
9460 | HiY_not0: | |
9461 | clr.l %d6 | |
9462 | bfffo %d4{&0:&32},%d6 | |
9463 | sub.l %d6,%d3 | |
9464 | lsl.l %d6,%d4 | |
9465 | mov.l %d5,%d7 # a copy of D5 | |
9466 | lsl.l %d6,%d5 | |
9467 | neg.l %d6 | |
9468 | add.l &32,%d6 | |
9469 | lsr.l %d6,%d7 | |
9470 | or.l %d7,%d4 # (D3,D4,D5) normalized | |
9471 | # ...with bias $7FFD | |
9472 | bra.b Chk_X | |
9473 | ||
9474 | Y_Normal: | |
9475 | add.l &0x00003FFE,%d3 # (D3,D4,D5) normalized | |
9476 | # ...with bias $7FFD | |
9477 | ||
9478 | Chk_X: | |
9479 | mov.w DST_EX(%a1),%d0 | |
9480 | mov.w %d0,SignX(%a6) | |
9481 | mov.w SignY(%a6),%d1 | |
9482 | eor.l %d0,%d1 | |
9483 | and.l &0x00008000,%d1 | |
9484 | mov.w %d1,SignQ(%a6) # sign(Q) obtained | |
9485 | and.l &0x00007FFF,%d0 | |
9486 | mov.l DST_HI(%a1),%d1 | |
9487 | mov.l DST_LO(%a1),%d2 # (D0,D1,D2) is |X| | |
9488 | tst.l %d0 | |
9489 | bne.b X_Normal | |
9490 | mov.l &0x00003FFE,%d0 | |
9491 | tst.l %d1 | |
9492 | bne.b HiX_not0 | |
9493 | ||
9494 | HiX_0: | |
9495 | mov.l %d2,%d1 | |
9496 | clr.l %d2 | |
9497 | sub.l &32,%d0 | |
9498 | clr.l %d6 | |
9499 | bfffo %d1{&0:&32},%d6 | |
9500 | lsl.l %d6,%d1 | |
9501 | sub.l %d6,%d0 # (D0,D1,D2) is normalized | |
9502 | # ...with bias $7FFD | |
9503 | bra.b Init | |
9504 | ||
9505 | HiX_not0: | |
9506 | clr.l %d6 | |
9507 | bfffo %d1{&0:&32},%d6 | |
9508 | sub.l %d6,%d0 | |
9509 | lsl.l %d6,%d1 | |
9510 | mov.l %d2,%d7 # a copy of D2 | |
9511 | lsl.l %d6,%d2 | |
9512 | neg.l %d6 | |
9513 | add.l &32,%d6 | |
9514 | lsr.l %d6,%d7 | |
9515 | or.l %d7,%d1 # (D0,D1,D2) normalized | |
9516 | # ...with bias $7FFD | |
9517 | bra.b Init | |
9518 | ||
9519 | X_Normal: | |
9520 | add.l &0x00003FFE,%d0 # (D0,D1,D2) normalized | |
9521 | # ...with bias $7FFD | |
9522 | ||
9523 | Init: | |
9524 | # Step 2: L := expo(X)-expo(Y); biased exp(Y) is kept in L_SCR1 for the later renormalization of R | |
9525 | mov.l %d3,L_SCR1(%a6) # save biased exp(Y) | |
9526 | mov.l %d0,-(%sp) # save biased exp(X) | |
9527 | sub.l %d3,%d0 # L := expo(X)-expo(Y) | |
9528 | ||
9529 | clr.l %d6 # D6 := carry <- 0 | |
9530 | clr.l %d3 # D3 is Q | |
9531 | mov.l &0,%a1 # A1 is k; j+k=L, Q=0 | |
9532 | ||
9533 | #..(Carry,D1,D2) is R | |
9534 | tst.l %d0 | |
9535 | bge.b Mod_Loop_pre | |
9536 | ||
9537 | #..expo(X) < expo(Y). Thus X = mod(X,Y) | |
9538 | # R is X unchanged, so its exponent is the saved biased exp(X) | |
9539 | mov.l (%sp)+,%d0 # restore biased exp(X) into d0 | |
9540 | bra.w Get_Mod | |
9541 | ||
9542 | Mod_Loop_pre: | |
9543 | addq.l &0x4,%sp # erase exp(X) | |
9544 | #..At this point R = 2^(-L)X; Q = 0; k = 0; and k+j = L | |
9545 | Mod_Loop: | |
9546 | tst.l %d6 # test carry bit | |
9547 | bgt.b R_GT_Y | |
9548 | ||
9549 | #..At this point carry = 0, R = (D1,D2), Y = (D4,D5) | |
9550 | cmp.l %d1,%d4 # compare hi(R) and hi(Y) | |
9551 | bne.b R_NE_Y | |
9552 | cmp.l %d2,%d5 # compare lo(R) and lo(Y) | |
9553 | bne.b R_NE_Y | |
9554 | ||
9555 | #..At this point, R = Y | |
9556 | bra.w Rem_is_0 | |
9557 | ||
9558 | R_NE_Y: | |
9559 | #..use the borrow of the previous compare | |
9560 | bcs.b R_LT_Y # borrow is set iff R < Y | |
9561 | ||
9562 | R_GT_Y: | |
9563 | #..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0 | |
9564 | #..and Y < (D1,D2) < 2Y. Either way, perform R - Y | |
9565 | sub.l %d5,%d2 # lo(R) - lo(Y) | |
9566 | subx.l %d4,%d1 # hi(R) - hi(Y) | |
9567 | clr.l %d6 # clear carry | |
9568 | addq.l &1,%d3 # Q := Q + 1 | |
9569 | ||
9570 | R_LT_Y: | |
9571 | #..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0. | |
9572 | tst.l %d0 # see if j = 0. | |
9573 | beq.b PostLoop | |
9574 | ||
9575 | add.l %d3,%d3 # Q := 2Q | |
9576 | add.l %d2,%d2 # lo(R) = 2lo(R) | |
9577 | roxl.l &1,%d1 # hi(R) = 2hi(R) + carry | |
9578 | scs %d6 # set Carry if 2(R) overflows | |
9579 | addq.l &1,%a1 # k := k+1 | |
9580 | subq.l &1,%d0 # j := j - 1 | |
9581 | #..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y. | |
9582 | ||
9583 | bra.b Mod_Loop | |
9584 | ||
9585 | PostLoop: | |
9586 | #..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y. | |
9587 | ||
9588 | #..normalize R. | |
9589 | mov.l L_SCR1(%a6),%d0 # new biased expo of R | |
9590 | tst.l %d1 | |
9591 | bne.b HiR_not0 | |
9592 | ||
9593 | HiR_0: | |
9594 | mov.l %d2,%d1 | |
9595 | clr.l %d2 | |
9596 | sub.l &32,%d0 | |
9597 | clr.l %d6 | |
9598 | bfffo %d1{&0:&32},%d6 | |
9599 | lsl.l %d6,%d1 | |
9600 | sub.l %d6,%d0 # (D0,D1,D2) is normalized | |
9601 | # ...with bias $7FFD | |
9602 | bra.b Get_Mod | |
9603 | ||
9604 | HiR_not0: | |
9605 | clr.l %d6 | |
9606 | bfffo %d1{&0:&32},%d6 | |
9607 | bmi.b Get_Mod # already normalized | |
9608 | sub.l %d6,%d0 | |
9609 | lsl.l %d6,%d1 | |
9610 | mov.l %d2,%d7 # a copy of D2 | |
9611 | lsl.l %d6,%d2 | |
9612 | neg.l %d6 | |
9613 | add.l &32,%d6 | |
9614 | lsr.l %d6,%d7 | |
9615 | or.l %d7,%d1 # (D0,D1,D2) normalized | |
9616 | ||
9617 | # if biased expo(R) is below $41FE, store R and Y with their $7FFD-biased exponents and set Sc_Flag so that Restore multiplies by Scale; otherwise subtract $3FFE from both exponents and clear Sc_Flag | |
9618 | Get_Mod: | |
9619 | cmp.l %d0,&0x000041FE | |
9620 | bge.b No_Scale | |
9621 | Do_Scale: | |
9622 | mov.w %d0,R(%a6) | |
9623 | mov.l %d1,R_Hi(%a6) | |
9624 | mov.l %d2,R_Lo(%a6) | |
9625 | mov.l L_SCR1(%a6),%d6 | |
9626 | mov.w %d6,Y(%a6) | |
9627 | mov.l %d4,Y_Hi(%a6) | |
9628 | mov.l %d5,Y_Lo(%a6) | |
9629 | fmov.x R(%a6),%fp0 # no exception | |
9630 | mov.b &1,Sc_Flag(%a6) | |
9631 | bra.b ModOrRem | |
9632 | No_Scale: | |
9633 | mov.l %d1,R_Hi(%a6) | |
9634 | mov.l %d2,R_Lo(%a6) | |
9635 | sub.l &0x3FFE,%d0 | |
9636 | mov.w %d0,R(%a6) | |
9637 | mov.l L_SCR1(%a6),%d6 | |
9638 | sub.l &0x3FFE,%d6 | |
9639 | mov.l %d6,L_SCR1(%a6) | |
9640 | fmov.x R(%a6),%fp0 | |
9641 | mov.w %d6,Y(%a6) | |
9642 | mov.l %d4,Y_Hi(%a6) | |
9643 | mov.l %d5,Y_Lo(%a6) | |
9644 | clr.b Sc_Flag(%a6) | |
9645 | ||
9646 | # Mod_Flag is 0 for smod (R is already the answer) and 1 for srem (continue with Step 5: compare R against Y/2) | |
9647 | ModOrRem: | |
9648 | tst.b Mod_Flag(%a6) | |
9649 | beq.b Fix_Sign | |
9650 | ||
9651 | mov.l L_SCR1(%a6),%d6 # new biased expo(Y) | |
9652 | subq.l &1,%d6 # biased expo(Y/2) | |
9653 | cmp.l %d0,%d6 | |
9654 | blt.b Fix_Sign | |
9655 | bgt.b Last_Sub | |
9656 | ||
9657 | cmp.l %d1,%d4 | |
9658 | bne.b Not_EQ | |
9659 | cmp.l %d2,%d5 | |
9660 | bne.b Not_EQ | |
9661 | bra.w Tie_Case | |
9662 | ||
9663 | Not_EQ: | |
9664 | bcs.b Fix_Sign | |
9665 | ||
9666 | Last_Sub: | |
9667 | # R > Y/2 (Step 5.2): subtract Y once more and bump the quotient | |
9668 | fsub.x Y(%a6),%fp0 # no exceptions | |
9669 | addq.l &1,%d3 # Q := Q + 1 | |
9670 | ||
9671 | # Step 6: negate the result when the sign word saved in SignX is negative | |
9672 | Fix_Sign: | |
9673 | #..Get sign of X | |
9674 | mov.w SignX(%a6),%d6 | |
9675 | bge.b Get_Q | |
9676 | fneg.x %fp0 | |
9677 | ||
9678 | #..Get Q | |
9679 | # build the quotient byte: sign(Q) in bit 7, low 7 bits of Q below it | |
9680 | Get_Q: | |
9681 | clr.l %d6 | |
9682 | mov.w SignQ(%a6),%d6 # D6 is sign(Q) | |
9683 | mov.l &8,%d7 | |
9684 | lsr.l %d7,%d6 | |
9685 | and.l &0x0000007F,%d3 # 7 bits of Q | |
9686 | or.l %d6,%d3 # sign and bits of Q | |
9687 | # swap %d3 | |
9688 | # fmov.l %fpsr,%d6 | |
9689 | # and.l &0xFF00FFFF,%d6 | |
9690 | # or.l %d3,%d6 | |
9691 | # fmov.l %d6,%fpsr # put Q in fpsr | |
9692 | mov.b %d3,FPSR_QBYTE(%a6) # store sign and 7 bits of Q in FPSR_QBYTE | |
9693 | ||
9694 | # pop d2-d7 and the control bits saved at entry, load them into the fpcr, then multiply by Scale if Sc_Flag is set | |
9695 | Restore: | |
9696 | movm.l (%sp)+,&0xfc # {%d2-%d7} | |
9697 | mov.l (%sp)+,%d0 | |
9698 | fmov.l %d0,%fpcr | |
9699 | tst.b Sc_Flag(%a6) | |
9700 | beq.b Finish | |
9701 | mov.b &FMUL_OP,%d1 # last inst is MUL | |
9702 | fmul.x Scale(%pc),%fp0 # may cause underflow | |
9703 | bra t_catch2 | |
9704 | # the '040 package did this apparently to see if the dst operand for the | |
9705 | # preceding fmul was a denorm. but, it better not have been since the | |
9706 | # algorithm just got done playing with fp0 and expected no exceptions | |
9707 | # as a result. trust me... | |
9708 | # bra t_avoid_unsupp # check for denorm as a | |
9709 | # ;result of the scaling | |
9710 | ||
9711 | Finish: | |
9712 | mov.b &FMOV_OP,%d1 # last inst is MOVE | |
9713 | fmov.x %fp0,%fp0 # capture exceptions & round | |
9714 | bra t_catch2 | |
9715 | ||
9716 | Rem_is_0: | |
9717 | #..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1) | |
9718 | addq.l &1,%d3 | |
9719 | cmp.l %d0,&8 # D0 is j | |
9720 | bge.b Q_Big | |
9721 | ||
9722 | lsl.l %d0,%d3 | |
9723 | bra.b Set_R_0 | |
9724 | ||
9725 | Q_Big: | |
9726 | clr.l %d3 | |
9727 | ||
9728 | Set_R_0: | |
9729 | fmov.s &0x00000000,%fp0 | |
9730 | clr.b Sc_Flag(%a6) | |
9731 | bra.w Fix_Sign | |
9732 | ||
9733 | Tie_Case: | |
9734 | #..Check parity of Q | |
9735 | mov.l %d3,%d6 | |
9736 | and.l &0x00000001,%d6 | |
9737 | tst.l %d6 | |
9738 | beq.w Fix_Sign # Q is even | |
9739 | ||
9740 | #..Q is odd, Q := Q + 1, signX := -signX | |
9741 | addq.l &1,%d3 | |
9742 | mov.w SignX(%a6),%d6 | |
9743 | eor.l &0x00008000,%d6 | |
9744 | mov.w %d6,SignX(%a6) | |
9745 | bra.w Fix_Sign | |
9746 | ||
9747 | ######################################################################### | |
9748 | # XDEF **************************************************************** # | |
9749 | # tag(): return the optype of the input ext fp number # | |
9750 | # # | |
9751 | # This routine is used by the 060FPLSP. # | |
9752 | # # | |
9753 | # XREF **************************************************************** # | |
9754 | # None # | |
9755 | # # | |
9756 | # INPUT *************************************************************** # | |
9757 | # a0 = pointer to extended precision operand # | |
9758 | # # | |
9759 | # OUTPUT ************************************************************** # | |
9760 | # d0 = value of type tag # | |
9761 | # one of: NORM, INF, QNAN, SNAN, DENORM, ZERO # | |
9762 | # # | |
9763 | # ALGORITHM *********************************************************** # | |
9764 | # Simply test the exponent, j-bit, and mantissa values to # | |
9765 | # determine the type of operand. # | |
9766 | # If it's an unnormalized zero, alter the operand and force it # | |
9767 | # to be a normal zero. # | |
9768 | # # | |
9769 | ######################################################################### | |
9770 | ||
9771 | global tag | |
9772 | tag: | |
9773 | mov.w FTEMP_EX(%a0), %d0 # extract sign/exponent word of the operand at a0 | |
9774 | andi.w &0x7fff, %d0 # strip off sign | |
9775 | cmpi.w %d0, &0x7fff # is (EXP == MAX)? | |
9776 | beq.b inf_or_nan_x | |
9777 | not_inf_or_nan_x: | |
9778 | btst &0x7,FTEMP_HI(%a0) | |
9779 | beq.b not_norm_x | |
9780 | is_norm_x: | |
9781 | mov.b &NORM, %d0 | |
9782 | rts | |
9783 | not_norm_x: | |
9784 | tst.w %d0 # is exponent = 0? (non-zero exponent here means unnormalized) | |
9785 | bne.b is_unnorm_x | |
9786 | not_unnorm_x: | |
9787 | tst.l FTEMP_HI(%a0) | |
9788 | bne.b is_denorm_x | |
9789 | tst.l FTEMP_LO(%a0) | |
9790 | bne.b is_denorm_x | |
9791 | is_zero_x: | |
9792 | mov.b &ZERO, %d0 | |
9793 | rts | |
9794 | is_denorm_x: | |
9795 | mov.b &DENORM, %d0 | |
9796 | rts | |
9797 | is_unnorm_x: | |
9798 | bsr.l unnorm_fix # convert to norm,denorm,or zero | |
9799 | rts | |
9800 | is_unnorm_reg_x: | |
9801 | mov.b &UNNORM, %d0 | |
9802 | rts | |
9803 | inf_or_nan_x: | |
9804 | tst.l FTEMP_LO(%a0) | |
9805 | bne.b is_nan_x | |
9806 | mov.l FTEMP_HI(%a0), %d0 | |
9807 | and.l &0x7fffffff, %d0 # msb is a don't care! any other mantissa bit set means NAN | |
9808 | bne.b is_nan_x | |
9809 | is_inf_x: | |
9810 | mov.b &INF, %d0 | |
9811 | rts | |
9812 | is_nan_x: | |
9813 | mov.b &QNAN, %d0 | |
9814 | rts | |
9815 | ||
9816 | ##### qnan: extended-precision NAN image that t_operr loads into fp0 ##### | |
9817 | ||
9818 | qnan: long 0x7fff0000, 0xffffffff, 0xffffffff | |
9819 | ||
9820 | ######################################################################### | |
9821 | # XDEF **************************************************************** # | |
9822 | # t_dz(): Handle 060FPLSP dz exception for "flogn" emulation. # | |
9823 | # t_dz2(): Handle 060FPLSP dz exception for "fatanh" emulation. # | |
9824 | # # | |
9825 | # These routines are used by the 060FPLSP package. # | |
9826 | # # | |
9827 | # XREF **************************************************************** # | |
9828 | # None # | |
9829 | # # | |
9830 | # INPUT *************************************************************** # | |
9831 | # a0 = pointer to extended precision source operand. # | |
9832 | # # | |
9833 | # OUTPUT ************************************************************** # | |
9834 | # fp0 = default DZ result. # | |
9835 | # # | |
9836 | # ALGORITHM *********************************************************** # | |
9837 | # Transcendental emulation for the 060FPLSP has detected that # | |
9838 | # a DZ exception should occur for the instruction. If DZ is disabled, # | |
9839 | # return the default result. # | |
9840 | # If DZ is enabled, the dst operand should be returned unscathed # | |
9841 | # in fp0 while fp1 is used to create a DZ exception so that the # | |
9842 | # operating system can log that such an event occurred. # | |
9843 | # # | |
9844 | ######################################################################### | |
9845 | ||
9846 | global t_dz | |
9847 | t_dz: | |
9848 | tst.b SRC_EX(%a0) # check sign of the source operand for neg or pos | |
9849 | bpl.b dz_pinf # branch if pos sign | |
9850 | ||
9851 | global t_dz2 | |
9852 | t_dz2: | |
9853 | ori.l &dzinf_mask+neg_mask,USER_FPSR(%a6) # set N/I/DZ/ADZ | |
9854 | ||
9855 | btst &dz_bit,FPCR_ENABLE(%a6) | |
9856 | bne.b dz_minf_ena | |
9857 | ||
9858 | # dz is disabled. return the default result, -INF, in fp0. | |
9859 | fmov.s &0xff800000,%fp0 # return -INF | |
9860 | rts | |
9861 | ||
9862 | # dz is enabled. create a dz exception so the user can record it | |
9863 | # but use fp1 instead. return the dst operand unscathed in fp0. | |
9864 | dz_minf_ena: | |
9865 | fmovm.x EXC_FP0(%a6),&0x80 # reload fp0 from EXC_FP0 (return fp0 unscathed) | |
9866 | fmov.l USER_FPCR(%a6),%fpcr | |
9867 | fmov.s &0xbf800000,%fp1 # load -1 | |
9868 | fdiv.s &0x00000000,%fp1 # -1 / 0, done in fp1 to create the DZ exception | |
9869 | rts | |
9870 | ||
9871 | dz_pinf: | |
9872 | ori.l &dzinf_mask,USER_FPSR(%a6) # set I/DZ/ADZ | |
9873 | ||
9874 | btst &dz_bit,FPCR_ENABLE(%a6) | |
9875 | bne.b dz_pinf_ena | |
9876 | ||
9877 | # dz is disabled. return the default result, +INF, in fp0. | |
9878 | fmov.s &0x7f800000,%fp0 # return +INF | |
9879 | rts | |
9880 | ||
9881 | # dz is enabled. create a dz exception so the user can record it | |
9882 | # but use fp1 instead. return the dst operand unscathed in fp0. | |
9883 | dz_pinf_ena: | |
9884 | fmovm.x EXC_FP0(%a6),&0x80 # reload fp0 from EXC_FP0 (return fp0 unscathed) | |
9885 | fmov.l USER_FPCR(%a6),%fpcr | |
9886 | fmov.s &0x3f800000,%fp1 # load +1 | |
9887 | fdiv.s &0x00000000,%fp1 # +1 / 0, done in fp1 to create the DZ exception | |
9888 | rts | |
9889 | ||
9890 | ######################################################################### | |
9891 | # XDEF **************************************************************** # | |
9892 | # t_operr(): Handle 060FPLSP OPERR exception during emulation. # | |
9893 | # # | |
9894 | # This routine is used by the 060FPLSP package. # | |
9895 | # # | |
9896 | # XREF **************************************************************** # | |
9897 | # None. # | |
9898 | # # | |
9899 | # INPUT *************************************************************** # | |
9900 | # fp1 = source operand # | |
9901 | # # | |
9902 | # OUTPUT ************************************************************** # | |
9903 | # fp0 = default result # | |
9904 | # fp1 = unchanged # | |
9905 | # # | |
9906 | # ALGORITHM *********************************************************** # | |
9907 | # An operand error should occur as the result of transcendental # | |
9908 | # emulation in the 060FPLSP. If OPERR is disabled, just return a NAN # | |
9909 | # in fp0. If OPERR is enabled, return the dst operand unscathed in fp0 # | |
9910 | # and the source operand in fp1. Use fp2 to create an OPERR exception # | |
9911 | # so that the operating system can log the event. # | |
9912 | # # | |
9913 | ######################################################################### | |
9914 | ||
9915 | global t_operr | |
9916 | t_operr: | |
9917 | ori.l &opnan_mask,USER_FPSR(%a6) # set NAN/OPERR/AIOP | |
9918 | ||
9919 | btst &operr_bit,FPCR_ENABLE(%a6) # is the OPERR bit set in the enable byte? | |
9920 | bne.b operr_ena # yes; go provoke a real exception | |
9921 | ||
9922 | # operr is disabled. return a QNAN in fp0 | |
9923 | fmovm.x qnan(%pc),&0x80 # return QNAN | |
9924 | rts | |
9925 | ||
9926 | # operr is enabled. create an operr exception so the user can record it | |
9927 | # but use fp2 instead. return the dst operand unscathed in fp0. | |
9928 | operr_ena: | |
9929 | fmovm.x EXC_FP0(%a6),&0x80 # return fp0 unscathed | |
9930 | fmov.l USER_FPCR(%a6),%fpcr # load the user's FPCR before provoking the exception | |
9931 | fmovm.x &0x04,-(%sp) # save fp2 | |
9932 | fmov.s &0x7f800000,%fp2 # load +INF | |
9933 | fmul.s &0x00000000,%fp2 # +INF x 0 | |
9934 | fmovm.x (%sp)+,&0x20 # restore fp2 | |
9935 | rts | |
9936 | ||
9937 | pls_huge: | |
9938 | long 0x7ffe0000,0xffffffff,0xffffffff # +huge: first word 0x7ffe, all mantissa bits set; multiplied in t_ovfl/t_ovfl2 | |
9939 | mns_huge: | |
9940 | long 0xfffe0000,0xffffffff,0xffffffff # same as pls_huge with the top (sign) bit set | |
9941 | pls_tiny: | |
9942 | long 0x00000000,0x80000000,0x00000000 # +tiny: first word 0x0000, only the top mantissa bit set; multiplied in t_unfl | |
9943 | mns_tiny: | |
9944 | long 0x80000000,0x80000000,0x00000000 # same as pls_tiny with the top (sign) bit set | |
9945 | ||
9946 | ######################################################################### | |
9947 | # XDEF **************************************************************** # | |
9948 | # t_unfl(): Handle 060FPLSP underflow exception during emulation. # | |
9949 | # t_unfl2(): Handle 060FPLSP underflow exception during # | |
9950 | # emulation. result always positive. # | |
9951 | # # | |
9952 | # This routine is used by the 060FPLSP package. # | |
9953 | # # | |
9954 | # XREF **************************************************************** # | |
9955 | # None. # | |
9956 | # # | |
9957 | # INPUT *************************************************************** # | |
9958 | # a0 = pointer to extended precision source operand # | |
9959 | # # | |
9960 | # OUTPUT ************************************************************** # | |
9961 | # fp0 = default underflow result # | |
9962 | # # | |
9963 | # ALGORITHM *********************************************************** # | |
9964 | # An underflow should occur as the result of transcendental # | |
9965 | # emulation in the 060FPLSP. Create an underflow by using "fmul" # | |
9966 | # and two very small numbers of appropriate sign so the operating # | |
9967 | # system can log the event. # | |
9968 | # # | |
9969 | ######################################################################### | |
9970 | ||
9971 | global t_unfl | |
9972 | t_unfl: | |
9973 | tst.b SRC_EX(%a0) # test sign of the source operand | |
9974 | bpl.b unf_pos # positive; otherwise fall through to the negative path | |
9975 | ||
9976 | global t_unfl2 # NOTE(review): the routine header says "result always positive", but this entry sets N and builds a negative result -- confirm against callers | |
9977 | t_unfl2: | |
9978 | ori.l &unfinx_mask+neg_mask,USER_FPSR(%a6) # set N/UNFL/INEX2/AUNFL/AINEX | |
9979 | ||
9980 | fmov.l USER_FPCR(%a6),%fpcr # load the user's FPCR | |
9981 | fmovm.x mns_tiny(%pc),&0x80 # fp0 = -tiny | |
9982 | fmul.x pls_tiny(%pc),%fp0 # -tiny x +tiny: create the underflow | |
9983 | ||
9984 | fmov.l %fpsr,%d0 # read the FPSR left by the multiply | |
9985 | rol.l &0x8,%d0 # rotate its top byte down into the low byte of d0 | |
9986 | mov.b %d0,FPSR_CC(%a6) # store that byte as the saved ccodes | |
9987 | rts | |
9988 | unf_pos: | |
9989 | ori.w &unfinx_mask,FPSR_EXCEPT(%a6) # set UNFL/INEX2/AUNFL/AINEX | |
9990 | ||
9991 | fmov.l USER_FPCR(%a6),%fpcr # load the user's FPCR | |
9992 | fmovm.x pls_tiny(%pc),&0x80 # fp0 = +tiny | |
9993 | fmul.x %fp0,%fp0 # +tiny x +tiny: create the underflow | |
9994 | ||
9995 | fmov.l %fpsr,%d0 # read the FPSR left by the multiply | |
9996 | rol.l &0x8,%d0 # rotate its top byte down into the low byte of d0 | |
9997 | mov.b %d0,FPSR_CC(%a6) # store that byte as the saved ccodes | |
9998 | rts | |
9999 | ||
10000 | ######################################################################### | |
10001 | # XDEF **************************************************************** # | |
10002 | # t_ovfl(): Handle 060FPLSP overflow exception during emulation. # | |
10003 | # (monadic) # | |
10004 | # t_ovfl2(): Handle 060FPLSP overflow exception during # | |
10005 | # emulation. result always positive. (dyadic) # | |
10006 | # t_ovfl_sc(): Handle 060FPLSP overflow exception during # | |
10007 | # emulation for "fscale". # | |
10008 | # # | |
10009 | # This routine is used by the 060FPLSP package. # | |
10010 | # # | |
10011 | # XREF **************************************************************** # | |
10012 | # None. # | |
10013 | # # | |
10014 | # INPUT *************************************************************** # | |
10015 | # a0 = pointer to extended precision source operand # | |
10016 | # # | |
10017 | # OUTPUT ************************************************************** # | |
10018 | # fp0 = default overflow result # | |
10019 | # # | |
10020 | # ALGORITHM *********************************************************** # | |
10021 | # An overflow should occur as the result of transcendental # | |
10022 | # emulation in the 060FPLSP. Create an overflow by using "fmul" # | |
10023 | # and two very large numbers of appropriate sign so the operating # | |
10024 | # system can log the event. # | |
10025 | # For t_ovfl_sc() we take special care not to lose the INEX2 bit. # | |
10026 | # # | |
10027 | ######################################################################### | |
10028 | ||
10029 | global t_ovfl_sc | |
10030 | t_ovfl_sc: | |
10031 | ori.l &ovfl_inx_mask,USER_FPSR(%a6) # set OVFL/AOVFL/AINEX | |
10032 | ||
10033 | mov.b %d0,%d1 # fetch rnd prec,mode | |
10034 | andi.b &0xc0,%d1 # extract prec | |
10035 | beq.w ovfl_work # prec bits are zero (neither sgl nor dbl below): no INEX2 check | |
10036 | ||
10037 | # dst op is a DENORM. we have to normalize the mantissa to see if the | |
10038 | # result would be inexact for the given precision. make a copy of the | |
10039 | # dst so we don't screw up the version passed to us. | |
10040 | mov.w LOCAL_EX(%a0),FP_SCR0_EX(%a6) # copy exponent word to scratch | |
10041 | mov.l LOCAL_HI(%a0),FP_SCR0_HI(%a6) # copy hi(mantissa) to scratch | |
10042 | mov.l LOCAL_LO(%a0),FP_SCR0_LO(%a6) # copy lo(mantissa) to scratch | |
10043 | lea FP_SCR0(%a6),%a0 # pass ptr to FP_SCR0 | |
10044 | movm.l &0xc080,-(%sp) # save d0-d1/a0 | |
10045 | bsr.l norm # normalize mantissa | |
10046 | movm.l (%sp)+,&0x0103 # restore d0-d1/a0 | |
10047 | ||
10048 | cmpi.b %d1,&0x40 # is precision sgl? | |
10049 | bne.b ovfl_sc_dbl # no; dbl | |
10050 | ovfl_sc_sgl: | |
10051 | tst.l LOCAL_LO(%a0) # is lo lw of sgl set? | |
10052 | bne.b ovfl_sc_inx # yes | |
10053 | tst.b 3+LOCAL_HI(%a0) # is lo byte of hi lw set? | |
10054 | bne.b ovfl_sc_inx # yes | |
10055 | bra.w ovfl_work # don't set INEX2 | |
10056 | ovfl_sc_dbl: | |
10057 | mov.l LOCAL_LO(%a0),%d1 # are any of lo 11 bits of | |
10058 | andi.l &0x7ff,%d1 # dbl mantissa set? | |
10059 | beq.w ovfl_work # no; don't set INEX2 | |
10060 | ovfl_sc_inx: | |
10061 | ori.l &inex2_mask,USER_FPSR(%a6) # set INEX2 | |
10062 | bra.b ovfl_work # continue | |
10063 | ||
10064 | global t_ovfl | |
10065 | t_ovfl: | |
10066 | ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX | |
10067 | ovfl_work: | |
10068 | tst.b SRC_EX(%a0) # shared with t_ovfl_sc: source sign selects the result sign | |
10069 | bpl.b ovfl_p # positive source | |
10070 | ovfl_m: | |
10071 | fmov.l USER_FPCR(%a6),%fpcr # load the user's FPCR | |
10072 | fmovm.x mns_huge(%pc),&0x80 # fp0 = -huge | |
10073 | fmul.x pls_huge(%pc),%fp0 # -huge x +huge: create the overflow | |
10074 | ||
10075 | fmov.l %fpsr,%d0 # read the FPSR left by the multiply | |
10076 | rol.l &0x8,%d0 # rotate its top byte down into the low byte of d0 | |
10077 | ori.b &neg_bmask,%d0 # force 'N'; a byte op needs the byte mask (was the longword neg_mask) | |
10078 | mov.b %d0,FPSR_CC(%a6) # store that byte as the saved ccodes | |
10079 | rts | |
10080 | ovfl_p: | |
10081 | fmov.l USER_FPCR(%a6),%fpcr # load the user's FPCR | |
10082 | fmovm.x pls_huge(%pc),&0x80 # fp0 = +huge | |
10083 | fmul.x pls_huge(%pc),%fp0 # +huge x +huge: create the overflow | |
10084 | ||
10085 | fmov.l %fpsr,%d0 # read the FPSR left by the multiply | |
10086 | rol.l &0x8,%d0 # rotate its top byte down into the low byte of d0 | |
10087 | mov.b %d0,FPSR_CC(%a6) # store that byte as the saved ccodes | |
10088 | rts | |
10089 | ||
10090 | global t_ovfl2 | |
10091 | t_ovfl2: | |
10092 | ori.w &ovfinx_mask,FPSR_EXCEPT(%a6) # set OVFL/INEX2/AOVFL/AINEX | |
10093 | fmov.l USER_FPCR(%a6),%fpcr # load the user's FPCR | |
10094 | fmovm.x pls_huge(%pc),&0x80 # fp0 = +huge (no sign test on this entry) | |
10095 | fmul.x pls_huge(%pc),%fp0 # +huge x +huge: create the overflow | |
10096 | ||
10097 | fmov.l %fpsr,%d0 # read the FPSR left by the multiply | |
10098 | rol.l &0x8,%d0 # rotate its top byte down into the low byte of d0 | |
10099 | mov.b %d0,FPSR_CC(%a6) # store that byte as the saved ccodes | |
10100 | rts | |
10101 | ||
10102 | ######################################################################### | |
10103 | # XDEF **************************************************************** # | |
10104 | # t_catch(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during # | |
10105 | # emulation. # | |
10106 | # t_catch2(): Handle 060FPLSP OVFL,UNFL,or INEX2 exception during # | |
10107 | # emulation. # | |
10108 | # # | |
10109 | # These routines are used by the 060FPLSP package. # | |
10110 | # # | |
10111 | # XREF **************************************************************** # | |
10112 | # None. # | |
10113 | # # | |
10114 | # INPUT *************************************************************** # | |
10115 | # fp0 = default underflow or overflow result # | |
10116 | # # | |
10117 | # OUTPUT ************************************************************** # | |
10118 | # fp0 = default result # | |
10119 | # # | |
10120 | # ALGORITHM *********************************************************** # | |
10121 | # If an overflow or underflow occurred during the last # | |
10122 | # instruction of transcendental 060FPLSP emulation, then it has already # | |
10123 | # occurred and has been logged. Now we need to see if an inexact # | |
10124 | # exception should occur. # | |
10125 | # # | |
10126 | ######################################################################### | |
10127 | ||
10128 | global t_catch2 | |
10129 | t_catch2: | |
10130 | fmov.l %fpsr,%d0 # read the current FPSR | |
10131 | or.l %d0,USER_FPSR(%a6) # merge its bits into the saved FPSR | |
10132 | bra.b inx2_work # go to the inexact-enable check without forcing INEX2/AINEX | |
10133 | ||
10134 | global t_catch | |
10135 | t_catch: | |
10136 | fmov.l %fpsr,%d0 # read the current FPSR | |
10137 | or.l %d0,USER_FPSR(%a6) # merge its bits into the saved FPSR, then fall through into t_inx2 | |
10138 | ||
10139 | ######################################################################### | |
10140 | # XDEF **************************************************************** # | |
10141 | # t_inx2(): Handle inexact 060FPLSP exception during emulation. # | |
10142 | # t_pinx2(): Handle inexact 060FPLSP exception for "+" results. # | |
10143 | # t_minx2(): Handle inexact 060FPLSP exception for "-" results. # | |
10144 | # # | |
10145 | # XREF **************************************************************** # | |
10146 | # None. # | |
10147 | # # | |
10148 | # INPUT *************************************************************** # | |
10149 | # fp0 = default result # | |
10150 | # # | |
10151 | # OUTPUT ************************************************************** # | |
10152 | # fp0 = default result # | |
10153 | # # | |
10154 | # ALGORITHM *********************************************************** # | |
10155 | # The last instruction of transcendental emulation for the # | |
10156 | # 060FPLSP should be inexact. So, if inexact is enabled, then we create # | |
10157 | # the event here by adding a large and very small number together # | |
10158 | # so that the operating system can log the event. # | |
10159 | # Must check, too, if the result was zero, in which case we just # | |
10160 | # set the FPSR bits and return. # | |
10161 | # # | |
10162 | ######################################################################### | |
10163 | ||
10164 | global t_inx2 | |
10165 | t_inx2: | |
10166 | fblt.w t_minx2 # FPU ccodes say "less than": take the "-" result path | |
10167 | fbeq.w inx2_zero # FPU ccodes say "equal": take the zero result path | |
10168 | ||
10169 | global t_pinx2 | |
10170 | t_pinx2: | |
10171 | ori.w &inx2a_mask,FPSR_EXCEPT(%a6) # set INEX2/AINEX | |
10172 | bra.b inx2_work # skip the "-" variant | |
10173 | ||
10174 | global t_minx2 | |
10175 | t_minx2: | |
10176 | ori.l &inx2a_mask+neg_mask,USER_FPSR(%a6) # set N/INEX2/AINEX | |
10177 | ||
10178 | inx2_work: | |
10179 | btst &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled? | |
10180 | bne.b inx2_work_ena # yes | |
10181 | rts | |
10182 | inx2_work_ena: | |
10183 | fmov.l USER_FPCR(%a6),%fpcr # insert user's exceptions | |
10184 | fmov.s &0x3f800000,%fp1 # load +1 (overwrites fp1) | |
10185 | fadd.x pls_tiny(%pc),%fp1 # cause exception | |
10186 | rts | |
10187 | ||
10188 | inx2_zero: | |
10189 | mov.b &z_bmask,FPSR_CC(%a6) # saved ccodes = 'Z' only | |
10190 | ori.w &inx2a_mask,2+USER_FPSR(%a6) # set INEX/AINEX | |
10191 | rts | |
10192 | ||
10193 | ######################################################################### | |
10194 | # XDEF **************************************************************** # | |
10195 | # t_extdnrm(): Handle DENORM inputs in 060FPLSP. # | |
10196 | # t_resdnrm(): Handle DENORM inputs in 060FPLSP for "fscale". # | |
10197 | # # | |
10198 | # This routine is used by the 060FPLSP package. # | |
10199 | # # | |
10200 | # XREF **************************************************************** # | |
10201 | # None. # | |
10202 | # # | |
10203 | # INPUT *************************************************************** # | |
10204 | # a0 = pointer to extended precision input operand # | |
10205 | # # | |
10206 | # OUTPUT ************************************************************** # | |
10207 | # fp0 = default result # | |
10208 | # # | |
10209 | # ALGORITHM *********************************************************** # | |
10210 | # For all functions that have a denormalized input and that # | |
10211 | # f(x)=x, this is the entry point. # | |
10212 | # DENORM value is moved using "fmove" which triggers an exception # | |
10213 | # if enabled so the operating system can log the event. # | |
10214 | # # | |
10215 | ######################################################################### | |
10216 | ||
10217 | global t_extdnrm | |
10218 | t_extdnrm: | |
10219 | fmov.l USER_FPCR(%a6),%fpcr # load the user's FPCR | |
10220 | fmov.x SRC_EX(%a0),%fp0 # move the DENORM operand into fp0 | |
10221 | fmov.l %fpsr,%d0 # read the FPSR left by the move | |
10222 | ori.l &unfinx_mask,%d0 # add the UNFL/INEX bits of unfinx_mask | |
10223 | or.l %d0,USER_FPSR(%a6) # merge everything into the saved FPSR | |
10224 | rts | |
10225 | ||
10226 | global t_resdnrm | |
10227 | t_resdnrm: | |
10228 | fmov.l USER_FPCR(%a6),%fpcr # load the user's FPCR | |
10229 | fmov.x SRC_EX(%a0),%fp0 # move the DENORM operand into fp0 | |
10230 | fmov.l %fpsr,%d0 # read the FPSR left by the move | |
10231 | or.l %d0,USER_FPSR(%a6) # merge it into the saved FPSR (unlike t_extdnrm, no bits are forced) | |
10232 | rts | |
10233 | ||
10234 | ########################################## | |
10235 | ||
10236 | # | |
10237 | # sto_cos: | |
10238 | # This is used by fsincos library emulation. The correct | |
10239 | # values are already in fp0 and fp1 so we do nothing here. | |
10240 | # | |
10241 | global sto_cos | |
10242 | sto_cos: | |
10243 | rts # nothing to store: return immediately | |
10244 | ||
10245 | ########################################## | |
10246 | ||
10247 | # | |
10248 | # dst_qnan --- force result when destination is a NaN | |
10249 | # | |
10250 | global dst_qnan | |
10251 | dst_qnan: | |
10252 | fmov.x DST(%a1),%fp0 # result = dst operand | |
10253 | tst.b DST_EX(%a1) # test dst sign | |
10254 | bmi.b dst_qnan_m # negative | |
10255 | dst_qnan_p: | |
10256 | mov.b &nan_bmask,FPSR_CC(%a6) # saved ccodes = 'NAN' | |
10257 | rts | |
10258 | dst_qnan_m: | |
10259 | mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) # saved ccodes = 'NAN','N' | |
10260 | rts | |
10261 | ||
10262 | # | |
10263 | # src_qnan --- force result when source is a NaN | |
10264 | # | |
10265 | global src_qnan | |
10266 | src_qnan: | |
10267 | fmov.x SRC(%a0),%fp0 # result = src operand | |
10268 | tst.b SRC_EX(%a0) # test src sign | |
10269 | bmi.b src_qnan_m # negative | |
10270 | src_qnan_p: | |
10271 | mov.b &nan_bmask,FPSR_CC(%a6) # saved ccodes = 'NAN' | |
10272 | rts | |
10273 | src_qnan_m: | |
10274 | mov.b &nan_bmask+neg_bmask,FPSR_CC(%a6) # saved ccodes = 'NAN','N' | |
10275 | rts | |
10276 | ||
10277 | ########################################## | |
10278 | ||
10279 | # | |
10280 | # Native instruction support | |
10281 | # | |
10282 | # Some systems may need entry points even for 68060 native | |
10283 | # instructions. These routines are provided for | |
10284 | # convenience. | |
10285 | # | |
10286 | global _fadds_ | |
10287 | _fadds_: | |
10288 | fmov.l %fpcr,-(%sp) # save fpcr | |
10289 | fmov.l &0x00000000,%fpcr # clear fpcr for load | |
10290 | fmov.s 0x8(%sp),%fp0 # load sgl dst (offset includes the 4-byte saved fpcr) | |
10291 | fmov.l (%sp)+,%fpcr # restore fpcr | |
10292 | fadd.s 0x8(%sp),%fp0 # fadd w/ sgl src (sp is back at its entry value) | |
10293 | rts | |
10294 | ||
10295 | global _faddd_ | |
10296 | _faddd_: | |
10297 | fmov.l %fpcr,-(%sp) # save fpcr | |
10298 | fmov.l &0x00000000,%fpcr # clear fpcr for load | |
10299 | fmov.d 0x8(%sp),%fp0 # load dbl dst (offset includes the 4-byte saved fpcr) | |
10300 | fmov.l (%sp)+,%fpcr # restore fpcr | |
10301 | fadd.d 0xc(%sp),%fp0 # fadd w/ dbl src (sp is back at its entry value) | |
10302 | rts | |
10303 | ||
10304 | global _faddx_ | |
10305 | _faddx_: | |
10306 | fmovm.x 0x4(%sp),&0x80 # load ext dst (fpcr is not touched in this variant) | |
10307 | fadd.x 0x10(%sp),%fp0 # fadd w/ ext src | |
10308 | rts | |
10309 | ||
10310 | global _fsubs_ | |
10311 | _fsubs_: | |
10312 | fmov.l %fpcr,-(%sp) # save fpcr | |
10313 | fmov.l &0x00000000,%fpcr # clear fpcr for load | |
10314 | fmov.s 0x8(%sp),%fp0 # load sgl dst (offset includes the 4-byte saved fpcr) | |
10315 | fmov.l (%sp)+,%fpcr # restore fpcr | |
10316 | fsub.s 0x8(%sp),%fp0 # fsub w/ sgl src (sp is back at its entry value) | |
10317 | rts | |
10318 | ||
10319 | global _fsubd_ | |
10320 | _fsubd_: | |
10321 | fmov.l %fpcr,-(%sp) # save fpcr | |
10322 | fmov.l &0x00000000,%fpcr # clear fpcr for load | |
10323 | fmov.d 0x8(%sp),%fp0 # load dbl dst (offset includes the 4-byte saved fpcr) | |
10324 | fmov.l (%sp)+,%fpcr # restore fpcr | |
10325 | fsub.d 0xc(%sp),%fp0 # fsub w/ dbl src (sp is back at its entry value) | |
10326 | rts | |
10327 | ||
10328 | global _fsubx_ | |
10329 | _fsubx_: | |
10330 | fmovm.x 0x4(%sp),&0x80 # load ext dst (fpcr is not touched in this variant) | |
10331 | fsub.x 0x10(%sp),%fp0 # fsub w/ ext src | |
10332 | rts | |
10333 | ||
10334 | global _fmuls_ | |
10335 | _fmuls_: | |
10336 | fmov.l %fpcr,-(%sp) # save fpcr | |
10337 | fmov.l &0x00000000,%fpcr # clear fpcr for load | |
10338 | fmov.s 0x8(%sp),%fp0 # load sgl dst (offset includes the 4-byte saved fpcr) | |
10339 | fmov.l (%sp)+,%fpcr # restore fpcr | |
10340 | fmul.s 0x8(%sp),%fp0 # fmul w/ sgl src (sp is back at its entry value) | |
10341 | rts | |
10342 | ||
10343 | global _fmuld_ | |
10344 | _fmuld_: | |
10345 | fmov.l %fpcr,-(%sp) # save fpcr | |
10346 | fmov.l &0x00000000,%fpcr # clear fpcr for load | |
10347 | fmov.d 0x8(%sp),%fp0 # load dbl dst (offset includes the 4-byte saved fpcr) | |
10348 | fmov.l (%sp)+,%fpcr # restore fpcr | |
10349 | fmul.d 0xc(%sp),%fp0 # fmul w/ dbl src (sp is back at its entry value) | |
10350 | rts | |
10351 | ||
10352 | global _fmulx_ | |
10353 | _fmulx_: | |
10354 | fmovm.x 0x4(%sp),&0x80 # load ext dst (fpcr is not touched in this variant) | |
10355 | fmul.x 0x10(%sp),%fp0 # fmul w/ ext src | |
10356 | rts | |
10357 | ||
10358 | global _fdivs_ | |
10359 | _fdivs_: | |
10360 | fmov.l %fpcr,-(%sp) # save fpcr | |
10361 | fmov.l &0x00000000,%fpcr # clear fpcr for load | |
10362 | fmov.s 0x8(%sp),%fp0 # load sgl dst (offset includes the 4-byte saved fpcr) | |
10363 | fmov.l (%sp)+,%fpcr # restore fpcr | |
10364 | fdiv.s 0x8(%sp),%fp0 # fdiv w/ sgl src (sp is back at its entry value) | |
10365 | rts | |
10366 | ||
10367 | global _fdivd_ | |
10368 | _fdivd_: | |
10369 | fmov.l %fpcr,-(%sp) # save fpcr | |
10370 | fmov.l &0x00000000,%fpcr # clear fpcr for load | |
10371 | fmov.d 0x8(%sp),%fp0 # load dbl dst (offset includes the 4-byte saved fpcr) | |
10372 | fmov.l (%sp)+,%fpcr # restore fpcr | |
10373 | fdiv.d 0xc(%sp),%fp0 # fdiv w/ dbl src (sp is back at its entry value) | |
10374 | rts | |
10375 | ||
10376 | global _fdivx_ | |
10377 | _fdivx_: | |
10378 | fmovm.x 0x4(%sp),&0x80 # load ext dst (fpcr is not touched in this variant) | |
10379 | fdiv.x 0x10(%sp),%fp0 # fdiv w/ ext src | |
10380 | rts | |
10381 | ||
10382 | global _fabss_ | |
10383 | _fabss_: | |
10384 | fabs.s 0x4(%sp),%fp0 # fabs w/ sgl src read from 0x4(%sp); result in fp0 | |
10385 | rts | |
10386 | ||
10387 | global _fabsd_ | |
10388 | _fabsd_: | |
10389 | fabs.d 0x4(%sp),%fp0 # fabs w/ dbl src read from 0x4(%sp); result in fp0 | |
10390 | rts | |
10391 | ||
10392 | global _fabsx_ | |
10393 | _fabsx_: | |
10394 | fabs.x 0x4(%sp),%fp0 # fabs w/ ext src read from 0x4(%sp); result in fp0 | |
10395 | rts | |
10396 | ||
10397 | global _fnegs_ | |
10398 | _fnegs_: | |
10399 | fneg.s 0x4(%sp),%fp0 # fneg w/ sgl src read from 0x4(%sp); result in fp0 | |
10400 | rts | |
10401 | ||
10402 | global _fnegd_ | |
10403 | _fnegd_: | |
10404 | fneg.d 0x4(%sp),%fp0 # fneg w/ dbl src read from 0x4(%sp); result in fp0 | |
10405 | rts | |
10406 | ||
10407 | global _fnegx_ | |
10408 | _fnegx_: | |
10409 | fneg.x 0x4(%sp),%fp0 # fneg w/ ext src read from 0x4(%sp); result in fp0 | |
10410 | rts | |
10411 | ||
10412 | global _fsqrts_ | |
10413 | _fsqrts_: | |
10414 | fsqrt.s 0x4(%sp),%fp0 # fsqrt w/ sgl src read from 0x4(%sp); result in fp0 | |
10415 | rts | |
10416 | ||
10417 | global _fsqrtd_ | |
10418 | _fsqrtd_: | |
10419 | fsqrt.d 0x4(%sp),%fp0 # fsqrt w/ dbl src read from 0x4(%sp); result in fp0 | |
10420 | rts | |
10421 | ||
10422 | global _fsqrtx_ | |
10423 | _fsqrtx_: | |
10424 | fsqrt.x 0x4(%sp),%fp0 # fsqrt w/ ext src read from 0x4(%sp); result in fp0 | |
10425 | rts | |
10426 | ||
10427 | global _fints_ | |
10428 | _fints_: | |
10429 | fint.s 0x4(%sp),%fp0 # fint w/ sgl src read from 0x4(%sp); result in fp0 | |
10430 | rts | |
10431 | ||
10432 | global _fintd_ | |
10433 | _fintd_: | |
10434 | fint.d 0x4(%sp),%fp0 # fint w/ dbl src read from 0x4(%sp); result in fp0 | |
10435 | rts | |
10436 | ||
10437 | global _fintx_ | |
10438 | _fintx_: | |
10439 | fint.x 0x4(%sp),%fp0 # fint w/ ext src read from 0x4(%sp); result in fp0 | |
10440 | rts | |
10441 | ||
10442 | global _fintrzs_ | |
10443 | _fintrzs_: | |
10444 | fintrz.s 0x4(%sp),%fp0 # fintrz w/ sgl src read from 0x4(%sp); result in fp0 | |
10445 | rts | |
10446 | ||
10447 | global _fintrzd_ | |
10448 | _fintrzd_: | |
10449 | fintrz.d 0x4(%sp),%fp0 # fintrz w/ dbl src read from 0x4(%sp); result in fp0 | |
10450 | rts | |
10451 | ||
10452 | global _fintrzx_ | |
10453 | _fintrzx_: | |
10454 | fintrz.x 0x4(%sp),%fp0 # fintrz w/ ext src read from 0x4(%sp); result in fp0 | |
10455 | rts | |
10456 | ||
10457 | ######################################################################## | |
10458 | ||
10459 | ######################################################################### | |
10460 | # src_zero(): Return signed zero according to sign of src operand. # | |
10461 | ######################################################################### | |
10462 | global src_zero | |
10463 | src_zero: | |
10464 | tst.b SRC_EX(%a0) # get sign of src operand | |
10465 | bmi.b ld_mzero # if neg, load neg zero; otherwise fall through to ld_pzero | |
10466 | ||
10467 | # | |
10468 | # ld_pzero(): return a positive zero in fp0 and set the saved ccodes to 'Z'. | |
10469 | # | |
10470 | global ld_pzero | |
10471 | ld_pzero: | |
10472 | fmov.s &0x00000000,%fp0 # load +0 | |
10473 | mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit | |
10474 | rts | |
10475 | ||
10476 | # ld_mzero(): return a negative zero in fp0 and set the saved ccodes to 'N','Z'. | |
10477 | global ld_mzero | |
10478 | ld_mzero: | |
10479 | fmov.s &0x80000000,%fp0 # load -0 | |
10480 | mov.b &neg_bmask+z_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits | |
10481 | rts | |
10482 | ||
10483 | ######################################################################### | |
10484 | # dst_zero(): Return signed zero according to sign of dst operand. # | |
10485 | ######################################################################### | |
10486 | global dst_zero | |
10487 | dst_zero: | |
10488 | tst.b DST_EX(%a1) # get sign of dst operand (dst pointer is in a1) | |
10489 | bmi.b ld_mzero # if neg, load neg zero | |
10490 | bra.b ld_pzero # load positive zero | |
10491 | ||
10492 | ######################################################################### | |
10493 | # src_inf(): Return signed inf according to sign of src operand. # | |
10494 | ######################################################################### | |
10495 | global src_inf | |
10496 | src_inf: | |
10497 | tst.b SRC_EX(%a0) # get sign of src operand | |
10498 | bmi.b ld_minf # if negative branch; otherwise fall through to ld_pinf | |
10499 | ||
10500 | # | |
10501 | # ld_pinf(): return a positive infinity in fp0 and set the saved ccodes to 'INF'. | |
10502 | # | |
10503 | global ld_pinf | |
10504 | ld_pinf: | |
10505 | fmov.s &0x7f800000,%fp0 # load +INF | |
10506 | mov.b &inf_bmask,FPSR_CC(%a6) # set 'INF' ccode bit | |
10507 | rts | |
10508 | ||
10509 | # | |
10510 | # ld_minf(): return a negative infinity in fp0 and set the saved ccodes to 'N','I'. | |
10511 | # | |
10512 | global ld_minf | |
10513 | ld_minf: | |
10514 | fmov.s &0xff800000,%fp0 # load -INF | |
10515 | mov.b &neg_bmask+inf_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits | |
10516 | rts | |
10517 | ||
10518 | ######################################################################### | |
10519 | # dst_inf(): Return signed inf according to sign of dst operand. # | |
10520 | ######################################################################### | |
10521 | global dst_inf | |
10522 | dst_inf: | |
10523 | tst.b DST_EX(%a1) # get sign of dst operand (dst pointer is in a1) | |
10524 | bmi.b ld_minf # if negative branch | |
10525 | bra.b ld_pinf # otherwise load positive infinity | |
10526 | ||
10527 | global szr_inf | |
10528 | ################################################################# | |
10529 | # szr_inf(): Return +ZERO for a negative src operand or # | |
10530 | # +INF for a positive src operand. # | |
10531 | # Routine used for fetox, ftwotox, and ftentox. # | |
10532 | ################################################################# | |
10533 | szr_inf: | |
10534 | tst.b SRC_EX(%a0) # check sign of source | |
10535 | bmi.b ld_pzero # negative src: return +0 | |
10536 | bra.b ld_pinf # positive src: return +INF | |
10537 | ||
10538 | ######################################################################### | |
10539 | # sopr_inf(): Return +INF for a positive src operand or # | |
10540 | # jump to operand error routine for a negative src operand. # | |
10541 | # Routine used for flogn, flognp1, flog10, and flog2. # | |
10542 | ######################################################################### | |
10543 | global sopr_inf | |
10544 | sopr_inf: | |
10545 | tst.b SRC_EX(%a0) # check sign of source | |
10546 | bmi.w t_operr # negative src: operand error | |
10547 | bra.b ld_pinf # positive src: return +INF | |
10548 | ||
10549 | ################################################################# | |
10550 | # setoxm1i(): Return minus one for a negative src operand or # | |
10551 | # positive infinity for a positive src operand. # | |
10552 | # Routine used for fetoxm1. # | |
10553 | ################################################################# | |
10554 | global setoxm1i | |
10555 | setoxm1i: | |
10556 | tst.b SRC_EX(%a0) # check sign of source | |
10557 | bmi.b ld_mone # negative src: return -1 | |
10558 | bra.b ld_pinf # positive src: return +INF | |
10559 | ||
10560 | ######################################################################### | |
10561 | # src_one(): Return signed one according to sign of src operand. # | |
10562 | ######################################################################### | |
10563 | global src_one | |
10564 | src_one: | |
10565 | tst.b SRC_EX(%a0) # check sign of source | |
10566 | bmi.b ld_mone # negative: return -1; otherwise fall through to ld_pone | |
10567 | ||
10568 | # | |
10569 | # ld_pone(): return positive one in fp0 and clear the saved ccodes. | |
10570 | # | |
10571 | global ld_pone | |
10572 | ld_pone: | |
10573 | fmov.s &0x3f800000,%fp0 # load +1 | |
10574 | clr.b FPSR_CC(%a6) # no ccode bits set | |
10575 | rts | |
10576 | ||
10577 | # | |
10578 | # ld_mone(): return negative one in fp0 and set the saved ccodes to 'N'. | |
10579 | # | |
10580 | global ld_mone | |
10581 | ld_mone: | |
10582 | fmov.s &0xbf800000,%fp0 # load -1 | |
10583 | mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit | |
10584 | rts | |
10585 | ||
10586 | ppiby2: long 0x3fff0000, 0xc90fdaa2, 0x2168c235 # +pi/2, loaded by ld_ppi2 with fmov.x | |
10587 | mpiby2: long 0xbfff0000, 0xc90fdaa2, 0x2168c235 # -pi/2 (same value with the top bit set), loaded by ld_mpi2 | |
10588 | ||
10589 | ################################################################# | |
10590 | # spi_2(): Return signed PI/2 according to sign of src operand. # | |
10591 | ################################################################# | |
10592 | global spi_2 | |
10593 | spi_2: | |
10594 | tst.b SRC_EX(%a0) # check sign of source | |
10595 | bmi.b ld_mpi2 # negative: return -pi/2; otherwise fall through to ld_ppi2 | |
10596 | ||
10597 | # | |
10598 | # ld_ppi2(): return positive PI/2. | |
10599 | # | |
10600 | global ld_ppi2 | |
10601 | ld_ppi2: | |
10602 | fmov.l %d0,%fpcr # fpcr = d0 (presumably the caller's rnd prec,mode -- confirm against callers) | |
10603 | fmov.x ppiby2(%pc),%fp0 # load +pi/2 | |
10604 | bra.w t_pinx2 # set INEX2 | |
10605 | ||
10606 | # | |
10607 | # ld_mpi2(): return negative PI/2. | |
10608 | # | |
10609 | global ld_mpi2 | |
10610 | ld_mpi2: | |
10611 | fmov.l %d0,%fpcr # fpcr = d0 (presumably the caller's rnd prec,mode -- confirm against callers) | |
10612 | fmov.x mpiby2(%pc),%fp0 # load -pi/2 | |
10613 | bra.w t_minx2 # set INEX2 | |
10614 | ||
10615 | #################################################### | |
10616 | # The following routines give support for fsincos. # | |
10617 | #################################################### | |
10618 | ||
10619 | # | |
10620 | # ssincosz(): When the src operand is ZERO, store a one in the | |
10621 | # cosine register and return a ZERO in fp0 w/ the same sign | |
10622 | # as the src operand. | |
10623 | # | |
10624 | global ssincosz | |
10625 | ssincosz: | |
10626 | fmov.s &0x3f800000,%fp1 # cosine result: +1 in fp1 | |
10627 | tst.b SRC_EX(%a0) # test sign | |
10628 | bpl.b sincoszp # positive src | |
10629 | fmov.s &0x80000000,%fp0 # return sin result in fp0 (-0) | |
10630 | mov.b &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','Z' ccode bits | |
10631 | rts | |
10632 | sincoszp: | |
10633 | fmov.s &0x00000000,%fp0 # return sin result in fp0 (+0) | |
10634 | mov.b &z_bmask,FPSR_CC(%a6) # set 'Z' ccode bit | |
10635 | rts | |
10636 | ||
10637 | # | |
10638 | # ssincosi(): When the src operand is INF, store a QNAN in the cosine | |
10639 | # register and jump to the operand error routine (the branch is taken | |
10640 | # regardless of the sign of the src operand). | |
10641 | # | |
10642 | global ssincosi | |
10643 | ssincosi: | |
10644 | fmov.x qnan(%pc),%fp1 # load NAN | |
10645 | bra.w t_operr # unconditional: t_operr produces the fp0 result | |
10646 | ||
10647 | # | |
10648 | # ssincosqnan(): When the src operand is a QNAN, store the QNAN in the cosine | |
10649 | # register and branch to the src QNAN routine. | |
10650 | # | |
10651 | global ssincosqnan | |
10652 | ssincosqnan: | |
10653 | fmov.x LOCAL_EX(%a0),%fp1 # cosine result: copy of the src operand in fp1 | |
10654 | bra.w src_qnan # src_qnan loads fp0 and sets the saved ccodes | |
10655 | ||
10656 | ######################################################################## | |
10657 | ||
10658 | global smod_sdnrm | |
10659 | global smod_snorm | |
10660 | smod_sdnrm: | |
10661 | smod_snorm: | |
10662 | mov.b DTAG(%a6),%d1 # d1 = dst operand tag | |
10663 | beq.l smod # tag == 0 (presumably NORM; constant not visible here) | |
10664 | cmpi.b %d1,&ZERO | |
10665 | beq.w smod_zro # dst is ZERO | |
10666 | cmpi.b %d1,&INF | |
10667 | beq.l t_operr # dst is INF: operand error | |
10668 | cmpi.b %d1,&DENORM | |
10669 | beq.l smod # dst is DENORM | |
10670 | bra.l dst_qnan # any other tag: handle as dst QNAN | |
10671 | ||
10672 | global smod_szero | |
10673 | smod_szero: | |
10674 | mov.b DTAG(%a6),%d1 # d1 = dst operand tag | |
10675 | beq.l t_operr # tag == 0 (presumably NORM; constant not visible here) | |
10676 | cmpi.b %d1,&ZERO | |
10677 | beq.l t_operr # dst is ZERO | |
10678 | cmpi.b %d1,&INF | |
10679 | beq.l t_operr # dst is INF | |
10680 | cmpi.b %d1,&DENORM | |
10681 | beq.l t_operr # dst is DENORM | |
10682 | bra.l dst_qnan # any other tag: handle as dst QNAN | |
10683 | ||
10684 | global smod_sinf | |
10685 | smod_sinf: | |
10686 | mov.b DTAG(%a6),%d1 # d1 = dst operand tag | |
10687 | beq.l smod_fpn # tag == 0 (presumably NORM; constant not visible here) | |
10688 | cmpi.b %d1,&ZERO | |
10689 | beq.l smod_zro # dst is ZERO | |
10690 | cmpi.b %d1,&INF | |
10691 | beq.l t_operr # dst is INF: operand error | |
10692 | cmpi.b %d1,&DENORM | |
10693 | beq.l smod_fpn # dst is DENORM | |
10694 | bra.l dst_qnan # any other tag: handle as dst QNAN | |
10695 | ||
10696 | smod_zro: | |
10697 | srem_zro: | |
10698 | mov.b SRC_EX(%a0),%d1 # get src sign | |
10699 | mov.b DST_EX(%a1),%d0 # get dst sign | |
10700 | eor.b %d0,%d1 # get qbyte sign | |
10701 | andi.b &0x80,%d1 # keep only the sign bit | |
10702 | mov.b %d1,FPSR_QBYTE(%a6) # store it as the saved quotient byte | |
10703 | tst.b %d0 # result zero takes the sign of the dst | |
10704 | bpl.w ld_pzero | |
10705 | bra.w ld_mzero | |
10706 | ||
10707 | smod_fpn: | |
10708 | srem_fpn: | |
10709 | clr.b FPSR_QBYTE(%a6) | |
10710 | mov.l %d0,-(%sp) # save d0; it is reused below for the dst sign | |
10711 | mov.b SRC_EX(%a0),%d1 # get src sign | |
10712 | mov.b DST_EX(%a1),%d0 # get dst sign | |
10713 | eor.b %d0,%d1 # get qbyte sign | |
10714 | andi.b &0x80,%d1 # keep only the sign bit | |
10715 | mov.b %d1,FPSR_QBYTE(%a6) # store it as the saved quotient byte | |
10716 | cmpi.b DTAG(%a6),&DENORM # is the dst a DENORM? | |
10717 | bne.b smod_nrm # no | |
10718 | lea DST(%a1),%a0 # yes; pass the dst as t_resdnrm's operand | |
10719 | mov.l (%sp)+,%d0 # restore d0 | |
10720 | bra t_resdnrm | |
10721 | smod_nrm: | |
10722 | fmov.l (%sp)+,%fpcr # pop the saved d0 value straight into fpcr | |
10723 | fmov.x DST(%a1),%fp0 # result = dst operand | |
10724 | tst.b DST_EX(%a1) # test dst sign | |
10725 | bmi.b smod_nrm_neg # negative: set 'N' | |
10726 | rts | |
10727 | ||
10728 | smod_nrm_neg: | |
10729 | mov.b &neg_bmask,FPSR_CC(%a6) # set 'N' code | |
10730 | rts | |
10731 | ||
10732 | ######################################################################### | |
10733 | global srem_snorm | |
10734 | global srem_sdnrm | |
10735 | srem_sdnrm: | |
10736 | srem_snorm: | |
10737 | mov.b DTAG(%a6),%d1 # d1 = dst operand tag | |
10738 | beq.l srem # tag == 0 (presumably NORM; constant not visible here) | |
10739 | cmpi.b %d1,&ZERO | |
10740 | beq.w srem_zro # dst is ZERO | |
10741 | cmpi.b %d1,&INF | |
10742 | beq.l t_operr # dst is INF: operand error | |
10743 | cmpi.b %d1,&DENORM | |
10744 | beq.l srem # dst is DENORM | |
10745 | bra.l dst_qnan # any other tag: handle as dst QNAN | |
10746 | ||
10747 | global srem_szero | |
10748 | srem_szero: | |
10749 | mov.b DTAG(%a6),%d1 # d1 = dst operand tag | |
10750 | beq.l t_operr # tag == 0 (presumably NORM; constant not visible here) | |
10751 | cmpi.b %d1,&ZERO | |
10752 | beq.l t_operr # dst is ZERO | |
10753 | cmpi.b %d1,&INF | |
10754 | beq.l t_operr # dst is INF | |
10755 | cmpi.b %d1,&DENORM | |
10756 | beq.l t_operr # dst is DENORM | |
10757 | bra.l dst_qnan # any other tag: handle as dst QNAN | |
10758 | ||
10759 | global srem_sinf | |
10760 | srem_sinf: | |
10761 | mov.b DTAG(%a6),%d1 # d1 = dst operand tag | |
10762 | beq.w srem_fpn # tag == 0 (presumably NORM; constant not visible here) | |
10763 | cmpi.b %d1,&ZERO | |
10764 | beq.w srem_zro # dst is ZERO | |
10765 | cmpi.b %d1,&INF | |
10766 | beq.l t_operr # dst is INF: operand error | |
10767 | cmpi.b %d1,&DENORM | |
10768 | beq.l srem_fpn # dst is DENORM | |
10769 | bra.l dst_qnan # any other tag: handle as dst QNAN | |
10770 | ||
10771 | ######################################################################### | |
10772 | ||
10773 | global sscale_snorm | |
10774 | global sscale_sdnrm | |
10775 | sscale_snorm: | |
10776 | sscale_sdnrm: | |
10777 | mov.b DTAG(%a6),%d1 # d1 = dst operand tag | |
10778 | beq.l sscale # tag == 0 (presumably NORM; constant not visible here) | |
10779 | cmpi.b %d1,&ZERO | |
10780 | beq.l dst_zero # dst is ZERO: return zero with the dst sign | |
10781 | cmpi.b %d1,&INF | |
10782 | beq.l dst_inf # dst is INF: return INF with the dst sign | |
10783 | cmpi.b %d1,&DENORM | |
10784 | beq.l sscale # dst is DENORM | |
10785 | bra.l dst_qnan # any other tag: handle as dst QNAN | |
10786 | ||
10787 | global sscale_szero | |
10788 | sscale_szero: | |
10789 | mov.b DTAG(%a6),%d1 # d1 = dst operand tag | |
10790 | beq.l sscale # tag == 0 (presumably NORM; constant not visible here) | |
10791 | cmpi.b %d1,&ZERO | |
10792 | beq.l dst_zero # dst is ZERO: return zero with the dst sign | |
10793 | cmpi.b %d1,&INF | |
10794 | beq.l dst_inf # dst is INF: return INF with the dst sign | |
10795 | cmpi.b %d1,&DENORM | |
10796 | beq.l sscale # dst is DENORM | |
10797 | bra.l dst_qnan # any other tag: handle as dst QNAN | |
10798 | ||
10799 | global sscale_sinf | |
10800 | sscale_sinf: | |
10801 | mov.b DTAG(%a6),%d1 # d1 = dst operand tag | |
10802 | beq.l t_operr # tag == 0 (presumably NORM; constant not visible here): operand error | |
10803 | cmpi.b %d1,&QNAN | |
10804 | beq.l dst_qnan # dst is QNAN: return it | |
10805 | bra.l t_operr # every other dst tag: operand error | |
10806 | ||
10807 | ######################################################################## | |
10808 | ||
10809 | global sop_sqnan | |
10810 | sop_sqnan: | |
10811 | mov.b DTAG(%a6),%d1 # d1 = dst operand tag | |
10812 | cmpi.b %d1,&QNAN | |
10813 | beq.l dst_qnan # dst is QNAN: the dst NaN is returned | |
10814 | bra.l src_qnan # otherwise return the src NaN | |
10815 | ||
10816 | ######################################################################### | |
10817 | # norm(): normalize the mantissa of an extended precision input. the # | |
10818 | # input operand should not be normalized already. # | |
10819 | # # | |
10820 | # XDEF **************************************************************** # | |
10821 | # norm() # | |
10822 | # # | |
10823 | # XREF **************************************************************** # | |
10824 | # none # | |
10825 | # # | |
10826 | # INPUT *************************************************************** # | |
10827 | # a0 = pointer to fp extended precision operand to normalize # | |
10828 | # # | |
10829 | # OUTPUT ************************************************************** # | |
10830 | # d0 = number of bit positions the mantissa was shifted # | |
10831 | # a0 = the input operand's mantissa is normalized; the exponent # | |
10832 | # is unchanged. # | |
10833 | # # | |
10834 | ######################################################################### | |
10835 | global norm | |
10836 | norm: # left-justify the 64-bit mantissa at FTEMP_HI/FTEMP_LO(%a0); d0 = shift count; d1 is clobbered | |
10837 | mov.l %d2, -(%sp) # create some temp regs (d2 and d3 are saved/restored) | |
10838 | mov.l %d3, -(%sp) | |
10839 | ||
10840 | mov.l FTEMP_HI(%a0), %d0 # load hi(mantissa) | |
10841 | mov.l FTEMP_LO(%a0), %d1 # load lo(mantissa); d1 is not restored on exit | |
10842 | ||
10843 | bfffo %d0{&0:&32}, %d2 # how many places to shift? (d2 = offset of first set bit in hi) | |
10844 | beq.b norm_lo # hi(man) is all zeroes! | |
10845 | ||
10846 | norm_hi: # hi(man) has a set bit: shift count d2 comes from hi alone | |
10847 | lsl.l %d2, %d0 # left shift hi(man) | |
10848 | bfextu %d1{&0:%d2}, %d3 # d3 = top d2 bits of lo(man); NOTE(review): a width of 0 presumably encodes 32, so d2 == 0 (already normalized) is unsafe here -- confirm | |
10849 | ||
10850 | or.l %d3, %d0 # create hi(man): merge bits carried up from lo(man) | |
10851 | lsl.l %d2, %d1 # create lo(man) | |
10852 | ||
10853 | mov.l %d0, FTEMP_HI(%a0) # store new hi(man) | |
10854 | mov.l %d1, FTEMP_LO(%a0) # store new lo(man) | |
10855 | ||
10856 | mov.l %d2, %d0 # return shift amount | |
10857 | ||
10858 | mov.l (%sp)+, %d3 # restore temp regs (reverse of the push order) | |
10859 | mov.l (%sp)+, %d2 | |
10860 | ||
10861 | rts | |
10862 | ||
10863 | norm_lo: # hi(man) == 0: lo(man) becomes the new hi(man) | |
10864 | bfffo %d1{&0:&32}, %d2 # how many places to shift? | |
10865 | lsl.l %d2, %d1 # shift lo(man) | |
10866 | add.l &32, %d2 # add 32 to shft amount (for the whole longword skipped) | |
10867 | ||
10868 | mov.l %d1, FTEMP_HI(%a0) # store hi(man) | |
10869 | clr.l FTEMP_LO(%a0) # lo(man) is now zero | |
10870 | ||
10871 | mov.l %d2, %d0 # return shift amount | |
10872 | ||
10873 | mov.l (%sp)+, %d3 # restore temp regs (reverse of the push order) | |
10874 | mov.l (%sp)+, %d2 | |
10875 | ||
10876 | rts | |
10877 | ||
10878 | ######################################################################### | |
10879 | # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO # | |
10880 | # - returns corresponding optype tag # | |
10881 | # # | |
10882 | # XDEF **************************************************************** # | |
10883 | # unnorm_fix() # | |
10884 | # # | |
10885 | # XREF **************************************************************** # | |
10886 | # norm() - normalize the mantissa # | |
10887 | # # | |
10888 | # INPUT *************************************************************** # | |
10889 | # a0 = pointer to unnormalized extended precision number # | |
10890 | # # | |
10891 | # OUTPUT ************************************************************** # | |
10892 | # d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO # | |
10893 | # a0 = input operand has been converted to a norm, denorm, or # | |
10894 | # zero; both the exponent and mantissa are changed. # | |
10895 | # # | |
10896 | ######################################################################### | |
10897 | ||
10898 | global unnorm_fix | |
10899 | unnorm_fix: # rewrite the operand at (%a0) in place and return its tag in the low byte of d0; d1 is clobbered | |
10900 | bfffo FTEMP_HI(%a0){&0:&32}, %d0 # how many shifts are needed? | |
10901 | bne.b unnorm_shift # hi(man) is not all zeroes | |
10902 | ||
10903 | # | |
10904 | # hi(man) is all zeroes so see if any bits in lo(man) are set | |
10905 | # | |
10906 | unnorm_chk_lo: | |
10907 | bfffo FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero? | |
10908 | beq.w unnorm_zero # yes | |
10909 | ||
10910 | add.w &32, %d0 # no; fix shift distance (account for the all-zero hi longword) | |
10911 | ||
10912 | # | |
10913 | # d0 = # shifts needed for complete normalization | |
10914 | # | |
10915 | unnorm_shift: | |
10916 | clr.l %d1 # clear top word | |
10917 | mov.w FTEMP_EX(%a0), %d1 # extract exponent | |
10918 | and.w &0x7fff, %d1 # strip off sgn | |
10919 | ||
10920 | cmp.w %d0, %d1 # would full normalization push exp < 0? (shift count d0 vs exponent d1) | |
10921 | bgt.b unnorm_nrm_zero # yes; shift only until exp = 0 | |
10922 | ||
10923 | # | |
10924 | # exponent would not go < 0. therefore, the number can be fully normalized | |
10925 | # | |
10926 | sub.w %d0, %d1 # shift exponent value (d1 = exp - shift count) | |
10927 | mov.w FTEMP_EX(%a0), %d0 # load old exponent (overwrites the shift count in d0) | |
10928 | and.w &0x8000, %d0 # save old sign | |
10929 | or.w %d0, %d1 # {sgn,new exp} | |
10930 | mov.w %d1, FTEMP_EX(%a0) # insert new exponent | |
10931 | ||
10932 | bsr.l norm # normalize UNNORM (norm's d0 return value is not used) | |
10933 | ||
10934 | mov.b &NORM, %d0 # return new optype tag (only the low byte of d0 is written) | |
10935 | rts | |
10936 | ||
10937 | # | |
10938 | # exponent would go < 0, so shift the mantissa left only until exp = 0 | |
10939 | # | |
10940 | unnorm_nrm_zero: | |
10941 | cmp.b %d1, &32 # is exp <= 32? | |
10942 | bgt.b unnorm_nrm_zero_lrg # no; go handle large exponent | |
10943 | ||
10944 | bfextu FTEMP_HI(%a0){%d1:&32}, %d0 # extract new hi(man): 32 bits starting d1 bits into the mantissa | |
10945 | mov.l %d0, FTEMP_HI(%a0) # save new hi(man) | |
10946 | ||
10947 | mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) | |
10948 | lsl.l %d1, %d0 # extract new lo(man) | |
10949 | mov.l %d0, FTEMP_LO(%a0) # save new lo(man) | |
10950 | ||
10951 | and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 (sign bit is kept) | |
10952 | ||
10953 | mov.b &DENORM, %d0 # return new optype tag (only the low byte of d0 is written) | |
10954 | rts | |
10955 | ||
10956 | # | |
10957 | # only mantissa bits set are in lo(man) | |
10958 | # | |
10959 | unnorm_nrm_zero_lrg: | |
10960 | sub.w &32, %d1 # adjust shft amt by 32 | |
10961 | ||
10962 | mov.l FTEMP_LO(%a0), %d0 # fetch old lo(man) | |
10963 | lsl.l %d1, %d0 # left shift lo(man) | |
10964 | ||
10965 | mov.l %d0, FTEMP_HI(%a0) # store new hi(man) | |
10966 | clr.l FTEMP_LO(%a0) # lo(man) = 0 | |
10967 | ||
10968 | and.w &0x8000, FTEMP_EX(%a0) # set exp = 0 (sign bit is kept) | |
10969 | ||
10970 | mov.b &DENORM, %d0 # return new optype tag (only the low byte of d0 is written) | |
10971 | rts | |
10972 | ||
10973 | # | |
10974 | # whole mantissa is zero so this UNNORM is actually a zero | |
10975 | # | |
10976 | unnorm_zero: | |
10977 | and.w &0x8000, FTEMP_EX(%a0) # force exponent to zero (sign bit is kept) | |
10978 | ||
10979 | mov.b &ZERO, %d0 # fix optype tag (only the low byte of d0 is written) | |
10980 | rts | |