Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
1da177e4 LT |
2 | .file "wm_sqrt.S" |
3 | /*---------------------------------------------------------------------------+ | |
4 | | wm_sqrt.S | | |
5 | | | | |
6 | | Fixed point arithmetic square root evaluation. | | |
7 | | | | |
8 | | Copyright (C) 1992,1993,1995,1997 | | |
9 | | W. Metzenthen, 22 Parker St, Ormond, Vic 3163, | | |
10 | | Australia. E-mail billm@suburbia.net | | |
11 | | | | |
12 | | Call from C as: | | |
13 | | int wm_sqrt(FPU_REG *n, unsigned int control_word) | | |
14 | | | | |
15 | +---------------------------------------------------------------------------*/ | |
16 | ||
17 | /*---------------------------------------------------------------------------+ | |
18 | | wm_sqrt(FPU_REG *n, unsigned int control_word) | | |
19 | | returns the square root of n in n. | | |
20 | | | | |
21 | | Use Newton's method to compute the square root of a number, which must | | |
22 | | be in the range [1.0 .. 4.0), to 64 bits accuracy. | | |
23 | | Does not check the sign or tag of the argument. | | |
24 | | Sets the exponent, but not the sign or tag of the result. | | |
25 | | | | |
26 | | The guess is kept in %esi:%edi | | |
27 | +---------------------------------------------------------------------------*/ | |
28 | ||
29 | #include "exception.h" | |
30 | #include "fpu_emu.h" | |
31 | ||
32 | ||
33 | #ifndef NON_REENTRANT_FPU | |
34 | /* Local storage on the stack: */ | |
35 | #define FPU_accum_3 -4(%ebp) /* ms word */ | |
36 | #define FPU_accum_2 -8(%ebp) | |
37 | #define FPU_accum_1 -12(%ebp) | |
38 | #define FPU_accum_0 -16(%ebp) | |
39 | ||
40 | /* | |
41 | * The de-normalised argument: | |
42 | * sq_2 sq_1 sq_0 | |
43 | * b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 | |
44 | * ^ binary point here | |
45 | */ | |
46 | #define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */ | |
47 | #define FPU_fsqrt_arg_1 -24(%ebp) | |
48 | #define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */ | |
49 | ||
50 | #else | |
51 | /* Local storage in a static area: */ | |
52 | .data | |
53 | .align 4,0 | |
54 | FPU_accum_3: | |
55 | .long 0 /* ms word */ | |
56 | FPU_accum_2: | |
57 | .long 0 | |
58 | FPU_accum_1: | |
59 | .long 0 | |
60 | FPU_accum_0: | |
61 | .long 0 | |
62 | ||
63 | /* The de-normalised argument: | |
64 | sq_2 sq_1 sq_0 | |
65 | b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0 | |
66 | ^ binary point here | |
67 | */ | |
68 | FPU_fsqrt_arg_2: | |
69 | .long 0 /* ms word */ | |
70 | FPU_fsqrt_arg_1: | |
71 | .long 0 | |
72 | FPU_fsqrt_arg_0: | |
73 | .long 0 /* ls word, at most the ms bit is set */ | |
74 | #endif /* NON_REENTRANT_FPU */ | |
75 | ||
76 | ||
77 | .text | |
78 | ENTRY(wm_sqrt) | |
79 | pushl %ebp | |
80 | movl %esp,%ebp | |
81 | #ifndef NON_REENTRANT_FPU | |
82 | subl $28,%esp | |
83 | #endif /* NON_REENTRANT_FPU */ | |
84 | pushl %esi | |
85 | pushl %edi | |
86 | pushl %ebx | |
87 | ||
88 | movl PARAM1,%esi | |
89 | ||
90 | movl SIGH(%esi),%eax | |
91 | movl SIGL(%esi),%ecx | |
92 | xorl %edx,%edx | |
93 | ||
94 | /* We use a rough linear estimate for the first guess.. */ | |
95 | ||
96 | cmpw EXP_BIAS,EXP(%esi) | |
97 | jnz sqrt_arg_ge_2 | |
98 | ||
99 | shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */ | |
100 | rcrl $1,%ecx | |
101 | rcrl $1,%edx | |
102 | ||
103 | sqrt_arg_ge_2: | |
104 | /* From here on, n is never accessed directly again until it is | |
105 | replaced by the answer. */ | |
106 | ||
107 | movl %eax,FPU_fsqrt_arg_2 /* ms word of n */ | |
108 | movl %ecx,FPU_fsqrt_arg_1 | |
109 | movl %edx,FPU_fsqrt_arg_0 | |
110 | ||
111 | /* Make a linear first estimate */ | |
112 | shrl $1,%eax | |
113 | addl $0x40000000,%eax | |
114 | movl $0xaaaaaaaa,%ecx | |
115 | mull %ecx | |
116 | shll %edx /* max result was 7fff... */ | |
117 | testl $0x80000000,%edx /* but min was 3fff... */ | |
118 | jnz sqrt_prelim_no_adjust | |
119 | ||
120 | movl $0x80000000,%edx /* round up */ | |
121 | ||
122 | sqrt_prelim_no_adjust: | |
123 | movl %edx,%esi /* Our first guess */ | |
124 | ||
125 | /* We have now computed (approx) (2 + x) / 3, which forms the basis | |
126 | for a few iterations of Newton's method */ | |
127 | ||
128 | movl FPU_fsqrt_arg_2,%ecx /* ms word */ | |
129 | ||
130 | /* | |
131 | * From our initial estimate, three iterations are enough to get us | |
132 | * to 30 bits or so. This will then allow two iterations at better | |
133 | * precision to complete the process. | |
134 | */ | |
135 | ||
136 | /* Compute (g + n/g)/2 at each iteration (g is the guess). */ | |
137 | shrl %ecx /* Doing this first will prevent a divide */ | |
138 | /* overflow later. */ | |
139 | ||
140 | movl %ecx,%edx /* msw of the arg / 2 */ | |
141 | divl %esi /* current estimate */ | |
142 | shrl %esi /* divide by 2 */ | |
143 | addl %eax,%esi /* the new estimate */ | |
144 | ||
145 | movl %ecx,%edx | |
146 | divl %esi | |
147 | shrl %esi | |
148 | addl %eax,%esi | |
149 | ||
150 | movl %ecx,%edx | |
151 | divl %esi | |
152 | shrl %esi | |
153 | addl %eax,%esi | |
154 | ||
155 | /* | |
156 | * Now that an estimate accurate to about 30 bits has been obtained (in %esi), | |
157 | * we improve it to 60 bits or so. | |
158 | * | |
159 | * The strategy from now on is to compute new estimates from | |
160 | * guess := guess + (n - guess^2) / (2 * guess) | |
161 | */ | |
162 | ||
163 | /* First, find the square of the guess */ | |
164 | movl %esi,%eax | |
165 | mull %esi | |
166 | /* guess^2 now in %edx:%eax */ | |
167 | ||
168 | movl FPU_fsqrt_arg_1,%ecx | |
169 | subl %ecx,%eax | |
170 | movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */ | |
171 | sbbl %ecx,%edx | |
172 | jnc sqrt_stage_2_positive | |
173 | ||
174 | /* Subtraction gives a negative result, | |
175 | negate the result before division. */ | |
176 | notl %edx | |
177 | notl %eax | |
178 | addl $1,%eax | |
179 | adcl $0,%edx | |
180 | ||
181 | divl %esi | |
182 | movl %eax,%ecx | |
183 | ||
184 | movl %edx,%eax | |
185 | divl %esi | |
186 | jmp sqrt_stage_2_finish | |
187 | ||
188 | sqrt_stage_2_positive: | |
189 | divl %esi | |
190 | movl %eax,%ecx | |
191 | ||
192 | movl %edx,%eax | |
193 | divl %esi | |
194 | ||
195 | notl %ecx | |
196 | notl %eax | |
197 | addl $1,%eax | |
198 | adcl $0,%ecx | |
199 | ||
200 | sqrt_stage_2_finish: | |
201 | sarl $1,%ecx /* divide by 2 */ | |
202 | rcrl $1,%eax | |
203 | ||
204 | /* Form the new estimate in %esi:%edi */ | |
205 | movl %eax,%edi | |
206 | addl %ecx,%esi | |
207 | ||
208 | jnz sqrt_stage_2_done /* result should be [1..2) */ | |
209 | ||
210 | #ifdef PARANOID | |
211 | /* It should be possible to get here only if the arg is ffff....ffff */ | |
212 | cmp $0xffffffff,FPU_fsqrt_arg_1 | |
213 | jnz sqrt_stage_2_error | |
214 | #endif /* PARANOID */ | |
215 | ||
216 | /* The best rounded result. */ | |
217 | xorl %eax,%eax | |
218 | decl %eax | |
219 | movl %eax,%edi | |
220 | movl %eax,%esi | |
221 | movl $0x7fffffff,%eax | |
222 | jmp sqrt_round_result | |
223 | ||
224 | #ifdef PARANOID | |
225 | sqrt_stage_2_error: | |
226 | pushl EX_INTERNAL|0x213 | |
227 | call EXCEPTION | |
228 | #endif /* PARANOID */ | |
229 | ||
230 | sqrt_stage_2_done: | |
231 | ||
232 | /* Now the square root has been computed to better than 60 bits. */ | |
233 | ||
234 | /* Find the square of the guess. */ | |
235 | movl %edi,%eax /* ls word of guess */ | |
236 | mull %edi | |
237 | movl %edx,FPU_accum_1 | |
238 | ||
239 | movl %esi,%eax | |
240 | mull %esi | |
241 | movl %edx,FPU_accum_3 | |
242 | movl %eax,FPU_accum_2 | |
243 | ||
244 | movl %edi,%eax | |
245 | mull %esi | |
246 | addl %eax,FPU_accum_1 | |
247 | adcl %edx,FPU_accum_2 | |
248 | adcl $0,FPU_accum_3 | |
249 | ||
250 | /* movl %esi,%eax */ | |
251 | /* mull %edi */ | |
252 | addl %eax,FPU_accum_1 | |
253 | adcl %edx,FPU_accum_2 | |
254 | adcl $0,FPU_accum_3 | |
255 | ||
256 | /* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */ | |
257 | ||
258 | movl FPU_fsqrt_arg_0,%eax /* get normalized n */ | |
259 | subl %eax,FPU_accum_1 | |
260 | movl FPU_fsqrt_arg_1,%eax | |
261 | sbbl %eax,FPU_accum_2 | |
262 | movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */ | |
263 | sbbl %eax,FPU_accum_3 | |
264 | jnc sqrt_stage_3_positive | |
265 | ||
266 | /* Subtraction gives a negative result, | |
267 | negate the result before division */ | |
268 | notl FPU_accum_1 | |
269 | notl FPU_accum_2 | |
270 | notl FPU_accum_3 | |
271 | addl $1,FPU_accum_1 | |
272 | adcl $0,FPU_accum_2 | |
273 | ||
274 | #ifdef PARANOID | |
275 | adcl $0,FPU_accum_3 /* This must be zero */ | |
276 | jz sqrt_stage_3_no_error | |
277 | ||
278 | sqrt_stage_3_error: | |
279 | pushl EX_INTERNAL|0x207 | |
280 | call EXCEPTION | |
281 | ||
282 | sqrt_stage_3_no_error: | |
283 | #endif /* PARANOID */ | |
284 | ||
285 | movl FPU_accum_2,%edx | |
286 | movl FPU_accum_1,%eax | |
287 | divl %esi | |
288 | movl %eax,%ecx | |
289 | ||
290 | movl %edx,%eax | |
291 | divl %esi | |
292 | ||
293 | sarl $1,%ecx /* divide by 2 */ | |
294 | rcrl $1,%eax | |
295 | ||
296 | /* prepare to round the result */ | |
297 | ||
298 | addl %ecx,%edi | |
299 | adcl $0,%esi | |
300 | ||
301 | jmp sqrt_stage_3_finished | |
302 | ||
303 | sqrt_stage_3_positive: | |
304 | movl FPU_accum_2,%edx | |
305 | movl FPU_accum_1,%eax | |
306 | divl %esi | |
307 | movl %eax,%ecx | |
308 | ||
309 | movl %edx,%eax | |
310 | divl %esi | |
311 | ||
312 | sarl $1,%ecx /* divide by 2 */ | |
313 | rcrl $1,%eax | |
314 | ||
315 | /* prepare to round the result */ | |
316 | ||
317 | notl %eax /* Negate the correction term */ | |
318 | notl %ecx | |
319 | addl $1,%eax | |
320 | adcl $0,%ecx /* carry here ==> correction == 0 */ | |
321 | adcl $0xffffffff,%esi | |
322 | ||
323 | addl %ecx,%edi | |
324 | adcl $0,%esi | |
325 | ||
326 | sqrt_stage_3_finished: | |
327 | ||
328 | /* | |
329 | * The result in %esi:%edi:%esi should be good to about 90 bits here, | |
330 | * and the rounding information here does not have sufficient accuracy | |
331 | * in a few rare cases. | |
332 | */ | |
333 | cmpl $0xffffffe0,%eax | |
334 | ja sqrt_near_exact_x | |
335 | ||
336 | cmpl $0x00000020,%eax | |
337 | jb sqrt_near_exact | |
338 | ||
339 | cmpl $0x7fffffe0,%eax | |
340 | jb sqrt_round_result | |
341 | ||
342 | cmpl $0x80000020,%eax | |
343 | jb sqrt_get_more_precision | |
344 | ||
345 | sqrt_round_result: | |
346 | /* Set up for rounding operations */ | |
347 | movl %eax,%edx | |
348 | movl %esi,%eax | |
349 | movl %edi,%ebx | |
350 | movl PARAM1,%edi | |
351 | movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */ | |
352 | jmp fpu_reg_round | |
353 | ||
354 | ||
355 | sqrt_near_exact_x: | |
356 | /* First, the estimate must be rounded up. */ | |
357 | addl $1,%edi | |
358 | adcl $0,%esi | |
359 | ||
360 | sqrt_near_exact: | |
361 | /* | |
362 | * This is an easy case because x^1/2 is monotonic. | |
363 | * We need just find the square of our estimate, compare it | |
364 | * with the argument, and deduce whether our estimate is | |
365 | * above, below, or exact. We use the fact that the estimate | |
366 | * is known to be accurate to about 90 bits. | |
367 | */ | |
368 | movl %edi,%eax /* ls word of guess */ | |
369 | mull %edi | |
370 | movl %edx,%ebx /* 2nd ls word of square */ | |
371 | movl %eax,%ecx /* ls word of square */ | |
372 | ||
373 | movl %edi,%eax | |
374 | mull %esi | |
375 | addl %eax,%ebx | |
376 | addl %eax,%ebx | |
377 | ||
378 | #ifdef PARANOID | |
379 | cmp $0xffffffb0,%ebx | |
380 | jb sqrt_near_exact_ok | |
381 | ||
382 | cmp $0x00000050,%ebx | |
383 | ja sqrt_near_exact_ok | |
384 | ||
385 | pushl EX_INTERNAL|0x214 | |
386 | call EXCEPTION | |
387 | ||
388 | sqrt_near_exact_ok: | |
389 | #endif /* PARANOID */ | |
390 | ||
391 | or %ebx,%ebx | |
392 | js sqrt_near_exact_small | |
393 | ||
394 | jnz sqrt_near_exact_large | |
395 | ||
396 | or %ebx,%edx | |
397 | jnz sqrt_near_exact_large | |
398 | ||
399 | /* Our estimate is exactly the right answer */ | |
400 | xorl %eax,%eax | |
401 | jmp sqrt_round_result | |
402 | ||
403 | sqrt_near_exact_small: | |
404 | /* Our estimate is too small */ | |
405 | movl $0x000000ff,%eax | |
406 | jmp sqrt_round_result | |
407 | ||
408 | sqrt_near_exact_large: | |
409 | /* Our estimate is too large, we need to decrement it */ | |
410 | subl $1,%edi | |
411 | sbbl $0,%esi | |
412 | movl $0xffffff00,%eax | |
413 | jmp sqrt_round_result | |
414 | ||
415 | ||
416 | sqrt_get_more_precision: | |
417 | /* This case is almost the same as the above, except we start | |
418 | with an extra bit of precision in the estimate. */ | |
419 | stc /* The extra bit. */ | |
420 | rcll $1,%edi /* Shift the estimate left one bit */ | |
421 | rcll $1,%esi | |
422 | ||
423 | movl %edi,%eax /* ls word of guess */ | |
424 | mull %edi | |
425 | movl %edx,%ebx /* 2nd ls word of square */ | |
426 | movl %eax,%ecx /* ls word of square */ | |
427 | ||
428 | movl %edi,%eax | |
429 | mull %esi | |
430 | addl %eax,%ebx | |
431 | addl %eax,%ebx | |
432 | ||
433 | /* Put our estimate back to its original value */ | |
434 | stc /* The ms bit. */ | |
435 | rcrl $1,%esi /* Shift the estimate left one bit */ | |
436 | rcrl $1,%edi | |
437 | ||
438 | #ifdef PARANOID | |
439 | cmp $0xffffff60,%ebx | |
440 | jb sqrt_more_prec_ok | |
441 | ||
442 | cmp $0x000000a0,%ebx | |
443 | ja sqrt_more_prec_ok | |
444 | ||
445 | pushl EX_INTERNAL|0x215 | |
446 | call EXCEPTION | |
447 | ||
448 | sqrt_more_prec_ok: | |
449 | #endif /* PARANOID */ | |
450 | ||
451 | or %ebx,%ebx | |
452 | js sqrt_more_prec_small | |
453 | ||
454 | jnz sqrt_more_prec_large | |
455 | ||
456 | or %ebx,%ecx | |
457 | jnz sqrt_more_prec_large | |
458 | ||
459 | /* Our estimate is exactly the right answer */ | |
460 | movl $0x80000000,%eax | |
461 | jmp sqrt_round_result | |
462 | ||
463 | sqrt_more_prec_small: | |
464 | /* Our estimate is too small */ | |
465 | movl $0x800000ff,%eax | |
466 | jmp sqrt_round_result | |
467 | ||
468 | sqrt_more_prec_large: | |
469 | /* Our estimate is too large */ | |
470 | movl $0x7fffff00,%eax | |
471 | jmp sqrt_round_result | |
bd6be579 | 472 | ENDPROC(wm_sqrt) |