[linux-block.git] / arch / x86 / math-emu / wm_sqrt.S

/* SPDX-License-Identifier: GPL-2.0 */
	.file	"wm_sqrt.S"
/*---------------------------------------------------------------------------+
 |  wm_sqrt.S                                                                |
 |                                                                           |
 | Fixed point arithmetic square root evaluation.                            |
 |                                                                           |
 | Copyright (C) 1992,1993,1995,1997                                         |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@suburbia.net               |
 |                                                                           |
 | Call from C as:                                                           |
 |    int wm_sqrt(FPU_REG *n, unsigned int control_word)                     |
 |                                                                           |
 +---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------+
 |  wm_sqrt(FPU_REG *n, unsigned int control_word)                           |
 |    returns the square root of n in n.                                     |
 |                                                                           |
 |  Use Newton's method to compute the square root of a number, which must   |
 |  be in the range  [1.0 .. 4.0),  to 64 bits accuracy.                     |
 |  Does not check the sign or tag of the argument.                          |
 |  Sets the exponent, but not the sign or tag of the result.                |
 |                                                                           |
 |  The guess is kept in %esi:%edi                                           |
 +---------------------------------------------------------------------------*/

#include "exception.h"
#include "fpu_emu.h"


#ifndef NON_REENTRANT_FPU
/*
 * Local storage on the stack, addressed relative to the frame pointer.
 * The seven 32-bit slots below occupy -4(%ebp) .. -28(%ebp).
 *
 * FPU_accum_3:FPU_accum_2:FPU_accum_1 hold a three-word intermediate
 * (the square of the guess, then its difference from the argument).
 * FPU_accum_0 is defined but not referenced by the code visible in
 * this file.
 */
#define FPU_accum_3	-4(%ebp)	/* ms word */
#define FPU_accum_2	-8(%ebp)
#define FPU_accum_1	-12(%ebp)
#define FPU_accum_0	-16(%ebp)

/*
 * The de-normalised argument (the sq_N columns of the diagram presumably
 * correspond to the FPU_fsqrt_arg_N words defined below):
 *                  sq_2                  sq_1              sq_0
 *        b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
 *           ^ binary point here
 */
#define FPU_fsqrt_arg_2	-20(%ebp)	/* ms word */
#define FPU_fsqrt_arg_1	-24(%ebp)
#define FPU_fsqrt_arg_0	-28(%ebp)	/* ls word, at most the ms bit is set */

#else
/*
 * Local storage in a static area (this branch is assembled when
 * NON_REENTRANT_FPU is defined).  Same seven words as the stack layout
 * above, but as zero-initialised longs in .data.
 */
.data
	.align 4,0
FPU_accum_3:
	.long	0		/* ms word */
FPU_accum_2:
	.long	0
FPU_accum_1:
	.long	0
FPU_accum_0:
	.long	0		/* not referenced by the code visible in this file */

/* The de-normalised argument (sq_N presumably corresponds to
   FPU_fsqrt_arg_N below):
                    sq_2                  sq_1              sq_0
          b b b b b b b ... b b b   b b b .... b b b   b 0 0 0 ... 0
             ^ binary point here
 */
FPU_fsqrt_arg_2:
	.long	0		/* ms word */
FPU_fsqrt_arg_1:
	.long	0
FPU_fsqrt_arg_0:
	.long	0		/* ls word, at most the ms bit is set */
#endif /* NON_REENTRANT_FPU */


.text
/*
 * wm_sqrt, part 1: entry, argument de-normalisation and a 32-bit estimate.
 *
 * Reads SIGH/SIGL/EXP of the FPU_REG addressed by PARAM1, stores a
 * three-word fixed-point copy of the argument in
 * FPU_fsqrt_arg_2:FPU_fsqrt_arg_1:FPU_fsqrt_arg_0 and leaves a 32-bit
 * estimate of the root in %esi.
 *
 * %esi, %edi and %ebx are pushed here.  No pop or ret appears in this
 * routine; every path visible in this file leaves through the jmp to
 * fpu_reg_round, which presumably unwinds this frame (defined elsewhere,
 * confirm).
 */
ENTRY(wm_sqrt)
	pushl	%ebp
	movl	%esp,%ebp
#ifndef NON_REENTRANT_FPU
	/* Room for the seven scratch words at -4(%ebp) .. -28(%ebp) */
	subl	$28,%esp
#endif /* NON_REENTRANT_FPU */
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM1,%esi		/* presumably the FPU_REG *n argument */

	movl	SIGH(%esi),%eax		/* %eax:%ecx:%edx = significand words, */
	movl	SIGL(%esi),%ecx		/* with a zero third (ls) word */
	xorl	%edx,%edx

/* We use a rough linear estimate for the first guess.. */

	cmpw	EXP_BIAS,EXP(%esi)
	jnz	sqrt_arg_ge_2

	/* Exponent field equals EXP_BIAS: shift the 96-bit value right one
	   bit; the bit shifted out of %ecx lands in the ms bit of %edx. */
	shrl	$1,%eax			/* arg is in the range  [1.0 .. 2.0) */
	rcrl	$1,%ecx
	rcrl	$1,%edx

sqrt_arg_ge_2:
/* From here on, n is never accessed directly again until it is
   replaced by the answer. */

	movl	%eax,FPU_fsqrt_arg_2		/* ms word of n */
	movl	%ecx,FPU_fsqrt_arg_1
	movl	%edx,FPU_fsqrt_arg_0

/* Make a linear first estimate */
	shrl	$1,%eax			/* ms word of n, halved ... */
	addl	$0x40000000,%eax	/* ... plus a constant offset */
	movl	$0xaaaaaaaa,%ecx	/* 0xaaaaaaaa / 2^32 is roughly 2/3 */
	mull	%ecx			/* %edx = high word of the product */
	shll	%edx			/* max result was 7fff... */
	testl	$0x80000000,%edx	/* but min was 3fff... */
	jnz	sqrt_prelim_no_adjust

	/* Force the ms bit so the guess is never below 0x80000000 */
	movl	$0x80000000,%edx	/* round up */

sqrt_prelim_no_adjust:
	movl	%edx,%esi	/* Our first guess */

/* We have now computed (approx)   (2 + x) / 3, which forms the basis
   for a few iterations of Newton's method */

	movl	FPU_fsqrt_arg_2,%ecx	/* ms word */

/*
 * From our initial estimate, three iterations are enough to get us
 * to 30 bits or so. This will then allow two iterations at better
 * precision to complete the process.
 */

/* Compute  (g + n/g)/2  at each iteration (g is the guess). */
	shrl	%ecx		/* Doing this first will prevent a divide */
				/* overflow later. */

	/*
	 * Each step divides %edx:%eax by the guess.  Only %edx is loaded
	 * (with the halved ms word of n); %eax is not reloaded, so the low
	 * 32 bits of the dividend are whatever the previous instruction
	 * left there.  The quotient in %eax is (n/g)/2; adding g/2 gives
	 * the next guess.
	 */
	movl	%ecx,%edx	/* msw of the arg / 2 */
	divl	%esi		/* current estimate */
	shrl	%esi		/* divide by 2 */
	addl	%eax,%esi	/* the new estimate */

	movl	%ecx,%edx	/* second iteration */
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

	movl	%ecx,%edx	/* third iteration */
	divl	%esi
	shrl	%esi
	addl	%eax,%esi

/*
 * Now that an estimate accurate to about 30 bits has been obtained (in %esi),
 * we improve it to 60 bits or so.
 *
 * The strategy from now on is to compute new estimates from
 *      guess := guess + (n - guess^2) / (2 * guess)
 *
 * On exit from this stage the 64-bit estimate is in %esi:%edi.
 */

/* First, find the square of the guess */
	movl	%esi,%eax
	mull	%esi
/* guess^2 now in %edx:%eax */

	/* %edx:%eax = guess^2 - (FPU_fsqrt_arg_2:FPU_fsqrt_arg_1) */
	movl	FPU_fsqrt_arg_1,%ecx
	subl	%ecx,%eax
	movl	FPU_fsqrt_arg_2,%ecx	/* ms word of normalized n */
	sbbl	%ecx,%edx
	jnc	sqrt_stage_2_positive	/* no borrow: guess^2 >= n */

/* Subtraction gives a negative result,
   negate the result before division. */
	notl	%edx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%edx

	/* %ecx:%eax = (n - guess^2) / guess, as a 64-bit quotient */
	divl	%esi
	movl	%eax,%ecx	/* ms word of the quotient */

	/* Second divide: the remainder is used as both halves of the
	   dividend to produce 32 further quotient bits. */
	movl	%edx,%eax
	divl	%esi
	jmp	sqrt_stage_2_finish

sqrt_stage_2_positive:
	/* Same two-step division, on the non-negative difference */
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	/* guess^2 was not below n, so the correction %ecx:%eax is negated */
	notl	%ecx
	notl	%eax
	addl	$1,%eax
	adcl	$0,%ecx

sqrt_stage_2_finish:
	/* Arithmetic shift keeps the sign of the 64-bit correction */
	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* Form the new estimate in %esi:%edi */
	movl	%eax,%edi
	addl	%ecx,%esi	/* sets ZF for the jnz below (movl leaves flags alone) */

	jnz	sqrt_stage_2_done	/* result should be [1..2) */

	/* %esi became zero: handled as a special case below */
#ifdef PARANOID
/* It should be possible to get here only if the arg is ffff....ffff */
	/* NOTE(review): no size suffix on a memory/immediate compare;
	   every other instruction here carries one, confirm the assembler
	   takes this as a 32-bit compare. */
	cmp	$0xffffffff,FPU_fsqrt_arg_1
	jnz	sqrt_stage_2_error
#endif /* PARANOID */

/* The best rounded result. */
	/* %esi:%edi = ffffffff:ffffffff, rounding word %eax = 7fffffff */
	xorl	%eax,%eax
	decl	%eax
	movl	%eax,%edi
	movl	%eax,%esi
	movl	$0x7fffffff,%eax
	jmp	sqrt_round_result

#ifdef PARANOID
sqrt_stage_2_error:
	/* NOTE(review): the pushed argument is never popped and execution
	   falls through into sqrt_stage_2_done (with %esi == 0) once
	   EXCEPTION returns; confirm this is intended. */
	pushl	EX_INTERNAL|0x213
	call	EXCEPTION
#endif /* PARANOID */

sqrt_stage_2_done:

/* Now the square root has been computed to better than 60 bits. */

/*
 * Stage 3: one more correction step on the 64-bit guess %esi:%edi.
 * On exit %esi:%edi holds the corrected guess and %eax the next 32
 * bits of the correction, which the following code uses as rounding
 * information.
 */

/* Find the square of the guess. */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,FPU_accum_1	/* only the high half of ls*ls is kept */

	movl	%esi,%eax
	mull	%esi
	movl	%edx,FPU_accum_3	/* ms*ms fills the two upper words */
	movl	%eax,FPU_accum_2

	movl	%edi,%eax
	mull	%esi			/* cross product ls*ms in %edx:%eax */
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

/* The cross product is needed twice.  %edx:%eax still hold it (the adds
   above only read them), so the multiply below is not repeated: */
/*	movl	%esi,%eax */
/*	mull	%edi */
	addl	%eax,FPU_accum_1
	adcl	%edx,FPU_accum_2
	adcl	$0,FPU_accum_3

/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */

	/* accum_3:accum_2:accum_1 -= fsqrt_arg_2:fsqrt_arg_1:fsqrt_arg_0 */
	movl	FPU_fsqrt_arg_0,%eax		/* get normalized n */
	subl	%eax,FPU_accum_1
	movl	FPU_fsqrt_arg_1,%eax
	sbbl	%eax,FPU_accum_2
	movl	FPU_fsqrt_arg_2,%eax		/* ms word of normalized n */
	sbbl	%eax,FPU_accum_3
	jnc	sqrt_stage_3_positive	/* no borrow: guess^2 >= n */

/* Subtraction gives a negative result,
   negate the result before division */
	notl	FPU_accum_1
	notl	FPU_accum_2
	notl	FPU_accum_3
	addl	$1,FPU_accum_1
	adcl	$0,FPU_accum_2

#ifdef PARANOID
	adcl	$0,FPU_accum_3	/* This must be zero */
	jz	sqrt_stage_3_no_error

sqrt_stage_3_error:
	/* NOTE(review): the pushed argument is never popped and execution
	   continues at sqrt_stage_3_no_error once EXCEPTION returns;
	   confirm this is intended. */
	pushl	EX_INTERNAL|0x207
	call	EXCEPTION

sqrt_stage_3_no_error:
#endif /* PARANOID */

	/* %ecx:%eax = (accum_2:accum_1) / guess ms word, in two steps;
	   the second divide uses the remainder as both dividend halves. */
	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	/* guess^2 was below n: add the correction to the guess */
	addl	%ecx,%edi
	adcl	$0,%esi

	jmp	sqrt_stage_3_finished

sqrt_stage_3_positive:
	/* Same two-step division, on the non-negative difference */
	movl	FPU_accum_2,%edx
	movl	FPU_accum_1,%eax
	divl	%esi
	movl	%eax,%ecx

	movl	%edx,%eax
	divl	%esi

	sarl	$1,%ecx		/* divide by 2 */
	rcrl	$1,%eax

	/* prepare to round the result */

	notl	%eax		/* Negate the correction term */
	notl	%ecx
	addl	$1,%eax
	adcl	$0,%ecx		/* carry here ==> correction == 0 */
	/* Adds -1 plus that carry to %esi: the upper word of the negated
	   correction when it is non-zero, nothing when it was zero. */
	adcl	$0xffffffff,%esi

	addl	%ecx,%edi
	adcl	$0,%esi

sqrt_stage_3_finished:

/*
 * The result in %esi:%edi:%eax should be good to about 90 bits here,
 * and the rounding information here does not have sufficient accuracy
 * in a few rare cases.
 *
 * %eax is the rounding word below the 64-bit estimate.  The unsigned
 * compares below pick out the values where it is too close to 0 or to
 * 0x80000000 to be trusted:
 *   above 0xffffffe0                  -> sqrt_near_exact_x
 *   below 0x00000020                  -> sqrt_near_exact
 *   0x7fffffe0 up to below 0x80000020 -> sqrt_get_more_precision
 *   anything else                     -> sqrt_round_result
 */
	cmpl	$0xffffffe0,%eax
	ja	sqrt_near_exact_x

	cmpl	$0x00000020,%eax
	jb	sqrt_near_exact

	cmpl	$0x7fffffe0,%eax
	jb	sqrt_round_result

	cmpl	$0x80000020,%eax
	jb	sqrt_get_more_precision

sqrt_round_result:
/* Set up for rounding operations */
	/*
	 * Hands over to fpu_reg_round (defined elsewhere) with
	 *   %eax = ms word of the result, %ebx = ls word,
	 *   %edx = rounding word, %edi = PARAM1.
	 * NOTE(review): presumably this is the register contract of
	 * fpu_reg_round and it also restores the registers and frame set
	 * up at entry; confirm against its definition.
	 */
	movl	%eax,%edx
	movl	%esi,%eax
	movl	%edi,%ebx
	movl	PARAM1,%edi
	movw	EXP_BIAS,EXP(%edi)	/* Result is in  [1.0 .. 2.0) */
	jmp	fpu_reg_round


sqrt_near_exact_x:
/*
 * Entered with the rounding word just below a whole unit in the last
 * place of the 64-bit estimate %esi:%edi.
 */
/* First, the estimate must be rounded up. */
	addl	$1,%edi
	adcl	$0,%esi

sqrt_near_exact:
/*
 * This is an easy case because x^1/2 is monotonic.
 * We need just find the square of our estimate, compare it
 * with the argument, and deduce whether our estimate is
 * above, below, or exact. We use the fact that the estimate
 * is known to be accurate to about 90 bits.
 *
 * Only the two least significant words of estimate^2 are formed:
 *   %ecx = ls word, %ebx = 2nd ls word.
 * They are compared against zero; FPU_fsqrt_arg_0 is not consulted.
 * NOTE(review): confirm that is valid when the ms bit of
 * FPU_fsqrt_arg_0 is set.
 *
 * Leaves via sqrt_round_result with the rounding word in %eax:
 *   0x00000000  estimate is exact
 *   0x000000ff  estimate is slightly too small
 *   0xffffff00  estimate was too large and has been decremented
 */
	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx		/* low half of the cross product, */
	addl	%eax,%ebx		/* counted twice */

#ifdef PARANOID
	/*
	 * NOTE(review): as written this check can never fire.  Any %ebx
	 * that is not below 0xffffffb0 is necessarily above 0x00000050,
	 * so one of the two branches is always taken.  Left unchanged
	 * because making it effective could start raising internal
	 * errors on inputs that currently pass; needs separate analysis.
	 * The pushed argument is also never popped after the call.
	 */
	cmp	$0xffffffb0,%ebx
	jb	sqrt_near_exact_ok

	cmp	$0x00000050,%ebx
	ja	sqrt_near_exact_ok

	pushl	EX_INTERNAL|0x214
	call	EXCEPTION

sqrt_near_exact_ok:
#endif /* PARANOID */

	or	%ebx,%ebx
	js	sqrt_near_exact_small

	jnz	sqrt_near_exact_large

	/*
	 * %ebx is zero here, so this tests the ls word of the square.
	 * That word lives in %ecx; %edx holds the high half of the cross
	 * product and is zero for e.g. the estimate 0x80000000:00000001,
	 * whose square has a non-zero ls word and is therefore not exact.
	 * Same test as in sqrt_get_more_precision.
	 */
	or	%ebx,%ecx
	jnz	sqrt_near_exact_large

/* Our estimate is exactly the right answer */
	xorl	%eax,%eax
	jmp	sqrt_round_result

sqrt_near_exact_small:
/* Our estimate is too small */
	movl	$0x000000ff,%eax
	jmp	sqrt_round_result

sqrt_near_exact_large:
/* Our estimate is too large, we need to decrement it */
	subl	$1,%edi
	sbbl	$0,%esi
	movl	$0xffffff00,%eax
	jmp	sqrt_round_result


sqrt_get_more_precision:
/* This case is almost the same as the above, except we start
   with an extra bit of precision in the estimate. */
/*
 * Entered when the rounding word is close to 0x80000000, i.e. close to
 * half a unit in the last place.  The estimate is temporarily replaced
 * by 2*estimate + 1 (the bit shifted out of %esi is dropped) and the two
 * ls words of its square are examined: %ecx = ls word, %ebx = 2nd ls word.
 *
 * Leaves via sqrt_round_result with the rounding word in %eax:
 *   0x80000000  exactly half way
 *   0x800000ff  estimate-plus-half is too small
 *   0x7fffff00  estimate-plus-half is too large
 */
	stc			/* The extra bit. */
	rcll	$1,%edi		/* Shift the estimate left one bit */
	rcll	$1,%esi

	movl	%edi,%eax		/* ls word of guess */
	mull	%edi
	movl	%edx,%ebx		/* 2nd ls word of square */
	movl	%eax,%ecx		/* ls word of square */

	movl	%edi,%eax
	mull	%esi
	addl	%eax,%ebx		/* low half of the cross product, */
	addl	%eax,%ebx		/* counted twice */

/* Put our estimate back to its original value */
	/* stc re-supplies a set ms bit for %esi; the extra bit added above
	   is shifted out of %edi. */
	stc			/* The ms bit. */
	rcrl	$1,%esi		/* Shift the estimate right one bit */
	rcrl	$1,%edi

#ifdef PARANOID
	/*
	 * NOTE(review): as written this check can never fire.  Any %ebx
	 * that is not below 0xffffff60 is necessarily above 0x000000a0,
	 * so one of the two branches is always taken.  The pushed argument
	 * is also never popped after the call.  Confirm the intent.
	 */
	cmp	$0xffffff60,%ebx
	jb	sqrt_more_prec_ok

	cmp	$0x000000a0,%ebx
	ja	sqrt_more_prec_ok

	pushl	EX_INTERNAL|0x215
	call	EXCEPTION

sqrt_more_prec_ok:
#endif /* PARANOID */

	or	%ebx,%ebx
	js	sqrt_more_prec_small

	jnz	sqrt_more_prec_large

	/* %ebx is zero here, so this tests the ls word of the square */
	or	%ebx,%ecx
	jnz	sqrt_more_prec_large

/* Our estimate is exactly the right answer */
	movl	$0x80000000,%eax
	jmp	sqrt_round_result

sqrt_more_prec_small:
/* Our estimate is too small */
	movl	$0x800000ff,%eax
	jmp	sqrt_round_result

sqrt_more_prec_large:
/* Our estimate is too large */
	movl	$0x7fffff00,%eax
	jmp	sqrt_round_result
ENDPROC(wm_sqrt)
Commit	Line	Data
b2441318	1	/* SPDX-License-Identifier: GPL-2.0 */
1da177e4 LT	2	.file "wm_sqrt.S"
	3	/*---------------------------------------------------------------------------+
	4	\| wm_sqrt.S \|
	5	\| \|
	6	\| Fixed point arithmetic square root evaluation. \|
	7	\| \|
	8	\| Copyright (C) 1992,1993,1995,1997 \|
	9	\| W. Metzenthen, 22 Parker St, Ormond, Vic 3163, \|
	10	\| Australia. E-mail billm@suburbia.net \|
	11	\| \|
	12	\| Call from C as: \|
	13	\| int wm_sqrt(FPU_REG *n, unsigned int control_word) \|
	14	\| \|
	15	+---------------------------------------------------------------------------*/
	16
	17	/*---------------------------------------------------------------------------+
	18	\| wm_sqrt(FPU_REG *n, unsigned int control_word) \|
	19	\| returns the square root of n in n. \|
	20	\| \|
	21	\| Use Newton's method to compute the square root of a number, which must \|
	22	\| be in the range [1.0 .. 4.0), to 64 bits accuracy. \|
	23	\| Does not check the sign or tag of the argument. \|
	24	\| Sets the exponent, but not the sign or tag of the result. \|
	25	\| \|
	26	\| The guess is kept in %esi:%edi \|
	27	+---------------------------------------------------------------------------*/
	28
	29	#include "exception.h"
	30	#include "fpu_emu.h"
	31
	32
	33	#ifndef NON_REENTRANT_FPU
	34	/* Local storage on the stack: */
	35	#define FPU_accum_3 -4(%ebp) /* ms word */
	36	#define FPU_accum_2 -8(%ebp)
	37	#define FPU_accum_1 -12(%ebp)
	38	#define FPU_accum_0 -16(%ebp)
	39
	40	/*
	41	* The de-normalised argument:
	42	* sq_2 sq_1 sq_0
	43	* b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
	44	* ^ binary point here
	45	*/
	46	#define FPU_fsqrt_arg_2 -20(%ebp) /* ms word */
	47	#define FPU_fsqrt_arg_1 -24(%ebp)
	48	#define FPU_fsqrt_arg_0 -28(%ebp) /* ls word, at most the ms bit is set */
	49
	50	#else
	51	/* Local storage in a static area: */
	52	.data
	53	.align 4,0
	54	FPU_accum_3:
	55	.long 0 /* ms word */
	56	FPU_accum_2:
	57	.long 0
	58	FPU_accum_1:
	59	.long 0
	60	FPU_accum_0:
	61	.long 0
	62
	63	/* The de-normalised argument:
	64	sq_2 sq_1 sq_0
	65	b b b b b b b ... b b b b b b .... b b b b 0 0 0 ... 0
66	^ binary point here
67	*/
68	FPU_fsqrt_arg_2:
69	.long 0 /* ms word */
70	FPU_fsqrt_arg_1:
71	.long 0
72	FPU_fsqrt_arg_0:
73	.long 0 /* ls word, at most the ms bit is set */
74	#endif /* NON_REENTRANT_FPU */
75
76
77	.text
78	ENTRY(wm_sqrt)
79	pushl %ebp
80	movl %esp,%ebp
81	#ifndef NON_REENTRANT_FPU
82	subl $28,%esp
83	#endif /* NON_REENTRANT_FPU */
84	pushl %esi
85	pushl %edi
86	pushl %ebx
87
88	movl PARAM1,%esi
89
90	movl SIGH(%esi),%eax
91	movl SIGL(%esi),%ecx
92	xorl %edx,%edx
93
94	/* We use a rough linear estimate for the first guess.. */
95
96	cmpw EXP_BIAS,EXP(%esi)
97	jnz sqrt_arg_ge_2
98
99	shrl $1,%eax /* arg is in the range [1.0 .. 2.0) */
100	rcrl $1,%ecx
101	rcrl $1,%edx
102
103	sqrt_arg_ge_2:
104	/* From here on, n is never accessed directly again until it is
105	replaced by the answer. */
106
107	movl %eax,FPU_fsqrt_arg_2 /* ms word of n */
108	movl %ecx,FPU_fsqrt_arg_1
109	movl %edx,FPU_fsqrt_arg_0
110
111	/* Make a linear first estimate */
112	shrl $1,%eax
113	addl $0x40000000,%eax
114	movl $0xaaaaaaaa,%ecx
115	mull %ecx
116	shll %edx /* max result was 7fff... */
117	testl $0x80000000,%edx /* but min was 3fff... */
118	jnz sqrt_prelim_no_adjust
119
120	movl $0x80000000,%edx /* round up */
121
122	sqrt_prelim_no_adjust:
123	movl %edx,%esi /* Our first guess */
124
125	/* We have now computed (approx) (2 + x) / 3, which forms the basis
126	for a few iterations of Newton's method */
127
128	movl FPU_fsqrt_arg_2,%ecx /* ms word */
129
130	/*
131	* From our initial estimate, three iterations are enough to get us
132	* to 30 bits or so. This will then allow two iterations at better
133	* precision to complete the process.
134	*/
135
136	/* Compute (g + n/g)/2 at each iteration (g is the guess). */
137	shrl %ecx /* Doing this first will prevent a divide */
138	/* overflow later. */
139
140	movl %ecx,%edx /* msw of the arg / 2 */
141	divl %esi /* current estimate */
142	shrl %esi /* divide by 2 */
143	addl %eax,%esi /* the new estimate */
144
145	movl %ecx,%edx
146	divl %esi
147	shrl %esi
148	addl %eax,%esi
149
150	movl %ecx,%edx
151	divl %esi
152	shrl %esi
153	addl %eax,%esi
154
155	/*
156	* Now that an estimate accurate to about 30 bits has been obtained (in %esi),
157	* we improve it to 60 bits or so.
158	*
159	* The strategy from now on is to compute new estimates from
160	* guess := guess + (n - guess^2) / (2 * guess)
161	*/
162
163	/* First, find the square of the guess */
164	movl %esi,%eax
165	mull %esi
166	/* guess^2 now in %edx:%eax */
167
168	movl FPU_fsqrt_arg_1,%ecx
169	subl %ecx,%eax
170	movl FPU_fsqrt_arg_2,%ecx /* ms word of normalized n */
171	sbbl %ecx,%edx
172	jnc sqrt_stage_2_positive
173
174	/* Subtraction gives a negative result,
175	negate the result before division. */
176	notl %edx
177	notl %eax
178	addl $1,%eax
179	adcl $0,%edx
180
181	divl %esi
182	movl %eax,%ecx
183
184	movl %edx,%eax
185	divl %esi
186	jmp sqrt_stage_2_finish
187
188	sqrt_stage_2_positive:
189	divl %esi
190	movl %eax,%ecx
191
192	movl %edx,%eax
193	divl %esi
194
195	notl %ecx
196	notl %eax
197	addl $1,%eax
198	adcl $0,%ecx
199
200	sqrt_stage_2_finish:
201	sarl $1,%ecx /* divide by 2 */
202	rcrl $1,%eax
203
204	/* Form the new estimate in %esi:%edi */
205	movl %eax,%edi
206	addl %ecx,%esi
207
208	jnz sqrt_stage_2_done /* result should be [1..2) */
209
210	#ifdef PARANOID
211	/* It should be possible to get here only if the arg is ffff....ffff */
212	cmp $0xffffffff,FPU_fsqrt_arg_1
213	jnz sqrt_stage_2_error
214	#endif /* PARANOID */
215
216	/* The best rounded result. */
217	xorl %eax,%eax
218	decl %eax
219	movl %eax,%edi
220	movl %eax,%esi
221	movl $0x7fffffff,%eax
222	jmp sqrt_round_result
223
224	#ifdef PARANOID
225	sqrt_stage_2_error:
226	pushl EX_INTERNAL\|0x213
227	call EXCEPTION
228	#endif /* PARANOID */
229
230	sqrt_stage_2_done:
231
232	/* Now the square root has been computed to better than 60 bits. */
233
234	/* Find the square of the guess. */
235	movl %edi,%eax /* ls word of guess */
236	mull %edi
237	movl %edx,FPU_accum_1
238
239	movl %esi,%eax
240	mull %esi
241	movl %edx,FPU_accum_3
242	movl %eax,FPU_accum_2
243
244	movl %edi,%eax
245	mull %esi
246	addl %eax,FPU_accum_1
247	adcl %edx,FPU_accum_2
248	adcl $0,FPU_accum_3
249
250	/* movl %esi,%eax */
251	/* mull %edi */
252	addl %eax,FPU_accum_1
253	adcl %edx,FPU_accum_2
254	adcl $0,FPU_accum_3
255
256	/* guess^2 now in FPU_accum_3:FPU_accum_2:FPU_accum_1 */
257
258	movl FPU_fsqrt_arg_0,%eax /* get normalized n */
259	subl %eax,FPU_accum_1
260	movl FPU_fsqrt_arg_1,%eax
261	sbbl %eax,FPU_accum_2
262	movl FPU_fsqrt_arg_2,%eax /* ms word of normalized n */
263	sbbl %eax,FPU_accum_3
264	jnc sqrt_stage_3_positive
265
266	/* Subtraction gives a negative result,
267	negate the result before division */
268	notl FPU_accum_1
269	notl FPU_accum_2
270	notl FPU_accum_3
271	addl $1,FPU_accum_1
272	adcl $0,FPU_accum_2
273
274	#ifdef PARANOID
275	adcl $0,FPU_accum_3 /* This must be zero */
276	jz sqrt_stage_3_no_error
277
278	sqrt_stage_3_error:
279	pushl EX_INTERNAL\|0x207
280	call EXCEPTION
281
282	sqrt_stage_3_no_error:
283	#endif /* PARANOID */
284
285	movl FPU_accum_2,%edx
286	movl FPU_accum_1,%eax
287	divl %esi
288	movl %eax,%ecx
289
290	movl %edx,%eax
291	divl %esi
292
293	sarl $1,%ecx /* divide by 2 */
294	rcrl $1,%eax
295
296	/* prepare to round the result */
297
298	addl %ecx,%edi
299	adcl $0,%esi
300
301	jmp sqrt_stage_3_finished
302
303	sqrt_stage_3_positive:
304	movl FPU_accum_2,%edx
305	movl FPU_accum_1,%eax
306	divl %esi
307	movl %eax,%ecx
308
309	movl %edx,%eax
310	divl %esi
311
312	sarl $1,%ecx /* divide by 2 */
313	rcrl $1,%eax
314
315	/* prepare to round the result */
316
317	notl %eax /* Negate the correction term */
318	notl %ecx
319	addl $1,%eax
320	adcl $0,%ecx /* carry here ==> correction == 0 */
321	adcl $0xffffffff,%esi
322
323	addl %ecx,%edi
324	adcl $0,%esi
325
326	sqrt_stage_3_finished:
327
328	/*
329	* The result in %esi:%edi:%esi should be good to about 90 bits here,
330	* and the rounding information here does not have sufficient accuracy
331	* in a few rare cases.
332	*/
333	cmpl $0xffffffe0,%eax
334	ja sqrt_near_exact_x
335
336	cmpl $0x00000020,%eax
337	jb sqrt_near_exact
338
339	cmpl $0x7fffffe0,%eax
340	jb sqrt_round_result
341
342	cmpl $0x80000020,%eax
343	jb sqrt_get_more_precision
344
345	sqrt_round_result:
346	/* Set up for rounding operations */
347	movl %eax,%edx
348	movl %esi,%eax
349	movl %edi,%ebx
350	movl PARAM1,%edi
351	movw EXP_BIAS,EXP(%edi) /* Result is in [1.0 .. 2.0) */
352	jmp fpu_reg_round
353
354
355	sqrt_near_exact_x:
356	/* First, the estimate must be rounded up. */
357	addl $1,%edi
358	adcl $0,%esi
359
360	sqrt_near_exact:
361	/*
362	* This is an easy case because x^1/2 is monotonic.
363	* We need just find the square of our estimate, compare it
364	* with the argument, and deduce whether our estimate is
365	* above, below, or exact. We use the fact that the estimate
366	* is known to be accurate to about 90 bits.
367	*/
368	movl %edi,%eax /* ls word of guess */
369	mull %edi
370	movl %edx,%ebx /* 2nd ls word of square */
371	movl %eax,%ecx /* ls word of square */
372
373	movl %edi,%eax
374	mull %esi
375	addl %eax,%ebx
376	addl %eax,%ebx
377
378	#ifdef PARANOID
379	cmp $0xffffffb0,%ebx
380	jb sqrt_near_exact_ok
381
382	cmp $0x00000050,%ebx
383	ja sqrt_near_exact_ok
384
385	pushl EX_INTERNAL\|0x214
386	call EXCEPTION
387
388	sqrt_near_exact_ok:
389	#endif /* PARANOID */
390
391	or %ebx,%ebx
392	js sqrt_near_exact_small
393
394	jnz sqrt_near_exact_large
395
396	or %ebx,%edx
397	jnz sqrt_near_exact_large
398
399	/* Our estimate is exactly the right answer */
400	xorl %eax,%eax
401	jmp sqrt_round_result
402
403	sqrt_near_exact_small:
404	/* Our estimate is too small */
405	movl $0x000000ff,%eax
406	jmp sqrt_round_result
407
408	sqrt_near_exact_large:
409	/* Our estimate is too large, we need to decrement it */
410	subl $1,%edi
411	sbbl $0,%esi
412	movl $0xffffff00,%eax
413	jmp sqrt_round_result
414
415
416	sqrt_get_more_precision:
417	/* This case is almost the same as the above, except we start
418	with an extra bit of precision in the estimate. */
419	stc /* The extra bit. */
420	rcll $1,%edi /* Shift the estimate left one bit */
421	rcll $1,%esi
422
423	movl %edi,%eax /* ls word of guess */
424	mull %edi
425	movl %edx,%ebx /* 2nd ls word of square */
426	movl %eax,%ecx /* ls word of square */
427
428	movl %edi,%eax
429	mull %esi
430	addl %eax,%ebx
431	addl %eax,%ebx
432
433	/* Put our estimate back to its original value */
434	stc /* The ms bit. */
435	rcrl $1,%esi /* Shift the estimate left one bit */
436	rcrl $1,%edi
437
438	#ifdef PARANOID
439	cmp $0xffffff60,%ebx
440	jb sqrt_more_prec_ok
441
442	cmp $0x000000a0,%ebx
443	ja sqrt_more_prec_ok
444
445	pushl EX_INTERNAL\|0x215
446	call EXCEPTION
447
448	sqrt_more_prec_ok:
449	#endif /* PARANOID */
450
451	or %ebx,%ebx
452	js sqrt_more_prec_small
453
454	jnz sqrt_more_prec_large
455
456	or %ebx,%ecx
457	jnz sqrt_more_prec_large
458
459	/* Our estimate is exactly the right answer */
460	movl $0x80000000,%eax
461	jmp sqrt_round_result
462
463	sqrt_more_prec_small:
464	/* Our estimate is too small */
465	movl $0x800000ff,%eax
466	jmp sqrt_round_result
467
468	sqrt_more_prec_large:
469	/* Our estimate is too large */
470	movl $0x7fffff00,%eax
471	jmp sqrt_round_result
bd6be579	472	ENDPROC(wm_sqrt)