[linux-2.6-block.git] / arch / m68k / fpsp040 / x_unfl.S

|
|	x_unfl.sa 3.4 7/1/91
|
|	fpsp_unfl --- FPSP handler for underflow exception
|
| Trap disabled results
|	For 881/2 compatibility, sw must denormalize the intermediate
| result, then store the result.  Denormalization is accomplished
| by taking the intermediate result (which is always normalized) and
| shifting the mantissa right while incrementing the exponent until
| it is equal to the denormalized exponent for the destination
| format.  After denormalization, the result is rounded to the
| destination format.
|
| Trap enabled results
|	All trap disabled code applies.	In addition the exceptional
| operand needs to be made available to the user with a bias of $6000
| added to the exponent.
|

|		Copyright (C) Motorola, Inc. 1990
|			All Rights Reserved
|
|       For details on the license for this file, please see the
|       file, README, in this same directory.

X_UNFL:	|idnt    2,1 | Motorola 040 Floating Point Software Package

	|section	8

#include "fpsp.h"

	|xref	denorm
	|xref	round
	|xref	store
	|xref	g_rndpr
	|xref	g_opcls
	|xref	g_dfmtou
	|xref	real_unfl
	|xref	real_inex
	|xref	fpsp_done
	|xref	b1238_fix

	.global	fpsp_unfl
|
|	fpsp_unfl --- entry point of the underflow exception handler
|
| Builds a local frame of LOCAL_SIZE bytes on a6, pushes the FPU
| state frame with fsave, and saves d0-d1/a0-a1, fp0-fp3 and
| fpcr/fpsr/fpiar in the USER_DA, USER_FP0 and USER_FPCR areas of
| the local frame.  unf_res is then called to denormalize, round
| and store the result.  Afterwards one of three exits is taken:
|
|	underflow enabled in FPCR_ENABLE		-> real_unfl
|	else inex1/inex2 enabled and reported		-> real_inex
|	else						-> fpsp_done
|
| Every exit reloads the registers saved above and unlinks a6.
| On every exit where E3 is found set in E_BYTE, the destination
| register's bit in FPR_DIRTY_BITS is cleared, b1238_fix is called,
| USER_FPSR is copied to FPSR_SHADOW and sx_mask is or'ed into the
| longword at E_BYTE before the state frame is restored.
|
fpsp_unfl:
	link		%a6,#-LOCAL_SIZE
	fsave		-(%a7)
	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)
	fmovemx	%fp0-%fp3,USER_FP0(%a6)
	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)

|
	bsrl		unf_res	|denormalize, round & store interm op
|
| If underflow exceptions are not enabled, check for inexact
| exception
|
	btstb		#unfl_bit,FPCR_ENABLE(%a6)
	beqs		ck_inex

|
| Underflow is enabled: exit through real_unfl.
|
	btstb		#E3,E_BYTE(%a6)
	beqs		no_e3_1
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
	bsrl		b1238_fix		|test for bug1238 case
	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
	orl		#sx_mask,E_BYTE(%a6)
no_e3_1:
	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
	fmovemx	USER_FP0(%a6),%fp0-%fp3
	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
	frestore	(%a7)+
	unlk		%a6
	bral		real_unfl
|
| It is possible to have either inex2 or inex1 exceptions with the
| unfl.  If the inex enable bit is set in the FPCR, and either
| inex2 or inex1 occurred, we must clean up and branch to the
| real inex handler.
|
| d0 = FPCR_ENABLE & FPSR_EXCEPT & 0x3; a zero result means no
| enabled inexact exception is pending.
|
ck_inex:
	moveb		FPCR_ENABLE(%a6),%d0
	andb		FPSR_EXCEPT(%a6),%d0
	andib		#0x3,%d0
	beqs		unfl_done

|
| Inexact enabled and reported, and we must take an inexact exception
|
take_inex:
	btstb		#E3,E_BYTE(%a6)
	beqs		no_e3_2
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
	bsrl		b1238_fix		|test for bug1238 case
	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
	orl		#sx_mask,E_BYTE(%a6)
no_e3_2:
	moveb		#INEX_VEC,EXC_VEC+1(%a6)	|store INEX_VEC in the frame's EXC_VEC+1 byte
	moveml         USER_DA(%a6),%d0-%d1/%a0-%a1
	fmovemx        USER_FP0(%a6),%fp0-%fp3
	fmoveml        USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
	frestore        (%a7)+
	unlk            %a6
	bral		real_inex

|
| Neither underflow nor inexact is to be reported: exit through
| fpsp_done.  bclr tests the bit before clearing it, so Z is set
| when E3 was already clear.
|
unfl_done:
	bclrb		#E3,E_BYTE(%a6)
	beqs		e1_set		|branch if E3 was clear (E1 case)
|
| Clear dirty bit on dest register in the frame before branching
| to b1238_fix.
|
	bfextu		CMDREG3B(%a6){#6:#3},%d0		|get dest reg no
	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
	bsrl		b1238_fix		|test for bug1238 case
	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
	orl		#sx_mask,E_BYTE(%a6)
	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
	fmovemx	USER_FP0(%a6),%fp0-%fp3
	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
	frestore	(%a7)+
	unlk		%a6
	bral		fpsp_done
|
| E1 case.  Unlike the other exits there is no frestore here: unlk
| reloads a7 from a6, which drops the fsave frame from the stack
| without handing it back to the FPU.
| NOTE(review): presumably intentional, so that the saved exception
| state is discarded -- confirm against the FPSP documentation.
|
e1_set:
	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
	fmovemx	USER_FP0(%a6),%fp0-%fp3
	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
	unlk		%a6
	bral		fpsp_done
|
|	unf_res --- underflow result calculation
|
| Called with bsrl from fpsp_unfl; a6 must point at the local frame.
| In order, the routine:
|	1. gets the rounding precision from g_rndpr and parks it on
|	   the stack as the upper word of a longword whose lower
|	   word is zero;
|	2. points a0 at the exceptional operand (WBTEMP if E3 is set
|	   in E_BYTE, FPTEMP otherwise);
|	3. moves the operand's sign out of LOCAL_EX into LOCAL_SGN
|	   and calls denorm;
|	4. pops the parked precision, merges it with the rounding
|	   mode from FPCR_MODE in d1 and calls round;
|	5. calls store, adjusting the exponent first for a
|	   non-extended store to memory of a result that is still
|	   denormalized;
|	6. for a register destination, sets z_bit/neg_bit in FPSR_CC
|	   from the result;
|	7. sets aunfl_bit in FPSR_AEXCEPT if inex2_bit is set in
|	   FPSR_EXCEPT.
|
| Writes d0, d1 and a0 directly; the called routines may use more.
| The parked longword is popped before round is called, so the
| stack is balanced again by the time rts is reached.
|
unf_res:
	bsrl		g_rndpr		|returns RND_PREC in d0 0=ext,
|					;1=sgl, 2=dbl
|					;we need the RND_PREC in the
|					;upper word for round
	movew		#0,-(%a7)	|lower word of the PREC/MODE long
	movew		%d0,-(%a7)	|copy RND_PREC to stack
|
|
| If the exception bit set is E3, the exceptional operand from the
| fpu is in WBTEMP; else it is in FPTEMP.
|
	btstb		#E3,E_BYTE(%a6)
	beqs		unf_E1
unf_E3:
	lea		WBTEMP(%a6),%a0	|a0 now points to operand
|
| Test for fsgldiv and fsglmul.  If the inst was one of these, then
| force the precision in d0 to extended for the denorm routine and
| force the precision parked on the stack for the round routine to
| single.  For every other instruction both routines get the
| precision returned by g_rndpr.
|
	movew		CMDREG3B(%a6),%d1	|check for fsgldiv or fsglmul
	andiw		#0x7f,%d1
	cmpiw		#0x30,%d1		|check for sgldiv
	beqs		unf_sgl
	cmpiw		#0x33,%d1		|check for sglmul
	bnes		unf_cont	|if not, use fpcr prec in round
unf_sgl:
	clrl		%d0		|d0 = 0 (ext) for denorm
	movew		#0x1,(%a7)	|override g_rndpr precision
|					;force single
	bras		unf_cont
unf_E1:
	lea		FPTEMP(%a6),%a0	|a0 now points to operand
unf_cont:
	bclrb		#sign_bit,LOCAL_EX(%a0)	|clear sign bit
	sne		LOCAL_SGN(%a0)		|store sign: $ff if the bit was set, else $00

	bsrl		denorm		|returns denorm, a0 points to it
|
| WARNING:
|				;d0 has guard,round sticky bit
|				;make sure that it is not corrupted
|				;before it reaches the round subroutine
|				;also ensure that a0 isn't corrupted

|
| Set up d1 for round subroutine d1 contains the PREC/MODE
| information respectively on upper/lower register halves.
|
	bfextu		FPCR_MODE(%a6){#2:#2},%d1	|get mode from FPCR
|						;mode in lower d1
	addl		(%a7)+,%d1		|merge PREC/MODE
|						;this pops the longword parked
|						;at the top of unf_res
|
| WARNING: a0 and d0 are assumed to be intact between the denorm and
| round subroutines. All code between these two subroutines
| must not corrupt a0 and d0.
|
|
| Perform Round
|	Input:		a0 points to input operand
|			d0{31:29} has guard, round, sticky
|			d1{01:00} has rounding mode
|			d1{17:16} has rounding precision
|	Output:		a0 points to rounded operand
|

	bsrl		round		|returns rounded denorm at (a0)
|
| Differentiate between store to memory vs. store to register
|
unf_store:
	bsrl		g_opcls		|returns opclass in d0{2:0}
	cmpib		#0x3,%d0	|opclass 3: store to memory
	bnes		not_opc011
|
| At this point, a store to memory is pending
|
opc011:
	bsrl		g_dfmtou
	tstb		%d0
	beqs		ext_opc011	|If extended, do not subtract
|				;If destination format is sgl/dbl,
	tstb		LOCAL_HI(%a0)	|If rounded result is normal,don't
|					;subtract
	bmis		ext_opc011	|top mantissa bit set: normalized
	subqw		#1,LOCAL_EX(%a0)	|account for denorm bias vs.
|				;normalized bias
|				;          normalized   denormalized
|				;single       $7f           $7e
|				;double       $3ff          $3fe
|
ext_opc011:
	bsrl		store		|stores to memory
	bras		unf_done	|finish up

|
| At this point, a store to a float register is pending
|
not_opc011:
	bsrl		store	|stores to float register
|				;a0 is not corrupted on a store to a
|				;float register.
|
| Set the condition codes according to result
|
	tstl		LOCAL_HI(%a0)	|check upper mantissa
	bnes		ck_sgn
	tstl		LOCAL_LO(%a0)	|check lower mantissa
	bnes		ck_sgn
	bsetb		#z_bit,FPSR_CC(%a6) |set condition codes if zero
|
| NOTE(review): sign_bit in LOCAL_EX was cleared at unf_cont, so the
| test below relies on one of the routines called since then having
| put the sign back -- confirm against round/store.
|
ck_sgn:
	btstb		#sign_bit,LOCAL_EX(%a0)	|check the sign bit
	beqs		unf_done
	bsetb		#neg_bit,FPSR_CC(%a6)

|
| Finish.  If the result was inexact (inex2), also record the
| accrued underflow bit.
|
unf_done:
	btstb		#inex2_bit,FPSR_EXCEPT(%a6)
	beqs		no_aunfl
	bsetb		#aunfl_bit,FPSR_AEXCEPT(%a6)
no_aunfl:
	rts

	|end
Commit	Line	Data
1da177e4 LT	1	\|
	2	\| x_unfl.sa 3.4 7/1/91
	3	\|
	4	\| fpsp_unfl --- FPSP handler for underflow exception
	5	\|
	6	\| Trap disabled results
	7	\| For 881/2 compatibility, sw must denormalize the intermediate
	8	\| result, then store the result. Denormalization is accomplished
	9	\| by taking the intermediate result (which is always normalized) and
	10	\| shifting the mantissa right while incrementing the exponent until
	11	\| it is equal to the denormalized exponent for the destination
	12	\| format. After denormalization, the result is rounded to the
	13	\| destination format.
	14	\|
	15	\| Trap enabled results
	16	\| All trap disabled code applies. In addition the exceptional
	17	\| operand needs to made available to the user with a bias of $6000
	18	\| added to the exponent.
	19	\|
	20
	21	\| Copyright (C) Motorola, Inc. 1990
	22	\| All Rights Reserved
	23	\|
e00d82d0 MW	24	\| For details on the license for this file, please see the
e00d82d0 MW	25	\| file, README, in this same directory.
1da177e4 LT	26
	27	X_UNFL: \|idnt 2,1 \| Motorola 040 Floating Point Software Package
	28
	29	\|section 8
	30
	31	#include "fpsp.h"
	32
	33	\|xref denorm
	34	\|xref round
	35	\|xref store
	36	\|xref g_rndpr
	37	\|xref g_opcls
	38	\|xref g_dfmtou
	39	\|xref real_unfl
	40	\|xref real_inex
	41	\|xref fpsp_done
	42	\|xref b1238_fix
	43
	44	.global fpsp_unfl
	45	fpsp_unfl:
	46	link %a6,#-LOCAL_SIZE
	47	fsave -(%a7)
	48	moveml %d0-%d1/%a0-%a1,USER_DA(%a6)
	49	fmovemx %fp0-%fp3,USER_FP0(%a6)
	50	fmoveml %fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
	51
	52	\|
	53	bsrl unf_res \|denormalize, round & store interm op
	54	\|
	55	\| If underflow exceptions are not enabled, check for inexact
	56	\| exception
	57	\|
	58	btstb #unfl_bit,FPCR_ENABLE(%a6)
	59	beqs ck_inex
	60
	61	btstb #E3,E_BYTE(%a6)
	62	beqs no_e3_1
	63	\|
	64	\| Clear dirty bit on dest resister in the frame before branching
	65	\| to b1238_fix.
	66	\|
	67	bfextu CMDREG3B(%a6){#6:#3},%d0 \|get dest reg no
	68	bclrb %d0,FPR_DIRTY_BITS(%a6) \|clr dest dirty bit
	69	bsrl b1238_fix \|test for bug1238 case
	70	movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
	71	orl #sx_mask,E_BYTE(%a6)
	72	no_e3_1:
	73	moveml USER_DA(%a6),%d0-%d1/%a0-%a1
	74	fmovemx USER_FP0(%a6),%fp0-%fp3
	75	fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
	76	frestore (%a7)+
	77	unlk %a6
	78	bral real_unfl
	79	\|
	80	\| It is possible to have either inex2 or inex1 exceptions with the
	81	\| unfl. If the inex enable bit is set in the FPCR, and either
	82	\| inex2 or inex1 occurred, we must clean up and branch to the
	83	\| real inex handler.
	84	\|
	85	ck_inex:
	86	moveb FPCR_ENABLE(%a6),%d0
	87	andb FPSR_EXCEPT(%a6),%d0
	88	andib #0x3,%d0
	89	beqs unfl_done
90
91	\|
92	\| Inexact enabled and reported, and we must take an inexact exception
93	\|
94	take_inex:
95	btstb #E3,E_BYTE(%a6)
96	beqs no_e3_2
97	\|
98	\| Clear dirty bit on dest resister in the frame before branching
99	\| to b1238_fix.
100	\|
101	bfextu CMDREG3B(%a6){#6:#3},%d0 \|get dest reg no
102	bclrb %d0,FPR_DIRTY_BITS(%a6) \|clr dest dirty bit
103	bsrl b1238_fix \|test for bug1238 case
104	movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
105	orl #sx_mask,E_BYTE(%a6)
106	no_e3_2:
107	moveb #INEX_VEC,EXC_VEC+1(%a6)
108	moveml USER_DA(%a6),%d0-%d1/%a0-%a1
109	fmovemx USER_FP0(%a6),%fp0-%fp3
110	fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
111	frestore (%a7)+
112	unlk %a6
113	bral real_inex
114
115	unfl_done:
116	bclrb #E3,E_BYTE(%a6)
117	beqs e1_set \|if set then branch
118	\|
119	\| Clear dirty bit on dest resister in the frame before branching
120	\| to b1238_fix.
121	\|
122	bfextu CMDREG3B(%a6){#6:#3},%d0 \|get dest reg no
123	bclrb %d0,FPR_DIRTY_BITS(%a6) \|clr dest dirty bit
124	bsrl b1238_fix \|test for bug1238 case
125	movel USER_FPSR(%a6),FPSR_SHADOW(%a6)
126	orl #sx_mask,E_BYTE(%a6)
127	moveml USER_DA(%a6),%d0-%d1/%a0-%a1
128	fmovemx USER_FP0(%a6),%fp0-%fp3
129	fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
130	frestore (%a7)+
131	unlk %a6
132	bral fpsp_done
133	e1_set:
134	moveml USER_DA(%a6),%d0-%d1/%a0-%a1
135	fmovemx USER_FP0(%a6),%fp0-%fp3
136	fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
137	unlk %a6
138	bral fpsp_done
139	\|
140	\| unf_res --- underflow result calculation
141	\|
142	unf_res:
143	bsrl g_rndpr \|returns RND_PREC in d0 0=ext,
144	\| ;1=sgl, 2=dbl
145	\| ;we need the RND_PREC in the
146	\| ;upper word for round
147	movew #0,-(%a7)
148	movew %d0,-(%a7) \|copy RND_PREC to stack
149	\|
150	\|
151	\| If the exception bit set is E3, the exceptional operand from the
152	\| fpu is in WBTEMP; else it is in FPTEMP.
153	\|
154	btstb #E3,E_BYTE(%a6)
155	beqs unf_E1
156	unf_E3:
157	lea WBTEMP(%a6),%a0 \|a0 now points to operand
158	\|
159	\| Test for fsgldiv and fsglmul. If the inst was one of these, then
160	\| force the precision to extended for the denorm routine. Use
161	\| the user's precision for the round routine.
162	\|
163	movew CMDREG3B(%a6),%d1 \|check for fsgldiv or fsglmul
164	andiw #0x7f,%d1
165	cmpiw #0x30,%d1 \|check for sgldiv
166	beqs unf_sgl
167	cmpiw #0x33,%d1 \|check for sglmul
168	bnes unf_cont \|if not, use fpcr prec in round
169	unf_sgl:
170	clrl %d0
171	movew #0x1,(%a7) \|override g_rndpr precision
172	\| ;force single
173	bras unf_cont
174	unf_E1:
175	lea FPTEMP(%a6),%a0 \|a0 now points to operand
176	unf_cont:
177	bclrb #sign_bit,LOCAL_EX(%a0) \|clear sign bit
178	sne LOCAL_SGN(%a0) \|store sign
179
180	bsrl denorm \|returns denorm, a0 points to it
181	\|
182	\| WARNING:
183	\| ;d0 has guard,round sticky bit
184	\| ;make sure that it is not corrupted
185	\| ;before it reaches the round subroutine
186	\| ;also ensure that a0 isn't corrupted
187
188	\|
189	\| Set up d1 for round subroutine d1 contains the PREC/MODE
190	\| information respectively on upper/lower register halves.
191	\|
192	bfextu FPCR_MODE(%a6){#2:#2},%d1 \|get mode from FPCR
193	\| ;mode in lower d1
194	addl (%a7)+,%d1 \|merge PREC/MODE
195	\|
196	\| WARNING: a0 and d0 are assumed to be intact between the denorm and
197	\| round subroutines. All code between these two subroutines
198	\| must not corrupt a0 and d0.
199	\|
200	\|
201	\| Perform Round
202	\| Input: a0 points to input operand
203	\| d0{31:29} has guard, round, sticky
204	\| d1{01:00} has rounding mode
205	\| d1{17:16} has rounding precision
206	\| Output: a0 points to rounded operand
207	\|
208
209	bsrl round \|returns rounded denorm at (a0)
210	\|
211	\| Differentiate between store to memory vs. store to register
212	\|
213	unf_store:
214	bsrl g_opcls \|returns opclass in d0{2:0}
215	cmpib #0x3,%d0
216	bnes not_opc011
217	\|
218	\| At this point, a store to memory is pending
219	\|
220	opc011:
221	bsrl g_dfmtou
222	tstb %d0
223	beqs ext_opc011 \|If extended, do not subtract
224	\| ;If destination format is sgl/dbl,
225	tstb LOCAL_HI(%a0) \|If rounded result is normal,don't
226	\| ;subtract
227	bmis ext_opc011
228	subqw #1,LOCAL_EX(%a0) \|account for denorm bias vs.
229	\| ;normalized bias
230	\| ; normalized denormalized
231	\| ;single $7f $7e
232	\| ;double $3ff $3fe
233	\|
234	ext_opc011:
235	bsrl store \|stores to memory
236	bras unf_done \|finish up
237
238	\|
239	\| At this point, a store to a float register is pending
240	\|
241	not_opc011:
242	bsrl store \|stores to float register
243	\| ;a0 is not corrupted on a store to a
244	\| ;float register.
245	\|
246	\| Set the condition codes according to result
247	\|
248	tstl LOCAL_HI(%a0) \|check upper mantissa
249	bnes ck_sgn
250	tstl LOCAL_LO(%a0) \|check lower mantissa
251	bnes ck_sgn
252	bsetb #z_bit,FPSR_CC(%a6) \|set condition codes if zero
253	ck_sgn:
254	btstb #sign_bit,LOCAL_EX(%a0) \|check the sign bit
255	beqs unf_done
256	bsetb #neg_bit,FPSR_CC(%a6)
257
258	\|
259	\| Finish.
260	\|
261	unf_done:
262	btstb #inex2_bit,FPSR_EXCEPT(%a6)
263	beqs no_aunfl
264	bsetb #aunfl_bit,FPSR_AEXCEPT(%a6)
265	no_aunfl:
266	rts
267
268	\|end