[linux-2.6-block.git] / arch / arm / lib / lib1funcs.S

/*
 * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
 *
 * Author: Nicolas Pitre <nico@cam.org>
 *   - contributed to gcc-3.4 on Sep 30, 2003
 *   - adapted for the Linux kernel on Oct 2, 2003
 */

/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
compiled version of this file into combinations with other programs,
and to distribute those combinations without any restriction coming
from the use of this file.  (The General Public License restrictions
do apply in other respects; for example, they cover modification of
the file, and distribution when not linked into a combine
executable.)

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


#include <linux/linkage.h>
#include <asm/assembler.h>


/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned shift-and-subtract division core, expanded by __udivsi3 and
 * __divsi3 in this file.
 *
 *   dividend  in:  numerator.  Clobbered.  The last pass of the loop may
 *                  keep subtracting after curbit has been shifted out to
 *                  zero, so the value left here is NOT a usable remainder.
 *   divisor   in:  denominator.  Clobbered (shifted left to line up with
 *                  the dividend, then shifted back right by the loop).
 *   result    out: quotient.
 *   curbit    scratch: the quotient bit that matches the current
 *                  alignment of divisor.
 *
 * Both callers only reach the expansion after branching away for
 * divisor == 0, divisor == 1, dividend <= divisor and power-of-two
 * divisors.  Condition flags are clobbered.
 */
.macro ARM_DIV_BODY dividend, divisor, result, curbit

#if __LINUX_ARM_ARCH__ >= 5

	@ ARMv5 and later: use clz to line the divisor up with the
	@ dividend in one go.  The shift amount is the difference of the
	@ two leading-zero counts; curbit becomes 1 << shift, i.e. the
	@ quotient bit that belongs to this alignment.
	clz	\curbit, \divisor
	clz	\result, \dividend
	sub	\result, \curbit, \result
	mov	\curbit, #1
	mov	\divisor, \divisor, lsl \result
	mov	\curbit, \curbit, lsl \result
	mov	\result, #0

#else

	@ Initially shift the divisor left 3 bits if possible,
	@ set curbit accordingly.  This allows for curbit to be located
	@ at the left end of each 4 bit nibbles in the division loop
	@ to save one loop in most cases.
	@ (The shift is only taken when the top three divisor bits are
	@ clear, so nothing is shifted out.)
	tst	\divisor, #0xe0000000
	moveq	\divisor, \divisor, lsl #3
	moveq	\curbit, #8
	movne	\curbit, #1

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
	@ The second compare only runs while divisor < 0x10000000, so the
	@ 4 bit shift cannot lose set bits.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	movlo	\curbit, \curbit, lsl #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	movlo	\curbit, \curbit, lsl #1
	blo	1b

	mov	\result, #0

#endif

	@ Division loop, unrolled four times.  Each step compares the
	@ dividend against the divisor shifted right by 0, 1, 2 and 3
	@ bits; when the dividend is not lower (unsigned) the shifted
	@ divisor is subtracted and the matching curbit is or-ed into
	@ the quotient.
1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	orrhs	\result,   \result,   \curbit
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	orrhs	\result,   \result,   \curbit,  lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	orrhs	\result,   \result,   \curbit,  lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	orrhs	\result,   \result,   \curbit,  lsr #3
	@ Stop when nothing is left to divide, or when moving curbit
	@ down by a nibble shifts it out entirely (movnes sets Z then).
	cmp	\dividend, #0			@ Early termination?
	movnes	\curbit,   \curbit,  lsr #4	@ No, any more bits to do?
	movne	\divisor,  \divisor, lsr #4
	bne	1b

.endm


/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute the bit position (log2) of a power-of-two divisor so the
 * caller can replace the division by a right shift.
 *
 *   divisor   in:  value with a single bit set (callers check this with
 *                  "tst divisor, divisor - 1" before expanding the macro).
 *                  Preserved on ARMv5+, clobbered on older architectures.
 *   order     out: index of the set bit.
 *
 * Condition flags are clobbered on the pre-ARMv5 path.
 */
.macro ARM_DIV2_ORDER divisor, order

#if __LINUX_ARM_ARCH__ >= 5

	@ order = 31 - (number of leading zeros)
	clz	\order, \divisor
	rsb	\order, \order, #31

#else

	@ No clz available: binary search for the set bit.  Each stage
	@ folds the divisor down when it is at or above the threshold
	@ and adds the amount folded to order.
	cmp	\divisor, #(1 << 16)
	movhs	\divisor, \divisor, lsr #16
	movhs	\order, #16
	movlo	\order, #0

	cmp	\divisor, #(1 << 8)
	movhs	\divisor, \divisor, lsr #8
	addhs	\order, \order, #8

	cmp	\divisor, #(1 << 4)
	movhs	\divisor, \divisor, lsr #4
	addhs	\order, \order, #4

	@ For a power-of-two input the divisor is now 1, 2, 4 or 8.
	@ 8 adds 3; for the rest, divisor >> 1 is 0, 1 or 2, which is
	@ exactly the remaining bit index.
	cmp	\divisor, #(1 << 2)
	addhi	\order, \order, #3
	addls	\order, \order, \divisor, lsr #1

#endif

.endm


/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned shift-and-subtract remainder core, expanded by __umodsi3 and
 * __modsi3 in this file.
 *
 *   dividend  in:  numerator; out: remainder.
 *   divisor   in:  denominator.  Clobbered.
 *   order     scratch: number of bit positions the divisor was shifted
 *                  left, then used as a countdown of the steps left.
 *   spare     scratch (only written on the ARMv5+ path).
 *
 * Both callers only reach the expansion when dividend > divisor and the
 * divisor is neither 0, 1 nor a power of two.  Condition flags are
 * clobbered.
 */
.macro ARM_MOD_BODY dividend, divisor, order, spare

#if __LINUX_ARM_ARCH__ >= 5

	@ order = clz(divisor) - clz(dividend); shifting the divisor left
	@ by that amount lines its top bit up with the dividend top bit.
	clz	\order, \divisor
	clz	\spare, \dividend
	sub	\order, \order, \spare
	mov	\divisor, \divisor, lsl \order

#else

	mov	\order, #0

	@ Unless the divisor is very big, shift it up in multiples of
	@ four bits, since this is the amount of unwinding in the main
	@ division loop.  Continue shifting until the divisor is
	@ larger than the dividend.
1:	cmp	\divisor, #0x10000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #4
	addlo	\order, \order, #4
	blo	1b

	@ For very big divisors, we must shift it a bit at a time, or
	@ we will be in danger of overflowing.
1:	cmp	\divisor, #0x80000000
	cmplo	\divisor, \dividend
	movlo	\divisor, \divisor, lsl #1
	addlo	\order, \order, #1
	blo	1b

#endif

	@ Perform all needed subtractions to keep only the remainder.
	@ Do comparisons in batch of 4 first.
	@ A shift of order needs order + 1 compare/subtract steps.
	@ Biasing by 3 (not 4) makes "order >= 0" mean that at least four
	@ steps are left, and a negative value (-3, -2, -1) encode that
	@ 1, 2 or 3 steps are left.
	subs	\order, \order, #3		@ yes, 3 is intended here
	blt	2f

1:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	cmp	\dividend, \divisor,  lsr #1
	subhs	\dividend, \dividend, \divisor, lsr #1
	cmp	\dividend, \divisor,  lsr #2
	subhs	\dividend, \dividend, \divisor, lsr #2
	cmp	\dividend, \divisor,  lsr #3
	subhs	\dividend, \dividend, \divisor, lsr #3
	@ Leave the loop when the dividend reached zero (the count is
	@ then left untouched) or when fewer than four steps remain.
	cmp	\dividend, #1
	mov	\divisor, \divisor, lsr #4
	subges	\order, \order, #4
	bge	1b

	@ Finished if the count is a multiple of four (no partial batch
	@ left) or if the dividend is already zero.
	tst	\order, #3
	teqne	\dividend, #0
	beq	5f

	@ Either 1, 2 or 3 comparison/subtractions are left.
	@ order is -3, -2 or -1 here: -3 runs only the step at 4:,
	@ -2 enters at 3:, -1 falls through all three steps.
2:	cmn	\order, #2
	blt	4f
	beq	3f
	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
3:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
	mov	\divisor,  \divisor,  lsr #1
4:	cmp	\dividend, \divisor
	subhs	\dividend, \dividend, \divisor
5:
.endm


/*
 * __udivsi3: unsigned 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = r0 / r1 (unsigned quotient)
 *   Clobbers r1, r2, r3 and the condition flags.
 *
 * A zero divisor is handed to Ldiv0.
 */
ENTRY(__udivsi3)

	subs	r2, r1, #1			@ r2 = divisor - 1
	moveq	pc, lr				@ divisor == 1: quotient is r0
	bcc	Ldiv0				@ borrow means divisor == 0
	cmp	r0, r1
	bls	11f				@ dividend <= divisor: result 0 or 1
	tst	r1, r2				@ divisor & (divisor - 1)
	beq	12f				@ zero: divisor is a power of 2

	ARM_DIV_BODY r0, r1, r2, r3

	mov	r0, r2
	mov	pc, lr

	@ Flags still come from "cmp r0, r1" above.
11:	moveq	r0, #1				@ dividend == divisor
	movne	r0, #0				@ dividend <  divisor
	mov	pc, lr

	@ Power-of-two divisor: shift right by its bit index.
12:	ARM_DIV2_ORDER r1, r2

	mov	r0, r0, lsr r2
	mov	pc, lr


/*
 * __umodsi3: unsigned 32-bit remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = r0 % r1 (unsigned remainder)
 *   Clobbers r1, r2, r3 and the condition flags.
 *
 * A zero divisor is handed to Ldiv0.  The easy cases are resolved by one
 * chain of conditionally executed instructions:
 *   divisor == 1 or dividend == divisor  ->  0
 *   dividend <  divisor                  ->  dividend unchanged
 *   divisor is a power of two            ->  dividend & (divisor - 1)
 */
ENTRY(__umodsi3)

	subs	r2, r1, #1			@ compare divisor with 1
	bcc	Ldiv0				@ borrow means divisor == 0
	cmpne	r0, r1				@ compare dividend with divisor
	moveq   r0, #0				@ divisor == 1 or dividend == divisor
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ yes: mask (harmless when r0 is 0)
	movls	pc, lr				@ any easy case: done

	ARM_MOD_BODY r0, r1, r2, r3

	mov	pc, lr


/*
 * __divsi3: signed 32-bit division.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = quotient; magnitude is |r0| / |r1|, negated when the
 *         operands have different signs
 *   Clobbers r1, r2, r3, ip and the condition flags.
 *
 * A zero divisor is handed to Ldiv0.  The sign of the result is kept in
 * the top bit of ip while the work is done on absolute values.
 */
ENTRY(__divsi3)

	cmp	r1, #0
	eor	ip, r0, r1			@ save the sign of the result.
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	subs	r2, r1, #1			@ division by 1 or -1 ?
	beq	10f
	movs	r3, r0
	rsbmi	r3, r0, #0			@ positive dividend value
	cmp	r3, r1
	bls	11f				@ |dividend| <= |divisor|
	tst	r1, r2				@ divisor is power of 2 ?
	beq	12f

	ARM_DIV_BODY r3, r1, r0, r2

	cmp	ip, #0				@ apply the saved sign
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Divisor was 1 or -1.  ip is dividend ^ divisor, so ip ^ r0 has
	@ the sign bit of the original divisor: negate r0 when it was -1.
10:	teq	ip, r0				@ same sign ?
	rsbmi	r0, r0, #0
	mov	pc, lr

	@ Flags still come from "cmp r3, r1" above.
11:	movlo	r0, #0				@ |dividend| < |divisor|
	moveq	r0, ip, asr #31			@ equal: 0 or -1 from the sign ...
	orreq	r0, r0, #1			@ ... becomes +1 or -1
	mov	pc, lr

	@ Power-of-two divisor: shift |dividend| right, then apply the sign.
12:	ARM_DIV2_ORDER r1, r2

	cmp	ip, #0
	mov	r0, r3, lsr r2
	rsbmi	r0, r0, #0
	mov	pc, lr


/*
 * __modsi3: signed 32-bit remainder.
 *
 *   In:   r0 = dividend, r1 = divisor
 *   Out:  r0 = remainder; magnitude is |r0| % |r1| and the sign follows
 *         the dividend
 *   Clobbers r1, r2, r3, ip and the condition flags.
 *
 * A zero divisor is handed to Ldiv0.  The easy cases use the same chain
 * of conditionally executed instructions as __umodsi3, applied to the
 * absolute values.
 */
ENTRY(__modsi3)

	cmp	r1, #0
	beq	Ldiv0
	rsbmi	r1, r1, #0			@ loops below use unsigned.
	movs	ip, r0				@ preserve sign of dividend
	rsbmi	r0, r0, #0			@ if negative make positive
	subs	r2, r1, #1			@ compare divisor with 1
	cmpne	r0, r1				@ compare dividend with divisor
	moveq	r0, #0				@ divisor is 1 or operands are equal
	tsthi	r1, r2				@ see if divisor is power of 2
	andeq	r0, r0, r2			@ yes: mask (harmless when r0 is 0)
	bls	10f				@ any easy case: only fix the sign

	ARM_MOD_BODY r0, r1, r2, r3

10:	cmp	ip, #0				@ dividend was negative ?
	rsbmi	r0, r0, #0			@ then so is the remainder
	mov	pc, lr


/*
 * Ldiv0: common division-by-zero exit.
 *
 * The division entry points in this file branch here (plain branch, not
 * a call), so lr still holds the return address of their caller.
 * __div0 is called and, if it returns, 0 is returned in r0 to that
 * caller.  __div0 is defined outside this file; presumably it reports
 * the error -- confirm against its definition.
 *
 * Eight bytes are pushed although only lr is saved: the extra word is
 * padding so that sp stays 8-byte aligned across the call to __div0, as
 * the AAPCS/EABI requires at public interfaces.  This is harmless for
 * the old ABI, which only needs 4-byte alignment.
 */
Ldiv0:

	str	lr, [sp, #-8]!		@ save lr, keep sp 8-byte aligned
	bl	__div0
	mov	r0, #0			@ About as wrong as it could be.
	ldr	pc, [sp], #8		@ drop the slot and return to the caller
Commit	Line	Data
1da177e4 LT	1	/*
	2	* linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines
	3	*
	4	* Author: Nicolas Pitre <nico@cam.org>
	5	* - contributed to gcc-3.4 on Sep 30, 2003
	6	* - adapted for the Linux kernel on Oct 2, 2003
	7	*/
	8
	9	/* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc.
	10
	11	This file is free software; you can redistribute it and/or modify it
	12	under the terms of the GNU General Public License as published by the
	13	Free Software Foundation; either version 2, or (at your option) any
	14	later version.
	15
	16	In addition to the permissions in the GNU General Public License, the
	17	Free Software Foundation gives you unlimited permission to link the
	18	compiled version of this file into combinations with other programs,
	19	and to distribute those combinations without any restriction coming
	20	from the use of this file. (The General Public License restrictions
	21	do apply in other respects; for example, they cover modification of
	22	the file, and distribution when not linked into a combine
	23	executable.)
	24
	25	This file is distributed in the hope that it will be useful, but
	26	WITHOUT ANY WARRANTY; without even the implied warranty of
	27	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	28	General Public License for more details.
	29
	30	You should have received a copy of the GNU General Public License
	31	along with this program; see the file COPYING. If not, write to
	32	the Free Software Foundation, 59 Temple Place - Suite 330,
	33	Boston, MA 02111-1307, USA. */
	34
	35
	36	#include <linux/linkage.h>
	37	#include <asm/assembler.h>
	38
	39
	40	.macro ARM_DIV_BODY dividend, divisor, result, curbit
	41
	42	#if __LINUX_ARM_ARCH__ >= 5
	43
	44	clz \curbit, \divisor
	45	clz \result, \dividend
	46	sub \result, \curbit, \result
	47	mov \curbit, #1
	48	mov \divisor, \divisor, lsl \result
	49	mov \curbit, \curbit, lsl \result
	50	mov \result, #0
	51
	52	#else
	53
	54	@ Initially shift the divisor left 3 bits if possible,
	55	@ set curbit accordingly. This allows for curbit to be located
	56	@ at the left end of each 4 bit nibbles in the division loop
	57	@ to save one loop in most cases.
	58	tst \divisor, #0xe0000000
	59	moveq \divisor, \divisor, lsl #3
	60	moveq \curbit, #8
	61	movne \curbit, #1
	62
	63	@ Unless the divisor is very big, shift it up in multiples of
	64	@ four bits, since this is the amount of unwinding in the main
65	@ division loop. Continue shifting until the divisor is
66	@ larger than the dividend.
67	1: cmp \divisor, #0x10000000
68	cmplo \divisor, \dividend
69	movlo \divisor, \divisor, lsl #4
70	movlo \curbit, \curbit, lsl #4
71	blo 1b
72
73	@ For very big divisors, we must shift it a bit at a time, or
74	@ we will be in danger of overflowing.
75	1: cmp \divisor, #0x80000000
76	cmplo \divisor, \dividend
77	movlo \divisor, \divisor, lsl #1
78	movlo \curbit, \curbit, lsl #1
79	blo 1b
80
81	mov \result, #0
82
83	#endif
84
85	@ Division loop
86	1: cmp \dividend, \divisor
87	subhs \dividend, \dividend, \divisor
88	orrhs \result, \result, \curbit
89	cmp \dividend, \divisor, lsr #1
90	subhs \dividend, \dividend, \divisor, lsr #1
91	orrhs \result, \result, \curbit, lsr #1
92	cmp \dividend, \divisor, lsr #2
93	subhs \dividend, \dividend, \divisor, lsr #2
94	orrhs \result, \result, \curbit, lsr #2
95	cmp \dividend, \divisor, lsr #3
96	subhs \dividend, \dividend, \divisor, lsr #3
97	orrhs \result, \result, \curbit, lsr #3
98	cmp \dividend, #0 @ Early termination?
99	movnes \curbit, \curbit, lsr #4 @ No, any more bits to do?
100	movne \divisor, \divisor, lsr #4
101	bne 1b
102
103	.endm
104
105
106	.macro ARM_DIV2_ORDER divisor, order
107
108	#if __LINUX_ARM_ARCH__ >= 5
109
110	clz \order, \divisor
111	rsb \order, \order, #31
112
113	#else
114
115	cmp \divisor, #(1 << 16)
116	movhs \divisor, \divisor, lsr #16
117	movhs \order, #16
118	movlo \order, #0
119
120	cmp \divisor, #(1 << 8)
121	movhs \divisor, \divisor, lsr #8
122	addhs \order, \order, #8
123
124	cmp \divisor, #(1 << 4)
125	movhs \divisor, \divisor, lsr #4
126	addhs \order, \order, #4
127
128	cmp \divisor, #(1 << 2)
129	addhi \order, \order, #3
130	addls \order, \order, \divisor, lsr #1
131
132	#endif
133
134	.endm
135
136
137	.macro ARM_MOD_BODY dividend, divisor, order, spare
138
139	#if __LINUX_ARM_ARCH__ >= 5
140
141	clz \order, \divisor
142	clz \spare, \dividend
143	sub \order, \order, \spare
144	mov \divisor, \divisor, lsl \order
145
146	#else
147
148	mov \order, #0
149
150	@ Unless the divisor is very big, shift it up in multiples of
151	@ four bits, since this is the amount of unwinding in the main
152	@ division loop. Continue shifting until the divisor is
153	@ larger than the dividend.
154	1: cmp \divisor, #0x10000000
155	cmplo \divisor, \dividend
156	movlo \divisor, \divisor, lsl #4
157	addlo \order, \order, #4
158	blo 1b
159
160	@ For very big divisors, we must shift it a bit at a time, or
161	@ we will be in danger of overflowing.
162	1: cmp \divisor, #0x80000000
163	cmplo \divisor, \dividend
164	movlo \divisor, \divisor, lsl #1
165	addlo \order, \order, #1
166	blo 1b
167
168	#endif
169
170	@ Perform all needed substractions to keep only the reminder.
171	@ Do comparisons in batch of 4 first.
172	subs \order, \order, #3 @ yes, 3 is intended here
173	blt 2f
174
175	1: cmp \dividend, \divisor
176	subhs \dividend, \dividend, \divisor
177	cmp \dividend, \divisor, lsr #1
178	subhs \dividend, \dividend, \divisor, lsr #1
179	cmp \dividend, \divisor, lsr #2
180	subhs \dividend, \dividend, \divisor, lsr #2
181	cmp \dividend, \divisor, lsr #3
182	subhs \dividend, \dividend, \divisor, lsr #3
183	cmp \dividend, #1
184	mov \divisor, \divisor, lsr #4
185	subges \order, \order, #4
186	bge 1b
187
188	tst \order, #3
189	teqne \dividend, #0
190	beq 5f
191
192	@ Either 1, 2 or 3 comparison/substractions are left.
193	2: cmn \order, #2
194	blt 4f
195	beq 3f
196	cmp \dividend, \divisor
197	subhs \dividend, \dividend, \divisor
198	mov \divisor, \divisor, lsr #1
199	3: cmp \dividend, \divisor
200	subhs \dividend, \dividend, \divisor
201	mov \divisor, \divisor, lsr #1
202	4: cmp \dividend, \divisor
203	subhs \dividend, \dividend, \divisor
204	5:
205	.endm
206
207
208	ENTRY(__udivsi3)
209
210	subs r2, r1, #1
211	moveq pc, lr
212	bcc Ldiv0
213	cmp r0, r1
214	bls 11f
215	tst r1, r2
216	beq 12f
217
218	ARM_DIV_BODY r0, r1, r2, r3
219
220	mov r0, r2
221	mov pc, lr
222
223	11: moveq r0, #1
224	movne r0, #0
225	mov pc, lr
226
227	12: ARM_DIV2_ORDER r1, r2
228
229	mov r0, r0, lsr r2
230	mov pc, lr
231
232
233	ENTRY(__umodsi3)
234
235	subs r2, r1, #1 @ compare divisor with 1
236	bcc Ldiv0
237	cmpne r0, r1 @ compare dividend with divisor
238	moveq r0, #0
239	tsthi r1, r2 @ see if divisor is power of 2
240	andeq r0, r0, r2
241	movls pc, lr
242
243	ARM_MOD_BODY r0, r1, r2, r3
244
245	mov pc, lr
246
247
248	ENTRY(__divsi3)
249
250	cmp r1, #0
251	eor ip, r0, r1 @ save the sign of the result.
252	beq Ldiv0
253	rsbmi r1, r1, #0 @ loops below use unsigned.
254	subs r2, r1, #1 @ division by 1 or -1 ?
255	beq 10f
256	movs r3, r0
257	rsbmi r3, r0, #0 @ positive dividend value
258	cmp r3, r1
259	bls 11f
260	tst r1, r2 @ divisor is power of 2 ?
261	beq 12f
262
263	ARM_DIV_BODY r3, r1, r0, r2
264
265	cmp ip, #0
266	rsbmi r0, r0, #0
267	mov pc, lr
268
269	10: teq ip, r0 @ same sign ?
270	rsbmi r0, r0, #0
271	mov pc, lr
272
273	11: movlo r0, #0
274	moveq r0, ip, asr #31
275	orreq r0, r0, #1
276	mov pc, lr
277
278	12: ARM_DIV2_ORDER r1, r2
279
280	cmp ip, #0
281	mov r0, r3, lsr r2
282	rsbmi r0, r0, #0
283	mov pc, lr
284
285
286	ENTRY(__modsi3)
287
288	cmp r1, #0
289	beq Ldiv0
290	rsbmi r1, r1, #0 @ loops below use unsigned.
291	movs ip, r0 @ preserve sign of dividend
292	rsbmi r0, r0, #0 @ if negative make positive
293	subs r2, r1, #1 @ compare divisor with 1
294	cmpne r0, r1 @ compare dividend with divisor
295	moveq r0, #0
296	tsthi r1, r2 @ see if divisor is power of 2
297	andeq r0, r0, r2
298	bls 10f
299
300	ARM_MOD_BODY r0, r1, r2, r3
301
302	10: cmp ip, #0
303	rsbmi r0, r0, #0
304	mov pc, lr
305
306
307	Ldiv0:
308
309	str lr, [sp, #-4]!
310	bl __div0
311	mov r0, #0 @ About as wrong as it could be.
312	ldr pc, [sp], #4
313
314