/*
 * linux/arch/arm/lib/div64.S
 *
 * Optimized computation of 64-bit dividend / 32-bit divisor
 *
 * Author:	Nicolas Pitre
 * Created:	Oct 5, 2003
 * Copyright:	Monta Vista Software, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

#ifdef __ARMEB__
#define xh r0
#define xl r1
#define yh r2
#define yl r3
#else
#define xl r0
#define xh r1
#define yl r2
#define yh r3
#endif

/*
 * __do_div64: perform a division with 64-bit dividend and 32-bit divisor.
 *
 * Note: Calling convention is totally non-standard for optimal code.
 *       This is meant to be used by do_div() from include/asm/div64.h only.
 *
 * Input parameters:
 *	xh-xl	= dividend (clobbered)
 *	r4	= divisor (preserved)
 *
 * Output values:
 *	yh-yl	= result
 *	xh	= remainder
 *
 * Clobbered regs: xl, ip
 */

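/*
 * Illustrative summary: in C terms the routine below computes roughly
 *
 *	result    = dividend / divisor;
 *	remainder = dividend % divisor;
 *
 * returning the 64-bit quotient in yh-yl and the 32-bit remainder in xh,
 * which is the form the do_div() macro expects.
 */
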
ENTRY(__do_div64)
UNWIND(.fnstart)

	@ Test for easy paths first.
	subs	ip, r4, #1
	bls	9f			@ divisor is 0 or 1
	tst	ip, r4
	beq	8f			@ divisor is power of 2

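	@ Worked example: with r4 = 8, ip = 7 and (7 & 8) == 0, so the
	@ power-of-2 path at 8f is taken.  With r4 = 0 the subtraction
	@ borrows (C clear) and with r4 = 1 it yields zero (Z set), so
	@ "bls" reaches 9f in both cases.
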
	@ See if we need to handle upper 32-bit result.
	cmp	xh, r4
	mov	yh, #0
	blo	3f

	@ Align divisor with upper part of dividend.
	@ The aligned divisor is stored in yl preserving the original.
	@ The bit position is stored in ip.

#if __LINUX_ARM_ARCH__ >= 5

	clz	yl, r4
	clz	ip, xh
	sub	yl, yl, ip
	mov	ip, #1
	mov	ip, ip, lsl yl
	mov	yl, r4, lsl yl

#else

	mov	yl, r4
	mov	ip, #1
1:	cmp	yl, #0x80000000
	cmpcc	yl, xh
	movcc	yl, yl, lsl #1
	movcc	ip, ip, lsl #1
	bcc	1b

#endif

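	@ For instance, with xh = 0x40000000 and r4 = 5: clz(r4) = 29 and
	@ clz(xh) = 1, so the shift count is 28, giving yl = 0x50000000
	@ (top bit aligned with xh's) and ip = 0x10000000 marking the
	@ corresponding quotient bit.
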
	@ The division loop for needed upper bit positions.
	@ Break out early if dividend reaches 0.
2:	cmp	xh, yl
	orrcs	yh, yh, ip
	subcss	xh, xh, yl
	movnes	ip, ip, lsr #1
	mov	yl, yl, lsr #1
	bne	2b

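	@ Note: "subcss" and "movnes" are the pre-UAL spellings of
	@ "subscs" and "movsne".  The loop terminates either when the
	@ subtraction zeroes xh (Z from subcss, movnes then skipped) or
	@ when the quotient bit in ip has been shifted out (Z from movnes).
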
	@ See if we need to handle lower 32-bit result.
3:	cmp	xh, #0
	mov	yl, #0
	cmpeq	xl, r4
	movlo	xh, xl
	retlo	lr

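	@ The early return above triggers only when xh == 0 and xl < r4:
	@ the low word of the result stays 0 (yl was just cleared) and the
	@ remainder is simply xl, which movlo places into xh.
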
	@ The division loop for lower bit positions.
	@ Here we shift remainder bits leftwards rather than moving the
	@ divisor for comparisons, considering the carry-out bit as well.
	mov	ip, #0x80000000
4:	movs	xl, xl, lsl #1
	adcs	xh, xh, xh
	beq	6f
	cmpcc	xh, r4
5:	orrcs	yl, yl, ip
	subcs	xh, xh, r4
	movs	ip, ip, lsr #1
	bne	4b
	ret	lr

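	@ In the loop above, a set carry out of "adcs" means the just-shifted
	@ remainder in xh has overflowed into bit 32 and is therefore larger
	@ than any 32-bit divisor, so cmpcc is skipped (C stays set) and both
	@ orrcs and subcs at 5: execute.
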
	@ The top part of remainder became zero.  If carry is set
	@ (the 33rd bit) this is a false positive so resume the loop.
	@ Otherwise, if lower part is also null then we are done.
6:	bcs	5b
	cmp	xl, #0
	reteq	lr

	@ We still have remainder bits in the low part.  Bring them up.

#if __LINUX_ARM_ARCH__ >= 5

	clz	xh, xl			@ we know xh is zero here so...
	add	xh, xh, #1
	mov	xl, xl, lsl xh
	mov	ip, ip, lsr xh

#else

7:	movs	xl, xl, lsl #1
	mov	ip, ip, lsr #1
	bcc	7b

#endif

	@ Current remainder is now 1.  It is pointless to compare it with
	@ the divisor at this point since the divisor cannot be smaller
	@ than 3 here.
	@ If possible, branch for another shift in the division loop.
	@ If no bit position left then we are done.
	movs	ip, ip, lsr #1
	mov	xh, #1
	bne	4b
	ret	lr

8:	@ Division by a power of 2: determine the divisor's order,
	@ then simply shift values around.

#if __LINUX_ARM_ARCH__ >= 5

	clz	ip, r4
	rsb	ip, ip, #31

#else

	mov	yl, r4
	cmp	r4, #(1 << 16)
	mov	ip, #0
	movhs	yl, yl, lsr #16
	movhs	ip, #16

	cmp	yl, #(1 << 8)
	movhs	yl, yl, lsr #8
	addhs	ip, ip, #8

	cmp	yl, #(1 << 4)
	movhs	yl, yl, lsr #4
	addhs	ip, ip, #4

	cmp	yl, #(1 << 2)
	addhi	ip, ip, #3
	addls	ip, ip, yl, lsr #1

#endif

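	@ The pre-ARMv5 fallback above computes ip = log2(r4) by successive
	@ range checks; e.g. r4 = (1 << 17) gives yl = 2 and ip = 16 after
	@ the first step, and ip = 17 after the final "addls".
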
	mov	yh, xh, lsr ip
	mov	yl, xl, lsr ip
	rsb	ip, ip, #32
 ARM(	orr	yl, yl, xh, lsl ip	)
 THUMB(	lsl	xh, xh, ip		)
 THUMB(	orr	yl, yl, xh		)
	mov	xh, xl, lsl ip
	mov	xh, xh, lsr ip
	ret	lr

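	@ Worked example for the power-of-2 path: dividing
	@ 0x0000000300000007 by 8 gives ip = 3, so yh:yl ends up as
	@ 0x0000000060000000 (the dividend shifted right by 3, with xh's
	@ low bits merged into yl) and xh = xl & 7 = 7 as the remainder.
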
	@ eq -> division by 1: obvious enough...
9:	moveq	yl, xl
	moveq	yh, xh
	moveq	xh, #0
	reteq	lr
UNWIND(.fnend)

UNWIND(.fnstart)
UNWIND(.pad #4)
UNWIND(.save {lr})
Ldiv0_64:
	@ Division by 0:
	str	lr, [sp, #-8]!
	bl	__div0

	@ as wrong as it could be...
	mov	yl, #0
	mov	yh, #0
	mov	xh, #0
	ldr	pc, [sp], #8

UNWIND(.fnend)
ENDPROC(__do_div64)