[linux-block.git] / arch / c6x / lib / divu.S

;; SPDX-License-Identifier: GPL-2.0-or-later
;;  Copyright 2010  Free Software Foundation, Inc.
;;  Contributed by Bernd Schmidt <bernds@codesourcery.com>.
;;

#include <linux/linkage.h>

	;; ABI considerations for the divide functions
	;; The following registers are call-used:
	;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
	;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
	;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
	;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
	;;
	;; In our implementation, divu and remu are leaf functions,
	;; while both divi and remi call into divu.
	;; A0 is not clobbered by any of the functions.
	;; divu does not clobber B2 either, which is taken advantage of
	;; in remi.
	;; divi uses B5 to hold the original return address during
	;; the call to divu.
	;; remi uses B2 and A5 to hold the input values during the
	;; call to divu.  It stores B3 on the stack.

	.text
ENTRY(__c6xabi_divu)
	;; Unsigned 32-bit division helper.
	;;   In:   A4 = dividend, B4 = divisor, B3 = return address
	;;   Out:  A4 = quotient
	;; Registers written by this routine: A1, A2, A4, A6, B0, B1, B4.
	;; There is no explicit check for a zero divisor in this routine.
	;;
	;; We use a series of up to 31 subc instructions.  First, we find
	;; out how many leading zero bits there are in the divisor.  This
	;; gives us both a shift count for aligning (shifting) the divisor
	;; to the top of the register, and the number of times we have to
	;; execute subc.

	;; At the end, we have both the remainder and most of the quotient
	;; in A4.  The top bit of the quotient is computed first and is
	;; placed in A2.

	;; Return immediately if the dividend is zero.
	;; B1 first holds a copy of the dividend for the zero test.  When
	;; the dividend is nonzero, B1 is replaced by the leading-zero count
	;; of the divisor, which is the number of subc steps still to run.
	;; When the dividend is zero, the branch to B3 is issued and B4 is
	;; set to 1; the packets up to the RETURN A marker below still
	;; execute before that branch takes effect, with every
	;; B1-predicated instruction skipped because B1 stays zero.
	 mv	.s2x	A4, B1
   [B1]	 lmbd	.l2	1, B4, B1
|| [!B1] b	.s2	B3	; RETURN A
|| [!B1] mvk	.d2	1, B4
	;; A6 keeps the step count for the quotient extraction at the end;
	;; the divisor in B4 is shifted left by the same count.
	 mv	.l1x	B1, A6
||	 shl	.s2	B4, B1, B4

	;; The loop performs a maximum of 28 steps, so we do the
	;; first 3 here.
	;; The top quotient bit is computed by hand: A2 = 1 when the
	;; dividend is below the aligned divisor, otherwise the aligned
	;; divisor is subtracted from A4.  A2 is then inverted so that it
	;; holds the quotient bit itself, and the divisor is shifted right
	;; by one place ready for the subc steps.
	 cmpltu	.l1x	A4, B4, A2
   [!A2] sub	.l1x	A4, B4, A4
||	 shru	.s2	B4, 1, B4
||	 xor	.s1	1, A2, A2

	;; Park the top quotient bit in bit 31 of A2.  From here on every
	;; subc step is guarded by B1 and paired with a decrement of B1, so
	;; once B1 reaches zero the remaining steps leave A4 untouched.
	 shl	.s1	A2, 31, A2
|| [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1

	;; RETURN A may happen here (note: must happen before the next branch)
	;; Each pass of the loop runs up to seven guarded subc steps.  B0 is
	;; set in the first packet when B1 is greater than 7, and the
	;; backward branch issued in the second packet takes effect after
	;; the last packet of the pass.
_divu_loop:
	 cmpgt	.l2	B1, 7, B0
|| [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
|| [B0]  b	.s1	_divu_loop
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
   [B1]	 subc	.l1x	A4,B4,A4
|| [B1]	 add	.s2	-1, B1, B1
	;; loop backwards branch happens here

	;; Quotient extraction; the return is issued first and the
	;; arithmetic below runs in the packets that follow it.
	;; A6 becomes 32 minus the step count.  Shifting A4 left by that
	;; amount drops the remainder part and leaves the quotient bits
	;; produced by subc at the top of A4.  They are moved down one
	;; place, the top quotient bit from A2 is ORed in at bit 31, and
	;; the result is shifted right by 31 minus the step count so that
	;; the quotient ends up right-aligned in A4.
	 ret	.s2	B3
||	 mvk	.s1	32, A1
	 sub	.l1	A1, A6, A6
	 shl	.s1	A4, A6, A4
	 shru	.s1	A4, 1, A4
||	 sub	.l1	A6, 1, A6
	 or	.l1	A2, A4, A4
	 shru	.s1	A4, A6, A4
	;; NOTE(review): counting packets the same way as for the two
	;; branches above, the return appears to take effect after the
	;; preceding shru, which would make this nop padding - confirm.
	 nop
ENDPROC(__c6xabi_divu)
Commit	Line	Data
1a59d1b8	1	;; SPDX-License-Identifier: GPL-2.0-or-later
09831ca7 AJ	2	;; Copyright 2010 Free Software Foundation, Inc.
	3	;; Contributed by Bernd Schmidt <bernds@codesourcery.com>.
	4	;;
09831ca7 AJ	5
	6	#include <linux/linkage.h>
	7
	8	;; ABI considerations for the divide functions
	9	;; The following registers are call-used:
	10	;; __c6xabi_divi A0,A1,A2,A4,A6,B0,B1,B2,B4,B5
	11	;; __c6xabi_divu A0,A1,A2,A4,A6,B0,B1,B2,B4
	12	;; __c6xabi_remi A1,A2,A4,A5,A6,B0,B1,B2,B4
	13	;; __c6xabi_remu A1,A4,A5,A7,B0,B1,B2,B4
	14	;;
	15	;; In our implementation, divu and remu are leaf functions,
	16	;; while both divi and remi call into divu.
	17	;; A0 is not clobbered by any of the functions.
	18	;; divu does not clobber B2 either, which is taken advantage of
	19	;; in remi.
	20	;; divi uses B5 to hold the original return address during
	21	;; the call to divu.
	22	;; remi uses B2 and A5 to hold the input values during the
	23	;; call to divu. It stores B3 in on the stack.
	24
	25	.text
	26	ENTRY(__c6xabi_divu)
	27	;; We use a series of up to 31 subc instructions. First, we find
	28	;; out how many leading zero bits there are in the divisor. This
	29	;; gives us both a shift count for aligning (shifting) the divisor
	30	;; to the, and the number of times we have to execute subc.
	31
	32	;; At the end, we have both the remainder and most of the quotient
	33	;; in A4. The top bit of the quotient is computed first and is
	34	;; placed in A2.
	35
	36	;; Return immediately if the dividend is zero.
	37	mv .s2x A4, B1
	38	[B1] lmbd .l2 1, B4, B1
	39	\|\| [!B1] b .s2 B3 ; RETURN A
	40	\|\| [!B1] mvk .d2 1, B4
	41	mv .l1x B1, A6
	42	\|\| shl .s2 B4, B1, B4
	43
	44	;; The loop performs a maximum of 28 steps, so we do the
	45	;; first 3 here.
	46	cmpltu .l1x A4, B4, A2
	47	[!A2] sub .l1x A4, B4, A4
	48	\|\| shru .s2 B4, 1, B4
	49	\|\| xor .s1 1, A2, A2
	50
	51	shl .s1 A2, 31, A2
	52	\|\| [B1] subc .l1x A4,B4,A4
	53	\|\| [B1] add .s2 -1, B1, B1
	54	[B1] subc .l1x A4,B4,A4
	55	\|\| [B1] add .s2 -1, B1, B1
	56
	57	;; RETURN A may happen here (note: must happen before the next branch)
	58	_divu_loop:
	59	cmpgt .l2 B1, 7, B0
	60	\|\| [B1] subc .l1x A4,B4,A4
	61	\|\| [B1] add .s2 -1, B1, B1
	62	[B1] subc .l1x A4,B4,A4
	63	\|\| [B1] add .s2 -1, B1, B1
	64	\|\| [B0] b .s1 _divu_loop
	65	[B1] subc .l1x A4,B4,A4
	66	\|\| [B1] add .s2 -1, B1, B1
	67	[B1] subc .l1x A4,B4,A4
	68	\|\| [B1] add .s2 -1, B1, B1
69	[B1] subc .l1x A4,B4,A4
70	\|\| [B1] add .s2 -1, B1, B1
71	[B1] subc .l1x A4,B4,A4
72	\|\| [B1] add .s2 -1, B1, B1
73	[B1] subc .l1x A4,B4,A4
74	\|\| [B1] add .s2 -1, B1, B1
75	;; loop backwards branch happens here
76
77	ret .s2 B3
78	\|\| mvk .s1 32, A1
79	sub .l1 A1, A6, A6
80	shl .s1 A4, A6, A4
81	shru .s1 A4, 1, A4
82	\|\| sub .l1 A6, 1, A6
83	or .l1 A2, A4, A4
84	shru .s1 A4, A6, A4
85	nop
86	ENDPROC(__c6xabi_divu)