Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines | |
3 | * | |
4 | * Author: Nicolas Pitre <nico@cam.org> | |
5 | * - contributed to gcc-3.4 on Sep 30, 2003 | |
6 | * - adapted for the Linux kernel on Oct 2, 2003 | |
7 | */ | |
8 | ||
9 | /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. | |
10 | ||
11 | This file is free software; you can redistribute it and/or modify it | |
12 | under the terms of the GNU General Public License as published by the | |
13 | Free Software Foundation; either version 2, or (at your option) any | |
14 | later version. | |
15 | ||
16 | In addition to the permissions in the GNU General Public License, the | |
17 | Free Software Foundation gives you unlimited permission to link the | |
18 | compiled version of this file into combinations with other programs, | |
19 | and to distribute those combinations without any restriction coming | |
20 | from the use of this file. (The General Public License restrictions | |
21 | do apply in other respects; for example, they cover modification of | |
22 | the file, and distribution when not linked into a combine | |
23 | executable.) | |
24 | ||
25 | This file is distributed in the hope that it will be useful, but | |
26 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
27 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
28 | General Public License for more details. | |
29 | ||
30 | You should have received a copy of the GNU General Public License | |
31 | along with this program; see the file COPYING. If not, write to | |
32 | the Free Software Foundation, 59 Temple Place - Suite 330, | |
33 | Boston, MA 02111-1307, USA. */ | |
34 | ||
35 | ||
36 | #include <linux/linkage.h> | |
37 | #include <asm/assembler.h> | |
38 | ||
39 | ||
/*
 * ARM_DIV_BODY dividend, divisor, result, curbit
 *
 * Unsigned 32-bit shift-and-subtract division core.
 *
 * In:   \dividend, \divisor  - unsigned operands.  Every user in this file
 *                              branches away beforehand when
 *                              dividend <= divisor or when the divisor is a
 *                              power of two, so the macro is only entered
 *                              with dividend > divisor.
 * Out:  \result              - quotient.
 * Clobbers: \dividend, \divisor, \curbit and the condition flags.
 *
 * Setup aligns the divisor with the dividend and sets \curbit to the
 * quotient bit that corresponds to the shifted divisor.  The main loop is
 * unrolled four times: it tries divisor, divisor>>1, divisor>>2 and
 * divisor>>3, OR-ing the matching bit of \curbit into \result whenever the
 * subtraction succeeds.
 *
 * NOTE(review): the unrolled steps subtract even when "\curbit, lsr #n" has
 * already shifted down to zero, so \dividend should not be relied upon as
 * the remainder after this macro - confirm before reusing it that way.
 */
40 | .macro ARM_DIV_BODY dividend, divisor, result, curbit | |
41 | ||
42 | #if __LINUX_ARM_ARCH__ >= 5 | |
43 | ||
/* ARMv5+: the difference of the two CLZ counts is the alignment shift. */
44 | clz \curbit, \divisor | |
45 | clz \result, \dividend | |
46 | sub \result, \curbit, \result | |
47 | mov \curbit, #1 | |
48 | mov \divisor, \divisor, lsl \result | |
49 | mov \curbit, \curbit, lsl \result | |
50 | mov \result, #0 | |
51 | ||
52 | #else | |
53 | ||
54 | @ Initially shift the divisor left 3 bits if possible, | |
55 | @ set curbit accordingly. This allows for curbit to be located | |
56 | @ at the left end of each 4 bit nibbles in the division loop | |
57 | @ to save one loop in most cases. | |
58 | tst \divisor, #0xe0000000 | |
59 | moveq \divisor, \divisor, lsl #3 | |
60 | moveq \curbit, #8 | |
61 | movne \curbit, #1 | |
62 | ||
63 | @ Unless the divisor is very big, shift it up in multiples of | |
64 | @ four bits, since this is the amount of unwinding in the main | |
65 | @ division loop. Continue shifting until the divisor is | |
66 | @ larger than the dividend. | |
67 | 1: cmp \divisor, #0x10000000 | |
68 | cmplo \divisor, \dividend | |
69 | movlo \divisor, \divisor, lsl #4 | |
70 | movlo \curbit, \curbit, lsl #4 | |
71 | blo 1b | |
72 | ||
73 | @ For very big divisors, we must shift it a bit at a time, or | |
74 | @ we will be in danger of overflowing. | |
75 | 1: cmp \divisor, #0x80000000 | |
76 | cmplo \divisor, \dividend | |
77 | movlo \divisor, \divisor, lsl #1 | |
78 | movlo \curbit, \curbit, lsl #1 | |
79 | blo 1b | |
80 | ||
81 | mov \result, #0 | |
82 | ||
83 | #endif | |
84 | ||
85 | @ Division loop | |
86 | 1: cmp \dividend, \divisor | |
87 | subhs \dividend, \dividend, \divisor | |
88 | orrhs \result, \result, \curbit | |
89 | cmp \dividend, \divisor, lsr #1 | |
90 | subhs \dividend, \dividend, \divisor, lsr #1 | |
91 | orrhs \result, \result, \curbit, lsr #1 | |
92 | cmp \dividend, \divisor, lsr #2 | |
93 | subhs \dividend, \dividend, \divisor, lsr #2 | |
94 | orrhs \result, \result, \curbit, lsr #2 | |
95 | cmp \dividend, \divisor, lsr #3 | |
96 | subhs \dividend, \dividend, \divisor, lsr #3 | |
97 | orrhs \result, \result, \curbit, lsr #3 | |
/*
 * Loop exit: if the dividend reached zero, Z stays set and the three
 * conditional instructions below are all skipped.  Otherwise the
 * flag-setting shift of \curbit decides: once \curbit shifts out to
 * zero, Z is set and the loop ends.
 */
98 | cmp \dividend, #0 @ Early termination? | |
99 | movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? | |
100 | movne \divisor, \divisor, lsr #4 | |
101 | bne 1b | |
102 | ||
103 | .endm | |
104 | ||
105 | ||
/*
 * ARM_DIV2_ORDER divisor, order
 *
 * Compute \order = log2(\divisor), i.e. the index of the highest set bit.
 * The users in this file only invoke it after "tst divisor, divisor - 1"
 * has shown the divisor to be a power of two, so \order is the right-shift
 * amount that replaces the division.
 *
 * ARMv5+:     \order = 31 - clz(\divisor); \divisor is left untouched.
 * Older CPUs: binary search over 16, 8 and 4 bit steps; \divisor is
 *             shifted down along the way (clobbered) and the condition
 *             flags are modified.
 */
106 | .macro ARM_DIV2_ORDER divisor, order | |
107 | ||
108 | #if __LINUX_ARM_ARCH__ >= 5 | |
109 | ||
110 | clz \order, \divisor | |
111 | rsb \order, \order, #31 | |
112 | ||
113 | #else | |
114 | ||
115 | cmp \divisor, #(1 << 16) | |
116 | movhs \divisor, \divisor, lsr #16 | |
117 | movhs \order, #16 | |
118 | movlo \order, #0 | |
119 | ||
120 | cmp \divisor, #(1 << 8) | |
121 | movhs \divisor, \divisor, lsr #8 | |
122 | addhs \order, \order, #8 | |
123 | ||
124 | cmp \divisor, #(1 << 4) | |
125 | movhs \divisor, \divisor, lsr #4 | |
126 | addhs \order, \order, #4 | |
127 | ||
/*
 * \divisor is now below 16.  Above 4 it contributes 3 (the value 8 for a
 * power of two); otherwise 1, 2 or 4 contribute divisor >> 1 = 0, 1 or 2.
 */
128 | cmp \divisor, #(1 << 2) | |
129 | addhi \order, \order, #3 | |
130 | addls \order, \order, \divisor, lsr #1 | |
131 | ||
132 | #endif | |
133 | ||
134 | .endm | |
135 | ||
136 | ||
/*
 * ARM_MOD_BODY dividend, divisor, order, spare
 *
 * Unsigned 32-bit remainder core (shift-and-subtract, no quotient kept).
 *
 * In:   \dividend, \divisor  - unsigned operands.  Both users in this file
 *                              return early when dividend <= divisor or
 *                              when the divisor is a power of two, so the
 *                              macro is only entered with
 *                              dividend > divisor.
 * Out:  \dividend            - remainder.
 * Clobbers: \divisor, \order, the condition flags, and \spare (written
 *           only by the ARMv5+ setup path).
 *
 * Setup shifts the divisor left and records the number of bit positions in
 * \order.  That leaves order + 1 compare/subtract steps to perform: the
 * loop at the first "1:" after the setup handles them four at a time, and
 * the tail at 2:/3:/4: handles the remaining one to three steps.
 */
137 | .macro ARM_MOD_BODY dividend, divisor, order, spare | |
138 | ||
139 | #if __LINUX_ARM_ARCH__ >= 5 | |
140 | ||
141 | clz \order, \divisor | |
142 | clz \spare, \dividend | |
143 | sub \order, \order, \spare | |
144 | mov \divisor, \divisor, lsl \order | |
145 | ||
146 | #else | |
147 | ||
148 | mov \order, #0 | |
149 | ||
150 | @ Unless the divisor is very big, shift it up in multiples of | |
151 | @ four bits, since this is the amount of unwinding in the main | |
152 | @ division loop. Continue shifting until the divisor is | |
153 | @ larger than the dividend. | |
154 | 1: cmp \divisor, #0x10000000 | |
155 | cmplo \divisor, \dividend | |
156 | movlo \divisor, \divisor, lsl #4 | |
157 | addlo \order, \order, #4 | |
158 | blo 1b | |
159 | ||
160 | @ For very big divisors, we must shift it a bit at a time, or | |
161 | @ we will be in danger of overflowing. | |
162 | 1: cmp \divisor, #0x80000000 | |
163 | cmplo \divisor, \dividend | |
164 | movlo \divisor, \divisor, lsl #1 | |
165 | addlo \order, \order, #1 | |
166 | blo 1b | |
167 | ||
168 | #endif | |
169 | ||
170 | @ Perform all needed subtractions to keep only the remainder. | |
171 | @ Do comparisons in batch of 4 first. | |
172 | subs \order, \order, #3 @ yes, 3 is intended here | |
173 | blt 2f | |
174 | ||
175 | 1: cmp \dividend, \divisor | |
176 | subhs \dividend, \dividend, \divisor | |
177 | cmp \dividend, \divisor, lsr #1 | |
178 | subhs \dividend, \dividend, \divisor, lsr #1 | |
179 | cmp \dividend, \divisor, lsr #2 | |
180 | subhs \dividend, \dividend, \divisor, lsr #2 | |
181 | cmp \dividend, \divisor, lsr #3 | |
182 | subhs \dividend, \dividend, \divisor, lsr #3 | |
183 | cmp \dividend, #1 | |
184 | mov \divisor, \divisor, lsr #4 | |
185 | subges \order, \order, #4 | |
186 | bge 1b | |
187 | ||
/*
 * Finished when no partial batch is left (low two bits of \order clear)
 * or when the remainder is already zero.
 */
188 | tst \order, #3 | |
189 | teqne \dividend, #0 | |
190 | beq 5f | |
191 | ||
192 | @ Either 1, 2 or 3 comparison/subtractions are left. | |
/* order == -3: one step (4:), order == -2: two (3:), order == -1: three. */
193 | 2: cmn \order, #2 | |
194 | blt 4f | |
195 | beq 3f | |
196 | cmp \dividend, \divisor | |
197 | subhs \dividend, \dividend, \divisor | |
198 | mov \divisor, \divisor, lsr #1 | |
199 | 3: cmp \dividend, \divisor | |
200 | subhs \dividend, \dividend, \divisor | |
201 | mov \divisor, \divisor, lsr #1 | |
202 | 4: cmp \dividend, \divisor | |
203 | subhs \dividend, \dividend, \divisor | |
204 | 5: | |
205 | .endm | |
206 | ||
207 | ||
/*
 * __udivsi3 - unsigned 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Clobbers: r1, r2, r3 and the condition flags.
 *
 * Special cases handled before the generic loop:
 *   divisor == 1         - r0 is returned unchanged
 *   divisor == 0         - branch to Ldiv0
 *   dividend <= divisor  - label 11: result is 1 if equal, else 0
 *   divisor power of two - label 12: plain logical right shift
 */
208 | ENTRY(__udivsi3) | |
209 | ||
210 | subs r2, r1, #1 | |
211 | moveq pc, lr | |
212 | bcc Ldiv0 | |
213 | cmp r0, r1 | |
214 | bls 11f | |
215 | tst r1, r2 | |
216 | beq 12f | |
217 | ||
218 | ARM_DIV_BODY r0, r1, r2, r3 | |
219 | ||
220 | mov r0, r2 | |
221 | mov pc, lr | |
222 | ||
/* dividend <= divisor: flags still come from "cmp r0, r1" above. */
223 | 11: moveq r0, #1 | |
224 | movne r0, #0 | |
225 | mov pc, lr | |
226 | ||
/* Power-of-two divisor: shift by log2(divisor). */
227 | 12: ARM_DIV2_ORDER r1, r2 | |
228 | ||
229 | mov r0, r0, lsr r2 | |
230 | mov pc, lr | |
231 | ||
232 | ||
/*
 * __umodsi3 - unsigned 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = dividend modulo divisor
 * Clobbers: r1, r2, r3 and the condition flags.
 *
 * The conditional sequence resolves every easy case without branching:
 *   divisor == 0                 - branch to Ldiv0
 *   divisor == 1 or r0 == r1     - result 0
 *   dividend < divisor           - r0 is returned unchanged
 *   divisor power of two         - result r0 & (divisor - 1)
 * "movls" returns for all of these; only dividend > divisor with a
 * divisor that is not a power of two falls through to ARM_MOD_BODY.
 */
233 | ENTRY(__umodsi3) | |
234 | ||
235 | subs r2, r1, #1 @ compare divisor with 1 | |
236 | bcc Ldiv0 | |
237 | cmpne r0, r1 @ compare dividend with divisor | |
238 | moveq r0, #0 | |
239 | tsthi r1, r2 @ see if divisor is power of 2 | |
240 | andeq r0, r0, r2 | |
241 | movls pc, lr | |
242 | ||
243 | ARM_MOD_BODY r0, r1, r2, r3 | |
244 | ||
245 | mov pc, lr | |
246 | ||
247 | ||
/*
 * __divsi3 - signed 32-bit division.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = quotient
 * Clobbers: r1, r2, r3, ip and the condition flags.
 *
 * ip holds dividend ^ divisor, so its sign bit is the sign of the result.
 * The work is done on absolute values (r3 = |dividend|, r1 = |divisor|)
 * and the sign is applied at the end.
 *
 * Special cases:
 *   divisor == 0           - branch to Ldiv0
 *   |divisor| == 1         - label 10: r0, negated if the divisor was -1
 *   |dividend| <= |divisor| - label 11: 0, or +1/-1 when they are equal
 *   |divisor| power of two - label 12: shift, then apply the sign
 */
248 | ENTRY(__divsi3) | |
249 | ||
250 | cmp r1, #0 | |
251 | eor ip, r0, r1 @ save the sign of the result. | |
252 | beq Ldiv0 | |
253 | rsbmi r1, r1, #0 @ loops below use unsigned. | |
254 | subs r2, r1, #1 @ division by 1 or -1 ? | |
255 | beq 10f | |
256 | movs r3, r0 | |
257 | rsbmi r3, r0, #0 @ positive dividend value | |
258 | cmp r3, r1 | |
259 | bls 11f | |
260 | tst r1, r2 @ divisor is power of 2 ? | |
261 | beq 12f | |
262 | ||
263 | ARM_DIV_BODY r3, r1, r0, r2 | |
264 | ||
265 | cmp ip, #0 | |
266 | rsbmi r0, r0, #0 | |
267 | mov pc, lr | |
268 | ||
/* ip ^ r0 has the sign of the original divisor: negative means divide by -1. */
269 | 10: teq ip, r0 @ same sign ? | |
270 | rsbmi r0, r0, #0 | |
271 | mov pc, lr | |
272 | ||
/* Flags from "cmp r3, r1": lower gives 0, equal gives (ip >> 31) | 1. */
273 | 11: movlo r0, #0 | |
274 | moveq r0, ip, asr #31 | |
275 | orreq r0, r0, #1 | |
276 | mov pc, lr | |
277 | ||
278 | 12: ARM_DIV2_ORDER r1, r2 | |
279 | ||
280 | cmp ip, #0 | |
281 | mov r0, r3, lsr r2 | |
282 | rsbmi r0, r0, #0 | |
283 | mov pc, lr | |
284 | ||
285 | ||
/*
 * __modsi3 - signed 32-bit remainder.
 *
 * In:   r0 = dividend, r1 = divisor
 * Out:  r0 = remainder, carrying the sign of the dividend
 * Clobbers: r1, r2, r3, ip and the condition flags.
 *
 * ip keeps the original dividend so that its sign can be applied to the
 * result at label 10.  The computation itself runs on |dividend| and
 * |divisor| and uses the same branch-free fast-path sequence as
 * __umodsi3 (divisor 1, equal operands, dividend smaller than divisor,
 * power-of-two divisor); only the remaining case enters ARM_MOD_BODY.
 * A zero divisor branches to Ldiv0.
 */
286 | ENTRY(__modsi3) | |
287 | ||
288 | cmp r1, #0 | |
289 | beq Ldiv0 | |
290 | rsbmi r1, r1, #0 @ loops below use unsigned. | |
291 | movs ip, r0 @ preserve sign of dividend | |
292 | rsbmi r0, r0, #0 @ if negative make positive | |
293 | subs r2, r1, #1 @ compare divisor with 1 | |
294 | cmpne r0, r1 @ compare dividend with divisor | |
295 | moveq r0, #0 | |
296 | tsthi r1, r2 @ see if divisor is power of 2 | |
297 | andeq r0, r0, r2 | |
298 | bls 10f | |
299 | ||
300 | ARM_MOD_BODY r0, r1, r2, r3 | |
301 | ||
/* Give the remainder the sign of the original dividend. */
302 | 10: cmp ip, #0 | |
303 | rsbmi r0, r0, #0 | |
304 | mov pc, lr | |
305 | ||
306 | ||
/*
 * Ldiv0 - common division-by-zero exit for the four routines in this file.
 *
 * Saves lr, calls __div0 (defined outside this file) and then returns 0 to
 * the original caller.
 *
 * The return address is pushed with an 8-byte stack adjustment rather than
 * 4, so that sp stays 8-byte aligned across the call to __div0 as the
 * AAPCS (EABI) requires at public interfaces.  The unused extra word is
 * harmless under the old ABI.
 */
307 | Ldiv0: | |
308 | ||
309 | str lr, [sp, #-8]! | |
310 | bl __div0 | |
311 | mov r0, #0 @ About as wrong as it could be. | |
312 | ldr pc, [sp], #8 | |
313 | ||
314 |