Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/arch/arm/lib/lib1funcs.S: Optimized ARM division routines | |
3 | * | |
2f82af08 | 4 | * Author: Nicolas Pitre <nico@fluxnic.net> |
1da177e4 LT |
5 | * - contributed to gcc-3.4 on Sep 30, 2003 |
6 | * - adapted for the Linux kernel on Oct 2, 2003 | |
7 | */ | |
8 | ||
9 | /* Copyright 1995, 1996, 1998, 1999, 2000, 2003 Free Software Foundation, Inc. | |
10 | ||
11 | This file is free software; you can redistribute it and/or modify it | |
12 | under the terms of the GNU General Public License as published by the | |
13 | Free Software Foundation; either version 2, or (at your option) any | |
14 | later version. | |
15 | ||
16 | In addition to the permissions in the GNU General Public License, the | |
17 | Free Software Foundation gives you unlimited permission to link the | |
18 | compiled version of this file into combinations with other programs, | |
19 | and to distribute those combinations without any restriction coming | |
20 | from the use of this file. (The General Public License restrictions | |
21 | do apply in other respects; for example, they cover modification of | |
22 | the file, and distribution when not linked into a combine | |
23 | executable.) | |
24 | ||
25 | This file is distributed in the hope that it will be useful, but | |
26 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
27 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
28 | General Public License for more details. | |
29 | ||
30 | You should have received a copy of the GNU General Public License | |
31 | along with this program; see the file COPYING. If not, write to | |
32 | the Free Software Foundation, 59 Temple Place - Suite 330, | |
33 | Boston, MA 02111-1307, USA. */ | |
34 | ||
35 | ||
36 | #include <linux/linkage.h> | |
37 | #include <asm/assembler.h> | |
38 | ||
39 | ||
/*
 * ARM_DIV_BODY - core of the unsigned 32-bit division.
 *
 *   \dividend  in: value to divide; modified while the quotient is built
 *   \divisor   in: value to divide by; shifted up, then back down
 *   \result    out: quotient
 *   \curbit    scratch: quotient bit matching the current divisor shift
 *
 * Both users in this file expand it only after establishing that the
 * dividend is larger than the divisor and that the divisor is not 0,
 * not 1 and not another power of two.
 */
40 | .macro ARM_DIV_BODY dividend, divisor, result, curbit | |
41 | ||
42 | #if __LINUX_ARM_ARCH__ >= 5 | |
43 | ||
/*
 * With clz: shift = clz(divisor) - clz(dividend) lines the divisor's
 * top set bit up with the dividend's; curbit becomes 1 << shift.
 */
44 | clz \curbit, \divisor | |
45 | clz \result, \dividend | |
46 | sub \result, \curbit, \result | |
47 | mov \curbit, #1 | |
48 | mov \divisor, \divisor, lsl \result | |
49 | mov \curbit, \curbit, lsl \result | |
50 | mov \result, #0 | |
51 | ||
52 | #else | |
53 | ||
54 | @ Initially shift the divisor left 3 bits if possible, | |
55 | @ set curbit accordingly. This allows for curbit to be located | |
56 | @ at the left end of each 4 bit nibbles in the division loop | |
57 | @ to save one loop in most cases. | |
58 | tst \divisor, #0xe0000000 | |
59 | moveq \divisor, \divisor, lsl #3 | |
60 | moveq \curbit, #8 | |
61 | movne \curbit, #1 | |
62 | ||
63 | @ Unless the divisor is very big, shift it up in multiples of | |
64 | @ four bits, since this is the amount of unwinding in the main | |
65 | @ division loop. Continue shifting until the divisor is | |
66 | @ larger than the dividend. | |
67 | 1: cmp \divisor, #0x10000000 | |
68 | cmplo \divisor, \dividend | |
69 | movlo \divisor, \divisor, lsl #4 | |
70 | movlo \curbit, \curbit, lsl #4 | |
71 | blo 1b | |
72 | ||
73 | @ For very big divisors, we must shift it a bit at a time, or | |
74 | @ we will be in danger of overflowing. | |
75 | 1: cmp \divisor, #0x80000000 | |
76 | cmplo \divisor, \dividend | |
77 | movlo \divisor, \divisor, lsl #1 | |
78 | movlo \curbit, \curbit, lsl #1 | |
79 | blo 1b | |
80 | ||
81 | mov \result, #0 | |
82 | ||
83 | #endif | |
84 | ||
/*
 * Each pass tries the divisor at four consecutive shifts (0..3 bits
 * down) and ORs the matching curbit into the quotient, then moves both
 * curbit and the divisor down by four bits.  The loop stops when the
 * dividend has reached zero, or when the flag-setting shift of curbit
 * produces zero (Z set, so the following movne/bne are skipped).
 */
85 | @ Division loop | |
86 | 1: cmp \dividend, \divisor | |
87 | subhs \dividend, \dividend, \divisor | |
88 | orrhs \result, \result, \curbit | |
89 | cmp \dividend, \divisor, lsr #1 | |
90 | subhs \dividend, \dividend, \divisor, lsr #1 | |
91 | orrhs \result, \result, \curbit, lsr #1 | |
92 | cmp \dividend, \divisor, lsr #2 | |
93 | subhs \dividend, \dividend, \divisor, lsr #2 | |
94 | orrhs \result, \result, \curbit, lsr #2 | |
95 | cmp \dividend, \divisor, lsr #3 | |
96 | subhs \dividend, \dividend, \divisor, lsr #3 | |
97 | orrhs \result, \result, \curbit, lsr #3 | |
98 | cmp \dividend, #0 @ Early termination? | |
99 | movnes \curbit, \curbit, lsr #4 @ No, any more bits to do? | |
100 | movne \divisor, \divisor, lsr #4 | |
101 | bne 1b | |
102 | ||
103 | .endm | |
104 | ||
105 | ||
/*
 * ARM_DIV2_ORDER - compute \order = log2(\divisor) for a power-of-two
 * divisor, i.e. the right-shift count that replaces the division.
 *
 *   \divisor   in: power of two (users in this file check
 *                  divisor & (divisor - 1) == 0 before expanding this);
 *                  shifted down, and so lost, on the path without clz
 *   \order     out: bit index of the single set bit
 */
106 | .macro ARM_DIV2_ORDER divisor, order | |
107 | ||
108 | #if __LINUX_ARM_ARCH__ >= 5 | |
109 | ||
/* order = 31 - (number of leading zero bits) */
110 | clz \order, \divisor | |
111 | rsb \order, \order, #31 | |
112 | ||
113 | #else | |
114 | ||
/*
 * Binary search: whenever the divisor is at least 1 << n (n = 16, 8, 4),
 * shift it down by n and account for n in \order.
 */
115 | cmp \divisor, #(1 << 16) | |
116 | movhs \divisor, \divisor, lsr #16 | |
117 | movhs \order, #16 | |
118 | movlo \order, #0 | |
119 | ||
120 | cmp \divisor, #(1 << 8) | |
121 | movhs \divisor, \divisor, lsr #8 | |
122 | addhs \order, \order, #8 | |
123 | ||
124 | cmp \divisor, #(1 << 4) | |
125 | movhs \divisor, \divisor, lsr #4 | |
126 | addhs \order, \order, #4 | |
127 | ||
/*
 * For a power-of-two input the divisor is now 1, 2, 4 or 8.
 * 8 contributes 3; for 1, 2 and 4 the value divisor >> 1 (0, 1, 2)
 * is exactly the remaining log2.
 */
128 | cmp \divisor, #(1 << 2) | |
129 | addhi \order, \order, #3 | |
130 | addls \order, \order, \divisor, lsr #1 | |
131 | ||
132 | #endif | |
133 | ||
134 | .endm | |
135 | ||
136 | ||
/*
 * ARM_MOD_BODY - core of the unsigned 32-bit modulo.
 *
 *   \dividend  in: value to divide; out: remainder
 *   \divisor   in: value to divide by; shifted up, then back down
 *   \order     scratch: number of bit positions the divisor was shifted
 *              up, then reused as a countdown of remaining steps
 *   \spare     scratch: written only on the clz path
 *
 * With the divisor shifted up by "order" bits, order + 1
 * compare/subtract steps are needed.  Both users in this file expand
 * the macro only when the dividend is larger than the divisor.
 */
137 | .macro ARM_MOD_BODY dividend, divisor, order, spare | |
138 | ||
139 | #if __LINUX_ARM_ARCH__ >= 5 | |
140 | ||
/* order = clz(divisor) - clz(dividend); align the top set bits */
141 | clz \order, \divisor | |
142 | clz \spare, \dividend | |
143 | sub \order, \order, \spare | |
144 | mov \divisor, \divisor, lsl \order | |
145 | ||
146 | #else | |
147 | ||
148 | mov \order, #0 | |
149 | ||
150 | @ Unless the divisor is very big, shift it up in multiples of | |
151 | @ four bits, since this is the amount of unwinding in the main | |
152 | @ division loop. Continue shifting until the divisor is | |
153 | @ larger than the dividend. | |
154 | 1: cmp \divisor, #0x10000000 | |
155 | cmplo \divisor, \dividend | |
156 | movlo \divisor, \divisor, lsl #4 | |
157 | addlo \order, \order, #4 | |
158 | blo 1b | |
159 | ||
160 | @ For very big divisors, we must shift it a bit at a time, or | |
161 | @ we will be in danger of overflowing. | |
162 | 1: cmp \divisor, #0x80000000 | |
163 | cmplo \divisor, \dividend | |
164 | movlo \divisor, \divisor, lsl #1 | |
165 | addlo \order, \order, #1 | |
166 | blo 1b | |
167 | ||
168 | #endif | |
169 | ||
170 | @ Perform all needed subtractions to keep only the remainder. | |
171 | @ Do comparisons in batch of 4 first. | |
/*
 * order + 1 steps are pending, so a full batch of four is possible
 * only when order - 3 >= 0; otherwise go straight to the tail at 2.
 */
172 | subs \order, \order, #3 @ yes, 3 is intended here | |
173 | blt 2f | |
174 | ||
175 | 1: cmp \dividend, \divisor | |
176 | subhs \dividend, \dividend, \divisor | |
177 | cmp \dividend, \divisor, lsr #1 | |
178 | subhs \dividend, \dividend, \divisor, lsr #1 | |
179 | cmp \dividend, \divisor, lsr #2 | |
180 | subhs \dividend, \dividend, \divisor, lsr #2 | |
181 | cmp \dividend, \divisor, lsr #3 | |
182 | subhs \dividend, \dividend, \divisor, lsr #3 | |
/*
 * "cmp #1" yields GE only for a non-zero dividend; in that case
 * order is decremented by four and the loop repeats while the
 * result of that decrement is still >= 0.
 */
183 | cmp \dividend, #1 | |
184 | mov \divisor, \divisor, lsr #4 | |
185 | subges \order, \order, #4 | |
186 | bge 1b | |
187 | ||
/*
 * Finished if the low two bits of order are clear (no partial
 * batch is left) or if the dividend is already zero.
 */
188 | tst \order, #3 | |
189 | teqne \dividend, #0 | |
190 | beq 5f | |
191 | ||
192 | @ Either 1, 2 or 3 comparison/subtractions are left. | |
/*
 * order is -3, -2 or -1 here, meaning 1, 2 or 3 steps remain:
 * below -2 jumps to the last step, -2 to the last two, and -1
 * falls through all three.
 */
193 | 2: cmn \order, #2 | |
194 | blt 4f | |
195 | beq 3f | |
196 | cmp \dividend, \divisor | |
197 | subhs \dividend, \dividend, \divisor | |
198 | mov \divisor, \divisor, lsr #1 | |
199 | 3: cmp \dividend, \divisor | |
200 | subhs \dividend, \dividend, \divisor | |
201 | mov \divisor, \divisor, lsr #1 | |
202 | 4: cmp \dividend, \divisor | |
203 | subhs \dividend, \dividend, \divisor | |
204 | 5: | |
205 | .endm | |
206 | ||
207 | ||
/*
 * __udivsi3 / __aeabi_uidiv - unsigned 32-bit division.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient
 *
 * r1, r2 and r3 are used as scratch.  A zero divisor branches to Ldiv0.
 */
208 | ENTRY(__udivsi3) | |
ba95e4e4 | 209 | ENTRY(__aeabi_uidiv) |
1da177e4 LT |
210 | |
/*
 * r2 = divisor - 1.  Z set: divisor is 1, r0 is already the quotient.
 * Carry clear (borrow): divisor is 0.
 */
211 | subs r2, r1, #1 | |
212 | moveq pc, lr | |
213 | bcc Ldiv0 | |
/* dividend <= divisor: the quotient is 1 or 0, decided at 11 */
214 | cmp r0, r1 | |
215 | bls 11f | |
/* divisor & (divisor - 1) == 0: power of two, shift instead at 12 */
216 | tst r1, r2 | |
217 | beq 12f | |
218 | ||
219 | ARM_DIV_BODY r0, r1, r2, r3 | |
220 | ||
221 | mov r0, r2 | |
222 | mov pc, lr | |
223 | ||
/* flags are still those of "cmp r0, r1": equal gives 1, lower gives 0 */
224 | 11: moveq r0, #1 | |
225 | movne r0, #0 | |
226 | mov pc, lr | |
227 | ||
/* r2 = log2(divisor); the quotient is dividend >> r2 */
228 | 12: ARM_DIV2_ORDER r1, r2 | |
229 | ||
230 | mov r0, r0, lsr r2 | |
231 | mov pc, lr | |
232 | ||
93ed3970 CM |
233 | ENDPROC(__udivsi3) |
234 | ENDPROC(__aeabi_uidiv) | |
1da177e4 LT |
235 | |
/*
 * __umodsi3 - unsigned 32-bit modulo.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = remainder
 *
 * r1, r2 and r3 are used as scratch.  A zero divisor branches to Ldiv0.
 *
 * The conditional sequence resolves the trivial cases without branches:
 *   divisor == 1 or dividend == divisor  -> 0
 *   dividend < divisor                   -> dividend, unchanged
 *   divisor is a power of two            -> dividend & (divisor - 1)
 * Each of them leaves the LS condition true (carry clear or Z set), so
 * the single "movls pc, lr" returns for all of them.
 */
236 | ENTRY(__umodsi3) | |
237 | ||
238 | subs r2, r1, #1 @ compare divisor with 1 | |
239 | bcc Ldiv0 | |
240 | cmpne r0, r1 @ compare dividend with divisor | |
241 | moveq r0, #0 | |
242 | tsthi r1, r2 @ see if divisor is power of 2 | |
243 | andeq r0, r0, r2 | |
244 | movls pc, lr | |
245 | ||
246 | ARM_MOD_BODY r0, r1, r2, r3 | |
247 | ||
248 | mov pc, lr | |
249 | ||
93ed3970 | 250 | ENDPROC(__umodsi3) |
1da177e4 LT |
251 | |
/*
 * __divsi3 / __aeabi_idiv - signed 32-bit division.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient
 *
 * The work is done on absolute values.  ip = r0 ^ r1 is computed first,
 * so its bit 31 tells whether the quotient has to be negated.
 * r1, r2, r3 and ip are used as scratch.  A zero divisor branches to
 * Ldiv0.
 */
252 | ENTRY(__divsi3) | |
ba95e4e4 | 253 | ENTRY(__aeabi_idiv) |
1da177e4 LT |
254 | |
255 | cmp r1, #0 | |
256 | eor ip, r0, r1 @ save the sign of the result. | |
257 | beq Ldiv0 | |
258 | rsbmi r1, r1, #0 @ loops below use unsigned. | |
259 | subs r2, r1, #1 @ division by 1 or -1 ? | |
260 | beq 10f | |
261 | movs r3, r0 | |
262 | rsbmi r3, r0, #0 @ positive dividend value | |
/* |dividend| <= |divisor|: the quotient is 0 or +/-1, decided at 11 */
263 | cmp r3, r1 | |
264 | bls 11f | |
265 | tst r1, r2 @ divisor is power of 2 ? | |
266 | beq 12f | |
267 | ||
268 | ARM_DIV_BODY r3, r1, r0, r2 | |
269 | ||
/* apply the sign saved in ip */
270 | cmp ip, #0 | |
271 | rsbmi r0, r0, #0 | |
272 | mov pc, lr | |
273 | ||
/*
 * Divisor was 1 or -1.  ip ^ r0 equals the original divisor, so the
 * N flag is set exactly when the dividend has to be negated.
 */
274 | 10: teq ip, r0 @ same sign ? | |
275 | rsbmi r0, r0, #0 | |
276 | mov pc, lr | |
277 | ||
/*
 * Flags are still those of "cmp r3, r1": lower gives 0; equal gives
 * (ip >> 31, arithmetic) | 1, which is +1 or -1 according to ip's sign.
 */
278 | 11: movlo r0, #0 | |
279 | moveq r0, ip, asr #31 | |
280 | orreq r0, r0, #1 | |
281 | mov pc, lr | |
282 | ||
/* power-of-two divisor: shift |dividend| by log2, then apply the sign */
283 | 12: ARM_DIV2_ORDER r1, r2 | |
284 | ||
285 | cmp ip, #0 | |
286 | mov r0, r3, lsr r2 | |
287 | rsbmi r0, r0, #0 | |
288 | mov pc, lr | |
289 | ||
93ed3970 CM |
290 | ENDPROC(__divsi3) |
291 | ENDPROC(__aeabi_idiv) | |
1da177e4 LT |
292 | |
/*
 * __modsi3 - signed 32-bit modulo.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = remainder, carrying the sign of the dividend
 *
 * The work is done on absolute values; ip keeps the original dividend
 * so that its sign can be applied to the result at label 10.
 * r1, r2, r3 and ip are used as scratch.  A zero divisor branches to
 * Ldiv0.
 *
 * The conditional sequence after the sign handling resolves the trivial
 * cases (|divisor| == 1, |dividend| <= |divisor|, power-of-two divisor)
 * and leaves the LS condition true for each, which "bls 10f" uses to
 * skip the generic loop.
 */
293 | ENTRY(__modsi3) | |
294 | ||
295 | cmp r1, #0 | |
296 | beq Ldiv0 | |
297 | rsbmi r1, r1, #0 @ loops below use unsigned. | |
298 | movs ip, r0 @ preserve sign of dividend | |
299 | rsbmi r0, r0, #0 @ if negative make positive | |
300 | subs r2, r1, #1 @ compare divisor with 1 | |
301 | cmpne r0, r1 @ compare dividend with divisor | |
302 | moveq r0, #0 | |
303 | tsthi r1, r2 @ see if divisor is power of 2 | |
304 | andeq r0, r0, r2 | |
305 | bls 10f | |
306 | ||
307 | ARM_MOD_BODY r0, r1, r2, r3 | |
308 | ||
/* negate the remainder if the original dividend was negative */
309 | 10: cmp ip, #0 | |
310 | rsbmi r0, r0, #0 | |
311 | mov pc, lr | |
312 | ||
93ed3970 CM |
313 | ENDPROC(__modsi3) |
314 | ||
ba95e4e4 NP |
315 | #ifdef CONFIG_AEABI |
316 | ||
/*
 * __aeabi_uidivmod - unsigned 32-bit division returning both results.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient, r1 = remainder
 *
 * The operands are saved on the stack, __aeabi_uidiv produces the
 * quotient, and the remainder is rebuilt as
 * dividend - quotient * divisor.  r2 and r3 are used as scratch.
 * NOTE(review): ip is pushed and popped although it is not used here;
 * presumably this keeps the stack adjustment at 16 bytes - confirm.
 */
317 | ENTRY(__aeabi_uidivmod) | |
318 | ||
319 | stmfd sp!, {r0, r1, ip, lr} | |
320 | bl __aeabi_uidiv | |
/* reload: r1 = original dividend, r2 = original divisor */
321 | ldmfd sp!, {r1, r2, ip, lr} | |
322 | mul r3, r0, r2 | |
323 | sub r1, r1, r3 | |
324 | mov pc, lr | |
325 | ||
93ed3970 CM |
326 | ENDPROC(__aeabi_uidivmod) |
327 | ||
ba95e4e4 NP |
/*
 * __aeabi_idivmod - signed 32-bit division returning both results.
 *
 *   in:  r0 = dividend, r1 = divisor
 *   out: r0 = quotient, r1 = remainder
 *
 * The operands are saved on the stack, __aeabi_idiv produces the
 * quotient, and the remainder is rebuilt as
 * dividend - quotient * divisor.  r2 and r3 are used as scratch.
 * NOTE(review): ip is pushed and popped around the call; presumably
 * this keeps the stack adjustment at 16 bytes - confirm.
 */
328 | ENTRY(__aeabi_idivmod) |
329 | ||
330 | stmfd sp!, {r0, r1, ip, lr} | |
331 | bl __aeabi_idiv | |
/* reload: r1 = original dividend, r2 = original divisor */
332 | ldmfd sp!, {r1, r2, ip, lr} | |
333 | mul r3, r0, r2 | |
334 | sub r1, r1, r3 | |
335 | mov pc, lr | |
336 | ||
93ed3970 CM |
337 | ENDPROC(__aeabi_idivmod) |
338 | ||
ba95e4e4 | 339 | #endif |
1da177e4 LT |
340 | |
/*
 * Ldiv0 - common division-by-zero exit for the routines in this file.
 *
 * Saves lr, calls __div0 (defined outside this file's visible text),
 * then returns 0 in r0 to the original caller.
 * NOTE(review): 8 bytes are reserved for the 4-byte lr; presumably
 * this keeps sp 8-byte aligned across the call - confirm.
 */
341 | Ldiv0: | |
342 | ||
499b2ea1 | 343 | str lr, [sp, #-8]! |
1da177e4 LT |
344 | bl __div0 |
345 | mov r0, #0 @ About as wrong as it could be. | |
/* pop the saved lr straight into pc and release the 8 bytes */
499b2ea1 | 346 | ldr pc, [sp], #8 |
1da177e4 LT |
347 | |
348 |