Commit | Line | Data |
---|---|---|
4494ce4f KM |
1 | /* SPDX-License-Identifier: GPL-2.0+ WITH GCC-exception-2.0 |
2 | ||
3 | Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, | |
3b041227 TY |
4 | 2004, 2005, 2006 |
5 | Free Software Foundation, Inc. | |
4494ce4f | 6 | */ |
3b041227 TY |
7 | |
8 | !! libgcc routines for the Renesas / SuperH SH CPUs. | |
9 | !! Contributed by Steve Chamberlain. | |
10 | !! sac@cygnus.com | |
11 | ||
12 | !! ashiftrt_r4_x, ___ashrsi3, ___ashlsi3, ___lshrsi3 routines | |
13 | !! recoded in assembly by Toshiyasu Morita | |
14 | !! tm@netcom.com | |
15 | ||
16 | /* SH2 optimizations for ___ashrsi3, ___ashlsi3, ___lshrsi3 and | |
17 | ELF local label prefixes by J"orn Rennecke | |
18 | amylaar@cygnus.com */ | |
19 | ||
20 | /* This code uses shld, thus is not suitable for SH1 / SH2. */ | |
21 | ||
22 | /* Signed / unsigned division without use of FPU, optimized for SH4. | |
23 | Uses a lookup table for divisors in the range -128 .. +128, and | |
24 | div1 with case distinction for larger divisors in three more ranges. | |
25 | The code is lumped together with the table to allow the use of mova. */ | |
26 | #ifdef CONFIG_CPU_LITTLE_ENDIAN /* Byte offsets inside a 32-bit word in memory: L_LSB = bits 7..0, L_LSWMSB = bits 15..8, L_MSWLSB = bits 23..16. */ | |
27 | #define L_LSB 0 | |
28 | #define L_LSWMSB 1 | |
29 | #define L_MSWLSB 2 | |
30 | #else /* big-endian: the same bytes at mirrored offsets */ | |
31 | #define L_LSB 3 | |
32 | #define L_LSWMSB 2 | |
33 | #define L_MSWLSB 1 | |
34 | #endif /* CONFIG_CPU_LITTLE_ENDIAN */ | |
35 | ||
36 | .balign 4 | |
37 | .global __udivsi3_i4i | |
38 | .global __udivsi3_i4 | |
39 | .set __udivsi3_i4, __udivsi3_i4i /* __udivsi3_i4 is an alias for the same entry point */ | |
40 | .type __udivsi3_i4i, @function | |
41 | __udivsi3_i4i: /* Unsigned divide: r4 = dividend, r5 = divisor, quotient returned in r0. r4 and r5 are saved on the stack and restored before returning; r1 is scratch. The T bit and, on the table path, MACH/MACL are modified. */ | |
42 | mov.w c128_w, r1 /* r1 = 128, the largest divisor handled by the lookup tables */ | |
43 | div0u /* clear the M, Q and T bits (unsigned division setup) */ | |
44 | mov r4,r0 | |
45 | shlr8 r0 /* r0 = dividend >> 8 */ | |
46 | cmp/hi r1,r5 /* T = (divisor > 128), unsigned compare */ | |
47 | extu.w r5,r1 /* r1 = low 16 bits of the divisor */ | |
48 | bf udiv_le128 /* divisor <= 128: use the inverse table */ | |
49 | cmp/eq r5,r1 /* T = (divisor fits in 16 bits) */ | |
50 | bf udiv_ge64k /* divisor >= 65536 */ | |
51 | shlr r0 | |
52 | mov r5,r1 /* keep the unshifted divisor so it can be saved below */ | |
53 | shll16 r5 | |
54 | mov.l r4,@-r15 /* save caller's r4 */ | |
55 | div1 r5,r0 | |
56 | mov.l r1,@-r15 /* save caller's r5 (copied to r1 before the shift) */ | |
57 | div1 r5,r0 | |
58 | div1 r5,r0 | |
59 | bra udiv_25 /* continue in the div1 sequence shared with __sdivsi3_i4i */ | |
60 | div1 r5,r0 /* branch delay slot */ | |
61 | ||
62 | div_le128: /* entered from __sdivsi3_i4i with r4 and r5 already saved on the stack */ | |
63 | mova div_table_ix,r0 | |
64 | bra div_le128_2 | |
65 | mov.b @(r0,r5),r1 /* delay slot: r1 = div_table_ix[divisor], sign-extended */ | |
66 | udiv_le128: | |
67 | mov.l r4,@-r15 /* save caller's r4 */ | |
68 | mova div_table_ix,r0 | |
69 | mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor], sign-extended */ | |
70 | mov.l r5,@-r15 /* save caller's r5 */ | |
71 | div_le128_2: | |
72 | mova div_table_inv,r0 | |
73 | mov.l @(r0,r1),r1 /* r1 = inverse-table word at div_table_inv + r1 */ | |
74 | mov r5,r0 | |
75 | tst #0xfe,r0 /* T = 1 when the divisor is 0 or 1 */ | |
76 | mova div_table_clz,r0 | |
77 | dmulu.l r1,r4 /* MACH:MACL = r1 * r4, unsigned 32x32 -> 64 */ | |
78 | mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor], the final shift count */ | |
79 | bt/s div_by_1 | |
80 | mov r4,r0 /* delay slot: r0 = dividend, which is already the result when dividing by 1 */ | |
81 | mov.l @r15+,r5 /* restore r5 */ | |
82 | sts mach,r0 /* r0 = upper 32 bits of the product */ | |
83 | /* clrt */ | |
84 | addc r4,r0 /* r0 += dividend + T; T is still 0 here because the bt/s was not taken; carry out goes to T */ | |
85 | mov.l @r15+,r4 /* restore r4 */ | |
86 | rotcr r0 /* rotate right through T, bringing the carry back in as bit 31 */ | |
87 | rts | |
88 | shld r1,r0 /* delay slot: shift by the table count (a negative count is a logical right shift) */ | |
89 | ||
90 | div_by_1_neg: | |
91 | neg r4,r0 /* r0 = -r4 */ | |
92 | div_by_1: | |
93 | mov.l @r15+,r5 /* restore r5 */ | |
94 | rts | |
95 | mov.l @r15+,r4 /* delay slot: restore r4 */ | |
96 | ||
97 | div_ge64k: /* entered from __sdivsi3_i4i; T was set by cmp/hi r0,r5 in that branch's delay slot */ | |
98 | bt/s div_r8 /* taken when divisor > (dividend >> 8) */ | |
99 | div0u /* delay slot */ | |
100 | shll8 r5 | |
101 | bra div_ge64k_2 | |
102 | div1 r5,r0 /* delay slot */ | |
103 | udiv_ge64k: | |
104 | cmp/hi r0,r5 /* T = (divisor > (dividend >> 8)) */ | |
105 | mov r5,r1 /* keep the unshifted divisor so it can be saved below */ | |
106 | bt udiv_r8 | |
107 | shll8 r5 | |
108 | mov.l r4,@-r15 /* save caller's r4 */ | |
109 | div1 r5,r0 | |
110 | mov.l r1,@-r15 /* save caller's r5 */ | |
111 | div_ge64k_2: | |
112 | div1 r5,r0 | |
113 | mov.l zero_l,r1 /* r1 = 0 */ | |
114 | .rept 4 | |
115 | div1 r5,r0 | |
116 | .endr | |
117 | mov.l r1,@-r15 /* push a zeroed scratch word */ | |
118 | div1 r5,r0 | |
119 | mov.w m256_w,r1 /* r1 = 0xff00 sign-extended, i.e. 0xffffff00 */ | |
120 | div1 r5,r0 | |
121 | mov.b r0,@(L_LSWMSB,r15) /* store the low byte of r0 into bits 15..8 of the scratch word */ | |
122 | xor r4,r0 | |
123 | and r1,r0 | |
124 | bra div_ge64k_end | |
125 | xor r4,r0 /* delay slot: after the xor/and/xor, r0 keeps its upper 24 bits and takes its low byte from r4 */ | |
126 | ||
127 | div_r8: /* entered from div_ge64k with r4 and r5 already saved on the stack */ | |
128 | shll16 r4 | |
129 | bra div_r8_2 | |
130 | shll8 r4 /* delay slot: together with the shll16, r4 <<= 24 */ | |
131 | udiv_r8: | |
132 | mov.l r4,@-r15 /* save caller's r4 */ | |
133 | shll16 r4 | |
134 | clrt | |
135 | shll8 r4 | |
136 | mov.l r5,@-r15 /* save caller's r5 */ | |
137 | div_r8_2: | |
138 | rotcl r4 | |
139 | mov r0,r1 | |
140 | div1 r5,r1 | |
141 | mov r4,r0 | |
142 | rotcl r0 | |
143 | mov r5,r4 /* copy the divisor to r4 so r5 can be restored before the last div1 */ | |
144 | div1 r5,r1 | |
145 | .rept 5 | |
146 | rotcl r0; div1 r5,r1 | |
147 | .endr | |
148 | rotcl r0 | |
149 | mov.l @r15+,r5 /* restore r5 */ | |
150 | div1 r4,r1 | |
151 | mov.l @r15+,r4 /* restore r4 */ | |
152 | rts | |
153 | rotcl r0 /* delay slot: rotate the final T bit into r0 */ | |
154 | ||
155 | .global __sdivsi3_i4i | |
156 | .global __sdivsi3_i4 | |
157 | .global __sdivsi3 | |
158 | .set __sdivsi3_i4, __sdivsi3_i4i | |
159 | .set __sdivsi3, __sdivsi3_i4i | |
160 | .type __sdivsi3_i4i, @function | |
161 | /* This is link-compatible with a __sdivsi3 call, | |
162 | but we effectively clobber only r1. */ | |
163 | __sdivsi3_i4i: /* Signed divide: r4 = dividend, r5 = divisor, quotient returned in r0. Negative operands are negated first, and the quotient is negated at the end when the operand signs differ. r4 and r5 are saved on the stack and restored. */ | |
164 | mov.l r4,@-r15 /* save caller's r4 */ | |
165 | cmp/pz r5 /* T = (divisor >= 0) */ | |
166 | mov.w c128_w, r1 /* r1 = 128 */ | |
167 | bt/s pos_divisor | |
168 | cmp/pz r4 /* delay slot: T = (dividend >= 0) */ | |
169 | mov.l r5,@-r15 /* save caller's r5 */ | |
170 | neg r5,r5 /* divisor was negative: negate it */ | |
171 | bt/s neg_result /* dividend >= 0 with a negative divisor */ | |
172 | cmp/hi r1,r5 /* delay slot: T = (negated divisor > 128), unsigned compare */ | |
173 | neg r4,r4 /* both operands negative: negate the dividend too */ | |
174 | pos_result: /* no final negation on the paths reached from here */ | |
175 | extu.w r5,r0 /* r0 = low 16 bits of the divisor */ | |
176 | bf div_le128 /* divisor <= 128: table path inside the unsigned routine */ | |
177 | cmp/eq r5,r0 /* T = (divisor fits in 16 bits) */ | |
178 | mov r4,r0 | |
179 | shlr8 r0 /* r0 = dividend >> 8 */ | |
180 | bf/s div_ge64k | |
181 | cmp/hi r0,r5 /* delay slot: T = (divisor > (dividend >> 8)) */ | |
182 | div0u /* clear the M, Q and T bits (unsigned division setup) */ | |
183 | shll16 r5 | |
184 | div1 r5,r0 | |
185 | div1 r5,r0 | |
186 | div1 r5,r0 | |
187 | udiv_25: /* also reached from __udivsi3_i4i */ | |
188 | mov.l zero_l,r1 /* r1 = 0 */ | |
189 | div1 r5,r0 | |
190 | div1 r5,r0 | |
191 | mov.l r1,@-r15 /* push a zeroed scratch word */ | |
192 | .rept 3 | |
193 | div1 r5,r0 | |
194 | .endr | |
195 | mov.b r0,@(L_MSWLSB,r15) /* store the low byte of r0 into bits 23..16 of the scratch word */ | |
196 | xtrct r4,r0 /* r0 = low half of r4 in the upper 16 bits, old upper half of r0 in the lower 16 bits */ | |
197 | swap.w r0,r0 /* exchange the upper and lower 16-bit halves */ | |
198 | .rept 8 | |
199 | div1 r5,r0 | |
200 | .endr | |
201 | mov.b r0,@(L_LSWMSB,r15) /* store the low byte of r0 into bits 15..8 of the scratch word */ | |
202 | div_ge64k_end: | |
203 | .rept 8 | |
204 | div1 r5,r0 | |
205 | .endr | |
206 | mov.l @r15+,r4 ! zero-extension and swap using LS unit. | |
207 | extu.b r0,r0 /* keep only the low byte of r0 */ | |
208 | mov.l @r15+,r5 /* restore r5 */ | |
209 | or r4,r0 /* merge in the bytes collected in the scratch word */ | |
210 | mov.l @r15+,r4 /* restore r4 */ | |
211 | rts | |
212 | rotcl r0 /* delay slot: rotate the final T bit into r0 */ | |
213 | ||
214 | div_le128_neg: /* table path with a negated result; r0 holds the low 16 bits of the divisor from neg_result */ | |
215 | tst #0xfe,r0 /* T = 1 when the divisor is 0 or 1 */ | |
216 | mova div_table_ix,r0 | |
217 | mov.b @(r0,r5),r1 /* r1 = div_table_ix[divisor], sign-extended */ | |
218 | mova div_table_inv,r0 | |
219 | bt/s div_by_1_neg | |
220 | mov.l @(r0,r1),r1 /* delay slot: r1 = inverse-table word at div_table_inv + r1 */ | |
221 | mova div_table_clz,r0 | |
222 | dmulu.l r1,r4 /* MACH:MACL = r1 * r4, unsigned 32x32 -> 64 */ | |
223 | mov.b @(r0,r5),r1 /* r1 = div_table_clz[divisor], the final shift count */ | |
224 | mov.l @r15+,r5 /* restore r5 */ | |
225 | sts mach,r0 /* r0 = upper 32 bits of the product */ | |
226 | /* clrt */ | |
227 | addc r4,r0 /* r0 += r4 + T; T is still 0 here because the bt/s was not taken */ | |
228 | mov.l @r15+,r4 /* restore r4 */ | |
229 | rotcr r0 /* rotate right through T, bringing the carry back in as bit 31 */ | |
230 | shld r1,r0 /* shift by the table count (a negative count is a logical right shift) */ | |
231 | rts | |
232 | neg r0,r0 /* delay slot: negate the quotient */ | |
233 | ||
234 | pos_divisor: /* divisor >= 0; T holds (dividend >= 0) from the cmp/pz r4 delay slot */ | |
235 | mov.l r5,@-r15 /* save caller's r5 */ | |
236 | bt/s pos_result | |
237 | cmp/hi r1,r5 /* delay slot: T = (divisor > 128) */ | |
238 | neg r4,r4 /* dividend was negative: negate it */ | |
239 | neg_result: /* operand signs differ: every path from here negates the quotient before returning */ | |
240 | extu.w r5,r0 /* r0 = low 16 bits of the divisor */ | |
241 | bf div_le128_neg /* divisor <= 128: table path */ | |
242 | cmp/eq r5,r0 /* T = (divisor fits in 16 bits) */ | |
243 | mov r4,r0 | |
244 | shlr8 r0 /* r0 = dividend >> 8 */ | |
245 | bf/s div_ge64k_neg | |
246 | cmp/hi r0,r5 /* delay slot: T = (divisor > (dividend >> 8)) */ | |
247 | div0u /* clear the M, Q and T bits (unsigned division setup) */ | |
248 | mov.l zero_l,r1 /* r1 = 0 */ | |
249 | shll16 r5 | |
250 | div1 r5,r0 | |
251 | mov.l r1,@-r15 /* push a zeroed scratch word */ | |
252 | .rept 7 | |
253 | div1 r5,r0 | |
254 | .endr | |
255 | mov.b r0,@(L_MSWLSB,r15) /* store the low byte of r0 into bits 23..16 of the scratch word */ | |
256 | xtrct r4,r0 /* r0 = low half of r4 in the upper 16 bits, old upper half of r0 in the lower 16 bits */ | |
257 | swap.w r0,r0 /* exchange the upper and lower 16-bit halves */ | |
258 | .rept 8 | |
259 | div1 r5,r0 | |
260 | .endr | |
261 | mov.b r0,@(L_LSWMSB,r15) /* store the low byte of r0 into bits 15..8 of the scratch word */ | |
262 | div_ge64k_neg_end: | |
263 | .rept 8 | |
264 | div1 r5,r0 | |
265 | .endr | |
266 | mov.l @r15+,r4 ! zero-extension and swap using LS unit. | |
267 | extu.b r0,r1 /* r1 = low byte of r0 */ | |
268 | mov.l @r15+,r5 /* restore r5 */ | |
269 | or r4,r1 /* merge in the bytes collected in the scratch word */ | |
270 | div_r8_neg_end: | |
271 | mov.l @r15+,r4 /* restore r4 */ | |
272 | rotcl r1 /* rotate the final T bit into r1 */ | |
273 | rts | |
274 | neg r1,r0 /* delay slot: r0 = -r1, the negated quotient */ | |
275 | ||
276 | div_ge64k_neg: /* T was set by cmp/hi r0,r5 in the delay slot of the branch that got here */ | |
277 | bt/s div_r8_neg /* taken when divisor > (dividend >> 8) */ | |
278 | div0u /* delay slot */ | |
279 | shll8 r5 | |
280 | mov.l zero_l,r1 /* r1 = 0 */ | |
281 | .rept 6 | |
282 | div1 r5,r0 | |
283 | .endr | |
284 | mov.l r1,@-r15 /* push a zeroed scratch word */ | |
285 | div1 r5,r0 | |
286 | mov.w m256_w,r1 /* r1 = 0xff00 sign-extended, i.e. 0xffffff00 */ | |
287 | div1 r5,r0 | |
288 | mov.b r0,@(L_LSWMSB,r15) /* store the low byte of r0 into bits 15..8 of the scratch word */ | |
289 | xor r4,r0 | |
290 | and r1,r0 | |
291 | bra div_ge64k_neg_end | |
292 | xor r4,r0 /* delay slot: after the xor/and/xor, r0 keeps its upper 24 bits and takes its low byte from r4 */ | |
293 | ||
294 | c128_w: /* 16-bit constant 128, loaded with mov.w at both entry points */ | |
295 | .word 128 | |
296 | ||
297 | div_r8_neg: | |
298 | clrt | |
299 | shll16 r4 | |
300 | mov r4,r1 | |
301 | shll8 r1 /* r1 = r4 << 24 (counting the shll16 above) */ | |
302 | mov r5,r4 /* copy the divisor to r4 so r5 can be restored before the last div1 */ | |
303 | .rept 7 | |
304 | rotcl r1; div1 r5,r0 | |
305 | .endr | |
306 | mov.l @r15+,r5 /* restore r5 */ | |
307 | rotcl r1 | |
308 | bra div_r8_neg_end | |
309 | div1 r4,r0 /* delay slot: last div1 step, using the divisor copy in r4 */ | |
310 | ||
311 | m256_w: /* 16-bit constant; mov.w sign-extends it to 0xffffff00 when loading */ | |
312 | .word 0xff00 | |
313 | /* This table has been generated by divtab-sh4.c. */ | |
314 | .balign 4 | |
315 | div_table_clz: /* One signed byte per divisor, indexed by r5. The table-driven paths load it into r1 and use it as the shld shift count. */ | |
316 | .byte 0 | |
317 | .byte 1 | |
318 | .byte 0 | |
319 | .byte -1 | |
320 | .byte -1 | |
321 | .byte -2 | |
322 | .byte -2 | |
323 | .byte -2 | |
324 | .byte -2 | |
325 | .byte -3 | |
326 | .byte -3 | |
327 | .byte -3 | |
328 | .byte -3 | |
329 | .byte -3 | |
330 | .byte -3 | |
331 | .byte -3 | |
332 | .byte -3 | |
333 | .byte -4 | |
334 | .byte -4 | |
335 | .byte -4 | |
336 | .byte -4 | |
337 | .byte -4 | |
338 | .byte -4 | |
339 | .byte -4 | |
340 | .byte -4 | |
341 | .byte -4 | |
342 | .byte -4 | |
343 | .byte -4 | |
344 | .byte -4 | |
345 | .byte -4 | |
346 | .byte -4 | |
347 | .byte -4 | |
348 | .byte -4 | |
349 | .byte -5 | |
350 | .byte -5 | |
351 | .byte -5 | |
352 | .byte -5 | |
353 | .byte -5 | |
354 | .byte -5 | |
355 | .byte -5 | |
356 | .byte -5 | |
357 | .byte -5 | |
358 | .byte -5 | |
359 | .byte -5 | |
360 | .byte -5 | |
361 | .byte -5 | |
362 | .byte -5 | |
363 | .byte -5 | |
364 | .byte -5 | |
365 | .byte -5 | |
366 | .byte -5 | |
367 | .byte -5 | |
368 | .byte -5 | |
369 | .byte -5 | |
370 | .byte -5 | |
371 | .byte -5 | |
372 | .byte -5 | |
373 | .byte -5 | |
374 | .byte -5 | |
375 | .byte -5 | |
376 | .byte -5 | |
377 | .byte -5 | |
378 | .byte -5 | |
379 | .byte -5 | |
380 | .byte -5 | |
381 | .byte -6 | |
382 | .byte -6 | |
383 | .byte -6 | |
384 | .byte -6 | |
385 | .byte -6 | |
386 | .byte -6 | |
387 | .byte -6 | |
388 | .byte -6 | |
389 | .byte -6 | |
390 | .byte -6 | |
391 | .byte -6 | |
392 | .byte -6 | |
393 | .byte -6 | |
394 | .byte -6 | |
395 | .byte -6 | |
396 | .byte -6 | |
397 | .byte -6 | |
398 | .byte -6 | |
399 | .byte -6 | |
400 | .byte -6 | |
401 | .byte -6 | |
402 | .byte -6 | |
403 | .byte -6 | |
404 | .byte -6 | |
405 | .byte -6 | |
406 | .byte -6 | |
407 | .byte -6 | |
408 | .byte -6 | |
409 | .byte -6 | |
410 | .byte -6 | |
411 | .byte -6 | |
412 | .byte -6 | |
413 | .byte -6 | |
414 | .byte -6 | |
415 | .byte -6 | |
416 | .byte -6 | |
417 | .byte -6 | |
418 | .byte -6 | |
419 | .byte -6 | |
420 | .byte -6 | |
421 | .byte -6 | |
422 | .byte -6 | |
423 | .byte -6 | |
424 | .byte -6 | |
425 | .byte -6 | |
426 | .byte -6 | |
427 | .byte -6 | |
428 | .byte -6 | |
429 | .byte -6 | |
430 | .byte -6 | |
431 | .byte -6 | |
432 | .byte -6 | |
433 | .byte -6 | |
434 | .byte -6 | |
435 | .byte -6 | |
436 | .byte -6 | |
437 | .byte -6 | |
438 | .byte -6 | |
439 | .byte -6 | |
440 | .byte -6 | |
441 | .byte -6 | |
442 | .byte -6 | |
443 | .byte -6 | |
444 | /* Lookup table translating positive divisor to index into table of | |
445 | normalized inverse. N.B. the '0' entry is also the last entry of the | |
446 | previous table, and causes an unaligned access for division by zero. */ | |
447 | div_table_ix: /* Indexed by the divisor in r5. Each signed byte is a byte offset that the code adds to the address of div_table_inv for a mov.l load. */ | |
448 | .byte -6 | |
449 | .byte -128 | |
450 | .byte -128 | |
451 | .byte 0 | |
452 | .byte -128 | |
453 | .byte -64 | |
454 | .byte 0 | |
455 | .byte 64 | |
456 | .byte -128 | |
457 | .byte -96 | |
458 | .byte -64 | |
459 | .byte -32 | |
460 | .byte 0 | |
461 | .byte 32 | |
462 | .byte 64 | |
463 | .byte 96 | |
464 | .byte -128 | |
465 | .byte -112 | |
466 | .byte -96 | |
467 | .byte -80 | |
468 | .byte -64 | |
469 | .byte -48 | |
470 | .byte -32 | |
471 | .byte -16 | |
472 | .byte 0 | |
473 | .byte 16 | |
474 | .byte 32 | |
475 | .byte 48 | |
476 | .byte 64 | |
477 | .byte 80 | |
478 | .byte 96 | |
479 | .byte 112 | |
480 | .byte -128 | |
481 | .byte -120 | |
482 | .byte -112 | |
483 | .byte -104 | |
484 | .byte -96 | |
485 | .byte -88 | |
486 | .byte -80 | |
487 | .byte -72 | |
488 | .byte -64 | |
489 | .byte -56 | |
490 | .byte -48 | |
491 | .byte -40 | |
492 | .byte -32 | |
493 | .byte -24 | |
494 | .byte -16 | |
495 | .byte -8 | |
496 | .byte 0 | |
497 | .byte 8 | |
498 | .byte 16 | |
499 | .byte 24 | |
500 | .byte 32 | |
501 | .byte 40 | |
502 | .byte 48 | |
503 | .byte 56 | |
504 | .byte 64 | |
505 | .byte 72 | |
506 | .byte 80 | |
507 | .byte 88 | |
508 | .byte 96 | |
509 | .byte 104 | |
510 | .byte 112 | |
511 | .byte 120 | |
512 | .byte -128 | |
513 | .byte -124 | |
514 | .byte -120 | |
515 | .byte -116 | |
516 | .byte -112 | |
517 | .byte -108 | |
518 | .byte -104 | |
519 | .byte -100 | |
520 | .byte -96 | |
521 | .byte -92 | |
522 | .byte -88 | |
523 | .byte -84 | |
524 | .byte -80 | |
525 | .byte -76 | |
526 | .byte -72 | |
527 | .byte -68 | |
528 | .byte -64 | |
529 | .byte -60 | |
530 | .byte -56 | |
531 | .byte -52 | |
532 | .byte -48 | |
533 | .byte -44 | |
534 | .byte -40 | |
535 | .byte -36 | |
536 | .byte -32 | |
537 | .byte -28 | |
538 | .byte -24 | |
539 | .byte -20 | |
540 | .byte -16 | |
541 | .byte -12 | |
542 | .byte -8 | |
543 | .byte -4 | |
544 | .byte 0 | |
545 | .byte 4 | |
546 | .byte 8 | |
547 | .byte 12 | |
548 | .byte 16 | |
549 | .byte 20 | |
550 | .byte 24 | |
551 | .byte 28 | |
552 | .byte 32 | |
553 | .byte 36 | |
554 | .byte 40 | |
555 | .byte 44 | |
556 | .byte 48 | |
557 | .byte 52 | |
558 | .byte 56 | |
559 | .byte 60 | |
560 | .byte 64 | |
561 | .byte 68 | |
562 | .byte 72 | |
563 | .byte 76 | |
564 | .byte 80 | |
565 | .byte 84 | |
566 | .byte 88 | |
567 | .byte 92 | |
568 | .byte 96 | |
569 | .byte 100 | |
570 | .byte 104 | |
571 | .byte 108 | |
572 | .byte 112 | |
573 | .byte 116 | |
574 | .byte 120 | |
575 | .byte 124 | |
576 | .byte -128 | |
577 | /* 1/64 .. 1/127, normalized. There is an implicit leading 1 in bit 32. */ | |
578 | .balign 4 | |
579 | zero_l: /* The first table word is 0; the div1 paths also load it as a 32-bit zero constant. */ | |
580 | .long 0x0 | |
581 | .long 0xF81F81F9 | |
582 | .long 0xF07C1F08 | |
583 | .long 0xE9131AC0 | |
584 | .long 0xE1E1E1E2 | |
585 | .long 0xDAE6076C | |
586 | .long 0xD41D41D5 | |
587 | .long 0xCD856891 | |
588 | .long 0xC71C71C8 | |
589 | .long 0xC0E07039 | |
590 | .long 0xBACF914D | |
591 | .long 0xB4E81B4F | |
592 | .long 0xAF286BCB | |
593 | .long 0xA98EF607 | |
594 | .long 0xA41A41A5 | |
595 | .long 0x9EC8E952 | |
596 | .long 0x9999999A | |
597 | .long 0x948B0FCE | |
598 | .long 0x8F9C18FA | |
599 | .long 0x8ACB90F7 | |
600 | .long 0x86186187 | |
601 | .long 0x81818182 | |
602 | .long 0x7D05F418 | |
603 | .long 0x78A4C818 | |
604 | .long 0x745D1746 | |
605 | .long 0x702E05C1 | |
606 | .long 0x6C16C16D | |
607 | .long 0x68168169 | |
608 | .long 0x642C8591 | |
609 | .long 0x60581606 | |
610 | .long 0x5C9882BA | |
611 | .long 0x58ED2309 | |
612 | div_table_inv: /* Base address for the mov.l @(r0,r1) lookups; the signed offsets from div_table_ix reach words on both sides of this label. */ | |
613 | .long 0x55555556 | |
614 | .long 0x51D07EAF | |
615 | .long 0x4E5E0A73 | |
616 | .long 0x4AFD6A06 | |
617 | .long 0x47AE147B | |
618 | .long 0x446F8657 | |
619 | .long 0x41414142 | |
620 | .long 0x3E22CBCF | |
621 | .long 0x3B13B13C | |
622 | .long 0x38138139 | |
623 | .long 0x3521CFB3 | |
624 | .long 0x323E34A3 | |
625 | .long 0x2F684BDB | |
626 | .long 0x2C9FB4D9 | |
627 | .long 0x29E4129F | |
628 | .long 0x27350B89 | |
629 | .long 0x24924925 | |
630 | .long 0x21FB7813 | |
631 | .long 0x1F7047DD | |
632 | .long 0x1CF06ADB | |
633 | .long 0x1A7B9612 | |
634 | .long 0x18118119 | |
635 | .long 0x15B1E5F8 | |
636 | .long 0x135C8114 | |
637 | .long 0x11111112 | |
638 | .long 0xECF56BF | |
639 | .long 0xC9714FC | |
640 | .long 0xA6810A7 | |
641 | .long 0x8421085 | |
642 | .long 0x624DD30 | |
643 | .long 0x4104105 | |
644 | .long 0x2040811 | |
645 | /* maximum error: 0.987342 scaled: 0.921875*/ |