Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
1da177e4 LT |
2 | /* memcpy.S: Sparc optimized memcpy and memmove code |
3 | * Hand optimized from GNU libc's memcpy and memmove | |
4 | * Copyright (C) 1991,1996 Free Software Foundation | |
5 | * Copyright (C) 1995 Linus Torvalds (Linus.Torvalds@helsinki.fi) | |
6 | * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) | |
7 | * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) | |
8 | * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz) | |
9 | */ | |
10 | ||
d3867f04 | 11 | #include <asm/export.h> |
/* FUNC(x): open a function named x: export the symbol, mark it as a function, align to 4 bytes and emit the label. */
045b7de9 | 12 | #define FUNC(x) \ |
1da177e4 LT |
13 | .globl x; \ |
14 | .type x,@function; \ | |
045b7de9 | 15 | .align 4; \ |
1da177e4 LT |
16 | x: |
17 | ||
1da177e4 LT |
18 | /* Both these macros have to start with exactly the same insn */ |
/* MOVE_BIGCHUNK: copy 32 bytes from [%src + offset] to [%dst + offset]. */
/* Four ldd loads name only t0, t2, t4 and t6; t1, t3, t5 and t7 are stored without an explicit load, so each must be the odd partner register that ldd fills together with the preceding even one. */
/* The data is written with eight word-sized st instructions; memcpy uses this variant when (%o0 & 4) is non-zero. */
/* The macro expands to 12 instructions. Its first instruction doubles as the delay slot of the "be 82f + 4" in memcpy, which is why it has to match the first instruction of MOVE_BIGALIGNCHUNK. */
19 | #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ | |
20 | ldd [%src + (offset) + 0x00], %t0; \ | |
21 | ldd [%src + (offset) + 0x08], %t2; \ | |
22 | ldd [%src + (offset) + 0x10], %t4; \ | |
23 | ldd [%src + (offset) + 0x18], %t6; \ | |
24 | st %t0, [%dst + (offset) + 0x00]; \ | |
25 | st %t1, [%dst + (offset) + 0x04]; \ | |
26 | st %t2, [%dst + (offset) + 0x08]; \ | |
27 | st %t3, [%dst + (offset) + 0x0c]; \ | |
28 | st %t4, [%dst + (offset) + 0x10]; \ | |
29 | st %t5, [%dst + (offset) + 0x14]; \ | |
30 | st %t6, [%dst + (offset) + 0x18]; \ | |
31 | st %t7, [%dst + (offset) + 0x1c]; | |
32 | ||
/* MOVE_BIGALIGNCHUNK: copy 32 bytes from [%src + offset] to [%dst + offset] using four ldd loads and four std stores. */
/* t1, t3, t5 and t7 are accepted for signature parity with MOVE_BIGCHUNK but are not named in the body; ldd and std move them implicitly as the partners of t0, t2, t4 and t6. */
/* memcpy reaches this variant only when (%o0 & 4) is zero. The first instruction must stay identical to MOVE_BIGCHUNK's first instruction because memcpy enters the ldd_std loop at "82f + 4". */
33 | #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \ | |
34 | ldd [%src + (offset) + 0x00], %t0; \ | |
35 | ldd [%src + (offset) + 0x08], %t2; \ | |
36 | ldd [%src + (offset) + 0x10], %t4; \ | |
37 | ldd [%src + (offset) + 0x18], %t6; \ | |
38 | std %t0, [%dst + (offset) + 0x00]; \ | |
39 | std %t2, [%dst + (offset) + 0x08]; \ | |
40 | std %t4, [%dst + (offset) + 0x10]; \ | |
41 | std %t6, [%dst + (offset) + 0x18]; | |
42 | ||
/* MOVE_LASTCHUNK: copy the 16 bytes that end (offset) bytes below %src to the 16 bytes that end (offset) bytes below %dst. */
/* Two ldd loads, four word stores; t1 and t3 must be the partner registers of t0 and t2. */
/* The macro expands to exactly 6 instructions (24 bytes of code). The computed jump into memcpy_table relies on that size, so do not add or remove instructions here. */
43 | #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \ | |
44 | ldd [%src - (offset) - 0x10], %t0; \ | |
45 | ldd [%src - (offset) - 0x08], %t2; \ | |
46 | st %t0, [%dst - (offset) - 0x10]; \ | |
47 | st %t1, [%dst - (offset) - 0x0c]; \ | |
48 | st %t2, [%dst - (offset) - 0x08]; \ | |
49 | st %t3, [%dst - (offset) - 0x04]; | |
50 | ||
/* MOVE_LASTALIGNCHUNK: same 16-byte copy as MOVE_LASTCHUNK, but written with two std stores instead of four st stores. */
/* t1 and t3 are not named in the body; they are moved implicitly as the partners of t0 and t2. */
/* The macro expands to exactly 4 instructions (16 bytes of code). The computed jump into amemcpy_table relies on that size. */
51 | #define MOVE_LASTALIGNCHUNK(src, dst, offset, t0, t1, t2, t3) \ | |
52 | ldd [%src - (offset) - 0x10], %t0; \ | |
53 | ldd [%src - (offset) - 0x08], %t2; \ | |
54 | std %t0, [%dst - (offset) - 0x10]; \ | |
55 | std %t2, [%dst - (offset) - 0x08]; | |
56 | ||
/* MOVE_SHORTCHUNK: copy the 2 bytes that end (offset) bytes below %src to the 2 bytes that end (offset) bytes below %dst, one byte at a time. */
/* The macro expands to exactly 4 instructions (16 bytes of code). The computed jump in short_end multiplies the byte count by 8 to match this size. */
57 | #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \ | |
58 | ldub [%src - (offset) - 0x02], %t0; \ | |
59 | ldub [%src - (offset) - 0x01], %t1; \ | |
60 | stb %t0, [%dst - (offset) - 0x02]; \ | |
61 | stb %t1, [%dst - (offset) - 0x01]; | |
62 | ||
1da177e4 LT |
63 | .text |
64 | .align 4 | |
65 | ||
/* memmove: copy that tolerates overlapping regions. Register roles appear to match the memcpy entry further down (%o0=dst %o1=src %o2=len). */
/* The incoming dst is saved in %g7 and handed back in %o0 on return. */
/* Three cases: dst <= src joins memcpy at label 9; dst > src without overlap joins memcpy at label 0; dst > src with overlap is copied backwards byte by byte here. */
1da177e4 | 66 | FUNC(memmove) |
d3867f04 | 67 | EXPORT_SYMBOL(memmove) |
/* Case 1: dst <= src (unsigned compare). The sub after the branch is its delay slot and leaves %o4 = dst - src for label 9. */
1da177e4 | 68 | cmp %o0, %o1 |
a52312b8 | 69 | mov %o0, %g7 |
1da177e4 LT |
70 | bleu 9f |
71 | sub %o0, %o1, %o4 | |
72 | ||
/* Case 2: src + len <= dst, so the regions do not overlap. The delay-slot andcc sets the condition codes that label 0 branches on. */
73 | add %o1, %o2, %o3 | |
74 | cmp %o3, %o0 | |
75 | bleu 0f | |
76 | andcc %o4, 3, %o5 | |
77 | ||
/* Case 3: overlapping with dst above src. Point both pointers at the last byte of their region. */
1da177e4 LT |
78 | add %o1, %o2, %o1 |
79 | add %o0, %o2, %o0 | |
80 | sub %o1, 1, %o1 | |
81 | sub %o0, 1, %o0 | |
82 | ||
/* Backward byte loop: subcc counts len down and sets the flags for bne; the dst decrement runs in the delay slot. */
83 | 1: /* reverse_bytes */ | |
84 | ||
85 | ldub [%o1], %o4 | |
86 | subcc %o2, 1, %o2 | |
87 | stb %o4, [%o0] | |
88 | sub %o1, 1, %o1 | |
89 | bne 1b | |
90 | sub %o0, 1, %o0 | |
91 | ||
/* Return the original dst (restored in the delay slot of retl). */
92 | retl | |
a52312b8 | 93 | mov %g7, %o0 |
1da177e4 | 94 | |
1da177e4 LT |
95 | /* NOTE: This code is executed just for the cases, |
96 | where %src (=%o1) & 3 is != 0. | |
97 | We need to align it to 4. So, for (%src & 3) | |
98 | 1 we need to do ldub,lduh | |
99 | 2 lduh | |
100 | 3 just ldub | |
101 | so even if it looks weird, the branches | |
102 | are correct here. -jj | |
103 | */ | |
/* Entered from memcpy via "bne 78b"; both exits go to label 3 in memcpy. %o0, %o1 and %o2 are advanced/decremented by the bytes copied here. */
104 | 78: /* dword_align */ | |
105 | ||
/* Bit 0 of src clear means src & 3 == 2: skip the byte copy. The delay-slot andcc tests bit 1 of the still-unmodified src for the bne below. */
106 | andcc %o1, 1, %g0 | |
107 | be 4f | |
108 | andcc %o1, 2, %g0 | |
109 | ||
/* Copy one byte. add and sub do not touch the condition codes, so bne still sees the bit-1 test made above. */
/* Bit 1 set (src & 3 == 3): one byte was enough, go to label 3. Bit 1 clear (src & 3 == 1): fall through and copy a halfword as well. */
110 | ldub [%o1], %g2 | |
111 | add %o1, 1, %o1 | |
112 | stb %g2, [%o0] | |
113 | sub %o2, 1, %o2 | |
114 | bne 3f | |
115 | add %o0, 1, %o0 | |
/* Copy one halfword, then continue at label 3 (dst is advanced in the delay slot). */
116 | 4: | |
117 | lduh [%o1], %g2 | |
118 | add %o1, 2, %o1 | |
119 | sth %g2, [%o0] | |
120 | sub %o2, 2, %o2 | |
121 | b 3f | |
122 | add %o0, 2, %o0 | |
123 | ||
/* memcpy entry. The original dst is saved in %g7 so every exit path can return it in %o0. */
/* Labels 9 and 0 are also entry points used by memmove, which arrives with %o4 = dst - src (label 9) or with the andcc already done (label 0). */
1da177e4 | 124 | FUNC(memcpy) /* %o0=dst %o1=src %o2=len */ |
d3867f04 | 125 | EXPORT_SYMBOL(memcpy) |
1da177e4 LT |
126 | |
127 | sub %o0, %o1, %o4 | |
a52312b8 | 128 | mov %o0, %g7 |
1da177e4 LT |
129 | 9: |
/* (dst - src) & 3 != 0: src and dst can never be word-aligned at the same time, use the non_aligned path (86). */
130 | andcc %o4, 3, %o5 | |
131 | 0: | |
132 | bne 86f | |
133 | cmp %o2, 15 | |
134 | ||
/* len <= 15: short path (90). The delay-slot andcc tests src & 3 for both label 90 and the bne below. */
135 | bleu 90f | |
136 | andcc %o1, 3, %g0 | |
137 | ||
/* src not word-aligned: align it in dword_align (78), which comes back to label 3. Label 3 sits on the delay-slot instruction of this branch. */
138 | bne 78b | |
139 | 3: | |
140 | andcc %o1, 4, %g0 | |
141 | ||
/* From here on the remaining length lives in %g1, because the chunk macros below use %o2 as a scratch register. */
/* If src is already 8-byte aligned skip to label 2, otherwise copy one word so that it becomes 8-byte aligned. */
142 | be 2f | |
143 | mov %o2, %g1 | |
144 | ||
145 | ld [%o1], %o4 | |
146 | sub %g1, 4, %g1 | |
147 | st %o4, [%o0] | |
148 | add %o1, 4, %o1 | |
149 | add %o0, 4, %o0 | |
/* Fewer than 128 bytes left: go straight to the tail at the second label 3. The delay-slot andcc tests dst & 4 for the next branch. */
150 | 2: | |
21f74d36 | 151 | andcc %g1, 0xffffff80, %g0 |
1da177e4 LT |
152 | be 3f |
153 | andcc %o0, 4, %g0 | |
154 | ||
/* dst 8-byte aligned as well: use the ldd/std loop at 82. The branch targets 82f + 4 because its delay slot is the first instruction of the MOVE_BIGCHUNK below, which is identical to the first instruction at label 82. */
155 | be 82f + 4 | |
/* 128-byte loop with word stores: four 32-byte chunks per iteration, repeated while at least 128 bytes remain (dst is advanced in the delay slot). */
156 | 5: | |
157 | MOVE_BIGCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | |
158 | MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | |
159 | MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | |
160 | MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | |
21f74d36 | 161 | sub %g1, 128, %g1 |
1da177e4 | 162 | add %o1, 128, %o1 |
21f74d36 DM |
163 | cmp %g1, 128 |
164 | bge 5b | |
1da177e4 LT |
165 | add %o0, 128, %o0 |
/* Tail of the word-store path: fewer than 128 bytes remain in %g1. Reached by falling out of the 128-byte loop above or directly from "be 3f" in memcpy. */
/* %g4 = number of bytes to copy in 16-byte chunks (remaining & 0x70). If zero, skip the table. The delay-slot andcc tests bit 3 of the remaining length for label 80. */
166 | 3: | |
21f74d36 | 167 | andcc %g1, 0x70, %g4 |
1da177e4 LT |
168 | be 80f |
169 | andcc %g1, 8, %g0 | |
170 | ||
/* Computed jump backwards from label 80 into the table. Each MOVE_LASTCHUNK copies 16 bytes with 24 bytes of code, so the code offset is %g4 * 1.5 = %g4 + (%g4 >> 1). */
/* Both pointers are advanced by %g4 first (dst in the jmpl delay slot); the table entries then address the data with negative offsets. */
/* None of the instructions up to label 80 modify the condition codes, so the bit-3 test above is still valid there. */
171 | sethi %hi(80f), %o5 | |
21f74d36 DM |
172 | srl %g4, 1, %o4 |
173 | add %g4, %o4, %o4 | |
174 | add %o1, %g4, %o1 | |
1da177e4 LT |
175 | sub %o5, %o4, %o5 |
176 | jmpl %o5 + %lo(80f), %g0 | |
21f74d36 | 177 | add %o0, %g4, %o0 |
1da177e4 LT |
178 | |
179 | 79: /* memcpy_table */ | |
180 | ||
181 | MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5) | |
182 | MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5) | |
183 | MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5) | |
184 | MOVE_LASTCHUNK(o1, o0, 0x30, g2, g3, g4, g5) | |
185 | MOVE_LASTCHUNK(o1, o0, 0x20, g2, g3, g4, g5) | |
186 | MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5) | |
187 | MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5) | |
188 | ||
/* Remaining bit 3 clear: no 8-byte piece. Otherwise copy 8 bytes with one ldd and two word stores. Each delay-slot andcc tests the bit used by the next branch. */
189 | 80: /* memcpy_table_end */ | |
190 | be 81f | |
191 | andcc %g1, 4, %g0 | |
192 | ||
193 | ldd [%o1], %g2 | |
194 | add %o0, 8, %o0 | |
195 | st %g2, [%o0 - 0x08] | |
196 | add %o1, 8, %o1 | |
197 | st %g3, [%o0 - 0x04] | |
198 | ||
/* Last 0..7 bytes: copy a word if bit 2 is set, a halfword if bit 1 is set, a byte if bit 0 is set. Also entered from short_aligned_end (90) with %g1 = len. */
199 | 81: /* memcpy_last7 */ | |
200 | ||
201 | be 1f | |
202 | andcc %g1, 2, %g0 | |
203 | ||
204 | ld [%o1], %g2 | |
205 | add %o1, 4, %o1 | |
206 | st %g2, [%o0] | |
207 | add %o0, 4, %o0 | |
208 | 1: | |
209 | be 1f | |
210 | andcc %g1, 1, %g0 | |
211 | ||
212 | lduh [%o1], %g2 | |
213 | add %o1, 2, %o1 | |
214 | sth %g2, [%o0] | |
215 | add %o0, 2, %o0 | |
216 | 1: | |
217 | be 1f | |
218 | nop | |
219 | ||
220 | ldub [%o1], %g2 | |
221 | stb %g2, [%o0] | |
/* Return the original dst saved in %g7. */
222 | 1: | |
223 | retl | |
a52312b8 | 224 | mov %g7, %o0 |
1da177e4 LT |
225 | |
/* 128-byte loop with doubleword stores, used when dst has bit 2 clear (see "be 82f + 4" in memcpy). %g1 holds the remaining length. */
/* The first entry skips the first instruction of this block because it was already executed in the delay slot of that branch; later iterations start at label 82. */
226 | 82: /* ldd_std */ | |
227 | MOVE_BIGALIGNCHUNK(o1, o0, 0x00, o2, o3, o4, o5, g2, g3, g4, g5) | |
228 | MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5) | |
229 | MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5) | |
230 | MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5) | |
/* NOTE(review): the flags written by subcc are replaced by the cmp below before any branch reads them; the word-store loop uses a plain sub at the same spot. */
21f74d36 | 231 | subcc %g1, 128, %g1 |
1da177e4 | 232 | add %o1, 128, %o1 |
21f74d36 DM |
233 | cmp %g1, 128 |
234 | bge 82b | |
1da177e4 LT |
235 | add %o0, 128, %o0 |
236 | ||
/* %g4 = bytes to copy in 16-byte chunks (remaining & 0x70). If zero, skip the table. The delay-slot andcc tests bit 3 for label 84. */
21f74d36 | 237 | andcc %g1, 0x70, %g4 |
1da177e4 LT |
238 | be 84f |
239 | andcc %g1, 8, %g0 | |
240 | ||
/* Computed jump backwards from label 84. Each MOVE_LASTALIGNCHUNK copies 16 bytes with 16 bytes of code, so the code offset equals %g4. */
/* Both pointers are advanced by %g4 first (dst in the jmpl delay slot); the table entries use negative offsets. */
241 | sethi %hi(84f), %o5 | |
21f74d36 DM |
242 | add %o1, %g4, %o1 |
243 | sub %o5, %g4, %o5 | |
1da177e4 | 244 | jmpl %o5 + %lo(84f), %g0 |
21f74d36 | 245 | add %o0, %g4, %o0 |
1da177e4 LT |
246 | |
247 | 83: /* amemcpy_table */ | |
248 | ||
249 | MOVE_LASTALIGNCHUNK(o1, o0, 0x60, g2, g3, g4, g5) | |
250 | MOVE_LASTALIGNCHUNK(o1, o0, 0x50, g2, g3, g4, g5) | |
251 | MOVE_LASTALIGNCHUNK(o1, o0, 0x40, g2, g3, g4, g5) | |
252 | MOVE_LASTALIGNCHUNK(o1, o0, 0x30, g2, g3, g4, g5) | |
253 | MOVE_LASTALIGNCHUNK(o1, o0, 0x20, g2, g3, g4, g5) | |
254 | MOVE_LASTALIGNCHUNK(o1, o0, 0x10, g2, g3, g4, g5) | |
255 | MOVE_LASTALIGNCHUNK(o1, o0, 0x00, g2, g3, g4, g5) | |
256 | ||
/* Remaining bit 3 clear: no 8-byte piece. Otherwise copy 8 bytes with one ldd and one std. Each delay-slot andcc tests the bit used by the next branch. */
257 | 84: /* amemcpy_table_end */ | |
258 | be 85f | |
259 | andcc %g1, 4, %g0 | |
260 | ||
261 | ldd [%o1], %g2 | |
262 | add %o0, 8, %o0 | |
263 | std %g2, [%o0 - 0x08] | |
264 | add %o1, 8, %o1 | |
/* Last 0..7 bytes: word if bit 2 is set, halfword if bit 1 is set, byte if bit 0 is set. */
265 | 85: /* amemcpy_last7 */ | |
266 | be 1f | |
267 | andcc %g1, 2, %g0 | |
268 | ||
269 | ld [%o1], %g2 | |
270 | add %o1, 4, %o1 | |
271 | st %g2, [%o0] | |
272 | add %o0, 4, %o0 | |
273 | 1: | |
274 | be 1f | |
275 | andcc %g1, 1, %g0 | |
276 | ||
277 | lduh [%o1], %g2 | |
278 | add %o1, 2, %o1 | |
279 | sth %g2, [%o0] | |
280 | add %o0, 2, %o0 | |
281 | 1: | |
282 | be 1f | |
283 | nop | |
284 | ||
285 | ldub [%o1], %g2 | |
286 | stb %g2, [%o0] | |
/* Return the original dst saved in %g7. */
287 | 1: | |
288 | retl | |
a52312b8 | 289 | mov %g7, %o0 |
1da177e4 | 290 | |
/* non_aligned: reached from memcpy when (dst - src) & 3 != 0, i.e. src and dst cannot both be word-aligned. */
/* Strategy: byte-align dst to a word boundary, then read whole aligned words from src and build each output word from two neighbouring input words with a shift pair. */
1da177e4 LT |
291 | 86: /* non_aligned */ |
/* len <= 6: not worth the setup, use the byte table in short_end (88). */
292 | cmp %o2, 6 | |
293 | bleu 88f | |
21f74d36 DM |
294 | nop |
295 | ||
/* New register window: dst, src and len are now %i0, %i1 and %i2. */
/* Align dst: already aligned goes to 61; dst & 3 == 2 goes to 60 (2 bytes); otherwise copy 1 byte and, if bit 1 of the original dst was clear (dst & 3 == 1), fall into 60 for 2 more. */
/* The andcc in each delay slot sets the flags for the following branch; add, sub, ldub and stb leave them untouched. */
296 | save %sp, -96, %sp | |
297 | andcc %i0, 3, %g0 | |
1da177e4 | 298 | be 61f |
21f74d36 | 299 | andcc %i0, 1, %g0 |
1da177e4 | 300 | be 60f |
21f74d36 | 301 | andcc %i0, 2, %g0 |
1da177e4 | 302 | |
21f74d36 DM |
303 | ldub [%i1], %g5 |
304 | add %i1, 1, %i1 | |
305 | stb %g5, [%i0] | |
306 | sub %i2, 1, %i2 | |
1da177e4 | 307 | bne 61f |
21f74d36 | 308 | add %i0, 1, %i0 |
1da177e4 | 309 | 60: |
21f74d36 DM |
310 | ldub [%i1], %g3 |
311 | add %i1, 2, %i1 | |
312 | stb %g3, [%i0] | |
313 | sub %i2, 2, %i2 | |
314 | ldub [%i1 - 1], %g3 | |
315 | add %i0, 2, %i0 | |
316 | stb %g3, [%i0 - 1] | |
/* dst is word-aligned here. %g4 = (src & 3) * 8 is the left-shift count, %l0 = 32 - %g4 the right-shift count, and %i1 is rounded down to a word boundary. */
/* Since dst is aligned and (dst - src) & 3 != 0, src & 3 should be 1..3 here, so neither shift count is 0 or 32 (inferred, not checked by the code). */
/* %g3 = len & 0xc selects where the 4-word unrolled loop below is entered: 4 goes to label 4, 0 to label 3, 8 to label 2, 12 falls through. */
1da177e4 | 317 | 61: |
21f74d36 DM |
318 | and %i1, 3, %g2 |
319 | and %i2, 0xc, %g3 | |
320 | and %i1, -4, %i1 | |
1da177e4 LT |
321 | cmp %g3, 4 |
322 | sll %g2, 3, %g4 | |
323 | mov 32, %g2 | |
324 | be 4f | |
21f74d36 | 325 | sub %g2, %g4, %l0 |
1da177e4 LT |
326 | |
327 | blu 3f | |
328 | cmp %g3, 0x8 | |
329 | ||
330 | be 2f | |
21f74d36 | 331 | srl %i2, 2, %g3 |
1da177e4 | 332 | |
/* len & 0xc == 12: preload two words, bias dst by -8 and enter the loop at label 8. %g3 becomes the word count rounded up to the loop's step of 4. */
21f74d36 DM |
333 | ld [%i1], %i3 |
334 | add %i0, -8, %i0 | |
335 | ld [%i1 + 4], %i4 | |
1da177e4 LT |
336 | b 8f |
337 | add %g3, 1, %g3 | |
/* len & 0xc == 8: preload two words, bias dst by -12 and src by -4, enter the loop at label 9. */
338 | 2: | |
21f74d36 DM |
339 | ld [%i1], %i4 |
340 | add %i0, -12, %i0 | |
341 | ld [%i1 + 4], %i5 | |
1da177e4 LT |
342 | add %g3, 2, %g3 |
343 | b 9f | |
21f74d36 | 344 | add %i1, -4, %i1 |
/* len & 0xc == 0: preload two words, bias dst by -4 and src by +4, enter the loop at label 7. */
1da177e4 | 345 | 3: |
21f74d36 DM |
346 | ld [%i1], %g1 |
347 | add %i0, -4, %i0 | |
348 | ld [%i1 + 4], %i3 | |
349 | srl %i2, 2, %g3 | |
1da177e4 | 350 | b 7f |
21f74d36 | 351 | add %i1, 4, %i1 |
/* len & 0xc == 4: preload two words; with len <= 7 only the single final word at label 10 is needed, otherwise preload a third word and start the loop at its top with %g3 = words - 1. */
1da177e4 | 352 | 4: |
21f74d36 DM |
353 | ld [%i1], %i5 |
354 | cmp %i2, 7 | |
355 | ld [%i1 + 4], %g1 | |
356 | srl %i2, 2, %g3 | |
1da177e4 | 357 | bleu 10f |
21f74d36 | 358 | add %i1, 8, %i1 |
1da177e4 | 359 | |
21f74d36 | 360 | ld [%i1], %i3 |
1da177e4 LT |
361 | add %g3, -1, %g3 |
/* Unrolled loop, 4 output words per pass. Each step stores (older word << %g4) OR (newer word >> %l0) and loads the next input word; the words rotate through %i5, %g1, %i3 and %i4. */
362 | 5: | |
21f74d36 DM |
363 | sll %i5, %g4, %g2 |
364 | srl %g1, %l0, %g5 | |
1da177e4 | 365 | or %g2, %g5, %g2 |
21f74d36 | 366 | st %g2, [%i0] |
1da177e4 | 367 | 7: |
21f74d36 | 368 | ld [%i1 + 4], %i4 |
1da177e4 | 369 | sll %g1, %g4, %g2 |
21f74d36 | 370 | srl %i3, %l0, %g5 |
1da177e4 | 371 | or %g2, %g5, %g2 |
21f74d36 | 372 | st %g2, [%i0 + 4] |
1da177e4 | 373 | 8: |
21f74d36 DM |
374 | ld [%i1 + 8], %i5 |
375 | sll %i3, %g4, %g2 | |
376 | srl %i4, %l0, %g5 | |
1da177e4 | 377 | or %g2, %g5, %g2 |
21f74d36 | 378 | st %g2, [%i0 + 8] |
/* Last step of the pass: addcc counts %g3 down by 4 and sets the flags for bne,a. The annulled delay slot (the load of the next input word) runs only when the loop repeats. */
1da177e4 | 379 | 9: |
21f74d36 DM |
380 | ld [%i1 + 12], %g1 |
381 | sll %i4, %g4, %g2 | |
382 | srl %i5, %l0, %g5 | |
1da177e4 LT |
383 | addcc %g3, -4, %g3 |
384 | or %g2, %g5, %g2 | |
21f74d36 DM |
385 | add %i1, 16, %i1 |
386 | st %g2, [%i0 + 12] | |
387 | add %i0, 16, %i0 | |
1da177e4 | 388 | bne,a 5b |
21f74d36 | 389 | ld [%i1], %i3 |
/* Store one final merged word at [%i0]. %g3 = %l0 >> 3 = 4 - (src & 3) bytes; subtracting it moves %i1 from the word-aligned read position back to the next source byte not yet copied. */
/* %i0 is not advanced past that word, which is why the trailing byte stores below use offsets +4 and +3. */
1da177e4 | 390 | 10: |
21f74d36 DM |
391 | sll %i5, %g4, %g2 |
392 | srl %g1, %l0, %g5 | |
393 | srl %l0, 3, %g3 | |
1da177e4 | 394 | or %g2, %g5, %g2 |
21f74d36 DM |
395 | sub %i1, %g3, %i1 |
396 | andcc %i2, 2, %g0 | |
397 | st %g2, [%i0] | |
1da177e4 | 398 | be 1f |
21f74d36 DM |
399 | andcc %i2, 1, %g0 |
400 | ||
/* len bit 1 set: copy two more bytes. */
401 | ldub [%i1], %g2 | |
402 | add %i1, 2, %i1 | |
403 | stb %g2, [%i0 + 4] | |
404 | add %i0, 2, %i0 | |
405 | ldub [%i1 - 1], %g2 | |
406 | stb %g2, [%i0 + 3] | |
/* len bit 0 set: copy the last byte. */
1da177e4 LT |
407 | 1: |
408 | be 1f | |
409 | nop | |
21f74d36 DM |
410 | ldub [%i1], %g2 |
411 | stb %g2, [%i0 + 4] | |
/* Return: restore pops the register window and places the saved dst (%g7) in the caller-visible %o0. */
1da177e4 | 412 | 1: |
21f74d36 | 413 | ret |
a52312b8 | 414 | restore %g7, %g0, %o0 |
1da177e4 | 415 | |
/* short_end: byte-wise copy of a short length. Reached from non_aligned when len <= 6 and from short_aligned_end (len <= 15) when src is not word-aligned. */
1da177e4 LT |
416 | 88: /* short_end */ |
417 | ||
/* %o3 = len rounded down to an even number (at most 14): the bytes handled by the table. */
418 | and %o2, 0xe, %o3 | |
/* Label 20 is not referenced in the part of the file visible here. */
419 | 20: | |
/* Computed jump backwards from label 89. Each MOVE_SHORTCHUNK copies 2 bytes with 16 bytes of code, so the code offset is %o3 * 8. */
/* Both pointers are advanced by %o3 first; the table entries use negative offsets. The delay-slot andcc tests the odd byte (len & 1) for label 89, and the table does not alter the flags. */
420 | sethi %hi(89f), %o5 | |
421 | sll %o3, 3, %o4 | |
422 | add %o0, %o3, %o0 | |
423 | sub %o5, %o4, %o5 | |
424 | add %o1, %o3, %o1 | |
425 | jmpl %o5 + %lo(89f), %g0 | |
426 | andcc %o2, 1, %g0 | |
427 | ||
428 | MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3) | |
429 | MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3) | |
430 | MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3) | |
431 | MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3) | |
432 | MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3) | |
433 | MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3) | |
434 | MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3) | |
435 | ||
/* Copy the final odd byte if len & 1 was set, then return the original dst saved in %g7. */
436 | 89: /* short_table_end */ | |
437 | ||
438 | be 1f | |
439 | nop | |
440 | ||
441 | ldub [%o1], %g2 | |
442 | stb %g2, [%o0] | |
443 | 1: | |
444 | retl | |
a52312b8 | 445 | mov %g7, %o0 |
1da177e4 LT |
446 | |
/* short_aligned_end: reached from memcpy when len <= 15 and (dst - src) & 3 == 0. The flags on entry come from "andcc %o1, 3" in the delay slot of the branch that got here. */
447 | 90: /* short_aligned_end */ | |
/* src not word-aligned: fall back to the byte table in short_end (88). The delay-slot andcc tests len & 8 for the next branch. */
448 | bne 88b | |
449 | andcc %o2, 8, %g0 | |
450 | ||
/* len & 8 clear: nothing to do here. The delay-slot andcc tests len & 4, which is the flag state label 81 expects. */
451 | be 1f | |
452 | andcc %o2, 4, %g0 | |
453 | ||
/* Copy 8 bytes as two words; these instructions leave the condition codes untouched. */
454 | ld [%o1 + 0x00], %g2 | |
455 | ld [%o1 + 0x04], %g3 | |
456 | add %o1, 8, %o1 | |
457 | st %g2, [%o0 + 0x00] | |
458 | st %g3, [%o0 + 0x04] | |
459 | add %o0, 8, %o0 | |
/* Finish the last 0..7 bytes in memcpy_last7 (81), which reads the length from %g1 (set in the delay slot). */
460 | 1: | |
461 | b 81b | |
462 | mov %o2, %g1 |