Commit | Line | Data |
---|---|---|
14cf11af | 1 | /* |
14cf11af PM |
2 | * Copyright (C) 2002 Paul Mackerras, IBM Corp. |
3 | * | |
4 | * This program is free software; you can redistribute it and/or | |
5 | * modify it under the terms of the GNU General Public License | |
6 | * as published by the Free Software Foundation; either version | |
7 | * 2 of the License, or (at your option) any later version. | |
8 | */ | |
9 | #include <asm/processor.h> | |
10 | #include <asm/ppc_asm.h> | |
11 | ||
12 | .align 7 | |
13 | _GLOBAL(__copy_tofrom_user) | |
14 | /* first check for a whole page copy on a page boundary */ | |
15 | cmpldi cr1,r5,16 /* cr1: len < 16 -> take .Lshort_copy */ | |
16 | cmpdi cr6,r5,4096 /* cr6: len == 4096 -> page-copy candidate */ | |
17 | or r0,r3,r4 /* combine dest|src to test 4K alignment of both */ | |
18 | neg r6,r3 /* LS 3 bits = # bytes to 8-byte dest bdry */ | |
19 | andi. r0,r0,4095 /* cr0.eq set iff dest and src both 4K-aligned */ | |
20 | std r3,-24(r1) /* stash dest for the exception handlers */ | |
21 | crand cr0*4+2,cr0*4+2,cr6*4+2 /* cr0.eq &= (len == 4096) */ | |
22 | std r4,-16(r1) /* stash src likewise */ | |
23 | std r5,-8(r1) /* stash len likewise */ | |
24 | dcbt 0,r4 /* touch first source cache block */ | |
3c726f8d | 25 | beq .Lcopy_page_4K /* aligned whole-page copy: use unrolled routine */ |
14cf11af | 26 | andi. r6,r6,7 /* r6 = # bytes to 8-byte dest boundary */ |
3467bfd3 | 27 | PPC_MTOCRF 0x01,r5 /* cr7 = low 4 bits of len */ |
14cf11af | 28 | blt cr1,.Lshort_copy |
a4e22f02 MN |
29 | /* Below we want to nop out the bne if we're on a CPU that has the |
30 | * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit | |
31 | * cleared. | |
32 | * At the time of writing the only CPU that has this combination of bits | |
33 | * set is Power6. | |
34 | */ | |
35 | BEGIN_FTR_SECTION | |
36 | nop | |
37 | FTR_SECTION_ELSE | |
14cf11af | 38 | bne .Ldst_unaligned |
a4e22f02 MN |
39 | ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
40 | CPU_FTR_UNALIGNED_LD_STD) | |
14cf11af | 41 | .Ldst_aligned: |
14cf11af | 42 | addi r3,r3,-16 /* pre-bias dest for the 8(r3)/16(r3) stores below */ |
a4e22f02 MN |
43 | BEGIN_FTR_SECTION |
44 | andi. r0,r4,7 /* r0 = src misalignment mod 8 */ | |
14cf11af | 45 | bne .Lsrc_unaligned |
a4e22f02 | 46 | END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)​ |
14cf11af PM |
47 | srdi r7,r5,4 /* r7 = # of 16-byte chunks */
48 | 20: ld r9,0(r4) | |
49 | addi r4,r4,-8 | |
50 | mtctr r7 | |
51 | andi. r5,r5,7 /* cr0.eq set iff no sub-8-byte tail remains */ | |
52 | bf cr7*4+0,22f /* skip if !(len & 8) */ | |
53 | addi r3,r3,8 | |
54 | addi r4,r4,8 | |
55 | mr r8,r9 | |
56 | blt cr1,72f | |
57 | 21: ld r9,8(r4) | |
58 | 70: std r8,8(r3) | |
59 | 22: ldu r8,16(r4) | |
60 | 71: stdu r9,16(r3) | |
61 | bdnz 21b | |
62 | 72: std r8,8(r3) | |
63 | beq+ 3f /* no tail bytes -> done, return 0 */ | |
64 | addi r3,r3,16 | |
14cf11af PM |
65 | .Ldo_tail: |
66 | bf cr7*4+1,1f /* len & 4 ? */ | |
f72b728b MN |
67 | 23: lwz r9,8(r4) |
68 | addi r4,r4,4 | |
14cf11af PM |
69 | 73: stw r9,0(r3) |
70 | addi r3,r3,4 | |
71 | 1: bf cr7*4+2,2f /* len & 2 ? */ | |
f72b728b MN |
72 | 44: lhz r9,8(r4) |
73 | addi r4,r4,2 | |
14cf11af PM |
74 | 74: sth r9,0(r3) |
75 | addi r3,r3,2 | |
76 | 2: bf cr7*4+3,3f /* len & 1 ? */ | |
f72b728b | 77 | 45: lbz r9,8(r4) |
14cf11af PM |
78 | 75: stb r9,0(r3) |
79 | 3: li r3,0 /* success: 0 bytes not copied */ | |
80 | blr | |
81 | ||
82 | .Lsrc_unaligned: | |
83 | srdi r6,r5,3 /* r6 = # whole dwords in len */ | |
84 | addi r5,r5,-16 | |
85 | subf r4,r0,r4 /* align src down to an 8-byte boundary */ | |
86 | srdi r7,r5,4 | |
87 | sldi r10,r0,3 /* r10 = src misalignment in bits */ | |
88 | cmpldi cr6,r6,3 | |
89 | andi. r5,r5,7 | |
90 | mtctr r7 | |
91 | subfic r11,r10,64 /* r11 = 64 - r10, complementary shift count */ | |
92 | add r5,r5,r0 | |
93 | bt cr7*4+0,28f | |
94 | ||
95 | 24: ld r9,0(r4) /* 3+2n loads, 2+2n stores */ | |
96 | 25: ld r0,8(r4) | |
97 | sld r6,r9,r10 | |
98 | 26: ldu r9,16(r4) | |
99 | srd r7,r0,r11 | |
100 | sld r8,r0,r10 | |
101 | or r7,r7,r6 /* splice adjacent dwords into one aligned dword */ | |
102 | blt cr6,79f | |
103 | 27: ld r0,8(r4) | |
104 | b 2f | |
105 | ||
106 | 28: ld r0,0(r4) /* 4+2n loads, 3+2n stores */ | |
107 | 29: ldu r9,8(r4) | |
108 | sld r8,r0,r10 | |
109 | addi r3,r3,-8 | |
110 | blt cr6,5f | |
111 | 30: ld r0,8(r4) | |
112 | srd r12,r9,r11 | |
113 | sld r6,r9,r10 | |
114 | 31: ldu r9,16(r4) | |
115 | or r12,r8,r12 | |
116 | srd r7,r0,r11 | |
117 | sld r8,r0,r10 | |
118 | addi r3,r3,16 | |
119 | beq cr6,78f | |
120 | ||
121 | 1: or r7,r7,r6 | |
122 | 32: ld r0,8(r4) | |
123 | 76: std r12,8(r3) | |
124 | 2: srd r12,r9,r11 | |
125 | sld r6,r9,r10 | |
126 | 33: ldu r9,16(r4) | |
127 | or r12,r8,r12 | |
128 | 77: stdu r7,16(r3) | |
129 | srd r7,r0,r11 | |
130 | sld r8,r0,r10 | |
131 | bdnz 1b | |
132 | ||
133 | 78: std r12,8(r3) | |
134 | or r7,r7,r6 | |
135 | 79: std r7,16(r3) | |
136 | 5: srd r12,r9,r11 | |
137 | or r12,r8,r12 | |
138 | 80: std r12,24(r3) | |
139 | bne 6f | |
140 | li r3,0 | |
141 | blr | |
142 | 6: cmpwi cr1,r5,8 | |
143 | addi r3,r3,32 | |
144 | sld r9,r9,r10 | |
f72b728b | 145 | ble cr1,7f |
14cf11af PM |
146 | 34: ld r0,8(r4)
147 | srd r7,r0,r11 | |
148 | or r9,r7,r9 | |
f72b728b MN |
149 | 7: |
150 | bf cr7*4+1,1f /* len & 4 ? */ | |
151 | rotldi r9,r9,32 | |
152 | 94: stw r9,0(r3) | |
153 | addi r3,r3,4 | |
154 | 1: bf cr7*4+2,2f /* len & 2 ? */ | |
155 | rotldi r9,r9,16 | |
156 | 95: sth r9,0(r3) | |
157 | addi r3,r3,2 | |
158 | 2: bf cr7*4+3,3f /* len & 1 ? */ | |
159 | rotldi r9,r9,8 | |
160 | 96: stb r9,0(r3) | |
161 | 3: li r3,0 | |
162 | blr | |
14cf11af PM |
163 | 
164 | .Ldst_unaligned: | |
3467bfd3 | 165 | PPC_MTOCRF 0x01,r6 /* put #bytes to 8B bdry into cr7 */ |
14cf11af PM |
166 | subf r5,r6,r5
167 | li r7,0 /* r7 = byte offset within the leading fragment */ | |
a4e22f02 | 168 | cmpldi cr1,r5,16 |
14cf11af PM |
169 | bf cr7*4+3,1f
170 | 35: lbz r0,0(r4) | |
171 | 81: stb r0,0(r3) | |
172 | addi r7,r7,1 | |
173 | 1: bf cr7*4+2,2f | |
174 | 36: lhzx r0,r7,r4 | |
175 | 82: sthx r0,r7,r3 | |
176 | addi r7,r7,2 | |
177 | 2: bf cr7*4+1,3f | |
178 | 37: lwzx r0,r7,r4 | |
179 | 83: stwx r0,r7,r3 | |
3467bfd3 | 180 | 3: PPC_MTOCRF 0x01,r5 /* refresh cr7 with low bits of remaining len */ |
14cf11af PM |
181 | add r4,r6,r4
182 | add r3,r6,r3 | |
183 | b .Ldst_aligned | |
184 | ||
185 | .Lshort_copy: | |
186 | bf cr7*4+0,1f /* len & 8 ? */ | |
187 | 38: lwz r0,0(r4) | |
188 | 39: lwz r9,4(r4) | |
189 | addi r4,r4,8 | |
190 | 84: stw r0,0(r3) | |
191 | 85: stw r9,4(r3) | |
192 | addi r3,r3,8 | |
193 | 1: bf cr7*4+1,2f /* len & 4 ? */ | |
194 | 40: lwz r0,0(r4) | |
195 | addi r4,r4,4 | |
196 | 86: stw r0,0(r3) | |
197 | addi r3,r3,4 | |
198 | 2: bf cr7*4+2,3f /* len & 2 ? */ | |
199 | 41: lhz r0,0(r4) | |
200 | addi r4,r4,2 | |
201 | 87: sth r0,0(r3) | |
202 | addi r3,r3,2 | |
203 | 3: bf cr7*4+3,4f /* len & 1 ? */ | |
204 | 42: lbz r0,0(r4) | |
205 | 88: stb r0,0(r3) | |
206 | 4: li r3,0 /* all bytes copied */ | |
207 | blr | |
208 | ||
209 | /* | |
210 | * exception handlers follow | |
211 | * we have to return the number of bytes not copied | |
212 | * for an exception on a load, we set the rest of the destination to 0 | |
213 | */ | |
214 | ||
215 | 136: | |
216 | 137: | |
217 | add r3,r3,r7 /* r7 = offset already copied in .Ldst_unaligned lead-in */ | |
218 | b 1f | |
219 | 130: | |
220 | 131: | |
221 | addi r3,r3,8 | |
222 | 120: | |
223 | 122: | |
224 | 124: | |
225 | 125: | |
226 | 126: | |
227 | 127: | |
228 | 128: | |
229 | 129: | |
230 | 133: | |
231 | addi r3,r3,8 | |
232 | 121: | |
233 | 132: | |
234 | addi r3,r3,8 | |
14cf11af PM |
235 | 134: 
236 | 135: | |
237 | 138: | |
238 | 139: | |
239 | 140: | |
240 | 141: | |
241 | 142: | |
f72b728b MN |
242 | 123: 
243 | 144: | |
244 | 145: | |
14cf11af PM |
245 | 
246 | /* | |
247 | * here we have had a fault on a load and r3 points to the first | |
248 | * unmodified byte of the destination | |
249 | */ | |
250 | 1: ld r6,-24(r1) /* reload original dest */ | |
251 | ld r4,-16(r1) /* reload original src */ | |
252 | ld r5,-8(r1) /* reload original len */ | |
253 | subf r6,r6,r3 /* r6 = # bytes already stored */ | |
254 | add r4,r4,r6 | |
255 | subf r5,r6,r5 /* #bytes left to go */ | |
256 | ||
257 | /* | |
258 | * first see if we can copy any more bytes before hitting another exception | |
259 | */ | |
260 | mtctr r5 | |
261 | 43: lbz r0,0(r4) | |
262 | addi r4,r4,1 | |
263 | 89: stb r0,0(r3) | |
264 | addi r3,r3,1 | |
265 | bdnz 43b | |
266 | li r3,0 /* huh? all copied successfully this time? */ | |
267 | blr | |
268 | ||
269 | /* | |
270 | * here we have trapped again, need to clear ctr bytes starting at r3 | |
271 | */ | |
272 | 143: mfctr r5 | |
273 | li r0,0 | |
274 | mr r4,r3 | |
275 | mr r3,r5 /* return the number of bytes not copied */ | |
276 | 1: andi. r9,r4,7 /* zero bytewise up to an 8-byte boundary */ | |
277 | beq 3f | |
278 | 90: stb r0,0(r4) | |
279 | addic. r5,r5,-1 | |
280 | addi r4,r4,1 | |
281 | bne 1b | |
282 | blr | |
283 | 3: cmpldi cr1,r5,8 | |
284 | srdi r9,r5,3 | |
285 | andi. r5,r5,7 | |
286 | blt cr1,93f | |
287 | mtctr r9 | |
288 | 91: std r0,0(r4) /* zero a dword at a time */ | |
289 | addi r4,r4,8 | |
290 | bdnz 91b | |
291 | 93: beqlr | |
292 | mtctr r5 | |
293 | 92: stb r0,0(r4) /* zero the trailing odd bytes */ | |
294 | addi r4,r4,1 | |
295 | bdnz 92b | |
296 | blr | |
297 | ||
298 | /* | |
299 | * exception handlers for stores: we just need to work | |
300 | * out how many bytes weren't copied | |
301 | */ | |
302 | 182: | |
303 | 183: | |
304 | add r3,r3,r7 | |
305 | b 1f | |
306 | 180: | |
307 | addi r3,r3,8 | |
308 | 171: | |
309 | 177: | |
310 | addi r3,r3,8 | |
311 | 170: | |
312 | 172: | |
313 | 176: | |
314 | 178: | |
315 | addi r3,r3,4 | |
316 | 185: | |
317 | addi r3,r3,4 | |
318 | 173: | |
319 | 174: | |
320 | 175: | |
321 | 179: | |
322 | 181: | |
323 | 184: | |
324 | 186: | |
325 | 187: | |
326 | 188: | |
327 | 189: | |
f72b728b MN |
328 | 194: 
329 | 195: | |
330 | 196: | |
14cf11af PM |
331 | 1: 
332 | ld r6,-24(r1) /* original dest */ | |
333 | ld r5,-8(r1) /* original len */ | |
334 | add r6,r6,r5 /* r6 = one past the end of the destination */ | |
335 | subf r3,r3,r6 /* #bytes not copied */ | |
336 | 190: | |
337 | 191: | |
338 | 192: | |
339 | blr /* #bytes not copied in r3 */ | |
340 | ||
341 | .section __ex_table,"a" | |
342 | .align 3 | |
343 | .llong 20b,120b /* pairs of (faulting insn, fixup handler) */ | |
344 | .llong 21b,121b | |
345 | .llong 70b,170b | |
346 | .llong 22b,122b | |
347 | .llong 71b,171b | |
348 | .llong 72b,172b | |
349 | .llong 23b,123b | |
350 | .llong 73b,173b | |
f72b728b | 351 | .llong 44b,144b |
14cf11af | 352 | .llong 74b,174b |
f72b728b | 353 | .llong 45b,145b |
14cf11af PM |
354 | .llong 75b,175b
355 | .llong 24b,124b | |
356 | .llong 25b,125b | |
357 | .llong 26b,126b | |
358 | .llong 27b,127b | |
359 | .llong 28b,128b | |
360 | .llong 29b,129b | |
361 | .llong 30b,130b | |
362 | .llong 31b,131b | |
363 | .llong 32b,132b | |
364 | .llong 76b,176b | |
365 | .llong 33b,133b | |
366 | .llong 77b,177b | |
367 | .llong 78b,178b | |
368 | .llong 79b,179b | |
369 | .llong 80b,180b | |
370 | .llong 34b,134b | |
f72b728b MN |
371 | .llong 94b,194b
372 | .llong 95b,195b | |
373 | .llong 96b,196b | |
14cf11af PM |
374 | .llong 35b,135b
375 | .llong 81b,181b | |
376 | .llong 36b,136b | |
377 | .llong 82b,182b | |
378 | .llong 37b,137b | |
379 | .llong 83b,183b | |
380 | .llong 38b,138b | |
381 | .llong 39b,139b | |
382 | .llong 84b,184b | |
383 | .llong 85b,185b | |
384 | .llong 40b,140b | |
385 | .llong 86b,186b | |
386 | .llong 41b,141b | |
387 | .llong 87b,187b | |
388 | .llong 42b,142b | |
389 | .llong 88b,188b | |
390 | .llong 43b,143b | |
391 | .llong 89b,189b | |
392 | .llong 90b,190b | |
393 | .llong 91b,191b | |
394 | .llong 92b,192b | |
395 | ||
396 | .text | |
397 | ||
398 | /* | |
399 | * Routine to copy a whole page of data, optimized for POWER4. | |
400 | * On POWER4 it is more than 50% faster than the simple loop | |
401 | * above (following the .Ldst_aligned label) but it runs slightly | |
402 | * slower on POWER3. | |
403 | */ | |
3c726f8d | 404 | .Lcopy_page_4K: |
14cf11af PM |
405 | std r31,-32(1)
406 | std r30,-40(1) /* save callee-saved r20..r31 below the stack ptr */ | |
407 | std r29,-48(1) | |
408 | std r28,-56(1) | |
409 | std r27,-64(1) | |
410 | std r26,-72(1) | |
411 | std r25,-80(1) | |
412 | std r24,-88(1) | |
413 | std r23,-96(1) | |
414 | std r22,-104(1) | |
415 | std r21,-112(1) | |
416 | std r20,-120(1) | |
417 | li r5,4096/32 - 1 /* count of 32-byte chunks, biased by one */ | |
418 | addi r3,r3,-8 | |
419 | li r0,5 /* inner-loop trip count per outer pass */ | |
420 | 0: addi r5,r5,-24 | |
421 | mtctr r0 | |
422 | 20: ld r22,640(4) /* six load streams staggered 128B apart */ | |
423 | 21: ld r21,512(4) | |
424 | 22: ld r20,384(4) | |
425 | 23: ld r11,256(4) | |
426 | 24: ld r9,128(4) | |
427 | 25: ld r7,0(4) | |
428 | 26: ld r25,648(4) | |
429 | 27: ld r24,520(4) | |
430 | 28: ld r23,392(4) | |
431 | 29: ld r10,264(4) | |
432 | 30: ld r8,136(4) | |
433 | 31: ldu r6,8(4) | |
434 | cmpwi r5,24 | |
435 | 1: | |
436 | 32: std r22,648(3) | |
437 | 33: std r21,520(3) | |
438 | 34: std r20,392(3) | |
439 | 35: std r11,264(3) | |
440 | 36: std r9,136(3) | |
441 | 37: std r7,8(3) | |
442 | 38: ld r28,648(4) | |
443 | 39: ld r27,520(4) | |
444 | 40: ld r26,392(4) | |
445 | 41: ld r31,264(4) | |
446 | 42: ld r30,136(4) | |
447 | 43: ld r29,8(4) | |
448 | 44: std r25,656(3) | |
449 | 45: std r24,528(3) | |
450 | 46: std r23,400(3) | |
451 | 47: std r10,272(3) | |
452 | 48: std r8,144(3) | |
453 | 49: std r6,16(3) | |
454 | 50: ld r22,656(4) | |
455 | 51: ld r21,528(4) | |
456 | 52: ld r20,400(4) | |
457 | 53: ld r11,272(4) | |
458 | 54: ld r9,144(4) | |
459 | 55: ld r7,16(4) | |
460 | 56: std r28,664(3) | |
461 | 57: std r27,536(3) | |
462 | 58: std r26,408(3) | |
463 | 59: std r31,280(3) | |
464 | 60: std r30,152(3) | |
465 | 61: stdu r29,24(3) | |
466 | 62: ld r25,664(4) | |
467 | 63: ld r24,536(4) | |
468 | 64: ld r23,408(4) | |
469 | 65: ld r10,280(4) | |
470 | 66: ld r8,152(4) | |
471 | 67: ldu r6,24(4) | |
472 | bdnz 1b | |
473 | 68: std r22,648(3) /* drain the last batch of loaded dwords */ | |
474 | 69: std r21,520(3) | |
475 | 70: std r20,392(3) | |
476 | 71: std r11,264(3) | |
477 | 72: std r9,136(3) | |
478 | 73: std r7,8(3) | |
479 | 74: addi r4,r4,640 | |
480 | 75: addi r3,r3,648 | |
481 | bge 0b | |
482 | mtctr r5 | |
483 | 76: ld r7,0(4) | |
484 | 77: ld r8,8(4) | |
485 | 78: ldu r9,16(4) | |
486 | 3: | |
487 | 79: ld r10,8(4) | |
488 | 80: std r7,8(3) | |
489 | 81: ld r7,16(4) | |
490 | 82: std r8,16(3) | |
491 | 83: ld r8,24(4) | |
492 | 84: std r9,24(3) | |
493 | 85: ldu r9,32(4) | |
494 | 86: stdu r10,32(3) | |
495 | bdnz 3b | |
496 | 4: | |
497 | 87: ld r10,8(4) | |
498 | 88: std r7,8(3) | |
499 | 89: std r8,16(3) | |
500 | 90: std r9,24(3) | |
501 | 91: std r10,32(3) | |
502 | 9: ld r20,-120(1) /* restore callee-saved GPRs */ | |
503 | ld r21,-112(1) | |
504 | ld r22,-104(1) | |
505 | ld r23,-96(1) | |
506 | ld r24,-88(1) | |
507 | ld r25,-80(1) | |
508 | ld r26,-72(1) | |
509 | ld r27,-64(1) | |
510 | ld r28,-56(1) | |
511 | ld r29,-48(1) | |
512 | ld r30,-40(1) | |
513 | ld r31,-32(1) | |
514 | li r3,0 /* success: 0 bytes not copied */ | |
515 | blr | |
516 | ||
517 | /* | |
518 | * on an exception, reset to the beginning and jump back into the | |
519 | * standard __copy_tofrom_user | |
520 | */ | |
521 | 100: ld r20,-120(1) /* restore callee-saved GPRs first */ | |
522 | ld r21,-112(1) | |
523 | ld r22,-104(1) | |
524 | ld r23,-96(1) | |
525 | ld r24,-88(1) | |
526 | ld r25,-80(1) | |
527 | ld r26,-72(1) | |
528 | ld r27,-64(1) | |
529 | ld r28,-56(1) | |
530 | ld r29,-48(1) | |
531 | ld r30,-40(1) | |
532 | ld r31,-32(1) | |
533 | ld r3,-24(r1) /* reload saved dest/src, redo via generic path */ | |
534 | ld r4,-16(r1) | |
535 | li r5,4096 | |
536 | b .Ldst_aligned | |
537 | ||
538 | .section __ex_table,"a" | |
539 | .align 3 | |
540 | .llong 20b,100b /* every faulting insn restarts via 100: */ | |
541 | .llong 21b,100b | |
542 | .llong 22b,100b | |
543 | .llong 23b,100b | |
544 | .llong 24b,100b | |
545 | .llong 25b,100b | |
546 | .llong 26b,100b | |
547 | .llong 27b,100b | |
548 | .llong 28b,100b | |
549 | .llong 29b,100b | |
550 | .llong 30b,100b | |
551 | .llong 31b,100b | |
552 | .llong 32b,100b | |
553 | .llong 33b,100b | |
554 | .llong 34b,100b | |
555 | .llong 35b,100b | |
556 | .llong 36b,100b | |
557 | .llong 37b,100b | |
558 | .llong 38b,100b | |
559 | .llong 39b,100b | |
560 | .llong 40b,100b | |
561 | .llong 41b,100b | |
562 | .llong 42b,100b | |
563 | .llong 43b,100b | |
564 | .llong 44b,100b | |
565 | .llong 45b,100b | |
566 | .llong 46b,100b | |
567 | .llong 47b,100b | |
568 | .llong 48b,100b | |
569 | .llong 49b,100b | |
570 | .llong 50b,100b | |
571 | .llong 51b,100b | |
572 | .llong 52b,100b | |
573 | .llong 53b,100b | |
574 | .llong 54b,100b | |
575 | .llong 55b,100b | |
576 | .llong 56b,100b | |
577 | .llong 57b,100b | |
578 | .llong 58b,100b | |
579 | .llong 59b,100b | |
580 | .llong 60b,100b | |
581 | .llong 61b,100b | |
582 | .llong 62b,100b | |
583 | .llong 63b,100b | |
584 | .llong 64b,100b | |
585 | .llong 65b,100b | |
586 | .llong 66b,100b | |
587 | .llong 67b,100b | |
588 | .llong 68b,100b | |
589 | .llong 69b,100b | |
590 | .llong 70b,100b | |
591 | .llong 71b,100b | |
592 | .llong 72b,100b | |
593 | .llong 73b,100b | |
594 | .llong 74b,100b | |
595 | .llong 75b,100b | |
596 | .llong 76b,100b | |
597 | .llong 77b,100b | |
598 | .llong 78b,100b | |
599 | .llong 79b,100b | |
600 | .llong 80b,100b | |
601 | .llong 81b,100b | |
602 | .llong 82b,100b | |
603 | .llong 83b,100b | |
604 | .llong 84b,100b | |
605 | .llong 85b,100b | |
606 | .llong 86b,100b | |
607 | .llong 87b,100b | |
608 | .llong 88b,100b | |
609 | .llong 89b,100b | |
610 | .llong 90b,100b | |
611 | .llong 91b,100b