/* arch/powerpc/lib/copyuser_power7.S */
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

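/*
 * Endian helpers for the unaligned VMX path: big-endian uses lvsl
 * with the natural vperm operand order, little-endian uses lvsr with
 * the source operands swapped, which yields the equivalent permute.
 */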
#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif

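/*
 * The errN macros plant an __ex_table entry pairing each user-memory
 * access with a fixup label, so a fault branches to .Ldo_errN rather
 * than oopsing. Higher numbers unwind progressively more state.
 */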
	.macro err1
100:
	.section __ex_table,"a"
	.align 3
	.llong 100b,.Ldo_err1
	.previous
	.endm

	.macro err2
200:
	.section __ex_table,"a"
	.align 3
	.llong 200b,.Ldo_err2
	.previous
	.endm

#ifdef CONFIG_ALTIVEC
	.macro err3
300:
	.section __ex_table,"a"
	.align 3
	.llong 300b,.Ldo_err3
	.previous
	.endm

	.macro err4
400:
	.section __ex_table,"a"
	.align 3
	.llong 400b,.Ldo_err4
	.previous
	.endm


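/*
 * Fault fixups. Each handler restores whatever the faulting loop had
 * live (callee-saved GPRs, VMX state), pops any stack frame, reloads
 * the original arguments and retries the copy with the byte-accurate
 * base routine.
 */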
.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Ldo_err3:
	bl	exit_vmx_usercopy
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */

.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	b	__copy_tofrom_user_base


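/*
 * Entry: r3 = dest, r4 = src, r5 = length; returns 0 on success.
 * Copies under 16 bytes go straight to .Lshort_copy; with
 * CONFIG_ALTIVEC, copies over 4096 bytes take the VMX path. The
 * original arguments are stashed below the stack pointer so the
 * fault handlers can restart the copy from scratch.
 */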
_GLOBAL(__copy_tofrom_user_power7)
#ifdef CONFIG_ALTIVEC
	cmpldi	r5,16
	cmpldi	cr1,r5,4096

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy
	bgt	cr1,.Lvmx_copy
#else
	cmpldi	r5,16

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy
#endif

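/*
 * Scalar fallback. (-src & 7) is the byte count needed to 8B-align
 * the source; its bits are moved into cr7 so the bf instructions
 * below peel off 1, 2 and 4 byte head copies as required.
 */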
.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

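/*
 * These accesses use err2 rather than err1: a fault inside this loop
 * must also restore r14-r22 before unwinding.
 */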
	srdi	r6,r5,7
	mtctr	r6

	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

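/*
 * The low four bits of the remaining length drive cr7, selecting
 * 8, 4, 2 and 1 byte moves in turn to cover any 0-15 byte tail.
 */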
	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr

.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

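/*
 * VMX path. enter_vmx_usercopy returns 0 when vector state cannot be
 * used here (checked via cr1 below); in that case we still program
 * the prefetch streams before falling back to the scalar loop.
 */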
#ifdef CONFIG_ALTIVEC
.Lvmx_copy:
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_usercopy
	cmpwi	cr1,r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

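/*
 * The .machine "power4" push/pop lets the assembler accept the
 * three-operand dcbt forms used here: TH=0b01000 supplies a stream's
 * start address, TH=0b01010 its length/depth control word, and the
 * final dcbt after the eieio sets GO to start all configured streams.
 */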
.machine push
.machine "power4"
	/* setup read stream 0 */
	dcbt	r0,r6,0b01000	/* addr from */
	dcbt	r0,r7,0b01010	/* length and depth from */
	/* setup write stream 1 */
	dcbtst	r0,r9,0b01000	/* addr to */
	dcbtst	r0,r10,0b01010	/* length and depth to */
	eieio
	dcbt	r0,r8,0b01010	/* all streams GO */
.machine pop

	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

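/*
 * r14-r16 carry extra vector offsets across the main loop, so its
 * accesses use err4, whose fixup restores them before leaving VMX.
 */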
7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
err4;	lvx	vr6,r4,r9
err4;	lvx	vr5,r4,r10
err4;	lvx	vr4,r4,r11
err4;	lvx	vr3,r4,r12
err4;	lvx	vr2,r4,r14
err4;	lvx	vr1,r4,r15
err4;	lvx	vr0,r4,r16
	addi	r4,r4,128
err4;	stvx	vr7,r0,r3
err4;	stvx	vr6,r3,r9
err4;	stvx	vr5,r3,r10
err4;	stvx	vr4,r3,r11
err4;	stvx	vr3,r3,r12
err4;	stvx	vr2,r3,r14
err4;	stvx	vr1,r3,r15
err4;	stvx	vr0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
err3;	lvx	vr2,r4,r9
err3;	lvx	vr1,r4,r10
err3;	lvx	vr0,r4,r11
	addi	r4,r4,64
err3;	stvx	vr3,r0,r3
err3;	stvx	vr2,r3,r9
err3;	stvx	vr1,r3,r10
err3;	stvx	vr0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
err3;	lvx	vr0,r4,r9
	addi	r4,r4,32
err3;	stvx	vr1,r0,r3
err3;	stvx	vr0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	addi	r4,r4,16
err3;	stvx	vr1,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */

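/*
 * Source and destination are misaligned relative to each other, so
 * every aligned destination store is built from two consecutive
 * aligned source loads merged with vperm. LVS computes the permute
 * control vector from the source misalignment, and the trailing
 * vector of each step is carried forward in vr0.
 */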
.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	LVS(vr16,0,r4)		/* Setup permute control vector */
err3;	lvx	vr0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
err3;	lvx	vr1,r0,r4
	VPERM(vr8,vr0,vr1,vr16)
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16
	vor	vr0,vr1,vr1

5:	bf	cr7*4+2,6f
err3;	lvx	vr1,r0,r4
	VPERM(vr8,vr0,vr1,vr16)
err3;	lvx	vr0,r4,r9
	VPERM(vr9,vr1,vr0,vr16)
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	vr3,r0,r4
	VPERM(vr8,vr0,vr3,vr16)
err3;	lvx	vr2,r4,r9
	VPERM(vr9,vr3,vr2,vr16)
err3;	lvx	vr1,r4,r10
	VPERM(vr10,vr2,vr1,vr16)
err3;	lvx	vr0,r4,r11
	VPERM(vr11,vr1,vr0,vr16)
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

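/*
 * The 128B loop below is software pipelined: vr0 holds the previous
 * iteration's final load, so eight aligned loads are merged pairwise
 * by vperm into eight aligned stores per iteration.
 */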
7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	vr7,r0,r4
	VPERM(vr8,vr0,vr7,vr16)
err4;	lvx	vr6,r4,r9
	VPERM(vr9,vr7,vr6,vr16)
err4;	lvx	vr5,r4,r10
	VPERM(vr10,vr6,vr5,vr16)
err4;	lvx	vr4,r4,r11
	VPERM(vr11,vr5,vr4,vr16)
err4;	lvx	vr3,r4,r12
	VPERM(vr12,vr4,vr3,vr16)
err4;	lvx	vr2,r4,r14
	VPERM(vr13,vr3,vr2,vr16)
err4;	lvx	vr1,r4,r15
	VPERM(vr14,vr2,vr1,vr16)
err4;	lvx	vr0,r4,r16
	VPERM(vr15,vr1,vr0,vr16)
	addi	r4,r4,128
err4;	stvx	vr8,r0,r3
err4;	stvx	vr9,r3,r9
err4;	stvx	vr10,r3,r10
err4;	stvx	vr11,r3,r11
err4;	stvx	vr12,r3,r12
err4;	stvx	vr13,r3,r14
err4;	stvx	vr14,r3,r15
err4;	stvx	vr15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	vr3,r0,r4
	VPERM(vr8,vr0,vr3,vr16)
err3;	lvx	vr2,r4,r9
	VPERM(vr9,vr3,vr2,vr16)
err3;	lvx	vr1,r4,r10
	VPERM(vr10,vr2,vr1,vr16)
err3;	lvx	vr0,r4,r11
	VPERM(vr11,vr1,vr0,vr16)
	addi	r4,r4,64
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
err3;	stvx	vr10,r3,r10
err3;	stvx	vr11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	vr1,r0,r4
	VPERM(vr8,vr0,vr1,vr16)
err3;	lvx	vr0,r4,r9
	VPERM(vr9,vr1,vr0,vr16)
	addi	r4,r4,32
err3;	stvx	vr8,r0,r3
err3;	stvx	vr9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	vr1,r0,r4
	VPERM(vr8,vr0,vr1,vr16)
	addi	r4,r4,16
err3;	stvx	vr8,r0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */