/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

#ifndef SELFTEST_CASE
/* 0 == don't use VMX, 1 == use VMX */
#define SELFTEST_CASE	0
#endif
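/*
 * Note: SELFTEST_CASE is only expected to be overridden when this file is
 * built into the user-space copyloops selftests, which build the routine
 * once per case to force each path; a normal kernel build leaves it at 0 and
 * relies on the run-time CPU feature section below.
 */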

#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif
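/*
 * LVS/VPERM are used by the unaligned copy loop below: LVS generates the
 * permute control vector for the source alignment (lvsl on big endian,
 * lvsr on little endian) and VPERM merges two adjacent aligned loads into
 * one aligned store. Swapping the data operands in the little endian case
 * lets the same VPERM(dst,prev,next,ctl) sequence work for both layouts.
 */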
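/*
 * Each errN macro tags the user access that follows it with an exception
 * table entry: the numbered local label lands on that instruction and
 * EX_TABLE points its fixup at the matching .Ldo_errN handler. err1 is used
 * where no stack frame is active, err2 inside the integer cacheline loop
 * (extra GPRs saved), err3/err4 inside the VMX paths.
 */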
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

#ifdef CONFIG_ALTIVEC
	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm

	.macro err4
400:
	EX_TABLE(400b,.Ldo_err4)
	.endm


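/*
 * Fault fixups. .Ldo_err4/.Ldo_err3 restore the non-volatile GPRs used by
 * the VMX loop and drop out of VMX via exit_vmx_usercopy; .Ldo_err2 restores
 * the GPRs saved for the integer cacheline loop; .Ldo_err1 then reloads the
 * original arguments and hands the whole copy to __copy_tofrom_user_base,
 * which redoes it with its own fault handling and works out the
 * bytes-not-copied return value.
 */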
.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Ldo_err3:
	bl	exit_vmx_usercopy
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */

.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	b	__copy_tofrom_user_base


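/*
 * __copy_tofrom_user_power7(to=r3, from=r4, n=r5)
 *
 * Like the other __copy_tofrom_user variants this returns the number of
 * bytes NOT copied in r3 (0 on success).
 */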
_GLOBAL(__copy_tofrom_user_power7)
	cmpldi	r5,16
	cmpldi	cr1,r5,3328

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy

#ifdef CONFIG_ALTIVEC
test_feature = SELFTEST_CASE
BEGIN_FTR_SECTION
	bgt	cr1,.Lvmx_copy
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif
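/*
 * On CPUs with Altivec (the feature section above is patched at boot) copies
 * larger than 3328 bytes take the VMX path; smaller copies, and all copies
 * on non-Altivec CPUs, fall through to the integer version below, where the
 * cost of enabling VMX is not worth paying.
 */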

.Lnonvmx_copy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

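	/*
	 * cr7 now holds the low bits of -src: bit cr7*4+3 selects a byte
	 * copy, cr7*4+2 a halfword and cr7*4+1 a word, after which the
	 * source is 8 byte aligned. r6 keeps the count of bytes copied here
	 * so it can be subtracted from the length at 3: below.
	 */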
	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7
	mtctr	r6

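	/*
	 * ctr holds the number of whole 128B cache lines. Each iteration
	 * below loads a full line into 16 GPRs (r0, r6-r12, r14-r21) before
	 * storing any of it, which is why the non-volatiles were saved above.
	 */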
	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

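	/*
	 * cr7 bits now select the remaining 64, 32 and 16 byte chunks;
	 * whatever is left under 16 bytes is handled by .Lshort_copy.
	 */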
6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr

.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

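/*
 * The VMX path: save LR and create a stack frame, ask enter_vmx_usercopy
 * whether VMX may be used in this context, start hardware prefetch streams
 * for source and destination, then copy either with plain lvx/stvx (source
 * and destination mutually 16 byte aligned) or with the slower vperm based
 * loop further below.
 */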
.Lvmx_copy:
#ifdef CONFIG_ALTIVEC
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_usercopy
	cmpwi	cr1,r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

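	/*
	 * The original r3/r4/r5 were saved before the frame was created, so
	 * after the stdu above those slots sit at STK_REG(R29..R31) of the
	 * new frame, which is where they were just reloaded from. cr1
	 * remembers whether enter_vmx_usercopy succeeded; it is only tested
	 * after the prefetch setup below.
	 */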
	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32

	/* setup read stream 0 */
	dcbt	0,r6,0b01000	/* addr from */
	dcbt	0,r7,0b01010	/* length and depth from */
	/* setup write stream 1 */
	dcbtst	0,r9,0b01000	/* addr to */
	dcbtst	0,r10,0b01010	/* length and depth to */
	eieio
	dcbt	0,r8,0b01010	/* all streams GO */

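	/*
	 * The eieio orders the stream-setup touches above against the final
	 * dcbt, whose GO bit starts all the configured streams. The result
	 * of enter_vmx_usercopy is only checked now, so the prefetch is
	 * already running even if we end up falling back to the non-VMX
	 * copy.
	 */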
	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
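	/*
	 * "Relatively aligned" means the low four bits of source and
	 * destination agree, so once the destination has been brought to a
	 * 16 byte boundary the source is 16 byte aligned as well and plain
	 * lvx/stvx can be used.
	 */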
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

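	/*
	 * r9/r10/r11 are the 16/32/48 byte offsets used by the indexed
	 * lvx/stvx forms; r12 and r14-r16 are set up later for the 64-112
	 * byte offsets of the full cacheline loop.
	 */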
	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,0,r4
err4;	lvx	v6,r4,r9
err4;	lvx	v5,r4,r10
err4;	lvx	v4,r4,r11
err4;	lvx	v3,r4,r12
err4;	lvx	v2,r4,r14
err4;	lvx	v1,r4,r15
err4;	lvx	v0,r4,r16
	addi	r4,r4,128
err4;	stvx	v7,0,r3
err4;	stvx	v6,r3,r9
err4;	stvx	v5,r3,r10
err4;	stvx	v4,r3,r11
err4;	stvx	v3,r3,r12
err4;	stvx	v2,r3,r14
err4;	stvx	v1,r3,r15
err4;	stvx	v0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */
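	/*
	 * exit_vmx_usercopy returns 0, so the tail call above also provides
	 * the "all bytes copied" return value.
	 */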
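/*
 * Source and destination have different 16 byte alignment. The destination
 * is aligned and each store is then built from two overlapping aligned
 * loads: v0 always holds the previously loaded quadword and
 * VPERM(dst,prev,next,v16) selects the right bytes out of the pair using
 * the control vector generated by LVS. Because the loop always loads one
 * quadword ahead, r4 is wound back by 16 before the final sub-16 byte tail.
 */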
.Lvmx_unaligned_copy:
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	LVS(v16,0,r4)		/* Setup permute control vector */
err3;	lvx	v0,0,r4
	addi	r4,r4,16

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16
	vor	v0,v1,v1

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,0,r4
	VPERM(v8,v0,v7,v16)
err4;	lvx	v6,r4,r9
	VPERM(v9,v7,v6,v16)
err4;	lvx	v5,r4,r10
	VPERM(v10,v6,v5,v16)
err4;	lvx	v4,r4,r11
	VPERM(v11,v5,v4,v16)
err4;	lvx	v3,r4,r12
	VPERM(v12,v4,v3,v16)
err4;	lvx	v2,r4,r14
	VPERM(v13,v3,v2,v16)
err4;	lvx	v1,r4,r15
	VPERM(v14,v2,v1,v16)
err4;	lvx	v0,r4,r16
	VPERM(v15,v1,v0,v16)
	addi	r4,r4,128
err4;	stvx	v8,0,r3
err4;	stvx	v9,r3,r9
err4;	stvx	v10,r3,r10
err4;	stvx	v11,r3,r11
err4;	stvx	v12,r3,r12
err4;	stvx	v13,r3,r14
err4;	stvx	v14,r3,r15
err4;	stvx	v15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */