arch/powerpc/lib/copyuser_power7.S
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2011
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
#include <asm/ppc_asm.h>

#ifndef SELFTEST_CASE
/* 0 == don't use VMX, 1 == use VMX */
#define SELFTEST_CASE	0
#endif

#ifdef __BIG_ENDIAN__
#define LVS(VRT,RA,RB)		lvsl	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRA,VRB,VRC
#else
#define LVS(VRT,RA,RB)		lvsr	VRT,RA,RB
#define VPERM(VRT,VRA,VRB,VRC)	vperm	VRT,VRB,VRA,VRC
#endif
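
/*
 * LVS/VPERM hide the endian difference in the unaligned (permute) copy
 * path below: big endian uses lvsl with the natural vperm operand order,
 * little endian uses lvsr and swaps the two source vectors so the same
 * instruction sequence produces the correctly shifted result on both.
 */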
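
/*
 * The errN macros mark a user access with a numbered local label and an
 * exception table entry so that a fault branches to the matching
 * .Ldo_errN handler: err1 is used while no stack frame is active, err2
 * inside the integer-copy frame (r14-r22 saved), err3 while VMX is in
 * use, and err4 inside the VMX loops after r14-r16 are saved as well.
 */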
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

#ifdef CONFIG_ALTIVEC
	.macro err3
300:
	EX_TABLE(300b,.Ldo_err3)
	.endm

	.macro err4
400:
	EX_TABLE(400b,.Ldo_err4)
	.endm


.Ldo_err4:
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Ldo_err3:
	bl	exit_vmx_usercopy
	ld	r0,STACKFRAMESIZE+16(r1)
	mtlr	r0
	b	.Lexit
#endif /* CONFIG_ALTIVEC */

.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
.Lexit:
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	ld	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	ld	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	ld	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
	b	__copy_tofrom_user_base
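
/*
 * __copy_tofrom_user_power7(to=r3, from=r4, n=r5)
 *
 * Returns the number of bytes not copied (0 on success). Copies of less
 * than 16 bytes go straight to .Lshort_copy; copies larger than 3328
 * bytes use VMX when the CPU has Altivec. On a fault the original
 * arguments are reloaded and the whole copy is retried with
 * __copy_tofrom_user_base, which handles the fault and computes the
 * return value.
 */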
_GLOBAL(__copy_tofrom_user_power7)
	cmpldi	r5,16
	cmpldi	cr1,r5,3328

	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r5,-STACKFRAMESIZE+STK_REG(R29)(r1)

	blt	.Lshort_copy
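
/*
 * test_feature lets the copyloops selftests build force this branch one
 * way or the other via SELFTEST_CASE; in the kernel proper the feature
 * section is patched at boot according to CPU_FTR_ALTIVEC.
 */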
#ifdef CONFIG_ALTIVEC
test_feature = SELFTEST_CASE
BEGIN_FTR_SECTION
	bgt	cr1,.Lvmx_copy
END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
#endif

.Lnonvmx_copy:
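	/*
	 * Throughout this routine mtocrf 0x01,rN copies the low four bits
	 * of a residual byte count into cr7; each "bf cr7*4+n" then skips
	 * the corresponding power-of-two sized move when that bit is
	 * clear, so odd-sized heads and tails are handled without loops.
	 */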
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

3:	sub	r5,r5,r6
	cmpldi	r5,128
	blt	5f

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)

	srdi	r6,r5,7
	mtctr	r6
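
	/*
	 * The 128B loop below moves a whole cacheline per iteration using
	 * sixteen GPRs (r0, r6-r12, r14-r21); r13 is avoided because it
	 * holds the PACA pointer on 64-bit kernels.
	 */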
	/* Now do cacheline (128B) sized loads and stores. */
	.align	5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r7,16(r4)
err2;	ld	r8,24(r4)
err2;	ld	r9,32(r4)
err2;	ld	r10,40(r4)
err2;	ld	r11,48(r4)
err2;	ld	r12,56(r4)
err2;	ld	r14,64(r4)
err2;	ld	r15,72(r4)
err2;	ld	r16,80(r4)
err2;	ld	r17,88(r4)
err2;	ld	r18,96(r4)
err2;	ld	r19,104(r4)
err2;	ld	r20,112(r4)
err2;	ld	r21,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r7,16(r3)
err2;	std	r8,24(r3)
err2;	std	r9,32(r3)
err2;	std	r10,40(r3)
err2;	std	r11,48(r3)
err2;	std	r12,56(r3)
err2;	std	r14,64(r3)
err2;	std	r15,72(r3)
err2;	std	r16,80(r3)
err2;	std	r17,88(r3)
err2;	std	r18,96(r3)
err2;	std	r19,104(r3)
err2;	std	r20,112(r3)
err2;	std	r21,120(r3)
	addi	r3,r3,128
	bdnz	4b

	clrldi	r5,r5,(64-7)

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6

6:	bf	cr7*4+1,7f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
err1;	ld	r9,32(r4)
err1;	ld	r10,40(r4)
err1;	ld	r11,48(r4)
err1;	ld	r12,56(r4)
	addi	r4,r4,64
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
err1;	std	r9,32(r3)
err1;	std	r10,40(r3)
err1;	std	r11,48(r3)
err1;	std	r12,56(r3)
	addi	r3,r3,64

	/* Up to 63B to go */
7:	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r7,16(r4)
err1;	ld	r8,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r7,16(r3)
err1;	std	r8,24(r3)
	addi	r3,r3,32

	/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16

9:	clrldi	r5,r5,(64-4)

	/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr

.Lunwind_stack_nonvmx_copy:
	addi	r1,r1,STACKFRAMESIZE
	b	.Lnonvmx_copy

.Lvmx_copy:
#ifdef CONFIG_ALTIVEC
	mflr	r0
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	enter_vmx_usercopy
	cmpwi	cr1,r3,0
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	ld	r5,STK_REG(R29)(r1)
	mtlr	r0

	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side.
	 */
	clrrdi	r6,r4,7
	clrrdi	r9,r3,7
	ori	r9,r9,1		/* stream=1 */

	srdi	r7,r5,7		/* length in cachelines, capped at 0x3FF */
	cmpldi	r7,0x3FF
	ble	1f
	li	r7,0x3FF
1:	lis	r0,0x0E00	/* depth=7 */
	sldi	r7,r7,7
	or	r7,r7,r0
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32
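
	/*
	 * dcbt/dcbtst with TH=0b01000 program a stream's start address;
	 * TH=0b01010 supplies the encoded length, depth and stream ID
	 * built up in r7/r10 above. The final dcbt on r8 sets the GO bit
	 * to start all programmed streams, with the eieio ordering the
	 * setup before it.
	 */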
	/* setup read stream 0 */
	dcbt	0,r6,0b01000	/* addr from */
	dcbt	0,r7,0b01010	/* length and depth from */
	/* setup write stream 1 */
	dcbtst	0,r9,0b01000	/* addr to */
	dcbtst	0,r10,0b01010	/* length and depth to */
	eieio
	dcbt	0,r8,0b01010	/* all streams GO */
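
	/*
	 * The streams are started before this check so that the integer
	 * fallback also benefits from the prefetch. cr1 still holds the
	 * enter_vmx_usercopy() result from before r3 was reloaded: zero
	 * means VMX could not be used, so undo the frame and fall back.
	 */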
	beq	cr1,.Lunwind_stack_nonvmx_copy

	/*
	 * If source and destination are not relatively aligned we use a
	 * slower permute loop.
	 */
	xor	r6,r4,r3
	rldicl.	r6,r6,0,(64-4)
	bne	.Lvmx_unaligned_copy

	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
	.align	5
8:
err4;	lvx	v7,0,r4
err4;	lvx	v6,r4,r9
err4;	lvx	v5,r4,r10
err4;	lvx	v4,r4,r11
err4;	lvx	v3,r4,r12
err4;	lvx	v2,r4,r14
err4;	lvx	v1,r4,r15
err4;	lvx	v0,r4,r16
	addi	r4,r4,128
err4;	stvx	v7,0,r3
err4;	stvx	v6,r3,r9
err4;	stvx	v5,r3,r10
err4;	stvx	v4,r3,r11
err4;	stvx	v3,r3,r12
err4;	stvx	v2,r3,r14
err4;	stvx	v1,r3,r15
err4;	stvx	v0,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
err3;	lvx	v2,r4,r9
err3;	lvx	v1,r4,r10
err3;	lvx	v0,r4,r11
	addi	r4,r4,64
err3;	stvx	v3,0,r3
err3;	stvx	v2,r3,r9
err3;	stvx	v1,r3,r10
err3;	stvx	v0,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
err3;	lvx	v0,r4,r9
	addi	r4,r4,32
err3;	stvx	v1,0,r3
err3;	stvx	v0,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	addi	r4,r4,16
err3;	stvx	v1,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	ld	r0,0(r4)
	addi	r4,r4,8
err3;	std	r0,0(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */
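	/*
	 * exit_vmx_usercopy() returns 0, which also serves as this
	 * routine's "no bytes left uncopied" return value.
	 */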

.Lvmx_unaligned_copy:
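	/*
	 * Source and destination are misaligned relative to each other:
	 * read the source in aligned 16B chunks with lvx and use vperm,
	 * with the control vector built by LVS from the source alignment,
	 * to merge each pair of adjacent chunks into one correctly shifted
	 * 16B result, which is stored to the destination (made 16B aligned
	 * by the head copy below). v0 always carries the previous chunk
	 * into the next merge.
	 */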
	/* Get the destination 16B aligned */
	neg	r6,r3
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-4)

	bf	cr7*4+3,1f
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1

1:	bf	cr7*4+2,2f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

2:	bf	cr7*4+1,3f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

3:	bf	cr7*4+0,4f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r7,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r7,4(r3)
	addi	r3,r3,8

4:	sub	r5,r5,r6

	/* Get the destination 128B aligned */
	neg	r6,r3
	srdi	r7,r6,4
	mtocrf	0x01,r7
	clrldi	r6,r6,(64-7)

	li	r9,16
	li	r10,32
	li	r11,48

	LVS(v16,0,r4)		/* Setup permute control vector */
err3;	lvx	v0,0,r4
	addi	r4,r4,16
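	/*
	 * The first aligned chunk is preloaded into v0 and r4 is biased
	 * by +16; the tail code undoes this bias before the final byte
	 * copies ("Unwind the +16 load offset" below).
	 */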

	bf	cr7*4+3,5f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16
	vor	v0,v1,v1

5:	bf	cr7*4+2,6f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

6:	bf	cr7*4+1,7f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

7:	sub	r5,r5,r6
	srdi	r6,r5,7

	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	li	r12,64
	li	r14,80
	li	r15,96
	li	r16,112

	mtctr	r6

	/*
	 * Now do cacheline sized loads and stores. By this stage the
	 * cacheline stores are also cacheline aligned.
	 */
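	/*
	 * Each iteration loads eight aligned 16B chunks into v7-v0 and
	 * permutes each against the previous chunk to form v8-v15, which
	 * are stored; the last chunk loaded (v0) is carried into the next
	 * iteration.
	 */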
	.align	5
8:
err4;	lvx	v7,0,r4
	VPERM(v8,v0,v7,v16)
err4;	lvx	v6,r4,r9
	VPERM(v9,v7,v6,v16)
err4;	lvx	v5,r4,r10
	VPERM(v10,v6,v5,v16)
err4;	lvx	v4,r4,r11
	VPERM(v11,v5,v4,v16)
err4;	lvx	v3,r4,r12
	VPERM(v12,v4,v3,v16)
err4;	lvx	v2,r4,r14
	VPERM(v13,v3,v2,v16)
err4;	lvx	v1,r4,r15
	VPERM(v14,v2,v1,v16)
err4;	lvx	v0,r4,r16
	VPERM(v15,v1,v0,v16)
	addi	r4,r4,128
err4;	stvx	v8,0,r3
err4;	stvx	v9,r3,r9
err4;	stvx	v10,r3,r10
err4;	stvx	v11,r3,r11
err4;	stvx	v12,r3,r12
err4;	stvx	v13,r3,r14
err4;	stvx	v14,r3,r15
err4;	stvx	v15,r3,r16
	addi	r3,r3,128
	bdnz	8b

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)

	/* Up to 127B to go */
	clrldi	r5,r5,(64-7)
	srdi	r6,r5,4
	mtocrf	0x01,r6

	bf	cr7*4+1,9f
err3;	lvx	v3,0,r4
	VPERM(v8,v0,v3,v16)
err3;	lvx	v2,r4,r9
	VPERM(v9,v3,v2,v16)
err3;	lvx	v1,r4,r10
	VPERM(v10,v2,v1,v16)
err3;	lvx	v0,r4,r11
	VPERM(v11,v1,v0,v16)
	addi	r4,r4,64
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
err3;	stvx	v10,r3,r10
err3;	stvx	v11,r3,r11
	addi	r3,r3,64

9:	bf	cr7*4+2,10f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
err3;	lvx	v0,r4,r9
	VPERM(v9,v1,v0,v16)
	addi	r4,r4,32
err3;	stvx	v8,0,r3
err3;	stvx	v9,r3,r9
	addi	r3,r3,32

10:	bf	cr7*4+3,11f
err3;	lvx	v1,0,r4
	VPERM(v8,v0,v1,v16)
	addi	r4,r4,16
err3;	stvx	v8,0,r3
	addi	r3,r3,16

	/* Up to 15B to go */
11:	clrldi	r5,r5,(64-4)
	addi	r4,r4,-16	/* Unwind the +16 load offset */
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err3;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err3;	lwz	r6,4(r4)
	addi	r4,r4,8
err3;	stw	r0,0(r3)
err3;	stw	r6,4(r3)
	addi	r3,r3,8

12:	bf	cr7*4+1,13f
err3;	lwz	r0,0(r4)
	addi	r4,r4,4
err3;	stw	r0,0(r3)
	addi	r3,r3,4

13:	bf	cr7*4+2,14f
err3;	lhz	r0,0(r4)
	addi	r4,r4,2
err3;	sth	r0,0(r3)
	addi	r3,r3,2

14:	bf	cr7*4+3,15f
err3;	lbz	r0,0(r4)
err3;	stb	r0,0(r3)

15:	addi	r1,r1,STACKFRAMESIZE
	b	exit_vmx_usercopy	/* tail call optimise */
#endif /* CONFIG_ALTIVEC */