Merge tag 'fsnotify_for_v6.4-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-block.git] / arch / powerpc / lib / copypage_power7.S
CommitLineData
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */
8#include <asm/page.h>
9#include <asm/ppc_asm.h>
10
fde69282
AB
/*
 * copypage_power7 - copy one page, 128 bytes per loop iteration.
 *
 * In:	r3 = destination page ("to")
 *	r4 = source page ("from")
 *	Both addresses are expected to be page aligned (the prefetch
 *	setup below relies on the low 7 bits being zero).
 *
 * The loop count is PAGE_SIZE/128, held in CTR.
 *
 * With CONFIG_ALTIVEC, enter_vmx_ops() is called first:
 *   - non-zero return: copy through v0-v7, then leave via a tail call
 *     to exit_vmx_ops();
 *   - zero return: fall through to the GPR copy at .Lnonvmx_copy.
 * Without CONFIG_ALTIVEC only the GPR copy is built.
 *
 * The GPR copy borrows r14-r20, which it saves and restores in a
 * STACKFRAMESIZE stack frame.
 *
 * Scratch: r0, r5-r12, ctr, cr0 (and lr across the enter_vmx_ops call).
 */
_GLOBAL(copypage_power7)
	/*
	 * We prefetch both the source and destination using enhanced touch
	 * instructions. We use a stream ID of 0 for the load side and
	 * 1 for the store side. Since source and destination are page
	 * aligned we don't need to clear the bottom 7 bits of either
	 * address.
	 */
	ori	r9,r3,1		/* stream=1 => to */

#ifdef CONFIG_PPC_64K_PAGES
	lis	r7,0x0E01	/* depth=7
				 * units/cachelines=512 */
#else
	lis	r7,0x0E00	/* depth=7 */
	ori	r7,r7,0x1000	/* units/cachelines=32 */
#endif
	ori	r10,r7,1	/* stream=1 */

	lis	r8,0x8000	/* GO=1 */
	clrldi	r8,r8,32	/* drop the upper 32 bits set by lis sign extension */

	/* setup read stream 0 */
	dcbt	0,r4,0b01000	/* addr from */
	dcbt	0,r7,0b01010	/* length and depth from */
	/* setup write stream 1 */
	dcbtst	0,r9,0b01000	/* addr to */
	dcbtst	0,r10,0b01010	/* length and depth to */
	eieio			/* order the stream setup before GO */
	dcbt	0,r8,0b01010	/* all streams GO */

#ifdef CONFIG_ALTIVEC
	/*
	 * Call enter_vmx_ops(). r3/r4 are stored into the frame that the
	 * stdu below creates (hence the -STACKFRAMESIZE bias), and LR goes
	 * into the caller's frame at 16(r1).
	 */
	mflr	r0
	std	r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
	std	r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
	std	r0,16(r1)
	stdu	r1,-STACKFRAMESIZE(r1)
	bl	CFUNC(enter_vmx_ops)
	cmpwi	r3,0		/* cr0 is consumed by the beq below */
	ld	r0,STACKFRAMESIZE+16(r1)
	ld	r3,STK_REG(R31)(r1)
	ld	r4,STK_REG(R30)(r1)
	mtlr	r0

	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	/* Zero return: take the GPR copy, which reuses the live frame. */
	beq	.Lnonvmx_copy

	/* VMX path needs no stack: pop the frame before the loop. */
	addi	r1,r1,STACKFRAMESIZE

	/* Byte offsets of the 2nd..8th 16-byte vector in each 128-byte chunk. */
	li	r6,16
	li	r7,32
	li	r8,48
	li	r9,64
	li	r10,80
	li	r11,96
	li	r12,112

	.align	5
.Lvmx_loop:
	/* Load 128 bytes from the source... */
	lvx	v7,0,r4
	lvx	v6,r4,r6
	lvx	v5,r4,r7
	lvx	v4,r4,r8
	lvx	v3,r4,r9
	lvx	v2,r4,r10
	lvx	v1,r4,r11
	lvx	v0,r4,r12
	addi	r4,r4,128
	/* ...and store them to the destination. */
	stvx	v7,0,r3
	stvx	v6,r3,r6
	stvx	v5,r3,r7
	stvx	v4,r3,r8
	stvx	v3,r3,r9
	stvx	v2,r3,r10
	stvx	v1,r3,r11
	stvx	v0,r3,r12
	addi	r3,r3,128
	bdnz	.Lvmx_loop

	/* LR was restored above, so exit_vmx_ops returns to our caller. */
	b	CFUNC(exit_vmx_ops)	/* tail call optimise */

#else
	li	r0,(PAGE_SIZE/128)
	mtctr	r0

	stdu	r1,-STACKFRAMESIZE(r1)
#endif

.Lnonvmx_copy:
	/* A STACKFRAMESIZE frame is live here on every path; save r14-r20 in it. */
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)

.Lgpr_loop:
	/* Load 16 doublewords (128 bytes) from the source... */
	ld	r0,0(r4)
	ld	r5,8(r4)
	ld	r6,16(r4)
	ld	r7,24(r4)
	ld	r8,32(r4)
	ld	r9,40(r4)
	ld	r10,48(r4)
	ld	r11,56(r4)
	ld	r12,64(r4)
	ld	r14,72(r4)
	ld	r15,80(r4)
	ld	r16,88(r4)
	ld	r17,96(r4)
	ld	r18,104(r4)
	ld	r19,112(r4)
	ld	r20,120(r4)
	addi	r4,r4,128
	/* ...and store them to the destination. */
	std	r0,0(r3)
	std	r5,8(r3)
	std	r6,16(r3)
	std	r7,24(r3)
	std	r8,32(r3)
	std	r9,40(r3)
	std	r10,48(r3)
	std	r11,56(r3)
	std	r12,64(r3)
	std	r14,72(r3)
	std	r15,80(r3)
	std	r16,88(r3)
	std	r17,96(r3)
	std	r18,104(r3)
	std	r19,112(r3)
	std	r20,120(r3)
	addi	r3,r3,128
	bdnz	.Lgpr_loop

	/* Restore the borrowed registers and pop the frame. */
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	addi	r1,r1,STACKFRAMESIZE
	blr