Commit | Line | Data |
---|---|---|
457c8996 | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
1da177e4 | 2 | /* Copyright 2002 Andi Kleen */ |
038b0a6d | 3 | |
8d379dad | 4 | #include <linux/linkage.h> |
cbf8b5a2 | 5 | #include <asm/errno.h> |
cd4d09ec | 6 | #include <asm/cpufeatures.h> |
5d8beee2 | 7 | #include <asm/mcsafe_test.h> |
101068c1 | 8 | #include <asm/alternative-asm.h> |
784d5699 | 9 | #include <asm/export.h> |
8d379dad | 10 | |
e0bc8d17 BP |
11 | /* |
12 | * We build a jump to memcpy_orig by default which gets NOPped out on | |
13 | * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which | |
14 | * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs | |
15 | * to a jmp to memcpy_erms which does the REP MOVSB memory copy. | |
16 | */ | |
17 | ||
18 | .weak memcpy | |
19 | ||
1da177e4 LT |
20 | /* |
21 | * memcpy - Copy a memory block (__memcpy labels the same code). | |
22 | * | |
f3b6eaf0 IM |
23 | * Input: |
24 | * rdi destination | |
25 | * rsi source | |
26 | * rdx count | |
27 | * (the body below modifies rcx, rdx, rsi, rdi and the flags) | |
1da177e4 LT |
28 | * Output: |
29 | * rax original destination | |
f3b6eaf0 | 30 | */ |
e0bc8d17 BP |
31 | ENTRY(__memcpy) |
32 | ENTRY(memcpy) | |
33 | ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ | |
34 | "jmp memcpy_erms", X86_FEATURE_ERMS | |
1da177e4 | 35 | /* Reached only when the jump above has been patched to NOPs (see the comment at the top). */ |
f3b6eaf0 | 36 | movq %rdi, %rax /* rax = destination, kept as the return value */ |
2ab56091 JB |
37 | movq %rdx, %rcx |
38 | shrq $3, %rcx /* rcx = count / 8: number of whole quadwords */ | |
f3b6eaf0 | 39 | andl $7, %edx /* edx = count % 8: leftover bytes */ |
8d379dad | 40 | rep movsq /* copy the quadwords; advances rsi and rdi */ |
f3b6eaf0 | 41 | movl %edx, %ecx /* rcx = leftover byte count (32-bit move clears the upper half) */ |
8d379dad JB |
42 | rep movsb /* copy the leftover 0..7 bytes */ |
43 | ret | |
e0bc8d17 BP |
44 | ENDPROC(memcpy) |
45 | ENDPROC(__memcpy) | |
784d5699 AV |
46 | EXPORT_SYMBOL(memcpy) |
47 | EXPORT_SYMBOL(__memcpy) | |
8d379dad | 48 | |
101068c1 | 49 | /* |
e0bc8d17 BP |
50 | * memcpy_erms() - enhanced fast string memcpy. This is faster and |
51 | * simpler than memcpy. Use memcpy_erms when possible. Takes rdi = destination, rsi = source, rdx = count and returns the original destination in rax. | |
101068c1 | 52 | */ |
e0bc8d17 | 53 | ENTRY(memcpy_erms) |
101068c1 | 54 | movq %rdi, %rax /* rax = destination, kept as the return value */ |
2ab56091 | 55 | movq %rdx, %rcx /* rcx = full byte count for the string instruction */ |
101068c1 FY |
56 | rep movsb /* copy rcx bytes from (rsi) to (rdi) in one go */ |
57 | ret | |
e0bc8d17 | 58 | ENDPROC(memcpy_erms) |
393f203f | 59 | |
e0bc8d17 | 60 | ENTRY(memcpy_orig) /* rdi = destination, rsi = source, rdx = count; returns the original destination in rax; uses rcx and r8-r11 as scratch */
59daa706 | 61 | movq %rdi, %rax /* rax = destination, kept as the return value */ |
7bcd3f34 | 62 | |
2ab56091 | 63 | cmpq $0x20, %rdx |
59daa706 | 64 | jb .Lhandle_tail /* fewer than 32 bytes: skip the block loops entirely */ |
7bcd3f34 | 65 | |
f3b6eaf0 | 66 | /* |
9de4966a | 67 | * We check whether memory false dependence could occur, |
59daa706 | 68 | * then jump to corresponding copy mode. |
f3b6eaf0 | 69 | */ |
59daa706 ML |
70 | cmp %dil, %sil /* compares only the low address bytes of source and destination */ |
71 | jl .Lcopy_backward /* signed: low byte of source below low byte of destination selects the backward loop */ | |
2ab56091 | 72 | subq $0x20, %rdx /* pre-bias the count by 32 so the loop's subq borrows once fewer than 32 bytes remain after the current block */ |
59daa706 ML |
73 | .Lcopy_forward_loop: |
74 | subq $0x20, %rdx /* sets CF for the jae at the bottom of the loop */ | |
7bcd3f34 | 75 | |
f3b6eaf0 | 76 | /* |
59daa706 | 77 | * Move in blocks of 4x8 bytes: |
f3b6eaf0 | 78 | */ |
59daa706 ML |
79 | movq 0*8(%rsi), %r8 |
80 | movq 1*8(%rsi), %r9 | |
81 | movq 2*8(%rsi), %r10 | |
82 | movq 3*8(%rsi), %r11 | |
83 | leaq 4*8(%rsi), %rsi /* advance source; leaq leaves the flags untouched */ | |
84 | ||
85 | movq %r8, 0*8(%rdi) | |
86 | movq %r9, 1*8(%rdi) | |
87 | movq %r10, 2*8(%rdi) | |
88 | movq %r11, 3*8(%rdi) | |
89 | leaq 4*8(%rdi), %rdi /* advance destination; flags still untouched */ | |
90 | jae .Lcopy_forward_loop /* CF is still the result of the subq at the loop top */ | |
2ab56091 | 91 | addl $0x20, %edx /* undo the bias: edx = bytes left (0..31), upper half of rdx cleared */ |
59daa706 ML |
92 | jmp .Lhandle_tail |
93 | ||
94 | .Lcopy_backward: | |
95 | /* | |
96 | * Calculate copy position to tail. | |
97 | */ | |
98 | addq %rdx, %rsi /* rsi = one past the last source byte */ | |
99 | addq %rdx, %rdi /* rdi = one past the last destination byte */ | |
100 | subq $0x20, %rdx /* same 32-byte pre-bias as in the forward path */ | |
101 | /* | |
102 | * At most 3 ALU operations in one cycle, | |
d50ba368 | 103 | * so append NOPS in the same 16 bytes chunk. |
59daa706 ML |
104 | */ |
105 | .p2align 4 | |
106 | .Lcopy_backward_loop: | |
107 | subq $0x20, %rdx /* sets CF for the jae at the bottom of the loop */ | |
108 | movq -1*8(%rsi), %r8 | |
109 | movq -2*8(%rsi), %r9 | |
110 | movq -3*8(%rsi), %r10 | |
111 | movq -4*8(%rsi), %r11 | |
112 | leaq -4*8(%rsi), %rsi /* step source back 32 bytes without touching flags */ | |
113 | movq %r8, -1*8(%rdi) | |
114 | movq %r9, -2*8(%rdi) | |
115 | movq %r10, -3*8(%rdi) | |
116 | movq %r11, -4*8(%rdi) | |
117 | leaq -4*8(%rdi), %rdi /* step destination back 32 bytes without touching flags */ | |
118 | jae .Lcopy_backward_loop /* CF is still the result of the subq at the loop top */ | |
7bcd3f34 | 119 | |
59daa706 ML |
120 | /* |
121 | * Calculate copy position to head. | |
122 | */ | |
2ab56091 | 123 | addl $0x20, %edx /* undo the bias: edx = uncopied head bytes (0..31); the 32-bit add clears the upper half of rdx */ |
59daa706 ML |
124 | subq %rdx, %rsi /* rsi = start of the uncopied head */ |
125 | subq %rdx, %rdi /* rdi = matching destination start */ | |
7bcd3f34 | 126 | .Lhandle_tail: /* here: rsi/rdi point at the first uncopied byte, edx = bytes left (0..31) */
2ab56091 | 127 | cmpl $16, %edx |
59daa706 | 128 | jb .Lless_16bytes |
f3b6eaf0 | 129 | |
59daa706 ML |
130 | /* |
131 | * Move data from 16 bytes to 31 bytes: the first 16 and the last 16 bytes are copied, the two windows may overlap, and every load is done before the first store. | |
132 | */ | |
133 | movq 0*8(%rsi), %r8 | |
134 | movq 1*8(%rsi), %r9 | |
135 | movq -2*8(%rsi, %rdx), %r10 | |
136 | movq -1*8(%rsi, %rdx), %r11 | |
137 | movq %r8, 0*8(%rdi) | |
138 | movq %r9, 1*8(%rdi) | |
139 | movq %r10, -2*8(%rdi, %rdx) | |
140 | movq %r11, -1*8(%rdi, %rdx) | |
141 | retq | |
7bcd3f34 | 142 | .p2align 4 |
59daa706 | 143 | .Lless_16bytes: |
2ab56091 | 144 | cmpl $8, %edx |
59daa706 ML |
145 | jb .Lless_8bytes |
146 | /* | |
147 | * Move data from 8 bytes to 15 bytes: first 8 and last 8 bytes, possibly overlapping. | |
148 | */ | |
149 | movq 0*8(%rsi), %r8 | |
150 | movq -1*8(%rsi, %rdx), %r9 | |
151 | movq %r8, 0*8(%rdi) | |
152 | movq %r9, -1*8(%rdi, %rdx) | |
153 | retq | |
154 | .p2align 4 | |
155 | .Lless_8bytes: | |
2ab56091 | 156 | cmpl $4, %edx |
59daa706 | 157 | jb .Lless_3bytes |
f3b6eaf0 | 158 | |
59daa706 ML |
159 | /* |
160 | * Move data from 4 bytes to 7 bytes: first 4 and last 4 bytes, possibly overlapping. | |
161 | */ | |
162 | movl (%rsi), %ecx | |
163 | movl -4(%rsi, %rdx), %r8d | |
164 | movl %ecx, (%rdi) | |
165 | movl %r8d, -4(%rdi, %rdx) | |
166 | retq | |
7bcd3f34 | 167 | .p2align 4 |
59daa706 | 168 | .Lless_3bytes: /* reached with 0..3 bytes left */
9d8e2277 JB |
169 | subl $1, %edx /* edx = count - 1; borrows when count was 0, ZF set when count was 1 */ |
170 | jb .Lend /* nothing left to copy */ | |
59daa706 ML |
171 | /* |
172 | * Move data from 1 byte to 3 bytes. | |
173 | */ | |
9d8e2277 JB |
174 | movzbl (%rsi), %ecx /* load byte 0; movzbl leaves the flags from the subl intact */ |
175 | jz .Lstore_1byte /* count was exactly 1 */ | |
176 | movzbq 1(%rsi), %r8 /* byte 1 */ | |
177 | movzbq (%rsi, %rdx), %r9 /* last byte, at index count - 1 (same as byte 1 when count is 2) */ | |
178 | movb %r8b, 1(%rdi) | |
179 | movb %r9b, (%rdi, %rdx) | |
180 | .Lstore_1byte: | |
181 | movb %cl, (%rdi) /* store byte 0 */ | |
7bcd3f34 | 182 | |
f3b6eaf0 | 183 | .Lend: |
59daa706 | 184 | retq |
e0bc8d17 | 185 | ENDPROC(memcpy_orig) |
92b0729c TL |
186 | |
187 | #ifndef CONFIG_UML | |
5d8beee2 DW |
188 | |
189 | MCSAFE_TEST_CTL | |
190 | ||
92b0729c | 191 | /* |
da7bc9c5 | 192 | * __memcpy_mcsafe - memory copy with machine check exception handling |
92b0729c TL |
193 | * Note that we only catch machine checks when reading the source addresses. |
194 | * Writes to target are posted and don't generate machine checks. | |
195 | * In: rdi = destination, rsi = source, edx = byte count (only the 32-bit register is examined). Out: eax = 0 on success, otherwise the number of bytes not copied; the .E_write_words path hands off to mcsafe_handle_tail, which is not visible here. The MCSAFE_TEST_* macros are defined outside this file. */ | |
da7bc9c5 | 196 | ENTRY(__memcpy_mcsafe) |
92b0729c TL |
197 | cmpl $8, %edx |
198 | /* Less than 8 bytes? Go to byte copy loop */ | |
199 | jb .L_no_whole_words | |
200 | ||
201 | /* Check for bad alignment of source */ | |
202 | testl $7, %esi | |
203 | /* Already aligned */ | |
204 | jz .L_8byte_aligned | |
205 | ||
206 | /* Copy one byte at a time until source is 8-byte aligned */ | |
207 | movl %esi, %ecx | |
208 | andl $7, %ecx /* ecx = source address modulo 8 (1..7 here) */ | |
209 | subl $8, %ecx | |
210 | negl %ecx /* ecx = 8 - (source modulo 8): leading bytes up to the next 8-byte boundary */ | |
211 | subl %ecx, %edx /* edx = bytes left once the leading bytes are done */ | |
bd131544 | 212 | .L_read_leading_bytes: |
92b0729c | 213 | movb (%rsi), %al |
5d8beee2 DW |
214 | MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes |
215 | MCSAFE_TEST_DST %rdi 1 .E_leading_bytes | |
bd131544 | 216 | .L_write_leading_bytes: |
92b0729c TL |
217 | movb %al, (%rdi) |
218 | incq %rsi | |
219 | incq %rdi | |
220 | decl %ecx /* ecx counts the leading bytes still to copy */ | |
bd131544 | 221 | jnz .L_read_leading_bytes |
92b0729c TL |
222 | |
223 | .L_8byte_aligned: | |
92b0729c TL |
224 | movl %edx, %ecx |
225 | andl $7, %edx /* edx = trailing bytes after the whole words */ | |
226 | shrl $3, %ecx /* ecx = number of whole 8-byte words; ZF set when there are none */ | |
227 | jz .L_no_whole_words | |
228 | ||
bd131544 | 229 | .L_read_words: |
92b0729c | 230 | movq (%rsi), %r8 |
5d8beee2 DW |
231 | MCSAFE_TEST_SRC %rsi 8 .E_read_words |
232 | MCSAFE_TEST_DST %rdi 8 .E_write_words | |
bd131544 | 233 | .L_write_words: |
da7bc9c5 DW |
234 | movq %r8, (%rdi) |
235 | addq $8, %rsi | |
236 | addq $8, %rdi | |
92b0729c | 237 | decl %ecx /* ecx counts the words still to copy */ |
bd131544 | 238 | jnz .L_read_words |
92b0729c TL |
239 | |
240 | /* Any trailing bytes? */ | |
241 | .L_no_whole_words: | |
242 | andl %edx, %edx /* value unchanged; only sets ZF when edx is zero */ | |
243 | jz .L_done_memcpy_trap | |
244 | ||
245 | /* Copy trailing bytes */ | |
246 | movl %edx, %ecx | |
bd131544 | 247 | .L_read_trailing_bytes: |
92b0729c | 248 | movb (%rsi), %al |
5d8beee2 DW |
249 | MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes |
250 | MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes | |
bd131544 | 251 | .L_write_trailing_bytes: |
92b0729c TL |
252 | movb %al, (%rdi) |
253 | incq %rsi | |
254 | incq %rdi | |
255 | decl %ecx /* ecx counts the trailing bytes still to copy */ | |
bd131544 | 256 | jnz .L_read_trailing_bytes |
92b0729c | 257 | |
cbf8b5a2 | 258 | /* Copy successful. Return zero */ |
92b0729c | 259 | .L_done_memcpy_trap: |
a7bea830 | 260 | xorl %eax, %eax |
b69656fa | 261 | .L_done: /* shared exit: the fixup code below jumps here with eax already set */
92b0729c | 262 | ret |
da7bc9c5 DW |
263 | ENDPROC(__memcpy_mcsafe) |
264 | EXPORT_SYMBOL_GPL(__memcpy_mcsafe) | |
92b0729c TL |
265 | |
266 | .section .fixup, "ax" | |
60622d68 DW |
267 | /* |
268 | * Return number of bytes not copied for any failure. Note that | |
269 | * there is no "tail" handling since the source buffer is 8-byte | |
270 | * aligned and poison is cacheline aligned. The three labels below fall through into one another on purpose. | |
271 | */ | |
272 | .E_read_words: | |
273 | shll $3, %ecx /* remaining words converted to bytes */ | |
274 | .E_leading_bytes: | |
275 | addl %edx, %ecx /* plus the bytes that had not been started yet */ | |
276 | .E_trailing_bytes: | |
277 | mov %ecx, %eax /* eax = total bytes not copied */ | |
b69656fa | 278 | jmp .L_done |
92b0729c | 279 | |
12c89130 DW |
280 | /* |
281 | * For write fault handling, given the destination is unaligned, | |
282 | * we handle faults on multi-byte writes with a byte-by-byte | |
283 | * copy up to the write-protected page. | |
284 | */ | |
285 | .E_write_words: | |
286 | shll $3, %ecx /* remaining words converted to bytes */ | |
287 | addl %edx, %ecx /* plus the trailing bytes */ | |
288 | movl %ecx, %edx /* edx = bytes not yet copied, passed on to the handler */ | |
289 | jmp mcsafe_handle_tail /* defined outside this file; presumably finishes the copy and returns to the caller - TODO confirm */ | |
290 | ||
92b0729c TL |
291 | .previous |
292 | ||
60622d68 DW |
293 | _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) /* these three entries cover the loads from the source */ |
294 | _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) | |
295 | _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) | |
12c89130 DW |
296 | _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) /* these three entries cover the stores to the destination */ |
297 | _ASM_EXTABLE(.L_write_words, .E_write_words) | |
298 | _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) | |
92b0729c | 299 | #endif |