x86/debug: Remove perpetually broken, unmaintainable dwarf annotations
[linux-2.6-block.git] / arch/x86/lib/memcpy_64.S
/* Copyright 2002 Andi Kleen */

#include <linux/linkage.h>
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>

/*
 * We build a jump to memcpy_orig by default which gets NOPped out on
 * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
 * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
 * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
 */

.weak memcpy

/*
 * memcpy - Copy a memory block.
 *
 * Input:
 *  rdi destination
 *  rsi source
 *  rdx count
 *
 * Output:
 *  rax original destination
 */
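/*
 * Note: the register assignments above simply follow the SysV AMD64
 * calling convention for the usual C prototype (parameter names here
 * are illustrative):
 *
 *	void *memcpy(void *dest, const void *src, size_t count)
 *
 * i.e. dest in %rdi, src in %rsi, count in %rdx, return value in %rax.
 */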
ENTRY(__memcpy)
ENTRY(memcpy)
	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memcpy_erms", X86_FEATURE_ERMS
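
	/*
	 * Sketch of what the entry above effectively becomes once the
	 * alternatives are patched (illustrative, not literal bytes):
	 *
	 *   - no REP_GOOD, no ERMS:  jmp memcpy_orig  (unrolled copy below)
	 *   - REP_GOOD:              NOPs, fall through to the rep movsq path
	 *   - ERMS:                  jmp memcpy_erms  (single rep movsb)
	 */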

	movq %rdi, %rax		/* return value: original destination */
	movq %rdx, %rcx
	shrq $3, %rcx		/* rcx = number of 8-byte words */
	andl $7, %edx		/* edx = number of trailing bytes */
	rep movsq
	movl %edx, %ecx
	rep movsb		/* copy the remaining 0..7 bytes */
	ret
ENDPROC(memcpy)
ENDPROC(__memcpy)

/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 */
ENTRY(memcpy_erms)
	movq %rdi, %rax
	movq %rdx, %rcx
	rep movsb
	ret
ENDPROC(memcpy_erms)

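/*
 * memcpy_orig() - unrolled fallback for CPUs without REP_GOOD/ERMS.
 *
 * Rough structure (illustrative summary of the code below):
 *   - fewer than 32 bytes: jump straight to the tail handling
 *   - otherwise copy 32 bytes per iteration, forward or backward
 *     depending on the pointers' low bytes (see the comment below)
 *   - hand the remaining 0..31 bytes to the size-binned tail code
 */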
ENTRY(memcpy_orig)
	movq %rdi, %rax

	cmpq $0x20, %rdx
	jb .Lhandle_tail

	/*
	 * We check whether a memory false dependence could occur,
	 * then jump to the corresponding copy mode.
	 */
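	/*
	 * The comparison below looks only at the low bytes of the two
	 * pointers (%dil/%sil) as a cheap heuristic: if the source's low
	 * byte is below the destination's, the backward loop is taken,
	 * presumably to reduce the chance that loads falsely appear to
	 * depend on earlier stores (the "false dependence" noted above).
	 */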
	cmp %dil, %sil
	jl .Lcopy_backward
	subq $0x20, %rdx
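	/*
	 * Note: %rdx is biased down by 32 here so that the subq at the
	 * top of each loop iteration both counts down and sets the flags
	 * for the jae that keeps the loop going; the addl $0x20 after the
	 * loop undoes the bias. The backward loop uses the same trick.
	 */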
.Lcopy_forward_loop:
	subq $0x20, %rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq 2*8(%rsi), %r10
	movq 3*8(%rsi), %r11
	leaq 4*8(%rsi), %rsi

	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, 2*8(%rdi)
	movq %r11, 3*8(%rdi)
	leaq 4*8(%rdi), %rdi
	jae .Lcopy_forward_loop
	addl $0x20, %edx
	jmp .Lhandle_tail

.Lcopy_backward:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx, %rsi
	addq %rdx, %rdi
	subq $0x20, %rdx
	/*
	 * At most 3 ALU operations execute in one cycle, so the NOPs
	 * that pad the loop below out to the next 16-byte chunk are
	 * effectively free.
	 */
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20, %rdx
	movq -1*8(%rsi), %r8
	movq -2*8(%rsi), %r9
	movq -3*8(%rsi), %r10
	movq -4*8(%rsi), %r11
	leaq -4*8(%rsi), %rsi
	movq %r8, -1*8(%rdi)
	movq %r9, -2*8(%rdi)
	movq %r10, -3*8(%rdi)
	movq %r11, -4*8(%rdi)
	leaq -4*8(%rdi), %rdi
	jae .Lcopy_backward_loop

	/*
	 * Calculate copy position to head.
	 */
	addl $0x20, %edx
	subq %rdx, %rsi
	subq %rdx, %rdi
.Lhandle_tail:
	cmpl $16, %edx
	jb .Lless_16bytes

	/*
	 * Move 16 to 31 bytes of data.
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi), %r9
	movq -2*8(%rsi, %rdx), %r10
	movq -1*8(%rsi, %rdx), %r11
	movq %r8, 0*8(%rdi)
	movq %r9, 1*8(%rdi)
	movq %r10, -2*8(%rdi, %rdx)
	movq %r11, -1*8(%rdi, %rdx)
	retq
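	/*
	 * The 16-31, 8-15 and 4-7 byte cases all use the same trick:
	 * load the first and the last fixed-size window of the range,
	 * then store both. For in-between lengths the two windows
	 * overlap, and since all loads happen before any store, every
	 * destination byte still receives the correct source byte.
	 */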
	.p2align 4
.Lless_16bytes:
	cmpl $8, %edx
	jb .Lless_8bytes
	/*
	 * Move 8 to 15 bytes of data.
	 */
	movq 0*8(%rsi), %r8
	movq -1*8(%rsi, %rdx), %r9
	movq %r8, 0*8(%rdi)
	movq %r9, -1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_8bytes:
	cmpl $4, %edx
	jb .Lless_3bytes

	/*
	 * Move 4 to 7 bytes of data.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	retq
	.p2align 4
.Lless_3bytes:
	subl $1, %edx
	jb .Lend
	/*
	 * Move 1 to 3 bytes of data.
	 */
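	/*
	 * At this point %edx holds count - 1 and the flags still reflect
	 * the subl above: a borrow (count == 0) already bailed out via
	 * "jb .Lend", and ZF set means count == 1, handled by the jz
	 * below. For 2 or 3 bytes the first, second and last byte are
	 * copied individually (for 2 bytes the second and last coincide).
	 */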
	movzbl (%rsi), %ecx
	jz .Lstore_1byte
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
	movb %cl, (%rdi)

.Lend:
	retq
ENDPROC(memcpy_orig)