Merge tag 'thermal-6.3-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael...
[linux-block.git] / arch / x86 / lib / clear_page_64.S
CommitLineData
457c8996 1/* SPDX-License-Identifier: GPL-2.0-only */
8d379dad 2#include <linux/linkage.h>
0db7058e 3#include <asm/asm.h>
784d5699 4#include <asm/export.h>
8d379dad 5
/*
 * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
 * recommended to use this when possible and we do use them by default.
 * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
 * Otherwise, use original.
 */

/*
 * Zero a page.
 * %rdi - page
 */
/*
 * "Fast string" page clear: 512 qword stores via REP STOSQ.
 *
 * Input:
 *   %rdi - page (4096 bytes)
 * Clobbers: %rax, %rcx, %rdi, flags
 */
SYM_FUNC_START(clear_page_rep)
	movl	$4096/8,%ecx		/* 512 qwords per page */
	xorl	%eax,%eax		/* store value: zero */
	rep stosq
	RET
SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
8d379dad 24
/*
 * Original page clear for CPUs without fast string ops: unrolled loop
 * storing one 64-byte cacheline (8 x 8-byte movq) per iteration.
 *
 * Input:
 *   %rdi - page (4096 bytes)
 * Clobbers: %rax, %rcx, %rdi, flags
 */
SYM_FUNC_START(clear_page_orig)
	xorl	%eax,%eax
	movl	$4096/64,%ecx		/* 64 cachelines per page */
	.p2align 4
.Lloop:
	decl	%ecx
#define PUT(x) movq %rax,x*8(%rdi)
	movq %rax,(%rdi)
	PUT(1)
	PUT(2)
	PUT(3)
	PUT(4)
	PUT(5)
	PUT(6)
	PUT(7)
	/* lea does not touch flags, so ZF from decl survives to jnz */
	leaq	64(%rdi),%rdi
	jnz	.Lloop
	nop
	RET
SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
7bcd3f34 46
/*
 * ERMS (Enhanced REP MOVSB/STOSB) page clear: a single REP STOSB of
 * 4096 bytes; fastest path on CPUs with X86_FEATURE_ERMS.
 *
 * Input:
 *   %rdi - page (4096 bytes)
 * Clobbers: %rax, %rcx, %rdi, flags
 */
SYM_FUNC_START(clear_page_erms)
	movl	$4096,%ecx		/* byte count */
	xorl	%eax,%eax		/* store value: zero */
	rep stosb
	RET
SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)

/*
 * Default clear user-space.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
/*
 * Baseline user-space clear: qword loop then a byte tail. Faults on the
 * user stores are handled via the exception table entries at the bottom,
 * which compute the number of bytes left unwritten and return it in %rcx.
 */
SYM_FUNC_START(clear_user_original)
	/*
	 * Copy only the lower 32 bits of size as that is enough to handle the rest bytes,
	 * i.e., no need for a 'q' suffix and thus a REX prefix.
	 */
	mov	%ecx,%eax		/* %eax = low bits of count, for the tail */
	shr	$3,%rcx			/* %rcx = qword count */
	jz	.Lrest_bytes

	# do the qwords first
	.p2align 4
.Lqwords:
	movq	$0,(%rdi)
	lea	8(%rdi),%rdi		/* lea keeps flags intact (not needed here, but cheap) */
	dec	%rcx
	jnz	.Lqwords

.Lrest_bytes:
	and	$7, %eax		/* 0..7 leftover bytes */
	jz	.Lexit

	# now do the rest bytes
.Lbytes:
	movb	$0,(%rdi)
	inc	%rdi
	dec	%eax
	jnz	.Lbytes

.Lexit:
	/*
	 * %rax still needs to be cleared in the exception case because this function is called
	 * from inline asm and the compiler expects %rax to be zero when exiting the inline asm,
	 * in case it might reuse it somewhere.
	 */
	xor	%eax,%eax
	RET

.Lqwords_exception:
	# convert remaining qwords back into bytes to return to caller
	shl	$3, %rcx
	and	$7, %eax		/* add the byte tail that was never attempted */
	add	%rax,%rcx
	jmp	.Lexit

.Lbytes_exception:
	mov	%eax,%ecx		/* remaining byte count becomes the return value */
	jmp	.Lexit

	_ASM_EXTABLE_UA(.Lqwords, .Lqwords_exception)
	_ASM_EXTABLE_UA(.Lbytes, .Lbytes_exception)
SYM_FUNC_END(clear_user_original)
EXPORT_SYMBOL(clear_user_original)

/*
 * Alternative clear user-space when CPU feature X86_FEATURE_REP_GOOD is
 * present.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
/*
 * REP_GOOD user-space clear: REP STOSQ for the bulk, REP STOSB for the
 * 0..7 byte tail. Short (<64 byte) requests tail-jump to
 * clear_user_original. Faulting stores are fixed up via the exception
 * table to return the uncleared byte count in %rcx.
 */
SYM_FUNC_START(clear_user_rep_good)
	# call the original thing for less than a cacheline
	cmp	$64, %rcx
	jb	clear_user_original	/* tail-jump; it returns directly to our caller */

.Lprep:
	# copy lower 32-bits for rest bytes
	mov	%ecx, %edx
	shr	$3, %rcx		/* %rcx = qword count */
	jz	.Lrep_good_rest_bytes

.Lrep_good_qwords:
	rep stosq

.Lrep_good_rest_bytes:
	and	$7, %edx		/* 0..7 leftover bytes */
	jz	.Lrep_good_exit

.Lrep_good_bytes:
	mov	%edx, %ecx
	rep stosb			/* if this faults, %rcx already holds bytes left */

.Lrep_good_exit:
	# see .Lexit comment above
	xor	%eax, %eax
	RET

.Lrep_good_qwords_exception:
	# convert remaining qwords back into bytes to return to caller
	shl	$3, %rcx
	and	$7, %edx		/* add the byte tail that was never attempted */
	add	%rdx, %rcx
	jmp	.Lrep_good_exit

	_ASM_EXTABLE_UA(.Lrep_good_qwords, .Lrep_good_qwords_exception)
	_ASM_EXTABLE_UA(.Lrep_good_bytes, .Lrep_good_exit)
SYM_FUNC_END(clear_user_rep_good)
EXPORT_SYMBOL(clear_user_rep_good)

/*
 * Alternative clear user-space when CPU feature X86_FEATURE_ERMS is present.
 * Input:
 * rdi destination
 * rcx count
 *
 * Output:
 * rcx: uncleared bytes or 0 if successful.
 */
/*
 * ERMS user-space clear: one REP STOSB for the whole request. Short
 * (<64 byte) requests tail-jump to clear_user_original. On a fault,
 * REP STOSB leaves the remaining byte count in %rcx, so the fixup
 * target is simply the exit label.
 */
SYM_FUNC_START(clear_user_erms)
	# call the original thing for less than a cacheline
	cmp	$64, %rcx
	jb	clear_user_original	/* tail-jump; it returns directly to our caller */

.Lerms_bytes:
	rep stosb

.Lerms_exit:
	xorl	%eax,%eax		/* see the .Lexit comment in clear_user_original */
	RET

	_ASM_EXTABLE_UA(.Lerms_bytes, .Lerms_exit)
SYM_FUNC_END(clear_user_erms)
EXPORT_SYMBOL(clear_user_erms)