/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/export.h>
#include <linux/stringify.h>
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/asm-offsets.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
#include <asm/frame.h>
#include <asm/nops.h>

	.section .text..__x86.indirect_thunk

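/*
 * POLINE emits the ROP gadget at the core of a retpoline: the CALL
 * pushes a return address pointing at the INT3 speculation trap, and
 * the MOV then overwrites that return address with the target
 * register, so a subsequent RET dispatches to *%\reg while speculation
 * down the predicted return path is caught by the INT3.
 * RETPOLINE appends that RET.
 */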
.macro POLINE reg
	ANNOTATE_INTRA_FUNCTION_CALL
	call	.Ldo_rop_\@
	int3
.Ldo_rop_\@:
	mov	%\reg, (%_ASM_SP)
	UNWIND_HINT_FUNC
.endm

.macro RETPOLINE reg
	POLINE \reg
	RET
.endm

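/*
 * The default body of each thunk is the full retpoline. Alternatives
 * patch it down at boot: the LFENCE; JMP variant when
 * X86_FEATURE_RETPOLINE_LFENCE is selected, or a bare indirect JMP
 * when retpolines are not needed at all (ALT_NOT(X86_FEATURE_RETPOLINE)).
 */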
.macro THUNK reg

	.align RETPOLINE_THUNK_SIZE
SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_UNDEFINED
	ANNOTATE_NOENDBR

	ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
		      __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
		      __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)

.endm

/*
 * Despite being an assembler file we can't just use .irp here
 * because __KSYM_DEPS__ only uses the C preprocessor and would
 * only see one instance of "__x86_indirect_thunk_\reg" rather
 * than one per register with the correct names. So we do it
 * the simple and nasty way...
 *
 * Worse, you can only have a single EXPORT_SYMBOL per line,
 * and CPP can't insert newlines, so we have to repeat everything
 * at least twice.
 */

#define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)

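/*
 * One thunk per general-purpose register, each slot RETPOLINE_THUNK_SIZE
 * bytes wide (note the .align before every thunk), so patching code can
 * treat this as an array indexed by register number.
 */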
	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_thunk_array)

#define GEN(reg) THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_thunk_array)

#define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING
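/*
 * Call-depth-tracking variants: indirect calls are routed through these
 * thunks so that each one also updates the per-CPU call depth counter
 * (CALL_DEPTH_ACCOUNT) before dispatching via the POLINE/RET sequence.
 */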
.macro CALL_THUNK reg
	.align RETPOLINE_THUNK_SIZE

SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_UNDEFINED
	ANNOTATE_NOENDBR

	CALL_DEPTH_ACCOUNT
	POLINE \reg
	ANNOTATE_UNRET_SAFE
	ret
	int3
.endm

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_call_thunk_array)

#define GEN(reg) CALL_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_call_thunk_array)

#define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN

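/*
 * The same, for indirect jumps: no CALL_DEPTH_ACCOUNT here, since a JMP
 * does not push a return address and thus does not change the call
 * depth.
 */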
.macro JUMP_THUNK reg
	.align RETPOLINE_THUNK_SIZE

SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL)
	UNWIND_HINT_UNDEFINED
	ANNOTATE_NOENDBR
	POLINE \reg
	ANNOTATE_UNRET_SAFE
	ret
	int3
.endm

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_START(__x86_indirect_jump_thunk_array)

#define GEN(reg) JUMP_THUNK reg
#include <asm/GEN-for-each-reg.h>
#undef GEN

	.align RETPOLINE_THUNK_SIZE
SYM_CODE_END(__x86_indirect_jump_thunk_array)

#define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */

#ifdef CONFIG_MITIGATION_RETHUNK

/*
 * Be careful here: the __x86_return_thunk label cannot really be
 * removed because in some configurations and toolchains, the JMP to
 * __x86_return_thunk that the compiler issues is either a short one,
 * or the compiler doesn't use relocations for same-section JMPs;
 * either case breaks the return-site detection logic in
 * apply_returns() and in objtool.
 */
	.section .text..__x86.return_thunk

#ifdef CONFIG_MITIGATION_SRSO

/*
 * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
 * special addresses:
 *
 * - srso_alias_untrain_ret() is 2M aligned
 * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
 *   and 20 in its virtual address are set (while those bits in the
 *   srso_alias_untrain_ret() function are cleared).
 *
 * This guarantees that those two addresses will alias in the branch
 * target buffer of Zen3/4 generations, causing any potentially
 * poisoned entries at that BTB slot to be evicted.
 *
 * As a result, srso_alias_safe_ret() becomes a safe return.
 */
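/*
 * Illustration (assuming only these bits differ): with bits 2, 8, 14
 * and 20 set, the offset between the two entry points is
 *
 *   (1 << 2) + (1 << 8) + (1 << 14) + (1 << 20) = 0x104104
 *
 * so if srso_alias_untrain_ret() lands at 2M-aligned address A (low
 * 21 bits clear), srso_alias_safe_ret() lands at A + 0x104104, still
 * inside the same 2M page.
 */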
	.pushsection .text..__x86.rethunk_untrain
SYM_CODE_START_NOALIGN(srso_alias_untrain_ret)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	ASM_NOP2
	lfence
	jmp srso_alias_return_thunk
SYM_FUNC_END(srso_alias_untrain_ret)
__EXPORT_THUNK(srso_alias_untrain_ret)
	.popsection

	.pushsection .text..__x86.rethunk_safe
SYM_CODE_START_NOALIGN(srso_alias_safe_ret)
	lea 8(%_ASM_SP), %_ASM_SP
	UNWIND_HINT_FUNC
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_FUNC_END(srso_alias_safe_ret)

SYM_CODE_START_NOALIGN(srso_alias_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	call srso_alias_safe_ret
	ud2
SYM_CODE_END(srso_alias_return_thunk)
	.popsection

/*
 * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
 * below. On kernel entry, srso_untrain_ret() is executed, which is a
 *
 *   movabs $0xccccc30824648d48, %rax
 *
 * and when the return thunk later executes the inner label
 * srso_safe_ret(), it is a stack manipulation and a RET which is
 * mispredicted and thus a "safe" one to use.
 */
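/*
 * Byte-level view of the overlap (illustrative decode): the 8-byte
 * immediate 0xccccc30824648d48 is, in memory order,
 *
 *   48 8d 64 24 08	lea 0x8(%rsp), %rsp
 *   c3			ret
 *   cc cc		int3; int3
 *
 * i.e. exactly the srso_safe_ret() sequence below, hidden inside the
 * MOVABS immediate.
 */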
	.align 64
	.skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
SYM_CODE_START_LOCAL_NOALIGN(srso_untrain_ret)
	ANNOTATE_NOENDBR
	.byte 0x48, 0xb8

/*
 * This forces the function return instruction to speculate into a trap
 * (UD2 in srso_return_thunk() below). This RET will then mispredict
 * and execution will continue at the return site read from the top of
 * the stack.
 */
SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
	lea 8(%_ASM_SP), %_ASM_SP
	ret
	int3
	int3
	/* end of movabs */
	lfence
	call srso_safe_ret
	ud2
SYM_CODE_END(srso_safe_ret)
SYM_FUNC_END(srso_untrain_ret)

SYM_CODE_START(srso_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	call srso_safe_ret
	ud2
SYM_CODE_END(srso_return_thunk)

#define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret"
#else /* !CONFIG_MITIGATION_SRSO */
/* Dummy for the alternative in CALL_UNTRAIN_RET. */
SYM_CODE_START(srso_alias_untrain_ret)
	ANNOTATE_UNRET_SAFE
	ANNOTATE_NOENDBR
	ret
	int3
SYM_FUNC_END(srso_alias_untrain_ret)
__EXPORT_THUNK(srso_alias_untrain_ret)
#define JMP_SRSO_UNTRAIN_RET "ud2"
#endif /* CONFIG_MITIGATION_SRSO */

#ifdef CONFIG_MITIGATION_UNRET_ENTRY

/*
 * Some generic notes on the untraining sequences:
 *
 * They are interchangeable when it comes to flushing potentially wrong
 * RET predictions from the BTB.
 *
 * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
 * Retbleed sequence because the return sequence done there
 * (srso_safe_ret()) is longer and the return sequence must fully nest
 * (end before) the untraining sequence. Therefore, the untraining
 * sequence must fully overlap the return sequence.
 *
 * Regarding alignment - the instructions which need to be untrained
 * must all start at a cacheline boundary for Zen1/2 generations. That
 * is, the instruction sequences starting at srso_safe_ret() and
 * the respective instruction sequences at retbleed_return_thunk()
 * must start at a cacheline boundary.
 */

/*
 * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
 * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
 *    alignment within the BTB.
 * 2) The instruction at retbleed_untrain_ret must contain, and not
 *    end with, the 0xc3 byte of the RET.
 * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
 *    from re-poisoning the BTB prediction.
 */
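/*
 * Byte-level view of the overlap (illustrative decode):
 *
 *   f6 c3 cc	test $0xcc, %bl		; as fetched from retbleed_untrain_ret
 *      c3	ret			; as fetched from retbleed_return_thunk
 *         cc	int3			; SLS trap
 */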
	.align 64
	.skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
SYM_CODE_START_LOCAL_NOALIGN(retbleed_untrain_ret)
	ANNOTATE_NOENDBR
	/*
	 * As executed from retbleed_untrain_ret, this is:
	 *
	 *   TEST $0xcc, %bl
	 *   LFENCE
	 *   JMP retbleed_return_thunk
	 *
	 * Executing the TEST instruction has a side effect of evicting any BTB
	 * prediction (potentially attacker controlled) attached to the RET, as
	 * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
	 */
	.byte 0xf6

	/*
	 * As executed from retbleed_return_thunk, this is a plain RET.
	 *
	 * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
	 *
	 * We subsequently jump backwards and architecturally execute the RET.
	 * This creates a correct BTB prediction (type=ret), but in the
	 * meantime we suffer Straight Line Speculation (because the type was
	 * no branch) which is halted by the INT3.
	 *
	 * With SMT enabled and STIBP active, a sibling thread cannot poison
	 * RET's prediction to a type of its choice, but can evict the
	 * prediction due to competitive sharing. If the prediction is
	 * evicted, retbleed_return_thunk will suffer Straight Line Speculation
	 * which will be contained safely by the INT3.
	 */
SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
	ret
	int3
SYM_CODE_END(retbleed_return_thunk)

	/*
	 * Ensure the TEST decoding / BTB invalidation is complete.
	 */
	lfence

	/*
	 * Jump back and execute the RET in the middle of the TEST instruction.
	 * INT3 is for SLS protection.
	 */
	jmp retbleed_return_thunk
	int3
SYM_FUNC_END(retbleed_untrain_ret)

#define JMP_RETBLEED_UNTRAIN_RET "jmp retbleed_untrain_ret"
#else /* !CONFIG_MITIGATION_UNRET_ENTRY */
#define JMP_RETBLEED_UNTRAIN_RET "ud2"
#endif /* CONFIG_MITIGATION_UNRET_ENTRY */

#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO)

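/*
 * Shared untraining entry point used on kernel entry: patched at boot
 * to the SRSO untraining sequence when X86_FEATURE_SRSO is set,
 * otherwise to the Retbleed one (each collapses to UD2 when its
 * respective config option is off, see the defines above).
 */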
SYM_FUNC_START(entry_untrain_ret)
	ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO
SYM_FUNC_END(entry_untrain_ret)
__EXPORT_THUNK(entry_untrain_ret)

#endif /* CONFIG_MITIGATION_UNRET_ENTRY || CONFIG_MITIGATION_SRSO */

#ifdef CONFIG_MITIGATION_CALL_DEPTH_TRACKING

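/*
 * Sketch of the accounting scheme: each call shifts the per-CPU depth
 * counter right by 5 (replicating the sign bits), each return shifts
 * it left by 5 (the SHLQ below). Once returns have drained all the
 * high bits, the shift yields zero, meaning the RSB may be about to
 * underflow, and the slow path below refills it with 16 harmless call
 * sites before finally returning.
 */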
	.align 64
SYM_FUNC_START(call_depth_return_thunk)
	ANNOTATE_NOENDBR
	/*
	 * Keep the hotpath in a 16-byte I-fetch for the non-debug
	 * case.
	 */
	CALL_THUNKS_DEBUG_INC_RETS
	shlq	$5, PER_CPU_VAR(pcpu_hot + X86_call_depth)
	jz	1f
	ANNOTATE_UNRET_SAFE
	ret
	int3
1:
	CALL_THUNKS_DEBUG_INC_STUFFS
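	/*
	 * Stuff the RSB: 16 intra-function CALLs each push a benign
	 * return address; the ADD below then drops those 16 stack
	 * slots in one go.
	 */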
	.rept	16
	ANNOTATE_INTRA_FUNCTION_CALL
	call	2f
	int3
2:
	.endr
	add	$(8*16), %rsp

	CREDIT_CALL_DEPTH

	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_FUNC_END(call_depth_return_thunk)

#endif /* CONFIG_MITIGATION_CALL_DEPTH_TRACKING */

/*
 * This function name is magical and is used by -mfunction-return=thunk-extern
 * for the compiler to generate JMPs to it.
 *
 * This code is only used during kernel boot or module init. All
 * 'JMP __x86_return_thunk' sites are changed to something else by
 * apply_returns().
 *
 * The ALTERNATIVE below adds a really loud warning to catch the case
 * where the insufficient default return thunk ends up getting used for
 * whatever reason, such as miscompilation or a failure of
 * objtool/alternatives/etc. to patch all the return sites.
 */
SYM_CODE_START(__x86_return_thunk)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || \
    defined(CONFIG_MITIGATION_SRSO) || \
    defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING)
	ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \
		   "jmp warn_thunk_thunk", X86_FEATURE_ALWAYS
#else
	ANNOTATE_UNRET_SAFE
	ret
#endif
	int3
SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)

#endif /* CONFIG_MITIGATION_RETHUNK */