Commit | Line | Data |
---|---|---|
b2441318 | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
478dc89c | 2 | #include <linux/jump_label.h> |
8c1f7558 | 3 | #include <asm/unwind_hints.h> |
8a09317b DH |
4 | #include <asm/cpufeatures.h> |
5 | #include <asm/page_types.h> | |
6fd166aa PZ |
6 | #include <asm/percpu.h> |
7 | #include <asm/asm-offsets.h> | |
8 | #include <asm/processor-flags.h> | |
478dc89c | 9 | |
0c2bd5a5 | 10 | /* |
063f8913 IM |
11 | |
12 | x86 function calling convention, 64-bit: | |
13 | ---------------------------------------- | |
14 | arguments | callee-saved | extra caller-saved | return | |
15 | [callee-clobbered] | | [callee-clobbered] | | |
16 | --------------------------------------------------------------------------- | |
17 | rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**] | |
18 | ||
19 | ( rsp is obviously invariant across normal function calls. (gcc can 'merge' | |
20 | functions when it sees tail-call optimization possibilities) rflags is | |
21 | clobbered. Leftover arguments are passed on the stack.) | |
22 | ||
23 | [*] In the frame-pointers case rbp is fixed to the stack frame. | |
24 | ||
25 | [**] for struct return values wider than 64 bits the return convention is a | |
26 | bit more complex: up to 128 bits width we return small structures | |
27 | straight in rax, rdx. For structures larger than that (3 words or | |
28 | larger) the caller puts a pointer to an on-stack return struct | |
29 | [allocated in the caller's stack frame] into the first argument - i.e. | |
30 | into rdi. All other arguments shift up by one in this case. | |
31 | Fortunately this case is rare in the kernel. | |
32 | ||
33 | For 32-bit we have the following conventions - kernel is built with | |
34 | -mregparm=3 and -freg-struct-return: | |
35 | ||
36 | x86 function calling convention, 32-bit: | |
37 | ---------------------------------------- | |
38 | arguments | callee-saved | extra caller-saved | return | |
39 | [callee-clobbered] | | [callee-clobbered] | | |
40 | ------------------------------------------------------------------------- | |
41 | eax edx ecx | ebx edi esi ebp [*] | <none> | eax, edx [**] | |
42 | ||
43 | ( here too esp is obviously invariant across normal function calls. eflags | |
44 | is clobbered. Leftover arguments are passed on the stack. ) | |
45 | ||
46 | [*] In the frame-pointers case ebp is fixed to the stack frame. | |
47 | ||
48 | [**] We build with -freg-struct-return, which on 32-bit means similar | |
49 | semantics as on 64-bit: edx can be used for a second return value | |
50 | (i.e. covering integer and structure sizes up to 64 bits) - after that | |
51 | it gets more complex and more expensive: 3-word or larger struct returns | |
52 | get done in the caller's frame and the pointer to the return struct goes | |
53 | into regparm0, i.e. eax - the other arguments shift up and the | |
54 | function's register parameters degenerate to regparm=2 in essence. | |
55 | ||
56 | */ | |
57 | ||
1a338ac3 PZ |
58 | #ifdef CONFIG_X86_64 |
59 | ||
063f8913 | 60 | /* |
1b2b23d8 TG |
61 | * 64-bit system call stack frame layout defines and helpers, |
62 | * for assembly code: | |
0c2bd5a5 | 63 | */ |
1da177e4 | 64 | |
76f5df43 DV |
65 | /* The layout forms the "struct pt_regs" on the stack. Each define is a slot index times 8, lowest slot first; the order is the reverse of the push order in PUSH_AND_CLEAR_REGS: */ |
66 | /* | |
67 | * C ABI says these regs are callee-preserved. They aren't saved on kernel entry | |
68 | * unless syscall needs a complete, fully filled "struct pt_regs". | |
69 | */ | |
70 | #define R15 0*8 | |
71 | #define R14 1*8 | |
72 | #define R13 2*8 | |
73 | #define R12 3*8 | |
74 | #define RBP 4*8 | |
75 | #define RBX 5*8 | |
76 | /* These regs are callee-clobbered. Always saved on kernel entry. */ | |
77 | #define R11 6*8 | |
78 | #define R10 7*8 | |
79 | #define R9 8*8 | |
80 | #define R8 9*8 | |
81 | #define RAX 10*8 | |
82 | #define RCX 11*8 | |
83 | #define RDX 12*8 | |
84 | #define RSI 13*8 | |
85 | #define RDI 14*8 | |
86 | /* | |
87 | * On syscall entry, this is syscall#. On CPU exception, this is error code. | |
88 | * On hw interrupt, it's IRQ number: | |
89 | */ | |
90 | #define ORIG_RAX 15*8 | |
91 | /* Return frame for iretq */ | |
92 | #define RIP 16*8 | |
93 | #define CS 17*8 | |
94 | #define EFLAGS 18*8 | |
95 | #define RSP 19*8 | |
96 | #define SS 20*8 | |
97 | ||
911d2bb5 DV |
98 | #define SIZEOF_PTREGS 21*8 /* 21 slots in total: R15 (slot 0) through SS (slot 20) */ |
99 | ||
9e809d15 | 100 | .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 |
3f01daec DB |
101 | /* |
102 | * Push registers and sanitize registers of values that a | |
103 | * speculation attack might otherwise want to exploit. The | |
104 | * lower registers are likely clobbered well before they | |
105 | * could be put to use in a speculative execution gadget. | |
106 | * Interleave XOR with PUSH for better uop scheduling: | |
107 | * Arguments: rdx and rax name the operands pushed into the pt_regs->dx and pt_regs->ax slots (default: the registers themselves). A nonzero save_ret means a return address is on top of the stack at entry; it is carried in %rsi while the frame is built and pushed back on top at the end, so %rsi does not hold its entry value afterwards. rdi, rsi, rdx, rcx and rax are pushed but not zeroed here. Each xorl writes a 32-bit register, which zeroes the full 64-bit register. */ | |
9e809d15 DB |
108 | .if \save_ret |
109 | pushq %rsi /* pt_regs->si */ | |
110 | movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ | |
111 | movq %rdi, 8(%rsp) /* pt_regs->di (overwriting original return address) */ | |
112 | .else | |
3f01daec DB |
113 | pushq %rdi /* pt_regs->di */ |
114 | pushq %rsi /* pt_regs->si */ | |
9e809d15 | 115 | .endif |
30907fd1 | 116 | pushq \rdx /* pt_regs->dx (macro argument, defaults to %rdx) */ |
3f01daec | 117 | pushq %rcx /* pt_regs->cx */ |
30907fd1 | 118 | pushq \rax /* pt_regs->ax (macro argument, defaults to %rax) */ |
3f01daec | 119 | pushq %r8 /* pt_regs->r8 */ |
ced5d0bf | 120 | xorl %r8d, %r8d /* nospec r8 */ |
3f01daec | 121 | pushq %r9 /* pt_regs->r9 */ |
ced5d0bf | 122 | xorl %r9d, %r9d /* nospec r9 */ |
3f01daec | 123 | pushq %r10 /* pt_regs->r10 */ |
ced5d0bf | 124 | xorl %r10d, %r10d /* nospec r10 */ |
3f01daec | 125 | pushq %r11 /* pt_regs->r11 */ |
ced5d0bf | 126 | xorl %r11d, %r11d /* nospec r11 */ |
3f01daec DB |
127 | pushq %rbx /* pt_regs->rbx */ |
128 | xorl %ebx, %ebx /* nospec rbx */ | |
129 | pushq %rbp /* pt_regs->rbp */ | |
130 | xorl %ebp, %ebp /* nospec rbp */ | |
131 | pushq %r12 /* pt_regs->r12 */ | |
ced5d0bf | 132 | xorl %r12d, %r12d /* nospec r12 */ |
3f01daec | 133 | pushq %r13 /* pt_regs->r13 */ |
ced5d0bf | 134 | xorl %r13d, %r13d /* nospec r13 */ |
3f01daec | 135 | pushq %r14 /* pt_regs->r14 */ |
ced5d0bf | 136 | xorl %r14d, %r14d /* nospec r14 */ |
3f01daec | 137 | pushq %r15 /* pt_regs->r15 */ |
ced5d0bf | 138 | xorl %r15d, %r15d /* nospec r15 */ |
3f01daec | 139 | UNWIND_HINT_REGS /* placed after all 15 register slots are pushed and before the optional return-address push */ |
9e809d15 DB |
140 | .if \save_ret |
141 | pushq %rsi /* return address on top of stack */ | |
142 | .endif | |
92816f57 | 143 | .endm |
3f01daec | 144 | |
92816f57 | 145 | .macro POP_REGS pop_rdi=1 skip_r11rcx=0 /* Pops the register slots in the reverse of the PUSH_AND_CLEAR_REGS push order. Nonzero skip_r11rcx pops the r11 and rcx slots into %rsi instead (overwritten later by the real rsi pop), leaving %r11 and %rcx untouched. pop_rdi=0 leaves the rdi slot on the stack. */ |
e872045b AL |
146 | popq %r15 |
147 | popq %r14 | |
148 | popq %r13 | |
149 | popq %r12 | |
150 | popq %rbp | |
151 | popq %rbx | |
502af0d7 DB |
152 | .if \skip_r11rcx |
153 | popq %rsi /* discard the r11 slot; %rsi is reloaded below */ | |
154 | .else | |
e872045b | 155 | popq %r11 |
502af0d7 | 156 | .endif |
e872045b AL |
157 | popq %r10 |
158 | popq %r9 | |
159 | popq %r8 | |
160 | popq %rax | |
502af0d7 DB |
161 | .if \skip_r11rcx |
162 | popq %rsi /* discard the rcx slot; %rsi is reloaded below */ | |
163 | .else | |
e872045b | 164 | popq %rcx |
502af0d7 | 165 | .endif |
e872045b AL |
166 | popq %rdx |
167 | popq %rsi /* the real rsi slot */ | |
502af0d7 | 168 | .if \pop_rdi |
e872045b | 169 | popq %rdi |
502af0d7 | 170 | .endif |
92816f57 | 171 | .endm |
1a338ac3 | 172 | |
946c1911 JP |
173 | /* |
174 | * This is a sneaky trick to help the unwinder find pt_regs on the stack. The | |
175 | * frame pointer is replaced with an encoded pointer to pt_regs. The encoding | |
176 | * is just setting the LSB, which makes it an invalid stack address and is also | |
177 | * a signal to the unwinder that it's a pt_regs pointer in disguise. | |
178 | * | |
dde3036d | 179 | * NOTE: This macro must be used *after* PUSH_AND_CLEAR_REGS because it corrupts |
946c1911 JP |
180 | * the original rbp. |
181 | * ptregs_offset is the displacement from %rsp to pt_regs (default 0, meaning pt_regs starts at %rsp). The macro expands to nothing unless CONFIG_FRAME_POINTER is defined. */ | |
182 | .macro ENCODE_FRAME_POINTER ptregs_offset=0 | |
183 | #ifdef CONFIG_FRAME_POINTER | |
184 | .if \ptregs_offset | |
185 | leaq \ptregs_offset(%rsp), %rbp /* rbp = rsp + ptregs_offset */ | |
186 | .else | |
187 | mov %rsp, %rbp /* zero offset: pt_regs is at rsp */ | |
188 | .endif | |
189 | orq $0x1, %rbp /* set the LSB to mark rbp as an encoded pt_regs pointer */ | |
190 | #endif | |
191 | .endm | |
192 | ||
8a09317b DH |
193 | #ifdef CONFIG_PAGE_TABLE_ISOLATION |
194 | ||
6fd166aa PZ |
195 | /* |
196 | * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two | |
197 | * halves: | |
198 | * NOTE(review): the bit is taken from PAGE_SHIFT, so "bit 12" presumably assumes 4k pages - confirm against asm/page_types.h. */ | |
f10ee3dc TG |
199 | #define PTI_USER_PGTABLE_BIT PAGE_SHIFT |
200 | #define PTI_USER_PGTABLE_MASK (1 << PTI_USER_PGTABLE_BIT) | |
201 | #define PTI_USER_PCID_BIT X86_CR3_PTI_PCID_USER_BIT | |
202 | #define PTI_USER_PCID_MASK (1 << PTI_USER_PCID_BIT) | |
203 | #define PTI_USER_PGTABLE_AND_PCID_MASK (PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK) /* both user bits; ADJUST_KERNEL_CR3 clears them together */ | |
8a09317b | 204 | |
6fd166aa PZ |
205 | .macro SET_NOFLUSH_BIT reg:req /* Sets bit X86_CR3_PCID_NOFLUSH_BIT in the given register operand. */ |
206 | bts $X86_CR3_PCID_NOFLUSH_BIT, \reg /* bts also writes CF with the previous bit value */ | |
8a09317b DH |
207 | .endm |
208 | ||
6fd166aa PZ |
209 | .macro ADJUST_KERNEL_CR3 reg:req /* Turns a CR3 value held in reg into its kernel form; modifies reg and the flags, does not write CR3 itself. */ |
210 | ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID /* presumably a no-op unless the CPU has PCID - ALTERNATIVE is defined outside this file, confirm */ | |
211 | /* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */ | |
f10ee3dc | 212 | andq $(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg |
8a09317b DH |
213 | .endm |
214 | ||
215 | .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req /* Reads CR3, converts it with ADJUST_KERNEL_CR3 and writes it back. Clobbers scratch_reg and the flags. */ | |
aa8c6248 | 216 | ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI /* default text jumps past the whole switch; presumably patched out when X86_FEATURE_PTI is set - confirm */ |
8a09317b DH |
217 | mov %cr3, \scratch_reg |
218 | ADJUST_KERNEL_CR3 \scratch_reg | |
219 | mov \scratch_reg, %cr3 | |
aa8c6248 | 220 | .Lend_\@: |
8a09317b DH |
221 | .endm |
222 | ||
6fd166aa PZ |
223 | #define THIS_CPU_user_pcid_flush_mask \ |
224 | PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask /* memory operand used by the bt/btr flush checks in the CR3 macros; TLB_STATE_user_pcid_flush_mask presumably comes from asm-offsets - confirm */ | |
225 | ||
226 | .macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req /* Builds the user CR3 value from the current CR3 and writes it, using no stack. Clobbers scratch_reg and the flags; scratch_reg2 is written only on the PCID path (copy of the original CR3). */ | |
aa8c6248 | 227 | ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI |
8a09317b | 228 | mov %cr3, \scratch_reg |
6fd166aa PZ |
229 | |
230 | ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID /* default text skips all ASID handling and only flips the PGD bit */ | |
231 | ||
232 | /* | |
233 | * Test if the ASID needs a flush. | |
234 | */ | |
235 | movq \scratch_reg, \scratch_reg2 /* keep the full CR3 value; scratch_reg becomes a bit index next */ | |
236 | andq $(0x7FF), \scratch_reg /* mask ASID (keeps bits 0-10) */ | |
237 | bt \scratch_reg, THIS_CPU_user_pcid_flush_mask /* CF = flush-pending bit for this ASID */ | |
238 | jnc .Lnoflush_\@ | |
239 | ||
240 | /* Flush needed, clear the bit */ | |
241 | btr \scratch_reg, THIS_CPU_user_pcid_flush_mask | |
242 | movq \scratch_reg2, \scratch_reg /* restore the full CR3 value, NOFLUSH bit left unset */ | |
f10ee3dc | 243 | jmp .Lwrcr3_pcid_\@ |
6fd166aa PZ |
244 | |
245 | .Lnoflush_\@: | |
246 | movq \scratch_reg2, \scratch_reg /* restore the full CR3 value */ | |
247 | SET_NOFLUSH_BIT \scratch_reg | |
248 | ||
f10ee3dc TG |
249 | .Lwrcr3_pcid_\@: |
250 | /* Flip the ASID to the user version */ | |
251 | orq $(PTI_USER_PCID_MASK), \scratch_reg | |
252 | ||
6fd166aa | 253 | .Lwrcr3_\@: |
f10ee3dc TG |
254 | /* Flip the PGD to the user version */ |
255 | orq $(PTI_USER_PGTABLE_MASK), \scratch_reg | |
8a09317b | 256 | mov \scratch_reg, %cr3 |
aa8c6248 | 257 | .Lend_\@: |
8a09317b DH |
258 | .endm |
259 | ||
6fd166aa PZ |
260 | .macro SWITCH_TO_USER_CR3_STACK scratch_reg:req /* Same as SWITCH_TO_USER_CR3_NOSTACK, but uses %rax as the second scratch register and preserves it on the stack. */ |
261 | pushq %rax /* save %rax around its use as scratch_reg2 */ | |
262 | SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax | |
263 | popq %rax | |
264 | .endm | |
265 | ||
8a09317b | 266 | .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req /* Copies the current CR3 into save_reg, then switches to the kernel CR3 only if the user pagetable bit was set. Clobbers scratch_reg and the flags. */ |
aa8c6248 | 267 | ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI /* on the default path save_reg is not written at all */ |
8a09317b DH |
268 | movq %cr3, \scratch_reg |
269 | movq \scratch_reg, \save_reg | |
270 | /* | |
f10ee3dc TG |
271 | * Test the user pagetable bit. If set, then the user page tables |
272 | * are active. If clear CR3 already has the kernel page table | |
273 | * active. | |
8a09317b | 274 | */ |
f10ee3dc TG |
275 | bt $PTI_USER_PGTABLE_BIT, \scratch_reg |
276 | jnc .Ldone_\@ | |
8a09317b DH |
277 | |
278 | ADJUST_KERNEL_CR3 \scratch_reg | |
279 | movq \scratch_reg, %cr3 | |
280 | ||
281 | .Ldone_\@: | |
282 | .endm | |
283 | ||
21e94459 | 284 | .macro RESTORE_CR3 scratch_reg:req save_reg:req /* Writes save_reg back to CR3. Clobbers scratch_reg and the flags on the PCID path, and may set the NOFLUSH bit in save_reg itself. */ |
aa8c6248 | 285 | ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI |
21e94459 PZ |
286 | |
287 | ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID /* default text writes save_reg unchanged */ | |
288 | ||
289 | /* | |
290 | * KERNEL pages can always resume with NOFLUSH as we do | |
291 | * explicit flushes. | |
292 | */ | |
f10ee3dc | 293 | bt $PTI_USER_PGTABLE_BIT, \save_reg |
21e94459 PZ |
294 | jnc .Lnoflush_\@ |
295 | ||
296 | /* | |
297 | * Check if there's a pending flush for the user ASID we're | |
298 | * about to set. | |
299 | */ | |
300 | movq \save_reg, \scratch_reg | |
301 | andq $(0x7FF), \scratch_reg /* keep bits 0-10 as the bit index, as in SWITCH_TO_USER_CR3_NOSTACK */ | |
302 | bt \scratch_reg, THIS_CPU_user_pcid_flush_mask | |
303 | jnc .Lnoflush_\@ | |
304 | ||
305 | btr \scratch_reg, THIS_CPU_user_pcid_flush_mask /* flush pending: clear the bit and write CR3 without NOFLUSH */ | |
306 | jmp .Lwrcr3_\@ | |
307 | ||
308 | .Lnoflush_\@: | |
309 | SET_NOFLUSH_BIT \save_reg | |
310 | ||
311 | .Lwrcr3_\@: | |
8a09317b DH |
312 | /* |
313 | * The CR3 write could be avoided when not changing its value, | |
314 | * but would require a CR3 read *and* a scratch register. | |
315 | */ | |
316 | movq \save_reg, %cr3 | |
aa8c6248 | 317 | .Lend_\@: |
8a09317b DH |
318 | .endm |
319 | ||
320 | #else /* CONFIG_PAGE_TABLE_ISOLATION=n: */ | |
321 | ||
322 | .macro SWITCH_TO_KERNEL_CR3 scratch_reg:req /* empty stubs with the same argument lists as the PTI versions, so call sites need no ifdefs */ | |
323 | .endm | |
6fd166aa PZ |
324 | .macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req |
325 | .endm | |
326 | .macro SWITCH_TO_USER_CR3_STACK scratch_reg:req | |
8a09317b DH |
327 | .endm |
328 | .macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req /* note: save_reg is left unwritten in this configuration */ | |
329 | .endm | |
21e94459 | 330 | .macro RESTORE_CR3 scratch_reg:req save_reg:req |
8a09317b DH |
331 | .endm |
332 | ||
333 | #endif | |
334 | ||
1a338ac3 PZ |
335 | #endif /* CONFIG_X86_64 */ |
336 | ||
478dc89c AL |
337 | /* |
338 | * This does 'call enter_from_user_mode' unless we can avoid it based on | |
339 | * kernel config or using the static jump infrastructure. | |
340 | * Expands to nothing when CONFIG_CONTEXT_TRACKING is not defined. Without HAVE_JUMP_LABEL the call is unconditional. */ | |
341 | .macro CALL_enter_from_user_mode | |
342 | #ifdef CONFIG_CONTEXT_TRACKING | |
343 | #ifdef HAVE_JUMP_LABEL | |
344 | STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0 /* skips the call when the context_tracking_enabled key is false; macro presumably comes from linux/jump_label.h - confirm */ | |
345 | #endif | |
346 | call enter_from_user_mode | |
347 | .Lafter_call_\@: | |
348 | #endif | |
349 | .endm |