Commit | Line | Data |
---|---|---|
f980f9c3 JR |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* | |
3 | * AMD Memory Encryption Support | |
4 | * | |
5 | * Copyright (C) 2019 SUSE | |
6 | * | |
7 | * Author: Joerg Roedel <jroedel@suse.de> | |
8 | */ | |
9 | ||
8d9d46bb | 10 | #define pr_fmt(fmt) "SEV: " fmt |
0786138c | 11 | |
1aa9aa8e | 12 | #include <linux/sched/debug.h> /* For show_regs() */ |
885689e4 | 13 | #include <linux/percpu-defs.h> |
6283f2ef | 14 | #include <linux/cc_platform.h> |
1aa9aa8e | 15 | #include <linux/printk.h> |
885689e4 TL |
16 | #include <linux/mm_types.h> |
17 | #include <linux/set_memory.h> | |
18 | #include <linux/memblock.h> | |
19 | #include <linux/kernel.h> | |
f980f9c3 | 20 | #include <linux/mm.h> |
0afb6b66 | 21 | #include <linux/cpumask.h> |
3a45b375 BS |
22 | #include <linux/efi.h> |
23 | #include <linux/platform_device.h> | |
24 | #include <linux/io.h> | |
0144e3b8 | 25 | #include <linux/psp-sev.h> |
0f4a1e80 | 26 | #include <linux/dmi.h> |
0144e3b8 | 27 | #include <uapi/linux/sev-guest.h> |
f980f9c3 | 28 | |
428080c9 | 29 | #include <asm/init.h> |
02772fb9 | 30 | #include <asm/cpu_entry_area.h> |
8940ac9c | 31 | #include <asm/stacktrace.h> |
e759959f | 32 | #include <asm/sev.h> |
f980f9c3 | 33 | #include <asm/insn-eval.h> |
ff0c37e1 | 34 | #include <asm/fpu/xcr.h> |
f980f9c3 | 35 | #include <asm/processor.h> |
0786138c | 36 | #include <asm/realmode.h> |
75cc9a84 | 37 | #include <asm/setup.h> |
0786138c | 38 | #include <asm/traps.h> |
f980f9c3 | 39 | #include <asm/svm.h> |
094794f5 JR |
40 | #include <asm/smp.h> |
41 | #include <asm/cpu.h> | |
0afb6b66 | 42 | #include <asm/apic.h> |
801baa69 | 43 | #include <asm/cpuid.h> |
30612045 | 44 | #include <asm/cmdline.h> |
f980f9c3 | 45 | |
479a7bf5 TL |
/* Architectural reset value of the DR7 debug control register (all breakpoints disabled) */
#define DR7_RESET_VALUE        0x400

/* AP INIT values as documented in the APM2 section "Processor Initialization State" */
#define AP_INIT_CS_LIMIT		0xffff
#define AP_INIT_DS_LIMIT		0xffff
#define AP_INIT_LDTR_LIMIT		0xffff
#define AP_INIT_GDTR_LIMIT		0xffff
#define AP_INIT_IDTR_LIMIT		0xffff
#define AP_INIT_TR_LIMIT		0xffff
#define AP_INIT_RFLAGS_DEFAULT		0x2
#define AP_INIT_DR6_DEFAULT		0xffff0ff0
#define AP_INIT_GPAT_DEFAULT		0x0007040600070406ULL
#define AP_INIT_XCR0_DEFAULT		0x1
#define AP_INIT_X87_FTW_DEFAULT		0x5555
#define AP_INIT_X87_FCW_DEFAULT		0x0040
#define AP_INIT_CR0_DEFAULT		0x60000010
#define AP_INIT_MXCSR_DEFAULT		0x1f80
63 | ||
d7b69b59 BPA |
/*
 * Human-readable names for the SEV_STATUS MSR feature bits, indexed by
 * bit position (MSR_AMD64_*_BIT).
 */
static const char * const sev_status_feat_names[] = {
	[MSR_AMD64_SEV_ENABLED_BIT]		= "SEV",
	[MSR_AMD64_SEV_ES_ENABLED_BIT]		= "SEV-ES",
	[MSR_AMD64_SEV_SNP_ENABLED_BIT]		= "SEV-SNP",
	[MSR_AMD64_SNP_VTOM_BIT]		= "vTom",
	[MSR_AMD64_SNP_REFLECT_VC_BIT]		= "ReflectVC",
	[MSR_AMD64_SNP_RESTRICTED_INJ_BIT]	= "RI",
	[MSR_AMD64_SNP_ALT_INJ_BIT]		= "AI",
	[MSR_AMD64_SNP_DEBUG_SWAP_BIT]		= "DebugSwap",
	[MSR_AMD64_SNP_PREVENT_HOST_IBS_BIT]	= "NoHostIBS",
	[MSR_AMD64_SNP_BTB_ISOLATION_BIT]	= "BTBIsol",
	[MSR_AMD64_SNP_VMPL_SSS_BIT]		= "VmplSSS",
	[MSR_AMD64_SNP_SECURE_TSC_BIT]		= "SecureTSC",
	[MSR_AMD64_SNP_VMGEXIT_PARAM_BIT]	= "VMGExitParam",
	[MSR_AMD64_SNP_IBS_VIRT_BIT]		= "IBSVirt",
	[MSR_AMD64_SNP_VMSA_REG_PROT_BIT]	= "VMSARegProt",
	[MSR_AMD64_SNP_SMT_PROT_BIT]		= "SMTProt",
};
82 | ||
1aa9aa8e JR |
/* For early boot hypervisor communication in SEV-ES enabled guests */
static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);

/*
 * Needs to be in the .data section because we need it NULL before bss is
 * cleared. Points at boot_ghcb_page once the boot GHCB is set up; used as
 * a fallback until per-CPU GHCBs are registered (see __set_pages_state()).
 */
static struct ghcb *boot_ghcb __section(".data");

/* Bitmap of SEV features supported by the hypervisor */
static u64 sev_hv_features __ro_after_init;
94 | ||
885689e4 TL |
/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
	/* The per-CPU GHCB used for hypervisor communication. */
	struct ghcb ghcb_page;

	/*
	 * Reserve one page per CPU as backup storage for the unencrypted GHCB.
	 * It is needed when an NMI happens while the #VC handler uses the real
	 * GHCB, and the NMI handler itself is causing another #VC exception. In
	 * that case the GHCB content of the first handler needs to be backed up
	 * and restored.
	 */
	struct ghcb backup_ghcb;

	/*
	 * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
	 * There is no need for it to be atomic, because nothing is written to
	 * the GHCB between the read and the write of ghcb_active. So it is safe
	 * to use it when a nested #VC exception happens before the write.
	 *
	 * This is necessary for example in the #VC->NMI->#VC case when the NMI
	 * happens while the first #VC handler uses the GHCB. When the NMI code
	 * raises a second #VC handler it might overwrite the contents of the
	 * GHCB written by the first handler. To avoid this the content of the
	 * GHCB is saved and restored when the GHCB is detected to be in use
	 * already.
	 */
	bool ghcb_active;
	bool backup_ghcb_active;

	/*
	 * Cached DR7 value - write it on DR7 writes and return it on reads.
	 * That value will never make it to the real hardware DR7 as debugging
	 * is currently unsupported in SEV-ES guests.
	 */
	unsigned long dr7;
};

/*
 * Cookie handed between __sev_get_ghcb() and __sev_put_ghcb(): ghcb is
 * non-NULL only when the backup GHCB had to be activated.
 */
struct ghcb_state {
	struct ghcb *ghcb;
};

/* Per-CPU #VC runtime data, allocated/installed elsewhere in this file. */
static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
/* Per-CPU SEV-ES save area pointer (AP bringup; see snp_set_vmsa() below). */
static DEFINE_PER_CPU(struct sev_es_save_area *, sev_vmsa);
138 | ||
ba37a143 MR |
/* Runtime SEV configuration flags, packed into a single 64-bit word. */
struct sev_config {
	__u64 debug		: 1,

	      /*
	       * A flag used by __set_pages_state() that indicates when the
	       * per-CPU GHCB has been created and registered and thus can be
	       * used by the BSP instead of the early boot GHCB.
	       *
	       * For APs, the per-CPU GHCB is created before they are started
	       * and registered upon startup, so this flag can be used globally
	       * for the BSP and APs.
	       */
	      ghcbs_initialized	: 1,

	      __reserved	: 62;
};

static struct sev_config sev_cfg __read_mostly;
157 | ||
545ac14c | 158 | static __always_inline bool on_vc_stack(struct pt_regs *regs) |
315562c9 | 159 | { |
545ac14c JR |
160 | unsigned long sp = regs->sp; |
161 | ||
162 | /* User-mode RSP is not trusted */ | |
163 | if (user_mode(regs)) | |
164 | return false; | |
165 | ||
166 | /* SYSCALL gap still has user-mode RSP */ | |
167 | if (ip_within_syscall_gap(regs)) | |
168 | return false; | |
169 | ||
315562c9 JR |
170 | return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); |
171 | } | |
172 | ||
/*
 * This function handles the case when an NMI is raised in the #VC
 * exception handler entry code, before the #VC handler has switched off
 * its IST stack. In this case, the IST entry for #VC must be adjusted,
 * so that any nested #VC exception will not overwrite the stack
 * contents of the interrupted #VC handler.
 *
 * The IST entry is adjusted unconditionally so that it can be also be
 * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
 * nested sev_es_ist_exit() call may adjust back the IST entry too
 * early.
 *
 * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
 * on the NMI IST stack, as they are only called from NMI handling code
 * right now.
 */
void noinstr __sev_es_ist_enter(struct pt_regs *regs)
{
	unsigned long old_ist, new_ist;

	/* Read old IST entry */
	new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	/*
	 * If NMI happened while on the #VC IST stack, set the new IST
	 * value below regs->sp, so that the interrupted stack frame is
	 * not overwritten by subsequent #VC exceptions.
	 */
	if (on_vc_stack(regs))
		new_ist = regs->sp;

	/*
	 * Reserve additional 8 bytes and store old IST value so this
	 * adjustment can be unrolled in __sev_es_ist_exit().
	 */
	new_ist -= sizeof(old_ist);
	*(unsigned long *)new_ist = old_ist;

	/* Set new IST entry */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
}
214 | ||
/*
 * Undo the IST adjustment made by __sev_es_ist_enter(): pop the saved
 * previous IST value (stored just below the adjusted entry) back into
 * the TSS. Must be called once per __sev_es_ist_enter() on NMI exit.
 */
void noinstr __sev_es_ist_exit(void)
{
	unsigned long ist;

	/* Read IST entry */
	ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

	/* Unbalanced exit: entry was never adjusted from the stack top. */
	if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
		return;

	/* Read back old IST entry and write it to the TSS */
	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
}
228 | ||
d187f217 JR |
/*
 * Nothing shall interrupt this code path while holding the per-CPU
 * GHCB. The backup GHCB is only for NMIs interrupting this path.
 *
 * Callers must disable local interrupts around it.
 *
 * Returns the GHCB to use. On return, state->ghcb is non-NULL only when
 * the backup GHCB had to be activated; pass @state unchanged to
 * __sev_put_ghcb() to release/restore.
 */
static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (unlikely(data->ghcb_active)) {
		/* GHCB is already in use - save its contents */

		if (unlikely(data->backup_ghcb_active)) {
			/*
			 * Backup-GHCB is also already in use. There is no way
			 * to continue here so just kill the machine. To make
			 * panic() work, mark GHCBs inactive so that messages
			 * can be printed out.
			 */
			data->ghcb_active        = false;
			data->backup_ghcb_active = false;

			/* panic() is instrumentable; bracket it for noinstr. */
			instrumentation_begin();
			panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
			instrumentation_end();
		}

		/* Mark backup_ghcb active before writing to it */
		data->backup_ghcb_active = true;

		state->ghcb = &data->backup_ghcb;

		/* Backup GHCB content */
		*state->ghcb = *ghcb;
	} else {
		state->ghcb = NULL;
		data->ghcb_active = true;
	}

	return ghcb;
}
277 | ||
f980f9c3 JR |
/* Read the 64-bit GHCB MSR used for the MSR-based GHCB protocol. */
static inline u64 sev_es_rd_ghcb_msr(void)
{
	return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
}
282 | ||
a1d5c98a | 283 | static __always_inline void sev_es_wr_ghcb_msr(u64 val) |
f980f9c3 JR |
284 | { |
285 | u32 low, high; | |
286 | ||
287 | low = (u32)(val); | |
288 | high = (u32)(val >> 32); | |
289 | ||
290 | native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high); | |
291 | } | |
292 | ||
/*
 * Fetch up to MAX_INSN_SIZE instruction bytes at the faulting kernel RIP
 * into @buffer without taking a page fault. Returns 0 on success,
 * negative on failure (per copy_from_kernel_nofault()).
 */
static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
				unsigned char *buffer)
{
	return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
}
298 | ||
99e4b0de | 299 | static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt) |
f980f9c3 JR |
300 | { |
301 | char buffer[MAX_INSN_SIZE]; | |
4aaa7eac | 302 | int insn_bytes; |
f980f9c3 | 303 | |
4aaa7eac | 304 | insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer); |
07570cef JR |
305 | if (insn_bytes == 0) { |
306 | /* Nothing could be copied */ | |
99e4b0de BP |
307 | ctxt->fi.vector = X86_TRAP_PF; |
308 | ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; | |
309 | ctxt->fi.cr2 = ctxt->regs->ip; | |
310 | return ES_EXCEPTION; | |
07570cef JR |
311 | } else if (insn_bytes == -EINVAL) { |
312 | /* Effective RIP could not be calculated */ | |
313 | ctxt->fi.vector = X86_TRAP_GP; | |
314 | ctxt->fi.error_code = 0; | |
315 | ctxt->fi.cr2 = 0; | |
316 | return ES_EXCEPTION; | |
f980f9c3 JR |
317 | } |
318 | ||
4aaa7eac | 319 | if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes)) |
99e4b0de BP |
320 | return ES_DECODE_FAILED; |
321 | ||
5e32c64b BP |
322 | if (ctxt->insn.immediate.got) |
323 | return ES_OK; | |
324 | else | |
325 | return ES_DECODE_FAILED; | |
f980f9c3 JR |
326 | } |
327 | ||
99e4b0de BP |
328 | static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt) |
329 | { | |
330 | char buffer[MAX_INSN_SIZE]; | |
5e32c64b | 331 | int res, ret; |
99e4b0de BP |
332 | |
333 | res = vc_fetch_insn_kernel(ctxt, buffer); | |
334 | if (res) { | |
335 | ctxt->fi.vector = X86_TRAP_PF; | |
336 | ctxt->fi.error_code = X86_PF_INSTR; | |
337 | ctxt->fi.cr2 = ctxt->regs->ip; | |
338 | return ES_EXCEPTION; | |
f980f9c3 JR |
339 | } |
340 | ||
5e32c64b BP |
341 | ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64); |
342 | if (ret < 0) | |
343 | return ES_DECODE_FAILED; | |
344 | else | |
345 | return ES_OK; | |
99e4b0de | 346 | } |
f980f9c3 | 347 | |
99e4b0de BP |
348 | static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) |
349 | { | |
350 | if (user_mode(ctxt->regs)) | |
351 | return __vc_decode_user_insn(ctxt); | |
352 | else | |
353 | return __vc_decode_kern_insn(ctxt); | |
f980f9c3 JR |
354 | } |
355 | ||
/*
 * Emulated memory write of @size bytes (1/2/4/8) from @buf to @dst on
 * behalf of the #VC instruction emulation. On fault, a synthetic #PF is
 * recorded in ctxt->fi and ES_EXCEPTION is returned.
 */
static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
				   char *dst, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;

	/*
	 * This function uses __put_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __put_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __put_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_to_user() here because
	 * vc_write_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *target = (u8 __user *)dst;

		memcpy(&d1, buf, 1);
		if (__put_user(d1, target))
			goto fault;
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *target = (u16 __user *)dst;

		memcpy(&d2, buf, 2);
		if (__put_user(d2, target))
			goto fault;
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *target = (u32 __user *)dst;

		memcpy(&d4, buf, 4);
		if (__put_user(d4, target))
			goto fault;
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *target = (u64 __user *)dst;

		memcpy(&d8, buf, 8);
		if (__put_user(d8, target))
			goto fault;
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	/* Fold in the USER bit only for faults on user-mode accesses. */
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)dst;

	return ES_EXCEPTION;
}
434 | ||
/*
 * Emulated memory read of @size bytes (1/2/4/8) from @src into @buf on
 * behalf of the #VC instruction emulation. On fault, a synthetic #PF is
 * recorded in ctxt->fi and ES_EXCEPTION is returned.
 */
static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
				  char *src, char *buf, size_t size)
{
	unsigned long error_code = X86_PF_PROT;

	/*
	 * This function uses __get_user() independent of whether kernel or user
	 * memory is accessed. This works fine because __get_user() does no
	 * sanity checks of the pointer being accessed. All that it does is
	 * to report when the access failed.
	 *
	 * Also, this function runs in atomic context, so __get_user() is not
	 * allowed to sleep. The page-fault handler detects that it is running
	 * in atomic context and will not try to take mmap_sem and handle the
	 * fault, so additional pagefault_enable()/disable() calls are not
	 * needed.
	 *
	 * The access can't be done via copy_from_user() here because
	 * vc_read_mem() must not use string instructions to access unsafe
	 * memory. The reason is that MOVS is emulated by the #VC handler by
	 * splitting the move up into a read and a write and taking a nested #VC
	 * exception on whatever of them is the MMIO access. Using string
	 * instructions here would cause infinite nesting.
	 */
	switch (size) {
	case 1: {
		u8 d1;
		u8 __user *s = (u8 __user *)src;

		if (__get_user(d1, s))
			goto fault;
		memcpy(buf, &d1, 1);
		break;
	}
	case 2: {
		u16 d2;
		u16 __user *s = (u16 __user *)src;

		if (__get_user(d2, s))
			goto fault;
		memcpy(buf, &d2, 2);
		break;
	}
	case 4: {
		u32 d4;
		u32 __user *s = (u32 __user *)src;

		if (__get_user(d4, s))
			goto fault;
		memcpy(buf, &d4, 4);
		break;
	}
	case 8: {
		u64 d8;
		u64 __user *s = (u64 __user *)src;

		if (__get_user(d8, s))
			goto fault;
		memcpy(buf, &d8, 8);
		break;
	}
	default:
		WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
		return ES_UNSUPPORTED;
	}

	return ES_OK;

fault:
	/* Fold in the USER bit only for faults on user-mode accesses. */
	if (user_mode(ctxt->regs))
		error_code |= X86_PF_USER;

	ctxt->fi.vector = X86_TRAP_PF;
	ctxt->fi.error_code = error_code;
	ctxt->fi.cr2 = (unsigned long)src;

	return ES_EXCEPTION;
}
512 | ||
2411cd82 JR |
/*
 * Translate @vaddr to a physical address by walking the current page
 * tables (slow path used by MMIO emulation). Fails with a synthetic #PF
 * if no mapping exists, and with ES_UNSUPPORTED if the mapping is
 * encrypted, since emulated MMIO to/from encrypted memory is not
 * supported.
 */
static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
					   unsigned long vaddr, phys_addr_t *paddr)
{
	unsigned long va = (unsigned long)vaddr;
	unsigned int level;
	phys_addr_t pa;
	pgd_t *pgd;
	pte_t *pte;

	/* Walk from the active CR3, not init_mm - user mappings count too. */
	pgd = __va(read_cr3_pa());
	pgd = &pgd[pgd_index(va)];
	pte = lookup_address_in_pgd(pgd, va, &level);
	if (!pte) {
		ctxt->fi.vector     = X86_TRAP_PF;
		ctxt->fi.cr2        = vaddr;
		ctxt->fi.error_code = 0;

		if (user_mode(ctxt->regs))
			ctxt->fi.error_code |= X86_PF_USER;

		return ES_EXCEPTION;
	}

	if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
		/* Emulated MMIO to/from encrypted memory not supported */
		return ES_UNSUPPORTED;

	/* Combine the PFN with the page offset for the mapping level found. */
	pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
	pa |= va & ~page_level_mask(level);

	*paddr = pa;

	return ES_OK;
}
547 | ||
b9cb9c45 JR |
/*
 * Check whether an emulated IN/OUT of @size bytes at @port is permitted.
 * Kernel-mode accesses are always allowed; user-mode accesses are checked
 * against the task's I/O permission bitmap (a set bit denies the port).
 * Denied accesses are reported as a synthetic #GP.
 */
static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
{
	BUG_ON(size > 4);

	if (user_mode(ctxt->regs)) {
		struct thread_struct *t = &current->thread;
		struct io_bitmap *iobm = t->io_bitmap;
		size_t idx;

		/* No bitmap at all means no user-mode port access. */
		if (!iobm)
			goto fault;

		/* Every byte of the access must be permitted. */
		for (idx = port; idx < port + size; ++idx) {
			if (test_bit(idx, iobm->bitmap))
				goto fault;
		}
	}

	return ES_OK;

fault:
	ctxt->fi.vector = X86_TRAP_GP;
	ctxt->fi.error_code = 0;

	return ES_EXCEPTION;
}
574 | ||
f980f9c3 | 575 | /* Include code shared with pre-decompression boot stage */ |
e759959f | 576 | #include "sev-shared.c" |
1aa9aa8e | 577 | |
/*
 * Release the GHCB obtained from __sev_get_ghcb(). If the backup GHCB
 * was in use (state->ghcb != NULL), restore the saved contents;
 * otherwise invalidate and mark the per-CPU GHCB free. Callers must
 * still have local interrupts disabled.
 */
static noinstr void __sev_put_ghcb(struct ghcb_state *state)
{
	struct sev_es_runtime_data *data;
	struct ghcb *ghcb;

	WARN_ON(!irqs_disabled());

	data = this_cpu_read(runtime_data);
	ghcb = &data->ghcb_page;

	if (state->ghcb) {
		/* Restore GHCB from Backup */
		*ghcb = *state->ghcb;
		data->backup_ghcb_active = false;
		state->ghcb = NULL;
	} else {
		/*
		 * Invalidate the GHCB so a VMGEXIT instruction issued
		 * from userspace won't appear to be valid.
		 */
		vc_ghcb_invalidate(ghcb);
		data->ghcb_active = false;
	}
}
602 | ||
4ca68e02 JR |
/*
 * Tell the hypervisor that NMI handling is complete via the
 * NMI_COMPLETE VMGEXIT, so that further NMIs can be injected.
 */
void noinstr __sev_es_nmi_complete(void)
{
	struct ghcb_state state;
	struct ghcb *ghcb;

	ghcb = __sev_get_ghcb(&state);

	/* Build a fresh NMI_COMPLETE request in the GHCB. */
	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	/* Point the GHCB MSR at this GHCB and exit to the hypervisor. */
	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
	VMGEXIT();

	__sev_put_ghcb(&state);
}
620 | ||
c2106a23 BS |
621 | static u64 __init get_secrets_page(void) |
622 | { | |
623 | u64 pa_data = boot_params.cc_blob_address; | |
624 | struct cc_blob_sev_info info; | |
625 | void *map; | |
626 | ||
627 | /* | |
628 | * The CC blob contains the address of the secrets page, check if the | |
629 | * blob is present. | |
630 | */ | |
631 | if (!pa_data) | |
632 | return 0; | |
633 | ||
634 | map = early_memremap(pa_data, sizeof(info)); | |
635 | if (!map) { | |
636 | pr_err("Unable to locate SNP secrets page: failed to map the Confidential Computing blob.\n"); | |
637 | return 0; | |
638 | } | |
639 | memcpy(&info, map, sizeof(info)); | |
640 | early_memunmap(map, sizeof(info)); | |
641 | ||
642 | /* smoke-test the secrets page passed */ | |
643 | if (!info.secrets_phys || info.secrets_len != PAGE_SIZE) | |
644 | return 0; | |
645 | ||
646 | return info.secrets_phys; | |
647 | } | |
648 | ||
/*
 * Read the AP jump table address from the OS area of the SNP secrets
 * page. Returns 0 if the secrets page cannot be located or mapped.
 */
static u64 __init get_snp_jump_table_addr(void)
{
	struct snp_secrets_page_layout *layout;
	void __iomem *mem;
	u64 pa, addr;

	pa = get_secrets_page();
	if (!pa)
		return 0;

	/* The secrets page is guest-private; map it encrypted. */
	mem = ioremap_encrypted(pa, PAGE_SIZE);
	if (!mem) {
		pr_err("Unable to locate AP jump table address: failed to map the SNP secrets page.\n");
		return 0;
	}

	layout = (__force struct snp_secrets_page_layout *)mem;

	addr = layout->os_area.ap_jump_table_pa;
	iounmap(mem);

	return addr;
}
672 | ||
/*
 * Return the AP jump table address. SNP guests read it from the secrets
 * page; SEV-ES guests ask the hypervisor via the AP_JUMP_TABLE VMGEXIT.
 * Returns 0 on failure.
 */
static u64 __init get_jump_table_addr(void)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	u64 ret = 0;

	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return get_snp_jump_table_addr();

	/* __sev_get_ghcb() requires interrupts to be disabled. */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	/* The hypervisor returns the address in sw_exit_info_2. */
	if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
	    ghcb_sw_exit_info_2_is_valid(ghcb))
		ret = ghcb->save.sw_exit_info_2;

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	return ret;
}
705 | ||
428080c9 AB |
/*
 * Change the RMP state of @npages 4K pages starting at @paddr using the
 * GHCB MSR protocol (usable before a GHCB is established). PVALIDATE is
 * issued on @vaddr before changing to shared and after changing to
 * private — the ordering is mandated by the architecture. Any failure
 * terminates the guest.
 */
static void __head
early_set_pages_state(unsigned long vaddr, unsigned long paddr,
		      unsigned long npages, enum psc_op op)
{
	unsigned long paddr_end;
	u64 val;
	int ret;

	vaddr = vaddr & PAGE_MASK;

	paddr = paddr & PAGE_MASK;
	paddr_end = paddr + (npages << PAGE_SHIFT);

	while (paddr < paddr_end) {
		if (op == SNP_PAGE_STATE_SHARED) {
			/* Page validation must be rescinded before changing to shared */
			ret = pvalidate(vaddr, RMP_PG_SIZE_4K, false);
			if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret))
				goto e_term;
		}

		/*
		 * Use the MSR protocol because this function can be called before
		 * the GHCB is established.
		 */
		sev_es_wr_ghcb_msr(GHCB_MSR_PSC_REQ_GFN(paddr >> PAGE_SHIFT, op));
		VMGEXIT();

		val = sev_es_rd_ghcb_msr();

		if (WARN(GHCB_RESP_CODE(val) != GHCB_MSR_PSC_RESP,
			 "Wrong PSC response code: 0x%x\n",
			 (unsigned int)GHCB_RESP_CODE(val)))
			goto e_term;

		if (WARN(GHCB_MSR_PSC_RESP_VAL(val),
			 "Failed to change page state to '%s' paddr 0x%lx error 0x%llx\n",
			 op == SNP_PAGE_STATE_PRIVATE ? "private" : "shared",
			 paddr, GHCB_MSR_PSC_RESP_VAL(val)))
			goto e_term;

		if (op == SNP_PAGE_STATE_PRIVATE) {
			/* Page validation must be performed after changing to private */
			ret = pvalidate(vaddr, RMP_PG_SIZE_4K, true);
			if (WARN(ret, "Failed to validate address 0x%lx ret %d", paddr, ret))
				goto e_term;
		}

		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	return;

e_term:
	sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}
763 | ||
/*
 * Early-boot helper: convert @npages pages at @vaddr/@paddr to private
 * in the RMP table (no-op unless running as an SEV-SNP guest).
 */
void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
					 unsigned long npages)
{
	/*
	 * This can be invoked in early boot while running identity mapped, so
	 * use an open coded check for SNP instead of using cc_platform_has().
	 * This eliminates worries about jump tables or checking boot_cpu_data
	 * in the cc_platform_has() function.
	 */
	if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
		return;

	/*
	 * Ask the hypervisor to mark the memory pages as private in the RMP
	 * table.
	 */
	early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_PRIVATE);
}
782 | ||
/*
 * Early-boot helper: convert @npages pages at @vaddr/@paddr to shared
 * in the RMP table (no-op unless running as an SEV-SNP guest).
 */
void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
					unsigned long npages)
{
	/*
	 * This can be invoked in early boot while running identity mapped, so
	 * use an open coded check for SNP instead of using cc_platform_has().
	 * This eliminates worries about jump tables or checking boot_cpu_data
	 * in the cc_platform_has() function.
	 */
	if (!(RIP_REL_REF(sev_status) & MSR_AMD64_SEV_SNP_ENABLED))
		return;

	/* Ask hypervisor to mark the memory pages shared in the RMP table. */
	early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED);
}
798 | ||
15d90887 TL |
/*
 * Fill one PSC descriptor with as many entries as fit for the range
 * [@vaddr, @vaddr_end) and submit it to the hypervisor. Direct-map
 * addresses may use 2M entries; vmalloc addresses are always 4K.
 * PVALIDATE ordering (before shared, after private) is mandated by the
 * architecture. Returns the first vaddr not covered by this descriptor.
 */
static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr,
				       unsigned long vaddr_end, int op)
{
	struct ghcb_state state;
	bool use_large_entry;
	struct psc_hdr *hdr;
	struct psc_entry *e;
	unsigned long flags;
	unsigned long pfn;
	struct ghcb *ghcb;
	int i;

	hdr = &data->hdr;
	e = data->entries;

	memset(data, 0, sizeof(*data));
	i = 0;

	while (vaddr < vaddr_end && i < ARRAY_SIZE(data->entries)) {
		hdr->end_entry = i;

		/* vmalloc mappings are not physically contiguous - look up each PFN. */
		if (is_vmalloc_addr((void *)vaddr)) {
			pfn = vmalloc_to_pfn((void *)vaddr);
			use_large_entry = false;
		} else {
			pfn = __pa(vaddr) >> PAGE_SHIFT;
			use_large_entry = true;
		}

		e->gfn = pfn;
		e->operation = op;

		/* Use a 2M entry only when aligned and fully covered. */
		if (use_large_entry && IS_ALIGNED(vaddr, PMD_SIZE) &&
		    (vaddr_end - vaddr) >= PMD_SIZE) {
			e->pagesize = RMP_PG_SIZE_2M;
			vaddr += PMD_SIZE;
		} else {
			e->pagesize = RMP_PG_SIZE_4K;
			vaddr += PAGE_SIZE;
		}

		e++;
		i++;
	}

	/* Page validation must be rescinded before changing to shared */
	if (op == SNP_PAGE_STATE_SHARED)
		pvalidate_pages(data);

	local_irq_save(flags);

	if (sev_cfg.ghcbs_initialized)
		ghcb = __sev_get_ghcb(&state);
	else
		ghcb = boot_ghcb;

	/* Invoke the hypervisor to perform the page state changes */
	if (!ghcb || vmgexit_psc(ghcb, data))
		sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);

	if (sev_cfg.ghcbs_initialized)
		__sev_put_ghcb(&state);

	local_irq_restore(flags);

	/* Page validation must be performed after changing to private */
	if (op == SNP_PAGE_STATE_PRIVATE)
		pvalidate_pages(data);

	return vaddr;
}
870 | ||
5dee19b6 | 871 | static void set_pages_state(unsigned long vaddr, unsigned long npages, int op) |
dc3f3d24 | 872 | { |
69dcb1e3 | 873 | struct snp_psc_desc desc; |
15d90887 | 874 | unsigned long vaddr_end; |
dc3f3d24 | 875 | |
7006b755 TL |
876 | /* Use the MSR protocol when a GHCB is not available. */ |
877 | if (!boot_ghcb) | |
15d90887 | 878 | return early_set_pages_state(vaddr, __pa(vaddr), npages, op); |
dc3f3d24 BS |
879 | |
880 | vaddr = vaddr & PAGE_MASK; | |
881 | vaddr_end = vaddr + (npages << PAGE_SHIFT); | |
882 | ||
15d90887 TL |
883 | while (vaddr < vaddr_end) |
884 | vaddr = __set_pages_state(&desc, vaddr, vaddr_end, op); | |
dc3f3d24 BS |
885 | } |
886 | ||
5dee19b6 | 887 | void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) |
dc3f3d24 BS |
888 | { |
889 | if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) | |
890 | return; | |
891 | ||
dc3f3d24 BS |
892 | set_pages_state(vaddr, npages, SNP_PAGE_STATE_SHARED); |
893 | } | |
894 | ||
5dee19b6 | 895 | void snp_set_memory_private(unsigned long vaddr, unsigned long npages) |
dc3f3d24 BS |
896 | { |
897 | if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) | |
898 | return; | |
899 | ||
900 | set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); | |
dc3f3d24 | 901 | } |
dc3f3d24 | 902 | |
6c321179 TL |
903 | void snp_accept_memory(phys_addr_t start, phys_addr_t end) |
904 | { | |
62d5e970 | 905 | unsigned long vaddr, npages; |
6c321179 TL |
906 | |
907 | if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) | |
908 | return; | |
909 | ||
910 | vaddr = (unsigned long)__va(start); | |
911 | npages = (end - start) >> PAGE_SHIFT; | |
912 | ||
913 | set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE); | |
dc3f3d24 BS |
914 | } |
915 | ||
0afb6b66 TL |
916 | static int snp_set_vmsa(void *va, bool vmsa) |
917 | { | |
918 | u64 attrs; | |
919 | ||
920 | /* | |
921 | * Running at VMPL0 allows the kernel to change the VMSA bit for a page | |
922 | * using the RMPADJUST instruction. However, for the instruction to | |
923 | * succeed it must target the permissions of a lesser privileged | |
924 | * (higher numbered) VMPL level, so use VMPL1 (refer to the RMPADJUST | |
925 | * instruction in the AMD64 APM Volume 3). | |
926 | */ | |
927 | attrs = 1; | |
928 | if (vmsa) | |
929 | attrs |= RMPADJUST_VMSA_PAGE_BIT; | |
930 | ||
931 | return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs); | |
932 | } | |
933 | ||
/* Common segment-attribute bits: present + user (code/data) segment. */
#define __ATTR_BASE		(SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
/* Readable code segment for the AP INIT CS. */
#define INIT_CS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
/* Writable data segment for the AP INIT DS/ES/FS/GS/SS. */
#define INIT_DS_ATTRIBS		(__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)

/* System-segment attributes; the low bits encode the descriptor type. */
#define INIT_LDTR_ATTRIBS	(SVM_SELECTOR_P_MASK | 2)
#define INIT_TR_ATTRIBS		(SVM_SELECTOR_P_MASK | 3)
940 | ||
941 | static void *snp_alloc_vmsa_page(void) | |
942 | { | |
943 | struct page *p; | |
944 | ||
945 | /* | |
946 | * Allocate VMSA page to work around the SNP erratum where the CPU will | |
947 | * incorrectly signal an RMP violation #PF if a large page (2MB or 1GB) | |
948 | * collides with the RMP entry of VMSA page. The recommended workaround | |
949 | * is to not use a large page. | |
950 | * | |
951 | * Allocate an 8k page which is also 8k-aligned. | |
952 | */ | |
953 | p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1); | |
954 | if (!p) | |
955 | return NULL; | |
956 | ||
957 | split_page(p, 1); | |
958 | ||
959 | /* Free the first 4k. This page may be 2M/1G aligned and cannot be used. */ | |
960 | __free_page(p); | |
961 | ||
962 | return page_address(p + 1); | |
963 | } | |
964 | ||
965 | static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa) | |
966 | { | |
967 | int err; | |
968 | ||
969 | err = snp_set_vmsa(vmsa, false); | |
970 | if (err) | |
971 | pr_err("clear VMSA page failed (%u), leaking page\n", err); | |
972 | else | |
973 | free_page((unsigned long)vmsa); | |
974 | } | |
975 | ||
/*
 * Start an AP under SEV-SNP by building a fresh VMSA for it and issuing
 * the SNP AP Creation NAE event to the hypervisor.
 *
 * @apic_id:  APIC ID of the CPU to wake
 * @start_ip: requested real-mode start IP; must match the trampoline start
 *
 * Returns 0 on success, negative errno on failure. On success the new
 * VMSA page is recorded in per-CPU sev_vmsa and any previous one freed.
 */
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
{
	struct sev_es_save_area *cur_vmsa, *vmsa;
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;
	u8 sipi_vector;
	int cpu, ret;
	u64 cr4;

	/*
	 * The hypervisor SNP feature support check has happened earlier, just check
	 * the AP_CREATION one here.
	 */
	if (!(sev_hv_features & GHCB_HV_FT_SNP_AP_CREATION))
		return -EOPNOTSUPP;

	/*
	 * Verify the desired start IP against the known trampoline start IP
	 * to catch any future new trampolines that may be introduced that
	 * would require a new protected guest entry point.
	 */
	if (WARN_ONCE(start_ip != real_mode_header->trampoline_start,
		      "Unsupported SNP start_ip: %lx\n", start_ip))
		return -EINVAL;

	/* Override start_ip with known protected guest start IP */
	start_ip = real_mode_header->sev_es_trampoline_start;

	/* Find the logical CPU for the APIC ID */
	for_each_present_cpu(cpu) {
		if (arch_match_cpu_phys_id(cpu, apic_id))
			break;
	}
	if (cpu >= nr_cpu_ids)
		return -EINVAL;

	cur_vmsa = per_cpu(sev_vmsa, cpu);

	/*
	 * A new VMSA is created each time because there is no guarantee that
	 * the current VMSA is the kernels or that the vCPU is not running. If
	 * an attempt was done to use the current VMSA with a running vCPU, a
	 * #VMEXIT of that vCPU would wipe out all of the settings being done
	 * here.
	 */
	vmsa = (struct sev_es_save_area *)snp_alloc_vmsa_page();
	if (!vmsa)
		return -ENOMEM;

	/* CR4 should maintain the MCE value */
	cr4 = native_read_cr4() & X86_CR4_MCE;

	/* Set the CS value based on the start_ip converted to a SIPI vector */
	sipi_vector = (start_ip >> 12);
	vmsa->cs.base = sipi_vector << 12;
	vmsa->cs.limit = AP_INIT_CS_LIMIT;
	vmsa->cs.attrib = INIT_CS_ATTRIBS;
	vmsa->cs.selector = sipi_vector << 8;

	/* Set the RIP value based on start_ip */
	vmsa->rip = start_ip & 0xfff;

	/* Set AP INIT defaults as documented in the APM */
	vmsa->ds.limit = AP_INIT_DS_LIMIT;
	vmsa->ds.attrib = INIT_DS_ATTRIBS;
	vmsa->es = vmsa->ds;
	vmsa->fs = vmsa->ds;
	vmsa->gs = vmsa->ds;
	vmsa->ss = vmsa->ds;

	vmsa->gdtr.limit = AP_INIT_GDTR_LIMIT;
	vmsa->ldtr.limit = AP_INIT_LDTR_LIMIT;
	vmsa->ldtr.attrib = INIT_LDTR_ATTRIBS;
	vmsa->idtr.limit = AP_INIT_IDTR_LIMIT;
	vmsa->tr.limit = AP_INIT_TR_LIMIT;
	vmsa->tr.attrib = INIT_TR_ATTRIBS;

	vmsa->cr4 = cr4;
	vmsa->cr0 = AP_INIT_CR0_DEFAULT;
	vmsa->dr7 = DR7_RESET_VALUE;
	vmsa->dr6 = AP_INIT_DR6_DEFAULT;
	vmsa->rflags = AP_INIT_RFLAGS_DEFAULT;
	vmsa->g_pat = AP_INIT_GPAT_DEFAULT;
	vmsa->xcr0 = AP_INIT_XCR0_DEFAULT;
	vmsa->mxcsr = AP_INIT_MXCSR_DEFAULT;
	vmsa->x87_ftw = AP_INIT_X87_FTW_DEFAULT;
	vmsa->x87_fcw = AP_INIT_X87_FCW_DEFAULT;

	/* SVME must be set. */
	vmsa->efer = EFER_SVME;

	/*
	 * Set the SNP-specific fields for this VMSA:
	 *   VMPL level
	 *   SEV_FEATURES (matches the SEV STATUS MSR right shifted 2 bits)
	 */
	vmsa->vmpl = 0;
	vmsa->sev_features = sev_status >> 2;

	/* Switch the page over to a VMSA page now that it is initialized */
	ret = snp_set_vmsa(vmsa, true);
	if (ret) {
		pr_err("set VMSA page failed (%u)\n", ret);
		free_page((unsigned long)vmsa);

		return -EINVAL;
	}

	/* Issue VMGEXIT AP Creation NAE event */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_rax(ghcb, vmsa->sev_features);
	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
	ghcb_set_sw_exit_info_1(ghcb, ((u64)apic_id << 32) | SVM_VMGEXIT_AP_CREATE);
	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	/* Per the GHCB protocol, a zero in exit_info_1 signals success. */
	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
		pr_err("SNP AP Creation error\n");
		ret = -EINVAL;
	}

	__sev_put_ghcb(&state);

	local_irq_restore(flags);

	/* Perform cleanup if there was an error */
	if (ret) {
		snp_cleanup_vmsa(vmsa);
		vmsa = NULL;
	}

	/* Free up any previous VMSA page */
	if (cur_vmsa)
		snp_cleanup_vmsa(cur_vmsa);

	/* Record the current VMSA page */
	per_cpu(sev_vmsa, cpu) = vmsa;

	return ret;
}
1124 | ||
d6f361ea | 1125 | void __init snp_set_wakeup_secondary_cpu(void) |
0afb6b66 TL |
1126 | { |
1127 | if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) | |
1128 | return; | |
1129 | ||
1130 | /* | |
1131 | * Always set this override if SNP is enabled. This makes it the | |
1132 | * required method to start APs under SNP. If the hypervisor does | |
1133 | * not support AP creation, then no APs will be started. | |
1134 | */ | |
d6f361ea | 1135 | apic_update_callback(wakeup_secondary_cpu, wakeup_cpu_via_vmgexit); |
0afb6b66 TL |
1136 | } |
1137 | ||
75d359ec | 1138 | int __init sev_es_setup_ap_jump_table(struct real_mode_header *rmh) |
8940ac9c TL |
1139 | { |
1140 | u16 startup_cs, startup_ip; | |
1141 | phys_addr_t jump_table_pa; | |
1142 | u64 jump_table_addr; | |
1143 | u16 __iomem *jump_table; | |
1144 | ||
1145 | jump_table_addr = get_jump_table_addr(); | |
1146 | ||
1147 | /* On UP guests there is no jump table so this is not a failure */ | |
1148 | if (!jump_table_addr) | |
1149 | return 0; | |
1150 | ||
1151 | /* Check if AP Jump Table is page-aligned */ | |
1152 | if (jump_table_addr & ~PAGE_MASK) | |
1153 | return -EINVAL; | |
1154 | ||
1155 | jump_table_pa = jump_table_addr & PAGE_MASK; | |
1156 | ||
1157 | startup_cs = (u16)(rmh->trampoline_start >> 4); | |
1158 | startup_ip = (u16)(rmh->sev_es_trampoline_start - | |
1159 | rmh->trampoline_start); | |
1160 | ||
1161 | jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE); | |
1162 | if (!jump_table) | |
1163 | return -EIO; | |
1164 | ||
1165 | writew(startup_ip, &jump_table[0]); | |
1166 | writew(startup_cs, &jump_table[1]); | |
1167 | ||
1168 | iounmap(jump_table); | |
1169 | ||
1170 | return 0; | |
1171 | } | |
1172 | ||
39336f4f TL |
1173 | /* |
1174 | * This is needed by the OVMF UEFI firmware which will use whatever it finds in | |
1175 | * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu | |
1176 | * runtime GHCBs used by the kernel are also mapped in the EFI page-table. | |
1177 | */ | |
1178 | int __init sev_es_efi_map_ghcbs(pgd_t *pgd) | |
1179 | { | |
1180 | struct sev_es_runtime_data *data; | |
1181 | unsigned long address, pflags; | |
1182 | int cpu; | |
1183 | u64 pfn; | |
1184 | ||
6283f2ef | 1185 | if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) |
39336f4f TL |
1186 | return 0; |
1187 | ||
1188 | pflags = _PAGE_NX | _PAGE_RW; | |
1189 | ||
1190 | for_each_possible_cpu(cpu) { | |
1191 | data = per_cpu(runtime_data, cpu); | |
1192 | ||
1193 | address = __pa(&data->ghcb_page); | |
1194 | pfn = address >> PAGE_SHIFT; | |
1195 | ||
1196 | if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags)) | |
1197 | return 1; | |
1198 | } | |
1199 | ||
1200 | return 0; | |
1201 | } | |
1202 | ||
a4afa608 TL |
1203 | static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt) |
1204 | { | |
1205 | struct pt_regs *regs = ctxt->regs; | |
1206 | enum es_result ret; | |
1207 | u64 exit_info_1; | |
1208 | ||
1209 | /* Is it a WRMSR? */ | |
1210 | exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0; | |
1211 | ||
1212 | ghcb_set_rcx(ghcb, regs->cx); | |
1213 | if (exit_info_1) { | |
1214 | ghcb_set_rax(ghcb, regs->ax); | |
1215 | ghcb_set_rdx(ghcb, regs->dx); | |
1216 | } | |
1217 | ||
5bb6c1d1 | 1218 | ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0); |
a4afa608 TL |
1219 | |
1220 | if ((ret == ES_OK) && (!exit_info_1)) { | |
1221 | regs->ax = ghcb->save.rax; | |
1222 | regs->dx = ghcb->save.rdx; | |
1223 | } | |
1224 | ||
1225 | return ret; | |
1226 | } | |
1227 | ||
95d33bfa | 1228 | static void snp_register_per_cpu_ghcb(void) |
1aa9aa8e | 1229 | { |
95d33bfa BS |
1230 | struct sev_es_runtime_data *data; |
1231 | struct ghcb *ghcb; | |
1232 | ||
1233 | data = this_cpu_read(runtime_data); | |
1234 | ghcb = &data->ghcb_page; | |
1235 | ||
1236 | snp_register_ghcb_early(__pa(ghcb)); | |
1237 | } | |
1238 | ||
/*
 * Set up the GHCB used by the current boot phase: either (re)register the
 * per-CPU runtime GHCB once the runtime #VC handler is active, or set up
 * and publish the boot GHCB during early boot.
 */
void setup_ghcb(void)
{
	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	/*
	 * Check whether the runtime #VC exception handler is active. It uses
	 * the per-CPU GHCB page which is set up by sev_es_init_vc_handling().
	 *
	 * If SNP is active, register the per-CPU GHCB page so that the runtime
	 * exception handler can use it.
	 */
	if (initial_vc_handler == (unsigned long)kernel_exc_vmm_communication) {
		if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
			snp_register_per_cpu_ghcb();

		sev_cfg.ghcbs_initialized = true;

		return;
	}

	/*
	 * Make sure the hypervisor talks a supported protocol.
	 * This gets called only in the BSP boot phase.
	 */
	if (!sev_es_negotiate_protocol())
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);

	/*
	 * Clear the boot_ghcb. The first exception comes in before the bss
	 * section is cleared.
	 */
	memset(&boot_ghcb_page, 0, PAGE_SIZE);

	/* Alright - Make the boot-ghcb public */
	boot_ghcb = &boot_ghcb_page;

	/* SNP guest requires that GHCB GPA must be registered. */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		snp_register_ghcb_early(__pa(&boot_ghcb_page));
}
1280 | ||
094794f5 JR |
1281 | #ifdef CONFIG_HOTPLUG_CPU |
1282 | static void sev_es_ap_hlt_loop(void) | |
1283 | { | |
1284 | struct ghcb_state state; | |
1285 | struct ghcb *ghcb; | |
1286 | ||
d187f217 | 1287 | ghcb = __sev_get_ghcb(&state); |
094794f5 JR |
1288 | |
1289 | while (true) { | |
1290 | vc_ghcb_invalidate(ghcb); | |
1291 | ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP); | |
1292 | ghcb_set_sw_exit_info_1(ghcb, 0); | |
1293 | ghcb_set_sw_exit_info_2(ghcb, 0); | |
1294 | ||
1295 | sev_es_wr_ghcb_msr(__pa(ghcb)); | |
1296 | VMGEXIT(); | |
1297 | ||
1298 | /* Wakeup signal? */ | |
1299 | if (ghcb_sw_exit_info_2_is_valid(ghcb) && | |
1300 | ghcb->save.sw_exit_info_2) | |
1301 | break; | |
1302 | } | |
1303 | ||
d187f217 | 1304 | __sev_put_ghcb(&state); |
094794f5 JR |
1305 | } |
1306 | ||
/*
 * Play_dead handler when running under SEV-ES. This is needed because
 * the hypervisor can't deliver an SIPI request to restart the AP.
 * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
 * hypervisor wakes it up again.
 */
static void sev_es_play_dead(void)
{
	play_dead_common();

	/* IRQs now disabled */

	/* Blocks in the hypervisor until a wakeup is signalled. */
	sev_es_ap_hlt_loop();

	/*
	 * If we get here, the VCPU was woken up again. Jump to CPU
	 * startup code to get it back online.
	 */
	soft_restart_cpu();
}
1327 | #else /* CONFIG_HOTPLUG_CPU */ | |
1328 | #define sev_es_play_dead native_play_dead | |
1329 | #endif /* CONFIG_HOTPLUG_CPU */ | |
1330 | ||
1331 | #ifdef CONFIG_SMP | |
/* Install the SEV-ES aware play_dead handler for CPU offlining. */
static void __init sev_es_setup_play_dead(void)
{
	smp_ops.play_dead = sev_es_play_dead;
}
1336 | #else | |
1337 | static inline void sev_es_setup_play_dead(void) { } | |
1338 | #endif | |
1339 | ||
885689e4 TL |
1340 | static void __init alloc_runtime_data(int cpu) |
1341 | { | |
1342 | struct sev_es_runtime_data *data; | |
1343 | ||
1344 | data = memblock_alloc(sizeof(*data), PAGE_SIZE); | |
1345 | if (!data) | |
1346 | panic("Can't allocate SEV-ES runtime data"); | |
1347 | ||
1348 | per_cpu(runtime_data, cpu) = data; | |
1349 | } | |
1350 | ||
1351 | static void __init init_ghcb(int cpu) | |
1352 | { | |
1353 | struct sev_es_runtime_data *data; | |
1354 | int err; | |
1355 | ||
1356 | data = per_cpu(runtime_data, cpu); | |
1357 | ||
1358 | err = early_set_memory_decrypted((unsigned long)&data->ghcb_page, | |
1359 | sizeof(data->ghcb_page)); | |
1360 | if (err) | |
1361 | panic("Can't map GHCBs unencrypted"); | |
1362 | ||
1363 | memset(&data->ghcb_page, 0, sizeof(data->ghcb_page)); | |
0786138c TL |
1364 | |
1365 | data->ghcb_active = false; | |
1366 | data->backup_ghcb_active = false; | |
885689e4 TL |
1367 | } |
1368 | ||
/*
 * One-time init of the runtime #VC handling machinery: per-CPU GHCB
 * pages, the SEV-ES play_dead hook, and the switch from the early to
 * the runtime #VC handler.
 */
void __init sev_es_init_vc_handling(void)
{
	int cpu;

	/* The GHCB inside runtime_data must start on a page boundary. */
	BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);

	if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT))
		return;

	if (!sev_es_check_cpu_features())
		panic("SEV-ES CPU Features missing");

	/*
	 * SNP is supported in v2 of the GHCB spec which mandates support for HV
	 * features.
	 */
	if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) {
		sev_hv_features = get_hv_features();

		if (!(sev_hv_features & GHCB_HV_FT_SNP))
			sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
	}

	/* Initialize per-cpu GHCB pages */
	for_each_possible_cpu(cpu) {
		alloc_runtime_data(cpu);
		init_ghcb(cpu);
	}

	sev_es_setup_play_dead();

	/* Secondary CPUs use the runtime #VC handler */
	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
}
1403 | ||
1aa9aa8e JR |
1404 | static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt) |
1405 | { | |
1406 | int trapnr = ctxt->fi.vector; | |
1407 | ||
1408 | if (trapnr == X86_TRAP_PF) | |
1409 | native_write_cr2(ctxt->fi.cr2); | |
1410 | ||
1411 | ctxt->regs->orig_ax = ctxt->fi.error_code; | |
1412 | do_early_exception(ctxt->regs, trapnr); | |
1413 | } | |
1414 | ||
479a7bf5 TL |
1415 | static long *vc_insn_get_rm(struct es_em_ctxt *ctxt) |
1416 | { | |
1417 | long *reg_array; | |
1418 | int offset; | |
1419 | ||
1420 | reg_array = (long *)ctxt->regs; | |
1421 | offset = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs); | |
1422 | ||
1423 | if (offset < 0) | |
1424 | return NULL; | |
1425 | ||
1426 | offset /= sizeof(long); | |
1427 | ||
1428 | return reg_array + offset; | |
1429 | } | |
51ee7d6e TL |
1430 | static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt, |
1431 | unsigned int bytes, bool read) | |
1432 | { | |
1433 | u64 exit_code, exit_info_1, exit_info_2; | |
1434 | unsigned long ghcb_pa = __pa(ghcb); | |
2411cd82 | 1435 | enum es_result res; |
51ee7d6e TL |
1436 | phys_addr_t paddr; |
1437 | void __user *ref; | |
1438 | ||
1439 | ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs); | |
1440 | if (ref == (void __user *)-1L) | |
1441 | return ES_UNSUPPORTED; | |
1442 | ||
1443 | exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE; | |
1444 | ||
2411cd82 JR |
1445 | res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr); |
1446 | if (res != ES_OK) { | |
1447 | if (res == ES_EXCEPTION && !read) | |
51ee7d6e TL |
1448 | ctxt->fi.error_code |= X86_PF_WRITE; |
1449 | ||
2411cd82 | 1450 | return res; |
51ee7d6e TL |
1451 | } |
1452 | ||
1453 | exit_info_1 = paddr; | |
1454 | /* Can never be greater than 8 */ | |
1455 | exit_info_2 = bytes; | |
1456 | ||
0ddfb1cf | 1457 | ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer)); |
51ee7d6e | 1458 | |
5bb6c1d1 | 1459 | return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2); |
51ee7d6e TL |
1460 | } |
1461 | ||
0118b604 JR |
1462 | /* |
1463 | * The MOVS instruction has two memory operands, which raises the | |
1464 | * problem that it is not known whether the access to the source or the | |
1465 | * destination caused the #VC exception (and hence whether an MMIO read | |
1466 | * or write operation needs to be emulated). | |
1467 | * | |
1468 | * Instead of playing games with walking page-tables and trying to guess | |
1469 | * whether the source or destination is an MMIO range, split the move | |
1470 | * into two operations, a read and a write with only one memory operand. | |
1471 | * This will cause a nested #VC exception on the MMIO address which can | |
1472 | * then be handled. | |
1473 | * | |
1474 | * This implementation has the benefit that it also supports MOVS where | |
1475 | * source _and_ destination are MMIO regions. | |
1476 | * | |
1477 | * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a | |
1478 | * rare operation. If it turns out to be a performance problem the split | |
1479 | * operations can be moved to memcpy_fromio() and memcpy_toio(). | |
1480 | */ | |
1481 | static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt, | |
1482 | unsigned int bytes) | |
1483 | { | |
1484 | unsigned long ds_base, es_base; | |
1485 | unsigned char *src, *dst; | |
1486 | unsigned char buffer[8]; | |
1487 | enum es_result ret; | |
1488 | bool rep; | |
1489 | int off; | |
1490 | ||
1491 | ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS); | |
1492 | es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES); | |
1493 | ||
1494 | if (ds_base == -1L || es_base == -1L) { | |
1495 | ctxt->fi.vector = X86_TRAP_GP; | |
1496 | ctxt->fi.error_code = 0; | |
1497 | return ES_EXCEPTION; | |
1498 | } | |
1499 | ||
1500 | src = ds_base + (unsigned char *)ctxt->regs->si; | |
1501 | dst = es_base + (unsigned char *)ctxt->regs->di; | |
1502 | ||
1503 | ret = vc_read_mem(ctxt, src, buffer, bytes); | |
1504 | if (ret != ES_OK) | |
1505 | return ret; | |
1506 | ||
1507 | ret = vc_write_mem(ctxt, dst, buffer, bytes); | |
1508 | if (ret != ES_OK) | |
1509 | return ret; | |
1510 | ||
1511 | if (ctxt->regs->flags & X86_EFLAGS_DF) | |
1512 | off = -bytes; | |
1513 | else | |
1514 | off = bytes; | |
1515 | ||
1516 | ctxt->regs->si += off; | |
1517 | ctxt->regs->di += off; | |
1518 | ||
1519 | rep = insn_has_rep_prefix(&ctxt->insn); | |
1520 | if (rep) | |
1521 | ctxt->regs->cx -= 1; | |
1522 | ||
1523 | if (!rep || ctxt->regs->cx == 0) | |
1524 | return ES_OK; | |
1525 | else | |
1526 | return ES_RETRY; | |
1527 | } | |
1528 | ||
/*
 * Emulate the MMIO instruction that caused a #VC (SVM_EXIT_NPF): decode
 * it, move the data through the GHCB shared buffer via vc_do_mmio(), and
 * apply the instruction's zero-/sign-extension semantics to the target
 * register.
 */
static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
{
	struct insn *insn = &ctxt->insn;
	enum insn_mmio_type mmio;
	unsigned int bytes = 0;
	enum es_result ret;
	u8 sign_byte;
	long *reg_data;

	mmio = insn_decode_mmio(insn, &bytes);
	if (mmio == INSN_MMIO_DECODE_FAILED)
		return ES_DECODE_FAILED;

	/* All forms except immediate-write and MOVS have a register operand. */
	if (mmio != INSN_MMIO_WRITE_IMM && mmio != INSN_MMIO_MOVS) {
		reg_data = insn_get_modrm_reg_ptr(insn, ctxt->regs);
		if (!reg_data)
			return ES_DECODE_FAILED;
	}

	/* MMIO emulation is only supported for kernel-mode accesses. */
	if (user_mode(ctxt->regs))
		return ES_UNSUPPORTED;

	switch (mmio) {
	case INSN_MMIO_WRITE:
		memcpy(ghcb->shared_buffer, reg_data, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_WRITE_IMM:
		memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
		ret = vc_do_mmio(ghcb, ctxt, bytes, false);
		break;
	case INSN_MMIO_READ:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero-extend for 32-bit operation */
		if (bytes == 4)
			*reg_data = 0;

		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_ZERO_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Zero extend based on operand size */
		memset(reg_data, 0, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_READ_SIGN_EXTEND:
		ret = vc_do_mmio(ghcb, ctxt, bytes, true);
		if (ret)
			break;

		/* Pick the fill byte from the top bit of the loaded value. */
		if (bytes == 1) {
			u8 *val = (u8 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x80) ? 0xff : 0x00;
		} else {
			u16 *val = (u16 *)ghcb->shared_buffer;

			sign_byte = (*val & 0x8000) ? 0xff : 0x00;
		}

		/* Sign extend based on operand size */
		memset(reg_data, sign_byte, insn->opnd_bytes);
		memcpy(reg_data, ghcb->shared_buffer, bytes);
		break;
	case INSN_MMIO_MOVS:
		ret = vc_handle_mmio_movs(ctxt, bytes);
		break;
	default:
		ret = ES_UNSUPPORTED;
		break;
	}

	return ret;
}
1609 | ||
479a7bf5 TL |
1610 | static enum es_result vc_handle_dr7_write(struct ghcb *ghcb, |
1611 | struct es_em_ctxt *ctxt) | |
1612 | { | |
1613 | struct sev_es_runtime_data *data = this_cpu_read(runtime_data); | |
1614 | long val, *reg = vc_insn_get_rm(ctxt); | |
1615 | enum es_result ret; | |
1616 | ||
e221804d AK |
1617 | if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) |
1618 | return ES_VMM_ERROR; | |
1619 | ||
479a7bf5 TL |
1620 | if (!reg) |
1621 | return ES_DECODE_FAILED; | |
1622 | ||
1623 | val = *reg; | |
1624 | ||
1625 | /* Upper 32 bits must be written as zeroes */ | |
1626 | if (val >> 32) { | |
1627 | ctxt->fi.vector = X86_TRAP_GP; | |
1628 | ctxt->fi.error_code = 0; | |
1629 | return ES_EXCEPTION; | |
1630 | } | |
1631 | ||
1632 | /* Clear out other reserved bits and set bit 10 */ | |
1633 | val = (val & 0xffff23ffL) | BIT(10); | |
1634 | ||
1635 | /* Early non-zero writes to DR7 are not supported */ | |
1636 | if (!data && (val & ~DR7_RESET_VALUE)) | |
1637 | return ES_UNSUPPORTED; | |
1638 | ||
1639 | /* Using a value of 0 for ExitInfo1 means RAX holds the value */ | |
1640 | ghcb_set_rax(ghcb, val); | |
5bb6c1d1 | 1641 | ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0); |
479a7bf5 TL |
1642 | if (ret != ES_OK) |
1643 | return ret; | |
1644 | ||
1645 | if (data) | |
1646 | data->dr7 = val; | |
1647 | ||
1648 | return ES_OK; | |
1649 | } | |
1650 | ||
1651 | static enum es_result vc_handle_dr7_read(struct ghcb *ghcb, | |
1652 | struct es_em_ctxt *ctxt) | |
1653 | { | |
1654 | struct sev_es_runtime_data *data = this_cpu_read(runtime_data); | |
1655 | long *reg = vc_insn_get_rm(ctxt); | |
1656 | ||
e221804d AK |
1657 | if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) |
1658 | return ES_VMM_ERROR; | |
1659 | ||
479a7bf5 TL |
1660 | if (!reg) |
1661 | return ES_DECODE_FAILED; | |
1662 | ||
1663 | if (data) | |
1664 | *reg = data->dr7; | |
1665 | else | |
1666 | *reg = DR7_RESET_VALUE; | |
1667 | ||
1668 | return ES_OK; | |
1669 | } | |
1670 | ||
a14a92fc TL |
/* Forward WBINVD to the hypervisor; no register state is exchanged. */
static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
				       struct es_em_ctxt *ctxt)
{
	return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
}
1676 | ||
5d55cf78 TL |
1677 | static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt) |
1678 | { | |
1679 | enum es_result ret; | |
1680 | ||
1681 | ghcb_set_rcx(ghcb, ctxt->regs->cx); | |
1682 | ||
5bb6c1d1 | 1683 | ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0); |
5d55cf78 TL |
1684 | if (ret != ES_OK) |
1685 | return ret; | |
1686 | ||
1687 | if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb))) | |
1688 | return ES_VMM_ERROR; | |
1689 | ||
1690 | ctxt->regs->ax = ghcb->save.rax; | |
1691 | ctxt->regs->dx = ghcb->save.rdx; | |
1692 | ||
1693 | return ES_OK; | |
1694 | } | |
1695 | ||
0c2fd2ef TL |
static enum es_result vc_handle_monitor(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Treat it as a NOP and do not leak a physical address to the
	 * hypervisor.
	 */
	return ES_OK;
}
1705 | ||
ded476bb TL |
static enum es_result vc_handle_mwait(struct ghcb *ghcb,
				      struct es_em_ctxt *ctxt)
{
	/* Treat the same as MONITOR/MONITORX */
	return ES_OK;
}
1712 | ||
2eb7dcf0 TL |
/*
 * Handle VMMCALL: pass RAX and the CPL to the hypervisor, with optional
 * hypervisor-specific prepare/finish hooks around the GHCB call.
 */
static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	enum es_result ret;

	ghcb_set_rax(ghcb, ctxt->regs->ax);
	ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);

	/* Let the hypervisor-specific code set up additional state. */
	if (x86_platform.hyper.sev_es_hcall_prepare)
		x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);

	ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
	if (ret != ES_OK)
		return ret;

	if (!ghcb_rax_is_valid(ghcb))
		return ES_VMM_ERROR;

	ctxt->regs->ax = ghcb->save.rax;

	/*
	 * Call sev_es_hcall_finish() after regs->ax is already set.
	 * This allows the hypervisor handler to overwrite it again if
	 * necessary.
	 */
	if (x86_platform.hyper.sev_es_hcall_finish &&
	    !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
		return ES_VMM_ERROR;

	return ES_OK;
}
1744 | ||
a2d0171a JR |
static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
					struct es_em_ctxt *ctxt)
{
	/*
	 * Calling ecx_alignment_check() directly does not work, because it
	 * enables IRQs and the GHCB is active. Forward the exception and call
	 * it later from vc_forward_exception().
	 */
	ctxt->fi.vector = X86_TRAP_AC;
	ctxt->fi.error_code = 0;
	return ES_EXCEPTION;
}
1757 | ||
1aa9aa8e JR |
/*
 * Central #VC dispatcher: first validate the instruction's opcode bytes
 * against the reported exit-code (protects against a malicious hypervisor
 * injecting bogus exit reasons), then route to the specific handler.
 */
static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
					 struct ghcb *ghcb,
					 unsigned long exit_code)
{
	enum es_result result = vc_check_opcode_bytes(ctxt, exit_code);

	if (result != ES_OK)
		return result;

	switch (exit_code) {
	case SVM_EXIT_READ_DR7:
		result = vc_handle_dr7_read(ghcb, ctxt);
		break;
	case SVM_EXIT_WRITE_DR7:
		result = vc_handle_dr7_write(ghcb, ctxt);
		break;
	case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
		result = vc_handle_trap_ac(ghcb, ctxt);
		break;
	case SVM_EXIT_RDTSC:
	case SVM_EXIT_RDTSCP:
		result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
		break;
	case SVM_EXIT_RDPMC:
		result = vc_handle_rdpmc(ghcb, ctxt);
		break;
	case SVM_EXIT_INVD:
		/* INVD should never be intercepted for an SEV-ES guest */
		pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
		result = ES_UNSUPPORTED;
		break;
	case SVM_EXIT_CPUID:
		result = vc_handle_cpuid(ghcb, ctxt);
		break;
	case SVM_EXIT_IOIO:
		result = vc_handle_ioio(ghcb, ctxt);
		break;
	case SVM_EXIT_MSR:
		result = vc_handle_msr(ghcb, ctxt);
		break;
	case SVM_EXIT_VMMCALL:
		result = vc_handle_vmmcall(ghcb, ctxt);
		break;
	case SVM_EXIT_WBINVD:
		result = vc_handle_wbinvd(ghcb, ctxt);
		break;
	case SVM_EXIT_MONITOR:
		result = vc_handle_monitor(ghcb, ctxt);
		break;
	case SVM_EXIT_MWAIT:
		result = vc_handle_mwait(ghcb, ctxt);
		break;
	case SVM_EXIT_NPF:
		/* Nested page fault: MMIO access that must be emulated */
		result = vc_handle_mmio(ghcb, ctxt);
		break;
	default:
		/*
		 * Unexpected #VC exception
		 */
		result = ES_UNSUPPORTED;
	}

	return result;
}
1821 | ||
0786138c TL |
/*
 * Re-raise an exception which was detected during #VC instruction emulation
 * (recorded in ctxt->fi) via the regular kernel exception entry points.
 * Only the vectors emulation can produce are handled; anything else is a bug.
 */
static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
{
	long error_code = ctxt->fi.error_code;
	int trapnr = ctxt->fi.vector;

	ctxt->regs->orig_ax = ctxt->fi.error_code;

	switch (trapnr) {
	case X86_TRAP_GP:
		exc_general_protection(ctxt->regs, error_code);
		break;
	case X86_TRAP_UD:
		exc_invalid_op(ctxt->regs);
		break;
	case X86_TRAP_PF:
		/* The page-fault handler expects the faulting address in CR2 */
		write_cr2(ctxt->fi.cr2);
		exc_page_fault(ctxt->regs, error_code);
		break;
	case X86_TRAP_AC:
		exc_alignment_check(ctxt->regs, error_code);
		break;
	default:
		pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
		BUG();
	}
}
1848 | ||
ce47d0c0 | 1849 | static __always_inline bool is_vc2_stack(unsigned long sp) |
0786138c | 1850 | { |
0786138c TL |
1851 | return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2)); |
1852 | } | |
1853 | ||
ce47d0c0 JR |
/*
 * Detect a #VC that hit while the CPU was already on the VC2 stack even
 * though the interrupted context was NOT on it - i.e. the #VC machinery
 * itself faulted unexpectedly. The caller treats this as fatal.
 */
static __always_inline bool vc_from_invalid_context(struct pt_regs *regs)
{
	unsigned long sp, prev_sp;

	sp = (unsigned long)regs;	/* stack the handler is running on */
	prev_sp = regs->sp;		/* stack of the interrupted context */

	/*
	 * If the code was already executing on the VC2 stack when the #VC
	 * happened, let it proceed to the normal handling routine. This way the
	 * code executing on the VC2 stack can cause #VC exceptions to get handled.
	 */
	return is_vc2_stack(sp) && !is_vc2_stack(prev_sp);
}
1868 | ||
be1a5408 | 1869 | static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code) |
0786138c | 1870 | { |
0786138c TL |
1871 | struct ghcb_state state; |
1872 | struct es_em_ctxt ctxt; | |
1873 | enum es_result result; | |
1874 | struct ghcb *ghcb; | |
be1a5408 | 1875 | bool ret = true; |
0786138c | 1876 | |
d187f217 | 1877 | ghcb = __sev_get_ghcb(&state); |
0786138c TL |
1878 | |
1879 | vc_ghcb_invalidate(ghcb); | |
1880 | result = vc_init_em_ctxt(&ctxt, regs, error_code); | |
1881 | ||
1882 | if (result == ES_OK) | |
1883 | result = vc_handle_exitcode(&ctxt, ghcb, error_code); | |
1884 | ||
d187f217 | 1885 | __sev_put_ghcb(&state); |
0786138c TL |
1886 | |
1887 | /* Done - now check the result */ | |
1888 | switch (result) { | |
1889 | case ES_OK: | |
1890 | vc_finish_insn(&ctxt); | |
1891 | break; | |
1892 | case ES_UNSUPPORTED: | |
4aca2d99 | 1893 | pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n", |
0786138c | 1894 | error_code, regs->ip); |
be1a5408 JR |
1895 | ret = false; |
1896 | break; | |
0786138c TL |
1897 | case ES_VMM_ERROR: |
1898 | pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n", | |
1899 | error_code, regs->ip); | |
be1a5408 JR |
1900 | ret = false; |
1901 | break; | |
0786138c TL |
1902 | case ES_DECODE_FAILED: |
1903 | pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n", | |
1904 | error_code, regs->ip); | |
be1a5408 JR |
1905 | ret = false; |
1906 | break; | |
0786138c TL |
1907 | case ES_EXCEPTION: |
1908 | vc_forward_exception(&ctxt); | |
1909 | break; | |
1910 | case ES_RETRY: | |
1911 | /* Nothing to do */ | |
1912 | break; | |
1913 | default: | |
1914 | pr_emerg("Unknown result in %s():%d\n", __func__, result); | |
1915 | /* | |
1916 | * Emulating the instruction which caused the #VC exception | |
1917 | * failed - can't continue so print debug information | |
1918 | */ | |
1919 | BUG(); | |
1920 | } | |
1921 | ||
be1a5408 JR |
1922 | return ret; |
1923 | } | |
0786138c | 1924 | |
be1a5408 JR |
/* True when the #VC was raised for an intercepted debug (#DB) exception */
static __always_inline bool vc_is_db(unsigned long error_code)
{
	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
}
0786138c | 1929 | |
be1a5408 JR |
/*
 * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
 * and will panic when an error happens.
 */
DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
{
	irqentry_state_t irq_state;

	/*
	 * With the current implementation it is always possible to switch to a
	 * safe stack because #VC exceptions only happen at known places, like
	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
	 * also happen with code instrumentation when the hypervisor intercepts
	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
	 * exceptions currently also only happen in safe places.
	 *
	 * But keep this here in case the noinstr annotations are violated due
	 * to bug elsewhere.
	 */
	if (unlikely(vc_from_invalid_context(regs))) {
		instrumentation_begin();
		panic("Can't handle #VC exception from unsupported context\n");
		instrumentation_end();
	}

	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		exc_debug(regs);
		return;
	}

	/* Treat the remainder of the handler as NMI-like context */
	irq_state = irqentry_nmi_enter(regs);

	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/* Show some debug info */
		show_regs(regs);

		/* Ask hypervisor to sev_es_terminate */
		sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);

		/* If that fails and we get here - just panic */
		panic("Returned from Terminate-Request to Hypervisor\n");
	}

	instrumentation_end();
	irqentry_nmi_exit(regs, irq_state);
}
1981 | ||
be1a5408 JR |
/*
 * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
 * and will kill the current task with SIGBUS when an error happens.
 */
DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
{
	/*
	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
	 */
	if (vc_is_db(error_code)) {
		noist_exc_debug(regs);
		return;
	}

	irqentry_enter_from_user_mode(regs);
	instrumentation_begin();

	if (!vc_raw_handle_exception(regs, error_code)) {
		/*
		 * Do not kill the machine if user-space triggered the
		 * exception. Send SIGBUS instead and let user-space deal with
		 * it.
		 */
		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
	}

	instrumentation_end();
	irqentry_exit_to_user_mode(regs);
}
2011 | ||
1aa9aa8e JR |
/*
 * Early-boot #VC handler which uses the statically allocated boot GHCB
 * (before the per-CPU GHCBs exist). Returns true when the instruction was
 * emulated successfully; any failure requests guest termination.
 */
bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
{
	/* At this stage the exit-code is passed in via orig_ax */
	unsigned long exit_code = regs->orig_ax;
	struct es_em_ctxt ctxt;
	enum es_result result;

	vc_ghcb_invalidate(boot_ghcb);

	result = vc_init_em_ctxt(&ctxt, regs, exit_code);
	if (result == ES_OK)
		result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);

	/* Done - now check the result */
	switch (result) {
	case ES_OK:
		vc_finish_insn(&ctxt);
		break;
	case ES_UNSUPPORTED:
		early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
				exit_code, regs->ip);
		goto fail;
	case ES_VMM_ERROR:
		early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
				exit_code, regs->ip);
		goto fail;
	case ES_DECODE_FAILED:
		early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
				exit_code, regs->ip);
		goto fail;
	case ES_EXCEPTION:
		vc_early_forward_exception(&ctxt);
		break;
	case ES_RETRY:
		/* Nothing to do */
		break;
	default:
		BUG();
	}

	return true;

fail:
	show_regs(regs);

	/*
	 * NOTE(review): no return statement follows - this relies on
	 * sev_es_terminate() never returning; confirm against its definition.
	 */
	sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
}
b190a043 MR |
2058 | |
2059 | /* | |
2060 | * Initial set up of SNP relies on information provided by the | |
2061 | * Confidential Computing blob, which can be passed to the kernel | |
2062 | * in the following ways, depending on how it is booted: | |
2063 | * | |
2064 | * - when booted via the boot/decompress kernel: | |
2065 | * - via boot_params | |
2066 | * | |
2067 | * - when booted directly by firmware/bootloader (e.g. CONFIG_PVH): | |
2068 | * - via a setup_data entry, as defined by the Linux Boot Protocol | |
2069 | * | |
2070 | * Scan for the blob in that order. | |
2071 | */ | |
428080c9 | 2072 | static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp) |
b190a043 MR |
2073 | { |
2074 | struct cc_blob_sev_info *cc_info; | |
2075 | ||
2076 | /* Boot kernel would have passed the CC blob via boot_params. */ | |
2077 | if (bp->cc_blob_address) { | |
2078 | cc_info = (struct cc_blob_sev_info *)(unsigned long)bp->cc_blob_address; | |
2079 | goto found_cc_info; | |
2080 | } | |
2081 | ||
2082 | /* | |
2083 | * If kernel was booted directly, without the use of the | |
2084 | * boot/decompression kernel, the CC blob may have been passed via | |
2085 | * setup_data instead. | |
2086 | */ | |
2087 | cc_info = find_cc_blob_setup_data(bp); | |
2088 | if (!cc_info) | |
2089 | return NULL; | |
2090 | ||
2091 | found_cc_info: | |
2092 | if (cc_info->magic != CC_BLOB_SEV_HDR_MAGIC) | |
2093 | snp_abort(); | |
2094 | ||
2095 | return cc_info; | |
2096 | } | |
2097 | ||
428080c9 | 2098 | bool __head snp_init(struct boot_params *bp) |
b190a043 MR |
2099 | { |
2100 | struct cc_blob_sev_info *cc_info; | |
2101 | ||
2102 | if (!bp) | |
2103 | return false; | |
2104 | ||
2105 | cc_info = find_cc_blob(bp); | |
2106 | if (!cc_info) | |
2107 | return false; | |
2108 | ||
30612045 MR |
2109 | setup_cpuid_table(cc_info); |
2110 | ||
b190a043 MR |
2111 | /* |
2112 | * The CC blob will be used later to access the secrets page. Cache | |
2113 | * it here like the boot kernel does. | |
2114 | */ | |
2115 | bp->cc_blob_address = (u32)(unsigned long)cc_info; | |
2116 | ||
2117 | return true; | |
2118 | } | |
2119 | ||
428080c9 | 2120 | void __head __noreturn snp_abort(void) |
b190a043 MR |
2121 | { |
2122 | sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); | |
2123 | } | |
30612045 | 2124 | |
0f4a1e80 KL |
2125 | /* |
2126 | * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are | |
2127 | * enabled, as the alternative (fallback) logic for DMI probing in the legacy | |
2128 | * ROM region can cause a crash since this region is not pre-validated. | |
2129 | */ | |
2130 | void __init snp_dmi_setup(void) | |
2131 | { | |
2132 | if (efi_enabled(EFI_CONFIG_TABLES)) | |
2133 | dmi_setup(); | |
2134 | } | |
2135 | ||
ba37a143 MR |
2136 | static void dump_cpuid_table(void) |
2137 | { | |
2138 | const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); | |
2139 | int i = 0; | |
2140 | ||
2141 | pr_info("count=%d reserved=0x%x reserved2=0x%llx\n", | |
2142 | cpuid_table->count, cpuid_table->__reserved1, cpuid_table->__reserved2); | |
2143 | ||
2144 | for (i = 0; i < SNP_CPUID_COUNT_MAX; i++) { | |
2145 | const struct snp_cpuid_fn *fn = &cpuid_table->fn[i]; | |
2146 | ||
2147 | pr_info("index=%3d fn=0x%08x subfn=0x%08x: eax=0x%08x ebx=0x%08x ecx=0x%08x edx=0x%08x xcr0_in=0x%016llx xss_in=0x%016llx reserved=0x%016llx\n", | |
2148 | i, fn->eax_in, fn->ecx_in, fn->eax, fn->ebx, fn->ecx, | |
2149 | fn->edx, fn->xcr0_in, fn->xss_in, fn->__reserved); | |
2150 | } | |
2151 | } | |
2152 | ||
30612045 MR |
/*
 * It is useful from an auditing/testing perspective to provide an easy way
 * for the guest owner to know that the CPUID table has been initialized as
 * expected, but that initialization happens too early in boot to print any
 * sort of indicator, and there's not really any other good place to do it,
 * so do it here.
 */
static int __init report_cpuid_table(void)
{
	const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();

	/* An empty table means no SNP CPUID table was provided - nothing to report */
	if (!cpuid_table->count)
		return 0;

	pr_info("Using SNP CPUID table, %d entries present.\n",
		cpuid_table->count);

	/* Full dump only when "sev=debug" was given on the command line */
	if (sev_cfg.debug)
		dump_cpuid_table();

	return 0;
}
arch_initcall(report_cpuid_table);
ba37a143 MR |
2176 | |
2177 | static int __init init_sev_config(char *str) | |
2178 | { | |
2179 | char *s; | |
2180 | ||
2181 | while ((s = strsep(&str, ","))) { | |
2182 | if (!strcmp(s, "debug")) { | |
2183 | sev_cfg.debug = true; | |
2184 | continue; | |
2185 | } | |
2186 | ||
2187 | pr_info("SEV command-line option '%s' was not recognized\n", s); | |
2188 | } | |
2189 | ||
2190 | return 1; | |
2191 | } | |
2192 | __setup("sev=", init_sev_config); | |
d5af44dd | 2193 | |
0144e3b8 | 2194 | int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) |
d5af44dd BS |
2195 | { |
2196 | struct ghcb_state state; | |
2197 | struct es_em_ctxt ctxt; | |
2198 | unsigned long flags; | |
2199 | struct ghcb *ghcb; | |
2200 | int ret; | |
2201 | ||
0144e3b8 | 2202 | rio->exitinfo2 = SEV_RET_NO_FW_CALL; |
d5af44dd BS |
2203 | |
2204 | /* | |
2205 | * __sev_get_ghcb() needs to run with IRQs disabled because it is using | |
2206 | * a per-CPU GHCB. | |
2207 | */ | |
2208 | local_irq_save(flags); | |
2209 | ||
2210 | ghcb = __sev_get_ghcb(&state); | |
2211 | if (!ghcb) { | |
2212 | ret = -EIO; | |
2213 | goto e_restore_irq; | |
2214 | } | |
2215 | ||
2216 | vc_ghcb_invalidate(ghcb); | |
2217 | ||
2218 | if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { | |
2219 | ghcb_set_rax(ghcb, input->data_gpa); | |
2220 | ghcb_set_rbx(ghcb, input->data_npages); | |
2221 | } | |
2222 | ||
5bb6c1d1 | 2223 | ret = sev_es_ghcb_hv_call(ghcb, &ctxt, exit_code, input->req_gpa, input->resp_gpa); |
d5af44dd BS |
2224 | if (ret) |
2225 | goto e_put; | |
2226 | ||
0144e3b8 DG |
2227 | rio->exitinfo2 = ghcb->save.sw_exit_info_2; |
2228 | switch (rio->exitinfo2) { | |
fa4ae42c BPA |
2229 | case 0: |
2230 | break; | |
2231 | ||
0144e3b8 | 2232 | case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_BUSY): |
72f7754d DG |
2233 | ret = -EAGAIN; |
2234 | break; | |
2235 | ||
0144e3b8 | 2236 | case SNP_GUEST_VMM_ERR(SNP_GUEST_VMM_ERR_INVALID_LEN): |
d5af44dd | 2237 | /* Number of expected pages are returned in RBX */ |
fa4ae42c | 2238 | if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) { |
d5af44dd | 2239 | input->data_npages = ghcb_get_rbx(ghcb); |
970ab823 | 2240 | ret = -ENOSPC; |
fa4ae42c | 2241 | break; |
970ab823 | 2242 | } |
fa4ae42c BPA |
2243 | fallthrough; |
2244 | default: | |
2245 | ret = -EIO; | |
2246 | break; | |
d5af44dd BS |
2247 | } |
2248 | ||
2249 | e_put: | |
2250 | __sev_put_ghcb(&state); | |
2251 | e_restore_irq: | |
2252 | local_irq_restore(flags); | |
2253 | ||
2254 | return ret; | |
2255 | } | |
2256 | EXPORT_SYMBOL_GPL(snp_issue_guest_request); | |
3a45b375 | 2257 | |
2bf93ffb TL |
/* Platform device that hands SNP guest data to the "sev-guest" driver */
static struct platform_device sev_guest_device = {
	.name = "sev-guest",
	.id = -1,
};
2262 | ||
3a45b375 BS |
/*
 * On SEV-SNP guests, register the "sev-guest" platform device and pass it
 * the physical address of the secrets page so the driver can talk to the
 * PSP firmware. Any failure leaves the device unregistered (-ENODEV).
 */
static int __init snp_init_platform_device(void)
{
	struct sev_guest_platform_data data;
	u64 gpa;

	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
		return -ENODEV;

	gpa = get_secrets_page();
	if (!gpa)
		return -ENODEV;

	data.secrets_gpa = gpa;
	if (platform_device_add_data(&sev_guest_device, &data, sizeof(data)))
		return -ENODEV;

	if (platform_device_register(&sev_guest_device))
		return -ENODEV;

	pr_info("SNP guest platform device initialized.\n");
	return 0;
}
device_initcall(snp_init_platform_device);
8ef97958 | 2286 | |
d7b69b59 BPA |
2287 | void sev_show_status(void) |
2288 | { | |
2289 | int i; | |
2290 | ||
2291 | pr_info("Status: "); | |
2292 | for (i = 0; i < MSR_AMD64_SNP_RESV_BIT; i++) { | |
2293 | if (sev_status & BIT_ULL(i)) { | |
2294 | if (!sev_status_feat_names[i]) | |
2295 | continue; | |
2296 | ||
2297 | pr_cont("%s ", sev_status_feat_names[i]); | |
2298 | } | |
2299 | } | |
2300 | pr_cont("\n"); | |
2301 | } |