Commit | Line | Data |
---|---|---|
59bd54a8 KS |
1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* Copyright (C) 2021-2022 Intel Corporation */ | |
3 | ||
4 | #undef pr_fmt | |
5 | #define pr_fmt(fmt) "tdx: " fmt | |
6 | ||
7 | #include <linux/cpufeature.h> | |
41394e33 | 8 | #include <asm/coco.h> |
59bd54a8 | 9 | #include <asm/tdx.h> |
bfe6ed0c | 10 | #include <asm/vmx.h> |
31d58c4e KS |
11 | #include <asm/insn.h> |
12 | #include <asm/insn-eval.h> | |
7dbde763 | 13 | #include <asm/pgtable.h> |
59bd54a8 | 14 | |
/* TDX module Call Leaf IDs (TDCALL sub-functions) */
#define TDX_GET_INFO			1
#define TDX_GET_VEINFO			3
#define TDX_ACCEPT_PAGE			6

/* TDX hypercall Leaf IDs (TDG.VP.VMCALL sub-functions) */
#define TDVMCALL_MAP_GPA		0x10001

/* MMIO direction */
#define EPT_READ	0
#define EPT_WRITE	1

/* Port I/O direction */
#define PORT_READ	0
#define PORT_WRITE	1

/* See Exit Qualification for I/O Instructions in VMX documentation */
#define VE_IS_IO_IN(e)		((e) & BIT(3))
#define VE_GET_IO_SIZE(e)	(((e) & GENMASK(2, 0)) + 1)
#define VE_GET_PORT_NUM(e)	((e) >> 16)
#define VE_IS_IO_STRING(e)	((e) & BIT(4))

/*
 * TD attribute bit: suppress #VE delivery on accesses to TD-private
 * memory. Checked (and required) in tdx_parse_tdinfo().
 */
#define ATTR_SEPT_VE_DISABLE	BIT(28)
38 | ||
eb94f1b6 KS |
39 | /* |
40 | * Wrapper for standard use of __tdx_hypercall with no output aside from | |
41 | * return code. | |
42 | */ | |
43 | static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) | |
44 | { | |
45 | struct tdx_hypercall_args args = { | |
46 | .r10 = TDX_HYPERCALL_STANDARD, | |
47 | .r11 = fn, | |
48 | .r12 = r12, | |
49 | .r13 = r13, | |
50 | .r14 = r14, | |
51 | .r15 = r15, | |
52 | }; | |
53 | ||
54 | return __tdx_hypercall(&args, 0); | |
55 | } | |
56 | ||
57 | /* Called from __tdx_hypercall() for unrecoverable failure */ | |
58 | void __tdx_hypercall_failed(void) | |
59 | { | |
60 | panic("TDVMCALL failed. TDX module bug?"); | |
61 | } | |
62 | ||
/*
 * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
 * independently from but are currently matched 1:1 with VMX EXIT_REASONs.
 * Reusing the KVM EXIT_REASON macros makes it easier to connect the host and
 * guest sides of these calls.
 */
static u64 hcall_func(u64 exit_reason)
{
	/* Identity mapping today; kept as a function to document the ABI link. */
	return exit_reason;
}
73 | ||
cfb8ec7a KS |
#ifdef CONFIG_KVM_GUEST
/*
 * KVM-specific (non-standard, r10 carries the raw hypercall number)
 * hypercall entry point for TDX guests running under KVM.
 */
long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
		       unsigned long p3, unsigned long p4)
{
	struct tdx_hypercall_args args = {
		.r10 = nr,
		.r11 = p1,
		.r12 = p2,
		.r13 = p3,
		.r14 = p4,
	};

	return __tdx_hypercall(&args, 0);
}
EXPORT_SYMBOL_GPL(tdx_kvm_hypercall);
#endif
90 | ||
/*
 * Used for TDX guests to make calls directly to the TD module. This
 * should only be used for calls that have no legitimate reason to fail
 * or where the kernel can not survive the call failing.
 */
static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
				   struct tdx_module_output *out)
{
	/* Any failure here is unrecoverable by design: panic. */
	if (__tdx_module_call(fn, rcx, rdx, r8, r9, out))
		panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}
102 | ||
a6dd6f39 | 103 | static void tdx_parse_tdinfo(u64 *cc_mask) |
41394e33 KS |
104 | { |
105 | struct tdx_module_output out; | |
106 | unsigned int gpa_width; | |
373e715e | 107 | u64 td_attr; |
41394e33 KS |
108 | |
109 | /* | |
110 | * TDINFO TDX module call is used to get the TD execution environment | |
111 | * information like GPA width, number of available vcpus, debug mode | |
112 | * information, etc. More details about the ABI can be found in TDX | |
113 | * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL | |
114 | * [TDG.VP.INFO]. | |
41394e33 KS |
115 | */ |
116 | tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out); | |
117 | ||
41394e33 KS |
118 | /* |
119 | * The highest bit of a guest physical address is the "sharing" bit. | |
120 | * Set it for shared pages and clear it for private pages. | |
373e715e KS |
121 | * |
122 | * The GPA width that comes out of this call is critical. TDX guests | |
123 | * can not meaningfully run without it. | |
41394e33 | 124 | */ |
373e715e | 125 | gpa_width = out.rcx & GENMASK(5, 0); |
a6dd6f39 | 126 | *cc_mask = BIT_ULL(gpa_width - 1); |
373e715e KS |
127 | |
128 | /* | |
129 | * The kernel can not handle #VE's when accessing normal kernel | |
130 | * memory. Ensure that no #VE will be delivered for accesses to | |
131 | * TD-private memory. Only VMM-shared memory (MMIO) will #VE. | |
132 | */ | |
133 | td_attr = out.rdx; | |
134 | if (!(td_attr & ATTR_SEPT_VE_DISABLE)) | |
135 | panic("TD misconfiguration: SEPT_VE_DISABLE attibute must be set.\n"); | |
41394e33 KS |
136 | } |
137 | ||
/*
 * The TDX module spec states that #VE may be injected for a limited set of
 * reasons:
 *
 *  - Emulation of the architectural #VE injection on EPT violation;
 *
 *  - As a result of guest TD execution of a disallowed instruction,
 *    a disallowed MSR access, or CPUID virtualization;
 *
 *  - A notification to the guest TD about anomalous behavior;
 *
 * The last one is opt-in and is not used by the kernel.
 *
 * The Intel Software Developer's Manual describes cases when instruction
 * length field can be used in section "Information for VM Exits Due to
 * Instruction Execution".
 *
 * For TDX, it ultimately means GET_VEINFO provides reliable instruction length
 * information if #VE occurred due to instruction execution, but not for EPT
 * violations.
 */
static int ve_instr_len(struct ve_info *ve)
{
	switch (ve->exit_reason) {
	case EXIT_REASON_HLT:
	case EXIT_REASON_MSR_READ:
	case EXIT_REASON_MSR_WRITE:
	case EXIT_REASON_CPUID:
	case EXIT_REASON_IO_INSTRUCTION:
		/* It is safe to use ve->instr_len for #VE due to instructions */
		return ve->instr_len;
	case EXIT_REASON_EPT_VIOLATION:
		/*
		 * For EPT violations, ve->insn_len is not defined. For those,
		 * the kernel must decode instructions manually and should not
		 * be using this function.
		 */
		WARN_ONCE(1, "ve->instr_len is not defined for EPT violations");
		return 0;
	default:
		WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason);
		return ve->instr_len;
	}
}
182 | ||
/*
 * Issue the HLT-emulation hypercall. Returns the hypercall status
 * (0 on success). When @do_sti is true, STI is executed immediately
 * before the TDCALL so a pending interrupt can wake the vCPU.
 */
static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_HLT),
		.r12 = irq_disabled,
	};

	/*
	 * Emulate HLT operation via hypercall. More info about ABI
	 * can be found in TDX Guest-Host-Communication Interface
	 * (GHCI), section 3.8 TDG.VP.VMCALL<Instruction.HLT>.
	 *
	 * The VMM uses the "IRQ disabled" param to understand IRQ
	 * enabled status (RFLAGS.IF) of the TD guest and to determine
	 * whether or not it should schedule the halted vCPU if an
	 * IRQ becomes pending. E.g. if IRQs are disabled, the VMM
	 * can keep the vCPU in virtual HLT, even if an IRQ is
	 * pending, without hanging/breaking the guest.
	 */
	return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
}
205 | ||
/*
 * #VE handler for HLT. Returns the instruction length to advance RIP by,
 * or -EIO if the hypercall failed.
 */
static int handle_halt(struct ve_info *ve)
{
	/*
	 * Since non safe halt is mainly used in CPU offlining
	 * and the guest will always stay in the halt state, don't
	 * call the STI instruction (set do_sti as false).
	 */
	const bool irq_disabled = irqs_disabled();
	const bool do_sti = false;

	if (__halt(irq_disabled, do_sti))
		return -EIO;

	return ve_instr_len(ve);
}
221 | ||
/*
 * Idle entry point: halt with interrupts atomically re-enabled via the
 * TDX_HCALL_ISSUE_STI flag, mirroring the native "sti; hlt" sequence.
 */
void __cpuidle tdx_safe_halt(void)
{
	/*
	 * For do_sti=true case, __tdx_hypercall() function enables
	 * interrupts using the STI instruction before the TDCALL. So
	 * set irq_disabled as false.
	 */
	const bool irq_disabled = false;
	const bool do_sti = true;

	/*
	 * Use WARN_ONCE() to report the failure.
	 */
	if (__halt(irq_disabled, do_sti))
		WARN_ONCE(1, "HLT instruction emulation failed\n");
}
238 | ||
/*
 * #VE handler for RDMSR. MSR index comes from ECX; the 64-bit result in
 * r11 is split back into EDX:EAX. Returns instruction length or -EIO.
 */
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_MSR_READ),
		.r12 = regs->cx,
	};

	/*
	 * Emulate the MSR read via hypercall. More info about ABI
	 * can be found in TDX Guest-Host-Communication Interface
	 * (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
	 */
	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
		return -EIO;

	regs->ax = lower_32_bits(args.r11);
	regs->dx = upper_32_bits(args.r11);
	return ve_instr_len(ve);
}
259 | ||
cdd85786 | 260 | static int write_msr(struct pt_regs *regs, struct ve_info *ve) |
ae87f609 KS |
261 | { |
262 | struct tdx_hypercall_args args = { | |
263 | .r10 = TDX_HYPERCALL_STANDARD, | |
264 | .r11 = hcall_func(EXIT_REASON_MSR_WRITE), | |
265 | .r12 = regs->cx, | |
266 | .r13 = (u64)regs->dx << 32 | regs->ax, | |
267 | }; | |
268 | ||
269 | /* | |
270 | * Emulate the MSR write via hypercall. More info about ABI | |
271 | * can be found in TDX Guest-Host-Communication Interface | |
272 | * (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>". | |
273 | */ | |
cdd85786 KS |
274 | if (__tdx_hypercall(&args, 0)) |
275 | return -EIO; | |
276 | ||
277 | return ve_instr_len(ve); | |
ae87f609 KS |
278 | } |
279 | ||
/*
 * #VE handler for CPUID. Only the hypervisor leaf range (0x40000000 -
 * 0x4FFFFFFF) is forwarded to the VMM; anything else returns zeros.
 * Returns instruction length or -EIO.
 */
static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_CPUID),
		.r12 = regs->ax,
		.r13 = regs->cx,
	};

	/*
	 * Only allow VMM to control range reserved for hypervisor
	 * communication.
	 *
	 * Return all-zeros for any CPUID outside the range. It matches CPU
	 * behaviour for non-supported leaf.
	 */
	if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
		regs->ax = regs->bx = regs->cx = regs->dx = 0;
		return ve_instr_len(ve);
	}

	/*
	 * Emulate the CPUID instruction via a hypercall. More info about
	 * ABI can be found in TDX Guest-Host-Communication Interface
	 * (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
	 */
	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
		return -EIO;

	/*
	 * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
	 * EAX, EBX, ECX, EDX registers after the CPUID instruction execution.
	 * So copy the register contents back to pt_regs.
	 */
	regs->ax = args.r12;
	regs->bx = args.r13;
	regs->cx = args.r14;
	regs->dx = args.r15;

	return ve_instr_len(ve);
}
321 | ||
/*
 * Ask the VMM to emulate an MMIO read of @size bytes at guest physical
 * address @addr. On success the value read is stored in *@val.
 * Returns true on success.
 */
static bool mmio_read(int size, unsigned long addr, unsigned long *val)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_EPT_VIOLATION),
		.r12 = size,
		.r13 = EPT_READ,
		.r14 = addr,
		.r15 = *val,
	};

	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
		return false;
	*val = args.r11;
	return true;
}
338 | ||
339 | static bool mmio_write(int size, unsigned long addr, unsigned long val) | |
340 | { | |
341 | return !_tdx_hypercall(hcall_func(EXIT_REASON_EPT_VIOLATION), size, | |
342 | EPT_WRITE, addr, val); | |
343 | } | |
344 | ||
/*
 * #VE handler for in-kernel MMIO (EPT violation). Decodes the faulting
 * instruction at regs->ip and emulates the access via hypercall.
 *
 * Returns the decoded instruction length to advance RIP by, or -errno:
 *   -EFAULT  user-mode MMIO, unreadable instruction, or page-crossing access
 *   -EINVAL  instruction could not be decoded/handled
 *   -EIO     hypercall failed
 */
static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
{
	unsigned long *reg, val, vaddr;
	char buffer[MAX_INSN_SIZE];
	struct insn insn = {};
	enum mmio_type mmio;
	int size, extend_size;
	u8 extend_val = 0;

	/* Only in-kernel MMIO is supported */
	if (WARN_ON_ONCE(user_mode(regs)))
		return -EFAULT;

	if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
		return -EFAULT;

	if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
		return -EINVAL;

	mmio = insn_decode_mmio(&insn, &size);
	if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
		return -EINVAL;

	/* All variants except immediate/string stores need the ModRM register */
	if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
		reg = insn_get_modrm_reg_ptr(&insn, regs);
		if (!reg)
			return -EINVAL;
	}

	/*
	 * Reject EPT violation #VEs that split pages.
	 *
	 * MMIO accesses are supposed to be naturally aligned and therefore
	 * never cross page boundaries. Seeing split page accesses indicates
	 * a bug or a load_unaligned_zeropad() that stepped into an MMIO page.
	 *
	 * load_unaligned_zeropad() will recover using exception fixups.
	 */
	vaddr = (unsigned long)insn_get_addr_ref(&insn, regs);
	if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE)
		return -EFAULT;

	/* Handle writes first */
	switch (mmio) {
	case MMIO_WRITE:
		memcpy(&val, reg, size);
		if (!mmio_write(size, ve->gpa, val))
			return -EIO;
		return insn.length;
	case MMIO_WRITE_IMM:
		val = insn.immediate.value;
		if (!mmio_write(size, ve->gpa, val))
			return -EIO;
		return insn.length;
	case MMIO_READ:
	case MMIO_READ_ZERO_EXTEND:
	case MMIO_READ_SIGN_EXTEND:
		/* Reads are handled below */
		break;
	case MMIO_MOVS:
	case MMIO_DECODE_FAILED:
		/*
		 * MMIO was accessed with an instruction that could not be
		 * decoded or handled properly. It was likely not using io.h
		 * helpers or accessed MMIO accidentally.
		 */
		return -EINVAL;
	default:
		WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
		return -EINVAL;
	}

	/* Handle reads */
	if (!mmio_read(size, ve->gpa, &val))
		return -EIO;

	switch (mmio) {
	case MMIO_READ:
		/* Zero-extend for 32-bit operation */
		extend_size = size == 4 ? sizeof(*reg) : 0;
		break;
	case MMIO_READ_ZERO_EXTEND:
		/* Zero extend based on operand size */
		extend_size = insn.opnd_bytes;
		break;
	case MMIO_READ_SIGN_EXTEND:
		/* Sign extend based on operand size */
		extend_size = insn.opnd_bytes;
		if (size == 1 && val & BIT(7))
			extend_val = 0xFF;
		else if (size > 1 && val & BIT(15))
			extend_val = 0xFF;
		break;
	default:
		/* All other cases has to be covered with the first switch() */
		WARN_ON_ONCE(1);
		return -EINVAL;
	}

	/* Fill the destination register: extension bytes first, then data */
	if (extend_size)
		memset(reg, extend_val, extend_size);
	memcpy(reg, &val, size);
	return insn.length;
}
449 | ||
03149948 KS |
450 | static bool handle_in(struct pt_regs *regs, int size, int port) |
451 | { | |
452 | struct tdx_hypercall_args args = { | |
453 | .r10 = TDX_HYPERCALL_STANDARD, | |
454 | .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION), | |
455 | .r12 = size, | |
456 | .r13 = PORT_READ, | |
457 | .r14 = port, | |
458 | }; | |
459 | u64 mask = GENMASK(BITS_PER_BYTE * size, 0); | |
460 | bool success; | |
461 | ||
462 | /* | |
463 | * Emulate the I/O read via hypercall. More info about ABI can be found | |
464 | * in TDX Guest-Host-Communication Interface (GHCI) section titled | |
465 | * "TDG.VP.VMCALL<Instruction.IO>". | |
466 | */ | |
467 | success = !__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT); | |
468 | ||
469 | /* Update part of the register affected by the emulated instruction */ | |
470 | regs->ax &= ~mask; | |
471 | if (success) | |
472 | regs->ax |= args.r11 & mask; | |
473 | ||
474 | return success; | |
475 | } | |
476 | ||
477 | static bool handle_out(struct pt_regs *regs, int size, int port) | |
478 | { | |
479 | u64 mask = GENMASK(BITS_PER_BYTE * size, 0); | |
480 | ||
481 | /* | |
482 | * Emulate the I/O write via hypercall. More info about ABI can be found | |
483 | * in TDX Guest-Host-Communication Interface (GHCI) section titled | |
484 | * "TDG.VP.VMCALL<Instruction.IO>". | |
485 | */ | |
486 | return !_tdx_hypercall(hcall_func(EXIT_REASON_IO_INSTRUCTION), size, | |
487 | PORT_WRITE, port, regs->ax & mask); | |
488 | } | |
489 | ||
/*
 * Emulate I/O using hypercall.
 *
 * Assumes the IO instruction was using ax, which is enforced
 * by the standard io.h macros.
 *
 * Returns the instruction length to advance RIP by on success,
 * or -EIO on failure (string I/O is not supported).
 */
static int handle_io(struct pt_regs *regs, struct ve_info *ve)
{
	u32 exit_qual = ve->exit_qual;
	int size, port;
	bool in, ret;

	/* String I/O (INS/OUTS) is not emulated */
	if (VE_IS_IO_STRING(exit_qual))
		return -EIO;

	in   = VE_IS_IO_IN(exit_qual);
	size = VE_GET_IO_SIZE(exit_qual);
	port = VE_GET_PORT_NUM(exit_qual);


	if (in)
		ret = handle_in(regs, size, port);
	else
		ret = handle_out(regs, size, port);
	if (!ret)
		return -EIO;

	return ve_instr_len(ve);
}
521 | ||
/*
 * Early #VE exception handler. Only handles a subset of port I/O.
 * Intended only for earlyprintk. If failed, return false.
 */
__init bool tdx_early_handle_ve(struct pt_regs *regs)
{
	struct ve_info ve;
	int insn_len;

	tdx_get_ve_info(&ve);

	/* Only port I/O #VEs are expected/handled this early */
	if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
		return false;

	insn_len = handle_io(regs, &ve);
	if (insn_len < 0)
		return false;

	/* Skip the emulated instruction */
	regs->ip += insn_len;
	return true;
}
543 | ||
/*
 * Retrieve and decode the pending #VE information via TDG.VP.VEINFO.GET,
 * filling @ve with exit reason, qualification, GLA/GPA and instruction
 * length/info.
 */
void tdx_get_ve_info(struct ve_info *ve)
{
	struct tdx_module_output out;

	/*
	 * Called during #VE handling to retrieve the #VE info from the
	 * TDX module.
	 *
	 * This has to be called early in #VE handling.  A "nested" #VE which
	 * occurs before this will raise a #DF and is not recoverable.
	 *
	 * The call retrieves the #VE info from the TDX module, which also
	 * clears the "#VE valid" flag. This must be done before anything else
	 * because any #VE that occurs while the valid flag is set will lead to
	 * #DF.
	 *
	 * Note, the TDX module treats virtual NMIs as inhibited if the #VE
	 * valid flag is set. It means that NMI=>#VE will not result in a #DF.
	 */
	tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out);

	/* Transfer the output parameters */
	ve->exit_reason = out.rcx;
	ve->exit_qual   = out.rdx;
	ve->gla         = out.r8;
	ve->gpa         = out.r9;
	ve->instr_len   = lower_32_bits(out.r10);
	ve->instr_info  = upper_32_bits(out.r10);
}
573 | ||
/*
 * Handle the user initiated #VE.
 *
 * On success, returns the number of bytes RIP should be incremented (>=0)
 * or -errno on error.
 */
static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
{
	switch (ve->exit_reason) {
	case EXIT_REASON_CPUID:
		/* CPUID is the only #VE source permitted from user mode */
		return handle_cpuid(regs, ve);
	default:
		pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
		return -EIO;
	}
}
590 | ||
/*
 * Handle the kernel #VE.
 *
 * On success, returns the number of bytes RIP should be incremented (>=0)
 * or -errno on error.
 */
static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
{
	/* Dispatch to the per-exit-reason emulation handlers */
	switch (ve->exit_reason) {
	case EXIT_REASON_HLT:
		return handle_halt(ve);
	case EXIT_REASON_MSR_READ:
		return read_msr(regs, ve);
	case EXIT_REASON_MSR_WRITE:
		return write_msr(regs, ve);
	case EXIT_REASON_CPUID:
		return handle_cpuid(regs, ve);
	case EXIT_REASON_EPT_VIOLATION:
		return handle_mmio(regs, ve);
	case EXIT_REASON_IO_INSTRUCTION:
		return handle_io(regs, ve);
	default:
		pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
		return -EIO;
	}
}
617 | ||
/*
 * Top-level #VE dispatcher. Returns true when the exception was handled
 * (and RIP advanced past the faulting instruction), false otherwise.
 */
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
{
	int insn_len;

	if (user_mode(regs))
		insn_len = virt_exception_user(regs, ve);
	else
		insn_len = virt_exception_kernel(regs, ve);
	if (insn_len < 0)
		return false;

	/* After successful #VE handling, move the IP */
	regs->ip += insn_len;

	return true;
}
634 | ||
7dbde763 KS |
635 | static bool tdx_tlb_flush_required(bool private) |
636 | { | |
637 | /* | |
638 | * TDX guest is responsible for flushing TLB on private->shared | |
639 | * transition. VMM is responsible for flushing on shared->private. | |
640 | * | |
641 | * The VMM _can't_ flush private addresses as it can't generate PAs | |
642 | * with the guest's HKID. Shared memory isn't subject to integrity | |
643 | * checking, i.e. the VMM doesn't need to flush for its own protection. | |
644 | * | |
645 | * There's no need to flush when converting from shared to private, | |
646 | * as flushing is the VMM's responsibility in this case, e.g. it must | |
647 | * flush to avoid integrity failures in the face of a buggy or | |
648 | * malicious guest. | |
649 | */ | |
650 | return !private; | |
651 | } | |
652 | ||
static bool tdx_cache_flush_required(void)
{
	/*
	 * AMD SME/SEV can avoid cache flushing if HW enforces cache coherence.
	 * TDX doesn't have such capability.
	 *
	 * Flush cache unconditionally.
	 */
	return true;
}
663 | ||
/*
 * Try to accept one naturally-aligned chunk of pending private memory at
 * *@start with the page size of @pg_level. On success advances *@start
 * past the accepted chunk and returns true; returns false if alignment,
 * length, level, or the TDX_ACCEPT_PAGE call do not permit it.
 */
static bool try_accept_one(phys_addr_t *start, unsigned long len,
			   enum pg_level pg_level)
{
	unsigned long accept_size = page_level_size(pg_level);
	u64 tdcall_rcx;
	u8 page_size;

	/* Acceptance must be naturally aligned for this page size */
	if (!IS_ALIGNED(*start, accept_size))
		return false;

	if (len < accept_size)
		return false;

	/*
	 * Pass the page physical address to the TDX module to accept the
	 * pending, private page.
	 *
	 * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
	 */
	switch (pg_level) {
	case PG_LEVEL_4K:
		page_size = 0;
		break;
	case PG_LEVEL_2M:
		page_size = 1;
		break;
	case PG_LEVEL_1G:
		page_size = 2;
		break;
	default:
		return false;
	}

	tdcall_rcx = *start | page_size;
	if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
		return false;

	*start += accept_size;
	return true;
}
704 | ||
/*
 * Inform the VMM of the guest's intent for this physical page: shared with
 * the VMM or private to the guest. The VMM is expected to change its mapping
 * of the page in response.
 *
 * Returns true on success, false if the MapGPA hypercall or a page-accept
 * fails.
 */
static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
{
	phys_addr_t start = __pa(vaddr);
	phys_addr_t end   = __pa(vaddr + numpages * PAGE_SIZE);

	if (!enc) {
		/* Set the shared (decrypted) bits: */
		start |= cc_mkdec(0);
		end   |= cc_mkdec(0);
	}

	/*
	 * Notify the VMM about page mapping conversion. More info about ABI
	 * can be found in TDX Guest-Host-Communication Interface (GHCI),
	 * section "TDG.VP.VMCALL<MapGPA>"
	 */
	if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
		return false;

	/* private->shared conversion requires only MapGPA call */
	if (!enc)
		return true;

	/*
	 * For shared->private conversion, accept the page using
	 * TDX_ACCEPT_PAGE TDX module call.
	 */
	while (start < end) {
		unsigned long len = end - start;

		/*
		 * Try larger accepts first. It gives chance to VMM to keep
		 * 1G/2M SEPT entries where possible and speeds up process by
		 * cutting number of hypercalls (if successful).
		 */

		if (try_accept_one(&start, len, PG_LEVEL_1G))
			continue;

		if (try_accept_one(&start, len, PG_LEVEL_2M))
			continue;

		if (!try_accept_one(&start, len, PG_LEVEL_4K))
			return false;
	}

	return true;
}
758 | ||
/*
 * Detect a TDX guest via the TDX CPUID leaf and, if found, set up the
 * confidential-computing vendor/mask, trim physical_mask, and install
 * the memory-encryption status-change callbacks.
 */
void __init tdx_early_init(void)
{
	u64 cc_mask;
	u32 eax, sig[3];

	/* sig[] is filled as EBX, EDX, ECX to spell the TDX_IDENT string */
	cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]);

	if (memcmp(TDX_IDENT, sig, sizeof(sig)))
		return;

	setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);

	cc_set_vendor(CC_VENDOR_INTEL);
	tdx_parse_tdinfo(&cc_mask);
	cc_set_mask(cc_mask);

	/*
	 * All bits above GPA width are reserved and kernel treats shared bit
	 * as flag, not as part of physical address.
	 *
	 * Adjust physical mask to only cover valid GPA bits.
	 */
	physical_mask &= cc_mask - 1;

	x86_platform.guest.enc_cache_flush_required  = tdx_cache_flush_required;
	x86_platform.guest.enc_tlb_flush_required    = tdx_tlb_flush_required;
	x86_platform.guest.enc_status_change_finish  = tdx_enc_status_changed;

	pr_info("Guest detected\n");
}