// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2021-2022 Intel Corporation */

#undef pr_fmt
#define pr_fmt(fmt)	"tdx: " fmt

#include <linux/cpufeature.h>
#include <asm/coco.h>
#include <asm/tdx.h>
#include <asm/vmx.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
#include <asm/pgtable.h>

/* TDX module Call Leaf IDs */
#define TDX_GET_INFO			1
#define TDX_GET_VEINFO			3
#define TDX_ACCEPT_PAGE			6

/* TDX hypercall Leaf IDs */
#define TDVMCALL_MAP_GPA		0x10001

/* MMIO direction */
#define EPT_READ	0
#define EPT_WRITE	1

/* Port I/O direction */
#define PORT_READ	0
#define PORT_WRITE	1

/* See Exit Qualification for I/O Instructions in VMX documentation */
#define VE_IS_IO_IN(e)		((e) & BIT(3))
#define VE_GET_IO_SIZE(e)	(((e) & GENMASK(2, 0)) + 1)
#define VE_GET_PORT_NUM(e)	((e) >> 16)
#define VE_IS_IO_STRING(e)	((e) & BIT(4))

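/*
 * Worked example (editor's illustration, not part of the original source):
 * a one-byte "outb %al, $0x3f8" that takes a #VE produces an exit
 * qualification with bits 2:0 == 0 (so VE_GET_IO_SIZE() == 1), bit 3
 * clear (OUT, so VE_IS_IO_IN() is false), bit 4 clear (not a string op)
 * and 0x3f8 in bits 31:16 (VE_GET_PORT_NUM() == 0x3f8).
 */
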
/*
 * Wrapper for standard use of __tdx_hypercall with no output aside from
 * return code.
 */
static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = fn,
		.r12 = r12,
		.r13 = r13,
		.r14 = r14,
		.r15 = r15,
	};

	return __tdx_hypercall(&args, 0);
}

/* Called from __tdx_hypercall() for unrecoverable failure */
void __tdx_hypercall_failed(void)
{
	panic("TDVMCALL failed. TDX module bug?");
}

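/*
 * Usage note (editor's illustration): callers pass a TDVMCALL leaf ID and
 * up to four arguments in r12-r15. For example, the MapGPA conversion
 * later in this file boils down to:
 *
 *	_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0);
 */
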
/*
 * The TDG.VP.VMCALL-Instruction-execution sub-functions are defined
 * independently from but are currently matched 1:1 with VMX EXIT_REASONs.
 * Reusing the KVM EXIT_REASON macros makes it easier to connect the host
 * and guest sides of these calls.
 */
static u64 hcall_func(u64 exit_reason)
{
	return exit_reason;
}

#ifdef CONFIG_KVM_GUEST
long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2,
		       unsigned long p3, unsigned long p4)
{
	struct tdx_hypercall_args args = {
		.r10 = nr,
		.r11 = p1,
		.r12 = p2,
		.r13 = p3,
		.r14 = p4,
	};

	return __tdx_hypercall(&args, 0);
}
EXPORT_SYMBOL_GPL(tdx_kvm_hypercall);
#endif

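/*
 * Editor's note: unlike the standard hypercalls above, tdx_kvm_hypercall()
 * passes the KVM hypercall number directly in r10 rather than
 * TDX_HYPERCALL_STANDARD. The kvm_hypercall*() wrappers route here when
 * the kernel runs as a TDX guest.
 */
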
/*
 * Used for TDX guests to make calls directly to the TD module. This
 * should only be used for calls that have no legitimate reason to fail
 * or where the kernel can not survive the call failing.
 */
static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9,
				   struct tdx_module_output *out)
{
	if (__tdx_module_call(fn, rcx, rdx, r8, r9, out))
		panic("TDCALL %lld failed (Buggy TDX module!)\n", fn);
}

static u64 get_cc_mask(void)
{
	struct tdx_module_output out;
	unsigned int gpa_width;

	/*
	 * TDINFO TDX module call is used to get the TD execution environment
	 * information like GPA width, number of available vcpus, debug mode
	 * information, etc. More details about the ABI can be found in TDX
	 * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL
	 * [TDG.VP.INFO].
	 *
	 * The GPA width that comes out of this call is critical. TDX guests
	 * can not meaningfully run without it.
	 */
	tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out);

	gpa_width = out.rcx & GENMASK(5, 0);

	/*
	 * The highest bit of a guest physical address is the "sharing" bit.
	 * Set it for shared pages and clear it for private pages.
	 */
	return BIT_ULL(gpa_width - 1);
}

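/*
 * Worked example (editor's illustration): with a 48-bit GPA width,
 * get_cc_mask() returns BIT_ULL(47). A page is then mapped shared by
 * setting bit 47 in its guest physical address and kept private by
 * leaving that bit clear.
 */
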
static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_HLT),
		.r12 = irq_disabled,
	};

	/*
	 * Emulate HLT operation via hypercall. More info about ABI
	 * can be found in TDX Guest-Host-Communication Interface
	 * (GHCI), section 3.8 TDG.VP.VMCALL<Instruction.HLT>.
	 *
	 * The VMM uses the "IRQ disabled" param to understand IRQ
	 * enabled status (RFLAGS.IF) of the TD guest and to determine
	 * whether or not it should schedule the halted vCPU if an
	 * IRQ becomes pending. E.g. if IRQs are disabled, the VMM
	 * can keep the vCPU in virtual HLT, even if an IRQ is
	 * pending, without hanging/breaking the guest.
	 */
	return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
}

static bool handle_halt(void)
{
	/*
	 * Since non-safe halt is mainly used in CPU offlining
	 * and the guest will always stay in the halt state, don't
	 * call the STI instruction (set do_sti as false).
	 */
	const bool irq_disabled = irqs_disabled();
	const bool do_sti = false;

	if (__halt(irq_disabled, do_sti))
		return false;

	return true;
}

void __cpuidle tdx_safe_halt(void)
{
	/*
	 * For the do_sti=true case, __tdx_hypercall() enables interrupts
	 * using the STI instruction before the TDCALL. So set irq_disabled
	 * to false.
	 */
	const bool irq_disabled = false;
	const bool do_sti = true;

	/* Use WARN_ONCE() to report the failure. */
	if (__halt(irq_disabled, do_sti))
		WARN_ONCE(1, "HLT instruction emulation failed\n");
}

static bool read_msr(struct pt_regs *regs)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_MSR_READ),
		.r12 = regs->cx,
	};

	/*
	 * Emulate the MSR read via hypercall. More info about ABI
	 * can be found in TDX Guest-Host-Communication Interface
	 * (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
	 */
	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
		return false;

	regs->ax = lower_32_bits(args.r11);
	regs->dx = upper_32_bits(args.r11);
	return true;
}

static bool write_msr(struct pt_regs *regs)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_MSR_WRITE),
		.r12 = regs->cx,
		.r13 = (u64)regs->dx << 32 | regs->ax,
	};

	/*
	 * Emulate the MSR write via hypercall. More info about ABI
	 * can be found in TDX Guest-Host-Communication Interface
	 * (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
	 */
	return !__tdx_hypercall(&args, 0);
}

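/*
 * Worked example (editor's illustration): a WRMSR with %ecx = MSR index
 * and %edx:%eax = 0x00000001:0x00000000 that takes a #VE is forwarded as
 * a hypercall with r12 = the MSR index and r13 = 0x100000000, i.e. the
 * 64-bit value reassembled from the dx:ax register pair.
 */
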
static bool handle_cpuid(struct pt_regs *regs)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_CPUID),
		.r12 = regs->ax,
		.r13 = regs->cx,
	};

	/*
	 * Only allow the VMM to control the range reserved for hypervisor
	 * communication.
	 *
	 * Return all-zeros for any CPUID outside the range. This matches
	 * CPU behaviour for an unsupported leaf.
	 */
	if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
		regs->ax = regs->bx = regs->cx = regs->dx = 0;
		return true;
	}

	/*
	 * Emulate the CPUID instruction via a hypercall. More info about
	 * ABI can be found in TDX Guest-Host-Communication Interface
	 * (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
	 */
	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
		return false;

	/*
	 * As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
	 * EAX, EBX, ECX, EDX registers after the CPUID instruction execution.
	 * So copy the register contents back to pt_regs.
	 */
	regs->ax = args.r12;
	regs->bx = args.r13;
	regs->cx = args.r14;
	regs->dx = args.r15;

	return true;
}

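/*
 * Editor's note: leaves 0x40000000-0x4FFFFFFF are the architecturally
 * hypervisor-reserved CPUID range, e.g. KVM advertises its signature at
 * leaf 0x40000000. Only those leaves are forwarded to the (untrusted)
 * VMM; any other leaf that still triggers a #VE reads as all zeros.
 */
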
static bool mmio_read(int size, unsigned long addr, unsigned long *val)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_EPT_VIOLATION),
		.r12 = size,
		.r13 = EPT_READ,
		.r14 = addr,
		.r15 = *val,
	};

	if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
		return false;
	*val = args.r11;
	return true;
}

static bool mmio_write(int size, unsigned long addr, unsigned long val)
{
	return !_tdx_hypercall(hcall_func(EXIT_REASON_EPT_VIOLATION), size,
			       EPT_WRITE, addr, val);
}

static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
{
	char buffer[MAX_INSN_SIZE];
	unsigned long *reg, val;
	struct insn insn = {};
	enum mmio_type mmio;
	int size, extend_size;
	u8 extend_val = 0;

	/* Only in-kernel MMIO is supported */
	if (WARN_ON_ONCE(user_mode(regs)))
		return false;

	if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
		return false;

	if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
		return false;

	mmio = insn_decode_mmio(&insn, &size);
	if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
		return false;

	if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
		reg = insn_get_modrm_reg_ptr(&insn, regs);
		if (!reg)
			return false;
	}

	ve->instr_len = insn.length;

	/* Handle writes first */
	switch (mmio) {
	case MMIO_WRITE:
		memcpy(&val, reg, size);
		return mmio_write(size, ve->gpa, val);
	case MMIO_WRITE_IMM:
		val = insn.immediate.value;
		return mmio_write(size, ve->gpa, val);
	case MMIO_READ:
	case MMIO_READ_ZERO_EXTEND:
	case MMIO_READ_SIGN_EXTEND:
		/* Reads are handled below */
		break;
	case MMIO_MOVS:
	case MMIO_DECODE_FAILED:
		/*
		 * MMIO was accessed with an instruction that could not be
		 * decoded or handled properly. The access likely was not
		 * using the io.h helpers, or hit MMIO accidentally.
		 */
		return false;
	default:
		WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
		return false;
	}

	/* Handle reads */
	if (!mmio_read(size, ve->gpa, &val))
		return false;

	switch (mmio) {
	case MMIO_READ:
		/* Zero-extend for 32-bit operation */
		extend_size = size == 4 ? sizeof(*reg) : 0;
		break;
	case MMIO_READ_ZERO_EXTEND:
		/* Zero extend based on operand size */
		extend_size = insn.opnd_bytes;
		break;
	case MMIO_READ_SIGN_EXTEND:
		/* Sign extend based on operand size */
		extend_size = insn.opnd_bytes;
		if (size == 1 && val & BIT(7))
			extend_val = 0xFF;
		else if (size > 1 && val & BIT(15))
			extend_val = 0xFF;
		break;
	default:
		/* All other cases have to be covered by the first switch() */
		WARN_ON_ONCE(1);
		return false;
	}

	if (extend_size)
		memset(reg, extend_val, extend_size);
	memcpy(reg, &val, size);
	return true;
}

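/*
 * Worked example (editor's illustration): a "movswq (%rax), %rbx" that
 * reads 0x8000 from MMIO decodes as MMIO_READ_SIGN_EXTEND with size = 2
 * and insn.opnd_bytes = 8. Bit 15 of the value is set, so extend_val
 * becomes 0xFF: the destination register is first filled with 0xFF and
 * the low two bytes are then overwritten, yielding 0xFFFFFFFFFFFF8000.
 */
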
static bool handle_in(struct pt_regs *regs, int size, int port)
{
	struct tdx_hypercall_args args = {
		.r10 = TDX_HYPERCALL_STANDARD,
		.r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION),
		.r12 = size,
		.r13 = PORT_READ,
		.r14 = port,
	};
	u64 mask = GENMASK(BITS_PER_BYTE * size, 0);
	bool success;

	/*
	 * Emulate the I/O read via hypercall. More info about ABI can be found
	 * in TDX Guest-Host-Communication Interface (GHCI) section titled
	 * "TDG.VP.VMCALL<Instruction.IO>".
	 */
	success = !__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT);

	/* Update part of the register affected by the emulated instruction */
	regs->ax &= ~mask;
	if (success)
		regs->ax |= args.r11 & mask;

	return success;
}

static bool handle_out(struct pt_regs *regs, int size, int port)
{
	u64 mask = GENMASK(BITS_PER_BYTE * size, 0);

	/*
	 * Emulate the I/O write via hypercall. More info about ABI can be
	 * found in TDX Guest-Host-Communication Interface (GHCI) section
	 * titled "TDG.VP.VMCALL<Instruction.IO>".
	 */
	return !_tdx_hypercall(hcall_func(EXIT_REASON_IO_INSTRUCTION), size,
			       PORT_WRITE, port, regs->ax & mask);
}

/*
 * Emulate I/O using hypercall.
 *
 * Assumes the IO instruction was using ax, which is enforced
 * by the standard io.h macros.
 *
 * Return true on success or false on failure.
 */
static bool handle_io(struct pt_regs *regs, u32 exit_qual)
{
	int size, port;
	bool in;

	if (VE_IS_IO_STRING(exit_qual))
		return false;

	in = VE_IS_IO_IN(exit_qual);
	size = VE_GET_IO_SIZE(exit_qual);
	port = VE_GET_PORT_NUM(exit_qual);

	if (in)
		return handle_in(regs, size, port);
	else
		return handle_out(regs, size, port);
}

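/*
 * Worked example (editor's illustration): an "inb" from port 0x21 (the
 * PIC mask register) yields an exit qualification with bit 3 set (IN),
 * size bits == 0 and 0x21 in bits 31:16, so handle_io() dispatches to
 * handle_in(regs, 1, 0x21) and the result lands in the low byte of ax.
 */
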
/*
 * Early #VE exception handler. Only handles a subset of port I/O.
 * Intended only for earlyprintk. Returns false on failure.
 */
__init bool tdx_early_handle_ve(struct pt_regs *regs)
{
	struct ve_info ve;
	bool ret;

	tdx_get_ve_info(&ve);

	if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
		return false;

	ret = handle_io(regs, ve.exit_qual);
	if (ret)
		regs->ip += ve.instr_len;
	return ret;
}

void tdx_get_ve_info(struct ve_info *ve)
{
	struct tdx_module_output out;

	/*
	 * Called during #VE handling to retrieve the #VE info from the
	 * TDX module.
	 *
	 * This has to be called early in #VE handling. A "nested" #VE which
	 * occurs before this will raise a #DF and is not recoverable.
	 *
	 * The call retrieves the #VE info from the TDX module, which also
	 * clears the "#VE valid" flag. This must be done before anything else
	 * because any #VE that occurs while the valid flag is set will lead to
	 * #DF.
	 *
	 * Note, the TDX module treats virtual NMIs as inhibited if the #VE
	 * valid flag is set. It means that NMI=>#VE will not result in a #DF.
	 */
	tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out);

	/* Transfer the output parameters */
	ve->exit_reason = out.rcx;
	ve->exit_qual = out.rdx;
	ve->gla = out.r8;
	ve->gpa = out.r9;
	ve->instr_len = lower_32_bits(out.r10);
	ve->instr_info = upper_32_bits(out.r10);
}

/* Handle a user-initiated #VE */
static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
{
	switch (ve->exit_reason) {
	case EXIT_REASON_CPUID:
		return handle_cpuid(regs);
	default:
		pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
		return false;
	}
}

/* Handle a kernel-mode #VE */
static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
{
	switch (ve->exit_reason) {
	case EXIT_REASON_HLT:
		return handle_halt();
	case EXIT_REASON_MSR_READ:
		return read_msr(regs);
	case EXIT_REASON_MSR_WRITE:
		return write_msr(regs);
	case EXIT_REASON_CPUID:
		return handle_cpuid(regs);
	case EXIT_REASON_EPT_VIOLATION:
		return handle_mmio(regs, ve);
	case EXIT_REASON_IO_INSTRUCTION:
		return handle_io(regs, ve->exit_qual);
	default:
		pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
		return false;
	}
}

bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
{
	bool ret;

	if (user_mode(regs))
		ret = virt_exception_user(regs, ve);
	else
		ret = virt_exception_kernel(regs, ve);

	/* After successful #VE handling, move the IP */
	if (ret)
		regs->ip += ve->instr_len;

	return ret;
}

static bool tdx_tlb_flush_required(bool private)
{
	/*
	 * TDX guest is responsible for flushing TLB on private->shared
	 * transition. VMM is responsible for flushing on shared->private.
	 *
	 * The VMM _can't_ flush private addresses as it can't generate PAs
	 * with the guest's HKID. Shared memory isn't subject to integrity
	 * checking, i.e. the VMM doesn't need to flush for its own protection.
	 *
	 * There's no need to flush when converting from shared to private,
	 * as flushing is the VMM's responsibility in this case, e.g. it must
	 * flush to avoid integrity failures in the face of a buggy or
	 * malicious guest.
	 */
	return !private;
}

static bool tdx_cache_flush_required(void)
{
	/*
	 * AMD SME/SEV can avoid cache flushing if HW enforces cache coherence.
	 * TDX doesn't have such capability.
	 *
	 * Flush cache unconditionally.
	 */
	return true;
}

static bool try_accept_one(phys_addr_t *start, unsigned long len,
			   enum pg_level pg_level)
{
	unsigned long accept_size = page_level_size(pg_level);
	u64 tdcall_rcx;
	u8 page_size;

	if (!IS_ALIGNED(*start, accept_size))
		return false;

	if (len < accept_size)
		return false;

	/*
	 * Pass the page physical address to the TDX module to accept the
	 * pending, private page.
	 *
	 * Bits 2:0 of RCX encode page size: 0 - 4K, 1 - 2M, 2 - 1G.
	 */
	switch (pg_level) {
	case PG_LEVEL_4K:
		page_size = 0;
		break;
	case PG_LEVEL_2M:
		page_size = 1;
		break;
	case PG_LEVEL_1G:
		page_size = 2;
		break;
	default:
		return false;
	}

	tdcall_rcx = *start | page_size;
	if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL))
		return false;

	*start += accept_size;
	return true;
}

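/*
 * Worked example (editor's illustration): try_accept_one(&start, len,
 * PG_LEVEL_2M) with *start == 0x40000000 issues TDX_ACCEPT_PAGE with
 * RCX == 0x40000001 (the 2M-aligned GPA with page-size code 1 in the
 * low bits) and, on success, advances *start by 2M.
 */
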
/*
 * Inform the VMM of the guest's intent for this physical page: shared with
 * the VMM or private to the guest. The VMM is expected to change its mapping
 * of the page in response.
 */
static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
{
	phys_addr_t start = __pa(vaddr);
	phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);

	if (!enc) {
		/* Set the shared (decrypted) bits: */
		start |= cc_mkdec(0);
		end |= cc_mkdec(0);
	}

	/*
	 * Notify the VMM about page mapping conversion. More info about ABI
	 * can be found in TDX Guest-Host-Communication Interface (GHCI),
	 * section "TDG.VP.VMCALL<MapGPA>".
	 */
	if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0))
		return false;

	/* private->shared conversion requires only the MapGPA call */
	if (!enc)
		return true;

	/*
	 * For shared->private conversion, accept the page using the
	 * TDX_ACCEPT_PAGE TDX module call.
	 */
	while (start < end) {
		unsigned long len = end - start;

		/*
		 * Try larger accepts first. It gives the VMM a chance to keep
		 * 1G/2M SEPT entries where possible and speeds up the process
		 * by cutting the number of hypercalls (if successful).
		 */
		if (try_accept_one(&start, len, PG_LEVEL_1G))
			continue;

		if (try_accept_one(&start, len, PG_LEVEL_2M))
			continue;

		if (!try_accept_one(&start, len, PG_LEVEL_4K))
			return false;
	}

	return true;
}

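/*
 * Editor's note: this callback is reached through the
 * x86_platform.guest.enc_status_change_finish hook installed in
 * tdx_early_init() below, which the set_memory_decrypted() and
 * set_memory_encrypted() paths invoke when converting page ranges
 * between shared and private.
 */
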
void __init tdx_early_init(void)
{
	u64 cc_mask;
	u32 eax, sig[3];

	cpuid_count(TDX_CPUID_LEAF_ID, 0, &eax, &sig[0], &sig[2], &sig[1]);

	if (memcmp(TDX_IDENT, sig, sizeof(sig)))
		return;

	setup_force_cpu_cap(X86_FEATURE_TDX_GUEST);

	cc_set_vendor(CC_VENDOR_INTEL);
	cc_mask = get_cc_mask();
	cc_set_mask(cc_mask);

	/*
	 * All bits above the GPA width are reserved, and the kernel treats
	 * the shared bit as a flag, not as part of the physical address.
	 *
	 * Adjust the physical mask to only cover valid GPA bits.
	 */
	physical_mask &= cc_mask - 1;

	x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
	x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
	x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed;

	pr_info("Guest detected\n");
}
696 | } |