// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>

#include <asm/msr-index.h>
#include <asm/debugreg.h>

#include "kvm_emulate.h"
#include "trace.h"
#include "mmu.h"
#include "x86.h"
#include "smm.h"
#include "cpuid.h"
#include "lapic.h"
#include "svm.h"
#include "hyperv.h"

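/* CC() evaluates a nested VM-Enter consistency check and traces failures. */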
#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK

static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb *vmcb = svm->vmcb;

	if (vmcb->control.exit_code != SVM_EXIT_NPF) {
		/*
		 * TODO: track the cause of the nested page fault, and
		 * correctly fill in the high bits of exit_info_1.
		 */
		vmcb->control.exit_code = SVM_EXIT_NPF;
		vmcb->control.exit_code_hi = 0;
		vmcb->control.exit_info_1 = (1ULL << 32);
		vmcb->control.exit_info_2 = fault->address;
	}

	vmcb->control.exit_info_1 &= ~0xffffffffULL;
	vmcb->control.exit_info_1 |= fault->error_code;

	nested_svm_vmexit(svm);
}

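/*
 * ->get_pdptr() hook for the nested NPT MMU: read the PDPTE at @index from
 * L1's nCR3 page (relevant when L1 uses PAE paging for its nested page
 * tables).  A failed read yields a non-present PDPTE.
 */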
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr3 = svm->nested.ctl.nested_cr3;
	u64 pdpte;
	int ret;

	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
				       offset_in_page(cr3) + index * 8, 8);
	if (ret)
		return 0;
	return pdpte;
}

static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->nested.ctl.nested_cr3;
}

static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;

	/*
	 * The NPT format depends on L1's CR4 and EFER, which is in vmcb01.
	 * Note, when called via KVM_SET_NESTED_STATE, that state may _not_
	 * match current vCPU state.  CR0.WP is explicitly ignored, while
	 * CR0.PG is required.
	 */
	kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
				svm->vmcb01.ptr->save.efer,
				svm->nested.ctl.nested_cr3);
	vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}

static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}

static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
{
	if (!guest_can_use(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
		return true;

	if (!nested_npt_enabled(svm))
		return true;

	if (!(svm->nested.ctl.virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK))
		return true;

	return false;
}

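/*
 * Recompute vmcb02's intercept vectors: start from KVM's vmcb01 intercepts,
 * OR in L1's vmcb12 intercepts, and then apply the targeted adjustments
 * below (CR8/VINTR, VMMCALL, SMI, and VMLOAD/VMSAVE).
 */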
void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct vmcb_ctrl_area_cached *g;
	unsigned int i;

	vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->vmcb01.ptr->control;
	g = &svm->nested.ctl;

	for (i = 0; i < MAX_INTERCEPT; i++)
		c->intercepts[i] = h->intercepts[i];

	if (g->int_ctl & V_INTR_MASKING_MASK) {
		/*
		 * If L2 is active and V_INTR_MASKING is enabled in vmcb12,
		 * disable intercept of CR8 writes as L2's CR8 does not affect
		 * any interrupt KVM may want to inject.
		 *
		 * Similarly, disable intercept of virtual interrupts (used to
		 * detect interrupt windows) if the saved RFLAGS.IF is '0', as
		 * the effective RFLAGS.IF for L1 interrupts will never be set
		 * while L2 is running (L2's RFLAGS.IF doesn't affect L1 IRQs).
		 */
		vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE);
		if (!(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF))
			vmcb_clr_intercept(c, INTERCEPT_VINTR);
	}

	/*
	 * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB
	 * flush feature is enabled.
	 */
	if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
		vmcb_clr_intercept(c, INTERCEPT_VMMCALL);

	for (i = 0; i < MAX_INTERCEPT; i++)
		c->intercepts[i] |= g->intercepts[i];

	/* If SMI is not intercepted, ignore guest SMI intercept as well */
	if (!intercept_smi)
		vmcb_clr_intercept(c, INTERCEPT_SMI);

	if (nested_vmcb_needs_vls_intercept(svm)) {
		/*
		 * If the virtual VMLOAD/VMSAVE is not enabled for the L2,
		 * we must intercept these instructions to correctly
		 * emulate them in case L1 doesn't intercept them.
		 */
		vmcb_set_intercept(c, INTERCEPT_VMLOAD);
		vmcb_set_intercept(c, INTERCEPT_VMSAVE);
	} else {
		WARN_ON(!(c->virt_ext & VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK));
	}
}

/*
 * Merge L0's (KVM) and L1's (Nested VMCB) MSR permission bitmaps.  The
 * function is optimized in that it only merges the parts where KVM's MSR
 * permission bitmap may contain zero bits.
 */
static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
	struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;
	int i;

	/*
	 * MSR bitmap update can be skipped when:
	 * - MSR bitmap for L1 hasn't changed.
	 * - Nested hypervisor (L1) is attempting to launch the same L2 as
	 *   before.
	 * - Nested hypervisor (L1) is using Hyper-V emulation interface and
	 *   tells KVM (L0) there were no changes in MSR bitmap for L2.
	 */
	if (!svm->nested.force_msr_bitmap_recalc &&
	    kvm_hv_hypercall_enabled(&svm->vcpu) &&
	    hve->hv_enlightenments_control.msr_bitmap &&
	    (svm->nested.ctl.clean & BIT(HV_VMCB_NESTED_ENLIGHTENMENTS)))
		goto set_msrpm_base_pa;

	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
		return true;

	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;

		if (msrpm_offsets[i] == 0xffffffff)
			break;

		p = msrpm_offsets[i];

		/* x2apic msrs are intercepted always for the nested guest */
		if (is_x2apic_msrpm_offset(p))
			continue;

		offset = svm->nested.ctl.msrpm_base_pa + (p * 4);

		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
	}

	svm->nested.force_msr_bitmap_recalc = false;

set_msrpm_base_pa:
	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));

	return true;
}

/*
 * Bits 11:0 of bitmap address are ignored by hardware
 */
static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
{
	u64 addr = PAGE_ALIGN(pa);

	return kvm_vcpu_is_legal_gpa(vcpu, addr) &&
	       kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
}

static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl)
{
	/* Nested FLUSHBYASID is not supported yet. */
	switch (tlb_ctl) {
	case TLB_CONTROL_DO_NOTHING:
	case TLB_CONTROL_FLUSH_ALL_ASID:
		return true;
	default:
		return false;
	}
}

static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
					 struct vmcb_ctrl_area_cached *control)
{
	if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
		return false;

	if (CC(control->asid == 0))
		return false;

	if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
		return false;

	if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
					   MSRPM_SIZE)))
		return false;
	if (CC(!nested_svm_check_bitmap_pa(vcpu, control->iopm_base_pa,
					   IOPM_SIZE)))
		return false;

	if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl)))
		return false;

	if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
	       !vmcb12_is_intercept(control, INTERCEPT_NMI))) {
		return false;
	}

	return true;
}

/* Common checks that apply to both L1 and L2 state. */
static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
				     struct vmcb_save_area_cached *save)
{
	if (CC(!(save->efer & EFER_SVME)))
		return false;

	if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
	    CC(save->cr0 & ~0xffffffffULL))
		return false;

	if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
		return false;

	/*
	 * These checks are also performed by KVM_SET_SREGS,
	 * except that EFER.LMA is not checked by SVM against
	 * CR0.PG && EFER.LME.
	 */
	if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
		if (CC(!(save->cr4 & X86_CR4_PAE)) ||
		    CC(!(save->cr0 & X86_CR0_PE)) ||
		    CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
			return false;
	}

	/* Note, SVM doesn't have any additional restrictions on CR4. */
	if (CC(!__kvm_is_valid_cr4(vcpu, save->cr4)))
		return false;

	if (CC(!kvm_valid_efer(vcpu, save->efer)))
		return false;

	return true;
}

static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb_save_area_cached *save = &svm->nested.save;

	return __nested_vmcb_check_save(vcpu, save);
}

static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;

	return __nested_vmcb_check_controls(vcpu, ctl);
}

static
void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
					 struct vmcb_ctrl_area_cached *to,
					 struct vmcb_control_area *from)
{
	unsigned int i;

	for (i = 0; i < MAX_INTERCEPT; i++)
		to->intercepts[i] = from->intercepts[i];

	to->iopm_base_pa = from->iopm_base_pa;
	to->msrpm_base_pa = from->msrpm_base_pa;
	to->tsc_offset = from->tsc_offset;
	to->tlb_ctl = from->tlb_ctl;
	to->int_ctl = from->int_ctl;
	to->int_vector = from->int_vector;
	to->int_state = from->int_state;
	to->exit_code = from->exit_code;
	to->exit_code_hi = from->exit_code_hi;
	to->exit_info_1 = from->exit_info_1;
	to->exit_info_2 = from->exit_info_2;
	to->exit_int_info = from->exit_int_info;
	to->exit_int_info_err = from->exit_int_info_err;
	to->nested_ctl = from->nested_ctl;
	to->event_inj = from->event_inj;
	to->event_inj_err = from->event_inj_err;
	to->next_rip = from->next_rip;
	to->nested_cr3 = from->nested_cr3;
	to->virt_ext = from->virt_ext;
	to->pause_filter_count = from->pause_filter_count;
	to->pause_filter_thresh = from->pause_filter_thresh;

	/* Copy asid here because nested_vmcb_check_controls will check it. */
	to->asid = from->asid;
	to->msrpm_base_pa &= ~0x0fffULL;
	to->iopm_base_pa &= ~0x0fffULL;

	/* Hyper-V extensions (Enlightened VMCB) */
	if (kvm_hv_hypercall_enabled(vcpu)) {
		to->clean = from->clean;
		memcpy(&to->hv_enlightenments, &from->hv_enlightenments,
		       sizeof(to->hv_enlightenments));
	}
}

void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
				       struct vmcb_control_area *control)
{
	__nested_copy_vmcb_control_to_cache(&svm->vcpu, &svm->nested.ctl, control);
}

static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to,
					     struct vmcb_save_area *from)
{
	/*
	 * Copy only fields that are validated, as we need them
	 * to avoid TOC/TOU (time-of-check/time-of-use) races.
	 */
	to->efer = from->efer;
	to->cr0 = from->cr0;
	to->cr3 = from->cr3;
	to->cr4 = from->cr4;

	to->dr6 = from->dr6;
	to->dr7 = from->dr7;
}

void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
				    struct vmcb_save_area *save)
{
	__nested_copy_vmcb_save_to_cache(&svm->nested.save, save);
}

/*
 * Synchronize fields that are written by the processor, so that
 * they can be copied back into the vmcb12.
 */
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
{
	u32 mask;

	svm->nested.ctl.event_inj = svm->vmcb->control.event_inj;
	svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err;

	/* Only a few fields of int_ctl are written by the processor. */
	mask = V_IRQ_MASK | V_TPR_MASK;
	/*
	 * Don't sync vmcb02 V_IRQ back to vmcb12 if KVM (L0) is intercepting
	 * virtual interrupts in order to request an interrupt window, as KVM
	 * has usurped vmcb02's int_ctl.  If an interrupt window opens before
	 * the next VM-Exit, svm_clear_vintr() will restore vmcb12's int_ctl.
	 * If no window opens, V_IRQ will be correctly preserved in vmcb12's
	 * int_ctl (because it was never recognized while L2 was running).
	 */
	if (svm_is_intercept(svm, INTERCEPT_VINTR) &&
	    !test_bit(INTERCEPT_VINTR, (unsigned long *)svm->nested.ctl.intercepts))
		mask &= ~V_IRQ_MASK;

	if (nested_vgif_enabled(svm))
		mask |= V_GIF_MASK;

	if (nested_vnmi_enabled(svm))
		mask |= V_NMI_BLOCKING_MASK | V_NMI_PENDING_MASK;

	svm->nested.ctl.int_ctl &= ~mask;
	svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask;
}

/*
 * Transfer any event that L0 or L1 wanted to inject into L2 to
 * EXIT_INT_INFO.
 */
static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
						struct vmcb *vmcb12)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	u32 exit_int_info = 0;
	unsigned int nr;

	if (vcpu->arch.exception.injected) {
		nr = vcpu->arch.exception.vector;
		exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;

		if (vcpu->arch.exception.has_error_code) {
			exit_int_info |= SVM_EVTINJ_VALID_ERR;
			vmcb12->control.exit_int_info_err =
				vcpu->arch.exception.error_code;
		}

	} else if (vcpu->arch.nmi_injected) {
		exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;

	} else if (vcpu->arch.interrupt.injected) {
		nr = vcpu->arch.interrupt.nr;
		exit_int_info = nr | SVM_EVTINJ_VALID;

		if (vcpu->arch.interrupt.soft)
			exit_int_info |= SVM_EVTINJ_TYPE_SOFT;
		else
			exit_int_info |= SVM_EVTINJ_TYPE_INTR;
	}

	vmcb12->control.exit_int_info = exit_int_info;
}

static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
{
	/*
	 * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or
	 * L2's VP_ID upon request from the guest.  Make sure we check for
	 * pending entries in the right FIFO upon L1/L2 transition as these
	 * requests are put by other vCPUs asynchronously.
	 */
	if (to_hv_vcpu(vcpu) && npt_enabled)
		kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu);

	/*
	 * TODO: optimize unconditional TLB flush/MMU sync.  A partial list of
	 * things to fix before this can be conditional:
	 *
	 *  - Flush TLBs for both L1 and L2 remote TLB flush
	 *  - Honor L1's request to flush an ASID on nested VMRUN
	 *  - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*]
	 *  - Don't crush a pending TLB flush in vmcb02 on nested VMRUN
	 *  - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST
	 *
	 * [*] Unlike nested EPT, SVM's ASID management can invalidate nested
	 *     NPT guest-physical mappings on VMRUN.
	 */
	kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
	kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}

/*
 * Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
 * if we are emulating VM-Entry into a guest with NPT enabled.
 */
static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
			       bool nested_npt, bool reload_pdptrs)
{
	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
		return -EINVAL;

	if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
	    CC(!load_pdptrs(vcpu, cr3)))
		return -EINVAL;

	vcpu->arch.cr3 = cr3;

	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
	kvm_init_mmu(vcpu);

	if (!nested_npt)
		kvm_mmu_new_pgd(vcpu, cr3);

	return 0;
}

void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
{
	if (!svm->nested.vmcb02.ptr)
		return;

	/* FIXME: merge g_pat from vmcb01 and vmcb12. */
	svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
}

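/*
 * Load L2 guest state from vmcb12 into vmcb02.  Fields covered by vmcb12's
 * clean bits are skipped when re-entering the same vmcb12 that was run last.
 */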
static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
{
	bool new_vmcb12 = false;
	struct vmcb *vmcb01 = svm->vmcb01.ptr;
	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
	struct kvm_vcpu *vcpu = &svm->vcpu;

	nested_vmcb02_compute_g_pat(svm);

	/* Load the nested guest state */
	if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
		new_vmcb12 = true;
		svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa;
		svm->nested.force_msr_bitmap_recalc = true;
	}

	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
		vmcb02->save.es = vmcb12->save.es;
		vmcb02->save.cs = vmcb12->save.cs;
		vmcb02->save.ss = vmcb12->save.ss;
		vmcb02->save.ds = vmcb12->save.ds;
		vmcb02->save.cpl = vmcb12->save.cpl;
		vmcb_mark_dirty(vmcb02, VMCB_SEG);
	}

	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
		vmcb02->save.gdtr = vmcb12->save.gdtr;
		vmcb02->save.idtr = vmcb12->save.idtr;
		vmcb_mark_dirty(vmcb02, VMCB_DT);
	}

	kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);

	svm_set_efer(vcpu, svm->nested.save.efer);

	svm_set_cr0(vcpu, svm->nested.save.cr0);
	svm_set_cr4(vcpu, svm->nested.save.cr4);

	svm->vcpu.arch.cr2 = vmcb12->save.cr2;

	kvm_rax_write(vcpu, vmcb12->save.rax);
	kvm_rsp_write(vcpu, vmcb12->save.rsp);
	kvm_rip_write(vcpu, vmcb12->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	vmcb02->save.rax = vmcb12->save.rax;
	vmcb02->save.rsp = vmcb12->save.rsp;
	vmcb02->save.rip = vmcb12->save.rip;

	/* These bits will be set properly on the first execution when new_vmcb12 is true */
	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
		vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
		svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
		vmcb_mark_dirty(vmcb02, VMCB_DR);
	}

	if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
		     (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
		/*
		 * Reserved bits of DEBUGCTL are ignored.  Be consistent with
		 * svm_set_msr's definition of reserved bits.
		 */
		svm_copy_lbrs(vmcb02, vmcb12);
		vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
		svm_update_lbrv(&svm->vcpu);

	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
		svm_copy_lbrs(vmcb02, vmcb01);
	}
}

static inline bool is_evtinj_soft(u32 evtinj)
{
	u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;
	u8 vector = evtinj & SVM_EVTINJ_VEC_MASK;

	if (!(evtinj & SVM_EVTINJ_VALID))
		return false;

	if (type == SVM_EVTINJ_TYPE_SOFT)
		return true;

	return type == SVM_EVTINJ_TYPE_EXEPT && kvm_exception_is_soft(vector);
}

static bool is_evtinj_nmi(u32 evtinj)
{
	u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;

	if (!(evtinj & SVM_EVTINJ_VALID))
		return false;

	return type == SVM_EVTINJ_TYPE_NMI;
}

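/*
 * Build vmcb02's control area for the nested run: merge KVM's vmcb01
 * controls with L1's cached vmcb12 controls (int_ctl, TSC offset/ratio,
 * event injection, next_rip, pause filtering), then enter guest mode and
 * recompute the intercepts.
 */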
static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
					  unsigned long vmcb12_rip,
					  unsigned long vmcb12_csbase)
{
	u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
	u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;

	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct vmcb *vmcb01 = svm->vmcb01.ptr;
	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
	u32 pause_count12;
	u32 pause_thresh12;

	/*
	 * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
	 * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
	 */

	if (guest_can_use(vcpu, X86_FEATURE_VGIF) &&
	    (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK))
		int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
	else
		int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);

	if (vnmi) {
		if (vmcb01->control.int_ctl & V_NMI_PENDING_MASK) {
			svm->vcpu.arch.nmi_pending++;
			kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
		}
		if (nested_vnmi_enabled(svm))
			int_ctl_vmcb12_bits |= (V_NMI_PENDING_MASK |
						V_NMI_ENABLE_MASK |
						V_NMI_BLOCKING_MASK);
	}

	/* Copied from vmcb01.  msrpm_base can be overwritten later. */
	vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
	vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
	vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;

	/* Done at vmrun: asid. */

	/* Also overwritten later if necessary. */
	vmcb02->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;

	/* nested_cr3. */
	if (nested_npt_enabled(svm))
		nested_svm_init_mmu_context(vcpu);

	vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
			vcpu->arch.l1_tsc_offset,
			svm->nested.ctl.tsc_offset,
			svm->tsc_ratio_msr);

	vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;

	if (guest_can_use(vcpu, X86_FEATURE_TSCRATEMSR) &&
	    svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
		nested_svm_update_tsc_ratio_msr(vcpu);

	vmcb02->control.int_ctl =
		(svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
		(vmcb01->control.int_ctl & int_ctl_vmcb01_bits);

	vmcb02->control.int_vector = svm->nested.ctl.int_vector;
	vmcb02->control.int_state = svm->nested.ctl.int_state;
	vmcb02->control.event_inj = svm->nested.ctl.event_inj;
	vmcb02->control.event_inj_err = svm->nested.ctl.event_inj_err;

	/*
	 * next_rip is consumed on VMRUN as the return address pushed on the
	 * stack for injected soft exceptions/interrupts.  If nrips is exposed
	 * to L1, take it verbatim from vmcb12.  If nrips is supported in
	 * hardware but not exposed to L1, stuff the actual L2 RIP to emulate
	 * what a nrips=0 CPU would do (L1 is responsible for advancing RIP
	 * prior to injecting the event).
	 */
	if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
		vmcb02->control.next_rip = svm->nested.ctl.next_rip;
	else if (boot_cpu_has(X86_FEATURE_NRIPS))
		vmcb02->control.next_rip = vmcb12_rip;

	svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
	if (is_evtinj_soft(vmcb02->control.event_inj)) {
		svm->soft_int_injected = true;
		svm->soft_int_csbase = vmcb12_csbase;
		svm->soft_int_old_rip = vmcb12_rip;
		if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
			svm->soft_int_next_rip = svm->nested.ctl.next_rip;
		else
			svm->soft_int_next_rip = vmcb12_rip;
	}

	vmcb02->control.virt_ext = vmcb01->control.virt_ext &
				   LBR_CTL_ENABLE_MASK;
	if (guest_can_use(vcpu, X86_FEATURE_LBRV))
		vmcb02->control.virt_ext |=
			(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK);

	if (!nested_vmcb_needs_vls_intercept(svm))
		vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;

	if (guest_can_use(vcpu, X86_FEATURE_PAUSEFILTER))
		pause_count12 = svm->nested.ctl.pause_filter_count;
	else
		pause_count12 = 0;
	if (guest_can_use(vcpu, X86_FEATURE_PFTHRESHOLD))
		pause_thresh12 = svm->nested.ctl.pause_filter_thresh;
	else
		pause_thresh12 = 0;
	if (kvm_pause_in_guest(svm->vcpu.kvm)) {
		/* use guest values since host doesn't intercept PAUSE */
		vmcb02->control.pause_filter_count = pause_count12;
		vmcb02->control.pause_filter_thresh = pause_thresh12;

	} else {
		/* start from host values otherwise */
		vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
		vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;

		/* ... but ensure filtering is disabled if so requested. */
		if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
			if (!pause_count12)
				vmcb02->control.pause_filter_count = 0;
			if (!pause_thresh12)
				vmcb02->control.pause_filter_thresh = 0;
		}
	}

	nested_svm_transition_tlb_flush(vcpu);

	/* Enter Guest-Mode */
	enter_guest_mode(vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take effect.
	 */
	recalc_intercepts(svm);
}

static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	/*
	 * Some VMCB state is shared between L1 and L2 and thus has to be
	 * moved at the time of nested vmrun and vmexit.
	 *
	 * VMLOAD/VMSAVE state would also belong in this category, but KVM
	 * always performs VMLOAD and VMSAVE from the VMCB01.
	 */
	to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
}

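/*
 * Enter guest mode for a nested run: switch to vmcb02, load L1's cached
 * vmcb12 control and save state into it, and load L2's CR3.  When not called
 * from VMRUN emulation (@from_vmrun == false, i.e. nested state restore),
 * pinning of the nested state pages is deferred via
 * KVM_REQ_GET_NESTED_STATE_PAGES.
 */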
int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
			 struct vmcb *vmcb12, bool from_vmrun)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;

	trace_kvm_nested_vmenter(svm->vmcb->save.rip,
				 vmcb12_gpa,
				 vmcb12->save.rip,
				 vmcb12->control.int_ctl,
				 vmcb12->control.event_inj,
				 vmcb12->control.nested_ctl,
				 vmcb12->control.nested_cr3,
				 vmcb12->save.cr3,
				 KVM_ISA_SVM);

	trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
				    vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
				    vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
				    vmcb12->control.intercepts[INTERCEPT_WORD3],
				    vmcb12->control.intercepts[INTERCEPT_WORD4],
				    vmcb12->control.intercepts[INTERCEPT_WORD5]);

	svm->nested.vmcb12_gpa = vmcb12_gpa;

	WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);

	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);

	svm_switch_vmcb(svm, &svm->nested.vmcb02);
	nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
	nested_vmcb02_prepare_save(svm, vmcb12);

	ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
				  nested_npt_enabled(svm), from_vmrun);
	if (ret)
		return ret;

	if (!from_vmrun)
		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

	svm_set_gif(svm, true);

	if (kvm_vcpu_apicv_active(vcpu))
		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);

	nested_svm_hv_update_vm_vp_ids(vcpu);

	return 0;
}

int nested_svm_vmrun(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;
	struct vmcb *vmcb12;
	struct kvm_host_map map;
	u64 vmcb12_gpa;
	struct vmcb *vmcb01 = svm->vmcb01.ptr;

	if (!svm->nested.hsave_msr) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	if (is_smm(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	/* This fails when VP assist page is enabled but the supplied GPA is bogus */
	ret = kvm_hv_verify_vp_assist(vcpu);
	if (ret) {
		kvm_inject_gp(vcpu, 0);
		return ret;
	}

	vmcb12_gpa = svm->vmcb->save.rax;
	ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
	if (ret == -EINVAL) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	} else if (ret) {
		return kvm_skip_emulated_instruction(vcpu);
	}

	ret = kvm_skip_emulated_instruction(vcpu);

	vmcb12 = map.hva;

	if (WARN_ON_ONCE(!svm->nested.initialized))
		return -EINVAL;

	nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
	nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);

	if (!nested_vmcb_check_save(vcpu) ||
	    !nested_vmcb_check_controls(vcpu)) {
		vmcb12->control.exit_code = SVM_EXIT_ERR;
		vmcb12->control.exit_code_hi = 0;
		vmcb12->control.exit_info_1 = 0;
		vmcb12->control.exit_info_2 = 0;
		goto out;
	}

	/*
	 * Since vmcb01 is not in use, we can use it to store some of the L1
	 * state.
	 */
	vmcb01->save.efer = vcpu->arch.efer;
	vmcb01->save.cr0 = kvm_read_cr0(vcpu);
	vmcb01->save.cr4 = vcpu->arch.cr4;
	vmcb01->save.rflags = kvm_get_rflags(vcpu);
	vmcb01->save.rip = kvm_rip_read(vcpu);

	if (!npt_enabled)
		vmcb01->save.cr3 = kvm_read_cr3(vcpu);

	svm->nested.nested_run_pending = 1;

	if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
		goto out_exit_err;

	if (nested_svm_vmrun_msrpm(svm))
		goto out;

out_exit_err:
	svm->nested.nested_run_pending = 0;
	svm->nmi_l1_to_l2 = false;
	svm->soft_int_injected = false;

	svm->vmcb->control.exit_code = SVM_EXIT_ERR;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;

	nested_svm_vmexit(svm);

out:
	kvm_vcpu_unmap(vcpu, &map, true);

	return ret;
}

/* Copy state save area fields which are handled by VMRUN */
void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
			  struct vmcb_save_area *from_save)
{
	to_save->es = from_save->es;
	to_save->cs = from_save->cs;
	to_save->ss = from_save->ss;
	to_save->ds = from_save->ds;
	to_save->gdtr = from_save->gdtr;
	to_save->idtr = from_save->idtr;
	to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
	to_save->efer = from_save->efer;
	to_save->cr0 = from_save->cr0;
	to_save->cr3 = from_save->cr3;
	to_save->cr4 = from_save->cr4;
	to_save->rax = from_save->rax;
	to_save->rsp = from_save->rsp;
	to_save->rip = from_save->rip;
	to_save->cpl = 0;
}

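/* Copy the state save area fields which are handled by VMLOAD/VMSAVE. */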
void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
	to_vmcb->save.star = from_vmcb->save.star;
	to_vmcb->save.lstar = from_vmcb->save.lstar;
	to_vmcb->save.cstar = from_vmcb->save.cstar;
	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}

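/*
 * Emulate #VMEXIT from L2 to L1: snapshot L2 state into vmcb12, switch back
 * to vmcb01, and restore the L1 state that VMRUN emulation stashed there.
 */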
int nested_svm_vmexit(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct vmcb *vmcb01 = svm->vmcb01.ptr;
	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
	struct vmcb *vmcb12;
	struct kvm_host_map map;
	int rc;

	rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(vcpu, 0);
		return 1;
	}

	vmcb12 = map.hva;

	/* Exit Guest-Mode */
	leave_guest_mode(vcpu);
	svm->nested.vmcb12_gpa = 0;
	WARN_ON_ONCE(svm->nested.nested_run_pending);

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

	/* in case we halted in L2 */
	svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;

	/* Give the current vmcb to the guest */

	vmcb12->save.es = vmcb02->save.es;
	vmcb12->save.cs = vmcb02->save.cs;
	vmcb12->save.ss = vmcb02->save.ss;
	vmcb12->save.ds = vmcb02->save.ds;
	vmcb12->save.gdtr = vmcb02->save.gdtr;
	vmcb12->save.idtr = vmcb02->save.idtr;
	vmcb12->save.efer = svm->vcpu.arch.efer;
	vmcb12->save.cr0 = kvm_read_cr0(vcpu);
	vmcb12->save.cr3 = kvm_read_cr3(vcpu);
	vmcb12->save.cr2 = vmcb02->save.cr2;
	vmcb12->save.cr4 = svm->vcpu.arch.cr4;
	vmcb12->save.rflags = kvm_get_rflags(vcpu);
	vmcb12->save.rip = kvm_rip_read(vcpu);
	vmcb12->save.rsp = kvm_rsp_read(vcpu);
	vmcb12->save.rax = kvm_rax_read(vcpu);
	vmcb12->save.dr7 = vmcb02->save.dr7;
	vmcb12->save.dr6 = svm->vcpu.arch.dr6;
	vmcb12->save.cpl = vmcb02->save.cpl;

	vmcb12->control.int_state = vmcb02->control.int_state;
	vmcb12->control.exit_code = vmcb02->control.exit_code;
	vmcb12->control.exit_code_hi = vmcb02->control.exit_code_hi;
	vmcb12->control.exit_info_1 = vmcb02->control.exit_info_1;
	vmcb12->control.exit_info_2 = vmcb02->control.exit_info_2;

	if (vmcb12->control.exit_code != SVM_EXIT_ERR)
		nested_save_pending_event_to_vmcb12(svm, vmcb12);

	if (guest_can_use(vcpu, X86_FEATURE_NRIPS))
		vmcb12->control.next_rip = vmcb02->control.next_rip;

	vmcb12->control.int_ctl = svm->nested.ctl.int_ctl;
	vmcb12->control.event_inj = svm->nested.ctl.event_inj;
	vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;

	if (!kvm_pause_in_guest(vcpu->kvm)) {
		vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
		vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
	}

	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);

	svm_switch_vmcb(svm, &svm->vmcb01);

	/*
	 * Rules for synchronizing int_ctl bits from vmcb02 to vmcb01:
	 *
	 * V_IRQ, V_IRQ_VECTOR, V_INTR_PRIO_MASK, V_IGN_TPR: If L1 doesn't
	 * intercept interrupts, then KVM will use vmcb02's V_IRQ (and related
	 * flags) to detect interrupt windows for L1 IRQs (even if L1 uses
	 * virtual interrupt masking).  Raise KVM_REQ_EVENT to ensure that
	 * KVM re-requests an interrupt window if necessary, which implicitly
	 * copies these bits from vmcb02 to vmcb01.
	 *
	 * V_TPR: If L1 doesn't use virtual interrupt masking, then L1's vTPR
	 * is stored in vmcb02, but its value doesn't need to be copied from/to
	 * vmcb01 because it is copied from/to the virtual APIC's TPR register
	 * on each VM entry/exit.
	 *
	 * V_GIF: If nested vGIF is not used, KVM uses vmcb02's V_GIF for L1's
	 * V_GIF.  However, GIF is architecturally clear on each VM exit, thus
	 * there is no need to copy V_GIF from vmcb02 to vmcb01.
	 */
	if (!nested_exit_on_intr(svm))
		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);

	if (unlikely(guest_can_use(vcpu, X86_FEATURE_LBRV) &&
		     (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
		svm_copy_lbrs(vmcb12, vmcb02);
		svm_update_lbrv(vcpu);
	} else if (unlikely(vmcb01->control.virt_ext & LBR_CTL_ENABLE_MASK)) {
		svm_copy_lbrs(vmcb01, vmcb02);
		svm_update_lbrv(vcpu);
	}

	if (vnmi) {
		if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)
			vmcb01->control.int_ctl |= V_NMI_BLOCKING_MASK;
		else
			vmcb01->control.int_ctl &= ~V_NMI_BLOCKING_MASK;

		if (vcpu->arch.nmi_pending) {
			vcpu->arch.nmi_pending--;
			vmcb01->control.int_ctl |= V_NMI_PENDING_MASK;
		} else {
			vmcb01->control.int_ctl &= ~V_NMI_PENDING_MASK;
		}
	}

	/*
	 * On vmexit the GIF is set to false and
	 * no event can be injected in L1.
	 */
	svm_set_gif(svm, false);
	vmcb01->control.exit_int_info = 0;

	svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset;
	if (vmcb01->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
		vmcb01->control.tsc_offset = svm->vcpu.arch.tsc_offset;
		vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
	}

	if (kvm_caps.has_tsc_control &&
	    vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) {
		vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
		svm_write_tsc_multiplier(vcpu);
	}

	svm->nested.ctl.nested_cr3 = 0;

	/*
	 * Restore processor state that had been saved in vmcb01
	 */
	kvm_set_rflags(vcpu, vmcb01->save.rflags);
	svm_set_efer(vcpu, vmcb01->save.efer);
	svm_set_cr0(vcpu, vmcb01->save.cr0 | X86_CR0_PE);
	svm_set_cr4(vcpu, vmcb01->save.cr4);
	kvm_rax_write(vcpu, vmcb01->save.rax);
	kvm_rsp_write(vcpu, vmcb01->save.rsp);
	kvm_rip_write(vcpu, vmcb01->save.rip);

	svm->vcpu.arch.dr7 = DR7_FIXED_1;
	kvm_update_dr7(&svm->vcpu);

	trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
				       vmcb12->control.exit_info_1,
				       vmcb12->control.exit_info_2,
				       vmcb12->control.exit_int_info,
				       vmcb12->control.exit_int_info_err,
				       KVM_ISA_SVM);

	kvm_vcpu_unmap(vcpu, &map, true);

	nested_svm_transition_tlb_flush(vcpu);

	nested_svm_uninit_mmu_context(vcpu);

	rc = nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true);
	if (rc)
		return 1;

	/*
	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
	 * doesn't end up in L1.
	 */
	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(vcpu);
	kvm_clear_interrupt_queue(vcpu);

	/*
	 * If we are here following the completion of a VMRUN that
	 * is being single-stepped, queue the pending #DB intercept
	 * right now so that it can be accounted for before we execute
	 * L1's next instruction.
	 */
	if (unlikely(vmcb01->save.rflags & X86_EFLAGS_TF))
		kvm_queue_exception(&(svm->vcpu), DB_VECTOR);

	/*
	 * Un-inhibit the AVIC right away, so that other vCPUs can start
	 * to benefit from it right away.
	 */
	if (kvm_apicv_activated(vcpu->kvm))
		__kvm_vcpu_update_apicv(vcpu);

	return 0;
}

static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN))
		return;

	kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
	nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
}

int svm_allocate_nested(struct vcpu_svm *svm)
{
	struct page *vmcb02_page;

	if (svm->nested.initialized)
		return 0;

	vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	if (!vmcb02_page)
		return -ENOMEM;
	svm->nested.vmcb02.ptr = page_address(vmcb02_page);
	svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT);

	svm->nested.msrpm = svm_vcpu_alloc_msrpm();
	if (!svm->nested.msrpm)
		goto err_free_vmcb02;
	svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);

	svm->nested.initialized = true;
	return 0;

err_free_vmcb02:
	__free_page(vmcb02_page);
	return -ENOMEM;
}

void svm_free_nested(struct vcpu_svm *svm)
{
	if (!svm->nested.initialized)
		return;

	if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr))
		svm_switch_vmcb(svm, &svm->vmcb01);

	svm_vcpu_free_msrpm(svm->nested.msrpm);
	svm->nested.msrpm = NULL;

	__free_page(virt_to_page(svm->nested.vmcb02.ptr));
	svm->nested.vmcb02.ptr = NULL;

	/*
	 * When last_vmcb12_gpa matches the current vmcb12 gpa,
	 * some vmcb12 fields are not loaded if they are marked clean
	 * in the vmcb12, since in this case they are up to date already.
	 *
	 * When the vmcb02 is freed, this optimization becomes invalid.
	 */
	svm->nested.last_vmcb12_gpa = INVALID_GPA;

	svm->nested.initialized = false;
}

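/*
 * Forcibly leave guest mode without emulating #VMEXIT, e.g. when userspace
 * sets nested state with the guest-mode flag cleared.
 */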
void svm_leave_nested(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (is_guest_mode(vcpu)) {
		svm->nested.nested_run_pending = 0;
		svm->nested.vmcb12_gpa = INVALID_GPA;

		leave_guest_mode(vcpu);

		svm_switch_vmcb(svm, &svm->vmcb01);

		nested_svm_uninit_mmu_context(vcpu);
		vmcb_mark_all_dirty(svm->vmcb);

		if (kvm_apicv_activated(vcpu->kvm))
			kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
	}

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
}

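/*
 * Check L1's MSR permission map to decide whether an MSR intercept that
 * fired in L2 belongs to L1 (NESTED_EXIT_DONE) or to KVM (NESTED_EXIT_HOST).
 * Each MSR occupies two adjacent bits in the map: one for reads, one for
 * writes.
 */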
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;

	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	write = svm->vmcb->control.exit_info_1 & 1;
	mask = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
		return NESTED_EXIT_DONE;

	/* Offset is in 32 bit units but we need it in 8 bit units */
	offset *= 4;

	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4))
		return NESTED_EXIT_DONE;

	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

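/*
 * Consult L1's I/O permission map to decide whether an IOIO intercept from
 * L2 should be forwarded to L1.  A multi-byte access's permission bits may
 * straddle a byte boundary, hence the one- or two-byte read below.
 */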
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa  = svm->nested.ctl.iopm_base_pa + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

static int nested_svm_intercept(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

	switch (exit_code) {
	case SVM_EXIT_MSR:
		vmexit = nested_svm_exit_handled_msr(svm);
		break;
	case SVM_EXIT_IOIO:
		vmexit = nested_svm_intercept_ioio(svm);
		break;
	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		/*
		 * Host-intercepted exceptions have been checked already in
		 * nested_svm_exit_special.  There is nothing to do here,
		 * the vmexit is injected by svm_check_nested_events.
		 */
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_ERR: {
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	default: {
		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;
	}
	}

	return vmexit;
}

int nested_svm_exit_handled(struct vcpu_svm *svm)
{
	int vmexit;

	vmexit = nested_svm_intercept(svm);

	if (vmexit == NESTED_EXIT_DONE)
		nested_svm_vmexit(svm);

	return vmexit;
}

int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
{
	if (!(vcpu->arch.efer & EFER_SVME) || !is_paging(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	if (to_svm(vcpu)->vmcb->save.cpl) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	return 0;
}

7709aba8 SC |
1385 | static bool nested_svm_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector, |
1386 | u32 error_code) | |
883b0a91 | 1387 | { |
7709aba8 | 1388 | struct vcpu_svm *svm = to_svm(vcpu); |
883b0a91 | 1389 | |
d4963e31 | 1390 | return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector)); |
7c86663b | 1391 | } |
883b0a91 | 1392 | |
d4963e31 | 1393 | static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu) |
7c86663b | 1394 | { |
7709aba8 | 1395 | struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit; |
d4963e31 | 1396 | struct vcpu_svm *svm = to_svm(vcpu); |
db663af4 | 1397 | struct vmcb *vmcb = svm->vmcb; |
883b0a91 | 1398 | |
d4963e31 | 1399 | vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector; |
db663af4 | 1400 | vmcb->control.exit_code_hi = 0; |
7c86663b | 1401 | |
d4963e31 SC |
1402 | if (ex->has_error_code) |
1403 | vmcb->control.exit_info_1 = ex->error_code; | |
883b0a91 JR |
1404 | |
1405 | /* | |
1406 | * EXITINFO2 is undefined for all exception intercepts other | |
1407 | * than #PF. | |
1408 | */ | |
d4963e31 | 1409 | if (ex->vector == PF_VECTOR) { |
7709aba8 | 1410 | if (ex->has_payload) |
d4963e31 | 1411 | vmcb->control.exit_info_2 = ex->payload; |
7c86663b | 1412 | else |
d4963e31 SC |
1413 | vmcb->control.exit_info_2 = vcpu->arch.cr2; |
1414 | } else if (ex->vector == DB_VECTOR) { | |
e746c1f1 | 1415 | /* See kvm_check_and_inject_events(). */ |
d4963e31 SC |
1416 | kvm_deliver_exception_payload(vcpu, ex); |
1417 | ||
1418 | if (vcpu->arch.dr7 & DR7_GD) { | |
1419 | vcpu->arch.dr7 &= ~DR7_GD; | |
1420 | kvm_update_dr7(vcpu); | |
7c86663b | 1421 | } |
d4963e31 SC |
1422 | } else { |
1423 | WARN_ON(ex->has_payload); | |
1424 | } | |
883b0a91 | 1425 | |
7c86663b | 1426 | nested_svm_vmexit(svm); |
883b0a91 JR |
1427 | } |
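
/*
 * Worked example (illustrative): for an L2 #PF (vector 14) that L1
 * intercepts, the synthesized VM-Exit built above presents L1 with:
 *
 *   exit_code   = SVM_EXIT_EXCP_BASE + 14
 *   exit_info_1 = the page-fault error code
 *   exit_info_2 = the faulting address, i.e. the exception payload, the
 *                 value that would otherwise have been written to CR2
 */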
1428 | ||
5b672408 PB |
1429 | static inline bool nested_exit_on_init(struct vcpu_svm *svm) |
1430 | { | |
8fc78909 | 1431 | return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT); |
5b672408 PB |
1432 | } |
1433 | ||
33b22172 | 1434 | static int svm_check_nested_events(struct kvm_vcpu *vcpu) |
883b0a91 | 1435 | { |
5b672408 | 1436 | struct kvm_lapic *apic = vcpu->arch.apic; |
72c14e00 SC |
1437 | struct vcpu_svm *svm = to_svm(vcpu); |
1438 | /* | |
1439 | * Only a pending nested run blocks a pending exception. If there is a | |
1440 | * previously injected event, the pending exception occurred while said | |
1441 | * event was being delivered and thus needs to be handled. | |
1442 | */ | |
1443 | bool block_nested_exceptions = svm->nested.nested_run_pending; | |
1444 | /* | |
1445 | * New events (not exceptions) are only recognized at instruction | |
1446 | * boundaries. If an event needs reinjection, then KVM is handling a | |
1447 | * VM-Exit that occurred _during_ instruction execution; new events are | |
1448 | * blocked until the instruction completes. | |
1449 | */ | |
1450 | bool block_nested_events = block_nested_exceptions || | |
1451 | kvm_event_needs_reinjection(vcpu); | |
5b672408 PB |
1452 | |
1453 | if (lapic_in_kernel(vcpu) && | |
1454 | test_bit(KVM_APIC_INIT, &apic->pending_events)) { | |
1455 | if (block_nested_events) | |
1456 | return -EBUSY; | |
1457 | if (!nested_exit_on_init(svm)) | |
1458 | return 0; | |
3a87c7e0 | 1459 | nested_svm_simple_vmexit(svm, SVM_EXIT_INIT); |
5b672408 PB |
1460 | return 0; |
1461 | } | |
883b0a91 | 1462 | |
7709aba8 | 1463 | if (vcpu->arch.exception_vmexit.pending) { |
72c14e00 | 1464 | if (block_nested_exceptions) |
7c86663b | 1465 | return -EBUSY; |
d4963e31 | 1466 | nested_svm_inject_exception_vmexit(vcpu); |
7c86663b PB |
1467 | return 0; |
1468 | } | |
1469 | ||
7709aba8 SC |
1470 | if (vcpu->arch.exception.pending) { |
1471 | if (block_nested_exceptions) | |
1472 | return -EBUSY; | |
1473 | return 0; | |
1474 | } | |
1475 | ||
31e83e21 | 1476 | #ifdef CONFIG_KVM_SMM |
221e7610 | 1477 | if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) { |
55714cdd PB |
1478 | if (block_nested_events) |
1479 | return -EBUSY; | |
221e7610 PB |
1480 | if (!nested_exit_on_smi(svm)) |
1481 | return 0; | |
3a87c7e0 | 1482 | nested_svm_simple_vmexit(svm, SVM_EXIT_SMI); |
55714cdd PB |
1483 | return 0; |
1484 | } | |
31e83e21 | 1485 | #endif |
55714cdd | 1486 | |
221e7610 | 1487 | if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) { |
9c3d370a CA |
1488 | if (block_nested_events) |
1489 | return -EBUSY; | |
221e7610 PB |
1490 | if (!nested_exit_on_nmi(svm)) |
1491 | return 0; | |
3a87c7e0 | 1492 | nested_svm_simple_vmexit(svm, SVM_EXIT_NMI); |
9c3d370a CA |
1493 | return 0; |
1494 | } | |
1495 | ||
221e7610 | 1496 | if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) { |
883b0a91 JR |
1497 | if (block_nested_events) |
1498 | return -EBUSY; | |
221e7610 PB |
1499 | if (!nested_exit_on_intr(svm)) |
1500 | return 0; | |
3a87c7e0 SC |
1501 | trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); |
1502 | nested_svm_simple_vmexit(svm, SVM_EXIT_INTR); | |
883b0a91 JR |
1503 | return 0; |
1504 | } | |
1505 | ||
1506 | return 0; | |
1507 | } | |
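
/*
 * Illustrative sketch (hypothetical helper, not the kernel's code): every
 * event class handled above follows the same shape -- bail with -EBUSY
 * while a nested VMRUN (or, for non-exceptions, an event reinjection) is
 * pending, do nothing if L1 does not intercept the event, and otherwise
 * reflect it to L1 as a synthetic VM-Exit:
 */
static int example_reflect_event(struct vcpu_svm *svm, bool blocked,
				 bool l1_intercepts, u32 exit_code)
{
	if (blocked)
		return -EBUSY;		/* retry at the next opportunity */
	if (!l1_intercepts)
		return 0;		/* L0 or L2 handles the event */
	nested_svm_simple_vmexit(svm, exit_code);
	return 0;
}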
1508 | ||
1509 | int nested_svm_exit_special(struct vcpu_svm *svm) | |
1510 | { | |
1511 | u32 exit_code = svm->vmcb->control.exit_code; | |
3f4a812e | 1512 | struct kvm_vcpu *vcpu = &svm->vcpu; |
883b0a91 JR |
1513 | |
1514 | switch (exit_code) { | |
1515 | case SVM_EXIT_INTR: | |
1516 | case SVM_EXIT_NMI: | |
883b0a91 | 1517 | case SVM_EXIT_NPF: |
7c86663b PB |
1518 | return NESTED_EXIT_HOST; |
1519 | case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { | |
1520 | u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); | |
1521 | ||
4995a368 CA |
1522 | if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] & |
1523 | excp_bits) | |
883b0a91 | 1524 | return NESTED_EXIT_HOST; |
7c86663b | 1525 | else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR && |
68fd66f1 | 1526 | svm->vcpu.arch.apf.host_apf_flags) |
7c86663b | 1527 | /* Trap async PF even if not shadowing */ |
883b0a91 JR |
1528 | return NESTED_EXIT_HOST; |
1529 | break; | |
7c86663b | 1530 | } |
3f4a812e VK |
1531 | case SVM_EXIT_VMMCALL: |
1532 | /* Hyper-V L2 TLB flush hypercall is handled by L0 */ | |
1533 | if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) && | |
1534 | nested_svm_l2_tlb_flush_enabled(vcpu) && | |
1535 | kvm_hv_is_tlb_flush_hcall(vcpu)) | |
1536 | return NESTED_EXIT_HOST; | |
1537 | break; | |
883b0a91 JR |
1538 | default: |
1539 | break; | |
1540 | } | |
1541 | ||
1542 | return NESTED_EXIT_CONTINUE; | |
1543 | } | |
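
/*
 * Illustrative flow (a sketch of how the two routines above cooperate,
 * not the exit-handling code itself): on a VM-Exit from L2, L0 first asks
 * nested_svm_exit_special() whether it must handle the exit itself (INTR,
 * NMI, NPF, host-intercepted exceptions, async #PF); only on
 * NESTED_EXIT_CONTINUE does it consult vmcb12's intercepts and possibly
 * reflect the exit to L1:
 */
static void example_handle_l2_exit(struct vcpu_svm *svm)
{
	if (nested_svm_exit_special(svm) == NESTED_EXIT_CONTINUE)
		nested_svm_exit_handled(svm); /* may trigger a nested VM-Exit */
}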
33b22172 | 1544 | |
5228eb96 ML |
1545 | void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu) |
1546 | { | |
1547 | struct vcpu_svm *svm = to_svm(vcpu); | |
1548 | ||
1549 | vcpu->arch.tsc_scaling_ratio = | |
1550 | kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio, | |
1551 | svm->tsc_ratio_msr); | |
2d636990 | 1552 | svm_write_tsc_multiplier(vcpu); |
5228eb96 ML |
1553 | } |
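
/*
 * Illustrative sketch: SVM's TSC ratio is 8.32 fixed point, with
 * 1ULL << 32 representing a multiplier of 1.0. The effective L2 ratio
 * computed above is the fixed-point product of L1's ratio and the ratio
 * L1 programmed into the TSC_RATIO MSR for L2, conceptually:
 */
static inline u64 example_nested_tsc_ratio(u64 l1_ratio, u64 l2_ratio)
{
	/* e.g. 0.5 (1ULL << 31) times 2.0 (1ULL << 33) yields 1.0 */
	return (u64)(((unsigned __int128)l1_ratio * l2_ratio) >> 32);
}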
1554 | ||
8fc78909 EGE |
1555 | /* Inverse of nested_copy_vmcb_control_to_cache(); the asid field is copied too. */
1556 | static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst, | |
1557 | struct vmcb_ctrl_area_cached *from) | |
1558 | { | |
1559 | unsigned int i; | |
1560 | ||
1561 | memset(dst, 0, sizeof(struct vmcb_control_area)); | |
1562 | ||
1563 | for (i = 0; i < MAX_INTERCEPT; i++) | |
1564 | dst->intercepts[i] = from->intercepts[i]; | |
1565 | ||
1566 | dst->iopm_base_pa = from->iopm_base_pa; | |
1567 | dst->msrpm_base_pa = from->msrpm_base_pa; | |
1568 | dst->tsc_offset = from->tsc_offset; | |
1569 | dst->asid = from->asid; | |
1570 | dst->tlb_ctl = from->tlb_ctl; | |
1571 | dst->int_ctl = from->int_ctl; | |
1572 | dst->int_vector = from->int_vector; | |
1573 | dst->int_state = from->int_state; | |
1574 | dst->exit_code = from->exit_code; | |
1575 | dst->exit_code_hi = from->exit_code_hi; | |
1576 | dst->exit_info_1 = from->exit_info_1; | |
1577 | dst->exit_info_2 = from->exit_info_2; | |
1578 | dst->exit_int_info = from->exit_int_info; | |
1579 | dst->exit_int_info_err = from->exit_int_info_err; | |
1580 | dst->nested_ctl = from->nested_ctl; | |
1581 | dst->event_inj = from->event_inj; | |
1582 | dst->event_inj_err = from->event_inj_err; | |
00f08d99 | 1583 | dst->next_rip = from->next_rip; |
8fc78909 EGE |
1584 | dst->nested_cr3 = from->nested_cr3; |
1585 | dst->virt_ext = from->virt_ext; | |
1586 | dst->pause_filter_count = from->pause_filter_count; | |
1587 | dst->pause_filter_thresh = from->pause_filter_thresh; | |
68ae7c7b | 1588 | /* 'clean' and 'hv_enlightenments' are not changed by KVM */ |
8fc78909 EGE |
1589 | } |
1590 | ||
cc440cda PB |
1591 | static int svm_get_nested_state(struct kvm_vcpu *vcpu, |
1592 | struct kvm_nested_state __user *user_kvm_nested_state, | |
1593 | u32 user_data_size) | |
1594 | { | |
1595 | struct vcpu_svm *svm; | |
8fc78909 EGE |
1596 | struct vmcb_control_area *ctl; |
1597 | unsigned long r; | |
cc440cda PB |
1598 | struct kvm_nested_state kvm_state = { |
1599 | .flags = 0, | |
1600 | .format = KVM_STATE_NESTED_FORMAT_SVM, | |
1601 | .size = sizeof(kvm_state), | |
1602 | }; | |
1603 | struct vmcb __user *user_vmcb = (struct vmcb __user *) | |
1604 | &user_kvm_nested_state->data.svm[0]; | |
1605 | ||
1606 | if (!vcpu) | |
1607 | return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE; | |
1608 | ||
1609 | svm = to_svm(vcpu); | |
1610 | ||
1611 | if (user_data_size < kvm_state.size) | |
1612 | goto out; | |
1613 | ||
1614 | /* First fill in the header and copy it out. */ | |
1615 | if (is_guest_mode(vcpu)) { | |
0dd16b5b | 1616 | kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb12_gpa; |
cc440cda PB |
1617 | kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE; |
1618 | kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; | |
1619 | ||
1620 | if (svm->nested.nested_run_pending) | |
1621 | kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; | |
1622 | } | |
1623 | ||
1624 | if (gif_set(svm)) | |
1625 | kvm_state.flags |= KVM_STATE_NESTED_GIF_SET; | |
1626 | ||
1627 | if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state))) | |
1628 | return -EFAULT; | |
1629 | ||
1630 | if (!is_guest_mode(vcpu)) | |
1631 | goto out; | |
1632 | ||
1633 | /* | |
1634 | * Copy out the full VMCB-sized area rather than just the size | |
1635 | * of the structs: clear it first so the padding reads as zeroes. | |
1636 | */ | |
1637 | if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE)) | |
1638 | return -EFAULT; | |
8fc78909 EGE |
1639 | |
1640 | ctl = kzalloc(sizeof(*ctl), GFP_KERNEL); | |
1641 | if (!ctl) | |
1642 | return -ENOMEM; | |
1643 | ||
1644 | nested_copy_vmcb_cache_to_control(ctl, &svm->nested.ctl); | |
1645 | r = copy_to_user(&user_vmcb->control, ctl, | |
1646 | sizeof(user_vmcb->control)); | |
1647 | kfree(ctl); | |
1648 | if (r) | |
cc440cda | 1649 | return -EFAULT; |
8fc78909 | 1650 | |
4995a368 | 1651 | if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save, |
cc440cda PB |
1652 | sizeof(user_vmcb->save))) |
1653 | return -EFAULT; | |
cc440cda PB |
1654 | out: |
1655 | return kvm_state.size; | |
1656 | } | |
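
/*
 * Illustrative userspace counterpart (a sketch of the standard
 * KVM_GET_NESTED_STATE flow, not VMM code from this tree; the helper name
 * is hypothetical): the caller advertises its buffer size in 'size', and
 * the ioctl fails with E2BIG, reporting the required size, if the buffer
 * is too small.
 */
#include <linux/kvm.h>
#include <sys/ioctl.h>

static int example_save_nested_state(int vcpu_fd,
				     struct kvm_nested_state *buf,
				     __u32 bufsz)
{
	buf->size = bufsz;
	return ioctl(vcpu_fd, KVM_GET_NESTED_STATE, buf);
}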
1657 | ||
1658 | static int svm_set_nested_state(struct kvm_vcpu *vcpu, | |
1659 | struct kvm_nested_state __user *user_kvm_nested_state, | |
1660 | struct kvm_nested_state *kvm_state) | |
1661 | { | |
1662 | struct vcpu_svm *svm = to_svm(vcpu); | |
cc440cda PB |
1663 | struct vmcb __user *user_vmcb = (struct vmcb __user *) |
1664 | &user_kvm_nested_state->data.svm[0]; | |
6ccbd29a JR |
1665 | struct vmcb_control_area *ctl; |
1666 | struct vmcb_save_area *save; | |
b7a3d8b6 | 1667 | struct vmcb_save_area_cached save_cached; |
8fc78909 | 1668 | struct vmcb_ctrl_area_cached ctl_cached; |
dbc4739b | 1669 | unsigned long cr0; |
6ccbd29a | 1670 | int ret; |
cc440cda | 1671 | |
6ccbd29a JR |
1672 | BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) > |
1673 | KVM_STATE_NESTED_SVM_VMCB_SIZE); | |
1674 | ||
cc440cda PB |
1675 | if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM) |
1676 | return -EINVAL; | |
1677 | ||
1678 | if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE | | |
1679 | KVM_STATE_NESTED_RUN_PENDING | | |
1680 | KVM_STATE_NESTED_GIF_SET)) | |
1681 | return -EINVAL; | |
1682 | ||
1683 | /* | |
1684 | * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's | |
1685 | * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed. | |
1686 | */ | |
1687 | if (!(vcpu->arch.efer & EFER_SVME)) { | |
1688 | /* GIF=1 and no guest mode are required if SVME=0. */ | |
1689 | if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET) | |
1690 | return -EINVAL; | |
1691 | } | |
1692 | ||
1693 | /* SMM temporarily disables SVM, so we cannot be in guest mode. */ | |
1694 | if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) | |
1695 | return -EINVAL; | |
1696 | ||
1697 | if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { | |
f7e57078 | 1698 | svm_leave_nested(vcpu); |
d5cd6f34 VK |
1699 | svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); |
1700 | return 0; | |
cc440cda PB |
1701 | } |
1702 | ||
1703 | if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa)) | |
1704 | return -EINVAL; | |
1705 | if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE) | |
1706 | return -EINVAL; | |
cc440cda | 1707 | |
6ccbd29a | 1708 | ret = -ENOMEM; |
eba04b20 SC |
1709 | ctl = kzalloc(sizeof(*ctl), GFP_KERNEL_ACCOUNT); |
1710 | save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT); | |
6ccbd29a JR |
1711 | if (!ctl || !save) |
1712 | goto out_free; | |
1713 | ||
1714 | ret = -EFAULT; | |
1715 | if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl))) | |
1716 | goto out_free; | |
1717 | if (copy_from_user(save, &user_vmcb->save, sizeof(*save))) | |
1718 | goto out_free; | |
1719 | ||
1720 | ret = -EINVAL; | |
66c03a92 | 1721 | __nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl); |
8fc78909 | 1722 | if (!__nested_vmcb_check_controls(vcpu, &ctl_cached)) |
6ccbd29a | 1723 | goto out_free; |
cc440cda PB |
1724 | |
1725 | /* | |
1726 | * Processor state contains L2 state. Check that it is | |
cb9b6a1b | 1727 | * valid for guest mode (see nested_vmcb_check_save). |
cc440cda PB |
1728 | */ |
1729 | cr0 = kvm_read_cr0(vcpu); | |
1730 | if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW)) | |
6ccbd29a | 1731 | goto out_free; |
cc440cda PB |
1732 | |
1733 | /* | |
1734 | * Validate host state saved from before VMRUN (see | |
1735 | * nested_svm_check_permissions). | |
cc440cda | 1736 | */ |
b7a3d8b6 | 1737 | __nested_copy_vmcb_save_to_cache(&save_cached, save); |
6906e06d KS |
1738 | if (!(save->cr0 & X86_CR0_PG) || |
1739 | !(save->cr0 & X86_CR0_PE) || | |
1740 | (save->rflags & X86_EFLAGS_VM) || | |
b7a3d8b6 | 1741 | !__nested_vmcb_check_save(vcpu, &save_cached)) |
6ccbd29a | 1742 | goto out_free; |
cc440cda | 1743 | |
b222b0b8 | 1744 | |
cc440cda | 1745 | /* |
4995a368 CA |
1746 | * All checks done, we can enter guest mode. Userspace provides | |
1747 | * vmcb12.control, which will be combined with L1's controls and | |
1748 | * stored into vmcb02, plus the L1 save state, which we store in vmcb01. | |
1749 | * If needed, L2 registers are moved from the current VMCB to vmcb02. | |
cc440cda | 1750 | */ |
81f76ada | 1751 | |
9d290e16 | 1752 | if (is_guest_mode(vcpu)) |
f7e57078 | 1753 | svm_leave_nested(vcpu); |
9d290e16 ML |
1754 | else |
1755 | svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; | |
1756 | ||
063ab16c ML |
1757 | svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); |
1758 | ||
81f76ada ML |
1759 | svm->nested.nested_run_pending = |
1760 | !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); | |
1761 | ||
0dd16b5b | 1762 | svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; |
c08f390a | 1763 | |
2bb16bea | 1764 | svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save); |
7907160d | 1765 | nested_copy_vmcb_control_to_cache(svm, ctl); |
4995a368 CA |
1766 | |
1767 | svm_switch_vmcb(svm, &svm->nested.vmcb02); | |
da0b93d6 | 1768 | nested_vmcb02_prepare_control(svm, svm->vmcb->save.rip, svm->vmcb->save.cs.base); |
e1779c27 ML |
1769 | |
1770 | /* | |
1771 | * While the nested guest CR3 was already checked and set by | |
1772 | * KVM_SET_SREGS, it was set before the nested state was loaded, | |
1773 | * so the MMU might not have been initialized correctly. | |
1774 | * Set it again to fix this. | |
1775 | */ | |
1776 | ||
1777 | ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3, | |
1778 | nested_npt_enabled(svm), false); | |
1779 | if (WARN_ON_ONCE(ret)) | |
1780 | goto out_free; | |
1781 | ||
73c25546 | 1782 | svm->nested.force_msr_bitmap_recalc = true; |
e1779c27 | 1783 | |
a7d5c7ce | 1784 | kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); |
6ccbd29a JR |
1785 | ret = 0; |
1786 | out_free: | |
1787 | kfree(save); | |
1788 | kfree(ctl); | |
1789 | ||
1790 | return ret; | |
cc440cda PB |
1791 | } |
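
/*
 * Illustrative restore-side sketch (assumptions: EFER.SVME was already
 * restored via KVM_SET_SREGS/KVM_SET_MSRS, and 'buf' came from
 * KVM_GET_NESTED_STATE on the source; the helper name is hypothetical).
 * Note that vmcb12's MSR bitmap is not merged here; that is deferred to
 * the next KVM_RUN through KVM_REQ_GET_NESTED_STATE_PAGES and
 * svm_get_nested_state_pages() below.
 */
static int example_restore_nested_state(int vcpu_fd,
					struct kvm_nested_state *buf)
{
	return ioctl(vcpu_fd, KVM_SET_NESTED_STATE, buf);
}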
1792 | ||
232f75d3 ML |
1793 | static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) |
1794 | { | |
1795 | struct vcpu_svm *svm = to_svm(vcpu); | |
1796 | ||
1797 | if (WARN_ON(!is_guest_mode(vcpu))) | |
1798 | return true; | |
1799 | ||
158a48ec ML |
1800 | if (!vcpu->arch.pdptrs_from_userspace && |
1801 | !nested_npt_enabled(svm) && is_pae_paging(vcpu)) | |
b222b0b8 ML |
1802 | /* |
1803 | * Reload the guest's PDPTRs: after a migration, the guest CR3 | |
1804 | * might be restored prior to setting the nested state, which | |
1805 | * can lead to loading the wrong PDPTRs. | |
1806 | */ | |
2df4a5eb | 1807 | if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3))) |
b222b0b8 | 1808 | return false; |
232f75d3 ML |
1809 | |
1810 | if (!nested_svm_vmrun_msrpm(svm)) { | |
1811 | vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; | |
1812 | vcpu->run->internal.suberror = | |
1813 | KVM_INTERNAL_ERROR_EMULATION; | |
1814 | vcpu->run->internal.ndata = 0; | |
1815 | return false; | |
1816 | } | |
1817 | ||
3f4a812e VK |
1818 | if (kvm_hv_verify_vp_assist(vcpu)) |
1819 | return false; | |
1820 | ||
232f75d3 ML |
1821 | return true; |
1822 | } | |
1823 | ||
33b22172 | 1824 | struct kvm_x86_nested_ops svm_nested_ops = { |
f7e57078 | 1825 | .leave_nested = svm_leave_nested, |
7709aba8 | 1826 | .is_exception_vmexit = nested_svm_is_exception_vmexit, |
33b22172 | 1827 | .check_events = svm_check_nested_events, |
cb6a32c2 | 1828 | .triple_fault = nested_svm_triple_fault, |
a7d5c7ce | 1829 | .get_nested_state_pages = svm_get_nested_state_pages, |
cc440cda PB |
1830 | .get_state = svm_get_nested_state, |
1831 | .set_state = svm_set_nested_state, | |
b0c9c25e | 1832 | .hv_inject_synthetic_vmexit_post_tlb_flush = svm_hv_inject_synthetic_vmexit_post_tlb_flush, |
33b22172 | 1833 | }; |