/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This header defines architecture specific interfaces, x86 version
 */

#ifndef _ASM_X86_KVM_HOST_H
#define _ASM_X86_KVM_HOST_H

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/tracepoint.h>
#include <linux/cpumask.h>
#include <linux/irq_work.h>
#include <linux/irq.h>
#include <linux/workqueue.h>

#include <linux/kvm.h>
#include <linux/kvm_para.h>
#include <linux/kvm_types.h>
#include <linux/perf_event.h>
#include <linux/pvclock_gtod.h>
#include <linux/clocksource.h>
#include <linux/irqbypass.h>
#include <linux/kfifo.h>
#include <linux/sched/vhost_task.h>
#include <linux/call_once.h>
#include <linux/atomic.h>

#include <asm/apic.h>
#include <asm/pvclock-abi.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/msr-index.h>
#include <asm/msr.h>
#include <asm/asm.h>
#include <asm/irq_remapping.h>
#include <asm/kvm_page_track.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/reboot.h>
#include <hyperv/hvhdk.h>

#define __KVM_HAVE_ARCH_VCPU_DEBUGFS

/*
 * CONFIG_KVM_MAX_NR_VCPUS is defined iff CONFIG_KVM!=n, provide a dummy max if
 * KVM is disabled (arbitrarily use the default from CONFIG_KVM_MAX_NR_VCPUS).
 */
#ifdef CONFIG_KVM_MAX_NR_VCPUS
#define KVM_MAX_VCPUS CONFIG_KVM_MAX_NR_VCPUS
#else
#define KVM_MAX_VCPUS 1024
#endif

/*
 * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
 * might be larger than the actual number of VCPUs because the
 * APIC ID encodes CPU topology information.
 *
 * In the worst case, we'll need less than one extra bit for the
 * Core ID, and less than one extra bit for the Package (Die) ID,
 * so a ratio of 4 should be enough.
 */
#define KVM_VCPU_ID_RATIO 4
#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
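
/*
 * For example, with the default KVM_MAX_VCPUS of 1024 this yields
 * KVM_MAX_VCPU_IDS == 4096, i.e. two extra bits of APIC ID space on top
 * of the ten bits needed to number 1024 vCPUs.
 */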

/* memory slots that are not exposed to userspace */
#define KVM_INTERNAL_MEM_SLOTS 3

#define KVM_HALT_POLL_NS_DEFAULT 200000

#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS

#define KVM_DIRTY_LOG_MANUAL_CAPS  (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
                                    KVM_DIRTY_LOG_INITIALLY_SET)

#define KVM_BUS_LOCK_DETECTION_VALID_MODE  (KVM_BUS_LOCK_DETECTION_OFF | \
                                            KVM_BUS_LOCK_DETECTION_EXIT)

#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS   (KVM_X86_NOTIFY_VMEXIT_ENABLED | \
                                            KVM_X86_NOTIFY_VMEXIT_USER)

/* x86-specific vcpu->requests bit members */
#define KVM_REQ_MIGRATE_TIMER		KVM_ARCH_REQ(0)
#define KVM_REQ_REPORT_TPR_ACCESS	KVM_ARCH_REQ(1)
#define KVM_REQ_TRIPLE_FAULT		KVM_ARCH_REQ(2)
#define KVM_REQ_MMU_SYNC		KVM_ARCH_REQ(3)
#define KVM_REQ_CLOCK_UPDATE		KVM_ARCH_REQ(4)
#define KVM_REQ_LOAD_MMU_PGD		KVM_ARCH_REQ(5)
#define KVM_REQ_EVENT			KVM_ARCH_REQ(6)
#define KVM_REQ_APF_HALT		KVM_ARCH_REQ(7)
#define KVM_REQ_STEAL_UPDATE		KVM_ARCH_REQ(8)
#define KVM_REQ_NMI			KVM_ARCH_REQ(9)
#define KVM_REQ_PMU			KVM_ARCH_REQ(10)
#define KVM_REQ_PMI			KVM_ARCH_REQ(11)
#ifdef CONFIG_KVM_SMM
#define KVM_REQ_SMI			KVM_ARCH_REQ(12)
#endif
#define KVM_REQ_MASTERCLOCK_UPDATE	KVM_ARCH_REQ(13)
#define KVM_REQ_MCLOCK_INPROGRESS \
	KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_SCAN_IOAPIC \
	KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_GLOBAL_CLOCK_UPDATE	KVM_ARCH_REQ(16)
#define KVM_REQ_APIC_PAGE_RELOAD \
	KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_CRASH		KVM_ARCH_REQ(18)
#define KVM_REQ_IOAPIC_EOI_EXIT		KVM_ARCH_REQ(19)
#define KVM_REQ_HV_RESET		KVM_ARCH_REQ(20)
#define KVM_REQ_HV_EXIT			KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER		KVM_ARCH_REQ(22)
#define KVM_REQ_LOAD_EOI_EXITMAP	KVM_ARCH_REQ(23)
#define KVM_REQ_GET_NESTED_STATE_PAGES	KVM_ARCH_REQ(24)
#define KVM_REQ_APICV_UPDATE \
	KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_TLB_FLUSH_CURRENT	KVM_ARCH_REQ(26)
#define KVM_REQ_TLB_FLUSH_GUEST \
	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
#define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
	KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_TLB_FLUSH \
	KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE \
	KVM_ARCH_REQ_FLAGS(34, KVM_REQUEST_WAIT)

#define CR0_RESERVED_BITS                                               \
	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))

#define CR4_RESERVED_BITS                                               \
	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE \
			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP \
			  | X86_CR4_LAM_SUP))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)


#define INVALID_PAGE (~(hpa_t)0)
#define VALID_PAGE(x) ((x) != INVALID_PAGE)

/* KVM Hugepage definitions for x86 */
#define KVM_MAX_HUGEPAGE_LEVEL	PG_LEVEL_1G
#define KVM_NR_PAGE_SIZES	(KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1)
#define KVM_HPAGE_GFN_SHIFT(x)	(((x) - 1) * 9)
#define KVM_HPAGE_SHIFT(x)	(PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
#define KVM_HPAGE_SIZE(x)	(1UL << KVM_HPAGE_SHIFT(x))
#define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
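
/*
 * Worked example: PG_LEVEL_2M gives KVM_HPAGE_GFN_SHIFT() == 9 and
 * KVM_HPAGE_SIZE() == 1UL << 21 == 2 MiB; PG_LEVEL_1G gives a GFN shift
 * of 18 and a size of 1UL << 30 == 1 GiB.
 */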

#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50
#define KVM_MIN_ALLOC_MMU_PAGES 64UL
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
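/* i.e. 1 << 12 == 4096 buckets for the mmu_page_hash table in struct kvm_arch. */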
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 256
#define KVM_NR_VAR_MTRR 8

#define ASYNC_PF_PER_VCPU 64

enum kvm_reg {
	VCPU_REGS_RAX = __VCPU_REGS_RAX,
	VCPU_REGS_RCX = __VCPU_REGS_RCX,
	VCPU_REGS_RDX = __VCPU_REGS_RDX,
	VCPU_REGS_RBX = __VCPU_REGS_RBX,
	VCPU_REGS_RSP = __VCPU_REGS_RSP,
	VCPU_REGS_RBP = __VCPU_REGS_RBP,
	VCPU_REGS_RSI = __VCPU_REGS_RSI,
	VCPU_REGS_RDI = __VCPU_REGS_RDI,
#ifdef CONFIG_X86_64
	VCPU_REGS_R8  = __VCPU_REGS_R8,
	VCPU_REGS_R9  = __VCPU_REGS_R9,
	VCPU_REGS_R10 = __VCPU_REGS_R10,
	VCPU_REGS_R11 = __VCPU_REGS_R11,
	VCPU_REGS_R12 = __VCPU_REGS_R12,
	VCPU_REGS_R13 = __VCPU_REGS_R13,
	VCPU_REGS_R14 = __VCPU_REGS_R14,
	VCPU_REGS_R15 = __VCPU_REGS_R15,
#endif
	VCPU_REGS_RIP,
	NR_VCPU_REGS,

	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
	VCPU_EXREG_CR0,
	VCPU_EXREG_CR3,
	VCPU_EXREG_CR4,
	VCPU_EXREG_RFLAGS,
	VCPU_EXREG_SEGMENTS,
	VCPU_EXREG_EXIT_INFO_1,
	VCPU_EXREG_EXIT_INFO_2,
};

enum {
	VCPU_SREG_ES,
	VCPU_SREG_CS,
	VCPU_SREG_SS,
	VCPU_SREG_DS,
	VCPU_SREG_FS,
	VCPU_SREG_GS,
	VCPU_SREG_TR,
	VCPU_SREG_LDTR,
};

enum exit_fastpath_completion {
	EXIT_FASTPATH_NONE,
	EXIT_FASTPATH_REENTER_GUEST,
	EXIT_FASTPATH_EXIT_HANDLED,
	EXIT_FASTPATH_EXIT_USERSPACE,
};
typedef enum exit_fastpath_completion fastpath_t;

struct x86_emulate_ctxt;
struct x86_exception;
union kvm_smram;
enum x86_intercept;
enum x86_intercept_stage;

#define KVM_NR_DB_REGS	4

#define DR6_BUS_LOCK	(1 << 11)
#define DR6_BD		(1 << 13)
#define DR6_BS		(1 << 14)
#define DR6_BT		(1 << 15)
#define DR6_RTM		(1 << 16)
/*
 * DR6_ACTIVE_LOW combines fixed-1 and active-low bits.
 * We can regard all the bits in DR6_FIXED_1 as active-low bits;
 * they will never be 0 for now, but when they are defined
 * in the future it will require no code change.
 *
 * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
 */
#define DR6_ACTIVE_LOW	0xffff0ff0
#define DR6_VOLATILE	0x0001e80f
#define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)
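
/*
 * With the values above, DR6_FIXED_1 evaluates to 0xfffe07f0: all of
 * DR6_ACTIVE_LOW minus the volatile status bits (B0-B3, BUS_LOCK, BD,
 * BS, BT and RTM).
 */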

#define DR7_BP_EN_MASK	0x000000ff
#define DR7_GE		(1 << 9)
#define DR7_GD		(1 << 13)
#define DR7_VOLATILE	0xffff2bff

#define KVM_GUESTDBG_VALID_MASK \
	(KVM_GUESTDBG_ENABLE | \
	KVM_GUESTDBG_SINGLESTEP | \
	KVM_GUESTDBG_USE_HW_BP | \
	KVM_GUESTDBG_USE_SW_BP | \
	KVM_GUESTDBG_INJECT_BP | \
	KVM_GUESTDBG_INJECT_DB | \
	KVM_GUESTDBG_BLOCKIRQ)

#define PFERR_PRESENT_MASK	BIT(0)
#define PFERR_WRITE_MASK	BIT(1)
#define PFERR_USER_MASK		BIT(2)
#define PFERR_RSVD_MASK		BIT(3)
#define PFERR_FETCH_MASK	BIT(4)
#define PFERR_PK_MASK		BIT(5)
#define PFERR_SGX_MASK		BIT(15)
#define PFERR_GUEST_RMP_MASK	BIT_ULL(31)
#define PFERR_GUEST_FINAL_MASK	BIT_ULL(32)
#define PFERR_GUEST_PAGE_MASK	BIT_ULL(33)
#define PFERR_GUEST_ENC_MASK	BIT_ULL(34)
#define PFERR_GUEST_SIZEM_MASK	BIT_ULL(35)
#define PFERR_GUEST_VMPL_MASK	BIT_ULL(36)

/*
 * IMPLICIT_ACCESS is a KVM-defined flag used to correctly perform SMAP checks
 * when emulating instructions that trigger implicit access.
 */
#define PFERR_IMPLICIT_ACCESS	BIT_ULL(48)
/*
 * PRIVATE_ACCESS is a KVM-defined flag used to indicate that a fault occurred
 * when the guest was accessing private memory.
 */
#define PFERR_PRIVATE_ACCESS	BIT_ULL(49)
#define PFERR_SYNTHETIC_MASK	(PFERR_IMPLICIT_ACCESS | PFERR_PRIVATE_ACCESS)

/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC	0
/*
 * The following bit is set with PV-EOI, unset on EOI.
 * We detect PV-EOI changes made by the guest by comparing
 * this bit with PV-EOI in guest memory.
 * See the implementation in apic_update_pv_eoi.
 */
#define KVM_APIC_PV_EOI_PENDING	1

struct kvm_kernel_irq_routing_entry;

/*
 * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
 * also includes TDP pages) to determine whether or not a page can be used in
 * the given MMU context. This is a subset of the overall kvm_cpu_role to
 * minimize the size of kvm_memory_slot.arch.gfn_write_track, i.e. allows
 * allocating 2 bytes per gfn instead of 4 bytes per gfn.
 *
 * Upper-level shadow pages having gptes are tracked for write-protection via
 * gfn_write_track. As above, gfn_write_track is a 16 bit counter, so KVM must
 * not create more than 2^16-1 upper-level shadow pages at a single gfn,
 * otherwise gfn_write_track will overflow and explosions will ensue.
 *
 * A unique shadow page (SP) for a gfn is created if and only if an existing SP
 * cannot be reused. The ability to reuse a SP is tracked by its role, which
 * incorporates various mode bits and properties of the SP. Roughly speaking,
 * the number of unique SPs that can theoretically be created is 2^n, where n
 * is the number of bits that are used to compute the role.
 *
 * But, even though there are 20 bits in the mask below, not all combinations
 * of modes and flags are possible:
 *
 *   - invalid shadow pages are not accounted, and mirror pages are not
 *     shadowed, so the bits are effectively 18.
 *
 *   - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
 *     execonly and ad_disabled are only used for nested EPT which has
 *     has_4_byte_gpte=0. Therefore, 2 bits are always unused.
 *
 *   - the 4 bits of level are effectively limited to the values 2/3/4/5,
 *     as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE
 *     paging has exactly one upper level, making level completely redundant
 *     when has_4_byte_gpte=1.
 *
 *   - on top of this, smep_andnot_wp and smap_andnot_wp are only set if
 *     cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
 *
 * Therefore, the maximum number of possible upper-level shadow pages for a
 * single gfn is a bit less than 2^13.
 */
union kvm_mmu_page_role {
	u32 word;
	struct {
		unsigned level:4;
		unsigned has_4_byte_gpte:1;
		unsigned quadrant:2;
		unsigned direct:1;
		unsigned access:3;
		unsigned invalid:1;
		unsigned efer_nx:1;
		unsigned cr0_wp:1;
		unsigned smep_andnot_wp:1;
		unsigned smap_andnot_wp:1;
		unsigned ad_disabled:1;
		unsigned guest_mode:1;
		unsigned passthrough:1;
		unsigned is_mirror:1;
		unsigned :4;

		/*
		 * This is left at the top of the word so that
		 * kvm_memslots_for_spte_role can extract it with a
		 * simple shift. While there is room, give it a whole
		 * byte so it is also faster to load it from memory.
		 */
		unsigned smm:8;
	};
};
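
/*
 * Illustration: the fields preceding smm occupy exactly 24 bits, so with
 * x86's bitfield layout smm lands in bits 31:24 and the "simple shift"
 * mentioned above is just (role.word >> 24).
 */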

/*
 * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties
 * relevant to the current MMU configuration. When loading CR0, CR4, or EFER,
 * including on nested transitions, if nothing in the full role changes then
 * MMU re-configuration can be skipped. @valid bit is set on first usage so we
 * don't treat all-zero structure as valid data.
 *
 * The properties that are tracked in the extended role but not the page role
 * are for things that either (a) do not affect the validity of the shadow page
 * or (b) are indirectly reflected in the shadow page's role. For example,
 * CR4.PKE only affects permission checks for software walks of the guest page
 * tables (because KVM doesn't support Protection Keys with shadow paging), and
 * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level.
 *
 * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role.
 * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and
 * SMAP, but the MMU's permission checks for software walks need to be SMEP and
 * SMAP aware regardless of CR0.WP.
 */
union kvm_mmu_extended_role {
	u32 word;
	struct {
		unsigned int valid:1;
		unsigned int execonly:1;
		unsigned int cr4_pse:1;
		unsigned int cr4_pke:1;
		unsigned int cr4_smap:1;
		unsigned int cr4_smep:1;
		unsigned int cr4_la57:1;
		unsigned int efer_lma:1;
	};
};

union kvm_cpu_role {
	u64 as_u64;
	struct {
		union kvm_mmu_page_role base;
		union kvm_mmu_extended_role ext;
	};
};

struct kvm_rmap_head {
	atomic_long_t val;
};

struct kvm_pio_request {
	unsigned long count;
	int in;
	int port;
	int size;
};

#define PT64_ROOT_MAX_LEVEL 5

struct rsvd_bits_validate {
	u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL];
	u64 bad_mt_xwr;
};

struct kvm_mmu_root_info {
	gpa_t pgd;
	hpa_t hpa;
};

#define KVM_MMU_ROOT_INFO_INVALID \
	((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })

#define KVM_MMU_NUM_PREV_ROOTS 3

#define KVM_MMU_ROOT_CURRENT		BIT(0)
#define KVM_MMU_ROOT_PREVIOUS(i)	BIT(1+i)
#define KVM_MMU_ROOTS_ALL		(BIT(1 + KVM_MMU_NUM_PREV_ROOTS) - 1)
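
/*
 * E.g. with KVM_MMU_NUM_PREV_ROOTS == 3, KVM_MMU_ROOTS_ALL is 0xf: bit 0
 * selects the current root and bits 1-3 select the previous roots.
 */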

#define KVM_HAVE_MMU_RWLOCK

struct kvm_mmu_page;
struct kvm_page_fault;

/*
 * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
 * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the
 * current mmu mode.
 */
struct kvm_mmu {
	unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu);
	u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
	int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
	void (*inject_page_fault)(struct kvm_vcpu *vcpu,
				  struct x86_exception *fault);
	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			    gpa_t gva_or_gpa, u64 access,
			    struct x86_exception *exception);
	int (*sync_spte)(struct kvm_vcpu *vcpu,
			 struct kvm_mmu_page *sp, int i);
	struct kvm_mmu_root_info root;
	hpa_t mirror_root_hpa;
	union kvm_cpu_role cpu_role;
	union kvm_mmu_page_role root_role;

	/*
	 * The pkru_mask indicates if protection key checks are needed. It
	 * consists of 16 domains indexed by page fault error code bits [4:1],
	 * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables.
	 * Each domain has 2 bits which are ANDed with AD and WD from PKRU.
	 */
	u32 pkru_mask;

	struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];

	/*
	 * Bitmap; bit set = permission fault
	 * Byte index: page fault error code [4:1]
	 * Bit index: pte permissions in ACC_* format
	 */
	u8 permissions[16];

	u64 *pae_root;
	u64 *pml4_root;
	u64 *pml5_root;

	/*
	 * check zero bits on shadow page table entries, these
	 * bits include not only hardware reserved bits but also
	 * the bits spte never used.
	 */
	struct rsvd_bits_validate shadow_zero_check;

	struct rsvd_bits_validate guest_rsvd_check;

	u64 pdptrs[4]; /* pae */
};

enum pmc_type {
	KVM_PMC_GP = 0,
	KVM_PMC_FIXED,
};

struct kvm_pmc {
	enum pmc_type type;
	u8 idx;
	bool is_paused;
	bool intr;
	/*
	 * Base value of the PMC counter, relative to the *consumed* count in
	 * the associated perf_event. This value includes counter updates from
	 * the perf_event and emulated_count since the last time the counter
	 * was reprogrammed, but it is *not* the current value as seen by the
	 * guest or userspace.
	 *
	 * The count is relative to the associated perf_event so that KVM
	 * doesn't need to reprogram the perf_event every time the guest writes
	 * to the counter.
	 */
	u64 counter;
	/*
	 * PMC events triggered by KVM emulation that haven't been fully
	 * processed, i.e. haven't undergone overflow detection.
	 */
	u64 emulated_counter;
	u64 eventsel;
	struct perf_event *perf_event;
	struct kvm_vcpu *vcpu;
	/*
	 * only for creating or reusing perf_event,
	 * eventsel value for general purpose counters,
	 * ctrl value for fixed counters.
	 */
	u64 current_config;
};

/* More counters may conflict with other existing Architectural MSRs */
#define KVM_MAX(a, b)	((a) >= (b) ? (a) : (b))
#define KVM_MAX_NR_INTEL_GP_COUNTERS	8
#define KVM_MAX_NR_AMD_GP_COUNTERS	6
#define KVM_MAX_NR_GP_COUNTERS		KVM_MAX(KVM_MAX_NR_INTEL_GP_COUNTERS, \
						KVM_MAX_NR_AMD_GP_COUNTERS)

#define KVM_MAX_NR_INTEL_FIXED_COUNTERS	3
#define KVM_MAX_NR_AMD_FIXED_COUNTERS	0
#define KVM_MAX_NR_FIXED_COUNTERS	KVM_MAX(KVM_MAX_NR_INTEL_FIXED_COUNTERS, \
						KVM_MAX_NR_AMD_FIXED_COUNTERS)

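/*
 * With the values above, KVM_MAX_NR_GP_COUNTERS resolves to 8 (the Intel
 * maximum) and KVM_MAX_NR_FIXED_COUNTERS to 3; AMD defines no fixed counters.
 */
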
struct kvm_pmu {
	u8 version;
	unsigned nr_arch_gp_counters;
	unsigned nr_arch_fixed_counters;
	unsigned available_event_types;
	u64 fixed_ctr_ctrl;
	u64 fixed_ctr_ctrl_rsvd;
	u64 global_ctrl;
	u64 global_status;
	u64 counter_bitmask[2];
	u64 global_ctrl_rsvd;
	u64 global_status_rsvd;
	u64 reserved_bits;
	u64 raw_event_mask;
	struct kvm_pmc gp_counters[KVM_MAX_NR_GP_COUNTERS];
	struct kvm_pmc fixed_counters[KVM_MAX_NR_FIXED_COUNTERS];

	/*
	 * Overlay the bitmap with a 64-bit atomic so that all bits can be
	 * set in a single access, e.g. to reprogram all counters when the PMU
	 * filter changes.
	 */
	union {
		DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
		atomic64_t __reprogram_pmi;
	};
	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);

	u64 ds_area;
	u64 pebs_enable;
	u64 pebs_enable_rsvd;
	u64 pebs_data_cfg;
	u64 pebs_data_cfg_rsvd;

	/*
	 * If a guest counter is cross-mapped to a host counter with a
	 * different index, its PEBS capability will be temporarily disabled.
	 *
	 * The user should make sure that this mask is updated
	 * after disabling interrupts and before perf_guest_get_msrs().
	 */
	u64 host_cross_mapped_mask;

	/*
	 * The gate to release perf_events not marked in
	 * pmc_in_use only once in a vcpu time slice.
	 */
	bool need_cleanup;

	/*
	 * The total number of programmed perf_events; it helps to avoid a
	 * redundant check before cleanup if the guest doesn't use vPMU at all.
	 */
	u8 event_count;
};

struct kvm_pmu_ops;

enum {
	KVM_DEBUGREG_BP_ENABLED		= BIT(0),
	KVM_DEBUGREG_WONT_EXIT		= BIT(1),
	/*
	 * Guest debug registers (DR0-3, DR6 and DR7) are saved/restored by
	 * hardware on exit from or entry to the guest. KVM needn't switch them.
	 * DR0-3, DR6 and DR7 are set to their architectural INIT value on VM
	 * exit; host values need to be restored.
	 */
	KVM_DEBUGREG_AUTO_SWITCH	= BIT(2),
};

struct kvm_mtrr {
	u64 var[KVM_NR_VAR_MTRR * 2];
	u64 fixed_64k;
	u64 fixed_16k[2];
	u64 fixed_4k[8];
	u64 deftype;
};

/* Hyper-V SynIC timer */
struct kvm_vcpu_hv_stimer {
	struct hrtimer timer;
	int index;
	union hv_stimer_config config;
	u64 count;
	u64 exp_time;
	struct hv_message msg;
	bool msg_pending;
};

/* Hyper-V synthetic interrupt controller (SynIC) */
struct kvm_vcpu_hv_synic {
	u64 version;
	u64 control;
	u64 msg_page;
	u64 evt_page;
	atomic64_t sint[HV_SYNIC_SINT_COUNT];
	atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT];
	DECLARE_BITMAP(auto_eoi_bitmap, 256);
	DECLARE_BITMAP(vec_bitmap, 256);
	bool active;
	bool dont_zero_synic_pages;
};

/* The maximum number of entries on the TLB flush fifo. */
#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16)
/*
 * Note: the following 'magic' entry is made up by KVM to avoid putting
 * anything besides GVA on the TLB flush fifo. It is theoretically possible
 * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000
 * which will look identical. KVM's action to 'flush everything' instead of
 * flushing these particular addresses is, however, fully legitimate as
 * flushing more than requested is always OK.
 */
#define KVM_HV_TLB_FLUSHALL_ENTRY  ((u64)-1)

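/*
 * Note that (u64)-1 is 0xfffffffffffff000 | 0xfff, i.e. exactly the
 * "GVA 0xfffffffffffff000 plus 4095 more pages" encoding described above,
 * which is why such a request must be widened to a full flush.
 */
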
enum hv_tlb_flush_fifos {
	HV_L1_TLB_FLUSH_FIFO,
	HV_L2_TLB_FLUSH_FIFO,
	HV_NR_TLB_FLUSH_FIFOS,
};

struct kvm_vcpu_hv_tlb_flush_fifo {
	spinlock_t write_lock;
	DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE);
};

/* Hyper-V per vcpu emulation context */
struct kvm_vcpu_hv {
	struct kvm_vcpu *vcpu;
	u32 vp_index;
	u64 hv_vapic;
	s64 runtime_offset;
	struct kvm_vcpu_hv_synic synic;
	struct kvm_hyperv_exit exit;
	struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
	bool enforce_cpuid;
	struct {
		u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */
		u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */
		u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */
		u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
		u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */
		u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
		u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */
		u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */
	} cpuid_cache;

	struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];

	/* Preallocated buffer for handling hypercalls passing sparse vCPU set */
	u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];

	struct hv_vp_assist_page vp_assist_page;

	struct {
		u64 pa_page_gpa;
		u64 vm_id;
		u32 vp_id;
	} nested;
};

struct kvm_hypervisor_cpuid {
	u32 base;
	u32 limit;
};

#ifdef CONFIG_KVM_XEN
/* Xen HVM per vcpu emulation context */
struct kvm_vcpu_xen {
	u64 hypercall_rip;
	u32 current_runstate;
	u8 upcall_vector;
	struct gfn_to_pfn_cache vcpu_info_cache;
	struct gfn_to_pfn_cache vcpu_time_info_cache;
	struct gfn_to_pfn_cache runstate_cache;
	struct gfn_to_pfn_cache runstate2_cache;
	u64 last_steal;
	u64 runstate_entry_time;
	u64 runstate_times[4];
	unsigned long evtchn_pending_sel;
	u32 vcpu_id; /* The Xen / ACPI vCPU ID */
	u32 timer_virq;
	u64 timer_expires; /* In guest epoch */
	atomic_t timer_pending;
	struct hrtimer timer;
	int poll_evtchn;
	struct timer_list poll_timer;
	struct kvm_hypervisor_cpuid cpuid;
};
#endif

struct kvm_queued_exception {
	bool pending;
	bool injected;
	bool has_error_code;
	u8 vector;
	u32 error_code;
	unsigned long payload;
	bool has_payload;
};

/*
 * Hardware-defined CPUID leafs that are either scattered by the kernel or are
 * unknown to the kernel, but need to be directly used by KVM. Note, these
 * word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
 */
enum kvm_only_cpuid_leafs {
	CPUID_12_EAX	 = NCAPINTS,
	CPUID_7_1_EDX,
	CPUID_8000_0007_EDX,
	CPUID_8000_0022_EAX,
	CPUID_7_2_EDX,
	CPUID_24_0_EBX,
	NR_KVM_CPU_CAPS,

	NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
};
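
/*
 * The enumerator names encode leaf, subleaf and register: e.g. CPUID_12_EAX
 * is CPUID.0x12.0x0:EAX (SGX capabilities) and CPUID_7_1_EDX is
 * CPUID.0x7.0x1:EDX.
 */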

struct kvm_vcpu_arch {
	/*
	 * rip and regs accesses must go through
	 * kvm_{register,rip}_{read,write} functions.
	 */
	unsigned long regs[NR_VCPU_REGS];
	u32 regs_avail;
	u32 regs_dirty;

	unsigned long cr0;
	unsigned long cr0_guest_owned_bits;
	unsigned long cr2;
	unsigned long cr3;
	unsigned long cr4;
	unsigned long cr4_guest_owned_bits;
	unsigned long cr4_guest_rsvd_bits;
	unsigned long cr8;
	u32 host_pkru;
	u32 pkru;
	u32 hflags;
	u64 efer;
	u64 host_debugctl;
	u64 apic_base;
	struct kvm_lapic *apic;    /* kernel irqchip context */
	bool load_eoi_exitmap_pending;
	DECLARE_BITMAP(ioapic_handled_vectors, 256);
	unsigned long apic_attention;
	int32_t apic_arb_prio;
	int mp_state;
	u64 ia32_misc_enable_msr;
	u64 smbase;
	u64 smi_count;
	bool at_instruction_boundary;
	bool tpr_access_reporting;
	bool xfd_no_write_intercept;
	u64 ia32_xss;
	u64 microcode_version;
	u64 arch_capabilities;
	u64 perf_capabilities;

	/*
	 * Paging state of the vcpu
	 *
	 * If the vcpu runs in guest mode with two level paging this still saves
	 * the paging mode of the l1 guest. This context is always used to
	 * handle faults.
	 */
	struct kvm_mmu *mmu;

	/* Non-nested MMU for L1 */
	struct kvm_mmu root_mmu;

	/* L1 MMU when running nested */
	struct kvm_mmu guest_mmu;

	/*
	 * Paging state of an L2 guest (used for nested npt)
	 *
	 * This context will save all necessary information to walk page tables
	 * of an L2 guest. This context is only initialized for page table
	 * walking and not for faulting since we never handle l2 page faults on
	 * the host.
	 */
	struct kvm_mmu nested_mmu;

	/*
	 * Pointer to the mmu context currently used for
	 * gva_to_gpa translations.
	 */
	struct kvm_mmu *walk_mmu;

	struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
	struct kvm_mmu_memory_cache mmu_shadow_page_cache;
	struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
	struct kvm_mmu_memory_cache mmu_page_header_cache;
	/*
	 * This cache is to allocate external page tables, e.g. the private
	 * EPT used by the TDX module.
	 */
	struct kvm_mmu_memory_cache mmu_external_spt_cache;

	/*
	 * QEMU userspace and the guest each have their own FPU state.
	 * In vcpu_run, we switch between the user and guest FPU contexts.
	 * While running a VCPU, the VCPU thread will have the guest FPU
	 * context.
	 *
	 * Note that while the PKRU state lives inside the fpu registers,
	 * it is switched out separately at VMENTER and VMEXIT time. The
	 * "guest_fpstate" state here contains the guest FPU context, with the
	 * host PKRU bits.
	 */
	struct fpu_guest guest_fpu;

	u64 xcr0;
	u64 guest_supported_xcr0;

	struct kvm_pio_request pio;
	void *pio_data;
	void *sev_pio_data;
	unsigned sev_pio_count;

	u8 event_exit_inst_len;

	bool exception_from_userspace;

	/* Exceptions to be injected to the guest. */
	struct kvm_queued_exception exception;
	/* Exception VM-Exits to be synthesized to L1. */
	struct kvm_queued_exception exception_vmexit;

	struct kvm_queued_interrupt {
		bool injected;
		bool soft;
		u8 nr;
	} interrupt;

	int halt_request; /* real mode on Intel only */

	int cpuid_nent;
	struct kvm_cpuid_entry2 *cpuid_entries;
	bool cpuid_dynamic_bits_dirty;
	bool is_amd_compatible;

	/*
	 * cpu_caps holds the effective guest capabilities, i.e. the features
	 * the vCPU is allowed to use. Typically, but not always, features can
	 * be used by the guest if and only if both KVM and userspace want to
	 * expose the feature to the guest.
	 *
	 * A common exception is for virtualization holes, i.e. when KVM can't
	 * prevent the guest from using a feature, in which case the vCPU "has"
	 * the feature regardless of what KVM or userspace desires.
	 *
	 * Note, features that don't require KVM involvement in any way are
	 * NOT enforced/sanitized by KVM, i.e. are taken verbatim from the
	 * guest CPUID provided by userspace.
	 */
	u32 cpu_caps[NR_KVM_CPU_CAPS];

	u64 reserved_gpa_bits;
	int maxphyaddr;

	/* emulate context */

	struct x86_emulate_ctxt *emulate_ctxt;
	bool emulate_regs_need_sync_to_vcpu;
	bool emulate_regs_need_sync_from_vcpu;
	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
	unsigned long cui_linear_rip;

	gpa_t time;
	s8  pvclock_tsc_shift;
	u32 pvclock_tsc_mul;
	unsigned int hw_tsc_khz;
	struct gfn_to_pfn_cache pv_time;
	/* set guest stopped flag in pvclock flags field */
	bool pvclock_set_guest_stopped_request;

	struct {
		u8 preempted;
		u64 msr_val;
		u64 last_steal;
		struct gfn_to_hva_cache cache;
	} st;

	u64 l1_tsc_offset;
	u64 tsc_offset; /* current tsc offset */
	u64 last_guest_tsc;
	u64 last_host_tsc;
	u64 tsc_offset_adjustment;
	u64 this_tsc_nsec;
	u64 this_tsc_write;
	u64 this_tsc_generation;
	bool tsc_catchup;
	bool tsc_always_catchup;
	s8 virtual_tsc_shift;
	u32 virtual_tsc_mult;
	u32 virtual_tsc_khz;
	s64 ia32_tsc_adjust_msr;
	u64 msr_ia32_power_ctl;
	u64 l1_tsc_scaling_ratio;
	u64 tsc_scaling_ratio; /* current scaling ratio */

	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
	/* Number of NMIs pending injection, not including hardware vNMIs. */
	unsigned int nmi_pending;
	bool nmi_injected;    /* Trying to inject an NMI this entry */
	bool smi_pending;     /* SMI queued after currently running handler */
	u8 handling_intr_from_guest;

	struct kvm_mtrr mtrr_state;
	u64 pat;

	unsigned switch_db_regs;
	unsigned long db[KVM_NR_DB_REGS];
	unsigned long dr6;
	unsigned long dr7;
	unsigned long eff_db[KVM_NR_DB_REGS];
	unsigned long guest_debug_dr7;
	u64 msr_platform_info;
	u64 msr_misc_features_enables;

	u64 mcg_cap;
	u64 mcg_status;
	u64 mcg_ctl;
	u64 mcg_ext_ctl;
	u64 *mce_banks;
	u64 *mci_ctl2_banks;

	/* Cache MMIO info */
	u64 mmio_gva;
	unsigned mmio_access;
	gfn_t mmio_gfn;
	u64 mmio_gen;

	struct kvm_pmu pmu;

	/* used for guest single stepping over the given code position */
	unsigned long singlestep_rip;

#ifdef CONFIG_KVM_HYPERV
	bool hyperv_enabled;
	struct kvm_vcpu_hv *hyperv;
#endif
#ifdef CONFIG_KVM_XEN
	struct kvm_vcpu_xen xen;
#endif
	cpumask_var_t wbinvd_dirty_mask;

	unsigned long last_retry_eip;
	unsigned long last_retry_addr;

	struct {
		bool halted;
		gfn_t gfns[ASYNC_PF_PER_VCPU];
		struct gfn_to_hva_cache data;
		u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */
		u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
		u16 vec;
		u32 id;
		u32 host_apf_flags;
		bool send_always;
		bool delivery_as_pf_vmexit;
		bool pageready_pending;
	} apf;

	/* OSVW MSRs (AMD only) */
	struct {
		u64 length;
		u64 status;
	} osvw;

	struct {
		u64 msr_val;
		struct gfn_to_hva_cache data;
	} pv_eoi;

	u64 msr_kvm_poll_control;

	/* pv related host specific info */
	struct {
		bool pv_unhalted;
	} pv;

	int pending_ioapic_eoi;
	int pending_external_vector;
	int highest_stale_pending_ioapic_eoi;

	/* be preempted when it's in kernel mode (CPL 0) */
	bool preempted_in_kernel;

	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
	bool l1tf_flush_l1d;

	/* Host CPU on which VM-entry was most recently attempted */
	int last_vmentry_cpu;

	/* AMD MSRC001_0015 Hardware Configuration */
	u64 msr_hwcr;

	/* pv related cpuid info */
	struct {
		/*
		 * value of the eax register in the KVM_CPUID_FEATURES CPUID
		 * leaf.
		 */
		u32 features;

		/*
		 * indicates whether pv emulation should be disabled if features
		 * are not present in the guest's cpuid
		 */
		bool enforce;
	} pv_cpuid;

	/* Protected Guests */
	bool guest_state_protected;
	bool guest_tsc_protected;

	/*
	 * Set when PDPTEs were loaded directly by userspace without
	 * reading the guest memory.
	 */
	bool pdptrs_from_userspace;

#if IS_ENABLED(CONFIG_HYPERV)
	hpa_t hv_root_tdp;
#endif
};

struct kvm_lpage_info {
	int disallow_lpage;
};

struct kvm_arch_memory_slot {
	struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
	unsigned short *gfn_write_track;
};

/*
 * Track the mode of the optimized logical map, as the rules for decoding the
 * destination vary per mode. Enabling the optimized logical map requires all
 * software-enabled local APICs to be in the same mode, each addressable APIC
 * to be mapped to only one MDA, and each MDA to map to at most one APIC.
 */
enum kvm_apic_logical_mode {
	/* All local APICs are software disabled. */
	KVM_APIC_MODE_SW_DISABLED,
	/* All software enabled local APICs in xAPIC cluster addressing mode. */
	KVM_APIC_MODE_XAPIC_CLUSTER,
	/* All software enabled local APICs in xAPIC flat addressing mode. */
	KVM_APIC_MODE_XAPIC_FLAT,
	/* All software enabled local APICs in x2APIC mode. */
	KVM_APIC_MODE_X2APIC,
	/*
	 * Optimized map disabled, e.g. not all local APICs in the same logical
	 * mode, same logical ID assigned to multiple APICs, etc.
	 */
	KVM_APIC_MODE_MAP_DISABLED,
};

struct kvm_apic_map {
	struct rcu_head rcu;
	enum kvm_apic_logical_mode logical_mode;
	u32 max_apic_id;
	union {
		struct kvm_lapic *xapic_flat_map[8];
		struct kvm_lapic *xapic_cluster_map[16][4];
	};
	struct kvm_lapic *phys_map[];
};

/* Hyper-V synthetic debugger (SynDbg) */
struct kvm_hv_syndbg {
	struct {
		u64 control;
		u64 status;
		u64 send_page;
		u64 recv_page;
		u64 pending_page;
	} control;
	u64 options;
};

/* Current state of Hyper-V TSC page clocksource */
enum hv_tsc_page_status {
	/* TSC page was not set up or disabled */
	HV_TSC_PAGE_UNSET = 0,
	/* TSC page MSR was written by the guest, update pending */
	HV_TSC_PAGE_GUEST_CHANGED,
	/* TSC page update was triggered from the host side */
	HV_TSC_PAGE_HOST_CHANGED,
	/* TSC page was properly set up and is currently active */
	HV_TSC_PAGE_SET,
	/* TSC page was set up with an inaccessible GPA */
	HV_TSC_PAGE_BROKEN,
};

#ifdef CONFIG_KVM_HYPERV
/* Hyper-V emulation context */
struct kvm_hv {
	struct mutex hv_lock;
	u64 hv_guest_os_id;
	u64 hv_hypercall;
	u64 hv_tsc_page;
	enum hv_tsc_page_status hv_tsc_page_status;

	/* Hyper-V based guest crash (NT kernel bugcheck) parameters */
	u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
	u64 hv_crash_ctl;

	struct ms_hyperv_tsc_page tsc_ref;

	struct idr conn_to_evt;

	u64 hv_reenlightenment_control;
	u64 hv_tsc_emulation_control;
	u64 hv_tsc_emulation_status;
	u64 hv_invtsc_control;

	/* How many vCPUs have VP index != vCPU index */
	atomic_t num_mismatched_vp_indexes;

	/*
	 * How many SynICs use 'AutoEOI' feature
	 * (protected by arch.apicv_update_lock)
	 */
	unsigned int synic_auto_eoi_used;

	struct kvm_hv_syndbg hv_syndbg;

	bool xsaves_xsavec_checked;
};
#endif

struct msr_bitmap_range {
	u32 flags;
	u32 nmsrs;
	u32 base;
	unsigned long *bitmap;
};

#ifdef CONFIG_KVM_XEN
/* Xen emulation context */
struct kvm_xen {
	struct mutex xen_lock;
	u32 xen_version;
	bool long_mode;
	bool runstate_update_flag;
	u8 upcall_vector;
	struct gfn_to_pfn_cache shinfo_cache;
	struct idr evtchn_ports;
	unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];

	struct kvm_xen_hvm_config hvm_config;
};
#endif

enum kvm_irqchip_mode {
	KVM_IRQCHIP_NONE,
	KVM_IRQCHIP_KERNEL,       /* created with KVM_CREATE_IRQCHIP */
	KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
};

struct kvm_x86_msr_filter {
	u8 count;
	bool default_allow:1;
	struct msr_bitmap_range ranges[16];
};

struct kvm_x86_pmu_event_filter {
	__u32 action;
	__u32 nevents;
	__u32 fixed_counter_bitmap;
	__u32 flags;
	__u32 nr_includes;
	__u32 nr_excludes;
	__u64 *includes;
	__u64 *excludes;
	__u64 events[];
};

enum kvm_apicv_inhibit {

	/********************************************************************/
	/* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */
	/********************************************************************/

	/*
	 * APIC acceleration is disabled by a module parameter
	 * and/or not supported in hardware.
	 */
	APICV_INHIBIT_REASON_DISABLED,

	/*
	 * APIC acceleration is inhibited because AutoEOI feature is
	 * being used by a HyperV guest.
	 */
	APICV_INHIBIT_REASON_HYPERV,

	/*
	 * APIC acceleration is inhibited because the userspace didn't yet
	 * enable the kernel/split irqchip.
	 */
	APICV_INHIBIT_REASON_ABSENT,

	/*
	 * APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ
	 * (out of band, debug measure of blocking all interrupts on this vCPU)
	 * was enabled, to avoid AVIC/APICv bypassing it.
	 */
	APICV_INHIBIT_REASON_BLOCKIRQ,

	/*
	 * APICv is disabled because not all vCPUs have a 1:1 mapping between
	 * APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack.
	 */
	APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED,

	/*
	 * For simplicity, the APIC acceleration is inhibited the
	 * first time either APIC ID or APIC base are changed by the guest
	 * from their reset values.
	 */
	APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
	APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,

	/******************************************************/
	/* INHIBITs that are relevant only to the AMD's AVIC. */
	/******************************************************/

	/*
	 * AVIC is inhibited on a vCPU because it runs a nested guest.
	 *
	 * This is needed because unlike APICv, the peers of this vCPU
	 * cannot use the doorbell mechanism to signal interrupts via AVIC when
	 * a vCPU runs nested.
	 */
	APICV_INHIBIT_REASON_NESTED,

	/*
	 * On SVM, the wait for the IRQ window is implemented with pending vIRQ,
	 * which cannot be injected when the AVIC is enabled, thus AVIC
	 * is inhibited while KVM waits for the IRQ window.
	 */
	APICV_INHIBIT_REASON_IRQWIN,

	/*
	 * PIT (i8254) 're-inject' mode, relies on EOI intercept,
	 * which AVIC doesn't support for edge triggered interrupts.
	 */
	APICV_INHIBIT_REASON_PIT_REINJ,

	/*
	 * AVIC is disabled because SEV doesn't support it.
	 */
	APICV_INHIBIT_REASON_SEV,

	/*
	 * AVIC is disabled because not all vCPUs with a valid LDR have a 1:1
	 * mapping between logical ID and vCPU.
	 */
	APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,

	NR_APICV_INHIBIT_REASONS,
};

#define __APICV_INHIBIT_REASON(reason)			\
	{ BIT(APICV_INHIBIT_REASON_##reason), #reason }

#define APICV_INHIBIT_REASONS				\
	__APICV_INHIBIT_REASON(DISABLED),		\
	__APICV_INHIBIT_REASON(HYPERV),			\
	__APICV_INHIBIT_REASON(ABSENT),			\
	__APICV_INHIBIT_REASON(BLOCKIRQ),		\
	__APICV_INHIBIT_REASON(PHYSICAL_ID_ALIASED),	\
	__APICV_INHIBIT_REASON(APIC_ID_MODIFIED),	\
	__APICV_INHIBIT_REASON(APIC_BASE_MODIFIED),	\
	__APICV_INHIBIT_REASON(NESTED),			\
	__APICV_INHIBIT_REASON(IRQWIN),			\
	__APICV_INHIBIT_REASON(PIT_REINJ),		\
	__APICV_INHIBIT_REASON(SEV),			\
	__APICV_INHIBIT_REASON(LOGICAL_ID_ALIASED)

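/*
 * Each __APICV_INHIBIT_REASON() entry expands to a { mask, name } pair,
 * e.g. __APICV_INHIBIT_REASON(HYPERV) becomes
 * { BIT(APICV_INHIBIT_REASON_HYPERV), "HYPERV" }, so the inhibit bitmask
 * can be printed symbolically, e.g. by tracepoints.
 */
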
struct kvm_arch {
	unsigned long n_used_mmu_pages;
	unsigned long n_requested_mmu_pages;
	unsigned long n_max_mmu_pages;
	unsigned int indirect_shadow_pages;
	u8 mmu_valid_gen;
	u8 vm_type;
	bool has_private_mem;
	bool has_protected_state;
	bool pre_fault_allowed;
	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
	struct list_head active_mmu_pages;
	/*
	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
	 * replaced by an NX huge page. A shadow page is on this list if its
	 * existence disallows an NX huge page (nx_huge_page_disallowed is set)
	 * and there are no other conditions that prevent a huge page, e.g.
	 * the backing host page is huge, dirty logging is not enabled for its
	 * memslot, etc... Note, zapping shadow pages on this list doesn't
	 * guarantee an NX huge page will be created in its stead, e.g. if the
	 * guest attempts to execute from the region then KVM obviously can't
	 * create an NX huge page (without hanging the guest).
	 */
	struct list_head possible_nx_huge_pages;
#ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING
	struct kvm_page_track_notifier_head track_notifier_head;
#endif
	/*
	 * Protects marking pages unsync during page faults, as TDP MMU page
	 * faults only take mmu_lock for read. For simplicity, the unsync
	 * pages lock is always taken when marking pages unsync regardless of
	 * whether mmu_lock is held for read or write.
	 */
	spinlock_t mmu_unsync_pages_lock;

	u64 shadow_mmio_value;

#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
	atomic_t noncoherent_dma_count;
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
	atomic_t assigned_device_count;
	struct kvm_pic *vpic;
	struct kvm_ioapic *vioapic;
	struct kvm_pit *vpit;
	atomic_t vapics_in_nmi_mode;
	struct mutex apic_map_lock;
	struct kvm_apic_map __rcu *apic_map;
	atomic_t apic_map_dirty;

	bool apic_access_memslot_enabled;
	bool apic_access_memslot_inhibited;

	/* Protects apicv_inhibit_reasons */
	struct rw_semaphore apicv_update_lock;
	unsigned long apicv_inhibit_reasons;

	gpa_t wall_clock;

	bool mwait_in_guest;
	bool hlt_in_guest;
	bool pause_in_guest;
	bool cstate_in_guest;

	unsigned long irq_sources_bitmap;
	s64 kvmclock_offset;

	/*
	 * This also protects nr_vcpus_matched_tsc which is read from a
	 * preemption-disabled region, so it must be a raw spinlock.
	 */
	raw_spinlock_t tsc_write_lock;
	u64 last_tsc_nsec;
	u64 last_tsc_write;
	u32 last_tsc_khz;
	u64 last_tsc_offset;
	u64 cur_tsc_nsec;
	u64 cur_tsc_write;
	u64 cur_tsc_offset;
	u64 cur_tsc_generation;
	int nr_vcpus_matched_tsc;

	u32 default_tsc_khz;
	bool user_set_tsc;
	u64 apic_bus_cycle_ns;

	seqcount_raw_spinlock_t pvclock_sc;
	bool use_master_clock;
	u64 master_kernel_ns;
	u64 master_cycle_now;
	struct delayed_work kvmclock_update_work;
	struct delayed_work kvmclock_sync_work;

	/* reads protected by irq_srcu, writes by irq_lock */
	struct hlist_head mask_notifier_list;

#ifdef CONFIG_KVM_HYPERV
	struct kvm_hv hyperv;
#endif

#ifdef CONFIG_KVM_XEN
	struct kvm_xen xen;
#endif
a826faf1 | 1440 | bool backwards_tsc_observed; |
54750f2c | 1441 | bool boot_vcpu_runs_old_kvmclock; |
d71ba788 | 1442 | u32 bsp_vcpu_id; |
90de4a18 NA |
1443 | |
1444 | u64 disabled_quirks; | |
49df6397 | 1445 | |
49776faf | 1446 | enum kvm_irqchip_mode irqchip_mode; |
b053b2ae | 1447 | u8 nr_reserved_ioapic_pins; |
52004014 FW |
1448 | |
1449 | bool disabled_lapic_found; | |
44a95dae | 1450 | |
37131313 | 1451 | bool x2apic_format; |
c519265f | 1452 | bool x2apic_broadcast_quirk_disabled; |
6fbbde9a DS |
1453 | |
1454 | bool guest_can_read_msr_platform_info; | |
59073aaf | 1455 | bool exception_payload_enabled; |
66bb8a06 | 1456 | |
ed235117 CQ |
1457 | bool triple_fault_event; |
1458 | ||
b318e8de | 1459 | bool bus_lock_detection_enabled; |
ba7bb663 | 1460 | bool enable_pmu; |
2f4073e0 TX |
1461 | |
1462 | u32 notify_window; | |
1463 | u32 notify_vmexit_flags; | |
19238e75 AL |
1464 | /* |
1465 | * If exit_on_emulation_error is set, and the in-kernel instruction | |
1466 | * emulator fails to emulate an instruction, allow userspace | |
1467 | * the opportunity to look at it. | |
1468 | */ | |
1469 | bool exit_on_emulation_error; | |
b318e8de | 1470 | |
1ae09954 AG |
1471 | /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ |
1472 | u32 user_space_msr_mask; | |
b318e8de | 1473 | struct kvm_x86_msr_filter __rcu *msr_filter; |
fe6b6bc8 | 1474 | |
0dbb1123 AK |
1475 | u32 hypercall_exit_enabled; |
1476 | ||
70210c04 SC |
1477 | /* Guest can access the SGX PROVISIONKEY. */ |
1478 | bool sgx_provisioning_allowed; | |
1479 | ||
14329b82 | 1480 | struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter; |
d96c77bd PB |
1481 | struct vhost_task *nx_huge_page_recovery_thread; |
1482 | u64 nx_huge_page_last; | |
931656b9 | 1483 | struct once nx_once; |
fe5db27d | 1484 | |
897218ff | 1485 | #ifdef CONFIG_X86_64 |
81d480fd SC |
1486 | #ifdef CONFIG_KVM_PROVE_MMU |
1487 | /* | |
1488 | * The number of TDP MMU pages across all roots. Used only to sanity | |
1489 | * check that KVM isn't leaking TDP MMU pages. | |
1490 | */ | |
d25ceb92 | 1491 | atomic64_t tdp_mmu_pages; |
81d480fd | 1492 | #endif |
d25ceb92 | 1493 | |
c0dba6e4 | 1494 | /* |
1f98f2bd DM |
1495 | * List of struct kvm_mmu_pages being used as roots. |
1496 | * All struct kvm_mmu_pages in the list should have | |
c0dba6e4 | 1497 | * tdp_mmu_page set. |
c0e64238 BG |
1498 | * |
1499 | * For reads, this list is protected by: | |
b146a9b3 | 1500 | * RCU alone or |
c0e64238 BG |
1501 | * the MMU lock in read mode + RCU or |
1502 | * the MMU lock in write mode | |
1503 | * | |
250ce1b4 PB |
1504 | * For writes, this list is protected by tdp_mmu_pages_lock; see |
1505 | * below for the details. | |
c0e64238 BG |
1506 | * |
1507 | * Roots will remain in the list until their tdp_mmu_root_count | |
1508 | * drops to zero, at which point the thread that decremented the | |
1509 | * count to zero should removed the root from the list and clean | |
1510 | * it up, freeing the root after an RCU grace period. | |
c0dba6e4 | 1511 | */ |
02c00b3a | 1512 | struct list_head tdp_mmu_roots; |
c0dba6e4 | 1513 | |
9a77daac BG |
1514 | /* |
1515 | * Protects accesses to the following fields when the MMU lock | |
1516 | * is held in read mode: | |
c0e64238 | 1517 | * - tdp_mmu_roots (above) |
f96c48e9 | 1518 | * - the link field of kvm_mmu_page structs used by the TDP MMU |
55c510e2 SC |
1519 | * - possible_nx_huge_pages; |
1520 | * - the possible_nx_huge_page_link field of kvm_mmu_page structs used | |
9a77daac | 1521 | * by the TDP MMU |
250ce1b4 PB |
1522 | * Because the lock is only taken within the MMU lock, strictly |
1523 | * speaking it is redundant to acquire this lock when the thread | |
1524 | * holds the MMU lock in write mode. However, it often simplifies | |
1525 | * the code to do so. | |
9a77daac BG |
1526 | */ |
1527 | spinlock_t tdp_mmu_pages_lock; | |
897218ff | 1528 | #endif /* CONFIG_X86_64 */ |
a2557408 BG |
1529 | |
1530 | /* | |
1e76a3ce DS |
1531 | * If set, at least one shadow root has been allocated. This flag |
1532 | * is used as one input when determining whether certain | |
1533 | * memslot-related allocations are necessary. | |
a2557408 | 1534 | */ |
1e76a3ce | 1535 | bool shadow_root_allocated; |
3c86c0d3 | 1536 | |
a364c014 AV |
1537 | #ifdef CONFIG_KVM_EXTERNAL_WRITE_TRACKING |
1538 | /* | |
1539 | * If set, the VM has (or had) an external write tracking user, and | |
1540 | * thus all write tracking metadata has been allocated, even if KVM | |
1541 | * itself isn't using write tracking. | |
1542 | */ | |
1543 | bool external_write_tracking_enabled; | |
1544 | #endif | |
1545 | ||
3c86c0d3 VP |
1546 | #if IS_ENABLED(CONFIG_HYPERV) |
1547 | hpa_t hv_root_tdp; | |
1548 | spinlock_t hv_root_tdp_lock; | |
cfef5af3 | 1549 | struct hv_partition_assist_pg *hv_pa_pg; |
3c86c0d3 | 1550 | #endif |
35875316 ZG |
1551 | /* |
1552 | * VM-scope maximum vCPU ID. Used to determine the size of structures | |
1553 | * that increase along with the maximum vCPU ID, in which case, using | |
1554 | * the global KVM_MAX_VCPU_IDS may lead to significant memory waste. | |
1555 | */ | |
1556 | u32 max_vcpu_ids; | |
084cc29f BG |
1557 | |
1558 | bool disable_nx_huge_pages; | |
ada51a9d DM |
1559 | |
1560 | /* | |
1561 | * Memory caches used to allocate shadow pages when performing eager | |
1562 | * page splitting. No need for a shadowed_info_cache since eager page | |
1563 | * splitting only allocates direct shadow pages. | |
1564 | * | |
1565 | * Protected by kvm->slots_lock. | |
1566 | */ | |
1567 | struct kvm_mmu_memory_cache split_shadow_page_cache; | |
1568 | struct kvm_mmu_memory_cache split_page_header_cache; | |
1569 | ||
1570 | /* | |
1571 | * Memory cache used to allocate pte_list_desc structs while splitting | |
1572 | * huge pages. In the worst case, to split one huge page, 512 | |
1573 | * pte_list_desc structs are needed to add each lower-level leaf sptep | |
1574 | * to the rmap plus 1 to extend the parent_ptes rmap of the lower-level | |
1575 | * page table. | |
1576 | * | |
1577 | * Protected by kvm->slots_lock. | |
1578 | */ | |
1579 | #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1) | |
1580 | struct kvm_mmu_memory_cache split_desc_cache; | |
3fc3f718 IY |
1581 | |
1582 | gfn_t gfn_direct_bits; | |
fbb4adad YZ |
1583 | |
1584 | /* | |
1585 | * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero | |
1586 | * value indicates CPU dirty logging is unsupported or disabled in | |
1587 | * the current VM. | |
1588 | */ | |
1589 | int cpu_dirty_log_size; | |
d69fb81f ZX |
1590 | }; |
1591 | ||
0711456c | 1592 | struct kvm_vm_stat { |
0193cc90 | 1593 | struct kvm_vm_stat_generic generic; |
e3cb6fa0 PB |
1594 | u64 mmu_shadow_zapped; |
1595 | u64 mmu_pte_write; | |
1596 | u64 mmu_pde_zapped; | |
1597 | u64 mmu_flooded; | |
1598 | u64 mmu_recycled; | |
1599 | u64 mmu_cache_miss; | |
1600 | u64 mmu_unsync; | |
71f51d2c MZ |
1601 | union { |
1602 | struct { | |
1603 | atomic64_t pages_4k; | |
1604 | atomic64_t pages_2m; | |
1605 | atomic64_t pages_1g; | |
1606 | }; | |
1607 | atomic64_t pages[KVM_NR_PAGE_SIZES]; | |
1608 | }; | |
e3cb6fa0 PB |
1609 | u64 nx_lpage_splits; |
1610 | u64 max_mmu_page_hash_collisions; | |
ec1cf69c | 1611 | u64 max_mmu_rmap_size; |
0711456c ZX |
1612 | }; |
1613 | ||
77b4c255 | 1614 | struct kvm_vcpu_stat { |
0193cc90 | 1615 | struct kvm_vcpu_stat_generic generic; |
1075d41e | 1616 | u64 pf_taken; |
8a7e75d4 | 1617 | u64 pf_fixed; |
1075d41e SC |
1618 | u64 pf_emulate; |
1619 | u64 pf_spurious; | |
1620 | u64 pf_fast; | |
1621 | u64 pf_mmio_spte_created; | |
8a7e75d4 SJS |
1622 | u64 pf_guest; |
1623 | u64 tlb_flush; | |
1624 | u64 invlpg; | |
1625 | ||
1626 | u64 exits; | |
1627 | u64 io_exits; | |
1628 | u64 mmio_exits; | |
1629 | u64 signal_exits; | |
1630 | u64 irq_window_exits; | |
1631 | u64 nmi_window_exits; | |
c595ceee | 1632 | u64 l1d_flush; |
8a7e75d4 | 1633 | u64 halt_exits; |
8a7e75d4 SJS |
1634 | u64 request_irq_exits; |
1635 | u64 irq_exits; | |
1636 | u64 host_state_reload; | |
8a7e75d4 SJS |
1637 | u64 fpu_reload; |
1638 | u64 insn_emulation; | |
1639 | u64 insn_emulation_fail; | |
1640 | u64 hypercalls; | |
1641 | u64 irq_injections; | |
1642 | u64 nmi_injections; | |
0f1e261e | 1643 | u64 req_event; |
43c11d91 | 1644 | u64 nested_run; |
4a7132ef WL |
1645 | u64 directed_yield_attempted; |
1646 | u64 directed_yield_successful; | |
6cd88243 PB |
1647 | u64 preemption_reported; |
1648 | u64 preemption_other; | |
d5a0483f | 1649 | u64 guest_mode; |
2f4073e0 | 1650 | u64 notify_window_exits; |
77b4c255 | 1651 | }; |
ad312c7c | 1652 | |
8a76d7f2 JR |
1653 | struct x86_instruction_info; |
1654 | ||
8fe8ab46 WA |
1655 | struct msr_data { |
1656 | bool host_initiated; | |
1657 | u32 index; | |
1658 | u64 data; | |
1659 | }; | |
1660 | ||
cb5281a5 PB |
1661 | struct kvm_lapic_irq { |
1662 | u32 vector; | |
b7cb2231 PB |
1663 | u16 delivery_mode; |
1664 | u16 dest_mode; | |
1665 | bool level; | |
1666 | u16 trig_mode; | |
cb5281a5 PB |
1667 | u32 shorthand; |
1668 | u32 dest_id; | |
93bbf0b8 | 1669 | bool msi_redir_hint; |
cb5281a5 PB |
1670 | }; |
1671 | ||
c96001c5 PX |
1672 | static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) |
1673 | { | |
1674 | return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; | |
1675 | } | |
1676 | ||
ea4a5ff8 | 1677 | struct kvm_x86_ops { |
9dadfc4a SC |
1678 | const char *name; |
1679 | ||
d83420c2 SC |
1680 | int (*check_processor_compatibility)(void); |
1681 | ||
0617a769 SC |
1682 | int (*enable_virtualization_cpu)(void); |
1683 | void (*disable_virtualization_cpu)(void); | |
590b09b1 SC |
1684 | cpu_emergency_virt_cb *emergency_disable_virtualization_cpu; |
1685 | ||
6e4fd06f | 1686 | void (*hardware_unsetup)(void); |
5719455f | 1687 | bool (*has_emulated_msr)(struct kvm *kvm, u32 index); |
7c1b761b | 1688 | void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); |
ea4a5ff8 | 1689 | |
562b6b08 | 1690 | unsigned int vm_size; |
03543133 SS |
1691 | int (*vm_init)(struct kvm *kvm); |
1692 | void (*vm_destroy)(struct kvm *kvm); | |
8d032b68 | 1693 | void (*vm_pre_destroy)(struct kvm *kvm); |
03543133 | 1694 | |
ea4a5ff8 | 1695 | /* Create, but do not attach this VCPU */ |
d588bb9b | 1696 | int (*vcpu_precreate)(struct kvm *kvm); |
987b2594 | 1697 | int (*vcpu_create)(struct kvm_vcpu *vcpu); |
ea4a5ff8 | 1698 | void (*vcpu_free)(struct kvm_vcpu *vcpu); |
d28bc9dd | 1699 | void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); |
ea4a5ff8 | 1700 | |
e27bc044 | 1701 | void (*prepare_switch_to_guest)(struct kvm_vcpu *vcpu); |
ea4a5ff8 ZX |
1702 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); |
1703 | void (*vcpu_put)(struct kvm_vcpu *vcpu); | |
ea4a5ff8 | 1704 | |
6986982f | 1705 | void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); |
609e36d3 | 1706 | int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); |
8fe8ab46 | 1707 | int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); |
ea4a5ff8 ZX |
1708 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); |
1709 | void (*get_segment)(struct kvm_vcpu *vcpu, | |
1710 | struct kvm_segment *var, int seg); | |
2e4d2653 | 1711 | int (*get_cpl)(struct kvm_vcpu *vcpu); |
f0e7012c | 1712 | int (*get_cpl_no_cache)(struct kvm_vcpu *vcpu); |
ea4a5ff8 ZX |
1713 | void (*set_segment)(struct kvm_vcpu *vcpu, |
1714 | struct kvm_segment *var, int seg); | |
1715 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); | |
26a0652c | 1716 | bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); |
ea4a5ff8 | 1717 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); |
405329fc | 1718 | void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
26a0652c | 1719 | bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); |
c2fe3cd4 | 1720 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); |
72f211ec | 1721 | int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); |
89a27f4d GN |
1722 | void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
1723 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | |
1724 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | |
1725 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | |
c77fb5fe | 1726 | void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); |
c2fee09f | 1727 | void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); |
020df079 | 1728 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); |
5fdbf976 | 1729 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
ea4a5ff8 ZX |
1730 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
1731 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | |
c5063551 | 1732 | bool (*get_if_flag)(struct kvm_vcpu *vcpu); |
ea4a5ff8 | 1733 | |
e27bc044 SC |
1734 | void (*flush_tlb_all)(struct kvm_vcpu *vcpu); |
1735 | void (*flush_tlb_current)(struct kvm_vcpu *vcpu); | |
0277022a | 1736 | #if IS_ENABLED(CONFIG_HYPERV) |
8a1300ff | 1737 | int (*flush_remote_tlbs)(struct kvm *kvm); |
9ed3bf41 SC |
1738 | int (*flush_remote_tlbs_range)(struct kvm *kvm, gfn_t gfn, |
1739 | gfn_t nr_pages); | |
0277022a | 1740 | #endif |
ea4a5ff8 | 1741 | |
faff8758 JS |
1742 | /* |
1743 | * Flush any TLB entries associated with the given GVA. | |
1744 | * Does not need to flush GPA->HPA mappings. | |
1745 | * Can potentially get non-canonical addresses through INVLPGs, which | |
1746 | * the implementation may choose to ignore if appropriate. | |
1747 | */ | |
e27bc044 | 1748 | void (*flush_tlb_gva)(struct kvm_vcpu *vcpu, gva_t addr); |
ea4a5ff8 | 1749 | |
e64419d9 SC |
1750 | /* |
1751 | * Flush any TLB entries created by the guest. Like tlb_flush_gva(), | |
1752 | * does not need to flush GPA->HPA mappings. | |
1753 | */ | |
e27bc044 | 1754 | void (*flush_tlb_guest)(struct kvm_vcpu *vcpu); |
e64419d9 | 1755 | |
fc4fad79 | 1756 | int (*vcpu_pre_run)(struct kvm_vcpu *vcpu); |
9c9025ea SC |
1757 | enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu, |
1758 | bool force_immediate_exit); | |
1e9e2622 WL |
1759 | int (*handle_exit)(struct kvm_vcpu *vcpu, |
1760 | enum exit_fastpath_completion exit_fastpath); | |
f8ea7c60 | 1761 | int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); |
5ef8acbd | 1762 | void (*update_emulated_instruction)(struct kvm_vcpu *vcpu); |
2809f5d2 | 1763 | void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); |
37ccdcbe | 1764 | u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); |
ea4a5ff8 ZX |
1765 | void (*patch_hypercall)(struct kvm_vcpu *vcpu, |
1766 | unsigned char *hypercall_addr); | |
2d613912 | 1767 | void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected); |
e27bc044 | 1768 | void (*inject_nmi)(struct kvm_vcpu *vcpu); |
6ad75c5c | 1769 | void (*inject_exception)(struct kvm_vcpu *vcpu); |
b463a6f7 | 1770 | void (*cancel_injection)(struct kvm_vcpu *vcpu); |
c9d40913 PB |
1771 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); |
1772 | int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); | |
3cfc3092 JK |
1773 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); |
1774 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); | |
fa4c027a SS |
1775 | /* Whether or not a virtual NMI is pending in hardware. */ |
1776 | bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu); | |
1777 | /* | |
54aa699e | 1778 | * Attempt to pend a virtual NMI in hardware. Returns %true on success |
fa4c027a SS |
1779 | * to allow using static_call_ret0 as the fallback. |
1780 | */ | |
1781 | bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu); | |
c9a7953f JK |
1782 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
1783 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | |
95ba8273 | 1784 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
73b42dc6 SC |
1785 | |
1786 | const bool x2apic_icr_is_split; | |
b3f257a8 | 1787 | const unsigned long required_apicv_inhibits; |
2008fab3 | 1788 | bool allow_apicv_in_x2apic_without_x2apic_virtualization; |
d62caabb | 1789 | void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); |
76bce9f1 | 1790 | void (*hwapic_isr_update)(struct kvm_vcpu *vcpu, int isr); |
6308630b | 1791 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); |
8d860bbe | 1792 | void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); |
a4148b7c | 1793 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); |
57dfd7b5 SC |
1794 | void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode, |
1795 | int trig_mode, int vector); | |
76dfafd5 | 1796 | int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); |
ea4a5ff8 | 1797 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
2ac52ab8 | 1798 | int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); |
ba28401b | 1799 | u8 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
344f414f | 1800 | |
e83bc09c SC |
1801 | void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa, |
1802 | int root_level); | |
727a7e27 | 1803 | |
77ac7079 IY |
1804 | /* Update external mapping with page table link. */ |
1805 | int (*link_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, | |
1806 | void *external_spt); | |
1807 | /* Update the external page table from spte getting set. */ | |
1808 | int (*set_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, | |
1809 | kvm_pfn_t pfn_for_gfn); | |
1810 | ||
94faba89 IY |
1811 | /* Update external page tables for page table about to be freed. */ |
1812 | int (*free_external_spt)(struct kvm *kvm, gfn_t gfn, enum pg_level level, | |
1813 | void *external_spt); | |
1814 | ||
1815 | /* Update external page table from spte getting removed, and flush TLB. */ | |
1816 | int (*remove_external_spte)(struct kvm *kvm, gfn_t gfn, enum pg_level level, | |
1817 | kvm_pfn_t pfn_for_gfn); | |
1818 | ||
f5f48ee1 SY |
1819 | bool (*has_wbinvd_exit)(void); |
1820 | ||
307a94c7 IS |
1821 | u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu); |
1822 | u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu); | |
2d636990 SC |
1823 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu); |
1824 | void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu); | |
99e3e30a | 1825 | |
235ba74f | 1826 | /* |
3e633e7e | 1827 | * Retrieve somewhat arbitrary exit/entry information. Intended to |
0a62a031 | 1828 | * be used only from within tracepoints or error paths. |
235ba74f | 1829 | */ |
0a62a031 DE |
1830 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason, |
1831 | u64 *info1, u64 *info2, | |
3e633e7e ML |
1832 | u32 *intr_info, u32 *error_code); |
1833 | ||
1834 | void (*get_entry_info)(struct kvm_vcpu *vcpu, | |
1835 | u32 *intr_info, u32 *error_code); | |
8a76d7f2 JR |
1836 | |
1837 | int (*check_intercept)(struct kvm_vcpu *vcpu, | |
1838 | struct x86_instruction_info *info, | |
21f1b8f2 SC |
1839 | enum x86_intercept_stage stage, |
1840 | struct x86_exception *exception); | |
a9ab13ff | 1841 | void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); |
7f5581f5 | 1842 | |
a85863c2 | 1843 | void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu); |
bab4165e | 1844 | |
33b22172 | 1845 | const struct kvm_x86_nested_ops *nested_ops; |
efc64404 | 1846 | |
d1ed092f SS |
1847 | void (*vcpu_blocking)(struct kvm_vcpu *vcpu); |
1848 | void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); | |
1849 | ||
e27bc044 | 1850 | int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq, |
efc64404 | 1851 | uint32_t guest_irq, bool set); |
e27bc044 | 1852 | void (*pi_start_assignment)(struct kvm *kvm); |
9cfec6d0 | 1853 | void (*apicv_pre_state_restore)(struct kvm_vcpu *vcpu); |
be8ca170 | 1854 | void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); |
17e433b5 | 1855 | bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); |
90cfe144 | 1856 | bool (*protected_apic_has_interrupt)(struct kvm_vcpu *vcpu); |
ce7a058a | 1857 | |
f9927982 SC |
1858 | int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, |
1859 | bool *expired); | |
ce7a058a | 1860 | void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); |
c45dcc71 AR |
1861 | |
1862 | void (*setup_mce)(struct kvm_vcpu *vcpu); | |
0234bf88 | 1863 | |
31e83e21 | 1864 | #ifdef CONFIG_KVM_SMM |
c9d40913 | 1865 | int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); |
58c1d206 ML |
1866 | int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram); |
1867 | int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram); | |
c9d40913 | 1868 | void (*enable_smi_window)(struct kvm_vcpu *vcpu); |
31e83e21 | 1869 | #endif |
5acc5c06 | 1870 | |
546d714b | 1871 | int (*dev_get_attr)(u32 group, u64 attr, u64 *val); |
03d004cd | 1872 | int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); |
a50f673f | 1873 | int (*vcpu_mem_enc_ioctl)(struct kvm_vcpu *vcpu, void __user *argp); |
03d004cd SC |
1874 | int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); |
1875 | int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); | |
54526d1f | 1876 | int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); |
b5663931 | 1877 | int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); |
683412cc | 1878 | void (*guest_memory_reclaimed)(struct kvm *kvm); |
801e459a | 1879 | |
b848f24b | 1880 | int (*get_feature_msr)(u32 msr, u64 *data); |
57b119da | 1881 | |
aeb904f6 SC |
1882 | int (*check_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, |
1883 | void *insn, int insn_len); | |
4b9852f4 LA |
1884 | |
1885 | bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); | |
b83237ad | 1886 | int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); |
93dff2fe JM |
1887 | |
1888 | void (*migrate_timers)(struct kvm_vcpu *vcpu); | |
51de8151 | 1889 | void (*msr_filter_changed)(struct kvm_vcpu *vcpu); |
f9a4d621 | 1890 | int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); |
647daca2 TL |
1891 | |
1892 | void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); | |
d5fa597e ML |
1893 | |
1894 | /* | |
1895 | * Returns vCPU-specific APICv inhibit reasons. | |
1896 | */ | |
1897 | unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu); | |
37a41847 BW |
1898 | |
1899 | gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); | |
75253db4 | 1900 | void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); |
3bb2531e | 1901 | int (*gmem_prepare)(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order); |
a90764f0 | 1902 | void (*gmem_invalidate)(kvm_pfn_t start, kvm_pfn_t end); |
f32fb328 | 1903 | int (*private_max_mapping_level)(struct kvm *kvm, kvm_pfn_t pfn); |
ea4a5ff8 ZX |
1904 | }; |
1905 | ||
33b22172 | 1906 | struct kvm_x86_nested_ops { |
f7e57078 | 1907 | void (*leave_nested)(struct kvm_vcpu *vcpu); |
7709aba8 SC |
1908 | bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector, |
1909 | u32 error_code); | |
33b22172 | 1910 | int (*check_events)(struct kvm_vcpu *vcpu); |
32f55e47 | 1911 | bool (*has_events)(struct kvm_vcpu *vcpu, bool for_injection); |
cb6a32c2 | 1912 | void (*triple_fault)(struct kvm_vcpu *vcpu); |
33b22172 PB |
1913 | int (*get_state)(struct kvm_vcpu *vcpu, |
1914 | struct kvm_nested_state __user *user_kvm_nested_state, | |
1915 | unsigned user_data_size); | |
1916 | int (*set_state)(struct kvm_vcpu *vcpu, | |
1917 | struct kvm_nested_state __user *user_kvm_nested_state, | |
1918 | struct kvm_nested_state *kvm_state); | |
729c15c2 | 1919 | bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu); |
02f5fb2e | 1920 | int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); |
33b22172 PB |
1921 | |
1922 | int (*enable_evmcs)(struct kvm_vcpu *vcpu, | |
1923 | uint16_t *vmcs_version); | |
1924 | uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); | |
b0c9c25e | 1925 | void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu); |
ea4a5ff8 ZX |
1926 | }; |
1927 | ||
d008dfdb | 1928 | struct kvm_x86_init_ops { |
d008dfdb | 1929 | int (*hardware_setup)(void); |
33271a9e | 1930 | unsigned int (*handle_intel_pt_intr)(void); |
d008dfdb SC |
1931 | |
1932 | struct kvm_x86_ops *runtime_ops; | |
34886e79 | 1933 | struct kvm_pmu_ops *pmu_ops; |
d008dfdb SC |
1934 | }; |
1935 | ||
af585b92 | 1936 | struct kvm_arch_async_pf { |
7c90705b | 1937 | u32 token; |
af585b92 | 1938 | gfn_t gfn; |
fb67e14f | 1939 | unsigned long cr3; |
c4806acd | 1940 | bool direct_map; |
cd389f50 | 1941 | u64 error_code; |
af585b92 GN |
1942 | }; |
1943 | ||
9cc39a5a | 1944 | extern u32 __read_mostly kvm_nr_uret_msrs; |
3edd6839 | 1945 | extern bool __read_mostly allow_smaller_maxphyaddr; |
fdf513e3 | 1946 | extern bool __read_mostly enable_apicv; |
459074cf | 1947 | extern bool __read_mostly enable_device_posted_irqs; |
afaf0b2f | 1948 | extern struct kvm_x86_ops kvm_x86_ops; |
97896d04 | 1949 | |
89604647 | 1950 | #define kvm_x86_call(func) static_call(kvm_x86_##func) |
5d766508 | 1951 | #define kvm_pmu_call(func) static_call(kvm_x86_pmu_##func) |
89604647 | 1952 | |
9af5471b JB |
1953 | #define KVM_X86_OP(func) \ |
1954 | DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func)); | |
e4fc23ba | 1955 | #define KVM_X86_OP_OPTIONAL KVM_X86_OP |
5be2226f | 1956 | #define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP |
9af5471b JB |
1957 | #include <asm/kvm-x86-ops.h> |
1958 | ||
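/*
 * A minimal sketch of how the static calls declared above are used:
 * common x86 code reaches the vendor (VMX/SVM) implementation through
 * kvm_x86_call(), which is patched into a direct call once kvm_x86_ops
 * is registered, e.g. (call sites illustrative):
 *
 *	kvm_x86_call(vcpu_load)(vcpu, cpu);
 *	cpl = kvm_x86_call(get_cpl)(vcpu);
 *
 * Hooks declared with KVM_X86_OP_OPTIONAL_RET0 may be left NULL, in
 * which case the static call falls back to returning 0.
 */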
4f8396b9 SC |
1959 | int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops); |
1960 | void kvm_x86_vendor_exit(void); | |
1961 | ||
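/*
 * Hedged sketch of a vendor module entry point; the "myvendor" names
 * are hypothetical, only kvm_x86_init_ops and the vendor init/exit
 * functions declared above come from this header.
 */
#if 0	/* illustrative only, not built */
static struct kvm_x86_init_ops myvendor_init_ops __initdata = {
	.hardware_setup	= myvendor_hardware_setup,
	.runtime_ops	= &myvendor_x86_ops,
	.pmu_ops	= &myvendor_pmu_ops,
};

static int __init myvendor_module_init(void)
{
	return kvm_x86_vendor_init(&myvendor_init_ops);
}
module_init(myvendor_module_init);
#endif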
434a1e94 SC |
1962 | #define __KVM_HAVE_ARCH_VM_ALLOC |
1963 | static inline struct kvm *kvm_arch_alloc_vm(void) | |
1964 | { | |
88dca4ca | 1965 | return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); |
434a1e94 | 1966 | } |
78b497f2 JG |
1967 | |
1968 | #define __KVM_HAVE_ARCH_VM_FREE | |
562b6b08 | 1969 | void kvm_arch_free_vm(struct kvm *kvm); |
434a1e94 | 1970 | |
0277022a | 1971 | #if IS_ENABLED(CONFIG_HYPERV) |
a1342c80 DM |
1972 | #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS |
1973 | static inline int kvm_arch_flush_remote_tlbs(struct kvm *kvm) | |
b08660e5 | 1974 | { |
8a1300ff | 1975 | if (kvm_x86_ops.flush_remote_tlbs && |
89604647 | 1976 | !kvm_x86_call(flush_remote_tlbs)(kvm)) |
b08660e5 TL |
1977 | return 0; |
1978 | else | |
1979 | return -ENOTSUPP; | |
1980 | } | |
1981 | ||
d4788996 | 1982 | #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLBS_RANGE |
0277022a SC |
1983 | static inline int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm, gfn_t gfn, |
1984 | u64 nr_pages) | |
1985 | { | |
1986 | if (!kvm_x86_ops.flush_remote_tlbs_range) | |
1987 | return -EOPNOTSUPP; | |
1988 | ||
89604647 | 1989 | return kvm_x86_call(flush_remote_tlbs_range)(kvm, gfn, nr_pages); |
0277022a SC |
1990 | } |
1991 | #endif /* CONFIG_HYPERV */ | |
d4788996 | 1992 | |
812d4323 LX |
1993 | enum kvm_intr_type { |
1994 | /* Values are arbitrary, but must be non-zero. */ | |
1995 | KVM_HANDLING_IRQ = 1, | |
1996 | KVM_HANDLING_NMI, | |
1997 | }; | |
1998 | ||
1999 | /* Enable perf NMI and timer modes to work, and minimise false positives. */ | |
e1bfc245 | 2000 | #define kvm_arch_pmi_in_guest(vcpu) \ |
812d4323 LX |
2001 | ((vcpu) && (vcpu)->arch.handling_intr_from_guest && \ |
2002 | (!!in_nmi() == ((vcpu)->arch.handling_intr_from_guest == KVM_HANDLING_NMI))) | |
e1bfc245 | 2003 | |
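/*
 * Illustrative consumer (hypothetical; the real users are the perf
 * guest callbacks): a PMI handler attributes a sample to the guest only
 * if it arrived while the vCPU was handling a guest-originated
 * interrupt and the NMI-ness of the two contexts matches.
 *
 *	if (kvm_arch_pmi_in_guest(vcpu))
 *		... account the sample to the guest ...
 */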
982bae43 | 2004 | void __init kvm_mmu_x86_module_init(void); |
1d0e8480 SC |
2005 | int kvm_mmu_vendor_module_init(void); |
2006 | void kvm_mmu_vendor_module_exit(void); | |
54f1585a ZX |
2007 | |
2008 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu); | |
2009 | int kvm_mmu_create(struct kvm_vcpu *vcpu); | |
0df9dab8 | 2010 | void kvm_mmu_init_vm(struct kvm *kvm); |
13d268ca | 2011 | void kvm_mmu_uninit_vm(struct kvm *kvm); |
54f1585a | 2012 | |
90b4fe17 CP |
2013 | void kvm_mmu_init_memslot_memory_attributes(struct kvm *kvm, |
2014 | struct kvm_memory_slot *slot); | |
2015 | ||
49c6f875 | 2016 | void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); |
8a3c1a33 | 2017 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
1c91cad4 | 2018 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, |
269e9552 | 2019 | const struct kvm_memory_slot *memslot, |
3c9bd400 | 2020 | int start_level); |
a3fe5dbd DM |
2021 | void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm, |
2022 | const struct kvm_memory_slot *memslot, | |
2023 | int target_level); | |
cb00a70b DM |
2024 | void kvm_mmu_try_split_huge_pages(struct kvm *kvm, |
2025 | const struct kvm_memory_slot *memslot, | |
2026 | u64 start, u64 end, | |
2027 | int target_level); | |
13e2e4f6 DM |
2028 | void kvm_mmu_recover_huge_pages(struct kvm *kvm, |
2029 | const struct kvm_memory_slot *memslot); | |
f4b4b180 | 2030 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, |
269e9552 | 2031 | const struct kvm_memory_slot *memslot); |
15248258 | 2032 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); |
bc8a3d89 | 2033 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); |
c63cf135 | 2034 | void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end); |
54f1585a | 2035 | |
2df4a5eb | 2036 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); |
cc4b6871 | 2037 | |
3200f405 | 2038 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
9f811285 | 2039 | const void *val, int bytes); |
2f333bcb | 2040 | |
6ef768fa PB |
2041 | struct kvm_irq_mask_notifier { |
2042 | void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); | |
2043 | int irq; | |
2044 | struct hlist_node link; | |
2045 | }; | |
2046 | ||
2047 | void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, | |
2048 | struct kvm_irq_mask_notifier *kimn); | |
2049 | void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | |
2050 | struct kvm_irq_mask_notifier *kimn); | |
2051 | void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, | |
2052 | bool mask); | |
2053 | ||
2f333bcb | 2054 | extern bool tdp_enabled; |
9f811285 | 2055 | |
a3e06bbe LJ |
2056 | u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); |
2057 | ||
41577ab8 SC |
2058 | /* |
2059 | * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing | |
2060 | * userspace I/O) to indicate that the emulation context | |
d9f6e12f | 2061 | * should be reused as is, i.e. skip initialization of |
41577ab8 SC |
2062 | * emulation context, instruction fetch and decode. |
2063 | * | |
2064 | * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware. | |
2065 | * Indicates that only select instructions (tagged with | |
2066 | * EmulateOnUD) should be emulated (to minimize the emulator | |
2067 | * attack surface). See also EMULTYPE_TRAP_UD_FORCED. | |
2068 | * | |
2069 | * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to | |
2070 | * decode the instruction length. For use *only* by | |
906fa904 HW |
2071 | * kvm_x86_ops.skip_emulated_instruction() implementations if |
2072 | * EMULTYPE_COMPLETE_USER_EXIT is not set. | |
41577ab8 | 2073 | * |
92daa48b SC |
2074 | * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to |
2075 | * retry native execution under certain conditions. | |
2076 | * Can only be set in conjunction with EMULTYPE_PF. | |
41577ab8 SC |
2077 | * |
2078 | * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was | |
2079 | * triggered by KVM's magic "force emulation" prefix, | |
2080 | * which is opt in via module param (off by default). | |
2081 | * Bypasses EmulateOnUD restriction despite emulating | |
2082 | * due to an intercepted #UD (see EMULTYPE_TRAP_UD). | |
2083 | * Used to test the full emulator from userspace. | |
2084 | * | |
2085 | * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware | |
2086 | * backdoor emulation, which is opt in via module param. | |
d9f6e12f | 2087 | * VMware backdoor emulation handles select instructions |
41577ab8 | 2088 | * and reinjects the #GP for all other cases. |
92daa48b | 2089 | * |
47ef3ef8 IO |
2090 | * EMULTYPE_PF - Set when an intercepted #PF triggers the emulation, in which case |
2091 | * the CR2/GPA value passed on the stack is valid. | |
906fa904 HW |
2092 | * |
2093 | * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility | |
2094 | * state and inject single-step #DBs after skipping | |
2095 | * an instruction (after completing userspace I/O). | |
258d985f SC |
2096 | * |
2097 | * EMULTYPE_WRITE_PF_TO_SP - Set when emulating an intercepted page fault that | |
2098 | * is attempting to write a gfn that contains one or | |
2099 | * more of the PTEs used to translate the write itself, | |
2100 | * and the owning page table is being shadowed by KVM. | |
2101 | * If emulation of the faulting instruction fails and | |
2102 | * this flag is set, KVM will exit to userspace instead | |
2103 | * of retrying emulation as KVM cannot make forward | |
2104 | * progress. | |
2105 | * | |
2106 | * If emulation fails for a write to guest page tables, | |
2107 | * KVM unprotects (zaps) the shadow page for the target | |
2108 | * gfn and resumes the guest to retry the non-emulatable | |
2109 | * instruction (on hardware). Unprotecting the gfn | |
2110 | * doesn't allow forward progress for a self-changing | |
2111 | * access because doing so also zaps the translation for | |
2112 | * the gfn, i.e. retrying the instruction will hit a | |
2113 | * !PRESENT fault, which results in a new shadow page | |
2114 | * and sends KVM back to square one. | |
41577ab8 | 2115 | */ |
571008da SY |
2116 | #define EMULTYPE_NO_DECODE (1 << 0) |
2117 | #define EMULTYPE_TRAP_UD (1 << 1) | |
ba8afb6b | 2118 | #define EMULTYPE_SKIP (1 << 2) |
92daa48b | 2119 | #define EMULTYPE_ALLOW_RETRY_PF (1 << 3) |
b4000606 | 2120 | #define EMULTYPE_TRAP_UD_FORCED (1 << 4) |
42cbf068 | 2121 | #define EMULTYPE_VMWARE_GP (1 << 5) |
92daa48b | 2122 | #define EMULTYPE_PF (1 << 6) |
906fa904 | 2123 | #define EMULTYPE_COMPLETE_USER_EXIT (1 << 7) |
258d985f | 2124 | #define EMULTYPE_WRITE_PF_TO_SP (1 << 8) |
92daa48b | 2125 | |
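/*
 * Sketch of a typical flag combination: emulation triggered by an
 * intercepted #PF that is allowed to unprotect the gfn and retry
 * native execution (call site illustrative; kvm_emulate_instruction()
 * is declared below):
 *
 *	int emul_type = EMULTYPE_PF | EMULTYPE_ALLOW_RETRY_PF;
 *
 *	return kvm_emulate_instruction(vcpu, emul_type);
 */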
47ef3ef8 IO |
2126 | static inline bool kvm_can_emulate_event_vectoring(int emul_type) |
2127 | { | |
2128 | return !(emul_type & EMULTYPE_PF); | |
2129 | } | |
2130 | ||
c60658d1 SC |
2131 | int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); |
2132 | int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, | |
2133 | void *insn, int insn_len); | |
e615e355 DE |
2134 | void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, |
2135 | u64 *data, u8 ndata); | |
2136 | void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); | |
35be0ade | 2137 | |
11c98fa0 IO |
2138 | void kvm_prepare_event_vectoring_exit(struct kvm_vcpu *vcpu, gpa_t gpa); |
2139 | ||
f2b4b7dd | 2140 | void kvm_enable_efer_bits(u64); |
384bb783 | 2141 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); |
653ea448 SC |
2142 | int kvm_get_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 *data); |
2143 | int kvm_set_msr_with_filter(struct kvm_vcpu *vcpu, u32 index, u64 data); | |
edef5c36 | 2144 | int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); |
f20935d8 SC |
2145 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); |
2146 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); | |
1edce0a9 SC |
2147 | int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); |
2148 | int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); | |
5ff3a351 SC |
2149 | int kvm_emulate_as_nop(struct kvm_vcpu *vcpu); |
2150 | int kvm_emulate_invd(struct kvm_vcpu *vcpu); | |
2151 | int kvm_emulate_mwait(struct kvm_vcpu *vcpu); | |
2152 | int kvm_handle_invalid_op(struct kvm_vcpu *vcpu); | |
2153 | int kvm_emulate_monitor(struct kvm_vcpu *vcpu); | |
54f1585a | 2154 | |
dca7f128 | 2155 | int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); |
6a908b62 | 2156 | int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
54f1585a | 2157 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
1460179d | 2158 | int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu); |
647daca2 | 2159 | int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); |
f5f48ee1 | 2160 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); |
54f1585a | 2161 | |
3e6e0aab | 2162 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
c53da4f3 | 2163 | void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
c697518a | 2164 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
2b4a273b | 2165 | void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); |
3e6e0aab | 2166 | |
7f3d35fd KW |
2167 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, |
2168 | int reason, bool has_error_code, u32 error_code); | |
37817f29 | 2169 | |
f27ad38a | 2170 | void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); |
5b51cb13 | 2171 | void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); |
49a9b07e | 2172 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
2390218b | 2173 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
a83b29c6 | 2174 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
eea1cff9 | 2175 | int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
020df079 | 2176 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); |
fc5375dd | 2177 | unsigned long kvm_get_dr(struct kvm_vcpu *vcpu, int dr); |
2d3ad1f4 AK |
2178 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); |
2179 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | |
92f9895c | 2180 | int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu); |
54f1585a | 2181 | |
609e36d3 | 2182 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); |
8fe8ab46 | 2183 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); |
54f1585a | 2184 | |
91586a3b JK |
2185 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); |
2186 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | |
c483c454 | 2187 | int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu); |
91586a3b | 2188 | |
298101da AK |
2189 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
2190 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | |
4d5523cf | 2191 | void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); |
b50cb2b1 SC |
2192 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned int nr, |
2193 | bool has_error_code, u32 error_code); | |
6389ee94 | 2194 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); |
7709aba8 | 2195 | void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, |
53b3d8e9 | 2196 | struct x86_exception *fault); |
0a79b009 | 2197 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
16f8a6f9 | 2198 | bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); |
298101da | 2199 | |
1a577b72 MT |
2200 | static inline int __kvm_irq_line_state(unsigned long *irq_state, |
2201 | int irq_source_id, int level) | |
2202 | { | |
2203 | /* Logical OR for level-triggered interrupts */ | |
2204 | if (level) | |
2205 | __set_bit(irq_source_id, irq_state); | |
2206 | else | |
2207 | __clear_bit(irq_source_id, irq_state); | |
2208 | ||
2209 | return !!(*irq_state); | |
2210 | } | |
2211 | ||
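/*
 * Worked example of the OR semantics above (hypothetical sources 0 and
 * 1 sharing one level-triggered line): the line is only released once
 * every source has deasserted it.
 *
 *	unsigned long state = 0;
 *
 *	__kvm_irq_line_state(&state, 0, 1);	returns 1, line asserted
 *	__kvm_irq_line_state(&state, 1, 1);	returns 1, still asserted
 *	__kvm_irq_line_state(&state, 1, 0);	returns 1, source 0 remains
 *	__kvm_irq_line_state(&state, 0, 0);	returns 0, line released
 */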
2212 | int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); | |
2213 | void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); | |
3de42dc0 | 2214 | |
3419ffc8 | 2215 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
fa4c027a | 2216 | int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu); |
3419ffc8 | 2217 | |
7c86663b PB |
2218 | void kvm_update_dr7(struct kvm_vcpu *vcpu); |
2219 | ||
4df68566 SC |
2220 | bool __kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, |
2221 | bool always_retry); | |
2222 | ||
2223 | static inline bool kvm_mmu_unprotect_gfn_and_retry(struct kvm_vcpu *vcpu, | |
2224 | gpa_t cr2_or_gpa) | |
2225 | { | |
2226 | return __kvm_mmu_unprotect_gfn_and_retry(vcpu, cr2_or_gpa, false); | |
2227 | } | |
2228 | ||
0c1c92f1 | 2229 | void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, |
6a82cd1c | 2230 | ulong roots_to_free); |
0c1c92f1 | 2231 | void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu); |
ab9ae313 AK |
2232 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, |
2233 | struct x86_exception *exception); | |
ab9ae313 AK |
2234 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, |
2235 | struct x86_exception *exception); | |
2236 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, | |
2237 | struct x86_exception *exception); | |
54f1585a | 2238 | |
4e19c36f | 2239 | bool kvm_apicv_activated(struct kvm *kvm); |
d5fa597e | 2240 | bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu); |
2008fab3 | 2241 | void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); |
320af55a SC |
2242 | void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, |
2243 | enum kvm_apicv_inhibit reason, bool set); | |
2244 | void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, | |
2245 | enum kvm_apicv_inhibit reason, bool set); | |
2246 | ||
2247 | static inline void kvm_set_apicv_inhibit(struct kvm *kvm, | |
2248 | enum kvm_apicv_inhibit reason) | |
2249 | { | |
2250 | kvm_set_or_clear_apicv_inhibit(kvm, reason, true); | |
2251 | } | |
d62caabb | 2252 | |
320af55a SC |
2253 | static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, |
2254 | enum kvm_apicv_inhibit reason) | |
2255 | { | |
2256 | kvm_set_or_clear_apicv_inhibit(kvm, reason, false); | |
2257 | } | |
b0a1637f | 2258 | |
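/*
 * Usage sketch (mirrors the PIT re-inject inhibit documented above;
 * the exact call sites live in the device emulation code):
 *
 *	kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PIT_REINJ);
 *	...
 *	kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PIT_REINJ);
 *
 * Callers that already hold apicv_update_lock use the __kvm_* variant.
 */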
736c291c | 2259 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, |
dc25e89e | 2260 | void *insn, int insn_len); |
bca99c03 | 2261 | void kvm_mmu_print_sptes(struct kvm_vcpu *vcpu, gpa_t gpa, const char *msg); |
a7052897 | 2262 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); |
753b43c9 | 2263 | void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
cd42853e | 2264 | u64 addr, unsigned long roots); |
eb4b248e | 2265 | void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); |
b5129100 | 2266 | void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); |
34c16eec | 2267 | |
746700d2 WH |
2268 | void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, |
2269 | int tdp_max_root_level, int tdp_huge_page_level); | |
18552672 | 2270 | |
517987e3 | 2271 | |
89ea60c2 | 2272 | #ifdef CONFIG_KVM_PRIVATE_MEM |
517987e3 | 2273 | #define kvm_arch_has_private_mem(kvm) ((kvm)->arch.has_private_mem) |
89ea60c2 SC |
2274 | #else |
2275 | #define kvm_arch_has_private_mem(kvm) false | |
2276 | #endif | |
2277 | ||
66155de9 SC |
2278 | #define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) |
2279 | ||
d6e88aec | 2280 | static inline u16 kvm_read_ldt(void) |
ec6d273d ZX |
2281 | { |
2282 | u16 ldt; | |
2283 | asm("sldt %0" : "=g"(ldt)); | |
2284 | return ldt; | |
2285 | } | |
2286 | ||
d6e88aec | 2287 | static inline void kvm_load_ldt(u16 sel) |
ec6d273d ZX |
2288 | { |
2289 | asm("lldt %0" : : "rm"(sel)); | |
2290 | } | |
ec6d273d | 2291 | |
ec6d273d ZX |
2292 | #ifdef CONFIG_X86_64 |
2293 | static inline unsigned long read_msr(unsigned long msr) | |
2294 | { | |
2295 | u64 value; | |
2296 | ||
c435e608 | 2297 | rdmsrq(msr, value); |
ec6d273d ZX |
2298 | return value; |
2299 | } | |
2300 | #endif | |
2301 | ||
c1a5d4f9 AK |
2302 | static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) |
2303 | { | |
2304 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); | |
2305 | } | |
2306 | ||
ec6d273d ZX |
2307 | #define TSS_IOPB_BASE_OFFSET 0x66 |
2308 | #define TSS_BASE_SIZE 0x68 | |
2309 | #define TSS_IOPB_SIZE (65536 / 8) | |
2310 | #define TSS_REDIRECTION_SIZE (256 / 8) | |
7d76b4d3 JP |
2311 | #define RMODE_TSS_SIZE \ |
2312 | (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) | |
53e0aa7b | 2313 | |
37817f29 IE |
2314 | enum { |
2315 | TASK_SWITCH_CALL = 0, | |
2316 | TASK_SWITCH_IRET = 1, | |
2317 | TASK_SWITCH_JMP = 2, | |
2318 | TASK_SWITCH_GATE = 3, | |
2319 | }; | |
2320 | ||
32e69f23 | 2321 | #define HF_GUEST_MASK (1 << 0) /* VCPU is in guest-mode */ |
a7662aa5 PB |
2322 | |
2323 | #ifdef CONFIG_KVM_SMM | |
32e69f23 ML |
2324 | #define HF_SMM_MASK (1 << 1) |
2325 | #define HF_SMM_INSIDE_NMI_MASK (1 << 2) | |
1371d904 | 2326 | |
eed52e43 | 2327 | # define KVM_MAX_NR_ADDRESS_SPACES 2 |
89ea60c2 SC |
2328 | /* SMM is currently unsupported for guests with private memory. */ |
2329 | # define kvm_arch_nr_memslot_as_ids(kvm) (kvm_arch_has_private_mem(kvm) ? 1 : 2) | |
ba97bb07 PB |
2330 | # define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) |
2331 | # define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) | |
2332 | #else | |
2333 | # define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0) | |
2334 | #endif | |
1371d904 | 2335 | |
c7c9c56c | 2336 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); |
a1b37100 | 2337 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
71cc849b | 2338 | int kvm_cpu_has_extint(struct kvm_vcpu *v); |
a1b37100 | 2339 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
363010e1 | 2340 | int kvm_cpu_get_extint(struct kvm_vcpu *v); |
0b71785d | 2341 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
d28bc9dd | 2342 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); |
e930bffe | 2343 | |
4180bf1b | 2344 | int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, |
bdf7ffc8 | 2345 | unsigned long ipi_bitmap_high, u32 min, |
4180bf1b WL |
2346 | unsigned long icr, int op_64_bit); |
2347 | ||
e5fda4bb | 2348 | int kvm_add_user_return_msr(u32 msr); |
8ea8b8d6 | 2349 | int kvm_find_user_return_msr(u32 msr); |
7e34fbd0 | 2350 | int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); |
d3a6b6cf | 2351 | void kvm_user_return_msr_update_cache(unsigned int index, u64 val); |
18863bdd | 2352 | |
61a05d44 SC |
2353 | static inline bool kvm_is_supported_user_return_msr(u32 msr) |
2354 | { | |
2355 | return kvm_find_user_return_msr(msr) >= 0; | |
2356 | } | |
2357 | ||
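/*
 * Sketch of the user-return MSR flow (the MSR and values are
 * illustrative): a vendor module registers the MSR once during setup,
 * then programs the guest value with an all-ones update mask; KVM
 * restores the host value before returning to userspace.
 *
 *	int slot = kvm_add_user_return_msr(MSR_TSC_AUX);
 *	...
 *	if (slot >= 0)
 *		kvm_set_user_return_msr(slot, guest_val, -1ull);
 */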
62711e5a | 2358 | u64 kvm_scale_tsc(u64 tsc, u64 ratio); |
4ba76538 | 2359 | u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); |
83150f29 IS |
2360 | u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier); |
2361 | u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier); | |
35181e86 | 2362 | |
82b32774 | 2363 | unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); |
f92653ee JK |
2364 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); |
2365 | ||
2860c4b1 | 2366 | void kvm_make_scan_ioapic_request(struct kvm *kvm); |
7ee30bc1 NNL |
2367 | void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, |
2368 | unsigned long *vcpu_bitmap); | |
2860c4b1 | 2369 | |
2a18b7e7 | 2370 | bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, |
af585b92 GN |
2371 | struct kvm_async_pf *work); |
2372 | void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, | |
2373 | struct kvm_async_pf *work); | |
56028d08 GN |
2374 | void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, |
2375 | struct kvm_async_pf *work); | |
557a961a | 2376 | void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu); |
7c0ade6c | 2377 | bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); |
af585b92 GN |
2378 | extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); |
2379 | ||
6affcbed KH |
2380 | int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); |
2381 | int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err); | |
db8fcefa | 2382 | |
ff5a983c PX |
2383 | void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, |
2384 | u32 size); | |
d71ba788 PB |
2385 | bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); |
2386 | bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); | |
f5132b01 | 2387 | |
8feb4a04 FW |
2388 | bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq, |
2389 | struct kvm_vcpu **dest_vcpu); | |
2390 | ||
37131313 | 2391 | void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, |
d84f1e07 | 2392 | struct kvm_lapic_irq *irq); |
197a4f4b | 2393 | |
fdcf7562 AG |
2394 | static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) |
2395 | { | |
2396 | /* We can only post Fixed and LowPrio IRQs */ | |
637543a8 SS |
2397 | return (irq->delivery_mode == APIC_DM_FIXED || |
2398 | irq->delivery_mode == APIC_DM_LOWEST); | |
fdcf7562 AG |
2399 | } |
2400 | ||
d1ed092f SS |
2401 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) |
2402 | { | |
89604647 | 2403 | kvm_x86_call(vcpu_blocking)(vcpu); |
d1ed092f SS |
2404 | } |
2405 | ||
2406 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) | |
2407 | { | |
89604647 | 2408 | kvm_x86_call(vcpu_unblocking)(vcpu); |
d1ed092f SS |
2409 | } |
2410 | ||
7d669f50 SS |
2411 | static inline int kvm_cpu_get_apicid(int mps_cpu) |
2412 | { | |
2413 | #ifdef CONFIG_X86_LOCAL_APIC | |
64063505 | 2414 | return default_cpu_present_to_apicid(mps_cpu); |
7d669f50 SS |
2415 | #else |
2416 | WARN_ON_ONCE(1); | |
2417 | return BAD_APICID; | |
2418 | #endif | |
2419 | } | |
2420 | ||
1e76a3ce | 2421 | int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); |
d501f747 | 2422 | |
c68dc1b5 OU |
2423 | #define KVM_CLOCK_VALID_FLAGS \ |
2424 | (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC) | |
d501f747 | 2425 | |
6d849191 OU |
2426 | #define KVM_X86_VALID_QUIRKS \ |
2427 | (KVM_X86_QUIRK_LINT0_REENABLED | \ | |
2428 | KVM_X86_QUIRK_CD_NW_CLEARED | \ | |
2429 | KVM_X86_QUIRK_LAPIC_MMIO_HOLE | \ | |
2430 | KVM_X86_QUIRK_OUT_7E_INC_RIP | \ | |
f1a9761f | 2431 | KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \ |
bfbcc81b | 2432 | KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ |
aa8d1f48 | 2433 | KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ |
dcb988cd | 2434 | KVM_X86_QUIRK_SLOT_ZAP_ALL | \ |
c9c1e20b YZ |
2435 | KVM_X86_QUIRK_STUFF_FEATURE_MSRS | \ |
2436 | KVM_X86_QUIRK_IGNORE_GUEST_PAT) | |
6d849191 | 2437 | |
a4dae7c7 | 2438 | #define KVM_X86_CONDITIONAL_QUIRKS \ |
c9c1e20b YZ |
2439 | (KVM_X86_QUIRK_CD_NW_CLEARED | \ |
2440 | KVM_X86_QUIRK_IGNORE_GUEST_PAT) | |
6d849191 | 2441 | |
e65733b5 OU |
2442 | /* |
2443 | * KVM previously used a u32 field in kvm_run to indicate the hypercall was | |
2444 | * initiated from long mode. KVM now sets bit 0 to indicate long mode, but the | |
2445 | * remaining 31 lower bits must be 0 to preserve ABI. | |
2446 | */ | |
2447 | #define KVM_EXIT_HYPERCALL_MBZ GENMASK_ULL(31, 1) | |
2448 | ||
5f9e1698 PB |
2449 | static inline bool kvm_arch_has_irq_bypass(void) |
2450 | { | |
459074cf | 2451 | return enable_device_posted_irqs; |
5f9e1698 PB |
2452 | } |
2453 | ||
1965aae3 | 2454 | #endif /* _ASM_X86_KVM_HOST_H */ |