Commit | Line | Data |
---|---|---|
20c8ccb1 | 1 | /* SPDX-License-Identifier: GPL-2.0-only */ |
a656c8ef | 2 | /* |
043405e1 CO |
3 | * Kernel-based Virtual Machine driver for Linux |
4 | * | |
5 | * This header defines architecture specific interfaces, x86 version | |
043405e1 CO |
6 | */ |
7 | ||
1965aae3 PA |
8 | #ifndef _ASM_X86_KVM_HOST_H |
9 | #define _ASM_X86_KVM_HOST_H | |
043405e1 | 10 | |
34c16eec ZX |
11 | #include <linux/types.h> |
12 | #include <linux/mm.h> | |
e930bffe | 13 | #include <linux/mmu_notifier.h> |
229456fc | 14 | #include <linux/tracepoint.h> |
f5f48ee1 | 15 | #include <linux/cpumask.h> |
f5132b01 | 16 | #include <linux/irq_work.h> |
447ae316 | 17 | #include <linux/irq.h> |
22b94c4b | 18 | #include <linux/workqueue.h> |
34c16eec ZX |
19 | |
20 | #include <linux/kvm.h> | |
21 | #include <linux/kvm_para.h> | |
edf88417 | 22 | #include <linux/kvm_types.h> |
f5132b01 | 23 | #include <linux/perf_event.h> |
d828199e MT |
24 | #include <linux/pvclock_gtod.h> |
25 | #include <linux/clocksource.h> | |
87276880 | 26 | #include <linux/irqbypass.h> |
5c919412 | 27 | #include <linux/hyperv.h> |
0823570f | 28 | #include <linux/kfifo.h> |
34c16eec | 29 | |
7d669f50 | 30 | #include <asm/apic.h> |
50d0a0f9 | 31 | #include <asm/pvclock-abi.h> |
e01a1b57 | 32 | #include <asm/desc.h> |
0bed3b56 | 33 | #include <asm/mtrr.h> |
9962d032 | 34 | #include <asm/msr-index.h> |
3ee89722 | 35 | #include <asm/asm.h> |
21ebbeda | 36 | #include <asm/kvm_page_track.h> |
95c7b77d | 37 | #include <asm/kvm_vcpu_regs.h> |
5a485803 | 38 | #include <asm/hyperv-tlfs.h> |
e01a1b57 | 39 | |
741cbbae PB |
40 | #define __KVM_HAVE_ARCH_VCPU_DEBUGFS |
41 | ||
074c82c8 | 42 | #define KVM_MAX_VCPUS 1024 |
4ddacd52 EH |
43 | |
44 | /* | |
45 | * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs | |
46 | * might be larger than the actual number of VCPUs because the | |
47 | * APIC ID encodes CPU topology information. | |
48 | * | |
49 | * In the worst case, we'll need less than one extra bit for the | |
50 | * Core ID, and less than one extra bit for the Package (Die) ID, | |
51 | * so ratio of 4 should be enough. | |
52 | */ | |
53 | #define KVM_VCPU_ID_RATIO 4 | |
a1c42dde | 54 | #define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO) |
4ddacd52 | 55 | |
0743247f | 56 | /* memory slots that are not exposed to userspace */ |
bdd1c37a | 57 | #define KVM_INTERNAL_MEM_SLOTS 3 |
93a5cef0 | 58 | |
b401ee0b | 59 | #define KVM_HALT_POLL_NS_DEFAULT 200000 |
69a9f69b | 60 | |
8175e5b7 AG |
61 | #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS |
62 | ||
3c9bd400 JZ |
63 | #define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ |
64 | KVM_DIRTY_LOG_INITIALLY_SET) | |
65 | ||
fe6b6bc8 CQ |
66 | #define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \ |
67 | KVM_BUS_LOCK_DETECTION_EXIT) | |
68 | ||
2f4073e0 TX |
69 | #define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \ |
70 | KVM_X86_NOTIFY_VMEXIT_USER) | |
71 | ||
2860c4b1 | 72 | /* x86-specific vcpu->requests bit members */ |
2387149e AJ |
73 | #define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) |
74 | #define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) | |
75 | #define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) | |
76 | #define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) | |
77 | #define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) | |
727a7e27 | 78 | #define KVM_REQ_LOAD_MMU_PGD KVM_ARCH_REQ(5) |
2387149e AJ |
79 | #define KVM_REQ_EVENT KVM_ARCH_REQ(6) |
80 | #define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) | |
81 | #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) | |
82 | #define KVM_REQ_NMI KVM_ARCH_REQ(9) | |
83 | #define KVM_REQ_PMU KVM_ARCH_REQ(10) | |
84 | #define KVM_REQ_PMI KVM_ARCH_REQ(11) | |
cf7316d0 | 85 | #ifdef CONFIG_KVM_SMM |
2387149e | 86 | #define KVM_REQ_SMI KVM_ARCH_REQ(12) |
cf7316d0 | 87 | #endif |
2387149e AJ |
88 | #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) |
89 | #define KVM_REQ_MCLOCK_INPROGRESS \ | |
90 | KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
91 | #define KVM_REQ_SCAN_IOAPIC \ | |
92 | KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
93 | #define KVM_REQ_GLOBAL_CLOCK_UPDATE KVM_ARCH_REQ(16) | |
94 | #define KVM_REQ_APIC_PAGE_RELOAD \ | |
95 | KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
96 | #define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18) | |
97 | #define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19) | |
98 | #define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) | |
99 | #define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) | |
100 | #define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) | |
e40ff1d6 | 101 | #define KVM_REQ_LOAD_EOI_EXITMAP KVM_ARCH_REQ(23) |
729c15c2 | 102 | #define KVM_REQ_GET_NESTED_STATE_PAGES KVM_ARCH_REQ(24) |
8df14af4 SS |
103 | #define KVM_REQ_APICV_UPDATE \ |
104 | KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
eeeb4f67 | 105 | #define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) |
07ffaf34 | 106 | #define KVM_REQ_TLB_FLUSH_GUEST \ |
1ebfaa11 | 107 | KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) |
557a961a | 108 | #define KVM_REQ_APF_READY KVM_ARCH_REQ(28) |
1a155254 | 109 | #define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29) |
a85863c2 MS |
110 | #define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \ |
111 | KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
527d5cd7 SC |
112 | #define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \ |
113 | KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
adc43caa VK |
114 | #define KVM_REQ_HV_TLB_FLUSH \ |
115 | KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) | |
2860c4b1 | 116 | |
cfec82cb JR |
117 | #define CR0_RESERVED_BITS \ |
118 | (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | |
119 | | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ | |
120 | | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG)) | |
121 | ||
cfec82cb JR |
122 | #define CR4_RESERVED_BITS \ |
123 | (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\ | |
124 | | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \ | |
ad756a16 | 125 | | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \ |
afcbf13f | 126 | | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \ |
fd8cb433 | 127 | | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \ |
ae3e61e1 | 128 | | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP)) |
cfec82cb JR |
129 | |
130 | #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR) | |
131 | ||
132 | ||
cd6e8f87 | 133 | |
cd6e8f87 | 134 | #define INVALID_PAGE (~(hpa_t)0) |
dd180b3e XG |
135 | #define VALID_PAGE(x) ((x) != INVALID_PAGE) |
136 | ||
ec04b260 | 137 | /* KVM Hugepage definitions for x86 */ |
3bae0459 SC |
138 | #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G |
139 | #define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1) | |
82855413 JR |
140 | #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) |
141 | #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) | |
ec04b260 JR |
142 | #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) |
143 | #define KVM_HPAGE_MASK(x) (~(KVM_HPAGE_SIZE(x) - 1)) | |
144 | #define KVM_PAGES_PER_HPAGE(x) (KVM_HPAGE_SIZE(x) / PAGE_SIZE) | |
05da4558 | 145 | |
f5756029 | 146 | #define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50 |
bc8a3d89 | 147 | #define KVM_MIN_ALLOC_MMU_PAGES 64UL |
114df303 | 148 | #define KVM_MMU_HASH_SHIFT 12 |
1ae0a13d | 149 | #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) |
d657a98e ZX |
150 | #define KVM_MIN_FREE_MMU_PAGES 5 |
151 | #define KVM_REFILL_PAGES 25 | |
3f4e3eb4 | 152 | #define KVM_MAX_CPUID_ENTRIES 256 |
0bed3b56 | 153 | #define KVM_NR_FIXED_MTRR_REGION 88 |
0d234daf | 154 | #define KVM_NR_VAR_MTRR 8 |
d657a98e | 155 | |
af585b92 GN |
156 | #define ASYNC_PF_PER_VCPU 64 |
157 | ||
5fdbf976 | 158 | enum kvm_reg { |
95c7b77d SC |
159 | VCPU_REGS_RAX = __VCPU_REGS_RAX, |
160 | VCPU_REGS_RCX = __VCPU_REGS_RCX, | |
161 | VCPU_REGS_RDX = __VCPU_REGS_RDX, | |
162 | VCPU_REGS_RBX = __VCPU_REGS_RBX, | |
163 | VCPU_REGS_RSP = __VCPU_REGS_RSP, | |
164 | VCPU_REGS_RBP = __VCPU_REGS_RBP, | |
165 | VCPU_REGS_RSI = __VCPU_REGS_RSI, | |
166 | VCPU_REGS_RDI = __VCPU_REGS_RDI, | |
2b3ccfa0 | 167 | #ifdef CONFIG_X86_64 |
95c7b77d SC |
168 | VCPU_REGS_R8 = __VCPU_REGS_R8, |
169 | VCPU_REGS_R9 = __VCPU_REGS_R9, | |
170 | VCPU_REGS_R10 = __VCPU_REGS_R10, | |
171 | VCPU_REGS_R11 = __VCPU_REGS_R11, | |
172 | VCPU_REGS_R12 = __VCPU_REGS_R12, | |
173 | VCPU_REGS_R13 = __VCPU_REGS_R13, | |
174 | VCPU_REGS_R14 = __VCPU_REGS_R14, | |
175 | VCPU_REGS_R15 = __VCPU_REGS_R15, | |
2b3ccfa0 | 176 | #endif |
5fdbf976 | 177 | VCPU_REGS_RIP, |
f8845541 | 178 | NR_VCPU_REGS, |
2b3ccfa0 | 179 | |
6de4f3ad | 180 | VCPU_EXREG_PDPTR = NR_VCPU_REGS, |
bd31fe49 | 181 | VCPU_EXREG_CR0, |
aff48baa | 182 | VCPU_EXREG_CR3, |
f98c1e77 | 183 | VCPU_EXREG_CR4, |
6de12732 | 184 | VCPU_EXREG_RFLAGS, |
2fb92db1 | 185 | VCPU_EXREG_SEGMENTS, |
5addc235 | 186 | VCPU_EXREG_EXIT_INFO_1, |
87915858 | 187 | VCPU_EXREG_EXIT_INFO_2, |
6de4f3ad AK |
188 | }; |
189 | ||
2b3ccfa0 | 190 | enum { |
81609e3e | 191 | VCPU_SREG_ES, |
2b3ccfa0 | 192 | VCPU_SREG_CS, |
81609e3e | 193 | VCPU_SREG_SS, |
2b3ccfa0 | 194 | VCPU_SREG_DS, |
2b3ccfa0 ZX |
195 | VCPU_SREG_FS, |
196 | VCPU_SREG_GS, | |
2b3ccfa0 ZX |
197 | VCPU_SREG_TR, |
198 | VCPU_SREG_LDTR, | |
199 | }; | |
200 | ||
1e9e2622 WL |
201 | enum exit_fastpath_completion { |
202 | EXIT_FASTPATH_NONE, | |
404d5d7b WL |
203 | EXIT_FASTPATH_REENTER_GUEST, |
204 | EXIT_FASTPATH_EXIT_HANDLED, | |
1e9e2622 | 205 | }; |
404d5d7b | 206 | typedef enum exit_fastpath_completion fastpath_t; |
1e9e2622 | 207 | |
2f728d66 SC |
208 | struct x86_emulate_ctxt; |
209 | struct x86_exception; | |
58c1d206 | 210 | union kvm_smram; |
2f728d66 SC |
211 | enum x86_intercept; |
212 | enum x86_intercept_stage; | |
2b3ccfa0 | 213 | |
42dbaa5a JK |
214 | #define KVM_NR_DB_REGS 4 |
215 | ||
e8ea85fb | 216 | #define DR6_BUS_LOCK (1 << 11) |
42dbaa5a JK |
217 | #define DR6_BD (1 << 13) |
218 | #define DR6_BS (1 << 14) | |
cfb634fe | 219 | #define DR6_BT (1 << 15) |
6f43ed01 | 220 | #define DR6_RTM (1 << 16) |
9a3ecd5e CQ |
221 | /* |
222 | * DR6_ACTIVE_LOW combines fixed-1 and active-low bits. | |
223 | * We can regard all the bits in DR6_FIXED_1 as active_low bits; | |
224 | * they will never be 0 for now, but when they are defined | |
225 | * in the future it will require no code change. | |
226 | * | |
227 | * DR6_ACTIVE_LOW is also used as the init/reset value for DR6. | |
228 | */ | |
229 | #define DR6_ACTIVE_LOW 0xffff0ff0 | |
e8ea85fb | 230 | #define DR6_VOLATILE 0x0001e80f |
9a3ecd5e | 231 | #define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE) |
42dbaa5a JK |
232 | |
233 | #define DR7_BP_EN_MASK 0x000000ff | |
234 | #define DR7_GE (1 << 9) | |
235 | #define DR7_GD (1 << 13) | |
236 | #define DR7_FIXED_1 0x00000400 | |
6f43ed01 | 237 | #define DR7_VOLATILE 0xffff2bff |
42dbaa5a | 238 | |
7e582ccb ML |
239 | #define KVM_GUESTDBG_VALID_MASK \ |
240 | (KVM_GUESTDBG_ENABLE | \ | |
241 | KVM_GUESTDBG_SINGLESTEP | \ | |
242 | KVM_GUESTDBG_USE_HW_BP | \ | |
243 | KVM_GUESTDBG_USE_SW_BP | \ | |
244 | KVM_GUESTDBG_INJECT_BP | \ | |
61e5f69e ML |
245 | KVM_GUESTDBG_INJECT_DB | \ |
246 | KVM_GUESTDBG_BLOCKIRQ) | |
7e582ccb ML |
247 | |
248 | ||
c205fb7d NA |
249 | #define PFERR_PRESENT_BIT 0 |
250 | #define PFERR_WRITE_BIT 1 | |
251 | #define PFERR_USER_BIT 2 | |
252 | #define PFERR_RSVD_BIT 3 | |
253 | #define PFERR_FETCH_BIT 4 | |
be94f6b7 | 254 | #define PFERR_PK_BIT 5 |
00e7646c | 255 | #define PFERR_SGX_BIT 15 |
14727754 TL |
256 | #define PFERR_GUEST_FINAL_BIT 32 |
257 | #define PFERR_GUEST_PAGE_BIT 33 | |
4f4aa80e | 258 | #define PFERR_IMPLICIT_ACCESS_BIT 48 |
c205fb7d | 259 | |
d6ecfe97 DM |
260 | #define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) |
261 | #define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) | |
262 | #define PFERR_USER_MASK BIT(PFERR_USER_BIT) | |
263 | #define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) | |
264 | #define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) | |
265 | #define PFERR_PK_MASK BIT(PFERR_PK_BIT) | |
266 | #define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) | |
267 | #define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) | |
268 | #define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) | |
269 | #define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) | |
14727754 TL |
270 | |
271 | #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ | |
14727754 TL |
272 | PFERR_WRITE_MASK | \ |
273 | PFERR_PRESENT_MASK) | |
c205fb7d | 274 | |
41383771 GN |
275 | /* apic attention bits */ |
276 | #define KVM_APIC_CHECK_VAPIC 0 | |
ae7a2a3f MT |
277 | /* |
278 | * The following bit is set with PV-EOI, unset on EOI. | |
279 | * We detect PV-EOI changes by guest by comparing | |
280 | * this bit with PV-EOI in guest memory. | |
281 | * See the implementation in apic_update_pv_eoi. | |
282 | */ | |
283 | #define KVM_APIC_PV_EOI_PENDING 1 | |
41383771 | 284 | |
d84f1e07 FW |
285 | struct kvm_kernel_irq_routing_entry; |
286 | ||
21ebbeda | 287 | /* |
616007c8 SC |
288 | * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page |
289 | * also includes TDP pages) to determine whether or not a page can be used in | |
7a7ae829 | 290 | * the given MMU context. This is a subset of the overall kvm_cpu_role to |
616007c8 SC |
291 | * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating |
292 | * 2 bytes per gfn instead of 4 bytes per gfn. | |
21ebbeda | 293 | * |
84e5ffd0 | 294 | * Upper-level shadow pages having gptes are tracked for write-protection via |
616007c8 SC |
295 | * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create |
296 | * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise | |
297 | * gfn_track will overflow and explosions will ensue. | |
298 | * | |
299 | * A unique shadow page (SP) for a gfn is created if and only if an existing SP | |
300 | * cannot be reused. The ability to reuse a SP is tracked by its role, which | |
301 | * incorporates various mode bits and properties of the SP. Roughly speaking, | |
302 | * the number of unique SPs that can theoretically be created is 2^n, where n | |
303 | * is the number of bits that are used to compute the role. | |
304 | * | |
dc1ce455 PB |
305 | * But, even though there are 19 bits in the mask below, not all combinations |
306 | * of modes and flags are possible: | |
616007c8 | 307 | * |
dc1ce455 PB |
308 | * - invalid shadow pages are not accounted, so the bits are effectively 18 |
309 | * | |
bb3b394d | 310 | * - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging); |
dc1ce455 | 311 | * execonly and ad_disabled are only used for nested EPT which has |
bb3b394d | 312 | * has_4_byte_gpte=0. Therefore, 2 bits are always unused. |
dc1ce455 PB |
313 | * |
314 | * - the 4 bits of level are effectively limited to the values 2/3/4/5, | |
315 | * as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE | |
316 | * paging has exactly one upper level, making level completely redundant | |
bb3b394d | 317 | * when has_4_byte_gpte=1. |
dc1ce455 PB |
318 | * |
319 | * - on top of this, smep_andnot_wp and smap_andnot_wp are only set if | |
320 | * cr0_wp=0, therefore these three bits only give rise to 5 possibilities. | |
321 | * | |
322 | * Therefore, the maximum number of possible upper-level shadow pages for a | |
323 | * single gfn is a bit less than 2^13. | |
21ebbeda | 324 | */ |
d657a98e | 325 | union kvm_mmu_page_role { |
36d9594d | 326 | u32 word; |
d657a98e | 327 | struct { |
7d76b4d3 | 328 | unsigned level:4; |
bb3b394d | 329 | unsigned has_4_byte_gpte:1; |
7d76b4d3 | 330 | unsigned quadrant:2; |
f6e2c02b | 331 | unsigned direct:1; |
7d76b4d3 | 332 | unsigned access:3; |
2e53d63a | 333 | unsigned invalid:1; |
167f8a5c | 334 | unsigned efer_nx:1; |
3dbe1415 | 335 | unsigned cr0_wp:1; |
411c588d | 336 | unsigned smep_andnot_wp:1; |
0be0226f | 337 | unsigned smap_andnot_wp:1; |
ac8d57e5 | 338 | unsigned ad_disabled:1; |
1313cc2b | 339 | unsigned guest_mode:1; |
84e5ffd0 LJ |
340 | unsigned passthrough:1; |
341 | unsigned :5; | |
699023e2 PB |
342 | |
343 | /* | |
344 | * This is left at the top of the word so that | |
345 | * kvm_memslots_for_spte_role can extract it with a | |
346 | * simple shift. While there is room, give it a whole | |
347 | * byte so it is also faster to load it from memory. | |
348 | */ | |
349 | unsigned smm:8; | |
d657a98e ZX |
350 | }; |
351 | }; | |
352 | ||
a336282d | 353 | /* |
616007c8 SC |
354 | * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties |
355 | * relevant to the current MMU configuration. When loading CR0, CR4, or EFER, | |
356 | * including on nested transitions, if nothing in the full role changes then | |
357 | * MMU re-configuration can be skipped. @valid bit is set on first usage so we | |
358 | * don't treat all-zero structure as valid data. | |
359 | * | |
360 | * The properties that are tracked in the extended role but not the page role | |
361 | * are for things that either (a) do not affect the validity of the shadow page | |
362 | * or (b) are indirectly reflected in the shadow page's role. For example, | |
363 | * CR4.PKE only affects permission checks for software walks of the guest page | |
364 | * tables (because KVM doesn't support Protection Keys with shadow paging), and | |
365 | * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level. | |
366 | * | |
367 | * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role. | |
368 | * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and | |
369 | * SMAP, but the MMU's permission checks for software walks need to be SMEP and | |
370 | * SMAP aware regardless of CR0.WP. | |
a336282d | 371 | */ |
616007c8 | 372 | union kvm_mmu_extended_role { |
36d9594d | 373 | u32 word; |
a336282d VK |
374 | struct { |
375 | unsigned int valid:1; | |
376 | unsigned int execonly:1; | |
377 | unsigned int cr4_pse:1; | |
378 | unsigned int cr4_pke:1; | |
379 | unsigned int cr4_smap:1; | |
380 | unsigned int cr4_smep:1; | |
f71a53d1 | 381 | unsigned int cr4_la57:1; |
b8453cdc | 382 | unsigned int efer_lma:1; |
a336282d | 383 | }; |
36d9594d VK |
384 | }; |
385 | ||
7a7ae829 | 386 | union kvm_cpu_role { |
36d9594d VK |
387 | u64 as_u64; |
388 | struct { | |
389 | union kvm_mmu_page_role base; | |
390 | union kvm_mmu_extended_role ext; | |
391 | }; | |
392 | }; | |
393 | ||
018aabb5 TY |
394 | struct kvm_rmap_head { |
395 | unsigned long val; | |
396 | }; | |
397 | ||
1c08364c | 398 | struct kvm_pio_request { |
45def77e | 399 | unsigned long linear_rip; |
1c08364c | 400 | unsigned long count; |
1c08364c AK |
401 | int in; |
402 | int port; | |
403 | int size; | |
1c08364c AK |
404 | }; |
405 | ||
855feb67 | 406 | #define PT64_ROOT_MAX_LEVEL 5 |
2a7266a8 | 407 | |
a0a64f50 | 408 | struct rsvd_bits_validate { |
2a7266a8 | 409 | u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL]; |
a0a64f50 XG |
410 | u64 bad_mt_xwr; |
411 | }; | |
412 | ||
7c390d35 | 413 | struct kvm_mmu_root_info { |
be01e8e2 | 414 | gpa_t pgd; |
7c390d35 JS |
415 | hpa_t hpa; |
416 | }; | |
417 | ||
418 | #define KVM_MMU_ROOT_INFO_INVALID \ | |
be01e8e2 | 419 | ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE }) |
7c390d35 | 420 | |
b94742c9 JS |
421 | #define KVM_MMU_NUM_PREV_ROOTS 3 |
422 | ||
f94db0c8 SC |
423 | #define KVM_MMU_ROOT_CURRENT BIT(0) |
424 | #define KVM_MMU_ROOT_PREVIOUS(i) BIT(1 + (i)) /* (i) is parenthesized so expression arguments keep their precedence */ | |
425 | #define KVM_MMU_ROOTS_ALL (BIT(1 + KVM_MMU_NUM_PREV_ROOTS) - 1) | |
426 | ||
531810ca BG |
427 | #define KVM_HAVE_MMU_RWLOCK |
428 | ||
985ab278 | 429 | struct kvm_mmu_page; |
c501040a | 430 | struct kvm_page_fault; |
985ab278 | 431 | |
d657a98e | 432 | /* |
855feb67 YZ |
433 | * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit, |
434 | * and 2-level 32-bit). The kvm_mmu structure abstracts the details of the | |
435 | * current mmu mode. | |
d657a98e ZX |
436 | */ |
437 | struct kvm_mmu { | |
d8dd54e0 | 438 | unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu); |
e4e517b4 | 439 | u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index); |
c501040a | 440 | int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault); |
6389ee94 AK |
441 | void (*inject_page_fault)(struct kvm_vcpu *vcpu, |
442 | struct x86_exception *fault); | |
1f5a21ee | 443 | gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
5b22bbe7 | 444 | gpa_t gva_or_gpa, u64 access, |
1f5a21ee | 445 | struct x86_exception *exception); |
c3c6c9fc LJ |
446 | int (*sync_spte)(struct kvm_vcpu *vcpu, |
447 | struct kvm_mmu_page *sp, int i); | |
b9e5603c | 448 | struct kvm_mmu_root_info root; |
7a7ae829 | 449 | union kvm_cpu_role cpu_role; |
7a458f0e | 450 | union kvm_mmu_page_role root_role; |
97d64b78 | 451 | |
2d344105 HH |
452 | /* |
453 | * The pkru_mask indicates if protection key checks are needed. It | |
454 | * consists of 16 domains indexed by page fault error code bits [4:1], | |
455 | * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables. | |
456 | * Each domain has 2 bits which are ANDed with AD and WD from PKRU. | |
457 | */ | |
458 | u32 pkru_mask; | |
459 | ||
81764725 PH |
460 | struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS]; |
461 | ||
462 | /* | |
463 | * Bitmap; bit set = permission fault | |
464 | * Byte index: page fault error code [4:1] | |
465 | * Bit index: pte permissions in ACC_* format | |
466 | */ | |
467 | u8 permissions[16]; | |
468 | ||
d657a98e | 469 | u64 *pae_root; |
03ca4589 | 470 | u64 *pml4_root; |
cb0f722a | 471 | u64 *pml5_root; |
c258b62b XG |
472 | |
473 | /* | |
474 | * Check zero bits on shadow page table entries; these | |
475 | * bits include not only hardware reserved bits but also | |
476 | * the bits that SPTEs never use. | |
477 | */ | |
478 | struct rsvd_bits_validate shadow_zero_check; | |
479 | ||
a0a64f50 | 480 | struct rsvd_bits_validate guest_rsvd_check; |
ff03a073 JR |
481 | |
482 | u64 pdptrs[4]; /* pae */ | |
d657a98e ZX |
483 | }; |
484 | ||
f5132b01 GN |
485 | enum pmc_type { |
486 | KVM_PMC_GP = 0, | |
487 | KVM_PMC_FIXED, | |
488 | }; | |
489 | ||
490 | struct kvm_pmc { | |
491 | enum pmc_type type; | |
492 | u8 idx; | |
de0f6195 LX |
493 | bool is_paused; |
494 | bool intr; | |
f5132b01 | 495 | u64 counter; |
de0f6195 | 496 | u64 prev_counter; |
f5132b01 GN |
497 | u64 eventsel; |
498 | struct perf_event *perf_event; | |
499 | struct kvm_vcpu *vcpu; | |
a6da0d77 | 500 | /* |
68fb4757 | 501 | * only for creating or reusing perf_event, |
a6da0d77 LX |
502 | * eventsel value for general purpose counters, |
503 | * ctrl value for fixed counters. | |
504 | */ | |
505 | u64 current_config; | |
f5132b01 GN |
506 | }; |
507 | ||
4f1fa2a1 LX |
508 | /* More counters may conflict with other existing Architectural MSRs */ |
509 | #define KVM_INTEL_PMC_MAX_GENERIC 8 | |
510 | #define MSR_ARCH_PERFMON_PERFCTR_MAX (MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1) | |
511 | #define MSR_ARCH_PERFMON_EVENTSEL_MAX (MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1) | |
0144ba0c | 512 | #define KVM_PMC_MAX_FIXED 3 |
e33b6d79 | 513 | #define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1) |
556f3c9a | 514 | #define KVM_AMD_PMC_MAX_GENERIC 6 |
f5132b01 GN |
515 | struct kvm_pmu { |
516 | unsigned nr_arch_gp_counters; | |
517 | unsigned nr_arch_fixed_counters; | |
518 | unsigned available_event_types; | |
519 | u64 fixed_ctr_ctrl; | |
2c985527 | 520 | u64 fixed_ctr_ctrl_mask; |
f5132b01 GN |
521 | u64 global_ctrl; |
522 | u64 global_status; | |
f5132b01 GN |
523 | u64 counter_bitmask[2]; |
524 | u64 global_ctrl_mask; | |
c715eb9f | 525 | u64 global_ovf_ctrl_mask; |
103af0a9 | 526 | u64 reserved_bits; |
95b065bf | 527 | u64 raw_event_mask; |
f5132b01 | 528 | u8 version; |
4f1fa2a1 | 529 | struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; |
0144ba0c | 530 | struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; |
f5132b01 | 531 | struct irq_work irq_work; |
f1c5651f SC |
532 | |
533 | /* | |
534 | * Overlay the bitmap with a 64-bit atomic so that all bits can be | |
535 | * set in a single access, e.g. to reprogram all counters when the PMU | |
536 | * filter changes. | |
537 | */ | |
538 | union { | |
539 | DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); | |
540 | atomic64_t __reprogram_pmi; | |
541 | }; | |
b35e5548 LX |
542 | DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); |
543 | DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); | |
544 | ||
8183a538 | 545 | u64 ds_area; |
c59a1f10 LX |
546 | u64 pebs_enable; |
547 | u64 pebs_enable_mask; | |
902caeb6 LX |
548 | u64 pebs_data_cfg; |
549 | u64 pebs_data_cfg_mask; | |
c59a1f10 | 550 | |
85425032 LX |
551 | /* |
552 | * If a guest counter is cross-mapped to host counter with different | |
553 | * index, its PEBS capability will be temporarily disabled. | |
554 | * | |
555 | * The user should make sure that this mask is updated | |
556 | * after disabling interrupts and before perf_guest_get_msrs(); | |
557 | */ | |
558 | u64 host_cross_mapped_mask; | |
559 | ||
b35e5548 LX |
560 | /* |
561 | * The gate to release perf_events not marked in | |
562 | * pmc_in_use only once in a vcpu time slice. | |
563 | */ | |
564 | bool need_cleanup; | |
565 | ||
566 | /* | |
567 | * The total number of programmed perf_events; it helps to avoid | |
568 | * redundant checks before cleanup if the guest doesn't use vPMU at all. | |
569 | */ | |
570 | u8 event_count; | |
f5132b01 GN |
571 | }; |
572 | ||
25462f7f WH |
573 | struct kvm_pmu_ops; |
574 | ||
360b948d PB |
575 | enum { |
576 | KVM_DEBUGREG_BP_ENABLED = 1, | |
c77fb5fe | 577 | KVM_DEBUGREG_WONT_EXIT = 2, |
360b948d PB |
578 | }; |
579 | ||
86fd5270 XG |
580 | struct kvm_mtrr_range { |
581 | u64 base; | |
582 | u64 mask; | |
19efffa2 | 583 | struct list_head node; |
86fd5270 XG |
584 | }; |
585 | ||
70109e7d | 586 | struct kvm_mtrr { |
86fd5270 | 587 | struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR]; |
70109e7d | 588 | mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION]; |
10fac2dc | 589 | u64 deftype; |
19efffa2 XG |
590 | |
591 | struct list_head head; | |
70109e7d XG |
592 | }; |
593 | ||
1f4b34f8 AS |
594 | /* Hyper-V SynIC timer */ |
595 | struct kvm_vcpu_hv_stimer { | |
596 | struct hrtimer timer; | |
597 | int index; | |
6a058a1e | 598 | union hv_stimer_config config; |
1f4b34f8 AS |
599 | u64 count; |
600 | u64 exp_time; | |
601 | struct hv_message msg; | |
602 | bool msg_pending; | |
603 | }; | |
604 | ||
5c919412 AS |
605 | /* Hyper-V synthetic interrupt controller (SynIC) */ |
606 | struct kvm_vcpu_hv_synic { | |
607 | u64 version; | |
608 | u64 control; | |
609 | u64 msg_page; | |
610 | u64 evt_page; | |
611 | atomic64_t sint[HV_SYNIC_SINT_COUNT]; | |
612 | atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT]; | |
613 | DECLARE_BITMAP(auto_eoi_bitmap, 256); | |
614 | DECLARE_BITMAP(vec_bitmap, 256); | |
615 | bool active; | |
efc479e6 | 616 | bool dont_zero_synic_pages; |
5c919412 AS |
617 | }; |
618 | ||
0823570f VK |
619 | /* The maximum number of entries on the TLB flush fifo. */ |
620 | #define KVM_HV_TLB_FLUSH_FIFO_SIZE (16) | |
621 | /* | |
622 | * Note: the following 'magic' entry is made up by KVM to avoid putting | |
623 | * anything besides GVA on the TLB flush fifo. It is theoretically possible | |
624 | * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000 | |
625 | * which will look identical. KVM's action to 'flush everything' instead of | |
626 | * flushing these particular addresses is, however, fully legitimate as | |
627 | * flushing more than requested is always OK. | |
628 | */ | |
629 | #define KVM_HV_TLB_FLUSHALL_ENTRY ((u64)-1) | |
630 | ||
53ca765a VK |
631 | enum hv_tlb_flush_fifos { |
632 | HV_L1_TLB_FLUSH_FIFO, | |
633 | HV_L2_TLB_FLUSH_FIFO, | |
634 | HV_NR_TLB_FLUSH_FIFOS, | |
635 | }; | |
636 | ||
0823570f VK |
637 | struct kvm_vcpu_hv_tlb_flush_fifo { |
638 | spinlock_t write_lock; | |
639 | DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE); | |
640 | }; | |
641 | ||
e83d5887 AS |
642 | /* Hyper-V per vcpu emulation context */ |
643 | struct kvm_vcpu_hv { | |
4592b7ea | 644 | struct kvm_vcpu *vcpu; |
d3457c87 | 645 | u32 vp_index; |
e83d5887 | 646 | u64 hv_vapic; |
9eec50b8 | 647 | s64 runtime_offset; |
5c919412 | 648 | struct kvm_vcpu_hv_synic synic; |
db397571 | 649 | struct kvm_hyperv_exit exit; |
1f4b34f8 AS |
650 | struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT]; |
651 | DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT); | |
644f7067 | 652 | bool enforce_cpuid; |
10d7bf1e VK |
653 | struct { |
654 | u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */ | |
655 | u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */ | |
656 | u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */ | |
657 | u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */ | |
658 | u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */ | |
659 | u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ | |
dea6e140 VK |
660 | u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */ |
661 | u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */ | |
10d7bf1e | 662 | } cpuid_cache; |
0823570f | 663 | |
53ca765a | 664 | struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; |
7d5e88d3 VK |
665 | |
666 | /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ | |
667 | u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; | |
38edb452 | 668 | |
046f5756 VK |
669 | struct hv_vp_assist_page vp_assist_page; |
670 | ||
38edb452 VK |
671 | struct { |
672 | u64 pa_page_gpa; | |
673 | u64 vm_id; | |
674 | u32 vp_id; | |
675 | } nested; | |
e83d5887 AS |
676 | }; |
677 | ||
48639df8 PD |
678 | struct kvm_hypervisor_cpuid { |
679 | u32 base; | |
680 | u32 limit; | |
681 | }; | |
682 | ||
23200b7a JM |
683 | /* Xen HVM per vcpu emulation context */ |
684 | struct kvm_vcpu_xen { | |
685 | u64 hypercall_rip; | |
30b5c851 | 686 | u32 current_runstate; |
fde0451b | 687 | u8 upcall_vector; |
7caf9571 | 688 | struct gfn_to_pfn_cache vcpu_info_cache; |
69d413cf | 689 | struct gfn_to_pfn_cache vcpu_time_info_cache; |
a795cd43 | 690 | struct gfn_to_pfn_cache runstate_cache; |
5ec3289b | 691 | struct gfn_to_pfn_cache runstate2_cache; |
30b5c851 DW |
692 | u64 last_steal; |
693 | u64 runstate_entry_time; | |
694 | u64 runstate_times[4]; | |
14243b38 | 695 | unsigned long evtchn_pending_sel; |
942c2490 | 696 | u32 vcpu_id; /* The Xen / ACPI vCPU ID */ |
53639526 JM |
697 | u32 timer_virq; |
698 | u64 timer_expires; /* In guest epoch */ | |
699 | atomic_t timer_pending; | |
700 | struct hrtimer timer; | |
1a65105a BO |
701 | int poll_evtchn; |
702 | struct timer_list poll_timer; | |
f422f853 | 703 | struct kvm_hypervisor_cpuid cpuid; |
23200b7a JM |
704 | }; |
705 | ||
d4963e31 SC |
706 | struct kvm_queued_exception { |
707 | bool pending; | |
708 | bool injected; | |
709 | bool has_error_code; | |
710 | u8 vector; | |
711 | u32 error_code; | |
712 | unsigned long payload; | |
713 | bool has_payload; | |
d4963e31 SC |
714 | }; |
715 | ||
ad312c7c | 716 | /* x86-specific per-vCPU state: registers, control registers, MMU contexts, FPU, queued events, TSC/pvclock bookkeeping, etc. */ struct kvm_vcpu_arch { |
5fdbf976 MT |
717 | /* |
718 | * rip and regs accesses must go through | |
719 | * kvm_{register,rip}_{read,write} functions. | |
720 | */ | |
721 | unsigned long regs[NR_VCPU_REGS]; | |
722 | u32 regs_avail; | |
723 | u32 regs_dirty; | |
34c16eec ZX |
724 | |
725 | unsigned long cr0; | |
e8467fda | 726 | unsigned long cr0_guest_owned_bits; |
34c16eec ZX |
727 | unsigned long cr2; |
728 | unsigned long cr3; | |
729 | unsigned long cr4; | |
fc78f519 | 730 | unsigned long cr4_guest_owned_bits; |
b899c132 | 731 | unsigned long cr4_guest_rsvd_bits; |
34c16eec | 732 | unsigned long cr8; |
37486135 | 733 | u32 host_pkru; |
b9dd21e1 | 734 | u32 pkru; |
1371d904 | 735 | u32 hflags; |
f6801dff | 736 | u64 efer; |
34c16eec ZX |
737 | u64 apic_base; |
738 | struct kvm_lapic *apic; /* kernel irqchip context */ | |
e40ff1d6 | 739 | bool load_eoi_exitmap_pending; |
6308630b | 740 | DECLARE_BITMAP(ioapic_handled_vectors, 256); |
41383771 | 741 | unsigned long apic_attention; |
e1035715 | 742 | int32_t apic_arb_prio; |
34c16eec | 743 | int mp_state; |
34c16eec | 744 | u64 ia32_misc_enable_msr; |
64d60670 | 745 | u64 smbase; |
52797bf9 | 746 | u64 smi_count; |
6cd88243 | 747 | bool at_instruction_boundary; |
b209749f | 748 | bool tpr_access_reporting; |
7204160e | 749 | bool xsaves_enabled; |
b5274b1b | 750 | bool xfd_no_write_intercept; |
20300099 | 751 | u64 ia32_xss; |
518e7b94 | 752 | u64 microcode_version; |
0cf9135b | 753 | u64 arch_capabilities; |
27461da3 | 754 | u64 perf_capabilities; |
34c16eec | 755 | |
14dfe855 JR |
756 | /* |
757 | * Paging state of the vcpu | |
758 | * | |
759 | * If the vcpu runs in guest mode with two level paging this still saves | |
760 | * the paging mode of the l1 guest. This context is always used to | |
761 | * handle faults. | |
762 | */ | |
44dd3ffa VK |
763 | struct kvm_mmu *mmu; |
764 | ||
765 | /* Non-nested MMU for L1 */ | |
766 | struct kvm_mmu root_mmu; | |
8df25a32 | 767 | |
14c07ad8 VK |
768 | /* L1 MMU when running nested */ |
769 | struct kvm_mmu guest_mmu; | |
770 | ||
6539e738 JR |
771 | /* |
772 | * Paging state of an L2 guest (used for nested npt) | |
773 | * | |
774 | * This context will save all necessary information to walk page tables | |
311497e0 | 775 | * of an L2 guest. This context is only initialized for page table |
6539e738 JR |
776 | * walking and not for faulting since we never handle l2 page faults on |
777 | * the host. | |
778 | */ | |
779 | struct kvm_mmu nested_mmu; | |
780 | ||
14dfe855 JR |
781 | /* |
782 | * Pointer to the mmu context currently used for | |
783 | * gva_to_gpa translations. | |
784 | */ | |
785 | struct kvm_mmu *walk_mmu; | |
786 | ||
53c07b18 | 787 | struct kvm_mmu_memory_cache mmu_pte_list_desc_cache; |
171a90d7 | 788 | struct kvm_mmu_memory_cache mmu_shadow_page_cache; |
6a97575d | 789 | struct kvm_mmu_memory_cache mmu_shadowed_info_cache; |
34c16eec ZX |
790 | struct kvm_mmu_memory_cache mmu_page_header_cache; |
791 | ||
f775b13e RR |
792 | /* |
793 | * QEMU userspace and the guest each have their own FPU state. | |
ec269475 PB |
794 | * In vcpu_run, we switch between the user and guest FPU contexts. |
795 | * While running a VCPU, the VCPU thread will have the guest FPU | |
796 | * context. | |
f775b13e RR |
797 | * |
798 | * Note that while the PKRU state lives inside the fpu registers, | |
799 | * it is switched out separately at VMENTER and VMEXIT time. The | |
d69c1382 | 800 | * "guest_fpstate" state here contains the guest FPU context, with the |
f775b13e RR |
801 | * host PKRU bits. |
802 | */ | |
d69c1382 | 803 | struct fpu_guest guest_fpu; |
f775b13e | 804 | |
2acf923e | 805 | u64 xcr0; |
ee519b3a | 806 | u64 guest_supported_xcr0; |
34c16eec | 807 | |
34c16eec ZX |
808 | struct kvm_pio_request pio; |
809 | void *pio_data; | |
b5998402 | 810 | void *sev_pio_data; |
95e16b47 | 811 | unsigned sev_pio_count; |
34c16eec | 812 | |
66fd3f7f GN |
813 | u8 event_exit_inst_len; |
814 | ||
7709aba8 SC |
815 | bool exception_from_userspace; |
816 | ||
d4963e31 SC |
817 | /* Exceptions to be injected to the guest. */ |
818 | struct kvm_queued_exception exception; | |
7709aba8 SC |
819 | /* Exception VM-Exits to be synthesized to L1. */ |
820 | struct kvm_queued_exception exception_vmexit; | |
298101da | 821 | |
937a7eae | 822 | struct kvm_queued_interrupt { |
04140b41 | 823 | bool injected; |
66fd3f7f | 824 | bool soft; |
937a7eae AK |
825 | u8 nr; |
826 | } interrupt; | |
827 | ||
34c16eec ZX |
828 | int halt_request; /* real mode on Intel only */ |
829 | ||
830 | int cpuid_nent; | |
255cbecf | 831 | struct kvm_cpuid_entry2 *cpuid_entries; |
48639df8 | 832 | struct kvm_hypervisor_cpuid kvm_cpuid; |
5a4f55cd | 833 | |
ca29e145 | 834 | u64 reserved_gpa_bits; |
5a4f55cd EK |
835 | int maxphyaddr; |
836 | ||
34c16eec ZX |
837 | /* emulate context */ |
838 | ||
c9b8b07c | 839 | struct x86_emulate_ctxt *emulate_ctxt; |
7ae441ea GN |
840 | bool emulate_regs_need_sync_to_vcpu; |
841 | bool emulate_regs_need_sync_from_vcpu; | |
716d51ab | 842 | int (*complete_userspace_io)(struct kvm_vcpu *vcpu); |
18068523 GOC |
843 | |
844 | gpa_t time; | |
50d0a0f9 | 845 | struct pvclock_vcpu_time_info hv_clock; |
e48672fa | 846 | unsigned int hw_tsc_khz; |
916d3608 | 847 | struct gfn_to_pfn_cache pv_time; |
51d59c6b MT |
848 | /* set guest stopped flag in pvclock flags field */ |
849 | bool pvclock_set_guest_stopped_request; | |
c9aaa895 GC |
850 | |
851 | struct { | |
a6bd811f | 852 | u8 preempted; |
c9aaa895 GC |
853 | u64 msr_val; |
854 | u64 last_steal; | |
7e2175eb | 855 | struct gfn_to_hva_cache cache; |
c9aaa895 GC |
856 | } st; |
857 | ||
56ba77a4 | 858 | u64 l1_tsc_offset; |
805d705f | 859 | u64 tsc_offset; /* current tsc offset */ |
1d5f066e | 860 | u64 last_guest_tsc; |
6f526ec5 | 861 | u64 last_host_tsc; |
0dd6a6ed | 862 | u64 tsc_offset_adjustment; |
e26101b1 ZA |
863 | u64 this_tsc_nsec; |
864 | u64 this_tsc_write; | |
0d3da0d2 | 865 | u64 this_tsc_generation; |
c285545f | 866 | bool tsc_catchup; |
cc578287 ZA |
867 | bool tsc_always_catchup; |
868 | s8 virtual_tsc_shift; | |
869 | u32 virtual_tsc_mult; | |
870 | u32 virtual_tsc_khz; | |
ba904635 | 871 | s64 ia32_tsc_adjust_msr; |
73f624f4 | 872 | u64 msr_ia32_power_ctl; |
805d705f IS |
873 | u64 l1_tsc_scaling_ratio; |
874 | u64 tsc_scaling_ratio; /* current scaling ratio */ | |
3419ffc8 | 875 | |
7460fb4a AK |
876 | atomic_t nmi_queued; /* unprocessed asynchronous NMIs */ |
877 | unsigned nmi_pending; /* NMI queued after currently running handler */ | |
878 | bool nmi_injected; /* Trying to inject an NMI this entry */ | |
f077825a | 879 | bool smi_pending; /* SMI queued after currently running handler */ |
73cd107b | 880 | u8 handling_intr_from_guest; |
9ba075a6 | 881 | |
70109e7d | 882 | struct kvm_mtrr mtrr_state; |
7cb060a9 | 883 | u64 pat; |
42dbaa5a | 884 | |
360b948d | 885 | unsigned switch_db_regs; |
42dbaa5a JK |
886 | unsigned long db[KVM_NR_DB_REGS]; |
887 | unsigned long dr6; | |
888 | unsigned long dr7; | |
889 | unsigned long eff_db[KVM_NR_DB_REGS]; | |
c8639010 | 890 | unsigned long guest_debug_dr7; |
db2336a8 KH |
891 | u64 msr_platform_info; |
892 | u64 msr_misc_features_enables; | |
890ca9ae HY |
893 | |
894 | u64 mcg_cap; | |
895 | u64 mcg_status; | |
896 | u64 mcg_ctl; | |
c45dcc71 | 897 | u64 mcg_ext_ctl; |
890ca9ae | 898 | u64 *mce_banks; |
281b5278 | 899 | u64 *mci_ctl2_banks; |
94fe45da | 900 | |
bebb106a XG |
901 | /* Cache MMIO info */ |
902 | u64 mmio_gva; | |
871bd034 | 903 | unsigned mmio_access; |
bebb106a | 904 | gfn_t mmio_gfn; |
56f17dd3 | 905 | u64 mmio_gen; |
bebb106a | 906 | |
f5132b01 GN |
907 | struct kvm_pmu pmu; |
908 | ||
94fe45da | 909 | /* used for guest single stepping over the given code position */ |
94fe45da | 910 | unsigned long singlestep_rip; |
f92653ee | 911 | |
8f014550 | 912 | bool hyperv_enabled; |
4592b7ea | 913 | struct kvm_vcpu_hv *hyperv; |
23200b7a | 914 | struct kvm_vcpu_xen xen; |
f5f48ee1 SY |
915 | |
916 | cpumask_var_t wbinvd_dirty_mask; | |
af585b92 | 917 | |
1cb3f3ae XG |
918 | unsigned long last_retry_eip; |
919 | unsigned long last_retry_addr; | |
920 | ||
af585b92 GN |
921 | struct { |
922 | bool halted; | |
dd03bcaa | 923 | gfn_t gfns[ASYNC_PF_PER_VCPU]; |
344d9588 | 924 | struct gfn_to_hva_cache data; |
2635b5c4 VK |
925 | u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */ |
926 | u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */ | |
927 | u16 vec; | |
7c90705b | 928 | u32 id; |
6adba527 | 929 | bool send_user_only; |
68fd66f1 | 930 | u32 host_apf_flags; |
52a5c155 | 931 | bool delivery_as_pf_vmexit; |
557a961a | 932 | bool pageready_pending; |
af585b92 | 933 | } apf; |
2b036c6b BO |
934 | |
935 | /* OSVW MSRs (AMD only) */ | |
936 | struct { | |
937 | u64 length; | |
938 | u64 status; | |
939 | } osvw; | |
ae7a2a3f MT |
940 | |
941 | struct { | |
942 | u64 msr_val; | |
943 | struct gfn_to_hva_cache data; | |
944 | } pv_eoi; | |
93c05d3e | 945 | |
2d5ba19b MT |
946 | u64 msr_kvm_poll_control; |
947 | ||
25d92081 YZ |
948 | /* set at EPT violation at this point */ |
949 | unsigned long exit_qualification; | |
6aef266c SV |
950 | |
951 | /* pv related host specific info */ | |
952 | struct { | |
953 | bool pv_unhalted; | |
954 | } pv; | |
7543a635 SR |
955 | |
956 | int pending_ioapic_eoi; | |
1c1a9ce9 | 957 | int pending_external_vector; |
0f89b207 | 958 | |
de63ad4c LM |
959 | /* preempted while it was in kernel mode (cpl=0) */ |
960 | bool preempted_in_kernel; | |
c595ceee PB |
961 | |
962 | /* Flush the L1 Data cache for L1TF mitigation on VMENTER */ | |
963 | bool l1tf_flush_l1d; | |
191c8137 | 964 | |
8a14fe4f | 965 | /* Host CPU on which VM-entry was most recently attempted */ |
63f5a190 | 966 | int last_vmentry_cpu; |
8a14fe4f | 967 | |
191c8137 BP |
968 | /* AMD MSRC001_0015 Hardware Configuration */ |
969 | u64 msr_hwcr; | |
66570e96 OU |
970 | |
971 | /* pv related cpuid info */ | |
972 | struct { | |
973 | /* | |
974 | * value of the eax register in the KVM_CPUID_FEATURES CPUID | |
975 | * leaf. | |
976 | */ | |
977 | u32 features; | |
978 | ||
979 | /* | |
980 | * indicates whether pv emulation should be disabled if features | |
981 | * are not present in the guest's cpuid | |
982 | */ | |
983 | bool enforce; | |
984 | } pv_cpuid; | |
add5e2f0 TL |
985 | |
986 | /* Protected Guests */ | |
987 | bool guest_state_protected; | |
3c86c0d3 | 988 | |
158a48ec ML |
989 | /* |
990 | * Set when PDPTS were loaded directly by the userspace without | |
991 | * reading the guest memory | |
992 | */ | |
993 | bool pdptrs_from_userspace; | |
994 | ||
3c86c0d3 VP |
995 | #if IS_ENABLED(CONFIG_HYPERV) |
996 | hpa_t hv_root_tdp; | |
997 | #endif | |
34c16eec ZX |
998 | }; |
999 | ||
db3fe4eb | 1000 | /* Large-page bookkeeping entry; kvm_arch_memory_slot.lpage_info[] points at arrays of these. */ struct kvm_lpage_info { |
92f94f1e | 1001 | int disallow_lpage; /* presumably non-zero means a large page must not be used here -- TODO confirm */ |
db3fe4eb TY |
1002 | }; |
1003 | ||
1004 | /* x86-specific per-memslot data: rmap heads, large-page info and page-tracking counters. */ struct kvm_arch_memory_slot { | |
018aabb5 | 1005 | struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES]; /* one pointer per page size */ |
db3fe4eb | 1006 | struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; /* one fewer entry than rmap[]; presumably the smallest page size needs none -- TODO confirm */ |
21ebbeda | 1007 | unsigned short *gfn_track[KVM_PAGE_TRACK_MAX]; /* one pointer per page-track mode */ |
db3fe4eb TY |
1008 | }; |
1009 | ||
3548a259 | 1010 | /* |
35366901 SC |
1011 | * Track the mode of the optimized logical map, as the rules for decoding the |
1012 | * destination vary per mode. Enabling the optimized logical map requires all | |
1013 | * software-enabled local APICs to be in the same mode, each addressable APIC to | |
1014 | * be mapped to only one MDA, and each MDA to map to at most one APIC. | |
3548a259 | 1015 | */ |
35366901 SC |
1016 | enum kvm_apic_logical_mode { |
1017 | /* All local APICs are software disabled. */ | |
1018 | KVM_APIC_MODE_SW_DISABLED, | |
1019 | /* All software enabled local APICs in xAPIC cluster addressing mode. */ | |
1020 | KVM_APIC_MODE_XAPIC_CLUSTER, | |
1021 | /* All software enabled local APICs in xAPIC flat addressing mode. */ | |
1022 | KVM_APIC_MODE_XAPIC_FLAT, | |
1023 | /* All software enabled local APICs in x2APIC mode. */ | |
1024 | KVM_APIC_MODE_X2APIC, | |
1025 | /* | |
1026 | * Optimized map disabled, e.g. not all local APICs in the same logical | |
1027 | * mode, same logical ID assigned to multiple APICs, etc. | |
1028 | */ | |
1029 | KVM_APIC_MODE_MAP_DISABLED, | |
1030 | }; | |
3548a259 | 1031 | |
1e08ec4a GN |
1032 | /* Lookup tables from APIC IDs to local APICs; referenced through the __rcu pointer kvm_arch.apic_map. */ struct kvm_apic_map { |
1033 | struct rcu_head rcu; | |
35366901 | 1034 | enum kvm_apic_logical_mode logical_mode; |
0ca52e7b | 1035 | u32 max_apic_id; |
e45115b6 RK |
1036 | union { /* the two xAPIC logical maps share storage; presumably logical_mode selects the valid one -- TODO confirm */ |
1037 | struct kvm_lapic *xapic_flat_map[8]; | |
1038 | struct kvm_lapic *xapic_cluster_map[16][4]; | |
1039 | }; | |
0ca52e7b | 1040 | struct kvm_lapic *phys_map[]; /* flexible array member; presumably sized from max_apic_id -- TODO confirm */ |
1e08ec4a GN |
1041 | }; |
1042 | ||
f97f5a56 JD |
1043 | /* Hyper-V synthetic debugger (SynDbg) state; embedded in struct kvm_hv as hv_syndbg. */ |
1044 | struct kvm_hv_syndbg { | |
1045 | struct { | |
1046 | u64 control; | |
1047 | u64 status; | |
1048 | u64 send_page; | |
1049 | u64 recv_page; | |
1050 | u64 pending_page; | |
1051 | } control; | |
1052 | u64 options; | |
1053 | }; | |
1054 | ||
cc9cfddb VK |
1055 | /* Current state of Hyper-V TSC page clocksource (stored in kvm_hv.hv_tsc_page_status) */ |
1056 | enum hv_tsc_page_status { | |
1057 | /* TSC page was not set up or disabled */ | |
1058 | HV_TSC_PAGE_UNSET = 0, | |
1059 | /* TSC page MSR was written by the guest, update pending */ | |
1060 | HV_TSC_PAGE_GUEST_CHANGED, | |
42dcbe7d | 1061 | /* TSC page update was triggered from the host side */ |
cc9cfddb VK |
1062 | HV_TSC_PAGE_HOST_CHANGED, |
1063 | /* TSC page was properly set up and is currently active */ | |
1064 | HV_TSC_PAGE_SET, | |
cc9cfddb VK |
1065 | /* TSC page was set up with an inaccessible GPA */ |
1066 | HV_TSC_PAGE_BROKEN, | |
1067 | }; | |
1068 | ||
e83d5887 AS |
1069 | /* Hyper-V emulation context (per VM; embedded in struct kvm_arch as hyperv) */ |
1070 | struct kvm_hv { | |
3f5ad8be | 1071 | struct mutex hv_lock; |
e83d5887 AS |
1072 | u64 hv_guest_os_id; |
1073 | u64 hv_hypercall; | |
1074 | u64 hv_tsc_page; | |
cc9cfddb | 1075 | enum hv_tsc_page_status hv_tsc_page_status; |
e7d9513b AS |
1076 | |
1077 | /* Hyper-V based guest crash (NT kernel bugcheck) parameters */ | |
1078 | u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; | |
1079 | u64 hv_crash_ctl; | |
095cf55d | 1080 | |
7357b1df | 1081 | struct ms_hyperv_tsc_page tsc_ref; |
faeb7833 RK |
1082 | |
1083 | struct idr conn_to_evt; | |
a2e164e7 VK |
1084 | |
1085 | u64 hv_reenlightenment_control; | |
1086 | u64 hv_tsc_emulation_control; | |
1087 | u64 hv_tsc_emulation_status; | |
2be1bd3a | 1088 | u64 hv_invtsc_control; |
87ee613d VK |
1089 | |
1090 | /* How many vCPUs have VP index != vCPU index */ | |
1091 | atomic_t num_mismatched_vp_indexes; | |
6f6a657c | 1092 | |
0f250a64 VK |
1093 | /* |
1094 | * How many SynICs use 'AutoEOI' feature | |
1095 | * (protected by arch.apicv_update_lock) | |
1096 | */ | |
1097 | unsigned int synic_auto_eoi_used; | |
1098 | ||
6f6a657c | 1099 | struct hv_partition_assist_pg *hv_pa_pg; |
f97f5a56 | 1100 | struct kvm_hv_syndbg hv_syndbg; |
e83d5887 AS |
1101 | }; |
1102 | ||
1a155254 AG |
1103 | /* One MSR range with an associated bitmap; struct kvm_x86_msr_filter holds an array of these. */ struct msr_bitmap_range { |
1104 | u32 flags; | |
1105 | u32 nmsrs; | |
1106 | u32 base; | |
1107 | unsigned long *bitmap; /* presumably one bit per MSR in [base, base + nmsrs) -- TODO confirm */ | |
1108 | }; | |
1109 | ||
a3833b81 DW |
1110 | /* Xen emulation context (per VM; embedded in struct kvm_arch as xen) */ |
1111 | struct kvm_xen { | |
310bc395 | 1112 | struct mutex xen_lock; |
28d1629f | 1113 | u32 xen_version; |
a3833b81 | 1114 | bool long_mode; |
d8ba8ba4 | 1115 | bool runstate_update_flag; |
40da8ccd | 1116 | u8 upcall_vector; |
1cfc9c4b | 1117 | struct gfn_to_pfn_cache shinfo_cache; |
2fd6df2f | 1118 | struct idr evtchn_ports; |
1a65105a | 1119 | unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)]; /* bitmap sized for KVM_MAX_VCPUS bits */ |
a3833b81 DW |
1120 | }; |
1121 | ||
49776faf RK |
1122 | /* How the VM's irqchip was created; stored in kvm_arch.irqchip_mode. */ enum kvm_irqchip_mode { |
1123 | KVM_IRQCHIP_NONE, /* presumably: no irqchip created via either mechanism below -- TODO confirm */ | |
1124 | KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ | |
1125 | KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ | |
1126 | }; | |
1127 | ||
b318e8de SC |
1128 | /* MSR filter; referenced through the __rcu pointer kvm_arch.msr_filter. */ struct kvm_x86_msr_filter { |
1129 | u8 count; /* presumably the number of valid entries in ranges[] -- TODO confirm */ | |
1130 | bool default_allow:1; | |
1131 | struct msr_bitmap_range ranges[16]; | |
1132 | }; | |
1133 | ||
14329b82 AL |
1134 | /* PMU event filter; referenced through the __rcu pointer kvm_arch.pmu_event_filter. */ struct kvm_x86_pmu_event_filter { |
1135 | __u32 action; | |
1136 | __u32 nevents; | |
1137 | __u32 fixed_counter_bitmap; | |
1138 | __u32 flags; | |
1139 | __u32 nr_includes; | |
1140 | __u32 nr_excludes; | |
1141 | __u64 *includes; /* NOTE(review): presumably points into events[], with nr_includes entries -- confirm */ | |
1142 | __u64 *excludes; /* NOTE(review): presumably points into events[], with nr_excludes entries -- confirm */ | |
1143 | __u64 events[]; /* flexible array member; presumably nevents entries -- TODO confirm */ | |
1144 | }; | |
1145 | ||
7491b7b2 | 1146 | /* Reasons for inhibiting APIC acceleration (APICv/AVIC). NOTE(review): kvm_arch.apicv_inhibit_reasons is an unsigned long, presumably a bitmask indexed by these values -- confirm. */ enum kvm_apicv_inhibit { |
a9603ae0 ML |
1147 | |
1148 | /********************************************************************/ | |
1149 | /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */ | |
1150 | /********************************************************************/ | |
1151 | ||
1152 | /* | |
1153 | * APIC acceleration is disabled by a module parameter | |
1154 | * and/or not supported in hardware. | |
1155 | */ | |
7491b7b2 | 1156 | APICV_INHIBIT_REASON_DISABLE, |
a9603ae0 ML |
1157 | |
1158 | /* | |
1159 | * APIC acceleration is inhibited because AutoEOI feature is | |
1160 | * being used by a HyperV guest. | |
1161 | */ | |
7491b7b2 | 1162 | APICV_INHIBIT_REASON_HYPERV, |
a9603ae0 ML |
1163 | |
1164 | /* | |
1165 | * APIC acceleration is inhibited because the userspace didn't yet | |
1166 | * enable the kernel/split irqchip. | |
1167 | */ | |
1168 | APICV_INHIBIT_REASON_ABSENT, | |
1169 | ||
1170 | /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ | |
1171 | * (out of band, debug measure of blocking all interrupts on this vCPU) | |
1172 | * was enabled, to avoid AVIC/APICv bypassing it. | |
1173 | */ | |
1174 | APICV_INHIBIT_REASON_BLOCKIRQ, | |
1175 | ||
5063c41b SC |
1176 | /* |
1177 | * APICv is disabled because not all vCPUs have a 1:1 mapping between | |
1178 | * APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack. | |
1179 | */ | |
1180 | APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED, | |
1181 | ||
3743c2f0 ML |
1182 | /* |
1183 | * For simplicity, the APIC acceleration is inhibited the | |
1184 | * first time either APIC ID or APIC base are changed by the guest | |
1185 | * from their reset values. | |
1186 | */ | |
1187 | APICV_INHIBIT_REASON_APIC_ID_MODIFIED, | |
1188 | APICV_INHIBIT_REASON_APIC_BASE_MODIFIED, | |
1189 | ||
a9603ae0 ML |
1190 | /******************************************************/ |
1191 | /* INHIBITs that are relevant only to the AMD's AVIC. */ | |
1192 | /******************************************************/ | |
1193 | ||
1194 | /* | |
1195 | * AVIC is inhibited on a vCPU because it runs a nested guest. | |
1196 | * | |
1197 | * This is needed because unlike APICv, the peers of this vCPU | |
1198 | * cannot use the doorbell mechanism to signal interrupts via AVIC when | |
1199 | * a vCPU runs nested. | |
1200 | */ | |
7491b7b2 | 1201 | APICV_INHIBIT_REASON_NESTED, |
a9603ae0 ML |
1202 | |
1203 | /* | |
1204 | * On SVM, the wait for the IRQ window is implemented with pending vIRQ, | |
1205 | * which cannot be injected when the AVIC is enabled, thus AVIC | |
1206 | * is inhibited while KVM waits for IRQ window. | |
1207 | */ | |
7491b7b2 | 1208 | APICV_INHIBIT_REASON_IRQWIN, |
a9603ae0 ML |
1209 | |
1210 | /* | |
1211 | * PIT (i8254) 're-inject' mode, relies on EOI intercept, | |
1212 | * which AVIC doesn't support for edge triggered interrupts. | |
1213 | */ | |
7491b7b2 | 1214 | APICV_INHIBIT_REASON_PIT_REINJ, |
a9603ae0 | 1215 | |
a9603ae0 ML |
1216 | /* |
1217 | * AVIC is disabled because SEV doesn't support it. | |
1218 | */ | |
c538dc79 | 1219 | APICV_INHIBIT_REASON_SEV, |
9a364857 SC |
1220 | |
1221 | /* | |
1222 | * AVIC is disabled because not all vCPUs with a valid LDR have a 1:1 | |
1223 | * mapping between logical ID and vCPU. | |
1224 | */ | |
1225 | APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED, | |
7491b7b2 | 1226 | }; |
4e19c36f | 1227 | |
fef9cce0 | 1228 | /* x86-specific per-VM state: MMU bookkeeping, in-kernel irqchip/APIC state, TSC/clock tracking, Hyper-V/Xen emulation contexts and feature toggles. */ struct kvm_arch { |
bc8a3d89 BG |
1229 | unsigned long n_used_mmu_pages; |
1230 | unsigned long n_requested_mmu_pages; | |
1231 | unsigned long n_max_mmu_pages; | |
332b207d | 1232 | unsigned int indirect_shadow_pages; |
ca333add | 1233 | u8 mmu_valid_gen; |
f05e70ac | 1234 | struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; |
f05e70ac | 1235 | struct list_head active_mmu_pages; |
31741eb1 | 1236 | struct list_head zapped_obsolete_pages; |
55c510e2 SC |
1237 | /* |
1238 | * A list of kvm_mmu_page structs that, if zapped, could possibly be | |
1239 | * replaced by an NX huge page. A shadow page is on this list if its | |
1240 | * existence disallows an NX huge page (nx_huge_page_disallowed is set) | |
1241 | * and there are no other conditions that prevent a huge page, e.g. | |
1242 | * the backing host page is huge, dirty logging is not enabled for its | |
1243 | * memslot, etc... Note, zapping shadow pages on this list doesn't | |
1244 | * guarantee an NX huge page will be created in its stead, e.g. if the | |
1245 | * guest attempts to execute from the region then KVM obviously can't | |
1246 | * create an NX huge page (without hanging the guest). | |
1247 | */ | |
1248 | struct list_head possible_nx_huge_pages; | |
13d268ca | 1249 | struct kvm_page_track_notifier_node mmu_sp_tracker; |
0eb05bf2 | 1250 | struct kvm_page_track_notifier_head track_notifier_head; |
ce25681d SC |
1251 | /* |
1252 | * Protects marking pages unsync during page faults, as TDP MMU page | |
1253 | * faults only take mmu_lock for read. For simplicity, the unsync | |
1254 | * pages lock is always taken when marking pages unsync regardless of | |
1255 | * whether mmu_lock is held for read or write. | |
1256 | */ | |
1257 | spinlock_t mmu_unsync_pages_lock; | |
365c8868 | 1258 | |
4d5c5d0f | 1259 | struct list_head assigned_dev_head; |
19de40a8 | 1260 | struct iommu_domain *iommu_domain; |
d96eb2c6 | 1261 | bool iommu_noncoherent; |
e0f0bbc5 AW |
1262 | #define __KVM_HAVE_ARCH_NONCOHERENT_DMA |
1263 | atomic_t noncoherent_dma_count; | |
5544eb9b PB |
1264 | #define __KVM_HAVE_ARCH_ASSIGNED_DEVICE |
1265 | atomic_t assigned_device_count; | |
d7deeeb0 ZX |
1266 | struct kvm_pic *vpic; |
1267 | struct kvm_ioapic *vioapic; | |
7837699f | 1268 | struct kvm_pit *vpit; |
42720138 | 1269 | atomic_t vapics_in_nmi_mode; |
1e08ec4a | 1270 | struct mutex apic_map_lock; |
6fcd9cbc | 1271 | struct kvm_apic_map __rcu *apic_map; |
44d52717 | 1272 | atomic_t apic_map_dirty; |
bfc6d222 | 1273 | |
a01b45e9 | 1274 | bool apic_access_memslot_enabled; |
2008fab3 SC |
1275 | bool apic_access_memslot_inhibited; |
1276 | ||
1277 | /* Protects apicv_inhibit_reasons */ | |
1278 | struct rw_semaphore apicv_update_lock; | |
4e19c36f | 1279 | unsigned long apicv_inhibit_reasons; |
18068523 GOC |
1280 | |
1281 | gpa_t wall_clock; | |
b7ebfb05 | 1282 | |
4d5422ce | 1283 | bool mwait_in_guest; |
caa057a2 | 1284 | bool hlt_in_guest; |
b31c114b | 1285 | bool pause_in_guest; |
b5170063 | 1286 | bool cstate_in_guest; |
4d5422ce | 1287 | |
5550af4d | 1288 | unsigned long irq_sources_bitmap; |
afbcf7ab | 1289 | s64 kvmclock_offset; |
869b4421 PB |
1290 | |
1291 | /* | |
1292 | * This also protects nr_vcpus_matched_tsc which is read from a | |
1293 | * preemption-disabled region, so it must be a raw spinlock. | |
1294 | */ | |
038f8c11 | 1295 | raw_spinlock_t tsc_write_lock; |
f38e098f | 1296 | u64 last_tsc_nsec; |
f38e098f | 1297 | u64 last_tsc_write; |
5d3cb0f6 | 1298 | u32 last_tsc_khz; |
828ca896 | 1299 | u64 last_tsc_offset; |
e26101b1 ZA |
1300 | u64 cur_tsc_nsec; |
1301 | u64 cur_tsc_write; | |
1302 | u64 cur_tsc_offset; | |
0d3da0d2 | 1303 | u64 cur_tsc_generation; |
b48aa97e | 1304 | int nr_vcpus_matched_tsc; |
ffde22ac | 1305 | |
ffbb61d0 DW |
1306 | u32 default_tsc_khz; |
1307 | ||
869b4421 | 1308 | seqcount_raw_spinlock_t pvclock_sc; |
d828199e MT |
1309 | bool use_master_clock; |
1310 | u64 master_kernel_ns; | |
a5a1d1c2 | 1311 | u64 master_cycle_now; |
7e44e449 | 1312 | struct delayed_work kvmclock_update_work; |
332967a3 | 1313 | struct delayed_work kvmclock_sync_work; |
d828199e | 1314 | |
ffde22ac | 1315 | struct kvm_xen_hvm_config xen_hvm_config; |
55cd8e5a | 1316 | |
6ef768fa PB |
1317 | /* reads protected by irq_srcu, writes by irq_lock */ |
1318 | struct hlist_head mask_notifier_list; | |
1319 | ||
e83d5887 | 1320 | struct kvm_hv hyperv; |
a3833b81 | 1321 | struct kvm_xen xen; |
b034cf01 | 1322 | |
a826faf1 | 1323 | bool backwards_tsc_observed; |
54750f2c | 1324 | bool boot_vcpu_runs_old_kvmclock; |
d71ba788 | 1325 | u32 bsp_vcpu_id; |
90de4a18 NA |
1326 | |
1327 | u64 disabled_quirks; | |
49df6397 | 1328 | |
49776faf | 1329 | enum kvm_irqchip_mode irqchip_mode; |
b053b2ae | 1330 | u8 nr_reserved_ioapic_pins; |
52004014 FW |
1331 | |
1332 | bool disabled_lapic_found; | |
44a95dae | 1333 | |
37131313 | 1334 | bool x2apic_format; |
c519265f | 1335 | bool x2apic_broadcast_quirk_disabled; |
6fbbde9a DS |
1336 | |
1337 | bool guest_can_read_msr_platform_info; | |
59073aaf | 1338 | bool exception_payload_enabled; |
66bb8a06 | 1339 | |
ed235117 CQ |
1340 | bool triple_fault_event; |
1341 | ||
b318e8de | 1342 | bool bus_lock_detection_enabled; |
ba7bb663 | 1343 | bool enable_pmu; |
2f4073e0 TX |
1344 | |
1345 | u32 notify_window; | |
1346 | u32 notify_vmexit_flags; | |
19238e75 AL |
1347 | /* |
1348 | * If exit_on_emulation_error is set, and the in-kernel instruction | |
1349 | * emulator fails to emulate an instruction, allow userspace | |
1350 | * the opportunity to look at it. | |
1351 | */ | |
1352 | bool exit_on_emulation_error; | |
b318e8de | 1353 | |
1ae09954 AG |
1354 | /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ |
1355 | u32 user_space_msr_mask; | |
b318e8de | 1356 | struct kvm_x86_msr_filter __rcu *msr_filter; |
fe6b6bc8 | 1357 | |
0dbb1123 AK |
1358 | u32 hypercall_exit_enabled; |
1359 | ||
70210c04 SC |
1360 | /* Guest can access the SGX PROVISIONKEY. */ |
1361 | bool sgx_provisioning_allowed; | |
1362 | ||
14329b82 | 1363 | struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter; |
55c510e2 | 1364 | struct task_struct *nx_huge_page_recovery_thread; |
fe5db27d | 1365 | |
897218ff | 1366 | #ifdef CONFIG_X86_64 |
d25ceb92 SC |
1367 | /* The number of TDP MMU pages across all roots. */ |
1368 | atomic64_t tdp_mmu_pages; | |
1369 | ||
c0dba6e4 | 1370 | /* |
1f98f2bd DM |
1371 | * List of struct kvm_mmu_pages being used as roots. |
1372 | * All struct kvm_mmu_pages in the list should have | |
c0dba6e4 | 1373 | * tdp_mmu_page set. |
c0e64238 BG |
1374 | * |
1375 | * For reads, this list is protected by: | |
1376 | * the MMU lock in read mode + RCU or | |
1377 | * the MMU lock in write mode | |
1378 | * | |
1379 | * For writes, this list is protected by: | |
1380 | * the MMU lock in read mode + the tdp_mmu_pages_lock or | |
1381 | * the MMU lock in write mode | |
1382 | * | |
1383 | * Roots will remain in the list until their tdp_mmu_root_count | |
1384 | * drops to zero, at which point the thread that decremented the | |
1385 | * count to zero should remove the root from the list and clean | |
1386 | * it up, freeing the root after an RCU grace period. | |
c0dba6e4 | 1387 | */ |
02c00b3a | 1388 | struct list_head tdp_mmu_roots; |
c0dba6e4 | 1389 | |
9a77daac BG |
1390 | /* |
1391 | * Protects accesses to the following fields when the MMU lock | |
1392 | * is held in read mode: | |
c0e64238 | 1393 | * - tdp_mmu_roots (above) |
f96c48e9 | 1394 | * - the link field of kvm_mmu_page structs used by the TDP MMU |
55c510e2 SC |
1395 | * - possible_nx_huge_pages; |
1396 | * - the possible_nx_huge_page_link field of kvm_mmu_page structs used | |
9a77daac BG |
1397 | * by the TDP MMU |
1398 | * It is acceptable, but not necessary, to acquire this lock when | |
1399 | * the thread holds the MMU lock in write mode. | |
1400 | */ | |
1401 | spinlock_t tdp_mmu_pages_lock; | |
22b94c4b | 1402 | struct workqueue_struct *tdp_mmu_zap_wq; |
897218ff | 1403 | #endif /* CONFIG_X86_64 */ |
a2557408 BG |
1404 | |
1405 | /* | |
1e76a3ce DS |
1406 | * If set, at least one shadow root has been allocated. This flag |
1407 | * is used as one input when determining whether certain memslot | |
1408 | * related allocations are necessary. | |
a2557408 | 1409 | */ |
1e76a3ce | 1410 | bool shadow_root_allocated; |
3c86c0d3 VP |
1411 | |
1412 | #if IS_ENABLED(CONFIG_HYPERV) | |
1413 | hpa_t hv_root_tdp; | |
1414 | spinlock_t hv_root_tdp_lock; | |
1415 | #endif | |
35875316 ZG |
1416 | /* |
1417 | * VM-scope maximum vCPU ID. Used to determine the size of structures | |
1418 | * that increase along with the maximum vCPU ID, in which case, using | |
1419 | * the global KVM_MAX_VCPU_IDS may lead to significant memory waste. | |
1420 | */ | |
1421 | u32 max_vcpu_ids; | |
084cc29f BG |
1422 | |
1423 | bool disable_nx_huge_pages; | |
ada51a9d DM |
1424 | |
1425 | /* | |
1426 | * Memory caches used to allocate shadow pages when performing eager | |
1427 | * page splitting. No need for a shadowed_info_cache since eager page | |
1428 | * splitting only allocates direct shadow pages. | |
1429 | * | |
1430 | * Protected by kvm->slots_lock. | |
1431 | */ | |
1432 | struct kvm_mmu_memory_cache split_shadow_page_cache; | |
1433 | struct kvm_mmu_memory_cache split_page_header_cache; | |
1434 | ||
1435 | /* | |
1436 | * Memory cache used to allocate pte_list_desc structs while splitting | |
1437 | * huge pages. In the worst case, to split one huge page, 512 | |
1438 | * pte_list_desc structs are needed to add each lower level leaf sptep | |
1439 | * to the rmap plus 1 to extend the parent_ptes rmap of the lower level | |
1440 | * page table. | |
1441 | * | |
1442 | * Protected by kvm->slots_lock. | |
1443 | */ | |
1444 | #define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1) | |
1445 | struct kvm_mmu_memory_cache split_desc_cache; | |
d69fb81f ZX |
1446 | }; |
1447 | ||
0711456c | 1448 | /* x86 per-VM statistics counters; "generic" holds the struct kvm_vm_stat_generic part. */ struct kvm_vm_stat { |
0193cc90 | 1449 | struct kvm_vm_stat_generic generic; |
e3cb6fa0 PB |
1450 | u64 mmu_shadow_zapped; |
1451 | u64 mmu_pte_write; | |
1452 | u64 mmu_pde_zapped; | |
1453 | u64 mmu_flooded; | |
1454 | u64 mmu_recycled; | |
1455 | u64 mmu_cache_miss; | |
1456 | u64 mmu_unsync; | |
71f51d2c MZ |
1457 | union { /* the named counters and pages[] share storage; assumes KVM_NR_PAGE_SIZES == 3 so they line up -- TODO confirm */ |
1458 | struct { | |
1459 | atomic64_t pages_4k; | |
1460 | atomic64_t pages_2m; | |
1461 | atomic64_t pages_1g; | |
1462 | }; | |
1463 | atomic64_t pages[KVM_NR_PAGE_SIZES]; | |
1464 | }; | |
e3cb6fa0 PB |
1465 | u64 nx_lpage_splits; |
1466 | u64 max_mmu_page_hash_collisions; | |
ec1cf69c | 1467 | u64 max_mmu_rmap_size; |
0711456c ZX |
1468 | }; |
1469 | ||
77b4c255 | 1470 | /* x86 per-vCPU statistics counters; "generic" holds the struct kvm_vcpu_stat_generic part. */ struct kvm_vcpu_stat { |
0193cc90 | 1471 | struct kvm_vcpu_stat_generic generic; |
1075d41e | 1472 | u64 pf_taken; |
8a7e75d4 | 1473 | u64 pf_fixed; |
1075d41e SC |
1474 | u64 pf_emulate; |
1475 | u64 pf_spurious; | |
1476 | u64 pf_fast; | |
1477 | u64 pf_mmio_spte_created; | |
8a7e75d4 SJS |
1478 | u64 pf_guest; |
1479 | u64 tlb_flush; | |
1480 | u64 invlpg; | |
1481 | ||
1482 | u64 exits; | |
1483 | u64 io_exits; | |
1484 | u64 mmio_exits; | |
1485 | u64 signal_exits; | |
1486 | u64 irq_window_exits; | |
1487 | u64 nmi_window_exits; | |
c595ceee | 1488 | u64 l1d_flush; |
8a7e75d4 | 1489 | u64 halt_exits; |
8a7e75d4 SJS |
1490 | u64 request_irq_exits; |
1491 | u64 irq_exits; | |
1492 | u64 host_state_reload; | |
8a7e75d4 SJS |
1493 | u64 fpu_reload; |
1494 | u64 insn_emulation; | |
1495 | u64 insn_emulation_fail; | |
1496 | u64 hypercalls; | |
1497 | u64 irq_injections; | |
1498 | u64 nmi_injections; | |
0f1e261e | 1499 | u64 req_event; |
43c11d91 | 1500 | u64 nested_run; |
4a7132ef WL |
1501 | u64 directed_yield_attempted; |
1502 | u64 directed_yield_successful; | |
6cd88243 PB |
1503 | u64 preemption_reported; |
1504 | u64 preemption_other; | |
d5a0483f | 1505 | u64 guest_mode; |
2f4073e0 | 1506 | u64 notify_window_exits; |
77b4c255 | 1507 | }; |
ad312c7c | 1508 | |
8a76d7f2 JR |
1509 | struct x86_instruction_info; |
1510 | ||
8fe8ab46 WA |
1511 | /* Argument block passed to the get_msr/set_msr hooks of struct kvm_x86_ops. */ struct msr_data { |
1512 | bool host_initiated; /* presumably true when the access originates from the host rather than the guest -- TODO confirm */ | |
1513 | u32 index; | |
1514 | u64 data; | |
1515 | }; | |
1516 | ||
cb5281a5 PB |
1517 | /* Parameters of one local-APIC interrupt: vector, delivery/destination/trigger modes and destination. */ struct kvm_lapic_irq { |
1518 | u32 vector; | |
b7cb2231 PB |
1519 | u16 delivery_mode; |
1520 | u16 dest_mode; /* NOTE(review): presumably APIC_DEST_LOGICAL or APIC_DEST_PHYSICAL, as produced by kvm_lapic_irq_dest_mode() -- confirm */ | |
1521 | bool level; | |
1522 | u16 trig_mode; | |
cb5281a5 PB |
1523 | u32 shorthand; |
1524 | u32 dest_id; | |
93bbf0b8 | 1525 | bool msi_redir_hint; |
cb5281a5 PB |
1526 | }; |
1527 | ||
c96001c5 PX |
1528 | /* Map a "logical destination mode" flag to APIC_DEST_LOGICAL (true) or APIC_DEST_PHYSICAL (false). */ static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical) |
1529 | { | |
1530 | return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL; | |
1531 | } | |
1532 | ||
ea4a5ff8 | 1533 | struct kvm_x86_ops { |
9dadfc4a SC |
1534 | const char *name; |
1535 | ||
d83420c2 SC |
1536 | int (*check_processor_compatibility)(void); |
1537 | ||
13a34e06 RK |
1538 | int (*hardware_enable)(void); |
1539 | void (*hardware_disable)(void); | |
6e4fd06f | 1540 | void (*hardware_unsetup)(void); |
5719455f | 1541 | bool (*has_emulated_msr)(struct kvm *kvm, u32 index); |
7c1b761b | 1542 | void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu); |
ea4a5ff8 | 1543 | |
562b6b08 | 1544 | unsigned int vm_size; |
03543133 SS |
1545 | int (*vm_init)(struct kvm *kvm); |
1546 | void (*vm_destroy)(struct kvm *kvm); | |
1547 | ||
ea4a5ff8 | 1548 | /* Create, but do not attach this VCPU */ |
d588bb9b | 1549 | int (*vcpu_precreate)(struct kvm *kvm); |
987b2594 | 1550 | int (*vcpu_create)(struct kvm_vcpu *vcpu); |
ea4a5ff8 | 1551 | void (*vcpu_free)(struct kvm_vcpu *vcpu); |
d28bc9dd | 1552 | void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event); |
ea4a5ff8 | 1553 | |
e27bc044 | 1554 | void (*prepare_switch_to_guest)(struct kvm_vcpu *vcpu); |
ea4a5ff8 ZX |
1555 | void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); |
1556 | void (*vcpu_put)(struct kvm_vcpu *vcpu); | |
ea4a5ff8 | 1557 | |
6986982f | 1558 | void (*update_exception_bitmap)(struct kvm_vcpu *vcpu); |
609e36d3 | 1559 | int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); |
8fe8ab46 | 1560 | int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr); |
ea4a5ff8 ZX |
1561 | u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg); |
1562 | void (*get_segment)(struct kvm_vcpu *vcpu, | |
1563 | struct kvm_segment *var, int seg); | |
2e4d2653 | 1564 | int (*get_cpl)(struct kvm_vcpu *vcpu); |
ea4a5ff8 ZX |
1565 | void (*set_segment)(struct kvm_vcpu *vcpu, |
1566 | struct kvm_segment *var, int seg); | |
1567 | void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); | |
ea4a5ff8 | 1568 | void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); |
405329fc | 1569 | void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); |
c2fe3cd4 SC |
1570 | bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0); |
1571 | void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); | |
72f211ec | 1572 | int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); |
89a27f4d GN |
1573 | void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
1574 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | |
1575 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | |
1576 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | |
c77fb5fe | 1577 | void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); |
020df079 | 1578 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); |
5fdbf976 | 1579 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
ea4a5ff8 ZX |
1580 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
1581 | void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); | |
c5063551 | 1582 | bool (*get_if_flag)(struct kvm_vcpu *vcpu); |
ea4a5ff8 | 1583 | |
e27bc044 SC |
1584 | void (*flush_tlb_all)(struct kvm_vcpu *vcpu); |
1585 | void (*flush_tlb_current)(struct kvm_vcpu *vcpu); | |
8a1300ff | 1586 | int (*flush_remote_tlbs)(struct kvm *kvm); |
9ed3bf41 SC |
1587 | int (*flush_remote_tlbs_range)(struct kvm *kvm, gfn_t gfn, |
1588 | gfn_t nr_pages); | |
ea4a5ff8 | 1589 | |
faff8758 JS |
1590 | /* |
1591 | * Flush any TLB entries associated with the given GVA. | |
1592 | * Does not need to flush GPA->HPA mappings. | |
1593 | * Can potentially get non-canonical addresses through INVLPGs, which | |
1594 | * the implementation may choose to ignore if appropriate. | |
1595 | */ | |
e27bc044 | 1596 | void (*flush_tlb_gva)(struct kvm_vcpu *vcpu, gva_t addr); |
ea4a5ff8 | 1597 | |
e64419d9 SC |
1598 | /* |
1599 | * Flush any TLB entries created by the guest. Like tlb_flush_gva(), | |
1600 | * does not need to flush GPA->HPA mappings. | |
1601 | */ | |
e27bc044 | 1602 | void (*flush_tlb_guest)(struct kvm_vcpu *vcpu); |
e64419d9 | 1603 | |
fc4fad79 | 1604 | int (*vcpu_pre_run)(struct kvm_vcpu *vcpu); |
e27bc044 | 1605 | enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu); |
1e9e2622 WL |
1606 | int (*handle_exit)(struct kvm_vcpu *vcpu, |
1607 | enum exit_fastpath_completion exit_fastpath); | |
f8ea7c60 | 1608 | int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); |
5ef8acbd | 1609 | void (*update_emulated_instruction)(struct kvm_vcpu *vcpu); |
2809f5d2 | 1610 | void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); |
37ccdcbe | 1611 | u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu); |
ea4a5ff8 ZX |
1612 | void (*patch_hypercall)(struct kvm_vcpu *vcpu, |
1613 | unsigned char *hypercall_addr); | |
2d613912 | 1614 | void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected); |
e27bc044 | 1615 | void (*inject_nmi)(struct kvm_vcpu *vcpu); |
6ad75c5c | 1616 | void (*inject_exception)(struct kvm_vcpu *vcpu); |
b463a6f7 | 1617 | void (*cancel_injection)(struct kvm_vcpu *vcpu); |
c9d40913 PB |
1618 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); |
1619 | int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); | |
3cfc3092 JK |
1620 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); |
1621 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); | |
c9a7953f JK |
1622 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
1623 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); | |
95ba8273 | 1624 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
7491b7b2 | 1625 | bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason); |
b3f257a8 | 1626 | const unsigned long required_apicv_inhibits; |
2008fab3 | 1627 | bool allow_apicv_in_x2apic_without_x2apic_virtualization; |
d62caabb | 1628 | void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu); |
c7c9c56c | 1629 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); |
d39850f5 | 1630 | void (*hwapic_isr_update)(int isr); |
e6c67d8c | 1631 | bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); |
6308630b | 1632 | void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); |
8d860bbe | 1633 | void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); |
a4148b7c | 1634 | void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); |
57dfd7b5 SC |
1635 | void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode, |
1636 | int trig_mode, int vector); | |
76dfafd5 | 1637 | int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); |
ea4a5ff8 | 1638 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
2ac52ab8 | 1639 | int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr); |
ba28401b | 1640 | u8 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
344f414f | 1641 | |
e83bc09c SC |
1642 | void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa, |
1643 | int root_level); | |
727a7e27 | 1644 | |
f5f48ee1 SY |
1645 | bool (*has_wbinvd_exit)(void); |
1646 | ||
307a94c7 IS |
1647 | u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu); |
1648 | u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu); | |
edcfe540 | 1649 | void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); |
1ab9287a | 1650 | void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier); |
99e3e30a | 1651 | |
235ba74f | 1652 | /* |
0a62a031 DE |
1653 | * Retrieve somewhat arbitrary exit information. Intended to |
1654 | * be used only from within tracepoints or error paths. | |
235ba74f | 1655 | */ |
0a62a031 DE |
1656 | void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason, |
1657 | u64 *info1, u64 *info2, | |
235ba74f | 1658 | u32 *exit_int_info, u32 *exit_int_info_err_code); |
8a76d7f2 JR |
1659 | |
1660 | int (*check_intercept)(struct kvm_vcpu *vcpu, | |
1661 | struct x86_instruction_info *info, | |
21f1b8f2 SC |
1662 | enum x86_intercept_stage stage, |
1663 | struct x86_exception *exception); | |
a9ab13ff | 1664 | void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); |
7f5581f5 | 1665 | |
d264ee0c | 1666 | void (*request_immediate_exit)(struct kvm_vcpu *vcpu); |
ae97a3b8 RK |
1667 | |
1668 | void (*sched_in)(struct kvm_vcpu *kvm, int cpu); | |
88178fd4 KH |
1669 | |
1670 | /* | |
a018eba5 SC |
1671 | * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero |
1672 | * value indicates CPU dirty logging is unsupported or disabled. | |
88178fd4 | 1673 | */ |
6dd03800 | 1674 | int cpu_dirty_log_size; |
a85863c2 | 1675 | void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu); |
bab4165e | 1676 | |
33b22172 | 1677 | const struct kvm_x86_nested_ops *nested_ops; |
efc64404 | 1678 | |
d1ed092f SS |
1679 | void (*vcpu_blocking)(struct kvm_vcpu *vcpu); |
1680 | void (*vcpu_unblocking)(struct kvm_vcpu *vcpu); | |
1681 | ||
e27bc044 | 1682 | int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq, |
efc64404 | 1683 | uint32_t guest_irq, bool set); |
e27bc044 | 1684 | void (*pi_start_assignment)(struct kvm *kvm); |
be8ca170 | 1685 | void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu); |
17e433b5 | 1686 | bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu); |
ce7a058a | 1687 | |
f9927982 SC |
1688 | int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, |
1689 | bool *expired); | |
ce7a058a | 1690 | void (*cancel_hv_timer)(struct kvm_vcpu *vcpu); |
c45dcc71 AR |
1691 | |
1692 | void (*setup_mce)(struct kvm_vcpu *vcpu); | |
0234bf88 | 1693 | |
31e83e21 | 1694 | #ifdef CONFIG_KVM_SMM |
c9d40913 | 1695 | int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); |
58c1d206 ML |
1696 | int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram); |
1697 | int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram); | |
c9d40913 | 1698 | void (*enable_smi_window)(struct kvm_vcpu *vcpu); |
31e83e21 | 1699 | #endif |
5acc5c06 | 1700 | |
03d004cd SC |
1701 | int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); |
1702 | int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); | |
1703 | int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp); | |
54526d1f | 1704 | int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd); |
b5663931 | 1705 | int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd); |
683412cc | 1706 | void (*guest_memory_reclaimed)(struct kvm *kvm); |
801e459a TL |
1707 | |
1708 | int (*get_msr_feature)(struct kvm_msr_entry *entry); | |
57b119da | 1709 | |
4d31d9ef SC |
1710 | bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type, |
1711 | void *insn, int insn_len); | |
4b9852f4 LA |
1712 | |
1713 | bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); | |
b83237ad | 1714 | int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); |
93dff2fe JM |
1715 | |
1716 | void (*migrate_timers)(struct kvm_vcpu *vcpu); | |
51de8151 | 1717 | void (*msr_filter_changed)(struct kvm_vcpu *vcpu); |
f9a4d621 | 1718 | int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); |
647daca2 TL |
1719 | |
1720 | void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); | |
d5fa597e ML |
1721 | |
1722 | /* | |
1723 | * Returns vCPU specific APICv inhibit reasons | |
1724 | */ | |
1725 | unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu); | |
ea4a5ff8 ZX |
1726 | }; |
1727 | ||
33b22172 | 1728 | struct kvm_x86_nested_ops { |
f7e57078 | 1729 | void (*leave_nested)(struct kvm_vcpu *vcpu); |
7709aba8 SC |
1730 | bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector, |
1731 | u32 error_code); | |
33b22172 | 1732 | int (*check_events)(struct kvm_vcpu *vcpu); |
5b4ac1a1 | 1733 | bool (*has_events)(struct kvm_vcpu *vcpu); |
cb6a32c2 | 1734 | void (*triple_fault)(struct kvm_vcpu *vcpu); |
33b22172 PB |
1735 | int (*get_state)(struct kvm_vcpu *vcpu, |
1736 | struct kvm_nested_state __user *user_kvm_nested_state, | |
1737 | unsigned user_data_size); | |
1738 | int (*set_state)(struct kvm_vcpu *vcpu, | |
1739 | struct kvm_nested_state __user *user_kvm_nested_state, | |
1740 | struct kvm_nested_state *kvm_state); | |
729c15c2 | 1741 | bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu); |
02f5fb2e | 1742 | int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa); |
33b22172 PB |
1743 | |
1744 | int (*enable_evmcs)(struct kvm_vcpu *vcpu, | |
1745 | uint16_t *vmcs_version); | |
1746 | uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); | |
b0c9c25e | 1747 | void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu); |
ea4a5ff8 ZX |
1748 | }; |
1749 | ||
/*
 * Initialization-time description of a vendor module, handed to
 * kvm_x86_vendor_init().  Bundles setup callbacks with pointers to the
 * runtime and PMU operation tables.
 */
struct kvm_x86_init_ops {
	int (*hardware_setup)(void);
	unsigned int (*handle_intel_pt_intr)(void);

	/* Operation tables supplied by the vendor module. */
	struct kvm_x86_ops *runtime_ops;
	struct kvm_pmu_ops *pmu_ops;
};
1757 | ||
af585b92 | 1758 | struct kvm_arch_async_pf { |
7c90705b | 1759 | u32 token; |
af585b92 | 1760 | gfn_t gfn; |
fb67e14f | 1761 | unsigned long cr3; |
c4806acd | 1762 | bool direct_map; |
af585b92 GN |
1763 | }; |
1764 | ||
9cc39a5a | 1765 | extern u32 __read_mostly kvm_nr_uret_msrs; |
91661989 | 1766 | extern u64 __read_mostly host_efer; |
3edd6839 | 1767 | extern bool __read_mostly allow_smaller_maxphyaddr; |
fdf513e3 | 1768 | extern bool __read_mostly enable_apicv; |
afaf0b2f | 1769 | extern struct kvm_x86_ops kvm_x86_ops; |
97896d04 | 1770 | |
9af5471b JB |
1771 | #define KVM_X86_OP(func) \ |
1772 | DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func)); | |
e4fc23ba | 1773 | #define KVM_X86_OP_OPTIONAL KVM_X86_OP |
5be2226f | 1774 | #define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP |
9af5471b JB |
1775 | #include <asm/kvm-x86-ops.h> |
1776 | ||
4f8396b9 SC |
1777 | int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops); |
1778 | void kvm_x86_vendor_exit(void); | |
1779 | ||
434a1e94 SC |
1780 | #define __KVM_HAVE_ARCH_VM_ALLOC |
1781 | static inline struct kvm *kvm_arch_alloc_vm(void) | |
1782 | { | |
88dca4ca | 1783 | return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); |
434a1e94 | 1784 | } |
78b497f2 JG |
1785 | |
1786 | #define __KVM_HAVE_ARCH_VM_FREE | |
562b6b08 | 1787 | void kvm_arch_free_vm(struct kvm *kvm); |
434a1e94 | 1788 | |
b08660e5 TL |
1789 | #define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB |
1790 | static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm) | |
1791 | { | |
8a1300ff SC |
1792 | if (kvm_x86_ops.flush_remote_tlbs && |
1793 | !static_call(kvm_x86_flush_remote_tlbs)(kvm)) | |
b08660e5 TL |
1794 | return 0; |
1795 | else | |
1796 | return -ENOTSUPP; | |
1797 | } | |
1798 | ||
e1bfc245 SC |
1799 | #define kvm_arch_pmi_in_guest(vcpu) \ |
1800 | ((vcpu) && (vcpu)->arch.handling_intr_from_guest) | |
1801 | ||
982bae43 | 1802 | void __init kvm_mmu_x86_module_init(void); |
1d0e8480 SC |
1803 | int kvm_mmu_vendor_module_init(void); |
1804 | void kvm_mmu_vendor_module_exit(void); | |
54f1585a ZX |
1805 | |
1806 | void kvm_mmu_destroy(struct kvm_vcpu *vcpu); | |
1807 | int kvm_mmu_create(struct kvm_vcpu *vcpu); | |
a1a39128 | 1808 | int kvm_mmu_init_vm(struct kvm *kvm); |
13d268ca | 1809 | void kvm_mmu_uninit_vm(struct kvm *kvm); |
54f1585a | 1810 | |
49c6f875 | 1811 | void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu); |
8a3c1a33 | 1812 | void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
1c91cad4 | 1813 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, |
269e9552 | 1814 | const struct kvm_memory_slot *memslot, |
3c9bd400 | 1815 | int start_level); |
a3fe5dbd DM |
1816 | void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm, |
1817 | const struct kvm_memory_slot *memslot, | |
1818 | int target_level); | |
cb00a70b DM |
1819 | void kvm_mmu_try_split_huge_pages(struct kvm *kvm, |
1820 | const struct kvm_memory_slot *memslot, | |
1821 | u64 start, u64 end, | |
1822 | int target_level); | |
3ea3b7fa | 1823 | void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm, |
f36f3f28 | 1824 | const struct kvm_memory_slot *memslot); |
f4b4b180 | 1825 | void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm, |
269e9552 | 1826 | const struct kvm_memory_slot *memslot); |
54f1585a | 1827 | void kvm_mmu_zap_all(struct kvm *kvm); |
15248258 | 1828 | void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); |
bc8a3d89 | 1829 | void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages); |
54f1585a | 1830 | |
2df4a5eb | 1831 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); |
cc4b6871 | 1832 | |
3200f405 | 1833 | int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, |
9f811285 | 1834 | const void *val, int bytes); |
2f333bcb | 1835 | |
6ef768fa PB |
1836 | struct kvm_irq_mask_notifier { |
1837 | void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked); | |
1838 | int irq; | |
1839 | struct hlist_node link; | |
1840 | }; | |
1841 | ||
1842 | void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq, | |
1843 | struct kvm_irq_mask_notifier *kimn); | |
1844 | void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, | |
1845 | struct kvm_irq_mask_notifier *kimn); | |
1846 | void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, | |
1847 | bool mask); | |
1848 | ||
2f333bcb | 1849 | extern bool tdp_enabled; |
9f811285 | 1850 | |
a3e06bbe LJ |
1851 | u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu); |
1852 | ||
41577ab8 SC |
1853 | /* |
1854 | * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing | |
1855 | * userspace I/O) to indicate that the emulation context | |
d9f6e12f | 1856 | * should be reused as is, i.e. skip initialization of |
41577ab8 SC |
1857 | * emulation context, instruction fetch and decode. |
1858 | * | |
1859 | * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware. | |
1860 | * Indicates that only select instructions (tagged with | |
1861 | * EmulateOnUD) should be emulated (to minimize the emulator | |
1862 | * attack surface). See also EMULTYPE_TRAP_UD_FORCED. | |
1863 | * | |
1864 | * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to | |
1865 | * decode the instruction length. For use *only* by | |
906fa904 HW |
1866 | * kvm_x86_ops.skip_emulated_instruction() implementations if |
1867 | * EMULTYPE_COMPLETE_USER_EXIT is not set. | |
41577ab8 | 1868 | * |
92daa48b SC |
1869 | * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to |
1870 | * retry native execution under certain conditions. |
1871 | * Can only be set in conjunction with EMULTYPE_PF. | |
41577ab8 SC |
1872 | * |
1873 | * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was | |
1874 | * triggered by KVM's magic "force emulation" prefix, | |
1875 | * which is opt in via module param (off by default). | |
1876 | * Bypasses EmulateOnUD restriction despite emulating | |
1877 | * due to an intercepted #UD (see EMULTYPE_TRAP_UD). | |
1878 | * Used to test the full emulator from userspace. | |
1879 | * | |
1880 | * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware | |
1881 | * backdoor emulation, which is opt in via module param. | |
d9f6e12f | 1882 | * VMware backdoor emulation handles select instructions |
41577ab8 | 1883 | * and reinjects the #GP for all other cases. |
92daa48b SC |
1884 | * |
1885 | * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which | |
1886 | * case the CR2/GPA value passed on the stack is valid. |
906fa904 HW |
1887 | * |
1888 | * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility | |
1889 | * state and inject single-step #DBs after skipping | |
1890 | * an instruction (after completing userspace I/O). | |
258d985f SC |
1891 | * |
1892 | * EMULTYPE_WRITE_PF_TO_SP - Set when emulating an intercepted page fault that | |
1893 | * is attempting to write a gfn that contains one or | |
1894 | * more of the PTEs used to translate the write itself, | |
1895 | * and the owning page table is being shadowed by KVM. | |
1896 | * If emulation of the faulting instruction fails and | |
1897 | * this flag is set, KVM will exit to userspace instead | |
1898 | * of retrying emulation as KVM cannot make forward | |
1899 | * progress. | |
1900 | * | |
1901 | * If emulation fails for a write to guest page tables, | |
1902 | * KVM unprotects (zaps) the shadow page for the target | |
1903 | * gfn and resumes the guest to retry the non-emulatable | |
1904 | * instruction (on hardware). Unprotecting the gfn | |
1905 | * doesn't allow forward progress for a self-changing | |
1906 | * access because doing so also zaps the translation for | |
1907 | * the gfn, i.e. retrying the instruction will hit a | |
1908 | * !PRESENT fault, which results in a new shadow page | |
1909 | * and sends KVM back to square one. | |
41577ab8 | 1910 | */ |
571008da SY |
1911 | #define EMULTYPE_NO_DECODE (1 << 0) |
1912 | #define EMULTYPE_TRAP_UD (1 << 1) | |
ba8afb6b | 1913 | #define EMULTYPE_SKIP (1 << 2) |
92daa48b | 1914 | #define EMULTYPE_ALLOW_RETRY_PF (1 << 3) |
b4000606 | 1915 | #define EMULTYPE_TRAP_UD_FORCED (1 << 4) |
42cbf068 | 1916 | #define EMULTYPE_VMWARE_GP (1 << 5) |
92daa48b | 1917 | #define EMULTYPE_PF (1 << 6) |
906fa904 | 1918 | #define EMULTYPE_COMPLETE_USER_EXIT (1 << 7) |
258d985f | 1919 | #define EMULTYPE_WRITE_PF_TO_SP (1 << 8) |
92daa48b | 1920 | |
c60658d1 SC |
1921 | int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type); |
1922 | int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, | |
1923 | void *insn, int insn_len); | |
e615e355 DE |
1924 | void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu, |
1925 | u64 *data, u8 ndata); | |
1926 | void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu); | |
35be0ade | 1927 | |
f2b4b7dd | 1928 | void kvm_enable_efer_bits(u64); |
384bb783 | 1929 | bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer); |
edef5c36 | 1930 | int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated); |
f20935d8 SC |
1931 | int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data); |
1932 | int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data); | |
1edce0a9 SC |
1933 | int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu); |
1934 | int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu); | |
5ff3a351 SC |
1935 | int kvm_emulate_as_nop(struct kvm_vcpu *vcpu); |
1936 | int kvm_emulate_invd(struct kvm_vcpu *vcpu); | |
1937 | int kvm_emulate_mwait(struct kvm_vcpu *vcpu); | |
1938 | int kvm_handle_invalid_op(struct kvm_vcpu *vcpu); | |
1939 | int kvm_emulate_monitor(struct kvm_vcpu *vcpu); | |
54f1585a | 1940 | |
dca7f128 | 1941 | int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in); |
6a908b62 | 1942 | int kvm_emulate_cpuid(struct kvm_vcpu *vcpu); |
54f1585a | 1943 | int kvm_emulate_halt(struct kvm_vcpu *vcpu); |
1460179d | 1944 | int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu); |
647daca2 | 1945 | int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); |
f5f48ee1 | 1946 | int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); |
54f1585a | 1947 | |
3e6e0aab | 1948 | void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
c53da4f3 | 1949 | void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); |
c697518a | 1950 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); |
2b4a273b | 1951 | void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); |
3e6e0aab | 1952 | |
7f3d35fd KW |
1953 | int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, |
1954 | int reason, bool has_error_code, u32 error_code); | |
37817f29 | 1955 | |
f27ad38a | 1956 | void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0); |
5b51cb13 | 1957 | void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4); |
49a9b07e | 1958 | int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); |
2390218b | 1959 | int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3); |
a83b29c6 | 1960 | int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); |
eea1cff9 | 1961 | int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8); |
020df079 | 1962 | int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val); |
29d6ca41 | 1963 | void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val); |
2d3ad1f4 AK |
1964 | unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu); |
1965 | void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw); | |
92f9895c | 1966 | int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu); |
54f1585a | 1967 | |
609e36d3 | 1968 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); |
8fe8ab46 | 1969 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr); |
54f1585a | 1970 | |
91586a3b JK |
1971 | unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu); |
1972 | void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); | |
c483c454 | 1973 | int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu); |
91586a3b | 1974 | |
298101da AK |
1975 | void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
1976 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | |
4d5523cf | 1977 | void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload); |
ce7ddec4 JR |
1978 | void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); |
1979 | void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); | |
6389ee94 | 1980 | void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); |
7709aba8 | 1981 | void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, |
53b3d8e9 | 1982 | struct x86_exception *fault); |
0a79b009 | 1983 | bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); |
16f8a6f9 | 1984 | bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr); |
298101da | 1985 | |
/*
 * Record the level asserted by one interrupt source and report the
 * resulting line state.
 *
 * Bit @irq_source_id of *@irq_state is set when @level is non-zero and
 * cleared otherwise.  The return value is 1 if any bit of *@irq_state is
 * set afterwards and 0 if none is, i.e. the line is the logical OR of all
 * sources (level-triggered semantics).
 */
static inline int __kvm_irq_line_state(unsigned long *irq_state,
				       int irq_source_id, int level)
{
	if (!level)
		__clear_bit(irq_source_id, irq_state);
	else
		__set_bit(irq_source_id, irq_state);

	return *irq_state != 0;
}
1997 | ||
1998 | int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level); | |
1999 | void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); | |
3de42dc0 | 2000 | |
3419ffc8 SY |
2001 | void kvm_inject_nmi(struct kvm_vcpu *vcpu); |
2002 | ||
7c86663b PB |
2003 | void kvm_update_dr7(struct kvm_vcpu *vcpu); |
2004 | ||
1cb3f3ae | 2005 | int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); |
0c1c92f1 | 2006 | void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, |
6a82cd1c | 2007 | ulong roots_to_free); |
0c1c92f1 | 2008 | void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu); |
ab9ae313 AK |
2009 | gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, |
2010 | struct x86_exception *exception); | |
ab9ae313 AK |
2011 | gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, |
2012 | struct x86_exception *exception); | |
2013 | gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, | |
2014 | struct x86_exception *exception); | |
54f1585a | 2015 | |
4e19c36f | 2016 | bool kvm_apicv_activated(struct kvm *kvm); |
d5fa597e | 2017 | bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu); |
2008fab3 | 2018 | void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu); |
320af55a SC |
2019 | void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, |
2020 | enum kvm_apicv_inhibit reason, bool set); | |
2021 | void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm, | |
2022 | enum kvm_apicv_inhibit reason, bool set); | |
2023 | ||
2024 | static inline void kvm_set_apicv_inhibit(struct kvm *kvm, | |
2025 | enum kvm_apicv_inhibit reason) | |
2026 | { | |
2027 | kvm_set_or_clear_apicv_inhibit(kvm, reason, true); | |
2028 | } | |
d62caabb | 2029 | |
320af55a SC |
2030 | static inline void kvm_clear_apicv_inhibit(struct kvm *kvm, |
2031 | enum kvm_apicv_inhibit reason) | |
2032 | { | |
2033 | kvm_set_or_clear_apicv_inhibit(kvm, reason, false); | |
2034 | } | |
b0a1637f | 2035 | |
54f1585a ZX |
2036 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
2037 | ||
736c291c | 2038 | int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, |
dc25e89e | 2039 | void *insn, int insn_len); |
a7052897 | 2040 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); |
753b43c9 | 2041 | void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, |
cd42853e | 2042 | u64 addr, unsigned long roots); |
eb4b248e | 2043 | void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); |
b5129100 | 2044 | void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd); |
34c16eec | 2045 | |
746700d2 WH |
2046 | void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, |
2047 | int tdp_max_root_level, int tdp_huge_page_level); | |
18552672 | 2048 | |
d6e88aec | 2049 | static inline u16 kvm_read_ldt(void) |
ec6d273d ZX |
2050 | { |
2051 | u16 ldt; | |
2052 | asm("sldt %0" : "=g"(ldt)); | |
2053 | return ldt; | |
2054 | } | |
2055 | ||
d6e88aec | 2056 | static inline void kvm_load_ldt(u16 sel) |
ec6d273d ZX |
2057 | { |
2058 | asm("lldt %0" : : "rm"(sel)); | |
2059 | } | |
ec6d273d | 2060 | |
#ifdef CONFIG_X86_64
/*
 * Read MSR @msr through rdmsrl() and return its value.  Only built for
 * CONFIG_X86_64.
 */
static inline unsigned long read_msr(unsigned long msr)
{
	u64 val;

	rdmsrl(msr, val);

	return val;
}
#endif
2070 | ||
c1a5d4f9 AK |
2071 | static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) |
2072 | { | |
2073 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); | |
2074 | } | |
2075 | ||
ec6d273d ZX |
2076 | #define TSS_IOPB_BASE_OFFSET 0x66 |
2077 | #define TSS_BASE_SIZE 0x68 | |
2078 | #define TSS_IOPB_SIZE (65536 / 8) | |
2079 | #define TSS_REDIRECTION_SIZE (256 / 8) | |
7d76b4d3 JP |
2080 | #define RMODE_TSS_SIZE \ |
2081 | (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1) | |
53e0aa7b | 2082 | |
37817f29 IE |
2083 | enum { |
2084 | TASK_SWITCH_CALL = 0, | |
2085 | TASK_SWITCH_IRET = 1, | |
2086 | TASK_SWITCH_JMP = 2, | |
2087 | TASK_SWITCH_GATE = 3, | |
2088 | }; | |
2089 | ||
32e69f23 | 2090 | #define HF_GUEST_MASK (1 << 0) /* VCPU is in guest-mode */ |
a7662aa5 PB |
2091 | |
2092 | #ifdef CONFIG_KVM_SMM | |
32e69f23 ML |
2093 | #define HF_SMM_MASK (1 << 1) |
2094 | #define HF_SMM_INSIDE_NMI_MASK (1 << 2) | |
1371d904 | 2095 | |
ba97bb07 PB |
2096 | # define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE |
2097 | # define KVM_ADDRESS_SPACE_NUM 2 | |
2098 | # define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) | |
2099 | # define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) | |
2100 | #else | |
2101 | # define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0) | |
2102 | #endif | |
1371d904 | 2103 | |
e930bffe | 2104 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
5f7c292b | 2105 | |
c7c9c56c | 2106 | int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v); |
a1b37100 | 2107 | int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu); |
71cc849b | 2108 | int kvm_cpu_has_extint(struct kvm_vcpu *v); |
a1b37100 | 2109 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu); |
0b71785d | 2110 | int kvm_cpu_get_interrupt(struct kvm_vcpu *v); |
d28bc9dd | 2111 | void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event); |
e930bffe | 2112 | |
4180bf1b | 2113 | int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, |
bdf7ffc8 | 2114 | unsigned long ipi_bitmap_high, u32 min, |
4180bf1b WL |
2115 | unsigned long icr, int op_64_bit); |
2116 | ||
e5fda4bb | 2117 | int kvm_add_user_return_msr(u32 msr); |
8ea8b8d6 | 2118 | int kvm_find_user_return_msr(u32 msr); |
7e34fbd0 | 2119 | int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask); |
18863bdd | 2120 | |
61a05d44 SC |
2121 | static inline bool kvm_is_supported_user_return_msr(u32 msr) |
2122 | { | |
2123 | return kvm_find_user_return_msr(msr) >= 0; | |
2124 | } | |
2125 | ||
62711e5a | 2126 | u64 kvm_scale_tsc(u64 tsc, u64 ratio); |
4ba76538 | 2127 | u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc); |
83150f29 IS |
2128 | u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier); |
2129 | u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier); | |
35181e86 | 2130 | |
82b32774 | 2131 | unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu); |
f92653ee JK |
2132 | bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip); |
2133 | ||
/*
 * IOAPIC scan requests (prototypes only; defined elsewhere).  The _mask
 * variant additionally takes a bitmap of vCPUs.
 * NOTE(review): presumably the bitmap restricts which vCPUs receive the
 * request -- confirm at the definition.
 */
void kvm_make_scan_ioapic_request(struct kvm *kvm);
void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
				       unsigned long *vcpu_bitmap);
2860c4b1 | 2137 | |
2a18b7e7 | 2138 | bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, |
af585b92 GN |
2139 | struct kvm_async_pf *work); |
2140 | void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, | |
2141 | struct kvm_async_pf *work); | |
56028d08 GN |
2142 | void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, |
2143 | struct kvm_async_pf *work); | |
557a961a | 2144 | void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu); |
7c0ade6c | 2145 | bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); |
af585b92 GN |
2146 | extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); |
2147 | ||
/*
 * Instruction-completion helpers and the immediate-exit request
 * (prototypes only; defined elsewhere).
 * NOTE(review): @err of kvm_complete_insn_gp() presumably selects between
 * injecting #GP and skipping the instruction -- confirm at the definition.
 */
int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
db8fcefa | 2151 | |
ff5a983c PX |
2152 | void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, |
2153 | u32 size); | |
d71ba788 PB |
2154 | bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); |
2155 | bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); | |
f5132b01 | 2156 | |
/*
 * Prototype only; defined elsewhere.  @dest_vcpu is an output pointer.
 * NOTE(review): presumably it is filled with the sole destination vCPU
 * when the function returns true -- confirm at the definition.
 */
bool kvm_intr_is_single_vcpu(struct kvm *kvm,
			     struct kvm_lapic_irq *irq,
			     struct kvm_vcpu **dest_vcpu);
2159 | ||
/*
 * Prototype only; defined elsewhere.
 * NOTE(review): presumably translates the MSI routing entry @e into the
 * LAPIC interrupt description @irq -- confirm at the definition.
 */
void kvm_set_msi_irq(struct kvm *kvm,
		     struct kvm_kernel_irq_routing_entry *e,
		     struct kvm_lapic_irq *irq);
197a4f4b | 2162 | |
fdcf7562 AG |
2163 | static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) |
2164 | { | |
2165 | /* We can only post Fixed and LowPrio IRQs */ | |
637543a8 SS |
2166 | return (irq->delivery_mode == APIC_DM_FIXED || |
2167 | irq->delivery_mode == APIC_DM_LOWEST); | |
fdcf7562 AG |
2168 | } |
2169 | ||
d1ed092f SS |
2170 | static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) |
2171 | { | |
b3646477 | 2172 | static_call_cond(kvm_x86_vcpu_blocking)(vcpu); |
d1ed092f SS |
2173 | } |
2174 | ||
2175 | static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) | |
2176 | { | |
b3646477 | 2177 | static_call_cond(kvm_x86_vcpu_unblocking)(vcpu); |
d1ed092f SS |
2178 | } |
2179 | ||
7d669f50 SS |
2180 | static inline int kvm_cpu_get_apicid(int mps_cpu) |
2181 | { | |
2182 | #ifdef CONFIG_X86_LOCAL_APIC | |
64063505 | 2183 | return default_cpu_present_to_apicid(mps_cpu); |
7d669f50 SS |
2184 | #else |
2185 | WARN_ON_ONCE(1); | |
2186 | return BAD_APICID; | |
2187 | #endif | |
2188 | } | |
2189 | ||
/*
 * Prototype only; defined elsewhere.
 * NOTE(review): presumably allocates the rmap storage of @slot for
 * @npages pages and returns 0 or a negative errno -- confirm at the
 * definition.
 */
int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
d501f747 | 2191 | |
/*
 * Bitwise OR of the three KVM_CLOCK_* flag bits listed below.
 * NOTE(review): presumably the complete set of clock flags KVM accepts
 * from / reports to userspace -- confirm at the users of this mask.
 */
#define KVM_CLOCK_VALID_FLAGS		\
	(KVM_CLOCK_TSC_STABLE |		\
	 KVM_CLOCK_REALTIME |		\
	 KVM_CLOCK_HOST_TSC)
d501f747 | 2194 | |
/*
 * Bitwise OR of every KVM_X86_QUIRK_* bit enumerated below.
 * NOTE(review): presumably the mask used to validate quirk bits supplied
 * by userspace; a new quirk would need to be added here as well --
 * confirm at the users of this mask.
 */
#define KVM_X86_VALID_QUIRKS				\
	(KVM_X86_QUIRK_LINT0_REENABLED |		\
	 KVM_X86_QUIRK_CD_NW_CLEARED |			\
	 KVM_X86_QUIRK_LAPIC_MMIO_HOLE |		\
	 KVM_X86_QUIRK_OUT_7E_INC_RIP |			\
	 KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT |		\
	 KVM_X86_QUIRK_FIX_HYPERCALL_INSN |		\
	 KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS)
6d849191 | 2203 | |
1965aae3 | 2204 | #endif /* _ASM_X86_KVM_HOST_H */ |