KVM: x86/mmu: Move filling of Hyper-V's TLB range struct into Hyper-V code
arch/x86/include/asm/kvm_host.h
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * This header defines architecture specific interfaces, x86 version
 */

#ifndef _ASM_X86_KVM_HOST_H
#define _ASM_X86_KVM_HOST_H

#include <linux/types.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/tracepoint.h>
#include <linux/cpumask.h>
#include <linux/irq_work.h>
#include <linux/irq.h>
#include <linux/workqueue.h>

#include <linux/kvm.h>
#include <linux/kvm_para.h>
#include <linux/kvm_types.h>
#include <linux/perf_event.h>
#include <linux/pvclock_gtod.h>
#include <linux/clocksource.h>
#include <linux/irqbypass.h>
#include <linux/hyperv.h>
#include <linux/kfifo.h>

#include <asm/apic.h>
#include <asm/pvclock-abi.h>
#include <asm/desc.h>
#include <asm/mtrr.h>
#include <asm/msr-index.h>
#include <asm/asm.h>
#include <asm/kvm_page_track.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/hyperv-tlfs.h>

#define __KVM_HAVE_ARCH_VCPU_DEBUGFS

#define KVM_MAX_VCPUS 1024

/*
 * In x86, the VCPU ID corresponds to the APIC ID, and APIC IDs
 * might be larger than the actual number of VCPUs because the
 * APIC ID encodes CPU topology information.
 *
 * In the worst case, we'll need less than one extra bit for the
 * Core ID, and less than one extra bit for the Package (Die) ID,
 * so ratio of 4 should be enough.
 */
#define KVM_VCPU_ID_RATIO 4
#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
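
/*
 * For example, with KVM_MAX_VCPUS = 1024 and KVM_VCPU_ID_RATIO = 4,
 * KVM_MAX_VCPU_IDS evaluates to 4096: APIC IDs up to 4095 are valid even
 * though no more than 1024 vCPUs can be created.
 */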

/* memory slots that are not exposed to userspace */
#define KVM_INTERNAL_MEM_SLOTS 3

#define KVM_HALT_POLL_NS_DEFAULT 200000

#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS

#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
					KVM_DIRTY_LOG_INITIALLY_SET)

#define KVM_BUS_LOCK_DETECTION_VALID_MODE (KVM_BUS_LOCK_DETECTION_OFF | \
						 KVM_BUS_LOCK_DETECTION_EXIT)

#define KVM_X86_NOTIFY_VMEXIT_VALID_BITS (KVM_X86_NOTIFY_VMEXIT_ENABLED | \
					  KVM_X86_NOTIFY_VMEXIT_USER)

/* x86-specific vcpu->requests bit members */
#define KVM_REQ_MIGRATE_TIMER		KVM_ARCH_REQ(0)
#define KVM_REQ_REPORT_TPR_ACCESS	KVM_ARCH_REQ(1)
#define KVM_REQ_TRIPLE_FAULT		KVM_ARCH_REQ(2)
#define KVM_REQ_MMU_SYNC		KVM_ARCH_REQ(3)
#define KVM_REQ_CLOCK_UPDATE		KVM_ARCH_REQ(4)
#define KVM_REQ_LOAD_MMU_PGD		KVM_ARCH_REQ(5)
#define KVM_REQ_EVENT			KVM_ARCH_REQ(6)
#define KVM_REQ_APF_HALT		KVM_ARCH_REQ(7)
#define KVM_REQ_STEAL_UPDATE		KVM_ARCH_REQ(8)
#define KVM_REQ_NMI			KVM_ARCH_REQ(9)
#define KVM_REQ_PMU			KVM_ARCH_REQ(10)
#define KVM_REQ_PMI			KVM_ARCH_REQ(11)
#ifdef CONFIG_KVM_SMM
#define KVM_REQ_SMI			KVM_ARCH_REQ(12)
#endif
#define KVM_REQ_MASTERCLOCK_UPDATE	KVM_ARCH_REQ(13)
#define KVM_REQ_MCLOCK_INPROGRESS \
	KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_SCAN_IOAPIC \
	KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_GLOBAL_CLOCK_UPDATE	KVM_ARCH_REQ(16)
#define KVM_REQ_APIC_PAGE_RELOAD \
	KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_CRASH		KVM_ARCH_REQ(18)
#define KVM_REQ_IOAPIC_EOI_EXIT		KVM_ARCH_REQ(19)
#define KVM_REQ_HV_RESET		KVM_ARCH_REQ(20)
#define KVM_REQ_HV_EXIT			KVM_ARCH_REQ(21)
#define KVM_REQ_HV_STIMER		KVM_ARCH_REQ(22)
#define KVM_REQ_LOAD_EOI_EXITMAP	KVM_ARCH_REQ(23)
#define KVM_REQ_GET_NESTED_STATE_PAGES	KVM_ARCH_REQ(24)
#define KVM_REQ_APICV_UPDATE \
	KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_TLB_FLUSH_CURRENT	KVM_ARCH_REQ(26)
#define KVM_REQ_TLB_FLUSH_GUEST \
	KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY		KVM_ARCH_REQ(28)
#define KVM_REQ_MSR_FILTER_CHANGED	KVM_ARCH_REQ(29)
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
	KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_TLB_FLUSH \
	KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)

#define CR0_RESERVED_BITS                                               \
	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
			  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))

#define CR4_RESERVED_BITS                                               \
	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
			  | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_PCIDE \
			  | X86_CR4_OSXSAVE | X86_CR4_SMEP | X86_CR4_FSGSBASE \
			  | X86_CR4_OSXMMEXCPT | X86_CR4_LA57 | X86_CR4_VMXE \
			  | X86_CR4_SMAP | X86_CR4_PKE | X86_CR4_UMIP))

#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)



#define INVALID_PAGE (~(hpa_t)0)
#define VALID_PAGE(x) ((x) != INVALID_PAGE)

/* KVM Hugepage definitions for x86 */
#define KVM_MAX_HUGEPAGE_LEVEL	PG_LEVEL_1G
#define KVM_NR_PAGE_SIZES	(KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1)
#define KVM_HPAGE_GFN_SHIFT(x)	(((x) - 1) * 9)
#define KVM_HPAGE_SHIFT(x)	(PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x))
#define KVM_HPAGE_SIZE(x)	(1UL << KVM_HPAGE_SHIFT(x))
#define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
#define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
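
/*
 * For example, with PAGE_SHIFT == 12, KVM_HPAGE_SHIFT(PG_LEVEL_2M) is
 * 12 + 9 = 21, so KVM_HPAGE_SIZE(PG_LEVEL_2M) is 2 MiB and
 * KVM_PAGES_PER_HPAGE(PG_LEVEL_2M) is 512; one more level multiplies both
 * by another 512 (1 GiB, 262144 pages).
 */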

#define KVM_MEMSLOT_PAGES_TO_MMU_PAGES_RATIO 50
#define KVM_MIN_ALLOC_MMU_PAGES 64UL
#define KVM_MMU_HASH_SHIFT 12
#define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT)
#define KVM_MIN_FREE_MMU_PAGES 5
#define KVM_REFILL_PAGES 25
#define KVM_MAX_CPUID_ENTRIES 256
#define KVM_NR_FIXED_MTRR_REGION 88
#define KVM_NR_VAR_MTRR 8

#define ASYNC_PF_PER_VCPU 64

enum kvm_reg {
	VCPU_REGS_RAX = __VCPU_REGS_RAX,
	VCPU_REGS_RCX = __VCPU_REGS_RCX,
	VCPU_REGS_RDX = __VCPU_REGS_RDX,
	VCPU_REGS_RBX = __VCPU_REGS_RBX,
	VCPU_REGS_RSP = __VCPU_REGS_RSP,
	VCPU_REGS_RBP = __VCPU_REGS_RBP,
	VCPU_REGS_RSI = __VCPU_REGS_RSI,
	VCPU_REGS_RDI = __VCPU_REGS_RDI,
#ifdef CONFIG_X86_64
	VCPU_REGS_R8  = __VCPU_REGS_R8,
	VCPU_REGS_R9  = __VCPU_REGS_R9,
	VCPU_REGS_R10 = __VCPU_REGS_R10,
	VCPU_REGS_R11 = __VCPU_REGS_R11,
	VCPU_REGS_R12 = __VCPU_REGS_R12,
	VCPU_REGS_R13 = __VCPU_REGS_R13,
	VCPU_REGS_R14 = __VCPU_REGS_R14,
	VCPU_REGS_R15 = __VCPU_REGS_R15,
#endif
	VCPU_REGS_RIP,
	NR_VCPU_REGS,

	VCPU_EXREG_PDPTR = NR_VCPU_REGS,
	VCPU_EXREG_CR0,
	VCPU_EXREG_CR3,
	VCPU_EXREG_CR4,
	VCPU_EXREG_RFLAGS,
	VCPU_EXREG_SEGMENTS,
	VCPU_EXREG_EXIT_INFO_1,
	VCPU_EXREG_EXIT_INFO_2,
};

enum {
	VCPU_SREG_ES,
	VCPU_SREG_CS,
	VCPU_SREG_SS,
	VCPU_SREG_DS,
	VCPU_SREG_FS,
	VCPU_SREG_GS,
	VCPU_SREG_TR,
	VCPU_SREG_LDTR,
};

enum exit_fastpath_completion {
	EXIT_FASTPATH_NONE,
	EXIT_FASTPATH_REENTER_GUEST,
	EXIT_FASTPATH_EXIT_HANDLED,
};
typedef enum exit_fastpath_completion fastpath_t;

struct x86_emulate_ctxt;
struct x86_exception;
union kvm_smram;
enum x86_intercept;
enum x86_intercept_stage;

#define KVM_NR_DB_REGS	4

#define DR6_BUS_LOCK	(1 << 11)
#define DR6_BD		(1 << 13)
#define DR6_BS		(1 << 14)
#define DR6_BT		(1 << 15)
#define DR6_RTM		(1 << 16)
/*
 * DR6_ACTIVE_LOW combines fixed-1 and active-low bits.
 * We can regard all the bits in DR6_FIXED_1 as active_low bits;
 * they will never be 0 for now, but when they are defined
 * in the future it will require no code change.
 *
 * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
 */
#define DR6_ACTIVE_LOW	0xffff0ff0
#define DR6_VOLATILE	0x0001e80f
#define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)
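/*
 * With the values above, DR6_FIXED_1 works out to 0xfffe07f0, i.e.
 * DR6_ACTIVE_LOW with the volatile bits (B0-B3, bus lock, BD, BS, BT and
 * RTM) cleared.
 */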

#define DR7_BP_EN_MASK	0x000000ff
#define DR7_GE		(1 << 9)
#define DR7_GD		(1 << 13)
#define DR7_FIXED_1	0x00000400
#define DR7_VOLATILE	0xffff2bff

#define KVM_GUESTDBG_VALID_MASK \
	(KVM_GUESTDBG_ENABLE | \
	KVM_GUESTDBG_SINGLESTEP | \
	KVM_GUESTDBG_USE_HW_BP | \
	KVM_GUESTDBG_USE_SW_BP | \
	KVM_GUESTDBG_INJECT_BP | \
	KVM_GUESTDBG_INJECT_DB | \
	KVM_GUESTDBG_BLOCKIRQ)


#define PFERR_PRESENT_BIT 0
#define PFERR_WRITE_BIT 1
#define PFERR_USER_BIT 2
#define PFERR_RSVD_BIT 3
#define PFERR_FETCH_BIT 4
#define PFERR_PK_BIT 5
#define PFERR_SGX_BIT 15
#define PFERR_GUEST_FINAL_BIT 32
#define PFERR_GUEST_PAGE_BIT 33
#define PFERR_IMPLICIT_ACCESS_BIT 48

#define PFERR_PRESENT_MASK	BIT(PFERR_PRESENT_BIT)
#define PFERR_WRITE_MASK	BIT(PFERR_WRITE_BIT)
#define PFERR_USER_MASK		BIT(PFERR_USER_BIT)
#define PFERR_RSVD_MASK		BIT(PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK	BIT(PFERR_FETCH_BIT)
#define PFERR_PK_MASK		BIT(PFERR_PK_BIT)
#define PFERR_SGX_MASK		BIT(PFERR_SGX_BIT)
#define PFERR_GUEST_FINAL_MASK	BIT_ULL(PFERR_GUEST_FINAL_BIT)
#define PFERR_GUEST_PAGE_MASK	BIT_ULL(PFERR_GUEST_PAGE_BIT)
#define PFERR_IMPLICIT_ACCESS	BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)

#define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK |	\
				 PFERR_WRITE_MASK |		\
				 PFERR_PRESENT_MASK)
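/*
 * I.e. PFERR_NESTED_GUEST_PAGE combines bits 33, 1 and 0 and evaluates to
 * 0x200000003ULL: a present, write fault against a guest page-table page.
 */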

/* apic attention bits */
#define KVM_APIC_CHECK_VAPIC	0
/*
 * The following bit is set with PV-EOI, unset on EOI.
 * We detect PV-EOI changes by the guest by comparing
 * this bit with PV-EOI in guest memory.
 * See the implementation in apic_update_pv_eoi.
 */
#define KVM_APIC_PV_EOI_PENDING	1

struct kvm_kernel_irq_routing_entry;

/*
 * kvm_mmu_page_role tracks the properties of a shadow page (where shadow page
 * also includes TDP pages) to determine whether or not a page can be used in
 * the given MMU context. This is a subset of the overall kvm_cpu_role to
 * minimize the size of kvm_memory_slot.arch.gfn_track, i.e. allows allocating
 * 2 bytes per gfn instead of 4 bytes per gfn.
 *
 * Upper-level shadow pages having gptes are tracked for write-protection via
 * gfn_track. As above, gfn_track is a 16 bit counter, so KVM must not create
 * more than 2^16-1 upper-level shadow pages at a single gfn, otherwise
 * gfn_track will overflow and explosions will ensue.
 *
 * A unique shadow page (SP) for a gfn is created if and only if an existing SP
 * cannot be reused. The ability to reuse a SP is tracked by its role, which
 * incorporates various mode bits and properties of the SP. Roughly speaking,
 * the number of unique SPs that can theoretically be created is 2^n, where n
 * is the number of bits that are used to compute the role.
 *
 * But, even though there are 19 bits in the mask below, not all combinations
 * of modes and flags are possible:
 *
 *   - invalid shadow pages are not accounted, so the bits are effectively 18
 *
 *   - quadrant will only be used if has_4_byte_gpte=1 (non-PAE paging);
 *     execonly and ad_disabled are only used for nested EPT which has
 *     has_4_byte_gpte=0. Therefore, 2 bits are always unused.
 *
 *   - the 4 bits of level are effectively limited to the values 2/3/4/5,
 *     as 4k SPs are not tracked (allowed to go unsync). In addition non-PAE
 *     paging has exactly one upper level, making level completely redundant
 *     when has_4_byte_gpte=1.
 *
 *   - on top of this, smep_andnot_wp and smap_andnot_wp are only set if
 *     cr0_wp=0, therefore these three bits only give rise to 5 possibilities.
 *
 * Therefore, the maximum number of possible upper-level shadow pages for a
 * single gfn is a bit less than 2^13.
 */
union kvm_mmu_page_role {
	u32 word;
	struct {
		unsigned level:4;
		unsigned has_4_byte_gpte:1;
		unsigned quadrant:2;
		unsigned direct:1;
		unsigned access:3;
		unsigned invalid:1;
		unsigned efer_nx:1;
		unsigned cr0_wp:1;
		unsigned smep_andnot_wp:1;
		unsigned smap_andnot_wp:1;
		unsigned ad_disabled:1;
		unsigned guest_mode:1;
		unsigned passthrough:1;
		unsigned :5;

		/*
		 * This is left at the top of the word so that
		 * kvm_memslots_for_spte_role can extract it with a
		 * simple shift. While there is room, give it a whole
		 * byte so it is also faster to load it from memory.
		 */
		unsigned smm:8;
	};
};
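
/*
 * The bit-fields above occupy bits 0-23, so smm sits in bits 24-31 and the
 * "simple shift" mentioned above is e.g. (role.word >> 24) to recover the
 * smm byte (illustrative only, not the spelling of any particular helper).
 */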

/*
 * kvm_mmu_extended_role complements kvm_mmu_page_role, tracking properties
 * relevant to the current MMU configuration. When loading CR0, CR4, or EFER,
 * including on nested transitions, if nothing in the full role changes then
 * MMU re-configuration can be skipped. @valid bit is set on first usage so we
 * don't treat all-zero structure as valid data.
 *
 * The properties that are tracked in the extended role but not the page role
 * are for things that either (a) do not affect the validity of the shadow page
 * or (b) are indirectly reflected in the shadow page's role. For example,
 * CR4.PKE only affects permission checks for software walks of the guest page
 * tables (because KVM doesn't support Protection Keys with shadow paging), and
 * CR0.PG, CR4.PAE, and CR4.PSE are indirectly reflected in role.level.
 *
 * Note, SMEP and SMAP are not redundant with sm*p_andnot_wp in the page role.
 * If CR0.WP=1, KVM can reuse shadow pages for the guest regardless of SMEP and
 * SMAP, but the MMU's permission checks for software walks need to be SMEP and
 * SMAP aware regardless of CR0.WP.
 */
union kvm_mmu_extended_role {
	u32 word;
	struct {
		unsigned int valid:1;
		unsigned int execonly:1;
		unsigned int cr4_pse:1;
		unsigned int cr4_pke:1;
		unsigned int cr4_smap:1;
		unsigned int cr4_smep:1;
		unsigned int cr4_la57:1;
		unsigned int efer_lma:1;
	};
};

union kvm_cpu_role {
	u64 as_u64;
	struct {
		union kvm_mmu_page_role base;
		union kvm_mmu_extended_role ext;
	};
};

struct kvm_rmap_head {
	unsigned long val;
};

struct kvm_pio_request {
	unsigned long linear_rip;
	unsigned long count;
	int in;
	int port;
	int size;
};

#define PT64_ROOT_MAX_LEVEL 5

struct rsvd_bits_validate {
	u64 rsvd_bits_mask[2][PT64_ROOT_MAX_LEVEL];
	u64 bad_mt_xwr;
};

struct kvm_mmu_root_info {
	gpa_t pgd;
	hpa_t hpa;
};

#define KVM_MMU_ROOT_INFO_INVALID \
	((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE })

#define KVM_MMU_NUM_PREV_ROOTS 3

#define KVM_MMU_ROOT_CURRENT		BIT(0)
#define KVM_MMU_ROOT_PREVIOUS(i)	BIT(1+i)
#define KVM_MMU_ROOTS_ALL		(BIT(1 + KVM_MMU_NUM_PREV_ROOTS) - 1)
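/*
 * For example, with KVM_MMU_NUM_PREV_ROOTS = 3, KVM_MMU_ROOTS_ALL is
 * BIT(4) - 1 = 0xf: the current root plus the three cached previous roots.
 */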

#define KVM_HAVE_MMU_RWLOCK

struct kvm_mmu_page;
struct kvm_page_fault;

/*
 * x86 supports 4 paging modes (5-level 64-bit, 4-level 64-bit, 3-level 32-bit,
 * and 2-level 32-bit).  The kvm_mmu structure abstracts the details of the
 * current mmu mode.
 */
struct kvm_mmu {
	unsigned long (*get_guest_pgd)(struct kvm_vcpu *vcpu);
	u64 (*get_pdptr)(struct kvm_vcpu *vcpu, int index);
	int (*page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault);
	void (*inject_page_fault)(struct kvm_vcpu *vcpu,
				  struct x86_exception *fault);
	gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
			    gpa_t gva_or_gpa, u64 access,
			    struct x86_exception *exception);
	int (*sync_spte)(struct kvm_vcpu *vcpu,
			 struct kvm_mmu_page *sp, int i);
	struct kvm_mmu_root_info root;
	union kvm_cpu_role cpu_role;
	union kvm_mmu_page_role root_role;

	/*
	 * The pkru_mask indicates if protection key checks are needed.  It
	 * consists of 16 domains indexed by page fault error code bits [4:1],
	 * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables.
	 * Each domain has 2 bits which are ANDed with AD and WD from PKRU.
	 */
	u32 pkru_mask;

	struct kvm_mmu_root_info prev_roots[KVM_MMU_NUM_PREV_ROOTS];

	/*
	 * Bitmap; bit set = permission fault
	 * Byte index: page fault error code [4:1]
	 * Bit index: pte permissions in ACC_* format
	 */
	u8 permissions[16];

	u64 *pae_root;
	u64 *pml4_root;
	u64 *pml5_root;

	/*
	 * check zero bits on shadow page table entries, these
	 * bits include not only hardware reserved bits but also
	 * the bits spte never used.
	 */
	struct rsvd_bits_validate shadow_zero_check;

	struct rsvd_bits_validate guest_rsvd_check;

	u64 pdptrs[4]; /* pae */
};

enum pmc_type {
	KVM_PMC_GP = 0,
	KVM_PMC_FIXED,
};

struct kvm_pmc {
	enum pmc_type type;
	u8 idx;
	bool is_paused;
	bool intr;
	u64 counter;
	u64 prev_counter;
	u64 eventsel;
	struct perf_event *perf_event;
	struct kvm_vcpu *vcpu;
	/*
	 * only for creating or reusing perf_event,
	 * eventsel value for general purpose counters,
	 * ctrl value for fixed counters.
	 */
	u64 current_config;
};

/* More counters may conflict with other existing Architectural MSRs */
#define KVM_INTEL_PMC_MAX_GENERIC	8
#define MSR_ARCH_PERFMON_PERFCTR_MAX	(MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
#define MSR_ARCH_PERFMON_EVENTSEL_MAX	(MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
#define KVM_PMC_MAX_FIXED	3
#define MSR_ARCH_PERFMON_FIXED_CTR_MAX	(MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1)
#define KVM_AMD_PMC_MAX_GENERIC	6
struct kvm_pmu {
	unsigned nr_arch_gp_counters;
	unsigned nr_arch_fixed_counters;
	unsigned available_event_types;
	u64 fixed_ctr_ctrl;
	u64 fixed_ctr_ctrl_mask;
	u64 global_ctrl;
	u64 global_status;
	u64 counter_bitmask[2];
	u64 global_ctrl_mask;
	u64 global_ovf_ctrl_mask;
	u64 reserved_bits;
	u64 raw_event_mask;
	u8 version;
	struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
	struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
	struct irq_work irq_work;

	/*
	 * Overlay the bitmap with a 64-bit atomic so that all bits can be
	 * set in a single access, e.g. to reprogram all counters when the PMU
	 * filter changes.
	 */
	union {
		DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
		atomic64_t __reprogram_pmi;
	};
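	/*
	 * For instance, a single atomic64_set(&pmu->__reprogram_pmi, -1ull)
	 * marks every counter for reprogramming in one store instead of
	 * setting X86_PMC_IDX_MAX bits one by one (illustrative use, not a
	 * quote of the actual call sites).
	 */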
	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);

	u64 ds_area;
	u64 pebs_enable;
	u64 pebs_enable_mask;
	u64 pebs_data_cfg;
	u64 pebs_data_cfg_mask;

	/*
	 * If a guest counter is cross-mapped to host counter with different
	 * index, its PEBS capability will be temporarily disabled.
	 *
	 * The user should make sure that this mask is updated
	 * after disabling interrupts and before perf_guest_get_msrs();
	 */
	u64 host_cross_mapped_mask;

	/*
	 * The gate to release perf_events not marked in
	 * pmc_in_use only once in a vcpu time slice.
	 */
	bool need_cleanup;

	/*
	 * The total number of programmed perf_events; it helps to avoid a
	 * redundant check before cleanup if the guest doesn't use vPMU at all.
	 */
	u8 event_count;
};

struct kvm_pmu_ops;

enum {
	KVM_DEBUGREG_BP_ENABLED = 1,
	KVM_DEBUGREG_WONT_EXIT = 2,
};

struct kvm_mtrr_range {
	u64 base;
	u64 mask;
	struct list_head node;
};

struct kvm_mtrr {
	struct kvm_mtrr_range var_ranges[KVM_NR_VAR_MTRR];
	mtrr_type fixed_ranges[KVM_NR_FIXED_MTRR_REGION];
	u64 deftype;

	struct list_head head;
};

/* Hyper-V SynIC timer */
struct kvm_vcpu_hv_stimer {
	struct hrtimer timer;
	int index;
	union hv_stimer_config config;
	u64 count;
	u64 exp_time;
	struct hv_message msg;
	bool msg_pending;
};

/* Hyper-V synthetic interrupt controller (SynIC)*/
struct kvm_vcpu_hv_synic {
	u64 version;
	u64 control;
	u64 msg_page;
	u64 evt_page;
	atomic64_t sint[HV_SYNIC_SINT_COUNT];
	atomic_t sint_to_gsi[HV_SYNIC_SINT_COUNT];
	DECLARE_BITMAP(auto_eoi_bitmap, 256);
	DECLARE_BITMAP(vec_bitmap, 256);
	bool active;
	bool dont_zero_synic_pages;
};

/* The maximum number of entries on the TLB flush fifo. */
#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16)
/*
 * Note: the following 'magic' entry is made up by KVM to avoid putting
 * anything besides GVA on the TLB flush fifo. It is theoretically possible
 * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000
 * which will look identical. KVM's action to 'flush everything' instead of
 * flushing these particular addresses is, however, fully legitimate as
 * flushing more than requested is always OK.
 */
#define KVM_HV_TLB_FLUSHALL_ENTRY  ((u64)-1)
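/*
 * The collision described above is plain arithmetic: a page-aligned GVA of
 * 0xfffffffffffff000 combined with a count of 0xfff in the low 12 bits ORs
 * together to 0xffffffffffffffff, i.e. the same value as
 * KVM_HV_TLB_FLUSHALL_ENTRY (assuming the GVA-plus-count packing sketched in
 * the comment above).
 */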

enum hv_tlb_flush_fifos {
	HV_L1_TLB_FLUSH_FIFO,
	HV_L2_TLB_FLUSH_FIFO,
	HV_NR_TLB_FLUSH_FIFOS,
};

struct kvm_vcpu_hv_tlb_flush_fifo {
	spinlock_t write_lock;
	DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE);
};

/* Hyper-V per vcpu emulation context */
struct kvm_vcpu_hv {
	struct kvm_vcpu *vcpu;
	u32 vp_index;
	u64 hv_vapic;
	s64 runtime_offset;
	struct kvm_vcpu_hv_synic synic;
	struct kvm_hyperv_exit exit;
	struct kvm_vcpu_hv_stimer stimer[HV_SYNIC_STIMER_COUNT];
	DECLARE_BITMAP(stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
	bool enforce_cpuid;
	struct {
		u32 features_eax; /* HYPERV_CPUID_FEATURES.EAX */
		u32 features_ebx; /* HYPERV_CPUID_FEATURES.EBX */
		u32 features_edx; /* HYPERV_CPUID_FEATURES.EDX */
		u32 enlightenments_eax; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
		u32 enlightenments_ebx; /* HYPERV_CPUID_ENLIGHTMENT_INFO.EBX */
		u32 syndbg_cap_eax; /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
		u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */
		u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */
	} cpuid_cache;

	struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];

	/* Preallocated buffer for handling hypercalls passing sparse vCPU set */
	u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];

	struct hv_vp_assist_page vp_assist_page;

	struct {
		u64 pa_page_gpa;
		u64 vm_id;
		u32 vp_id;
	} nested;
};

struct kvm_hypervisor_cpuid {
	u32 base;
	u32 limit;
};

/* Xen HVM per vcpu emulation context */
struct kvm_vcpu_xen {
	u64 hypercall_rip;
	u32 current_runstate;
	u8 upcall_vector;
	struct gfn_to_pfn_cache vcpu_info_cache;
	struct gfn_to_pfn_cache vcpu_time_info_cache;
	struct gfn_to_pfn_cache runstate_cache;
	struct gfn_to_pfn_cache runstate2_cache;
	u64 last_steal;
	u64 runstate_entry_time;
	u64 runstate_times[4];
	unsigned long evtchn_pending_sel;
	u32 vcpu_id; /* The Xen / ACPI vCPU ID */
	u32 timer_virq;
	u64 timer_expires; /* In guest epoch */
	atomic_t timer_pending;
	struct hrtimer timer;
	int poll_evtchn;
	struct timer_list poll_timer;
	struct kvm_hypervisor_cpuid cpuid;
};

struct kvm_queued_exception {
	bool pending;
	bool injected;
	bool has_error_code;
	u8 vector;
	u32 error_code;
	unsigned long payload;
	bool has_payload;
};

struct kvm_vcpu_arch {
	/*
	 * rip and regs accesses must go through
	 * kvm_{register,rip}_{read,write} functions.
	 */
	unsigned long regs[NR_VCPU_REGS];
	u32 regs_avail;
	u32 regs_dirty;

	unsigned long cr0;
	unsigned long cr0_guest_owned_bits;
	unsigned long cr2;
	unsigned long cr3;
	unsigned long cr4;
	unsigned long cr4_guest_owned_bits;
	unsigned long cr4_guest_rsvd_bits;
	unsigned long cr8;
	u32 host_pkru;
	u32 pkru;
	u32 hflags;
	u64 efer;
	u64 apic_base;
	struct kvm_lapic *apic;    /* kernel irqchip context */
	bool load_eoi_exitmap_pending;
	DECLARE_BITMAP(ioapic_handled_vectors, 256);
	unsigned long apic_attention;
	int32_t apic_arb_prio;
	int mp_state;
	u64 ia32_misc_enable_msr;
	u64 smbase;
	u64 smi_count;
	bool at_instruction_boundary;
	bool tpr_access_reporting;
	bool xsaves_enabled;
	bool xfd_no_write_intercept;
	u64 ia32_xss;
	u64 microcode_version;
	u64 arch_capabilities;
	u64 perf_capabilities;

	/*
	 * Paging state of the vcpu
	 *
	 * If the vcpu runs in guest mode with two level paging this still saves
	 * the paging mode of the l1 guest. This context is always used to
	 * handle faults.
	 */
	struct kvm_mmu *mmu;

	/* Non-nested MMU for L1 */
	struct kvm_mmu root_mmu;

	/* L1 MMU when running nested */
	struct kvm_mmu guest_mmu;

	/*
	 * Paging state of an L2 guest (used for nested npt)
	 *
	 * This context will save all necessary information to walk page tables
	 * of an L2 guest. This context is only initialized for page table
	 * walking and not for faulting since we never handle l2 page faults on
	 * the host.
	 */
	struct kvm_mmu nested_mmu;

	/*
	 * Pointer to the mmu context currently used for
	 * gva_to_gpa translations.
	 */
	struct kvm_mmu *walk_mmu;

	struct kvm_mmu_memory_cache mmu_pte_list_desc_cache;
	struct kvm_mmu_memory_cache mmu_shadow_page_cache;
	struct kvm_mmu_memory_cache mmu_shadowed_info_cache;
	struct kvm_mmu_memory_cache mmu_page_header_cache;

	/*
	 * QEMU userspace and the guest each have their own FPU state.
	 * In vcpu_run, we switch between the user and guest FPU contexts.
	 * While running a VCPU, the VCPU thread will have the guest FPU
	 * context.
	 *
	 * Note that while the PKRU state lives inside the fpu registers,
	 * it is switched out separately at VMENTER and VMEXIT time. The
	 * "guest_fpstate" state here contains the guest FPU context, with the
	 * host PKRU bits.
	 */
	struct fpu_guest guest_fpu;

	u64 xcr0;
	u64 guest_supported_xcr0;

	struct kvm_pio_request pio;
	void *pio_data;
	void *sev_pio_data;
	unsigned sev_pio_count;

	u8 event_exit_inst_len;

	bool exception_from_userspace;

	/* Exceptions to be injected to the guest. */
	struct kvm_queued_exception exception;
	/* Exception VM-Exits to be synthesized to L1. */
	struct kvm_queued_exception exception_vmexit;

	struct kvm_queued_interrupt {
		bool injected;
		bool soft;
		u8 nr;
	} interrupt;

	int halt_request; /* real mode on Intel only */

	int cpuid_nent;
	struct kvm_cpuid_entry2 *cpuid_entries;
	struct kvm_hypervisor_cpuid kvm_cpuid;

	u64 reserved_gpa_bits;
	int maxphyaddr;

	/* emulate context */

	struct x86_emulate_ctxt *emulate_ctxt;
	bool emulate_regs_need_sync_to_vcpu;
	bool emulate_regs_need_sync_from_vcpu;
	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);

	gpa_t time;
	struct pvclock_vcpu_time_info hv_clock;
	unsigned int hw_tsc_khz;
	struct gfn_to_pfn_cache pv_time;
	/* set guest stopped flag in pvclock flags field */
	bool pvclock_set_guest_stopped_request;

	struct {
		u8 preempted;
		u64 msr_val;
		u64 last_steal;
		struct gfn_to_hva_cache cache;
	} st;

	u64 l1_tsc_offset;
	u64 tsc_offset; /* current tsc offset */
	u64 last_guest_tsc;
	u64 last_host_tsc;
	u64 tsc_offset_adjustment;
	u64 this_tsc_nsec;
	u64 this_tsc_write;
	u64 this_tsc_generation;
	bool tsc_catchup;
	bool tsc_always_catchup;
	s8 virtual_tsc_shift;
	u32 virtual_tsc_mult;
	u32 virtual_tsc_khz;
	s64 ia32_tsc_adjust_msr;
	u64 msr_ia32_power_ctl;
	u64 l1_tsc_scaling_ratio;
	u64 tsc_scaling_ratio; /* current scaling ratio */

	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
	unsigned nmi_pending; /* NMI queued after currently running handler */
	bool nmi_injected;    /* Trying to inject an NMI this entry */
	bool smi_pending;     /* SMI queued after currently running handler */
	u8 handling_intr_from_guest;

	struct kvm_mtrr mtrr_state;
	u64 pat;

	unsigned switch_db_regs;
	unsigned long db[KVM_NR_DB_REGS];
	unsigned long dr6;
	unsigned long dr7;
	unsigned long eff_db[KVM_NR_DB_REGS];
	unsigned long guest_debug_dr7;
	u64 msr_platform_info;
	u64 msr_misc_features_enables;

	u64 mcg_cap;
	u64 mcg_status;
	u64 mcg_ctl;
	u64 mcg_ext_ctl;
	u64 *mce_banks;
	u64 *mci_ctl2_banks;

	/* Cache MMIO info */
	u64 mmio_gva;
	unsigned mmio_access;
	gfn_t mmio_gfn;
	u64 mmio_gen;

	struct kvm_pmu pmu;

	/* used for guest single stepping over the given code position */
	unsigned long singlestep_rip;

	bool hyperv_enabled;
	struct kvm_vcpu_hv *hyperv;
	struct kvm_vcpu_xen xen;

	cpumask_var_t wbinvd_dirty_mask;

	unsigned long last_retry_eip;
	unsigned long last_retry_addr;

	struct {
		bool halted;
		gfn_t gfns[ASYNC_PF_PER_VCPU];
		struct gfn_to_hva_cache data;
		u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */
		u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */
		u16 vec;
		u32 id;
		bool send_user_only;
		u32 host_apf_flags;
		bool delivery_as_pf_vmexit;
		bool pageready_pending;
	} apf;

	/* OSVW MSRs (AMD only) */
	struct {
		u64 length;
		u64 status;
	} osvw;

	struct {
		u64 msr_val;
		struct gfn_to_hva_cache data;
	} pv_eoi;

	u64 msr_kvm_poll_control;

	/* set at EPT violation at this point */
	unsigned long exit_qualification;

	/* pv related host specific info */
	struct {
		bool pv_unhalted;
	} pv;

	int pending_ioapic_eoi;
	int pending_external_vector;

	/* be preempted when it's in kernel-mode(cpl=0) */
	bool preempted_in_kernel;

	/* Flush the L1 Data cache for L1TF mitigation on VMENTER */
	bool l1tf_flush_l1d;

	/* Host CPU on which VM-entry was most recently attempted */
	int last_vmentry_cpu;

	/* AMD MSRC001_0015 Hardware Configuration */
	u64 msr_hwcr;

	/* pv related cpuid info */
	struct {
		/*
		 * value of the eax register in the KVM_CPUID_FEATURES CPUID
		 * leaf.
		 */
		u32 features;

		/*
		 * indicates whether pv emulation should be disabled if features
		 * are not present in the guest's cpuid
		 */
		bool enforce;
	} pv_cpuid;

	/* Protected Guests */
	bool guest_state_protected;

	/*
	 * Set when PDPTS were loaded directly by the userspace without
	 * reading the guest memory
	 */
	bool pdptrs_from_userspace;

#if IS_ENABLED(CONFIG_HYPERV)
	hpa_t hv_root_tdp;
#endif
};

struct kvm_lpage_info {
	int disallow_lpage;
};

struct kvm_arch_memory_slot {
	struct kvm_rmap_head *rmap[KVM_NR_PAGE_SIZES];
	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
	unsigned short *gfn_track[KVM_PAGE_TRACK_MAX];
};

/*
 * Track the mode of the optimized logical map, as the rules for decoding the
 * destination vary per mode.  Enabling the optimized logical map requires all
 * software-enabled local APICs to be in the same mode, each addressable APIC
 * to be mapped to only one MDA, and each MDA to map to at most one APIC.
 */
enum kvm_apic_logical_mode {
	/* All local APICs are software disabled. */
	KVM_APIC_MODE_SW_DISABLED,
	/* All software enabled local APICs in xAPIC cluster addressing mode. */
	KVM_APIC_MODE_XAPIC_CLUSTER,
	/* All software enabled local APICs in xAPIC flat addressing mode. */
	KVM_APIC_MODE_XAPIC_FLAT,
	/* All software enabled local APICs in x2APIC mode. */
	KVM_APIC_MODE_X2APIC,
	/*
	 * Optimized map disabled, e.g. not all local APICs in the same logical
	 * mode, same logical ID assigned to multiple APICs, etc.
	 */
	KVM_APIC_MODE_MAP_DISABLED,
};

struct kvm_apic_map {
	struct rcu_head rcu;
	enum kvm_apic_logical_mode logical_mode;
	u32 max_apic_id;
	union {
		struct kvm_lapic *xapic_flat_map[8];
		struct kvm_lapic *xapic_cluster_map[16][4];
	};
	struct kvm_lapic *phys_map[];
};

/* Hyper-V synthetic debugger (SynDbg)*/
struct kvm_hv_syndbg {
	struct {
		u64 control;
		u64 status;
		u64 send_page;
		u64 recv_page;
		u64 pending_page;
	} control;
	u64 options;
};

/* Current state of Hyper-V TSC page clocksource */
enum hv_tsc_page_status {
	/* TSC page was not set up or disabled */
	HV_TSC_PAGE_UNSET = 0,
	/* TSC page MSR was written by the guest, update pending */
	HV_TSC_PAGE_GUEST_CHANGED,
	/* TSC page update was triggered from the host side */
	HV_TSC_PAGE_HOST_CHANGED,
	/* TSC page was properly set up and is currently active */
	HV_TSC_PAGE_SET,
	/* TSC page was set up with an inaccessible GPA */
	HV_TSC_PAGE_BROKEN,
};

/* Hyper-V emulation context */
struct kvm_hv {
	struct mutex hv_lock;
	u64 hv_guest_os_id;
	u64 hv_hypercall;
	u64 hv_tsc_page;
	enum hv_tsc_page_status hv_tsc_page_status;

	/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
	u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
	u64 hv_crash_ctl;

	struct ms_hyperv_tsc_page tsc_ref;

	struct idr conn_to_evt;

	u64 hv_reenlightenment_control;
	u64 hv_tsc_emulation_control;
	u64 hv_tsc_emulation_status;
	u64 hv_invtsc_control;

	/* How many vCPUs have VP index != vCPU index */
	atomic_t num_mismatched_vp_indexes;

	/*
	 * How many SynICs use 'AutoEOI' feature
	 * (protected by arch.apicv_update_lock)
	 */
	unsigned int synic_auto_eoi_used;

	struct hv_partition_assist_pg *hv_pa_pg;
	struct kvm_hv_syndbg hv_syndbg;
};

struct msr_bitmap_range {
	u32 flags;
	u32 nmsrs;
	u32 base;
	unsigned long *bitmap;
};

/* Xen emulation context */
struct kvm_xen {
	struct mutex xen_lock;
	u32 xen_version;
	bool long_mode;
	bool runstate_update_flag;
	u8 upcall_vector;
	struct gfn_to_pfn_cache shinfo_cache;
	struct idr evtchn_ports;
	unsigned long poll_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
};

enum kvm_irqchip_mode {
	KVM_IRQCHIP_NONE,
	KVM_IRQCHIP_KERNEL,       /* created with KVM_CREATE_IRQCHIP */
	KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
};

struct kvm_x86_msr_filter {
	u8 count;
	bool default_allow:1;
	struct msr_bitmap_range ranges[16];
};

struct kvm_x86_pmu_event_filter {
	__u32 action;
	__u32 nevents;
	__u32 fixed_counter_bitmap;
	__u32 flags;
	__u32 nr_includes;
	__u32 nr_excludes;
	__u64 *includes;
	__u64 *excludes;
	__u64 events[];
};

enum kvm_apicv_inhibit {

	/********************************************************************/
	/* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */
	/********************************************************************/

	/*
	 * APIC acceleration is disabled by a module parameter
	 * and/or not supported in hardware.
	 */
	APICV_INHIBIT_REASON_DISABLE,

	/*
	 * APIC acceleration is inhibited because AutoEOI feature is
	 * being used by a HyperV guest.
	 */
	APICV_INHIBIT_REASON_HYPERV,

	/*
	 * APIC acceleration is inhibited because the userspace didn't yet
	 * enable the kernel/split irqchip.
	 */
	APICV_INHIBIT_REASON_ABSENT,

	/* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ
	 * (out of band, debug measure of blocking all interrupts on this vCPU)
	 * was enabled, to avoid AVIC/APICv bypassing it.
	 */
	APICV_INHIBIT_REASON_BLOCKIRQ,

	/*
	 * APICv is disabled because not all vCPUs have a 1:1 mapping between
	 * APIC ID and vCPU, _and_ KVM is not applying its x2APIC hotplug hack.
	 */
	APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED,

	/*
	 * For simplicity, the APIC acceleration is inhibited
	 * first time either APIC ID or APIC base are changed by the guest
	 * from their reset values.
	 */
	APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
	APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,

	/******************************************************/
	/* INHIBITs that are relevant only to the AMD's AVIC. */
	/******************************************************/

	/*
	 * AVIC is inhibited on a vCPU because it runs a nested guest.
	 *
	 * This is needed because unlike APICv, the peers of this vCPU
	 * cannot use the doorbell mechanism to signal interrupts via AVIC when
	 * a vCPU runs nested.
	 */
	APICV_INHIBIT_REASON_NESTED,

	/*
	 * On SVM, the wait for the IRQ window is implemented with pending vIRQ,
	 * which cannot be injected when the AVIC is enabled, thus AVIC
	 * is inhibited while KVM waits for IRQ window.
	 */
	APICV_INHIBIT_REASON_IRQWIN,

	/*
	 * PIT (i8254) 're-inject' mode, relies on EOI intercept,
	 * which AVIC doesn't support for edge triggered interrupts.
	 */
	APICV_INHIBIT_REASON_PIT_REINJ,

	/*
	 * AVIC is disabled because SEV doesn't support it.
	 */
	APICV_INHIBIT_REASON_SEV,

	/*
	 * AVIC is disabled because not all vCPUs with a valid LDR have a 1:1
	 * mapping between logical ID and vCPU.
	 */
	APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED,
};

struct kvm_arch {
	unsigned long n_used_mmu_pages;
	unsigned long n_requested_mmu_pages;
	unsigned long n_max_mmu_pages;
	unsigned int indirect_shadow_pages;
	u8 mmu_valid_gen;
	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
	struct list_head active_mmu_pages;
	struct list_head zapped_obsolete_pages;
	/*
	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
	 * replaced by an NX huge page.  A shadow page is on this list if its
	 * existence disallows an NX huge page (nx_huge_page_disallowed is set)
	 * and there are no other conditions that prevent a huge page, e.g.
	 * the backing host page is huge, dirty logging is not enabled for its
	 * memslot, etc...  Note, zapping shadow pages on this list doesn't
	 * guarantee an NX huge page will be created in its stead, e.g. if the
	 * guest attempts to execute from the region then KVM obviously can't
	 * create an NX huge page (without hanging the guest).
	 */
	struct list_head possible_nx_huge_pages;
	struct kvm_page_track_notifier_node mmu_sp_tracker;
	struct kvm_page_track_notifier_head track_notifier_head;
	/*
	 * Protects marking pages unsync during page faults, as TDP MMU page
	 * faults only take mmu_lock for read.  For simplicity, the unsync
	 * pages lock is always taken when marking pages unsync regardless of
	 * whether mmu_lock is held for read or write.
	 */
	spinlock_t mmu_unsync_pages_lock;

	struct list_head assigned_dev_head;
	struct iommu_domain *iommu_domain;
	bool iommu_noncoherent;
#define __KVM_HAVE_ARCH_NONCOHERENT_DMA
	atomic_t noncoherent_dma_count;
#define __KVM_HAVE_ARCH_ASSIGNED_DEVICE
	atomic_t assigned_device_count;
	struct kvm_pic *vpic;
	struct kvm_ioapic *vioapic;
	struct kvm_pit *vpit;
	atomic_t vapics_in_nmi_mode;
	struct mutex apic_map_lock;
	struct kvm_apic_map __rcu *apic_map;
	atomic_t apic_map_dirty;

	bool apic_access_memslot_enabled;
	bool apic_access_memslot_inhibited;

	/* Protects apicv_inhibit_reasons */
	struct rw_semaphore apicv_update_lock;
	unsigned long apicv_inhibit_reasons;

	gpa_t wall_clock;

	bool mwait_in_guest;
	bool hlt_in_guest;
	bool pause_in_guest;
	bool cstate_in_guest;

	unsigned long irq_sources_bitmap;
	s64 kvmclock_offset;

	/*
	 * This also protects nr_vcpus_matched_tsc which is read from a
	 * preemption-disabled region, so it must be a raw spinlock.
	 */
	raw_spinlock_t tsc_write_lock;
	u64 last_tsc_nsec;
	u64 last_tsc_write;
	u32 last_tsc_khz;
	u64 last_tsc_offset;
	u64 cur_tsc_nsec;
	u64 cur_tsc_write;
	u64 cur_tsc_offset;
	u64 cur_tsc_generation;
	int nr_vcpus_matched_tsc;

	u32 default_tsc_khz;

	seqcount_raw_spinlock_t pvclock_sc;
	bool use_master_clock;
	u64 master_kernel_ns;
	u64 master_cycle_now;
	struct delayed_work kvmclock_update_work;
	struct delayed_work kvmclock_sync_work;

	struct kvm_xen_hvm_config xen_hvm_config;

	/* reads protected by irq_srcu, writes by irq_lock */
	struct hlist_head mask_notifier_list;

	struct kvm_hv hyperv;
	struct kvm_xen xen;

	bool backwards_tsc_observed;
	bool boot_vcpu_runs_old_kvmclock;
	u32 bsp_vcpu_id;

	u64 disabled_quirks;

	enum kvm_irqchip_mode irqchip_mode;
	u8 nr_reserved_ioapic_pins;

	bool disabled_lapic_found;

	bool x2apic_format;
	bool x2apic_broadcast_quirk_disabled;

	bool guest_can_read_msr_platform_info;
	bool exception_payload_enabled;

	bool triple_fault_event;

	bool bus_lock_detection_enabled;
	bool enable_pmu;

	u32 notify_window;
	u32 notify_vmexit_flags;
	/*
	 * If exit_on_emulation_error is set, and the in-kernel instruction
	 * emulator fails to emulate an instruction, allow userspace
	 * the opportunity to look at it.
	 */
	bool exit_on_emulation_error;

	/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
	u32 user_space_msr_mask;
	struct kvm_x86_msr_filter __rcu *msr_filter;

	u32 hypercall_exit_enabled;

	/* Guest can access the SGX PROVISIONKEY. */
	bool sgx_provisioning_allowed;

	struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
	struct task_struct *nx_huge_page_recovery_thread;

#ifdef CONFIG_X86_64
	/* The number of TDP MMU pages across all roots. */
	atomic64_t tdp_mmu_pages;

	/*
	 * List of struct kvm_mmu_pages being used as roots.
	 * All struct kvm_mmu_pages in the list should have
	 * tdp_mmu_page set.
	 *
	 * For reads, this list is protected by:
	 *	the MMU lock in read mode + RCU or
	 *	the MMU lock in write mode
	 *
	 * For writes, this list is protected by:
	 *	the MMU lock in read mode + the tdp_mmu_pages_lock or
	 *	the MMU lock in write mode
	 *
	 * Roots will remain in the list until their tdp_mmu_root_count
	 * drops to zero, at which point the thread that decremented the
	 * count to zero should remove the root from the list and clean
	 * it up, freeing the root after an RCU grace period.
	 */
	struct list_head tdp_mmu_roots;

	/*
	 * Protects accesses to the following fields when the MMU lock
	 * is held in read mode:
	 *  - tdp_mmu_roots (above)
	 *  - the link field of kvm_mmu_page structs used by the TDP MMU
	 *  - possible_nx_huge_pages;
	 *  - the possible_nx_huge_page_link field of kvm_mmu_page structs used
	 *    by the TDP MMU
	 * It is acceptable, but not necessary, to acquire this lock when
	 * the thread holds the MMU lock in write mode.
	 */
	spinlock_t tdp_mmu_pages_lock;
	struct workqueue_struct *tdp_mmu_zap_wq;
#endif /* CONFIG_X86_64 */

	/*
	 * If set, at least one shadow root has been allocated. This flag
	 * is used as one input when determining whether certain memslot
	 * related allocations are necessary.
	 */
	bool shadow_root_allocated;

#if IS_ENABLED(CONFIG_HYPERV)
	hpa_t hv_root_tdp;
	spinlock_t hv_root_tdp_lock;
#endif
	/*
	 * VM-scope maximum vCPU ID. Used to determine the size of structures
	 * that increase along with the maximum vCPU ID, in which case, using
	 * the global KVM_MAX_VCPU_IDS may lead to significant memory waste.
	 */
	u32 max_vcpu_ids;

	bool disable_nx_huge_pages;

	/*
	 * Memory caches used to allocate shadow pages when performing eager
	 * page splitting. No need for a shadowed_info_cache since eager page
	 * splitting only allocates direct shadow pages.
	 *
	 * Protected by kvm->slots_lock.
	 */
	struct kvm_mmu_memory_cache split_shadow_page_cache;
	struct kvm_mmu_memory_cache split_page_header_cache;

	/*
	 * Memory cache used to allocate pte_list_desc structs while splitting
	 * huge pages. In the worst case, to split one huge page, 512
	 * pte_list_desc structs are needed to add each lower level leaf sptep
	 * to the rmap plus 1 to extend the parent_ptes rmap of the lower level
	 * page table.
	 *
	 * Protected by kvm->slots_lock.
	 */
#define SPLIT_DESC_CACHE_MIN_NR_OBJECTS (SPTE_ENT_PER_PAGE + 1)
	struct kvm_mmu_memory_cache split_desc_cache;
};

struct kvm_vm_stat {
	struct kvm_vm_stat_generic generic;
	u64 mmu_shadow_zapped;
	u64 mmu_pte_write;
	u64 mmu_pde_zapped;
	u64 mmu_flooded;
	u64 mmu_recycled;
	u64 mmu_cache_miss;
	u64 mmu_unsync;
	union {
		struct {
			atomic64_t pages_4k;
			atomic64_t pages_2m;
			atomic64_t pages_1g;
		};
		atomic64_t pages[KVM_NR_PAGE_SIZES];
	};
	u64 nx_lpage_splits;
	u64 max_mmu_page_hash_collisions;
	u64 max_mmu_rmap_size;
};

struct kvm_vcpu_stat {
	struct kvm_vcpu_stat_generic generic;
	u64 pf_taken;
	u64 pf_fixed;
	u64 pf_emulate;
	u64 pf_spurious;
	u64 pf_fast;
	u64 pf_mmio_spte_created;
	u64 pf_guest;
	u64 tlb_flush;
	u64 invlpg;

	u64 exits;
	u64 io_exits;
	u64 mmio_exits;
	u64 signal_exits;
	u64 irq_window_exits;
	u64 nmi_window_exits;
	u64 l1d_flush;
	u64 halt_exits;
	u64 request_irq_exits;
	u64 irq_exits;
	u64 host_state_reload;
	u64 fpu_reload;
	u64 insn_emulation;
	u64 insn_emulation_fail;
	u64 hypercalls;
	u64 irq_injections;
	u64 nmi_injections;
	u64 req_event;
	u64 nested_run;
	u64 directed_yield_attempted;
	u64 directed_yield_successful;
	u64 preemption_reported;
	u64 preemption_other;
	u64 guest_mode;
	u64 notify_window_exits;
};

struct x86_instruction_info;

struct msr_data {
	bool host_initiated;
	u32 index;
	u64 data;
};

struct kvm_lapic_irq {
	u32 vector;
	u16 delivery_mode;
	u16 dest_mode;
	bool level;
	u16 trig_mode;
	u32 shorthand;
	u32 dest_id;
	bool msi_redir_hint;
};

static inline u16 kvm_lapic_irq_dest_mode(bool dest_mode_logical)
{
	return dest_mode_logical ? APIC_DEST_LOGICAL : APIC_DEST_PHYSICAL;
}

struct kvm_x86_ops {
	const char *name;

	int (*check_processor_compatibility)(void);

	int (*hardware_enable)(void);
	void (*hardware_disable)(void);
	void (*hardware_unsetup)(void);
	bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
	void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);

	unsigned int vm_size;
	int (*vm_init)(struct kvm *kvm);
	void (*vm_destroy)(struct kvm *kvm);

	/* Create, but do not attach this VCPU */
	int (*vcpu_precreate)(struct kvm *kvm);
	int (*vcpu_create)(struct kvm_vcpu *vcpu);
	void (*vcpu_free)(struct kvm_vcpu *vcpu);
	void (*vcpu_reset)(struct kvm_vcpu *vcpu, bool init_event);

	void (*prepare_switch_to_guest)(struct kvm_vcpu *vcpu);
	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
	void (*vcpu_put)(struct kvm_vcpu *vcpu);

	void (*update_exception_bitmap)(struct kvm_vcpu *vcpu);
	int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
	int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr);
	u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
	void (*get_segment)(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg);
	int (*get_cpl)(struct kvm_vcpu *vcpu);
	void (*set_segment)(struct kvm_vcpu *vcpu,
			    struct kvm_segment *var, int seg);
	void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
	void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
	void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
	bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
	void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
	int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
	void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
	void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
	void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
	void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
	void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu);
	void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
	bool (*get_if_flag)(struct kvm_vcpu *vcpu);

	void (*flush_tlb_all)(struct kvm_vcpu *vcpu);
	void (*flush_tlb_current)(struct kvm_vcpu *vcpu);
	int  (*flush_remote_tlbs)(struct kvm *kvm);
	int  (*flush_remote_tlbs_range)(struct kvm *kvm, gfn_t gfn,
					gfn_t nr_pages);

	/*
	 * Flush any TLB entries associated with the given GVA.
	 * Does not need to flush GPA->HPA mappings.
	 * Can potentially get non-canonical addresses through INVLPGs, which
	 * the implementation may choose to ignore if appropriate.
	 */
	void (*flush_tlb_gva)(struct kvm_vcpu *vcpu, gva_t addr);

	/*
	 * Flush any TLB entries created by the guest.  Like tlb_flush_gva(),
	 * does not need to flush GPA->HPA mappings.
	 */
	void (*flush_tlb_guest)(struct kvm_vcpu *vcpu);

	int (*vcpu_pre_run)(struct kvm_vcpu *vcpu);
	enum exit_fastpath_completion (*vcpu_run)(struct kvm_vcpu *vcpu);
	int (*handle_exit)(struct kvm_vcpu *vcpu,
		enum exit_fastpath_completion exit_fastpath);
	int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu);
	void (*update_emulated_instruction)(struct kvm_vcpu *vcpu);
	void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask);
	u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu);
	void (*patch_hypercall)(struct kvm_vcpu *vcpu,
				unsigned char *hypercall_addr);
	void (*inject_irq)(struct kvm_vcpu *vcpu, bool reinjected);
	void (*inject_nmi)(struct kvm_vcpu *vcpu);
	void (*inject_exception)(struct kvm_vcpu *vcpu);
	void (*cancel_injection)(struct kvm_vcpu *vcpu);
	int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
	int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
	bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
	void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
	void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
	void (*enable_irq_window)(struct kvm_vcpu *vcpu);
	void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
	bool (*check_apicv_inhibit_reasons)(enum kvm_apicv_inhibit reason);
	const unsigned long required_apicv_inhibits;
	bool allow_apicv_in_x2apic_without_x2apic_virtualization;
	void (*refresh_apicv_exec_ctrl)(struct kvm_vcpu *vcpu);
	void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr);
	void (*hwapic_isr_update)(int isr);
	bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu);
	void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap);
	void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu);
	void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu);
	void (*deliver_interrupt)(struct kvm_lapic *apic, int delivery_mode,
				  int trig_mode, int vector);
	int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu);
	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
	int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
	u8 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);

	void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
			     int root_level);

727a7e27 1644
f5f48ee1
SY
1645 bool (*has_wbinvd_exit)(void);
1646
307a94c7
IS
1647 u64 (*get_l2_tsc_offset)(struct kvm_vcpu *vcpu);
1648 u64 (*get_l2_tsc_multiplier)(struct kvm_vcpu *vcpu);
edcfe540 1649 void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
1ab9287a 1650 void (*write_tsc_multiplier)(struct kvm_vcpu *vcpu, u64 multiplier);
99e3e30a 1651
235ba74f 1652 /*
0a62a031
DE
1653 * Retrieve somewhat arbitrary exit information. Intended to
1654 * be used only from within tracepoints or error paths.
235ba74f 1655 */
0a62a031
DE
1656 void (*get_exit_info)(struct kvm_vcpu *vcpu, u32 *reason,
1657 u64 *info1, u64 *info2,
235ba74f 1658 u32 *exit_int_info, u32 *exit_int_info_err_code);
8a76d7f2
JR
1659
1660 int (*check_intercept)(struct kvm_vcpu *vcpu,
1661 struct x86_instruction_info *info,
21f1b8f2
SC
1662 enum x86_intercept_stage stage,
1663 struct x86_exception *exception);
a9ab13ff 1664 void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
7f5581f5 1665
d264ee0c 1666 void (*request_immediate_exit)(struct kvm_vcpu *vcpu);
ae97a3b8
RK
1667
1668 void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
88178fd4
KH
1669
1670 /*
a018eba5
SC
1671 * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero
1672 * value indicates CPU dirty logging is unsupported or disabled.
88178fd4 1673 */
6dd03800 1674 int cpu_dirty_log_size;
a85863c2 1675 void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu);
bab4165e 1676
33b22172 1677 const struct kvm_x86_nested_ops *nested_ops;
efc64404 1678
d1ed092f
SS
1679 void (*vcpu_blocking)(struct kvm_vcpu *vcpu);
1680 void (*vcpu_unblocking)(struct kvm_vcpu *vcpu);
1681
e27bc044 1682 int (*pi_update_irte)(struct kvm *kvm, unsigned int host_irq,
efc64404 1683 uint32_t guest_irq, bool set);
e27bc044 1684 void (*pi_start_assignment)(struct kvm *kvm);
be8ca170 1685 void (*apicv_post_state_restore)(struct kvm_vcpu *vcpu);
17e433b5 1686 bool (*dy_apicv_has_pending_interrupt)(struct kvm_vcpu *vcpu);
ce7a058a 1687
f9927982
SC
1688 int (*set_hv_timer)(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
1689 bool *expired);
ce7a058a 1690 void (*cancel_hv_timer)(struct kvm_vcpu *vcpu);
c45dcc71
AR
1691
1692 void (*setup_mce)(struct kvm_vcpu *vcpu);
0234bf88 1693
31e83e21 1694#ifdef CONFIG_KVM_SMM
c9d40913 1695 int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
58c1d206
ML
1696 int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram);
1697 int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram);
c9d40913 1698 void (*enable_smi_window)(struct kvm_vcpu *vcpu);
31e83e21 1699#endif
5acc5c06 1700
03d004cd
SC
1701 int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
1702 int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
1703 int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
54526d1f 1704 int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
b5663931 1705 int (*vm_move_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
683412cc 1706 void (*guest_memory_reclaimed)(struct kvm *kvm);
801e459a
TL
1707
1708 int (*get_msr_feature)(struct kvm_msr_entry *entry);
57b119da 1709
4d31d9ef
SC
1710 bool (*can_emulate_instruction)(struct kvm_vcpu *vcpu, int emul_type,
1711 void *insn, int insn_len);
4b9852f4
LA
1712
1713 bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
b83237ad 1714 int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
93dff2fe
JM
1715
1716 void (*migrate_timers)(struct kvm_vcpu *vcpu);
51de8151 1717 void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
f9a4d621 1718 int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
647daca2
TL
1719
1720 void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
d5fa597e
ML
1721
1722 /*
1723 * Returns vCPU specific APICv inhibit reasons
1724 */
1725 unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
ea4a5ff8
ZX
1726};
1727
33b22172 1728struct kvm_x86_nested_ops {
f7e57078 1729 void (*leave_nested)(struct kvm_vcpu *vcpu);
7709aba8
SC
1730 bool (*is_exception_vmexit)(struct kvm_vcpu *vcpu, u8 vector,
1731 u32 error_code);
33b22172 1732 int (*check_events)(struct kvm_vcpu *vcpu);
5b4ac1a1 1733 bool (*has_events)(struct kvm_vcpu *vcpu);
cb6a32c2 1734 void (*triple_fault)(struct kvm_vcpu *vcpu);
33b22172
PB
1735 int (*get_state)(struct kvm_vcpu *vcpu,
1736 struct kvm_nested_state __user *user_kvm_nested_state,
1737 unsigned user_data_size);
1738 int (*set_state)(struct kvm_vcpu *vcpu,
1739 struct kvm_nested_state __user *user_kvm_nested_state,
1740 struct kvm_nested_state *kvm_state);
729c15c2 1741 bool (*get_nested_state_pages)(struct kvm_vcpu *vcpu);
02f5fb2e 1742 int (*write_log_dirty)(struct kvm_vcpu *vcpu, gpa_t l2_gpa);
33b22172
PB
1743
1744 int (*enable_evmcs)(struct kvm_vcpu *vcpu,
1745 uint16_t *vmcs_version);
1746 uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
b0c9c25e 1747 void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu);
ea4a5ff8
ZX
1748};
1749
d008dfdb 1750struct kvm_x86_init_ops {
d008dfdb 1751 int (*hardware_setup)(void);
33271a9e 1752 unsigned int (*handle_intel_pt_intr)(void);
d008dfdb
SC
1753
1754 struct kvm_x86_ops *runtime_ops;
34886e79 1755 struct kvm_pmu_ops *pmu_ops;
d008dfdb
SC
1756};
1757
af585b92 1758struct kvm_arch_async_pf {
7c90705b 1759 u32 token;
af585b92 1760 gfn_t gfn;
fb67e14f 1761 unsigned long cr3;
c4806acd 1762 bool direct_map;
af585b92
GN
1763};
1764
9cc39a5a 1765extern u32 __read_mostly kvm_nr_uret_msrs;
91661989 1766extern u64 __read_mostly host_efer;
3edd6839 1767extern bool __read_mostly allow_smaller_maxphyaddr;
fdf513e3 1768extern bool __read_mostly enable_apicv;
afaf0b2f 1769extern struct kvm_x86_ops kvm_x86_ops;
97896d04 1770
9af5471b
JB
1771#define KVM_X86_OP(func) \
1772 DECLARE_STATIC_CALL(kvm_x86_##func, *(((struct kvm_x86_ops *)0)->func));
e4fc23ba 1773#define KVM_X86_OP_OPTIONAL KVM_X86_OP
5be2226f 1774#define KVM_X86_OP_OPTIONAL_RET0 KVM_X86_OP
9af5471b
JB
1775#include <asm/kvm-x86-ops.h>
1776
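/*
 * Sketch (illustrative): <asm/kvm-x86-ops.h> is an X-macro list that invokes
 * KVM_X86_OP(<name>) once per kvm_x86_ops member, so the block above declares
 * one static call per hook.  Common code then dispatches through the static
 * call rather than an indirect branch, e.g.:
 *
 *	static_call(kvm_x86_vcpu_load)(vcpu, cpu);
 *	static_call_cond(kvm_x86_vcpu_blocking)(vcpu);
 *
 * static_call_cond() is used for hooks an implementation may leave NULL.
 */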
4f8396b9
SC
1777int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops);
1778void kvm_x86_vendor_exit(void);
1779
434a1e94
SC
1780#define __KVM_HAVE_ARCH_VM_ALLOC
1781static inline struct kvm *kvm_arch_alloc_vm(void)
1782{
88dca4ca 1783 return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
434a1e94 1784}
78b497f2
JG
1785
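/*
 * Note (sketch of the vendor side, not shown in this header): vm_size is set
 * by the vendor module to the size of its containing VM structure, roughly:
 *
 *	.vm_size = sizeof(struct kvm_svm),	(SVM)
 *	.vm_size = sizeof(struct kvm_vmx),	(VMX)
 *
 * so kvm_arch_alloc_vm() above allocates the full vendor structure, whose
 * first member embeds struct kvm.
 */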
1786#define __KVM_HAVE_ARCH_VM_FREE
562b6b08 1787void kvm_arch_free_vm(struct kvm *kvm);
434a1e94 1788
b08660e5
TL
1789#define __KVM_HAVE_ARCH_FLUSH_REMOTE_TLB
1790static inline int kvm_arch_flush_remote_tlb(struct kvm *kvm)
1791{
8a1300ff
SC
1792 if (kvm_x86_ops.flush_remote_tlbs &&
1793 !static_call(kvm_x86_flush_remote_tlbs)(kvm))
b08660e5
TL
1794 return 0;
1795 else
1796 return -ENOTSUPP;
1797}
1798
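/*
 * Behavioural note (sketch): returning -ENOTSUPP tells the generic
 * kvm_flush_remote_tlbs() path that no hypervisor-assisted remote flush is
 * available, so it falls back to kicking every vCPU with KVM_REQ_TLB_FLUSH.
 */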
e1bfc245
SC
1799#define kvm_arch_pmi_in_guest(vcpu) \
1800 ((vcpu) && (vcpu)->arch.handling_intr_from_guest)
1801
982bae43 1802void __init kvm_mmu_x86_module_init(void);
1d0e8480
SC
1803int kvm_mmu_vendor_module_init(void);
1804void kvm_mmu_vendor_module_exit(void);
54f1585a
ZX
1805
1806void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
1807int kvm_mmu_create(struct kvm_vcpu *vcpu);
a1a39128 1808int kvm_mmu_init_vm(struct kvm *kvm);
13d268ca 1809void kvm_mmu_uninit_vm(struct kvm *kvm);
54f1585a 1810
49c6f875 1811void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu);
8a3c1a33 1812void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
1c91cad4 1813void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
269e9552 1814 const struct kvm_memory_slot *memslot,
3c9bd400 1815 int start_level);
a3fe5dbd
DM
1816void kvm_mmu_slot_try_split_huge_pages(struct kvm *kvm,
1817 const struct kvm_memory_slot *memslot,
1818 int target_level);
cb00a70b
DM
1819void kvm_mmu_try_split_huge_pages(struct kvm *kvm,
1820 const struct kvm_memory_slot *memslot,
1821 u64 start, u64 end,
1822 int target_level);
3ea3b7fa 1823void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
f36f3f28 1824 const struct kvm_memory_slot *memslot);
f4b4b180 1825void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
269e9552 1826 const struct kvm_memory_slot *memslot);
54f1585a 1827void kvm_mmu_zap_all(struct kvm *kvm);
15248258 1828void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
bc8a3d89 1829void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned long kvm_nr_mmu_pages);
54f1585a 1830
2df4a5eb 1831int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
cc4b6871 1832
3200f405 1833int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa,
9f811285 1834 const void *val, int bytes);
2f333bcb 1835
6ef768fa
PB
1836struct kvm_irq_mask_notifier {
1837 void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
1838 int irq;
1839 struct hlist_node link;
1840};
1841
1842void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
1843 struct kvm_irq_mask_notifier *kimn);
1844void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
1845 struct kvm_irq_mask_notifier *kimn);
1846void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
1847 bool mask);
1848
2f333bcb 1849extern bool tdp_enabled;
9f811285 1850
a3e06bbe
LJ
1851u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
1852
41577ab8
SC
1853/*
1854 * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
1855 * userspace I/O) to indicate that the emulation context
d9f6e12f 1856 * should be reused as is, i.e. skip initialization of
41577ab8
SC
1857 * emulation context, instruction fetch and decode.
1858 *
1859 * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
1860 * Indicates that only select instructions (tagged with
1861 * EmulateOnUD) should be emulated (to minimize the emulator
1862 * attack surface). See also EMULTYPE_TRAP_UD_FORCED.
1863 *
1864 * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
1865 * decode the instruction length. For use *only* by
906fa904
HW
1866 * kvm_x86_ops.skip_emulated_instruction() implementations if
1867 * EMULTYPE_COMPLETE_USER_EXIT is not set.
41577ab8 1868 *
92daa48b
SC
1869 * EMULTYPE_ALLOW_RETRY_PF - Set when the emulator should resume the guest to
 1870 * retry native execution under certain conditions.
1871 * Can only be set in conjunction with EMULTYPE_PF.
41577ab8
SC
1872 *
1873 * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
1874 * triggered by KVM's magic "force emulation" prefix,
1875 * which is opt in via module param (off by default).
1876 * Bypasses EmulateOnUD restriction despite emulating
1877 * due to an intercepted #UD (see EMULTYPE_TRAP_UD).
1878 * Used to test the full emulator from userspace.
1879 *
1880 * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
1881 * backdoor emulation, which is opt in via module param.
d9f6e12f 1882 * VMware backdoor emulation handles select instructions
41577ab8 1883 * and reinjects the #GP for all other cases.
92daa48b
SC
1884 *
1885 * EMULTYPE_PF - Set when emulating MMIO by way of an intercepted #PF, in which
 1886 * case the CR2/GPA value passed on the stack is valid.
906fa904
HW
1887 *
1888 * EMULTYPE_COMPLETE_USER_EXIT - Set when the emulator should update interruptibility
1889 * state and inject single-step #DBs after skipping
1890 * an instruction (after completing userspace I/O).
258d985f
SC
1891 *
1892 * EMULTYPE_WRITE_PF_TO_SP - Set when emulating an intercepted page fault that
1893 * is attempting to write a gfn that contains one or
1894 * more of the PTEs used to translate the write itself,
1895 * and the owning page table is being shadowed by KVM.
1896 * If emulation of the faulting instruction fails and
1897 * this flag is set, KVM will exit to userspace instead
1898 * of retrying emulation as KVM cannot make forward
1899 * progress.
1900 *
1901 * If emulation fails for a write to guest page tables,
1902 * KVM unprotects (zaps) the shadow page for the target
1903 * gfn and resumes the guest to retry the non-emulatable
1904 * instruction (on hardware). Unprotecting the gfn
1905 * doesn't allow forward progress for a self-changing
1906 * access because doing so also zaps the translation for
1907 * the gfn, i.e. retrying the instruction will hit a
1908 * !PRESENT fault, which results in a new shadow page
1909 * and sends KVM back to square one.
41577ab8 1910 */
571008da
SY
1911#define EMULTYPE_NO_DECODE (1 << 0)
1912#define EMULTYPE_TRAP_UD (1 << 1)
ba8afb6b 1913#define EMULTYPE_SKIP (1 << 2)
92daa48b 1914#define EMULTYPE_ALLOW_RETRY_PF (1 << 3)
b4000606 1915#define EMULTYPE_TRAP_UD_FORCED (1 << 4)
42cbf068 1916#define EMULTYPE_VMWARE_GP (1 << 5)
92daa48b 1917#define EMULTYPE_PF (1 << 6)
906fa904 1918#define EMULTYPE_COMPLETE_USER_EXIT (1 << 7)
258d985f 1919#define EMULTYPE_WRITE_PF_TO_SP (1 << 8)
92daa48b 1920
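/*
 * Illustrative sketch of how the flags combine on the #PF emulation path
 * (simplified; the predicate name is a stand-in): MMIO emulation always
 * carries EMULTYPE_PF and may opt in to retrying native execution:
 *
 *	int emulation_type = EMULTYPE_PF;
 *
 *	if (can_retry_faulting_instruction)
 *		emulation_type |= EMULTYPE_ALLOW_RETRY_PF;
 *
 *	return kvm_emulate_instruction(vcpu, emulation_type);
 */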
c60658d1
SC
1921int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
1922int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
1923 void *insn, int insn_len);
e615e355
DE
1924void __kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu,
1925 u64 *data, u8 ndata);
1926void kvm_prepare_emulation_failure_exit(struct kvm_vcpu *vcpu);
35be0ade 1927
f2b4b7dd 1928void kvm_enable_efer_bits(u64);
384bb783 1929bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
edef5c36 1930int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, bool host_initiated);
f20935d8
SC
1931int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data);
1932int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
1edce0a9
SC
1933int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
1934int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
5ff3a351
SC
1935int kvm_emulate_as_nop(struct kvm_vcpu *vcpu);
1936int kvm_emulate_invd(struct kvm_vcpu *vcpu);
1937int kvm_emulate_mwait(struct kvm_vcpu *vcpu);
1938int kvm_handle_invalid_op(struct kvm_vcpu *vcpu);
1939int kvm_emulate_monitor(struct kvm_vcpu *vcpu);
54f1585a 1940
dca7f128 1941int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
6a908b62 1942int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
54f1585a 1943int kvm_emulate_halt(struct kvm_vcpu *vcpu);
1460179d 1944int kvm_emulate_halt_noskip(struct kvm_vcpu *vcpu);
647daca2 1945int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
f5f48ee1 1946int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
54f1585a 1947
3e6e0aab 1948void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
c53da4f3 1949void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
c697518a 1950int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
2b4a273b 1951void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
3e6e0aab 1952
7f3d35fd
KW
1953int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
1954 int reason, bool has_error_code, u32 error_code);
37817f29 1955
f27ad38a 1956void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0);
5b51cb13 1957void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4);
49a9b07e 1958int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
2390218b 1959int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3);
a83b29c6 1960int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
eea1cff9 1961int kvm_set_cr8(struct kvm_vcpu *vcpu, unsigned long cr8);
020df079 1962int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val);
29d6ca41 1963void kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val);
2d3ad1f4
AK
1964unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
1965void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
92f9895c 1966int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
54f1585a 1967
609e36d3 1968int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
8fe8ab46 1969int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
54f1585a 1970
91586a3b
JK
1971unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
1972void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
c483c454 1973int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
91586a3b 1974
298101da
AK
1975void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
1976void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
4d5523cf 1977void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long payload);
ce7ddec4
JR
1978void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr);
1979void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
6389ee94 1980void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault);
7709aba8 1981void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
53b3d8e9 1982 struct x86_exception *fault);
0a79b009 1983bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
16f8a6f9 1984bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
298101da 1985
1a577b72
MT
1986static inline int __kvm_irq_line_state(unsigned long *irq_state,
1987 int irq_source_id, int level)
1988{
1989 /* Logical OR for level trig interrupt */
1990 if (level)
1991 __set_bit(irq_source_id, irq_state);
1992 else
1993 __clear_bit(irq_source_id, irq_state);
1994
1995 return !!(*irq_state);
1996}
1997
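/*
 * Worked example (hypothetical source IDs, state starts at 0): a level-
 * triggered line shared by two sources stays asserted until both sources
 * have de-asserted it:
 *
 *	__kvm_irq_line_state(&state, 0, 1);	returns 1  (asserted)
 *	__kvm_irq_line_state(&state, 1, 1);	returns 1
 *	__kvm_irq_line_state(&state, 0, 0);	returns 1  (source 1 still high)
 *	__kvm_irq_line_state(&state, 1, 0);	returns 0  (line de-asserted)
 */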
1998int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
1999void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);
3de42dc0 2000
3419ffc8
SY
2001void kvm_inject_nmi(struct kvm_vcpu *vcpu);
2002
7c86663b
PB
2003void kvm_update_dr7(struct kvm_vcpu *vcpu);
2004
1cb3f3ae 2005int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
0c1c92f1 2006void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
6a82cd1c 2007 ulong roots_to_free);
0c1c92f1 2008void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);
ab9ae313
AK
2009gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
2010 struct x86_exception *exception);
ab9ae313
AK
2011gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
2012 struct x86_exception *exception);
2013gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
2014 struct x86_exception *exception);
54f1585a 2015
4e19c36f 2016bool kvm_apicv_activated(struct kvm *kvm);
d5fa597e 2017bool kvm_vcpu_apicv_activated(struct kvm_vcpu *vcpu);
2008fab3 2018void __kvm_vcpu_update_apicv(struct kvm_vcpu *vcpu);
320af55a
SC
2019void __kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
2020 enum kvm_apicv_inhibit reason, bool set);
2021void kvm_set_or_clear_apicv_inhibit(struct kvm *kvm,
2022 enum kvm_apicv_inhibit reason, bool set);
2023
2024static inline void kvm_set_apicv_inhibit(struct kvm *kvm,
2025 enum kvm_apicv_inhibit reason)
2026{
2027 kvm_set_or_clear_apicv_inhibit(kvm, reason, true);
2028}
d62caabb 2029
320af55a
SC
2030static inline void kvm_clear_apicv_inhibit(struct kvm *kvm,
2031 enum kvm_apicv_inhibit reason)
2032{
2033 kvm_set_or_clear_apicv_inhibit(kvm, reason, false);
2034}
b0a1637f 2035
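/*
 * Usage sketch (the reason value is illustrative): code that temporarily
 * breaks an APICv assumption brackets the offending window with the two
 * helpers:
 *
 *	kvm_set_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PIT_REINJ);
 *	...
 *	kvm_clear_apicv_inhibit(kvm, APICV_INHIBIT_REASON_PIT_REINJ);
 */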
54f1585a
ZX
2036int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
2037
736c291c 2038int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
dc25e89e 2039 void *insn, int insn_len);
a7052897 2040void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva);
753b43c9 2041void kvm_mmu_invalidate_addr(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
cd42853e 2042 u64 addr, unsigned long roots);
eb4b248e 2043void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid);
b5129100 2044void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd);
34c16eec 2045
746700d2
WH
2046void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level,
2047 int tdp_max_root_level, int tdp_huge_page_level);
18552672 2048
d6e88aec 2049static inline u16 kvm_read_ldt(void)
ec6d273d
ZX
2050{
2051 u16 ldt;
2052 asm("sldt %0" : "=g"(ldt));
2053 return ldt;
2054}
2055
d6e88aec 2056static inline void kvm_load_ldt(u16 sel)
ec6d273d
ZX
2057{
2058 asm("lldt %0" : : "rm"(sel));
2059}
ec6d273d 2060
ec6d273d
ZX
2061#ifdef CONFIG_X86_64
2062static inline unsigned long read_msr(unsigned long msr)
2063{
2064 u64 value;
2065
2066 rdmsrl(msr, value);
2067 return value;
2068}
2069#endif
2070
c1a5d4f9
AK
2071static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
2072{
2073 kvm_queue_exception_e(vcpu, GP_VECTOR, error_code);
2074}
2075
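/*
 * Typical pattern (sketch): emulation helpers that detect an invalid guest
 * access inject #GP(0) and resume the guest, e.g.:
 *
 *	if (kvm_get_msr(vcpu, ecx, &data)) {
 *		kvm_inject_gp(vcpu, 0);
 *		return 1;
 *	}
 */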
ec6d273d
ZX
2076#define TSS_IOPB_BASE_OFFSET 0x66
2077#define TSS_BASE_SIZE 0x68
2078#define TSS_IOPB_SIZE (65536 / 8)
2079#define TSS_REDIRECTION_SIZE (256 / 8)
7d76b4d3
JP
2080#define RMODE_TSS_SIZE \
2081 (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 1)
53e0aa7b 2082
37817f29
IE
2083enum {
2084 TASK_SWITCH_CALL = 0,
2085 TASK_SWITCH_IRET = 1,
2086 TASK_SWITCH_JMP = 2,
2087 TASK_SWITCH_GATE = 3,
2088};
2089
32e69f23 2090#define HF_GUEST_MASK (1 << 0) /* VCPU is in guest-mode */
a7662aa5
PB
2091
2092#ifdef CONFIG_KVM_SMM
32e69f23
ML
2093#define HF_SMM_MASK (1 << 1)
2094#define HF_SMM_INSIDE_NMI_MASK (1 << 2)
1371d904 2095
ba97bb07
PB
2096# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
2097# define KVM_ADDRESS_SPACE_NUM 2
2098# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
2099# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
2100#else
2101# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
2102#endif
1371d904 2103
e930bffe 2104#define KVM_ARCH_WANT_MMU_NOTIFIER
5f7c292b 2105
c7c9c56c 2106int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
a1b37100 2107int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
71cc849b 2108int kvm_cpu_has_extint(struct kvm_vcpu *v);
a1b37100 2109int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
0b71785d 2110int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
d28bc9dd 2111void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
e930bffe 2112
4180bf1b 2113int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
bdf7ffc8 2114 unsigned long ipi_bitmap_high, u32 min,
4180bf1b
WL
2115 unsigned long icr, int op_64_bit);
2116
e5fda4bb 2117int kvm_add_user_return_msr(u32 msr);
8ea8b8d6 2118int kvm_find_user_return_msr(u32 msr);
7e34fbd0 2119int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
18863bdd 2120
61a05d44
SC
2121static inline bool kvm_is_supported_user_return_msr(u32 msr)
2122{
2123 return kvm_find_user_return_msr(msr) >= 0;
2124}
2125
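/*
 * Sketch of the user-return MSR flow (the MSR choice is illustrative):
 * vendor setup registers the MSR once; the vCPU run path then loads the
 * guest value, and common code restores the host value before returning
 * to userspace.
 *
 *	slot = kvm_add_user_return_msr(MSR_TSC_AUX);		(setup)
 *	...
 *	kvm_set_user_return_msr(slot, guest_tsc_aux, -1ull);	(guest entry)
 */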
62711e5a 2126u64 kvm_scale_tsc(u64 tsc, u64 ratio);
4ba76538 2127u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
83150f29
IS
2128u64 kvm_calc_nested_tsc_offset(u64 l1_offset, u64 l2_offset, u64 l2_multiplier);
2129u64 kvm_calc_nested_tsc_multiplier(u64 l1_multiplier, u64 l2_multiplier);
35181e86 2130
82b32774 2131unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
f92653ee
JK
2132bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
2133
2860c4b1 2134void kvm_make_scan_ioapic_request(struct kvm *kvm);
7ee30bc1
NNL
2135void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
2136 unsigned long *vcpu_bitmap);
2860c4b1 2137
2a18b7e7 2138bool kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
af585b92
GN
2139 struct kvm_async_pf *work);
2140void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
2141 struct kvm_async_pf *work);
56028d08
GN
2142void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
2143 struct kvm_async_pf *work);
557a961a 2144void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu);
7c0ade6c 2145bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu);
af585b92
GN
2146extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
2147
6affcbed
KH
2148int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
2149int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
d264ee0c 2150void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);
db8fcefa 2151
ff5a983c
PX
2152void __user *__x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
2153 u32 size);
d71ba788
PB
2154bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu);
2155bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu);
f5132b01 2156
8feb4a04
FW
2157bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
2158 struct kvm_vcpu **dest_vcpu);
2159
37131313 2160void kvm_set_msi_irq(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
d84f1e07 2161 struct kvm_lapic_irq *irq);
197a4f4b 2162
fdcf7562
AG
2163static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq)
2164{
2165 /* We can only post Fixed and LowPrio IRQs */
637543a8
SS
2166 return (irq->delivery_mode == APIC_DM_FIXED ||
2167 irq->delivery_mode == APIC_DM_LOWEST);
fdcf7562
AG
2168}
2169
d1ed092f
SS
2170static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
2171{
b3646477 2172 static_call_cond(kvm_x86_vcpu_blocking)(vcpu);
d1ed092f
SS
2173}
2174
2175static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
2176{
b3646477 2177 static_call_cond(kvm_x86_vcpu_unblocking)(vcpu);
d1ed092f
SS
2178}
2179
7d669f50
SS
2180static inline int kvm_cpu_get_apicid(int mps_cpu)
2181{
2182#ifdef CONFIG_X86_LOCAL_APIC
64063505 2183 return default_cpu_present_to_apicid(mps_cpu);
7d669f50
SS
2184#else
2185 WARN_ON_ONCE(1);
2186 return BAD_APICID;
2187#endif
2188}
2189
1e76a3ce 2190int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
d501f747 2191
c68dc1b5
OU
2192#define KVM_CLOCK_VALID_FLAGS \
2193 (KVM_CLOCK_TSC_STABLE | KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
d501f747 2194
6d849191
OU
2195#define KVM_X86_VALID_QUIRKS \
2196 (KVM_X86_QUIRK_LINT0_REENABLED | \
2197 KVM_X86_QUIRK_CD_NW_CLEARED | \
2198 KVM_X86_QUIRK_LAPIC_MMIO_HOLE | \
2199 KVM_X86_QUIRK_OUT_7E_INC_RIP | \
f1a9761f 2200 KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \
bfbcc81b 2201 KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
43bb9e00 2202 KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS)
6d849191 2203
1965aae3 2204#endif /* _ASM_X86_KVM_HOST_H */