* RW: 64bit by default, can be overridden for 32bit VMs
* TAC: Trap ACTLR
* TSC: Trap SMC
+ * TVM: Trap VM ops (until M+C set in SCTLR_EL1)
* TSW: Trap cache operations by set/way
* TWE: Trap WFE
* TWI: Trap WFI
* SWIO: Turn set/way invalidates into set/way clean+invalidate
*/
#define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
- HCR_BSU_IS | HCR_FB | HCR_TAC | \
+ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
HCR_AMO | HCR_IMO | HCR_FMO | \
HCR_SWIO | HCR_TIDCP | HCR_RW)
#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF)
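/*
 * A minimal sketch (not part of this patch) of how HCR_TVM is meant to be
 * used: VM-control register writes keep trapping to EL2 until the guest
 * turns on both the MMU (SCTLR_EL1.M, bit 0) and the data cache
 * (SCTLR_EL1.C, bit 2), at which point the hypervisor can drop the trap.
 * The helper name below is hypothetical.
 */
static inline u64 hcr_after_sctlr_write(u64 hcr, u64 sctlr)
{
	if ((sctlr & ((1UL << 0) | (1UL << 2))) == ((1UL << 0) | (1UL << 2)))
		hcr &= ~HCR_TVM;	/* MMU and caches on: stop trapping VM ops */
	return hcr;
}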
/* VTCR_EL2 Registers bits */
#define VTCR_EL2_PS_MASK (7 << 16)
-#define VTCR_EL2_PS_40B (2 << 16)
#define VTCR_EL2_TG0_MASK (1 << 14)
#define VTCR_EL2_TG0_4K (0 << 14)
#define VTCR_EL2_TG0_64K (1 << 14)
* 64kB pages (TG0 = 1)
* 2 level page tables (SL = 1)
*/
-#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_64K | \
- VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
- VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
- VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
+ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+ VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
#define VTTBR_X (38 - VTCR_EL2_T0SZ_40B)
#else
/*
* 4kB pages (TG0 = 0)
* 3 level page tables (SL = 1)
*/
-#define VTCR_EL2_FLAGS (VTCR_EL2_PS_40B | VTCR_EL2_TG0_4K | \
- VTCR_EL2_SH0_INNER | VTCR_EL2_ORGN0_WBWA | \
- VTCR_EL2_IRGN0_WBWA | VTCR_EL2_SL0_LVL1 | \
- VTCR_EL2_T0SZ_40B)
+#define VTCR_EL2_FLAGS (VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
+ VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
+ VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B)
#define VTTBR_X (37 - VTCR_EL2_T0SZ_40B)
#endif
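/*
 * A worked example for VTTBR_X, assuming VTCR_EL2_T0SZ_40B = 24 as defined
 * elsewhere in this header (a 40-bit IPA space): X = 38 - 24 = 14 with 64kB
 * pages and 37 - 24 = 13 with 4kB pages, i.e. the stage-2 base address
 * programmed into VTTBR_EL2 must be 2^X-byte aligned.
 */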
#define SPRN_ACOP 0x1F /* Available Coprocessor Register */
#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
#define SPRN_TEXASR 0x82 /* Transaction EXception & Summary */
+ #define TEXASR_FS __MASK(63-36) /* Transaction Failure Summary */
#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
#define SPRN_CTRLF 0x088
#define SPRN_SPRG3 0x113 /* Special Purpose Register General 3 */
#define SPRN_USPRG3 0x103 /* SPRG3 userspace read */
#define SPRN_SPRG4 0x114 /* Special Purpose Register General 4 */
+#define SPRN_USPRG4 0x104 /* SPRG4 userspace read */
#define SPRN_SPRG5 0x115 /* Special Purpose Register General 5 */
+#define SPRN_USPRG5 0x105 /* SPRG5 userspace read */
#define SPRN_SPRG6 0x116 /* Special Purpose Register General 6 */
+#define SPRN_USPRG6 0x106 /* SPRG6 userspace read */
#define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */
+#define SPRN_USPRG7 0x107 /* SPRG7 userspace read */
#define SPRN_SRR0 0x01A /* Save/Restore Register 0 */
#define SPRN_SRR1 0x01B /* Save/Restore Register 1 */
#define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */
#define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */
#define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */
#define MMCR0_TBEE 0x00400000UL /* time base exception enable */
+#define MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */
#define MMCR0_EBE 0x00100000UL /* Event based branch enable */
#define MMCR0_PMCC 0x000c0000UL /* PMC control */
#define MMCR0_PMCC_U6 0x00080000UL /* PMC1-6 are R/W by user (PR) */
#define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable */
#define MMCR0_PMCjCE 0x00004000UL /* PMCj count enable */
#define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */
+#define MMCR0_PMAO_SYNC 0x00000800UL /* PMU interrupt is synchronous */
#define MMCR0_PMAO 0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */
#define MMCR0_SHRFC 0x00000040UL /* Shared freeze conditions between threads */
#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */
#define SPRN_EBBHR 804 /* Event based branch handler register */
#define SPRN_EBBRR 805 /* Event based branch return register */
#define SPRN_BESCR 806 /* Branch event status and control register */
+#define BESCR_GE 0x8000000000000000ULL /* Global Enable */
#define SPRN_WORT 895 /* Workload optimization register - thread */
#define SPRN_PMC1 787
* 64-bit embedded
* - SPRG0 generic exception scratch
* - SPRG2 TLB exception stack
- * - SPRG3 critical exception scratch and
- * CPU and NUMA node for VDSO getcpu (user visible)
+ * - SPRG3 critical exception scratch (user visible, sorry!)
* - SPRG4 unused (user visible)
* - SPRG6 TLB miss scratch (user visible, sorry !)
- * - SPRG7 critical exception scratch
+ * - SPRG7 CPU and NUMA node for VDSO getcpu (user visible)
* - SPRG8 machine check exception scratch
* - SPRG9 debug exception scratch
*
#define SPRN_SPRG_SCRATCH0 SPRN_SPRG2
#define SPRN_SPRG_HPACA SPRN_HSPRG0
#define SPRN_SPRG_HSCRATCH0 SPRN_HSPRG1
+#define SPRN_SPRG_VDSO_READ SPRN_USPRG3
+#define SPRN_SPRG_VDSO_WRITE SPRN_SPRG3
#define GET_PACA(rX) \
BEGIN_FTR_SECTION_NESTED(66); \
#define SPRN_SPRG_TLB_SCRATCH SPRN_SPRG6
#define SPRN_SPRG_GEN_SCRATCH SPRN_SPRG0
#define SPRN_SPRG_GDBELL_SCRATCH SPRN_SPRG_GEN_SCRATCH
+#define SPRN_SPRG_VDSO_READ SPRN_USPRG7
+#define SPRN_SPRG_VDSO_WRITE SPRN_SPRG7
#define SET_PACA(rX) mtspr SPRN_SPRG_PACA,rX
#define GET_PACA(rX) mfspr rX,SPRN_SPRG_PACA
#define PVR_8560 0x80200000
#define PVR_VER_E500V1 0x8020
#define PVR_VER_E500V2 0x8021
+#define PVR_VER_E500MC 0x8023
+#define PVR_VER_E5500 0x8024
#define PVR_VER_E6500 0x8040
/*
#include <asm/exception-64s.h>
#include <asm/kvm_book3s_asm.h>
#include <asm/mmu-hash64.h>
+ #include <asm/tm.h>
+
+ #define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
#ifdef __LITTLE_ENDIAN__
#error Need to fix lppaca and SLB shadow accesses in little endian mode
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Restore SPRG3 */
- ld r3,PACA_SPRG3(r13)
- mtspr SPRN_SPRG3,r3
+ ld r3,PACA_SPRG_VDSO(r13)
+ mtspr SPRN_SPRG_VDSO_WRITE,r3
/* Reload the host's PMU registers */
ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
ld r3, HSTATE_MMCR(r13)
ld r4, HSTATE_MMCR + 8(r13)
ld r5, HSTATE_MMCR + 16(r13)
+ ld r6, HSTATE_MMCR + 24(r13)
+ ld r7, HSTATE_MMCR + 32(r13)
mtspr SPRN_MMCR1, r4
mtspr SPRN_MMCRA, r5
+ mtspr SPRN_SIAR, r6
+ mtspr SPRN_SDAR, r7
+ BEGIN_FTR_SECTION
+ ld r8, HSTATE_MMCR + 40(r13)
+ ld r9, HSTATE_MMCR + 48(r13)
+ mtspr SPRN_MMCR2, r8
+ mtspr SPRN_SIER, r9
+ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr SPRN_MMCR0, r3
isync
23:
END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
+ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ BEGIN_FTR_SECTION
+ b skip_tm
+ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
+
+ /* Turn on TM/FP/VSX/VMX so we can restore them. */
+ mfmsr r5
+ li r6, MSR_TM >> 32
+ sldi r6, r6, 32
+ or r5, r5, r6
+ ori r5, r5, MSR_FP
+ oris r5, r5, (MSR_VEC | MSR_VSX)@h
+ mtmsrd r5
+
+ /*
+ * The user may change these outside of a transaction, so they must
+ * always be context switched.
+ */
+ ld r5, VCPU_TFHAR(r4)
+ ld r6, VCPU_TFIAR(r4)
+ ld r7, VCPU_TEXASR(r4)
+ mtspr SPRN_TFHAR, r5
+ mtspr SPRN_TFIAR, r6
+ mtspr SPRN_TEXASR, r7
+
+ ld r5, VCPU_MSR(r4)
+ rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+ beq skip_tm /* TM not active in guest */
+
+ /* Make sure the failure summary is set, otherwise we'll program check
+ * when we trechkpt. It's possible that this might not have been set
+ * on a kvmppc_set_one_reg() call but we shouldn't let this crash the
+ * host.
+ */
+ oris r7, r7, (TEXASR_FS)@h
+ mtspr SPRN_TEXASR, r7
+
+ /*
+ * We need to load up the checkpointed state for the guest.
+ * This must be done early, as it will blow away any GPRs, VSRs and
+ * some SPRs.
+ */
+
+ mr r31, r4
+ addi r3, r31, VCPU_FPRS_TM
+ bl .load_fp_state
+ addi r3, r31, VCPU_VRS_TM
+ bl .load_vr_state
+ mr r4, r31
+ lwz r7, VCPU_VRSAVE_TM(r4)
+ mtspr SPRN_VRSAVE, r7
+
+ ld r5, VCPU_LR_TM(r4)
+ lwz r6, VCPU_CR_TM(r4)
+ ld r7, VCPU_CTR_TM(r4)
+ ld r8, VCPU_AMR_TM(r4)
+ ld r9, VCPU_TAR_TM(r4)
+ mtlr r5
+ mtcr r6
+ mtctr r7
+ mtspr SPRN_AMR, r8
+ mtspr SPRN_TAR, r9
+
+ /*
+ * Load up PPR and DSCR values but don't put them in the actual SPRs
+ * till the last moment to avoid running with userspace PPR and DSCR for
+ * too long.
+ */
+ ld r29, VCPU_DSCR_TM(r4)
+ ld r30, VCPU_PPR_TM(r4)
+
+ std r2, PACATMSCRATCH(r13) /* Save TOC */
+
+ /* Clear the MSR RI since r1, r13 are all going to be foobar. */
+ li r5, 0
+ mtmsrd r5, 1
+
+ /* Load GPRs r0-r28 */
+ reg = 0
+ .rept 29
+ ld reg, VCPU_GPRS_TM(reg)(r31)
+ reg = reg + 1
+ .endr
+
+ mtspr SPRN_DSCR, r29
+ mtspr SPRN_PPR, r30
+
+ /* Load final GPRs */
+ ld 29, VCPU_GPRS_TM(29)(r31)
+ ld 30, VCPU_GPRS_TM(30)(r31)
+ ld 31, VCPU_GPRS_TM(31)(r31)
+
+ /* TM checkpointed state is now setup. All GPRs are now volatile. */
+ TRECHKPT
+
+ /* Now let's get back the state we need. */
+ HMT_MEDIUM
+ GET_PACA(r13)
+ ld r29, HSTATE_DSCR(r13)
+ mtspr SPRN_DSCR, r29
+ ld r4, HSTATE_KVM_VCPU(r13)
+ ld r1, HSTATE_HOST_R1(r13)
+ ld r2, PACATMSCRATCH(r13)
+
+ /* Set the MSR RI since we have our registers back. */
+ li r5, MSR_RI
+ mtmsrd r5, 1
+ skip_tm:
+ #endif
+
/* Load guest PMU registers */
/* R4 is live here (vcpu pointer) */
li r3, 1
ld r6, VCPU_VTB(r4)
mtspr SPRN_IC, r5
mtspr SPRN_VTB, r6
- #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- ld r5, VCPU_TFHAR(r4)
- ld r6, VCPU_TFIAR(r4)
- ld r7, VCPU_TEXASR(r4)
- mtspr SPRN_TFHAR, r5
- mtspr SPRN_TFIAR, r6
- mtspr SPRN_TEXASR, r7
- #endif
ld r8, VCPU_EBBHR(r4)
mtspr SPRN_EBBHR, r8
ld r5, VCPU_EBBRR(r4)
* Set the decrementer to the guest decrementer.
*/
ld r8,VCPU_DEC_EXPIRES(r4)
+ /* r8 is a host timebase value here, convert to guest TB */
+ ld r5,HSTATE_KVM_VCORE(r13)
+ ld r6,VCORE_TB_OFFSET(r5)
+ add r8,r8,r6
mftb r7
subf r3,r7,r8
mtspr SPRN_DEC,r3
12: mtspr SPRN_SRR0, r10
mr r10,r0
mtspr SPRN_SRR1, r11
- ld r11, VCPU_INTR_MSR(r4)
+ mr r9, r4
+ bl kvmppc_msr_interrupt
5:
/*
mftb r6
extsw r5,r5
add r5,r5,r6
+ /* r5 is a guest timebase value here, convert to host TB */
+ ld r3,HSTATE_KVM_VCORE(r13)
+ ld r4,VCORE_TB_OFFSET(r3)
+ subf r5,r4,r5
std r5,VCPU_DEC_EXPIRES(r9)
BEGIN_FTR_SECTION
b 8f
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
- /* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
- mfmsr r8
- li r0, 1
- rldimi r8, r0, MSR_TM_LG, 63-MSR_TM_LG
- mtmsrd r8
-
/* Save POWER8-specific registers */
mfspr r5, SPRN_IAMR
mfspr r6, SPRN_PSPB
std r5, VCPU_IC(r9)
std r6, VCPU_VTB(r9)
std r7, VCPU_TAR(r9)
- #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
- mfspr r5, SPRN_TFHAR
- mfspr r6, SPRN_TFIAR
- mfspr r7, SPRN_TEXASR
- std r5, VCPU_TFHAR(r9)
- std r6, VCPU_TFIAR(r9)
- std r7, VCPU_TEXASR(r9)
- #endif
mfspr r8, SPRN_EBBHR
std r8, VCPU_EBBHR(r9)
mfspr r5, SPRN_EBBRR
ld r8,VCORE_TB_OFFSET(r5)
cmpdi r8,0
beq 17f
- mftb r6 /* current host timebase */
+ mftb r6 /* current guest timebase */
subf r8,r8,r6
mtspr SPRN_TBU40,r8 /* update upper 40 bits */
mftb r7 /* check if lower 24 bits overflowed */
mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11
li r10, BOOK3S_INTERRUPT_DATA_STORAGE
- ld r11, VCPU_INTR_MSR(r9)
+ bl kvmppc_msr_interrupt
fast_interrupt_c_return:
6: ld r7, VCPU_CTR(r9)
lwz r8, VCPU_XER(r9)
1: mtspr SPRN_SRR0, r10
mtspr SPRN_SRR1, r11
li r10, BOOK3S_INTERRUPT_INST_STORAGE
- ld r11, VCPU_INTR_MSR(r9)
+ bl kvmppc_msr_interrupt
b fast_interrupt_c_return
3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
mtspr SPRN_SRR0,r10
mtspr SPRN_SRR1,r11
li r10, BOOK3S_INTERRUPT_SYSCALL
- ld r11, VCPU_INTR_MSR(r9)
+ bl kvmppc_msr_interrupt
mr r4,r9
b fast_guest_return
.long 0 /* 0x10 - H_CLEAR_MOD */
.long 0 /* 0x14 - H_CLEAR_REF */
.long .kvmppc_h_protect - hcall_real_table
- .long 0 /* 0x1c - H_GET_TCE */
+ .long .kvmppc_h_get_tce - hcall_real_table
.long .kvmppc_h_put_tce - hcall_real_table
.long 0 /* 0x24 - H_SET_SPRG0 */
.long .kvmppc_h_set_dabr - hcall_real_table
beq mc_cont
/* If not, deliver a machine check. SRR0/1 are already set */
li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
- ld r11, VCPU_INTR_MSR(r9)
+ bl kvmppc_msr_interrupt
b fast_interrupt_c_return
/*
mfspr r6,SPRN_VRSAVE
stw r6,VCPU_VRSAVE(r31)
mtlr r30
- mtmsrd r5
- isync
blr
/*
*/
kvmppc_bad_host_intr:
b .
+
+ /*
+ * This mimics the MSR transition on IRQ delivery. The new guest MSR is taken
+ * from VCPU_INTR_MSR and is modified based on the required TM state changes.
+ * r11 has the guest MSR value (in/out)
+ * r9 has a vcpu pointer (in)
+ * r0 is used as a scratch register
+ */
+ kvmppc_msr_interrupt:
+ rldicl r0, r11, 64 - MSR_TS_S_LG, 62
+ cmpwi r0, 2 /* Check if we are in transactional state.. */
+ ld r11, VCPU_INTR_MSR(r9)
+ bne 1f
+ /* ... if transactional, change to suspended */
+ li r0, 1
+ 1: rldimi r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+ blr
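/*
 * A worked example of the transition above (a reading of the code, not a
 * comment from the patch): MSR[TS] is a two-bit field, 0b00 =
 * non-transactional, 0b01 = suspended, 0b10 = transactional.  The rldicl
 * extracts the old TS value into r0; if it was 0b10, interrupt delivery
 * must leave the guest suspended, so r0 is forced to 1 before rldimi
 * re-inserts it into the new MSR.  Any other TS value is propagated
 * unchanged.
 */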
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kvm_host.h>
+ #include <linux/kvm.h>
#include <asm/debug.h>
#include <asm/cpu.h>
+ #include <asm/isc.h>
#define KVM_MAX_VCPUS 64
#define KVM_USER_MEM_SLOTS 32
+ /*
+ * These seem to be used for allocating ->chip in the routing table,
+ * which we don't use. 4096 is an out-of-thin-air value. If we need
+ * to look at ->chip later on, we'll need to revisit this.
+ */
+ #define KVM_NR_IRQCHIPS 1
+ #define KVM_IRQCHIP_NUM_PINS 4096
+
struct sca_entry {
atomic_t scn;
__u32 reserved;
__u64 gbea; /* 0x0180 */
__u8 reserved188[24]; /* 0x0188 */
__u32 fac; /* 0x01a0 */
- __u8 reserved1a4[58]; /* 0x01a4 */
+ __u8 reserved1a4[20]; /* 0x01a4 */
+ __u64 cbrlo; /* 0x01b8 */
- __u8 reserved1c0[40]; /* 0x01c0 */
+ __u8 reserved1c0[30]; /* 0x01c0 */
+ __u64 pp; /* 0x01de */
+ __u8 reserved1e6[2]; /* 0x01e6 */
__u64 itdba; /* 0x01e8 */
__u8 reserved1f0[16]; /* 0x01f0 */
} __attribute__((packed));
u32 instruction_stsi;
u32 instruction_stfl;
u32 instruction_tprot;
+ u32 instruction_essa;
u32 instruction_sigp_sense;
u32 instruction_sigp_sense_running;
u32 instruction_sigp_external_call;
u32 diagnose_9c;
};
- struct kvm_s390_io_info {
- __u16 subchannel_id; /* 0x0b8 */
- __u16 subchannel_nr; /* 0x0ba */
- __u32 io_int_parm; /* 0x0bc */
- __u32 io_int_word; /* 0x0c0 */
- };
-
- struct kvm_s390_ext_info {
- __u32 ext_params;
- __u64 ext_params2;
- };
-
#define PGM_OPERATION 0x01
#define PGM_PRIVILEGED_OP 0x02
#define PGM_EXECUTE 0x03
#define PGM_SPECIFICATION 0x06
#define PGM_DATA 0x07
- struct kvm_s390_pgm_info {
- __u16 code;
- };
-
- struct kvm_s390_prefix_info {
- __u32 address;
- };
-
- struct kvm_s390_extcall_info {
- __u16 code;
- };
-
- struct kvm_s390_emerg_info {
- __u16 code;
- };
-
- struct kvm_s390_mchk_info {
- __u64 cr14;
- __u64 mcic;
- };
-
struct kvm_s390_interrupt_info {
struct list_head list;
u64 type;
struct list_head list;
atomic_t active;
int next_rr_cpu;
- unsigned long idle_mask[(KVM_MAX_VCPUS + sizeof(long) - 1)
- / sizeof(long)];
- struct kvm_s390_local_interrupt *local_int[KVM_MAX_VCPUS];
+ unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ unsigned int irq_count;
};
u64 stidp_data;
};
struct gmap *gmap;
+ #define KVM_S390_PFAULT_TOKEN_INVALID (-1UL)
+ unsigned long pfault_token;
+ unsigned long pfault_select;
+ unsigned long pfault_compare;
};
struct kvm_vm_stat {
struct kvm_arch_memory_slot {
};
+ struct s390_map_info {
+ struct list_head list;
+ __u64 guest_addr;
+ __u64 addr;
+ struct page *page;
+ };
+
+ struct s390_io_adapter {
+ unsigned int id;
+ int isc;
+ bool maskable;
+ bool masked;
+ bool swap;
+ struct rw_semaphore maps_lock;
+ struct list_head maps;
+ atomic_t nr_maps;
+ };
+
+ #define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
+ #define MAX_S390_ADAPTER_MAPS 256
+
struct kvm_arch{
struct sca_block *sca;
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
+ struct kvm_device *flic;
struct gmap *gmap;
int css_support;
+ int use_irqchip;
+ struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
};
#define KVM_HVA_ERR_BAD (-1UL)
return IS_ERR_VALUE(addr);
}
+ #define ASYNC_PF_PER_VCPU 64
+ struct kvm_vcpu;
+ struct kvm_async_pf;
+ struct kvm_arch_async_pf {
+ unsigned long pfault_token;
+ };
+
+ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
+
+ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
+ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work);
+
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
#endif
#define _PAGE_READ 0x010 /* SW pte read bit */
#define _PAGE_WRITE 0x020 /* SW pte write bit */
#define _PAGE_SPECIAL 0x040 /* SW associated with special page */
+#define _PAGE_UNUSED 0x080 /* SW bit for pgste usage state */
#define __HAVE_ARCH_PTE_SPECIAL
/* Set of bits not changed in pte_modify */
#endif /* CONFIG_64BIT */
+/* Guest Page State used for virtualization */
+#define _PGSTE_GPS_ZERO 0x0000000080000000UL
+#define _PGSTE_GPS_USAGE_MASK 0x0000000003000000UL
+#define _PGSTE_GPS_USAGE_STABLE 0x0000000000000000UL
+#define _PGSTE_GPS_USAGE_UNUSED 0x0000000001000000UL
+
/*
* A user page table pointer has the space-switch-event bit, the
* private-space-control bit and the storage-alteration-event-control
return pte_val(pte) == _PAGE_INVALID;
}
+static inline int pte_swap(pte_t pte)
+{
+ /* Bit pattern: (pte & 0x603) == 0x402 */
+ return (pte_val(pte) & (_PAGE_INVALID | _PAGE_PROTECT |
+ _PAGE_TYPE | _PAGE_PRESENT))
+ == (_PAGE_INVALID | _PAGE_TYPE);
+}
+
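/*
 * Derivation of the pattern above, assuming the bit values this header
 * assigns (_PAGE_PRESENT 0x001, _PAGE_TYPE 0x002, _PAGE_PROTECT 0x200,
 * _PAGE_INVALID 0x400): the mask is 0x400 | 0x200 | 0x002 | 0x001 = 0x603
 * and a swap entry carries _PAGE_INVALID | _PAGE_TYPE = 0x402.
 */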
static inline int pte_file(pte_t pte)
{
/* Bit pattern: (pte & 0x601) == 0x600 */
* @table: pointer to the page directory
* @asce: address space control element for gmap page table
* @crst_list: list of all crst tables used in the guest address space
+ * @pfault_enabled: defines if pfaults are applicable for the guest
*/
struct gmap {
struct list_head list;
unsigned long asce;
void *private;
struct list_head crst_list;
+ bool pfault_enabled;
};
/**
unsigned long __gmap_fault(unsigned long address, struct gmap *);
unsigned long gmap_fault(unsigned long address, struct gmap *);
void gmap_discard(unsigned long from, unsigned long to, struct gmap *);
+void __gmap_zap(unsigned long address, struct gmap *);
void gmap_register_ipte_notifier(struct gmap_notifier *);
void gmap_unregister_ipte_notifier(struct gmap_notifier *);
int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len);
-void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *);
+void gmap_do_ipte_notify(struct mm_struct *, pte_t *);
static inline pgste_t pgste_ipte_notify(struct mm_struct *mm,
- unsigned long addr,
pte_t *ptep, pgste_t pgste)
{
#ifdef CONFIG_PGSTE
if (pgste_val(pgste) & PGSTE_IN_BIT) {
pgste_val(pgste) &= ~PGSTE_IN_BIT;
- gmap_do_ipte_notify(mm, addr, ptep);
+ gmap_do_ipte_notify(mm, ptep);
}
#endif
return pgste;
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
+ pgste_val(pgste) &= ~_PGSTE_GPS_ZERO;
pgste_set_key(ptep, pgste, entry);
pgste_set_pte(ptep, entry);
pgste_set_unlock(ptep, pgste);
return (pte_val(pte) & _PAGE_YOUNG) != 0;
}
+#define __HAVE_ARCH_PTE_UNUSED
+static inline int pte_unused(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_UNUSED;
+}
+
/*
* pgd/pmd/pte modification functions
*/
static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
{
- if (!(pte_val(*ptep) & _PAGE_INVALID)) {
+ unsigned long pto = (unsigned long) ptep;
+
#ifndef CONFIG_64BIT
- /* pto must point to the start of the segment table */
- pte_t *pto = (pte_t *) (((unsigned long) ptep) & 0x7ffffc00);
-#else
- /* ipte in zarch mode can do the math */
- pte_t *pto = ptep;
+ /* pto in ESA mode must point to the start of the segment table */
+ pto &= 0x7ffffc00;
#endif
- asm volatile(
- " ipte %2,%3"
- : "=m" (*ptep) : "m" (*ptep),
- "a" (pto), "a" (address));
- }
+ /* Invalidation + global TLB flush for the pte */
+ asm volatile(
+ " ipte %2,%3"
+ : "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+}
+
+static inline void ptep_flush_direct(struct mm_struct *mm,
+ unsigned long address, pte_t *ptep)
+{
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ return;
+ __ptep_ipte(address, ptep);
}
static inline void ptep_flush_lazy(struct mm_struct *mm,
unsigned long address, pte_t *ptep)
{
- int active = (mm == current->active_mm) ? 1 : 0;
+ int active, count;
- if (atomic_read(&mm->context.attach_count) > active)
- __ptep_ipte(address, ptep);
- else
+ if (pte_val(*ptep) & _PAGE_INVALID)
+ return;
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pte_val(*ptep) |= _PAGE_INVALID;
mm->context.flush_mm = 1;
+ } else
+ __ptep_ipte(address, ptep);
+ atomic_sub(0x10000, &mm->context.attach_count);
}
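/*
 * A note on the attach_count arithmetic above (a reading of the code, not
 * from the patch): the lower 16 bits of mm->context.attach_count hold the
 * number of CPUs the mm is attached to, and adding 0x10000 marks this CPU
 * as a flusher in the upper 16 bits.  If no other CPU has the mm attached
 * ((count & 0xffff) <= active), the IPTE can be skipped: the pte is merely
 * marked invalid and the TLB flush deferred via flush_mm; otherwise a
 * direct IPTE with its global flush is required.
 */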
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste);
+ pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
}
pte = *ptep;
- __ptep_ipte(addr, ptep);
+ ptep_flush_direct(vma->vm_mm, addr, ptep);
young = pte_young(pte);
pte = pte_mkold(pte);
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, ptep, pgste);
}
pte = *ptep;
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste_ipte_notify(mm, address, ptep, pgste);
+ pgste_ipte_notify(mm, ptep, pgste);
}
pte = *ptep;
ptep_flush_lazy(mm, address, ptep);
- pte_val(*ptep) |= _PAGE_INVALID;
if (mm_has_pgste(mm)) {
pgste = pgste_update_all(&pte, pgste);
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
+ pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
}
pte = *ptep;
- __ptep_ipte(address, ptep);
+ ptep_flush_direct(vma->vm_mm, address, ptep);
pte_val(*ptep) = _PAGE_INVALID;
if (mm_has_pgste(vma->vm_mm)) {
+ if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) ==
+ _PGSTE_GPS_USAGE_UNUSED)
+ pte_val(pte) |= _PAGE_UNUSED;
pgste = pgste_update_all(&pte, pgste);
pgste_set_unlock(ptep, pgste);
}
if (!full && mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, ptep, pgste);
}
pte = *ptep;
if (pte_write(pte)) {
if (mm_has_pgste(mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(mm, address, ptep, pgste);
+ pgste = pgste_ipte_notify(mm, ptep, pgste);
}
ptep_flush_lazy(mm, address, ptep);
return 0;
if (mm_has_pgste(vma->vm_mm)) {
pgste = pgste_get_lock(ptep);
- pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste);
+ pgste = pgste_ipte_notify(vma->vm_mm, ptep, pgste);
}
- __ptep_ipte(address, ptep);
+ ptep_flush_direct(vma->vm_mm, address, ptep);
if (mm_has_pgste(vma->vm_mm)) {
pgste_set_pte(ptep, entry);
static inline void pmdp_flush_lazy(struct mm_struct *mm,
unsigned long address, pmd_t *pmdp)
{
- int active = (mm == current->active_mm) ? 1 : 0;
+ int active, count;
- if ((atomic_read(&mm->context.attach_count) & 0xffff) > active)
- __pmd_idte(address, pmdp);
- else
+ active = (mm == current->active_mm) ? 1 : 0;
+ count = atomic_add_return(0x10000, &mm->context.attach_count);
+ if ((count & 0xffff) <= active) {
+ pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
mm->context.flush_mm = 1;
+ } else
+ __pmd_idte(address, pmdp);
+ atomic_sub(0x10000, &mm->context.attach_count);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/irq.h>
#include <asm/irq_regs.h>
#include <asm/cputime.h>
#include <asm/lowcore.h>
[IRQIO_PCI] = {.name = "PCI", .desc = "[I/O] PCI Interrupt" },
[IRQIO_MSI] = {.name = "MSI", .desc = "[I/O] MSI Interrupt" },
[IRQIO_VIR] = {.name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
+ [IRQIO_VAI] = {.name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
[NMI_NMI] = {.name = "NMI", .desc = "[NMI] Machine Check"},
[CPU_RST] = {.name = "RST", .desc = "[CPU] CPU Restart"},
};
#include <linux/kvm.h>
#include <linux/kvm_host.h>
+#include <asm/pgalloc.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
#include "trace.h"
#include "trace-s390.h"
+ #include "gaccess.h"
static int diag_release_pages(struct kvm_vcpu *vcpu)
{
return 0;
}
+ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
+ {
+ struct prs_parm {
+ u16 code;
+ u16 subcode;
+ u16 parm_len;
+ u16 parm_version;
+ u64 token_addr;
+ u64 select_mask;
+ u64 compare_mask;
+ u64 zarch;
+ };
+ struct prs_parm parm;
+ int rc;
+ u16 rx = (vcpu->arch.sie_block->ipa & 0xf0) >> 4;
+ u16 ry = (vcpu->arch.sie_block->ipa & 0x0f);
+ unsigned long hva_token = KVM_HVA_ERR_BAD;
+
+ if (vcpu->run->s.regs.gprs[rx] & 7)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ if (copy_from_guest(vcpu, &parm, vcpu->run->s.regs.gprs[rx], sizeof(parm)))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+ if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ switch (parm.subcode) {
+ case 0: /* TOKEN */
+ if (vcpu->arch.pfault_token != KVM_S390_PFAULT_TOKEN_INVALID) {
+ /*
+ * If the pagefault handshake is already activated,
+ * the token must not be changed. We have to return
+ * decimal 8 instead, as mandated in SC24-6084.
+ */
+ vcpu->run->s.regs.gprs[ry] = 8;
+ return 0;
+ }
+
+ if ((parm.compare_mask & parm.select_mask) != parm.compare_mask ||
+ parm.token_addr & 7 || parm.zarch != 0x8000000000000000ULL)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ hva_token = gfn_to_hva(vcpu->kvm, gpa_to_gfn(parm.token_addr));
+ if (kvm_is_error_hva(hva_token))
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+
+ vcpu->arch.pfault_token = parm.token_addr;
+ vcpu->arch.pfault_select = parm.select_mask;
+ vcpu->arch.pfault_compare = parm.compare_mask;
+ vcpu->run->s.regs.gprs[ry] = 0;
+ rc = 0;
+ break;
+ case 1: /*
+ * CANCEL
+ * The specification allows already pending tokens to survive
+ * the cancel; to reduce code complexity, we assume all
+ * outstanding tokens are already pending.
+ */
+ if (parm.token_addr || parm.select_mask ||
+ parm.compare_mask || parm.zarch)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ vcpu->run->s.regs.gprs[ry] = 0;
+ /*
+ * If pfault handling was not established or is already
+ * canceled, SC24-6084 requires returning decimal 4.
+ */
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ vcpu->run->s.regs.gprs[ry] = 4;
+ else
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+
+ rc = 0;
+ break;
+ default:
+ rc = -EOPNOTSUPP;
+ break;
+ }
+
+ return rc;
+ }
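/*
 * For reference, the constraints the code above enforces on a TOKEN
 * request (derived from the checks, not from SC24-6084 itself):
 * code == 0x258, parm_version == 2, parm_len >= 5, Rx and token_addr
 * 8-byte aligned, zarch == 0x8000000000000000ULL, and compare_mask a
 * subset of select_mask.  Ry receives 0 on success and decimal 8 if the
 * handshake was already active.
 */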
+
static int __diag_time_slice_end(struct kvm_vcpu *vcpu)
{
VCPU_EVENT(vcpu, 5, "%s", "diag time slice end");
switch (subcode) {
case 3:
vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR;
+ page_table_reset_pgste(current->mm, 0, TASK_SIZE);
break;
case 4:
vcpu->run->s390_reset_flags = 0;
+ page_table_reset_pgste(current->mm, 0, TASK_SIZE);
break;
default:
return -EOPNOTSUPP;
return __diag_time_slice_end(vcpu);
case 0x9c:
return __diag_time_slice_end_directed(vcpu);
+ case 0x258:
+ return __diag_page_ref_service(vcpu);
case 0x308:
return __diag_ipl_functions(vcpu);
case 0x500:
{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
+ { "instruction_essa", VCPU_STAT(instruction_essa) },
{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
#ifdef CONFIG_KVM_S390_UCONTROL
case KVM_CAP_S390_UCONTROL:
#endif
+ case KVM_CAP_ASYNC_PF:
case KVM_CAP_SYNC_REGS:
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
case KVM_CAP_IOEVENTFD:
+ case KVM_CAP_DEVICE_CTRL:
+ case KVM_CAP_ENABLE_CAP_VM:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
return 0;
}
+ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+ {
+ int r;
+
+ if (cap->flags)
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_S390_IRQCHIP:
+ kvm->arch.use_irqchip = 1;
+ r = 0;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+ return r;
+ }
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
r = kvm_s390_inject_vm(kvm, &s390int);
break;
}
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ break;
+ r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+ break;
+ }
+ case KVM_CREATE_IRQCHIP: {
+ struct kvm_irq_routing_entry routing;
+
+ r = -EINVAL;
+ if (kvm->arch.use_irqchip) {
+ /* Set up dummy routing. */
+ memset(&routing, 0, sizeof(routing));
+ kvm_set_irq_routing(kvm, &routing, 0, 0);
+ r = 0;
+ }
+ break;
+ }
default:
r = -ENOTTY;
}
{
int rc;
char debug_name[16];
+ static unsigned long sca_offset;
rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
if (!kvm->arch.sca)
goto out_err;
+ spin_lock(&kvm_lock);
+ sca_offset = (sca_offset + 16) & 0x7f0;
+ kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
+ spin_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);
if (!kvm->arch.gmap)
goto out_nogmap;
kvm->arch.gmap->private = kvm;
+ kvm->arch.gmap->pfault_enabled = 0;
}
kvm->arch.css_support = 0;
+ kvm->arch.use_irqchip = 0;
return 0;
out_nogmap:
{
VCPU_EVENT(vcpu, 3, "%s", "free cpu");
trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
+ kvm_clear_async_pf_completion_queue(vcpu);
if (!kvm_is_ucontrol(vcpu->kvm)) {
clear_bit(63 - vcpu->vcpu_id,
(unsigned long *) &vcpu->kvm->arch.sca->mcn);
if (kvm_is_ucontrol(vcpu->kvm))
gmap_free(vcpu->arch.gmap);
+ if (vcpu->arch.sie_block->cbrlo)
+ __free_page(__pfn_to_page(
+ vcpu->arch.sie_block->cbrlo >> PAGE_SHIFT));
free_page((unsigned long)(vcpu->arch.sie_block));
+
kvm_vcpu_uninit(vcpu);
kmem_cache_free(kvm_vcpu_cache, vcpu);
}
debug_unregister(kvm->arch.dbf);
if (!kvm_is_ucontrol(kvm))
gmap_free(kvm->arch.gmap);
+ kvm_s390_destroy_adapters(kvm);
}
/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
if (kvm_is_ucontrol(vcpu->kvm)) {
vcpu->arch.gmap = gmap_alloc(current->mm);
if (!vcpu->arch.gmap)
vcpu->arch.guest_fpregs.fpc = 0;
asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
vcpu->arch.sie_block->gbea = 1;
+ vcpu->arch.sie_block->pp = 0;
+ vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
+ kvm_clear_async_pf_completion_queue(vcpu);
atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
+ kvm_s390_clear_local_irqs(vcpu);
}
int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
+ struct page *cbrl;
+
atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
CPUSTAT_SM |
CPUSTAT_STOPPED |
vcpu->arch.sie_block->ecb2 = 8;
vcpu->arch.sie_block->eca = 0xC1002001U;
vcpu->arch.sie_block->fac = (int) (long) vfacilities;
+ if (kvm_enabled_cmma()) {
+ cbrl = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (cbrl) {
+ vcpu->arch.sie_block->ecb2 |= 0x80;
+ vcpu->arch.sie_block->ecb2 &= ~0x08;
+ vcpu->arch.sie_block->cbrlo = page_to_phys(cbrl);
+ }
+ }
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
(unsigned long) vcpu);
spin_lock_init(&vcpu->arch.local_int.lock);
INIT_LIST_HEAD(&vcpu->arch.local_int.list);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
- spin_lock(&kvm->arch.float_int.lock);
- kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
vcpu->arch.local_int.wq = &vcpu->wq;
vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
- spin_unlock(&kvm->arch.float_int.lock);
rc = kvm_vcpu_init(vcpu, kvm, id);
if (rc)
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
- /* kvm common code refers to this, but never calls it */
- BUG();
- return 0;
+ return kvm_cpu_has_interrupt(vcpu);
}
void s390_vcpu_block(struct kvm_vcpu *vcpu)
r = put_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
break;
+ case KVM_REG_S390_PFTOKEN:
+ r = put_user(vcpu->arch.pfault_token,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFCOMPARE:
+ r = put_user(vcpu->arch.pfault_compare,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFSELECT:
+ r = put_user(vcpu->arch.pfault_select,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PP:
+ r = put_user(vcpu->arch.sie_block->pp,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_GBEA:
+ r = put_user(vcpu->arch.sie_block->gbea,
+ (u64 __user *)reg->addr);
+ break;
default:
break;
}
r = get_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr);
break;
+ case KVM_REG_S390_PFTOKEN:
+ r = get_user(vcpu->arch.pfault_token,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFCOMPARE:
+ r = get_user(vcpu->arch.pfault_compare,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PFSELECT:
+ r = get_user(vcpu->arch.pfault_select,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_PP:
+ r = get_user(vcpu->arch.sie_block->pp,
+ (u64 __user *)reg->addr);
+ break;
+ case KVM_REG_S390_GBEA:
+ r = get_user(vcpu->arch.sie_block->gbea,
+ (u64 __user *)reg->addr);
+ break;
default:
break;
}
return 0;
}
+ static long kvm_arch_fault_in_sync(struct kvm_vcpu *vcpu)
+ {
+ long rc;
+ hva_t fault = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+ struct mm_struct *mm = current->mm;
+ down_read(&mm->mmap_sem);
+ rc = get_user_pages(current, mm, fault, 1, 1, 0, NULL, NULL);
+ up_read(&mm->mmap_sem);
+ return rc;
+ }
+
+ static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
+ unsigned long token)
+ {
+ struct kvm_s390_interrupt inti;
+ inti.parm64 = token;
+
+ if (start_token) {
+ inti.type = KVM_S390_INT_PFAULT_INIT;
+ WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+ } else {
+ inti.type = KVM_S390_INT_PFAULT_DONE;
+ WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
+ }
+ }
+
+ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+ {
+ trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
+ __kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
+ }
+
+ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+ {
+ trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
+ __kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
+ }
+
+ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+ {
+ /* s390 will always inject the page directly */
+ }
+
+ bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
+ {
+ /*
+ * s390 will always inject the page directly,
+ * but we still want check_async_completion to clean up
+ */
+ return true;
+ }
+
+ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
+ {
+ hva_t hva;
+ struct kvm_arch_async_pf arch;
+ int rc;
+
+ if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
+ return 0;
+ if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
+ vcpu->arch.pfault_compare)
+ return 0;
+ if (psw_extint_disabled(vcpu))
+ return 0;
+ if (kvm_cpu_has_interrupt(vcpu))
+ return 0;
+ if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+ return 0;
+ if (!vcpu->arch.gmap->pfault_enabled)
+ return 0;
+
+ hva = gmap_fault(current->thread.gmap_addr, vcpu->arch.gmap);
+ if (copy_from_guest(vcpu, &arch.pfault_token, vcpu->arch.pfault_token, 8))
+ return 0;
+
+ rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
+ return rc;
+ }
+
static int vcpu_pre_run(struct kvm_vcpu *vcpu)
{
int rc, cpuflags;
+ /*
+ * On s390 notifications for arriving pages will be delivered directly
+ * to the guest, but the housekeeping for completed pfaults is
+ * handled outside the worker.
+ */
+ kvm_check_async_pf_completion(vcpu);
+
memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16);
if (need_resched())
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
- int rc;
+ int rc = -1;
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
current->thread.gmap_addr;
vcpu->run->s390_ucontrol.pgm_code = 0x10;
rc = -EREMOTE;
- } else {
+
+ } else if (current->thread.gmap_pfault) {
+ trace_kvm_s390_major_guest_pfault(vcpu);
+ current->thread.gmap_pfault = 0;
+ if (kvm_arch_setup_async_pf(vcpu) ||
+ (kvm_arch_fault_in_sync(vcpu) >= 0))
+ rc = 0;
+ }
+
+ if (rc == -1) {
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu);
rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc == 0) {
if (kvm_is_ucontrol(vcpu->kvm))
- rc = -EOPNOTSUPP;
+ /* Don't exit for host interrupts. */
+ rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0;
else
rc = kvm_handle_sie_intercept(vcpu);
}
return rc;
}
+bool kvm_enabled_cmma(void)
+{
+ if (!MACHINE_IS_LPAR)
+ return false;
+ /* only enable for z10 and later */
+ if (!MACHINE_HAS_EDAT1)
+ return false;
+ return true;
+}
+
static int __vcpu_run(struct kvm_vcpu *vcpu)
{
int rc, exit_reason;
atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
- BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);
-
switch (kvm_run->exit_reason) {
case KVM_EXIT_S390_SIEIC:
case KVM_EXIT_UNKNOWN:
void kvm_s390_tasklet(unsigned long parm);
void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu);
void kvm_s390_deliver_pending_machine_checks(struct kvm_vcpu *vcpu);
+ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu);
int __must_check kvm_s390_inject_vm(struct kvm *kvm,
struct kvm_s390_interrupt *s390int);
int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
u64 cr6, u64 schid);
+ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in priv.c */
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
void exit_sie(struct kvm_vcpu *vcpu);
void exit_sie_sync(struct kvm_vcpu *vcpu);
+/* are we going to support cmma? */
+bool kvm_enabled_cmma(void);
/* implemented in diag.c */
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu);
+ /* implemented in interrupt.c */
+ int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
+ int psw_extint_disabled(struct kvm_vcpu *vcpu);
+ void kvm_s390_destroy_adapters(struct kvm *kvm);
+
#endif
static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
{
- struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
int cpus = 0;
int n;
- spin_lock(&fi->lock);
- for (n = 0; n < KVM_MAX_VCPUS; n++)
- if (fi->local_int[n])
- cpus++;
- spin_unlock(&fi->lock);
+ cpus = atomic_read(&vcpu->kvm->online_vcpus);
/* deal with other level 3 hypervisors */
if (stsi(mem, 3, 2, 2))
return 0;
}
+static int handle_essa(struct kvm_vcpu *vcpu)
+{
+ /* entries expected to be 1FF */
+ int entries = (vcpu->arch.sie_block->cbrlo & ~PAGE_MASK) >> 3;
+ unsigned long *cbrlo, cbrle;
+ struct gmap *gmap;
+ int i;
+
+ VCPU_EVENT(vcpu, 5, "cmma release %d pages", entries);
+ gmap = vcpu->arch.gmap;
+ vcpu->stat.instruction_essa++;
+ if (!kvm_enabled_cmma() || !vcpu->arch.sie_block->cbrlo)
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
+ if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
+ return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
+
+ if (((vcpu->arch.sie_block->ipb & 0xf0000000) >> 28) > 6)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
+ /* Rewind PSW to repeat the ESSA instruction */
+ vcpu->arch.sie_block->gpsw.addr =
+ __rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+ vcpu->arch.sie_block->cbrlo &= PAGE_MASK; /* reset nceo */
+ cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
+ down_read(&gmap->mm->mmap_sem);
+ for (i = 0; i < entries; ++i) {
+ cbrle = cbrlo[i];
+ if (unlikely(cbrle & ~PAGE_MASK || cbrle < 2 * PAGE_SIZE))
+ /* invalid entry */
+ break;
+ /* try to free backing */
+ __gmap_zap(cbrle, gmap);
+ }
+ up_read(&gmap->mm->mmap_sem);
+ if (i < entries)
+ return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+ return 0;
+}
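/*
 * On the entry count above (a reading of the code): the low-order bits of
 * cbrlo hold the byte offset of the next free slot in the 4kB collection
 * block, so (cbrlo & ~PAGE_MASK) >> 3 yields the number of valid 8-byte
 * entries -- up to 0x1FF (511) as the block fills, which matches the
 * "entries expected to be 1FF" remark.
 */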
+
static const intercept_handler_t b9_handlers[256] = {
[0x8d] = handle_epsw,
+ [0xab] = handle_essa,
[0xaf] = handle_pfmf,
};
#define XSTATE_CPUID 0x0000000d
-#define XSTATE_FP 0x1
-#define XSTATE_SSE 0x2
-#define XSTATE_YMM 0x4
-#define XSTATE_BNDREGS 0x8
-#define XSTATE_BNDCSR 0x10
+#define XSTATE_FP 0x1
+#define XSTATE_SSE 0x2
+#define XSTATE_YMM 0x4
+#define XSTATE_BNDREGS 0x8
+#define XSTATE_BNDCSR 0x10
+#define XSTATE_OPMASK 0x20
+#define XSTATE_ZMM_Hi256 0x40
+#define XSTATE_Hi16_ZMM 0x80
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
+ /* Bit 63 of XCR0 is reserved for future expansion */
+ #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
#define FXSAVE_SIZE 512
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
/* Supported features which support lazy state saving */
-#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
+#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
+ | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
/* Supported features which require eager state saving */
#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
#define MSR_SMI_COUNT 0x00000034
#define MSR_IA32_FEATURE_CONTROL 0x0000003a
#define MSR_IA32_TSC_ADJUST 0x0000003b
+ #define MSR_IA32_BNDCFGS 0x00000d90
#define FEATURE_CONTROL_LOCKED (1<<0)
#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
#define THERM_LOG_THRESHOLD1 (1 << 9)
/* MISC_ENABLE bits: architectural */
-#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0)
-#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1)
-#define MSR_IA32_MISC_ENABLE_EMON (1ULL << 7)
-#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << 11)
-#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12)
-#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << 16)
-#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << 18)
-#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << 22)
-#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << 23)
-#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << 34)
+#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0
+#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
+#define MSR_IA32_MISC_ENABLE_TCC_BIT 1
+#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
+#define MSR_IA32_MISC_ENABLE_EMON_BIT 7
+#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
+#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
+#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
-#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << 2)
-#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << 3)
-#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << 4)
-#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << 6)
-#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << 8)
-#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << 9)
-#define MSR_IA32_MISC_ENABLE_FERR (1ULL << 10)
-#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << 10)
-#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << 13)
-#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << 19)
-#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << 20)
-#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << 24)
-#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << 37)
-#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38)
-#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39)
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
+#define MSR_IA32_MISC_ENABLE_TM1_BIT 3
+#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_BIT 10
+#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
+#define MSR_IA32_MISC_ENABLE_TM2_BIT 13
+#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
#define MSR_IA32_TSC_DEADLINE 0x000006E0
int feature_bit = 0;
u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
- xstate_bv &= ~XSTATE_FPSSE;
+ xstate_bv &= XSTATE_EXTEND_MASK;
while (xstate_bv) {
if (xstate_bv & 0x1) {
u32 eax, ebx, ecx, edx;
return ret;
}
+ u64 kvm_supported_xcr0(void)
+ {
+ u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
+
+ if (!kvm_x86_ops->mpx_supported())
+ xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+
+ return xcr0;
+ }
+
void kvm_update_cpuid(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
} else {
vcpu->arch.guest_supported_xcr0 =
(best->eax | ((u64)best->edx << 32)) &
- host_xcr0 & KVM_SUPPORTED_XCR0;
- vcpu->arch.guest_xstate_size =
- xstate_required_size(vcpu->arch.guest_supported_xcr0);
+ kvm_supported_xcr0();
+ vcpu->arch.guest_xstate_size = best->ebx =
+ xstate_required_size(vcpu->arch.xcr0);
}
kvm_pmu_cpuid_update(vcpu);
entry->flags = 0;
}
- static bool supported_xcr0_bit(unsigned bit)
- {
- u64 mask = ((u64)1 << bit);
-
- return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
- }
-
#define F(x) bit(X86_FEATURE_##x)
static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
#endif
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
+ unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
/* cpuid 1.edx */
const u32 kvm_supported_word0_x86_features =
F(TSC) | F(MSR) | F(PAE) | F(MCE) |
F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) |
F(MTRR) | F(PGE) | F(MCA) | F(CMOV) |
- F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) |
+ F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) |
0 /* Reserved, DS, ACPI */ | F(MMX) |
F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) |
0 /* HTT, TM, Reserved, PBE */;
/* cpuid 7.0.ebx */
const u32 kvm_supported_word9_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
- F(BMI2) | F(ERMS) | f_invpcid | F(RTM);
+ F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
+ F(ADX);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
}
case 0xd: {
int idx, i;
+ u64 supported = kvm_supported_xcr0();
- entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
- entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
+ entry->eax &= supported;
+ entry->edx &= supported >> 32;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
for (idx = 1, i = 1; idx < 64; ++idx) {
+ u64 mask = ((u64)1 << idx);
if (*nent >= maxnent)
goto out;
do_cpuid_1_ent(&entry[i], function, idx);
- if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
+ if (entry[i].eax == 0 || !(supported & mask))
continue;
entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
#include <asm/perf_event.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
+ #include <asm/debugreg.h>
#include <asm/kvm_para.h>
#include <asm/virtext.h>
return vmcb->control.intercept_cr & (1U << bit);
}
- static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
+ static inline void set_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept_dr |= (1U << bit);
+ vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ)
+ | (1 << INTERCEPT_DR1_READ)
+ | (1 << INTERCEPT_DR2_READ)
+ | (1 << INTERCEPT_DR3_READ)
+ | (1 << INTERCEPT_DR4_READ)
+ | (1 << INTERCEPT_DR5_READ)
+ | (1 << INTERCEPT_DR6_READ)
+ | (1 << INTERCEPT_DR7_READ)
+ | (1 << INTERCEPT_DR0_WRITE)
+ | (1 << INTERCEPT_DR1_WRITE)
+ | (1 << INTERCEPT_DR2_WRITE)
+ | (1 << INTERCEPT_DR3_WRITE)
+ | (1 << INTERCEPT_DR4_WRITE)
+ | (1 << INTERCEPT_DR5_WRITE)
+ | (1 << INTERCEPT_DR6_WRITE)
+ | (1 << INTERCEPT_DR7_WRITE);
recalc_intercepts(svm);
}
- static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
+ static inline void clr_dr_intercepts(struct vcpu_svm *svm)
{
struct vmcb *vmcb = get_host_vmcb(svm);
- vmcb->control.intercept_dr &= ~(1U << bit);
+ vmcb->control.intercept_dr = 0;
recalc_intercepts(svm);
}
set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR0_READ);
- set_dr_intercept(svm, INTERCEPT_DR1_READ);
- set_dr_intercept(svm, INTERCEPT_DR2_READ);
- set_dr_intercept(svm, INTERCEPT_DR3_READ);
- set_dr_intercept(svm, INTERCEPT_DR4_READ);
- set_dr_intercept(svm, INTERCEPT_DR5_READ);
- set_dr_intercept(svm, INTERCEPT_DR6_READ);
- set_dr_intercept(svm, INTERCEPT_DR7_READ);
-
- set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
- set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
+ set_dr_intercepts(svm);
set_exception_intercept(svm, PF_VECTOR);
set_exception_intercept(svm, UD_VECTOR);
mark_dirty(svm->vmcb, VMCB_DR);
}
+ static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
+ {
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ get_debugreg(vcpu->arch.db[0], 0);
+ get_debugreg(vcpu->arch.db[1], 1);
+ get_debugreg(vcpu->arch.db[2], 2);
+ get_debugreg(vcpu->arch.db[3], 3);
+ vcpu->arch.dr6 = svm_get_dr6(vcpu);
+ vcpu->arch.dr7 = svm->vmcb->save.dr7;
+
+ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
+ set_dr_intercepts(svm);
+ }
+
static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
{
struct vcpu_svm *svm = to_svm(vcpu);
clr_intercept(svm, INTERCEPT_IRET);
svm->vcpu.arch.hflags |= HF_IRET_MASK;
svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
+ kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
return 1;
}
unsigned long val;
int err;
+ if (svm->vcpu.guest_debug == 0) {
+ /*
+ * No more DR vmexits; force a reload of the debug registers
+ * and reenter on this instruction. The next vmexit will
+ * retrieve the full state of the debug registers.
+ */
+ clr_dr_intercepts(svm);
+ svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
+ return 1;
+ }
+
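	/*
	 * Note (a reading of the patch, not an original comment):
	 * KVM_DEBUGREG_WONT_EXIT is cleared again in
	 * svm_sync_dirty_debug_regs() above, which saves the guest's
	 * DR0-DR7 back into vcpu->arch and re-arms the intercepts.
	 */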
if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
return emulate_on_interception(svm);
u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
/* instruction emulation calls kvm_set_cr8() */
r = cr_interception(svm);
- if (irqchip_in_kernel(svm->vcpu.kvm)) {
- clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+ if (irqchip_in_kernel(svm->vcpu.kvm))
return r;
- }
if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
return r;
kvm_run->exit_reason = KVM_EXIT_SET_TPR;
if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
return;
+ clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+
if (irr == -1)
return;
return ret;
}
- static int enable_irq_window(struct kvm_vcpu *vcpu)
+ static void enable_irq_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
svm_set_vintr(svm);
svm_inject_irq(svm, 0x0);
}
- return 0;
}
- static int enable_nmi_window(struct kvm_vcpu *vcpu)
+ static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
== HF_NMI_MASK)
- return 0; /* IRET will cause a vm exit */
+ return; /* IRET will cause a vm exit */
/*
* Something prevents NMI from being injected. Single step over possible
svm->nmi_singlestep = true;
svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
update_db_bp_intercept(vcpu);
- return 0;
}
static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
return false;
}
+ static bool svm_mpx_supported(void)
+ {
+ return false;
+ }
+
static bool svm_has_wbinvd_exit(void)
{
return true;
.get_dr6 = svm_get_dr6,
.set_dr6 = svm_set_dr6,
.set_dr7 = svm_set_dr7,
+ .sync_dirty_debug_regs = svm_sync_dirty_debug_regs,
.cache_reg = svm_cache_reg,
.get_rflags = svm_get_rflags,
.set_rflags = svm_set_rflags,
.rdtscp_supported = svm_rdtscp_supported,
.invpcid_supported = svm_invpcid_supported,
+ .mpx_supported = svm_mpx_supported,
.set_supported_cpuid = svm_set_supported_cpuid,
static void mark_page_dirty_in_slot(struct kvm *kvm,
struct kvm_memory_slot *memslot, gfn_t gfn);
-bool kvm_rebooting;
+__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
static bool largepages_enabled = true;
void kvm_flush_remote_tlbs(struct kvm *kvm)
{
- long dirty_count = kvm->tlbs_dirty;
-
- smp_mb();
if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.remote_tlb_flush;
- cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
+ kvm->tlbs_dirty = false;
}
EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
continue;
if (vcpu == me)
continue;
- if (waitqueue_active(&vcpu->wq))
+ if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
case KVM_DEV_TYPE_ARM_VGIC_V2:
ops = &kvm_arm_vgic_v2_ops;
break;
+ #endif
+ #ifdef CONFIG_S390
+ case KVM_DEV_TYPE_FLIC:
+ ops = &kvm_flic_ops;
+ break;
#endif
default:
return -ENODEV;