KVM: x86: Add support for user wait instructions
author Tao Xu <tao3.xu@intel.com>
Tue, 16 Jul 2019 06:55:49 +0000 (14:55 +0800)
committer Paolo Bonzini <pbonzini@redhat.com>
Tue, 24 Sep 2019 12:34:20 +0000 (14:34 +0200)
UMONITOR, UMWAIT and TPAUSE are a set of user wait instructions.
This patch adds support for user wait instructions in KVM. Availability
of the user wait instructions is indicated by the presence of the CPUID
feature flag WAITPKG CPUID.0x07.0x0:ECX[5]. User wait instructions may
be executed at any privilege level, and use the 32-bit IA32_UMWAIT_CONTROL MSR
to set the maximum time.

The behavior of user wait instructions in VMX non-root operation is
determined first by the setting of the "enable user wait and pause"
secondary processor-based VM-execution control bit 26.
If the VM-execution control is 0, UMONITOR/UMWAIT/TPAUSE cause
an invalid-opcode exception (#UD).
If the VM-execution control is 1, treatment is based on the
setting of the "RDTSC exiting" VM-execution control. Because KVM never
enables RDTSC exiting, if the instruction causes a delay, the amount of
time delayed is called here the physical delay. The physical delay is
first computed by determining the virtual delay. If
IA32_UMWAIT_CONTROL[31:2] is zero, the virtual delay is the value in
EDX:EAX minus the value that RDTSC would return; if
IA32_UMWAIT_CONTROL[31:2] is not zero, the virtual delay is the minimum
of that difference and AND(IA32_UMWAIT_CONTROL,FFFFFFFCH).

Because umwait and tpause can put a (physical) CPU into a power saving
state, by default we don't expose WAITPKG to KVM guests and enable it only
when the guest CPUID has it.

Detailed information about user wait instructions can be found in the
latest Intel 64 and IA-32 Architectures Software Developer's Manual.

Co-developed-by: Jingqi Liu <jingqi.liu@intel.com>
Signed-off-by: Jingqi Liu <jingqi.liu@intel.com>
Signed-off-by: Tao Xu <tao3.xu@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/include/asm/vmx.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/vmx/capabilities.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c

index b15e6465870f8c5fb99b34823dc86d1b7597ea89..fdad81626829cb05692f876624f52db08e520193 100644 (file)
@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PT_USE_GPA              0x01000000
 #define SECONDARY_EXEC_MODE_BASED_EPT_EXEC     0x00400000
 #define SECONDARY_EXEC_TSC_SCALING              0x02000000
+#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE   0x04000000
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
 #define PIN_BASED_NMI_EXITING                   0x00000008
index dd5985eb61b4c3ca617e26d145066cbfb149c217..4e5a835989d17c19f601df7e59ae548af614e255 100644 (file)
@@ -360,7 +360,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
                F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
                F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
                F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-               F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
+               F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
 
        /* cpuid 7.0.edx*/
        const u32 kvm_cpuid_7_0_edx_x86_features =
index d6664ee3d1276c6213275d6b84962b9f2cbb8289..7aa69716d5160421c388154d83db11e15a494606 100644 (file)
@@ -247,6 +247,12 @@ static inline bool vmx_xsaves_supported(void)
                SECONDARY_EXEC_XSAVES;
 }
 
+static inline bool vmx_waitpkg_supported(void)
+{
+       return vmcs_config.cpu_based_2nd_exec_ctrl &
+               SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+}
+
 static inline bool cpu_has_vmx_tsc_scaling(void)
 {
        return vmcs_config.cpu_based_2nd_exec_ctrl &
index 75ed0a63abbea0b5a2cfc1702ad51d6e673ffa8d..c5b7ba795f95c149e93a43127e90dd2564fd9d86 100644 (file)
@@ -2089,6 +2089,7 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
                                  SECONDARY_EXEC_ENABLE_INVPCID |
                                  SECONDARY_EXEC_RDTSCP |
                                  SECONDARY_EXEC_XSAVES |
+                                 SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                                  SECONDARY_EXEC_APIC_REGISTER_VIRT |
                                  SECONDARY_EXEC_ENABLE_VMFUNC);
index ef98311ad15356741527704d2dfaf790dcd2cef6..bb55f54e29b1509a70c31e1f0a75fb69f5d4b17f 100644 (file)
@@ -2323,6 +2323,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf,
                        SECONDARY_EXEC_RDRAND_EXITING |
                        SECONDARY_EXEC_ENABLE_PML |
                        SECONDARY_EXEC_TSC_SCALING |
+                       SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
                        SECONDARY_EXEC_PT_USE_GPA |
                        SECONDARY_EXEC_PT_CONCEAL_VMX |
                        SECONDARY_EXEC_ENABLE_VMFUNC |
@@ -4059,6 +4060,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
                }
        }
 
+       if (vmx_waitpkg_supported()) {
+               bool waitpkg_enabled =
+                       guest_cpuid_has(vcpu, X86_FEATURE_WAITPKG);
+
+               if (!waitpkg_enabled)
+                       exec_control &= ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+
+               if (nested) {
+                       if (waitpkg_enabled)
+                               vmx->nested.msrs.secondary_ctls_high |=
+                                       SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+                       else
+                               vmx->nested.msrs.secondary_ctls_high &=
+                                       ~SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
+               }
+       }
+
        vmx->secondary_exec_control = exec_control;
 }