KVM: x86: Recompute PID.ON when clearing PID.SN
authorLuwei Kang <luwei.kang@intel.com>
Thu, 14 Feb 2019 02:48:07 +0000 (10:48 +0800)
committerPaolo Bonzini <pbonzini@redhat.com>
Thu, 14 Feb 2019 15:20:31 +0000 (16:20 +0100)
Some Posted-Interrupts from passthrough devices may be lost or
overwritten when the vCPU is in runnable state.

The SN (Suppress Notification) of PID (Posted Interrupt Descriptor) will
be set when the vCPU is preempted (vCPU in KVM_MP_STATE_RUNNABLE state but
not running on physical CPU). If a posted interrupt comes at this time,
the irq remapping facility will set the bit of PIR (Posted Interrupt
Requests) but not ON (Outstanding Notification).  Then, the interrupt
will not be seen by KVM, which always expects PID.ON=1 if PID.PIR=1
as documented in the Intel processor SDM but not in the VT-d specification.
To fix this, restore the invariant after PID.SN is cleared.

Signed-off-by: Luwei Kang <luwei.kang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c

index 95d61804500199934ca167f4bc37892401d43855..b9a27fc7c0be32d09789124b944861bf3dcd5b2b 100644 (file)
@@ -1193,21 +1193,6 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
        if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
                return;
 
-       /*
-        * First handle the simple case where no cmpxchg is necessary; just
-        * allow posting non-urgent interrupts.
-        *
-        * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
-        * PI.NDST: pi_post_block will do it for us and the wakeup_handler
-        * expects the VCPU to be on the blocked_vcpu_list that matches
-        * PI.NDST.
-        */
-       if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
-           vcpu->cpu == cpu) {
-               pi_clear_sn(pi_desc);
-               return;
-       }
-
        /* The full case.  */
        do {
                old.control = new.control = pi_desc->control;
@@ -1222,6 +1207,17 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
                new.sn = 0;
        } while (cmpxchg64(&pi_desc->control, old.control,
                           new.control) != old.control);
+
+       /*
+        * Clear SN before reading the bitmap.  The VT-d firmware
+        * writes the bitmap and reads SN atomically (5.2.3 in the
+        * spec), so it doesn't really have a memory barrier that
+        * pairs with this, but we cannot do that and we need one.
+        */
+       smp_mb__after_atomic();
+
+       if (!bitmap_empty((unsigned long *)pi_desc->pir, NR_VECTORS))
+               pi_set_on(pi_desc);
 }
 
 /*
index 99328954c2fc1c43d8573f727b0e9219a2828065..0ac0a64c7790fb772b31d61179480548e6d06082 100644 (file)
@@ -337,16 +337,16 @@ static inline int pi_test_and_set_pir(int vector, struct pi_desc *pi_desc)
        return test_and_set_bit(vector, (unsigned long *)pi_desc->pir);
 }
 
-static inline void pi_clear_sn(struct pi_desc *pi_desc)
+static inline void pi_set_sn(struct pi_desc *pi_desc)
 {
-       return clear_bit(POSTED_INTR_SN,
+       return set_bit(POSTED_INTR_SN,
                        (unsigned long *)&pi_desc->control);
 }
 
-static inline void pi_set_sn(struct pi_desc *pi_desc)
+static inline void pi_set_on(struct pi_desc *pi_desc)
 {
-       return set_bit(POSTED_INTR_SN,
-                       (unsigned long *)&pi_desc->control);
+       set_bit(POSTED_INTR_ON,
+               (unsigned long *)&pi_desc->control);
 }
 
 static inline void pi_clear_on(struct pi_desc *pi_desc)
index e67ecf25e69087ff6aa8711001a59b175477a3b1..941f932373d03547f606ea6a20fd6161c48c6398 100644 (file)
@@ -7801,7 +7801,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
         * 1) We should set ->mode before checking ->requests.  Please see
         * the comment in kvm_vcpu_exiting_guest_mode().
         *
-        * 2) For APICv, we should set ->mode before checking PIR.ON.  This
+        * 2) For APICv, we should set ->mode before checking PID.ON. This
         * pairs with the memory barrier implicit in pi_test_and_set_on
         * (see vmx_deliver_posted_interrupt).
         *