KVM: x86: Reset IRTE to host control if *new* route isn't postable
authorSean Christopherson <seanjc@google.com>
Fri, 4 Apr 2025 19:38:17 +0000 (12:38 -0700)
committerPaolo Bonzini <pbonzini@redhat.com>
Thu, 24 Apr 2025 13:52:31 +0000 (09:52 -0400)
Restore an IRTE back to host control (remapped or posted MSI mode) if the
*new* GSI route prevents posting the IRQ directly to a vCPU, regardless of
the GSI routing type.  Updating the IRTE if and only if the new GSI is an
MSI results in KVM leaving an IRTE posting to a vCPU.

The dangling IRTE can result in interrupts being incorrectly delivered to
the guest, and in the worst case scenario can result in use-after-free,
e.g. if the VM is torn down, but the underlying host IRQ isn't freed.

Fixes: efc644048ecd ("KVM: x86: Update IRTE for posted-interrupts")
Fixes: 411b44ba80ab ("svm: Implements update_pi_irte hook to setup posted interrupt")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-ID: <20250404193923.1413163-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
arch/x86/kvm/svm/avic.c
arch/x86/kvm/vmx/posted_intr.c

index a961e6e670506cfdd8c1da0c5ab919550d2abac7..8e09f6ae98fd997cda6c5ae15325073f5efa91da 100644 (file)
@@ -896,6 +896,7 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
 {
        struct kvm_kernel_irq_routing_entry *e;
        struct kvm_irq_routing_table *irq_rt;
+       bool enable_remapped_mode = true;
        int idx, ret = 0;
 
        if (!kvm_arch_has_assigned_device(kvm) || !kvm_arch_has_irq_bypass())
@@ -932,6 +933,8 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
                    kvm_vcpu_apicv_active(&svm->vcpu)) {
                        struct amd_iommu_pi_data pi;
 
+                       enable_remapped_mode = false;
+
                        /* Try to enable guest_mode in IRTE */
                        pi.base = __sme_set(page_to_phys(svm->avic_backing_page) &
                                            AVIC_HPA_MASK);
@@ -950,33 +953,6 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
                         */
                        if (!ret && pi.is_guest_mode)
                                svm_ir_list_add(svm, &pi);
-               } else {
-                       /* Use legacy mode in IRTE */
-                       struct amd_iommu_pi_data pi;
-
-                       /**
-                        * Here, pi is used to:
-                        * - Tell IOMMU to use legacy mode for this interrupt.
-                        * - Retrieve ga_tag of prior interrupt remapping data.
-                        */
-                       pi.prev_ga_tag = 0;
-                       pi.is_guest_mode = false;
-                       ret = irq_set_vcpu_affinity(host_irq, &pi);
-
-                       /**
-                        * Check if the posted interrupt was previously
-                        * setup with the guest_mode by checking if the ga_tag
-                        * was cached. If so, we need to clean up the per-vcpu
-                        * ir_list.
-                        */
-                       if (!ret && pi.prev_ga_tag) {
-                               int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
-                               struct kvm_vcpu *vcpu;
-
-                               vcpu = kvm_get_vcpu_by_id(kvm, id);
-                               if (vcpu)
-                                       svm_ir_list_del(to_svm(vcpu), &pi);
-                       }
                }
 
                if (!ret && svm) {
@@ -992,6 +968,34 @@ int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
        }
 
        ret = 0;
+       if (enable_remapped_mode) {
+               /* Use legacy mode in IRTE */
+               struct amd_iommu_pi_data pi;
+
+               /**
+                * Here, pi is used to:
+                * - Tell IOMMU to use legacy mode for this interrupt.
+                * - Retrieve ga_tag of prior interrupt remapping data.
+                */
+               pi.prev_ga_tag = 0;
+               pi.is_guest_mode = false;
+               ret = irq_set_vcpu_affinity(host_irq, &pi);
+
+               /**
+                * Check if the posted interrupt was previously
+                * setup with the guest_mode by checking if the ga_tag
+                * was cached. If so, we need to clean up the per-vcpu
+                * ir_list.
+                */
+               if (!ret && pi.prev_ga_tag) {
+                       int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
+                       struct kvm_vcpu *vcpu;
+
+                       vcpu = kvm_get_vcpu_by_id(kvm, id);
+                       if (vcpu)
+                               svm_ir_list_del(to_svm(vcpu), &pi);
+               }
+       }
 out:
        srcu_read_unlock(&kvm->irq_srcu, idx);
        return ret;
index 51116fe69a5001b91d95bf8c3417533b1e2c55e0..d70e5b90087d86f26e0137e2dbc9b0e8f55adede 100644 (file)
@@ -297,6 +297,7 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
 {
        struct kvm_kernel_irq_routing_entry *e;
        struct kvm_irq_routing_table *irq_rt;
+       bool enable_remapped_mode = true;
        struct kvm_lapic_irq irq;
        struct kvm_vcpu *vcpu;
        struct vcpu_data vcpu_info;
@@ -335,21 +336,8 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
 
                kvm_set_msi_irq(kvm, e, &irq);
                if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu) ||
-                   !kvm_irq_is_postable(&irq)) {
-                       /*
-                        * Make sure the IRTE is in remapped mode if
-                        * we don't handle it in posted mode.
-                        */
-                       ret = irq_set_vcpu_affinity(host_irq, NULL);
-                       if (ret < 0) {
-                               printk(KERN_INFO
-                                  "failed to back to remapped mode, irq: %u\n",
-                                  host_irq);
-                               goto out;
-                       }
-
+                   !kvm_irq_is_postable(&irq))
                        continue;
-               }
 
                vcpu_info.pi_desc_addr = __pa(vcpu_to_pi_desc(vcpu));
                vcpu_info.vector = irq.vector;
@@ -357,11 +345,12 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
                trace_kvm_pi_irte_update(host_irq, vcpu->vcpu_id, e->gsi,
                                vcpu_info.vector, vcpu_info.pi_desc_addr, set);
 
-               if (set)
-                       ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
-               else
-                       ret = irq_set_vcpu_affinity(host_irq, NULL);
+               if (!set)
+                       continue;
 
+               enable_remapped_mode = false;
+
+               ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
                if (ret < 0) {
                        printk(KERN_INFO "%s: failed to update PI IRTE\n",
                                        __func__);
@@ -369,6 +358,9 @@ int vmx_pi_update_irte(struct kvm *kvm, unsigned int host_irq,
                }
        }
 
+       if (enable_remapped_mode)
+               ret = irq_set_vcpu_affinity(host_irq, NULL);
+
        ret = 0;
 out:
        srcu_read_unlock(&kvm->irq_srcu, idx);