x86/irq: Plug vector setup race
authorThomas Gleixner <tglx@linutronix.de>
Thu, 24 Jul 2025 10:49:30 +0000 (12:49 +0200)
committerThomas Gleixner <tglx@linutronix.de>
Mon, 4 Aug 2025 21:34:03 +0000 (23:34 +0200)
Hogan reported a vector setup race, which overwrites the interrupt
descriptor in the per CPU vector array resulting in a disfunctional device.

CPU0 CPU1
interrupt is raised in APIC IRR
but not handled
  free_irq()
    per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN;

  request_irq() common_interrupt()
     d = this_cpu_read(vector_irq[vector]);

    per_cpu(vector_irq, CPU1)[vector] = desc;

       if (d == VECTOR_SHUTDOWN)
    this_cpu_write(vector_irq[vector], VECTOR_UNUSED);

free_irq() cannot observe the pending vector in the CPU1 APIC as there is
no way to query the remote CPUs APIC IRR.

This requires that request_irq() uses the same vector/CPU as the one which
was freed, but this also can be triggered by a spurious interrupt.

Interestingly enough this problem managed to be hidden for more than a
decade.

Prevent this by reevaluating vector_irq under the vector lock, which is
held by the interrupt activation code when vector_irq is updated.

To avoid ifdeffery or IS_ENABLED() nonsense, move the
[un]lock_vector_lock() declarations out under the
CONFIG_IRQ_DOMAIN_HIERARCHY guard as it's only provided when
CONFIG_X86_LOCAL_APIC=y.

The current CONFIG_IRQ_DOMAIN_HIERARCHY guard is selected by
CONFIG_X86_LOCAL_APIC, but can also be selected by other parts of the
Kconfig system, which makes 32-bit UP builds with CONFIG_X86_LOCAL_APIC=n
fail.

Can we just get rid of this !APIC nonsense once and forever?

Fixes: 9345005f4eed ("x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs")
Reported-by: Hogan Wang <hogan.wang@huawei.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Hogan Wang <hogan.wang@huawei.com>
Link: https://lore.kernel.org/all/draft-87ikjhrhhh.ffs@tglx
arch/x86/include/asm/hw_irq.h
arch/x86/kernel/irq.c

index 162ebd73a6981e4f98213d4327829579de0b3747..cbe19e6690801ad5e4eff088bb041631fdec1250 100644 (file)
@@ -92,8 +92,6 @@ struct irq_cfg {
 
 extern struct irq_cfg *irq_cfg(unsigned int irq);
 extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
-extern void lock_vector_lock(void);
-extern void unlock_vector_lock(void);
 #ifdef CONFIG_SMP
 extern void vector_schedule_cleanup(struct irq_cfg *);
 extern void irq_complete_move(struct irq_cfg *cfg);
@@ -101,12 +99,16 @@ extern void irq_complete_move(struct irq_cfg *cfg);
 static inline void vector_schedule_cleanup(struct irq_cfg *c) { }
 static inline void irq_complete_move(struct irq_cfg *c) { }
 #endif
-
 extern void apic_ack_edge(struct irq_data *data);
-#else  /*  CONFIG_IRQ_DOMAIN_HIERARCHY */
+#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+
+#ifdef CONFIG_X86_LOCAL_APIC
+extern void lock_vector_lock(void);
+extern void unlock_vector_lock(void);
+#else
 static inline void lock_vector_lock(void) {}
 static inline void unlock_vector_lock(void) {}
-#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
+#endif
 
 /* Statistics */
 extern atomic_t irq_err_count;
index 9ed29ff10e59d053606d05493db56c51d5624171..10721a125226941477d2749f2acce1017ed188a0 100644 (file)
@@ -256,26 +256,59 @@ static __always_inline void handle_irq(struct irq_desc *desc,
                __handle_irq(desc, regs);
 }
 
-static __always_inline int call_irq_handler(int vector, struct pt_regs *regs)
+static struct irq_desc *reevaluate_vector(int vector)
 {
-       struct irq_desc *desc;
-       int ret = 0;
+       struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);
+
+       if (!IS_ERR_OR_NULL(desc))
+               return desc;
+
+       if (desc == VECTOR_UNUSED)
+               pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector);
+       else
+               __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+       return NULL;
+}
+
+static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs)
+{
+       struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);
 
-       desc = __this_cpu_read(vector_irq[vector]);
        if (likely(!IS_ERR_OR_NULL(desc))) {
                handle_irq(desc, regs);
-       } else {
-               ret = -EINVAL;
-               if (desc == VECTOR_UNUSED) {
-                       pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n",
-                                            __func__, smp_processor_id(),
-                                            vector);
-               } else {
-                       __this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
-               }
+               return true;
        }
 
-       return ret;
+       /*
+        * Reevaluate with vector_lock held to prevent a race against
+        * request_irq() setting up the vector:
+        *
+        * CPU0                         CPU1
+        *                              interrupt is raised in APIC IRR
+        *                              but not handled
+        * free_irq()
+        *   per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN;
+        *
+        * request_irq()                common_interrupt()
+        *                                d = this_cpu_read(vector_irq[vector]);
+        *
+        * per_cpu(vector_irq, CPU1)[vector] = desc;
+        *
+        *                                if (d == VECTOR_SHUTDOWN)
+        *                                  this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
+        *
+        * This requires that the same vector on the same target CPU is
+        * handed out or that a spurious interrupt hits that CPU/vector.
+        */
+       lock_vector_lock();
+       desc = reevaluate_vector(vector);
+       unlock_vector_lock();
+
+       if (!desc)
+               return false;
+
+       handle_irq(desc, regs);
+       return true;
 }
 
 /*
@@ -289,7 +322,7 @@ DEFINE_IDTENTRY_IRQ(common_interrupt)
        /* entry code tells RCU that we're not quiescent.  Check it. */
        RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
 
-       if (unlikely(call_irq_handler(vector, regs)))
+       if (unlikely(!call_irq_handler(vector, regs)))
                apic_eoi();
 
        set_irq_regs(old_regs);