irq_work: Optimize irq_work_single()
author Peter Zijlstra <peterz@infradead.org>
Thu, 18 Jun 2020 20:28:37 +0000 (22:28 +0200)
committer Peter Zijlstra <peterz@infradead.org>
Tue, 24 Nov 2020 15:47:49 +0000 (16:47 +0100)
Trade one atomic op for a full memory barrier.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
include/linux/irqflags.h
kernel/irq_work.c

index fef2d43a7a1d8feaee373e0a3f8f337a1961e909..8de0e1373de70c48ca363e913e26b62f2e616dd3 100644 (file)
@@ -107,14 +107,14 @@ do {                                              \
                  current->irq_config = 0;                      \
          } while (0)
 
-# define lockdep_irq_work_enter(__work)                                        \
+# define lockdep_irq_work_enter(_flags)                                        \
          do {                                                          \
-                 if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\
+                 if (!((_flags) & IRQ_WORK_HARD_IRQ))                  \
                        current->irq_config = 1;                        \
          } while (0)
-# define lockdep_irq_work_exit(__work)                                 \
+# define lockdep_irq_work_exit(_flags)                                 \
          do {                                                          \
-                 if (!(atomic_read(&__work->node.a_flags) & IRQ_WORK_HARD_IRQ))\
+                 if (!((_flags) & IRQ_WORK_HARD_IRQ))                  \
                        current->irq_config = 0;                        \
          } while (0)
 
index fbff25adb57425b2b5b8117587b6a5aa11fc3677..e8da1e71583a1472c64ce75b4025f919770b082c 100644 (file)
@@ -34,7 +34,7 @@ static bool irq_work_claim(struct irq_work *work)
        oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags);
        /*
         * If the work is already pending, no need to raise the IPI.
-        * The pairing atomic_fetch_andnot() in irq_work_run() makes sure
+        * The pairing smp_mb() in irq_work_single() makes sure
         * everything we did before is visible.
         */
        if (oflags & IRQ_WORK_PENDING)
@@ -136,22 +136,27 @@ void irq_work_single(void *arg)
        int flags;
 
        /*
-        * Clear the PENDING bit, after this point the @work
-        * can be re-used.
-        * Make it immediately visible so that other CPUs trying
-        * to claim that work don't rely on us to handle their data
-        * while we are in the middle of the func.
+        * Clear the PENDING bit, after this point the @work can be re-used.
+        * The PENDING bit acts as a lock, and we own it, so we can clear it
+        * without atomic ops.
         */
-       flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->node.a_flags);
+       flags = atomic_read(&work->node.a_flags);
+       flags &= ~IRQ_WORK_PENDING;
+       atomic_set(&work->node.a_flags, flags);
+
+       /*
+        * See irq_work_claim().
+        */
+       smp_mb();
 
-       lockdep_irq_work_enter(work);
+       lockdep_irq_work_enter(flags);
        work->func(work);
-       lockdep_irq_work_exit(work);
+       lockdep_irq_work_exit(flags);
+
        /*
-        * Clear the BUSY bit and return to the free state if
-        * no-one else claimed it meanwhile.
+        * Clear the BUSY bit, if set, and return to the free state if no-one
+        * else claimed it meanwhile.
         */
-       flags &= ~IRQ_WORK_PENDING;
        (void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);
 }