Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

[linux-2.6-block.git] / include / linux / spinlock.h
diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h

index ef018a6e49854dcc68b729846734dde02c92f7ff..69e079c5ff985bdc49d8b933ea688159f448e2f1 100644 (file)
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -118,16 +118,39 @@ do {                                                              \
  #endif
  
  /*
- * Despite its name it doesn't necessarily has to be a full barrier.
- * It should only guarantee that a STORE before the critical section
- * can not be reordered with LOADs and STOREs inside this section.
- * spin_lock() is the one-way barrier, this LOAD can not escape out
- * of the region. So the default implementation simply ensures that
- * a STORE can not move into the critical section, smp_wmb() should
- * serialize it with another STORE done by spin_lock().
+ * This barrier must provide two things:
+ *
+ *   - it must guarantee a STORE before the spin_lock() is ordered against a
+ *     LOAD after it, see the comments at its two usage sites.
+ *
+ *   - it must ensure the critical section is RCsc.
+ *
+ * The latter is important for cases where we observe values written by other
+ * CPUs in spin-loops, without barriers, while being subject to scheduling.
+ *
+ * CPU0                        CPU1                    CPU2
+ *
+ *                     for (;;) {
+ *                       if (READ_ONCE(X))
+ *                         break;
+ *                     }
+ * X=1
+ *                     <sched-out>
+ *                                             <sched-in>
+ *                                             r = X;
+ *
+ * Without transitivity, CPU1 could observe X!=0 and break out of the loop,
+ * the task could then migrate to CPU2, and CPU2 could still see X==0.
+ *
+ * Since most load-store architectures implement ACQUIRE with an smp_mb() after
+ * the LL/SC loop, they need no further barriers. Similarly all our TSO
+ * architectures imply an smp_mb() for each atomic instruction and equally don't
+ * need more.
+ *
+ * Architectures that can implement ACQUIRE better need to take care.
   */
-#ifndef smp_mb__before_spinlock
-#define smp_mb__before_spinlock()      smp_wmb()
+#ifndef smp_mb__after_spinlock
+#define smp_mb__after_spinlock()       do { } while (0)
  #endif
  
  #ifdef CONFIG_DEBUG_SPINLOCK