arm64: cmpxchg: patch in lse instructions when supported by the CPU
author Will Deacon <will.deacon@arm.com>
Thu, 23 Apr 2015 19:08:49 +0000 (20:08 +0100)
committer Will Deacon <will.deacon@arm.com>
Mon, 27 Jul 2015 14:28:51 +0000 (15:28 +0100)
On CPUs which support the LSE atomic instructions introduced in ARMv8.1,
it makes sense to use them in preference to ll/sc sequences.

This patch introduces runtime patching of our cmpxchg primitives so that
the LSE cas instruction is used instead.

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
arch/arm64/include/asm/atomic.h
arch/arm64/include/asm/atomic_ll_sc.h
arch/arm64/include/asm/atomic_lse.h
arch/arm64/include/asm/cmpxchg.h

index 836226d5e12cd6d56a6192200747e7f9ae1a6798..1fe8f209aeb44f1d959efda457cd187b4972247b 100644 (file)
@@ -24,7 +24,6 @@
 #include <linux/types.h>
 
 #include <asm/barrier.h>
-#include <asm/cmpxchg.h>
 #include <asm/lse.h>
 
 #define ATOMIC_INIT(i) { (i) }
@@ -41,6 +40,8 @@
 
 #undef __ARM64_IN_ATOMIC_IMPL
 
+#include <asm/cmpxchg.h>
+
 /*
  * On ARM, ordinary assignment (str instruction) doesn't clear the local
  * strex/ldrex monitor on some implementations. The reason we can use it for
index 4b981ba57e788bfe4d7c8494ac938d089c3512a4..4864158d486eed51991708a57112ffa979cf074a 100644 (file)
@@ -215,4 +215,42 @@ __LL_SC_PREFIX(atomic64_dec_if_positive(atomic64_t *v))
 }
 __LL_SC_EXPORT(atomic64_dec_if_positive);
 
+#define __CMPXCHG_CASE(w, sz, name, mb, cl)                            \
+__LL_SC_INLINE unsigned long                                           \
+__LL_SC_PREFIX(__cmpxchg_case_##name(volatile void *ptr,               \
+                                    unsigned long old,                 \
+                                    unsigned long new))                \
+{                                                                      \
+       unsigned long tmp, oldval;                                      \
+                                                                       \
+       asm volatile(                                                   \
+       "       " #mb "\n"                                              \
+       "1:     ldxr" #sz "\t%" #w "[oldval], %[v]\n"                   \
+       "       eor     %" #w "[tmp], %" #w "[oldval], %" #w "[old]\n"  \
+       "       cbnz    %" #w "[tmp], 2f\n"                             \
+       "       stxr" #sz "\t%w[tmp], %" #w "[new], %[v]\n"             \
+       "       cbnz    %w[tmp], 1b\n"                                  \
+       "       " #mb "\n"                                              \
+       "       mov     %" #w "[oldval], %" #w "[old]\n"                \
+       "2:"                                                            \
+       : [tmp] "=&r" (tmp), [oldval] "=&r" (oldval),                   \
+         [v] "+Q" (*(unsigned long *)ptr)                              \
+       : [old] "Lr" (old), [new] "r" (new)                             \
+       : cl);                                                          \
+                                                                       \
+       return oldval;                                                  \
+}                                                                      \
+__LL_SC_EXPORT(__cmpxchg_case_##name);
+
+__CMPXCHG_CASE(w, b,    1,        ,         )
+__CMPXCHG_CASE(w, h,    2,        ,         )
+__CMPXCHG_CASE(w,  ,    4,        ,         )
+__CMPXCHG_CASE( ,  ,    8,        ,         )
+__CMPXCHG_CASE(w, b, mb_1, dmb ish, "memory")
+__CMPXCHG_CASE(w, h, mb_2, dmb ish, "memory")
+__CMPXCHG_CASE(w,  , mb_4, dmb ish, "memory")
+__CMPXCHG_CASE( ,  , mb_8, dmb ish, "memory")
+
+#undef __CMPXCHG_CASE
+
 #endif /* __ASM_ATOMIC_LL_SC_H */
index 6e21b5e0c9d62b210cf57231d7ff0e05a8c77d79..b39ae4c1451ab94dc111f9d7f1343e20ae5bcd8d 100644 (file)
@@ -349,4 +349,43 @@ static inline long atomic64_dec_if_positive(atomic64_t *v)
 
 #undef __LL_SC_ATOMIC64
 
+#define __LL_SC_CMPXCHG(op)    __LL_SC_CALL(__cmpxchg_case_##op)
+
+#define __CMPXCHG_CASE(w, sz, name, mb, cl...)                         \
+static inline unsigned long __cmpxchg_case_##name(volatile void *ptr,  \
+                                                 unsigned long old,    \
+                                                 unsigned long new)    \
+{                                                                      \
+       register unsigned long x0 asm ("x0") = (unsigned long)ptr;      \
+       register unsigned long x1 asm ("x1") = old;                     \
+       register unsigned long x2 asm ("x2") = new;                     \
+                                                                       \
+       asm volatile(ARM64_LSE_ATOMIC_INSN(                             \
+       /* LL/SC */                                                     \
+       "nop\n"                                                         \
+       __LL_SC_CMPXCHG(name)                                           \
+       "nop",                                                          \
+       /* LSE atomics */                                               \
+       "       mov     " #w "30, %" #w "[old]\n"                       \
+       "       cas" #mb #sz "\t" #w "30, %" #w "[new], %[v]\n"         \
+       "       mov     %" #w "[ret], " #w "30")                        \
+       : [ret] "+r" (x0), [v] "+Q" (*(unsigned long *)ptr)             \
+       : [old] "r" (x1), [new] "r" (x2)                                \
+       : "x30" , ##cl);                                                \
+                                                                       \
+       return x0;                                                      \
+}
+
+__CMPXCHG_CASE(w, b,    1,   )
+__CMPXCHG_CASE(w, h,    2,   )
+__CMPXCHG_CASE(w,  ,    4,   )
+__CMPXCHG_CASE(x,  ,    8,   )
+__CMPXCHG_CASE(w, b, mb_1, al, "memory")
+__CMPXCHG_CASE(w, h, mb_2, al, "memory")
+__CMPXCHG_CASE(w,  , mb_4, al, "memory")
+__CMPXCHG_CASE(x,  , mb_8, al, "memory")
+
+#undef __LL_SC_CMPXCHG
+#undef __CMPXCHG_CASE
+
 #endif /* __ASM_ATOMIC_LSE_H */
index d0cce80689026c908542bb9382188a5d8a18bc3d..60a558127cefbdeab4f7f7e523bf6249ba61d681 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/bug.h>
 #include <linux/mmdebug.h>
 
+#include <asm/atomic.h>
 #include <asm/barrier.h>
 #include <asm/lse.h>
 
@@ -111,74 +112,20 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size
 static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
                                      unsigned long new, int size)
 {
-       unsigned long oldval = 0, res;
-
        switch (size) {
        case 1:
-               do {
-                       asm volatile("// __cmpxchg1\n"
-                       "       ldxrb   %w1, %2\n"
-                       "       mov     %w0, #0\n"
-                       "       cmp     %w1, %w3\n"
-                       "       b.ne    1f\n"
-                       "       stxrb   %w0, %w4, %2\n"
-                       "1:\n"
-                               : "=&r" (res), "=&r" (oldval), "+Q" (*(u8 *)ptr)
-                               : "Ir" (old), "r" (new)
-                               : "cc");
-               } while (res);
-               break;
-
+               return __cmpxchg_case_1(ptr, old, new);
        case 2:
-               do {
-                       asm volatile("// __cmpxchg2\n"
-                       "       ldxrh   %w1, %2\n"
-                       "       mov     %w0, #0\n"
-                       "       cmp     %w1, %w3\n"
-                       "       b.ne    1f\n"
-                       "       stxrh   %w0, %w4, %2\n"
-                       "1:\n"
-                               : "=&r" (res), "=&r" (oldval), "+Q" (*(u16 *)ptr)
-                               : "Ir" (old), "r" (new)
-                               : "cc");
-               } while (res);
-               break;
-
+               return __cmpxchg_case_2(ptr, old, new);
        case 4:
-               do {
-                       asm volatile("// __cmpxchg4\n"
-                       "       ldxr    %w1, %2\n"
-                       "       mov     %w0, #0\n"
-                       "       cmp     %w1, %w3\n"
-                       "       b.ne    1f\n"
-                       "       stxr    %w0, %w4, %2\n"
-                       "1:\n"
-                               : "=&r" (res), "=&r" (oldval), "+Q" (*(u32 *)ptr)
-                               : "Ir" (old), "r" (new)
-                               : "cc");
-               } while (res);
-               break;
-
+               return __cmpxchg_case_4(ptr, old, new);
        case 8:
-               do {
-                       asm volatile("// __cmpxchg8\n"
-                       "       ldxr    %1, %2\n"
-                       "       mov     %w0, #0\n"
-                       "       cmp     %1, %3\n"
-                       "       b.ne    1f\n"
-                       "       stxr    %w0, %4, %2\n"
-                       "1:\n"
-                               : "=&r" (res), "=&r" (oldval), "+Q" (*(u64 *)ptr)
-                               : "Ir" (old), "r" (new)
-                               : "cc");
-               } while (res);
-               break;
-
+               return __cmpxchg_case_8(ptr, old, new);
        default:
                BUILD_BUG();
        }
 
-       return oldval;
+       unreachable();
 }
 
 #define system_has_cmpxchg_double()     1
@@ -229,13 +176,20 @@ static inline int __cmpxchg_double_mb(volatile void *ptr1, volatile void *ptr2,
 static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
                                         unsigned long new, int size)
 {
-       unsigned long ret;
-
-       smp_mb();
-       ret = __cmpxchg(ptr, old, new, size);
-       smp_mb();
+       switch (size) {
+       case 1:
+               return __cmpxchg_case_mb_1(ptr, old, new);
+       case 2:
+               return __cmpxchg_case_mb_2(ptr, old, new);
+       case 4:
+               return __cmpxchg_case_mb_4(ptr, old, new);
+       case 8:
+               return __cmpxchg_case_mb_8(ptr, old, new);
+       default:
+               BUILD_BUG();
+       }
 
-       return ret;
+       unreachable();
 }
 
 #define cmpxchg(ptr, o, n) \