locking/atomic/x86: Introduce arch_try_cmpxchg64_local()
author: Uros Bizjak <ubizjak@gmail.com>
Sun, 14 Apr 2024 16:12:43 +0000 (18:12 +0200)
committer: Ingo Molnar <mingo@kernel.org>
Sun, 14 Apr 2024 20:40:54 +0000 (22:40 +0200)
Introduce arch_try_cmpxchg64_local() for 64-bit and 32-bit targets
to improve code using cmpxchg64_local().  On 64-bit targets, the
generated assembly improves from:

    3e28: 31 c0                 xor    %eax,%eax
    3e2a: 4d 0f b1 7d 00        cmpxchg %r15,0x0(%r13)
    3e2f: 48 85 c0              test   %rax,%rax
    3e32: 0f 85 9f 00 00 00     jne    3ed7 <...>

to:

    3e28: 31 c0                 xor    %eax,%eax
    3e2a: 4d 0f b1 7d 00        cmpxchg %r15,0x0(%r13)
    3e2f: 0f 85 9f 00 00 00     jne    3ed4 <...>

where the TEST instruction after CMPXCHG is no longer needed.  The
improvements for 32-bit targets are even more noticeable, because the
double-word compare after CMPXCHG8B gets eliminated.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Waiman Long <longman@redhat.com>
Link: https://lore.kernel.org/r/20240414161257.49145-1-ubizjak@gmail.com
arch/x86/include/asm/cmpxchg_32.h
arch/x86/include/asm/cmpxchg_64.h

index 9e0d330dd5d0add6ab74b46db715b64a623740a0..9dedc13d5a77c800f24df1eb33ff8649e03422b8 100644 (file)
@@ -64,6 +64,11 @@ static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 ne
        return __arch_try_cmpxchg64(ptr, oldp, new, LOCK_PREFIX);
 }
 
+static __always_inline bool __try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+       return __arch_try_cmpxchg64(ptr, oldp, new,);   /* empty last argument on purpose: __try_cmpxchg64() passes LOCK_PREFIX here, this _local variant passes nothing */
+}
+
 #ifdef CONFIG_X86_CMPXCHG64
 
 #define arch_cmpxchg64 __cmpxchg64
@@ -72,6 +77,8 @@ static __always_inline bool __try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 ne
 
 #define arch_try_cmpxchg64 __try_cmpxchg64
 
+#define arch_try_cmpxchg64_local __try_cmpxchg64_local /* CONFIG_X86_CMPXCHG64 branch: map the arch_ name straight onto the inline helper */
+
 #else
 
 /*
@@ -150,6 +157,33 @@ static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64
 }
 #define arch_try_cmpxchg64 arch_try_cmpxchg64
 
+#define __arch_try_cmpxchg64_emu_local(_ptr, _oldp, _new)              \
+({     /* 64-bit try-cmpxchg with no lock prefix in the asm; the ({ }) yields likely(ret) */ \
+       union __u64_halves o = { .full = *(_oldp), },                   \
+                          n = { .full = (_new), };                     \
+       bool ret;                                                       \
+       /* o (from *_oldp) goes in as "+a"/"+d", so the asm may overwrite it;   \
+        * n (from _new) goes in as "b"/"c".  _ptr is additionally passed in    \
+        * "S" - presumably for the cmpxchg8b_emu call, confirm.                \
+        * NOTE(review): ALTERNATIVE() presumably selects the native cmpxchg8b  \
+        * when X86_FEATURE_CX8 is present and the call otherwise, and          \
+        * CC_SET(e)/CC_OUT(e) presumably deliver the "equal" flag in ret -     \
+        * confirm against their definitions. */                        \
+       asm volatile(ALTERNATIVE("call cmpxchg8b_emu",                  \
+                                "cmpxchg8b %[ptr]", X86_FEATURE_CX8)   \
+                    CC_SET(e)                                          \
+                    : CC_OUT(e) (ret),                                 \
+                      [ptr] "+m" (*(_ptr)),                            \
+                      "+a" (o.low), "+d" (o.high)                      \
+                    : "b" (n.low), "c" (n.high), "S" (_ptr)            \
+                    : "memory");                                       \
+       /* ret false: store o, as left by the asm, back through _oldp. */       \
+       if (unlikely(!ret))                                             \
+               *(_oldp) = o.full;                                      \
+       /* Last expression is the value of the whole statement expression. */   \
+       likely(ret);                                                    \
+})
+
+static __always_inline bool arch_try_cmpxchg64_local(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+       return __arch_try_cmpxchg64_emu_local(ptr, oldp, new);  /* typed inline front end for the macro in the !CONFIG_X86_CMPXCHG64 branch */
+}
+#define arch_try_cmpxchg64_local arch_try_cmpxchg64_local      /* self-named macro; presumably lets #ifdef checks elsewhere see that this arch provides it - confirm */
+
 #endif
 
 #define system_has_cmpxchg64()         boot_cpu_has(X86_FEATURE_CX8)
index c1d6cd58f80940347141cdf0ecdb5953c3e899d3..5e241306db26a78c0e43636cd0978121929b3139 100644 (file)
        arch_try_cmpxchg((ptr), (po), (n));                             \
 })
 
+#define arch_try_cmpxchg64_local(ptr, po, n)                           \
+({     /* 64-bit header: forward to the generic-size arch_try_cmpxchg_local() */ \
+       BUILD_BUG_ON(sizeof(*(ptr)) != 8);      /* only 8-byte objects are accepted */ \
+       arch_try_cmpxchg_local((ptr), (po), (n));       /* value of the ({ }) expression */ \
+})
+
 union __u128_halves {
        u128 full;
        struct {