Merge patch series "Rework & improve riscv cmpxchg.h and atomic.h"
author		Palmer Dabbelt <palmer@rivosinc.com>
		Mon, 8 Apr 2024 17:55:03 +0000 (10:55 -0700)
committer	Palmer Dabbelt <palmer@rivosinc.com>
		Sun, 28 Apr 2024 21:50:33 +0000 (14:50 -0700)
Leonardo Bras <leobras@redhat.com> says:

While studying riscv's cmpxchg.h file, I got really interested in
understanding how the RISC-V asm implements the different versions of
{cmp,}xchg.

Once I understood the pattern, it made sense to remove the duplication
and create macros that make it easy to see what exactly changes between
the versions: instruction suffixes & barriers.
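
To illustrate, the ordering variants end up as thin wrappers around a
single parameterized asm template, roughly like this (a sketch based on
the diff below; the barrier macros come from asm/fence.h):

	#define arch_xchg_relaxed(ptr, x)				\
		_arch_xchg(ptr, x, "", "", "")
	#define arch_xchg_acquire(ptr, x)				\
		_arch_xchg(ptr, x, "", "", RISCV_ACQUIRE_BARRIER)
	#define arch_xchg_release(ptr, x)				\
		_arch_xchg(ptr, x, "", RISCV_RELEASE_BARRIER, "")
	#define arch_xchg(ptr, x)					\
		_arch_xchg(ptr, x, ".aqrl", "", "")

Each variant only picks an AMO ordering suffix and/or a fence to emit
around the same instruction sequence.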

I also did the same kind of work on atomic.h.

After that, I noted that both cmpxchg and xchg only accept variables of
sizes 4 and 8, whereas x86 and arm64 support sizes 1, 2, 4 and 8.

Now that the deduplication is done, it is quite straightforward to
implement them for variable sizes 1 and 2, so I did it. Guo Ren has
already presented me with some possible users :)
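
The trick for sizes 1 and 2 is to operate on the naturally aligned
32-bit word that contains the variable, via a shift and mask. Here is a
minimal C sketch of the idea (hypothetical helper, little-endian, and
not atomic; the real code does this read-modify-write inside an
lr.w/sc.w loop, see __arch_xchg_masked in the diff below):

	#include <stdint.h>

	static uint8_t xchg8_sketch(uint8_t *p, uint8_t new)
	{
		/* aligned 32-bit word that contains *p */
		uint32_t *p32 = (uint32_t *)((uintptr_t)p & ~(uintptr_t)0x3);
		/* bit offset of *p within that word */
		unsigned int s = ((uintptr_t)p & 0x3) * 8;
		uint32_t mask = 0xffu << s;
		uint32_t old = *p32;		/* lr.w in the real code */

		*p32 = (old & ~mask) | ((uint32_t)new << s);	/* sc.w */
		return (uint8_t)((old & mask) >> s);
	}

For a 2-byte variable the shift comes from (p & 2) * 8 instead, which
is what the ((ulong)(p) & (0x4 - sizeof(*p))) term below computes for
both sizes.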

I compared the generated asm using a test.c that exercised every
changed function, and could not detect any change from patches 1 + 2 +
3 compared with upstream.

Pathes 4 & 5 were compiled-tested, merged with guoren/qspinlock_v11 and
booted just fine with qemu -machine virt -append "qspinlock".

(tree: https://gitlab.com/LeoBras/linux/-/commits/guo_qspinlock_v11)

Latest tests happened based on this tree:
https://github.com/guoren83/linux/tree/qspinlock_v12

* b4-shazam-lts:
  riscv/cmpxchg: Implement xchg for variables of size 1 and 2
  riscv/cmpxchg: Implement cmpxchg for variables of size 1 and 2
  riscv/atomic.h : Deduplicate arch_atomic.*
  riscv/cmpxchg: Deduplicate cmpxchg() asm and macros
  riscv/cmpxchg: Deduplicate xchg() asm functions

Link: https://lore.kernel.org/r/20240103163203.72768-2-leobras@redhat.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
arch/riscv/include/asm/atomic.h
arch/riscv/include/asm/cmpxchg.h

diff --cc arch/riscv/include/asm/atomic.h
Simple merge
diff --cc arch/riscv/include/asm/cmpxchg.h
index 2fee65cc8443246c07ca1f2c53e896cad426ae77,26cea2395aae8c31019fa0f8d0822d8af959364d..4d23f0c35b94970da5802a1ac807c0db7186f4e1
@@@ -8,27 -8,64 +8,63 @@@
  
  #include <linux/bug.h>
  
 -#include <asm/barrier.h>
  #include <asm/fence.h>
  
- #define __xchg_relaxed(ptr, new, size)                                        \
+ #define __arch_xchg_masked(prepend, append, r, p, n)                  \
+ ({                                                                    \
+       u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);                     \
+       ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;  \
+       ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)   \
+                       << __s;                                         \
+       ulong __newx = (ulong)(n) << __s;                               \
+       ulong __retx;                                                   \
+       ulong __rc;                                                     \
+                                                                       \
+       __asm__ __volatile__ (                                          \
+              prepend                                                  \
+              "0:      lr.w %0, %2\n"                                  \
+              "        and  %1, %0, %z4\n"                             \
+              "        or   %1, %1, %z3\n"                             \
+              "        sc.w %1, %1, %2\n"                              \
+              "        bnez %1, 0b\n"                                  \
+              append                                                   \
+              : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))       \
+              : "rJ" (__newx), "rJ" (~__mask)                          \
+              : "memory");                                             \
+                                                                       \
+       r = (__typeof__(*(p)))((__retx & __mask) >> __s);               \
+ })
+ #define __arch_xchg(sfx, prepend, append, r, p, n)                    \
+ ({                                                                    \
+       __asm__ __volatile__ (                                          \
+               prepend                                                 \
+               "       amoswap" sfx " %0, %2, %1\n"                    \
+               append                                                  \
+               : "=r" (r), "+A" (*(p))                                 \
+               : "r" (n)                                               \
+               : "memory");                                            \
+ })
+ #define _arch_xchg(ptr, new, sfx, prepend, append)                    \
  ({                                                                    \
        __typeof__(ptr) __ptr = (ptr);                                  \
-       __typeof__(new) __new = (new);                                  \
-       __typeof__(*(ptr)) __ret;                                       \
-       switch (size) {                                                 \
+       __typeof__(*(__ptr)) __new = (new);                             \
+       __typeof__(*(__ptr)) __ret;                                     \
+                                                                       \
+       switch (sizeof(*__ptr)) {                                       \
+       case 1:                                                         \
+       case 2:                                                         \
+               __arch_xchg_masked(prepend, append,                     \
+                                  __ret, __ptr, __new);                \
+               break;                                                  \
        case 4:                                                         \
-               __asm__ __volatile__ (                                  \
-                       "       amoswap.w %0, %2, %1\n"                 \
-                       : "=r" (__ret), "+A" (*__ptr)                   \
-                       : "r" (__new)                                   \
-                       : "memory");                                    \
+               __arch_xchg(".w" sfx, prepend, append,                  \
+                             __ret, __ptr, __new);                     \
                break;                                                  \
        case 8:                                                         \
-               __asm__ __volatile__ (                                  \
-                       "       amoswap.d %0, %2, %1\n"                 \
-                       : "=r" (__ret), "+A" (*__ptr)                   \
-                       : "r" (__new)                                   \
-                       : "memory");                                    \
+               __arch_xchg(".d" sfx, prepend, append,                  \
+                             __ret, __ptr, __new);                     \
                break;                                                  \
        default:                                                        \
                BUILD_BUG();                                            \