riscv: Implement cmpxchg8/16() using Zabha
author Alexandre Ghiti <alexghiti@rivosinc.com>
Sun, 3 Nov 2024 14:51:45 +0000 (15:51 +0100)
committer Palmer Dabbelt <palmer@rivosinc.com>
Mon, 11 Nov 2024 15:33:12 +0000 (07:33 -0800)
Add runtime support for Zabha in cmpxchg8/16() operations: when Zabha and
Zacas are both enabled and detected at boot, byte and halfword cmpxchg()
are implemented with a single amocas.b/amocas.h instruction instead of the
masked lr.w/sc.w sequence on the containing 32-bit word.

Note that in the absence of Zacas support in the toolchain, CAS
instructions from Zabha won't be used.
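
For illustration only (not part of this patch), a minimal, hypothetical
caller showing the code path this enables; the variable and function names
below are made up:

    /* Hypothetical example of a byte-wide cmpxchg() user. */
    #include <linux/types.h>
    #include <linux/atomic.h>

    static u8 flag;

    static bool try_claim_flag(void)
    {
            /*
             * With CONFIG_RISCV_ISA_ZABHA and CONFIG_RISCV_ISA_ZACAS enabled
             * and both extensions detected at boot, this byte-sized cmpxchg()
             * uses a single amocas.b; otherwise it falls back to the masked
             * lr.w/sc.w loop on the containing 32-bit word.
             */
            return cmpxchg(&flag, 0, 1) == 0;
    }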

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Andrea Parri <parri.andrea@gmail.com>
Link: https://lore.kernel.org/r/20241103145153.105097-6-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
arch/riscv/Kconfig
arch/riscv/Makefile
arch/riscv/include/asm/cmpxchg.h
arch/riscv/include/asm/hwcap.h
arch/riscv/kernel/cpufeature.c

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 3542efe3088b99edc638f3a74539abefaf25f767..668be90a42e45c51d759a0b0e1719142d147086b 100644
@@ -632,6 +632,24 @@ config RISCV_ISA_ZAWRS
          use of these instructions in the kernel when the Zawrs extension is
          detected at boot.
 
+config TOOLCHAIN_HAS_ZABHA
+       bool
+       default y
+       depends on !64BIT || $(cc-option,-mabi=lp64 -march=rv64ima_zabha)
+       depends on !32BIT || $(cc-option,-mabi=ilp32 -march=rv32ima_zabha)
+       depends on AS_HAS_OPTION_ARCH
+
+config RISCV_ISA_ZABHA
+       bool "Zabha extension support for atomic byte/halfword operations"
+       depends on TOOLCHAIN_HAS_ZABHA
+       depends on RISCV_ALTERNATIVE
+       default y
+       help
+         Enable the use of the Zabha ISA-extension to implement kernel
+         byte/halfword atomic memory operations when it is detected at boot.
+
+         If you don't know what to do here, say Y.
+
 config TOOLCHAIN_HAS_ZACAS
        bool
        default y
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 3700a15744130e1a3f319f3c999042a39b322f1b..9fe1ee740dda8544f5f85aaf37b322de53859205 100644
@@ -85,6 +85,9 @@ endif
 # Check if the toolchain supports Zacas
 riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZACAS) := $(riscv-march-y)_zacas
 
+# Check if the toolchain supports Zabha
+riscv-march-$(CONFIG_TOOLCHAIN_HAS_ZABHA) := $(riscv-march-y)_zabha
+
 # Remove F,D,V from isa string for all. Keep extensions between "fd" and "v" by
 # matching non-v and non-multi-letter extensions out with the filter ([^v_]*)
 KBUILD_CFLAGS += -march=$(shell echo $(riscv-march-y) | sed -E 's/(rv32ima|rv64ima)fd([^v_]*)v?/\1\2/')
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 39c1daf39f6af7fc1e433777a9ffa12bad41c7b6..1f4cd12e4664efb798b7adf2a5c9466b669bc802 100644
  * indicated by comparing RETURN with OLD.
  */
 
-#define __arch_cmpxchg_masked(sc_sfx, prepend, append, r, p, o, n)     \
-({                                                                     \
-       u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);                     \
-       ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;  \
-       ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)   \
-                       << __s;                                         \
-       ulong __newx = (ulong)(n) << __s;                               \
-       ulong __oldx = (ulong)(o) << __s;                               \
-       ulong __retx;                                                   \
-       ulong __rc;                                                     \
-                                                                       \
-       __asm__ __volatile__ (                                          \
-               prepend                                                 \
-               "0:     lr.w %0, %2\n"                                  \
-               "       and  %1, %0, %z5\n"                             \
-               "       bne  %1, %z3, 1f\n"                             \
-               "       and  %1, %0, %z6\n"                             \
-               "       or   %1, %1, %z4\n"                             \
-               "       sc.w" sc_sfx " %1, %1, %2\n"                    \
-               "       bnez %1, 0b\n"                                  \
-               append                                                  \
-               "1:\n"                                                  \
-               : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))      \
-               : "rJ" ((long)__oldx), "rJ" (__newx),                   \
-                 "rJ" (__mask), "rJ" (~__mask)                         \
-               : "memory");                                            \
-                                                                       \
-       r = (__typeof__(*(p)))((__retx & __mask) >> __s);               \
+#define __arch_cmpxchg_masked(sc_sfx, cas_sfx, prepend, append, r, p, o, n)    \
+({                                                                             \
+       if (IS_ENABLED(CONFIG_RISCV_ISA_ZABHA) &&                               \
+           IS_ENABLED(CONFIG_RISCV_ISA_ZACAS) &&                               \
+           riscv_has_extension_unlikely(RISCV_ISA_EXT_ZABHA) &&                \
+           riscv_has_extension_unlikely(RISCV_ISA_EXT_ZACAS)) {                \
+               r = o;                                                          \
+                                                                               \
+               __asm__ __volatile__ (                                          \
+                       prepend                                                 \
+                       "       amocas" cas_sfx " %0, %z2, %1\n"                \
+                       append                                                  \
+                       : "+&r" (r), "+A" (*(p))                                \
+                       : "rJ" (n)                                              \
+                       : "memory");                                            \
+       } else {                                                                \
+               u32 *__ptr32b = (u32 *)((ulong)(p) & ~0x3);                     \
+               ulong __s = ((ulong)(p) & (0x4 - sizeof(*p))) * BITS_PER_BYTE;  \
+               ulong __mask = GENMASK(((sizeof(*p)) * BITS_PER_BYTE) - 1, 0)   \
+                              << __s;                                          \
+               ulong __newx = (ulong)(n) << __s;                               \
+               ulong __oldx = (ulong)(o) << __s;                               \
+               ulong __retx;                                                   \
+               ulong __rc;                                                     \
+                                                                               \
+               __asm__ __volatile__ (                                          \
+                       prepend                                                 \
+                       "0:     lr.w %0, %2\n"                                  \
+                       "       and  %1, %0, %z5\n"                             \
+                       "       bne  %1, %z3, 1f\n"                             \
+                       "       and  %1, %0, %z6\n"                             \
+                       "       or   %1, %1, %z4\n"                             \
+                       "       sc.w" sc_sfx " %1, %1, %2\n"                    \
+                       "       bnez %1, 0b\n"                                  \
+                       append                                                  \
+                       "1:\n"                                                  \
+                       : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))      \
+                       : "rJ" ((long)__oldx), "rJ" (__newx),                   \
+                         "rJ" (__mask), "rJ" (~__mask)                         \
+                       : "memory");                                            \
+                                                                               \
+               r = (__typeof__(*(p)))((__retx & __mask) >> __s);               \
+       }                                                                       \
 })
 
 #define __arch_cmpxchg(lr_sfx, sc_cas_sfx, prepend, append, r, p, co, o, n)    \
                                                                        \
        switch (sizeof(*__ptr)) {                                       \
        case 1:                                                         \
+               __arch_cmpxchg_masked(sc_cas_sfx, ".b" sc_cas_sfx,      \
+                                       prepend, append,                \
+                                       __ret, __ptr, __old, __new);    \
+               break;                                                  \
        case 2:                                                         \
-               __arch_cmpxchg_masked(sc_cas_sfx, prepend, append,      \
+               __arch_cmpxchg_masked(sc_cas_sfx, ".h" sc_cas_sfx,      \
+                                       prepend, append,                \
                                        __ret, __ptr, __old, __new);    \
                break;                                                  \
        case 4:                                                         \
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 46d9de54179ed40aa7b1ea0ec011fd6eea7218df..74bcb0e2bd1f7917ee13e78288fd16e943493942 100644
@@ -93,6 +93,7 @@
 #define RISCV_ISA_EXT_ZCMOP            84
 #define RISCV_ISA_EXT_ZAWRS            85
 #define RISCV_ISA_EXT_SVVPTC           86
+#define RISCV_ISA_EXT_ZABHA            87
 
 #define RISCV_ISA_EXT_XLINUXENVCFG     127
 
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 3a8eeaa9310c32fce2141aff534dc4432b32abbe..5e743d8d34f5a91b29d4c2d328f89b8dcdb4e26a 100644
@@ -322,6 +322,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
        __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
        __RISCV_ISA_EXT_DATA(zihpm, RISCV_ISA_EXT_ZIHPM),
        __RISCV_ISA_EXT_DATA(zimop, RISCV_ISA_EXT_ZIMOP),
+       __RISCV_ISA_EXT_DATA(zabha, RISCV_ISA_EXT_ZABHA),
        __RISCV_ISA_EXT_DATA(zacas, RISCV_ISA_EXT_ZACAS),
        __RISCV_ISA_EXT_DATA(zawrs, RISCV_ISA_EXT_ZAWRS),
        __RISCV_ISA_EXT_DATA(zfa, RISCV_ISA_EXT_ZFA),