LoongArch: Relax memory ordering for atomic operations
author    WANG Rui <wangrui@loongson.cn>
          Wed, 8 Nov 2023 06:12:15 +0000 (14:12 +0800)
committer Huacai Chen <chenhuacai@loongson.cn>
          Wed, 8 Nov 2023 06:12:15 +0000 (14:12 +0800)
This patch relaxes the implementation of the atomic operations while still
satisfying their memory ordering requirements: the _relaxed variants now use
the plain AM* instructions, and fully ordered variants based on the AM*_DB
forms are provided alongside them. This helps improve performance on LA664+.

Unixbench with full threads (8):

                                           before       after   change
  Dhrystone 2 using register variables   203910714.2  203909539.8   0.00%
  Double-Precision Whetstone                 37930.9        37931   0.00%
  Execl Throughput                           29431.5      29545.8   0.39%
  File Copy 1024 bufsize 2000 maxblocks    6645759.5      6676320   0.46%
  File Copy 256 bufsize 500 maxblocks      2138772.4    2144182.4   0.25%
  File Copy 4096 bufsize 8000 maxblocks   11640698.4     11602703  -0.33%
  Pipe Throughput                          8849077.7    8917009.4   0.77%
  Pipe-based Context Switching             1255108.5    1287277.3   2.56%
  Process Creation                           50825.9      50442.1  -0.76%
  Shell Scripts (1 concurrent)               25795.8      25942.3   0.57%
  Shell Scripts (8 concurrent)                3812.6       3835.2   0.59%
  System Call Overhead                     9248212.6    9353348.6   1.14%
                                                                  =======
  System Benchmarks Index Score               8076.6       8114.4   0.47%

Signed-off-by: WANG Rui <wangrui@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
arch/loongarch/include/asm/atomic.h

index e27f0c72d3242b58aec094d40199ab30160583a5..99af8b3160a88f9ec99490179525f80b59b6d044 100644 (file)
 static inline void arch_atomic_##op(int i, atomic_t *v)                        \
 {                                                                      \
        __asm__ __volatile__(                                           \
-       "am"#asm_op"_db.w" " $zero, %1, %0      \n"                     \
+       "am"#asm_op".w" " $zero, %1, %0 \n"                             \
        : "+ZB" (v->counter)                                            \
        : "r" (I)                                                       \
        : "memory");                                                    \
 }
 
-#define ATOMIC_OP_RETURN(op, I, asm_op, c_op)                          \
-static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)        \
+#define ATOMIC_OP_RETURN(op, I, asm_op, c_op, mb, suffix)              \
+static inline int arch_atomic_##op##_return##suffix(int i, atomic_t *v)        \
 {                                                                      \
        int result;                                                     \
                                                                        \
        __asm__ __volatile__(                                           \
-       "am"#asm_op"_db.w" " %1, %2, %0         \n"                     \
+       "am"#asm_op#mb".w" " %1, %2, %0         \n"                     \
        : "+ZB" (v->counter), "=&r" (result)                            \
        : "r" (I)                                                       \
        : "memory");                                                    \
@@ -56,13 +56,13 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)     \
        return result c_op I;                                           \
 }
 
-#define ATOMIC_FETCH_OP(op, I, asm_op)                                 \
-static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \
+#define ATOMIC_FETCH_OP(op, I, asm_op, mb, suffix)                     \
+static inline int arch_atomic_fetch_##op##suffix(int i, atomic_t *v)   \
 {                                                                      \
        int result;                                                     \
                                                                        \
        __asm__ __volatile__(                                           \
-       "am"#asm_op"_db.w" " %1, %2, %0         \n"                     \
+       "am"#asm_op#mb".w" " %1, %2, %0         \n"                     \
        : "+ZB" (v->counter), "=&r" (result)                            \
        : "r" (I)                                                       \
        : "memory");                                                    \
@@ -72,29 +72,53 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)      \
 
 #define ATOMIC_OPS(op, I, asm_op, c_op)                                        \
        ATOMIC_OP(op, I, asm_op)                                        \
-       ATOMIC_OP_RETURN(op, I, asm_op, c_op)                           \
-       ATOMIC_FETCH_OP(op, I, asm_op)
+       ATOMIC_OP_RETURN(op, I, asm_op, c_op, _db,         )            \
+       ATOMIC_OP_RETURN(op, I, asm_op, c_op,    , _relaxed)            \
+       ATOMIC_FETCH_OP(op, I, asm_op, _db,         )                   \
+       ATOMIC_FETCH_OP(op, I, asm_op,    , _relaxed)
 
 ATOMIC_OPS(add, i, add, +)
 ATOMIC_OPS(sub, -i, add, +)
 
+#define arch_atomic_add_return         arch_atomic_add_return
+#define arch_atomic_add_return_acquire arch_atomic_add_return
+#define arch_atomic_add_return_release arch_atomic_add_return
 #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed
+#define arch_atomic_sub_return         arch_atomic_sub_return
+#define arch_atomic_sub_return_acquire arch_atomic_sub_return
+#define arch_atomic_sub_return_release arch_atomic_sub_return
 #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed
+#define arch_atomic_fetch_add          arch_atomic_fetch_add
+#define arch_atomic_fetch_add_acquire  arch_atomic_fetch_add
+#define arch_atomic_fetch_add_release  arch_atomic_fetch_add
 #define arch_atomic_fetch_add_relaxed  arch_atomic_fetch_add_relaxed
+#define arch_atomic_fetch_sub          arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub_acquire  arch_atomic_fetch_sub
+#define arch_atomic_fetch_sub_release  arch_atomic_fetch_sub
 #define arch_atomic_fetch_sub_relaxed  arch_atomic_fetch_sub_relaxed
 
 #undef ATOMIC_OPS
 
 #define ATOMIC_OPS(op, I, asm_op)                                      \
        ATOMIC_OP(op, I, asm_op)                                        \
-       ATOMIC_FETCH_OP(op, I, asm_op)
+       ATOMIC_FETCH_OP(op, I, asm_op, _db,         )                   \
+       ATOMIC_FETCH_OP(op, I, asm_op,    , _relaxed)
 
 ATOMIC_OPS(and, i, and)
 ATOMIC_OPS(or, i, or)
 ATOMIC_OPS(xor, i, xor)
 
+#define arch_atomic_fetch_and          arch_atomic_fetch_and
+#define arch_atomic_fetch_and_acquire  arch_atomic_fetch_and
+#define arch_atomic_fetch_and_release  arch_atomic_fetch_and
 #define arch_atomic_fetch_and_relaxed  arch_atomic_fetch_and_relaxed
+#define arch_atomic_fetch_or           arch_atomic_fetch_or
+#define arch_atomic_fetch_or_acquire   arch_atomic_fetch_or
+#define arch_atomic_fetch_or_release   arch_atomic_fetch_or
 #define arch_atomic_fetch_or_relaxed   arch_atomic_fetch_or_relaxed
+#define arch_atomic_fetch_xor          arch_atomic_fetch_xor
+#define arch_atomic_fetch_xor_acquire  arch_atomic_fetch_xor
+#define arch_atomic_fetch_xor_release  arch_atomic_fetch_xor
 #define arch_atomic_fetch_xor_relaxed  arch_atomic_fetch_xor_relaxed
 
 #undef ATOMIC_OPS
@@ -172,18 +196,18 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v)
 static inline void arch_atomic64_##op(long i, atomic64_t *v)           \
 {                                                                      \
        __asm__ __volatile__(                                           \
-       "am"#asm_op"_db.d " " $zero, %1, %0     \n"                     \
+       "am"#asm_op".d " " $zero, %1, %0        \n"                     \
        : "+ZB" (v->counter)                                            \
        : "r" (I)                                                       \
        : "memory");                                                    \
 }
 
-#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op)                                        \
-static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v)  \
+#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op, mb, suffix)                    \
+static inline long arch_atomic64_##op##_return##suffix(long i, atomic64_t *v)  \
 {                                                                              \
        long result;                                                            \
        __asm__ __volatile__(                                                   \
-       "am"#asm_op"_db.d " " %1, %2, %0                \n"                     \
+       "am"#asm_op#mb".d " " %1, %2, %0                \n"                     \
        : "+ZB" (v->counter), "=&r" (result)                                    \
        : "r" (I)                                                               \
        : "memory");                                                            \
@@ -191,13 +215,13 @@ static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v)     \
        return result c_op I;                                                   \
 }
 
-#define ATOMIC64_FETCH_OP(op, I, asm_op)                                       \
-static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v)   \
+#define ATOMIC64_FETCH_OP(op, I, asm_op, mb, suffix)                           \
+static inline long arch_atomic64_fetch_##op##suffix(long i, atomic64_t *v)     \
 {                                                                              \
        long result;                                                            \
                                                                                \
        __asm__ __volatile__(                                                   \
-       "am"#asm_op"_db.d " " %1, %2, %0                \n"                     \
+       "am"#asm_op#mb".d " " %1, %2, %0                \n"                     \
        : "+ZB" (v->counter), "=&r" (result)                                    \
        : "r" (I)                                                               \
        : "memory");                                                            \
@@ -207,29 +231,53 @@ static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v)      \
 
 #define ATOMIC64_OPS(op, I, asm_op, c_op)                                    \
        ATOMIC64_OP(op, I, asm_op)                                            \
-       ATOMIC64_OP_RETURN(op, I, asm_op, c_op)                               \
-       ATOMIC64_FETCH_OP(op, I, asm_op)
+       ATOMIC64_OP_RETURN(op, I, asm_op, c_op, _db,         )                \
+       ATOMIC64_OP_RETURN(op, I, asm_op, c_op,    , _relaxed)                \
+       ATOMIC64_FETCH_OP(op, I, asm_op, _db,         )                       \
+       ATOMIC64_FETCH_OP(op, I, asm_op,    , _relaxed)
 
 ATOMIC64_OPS(add, i, add, +)
 ATOMIC64_OPS(sub, -i, add, +)
 
+#define arch_atomic64_add_return               arch_atomic64_add_return
+#define arch_atomic64_add_return_acquire       arch_atomic64_add_return
+#define arch_atomic64_add_return_release       arch_atomic64_add_return
 #define arch_atomic64_add_return_relaxed       arch_atomic64_add_return_relaxed
+#define arch_atomic64_sub_return               arch_atomic64_sub_return
+#define arch_atomic64_sub_return_acquire       arch_atomic64_sub_return
+#define arch_atomic64_sub_return_release       arch_atomic64_sub_return
 #define arch_atomic64_sub_return_relaxed       arch_atomic64_sub_return_relaxed
+#define arch_atomic64_fetch_add                        arch_atomic64_fetch_add
+#define arch_atomic64_fetch_add_acquire                arch_atomic64_fetch_add
+#define arch_atomic64_fetch_add_release                arch_atomic64_fetch_add
 #define arch_atomic64_fetch_add_relaxed                arch_atomic64_fetch_add_relaxed
+#define arch_atomic64_fetch_sub                        arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub_acquire                arch_atomic64_fetch_sub
+#define arch_atomic64_fetch_sub_release                arch_atomic64_fetch_sub
 #define arch_atomic64_fetch_sub_relaxed                arch_atomic64_fetch_sub_relaxed
 
 #undef ATOMIC64_OPS
 
 #define ATOMIC64_OPS(op, I, asm_op)                                          \
        ATOMIC64_OP(op, I, asm_op)                                            \
-       ATOMIC64_FETCH_OP(op, I, asm_op)
+       ATOMIC64_FETCH_OP(op, I, asm_op, _db,         )                       \
+       ATOMIC64_FETCH_OP(op, I, asm_op,    , _relaxed)
 
 ATOMIC64_OPS(and, i, and)
 ATOMIC64_OPS(or, i, or)
 ATOMIC64_OPS(xor, i, xor)
 
+#define arch_atomic64_fetch_and                arch_atomic64_fetch_and
+#define arch_atomic64_fetch_and_acquire        arch_atomic64_fetch_and
+#define arch_atomic64_fetch_and_release        arch_atomic64_fetch_and
 #define arch_atomic64_fetch_and_relaxed        arch_atomic64_fetch_and_relaxed
+#define arch_atomic64_fetch_or         arch_atomic64_fetch_or
+#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or
+#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or
 #define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed
+#define arch_atomic64_fetch_xor                arch_atomic64_fetch_xor
+#define arch_atomic64_fetch_xor_acquire        arch_atomic64_fetch_xor
+#define arch_atomic64_fetch_xor_release        arch_atomic64_fetch_xor
 #define arch_atomic64_fetch_xor_relaxed        arch_atomic64_fetch_xor_relaxed
 
 #undef ATOMIC64_OPS