bpf, riscv64: Support load-acquire and store-release instructions
author Andrea Parri <parri.andrea@gmail.com>
Wed, 7 May 2025 03:43:01 +0000 (03:43 +0000)
committer Alexei Starovoitov <ast@kernel.org>
Fri, 9 May 2025 17:05:27 +0000 (10:05 -0700)
Support BPF load-acquire (BPF_LOAD_ACQ) and store-release
(BPF_STORE_REL) instructions in the riscv64 JIT compiler.  For example,
consider the following 64-bit load-acquire (assuming little-endian):

  db 10 00 00 00 01 00 00  r0 = load_acquire((u64 *)(r1 + 0x0))
  95 00 00 00 00 00 00 00  exit

  opcode (0xdb): BPF_ATOMIC | BPF_DW | BPF_STX
  imm (0x00000100): BPF_LOAD_ACQ

The JIT compiler will emit an LD instruction followed by a FENCE R,RW
instruction for the above, e.g.:

  ld x7,0(x6)
  fence r,rw

Similarly, consider the following 16-bit store-release:

  cb 21 00 00 10 01 00 00  store_release((u16 *)(r1 + 0x0), w2)
  95 00 00 00 00 00 00 00  exit

  opcode (0xcb): BPF_ATOMIC | BPF_H | BPF_STX
  imm (0x00000110): BPF_STORE_REL

A FENCE RW,W instruction followed by an SH instruction will be emitted,
e.g.:

  fence rw,w
  sh x2,0(x4)

8-bit and 16-bit load-acquires are zero-extending (cf., LBU, LHU).  The
verifier always rejects misaligned load-acquires/store-releases (even if
BPF_F_ANY_ALIGNMENT is set), so the emitted load and store instructions
are guaranteed to be single-copy atomic.

Introduce primitives to emit the relevant (and the most common/used in
the kernel) fences, i.e. fences with R -> RW, RW -> W and RW -> RW.

Rename emit_atomic() to emit_atomic_rmw() to make it clear that it only
handles RMW atomics, and replace its is64 parameter with the instruction's
code so that the required checks on the opsize (BPF_SIZE(code)) can be
performed.

Acked-by: Björn Töpel <bjorn@kernel.org>
Tested-by: Björn Töpel <bjorn@rivosinc.com> # QEMU/RVA23
Signed-off-by: Andrea Parri <parri.andrea@gmail.com>
Co-developed-by: Peilin Ye <yepeilin@google.com>
Signed-off-by: Peilin Ye <yepeilin@google.com>
Reviewed-by: Pu Lehui <pulehui@huawei.com>
Link: https://lore.kernel.org/r/3059c560e537ad43ed19055d2ebbd970c698095a.1746588351.git.yepeilin@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
arch/riscv/net/bpf_jit.h
arch/riscv/net/bpf_jit_comp64.c

index 1d1c78d4cff1eee85eb244b5da7e3d933274857a..e7b032dfd17f0ff8e2e23c8921b6914b6402d00c 100644 (file)
@@ -608,6 +608,21 @@ static inline u32 rv_fence(u8 pred, u8 succ)
        return rv_i_insn(imm11_0, 0, 0, 0, 0xf);
 }
 
+static inline void emit_fence_r_rw(struct rv_jit_context *ctx)
+{      /* Emit one FENCE R,RW instruction into ctx. */
+       emit(rv_fence(0x2, 0x3), ctx);  /* rv_fence(pred, succ): pred = R (0x2), succ = RW (0x3) */
+}
+
+static inline void emit_fence_rw_w(struct rv_jit_context *ctx)
+{      /* Emit one FENCE RW,W instruction into ctx. */
+       emit(rv_fence(0x3, 0x1), ctx);  /* rv_fence(pred, succ): pred = RW (0x3), succ = W (0x1) */
+}
+
+static inline void emit_fence_rw_rw(struct rv_jit_context *ctx)
+{      /* Emit one FENCE RW,RW instruction into ctx. */
+       emit(rv_fence(0x3, 0x3), ctx);  /* rv_fence(pred, succ): pred = RW (0x3), succ = RW (0x3) */
+}
+
 static inline u32 rv_nop(void)
 {
        return rv_i_insn(0, 0, 0, 0, 0x13);
index 953b6a20c69f15e48e5579438b607da1e9022081..8767f032f2de989ea73c67cee870581a0ab9f0c1 100644 (file)
@@ -607,11 +607,65 @@ static void emit_store_64(u8 rd, s32 off, u8 rs, struct rv_jit_context *ctx)
        emit_sd(RV_REG_T1, 0, rs, ctx);
 }
 
-static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
-                       struct rv_jit_context *ctx)
+static int emit_atomic_ld_st(u8 rd, u8 rs, s16 off, s32 imm, u8 code, struct rv_jit_context *ctx)
+{      /* Emit the load-acquire or store-release selected by imm; returns 0, or -EINVAL for any other imm. */
+       switch (imm) {
+       /* dst_reg = load_acquire(src_reg + off16) */
+       case BPF_LOAD_ACQ:
+               switch (BPF_SIZE(code)) {       /* access width comes from the opcode's size field */
+               case BPF_B:
+                       emit_load_8(false, rd, off, rs, ctx);   /* 8-bit load-acquire is zero-extending (cf. LBU) */
+                       break;
+               case BPF_H:
+                       emit_load_16(false, rd, off, rs, ctx);  /* 16-bit load-acquire is zero-extending (cf. LHU) */
+                       break;
+               case BPF_W:
+                       emit_load_32(false, rd, off, rs, ctx);
+                       break;
+               case BPF_DW:
+                       emit_load_64(false, rd, off, rs, ctx);
+                       break;
+               }
+               emit_fence_r_rw(ctx);   /* acquire ordering: the fence goes after the load */
+               break;
+       /* store_release(dst_reg + off16, src_reg) */
+       case BPF_STORE_REL:
+               emit_fence_rw_w(ctx);   /* release ordering: the fence goes before the store */
+               switch (BPF_SIZE(code)) {       /* access width comes from the opcode's size field */
+               case BPF_B:
+                       emit_store_8(rd, off, rs, ctx);
+                       break;
+               case BPF_H:
+                       emit_store_16(rd, off, rs, ctx);
+                       break;
+               case BPF_W:
+                       emit_store_32(rd, off, rs, ctx);
+                       break;
+               case BPF_DW:
+                       emit_store_64(rd, off, rs, ctx);
+                       break;
+               }
+               break;
+       default:        /* imm is neither BPF_LOAD_ACQ nor BPF_STORE_REL */
+               pr_err_once("bpf-jit: invalid atomic load/store opcode %02x\n", imm);
+               return -EINVAL;
+       }
+
+       return 0;       /* instruction(s) emitted */
+}
+
+static int emit_atomic_rmw(u8 rd, u8 rs, s16 off, s32 imm, u8 code,
+                          struct rv_jit_context *ctx)
 {
        u8 r0;
        int jmp_offset;
+       bool is64;
+
+       if (BPF_SIZE(code) != BPF_W && BPF_SIZE(code) != BPF_DW) {
+               pr_err_once("bpf-jit: 1- and 2-byte RMW atomics are not supported\n");
+               return -EINVAL;
+       }
+       is64 = BPF_SIZE(code) == BPF_DW;
 
        if (off) {
                if (is_12b_int(off)) {
@@ -688,9 +742,14 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64,
                     rv_sc_w(RV_REG_T3, rs, rd, 0, 1), ctx);
                jmp_offset = ninsns_rvoff(-6);
                emit(rv_bne(RV_REG_T3, 0, jmp_offset >> 1), ctx);
-               emit(rv_fence(0x3, 0x3), ctx);
+               emit_fence_rw_rw(ctx);
                break;
+       default:
+               pr_err_once("bpf-jit: invalid atomic RMW opcode %02x\n", imm);
+               return -EINVAL;
        }
+
+       return 0;
 }
 
 #define BPF_FIXUP_OFFSET_MASK   GENMASK(26, 0)
@@ -1962,10 +2021,16 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
        case BPF_STX | BPF_MEM | BPF_DW:
                emit_store_64(rd, off, rs, ctx);
                break;
+       case BPF_STX | BPF_ATOMIC | BPF_B:
+       case BPF_STX | BPF_ATOMIC | BPF_H:
        case BPF_STX | BPF_ATOMIC | BPF_W:
        case BPF_STX | BPF_ATOMIC | BPF_DW:
-               emit_atomic(rd, rs, off, imm,
-                           BPF_SIZE(code) == BPF_DW, ctx);
+               if (bpf_atomic_is_load_store(insn))
+                       ret = emit_atomic_ld_st(rd, rs, off, imm, code, ctx);
+               else
+                       ret = emit_atomic_rmw(rd, rs, off, imm, code, ctx);
+               if (ret)
+                       return ret;
                break;
 
        case BPF_STX | BPF_PROBE_MEM32 | BPF_B: