1 // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2 /* Copyright (C) 2016-2018 Netronome Systems, Inc. */
4 #define pr_fmt(fmt) "NFP net bpf: " fmt
8 #include <linux/filter.h>
9 #include <linux/kernel.h>
10 #include <linux/pkt_cls.h>
11 #include <linux/reciprocal_div.h>
12 #include <linux/unistd.h>
15 #include "../nfp_asm.h"
16 #include "../nfp_net_ctrl.h"
18 /* --- NFP prog --- */
19 /* Foreach "multiple" entries macros provide pos and next<n> pointers.
20 * It's safe to modify the next pointers (but not pos).
/* Walk the nfp_prog->insns list with a sliding 2-entry window (pos, next).
 * Both termination checks are needed so neither pointer runs onto the
 * list head sentinel.
 */
22 #define nfp_for_each_insn_walk2(nfp_prog, pos, next) \
23 for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
24 next = list_next_entry(pos, l); \
25 &(nfp_prog)->insns != &pos->l && \
26 &(nfp_prog)->insns != &next->l; \
27 pos = nfp_meta_next(pos), \
28 next = nfp_meta_next(pos))
/* Same as walk2 but with a 3-entry window (pos, next, next2). */
30 #define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \
31 for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
32 next = list_next_entry(pos, l), \
33 next2 = list_next_entry(next, l); \
34 &(nfp_prog)->insns != &pos->l && \
35 &(nfp_prog)->insns != &next->l && \
36 &(nfp_prog)->insns != &next2->l; \
37 pos = nfp_meta_next(pos), \
38 next = nfp_meta_next(pos), \
39 next2 = nfp_meta_next(next))
/* True when @meta is not the first entry on the nfp_prog->insns list. */
42 nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
44 return meta->l.prev != &nfp_prog->insns;
/* Append one 64-bit NFP instruction word to the program buffer.
 * Records -ENOSPC in nfp_prog->error when the allocated buffer is full;
 * translation will then be failed by the caller.
 * NOTE(review): lines appear dropped here by extraction (pr_warn argument
 * list and the early return) — confirm against the full file.
 */
47 static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
49 if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) {
50 pr_warn("instruction limit reached (%u NFP instructions)\n",
52 nfp_prog->error = -ENOSPC;
56 nfp_prog->prog[nfp_prog->prog_len] = insn;
/* Current emit offset == number of instruction words pushed so far. */
60 static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
62 return nfp_prog->prog_len;
/* Check that the emit cursor is at the expected offset @off. */
66 nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
68 /* If there is a recorded error we may have dropped instructions;
69 * that doesn't have to be due to translator bug, and the translation
70 * will fail anyway, so just return OK.
74 return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
77 /* --- Emitters --- */
/* Pack and push a CPP command instruction from pre-encoded A/B operands.
 * Token and target command come from the cmd_tgt_act[] table indexed by @op;
 * the SIG bit is set whenever the context is allowed to swap.
 */
79 __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
80 u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
85 insn = FIELD_PREP(OP_CMD_A_SRC, areg) |
86 FIELD_PREP(OP_CMD_CTX, ctx) |
87 FIELD_PREP(OP_CMD_B_SRC, breg) |
88 FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
89 FIELD_PREP(OP_CMD_XFER, xfer) |
90 FIELD_PREP(OP_CMD_CNT, size) |
91 FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
92 FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
93 FIELD_PREP(OP_CMD_INDIR, indir) |
94 FIELD_PREP(OP_CMD_MODE, mode);
96 nfp_prog_push(nfp_prog, insn);
/* swreg front end for __emit_cmd(). Command operands must encode without
 * swapping and without LMextn; either condition records -EFAULT.
 * NOTE(review): '®' below is mojibake of '&reg' (address-of local 'reg')
 * introduced by extraction — restore before building.
 */
100 emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
101 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
103 struct nfp_insn_re_regs reg;
106 err = swreg_to_restricted(reg_none(), lreg, rreg, ®, false);
108 nfp_prog->error = err;
112 pr_err("cmd can't swap arguments\n");
113 nfp_prog->error = -EFAULT;
116 if (reg.dst_lmextn || reg.src_lmextn) {
117 pr_err("cmd can't use LMextn\n");
118 nfp_prog->error = -EFAULT;
122 __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
/* Direct-reference command (indir == false). */
127 emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
128 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
130 emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
/* Indirect-reference command (indir == true); length etc. taken from
 * PREV_ALU set up by the caller.
 */
134 emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
135 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
137 emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
/* Pack and push a branch instruction. addr_lo is the part of @addr that
 * fits the ADDR_LO field; addr_hi is a single flag bit recording whether
 * any higher bits were present.
 */
141 __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
142 enum br_ctx_signal_state css, u16 addr, u8 defer)
144 u16 addr_lo, addr_hi;
147 addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
148 addr_hi = addr != addr_lo;
151 FIELD_PREP(OP_BR_MASK, mask) |
152 FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
153 FIELD_PREP(OP_BR_CSS, css) |
154 FIELD_PREP(OP_BR_DEFBR, defer) |
155 FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
156 FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
158 nfp_prog_push(nfp_prog, insn);
/* Branch with a relocation record. Unconditional branches only support up
 * to 2 defer slots; more records -EFAULT. The relocation type is OR-ed
 * into the just-pushed word.
 */
162 emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
163 enum nfp_relo_type relo)
165 if (mask == BR_UNC && defer > 2) {
166 pr_err("BUG: branch defer out of bounds %d\n", defer);
167 nfp_prog->error = -EFAULT;
171 __emit_br(nfp_prog, mask,
172 mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
173 BR_CSS_NONE, addr, defer);
175 nfp_prog->prog[nfp_prog->prog_len - 1] |=
176 FIELD_PREP(OP_RELO_TYPE, relo);
/* Plain relative branch (RELO_BR_REL relocation). */
180 emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
182 emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
/* Pack and push a branch-on-bit instruction (@set selects bset vs bclr). */
186 __emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
187 bool set, bool src_lmextn)
189 u16 addr_lo, addr_hi;
192 addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
193 addr_hi = addr != addr_lo;
195 insn = OP_BR_BIT_BASE |
196 FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
197 FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
198 FIELD_PREP(OP_BR_BIT_BV, set) |
199 FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
200 FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
201 FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
202 FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);
204 nfp_prog_push(nfp_prog, insn);
/* swreg front end for branch-on-bit with relocation.
 * NOTE(review): '®' below is mojibake of '&reg' — restore before building.
 */
208 emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
209 u8 defer, bool set, enum nfp_relo_type relo)
211 struct nfp_insn_re_regs reg;
214 /* NOTE: The bit to test is specified as an rotation amount, such that
215 * the bit to test will be placed on the MSB of the result when
216 * doing a rotate right. For bit X, we need right rotate X + 1.
220 err = swreg_to_restricted(reg_none(), src, reg_imm(bit), ®, false);
222 nfp_prog->error = err;
226 __emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
229 nfp_prog->prog[nfp_prog->prog_len - 1] |=
230 FIELD_PREP(OP_RELO_TYPE, relo);
/* Branch if @bit of @src is set, relative relocation. */
234 emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
236 emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
/* Pack and push a branch-to-ALU-result instruction (computed jump). */
240 __emit_br_alu(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
241 u8 defer, bool dst_lmextn, bool src_lmextn)
245 insn = OP_BR_ALU_BASE |
246 FIELD_PREP(OP_BR_ALU_A_SRC, areg) |
247 FIELD_PREP(OP_BR_ALU_B_SRC, breg) |
248 FIELD_PREP(OP_BR_ALU_DEFBR, defer) |
249 FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi) |
250 FIELD_PREP(OP_BR_ALU_SRC_LMEXTN, src_lmextn) |
251 FIELD_PREP(OP_BR_ALU_DST_LMEXTN, dst_lmextn);
253 nfp_prog_push(nfp_prog, insn);
/* Return through the address held in @base (br_alu with imm_hi == 0).
 * NOTE(review): '®' below is mojibake of '&reg' — restore before building.
 */
256 static void emit_rtn(struct nfp_prog *nfp_prog, swreg base, u8 defer)
258 struct nfp_insn_ur_regs reg;
261 err = swreg_to_unrestricted(reg_none(), base, reg_imm(0), ®);
263 nfp_prog->error = err;
267 __emit_br_alu(nfp_prog, reg.areg, reg.breg, 0, defer, reg.dst_lmextn,
/* Pack and push an immed instruction: load a (shifted, optionally
 * inverted) 16-bit immediate, with the high half carried in @imm_hi.
 */
272 __emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
273 enum immed_width width, bool invert,
274 enum immed_shift shift, bool wr_both,
275 bool dst_lmextn, bool src_lmextn)
279 insn = OP_IMMED_BASE |
280 FIELD_PREP(OP_IMMED_A_SRC, areg) |
281 FIELD_PREP(OP_IMMED_B_SRC, breg) |
282 FIELD_PREP(OP_IMMED_IMM, imm_hi) |
283 FIELD_PREP(OP_IMMED_WIDTH, width) |
284 FIELD_PREP(OP_IMMED_INV, invert) |
285 FIELD_PREP(OP_IMMED_SHIFT, shift) |
286 FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
287 FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
288 FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);
290 nfp_prog_push(nfp_prog, insn);
/* swreg front end for __emit_immed(). An immediate destination makes no
 * sense and records -EFAULT. Low byte of @imm rides in the B operand
 * encoding; the high byte goes into the instruction's IMM field.
 */
294 emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
295 enum immed_width width, bool invert, enum immed_shift shift)
297 struct nfp_insn_ur_regs reg;
300 if (swreg_type(dst) == NN_REG_IMM) {
301 nfp_prog->error = -EFAULT;
305 err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), ®);
307 nfp_prog->error = err;
311 /* Use reg.dst when destination is No-Dest. */
312 __emit_immed(nfp_prog,
313 swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
314 reg.breg, imm >> 8, width, invert, shift,
315 reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
/* Pack and push a shift instruction. Shift amount must fit the SHIFT
 * field or -EFAULT is recorded.
 */
319 __emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
320 enum shf_sc sc, u8 shift,
321 u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
322 bool dst_lmextn, bool src_lmextn)
326 if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
327 nfp_prog->error = -EFAULT;
331 /* NFP shift instruction has something special. If shift direction is
332 * left then shift amount of 1 to 31 is specified as 32 minus the amount
335 * But no need to do this for indirect shift which has shift amount be
336 * 0. Even after we do this subtraction, shift amount 0 will be turned
337 * into 32 which will eventually be encoded the same as 0 because only
338 * low 5 bits are encoded, but shift amount be 32 will fail the
339 * FIELD_PREP check done later on shift mask (0x1f), due to 32 is out of
342 if (sc == SHF_SC_L_SHF && shift)
346 FIELD_PREP(OP_SHF_A_SRC, areg) |
347 FIELD_PREP(OP_SHF_SC, sc) |
348 FIELD_PREP(OP_SHF_B_SRC, breg) |
349 FIELD_PREP(OP_SHF_I8, i8) |
350 FIELD_PREP(OP_SHF_SW, sw) |
351 FIELD_PREP(OP_SHF_DST, dst) |
352 FIELD_PREP(OP_SHF_SHIFT, shift) |
353 FIELD_PREP(OP_SHF_OP, op) |
354 FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
355 FIELD_PREP(OP_SHF_WR_AB, wr_both) |
356 FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
357 FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);
359 nfp_prog_push(nfp_prog, insn);
/* swreg front end for __emit_shf().
 * NOTE(review): '®' below is mojibake of '&reg' — restore before building.
 */
363 emit_shf(struct nfp_prog *nfp_prog, swreg dst,
364 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
366 struct nfp_insn_re_regs reg;
369 err = swreg_to_restricted(dst, lreg, rreg, ®, true);
371 nfp_prog->error = err;
375 __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
376 reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
377 reg.dst_lmextn, reg.src_lmextn);
/* Shift by an amount held in a register (shift == 0 selects indirect);
 * rotation cannot be done indirectly and records -EFAULT.
 */
381 emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
382 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
384 if (sc == SHF_SC_R_ROT) {
385 pr_err("indirect shift is not allowed on rotation\n");
386 nfp_prog->error = -EFAULT;
390 emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
/* Pack and push an ALU instruction from pre-encoded operands. */
394 __emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
395 u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
396 bool dst_lmextn, bool src_lmextn)
401 FIELD_PREP(OP_ALU_A_SRC, areg) |
402 FIELD_PREP(OP_ALU_B_SRC, breg) |
403 FIELD_PREP(OP_ALU_DST, dst) |
404 FIELD_PREP(OP_ALU_SW, swap) |
405 FIELD_PREP(OP_ALU_OP, op) |
406 FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
407 FIELD_PREP(OP_ALU_WR_AB, wr_both) |
408 FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
409 FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);
411 nfp_prog_push(nfp_prog, insn);
/* swreg front end for __emit_alu() using unrestricted operand encoding.
 * NOTE(review): '®' below is mojibake of '&reg' — restore before building.
 */
415 emit_alu(struct nfp_prog *nfp_prog, swreg dst,
416 swreg lreg, enum alu_op op, swreg rreg)
418 struct nfp_insn_ur_regs reg;
421 err = swreg_to_unrestricted(dst, lreg, rreg, ®);
423 nfp_prog->error = err;
427 __emit_alu(nfp_prog, reg.dst, reg.dst_ab,
428 reg.areg, op, reg.breg, reg.swap, reg.wr_both,
429 reg.dst_lmextn, reg.src_lmextn);
/* Pack and push a multiply-step instruction from pre-encoded operands. */
433 __emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg,
434 enum mul_type type, enum mul_step step, u16 breg, bool swap,
435 bool wr_both, bool dst_lmextn, bool src_lmextn)
440 FIELD_PREP(OP_MUL_A_SRC, areg) |
441 FIELD_PREP(OP_MUL_B_SRC, breg) |
442 FIELD_PREP(OP_MUL_STEP, step) |
443 FIELD_PREP(OP_MUL_DST_AB, dst_ab) |
444 FIELD_PREP(OP_MUL_SW, swap) |
445 FIELD_PREP(OP_MUL_TYPE, type) |
446 FIELD_PREP(OP_MUL_WR_AB, wr_both) |
447 FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) |
448 FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn);
450 nfp_prog_push(nfp_prog, insn);
/* swreg front end for one multiply step. MUL_TYPE_START must use
 * MUL_STEP_NONE (-EINVAL otherwise). For the LAST/LAST_2 steps the left
 * source doubles as destination, so it is passed in the dst slot of
 * swreg_to_unrestricted().
 * NOTE(review): '®' below is mojibake of '&reg' — restore before building.
 */
454 emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type,
455 enum mul_step step, swreg rreg)
457 struct nfp_insn_ur_regs reg;
461 if (type == MUL_TYPE_START && step != MUL_STEP_NONE) {
462 nfp_prog->error = -EINVAL;
466 if (step == MUL_LAST || step == MUL_LAST_2) {
467 /* When type is step and step Number is LAST or LAST2, left
468 * source is used as destination.
470 err = swreg_to_unrestricted(lreg, reg_none(), rreg, ®);
473 err = swreg_to_unrestricted(reg_none(), lreg, rreg, ®);
478 nfp_prog->error = err;
482 __emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap,
483 reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
/* Pack and push a ld_field instruction: merge selected bytes (@bmask) of
 * the B operand into the A operand, with optional shift and zeroing.
 */
487 __emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
488 u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
489 bool zero, bool swap, bool wr_both,
490 bool dst_lmextn, bool src_lmextn)
495 FIELD_PREP(OP_LDF_A_SRC, areg) |
496 FIELD_PREP(OP_LDF_SC, sc) |
497 FIELD_PREP(OP_LDF_B_SRC, breg) |
498 FIELD_PREP(OP_LDF_I8, imm8) |
499 FIELD_PREP(OP_LDF_SW, swap) |
500 FIELD_PREP(OP_LDF_ZF, zero) |
501 FIELD_PREP(OP_LDF_BMASK, bmask) |
502 FIELD_PREP(OP_LDF_SHF, shift) |
503 FIELD_PREP(OP_LDF_WR_AB, wr_both) |
504 FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
505 FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);
507 nfp_prog_push(nfp_prog, insn);
/* swreg front end; ld_field reads and writes @dst, hence dst is passed
 * as both destination and left source.
 * NOTE(review): '®' below is mojibake of '&reg' — restore before building.
 */
511 emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
512 enum shf_sc sc, u8 shift, bool zero)
514 struct nfp_insn_re_regs reg;
517 /* Note: ld_field is special as it uses one of the src regs as dst */
518 err = swreg_to_restricted(dst, dst, src, ®, true);
520 nfp_prog->error = err;
524 __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
525 reg.i8, zero, reg.swap, reg.wr_both,
526 reg.dst_lmextn, reg.src_lmextn);
/* Non-zeroing variant: untouched bytes of @dst are preserved. */
530 emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
531 enum shf_sc sc, u8 shift)
533 emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
/* Pack and push a local-CSR access; @addr is a byte address, encoded in
 * 4-byte units.
 */
537 __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
538 bool dst_lmextn, bool src_lmextn)
542 insn = OP_LCSR_BASE |
543 FIELD_PREP(OP_LCSR_A_SRC, areg) |
544 FIELD_PREP(OP_LCSR_B_SRC, breg) |
545 FIELD_PREP(OP_LCSR_WRITE, wr) |
546 FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
547 FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
548 FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
550 nfp_prog_push(nfp_prog, insn);
/* Write @src to local CSR @addr.
 * NOTE(review): '®' below is mojibake of '&reg' — restore before building.
 */
553 static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
555 struct nfp_insn_ur_regs reg;
558 /* This instruction takes immeds instead of reg_none() for the ignored
559 * operand, but we can't encode 2 immeds in one instr with our normal
560 * swreg infra so if param is an immed, we encode as reg_none() and
561 * copy the immed to both operands.
563 if (swreg_type(src) == NN_REG_IMM) {
564 err = swreg_to_unrestricted(reg_none(), src, reg_none(), ®);
567 err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), ®);
570 nfp_prog->error = err;
574 __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
575 false, reg.src_lmextn);
578 /* CSR value is read in following immed[gpr, 0] */
579 static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
581 __emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
/* No-op: an immed with both operands set to the no-dest encoding. */
584 static void emit_nop(struct nfp_prog *nfp_prog)
586 __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
589 /* --- Wrappers --- */
/* Try to express a 32-bit immediate as a 16-bit value plus a byte-shift.
 * Returns true on success with *val and *shift filled in. The caller may
 * also try the bitwise complement (immed's invert flag) before giving up.
 */
590 static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
592 if (!(imm & 0xffff0000)) {
594 *shift = IMMED_SHIFT_0B;
595 } else if (!(imm & 0xff0000ff)) {
597 *shift = IMMED_SHIFT_1B;
598 } else if (!(imm & 0x0000ffff)) {
600 *shift = IMMED_SHIFT_2B;
/* Load a full 32-bit immediate into @dst: one immed when it packs
 * (directly or inverted), otherwise two (low half then high half).
 */
608 static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
610 enum immed_shift shift;
613 if (pack_immed(imm, &val, &shift)) {
614 emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
615 } else if (pack_immed(~imm, &val, &shift)) {
616 emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
618 emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
619 false, IMMED_SHIFT_0B);
620 emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
621 false, IMMED_SHIFT_2B);
/* Immediate load carrying a relocation; must be a single instruction, so
 * large immediates (which would need two) record -EFAULT.
 */
626 wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
627 enum nfp_relo_type relo)
630 pr_err("relocation of a large immediate!\n");
631 nfp_prog->error = -EFAULT;
634 emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
636 nfp_prog->prog[nfp_prog->prog_len - 1] |=
637 FIELD_PREP(OP_RELO_TYPE, relo);
640 /* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
641 * If the @imm is small enough encode it directly in operand and return
642 * otherwise load @imm to a spare register and return its encoding.
644 static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
646 if (FIELD_FIT(UR_REG_IMM_MAX, imm))
649 wrp_immed(nfp_prog, tmp_reg, imm);
653 /* re_load_imm_any() - encode immediate or use tmp register (restricted)
654 * If the @imm is small enough encode it directly in operand and return
655 * otherwise load @imm to a spare register and return its encoding.
657 static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
659 if (FIELD_FIT(RE_REG_IMM_MAX, imm))
662 wrp_immed(nfp_prog, tmp_reg, imm);
/* Emit @count no-op instructions (body not visible in this chunk). */
666 static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
/* Register move: ALU pass-through of @src into @dst. */
672 static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
674 emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
/* Move between GPRs by index, writing both banks of @dst. */
677 static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
679 wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
682 /* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
683 * result to @dst from low end.
/* NOTE(review): mask is a byte mask for ld_field — assumes field_len <= 4;
 * confirm callers in the full file.
 */
686 wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
689 enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
690 u8 mask = (1 << field_len) - 1;
692 emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
695 /* wrp_reg_or_subpart() - load @field_len bytes from low end of @src, or the
696 * result to @dst from offset, there is no change on the other bits of @dst.
699 wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
700 u8 field_len, u8 offset)
702 enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
703 u8 mask = ((1 << field_len) - 1) << offset;
705 emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
/* Build the (rega, regb) pair for a 40-bit address: GPR pair plus
 * @offset, added with carry into the high word. Zero offset uses the
 * source pair directly with no instructions emitted.
 */
709 addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
710 swreg *rega, swreg *regb)
712 if (offset == reg_imm(0)) {
713 *rega = reg_a(src_gpr);
714 *regb = reg_b(src_gpr + 1);
718 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
719 emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
721 *rega = imm_a(nfp_prog);
722 *regb = imm_b(nfp_prog);
725 /* NFP has Command Push Pull bus which supports bulk memory operations. */
/* Translate a gathered load+store pair into CPP bulk read/write commands.
 * Reads len bytes (from ldst_gather_len) into transfer registers, copies
 * them to transfer-out, then picks a write strategy by length/alignment:
 *  - <= 32B unaligned: direct/indirect write8
 *  - <= 32B aligned:   direct write32
 *  - larger:           indirect write32, with a trailing write8 for any
 *                      unaligned tail (or the 33..40B direct split case).
 * Finally re-materializes the original load's destination register so
 * data flow matches the pre-optimization program (see TODO below).
 */
726 static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
728 bool descending_seq = meta->ldst_gather_len < 0;
729 s16 len = abs(meta->ldst_gather_len);
735 off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
736 src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
737 src_base = reg_a(meta->insn.src_reg * 2);
738 xfer_num = round_up(len, 4) / 4;
741 addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base,
744 /* Setup PREV_ALU fields to override memory read length. */
746 wrp_immed(nfp_prog, reg_none(),
747 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
749 /* Memory read from source addr into transfer-in registers. */
750 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
751 src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
752 src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32)
754 /* Move from transfer-in to transfer-out. */
755 for (i = 0; i < xfer_num; i++)
756 wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));
758 off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));
761 /* Use single direct_ref write8. */
762 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
763 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
765 } else if (len <= 32 && IS_ALIGNED(len, 4)) {
766 /* Use single direct_ref write32. */
767 emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
768 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
770 } else if (len <= 32) {
771 /* Use single indirect_ref write8. */
772 wrp_immed(nfp_prog, reg_none(),
773 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
774 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
775 reg_a(meta->paired_st->dst_reg * 2), off,
776 len - 1, CMD_CTX_SWAP);
777 } else if (IS_ALIGNED(len, 4)) {
778 /* Use single indirect_ref write32. */
779 wrp_immed(nfp_prog, reg_none(),
780 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
781 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
782 reg_a(meta->paired_st->dst_reg * 2), off,
783 xfer_num - 1, CMD_CTX_SWAP);
784 } else if (len <= 40) {
785 /* Use one direct_ref write32 to write the first 32-bytes, then
786 * another direct_ref write8 to write the remaining bytes.
788 emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
789 reg_a(meta->paired_st->dst_reg * 2), off, 7,
792 off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
794 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
795 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
798 /* Use one indirect_ref write32 to write 4-bytes aligned length,
799 * then another direct_ref write8 to write the remaining bytes.
803 wrp_immed(nfp_prog, reg_none(),
804 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
805 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
806 reg_a(meta->paired_st->dst_reg * 2), off,
807 xfer_num - 2, CMD_CTX_SWAP);
808 new_off = meta->paired_st->off + (xfer_num - 1) * 4;
809 off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
810 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
811 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
812 (len & 0x3) - 1, CMD_CTX_SWAP);
815 /* TODO: The following extra load is to make sure data flow be identical
816 * before and after we do memory copy optimization.
818 * The load destination register is not guaranteed to be dead, so we
819 * need to make sure it is loaded with the value the same as before
820 * this transformation.
822 * These extra loads could be removed once we have accurate register
/* Pick which transfer register holds the bytes the original load read. */
827 else if (BPF_SIZE(meta->insn.code) != BPF_DW)
828 xfer_num = xfer_num - 1;
830 xfer_num = xfer_num - 2;
832 switch (BPF_SIZE(meta->insn.code)) {
834 wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
835 reg_xfer(xfer_num), 1,
836 IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
839 wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
840 reg_xfer(xfer_num), 2, (len & 3) ^ 2);
843 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
847 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
849 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
850 reg_xfer(xfer_num + 1));
/* Sub-64-bit loads zero-extend: clear the high GPR. */
854 if (BPF_SIZE(meta->insn.code) != BPF_DW)
855 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
/* Packet-data load (big endian): CPP read8 into transfer regs, then shift
 * out unwanted bytes for sub-word sizes; multi-word results copied per
 * 32-bit word. High GPR is zeroed for sub-64-bit loads.
 */
861 data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
866 /* We load the value from the address indicated in @offset and then
867 * shift out the data we don't need. Note: this is big endian!
870 shift = size < 4 ? 4 - size : 0;
872 emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
873 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);
877 emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
878 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
880 for (; i * 4 < size; i++)
881 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
884 wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
/* Memory load (little endian / host order): swapped read32, then mask
 * off unwanted bytes for sub-word sizes.
 */
890 data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
891 swreg lreg, swreg rreg, int size, enum cmd_mode mode)
896 /* We load the value from the address indicated in rreg + lreg and then
897 * mask out the data we don't need. Note: this is little endian!
900 mask = size < 4 ? GENMASK(size - 1, 0) : 0;
902 emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
903 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);
907 emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
908 reg_xfer(0), SHF_SC_NONE, 0, true);
910 for (; i * 4 < size; i++)
911 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
914 wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
/* 32-bit address variant: base GPR + offset, 32b command mode. */
920 data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
923 return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
/* 40-bit address variant: build the address pair first, 40b_BA mode.
 * NOTE(review): '®a'/'®b' below are mojibake of '&rega'/'&regb'.
 */
928 data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
933 addr40_offset(nfp_prog, src_gpr, offset, ®a, ®b);
935 return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
936 size, CMD_MODE_40b_BA);
/* Indirect packet load: offset = src GPR + imm. Bounds-check offset+size
 * against packet length, branching to the abort handler on overflow,
 * then perform the load via data_ld().
 */
940 construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
944 /* Calculate the true offset (src_reg + imm) */
945 tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
946 emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);
948 /* Check packet length (size guaranteed to fit b/c it's u8) */
949 emit_alu(nfp_prog, imm_a(nfp_prog),
950 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
951 emit_alu(nfp_prog, reg_none(),
952 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
953 emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
956 return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
/* Direct packet load at a constant offset, with the same length check. */
959 static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
963 /* Check packet length */
964 tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
965 emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
966 emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
969 tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
970 return data_ld(nfp_prog, tmp_reg, 0, size);
/* Store from GPRs: copy source words into transfer regs, then a swapped
 * CPP write8 of @size bytes to dst GPR + offset.
 */
974 data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
979 for (i = 0; i * 4 < size; i++)
980 wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
982 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
983 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
/* Store an immediate: low word always, high word only for 8-byte sizes
 * (second wrp_immed is presumably conditional on size — truncated here).
 */
989 data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
992 wrp_immed(nfp_prog, reg_xfer(0), imm);
994 wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
996 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
997 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
/* Callback type for one per-GPR-slice step of a stack (LMEM) access. */
1003 (*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
1004 unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
/* lmem_step for loads: move up to 4 bytes from LMEM into bytes
 * [dst_byte, dst_byte+size) of GPR @dst. Whole aligned words go via
 * wrp_mov; partial words via ld_field, spilling through imm_a() when the
 * LMEM index is out of direct ld_field range (RMW path).
 */
1008 wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
1009 unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
1012 bool should_inc = needs_inc && new_gpr && !last;
1019 if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
1024 /* Move the entire word */
1026 wrp_mov(nfp_prog, reg_both(dst),
1027 should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
1031 if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
1036 mask = (1 << size) - 1;
1039 if (WARN_ON_ONCE(mask > 0xf))
1042 shf = abs(src_byte - dst_byte) * 8;
1043 if (src_byte == dst_byte) {
1045 } else if (src_byte < dst_byte) {
1052 /* ld_field can address fewer indexes, if offset too large do RMW.
1053 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
1055 if (idx <= RE_REG_LM_IDX_MAX) {
1056 reg = reg_lm(lm3 ? 3 : 0, idx);
1058 reg = imm_a(nfp_prog);
1059 /* If it's not the first part of the load and we start a new GPR
1060 * that means we are loading a second part of the LMEM word into
1061 * a new GPR. IOW we've already looked that LMEM word and
1062 * therefore it has been loaded into imm_a().
1064 if (first || !new_gpr)
1065 wrp_mov(nfp_prog, reg, reg_lm(0, idx));
1068 emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);
1071 wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
/* lmem_step for stores: mirror of wrp_lmem_load, writing bytes of GPR
 * @src into LMEM, with the same direct-word / ld_field / RMW-through-
 * imm_a() strategies.
 */
1077 wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
1078 unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
1081 bool should_inc = needs_inc && new_gpr && !last;
1088 if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
1093 /* Move the entire word */
1096 should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
1101 if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
1106 mask = (1 << size) - 1;
1109 if (WARN_ON_ONCE(mask > 0xf))
1112 shf = abs(src_byte - dst_byte) * 8;
1113 if (src_byte == dst_byte) {
1115 } else if (src_byte < dst_byte) {
1122 /* ld_field can address fewer indexes, if offset too large do RMW.
1123 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
1125 if (idx <= RE_REG_LM_IDX_MAX) {
1126 reg = reg_lm(lm3 ? 3 : 0, idx);
1128 reg = imm_a(nfp_prog);
1129 /* Only first and last LMEM locations are going to need RMW,
1130 * the middle location will be overwritten fully.
1133 wrp_mov(nfp_prog, reg, reg_lm(0, idx));
1136 emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);
1138 if (new_gpr || last) {
1139 if (idx > RE_REG_LM_IDX_MAX)
1140 wrp_mov(nfp_prog, reg_lm(0, idx), reg);
1142 wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
/* Drive a stack (LMEM) access of @size bytes at stack offset computed
 * from the insn. Chooses the addressing strategy:
 *  - non-constant pointer / caller frame: compute address at runtime and
 *    program LMaddr3;
 *  - off+size within bottom 64B: address through LMaddr0 directly;
 *  - access within one 32B-aligned window: program LMaddr3 once, no
 *    post-increment (offset is ORed, hence the alignment requirement);
 *  - otherwise: program LMaddr3 and use post-increment.
 * Then loops over 4-byte GPR slices calling @step (load or store cb).
 */
1149 mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1150 unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
1151 bool clr_gpr, lmem_step step)
1153 s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off;
1154 bool first = true, last;
1155 bool needs_inc = false;
1156 swreg stack_off_reg;
1162 if (meta->ptr_not_const ||
1163 meta->flags & FLAG_INSN_PTR_CALLER_STACK_FRAME) {
1164 /* Use of the last encountered ptr_off is OK, they all have
1165 * the same alignment. Depend on low bits of value being
1166 * discarded when written to LMaddr register.
1168 stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
1169 stack_imm(nfp_prog));
1171 emit_alu(nfp_prog, imm_b(nfp_prog),
1172 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);
1175 } else if (off + size <= 64) {
1176 /* We can reach bottom 64B with LMaddr0 */
1178 } else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
1179 /* We have to set up a new pointer. If we know the offset
1180 * and the entire access falls into a single 32 byte aligned
1181 * window we won't have to increment the LM pointer.
1182 * The 32 byte alignment is important because offset is ORed in
1183 * not added when doing *l$indexN[off].
1185 stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
1186 stack_imm(nfp_prog));
1187 emit_alu(nfp_prog, imm_b(nfp_prog),
1188 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
1192 stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
1193 stack_imm(nfp_prog));
1195 emit_alu(nfp_prog, imm_b(nfp_prog),
1196 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
/* LMaddr writes need settle time before use — hence the nops. */
1201 emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
1202 /* For size < 4 one slot will be filled by zeroing of upper. */
1203 wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
1206 if (clr_gpr && size < 8)
1207 wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
/* Walk the access one GPR-byte slice at a time, never crossing a
 * 4-byte boundary within a single step.
 */
1213 slice_size = min(size, 4 - gpr_byte);
1214 slice_end = min(off + slice_size, round_up(off + 1, 4));
1215 slice_size = slice_end - off;
1217 last = slice_size == size;
1222 ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
1223 first, gpr != prev_gpr, last, lm3, needs_inc);
1230 gpr_byte += slice_size;
1231 if (gpr_byte >= 4) {
/* 32-bit ALU op with an immediate, with algebraic shortcuts:
 * AND -> load 0, OR -> load ~0, XOR -> NOT (presumably guarded by
 * imm == 0 / imm == ~0 checks dropped by extraction — confirm in full
 * file); otherwise materialize @imm and do the real op.
 */
1244 wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
1248 if (alu_op == ALU_OP_AND) {
1250 wrp_immed(nfp_prog, reg_both(dst), 0);
1254 if (alu_op == ALU_OP_OR) {
1256 wrp_immed(nfp_prog, reg_both(dst), ~0U);
1260 if (alu_op == ALU_OP_XOR) {
1262 emit_alu(nfp_prog, reg_both(dst), reg_none(),
1263 ALU_OP_NOT, reg_b(dst));
1268 tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
1269 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
/* 64-bit ALU with immediate: low and high 32-bit halves independently;
 * @skip marks the whole insn as a no-op instead.
 */
1273 wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1274 enum alu_op alu_op, bool skip)
1276 const struct bpf_insn *insn = &meta->insn;
1277 u64 imm = insn->imm; /* sign extend */
1280 meta->flags |= FLAG_INSN_SKIP_NOOP;
1284 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
1285 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
/* 64-bit ALU register-register: two 32-bit ops over the GPR pairs. */
1291 wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1294 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
1296 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1297 emit_alu(nfp_prog, reg_both(dst + 1),
1298 reg_a(dst + 1), alu_op, reg_b(src + 1));
/* 32-bit ALU with immediate; high GPR zeroed (BPF 32-bit ops zero-extend). */
1304 wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1307 const struct bpf_insn *insn = &meta->insn;
1309 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
1310 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
/* 32-bit ALU register-register; high GPR zeroed likewise. */
1316 wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1319 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
1321 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1322 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
/* Test one 32-bit half: ALU to flags only (no dest), then branch. */
1328 wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
1329 enum br_mask br_mask, u16 off)
1331 emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src))
1332 emit_br(nfp_prog, br_mask, off, 0);
/* Test-and-branch: low half always, high half too for 64-bit jumps. */
1336 wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1337 enum alu_op alu_op, enum br_mask br_mask)
1339 const struct bpf_insn *insn = &meta->insn;
1341 wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
1342 insn->src_reg * 2, br_mask, insn->off);
1343 if (is_mbpf_jmp64(meta))
1344 wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
1345 insn->src_reg * 2 + 1, br_mask, insn->off);
1350 static const struct jmp_code_map {
1351 enum br_mask br_mask;
1353 } jmp_code_map[] = {
1354 [BPF_JGT >> 4] = { BR_BLO, true },
1355 [BPF_JGE >> 4] = { BR_BHS, false },
1356 [BPF_JLT >> 4] = { BR_BLO, false },
1357 [BPF_JLE >> 4] = { BR_BHS, true },
1358 [BPF_JSGT >> 4] = { BR_BLT, true },
1359 [BPF_JSGE >> 4] = { BR_BGE, false },
1360 [BPF_JSLT >> 4] = { BR_BLT, false },
1361 [BPF_JSLE >> 4] = { BR_BGE, true },
1364 static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
1368 op = BPF_OP(meta->insn.code) >> 4;
1369 /* br_mask of 0 is BR_BEQ which we don't use in jump code table */
1370 if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
1371 !jmp_code_map[op].br_mask,
1372 "no code found for jump instruction"))
1375 return &jmp_code_map[op];
1378 static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1380 const struct bpf_insn *insn = &meta->insn;
1381 u64 imm = insn->imm; /* sign extend */
1382 const struct jmp_code_map *code;
1383 enum alu_op alu_op, carry_op;
1384 u8 reg = insn->dst_reg * 2;
1387 code = nfp_jmp_code_get(meta);
1391 alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
1392 carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;
1394 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
1396 emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
1398 emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
1400 if (is_mbpf_jmp64(meta)) {
1401 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1403 emit_alu(nfp_prog, reg_none(),
1404 reg_a(reg + 1), carry_op, tmp_reg);
1406 emit_alu(nfp_prog, reg_none(),
1407 tmp_reg, carry_op, reg_a(reg + 1));
1410 emit_br(nfp_prog, code->br_mask, insn->off, 0);
1415 static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1417 const struct bpf_insn *insn = &meta->insn;
1418 const struct jmp_code_map *code;
1421 code = nfp_jmp_code_get(meta);
1425 areg = insn->dst_reg * 2;
1426 breg = insn->src_reg * 2;
1434 emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
1435 if (is_mbpf_jmp64(meta))
1436 emit_alu(nfp_prog, reg_none(),
1437 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
1438 emit_br(nfp_prog, code->br_mask, insn->off, 0);
1443 static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
1445 emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
1447 emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
1452 wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
1453 swreg rreg, bool gen_high_half)
1455 emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
1456 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg);
1457 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg);
1458 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg);
1459 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg);
1460 emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none());
1462 emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2,
1465 wrp_immed(nfp_prog, dst_hi, 0);
1469 wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
1472 emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
1473 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg);
1474 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg);
1475 emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none());
1479 wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1480 bool gen_high_half, bool ropnd_from_reg)
1482 swreg multiplier, multiplicand, dst_hi, dst_lo;
1483 const struct bpf_insn *insn = &meta->insn;
1484 u32 lopnd_max, ropnd_max;
1487 dst_reg = insn->dst_reg;
1488 multiplicand = reg_a(dst_reg * 2);
1489 dst_hi = reg_both(dst_reg * 2 + 1);
1490 dst_lo = reg_both(dst_reg * 2);
1491 lopnd_max = meta->umax_dst;
1492 if (ropnd_from_reg) {
1493 multiplier = reg_b(insn->src_reg * 2);
1494 ropnd_max = meta->umax_src;
1496 u32 imm = insn->imm;
1498 multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
1501 if (lopnd_max > U16_MAX || ropnd_max > U16_MAX)
1502 wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier,
1505 wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier);
1510 static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
1512 swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst);
1513 struct reciprocal_value_adv rvalue;
1517 if (imm > U32_MAX) {
1518 wrp_immed(nfp_prog, dst_both, 0);
1522 /* NOTE: because we are using "reciprocal_value_adv" which doesn't
1523 * support "divisor > (1u << 31)", we need to JIT separate NFP sequence
1524 * to handle such case which actually equals to the result of unsigned
1525 * comparison "dst >= imm" which could be calculated using the following
1528 * alu[--, dst, -, imm]
1530 * alu[dst, imm, +carry, 0]
1533 if (imm > 1U << 31) {
1534 swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
1536 emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b);
1537 wrp_immed(nfp_prog, imm_a(nfp_prog), 0);
1538 emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C,
1543 rvalue = reciprocal_value_adv(imm, 32);
1545 if (rvalue.is_wide_m && !(imm & 1)) {
1546 pre_shift = fls(imm & -imm) - 1;
1547 rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
1551 magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
1552 if (imm == 1U << exp) {
1553 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
1555 } else if (rvalue.is_wide_m) {
1556 wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
1558 emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
1560 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
1562 emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
1564 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
1565 SHF_SC_R_SHF, rvalue.sh - 1);
1568 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
1569 dst_b, SHF_SC_R_SHF, pre_shift);
1570 wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
1571 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
1572 dst_b, SHF_SC_R_SHF, rvalue.sh);
1578 static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1580 swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
1581 struct nfp_bpf_cap_adjust_head *adjust_head;
1582 u32 ret_einval, end;
1584 adjust_head = &nfp_prog->bpf->adjust_head;
1586 /* Optimized version - 5 vs 14 cycles */
1587 if (nfp_prog->adjust_head_location != UINT_MAX) {
1588 if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
1591 emit_alu(nfp_prog, pptr_reg(nfp_prog),
1592 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
1593 emit_alu(nfp_prog, plen_reg(nfp_prog),
1594 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1595 emit_alu(nfp_prog, pv_len(nfp_prog),
1596 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1598 wrp_immed(nfp_prog, reg_both(0), 0);
1599 wrp_immed(nfp_prog, reg_both(1), 0);
1601 /* TODO: when adjust head is guaranteed to succeed we can
1602 * also eliminate the following if (r0 == 0) branch.
1608 ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
1609 end = ret_einval + 2;
1611 /* We need to use a temp because offset is just a part of the pkt ptr */
1612 emit_alu(nfp_prog, tmp,
1613 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));
1615 /* Validate result will fit within FW datapath constraints */
1616 emit_alu(nfp_prog, reg_none(),
1617 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
1618 emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1619 emit_alu(nfp_prog, reg_none(),
1620 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
1621 emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1623 /* Validate the length is at least ETH_HLEN */
1624 emit_alu(nfp_prog, tmp_len,
1625 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1626 emit_alu(nfp_prog, reg_none(),
1627 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
1628 emit_br(nfp_prog, BR_BMI, ret_einval, 0);
1630 /* Load the ret code */
1631 wrp_immed(nfp_prog, reg_both(0), 0);
1632 wrp_immed(nfp_prog, reg_both(1), 0);
1634 /* Modify the packet metadata */
1635 emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);
1637 /* Skip over the -EINVAL ret code (defer 2) */
1638 emit_br(nfp_prog, BR_UNC, end, 2);
1640 emit_alu(nfp_prog, plen_reg(nfp_prog),
1641 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1642 emit_alu(nfp_prog, pv_len(nfp_prog),
1643 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1645 /* return -EINVAL target */
1646 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
1649 wrp_immed(nfp_prog, reg_both(0), -22);
1650 wrp_immed(nfp_prog, reg_both(1), ~0);
1652 if (!nfp_prog_confirm_current_offset(nfp_prog, end))
1658 static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1660 u32 ret_einval, end;
1663 BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN));
1665 plen = imm_a(nfp_prog);
1666 delta = reg_a(2 * 2);
1668 ret_einval = nfp_prog_current_offset(nfp_prog) + 9;
1669 end = nfp_prog_current_offset(nfp_prog) + 11;
1671 /* Calculate resulting length */
1672 emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta);
1673 /* delta == 0 is not allowed by the kernel, add must overflow to make
1676 emit_br(nfp_prog, BR_BCC, ret_einval, 0);
1678 /* if (new_len < 14) then -EINVAL */
1679 emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN));
1680 emit_br(nfp_prog, BR_BMI, ret_einval, 0);
1682 emit_alu(nfp_prog, plen_reg(nfp_prog),
1683 plen_reg(nfp_prog), ALU_OP_ADD, delta);
1684 emit_alu(nfp_prog, pv_len(nfp_prog),
1685 pv_len(nfp_prog), ALU_OP_ADD, delta);
1687 emit_br(nfp_prog, BR_UNC, end, 2);
1688 wrp_immed(nfp_prog, reg_both(0), 0);
1689 wrp_immed(nfp_prog, reg_both(1), 0);
1691 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
1694 wrp_immed(nfp_prog, reg_both(0), -22);
1695 wrp_immed(nfp_prog, reg_both(1), ~0);
1697 if (!nfp_prog_confirm_current_offset(nfp_prog, end))
1704 map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1710 /* We only have to reload LM0 if the key is not at start of stack */
1711 lm_off = nfp_prog->stack_frame_depth;
1712 lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
1713 load_lm_ptr = meta->arg2.var_off || lm_off;
1715 /* Set LM0 to start of key */
1717 emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
1718 if (meta->func_id == BPF_FUNC_map_update_elem)
1719 emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
1721 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
1723 ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
1725 /* Load map ID into A0 */
1726 wrp_mov(nfp_prog, reg_a(0), reg_a(2));
1728 /* Load the return address into B0 */
1729 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1731 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1734 /* Reset the LM0 pointer */
1738 emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
1739 wrp_nops(nfp_prog, 3);
1745 nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1747 __emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM);
1748 /* CSR value is read in following immed[gpr, 0] */
1749 emit_immed(nfp_prog, reg_both(0), 0,
1750 IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
1751 emit_immed(nfp_prog, reg_both(1), 0,
1752 IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
1757 nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1762 ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));
1764 ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
1766 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
1769 /* Load ptr type into A1 */
1770 wrp_mov(nfp_prog, reg_a(1), ptr_type);
1772 /* Load the return address into B0 */
1773 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1775 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1782 nfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1786 jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5;
1788 /* Make sure the queue id fits into FW field */
1789 emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2),
1790 ALU_OP_AND_NOT_B, reg_imm(0xff));
1791 emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2);
1793 /* Set the 'queue selected' bit and the queue value */
1794 emit_shf(nfp_prog, pv_qsel_set(nfp_prog),
1795 pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1),
1796 SHF_SC_L_SHF, PKT_VEL_QSEL_SET_BIT);
1797 emit_ld_field(nfp_prog,
1798 pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2),
1800 /* Delay slots end here, we will jump over next instruction if queue
1801 * value fits into the field.
1803 emit_ld_field(nfp_prog,
1804 pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX),
1807 if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt))
1813 /* --- Callbacks --- */
1814 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1816 const struct bpf_insn *insn = &meta->insn;
1817 u8 dst = insn->dst_reg * 2;
1818 u8 src = insn->src_reg * 2;
1820 if (insn->src_reg == BPF_REG_10) {
1821 swreg stack_depth_reg;
1823 stack_depth_reg = ur_load_imm_any(nfp_prog,
1824 nfp_prog->stack_frame_depth,
1825 stack_imm(nfp_prog));
1826 emit_alu(nfp_prog, reg_both(dst), stack_reg(nfp_prog),
1827 ALU_OP_ADD, stack_depth_reg);
1828 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1830 wrp_reg_mov(nfp_prog, dst, src);
1831 wrp_reg_mov(nfp_prog, dst + 1, src + 1);
1837 static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1839 u64 imm = meta->insn.imm; /* sign extend */
1841 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
1842 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
1847 static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1849 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
1852 static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1854 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
1857 static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1859 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
1862 static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1864 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1867 static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1869 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
1872 static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1874 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1877 static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1879 const struct bpf_insn *insn = &meta->insn;
1881 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1882 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
1883 reg_b(insn->src_reg * 2));
1884 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1885 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
1886 reg_b(insn->src_reg * 2 + 1));
1891 static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1893 const struct bpf_insn *insn = &meta->insn;
1894 u64 imm = insn->imm; /* sign extend */
1896 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
1897 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
1902 static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1904 const struct bpf_insn *insn = &meta->insn;
1906 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1907 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
1908 reg_b(insn->src_reg * 2));
1909 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1910 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
1911 reg_b(insn->src_reg * 2 + 1));
1916 static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1918 const struct bpf_insn *insn = &meta->insn;
1919 u64 imm = insn->imm; /* sign extend */
1921 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
1922 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
1927 static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1929 return wrp_mul(nfp_prog, meta, true, true);
1932 static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1934 return wrp_mul(nfp_prog, meta, true, false);
1937 static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1939 const struct bpf_insn *insn = &meta->insn;
1941 return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm);
1944 static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1946 /* NOTE: verifier hook has rejected cases for which verifier doesn't
1947 * know whether the source operand is constant or not.
1949 return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src);
1952 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1954 const struct bpf_insn *insn = &meta->insn;
1956 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
1957 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
1958 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
1959 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));
1965 * if shift_amt >= 32
1966 * dst_high = dst_low << shift_amt[4:0]
1969 * dst_high = (dst_high, dst_low) >> (32 - shift_amt)
1970 * dst_low = dst_low << shift_amt
1972 * The indirect shift will use the same logic at runtime.
1974 static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
1979 if (shift_amt < 32) {
1980 emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
1981 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
1983 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
1984 reg_b(dst), SHF_SC_L_SHF, shift_amt);
1985 } else if (shift_amt == 32) {
1986 wrp_reg_mov(nfp_prog, dst + 1, dst);
1987 wrp_immed(nfp_prog, reg_both(dst), 0);
1988 } else if (shift_amt > 32) {
1989 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
1990 reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
1991 wrp_immed(nfp_prog, reg_both(dst), 0);
1997 static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1999 const struct bpf_insn *insn = &meta->insn;
2000 u8 dst = insn->dst_reg * 2;
2002 return __shl_imm64(nfp_prog, dst, insn->imm);
2005 static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2007 emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
2009 emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
2010 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
2011 reg_b(dst), SHF_SC_R_DSHF);
2014 /* NOTE: for indirect left shift, HIGH part should be calculated first. */
2015 static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2017 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2018 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2019 reg_b(dst), SHF_SC_L_SHF);
2022 static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2024 shl_reg64_lt32_high(nfp_prog, dst, src);
2025 shl_reg64_lt32_low(nfp_prog, dst, src);
2028 static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2030 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2031 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
2032 reg_b(dst), SHF_SC_L_SHF);
2033 wrp_immed(nfp_prog, reg_both(dst), 0);
2036 static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2038 const struct bpf_insn *insn = &meta->insn;
2042 dst = insn->dst_reg * 2;
2043 umin = meta->umin_src;
2044 umax = meta->umax_src;
2046 return __shl_imm64(nfp_prog, dst, umin);
2048 src = insn->src_reg * 2;
2050 shl_reg64_lt32(nfp_prog, dst, src);
2051 } else if (umin >= 32) {
2052 shl_reg64_ge32(nfp_prog, dst, src);
2054 /* Generate different instruction sequences depending on runtime
2055 * value of shift amount.
2057 u16 label_ge32, label_end;
2059 label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
2060 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
2062 shl_reg64_lt32_high(nfp_prog, dst, src);
2063 label_end = nfp_prog_current_offset(nfp_prog) + 6;
2064 emit_br(nfp_prog, BR_UNC, label_end, 2);
2065 /* shl_reg64_lt32_low packed in delay slot. */
2066 shl_reg64_lt32_low(nfp_prog, dst, src);
2068 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
2070 shl_reg64_ge32(nfp_prog, dst, src);
2072 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2080 * if shift_amt >= 32
2082 * dst_low = dst_high >> shift_amt[4:0]
2084 * dst_high = dst_high >> shift_amt
2085 * dst_low = (dst_high, dst_low) >> shift_amt
2087 * The indirect shift will use the same logic at runtime.
2089 static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2094 if (shift_amt < 32) {
2095 emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
2096 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
2097 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
2098 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
2099 } else if (shift_amt == 32) {
2100 wrp_reg_mov(nfp_prog, dst, dst + 1);
2101 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2102 } else if (shift_amt > 32) {
2103 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2104 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
2105 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2111 static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2113 const struct bpf_insn *insn = &meta->insn;
2114 u8 dst = insn->dst_reg * 2;
2116 return __shr_imm64(nfp_prog, dst, insn->imm);
2119 /* NOTE: for indirect right shift, LOW part should be calculated first. */
2120 static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2122 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2123 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
2124 reg_b(dst + 1), SHF_SC_R_SHF);
2127 static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2129 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2130 emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
2131 reg_b(dst), SHF_SC_R_DSHF);
2134 static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2136 shr_reg64_lt32_low(nfp_prog, dst, src);
2137 shr_reg64_lt32_high(nfp_prog, dst, src);
2140 static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2142 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2143 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2144 reg_b(dst + 1), SHF_SC_R_SHF);
2145 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2148 static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2150 const struct bpf_insn *insn = &meta->insn;
2154 dst = insn->dst_reg * 2;
2155 umin = meta->umin_src;
2156 umax = meta->umax_src;
2158 return __shr_imm64(nfp_prog, dst, umin);
2160 src = insn->src_reg * 2;
2162 shr_reg64_lt32(nfp_prog, dst, src);
2163 } else if (umin >= 32) {
2164 shr_reg64_ge32(nfp_prog, dst, src);
2166 /* Generate different instruction sequences depending on runtime
2167 * value of shift amount.
2169 u16 label_ge32, label_end;
2171 label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
2172 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
2173 shr_reg64_lt32_low(nfp_prog, dst, src);
2174 label_end = nfp_prog_current_offset(nfp_prog) + 6;
2175 emit_br(nfp_prog, BR_UNC, label_end, 2);
2176 /* shr_reg64_lt32_high packed in delay slot. */
2177 shr_reg64_lt32_high(nfp_prog, dst, src);
2179 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
2181 shr_reg64_ge32(nfp_prog, dst, src);
2183 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2190 /* Code logic is the same as __shr_imm64 except ashr requires signedness bit
2191 * told through PREV_ALU result.
2193 static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2198 if (shift_amt < 32) {
2199 emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
2200 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
2201 /* Set signedness bit. */
2202 emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
2204 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2205 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
2206 } else if (shift_amt == 32) {
2207 /* NOTE: this also helps setting signedness bit. */
2208 wrp_reg_mov(nfp_prog, dst, dst + 1);
2209 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2210 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2211 } else if (shift_amt > 32) {
2212 emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
2214 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2215 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
2216 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2217 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2223 static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2225 const struct bpf_insn *insn = &meta->insn;
2226 u8 dst = insn->dst_reg * 2;
2228 return __ashr_imm64(nfp_prog, dst, insn->imm);
2231 static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2233 /* NOTE: the first insn will set both indirect shift amount (source A)
2234 * and signedness bit (MSB of result).
2236 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
2237 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2238 reg_b(dst + 1), SHF_SC_R_SHF);
2241 static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2243 /* NOTE: it is the same as logic shift because we don't need to shift in
2244 * signedness bit when the shift amount is less than 32.
2246 return shr_reg64_lt32_low(nfp_prog, dst, src);
2249 static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2251 ashr_reg64_lt32_low(nfp_prog, dst, src);
2252 ashr_reg64_lt32_high(nfp_prog, dst, src);
2255 static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2257 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
2258 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2259 reg_b(dst + 1), SHF_SC_R_SHF);
2260 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2261 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2264 /* Like ashr_imm64, but need to use indirect shift. */
2265 static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2267 const struct bpf_insn *insn = &meta->insn;
2271 dst = insn->dst_reg * 2;
2272 umin = meta->umin_src;
2273 umax = meta->umax_src;
2275 return __ashr_imm64(nfp_prog, dst, umin);
2277 src = insn->src_reg * 2;
2279 ashr_reg64_lt32(nfp_prog, dst, src);
2280 } else if (umin >= 32) {
2281 ashr_reg64_ge32(nfp_prog, dst, src);
2283 u16 label_ge32, label_end;
2285 label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
2286 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
2287 ashr_reg64_lt32_low(nfp_prog, dst, src);
2288 label_end = nfp_prog_current_offset(nfp_prog) + 6;
2289 emit_br(nfp_prog, BR_UNC, label_end, 2);
2290 /* ashr_reg64_lt32_high packed in delay slot. */
2291 ashr_reg64_lt32_high(nfp_prog, dst, src);
2293 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
2295 ashr_reg64_ge32(nfp_prog, dst, src);
2297 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2304 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2306 const struct bpf_insn *insn = &meta->insn;
2308 wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
2309 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2314 static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2316 const struct bpf_insn *insn = &meta->insn;
2318 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
2319 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2324 static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2326 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
2329 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2331 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR);
2334 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2336 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
2339 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2341 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND);
2344 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2346 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
2349 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2351 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR);
2354 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2356 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
2359 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2361 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD);
2364 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2366 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
2369 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2371 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB);
2374 static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2376 return wrp_mul(nfp_prog, meta, false, true);
2379 static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2381 return wrp_mul(nfp_prog, meta, false, false);
2384 static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2386 return div_reg64(nfp_prog, meta);
2389 static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2391 return div_imm64(nfp_prog, meta);
2394 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2396 u8 dst = meta->insn.dst_reg * 2;
2398 emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
2399 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
2404 static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2407 /* Set signedness bit (MSB of result). */
2408 emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR,
2410 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2411 reg_b(dst), SHF_SC_R_SHF, shift_amt);
2413 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
/* 32-bit ASHR with register shift amount.  When umin == umax (comparison
 * line elided) the amount is a verifier-known constant and the immediate
 * form is used; otherwise an indirect shift is emitted.
 */
2418 static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2420 const struct bpf_insn *insn = &meta->insn;
2424 dst = insn->dst_reg * 2;
2425 umin = meta->umin_src;
2426 umax = meta->umax_src;
2428 return __ashr_imm(nfp_prog, dst, umin);
2430 src = insn->src_reg * 2;
2431 /* NOTE: the first insn will set both indirect shift amount (source A)
2432 * and signedness bit (MSB of result).
2434 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst));
2435 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2436 reg_b(dst), SHF_SC_R_SHF);
/* 32-bit op: clear the upper half of the 64-bit register pair. */
2437 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
/* 32-bit ASHR by immediate. */
2442 static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2444 const struct bpf_insn *insn = &meta->insn;
2445 u8 dst = insn->dst_reg * 2;
2447 return __ashr_imm(nfp_prog, dst, insn->imm);
/* 32-bit logical shift right by immediate; high word zeroed. */
2450 static int __shr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2453 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2454 reg_b(dst), SHF_SC_R_SHF, shift_amt);
2455 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2459 static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2461 const struct bpf_insn *insn = &meta->insn;
2462 u8 dst = insn->dst_reg * 2;
2464 return __shr_imm(nfp_prog, dst, insn->imm);
/* 32-bit SHR with register amount; constant-amount fast path as in
 * ashr_reg() (the umin/umax comparison line is elided).  The OR against
 * reg_imm(0) loads the indirect shift amount from source A.
 */
2467 static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2469 const struct bpf_insn *insn = &meta->insn;
2473 dst = insn->dst_reg * 2;
2474 umin = meta->umin_src;
2475 umax = meta->umax_src;
2477 return __shr_imm(nfp_prog, dst, umin);
2479 src = insn->src_reg * 2;
2480 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2481 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2482 reg_b(dst), SHF_SC_R_SHF);
2483 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
/* 32-bit logical shift left by immediate; high word zeroed. */
2487 static int __shl_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2490 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2491 reg_b(dst), SHF_SC_L_SHF, shift_amt);
2492 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2496 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2498 const struct bpf_insn *insn = &meta->insn;
2499 u8 dst = insn->dst_reg * 2;
2501 return __shl_imm(nfp_prog, dst, insn->imm);
/* 32-bit SHL with register amount; reuses the 64-bit low-word helper for
 * the variable case.
 */
2504 static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2506 const struct bpf_insn *insn = &meta->insn;
2510 dst = insn->dst_reg * 2;
2511 umin = meta->umin_src;
2512 umax = meta->umax_src;
2514 return __shl_imm(nfp_prog, dst, umin);
2516 src = insn->src_reg * 2;
2517 shl_reg64_lt32_low(nfp_prog, dst, src);
2518 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
/* BPF_END (byte-swap) on a 32-bit register.  insn->imm selects the swap
 * width (case labels 16/32/64 are elided here); ld_field masks 0x9/0xe
 * rearrange bytes for the 16-bit case, wrp_end32() handles full words.
 */
2522 static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2524 const struct bpf_insn *insn = &meta->insn;
2525 u8 gpr = insn->dst_reg * 2;
2527 switch (insn->imm) {
2529 emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
2531 emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
2534 wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
2537 wrp_end32(nfp_prog, reg_a(gpr), gpr);
2538 wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
/* 64-bit swap: stash low word, swap high into low slot and vice versa. */
2541 wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
2543 wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
2544 wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
/* Second half of the two-slot BPF_LD_IMM64: previous insn holds the low
 * 32 bits, this one the high 32 bits.
 */
2551 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2553 struct nfp_insn_meta *prev = nfp_meta_prev(meta);
2557 dst = prev->insn.dst_reg * 2;
2558 imm_lo = prev->insn.imm;
2559 imm_hi = meta->insn.imm;
2561 wrp_immed(nfp_prog, reg_both(dst), imm_lo);
2563 /* mov is always 1 insn, load imm may be two, so try to use mov */
2564 if (imm_hi == imm_lo)
2565 wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
2567 wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
/* First half only records the continuation callback; part2 does the work. */
2572 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2574 meta->double_cb = imm_ld8_part2;
/* BPF_LD | BPF_ABS loads of 1/2/4 bytes from packet data at a fixed
 * offset (insn->imm).
 */
2578 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2580 return construct_data_ld(nfp_prog, meta->insn.imm, 1);
2583 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2585 return construct_data_ld(nfp_prog, meta->insn.imm, 2);
2588 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2590 return construct_data_ld(nfp_prog, meta->insn.imm, 4);
/* BPF_LD | BPF_IND: same, but offset is src_reg + imm. */
2593 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2595 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2596 meta->insn.src_reg * 2, 1);
2599 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2601 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2602 meta->insn.src_reg * 2, 2);
2605 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2607 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2608 meta->insn.src_reg * 2, 4);
/* LDX from the stack (local memory): load path of the generic stack op. */
2612 mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2613 unsigned int size, unsigned int ptr_off)
2615 return mem_op_stack(nfp_prog, meta, size, ptr_off,
2616 meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
2617 true, wrp_lmem_load);
/* LDX from sk_buff context: map supported __sk_buff fields onto driver
 * registers (len -> plen, data -> pptr, data_end -> pptr + plen); the
 * size checks reject accesses narrower/wider than the field.  High word
 * of the destination pair is zeroed after the switch.
 */
2620 static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2623 swreg dst = reg_both(meta->insn.dst_reg * 2);
2625 switch (meta->insn.off) {
2626 case offsetof(struct __sk_buff, len):
2627 if (size != FIELD_SIZEOF(struct __sk_buff, len))
2629 wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
2631 case offsetof(struct __sk_buff, data):
2632 if (size != FIELD_SIZEOF(struct __sk_buff, data))
2634 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
2636 case offsetof(struct __sk_buff, data_end):
2637 if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
2639 emit_alu(nfp_prog, dst,
2640 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
2646 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
/* LDX from xdp_md context: same idea for XDP (data / data_end only). */
2651 static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2654 swreg dst = reg_both(meta->insn.dst_reg * 2);
2656 switch (meta->insn.off) {
2657 case offsetof(struct xdp_md, data):
2658 if (size != FIELD_SIZEOF(struct xdp_md, data))
2660 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
2662 case offsetof(struct xdp_md, data_end):
2663 if (size != FIELD_SIZEOF(struct xdp_md, data_end))
2665 emit_alu(nfp_prog, dst,
2666 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
2672 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
/* LDX from packet data (32-bit address space). */
2678 mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2683 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2685 return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
2686 tmp_reg, meta->insn.dst_reg * 2, size);
/* LDX from external memory, e.g. map values (40-bit address space). */
2690 mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2695 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2697 return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
2698 tmp_reg, meta->insn.dst_reg * 2, size);
/* Prime the packet cache: pull [range_start, range_end) of packet data
 * into transfer-in registers with one read command.  Indirect mode is
 * used when the range needs more than 8 transfer registers, in which
 * case PREV_ALU carries the override length.
 */
2702 mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
2703 struct nfp_insn_meta *meta)
2705 s16 range_start = meta->pkt_cache.range_start;
2706 s16 range_end = meta->pkt_cache.range_end;
2707 swreg src_base, off;
2711 off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
2712 src_base = reg_a(meta->insn.src_reg * 2);
2713 len = range_end - range_start;
2714 xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;
2716 indir = len > 8 * REG_WIDTH;
2717 /* Setup PREV_ALU for indirect mode. */
2719 wrp_immed(nfp_prog, reg_none(),
2720 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
2722 /* Cache memory into transfer-in registers. */
2723 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
2724 off, xfer_num - 1, CMD_CTX_SWAP, indir);
/* Serve an LDX from the packet cache when the load is NOT aligned to a
 * transfer register: the value may straddle up to three xfer registers,
 * so it is assembled from sub-parts (idx = which register, off = byte
 * offset within it).  NOTE(review): several branch/brace lines are
 * elided, so the exact control flow between the subpart calls is not
 * fully visible here.
 */
2728 mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
2729 struct nfp_insn_meta *meta,
2732 s16 range_start = meta->pkt_cache.range_start;
2733 s16 insn_off = meta->insn.off - range_start;
2734 swreg dst_lo, dst_hi, src_lo, src_mid;
2735 u8 dst_gpr = meta->insn.dst_reg * 2;
2736 u8 len_lo = size, len_mid = 0;
2737 u8 idx = insn_off / REG_WIDTH;
2738 u8 off = insn_off % REG_WIDTH;
2740 dst_hi = reg_both(dst_gpr + 1);
2741 dst_lo = reg_both(dst_gpr);
2742 src_lo = reg_xfer(idx);
2744 /* The read length could involve as many as three registers. */
2745 if (size > REG_WIDTH - off) {
2746 /* Calculate the part in the second register. */
2747 len_lo = REG_WIDTH - off;
2748 len_mid = size - len_lo;
2750 /* Calculate the part in the third register. */
2751 if (size > 2 * REG_WIDTH - off)
2752 len_mid = REG_WIDTH;
2755 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
2758 wrp_immed(nfp_prog, dst_hi, 0);
2762 src_mid = reg_xfer(idx + 1);
2764 if (size <= REG_WIDTH) {
2765 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
2766 wrp_immed(nfp_prog, dst_hi, 0);
2768 swreg src_hi = reg_xfer(idx + 2);
2770 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
2771 REG_WIDTH - len_lo, len_lo);
2772 wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
2773 REG_WIDTH - len_lo);
2774 wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
/* Aligned packet-cache load: whole xfer register(s) can be copied;
 * sub-register sizes use a subpart extract, 8-byte loads take two movs.
 */
2782 mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
2783 struct nfp_insn_meta *meta,
2786 swreg dst_lo, dst_hi, src_lo;
2789 idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
2790 dst_gpr = meta->insn.dst_reg * 2;
2791 dst_hi = reg_both(dst_gpr + 1);
2792 dst_lo = reg_both(dst_gpr);
2793 src_lo = reg_xfer(idx);
2795 if (size < REG_WIDTH) {
2796 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
2797 wrp_immed(nfp_prog, dst_hi, 0);
2798 } else if (size == REG_WIDTH) {
2799 wrp_mov(nfp_prog, dst_lo, src_lo);
2800 wrp_immed(nfp_prog, dst_hi, 0);
2802 swreg src_hi = reg_xfer(idx + 1);
2804 wrp_mov(nfp_prog, dst_lo, src_lo);
2805 wrp_mov(nfp_prog, dst_hi, src_hi);
/* Dispatch on alignment of the cached load relative to REG_WIDTH. */
2812 mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
2813 struct nfp_insn_meta *meta, unsigned int size)
2815 u8 off = meta->insn.off - meta->pkt_cache.range_start;
2817 if (IS_ALIGNED(off, REG_WIDTH))
2818 return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
2820 return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size);
/* Top-level LDX dispatcher: pick the implementation from the verifier's
 * pointer type (CTX / PACKET / STACK / MAP_VALUE), with a special path
 * for gathered ld/st sequences and the packet cache.
 */
2824 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2827 if (meta->ldst_gather_len)
2828 return nfp_cpp_memcpy(nfp_prog, meta);
2830 if (meta->ptr.type == PTR_TO_CTX) {
2831 if (nfp_prog->type == BPF_PROG_TYPE_XDP)
2832 return mem_ldx_xdp(nfp_prog, meta, size);
2834 return mem_ldx_skb(nfp_prog, meta, size);
2837 if (meta->ptr.type == PTR_TO_PACKET) {
2838 if (meta->pkt_cache.range_end) {
2839 if (meta->pkt_cache.do_init)
2840 mem_ldx_data_init_pktcache(nfp_prog, meta);
2842 return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
2844 return mem_ldx_data(nfp_prog, meta, size);
2848 if (meta->ptr.type == PTR_TO_STACK)
2849 return mem_ldx_stack(nfp_prog, meta, size,
2850 meta->ptr.off + meta->ptr.var_off.value);
2852 if (meta->ptr.type == PTR_TO_MAP_VALUE)
2853 return mem_ldx_emem(nfp_prog, meta, size);
/* Size-specific LDX entry points used by the instr_cb[] table. */
2858 static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2860 return mem_ldx(nfp_prog, meta, 1);
2863 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2865 return mem_ldx(nfp_prog, meta, 2);
2868 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2870 return mem_ldx(nfp_prog, meta, 4);
2873 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2875 return mem_ldx(nfp_prog, meta, 8);
/* ST (immediate store) into packet data; imm is sign-extended to u64. */
2879 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2882 u64 imm = meta->insn.imm; /* sign extend */
2885 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2887 return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
/* ST dispatcher: only PTR_TO_PACKET is supported here (fall-through
 * behaviour for other pointer types is elided from this listing).
 */
2891 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2894 if (meta->ptr.type == PTR_TO_PACKET)
2895 return mem_st_data(nfp_prog, meta, size);
/* Size-specific ST entry points used by the instr_cb[] table. */
2900 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2902 return mem_st(nfp_prog, meta, 1);
2905 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2907 return mem_st(nfp_prog, meta, 2);
2910 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2912 return mem_st(nfp_prog, meta, 4);
2915 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2917 return mem_st(nfp_prog, meta, 8);
/* STX (register store) into packet data. */
2921 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2926 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2928 return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
2929 meta->insn.src_reg * 2, size);
/* STX to the stack (local memory): store path of the generic stack op. */
2933 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2934 unsigned int size, unsigned int ptr_off)
2936 return mem_op_stack(nfp_prog, meta, size, ptr_off,
2937 meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
2938 false, wrp_lmem_store);
/* STX to XDP context: only writes to rx_queue_index (queue selection)
 * are translated; anything else should have been rejected earlier.
 */
2941 static int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2943 switch (meta->insn.off) {
2944 case offsetof(struct xdp_md, rx_queue_index):
2945 return nfp_queue_select(nfp_prog, meta);
2948 WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */
/* STX dispatcher on the verifier's pointer type. */
2953 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2956 if (meta->ptr.type == PTR_TO_PACKET)
2957 return mem_stx_data(nfp_prog, meta, size);
2959 if (meta->ptr.type == PTR_TO_STACK)
2960 return mem_stx_stack(nfp_prog, meta, size,
2961 meta->ptr.off + meta->ptr.var_off.value);
/* Size-specific STX entry points used by the instr_cb[] table. */
2966 static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2968 return mem_stx(nfp_prog, meta, 1);
2971 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2973 return mem_stx(nfp_prog, meta, 2);
/* 4-byte STX has the extra XDP-context case (queue selection). */
2976 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2978 if (meta->ptr.type == PTR_TO_CTX)
2979 if (nfp_prog->type == BPF_PROG_TYPE_XDP)
2980 return mem_stx_xdp(nfp_prog, meta);
2981 return mem_stx(nfp_prog, meta, 4);
2984 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2986 return mem_stx(nfp_prog, meta, 8);
/* XADD (atomic add) to a map value.  Emits up to two code paths — a
 * 16-bit add-immediate command and a full add command — selected at
 * runtime by a branch when the verifier could not prove which applies
 * (xadd_maybe_16bit / xadd_over_16bit).  The out/full_add labels are
 * patched up below via nfp_prog_confirm_current_offset().
 * NOTE(review): the label-assignment and branch-setup lines between 3004
 * and 3031 are partially elided, so the exact offsets of `out` and
 * `full_add` are not fully visible in this listing.
 */
2990 mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
2992 u8 dst_gpr = meta->insn.dst_reg * 2;
2993 u8 src_gpr = meta->insn.src_reg * 2;
2994 unsigned int full_add, out;
2995 swreg addra, addrb, off;
2997 off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2999 /* We can fit 16 bits into command immediate, if we know the immediate
3000 * is guaranteed to either always or never fit into 16 bit we only
3001 * generate code to handle that particular case, otherwise generate
3004 out = nfp_prog_current_offset(nfp_prog);
3005 full_add = nfp_prog_current_offset(nfp_prog);
3007 if (meta->insn.off) {
3011 if (meta->xadd_maybe_16bit) {
3015 if (meta->xadd_over_16bit)
3017 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
3022 /* Generate the branch for choosing add_imm vs add */
3023 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
3024 swreg max_imm = imm_a(nfp_prog);
/* 64-bit compare src against 0xffff: branch to full_add if src > 16 bit. */
3026 wrp_immed(nfp_prog, max_imm, 0xffff);
3027 emit_alu(nfp_prog, reg_none(),
3028 max_imm, ALU_OP_SUB, reg_b(src_gpr));
3029 emit_alu(nfp_prog, reg_none(),
3030 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
3031 emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
3035 /* If insn has an offset add to the address */
3036 if (!meta->insn.off) {
3037 addra = reg_a(dst_gpr);
3038 addrb = reg_b(dst_gpr + 1);
/* 64-bit add of the offset into scratch registers for the address. */
3040 emit_alu(nfp_prog, imma_a(nfp_prog),
3041 reg_a(dst_gpr), ALU_OP_ADD, off);
3042 emit_alu(nfp_prog, imma_b(nfp_prog),
3043 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
3044 addra = imma_a(nfp_prog);
3045 addrb = imma_b(nfp_prog);
3048 /* Generate the add_imm if 16 bits are possible */
3049 if (meta->xadd_maybe_16bit) {
3050 swreg prev_alu = imm_a(nfp_prog);
3052 wrp_immed(nfp_prog, prev_alu,
3053 FIELD_PREP(CMD_OVE_DATA, 2) |
3055 FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
3056 wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
3057 emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
3058 addra, addrb, 0, CMD_CTX_NO_SWAP);
/* Skip the full-add path when both code paths were generated. */
3060 if (meta->xadd_over_16bit)
3061 emit_br(nfp_prog, BR_UNC, out, 0);
3064 if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
3067 /* Generate the add if 16 bits are not guaranteed */
3068 if (meta->xadd_over_16bit) {
3069 emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
3070 addra, addrb, is64 << 2,
3071 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
/* Operand(s) go out through transfer registers in the defer slots. */
3073 wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
3075 wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
3078 if (!nfp_prog_confirm_current_offset(nfp_prog, out))
/* 32- and 64-bit XADD entry points for the instr_cb[] table. */
3084 static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3086 return mem_xadd(nfp_prog, meta, false);
3089 static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3091 return mem_xadd(nfp_prog, meta, true);
/* Unconditional BPF_JA: branch target is patched later by the fixup pass. */
3094 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3096 emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
/* JEQ with 64-bit immediate: XOR each half of the register pair against
 * the corresponding immediate half, OR the results and branch on zero.
 * The XORs are skipped (or1/or2 stay as the raw registers) when an
 * immediate half is zero — those `if` headers are elided here.
 */
3101 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3103 const struct bpf_insn *insn = &meta->insn;
3104 u64 imm = insn->imm; /* sign extend */
3105 swreg or1, or2, tmp_reg;
3107 or1 = reg_a(insn->dst_reg * 2);
3108 or2 = reg_b(insn->dst_reg * 2 + 1);
3111 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3112 emit_alu(nfp_prog, imm_a(nfp_prog),
3113 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
3114 or1 = imm_a(nfp_prog);
3118 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
3119 emit_alu(nfp_prog, imm_b(nfp_prog),
3120 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
3121 or2 = imm_b(nfp_prog);
3124 emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
3125 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
/* 32-bit JEQ: single XOR of the low word against the immediate. */
3130 static int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3132 const struct bpf_insn *insn = &meta->insn;
3135 tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog));
3136 emit_alu(nfp_prog, reg_none(),
3137 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
3138 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
/* JSET with immediate: AND the low word with the mask and branch on
 * non-zero; the high word only matters for 64-bit jumps with a
 * sign-extended (all-ones) upper mask half.
 */
3143 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3145 const struct bpf_insn *insn = &meta->insn;
3146 u64 imm = insn->imm; /* sign extend */
3147 u8 dst_gpr = insn->dst_reg * 2;
3150 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3151 emit_alu(nfp_prog, imm_b(nfp_prog),
3152 reg_a(dst_gpr), ALU_OP_AND, tmp_reg);
3153 /* Upper word of the mask can only be 0 or ~0 from sign extension,
3154 * so either ignore it or OR the whole thing in.
3156 if (is_mbpf_jmp64(meta) && imm >> 32) {
3157 emit_alu(nfp_prog, reg_none(),
3158 reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog));
3160 emit_br(nfp_prog, BR_BNE, insn->off, 0);
/* JNE with immediate.  Special case for imm == 0 (test the register(s)
 * directly); otherwise XOR-and-branch per 32-bit half — the guarding
 * `if` headers and intermediate returns are elided in this listing.
 */
3165 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3167 const struct bpf_insn *insn = &meta->insn;
3168 u64 imm = insn->imm; /* sign extend */
3169 bool is_jmp32 = is_mbpf_jmp32(meta);
3174 emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE,
3175 reg_b(insn->dst_reg * 2));
3177 emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
3178 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
3179 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3183 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3184 emit_alu(nfp_prog, reg_none(),
3185 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
3186 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3191 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
3192 emit_alu(nfp_prog, reg_none(),
3193 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
3194 emit_br(nfp_prog, BR_BNE, insn->off, 0);
/* JEQ with register source: XOR low halves; for 64-bit jumps also XOR
 * the high halves and OR the two results, then branch on zero.
 */
3199 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3201 const struct bpf_insn *insn = &meta->insn;
3203 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
3204 ALU_OP_XOR, reg_b(insn->src_reg * 2));
3205 if (is_mbpf_jmp64(meta)) {
3206 emit_alu(nfp_prog, imm_b(nfp_prog),
3207 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR,
3208 reg_b(insn->src_reg * 2 + 1));
3209 emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR,
3212 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
/* JSET / JNE with register source: shared test-and-branch helper. */
3217 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3219 return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
3222 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3224 return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
/* Translate a BPF-to-BPF (pseudo) call: adjust the LM stack pointer by
 * the caller's frame depth, jump to the callee (directly, or via the
 * register-push subroutine when the callee saves R6~R9), pass the return
 * address in ret_reg(), then restore the stack pointer afterwards.
 * num_insns_after_br is recorded so the branch-fixup pass can find the
 * actual branch instruction behind the restore sequence.
 * NOTE(review): the `if (stack_depth)` guards around the two stack
 * adjustments are elided from this listing.
 */
3228 bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3230 u32 ret_tgt, stack_depth, offset_br;
3233 stack_depth = round_up(nfp_prog->stack_frame_depth, STACK_FRAME_ALIGN);
3234 /* Space for saving the return address is accounted for by the callee,
3235 * so stack_depth can be zero for the main function.
3238 tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
3239 stack_imm(nfp_prog));
3240 emit_alu(nfp_prog, stack_reg(nfp_prog),
3241 stack_reg(nfp_prog), ALU_OP_ADD, tmp_reg);
3242 emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
3243 NFP_CSR_ACT_LM_ADDR0);
3246 /* Two cases for jumping to the callee:
3248 * - If callee uses and needs to save R6~R9 then:
3249 * 1. Put the start offset of the callee into imm_b(). This will
3250 * require a fixup step, as we do not necessarily know this
3252 * 2. Put the return address from the callee to the caller into
3253 * register ret_reg().
3254 * 3. (After defer slots are consumed) Jump to the subroutine that
3255 * pushes the registers to the stack.
3256 * The subroutine acts as a trampoline, and returns to the address in
3257 * imm_b(), i.e. jumps to the callee.
3259 * - If callee does not need to save R6~R9 then just load return
3260 * address to the caller in ret_reg(), and jump to the callee
3263 * Using ret_reg() to pass the return address to the callee is set here
3264 * as a convention. The callee can then push this address onto its
3265 * stack frame in its prologue. The advantages of passing the return
3266 * address through ret_reg(), instead of pushing it to the stack right
3267 * here, are the following:
3268 * - It looks cleaner.
3269 * - If the called function is called multiple time, we get a lower
3271 * - We save two no-op instructions that should be added just before
3272 * the emit_br() when stack depth is not null otherwise.
3273 * - If we ever find a register to hold the return address during whole
3274 * execution of the callee, we will not have to push the return
3275 * address to the stack for leaf functions.
3277 if (!meta->jmp_dst) {
3278 pr_err("BUG: BPF-to-BPF call has no destination recorded\n");
3281 if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) {
3282 ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
3283 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2,
3284 RELO_BR_GO_CALL_PUSH_REGS);
3285 offset_br = nfp_prog_current_offset(nfp_prog);
3286 wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL);
3288 ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
3289 emit_br(nfp_prog, BR_UNC, meta->insn.imm, 1);
3290 offset_br = nfp_prog_current_offset(nfp_prog);
3292 wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL);
3294 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
/* Restore the LM stack pointer after the callee returns. */
3298 tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
3299 stack_imm(nfp_prog));
3300 emit_alu(nfp_prog, stack_reg(nfp_prog),
3301 stack_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
3302 emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
3303 NFP_CSR_ACT_LM_ADDR0);
3304 wrp_nops(nfp_prog, 3);
3307 meta->num_insns_after_br = nfp_prog_current_offset(nfp_prog);
3308 meta->num_insns_after_br -= offset_br;
/* Dispatch a BPF helper call to its hardware translation; anything not
 * listed should have been rejected by the offload verifier.
 */
3313 static int helper_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3315 switch (meta->insn.imm) {
3316 case BPF_FUNC_xdp_adjust_head:
3317 return adjust_head(nfp_prog, meta);
3318 case BPF_FUNC_xdp_adjust_tail:
3319 return adjust_tail(nfp_prog, meta);
3320 case BPF_FUNC_map_lookup_elem:
3321 case BPF_FUNC_map_update_elem:
3322 case BPF_FUNC_map_delete_elem:
3323 return map_call_stack_common(nfp_prog, meta);
3324 case BPF_FUNC_get_prandom_u32:
3325 return nfp_get_prandom_u32(nfp_prog, meta);
3326 case BPF_FUNC_perf_event_output:
3327 return nfp_perf_event_output(nfp_prog, meta);
3329 WARN_ONCE(1, "verifier allowed unsupported function\n");
/* BPF_CALL: pseudo calls go to another BPF function, the rest are helpers. */
3334 static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3336 if (is_mbpf_pseudo_call(meta))
3337 return bpf_to_bpf_call(nfp_prog, meta);
3339 return helper_call(nfp_prog, meta);
/* Subprogram 0 is the program's main function. */
3342 static bool nfp_is_main_function(struct nfp_insn_meta *meta)
3344 return meta->subprog_idx == 0;
/* Exit from the main function: relocated branch to the common out path. */
3347 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3349 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);
/* Return from a subprogram: go through the register-pop subroutine when
 * R6~R9 were saved, otherwise pop the return address and jump straight
 * back to the caller.
 */
3355 nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3357 if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) {
3358 /* Pop R6~R9 to the stack via related subroutine.
3359 * We loaded the return address to the caller into ret_reg().
3360 * This means that the subroutine does not come back here, we
3361 * make it jump back to the subprogram caller directly!
3363 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1,
3364 RELO_BR_GO_CALL_POP_REGS);
3365 /* Pop return address from the stack. */
3366 wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
3368 /* Pop return address from the stack. */
3369 wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
3370 /* Jump back to caller if no callee-saved registers were used
3371 * by the subprogram.
3373 emit_rtn(nfp_prog, ret_reg(nfp_prog), 0);
/* BPF_EXIT: main function leaves the program, subprograms return. */
3379 static int jmp_exit(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3381 if (nfp_is_main_function(meta))
3382 return goto_out(nfp_prog, meta);
3384 return nfp_subprog_epilogue(nfp_prog, meta);
/* BPF opcode -> translator callback dispatch table, indexed by the raw
 * insn.code byte.  Unlisted opcodes stay NULL (unsupported).
 * Fix: the JMP32 signed-compare/JSET entries were written `=cmp_imm`
 * etc. with no space after `=`, inconsistent with every other entry and
 * with kernel coding style; normalized to `= cmp_imm`.
 */
3387 static const instr_cb_t instr_cb[256] = {
3388 [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64,
3389 [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64,
3390 [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64,
3391 [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64,
3392 [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64,
3393 [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64,
3394 [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64,
3395 [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64,
3396 [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64,
3397 [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64,
3398 [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
3399 [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
3400 [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64,
3401 [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64,
3402 [BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64,
3403 [BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64,
3404 [BPF_ALU64 | BPF_NEG] = neg_reg64,
3405 [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64,
3406 [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
3407 [BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64,
3408 [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64,
3409 [BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
3410 [BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
3411 [BPF_ALU | BPF_MOV | BPF_X] = mov_reg,
3412 [BPF_ALU | BPF_MOV | BPF_K] = mov_imm,
3413 [BPF_ALU | BPF_XOR | BPF_X] = xor_reg,
3414 [BPF_ALU | BPF_XOR | BPF_K] = xor_imm,
3415 [BPF_ALU | BPF_AND | BPF_X] = and_reg,
3416 [BPF_ALU | BPF_AND | BPF_K] = and_imm,
3417 [BPF_ALU | BPF_OR | BPF_X] = or_reg,
3418 [BPF_ALU | BPF_OR | BPF_K] = or_imm,
3419 [BPF_ALU | BPF_ADD | BPF_X] = add_reg,
3420 [BPF_ALU | BPF_ADD | BPF_K] = add_imm,
3421 [BPF_ALU | BPF_SUB | BPF_X] = sub_reg,
3422 [BPF_ALU | BPF_SUB | BPF_K] = sub_imm,
3423 [BPF_ALU | BPF_MUL | BPF_X] = mul_reg,
3424 [BPF_ALU | BPF_MUL | BPF_K] = mul_imm,
3425 [BPF_ALU | BPF_DIV | BPF_X] = div_reg,
3426 [BPF_ALU | BPF_DIV | BPF_K] = div_imm,
3427 [BPF_ALU | BPF_NEG] = neg_reg,
3428 [BPF_ALU | BPF_LSH | BPF_X] = shl_reg,
3429 [BPF_ALU | BPF_LSH | BPF_K] = shl_imm,
3430 [BPF_ALU | BPF_RSH | BPF_X] = shr_reg,
3431 [BPF_ALU | BPF_RSH | BPF_K] = shr_imm,
3432 [BPF_ALU | BPF_ARSH | BPF_X] = ashr_reg,
3433 [BPF_ALU | BPF_ARSH | BPF_K] = ashr_imm,
3434 [BPF_ALU | BPF_END | BPF_X] = end_reg32,
3435 [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8,
3436 [BPF_LD | BPF_ABS | BPF_B] = data_ld1,
3437 [BPF_LD | BPF_ABS | BPF_H] = data_ld2,
3438 [BPF_LD | BPF_ABS | BPF_W] = data_ld4,
3439 [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1,
3440 [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2,
3441 [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4,
3442 [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1,
3443 [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2,
3444 [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4,
3445 [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8,
3446 [BPF_STX | BPF_MEM | BPF_B] = mem_stx1,
3447 [BPF_STX | BPF_MEM | BPF_H] = mem_stx2,
3448 [BPF_STX | BPF_MEM | BPF_W] = mem_stx4,
3449 [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8,
3450 [BPF_STX | BPF_XADD | BPF_W] = mem_xadd4,
3451 [BPF_STX | BPF_XADD | BPF_DW] = mem_xadd8,
3452 [BPF_ST | BPF_MEM | BPF_B] = mem_st1,
3453 [BPF_ST | BPF_MEM | BPF_H] = mem_st2,
3454 [BPF_ST | BPF_MEM | BPF_W] = mem_st4,
3455 [BPF_ST | BPF_MEM | BPF_DW] = mem_st8,
3456 [BPF_JMP | BPF_JA | BPF_K] = jump,
3457 [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
3458 [BPF_JMP | BPF_JGT | BPF_K] = cmp_imm,
3459 [BPF_JMP | BPF_JGE | BPF_K] = cmp_imm,
3460 [BPF_JMP | BPF_JLT | BPF_K] = cmp_imm,
3461 [BPF_JMP | BPF_JLE | BPF_K] = cmp_imm,
3462 [BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm,
3463 [BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm,
3464 [BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm,
3465 [BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm,
3466 [BPF_JMP | BPF_JSET | BPF_K] = jset_imm,
3467 [BPF_JMP | BPF_JNE | BPF_K] = jne_imm,
3468 [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg,
3469 [BPF_JMP | BPF_JGT | BPF_X] = cmp_reg,
3470 [BPF_JMP | BPF_JGE | BPF_X] = cmp_reg,
3471 [BPF_JMP | BPF_JLT | BPF_X] = cmp_reg,
3472 [BPF_JMP | BPF_JLE | BPF_X] = cmp_reg,
3473 [BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg,
3474 [BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg,
3475 [BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg,
3476 [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg,
3477 [BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
3478 [BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
3479 [BPF_JMP32 | BPF_JEQ | BPF_K] = jeq32_imm,
3480 [BPF_JMP32 | BPF_JGT | BPF_K] = cmp_imm,
3481 [BPF_JMP32 | BPF_JGE | BPF_K] = cmp_imm,
3482 [BPF_JMP32 | BPF_JLT | BPF_K] = cmp_imm,
3483 [BPF_JMP32 | BPF_JLE | BPF_K] = cmp_imm,
3484 [BPF_JMP32 | BPF_JSGT | BPF_K] = cmp_imm,
3485 [BPF_JMP32 | BPF_JSGE | BPF_K] = cmp_imm,
3486 [BPF_JMP32 | BPF_JSLT | BPF_K] = cmp_imm,
3487 [BPF_JMP32 | BPF_JSLE | BPF_K] = cmp_imm,
3488 [BPF_JMP32 | BPF_JSET | BPF_K] = jset_imm,
3489 [BPF_JMP32 | BPF_JNE | BPF_K] = jne_imm,
3490 [BPF_JMP32 | BPF_JEQ | BPF_X] = jeq_reg,
3491 [BPF_JMP32 | BPF_JGT | BPF_X] = cmp_reg,
3492 [BPF_JMP32 | BPF_JGE | BPF_X] = cmp_reg,
3493 [BPF_JMP32 | BPF_JLT | BPF_X] = cmp_reg,
3494 [BPF_JMP32 | BPF_JLE | BPF_X] = cmp_reg,
3495 [BPF_JMP32 | BPF_JSGT | BPF_X] = cmp_reg,
3496 [BPF_JMP32 | BPF_JSGE | BPF_X] = cmp_reg,
3497 [BPF_JMP32 | BPF_JSLT | BPF_X] = cmp_reg,
3498 [BPF_JMP32 | BPF_JSLE | BPF_X] = cmp_reg,
3499 [BPF_JMP32 | BPF_JSET | BPF_X] = jset_reg,
3500 [BPF_JMP32 | BPF_JNE | BPF_X] = jne_reg,
3501 [BPF_JMP | BPF_CALL] = call,
3502 [BPF_JMP | BPF_EXIT] = jmp_exit,
3505 /* --- Assembler logic --- */
/* Patch the immed instruction that follows a call branch (br_idx + 1)
 * with the callee's start offset; the slot must still hold the zero
 * placeholder written by bpf_to_bpf_call(), otherwise it's a bug.
 */
3507 nfp_fixup_immed_relo(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
3508 struct nfp_insn_meta *jmp_dst, u32 br_idx)
3510 if (immed_get_value(nfp_prog->prog[br_idx + 1])) {
3511 pr_err("BUG: failed to fix up callee register saving\n");
3515 immed_set_value(&nfp_prog->prog[br_idx + 1], jmp_dst->off);
/* Second pass over all translated jumps: locate the NFP branch emitted
 * for each BPF jump (last NFP insn of the BPF insn's range, minus the
 * stack-restore tail for pseudo calls) and patch in the real target
 * offset.  Relocated branches other than RELO_BR_REL are left for the
 * relocation stage.
 */
3520 static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
3522 struct nfp_insn_meta *meta, *jmp_dst;
3526 list_for_each_entry(meta, &nfp_prog->insns, l) {
3527 if (meta->flags & FLAG_INSN_SKIP_MASK)
3529 if (!is_mbpf_jmp(meta))
/* Subprogram exits don't end in a patchable branch. */
3531 if (meta->insn.code == (BPF_JMP | BPF_EXIT) &&
3532 !nfp_is_main_function(meta))
3534 if (is_mbpf_helper_call(meta))
3537 if (list_is_last(&meta->l, &nfp_prog->insns))
3538 br_idx = nfp_prog->last_bpf_off;
3540 br_idx = list_next_entry(meta, l)->off - 1;
3542 /* For BPF-to-BPF function call, a stack adjustment sequence is
3543 * generated after the return instruction. Therefore, we must
3544 * withdraw the length of this sequence to have br_idx pointing
3545 * to where the "branch" NFP instruction is expected to be.
3547 if (is_mbpf_pseudo_call(meta))
3548 br_idx -= meta->num_insns_after_br;
3550 if (!nfp_is_br(nfp_prog->prog[br_idx])) {
3551 pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
3552 br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
3556 if (meta->insn.code == (BPF_JMP | BPF_EXIT))
3559 /* Leave special branches for later */
3560 if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
3561 RELO_BR_REL && !is_mbpf_pseudo_call(meta))
3564 if (!meta->jmp_dst) {
3565 pr_err("Non-exit jump doesn't have destination info recorded!!\n");
3569 jmp_dst = meta->jmp_dst;
3571 if (jmp_dst->flags & FLAG_INSN_SKIP_PREC_DEPENDENT) {
3572 pr_err("Branch landing on removed instruction!!\n");
/* Pseudo calls to register-saving callees patch the immed slot instead. */
3576 if (is_mbpf_pseudo_call(meta) &&
3577 nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) {
3578 err = nfp_fixup_immed_relo(nfp_prog, meta,
3584 if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
/* Patch every branch emitted within this BPF insn's NFP range. */
3588 for (idx = meta->off; idx <= br_idx; idx++) {
3589 if (!nfp_is_br(nfp_prog->prog[idx]))
3591 br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
/* Emit the program prologue: mask the packet-length register down to the
 * 14 valid bits of the packet vector length field.
 */
3598 static void nfp_intro(struct nfp_prog *nfp_prog)
3600 wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
3601 emit_alu(nfp_prog, plen_reg(nfp_prog),
3602 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
/* Subprogram prologue: stash the return address in the first slot of the
 * stack frame (local memory) so the callee can return later.
 */
3606 nfp_subprog_prologue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3608 /* Save return address into the stack. */
3609 wrp_mov(nfp_prog, reg_lm(0, 0), ret_reg(nfp_prog));
/* Begin translating a new subprogram: record its stack depth (rounded up
 * to a 4-byte multiple) and emit its prologue.
 */
3613 nfp_start_subprog(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3615 unsigned int depth = nfp_prog->subprog[meta->subprog_idx].stack_depth;
3617 nfp_prog->stack_frame_depth = round_up(depth, 4);
3618 nfp_subprog_prologue(nfp_prog, meta);
/* True if this instruction was marked as the first insn of a subprogram
 * during nfp_bpf_jit_prepare().
 */
3621 bool nfp_is_subprog_start(struct nfp_insn_meta *meta)
3623 return meta->flags & FLAG_INSN_IS_SUBPROG_START;
/* Emit the program epilogue for TC direct-action mode: translate the BPF
 * return code in R0 into the NFP result/stat flags and branch to the
 * next-packet handler (via RELO_BR_NEXT_PKT relocation).
 */
3626 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
3628 /* TC direct-action mode:
3629 * 0,1 ok NOT SUPPORTED[1]
3630 * 2 drop 0x22 -> drop, count as stat1
3631 * 4,5 nuke 0x02 -> drop
3632 * 7 redir 0x44 -> redir, count as stat2
3633 * * unspec 0x11 -> pass, count as stat0
3635 * [1] We can't support OK and RECLASSIFY because we can't tell TC
3636 * the exact decision made. We are forced to support UNSPEC
3637 * to handle aborts so that's the only one we handle for passing
3638 * packets up the stack.
3640 /* Target for aborts */
3641 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
/* Branch with 2 defer slots; the two insns below execute in them. */
3643 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3645 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
3646 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
3648 /* Target for normal exits */
3649 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
3651 /* if R0 > 7 jump to abort */
3652 emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
3653 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
3654 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
/* Nibble lookup tables mapping R0 (0..7) to result flags; the two
 * immediates hold low/high nibbles of each entry.
 */
3656 wrp_immed(nfp_prog, reg_b(2), 0x41221211);
3657 wrp_immed(nfp_prog, reg_b(3), 0x41001211);
/* Index the tables: shift amount = R0 * 4 bits (L_SHF by 2 feeds the
 * indirect shift below).
 */
3659 emit_shf(nfp_prog, reg_a(1),
3660 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
3662 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3663 emit_shf(nfp_prog, reg_a(2),
3664 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
3666 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3667 emit_shf(nfp_prog, reg_b(2),
3668 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
/* Jump to next packet; the final two insns run in the defer slots. */
3670 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3672 emit_shf(nfp_prog, reg_b(2),
3673 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
3674 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
/* Emit the program epilogue for XDP: translate the XDP verdict in R0 into
 * the NFP result/stat flags via a byte lookup table and branch to the
 * next-packet handler (via RELO_BR_NEXT_PKT relocation).
 */
3677 static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
3679 /* XDP return codes:
3680 * 0 aborted 0x82 -> drop, count as stat3
3681 * 1 drop 0x22 -> drop, count as stat1
3682 * 2 pass 0x11 -> pass, count as stat0
3683 * 3 tx 0x44 -> redir, count as stat2
3684 * * unknown 0x82 -> drop, count as stat3
3686 /* Target for aborts */
3687 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
/* Branch with 2 defer slots; the two insns below execute in them. */
3689 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3691 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
3692 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
3694 /* Target for normal exits */
3695 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
3697 /* if R0 > 3 jump to abort */
3698 emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
3699 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
/* One byte of flags per verdict, indexed by R0: 0->0x82, 1->0x22,
 * 2->0x11, 3->0x44.
 */
3701 wrp_immed(nfp_prog, reg_b(2), 0x44112282);
/* Shift amount = R0 * 8 bits, feeding the indirect shift below. */
3703 emit_shf(nfp_prog, reg_a(1),
3704 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
3706 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3707 emit_shf(nfp_prog, reg_b(2),
3708 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
/* Jump to next packet; the final two insns run in the defer slots. */
3710 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3712 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
3713 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
/* True if any subprogram (index 0 is the main program, hence starting at
 * 1) requires the callee-saved register push/pop subroutines.
 */
3716 static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog)
3720 for (idx = 1; idx < nfp_prog->subprog_cnt; idx++)
3721 if (nfp_prog->subprog[idx].needs_reg_push)
/* Emit the shared subroutine that spills BPF callee-saved registers
 * R6-R9 (two 32-bit NFP registers each) into the stack frame's local
 * memory, then returns through imm_b().
 */
3727 static void nfp_push_callee_registers(struct nfp_prog *nfp_prog)
3731 /* Subroutine: Save all callee saved registers (R6 ~ R9).
3732 * imm_b() holds the return address.
3734 nfp_prog->tgt_call_push_regs = nfp_prog_current_offset(nfp_prog);
3735 for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
/* adj: NFP register pair index for this BPF reg; idx: LM slot. */
3736 u8 adj = (reg - BPF_REG_0) * 2;
3737 u8 idx = (reg - BPF_REG_6) * 2;
3739 /* The first slot in the stack frame is used to push the return
3740 * address in bpf_to_bpf_call(), start just after.
3742 wrp_mov(nfp_prog, reg_lm(0, 1 + idx), reg_b(adj));
3744 if (reg == BPF_REG_8)
3745 /* Prepare to jump back, last 3 insns use defer slots */
3746 emit_rtn(nfp_prog, imm_b(nfp_prog), 3);
3748 wrp_mov(nfp_prog, reg_lm(0, 1 + idx + 1), reg_b(adj + 1));
/* Emit the shared subroutine that reloads BPF callee-saved registers
 * R6-R9 from the stack frame's local memory — mirror image of
 * nfp_push_callee_registers() — then returns through ret_reg().
 */
3752 static void nfp_pop_callee_registers(struct nfp_prog *nfp_prog)
3756 /* Subroutine: Restore all callee saved registers (R6 ~ R9).
3757 * ret_reg() holds the return address.
3759 nfp_prog->tgt_call_pop_regs = nfp_prog_current_offset(nfp_prog);
3760 for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
/* adj: NFP register pair index for this BPF reg; idx: LM slot. */
3761 u8 adj = (reg - BPF_REG_0) * 2;
3762 u8 idx = (reg - BPF_REG_6) * 2;
3764 /* The first slot in the stack frame holds the return address,
3765 * start popping just after that.
3767 wrp_mov(nfp_prog, reg_both(adj), reg_lm(0, 1 + idx));
3769 if (reg == BPF_REG_8)
3770 /* Prepare to jump back, last 3 insns use defer slots */
3771 emit_rtn(nfp_prog, ret_reg(nfp_prog), 3);
3773 wrp_mov(nfp_prog, reg_both(adj + 1), reg_lm(0, 1 + idx + 1));
/* Emit the type-specific epilogue, then (if any subprogram needs it) the
 * shared callee-saved register push/pop subroutines.
 */
3777 static void nfp_outro(struct nfp_prog *nfp_prog)
3779 switch (nfp_prog->type) {
3780 case BPF_PROG_TYPE_SCHED_CLS:
3781 nfp_outro_tc_da(nfp_prog);
3783 case BPF_PROG_TYPE_XDP:
3784 nfp_outro_xdp(nfp_prog);
3790 if (!nfp_prog_needs_callee_reg_save(nfp_prog))
3793 nfp_push_callee_registers(nfp_prog);
3794 nfp_pop_callee_registers(nfp_prog);
/* Main translation loop: emit the intro, translate every BPF instruction
 * through its instr_cb callback, emit the outro, pad with nops for the
 * ustore prefetch window, and finally fix up branch offsets.  Returns 0
 * or a negative error.
 */
3797 static int nfp_translate(struct nfp_prog *nfp_prog)
3799 struct nfp_insn_meta *meta;
/* Main program (subprog 0) stack depth, rounded to 4 bytes. */
3803 depth = nfp_prog->subprog[0].stack_depth;
3804 nfp_prog->stack_frame_depth = round_up(depth, 4);
3806 nfp_intro(nfp_prog);
3807 if (nfp_prog->error)
3808 return nfp_prog->error;
3810 list_for_each_entry(meta, &nfp_prog->insns, l) {
3811 instr_cb_t cb = instr_cb[meta->insn.code];
/* Record where this BPF insn's NFP code starts. */
3813 meta->off = nfp_prog_current_offset(nfp_prog);
3815 if (nfp_is_subprog_start(meta)) {
3816 nfp_start_subprog(nfp_prog, meta);
3817 if (nfp_prog->error)
3818 return nfp_prog->error;
/* Optimized-out instructions still count as translated. */
3821 if (meta->flags & FLAG_INSN_SKIP_MASK) {
3822 nfp_prog->n_translated++;
/* A previous insn (e.g. 64-bit immediate load) may claim this
 * slot via its double_cb.
 */
3826 if (nfp_meta_has_prev(nfp_prog, meta) &&
3827 nfp_meta_prev(meta)->double_cb)
3828 cb = nfp_meta_prev(meta)->double_cb;
3831 err = cb(nfp_prog, meta);
3834 if (nfp_prog->error)
3835 return nfp_prog->error;
3837 nfp_prog->n_translated++;
3840 nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
3842 nfp_outro(nfp_prog);
3843 if (nfp_prog->error)
3844 return nfp_prog->error;
/* Pad so the ustore prefetch window never runs off the program. */
3846 wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
3847 if (nfp_prog->error)
3848 return nfp_prog->error;
3850 return nfp_fixup_branches(nfp_prog);
3853 /* --- Optimizations --- */
/* Skip the register-initialization preamble cBPF-converted programs start
 * with (self-XORs and the R6 = R1 context move), stopping at the first
 * instruction that doesn't match the pattern.
 */
3854 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
3856 struct nfp_insn_meta *meta;
3858 list_for_each_entry(meta, &nfp_prog->insns, l) {
3859 struct bpf_insn insn = meta->insn;
3861 /* Programs converted from cBPF start with register xoring */
3862 if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
3863 insn.src_reg == insn.dst_reg)
3866 /* Programs start with R6 = R1 but we ignore the skb pointer */
3867 if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
3868 insn.src_reg == 1 && insn.dst_reg == 6)
3869 meta->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
3871 /* Return as soon as something doesn't match */
3872 if (!(meta->flags & FLAG_INSN_SKIP_MASK))
3877 /* abs(insn.imm) will fit better into unrestricted reg immediate -
3878 * convert add/sub of a negative number into a sub/add of a positive one.
/* Rewrite add/sub (and the analogous compare jumps) with a negative
 * immediate into the opposite operation with a positive immediate, since
 * abs(imm) fits better into the NFP's unrestricted immediate encoding.
 */
3880 static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
3882 struct nfp_insn_meta *meta;
3884 list_for_each_entry(meta, &nfp_prog->insns, l) {
3885 struct bpf_insn insn = meta->insn;
3887 if (meta->flags & FLAG_INSN_SKIP_MASK)
3890 if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta))
/* Only immediate-operand forms are candidates. */
3892 if (BPF_SRC(insn.code) != BPF_K)
3897 if (is_mbpf_jmp(meta)) {
3898 switch (BPF_OP(insn.code)) {
/* Jumps keep their opcode; mark them so code-gen knows
 * the immediate was negated.
 */
3903 meta->jump_neg_op = true;
/* For ALU, swap ADD <-> SUB to compensate for negation. */
3909 if (BPF_OP(insn.code) == BPF_ADD)
3910 insn.code = BPF_CLASS(insn.code) | BPF_SUB;
3911 else if (BPF_OP(insn.code) == BPF_SUB)
3912 insn.code = BPF_CLASS(insn.code) | BPF_ADD;
3916 meta->insn.code = insn.code | BPF_K;
3919 meta->insn.imm = -insn.imm;
3923 /* Remove masking after load since our load guarantees this is not needed */
/* Drop an AND-with-size-mask that directly follows a classic (ABS/IND)
 * packet load — our loads already zero-extend, so the mask is redundant.
 */
3924 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
3926 struct nfp_insn_meta *meta1, *meta2;
/* Expected mask per load width; BPF_DW intentionally absent (0). */
3927 const s32 exp_mask[] = {
3928 [BPF_B] = 0x000000ffU,
3929 [BPF_H] = 0x0000ffffU,
3930 [BPF_W] = 0xffffffffU,
3933 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
3934 struct bpf_insn insn, next;
3939 if (BPF_CLASS(insn.code) != BPF_LD)
3941 if (BPF_MODE(insn.code) != BPF_ABS &&
3942 BPF_MODE(insn.code) != BPF_IND)
3945 if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
3948 if (!exp_mask[BPF_SIZE(insn.code)])
3950 if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
/* Classic loads target R0; any explicit regs rule it out. */
3953 if (next.src_reg || next.dst_reg)
/* Can't remove an instruction something jumps to. */
3956 if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
3959 meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
/* Drop the <<32 then >>32 (or >>32 then <<32) pair that follows a classic
 * 32-bit packet load — the shifts only clear/move the upper half, which
 * our load already handles.
 */
3963 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
3965 struct nfp_insn_meta *meta1, *meta2, *meta3;
3967 nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
3968 struct bpf_insn insn, next1, next2;
3971 next1 = meta2->insn;
3972 next2 = meta3->insn;
3974 if (BPF_CLASS(insn.code) != BPF_LD)
3976 if (BPF_MODE(insn.code) != BPF_ABS &&
3977 BPF_MODE(insn.code) != BPF_IND)
3979 if (BPF_SIZE(insn.code) != BPF_W)
/* Accept LSH+RSH in either order, both by immediate. */
3982 if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
3983 next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
3984 !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
3985 next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
/* Classic loads target R0; any explicit regs rule it out. */
3988 if (next1.src_reg || next1.dst_reg ||
3989 next2.src_reg || next2.dst_reg)
3992 if (next1.imm != 0x20 || next2.imm != 0x20)
/* Can't remove instructions something jumps to. */
3995 if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
3996 meta3->flags & FLAG_INSN_IS_JUMP_DST)
3999 meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
4000 meta3->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
4004 /* load/store pair that forms memory copy should look like the following:
4006 * ld_width R, [addr_src + offset_src]
4007 * st_width [addr_dest + offset_dest], R
4009 * The destination register of load and source register of store should
4010 * be the same, load and store should also perform at the same width.
4011 * If either of addr_src or addr_dest is stack pointer, we don't do the
4012 * CPP optimization as stack is modelled by registers on NFP.
/* Check whether the (ld_meta, st_meta) instruction pair is a same-width
 * load-then-store through the same register — i.e. one step of a memory
 * copy — with no jump landing on the store.
 */
4015 curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
4016 struct nfp_insn_meta *st_meta)
4018 struct bpf_insn *ld = &ld_meta->insn;
4019 struct bpf_insn *st = &st_meta->insn;
4021 if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
/* Source must be packet or map value; stack is modelled by regs. */
4024 if (ld_meta->ptr.type != PTR_TO_PACKET &&
4025 ld_meta->ptr.type != PTR_TO_MAP_VALUE)
4028 if (st_meta->ptr.type != PTR_TO_PACKET)
4031 if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
/* The copied value must flow load -> store via one register. */
4034 if (ld->dst_reg != st->src_reg)
4037 /* There is jump to the store insn in this pair. */
4038 if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
4044 /* Currently, we only support chaining load/store pairs if:
4046 * - Their address base registers are the same.
4047 * - Their address offsets are in the same order.
4048 * - They operate at the same memory width.
4049 * - There is no jump into the middle of them.
/* Decide whether the current load/store pair continues the chain started
 * by (prev_ld, prev_st): same registers, same width, offsets advancing by
 * exactly one element in a consistent (ascending or descending) order,
 * and no jump landing on the load.
 */
4052 curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
4053 struct nfp_insn_meta *st_meta,
4054 struct bpf_insn *prev_ld,
4055 struct bpf_insn *prev_st)
4057 u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
4058 struct bpf_insn *ld = &ld_meta->insn;
4059 struct bpf_insn *st = &st_meta->insn;
4060 s16 prev_ld_off, prev_st_off;
4062 /* This pair is the start pair. */
4066 prev_size = BPF_LDST_BYTES(prev_ld);
4067 curr_size = BPF_LDST_BYTES(ld);
4068 prev_ld_base = prev_ld->src_reg;
4069 prev_st_base = prev_st->dst_reg;
4070 prev_ld_dst = prev_ld->dst_reg;
4071 prev_ld_off = prev_ld->off;
4072 prev_st_off = prev_st->off;
/* Same transfer register, same base registers, same width. */
4074 if (ld->dst_reg != prev_ld_dst)
4077 if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
4080 if (curr_size != prev_size)
4083 /* There is jump to the head of this pair. */
4084 if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
4087 /* Both in ascending order. */
4088 if (prev_ld_off + prev_size == ld->off &&
4089 prev_st_off + prev_size == st->off)
4092 /* Both in descending order. */
4093 if (ld->off + curr_size == prev_ld_off &&
4094 st->off + curr_size == prev_st_off)
4100 /* Return TRUE if cross memory access happens. Cross memory access means
4101 * store area is overlapping with load area that a later load might load
4102 * the value from previous store, for this case we can't treat the sequence
4103 * as an memory copy.
/* Detect whether a new load could read memory written by the chain's
 * store region; if so the sequence is not a pure memcpy and must not be
 * fused into a single CPP transfer.
 */
4106 cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
4107 struct nfp_insn_meta *head_st_meta)
4109 s16 head_ld_off, head_st_off, ld_off;
4111 /* Different pointer types does not overlap. */
4112 if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
4115 /* load and store are both PTR_TO_PACKET, check ID info. */
4116 if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
4119 /* Canonicalize the offsets. Turn all of them against the original
4122 head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
4123 head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
4124 ld_off = ld->off + head_ld_meta->ptr.off;
4126 /* Ascending order cross. */
4127 if (ld_off > head_ld_off &&
4128 head_ld_off < head_st_off && ld_off >= head_st_off)
4131 /* Descending order cross. */
4132 if (ld_off < head_ld_off &&
4133 head_ld_off > head_st_off && ld_off <= head_st_off)
4139 /* This pass tries to identify the following instruction sequences.
4141 * load R, [regA + offA]
4142 * store [regB + offB], R
4143 * load R, [regA + offA + const_imm_A]
4144 * store [regB + offB + const_imm_A], R
4145 * load R, [regA + offA + 2 * const_imm_A]
4146 * store [regB + offB + 2 * const_imm_A], R
4149 * Above sequence is typically generated by compiler when lowering
4150 * memcpy. NFP prefers using CPP instructions to accelerate it.
/* Fuse chains of load/store pairs that together form a memcpy into the
 * head pair: the head load accumulates ldst_gather_len (negative for a
 * descending copy) and pairs with the head store; all other insns in the
 * chain are marked skipped.
 */
4152 static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
4154 struct nfp_insn_meta *head_ld_meta = NULL;
4155 struct nfp_insn_meta *head_st_meta = NULL;
4156 struct nfp_insn_meta *meta1, *meta2;
4157 struct bpf_insn *prev_ld = NULL;
4158 struct bpf_insn *prev_st = NULL;
4161 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
4162 struct bpf_insn *ld = &meta1->insn;
4163 struct bpf_insn *st = &meta2->insn;
4165 /* Reset record status if any of the following if true:
4166 * - The current insn pair is not load/store.
4167 * - The load/store pair doesn't chain with previous one.
4168 * - The chained load/store pair crossed with previous pair.
4169 * - The chained load/store pair has a total size of memory
4170 * copy beyond 128 bytes which is the maximum length a
4171 * single NFP CPP command can transfer.
4173 if (!curr_pair_is_memcpy(meta1, meta2) ||
4174 !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
4176 (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
4178 head_ld_meta->ldst_gather_len >= 128))) {
/* Finalize the chain being built, if any. */
4183 s16 prev_ld_off = prev_ld->off;
4184 s16 prev_st_off = prev_st->off;
4185 s16 head_ld_off = head_ld_meta->insn.off;
/* Descending chain: rebase head offsets to the lowest
 * address and record a negative gather length.
 */
4187 if (prev_ld_off < head_ld_off) {
4188 head_ld_meta->insn.off = prev_ld_off;
4189 head_st_meta->insn.off = prev_st_off;
4190 head_ld_meta->ldst_gather_len =
4191 -head_ld_meta->ldst_gather_len;
4194 head_ld_meta->paired_st = &head_st_meta->insn;
4195 head_st_meta->flags |=
4196 FLAG_INSN_SKIP_PREC_DEPENDENT;
/* Single-pair "chain" — nothing to fuse. */
4198 head_ld_meta->ldst_gather_len = 0;
4201 /* If the chain is ended by an load/store pair then this
4202 * could serve as the new head of the next chain.
4204 if (curr_pair_is_memcpy(meta1, meta2)) {
4205 head_ld_meta = meta1;
4206 head_st_meta = meta2;
4207 head_ld_meta->ldst_gather_len =
/* Advance past the pair we just consumed. */
4209 meta1 = nfp_meta_next(meta1);
4210 meta2 = nfp_meta_next(meta2);
4215 head_ld_meta = NULL;
4216 head_st_meta = NULL;
/* Pair chains: start a new chain or extend the current one
 * (non-head pairs are skipped at code-gen time).
 */
4225 if (!head_ld_meta) {
4226 head_ld_meta = meta1;
4227 head_st_meta = meta2;
4229 meta1->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
4230 meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
4233 head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
4234 meta1 = nfp_meta_next(meta1);
4235 meta2 = nfp_meta_next(meta2);
/* Group nearby packet loads (same variable-offset ID and constant offset
 * base, total span <= 64 bytes) into cached ranges: the first load of a
 * group initializes the cache (do_init), later loads read from it.  Any
 * jump destination, call, packet store or classic load invalidates the
 * cache.
 */
4242 static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
4244 struct nfp_insn_meta *meta, *range_node = NULL;
4245 s16 range_start = 0, range_end = 0;
4246 bool cache_avail = false;
4247 struct bpf_insn *insn;
4248 s32 range_ptr_off = 0;
4249 u32 range_ptr_id = 0;
/* Pass 1: partition loads into cacheable ranges. */
4251 list_for_each_entry(meta, &nfp_prog->insns, l) {
4252 if (meta->flags & FLAG_INSN_IS_JUMP_DST)
4253 cache_avail = false;
4255 if (meta->flags & FLAG_INSN_SKIP_MASK)
/* Writes to the packet or calls may change packet data. */
4260 if (is_mbpf_store_pkt(meta) ||
4261 insn->code == (BPF_JMP | BPF_CALL) ||
4262 is_mbpf_classic_store_pkt(meta) ||
4263 is_mbpf_classic_load(meta)) {
4264 cache_avail = false;
4268 if (!is_mbpf_load(meta))
4271 if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
4272 cache_avail = false;
4279 goto end_current_then_start_new;
4283 /* Check ID to make sure two reads share the same
4284 * variable offset against PTR_TO_PACKET, and check OFF
4285 * to make sure they also share the same constant
4288 * OFFs don't really need to be the same, because they
4289 * are the constant offsets against PTR_TO_PACKET, so
4290 * for different OFFs, we could canonicalize them to
4291 * offsets against original packet pointer. We don't
4294 if (meta->ptr.id == range_ptr_id &&
4295 meta->ptr.off == range_ptr_off) {
4296 s16 new_start = range_start;
4297 s16 end, off = insn->off;
4298 s16 new_end = range_end;
4299 bool changed = false;
/* Try to grow the current range to cover this load. */
4301 if (off < range_start) {
4306 end = off + BPF_LDST_BYTES(insn);
4307 if (end > range_end) {
/* 64 bytes is the maximum cacheable window. */
4315 if (new_end - new_start <= 64) {
4316 /* Install new range. */
4317 range_start = new_start;
4318 range_end = new_end;
/* Close the current range and open a new one headed by this load. */
4323 end_current_then_start_new:
4324 range_node->pkt_cache.range_start = range_start;
4325 range_node->pkt_cache.range_end = range_end;
4328 range_node->pkt_cache.do_init = true;
4329 range_ptr_id = range_node->ptr.id;
4330 range_ptr_off = range_node->ptr.off;
4331 range_start = insn->off;
4332 range_end = insn->off + BPF_LDST_BYTES(insn);
/* Flush the final open range, if any. */
4336 range_node->pkt_cache.range_start = range_start;
4337 range_node->pkt_cache.range_end = range_end;
/* Pass 2: propagate each head's range to the member loads. */
4340 list_for_each_entry(meta, &nfp_prog->insns, l) {
4341 if (meta->flags & FLAG_INSN_SKIP_MASK)
4344 if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
4345 if (meta->pkt_cache.do_init) {
4346 range_start = meta->pkt_cache.range_start;
4347 range_end = meta->pkt_cache.range_end;
4349 meta->pkt_cache.range_start = range_start;
4350 meta->pkt_cache.range_end = range_end;
/* Run all pre-translation optimization passes, in dependency order. */
4356 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
4358 nfp_bpf_opt_reg_init(nfp_prog);
4360 nfp_bpf_opt_neg_add_sub(nfp_prog);
4361 nfp_bpf_opt_ld_mask(nfp_prog);
4362 nfp_bpf_opt_ld_shift(nfp_prog);
4363 nfp_bpf_opt_ldst_gather(nfp_prog);
4364 nfp_bpf_opt_pkt_cache(nfp_prog);
/* Rewrite 64-bit map-pointer immediates (BPF_LD | BPF_IMM | BPF_DW with
 * BPF_PSEUDO_MAP_FD) into the device-side map id expected by the NFP
 * firmware: the id goes into the low insn's imm, the high imm is zeroed.
 */
4369 static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
4371 struct nfp_insn_meta *meta1, *meta2;
4372 struct nfp_bpf_map *nfp_map;
4373 struct bpf_map *map;
4376 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
4377 if (meta1->flags & FLAG_INSN_SKIP_MASK ||
4378 meta2->flags & FLAG_INSN_SKIP_MASK)
4381 if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
4382 meta1->insn.src_reg != BPF_PSEUDO_MAP_FD)
/* Reassemble the 64-bit pointer from the two 32-bit imms. */
4385 map = (void *)(unsigned long)((u32)meta1->insn.imm |
4386 (u64)meta2->insn.imm << 32);
4387 if (bpf_map_offload_neutral(map)) {
4390 nfp_map = map_to_offmap(map)->dev_priv;
4394 meta1->insn.imm = id;
4395 meta2->insn.imm = 0;
/* Validate each instruction for the microcode store, add ECC bits, and
 * convert to little-endian in place.
 */
4401 static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
4403 __le64 *ustore = (__force __le64 *)prog;
4406 for (i = 0; i < len; i++) {
4409 err = nfp_ustore_check_valid_no_ecc(prog[i]);
4413 ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i]));
/* Shrink the instruction buffer to the final program length.  Best
 * effort: if the smaller allocation fails the original buffer is kept.
 */
4419 static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog)
4423 prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL);
4427 nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64);
4428 memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len);
4429 kvfree(nfp_prog->prog);
4430 nfp_prog->prog = prog;
/* Top-level JIT entry point: replace map pointers, optimize, translate,
 * then trim the output buffer.  Returns 0 or a negative error.
 */
4433 int nfp_bpf_jit(struct nfp_prog *nfp_prog)
4437 ret = nfp_bpf_replace_map_ptrs(nfp_prog);
4441 ret = nfp_bpf_optimize(nfp_prog);
4445 ret = nfp_translate(nfp_prog);
4447 pr_err("Translation failed with error %d (translated: %u)\n",
4448 ret, nfp_prog->n_translated);
4452 nfp_bpf_prog_trim(nfp_prog);
/* Pre-translation pass: resolve every jump/pseudo-call destination to its
 * meta and flag destinations as jump targets (or subprogram starts for
 * pseudo-calls).
 */
4457 void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog)
4459 struct nfp_insn_meta *meta;
4461 /* Another pass to record jump information. */
4462 list_for_each_entry(meta, &nfp_prog->insns, l) {
4463 struct nfp_insn_meta *dst_meta;
4464 u64 code = meta->insn.code;
4465 unsigned int dst_idx;
4468 if (!is_mbpf_jmp(meta))
4470 if (BPF_OP(code) == BPF_EXIT)
4472 if (is_mbpf_helper_call(meta))
4475 /* If opcode is BPF_CALL at this point, this can only be a
4476 * BPF-to-BPF call (a.k.a pseudo call).
4478 pseudo_call = BPF_OP(code) == BPF_CALL;
/* Calls encode the target in imm, jumps in off. */
4481 dst_idx = meta->n + 1 + meta->insn.imm;
4483 dst_idx = meta->n + 1 + meta->insn.off;
4485 dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx);
4488 dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START;
4490 dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
4491 meta->jmp_dst = dst_meta;
/* An eBPF opcode is supported iff it has a translation callback. */
4495 bool nfp_bpf_supported_opcode(u8 code)
4497 return !!instr_cb[code];
/* Produce a vNIC-specific copy of the translated program: duplicate the
 * instruction buffer, resolve every relocation (branch targets offset by
 * the vNIC's start offset, helper addresses from firmware capabilities),
 * strip the relocation type bits, and run the ustore/ECC conversion.
 * Returns the new buffer or an ERR_PTR.
 */
4500 void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
4506 prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64),
4509 return ERR_PTR(-ENOMEM);
4511 for (i = 0; i < nfp_prog->prog_len; i++) {
4512 enum nfp_relo_type special;
4516 special = FIELD_GET(OP_RELO_TYPE, prog[i]);
/* Relative branches shift by the vNIC's load offset. */
4521 br_add_offset(&prog[i], bv->start_off);
4523 case RELO_BR_GO_OUT:
4524 br_set_offset(&prog[i],
4525 nfp_prog->tgt_out + bv->start_off);
4527 case RELO_BR_GO_ABORT:
4528 br_set_offset(&prog[i],
4529 nfp_prog->tgt_abort + bv->start_off);
4531 case RELO_BR_GO_CALL_PUSH_REGS:
4532 if (!nfp_prog->tgt_call_push_regs) {
4533 pr_err("BUG: failed to detect subprogram registers needs\n");
4537 off = nfp_prog->tgt_call_push_regs + bv->start_off;
4538 br_set_offset(&prog[i], off);
4540 case RELO_BR_GO_CALL_POP_REGS:
4541 if (!nfp_prog->tgt_call_pop_regs) {
4542 pr_err("BUG: failed to detect subprogram registers needs\n");
4546 off = nfp_prog->tgt_call_pop_regs + bv->start_off;
4547 br_set_offset(&prog[i], off);
4549 case RELO_BR_NEXT_PKT:
4550 br_set_offset(&prog[i], bv->tgt_done);
4552 case RELO_BR_HELPER:
/* The offset field temporarily holds the helper id. */
4553 val = br_get_offset(prog[i]);
4556 case BPF_FUNC_map_lookup_elem:
4557 val = nfp_prog->bpf->helpers.map_lookup;
4559 case BPF_FUNC_map_update_elem:
4560 val = nfp_prog->bpf->helpers.map_update;
4562 case BPF_FUNC_map_delete_elem:
4563 val = nfp_prog->bpf->helpers.map_delete;
4565 case BPF_FUNC_perf_event_output:
4566 val = nfp_prog->bpf->helpers.perf_event_output;
4569 pr_err("relocation of unknown helper %d\n",
4574 br_set_offset(&prog[i], val);
4576 case RELO_IMMED_REL:
4577 immed_add_value(&prog[i], bv->start_off);
/* Clear relocation bits before loading to the device. */
4581 prog[i] &= ~OP_RELO_TYPE;
4584 err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len);
4592 return ERR_PTR(err);