nfp: bpf: fix static check error through tightening shift amount adjustment
[linux-2.6-block.git] drivers/net/ethernet/netronome/nfp/bpf/jit.c
1// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
2/* Copyright (C) 2016-2018 Netronome Systems, Inc. */
3
4#define pr_fmt(fmt) "NFP net bpf: " fmt
5
6#include <linux/bug.h>
7#include <linux/bpf.h>
8#include <linux/filter.h>
9#include <linux/kernel.h>
10#include <linux/pkt_cls.h>
11#include <linux/reciprocal_div.h>
12#include <linux/unistd.h>
13
14#include "main.h"
15#include "../nfp_asm.h"
16#include "../nfp_net_ctrl.h"
17
18/* --- NFP prog --- */
19/* The for-each macros that walk multiple entries provide pos and next<n> pointers.
20 * It's safe to modify the next pointers (but not pos).
21 */
22#define nfp_for_each_insn_walk2(nfp_prog, pos, next) \
23 for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
24 next = list_next_entry(pos, l); \
25 &(nfp_prog)->insns != &pos->l && \
26 &(nfp_prog)->insns != &next->l; \
27 pos = nfp_meta_next(pos), \
28 next = nfp_meta_next(pos))
29
30#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \
31 for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
32 next = list_next_entry(pos, l), \
33 next2 = list_next_entry(next, l); \
34 &(nfp_prog)->insns != &pos->l && \
35 &(nfp_prog)->insns != &next->l && \
36 &(nfp_prog)->insns != &next2->l; \
37 pos = nfp_meta_next(pos), \
38 next = nfp_meta_next(pos), \
39 next2 = nfp_meta_next(next))
40
41static bool
42nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
43{
44 return meta->l.prev != &nfp_prog->insns;
45}
46
47static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
48{
49 if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) {
50 pr_warn("instruction limit reached (%u NFP instructions)\n",
51 nfp_prog->prog_len);
52 nfp_prog->error = -ENOSPC;
53 return;
54 }
55
56 nfp_prog->prog[nfp_prog->prog_len] = insn;
57 nfp_prog->prog_len++;
58}
59
60static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
61{
62 return nfp_prog->prog_len;
63}
64
65static bool
66nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
67{
68 /* If there is a recorded error we may have dropped instructions;
69 * that doesn't have to be due to a translator bug, and the translation
70 * will fail anyway, so just return OK.
71 */
72 if (nfp_prog->error)
73 return true;
74 return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
75}
76
77/* --- Emitters --- */
78static void
79__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
80 u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
81 bool indir)
82{
83 u64 insn;
84
85 insn = FIELD_PREP(OP_CMD_A_SRC, areg) |
86 FIELD_PREP(OP_CMD_CTX, ctx) |
87 FIELD_PREP(OP_CMD_B_SRC, breg) |
88 FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
89 FIELD_PREP(OP_CMD_XFER, xfer) |
90 FIELD_PREP(OP_CMD_CNT, size) |
91 FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
92 FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
93 FIELD_PREP(OP_CMD_INDIR, indir) |
94 FIELD_PREP(OP_CMD_MODE, mode);
95
96 nfp_prog_push(nfp_prog, insn);
97}
98
99static void
100emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
101 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
102{
103 struct nfp_insn_re_regs reg;
104 int err;
105
106 err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
107 if (err) {
108 nfp_prog->error = err;
109 return;
110 }
111 if (reg.swap) {
112 pr_err("cmd can't swap arguments\n");
113 nfp_prog->error = -EFAULT;
114 return;
115 }
116 if (reg.dst_lmextn || reg.src_lmextn) {
117 pr_err("cmd can't use LMextn\n");
118 nfp_prog->error = -EFAULT;
119 return;
120 }
121
122 __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
123 indir);
124}
125
126static void
127emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
128 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
129{
130 emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
131}
132
133static void
134emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
135 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
136{
137 emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
138}
139
140static void
141__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
142 enum br_ctx_signal_state css, u16 addr, u8 defer)
143{
144 u16 addr_lo, addr_hi;
145 u64 insn;
146
147 addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
148 addr_hi = addr != addr_lo;
149
150 insn = OP_BR_BASE |
151 FIELD_PREP(OP_BR_MASK, mask) |
152 FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
153 FIELD_PREP(OP_BR_CSS, css) |
154 FIELD_PREP(OP_BR_DEFBR, defer) |
155 FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
156 FIELD_PREP(OP_BR_ADDR_HI, addr_hi);
157
158 nfp_prog_push(nfp_prog, insn);
159}
160
161static void
162emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
163 enum nfp_relo_type relo)
164{
165 if (mask == BR_UNC && defer > 2) {
166 pr_err("BUG: branch defer out of bounds %d\n", defer);
167 nfp_prog->error = -EFAULT;
168 return;
169 }
170
171 __emit_br(nfp_prog, mask,
172 mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
173 BR_CSS_NONE, addr, defer);
174
175 nfp_prog->prog[nfp_prog->prog_len - 1] |=
176 FIELD_PREP(OP_RELO_TYPE, relo);
177}
178
179static void
180emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
181{
182 emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
183}
184
185static void
186__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
187 bool set, bool src_lmextn)
188{
189 u16 addr_lo, addr_hi;
190 u64 insn;
191
192 addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
193 addr_hi = addr != addr_lo;
194
195 insn = OP_BR_BIT_BASE |
196 FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
197 FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
198 FIELD_PREP(OP_BR_BIT_BV, set) |
199 FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
200 FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
201 FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
202 FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);
203
204 nfp_prog_push(nfp_prog, insn);
205}
206
207static void
208emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
209 u8 defer, bool set, enum nfp_relo_type relo)
210{
211 struct nfp_insn_re_regs reg;
212 int err;
213
214 /* NOTE: The bit to test is specified as a rotation amount, such that
215 * the bit to test will be placed on the MSB of the result when
216 * doing a rotate right. For bit X, we need right rotate X + 1.
217 */
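 /* Illustrative examples of the encoding above (added note): testing bit 0
 * uses a right rotation of 1, which moves bit 0 into the MSB; testing
 * bit 31 uses a rotation of 32, a full rotate that leaves bit 31
 * (already the MSB) in place.
 */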
218 bit += 1;
219
220 err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
221 if (err) {
222 nfp_prog->error = err;
223 return;
224 }
225
226 __emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
227 reg.src_lmextn);
228
229 nfp_prog->prog[nfp_prog->prog_len - 1] |=
230 FIELD_PREP(OP_RELO_TYPE, relo);
231}
232
233static void
234emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
235{
236 emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
237}
238
239static void
240__emit_br_alu(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
241 u8 defer, bool dst_lmextn, bool src_lmextn)
242{
243 u64 insn;
244
245 insn = OP_BR_ALU_BASE |
246 FIELD_PREP(OP_BR_ALU_A_SRC, areg) |
247 FIELD_PREP(OP_BR_ALU_B_SRC, breg) |
248 FIELD_PREP(OP_BR_ALU_DEFBR, defer) |
249 FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi) |
250 FIELD_PREP(OP_BR_ALU_SRC_LMEXTN, src_lmextn) |
251 FIELD_PREP(OP_BR_ALU_DST_LMEXTN, dst_lmextn);
252
253 nfp_prog_push(nfp_prog, insn);
254}
255
256static void emit_rtn(struct nfp_prog *nfp_prog, swreg base, u8 defer)
257{
258 struct nfp_insn_ur_regs reg;
259 int err;
260
261 err = swreg_to_unrestricted(reg_none(), base, reg_imm(0), &reg);
262 if (err) {
263 nfp_prog->error = err;
264 return;
265 }
266
267 __emit_br_alu(nfp_prog, reg.areg, reg.breg, 0, defer, reg.dst_lmextn,
268 reg.src_lmextn);
269}
270
271static void
272__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
273 enum immed_width width, bool invert,
274 enum immed_shift shift, bool wr_both,
275 bool dst_lmextn, bool src_lmextn)
276{
277 u64 insn;
278
279 insn = OP_IMMED_BASE |
280 FIELD_PREP(OP_IMMED_A_SRC, areg) |
281 FIELD_PREP(OP_IMMED_B_SRC, breg) |
282 FIELD_PREP(OP_IMMED_IMM, imm_hi) |
283 FIELD_PREP(OP_IMMED_WIDTH, width) |
284 FIELD_PREP(OP_IMMED_INV, invert) |
285 FIELD_PREP(OP_IMMED_SHIFT, shift) |
286 FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
287 FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
288 FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);
289
290 nfp_prog_push(nfp_prog, insn);
291}
292
293static void
294emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
295 enum immed_width width, bool invert, enum immed_shift shift)
296{
297 struct nfp_insn_ur_regs reg;
298 int err;
299
300 if (swreg_type(dst) == NN_REG_IMM) {
301 nfp_prog->error = -EFAULT;
302 return;
303 }
304
305 err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
306 if (err) {
307 nfp_prog->error = err;
308 return;
309 }
310
311 /* Use reg.dst when destination is No-Dest. */
312 __emit_immed(nfp_prog,
313 swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
314 reg.breg, imm >> 8, width, invert, shift,
315 reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
316}
317
318static void
319__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
320 enum shf_sc sc, u8 shift,
321 u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
322 bool dst_lmextn, bool src_lmextn)
323{
324 u64 insn;
325
326 if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
327 nfp_prog->error = -EFAULT;
328 return;
329 }
330
331 /* The NFP shift instruction treats left shifts specially. If the shift
332 * direction is left, a shift amount of 1 to 31 is specified as 32 minus
333 * the amount to shift.
334 *
335 * This must not be done for an indirect shift, whose shift amount is 0.
336 * After the subtraction a shift amount of 0 would be turned into 32,
337 * which would eventually be encoded the same as 0 because only the low
338 * 5 bits are encoded, but a shift amount of 32 would fail the
339 * FIELD_PREP check done later on the shift mask (0x1f), since 32 is
340 * out of the mask range.
341 */
342 if (sc == SHF_SC_L_SHF && shift)
343 shift = 32 - shift;
344
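 /* Illustrative example of the adjustment above (added note): a left shift
 * by 8 is encoded as 32 - 8 = 24, while an indirect shift keeps the
 * encoded amount 0 and so still passes the FIELD_PREP() range check
 * against the 0x1f mask.
 */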
345 insn = OP_SHF_BASE |
346 FIELD_PREP(OP_SHF_A_SRC, areg) |
347 FIELD_PREP(OP_SHF_SC, sc) |
348 FIELD_PREP(OP_SHF_B_SRC, breg) |
349 FIELD_PREP(OP_SHF_I8, i8) |
350 FIELD_PREP(OP_SHF_SW, sw) |
351 FIELD_PREP(OP_SHF_DST, dst) |
352 FIELD_PREP(OP_SHF_SHIFT, shift) |
353 FIELD_PREP(OP_SHF_OP, op) |
354 FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
355 FIELD_PREP(OP_SHF_WR_AB, wr_both) |
356 FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
357 FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);
358
359 nfp_prog_push(nfp_prog, insn);
360}
361
362static void
363emit_shf(struct nfp_prog *nfp_prog, swreg dst,
364 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
365{
366 struct nfp_insn_re_regs reg;
367 int err;
368
369 err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
370 if (err) {
371 nfp_prog->error = err;
372 return;
373 }
374
375 __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
376 reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
377 reg.dst_lmextn, reg.src_lmextn);
378}
379
380static void
381emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
382 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
383{
384 if (sc == SHF_SC_R_ROT) {
385 pr_err("indirect shift is not allowed on rotation\n");
386 nfp_prog->error = -EFAULT;
387 return;
388 }
389
390 emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
391}
392
393static void
394__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
395 u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
396 bool dst_lmextn, bool src_lmextn)
397{
398 u64 insn;
399
400 insn = OP_ALU_BASE |
401 FIELD_PREP(OP_ALU_A_SRC, areg) |
402 FIELD_PREP(OP_ALU_B_SRC, breg) |
403 FIELD_PREP(OP_ALU_DST, dst) |
404 FIELD_PREP(OP_ALU_SW, swap) |
405 FIELD_PREP(OP_ALU_OP, op) |
406 FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
407 FIELD_PREP(OP_ALU_WR_AB, wr_both) |
408 FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
409 FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);
410
411 nfp_prog_push(nfp_prog, insn);
412}
413
414static void
415emit_alu(struct nfp_prog *nfp_prog, swreg dst,
416 swreg lreg, enum alu_op op, swreg rreg)
417{
418 struct nfp_insn_ur_regs reg;
419 int err;
420
421 err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
422 if (err) {
423 nfp_prog->error = err;
424 return;
425 }
426
427 __emit_alu(nfp_prog, reg.dst, reg.dst_ab,
428 reg.areg, op, reg.breg, reg.swap, reg.wr_both,
429 reg.dst_lmextn, reg.src_lmextn);
430}
431
432static void
433__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg,
434 enum mul_type type, enum mul_step step, u16 breg, bool swap,
435 bool wr_both, bool dst_lmextn, bool src_lmextn)
436{
437 u64 insn;
438
439 insn = OP_MUL_BASE |
440 FIELD_PREP(OP_MUL_A_SRC, areg) |
441 FIELD_PREP(OP_MUL_B_SRC, breg) |
442 FIELD_PREP(OP_MUL_STEP, step) |
443 FIELD_PREP(OP_MUL_DST_AB, dst_ab) |
444 FIELD_PREP(OP_MUL_SW, swap) |
445 FIELD_PREP(OP_MUL_TYPE, type) |
446 FIELD_PREP(OP_MUL_WR_AB, wr_both) |
447 FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) |
448 FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn);
449
450 nfp_prog_push(nfp_prog, insn);
451}
452
453static void
454emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type,
455 enum mul_step step, swreg rreg)
456{
457 struct nfp_insn_ur_regs reg;
458 u16 areg;
459 int err;
460
461 if (type == MUL_TYPE_START && step != MUL_STEP_NONE) {
462 nfp_prog->error = -EINVAL;
463 return;
464 }
465
466 if (step == MUL_LAST || step == MUL_LAST_2) {
467 /* When the step is MUL_LAST or MUL_LAST_2, the left
468 * source is used as the destination.
469 */
470 err = swreg_to_unrestricted(lreg, reg_none(), rreg, &reg);
471 areg = reg.dst;
472 } else {
473 err = swreg_to_unrestricted(reg_none(), lreg, rreg, &reg);
474 areg = reg.areg;
475 }
476
477 if (err) {
478 nfp_prog->error = err;
479 return;
480 }
481
482 __emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap,
483 reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
484}
485
486static void
487__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
488 u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
489 bool zero, bool swap, bool wr_both,
490 bool dst_lmextn, bool src_lmextn)
491{
492 u64 insn;
493
494 insn = OP_LDF_BASE |
495 FIELD_PREP(OP_LDF_A_SRC, areg) |
496 FIELD_PREP(OP_LDF_SC, sc) |
497 FIELD_PREP(OP_LDF_B_SRC, breg) |
498 FIELD_PREP(OP_LDF_I8, imm8) |
499 FIELD_PREP(OP_LDF_SW, swap) |
500 FIELD_PREP(OP_LDF_ZF, zero) |
501 FIELD_PREP(OP_LDF_BMASK, bmask) |
502 FIELD_PREP(OP_LDF_SHF, shift) |
503 FIELD_PREP(OP_LDF_WR_AB, wr_both) |
504 FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
505 FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);
506
507 nfp_prog_push(nfp_prog, insn);
508}
509
510static void
511emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
512 enum shf_sc sc, u8 shift, bool zero)
513{
514 struct nfp_insn_re_regs reg;
515 int err;
516
517 /* Note: ld_field is special as it uses one of the src regs as dst */
518 err = swreg_to_restricted(dst, dst, src, &reg, true);
519 if (err) {
520 nfp_prog->error = err;
521 return;
522 }
523
524 __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
525 reg.i8, zero, reg.swap, reg.wr_both,
526 reg.dst_lmextn, reg.src_lmextn);
527}
528
529static void
530emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
531 enum shf_sc sc, u8 shift)
532{
533 emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
534}
535
536static void
537__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
538 bool dst_lmextn, bool src_lmextn)
539{
540 u64 insn;
541
542 insn = OP_LCSR_BASE |
543 FIELD_PREP(OP_LCSR_A_SRC, areg) |
544 FIELD_PREP(OP_LCSR_B_SRC, breg) |
545 FIELD_PREP(OP_LCSR_WRITE, wr) |
546 FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
547 FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
548 FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);
549
550 nfp_prog_push(nfp_prog, insn);
551}
552
553static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
554{
555 struct nfp_insn_ur_regs reg;
556 int err;
557
558 /* This instruction takes immeds instead of reg_none() for the ignored
559 * operand, but we can't encode 2 immeds in one instr with our normal
560 * swreg infra so if param is an immed, we encode as reg_none() and
561 * copy the immed to both operands.
562 */
563 if (swreg_type(src) == NN_REG_IMM) {
564 err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
565 reg.breg = reg.areg;
566 } else {
567 err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
568 }
569 if (err) {
570 nfp_prog->error = err;
571 return;
572 }
573
574 __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
575 false, reg.src_lmextn);
576}
577
578/* CSR value is read in following immed[gpr, 0] */
579static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
580{
581 __emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
582}
583
584static void emit_nop(struct nfp_prog *nfp_prog)
585{
586 __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
587}
588
589/* --- Wrappers --- */
590static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
591{
592 if (!(imm & 0xffff0000)) {
593 *val = imm;
594 *shift = IMMED_SHIFT_0B;
595 } else if (!(imm & 0xff0000ff)) {
596 *val = imm >> 8;
597 *shift = IMMED_SHIFT_1B;
598 } else if (!(imm & 0x0000ffff)) {
599 *val = imm >> 16;
600 *shift = IMMED_SHIFT_2B;
601 } else {
602 return false;
603 }
604
605 return true;
606}
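/* Illustrative examples of pack_immed() (added note): 0x0000beef fits
 * directly (IMMED_SHIFT_0B), 0x00abcd00 packs as 0xabcd shifted by one byte
 * (IMMED_SHIFT_1B), 0xdead0000 packs as 0xdead shifted by two bytes
 * (IMMED_SHIFT_2B), and 0x12345678 matches none of the patterns so the
 * function returns false.
 */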
607
608static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
609{
610 enum immed_shift shift;
611 u16 val;
612
613 if (pack_immed(imm, &val, &shift)) {
614 emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
615 } else if (pack_immed(~imm, &val, &shift)) {
616 emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
617 } else {
618 emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
619 false, IMMED_SHIFT_0B);
620 emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
621 false, IMMED_SHIFT_2B);
622 }
623}
624
625static void
626wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
627 enum nfp_relo_type relo)
628{
629 if (imm > 0xffff) {
630 pr_err("relocation of a large immediate!\n");
631 nfp_prog->error = -EFAULT;
632 return;
633 }
634 emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
635
636 nfp_prog->prog[nfp_prog->prog_len - 1] |=
637 FIELD_PREP(OP_RELO_TYPE, relo);
638}
639
640/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
641 * If the @imm is small enough encode it directly in operand and return
642 * otherwise load @imm to a spare register and return its encoding.
643 */
644static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
645{
646 if (FIELD_FIT(UR_REG_IMM_MAX, imm))
647 return reg_imm(imm);
648
649 wrp_immed(nfp_prog, tmp_reg, imm);
650 return tmp_reg;
651}
652
653/* re_load_imm_any() - encode immediate or use tmp register (restricted)
654 * If the @imm is small enough encode it directly in operand and return
655 * otherwise load @imm to a spare register and return its encoding.
656 */
657static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
658{
659 if (FIELD_FIT(RE_REG_IMM_MAX, imm))
660 return reg_imm(imm);
661
662 wrp_immed(nfp_prog, tmp_reg, imm);
663 return tmp_reg;
664}
665
666static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
667{
668 while (count--)
669 emit_nop(nfp_prog);
670}
671
672static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
673{
674 emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
675}
676
677static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
678{
679 wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
680}
681
682/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
683 * result to @dst from low end.
684 */
685static void
686wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
687 u8 offset)
688{
689 enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
690 u8 mask = (1 << field_len) - 1;
691
692 emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
693}
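/* Illustrative example (added note): wrp_reg_subpart(nfp_prog, dst, src, 2, 2)
 * uses a byte mask of 0x3 and a right shift of 16, so bytes 2-3 of @src end
 * up in bytes 0-1 of @dst with the remaining bytes zeroed.
 */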
694
695/* wrp_reg_or_subpart() - load @field_len bytes from low end of @src, or the
696 * result to @dst from offset, there is no change on the other bits of @dst.
697 */
698static void
699wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
700 u8 field_len, u8 offset)
701{
702 enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
703 u8 mask = ((1 << field_len) - 1) << offset;
704
705 emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
706}
707
708static void
709addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
710 swreg *rega, swreg *regb)
711{
712 if (offset == reg_imm(0)) {
713 *rega = reg_a(src_gpr);
714 *regb = reg_b(src_gpr + 1);
715 return;
716 }
717
718 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
719 emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
720 reg_imm(0));
721 *rega = imm_a(nfp_prog);
722 *regb = imm_b(nfp_prog);
723}
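/* Illustrative note (assumption about the register-pair layout): the low
 * 32 bits of the 40-bit address live in gpr @src_gpr and the upper bits in
 * @src_gpr + 1; adding @offset may carry out of the low word, which is why
 * the second emit_alu() above propagates the carry with ALU_OP_ADD_C.
 */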
724
725/* NFP has a Command Push Pull bus which supports bulk memory operations. */
726static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
727{
728 bool descending_seq = meta->ldst_gather_len < 0;
729 s16 len = abs(meta->ldst_gather_len);
730 swreg src_base, off;
731 bool src_40bit_addr;
732 unsigned int i;
733 u8 xfer_num;
734
735 off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
736 src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
737 src_base = reg_a(meta->insn.src_reg * 2);
738 xfer_num = round_up(len, 4) / 4;
739
740 if (src_40bit_addr)
741 addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base,
742 &off);
743
744 /* Setup PREV_ALU fields to override memory read length. */
745 if (len > 32)
746 wrp_immed(nfp_prog, reg_none(),
747 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
748
749 /* Memory read from source addr into transfer-in registers. */
750 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
751 src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
752 src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);
753
754 /* Move from transfer-in to transfer-out. */
755 for (i = 0; i < xfer_num; i++)
756 wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));
757
758 off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));
759
760 if (len <= 8) {
761 /* Use single direct_ref write8. */
762 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
763 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
764 CMD_CTX_SWAP);
765 } else if (len <= 32 && IS_ALIGNED(len, 4)) {
766 /* Use single direct_ref write32. */
767 emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
768 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
769 CMD_CTX_SWAP);
770 } else if (len <= 32) {
771 /* Use single indirect_ref write8. */
772 wrp_immed(nfp_prog, reg_none(),
773 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
774 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
775 reg_a(meta->paired_st->dst_reg * 2), off,
776 len - 1, CMD_CTX_SWAP);
777 } else if (IS_ALIGNED(len, 4)) {
778 /* Use single indirect_ref write32. */
779 wrp_immed(nfp_prog, reg_none(),
780 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
781 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
782 reg_a(meta->paired_st->dst_reg * 2), off,
783 xfer_num - 1, CMD_CTX_SWAP);
784 } else if (len <= 40) {
785 /* Use one direct_ref write32 to write the first 32-bytes, then
786 * another direct_ref write8 to write the remaining bytes.
787 */
788 emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
789 reg_a(meta->paired_st->dst_reg * 2), off, 7,
790 CMD_CTX_SWAP);
791
792 off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
793 imm_b(nfp_prog));
794 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
795 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
796 CMD_CTX_SWAP);
797 } else {
798 /* Use one indirect_ref write32 to write 4-bytes aligned length,
799 * then another direct_ref write8 to write the remaining bytes.
800 */
801 u8 new_off;
802
803 wrp_immed(nfp_prog, reg_none(),
804 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
805 emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
806 reg_a(meta->paired_st->dst_reg * 2), off,
807 xfer_num - 2, CMD_CTX_SWAP);
808 new_off = meta->paired_st->off + (xfer_num - 1) * 4;
809 off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
810 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
811 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
812 (len & 0x3) - 1, CMD_CTX_SWAP);
813 }
814
815 /* TODO: The following extra load is to make sure the data flow is
816 * identical before and after we do the memory copy optimization.
817 *
818 * The load destination register is not guaranteed to be dead, so we
819 * need to make sure it is loaded with the value the same as before
820 * this transformation.
821 *
822 * These extra loads could be removed once we have accurate register
823 * usage information.
824 */
825 if (descending_seq)
826 xfer_num = 0;
827 else if (BPF_SIZE(meta->insn.code) != BPF_DW)
828 xfer_num = xfer_num - 1;
829 else
830 xfer_num = xfer_num - 2;
831
832 switch (BPF_SIZE(meta->insn.code)) {
833 case BPF_B:
834 wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
835 reg_xfer(xfer_num), 1,
836 IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
837 break;
838 case BPF_H:
839 wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
840 reg_xfer(xfer_num), 2, (len & 3) ^ 2);
841 break;
842 case BPF_W:
843 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
844 reg_xfer(0));
845 break;
846 case BPF_DW:
847 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
848 reg_xfer(xfer_num));
849 wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
850 reg_xfer(xfer_num + 1));
851 break;
852 }
853
854 if (BPF_SIZE(meta->insn.code) != BPF_DW)
855 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
856
857 return 0;
858}
859
860static int
861data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
862{
863 unsigned int i;
864 u16 shift, sz;
865
866 /* We load the value from the address indicated in @offset and then
867 * shift out the data we don't need. Note: this is big endian!
868 */
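 /* Illustrative example (added note): for a 1 byte load the command still
 * reads 4 bytes (sz = 4) and the wanted byte is shifted down by
 * (4 - 1) * 8 = 24 bits.
 */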
869 sz = max(size, 4);
870 shift = size < 4 ? 4 - size : 0;
871
872 emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
873 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);
874
875 i = 0;
876 if (shift)
877 emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
878 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
879 else
880 for (; i * 4 < size; i++)
881 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
882
883 if (i < 2)
884 wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
885
886 return 0;
887}
888
889static int
890data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
891 swreg lreg, swreg rreg, int size, enum cmd_mode mode)
892{
893 unsigned int i;
894 u8 mask, sz;
895
896 /* We load the value from the address indicated in rreg + lreg and then
897 * mask out the data we don't need. Note: this is little endian!
898 */
899 sz = max(size, 4);
900 mask = size < 4 ? GENMASK(size - 1, 0) : 0;
901
902 emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
903 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);
904
905 i = 0;
906 if (mask)
907 emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
908 reg_xfer(0), SHF_SC_NONE, 0, true);
909 else
910 for (; i * 4 < size; i++)
911 wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));
912
913 if (i < 2)
914 wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);
915
916 return 0;
917}
918
919static int
920data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
921 u8 dst_gpr, u8 size)
922{
923 return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
924 size, CMD_MODE_32b);
925}
926
927static int
928data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
929 u8 dst_gpr, u8 size)
930{
931 swreg rega, regb;
932
933 addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);
934
935 return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
936 size, CMD_MODE_40b_BA);
937}
938
939static int
940construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
941{
942 swreg tmp_reg;
943
944 /* Calculate the true offset (src_reg + imm) */
945 tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
946 emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);
947
948 /* Check packet length (size guaranteed to fit b/c it's u8) */
949 emit_alu(nfp_prog, imm_a(nfp_prog),
950 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
951 emit_alu(nfp_prog, reg_none(),
952 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
953 emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
954
955 /* Load data */
956 return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
957}
958
959static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
960{
961 swreg tmp_reg;
962
963 /* Check packet length */
964 tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
965 emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
966 emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);
967
968 /* Load data */
969 tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
970 return data_ld(nfp_prog, tmp_reg, 0, size);
971}
972
973static int
974data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
975 u8 src_gpr, u8 size)
976{
977 unsigned int i;
978
979 for (i = 0; i * 4 < size; i++)
980 wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));
981
982 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
983 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
984
985 return 0;
986}
987
988static int
989data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
990 u64 imm, u8 size)
991{
992 wrp_immed(nfp_prog, reg_xfer(0), imm);
993 if (size == 8)
994 wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);
995
996 emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
997 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);
998
999 return 0;
1000}
1001
1002typedef int
1003(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
1004 unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
1005 bool needs_inc);
1006
1007static int
1008wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
1009 unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
1010 bool needs_inc)
1011{
1012 bool should_inc = needs_inc && new_gpr && !last;
1013 u32 idx, src_byte;
1014 enum shf_sc sc;
1015 swreg reg;
1016 int shf;
1017 u8 mask;
1018
1019 if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
1020 return -EOPNOTSUPP;
1021
1022 idx = off / 4;
1023
1024 /* Move the entire word */
1025 if (size == 4) {
1026 wrp_mov(nfp_prog, reg_both(dst),
1027 should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
1028 return 0;
1029 }
1030
1031 if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
1032 return -EOPNOTSUPP;
1033
1034 src_byte = off % 4;
1035
1036 mask = (1 << size) - 1;
1037 mask <<= dst_byte;
1038
1039 if (WARN_ON_ONCE(mask > 0xf))
1040 return -EOPNOTSUPP;
1041
1042 shf = abs(src_byte - dst_byte) * 8;
1043 if (src_byte == dst_byte) {
1044 sc = SHF_SC_NONE;
1045 } else if (src_byte < dst_byte) {
1046 shf = 32 - shf;
1047 sc = SHF_SC_L_SHF;
1048 } else {
1049 sc = SHF_SC_R_SHF;
1050 }
1051
1052 /* ld_field can address fewer indexes; if the offset is too large, do RMW.
1053 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
1054 */
1055 if (idx <= RE_REG_LM_IDX_MAX) {
1056 reg = reg_lm(lm3 ? 3 : 0, idx);
1057 } else {
1058 reg = imm_a(nfp_prog);
1059 /* If it's not the first part of the load and we start a new GPR
1060 * that means we are loading a second part of the LMEM word into
1061 * a new GPR. IOW we've already looked at that LMEM word and
1062 * therefore it has been loaded into imm_a().
1063 */
1064 if (first || !new_gpr)
1065 wrp_mov(nfp_prog, reg, reg_lm(0, idx));
1066 }
1067
1068 emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);
1069
1070 if (should_inc)
1071 wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
1072
1073 return 0;
1074}
1075
1076static int
1077wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
1078 unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
1079 bool needs_inc)
1080{
1081 bool should_inc = needs_inc && new_gpr && !last;
1082 u32 idx, dst_byte;
1083 enum shf_sc sc;
1084 swreg reg;
1085 int shf;
1086 u8 mask;
1087
1088 if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
1089 return -EOPNOTSUPP;
1090
1091 idx = off / 4;
1092
1093 /* Move the entire word */
1094 if (size == 4) {
2df03a50
JK
1095 wrp_mov(nfp_prog,
1096 should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
1097 reg_b(src));
1098 return 0;
1099 }
1100
1101 if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
1102 return -EOPNOTSUPP;
1103
1104 dst_byte = off % 4;
1105
1106 mask = (1 << size) - 1;
1107 mask <<= dst_byte;
1108
1109 if (WARN_ON_ONCE(mask > 0xf))
1110 return -EOPNOTSUPP;
1111
1112 shf = abs(src_byte - dst_byte) * 8;
1113 if (src_byte == dst_byte) {
1114 sc = SHF_SC_NONE;
1115 } else if (src_byte < dst_byte) {
1116 shf = 32 - shf;
1117 sc = SHF_SC_L_SHF;
1118 } else {
1119 sc = SHF_SC_R_SHF;
1120 }
1121
1122 /* ld_field can address fewer indexes; if the offset is too large, do RMW.
1123 * Because we RMW twice we waste 2 cycles on unaligned 8 byte writes.
1124 */
1125 if (idx <= RE_REG_LM_IDX_MAX) {
1126 reg = reg_lm(lm3 ? 3 : 0, idx);
1127 } else {
1128 reg = imm_a(nfp_prog);
1129 /* Only first and last LMEM locations are going to need RMW,
1130 * the middle location will be overwritten fully.
1131 */
1132 if (first || last)
1133 wrp_mov(nfp_prog, reg, reg_lm(0, idx));
1134 }
1135
1136 emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);
1137
1138 if (new_gpr || last) {
1139 if (idx > RE_REG_LM_IDX_MAX)
1140 wrp_mov(nfp_prog, reg_lm(0, idx), reg);
1141 if (should_inc)
1142 wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
1143 }
1144
1145 return 0;
1146}
1147
1148static int
1149mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1150 unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
1151 bool clr_gpr, lmem_step step)
1152{
1153 s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off;
1154 bool first = true, last;
1155 bool needs_inc = false;
1156 swreg stack_off_reg;
1157 u8 prev_gpr = 255;
1158 u32 gpr_byte = 0;
1159 bool lm3 = true;
1160 int ret;
1161
1162 if (meta->ptr_not_const ||
1163 meta->flags & FLAG_INSN_PTR_CALLER_STACK_FRAME) {
1164 /* Use of the last encountered ptr_off is OK, they all have
1165 * the same alignment. We depend on the low bits of the value being
1166 * discarded when written to the LMaddr register.
1167 */
1168 stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
1169 stack_imm(nfp_prog));
1170
1171 emit_alu(nfp_prog, imm_b(nfp_prog),
1172 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);
1173
1174 needs_inc = true;
1175 } else if (off + size <= 64) {
1176 /* We can reach bottom 64B with LMaddr0 */
1177 lm3 = false;
1178 } else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
1179 /* We have to set up a new pointer. If we know the offset
1180 * and the entire access falls into a single 32 byte aligned
1181 * window we won't have to increment the LM pointer.
1182 * The 32 byte alignment is important because offset is ORed in
1183 * not added when doing *l$indexN[off].
1184 */
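 /* Illustrative example (added note): off 68 with size 4 stays inside the
 * 32 byte window starting at 64, so LM3 is pointed at stack + 64 and the
 * remaining offset 68 % 32 == 4 can safely be ORed into the access.
 */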
1185 stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
1186 stack_imm(nfp_prog));
1187 emit_alu(nfp_prog, imm_b(nfp_prog),
1188 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
1189
1190 off %= 32;
1191 } else {
1192 stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
1193 stack_imm(nfp_prog));
1194
1195 emit_alu(nfp_prog, imm_b(nfp_prog),
1196 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);
1197
1198 needs_inc = true;
1199 }
1200 if (lm3) {
1201 emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
1202 /* For size < 4 one slot will be filled by zeroing of upper. */
1203 wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
1204 }
1205
1206 if (clr_gpr && size < 8)
1207 wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
1208
1209 while (size) {
1210 u32 slice_end;
1211 u8 slice_size;
1212
1213 slice_size = min(size, 4 - gpr_byte);
1214 slice_end = min(off + slice_size, round_up(off + 1, 4));
1215 slice_size = slice_end - off;
1216
1217 last = slice_size == size;
1218
1219 if (needs_inc)
1220 off %= 4;
1221
1222 ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
1223 first, gpr != prev_gpr, last, lm3, needs_inc);
1224 if (ret)
1225 return ret;
1226
1227 prev_gpr = gpr;
1228 first = false;
1229
1230 gpr_byte += slice_size;
1231 if (gpr_byte >= 4) {
1232 gpr_byte -= 4;
1233 gpr++;
1234 }
1235
1236 size -= slice_size;
1237 off += slice_size;
1238 }
1239
1240 return 0;
1241}
1242
1243static void
1244wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
1245{
1246 swreg tmp_reg;
1247
1248 if (alu_op == ALU_OP_AND) {
1249 if (!imm)
1250 wrp_immed(nfp_prog, reg_both(dst), 0);
1251 if (!imm || !~imm)
1252 return;
1253 }
1254 if (alu_op == ALU_OP_OR) {
1255 if (!~imm)
1256 wrp_immed(nfp_prog, reg_both(dst), ~0U);
1257 if (!imm || !~imm)
1258 return;
1259 }
1260 if (alu_op == ALU_OP_XOR) {
1261 if (!~imm)
1262 emit_alu(nfp_prog, reg_both(dst), reg_none(),
1263 ALU_OP_NOT, reg_b(dst));
1264 if (!imm || !~imm)
1265 return;
1266 }
1267
1268 tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
1269 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
1270}
1271
1272static int
1273wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1274 enum alu_op alu_op, bool skip)
1275{
1276 const struct bpf_insn *insn = &meta->insn;
1277 u64 imm = insn->imm; /* sign extend */
1278
1279 if (skip) {
1280 meta->flags |= FLAG_INSN_SKIP_NOOP;
1281 return 0;
1282 }
1283
1284 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
1285 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);
1286
1287 return 0;
1288}
1289
1290static int
1291wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1292 enum alu_op alu_op)
1293{
1294 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
1295
1296 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1297 emit_alu(nfp_prog, reg_both(dst + 1),
1298 reg_a(dst + 1), alu_op, reg_b(src + 1));
1299
1300 return 0;
1301}
1302
1303static int
1304wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1305 enum alu_op alu_op)
1306{
1307 const struct bpf_insn *insn = &meta->insn;
1308
1309 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
1310 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
1311
1312 return 0;
1313}
1314
1315static int
1316wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1317 enum alu_op alu_op)
1318{
1319 u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;
1320
1321 emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
1322 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
1323
1324 return 0;
1325}
1326
1327static void
1328wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
1329 enum br_mask br_mask, u16 off)
1330{
1331 emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
1332 emit_br(nfp_prog, br_mask, off, 0);
1333}
1334
1335static int
1336wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1337 enum alu_op alu_op, enum br_mask br_mask)
1338{
1339 const struct bpf_insn *insn = &meta->insn;
1340
1341 wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
1342 insn->src_reg * 2, br_mask, insn->off);
1343 if (is_mbpf_jmp64(meta))
1344 wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
1345 insn->src_reg * 2 + 1, br_mask, insn->off);
1346
1347 return 0;
1348}
1349
1350static const struct jmp_code_map {
1351 enum br_mask br_mask;
1352 bool swap;
1353} jmp_code_map[] = {
1354 [BPF_JGT >> 4] = { BR_BLO, true },
1355 [BPF_JGE >> 4] = { BR_BHS, false },
1356 [BPF_JLT >> 4] = { BR_BLO, false },
1357 [BPF_JLE >> 4] = { BR_BHS, true },
1358 [BPF_JSGT >> 4] = { BR_BLT, true },
1359 [BPF_JSGE >> 4] = { BR_BGE, false },
1360 [BPF_JSLT >> 4] = { BR_BLT, false },
1361 [BPF_JSLE >> 4] = { BR_BGE, true },
1362};
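/* Illustrative summary of the table above (added note): BPF_JGT dst > src is
 * emitted as src - dst with a branch on "below" (BR_BLO, swap == true), while
 * BPF_JGE dst >= src is emitted directly as dst - src with a branch on
 * "higher or same" (BR_BHS).
 */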
1363
1364static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
1365{
1366 unsigned int op;
1367
1368 op = BPF_OP(meta->insn.code) >> 4;
1369 /* br_mask of 0 is BR_BEQ which we don't use in jump code table */
1370 if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
1371 !jmp_code_map[op].br_mask,
1372 "no code found for jump instruction"))
1373 return NULL;
1374
1375 return &jmp_code_map[op];
1376}
1377
1378static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1379{
1380 const struct bpf_insn *insn = &meta->insn;
1381 u64 imm = insn->imm; /* sign extend */
1382 const struct jmp_code_map *code;
1383 enum alu_op alu_op, carry_op;
1384 u8 reg = insn->dst_reg * 2;
1385 swreg tmp_reg;
1386
1387 code = nfp_jmp_code_get(meta);
1388 if (!code)
1389 return -EINVAL;
1390
1391 alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
1392 carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;
1393
1394 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
1395 if (!code->swap)
1396 emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
1397 else
1398 emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));
1399
1400 if (is_mbpf_jmp64(meta)) {
1401 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
1402 if (!code->swap)
1403 emit_alu(nfp_prog, reg_none(),
1404 reg_a(reg + 1), carry_op, tmp_reg);
1405 else
1406 emit_alu(nfp_prog, reg_none(),
1407 tmp_reg, carry_op, reg_a(reg + 1));
1408 }
1409
1410 emit_br(nfp_prog, code->br_mask, insn->off, 0);
1411
1412 return 0;
1413}
1414
1415static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1416{
1417 const struct bpf_insn *insn = &meta->insn;
1418 const struct jmp_code_map *code;
1419 u8 areg, breg;
1420
1421 code = nfp_jmp_code_get(meta);
1422 if (!code)
1423 return -EINVAL;
1424
1425 areg = insn->dst_reg * 2;
1426 breg = insn->src_reg * 2;
1427
1428 if (code->swap) {
1429 areg ^= breg;
1430 breg ^= areg;
1431 areg ^= breg;
1432 }
1433
1434 emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
1435 if (is_mbpf_jmp64(meta))
1436 emit_alu(nfp_prog, reg_none(),
1437 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
1438 emit_br(nfp_prog, code->br_mask, insn->off, 0);
1439
1440 return 0;
1441}
1442
1443static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
1444{
1445 emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
1446 SHF_SC_R_ROT, 8);
1447 emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
1448 SHF_SC_R_ROT, 16);
1449}
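/* Illustrative example (added note): wrp_end32() byte swaps a 32-bit word,
 * e.g. 0x11223344 becomes 0x44332211 - the first ld_field rotates right by 8
 * and writes all four bytes, the second rotates right by 16 and fixes up
 * bytes 0 and 2.
 */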
1450
1451static void
1452wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
1453 swreg rreg, bool gen_high_half)
1454{
1455 emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
1456 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg);
1457 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg);
1458 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg);
1459 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg);
1460 emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none());
1461 if (gen_high_half)
1462 emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2,
1463 reg_none());
1464 else
1465 wrp_immed(nfp_prog, dst_hi, 0);
1466}
1467
1468static void
1469wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
1470 swreg rreg)
1471{
1472 emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
1473 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg);
1474 emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg);
1475 emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none());
1476}
1477
1478static int
1479wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
1480 bool gen_high_half, bool ropnd_from_reg)
1481{
1482 swreg multiplier, multiplicand, dst_hi, dst_lo;
1483 const struct bpf_insn *insn = &meta->insn;
1484 u32 lopnd_max, ropnd_max;
1485 u8 dst_reg;
1486
1487 dst_reg = insn->dst_reg;
1488 multiplicand = reg_a(dst_reg * 2);
1489 dst_hi = reg_both(dst_reg * 2 + 1);
1490 dst_lo = reg_both(dst_reg * 2);
1491 lopnd_max = meta->umax_dst;
1492 if (ropnd_from_reg) {
1493 multiplier = reg_b(insn->src_reg * 2);
1494 ropnd_max = meta->umax_src;
1495 } else {
1496 u32 imm = insn->imm;
1497
1498 multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
1499 ropnd_max = imm;
1500 }
1501 if (lopnd_max > U16_MAX || ropnd_max > U16_MAX)
1502 wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier,
1503 gen_high_half);
1504 else
1505 wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier);
1506
1507 return 0;
1508}
1509
1510static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
1511{
1512 swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst);
1513 struct reciprocal_value_adv rvalue;
1514 u8 pre_shift, exp;
1515 swreg magic;
1516
1517 if (imm > U32_MAX) {
1518 wrp_immed(nfp_prog, dst_both, 0);
1519 return 0;
1520 }
1521
1522 /* NOTE: because we are using "reciprocal_value_adv", which doesn't
1523 * support "divisor > (1u << 31)", we need to JIT a separate NFP sequence
1524 * for that case. The quotient then equals the result of the unsigned
1525 * comparison "dst >= imm", which can be calculated using the following
1526 * NFP sequence:
1527 *
1528 * alu[--, dst, -, imm]
1529 * immed[imm, 0]
1530 * alu[dst, imm, +carry, 0]
1531 *
1532 */
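 /* Illustrative example (added note): with imm == 0x80000001 the quotient is
 * 1 exactly when dst >= imm, i.e. when the subtraction in the sequence
 * below leaves the carry set, and 0 otherwise.
 */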
1533 if (imm > 1U << 31) {
1534 swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
1535
1536 emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b);
1537 wrp_immed(nfp_prog, imm_a(nfp_prog), 0);
1538 emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C,
1539 reg_imm(0));
1540 return 0;
1541 }
1542
1543 rvalue = reciprocal_value_adv(imm, 32);
1544 exp = rvalue.exp;
1545 if (rvalue.is_wide_m && !(imm & 1)) {
1546 pre_shift = fls(imm & -imm) - 1;
1547 rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
1548 } else {
1549 pre_shift = 0;
1550 }
1551 magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
1552 if (imm == 1U << exp) {
1553 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
1554 SHF_SC_R_SHF, exp);
1555 } else if (rvalue.is_wide_m) {
1556 wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
1557 magic, true);
1558 emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
1559 imm_b(nfp_prog));
1560 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
1561 SHF_SC_R_SHF, 1);
1562 emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
1563 imm_b(nfp_prog));
1564 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
1565 SHF_SC_R_SHF, rvalue.sh - 1);
1566 } else {
1567 if (pre_shift)
1568 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
1569 dst_b, SHF_SC_R_SHF, pre_shift);
1570 wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
1571 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
1572 dst_b, SHF_SC_R_SHF, rvalue.sh);
1573 }
1574
1575 return 0;
1576}
1577
1578static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1579{
1580 swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
1581 struct nfp_bpf_cap_adjust_head *adjust_head;
1582 u32 ret_einval, end;
1583
1584 adjust_head = &nfp_prog->bpf->adjust_head;
1585
1586 /* Optimized version - 5 vs 14 cycles */
1587 if (nfp_prog->adjust_head_location != UINT_MAX) {
1588 if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
1589 return -EINVAL;
1590
1591 emit_alu(nfp_prog, pptr_reg(nfp_prog),
1592 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
1593 emit_alu(nfp_prog, plen_reg(nfp_prog),
1594 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1595 emit_alu(nfp_prog, pv_len(nfp_prog),
1596 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1597
1598 wrp_immed(nfp_prog, reg_both(0), 0);
1599 wrp_immed(nfp_prog, reg_both(1), 0);
1600
1601 /* TODO: when adjust head is guaranteed to succeed we can
1602 * also eliminate the following if (r0 == 0) branch.
1603 */
1604
1605 return 0;
1606 }
1607
1608 ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
1609 end = ret_einval + 2;
1610
1611 /* We need to use a temp because offset is just a part of the pkt ptr */
1612 emit_alu(nfp_prog, tmp,
1613 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));
1614
1615 /* Validate result will fit within FW datapath constraints */
1616 emit_alu(nfp_prog, reg_none(),
1617 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
1618 emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1619 emit_alu(nfp_prog, reg_none(),
1620 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
1621 emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1622
1623 /* Validate the length is at least ETH_HLEN */
1624 emit_alu(nfp_prog, tmp_len,
1625 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1626 emit_alu(nfp_prog, reg_none(),
1627 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
1628 emit_br(nfp_prog, BR_BMI, ret_einval, 0);
1629
1630 /* Load the ret code */
1631 wrp_immed(nfp_prog, reg_both(0), 0);
1632 wrp_immed(nfp_prog, reg_both(1), 0);
1633
1634 /* Modify the packet metadata */
1635 emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);
1636
1637 /* Skip over the -EINVAL ret code (defer 2) */
1638 emit_br(nfp_prog, BR_UNC, end, 2);
1639
1640 emit_alu(nfp_prog, plen_reg(nfp_prog),
1641 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1642 emit_alu(nfp_prog, pv_len(nfp_prog),
1643 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1644
1645 /* return -EINVAL target */
1646 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
1647 return -EINVAL;
1648
1649 wrp_immed(nfp_prog, reg_both(0), -22);
1650 wrp_immed(nfp_prog, reg_both(1), ~0);
1651
1652 if (!nfp_prog_confirm_current_offset(nfp_prog, end))
1653 return -EINVAL;
1654
1655 return 0;
1656}
1657
1658static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1659{
1660 u32 ret_einval, end;
1661 swreg plen, delta;
1662
1663 BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN));
1664
1665 plen = imm_a(nfp_prog);
1666 delta = reg_a(2 * 2);
1667
1668 ret_einval = nfp_prog_current_offset(nfp_prog) + 9;
1669 end = nfp_prog_current_offset(nfp_prog) + 11;
1670
1671 /* Calculate resulting length */
1672 emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta);
1673 /* delta == 0 is not allowed by the kernel, add must overflow to make
1674 * length smaller.
1675 */
1676 emit_br(nfp_prog, BR_BCC, ret_einval, 0);
1677
1678 /* if (new_len < 14) then -EINVAL */
1679 emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN));
1680 emit_br(nfp_prog, BR_BMI, ret_einval, 0);
1681
1682 emit_alu(nfp_prog, plen_reg(nfp_prog),
1683 plen_reg(nfp_prog), ALU_OP_ADD, delta);
1684 emit_alu(nfp_prog, pv_len(nfp_prog),
1685 pv_len(nfp_prog), ALU_OP_ADD, delta);
1686
1687 emit_br(nfp_prog, BR_UNC, end, 2);
1688 wrp_immed(nfp_prog, reg_both(0), 0);
1689 wrp_immed(nfp_prog, reg_both(1), 0);
1690
1691 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
1692 return -EINVAL;
1693
1694 wrp_immed(nfp_prog, reg_both(0), -22);
1695 wrp_immed(nfp_prog, reg_both(1), ~0);
1696
1697 if (!nfp_prog_confirm_current_offset(nfp_prog, end))
1698 return -EINVAL;
1699
1700 return 0;
1701}
1702
1703static int
1704map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1705{
1706 bool load_lm_ptr;
1707 u32 ret_tgt;
1708 s64 lm_off;
1709
1710 /* We only have to reload LM0 if the key is not at start of stack */
1711 lm_off = nfp_prog->stack_frame_depth;
1712 lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
1713 load_lm_ptr = meta->arg2.var_off || lm_off;
1714
1715 /* Set LM0 to start of key */
1716 if (load_lm_ptr)
1717 emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
1718 if (meta->func_id == BPF_FUNC_map_update_elem)
1719 emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
1720
1721 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
1722 2, RELO_BR_HELPER);
1723 ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
1724
1725 /* Load map ID into A0 */
1726 wrp_mov(nfp_prog, reg_a(0), reg_a(2));
1727
1728 /* Load the return address into B0 */
1729 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1730
1731 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1732 return -EINVAL;
1733
1734 /* Reset the LM0 pointer */
1735 if (!load_lm_ptr)
1736 return 0;
1737
1738 emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
1739 wrp_nops(nfp_prog, 3);
1740
1741 return 0;
1742}
1743
1744static int
1745nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1746{
1747 __emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM);
1748 /* CSR value is read in following immed[gpr, 0] */
1749 emit_immed(nfp_prog, reg_both(0), 0,
1750 IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
1751 emit_immed(nfp_prog, reg_both(1), 0,
1752 IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
1753 return 0;
1754}
1755
9816dd35
JK
1756static int
1757nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1758{
1759 swreg ptr_type;
1760 u32 ret_tgt;
1761
1762 ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));
1763
1764 ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
1765
1766 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
1767 2, RELO_BR_HELPER);
1768
1769 /* Load ptr type into A1 */
1770 wrp_mov(nfp_prog, reg_a(1), ptr_type);
1771
1772 /* Load the return address into B0 */
1773 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1774
1775 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1776 return -EINVAL;
1777
1778 return 0;
1779}
1780
d985888f
JK
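/* Handle a store to xdp_md->rx_queue_index: the 'queue selected' bit and the
 * queue value are written to the packet vector in the branch delay slots; if
 * the queue id does not fit into the 8-bit FW field, the fall-through
 * instruction overwrites the value with NFP_NET_RXR_MAX (out of range).
 */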
1781static int
1782nfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1783{
1784 u32 jmp_tgt;
1785
1786 jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5;
1787
1788 /* Make sure the queue id fits into FW field */
1789 emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2),
1790 ALU_OP_AND_NOT_B, reg_imm(0xff));
1791 emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2);
1792
1793 /* Set the 'queue selected' bit and the queue value */
1794 emit_shf(nfp_prog, pv_qsel_set(nfp_prog),
1795 pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1),
1796 SHF_SC_L_SHF, PKT_VEL_QSEL_SET_BIT);
1797 emit_ld_field(nfp_prog,
1798 pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2),
1799 SHF_SC_NONE, 0);
1800	/* Delay slots end here; we will jump over the next instruction if
1801	 * the queue value fits into the field.
1802	 */
1803 emit_ld_field(nfp_prog,
1804 pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX),
1805 SHF_SC_NONE, 0);
1806
1807 if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt))
1808 return -EINVAL;
1809
1810 return 0;
1811}
1812
cd7df56e
JK
1813/* --- Callbacks --- */
1814static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1815{
1816 const struct bpf_insn *insn = &meta->insn;
b14157ee
JK
1817 u8 dst = insn->dst_reg * 2;
1818 u8 src = insn->src_reg * 2;
1819
1820 if (insn->src_reg == BPF_REG_10) {
1821 swreg stack_depth_reg;
cd7df56e 1822
b14157ee 1823 stack_depth_reg = ur_load_imm_any(nfp_prog,
1a7e62e6 1824 nfp_prog->stack_frame_depth,
b14157ee 1825 stack_imm(nfp_prog));
1a7e62e6
QM
1826 emit_alu(nfp_prog, reg_both(dst), stack_reg(nfp_prog),
1827 ALU_OP_ADD, stack_depth_reg);
b14157ee
JK
1828 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1829 } else {
1830 wrp_reg_mov(nfp_prog, dst, src);
1831 wrp_reg_mov(nfp_prog, dst + 1, src + 1);
1832 }
cd7df56e
JK
1833
1834 return 0;
1835}
1836
1837static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1838{
1839 u64 imm = meta->insn.imm; /* sign extend */
1840
1841 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
1842 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
1843
1844 return 0;
1845}
1846
1847static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1848{
1849 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
1850}
1851
1852static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1853{
1854 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
1855}
1856
1857static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1858{
1859 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
1860}
1861
1862static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1863{
1864 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1865}
1866
1867static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1868{
1869 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
1870}
1871
1872static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1873{
1874 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1875}
1876
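/* 64-bit add/sub below are emitted as a 32-bit op on the low words followed
 * by the matching with-carry op (ALU_OP_ADD_C / ALU_OP_SUB_C) on the high
 * words.
 */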
1877static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1878{
1879 const struct bpf_insn *insn = &meta->insn;
1880
1881 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1882 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
1883 reg_b(insn->src_reg * 2));
1884 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1885 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
1886 reg_b(insn->src_reg * 2 + 1));
1887
1888 return 0;
1889}
1890
1891static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1892{
1893 const struct bpf_insn *insn = &meta->insn;
1894 u64 imm = insn->imm; /* sign extend */
1895
1896 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
1897 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
1898
1899 return 0;
1900}
1901
1902static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1903{
1904 const struct bpf_insn *insn = &meta->insn;
1905
1906 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1907 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
1908 reg_b(insn->src_reg * 2));
1909 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1910 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
1911 reg_b(insn->src_reg * 2 + 1));
1912
1913 return 0;
1914}
1915
1916static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1917{
1918 const struct bpf_insn *insn = &meta->insn;
1919 u64 imm = insn->imm; /* sign extend */
1920
1921 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
1922 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
1923
1924 return 0;
1925}
1926
d3d23fdb
JW
1927static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1928{
1929 return wrp_mul(nfp_prog, meta, true, true);
1930}
1931
1932static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1933{
1934 return wrp_mul(nfp_prog, meta, true, false);
1935}
1936
2a952b03
JW
1937static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1938{
1939 const struct bpf_insn *insn = &meta->insn;
1940
1941 return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm);
1942}
1943
1944static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1945{
1946	/* NOTE: the verifier hook has already rejected cases for which the
1947	 * verifier doesn't know whether the source operand is constant.
1948	 */
1949 return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src);
1950}
1951
254ef4d7
JW
1952static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1953{
1954 const struct bpf_insn *insn = &meta->insn;
1955
1956 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
1957 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
1958 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
1959 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));
1960
1961 return 0;
1962}
1963
991f5b36
JW
1964/* Pseudo code:
1965 * if shift_amt >= 32
1966 * dst_high = dst_low << shift_amt[4:0]
1967 * dst_low = 0;
1968 * else
1969 * dst_high = (dst_high, dst_low) >> (32 - shift_amt)
1970 * dst_low = dst_low << shift_amt
1971 *
1972 * The indirect shift will use the same logic at runtime.
1973 */
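/* Roughly equivalent C, for illustration only (not part of the emitted
 * code), with dst_lo/dst_hi as the 32-bit register pair and
 * 1 <= shift_amt <= 63:
 *
 *	if (shift_amt < 32) {
 *		dst_hi = (dst_hi << shift_amt) | (dst_lo >> (32 - shift_amt));
 *		dst_lo <<= shift_amt;
 *	} else {
 *		dst_hi = dst_lo << (shift_amt - 32);
 *		dst_lo = 0;
 *	}
 */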
1974static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
1975{
db0a4b3b
JW
1976 if (!shift_amt)
1977 return 0;
1978
991f5b36
JW
1979 if (shift_amt < 32) {
1980 emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
1981 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
1982 32 - shift_amt);
1983 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
1984 reg_b(dst), SHF_SC_L_SHF, shift_amt);
1985 } else if (shift_amt == 32) {
3cae1319
JK
1986 wrp_reg_mov(nfp_prog, dst + 1, dst);
1987 wrp_immed(nfp_prog, reg_both(dst), 0);
991f5b36
JW
1988 } else if (shift_amt > 32) {
1989 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
1990 reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
3cae1319
JK
1991 wrp_immed(nfp_prog, reg_both(dst), 0);
1992 }
cd7df56e
JK
1993
1994 return 0;
1995}
1996
991f5b36 1997static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
cd7df56e
JK
1998{
1999 const struct bpf_insn *insn = &meta->insn;
3cae1319
JK
2000 u8 dst = insn->dst_reg * 2;
2001
991f5b36
JW
2002 return __shl_imm64(nfp_prog, dst, insn->imm);
2003}
2004
2005static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2006{
2007 emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
2008 reg_b(src));
2009 emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
2010 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
2011 reg_b(dst), SHF_SC_R_DSHF);
2012}
2013
2014/* NOTE: for indirect left shift, HIGH part should be calculated first. */
2015static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2016{
2017 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2018 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2019 reg_b(dst), SHF_SC_L_SHF);
2020}
2021
2022static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2023{
2024 shl_reg64_lt32_high(nfp_prog, dst, src);
2025 shl_reg64_lt32_low(nfp_prog, dst, src);
2026}
2027
2028static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2029{
2030 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2031 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
2032 reg_b(dst), SHF_SC_L_SHF);
2033 wrp_immed(nfp_prog, reg_both(dst), 0);
2034}
2035
2036static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2037{
2038 const struct bpf_insn *insn = &meta->insn;
2039 u64 umin, umax;
2040 u8 dst, src;
2041
2042 dst = insn->dst_reg * 2;
662c5472
JW
2043 umin = meta->umin_src;
2044 umax = meta->umax_src;
991f5b36
JW
2045 if (umin == umax)
2046 return __shl_imm64(nfp_prog, dst, umin);
2047
2048 src = insn->src_reg * 2;
2049 if (umax < 32) {
2050 shl_reg64_lt32(nfp_prog, dst, src);
2051 } else if (umin >= 32) {
2052 shl_reg64_ge32(nfp_prog, dst, src);
2053 } else {
2054 /* Generate different instruction sequences depending on runtime
2055 * value of shift amount.
2056 */
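		/* For shift amounts below 64, bit 5 being set means the
		 * amount is >= 32, so a single br_bset on the source
		 * register picks between the lt32 and ge32 sequences.
		 */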
2057 u16 label_ge32, label_end;
2058
2059 label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
2060 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
2061
2062 shl_reg64_lt32_high(nfp_prog, dst, src);
2063 label_end = nfp_prog_current_offset(nfp_prog) + 6;
2064 emit_br(nfp_prog, BR_UNC, label_end, 2);
2065 /* shl_reg64_lt32_low packed in delay slot. */
2066 shl_reg64_lt32_low(nfp_prog, dst, src);
2067
2068 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
2069 return -EINVAL;
2070 shl_reg64_ge32(nfp_prog, dst, src);
2071
2072 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2073 return -EINVAL;
2074 }
2075
2076 return 0;
2077}
2078
2079/* Pseudo code:
2080 * if shift_amt >= 32
2081 *	dst_low = dst_high >> shift_amt[4:0]
2082 *	dst_high = 0;
2083 * else
2084 * dst_high = dst_high >> shift_amt
2085 * dst_low = (dst_high, dst_low) >> shift_amt
2086 *
2087 * The indirect shift will use the same logic at runtime.
2088 */
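/* Roughly equivalent C, for illustration only (not part of the emitted
 * code), with dst_lo/dst_hi as the 32-bit register pair and
 * 1 <= shift_amt <= 63:
 *
 *	if (shift_amt < 32) {
 *		dst_lo = (dst_lo >> shift_amt) | (dst_hi << (32 - shift_amt));
 *		dst_hi >>= shift_amt;
 *	} else {
 *		dst_lo = dst_hi >> (shift_amt - 32);
 *		dst_hi = 0;
 *	}
 */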
2089static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2090{
db0a4b3b
JW
2091 if (!shift_amt)
2092 return 0;
2093
991f5b36
JW
2094 if (shift_amt < 32) {
2095 emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
2096 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
2097 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
2098 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
2099 } else if (shift_amt == 32) {
3cae1319
JK
2100 wrp_reg_mov(nfp_prog, dst, dst + 1);
2101 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
991f5b36
JW
2102 } else if (shift_amt > 32) {
2103 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2104 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
3cae1319
JK
2105 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2106 }
cd7df56e
JK
2107
2108 return 0;
2109}
2110
991f5b36
JW
2111static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2112{
2113 const struct bpf_insn *insn = &meta->insn;
2114 u8 dst = insn->dst_reg * 2;
2115
2116 return __shr_imm64(nfp_prog, dst, insn->imm);
2117}
2118
2119/* NOTE: for indirect right shift, LOW part should be calculated first. */
2120static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2121{
2122 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2123 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
2124 reg_b(dst + 1), SHF_SC_R_SHF);
2125}
2126
2127static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2128{
2129 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2130 emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
2131 reg_b(dst), SHF_SC_R_DSHF);
2132}
2133
2134static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2135{
2136 shr_reg64_lt32_low(nfp_prog, dst, src);
2137 shr_reg64_lt32_high(nfp_prog, dst, src);
2138}
2139
2140static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2141{
2142 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2143 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2144 reg_b(dst + 1), SHF_SC_R_SHF);
2145 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2146}
2147
2148static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2149{
2150 const struct bpf_insn *insn = &meta->insn;
2151 u64 umin, umax;
2152 u8 dst, src;
2153
2154 dst = insn->dst_reg * 2;
662c5472
JW
2155 umin = meta->umin_src;
2156 umax = meta->umax_src;
991f5b36
JW
2157 if (umin == umax)
2158 return __shr_imm64(nfp_prog, dst, umin);
2159
2160 src = insn->src_reg * 2;
2161 if (umax < 32) {
2162 shr_reg64_lt32(nfp_prog, dst, src);
2163 } else if (umin >= 32) {
2164 shr_reg64_ge32(nfp_prog, dst, src);
2165 } else {
2166 /* Generate different instruction sequences depending on runtime
2167 * value of shift amount.
2168 */
2169 u16 label_ge32, label_end;
2170
2171 label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
2172 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
2173 shr_reg64_lt32_low(nfp_prog, dst, src);
2174 label_end = nfp_prog_current_offset(nfp_prog) + 6;
2175 emit_br(nfp_prog, BR_UNC, label_end, 2);
2176 /* shr_reg64_lt32_high packed in delay slot. */
2177 shr_reg64_lt32_high(nfp_prog, dst, src);
2178
2179 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
2180 return -EINVAL;
2181 shr_reg64_ge32(nfp_prog, dst, src);
2182
2183 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2184 return -EINVAL;
2185 }
2186
2187 return 0;
2188}
2189
f43d0f17
JW
2190/* Code logic is the same as __shr_imm64, except that ashr requires the
2191 * signedness bit to be supplied through the PREV_ALU result.
2192 */
c217abcc 2193static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
f43d0f17 2194{
db0a4b3b
JW
2195 if (!shift_amt)
2196 return 0;
2197
c217abcc 2198 if (shift_amt < 32) {
f43d0f17 2199 emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
c217abcc 2200 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
f43d0f17
JW
2201 /* Set signedness bit. */
2202 emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
2203 reg_imm(0));
2204 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
c217abcc
JW
2205 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
2206 } else if (shift_amt == 32) {
f43d0f17
JW
2207 /* NOTE: this also helps setting signedness bit. */
2208 wrp_reg_mov(nfp_prog, dst, dst + 1);
2209 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2210 reg_b(dst + 1), SHF_SC_R_SHF, 31);
c217abcc 2211 } else if (shift_amt > 32) {
f43d0f17
JW
2212 emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
2213 reg_imm(0));
2214 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
c217abcc 2215 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
f43d0f17
JW
2216 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2217 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2218 }
2219
2220 return 0;
2221}
2222
c217abcc
JW
2223static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2224{
2225 const struct bpf_insn *insn = &meta->insn;
2226 u8 dst = insn->dst_reg * 2;
2227
2228 return __ashr_imm64(nfp_prog, dst, insn->imm);
2229}
2230
2231static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2232{
2233 /* NOTE: the first insn will set both indirect shift amount (source A)
2234 * and signedness bit (MSB of result).
2235 */
2236 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
2237 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2238 reg_b(dst + 1), SHF_SC_R_SHF);
2239}
2240
2241static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2242{
2243	/* NOTE: it is the same as a logical shift because we don't need to
2244	 * shift in the signedness bit when the shift amount is less than 32.
2245 */
2246 return shr_reg64_lt32_low(nfp_prog, dst, src);
2247}
2248
2249static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2250{
2251 ashr_reg64_lt32_low(nfp_prog, dst, src);
2252 ashr_reg64_lt32_high(nfp_prog, dst, src);
2253}
2254
2255static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2256{
2257 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
2258 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2259 reg_b(dst + 1), SHF_SC_R_SHF);
2260 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2261 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2262}
2263
2264/* Like ashr_imm64, but need to use indirect shift. */
2265static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2266{
2267 const struct bpf_insn *insn = &meta->insn;
2268 u64 umin, umax;
2269 u8 dst, src;
2270
2271 dst = insn->dst_reg * 2;
662c5472
JW
2272 umin = meta->umin_src;
2273 umax = meta->umax_src;
c217abcc
JW
2274 if (umin == umax)
2275 return __ashr_imm64(nfp_prog, dst, umin);
2276
2277 src = insn->src_reg * 2;
2278 if (umax < 32) {
2279 ashr_reg64_lt32(nfp_prog, dst, src);
2280 } else if (umin >= 32) {
2281 ashr_reg64_ge32(nfp_prog, dst, src);
2282 } else {
2283 u16 label_ge32, label_end;
2284
2285 label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
2286 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
2287 ashr_reg64_lt32_low(nfp_prog, dst, src);
2288 label_end = nfp_prog_current_offset(nfp_prog) + 6;
2289 emit_br(nfp_prog, BR_UNC, label_end, 2);
2290 /* ashr_reg64_lt32_high packed in delay slot. */
2291 ashr_reg64_lt32_high(nfp_prog, dst, src);
2292
2293 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
2294 return -EINVAL;
2295 ashr_reg64_ge32(nfp_prog, dst, src);
2296
2297 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2298 return -EINVAL;
2299 }
2300
2301 return 0;
2302}
2303
cd7df56e
JK
2304static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2305{
2306 const struct bpf_insn *insn = &meta->insn;
2307
2308 wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
2309 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2310
2311 return 0;
2312}
2313
2314static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2315{
2316 const struct bpf_insn *insn = &meta->insn;
2317
2318 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
2319 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2320
2321 return 0;
2322}
2323
2324static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2325{
2326 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
2327}
2328
2329static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2330{
f036ebd9 2331 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR);
cd7df56e
JK
2332}
2333
2334static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2335{
2336 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
2337}
2338
2339static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2340{
f036ebd9 2341 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND);
cd7df56e
JK
2342}
2343
2344static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2345{
2346 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
2347}
2348
2349static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2350{
f036ebd9 2351 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR);
cd7df56e
JK
2352}
2353
2354static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2355{
2356 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
2357}
2358
2359static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2360{
f036ebd9 2361 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD);
cd7df56e
JK
2362}
2363
2364static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2365{
2366 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
2367}
2368
2369static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2370{
f036ebd9 2371 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB);
cd7df56e
JK
2372}
2373
d3d23fdb
JW
2374static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2375{
2376 return wrp_mul(nfp_prog, meta, false, true);
2377}
2378
2379static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2380{
2381 return wrp_mul(nfp_prog, meta, false, false);
2382}
2383
2a952b03
JW
2384static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2385{
2386 return div_reg64(nfp_prog, meta);
2387}
2388
2389static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2390{
2391 return div_imm64(nfp_prog, meta);
2392}
2393
254ef4d7
JW
2394static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2395{
2396 u8 dst = meta->insn.dst_reg * 2;
2397
2398 emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
2399 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
2400
2401 return 0;
2402}
2403
84708c13
JW
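/* The NFP arithmetic shift takes its sign from the MSB of the previous ALU
 * result, so an ALU OR is emitted first purely to prime PREV_ALU with the
 * value being shifted.
 */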
2404static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2405{
db0a4b3b
JW
2406 if (shift_amt) {
2407 /* Set signedness bit (MSB of result). */
2408 emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR,
2409 reg_imm(0));
2410 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2411 reg_b(dst), SHF_SC_R_SHF, shift_amt);
2412 }
84708c13
JW
2413 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2414
2415 return 0;
2416}
2417
2418static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2419{
2420 const struct bpf_insn *insn = &meta->insn;
2421 u64 umin, umax;
2422 u8 dst, src;
2423
2424 dst = insn->dst_reg * 2;
2425 umin = meta->umin_src;
2426 umax = meta->umax_src;
2427 if (umin == umax)
2428 return __ashr_imm(nfp_prog, dst, umin);
2429
2430 src = insn->src_reg * 2;
2431 /* NOTE: the first insn will set both indirect shift amount (source A)
2432 * and signedness bit (MSB of result).
2433 */
2434 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst));
2435 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2436 reg_b(dst), SHF_SC_R_SHF);
2437 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2438
2439 return 0;
2440}
2441
2442static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2443{
2444 const struct bpf_insn *insn = &meta->insn;
2445 u8 dst = insn->dst_reg * 2;
2446
2447 return __ashr_imm(nfp_prog, dst, insn->imm);
2448}
2449
ac7a1717
JW
2450static int __shr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2451{
2452 if (shift_amt)
2453 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2454 reg_b(dst), SHF_SC_R_SHF, shift_amt);
2455 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2456 return 0;
2457}
2458
2459static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2460{
2461 const struct bpf_insn *insn = &meta->insn;
2462 u8 dst = insn->dst_reg * 2;
2463
2464 return __shr_imm(nfp_prog, dst, insn->imm);
2465}
2466
2467static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2468{
2469 const struct bpf_insn *insn = &meta->insn;
2470 u64 umin, umax;
2471 u8 dst, src;
2472
2473 dst = insn->dst_reg * 2;
2474 umin = meta->umin_src;
2475 umax = meta->umax_src;
2476 if (umin == umax)
2477 return __shr_imm(nfp_prog, dst, umin);
2478
2479 src = insn->src_reg * 2;
2480 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2481 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2482 reg_b(dst), SHF_SC_R_SHF);
2483 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2484 return 0;
2485}
2486
2487static int __shl_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2488{
2489 if (shift_amt)
2490 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2491 reg_b(dst), SHF_SC_L_SHF, shift_amt);
2492 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2493 return 0;
2494}
2495
cd7df56e
JK
2496static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2497{
2498 const struct bpf_insn *insn = &meta->insn;
ac7a1717 2499 u8 dst = insn->dst_reg * 2;
cd7df56e 2500
ac7a1717
JW
2501 return __shl_imm(nfp_prog, dst, insn->imm);
2502}
cd7df56e 2503
ac7a1717
JW
2504static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2505{
2506 const struct bpf_insn *insn = &meta->insn;
2507 u64 umin, umax;
2508 u8 dst, src;
2509
2510 dst = insn->dst_reg * 2;
2511 umin = meta->umin_src;
2512 umax = meta->umax_src;
2513 if (umin == umax)
2514 return __shl_imm(nfp_prog, dst, umin);
2515
2516 src = insn->src_reg * 2;
2517 shl_reg64_lt32_low(nfp_prog, dst, src);
2518 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
cd7df56e
JK
2519 return 0;
2520}
2521
3119d1fd
JK
2522static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2523{
2524 const struct bpf_insn *insn = &meta->insn;
2525 u8 gpr = insn->dst_reg * 2;
2526
2527 switch (insn->imm) {
2528 case 16:
2529 emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
2530 SHF_SC_R_ROT, 8);
2531 emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
2532 SHF_SC_R_SHF, 16);
2533
2534 wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
2535 break;
2536 case 32:
2537 wrp_end32(nfp_prog, reg_a(gpr), gpr);
2538 wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
2539 break;
2540 case 64:
2541 wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
2542
2543 wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
2544 wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
2545 break;
2546 }
2547
2548 return 0;
2549}
2550
cd7df56e
JK
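/* BPF_LD | BPF_IMM | BPF_DW spans two BPF instructions: the first carries
 * the low 32 bits of the immediate and the second the high 32 bits.
 * imm_ld8() only records imm_ld8_part2() as the double_cb, which emits both
 * halves once the second instruction is reached.
 */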
2551static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2552{
9f16c8ab
JK
2553 struct nfp_insn_meta *prev = nfp_meta_prev(meta);
2554 u32 imm_lo, imm_hi;
2555 u8 dst;
2556
2557 dst = prev->insn.dst_reg * 2;
2558 imm_lo = prev->insn.imm;
2559 imm_hi = meta->insn.imm;
2560
2561 wrp_immed(nfp_prog, reg_both(dst), imm_lo);
2562
2563 /* mov is always 1 insn, load imm may be two, so try to use mov */
2564 if (imm_hi == imm_lo)
2565 wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
2566 else
2567 wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
cd7df56e
JK
2568
2569 return 0;
2570}
2571
2572static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2573{
cd7df56e 2574 meta->double_cb = imm_ld8_part2;
cd7df56e
JK
2575 return 0;
2576}
2577
2578static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2579{
2580 return construct_data_ld(nfp_prog, meta->insn.imm, 1);
2581}
2582
2583static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2584{
2585 return construct_data_ld(nfp_prog, meta->insn.imm, 2);
2586}
2587
2588static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2589{
2590 return construct_data_ld(nfp_prog, meta->insn.imm, 4);
2591}
2592
2593static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2594{
2595 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
0a793977 2596 meta->insn.src_reg * 2, 1);
cd7df56e
JK
2597}
2598
2599static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2600{
2601 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
0a793977 2602 meta->insn.src_reg * 2, 2);
cd7df56e
JK
2603}
2604
2605static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2606{
2607 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
0a793977 2608 meta->insn.src_reg * 2, 4);
cd7df56e
JK
2609}
2610
a82b23fb
JK
2611static int
2612mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
d3488480 2613 unsigned int size, unsigned int ptr_off)
a82b23fb 2614{
d3488480 2615 return mem_op_stack(nfp_prog, meta, size, ptr_off,
b14157ee
JK
2616 meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
2617 true, wrp_lmem_load);
a82b23fb
JK
2618}
2619
943c57b9
JK
2620static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2621 u8 size)
cd7df56e 2622{
bfddbc8a
JK
2623 swreg dst = reg_both(meta->insn.dst_reg * 2);
2624
943c57b9 2625 switch (meta->insn.off) {
c6c580d7
JK
2626 case offsetof(struct __sk_buff, len):
2627 if (size != FIELD_SIZEOF(struct __sk_buff, len))
943c57b9 2628 return -EOPNOTSUPP;
bfddbc8a
JK
2629 wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
2630 break;
c6c580d7
JK
2631 case offsetof(struct __sk_buff, data):
2632 if (size != FIELD_SIZEOF(struct __sk_buff, data))
bfddbc8a
JK
2633 return -EOPNOTSUPP;
2634 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
2635 break;
c6c580d7
JK
2636 case offsetof(struct __sk_buff, data_end):
2637 if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
bfddbc8a
JK
2638 return -EOPNOTSUPP;
2639 emit_alu(nfp_prog, dst,
2640 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
943c57b9
JK
2641 break;
2642 default:
46c50518 2643 return -EOPNOTSUPP;
943c57b9
JK
2644 }
2645
2646 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
cd7df56e 2647
6d677075
JK
2648 return 0;
2649}
2650
943c57b9
JK
2651static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2652 u8 size)
6d677075 2653{
b3f868df 2654 swreg dst = reg_both(meta->insn.dst_reg * 2);
6d677075 2655
943c57b9 2656 switch (meta->insn.off) {
c6c580d7
JK
2657 case offsetof(struct xdp_md, data):
2658 if (size != FIELD_SIZEOF(struct xdp_md, data))
2659 return -EOPNOTSUPP;
943c57b9
JK
2660 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
2661 break;
c6c580d7
JK
2662 case offsetof(struct xdp_md, data_end):
2663 if (size != FIELD_SIZEOF(struct xdp_md, data_end))
2664 return -EOPNOTSUPP;
943c57b9
JK
2665 emit_alu(nfp_prog, dst,
2666 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
2667 break;
2668 default:
2669 return -EOPNOTSUPP;
2670 }
6d677075 2671
943c57b9 2672 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
6cd80b55 2673
cd7df56e
JK
2674 return 0;
2675}
2676
2ca71441
JK
2677static int
2678mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2679 unsigned int size)
2680{
2681 swreg tmp_reg;
2682
2683 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2684
3dd43c33
JK
2685 return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
2686 tmp_reg, meta->insn.dst_reg * 2, size);
2687}
2688
2689static int
2690mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2691 unsigned int size)
2692{
2693 swreg tmp_reg;
2694
2695 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2696
2697 return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
2698 tmp_reg, meta->insn.dst_reg * 2, size);
2ca71441
JK
2699}
2700
be759237
JW
2701static void
2702mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
2703 struct nfp_insn_meta *meta)
2704{
2705 s16 range_start = meta->pkt_cache.range_start;
2706 s16 range_end = meta->pkt_cache.range_end;
2707 swreg src_base, off;
2708 u8 xfer_num, len;
2709 bool indir;
2710
2711 off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
2712 src_base = reg_a(meta->insn.src_reg * 2);
2713 len = range_end - range_start;
2714 xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;
2715
2716 indir = len > 8 * REG_WIDTH;
2717 /* Setup PREV_ALU for indirect mode. */
2718 if (indir)
2719 wrp_immed(nfp_prog, reg_none(),
2720 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
2721
2722 /* Cache memory into transfer-in registers. */
2723 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
b556ddd9 2724 off, xfer_num - 1, CMD_CTX_SWAP, indir);
be759237
JW
2725}
2726
91ff69e8
JW
2727static int
2728mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
2729 struct nfp_insn_meta *meta,
2730 unsigned int size)
2731{
2732 s16 range_start = meta->pkt_cache.range_start;
2733 s16 insn_off = meta->insn.off - range_start;
2734 swreg dst_lo, dst_hi, src_lo, src_mid;
2735 u8 dst_gpr = meta->insn.dst_reg * 2;
2736 u8 len_lo = size, len_mid = 0;
2737 u8 idx = insn_off / REG_WIDTH;
2738 u8 off = insn_off % REG_WIDTH;
2739
2740 dst_hi = reg_both(dst_gpr + 1);
2741 dst_lo = reg_both(dst_gpr);
2742 src_lo = reg_xfer(idx);
2743
2744 /* The read length could involve as many as three registers. */
2745 if (size > REG_WIDTH - off) {
2746 /* Calculate the part in the second register. */
2747 len_lo = REG_WIDTH - off;
2748 len_mid = size - len_lo;
2749
2750 /* Calculate the part in the third register. */
2751 if (size > 2 * REG_WIDTH - off)
2752 len_mid = REG_WIDTH;
2753 }
2754
2755 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
2756
2757 if (!len_mid) {
2758 wrp_immed(nfp_prog, dst_hi, 0);
2759 return 0;
2760 }
2761
2762 src_mid = reg_xfer(idx + 1);
2763
2764 if (size <= REG_WIDTH) {
2765 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
2766 wrp_immed(nfp_prog, dst_hi, 0);
2767 } else {
2768 swreg src_hi = reg_xfer(idx + 2);
2769
2770 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
2771 REG_WIDTH - len_lo, len_lo);
2772 wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
2773 REG_WIDTH - len_lo);
2774 wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
2775 len_lo);
2776 }
2777
2778 return 0;
2779}
2780
be759237
JW
2781static int
2782mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
2783 struct nfp_insn_meta *meta,
2784 unsigned int size)
2785{
2786 swreg dst_lo, dst_hi, src_lo;
2787 u8 dst_gpr, idx;
2788
2789 idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
2790 dst_gpr = meta->insn.dst_reg * 2;
2791 dst_hi = reg_both(dst_gpr + 1);
2792 dst_lo = reg_both(dst_gpr);
2793 src_lo = reg_xfer(idx);
2794
2795 if (size < REG_WIDTH) {
2796 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
2797 wrp_immed(nfp_prog, dst_hi, 0);
2798 } else if (size == REG_WIDTH) {
2799 wrp_mov(nfp_prog, dst_lo, src_lo);
2800 wrp_immed(nfp_prog, dst_hi, 0);
2801 } else {
2802 swreg src_hi = reg_xfer(idx + 1);
2803
2804 wrp_mov(nfp_prog, dst_lo, src_lo);
2805 wrp_mov(nfp_prog, dst_hi, src_hi);
2806 }
2807
2808 return 0;
2809}
2810
2811static int
2812mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
2813 struct nfp_insn_meta *meta, unsigned int size)
2814{
2815 u8 off = meta->insn.off - meta->pkt_cache.range_start;
2816
91ff69e8
JW
2817 if (IS_ALIGNED(off, REG_WIDTH))
2818 return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
be759237 2819
91ff69e8 2820 return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size);
be759237
JW
2821}
2822
2ca71441
JK
2823static int
2824mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2825 unsigned int size)
2826{
9879a381
JW
2827 if (meta->ldst_gather_len)
2828 return nfp_cpp_memcpy(nfp_prog, meta);
2829
2ca71441 2830 if (meta->ptr.type == PTR_TO_CTX) {
012bb8a8 2831 if (nfp_prog->type == BPF_PROG_TYPE_XDP)
2ca71441
JK
2832 return mem_ldx_xdp(nfp_prog, meta, size);
2833 else
2834 return mem_ldx_skb(nfp_prog, meta, size);
2835 }
2836
be759237
JW
2837 if (meta->ptr.type == PTR_TO_PACKET) {
2838 if (meta->pkt_cache.range_end) {
2839 if (meta->pkt_cache.do_init)
2840 mem_ldx_data_init_pktcache(nfp_prog, meta);
2841
2842 return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
2843 } else {
2844 return mem_ldx_data(nfp_prog, meta, size);
2845 }
2846 }
2ca71441 2847
a82b23fb 2848 if (meta->ptr.type == PTR_TO_STACK)
d3488480
JK
2849 return mem_ldx_stack(nfp_prog, meta, size,
2850 meta->ptr.off + meta->ptr.var_off.value);
3dd43c33
JK
2851
2852 if (meta->ptr.type == PTR_TO_MAP_VALUE)
2853 return mem_ldx_emem(nfp_prog, meta, size);
a82b23fb 2854
2ca71441
JK
2855 return -EOPNOTSUPP;
2856}
2857
2858static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2859{
2860 return mem_ldx(nfp_prog, meta, 1);
2861}
2862
2863static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2864{
2865 return mem_ldx(nfp_prog, meta, 2);
2866}
2867
6d677075
JK
2868static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2869{
2ca71441
JK
2870 return mem_ldx(nfp_prog, meta, 4);
2871}
2872
2873static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2874{
2875 return mem_ldx(nfp_prog, meta, 8);
6d677075
JK
2876}
2877
e663fe38
JK
2878static int
2879mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2880 unsigned int size)
2881{
2882 u64 imm = meta->insn.imm; /* sign extend */
2883 swreg off_reg;
2884
2885 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2886
2887 return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
2888 imm, size);
2889}
2890
2891static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2892 unsigned int size)
19d0f54e 2893{
e663fe38
JK
2894 if (meta->ptr.type == PTR_TO_PACKET)
2895 return mem_st_data(nfp_prog, meta, size);
2896
46c50518 2897 return -EOPNOTSUPP;
19d0f54e
JK
2898}
2899
e663fe38
JK
2900static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2901{
2902 return mem_st(nfp_prog, meta, 1);
2903}
2904
2905static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2906{
2907 return mem_st(nfp_prog, meta, 2);
2908}
2909
2910static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2911{
2912 return mem_st(nfp_prog, meta, 4);
2913}
2914
2915static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
6d677075 2916{
e663fe38
JK
2917 return mem_st(nfp_prog, meta, 8);
2918}
2919
2920static int
2921mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2922 unsigned int size)
2923{
2924 swreg off_reg;
2925
2926 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2927
2928 return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
2929 meta->insn.src_reg * 2, size);
2930}
2931
ee9133a8
JK
2932static int
2933mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
d3488480 2934 unsigned int size, unsigned int ptr_off)
ee9133a8 2935{
d3488480 2936 return mem_op_stack(nfp_prog, meta, size, ptr_off,
b14157ee
JK
2937 meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
2938 false, wrp_lmem_store);
ee9133a8
JK
2939}
2940
d985888f
JK
2941static int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2942{
2943 switch (meta->insn.off) {
2944 case offsetof(struct xdp_md, rx_queue_index):
2945 return nfp_queue_select(nfp_prog, meta);
2946 }
2947
2948 WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */
2949 return -EOPNOTSUPP;
2950}
2951
e663fe38
JK
2952static int
2953mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2954 unsigned int size)
2955{
2956 if (meta->ptr.type == PTR_TO_PACKET)
2957 return mem_stx_data(nfp_prog, meta, size);
2958
ee9133a8 2959 if (meta->ptr.type == PTR_TO_STACK)
d3488480
JK
2960 return mem_stx_stack(nfp_prog, meta, size,
2961 meta->ptr.off + meta->ptr.var_off.value);
ee9133a8 2962
46c50518 2963 return -EOPNOTSUPP;
6d677075
JK
2964}
2965
e663fe38
JK
2966static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2967{
2968 return mem_stx(nfp_prog, meta, 1);
2969}
2970
2971static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2972{
2973 return mem_stx(nfp_prog, meta, 2);
2974}
2975
6d677075
JK
2976static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2977{
d985888f
JK
2978 if (meta->ptr.type == PTR_TO_CTX)
2979 if (nfp_prog->type == BPF_PROG_TYPE_XDP)
2980 return mem_stx_xdp(nfp_prog, meta);
e663fe38
JK
2981 return mem_stx(nfp_prog, meta, 4);
2982}
2ca71441 2983
e663fe38
JK
2984static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2985{
2986 return mem_stx(nfp_prog, meta, 8);
6d677075
JK
2987}
2988
dcb0c27f
JK
2989static int
2990mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
2991{
dcb0c27f
JK
2992 u8 dst_gpr = meta->insn.dst_reg * 2;
2993 u8 src_gpr = meta->insn.src_reg * 2;
41aed09c
JK
2994 unsigned int full_add, out;
2995 swreg addra, addrb, off;
dcb0c27f
JK
2996
2997 off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2998
41aed09c
JK
2999	/* We can fit 16 bits into the command immediate. If we know that the
3000	 * immediate is guaranteed to either always or never fit into 16 bits,
3001	 * we only generate code to handle that particular case; otherwise we
3002	 * generate code for both.
3003 */
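	/* out and full_add below are pre-computed jump targets: full_add
	 * points at the full-width add sequence (past the optional address
	 * add, the add_imm path and, when both paths are generated, the
	 * compare-and-branch prologue), while out points past the whole
	 * construct.  Both are verified with nfp_prog_confirm_current_offset().
	 */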
3004 out = nfp_prog_current_offset(nfp_prog);
3005 full_add = nfp_prog_current_offset(nfp_prog);
3006
3007 if (meta->insn.off) {
3008 out += 2;
3009 full_add += 2;
3010 }
3011 if (meta->xadd_maybe_16bit) {
3012 out += 3;
3013 full_add += 3;
3014 }
3015 if (meta->xadd_over_16bit)
3016 out += 2 + is64;
3017 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
3018 out += 5;
3019 full_add += 5;
3020 }
3021
3022 /* Generate the branch for choosing add_imm vs add */
3023 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
3024 swreg max_imm = imm_a(nfp_prog);
3025
3026 wrp_immed(nfp_prog, max_imm, 0xffff);
3027 emit_alu(nfp_prog, reg_none(),
3028 max_imm, ALU_OP_SUB, reg_b(src_gpr));
3029 emit_alu(nfp_prog, reg_none(),
3030 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
3031 emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
3032 /* defer for add */
3033 }
3034
dcb0c27f
JK
3035	/* If the insn has an offset, add it to the address */
3036 if (!meta->insn.off) {
3037 addra = reg_a(dst_gpr);
3038 addrb = reg_b(dst_gpr + 1);
3039 } else {
3040 emit_alu(nfp_prog, imma_a(nfp_prog),
3041 reg_a(dst_gpr), ALU_OP_ADD, off);
3042 emit_alu(nfp_prog, imma_b(nfp_prog),
3043 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
3044 addra = imma_a(nfp_prog);
3045 addrb = imma_b(nfp_prog);
3046 }
3047
41aed09c
JK
3048 /* Generate the add_imm if 16 bits are possible */
3049 if (meta->xadd_maybe_16bit) {
3050 swreg prev_alu = imm_a(nfp_prog);
3051
3052 wrp_immed(nfp_prog, prev_alu,
3053 FIELD_PREP(CMD_OVE_DATA, 2) |
3054 CMD_OVE_LEN |
3055 FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
3056 wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
3057 emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
3058 addra, addrb, 0, CMD_CTX_NO_SWAP);
3059
3060 if (meta->xadd_over_16bit)
3061 emit_br(nfp_prog, BR_UNC, out, 0);
3062 }
3063
3064 if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
3065 return -EINVAL;
3066
3067 /* Generate the add if 16 bits are not guaranteed */
3068 if (meta->xadd_over_16bit) {
3069 emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
3070 addra, addrb, is64 << 2,
3071 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
3072
3073 wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
3074 if (is64)
3075 wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
3076 }
3077
3078 if (!nfp_prog_confirm_current_offset(nfp_prog, out))
3079 return -EINVAL;
dcb0c27f
JK
3080
3081 return 0;
3082}
3083
3084static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3085{
3086 return mem_xadd(nfp_prog, meta, false);
3087}
3088
3089static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3090{
3091 return mem_xadd(nfp_prog, meta, true);
3092}
3093
cd7df56e
JK
3094static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3095{
cd7df56e
JK
3096 emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
3097
3098 return 0;
3099}
3100
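/* 64-bit JEQ against an immediate: XOR each 32-bit half of the register with
 * the matching half of the immediate (halves of the immediate that are zero
 * are compared by using the register half directly), OR the two results and
 * branch if the outcome is zero.
 */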
3101static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3102{
3103 const struct bpf_insn *insn = &meta->insn;
3104 u64 imm = insn->imm; /* sign extend */
b3f868df
JK
3105 swreg or1, or2, tmp_reg;
3106
3107 or1 = reg_a(insn->dst_reg * 2);
3108 or2 = reg_b(insn->dst_reg * 2 + 1);
cd7df56e 3109
cd7df56e
JK
3110 if (imm & ~0U) {
3111 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3112 emit_alu(nfp_prog, imm_a(nfp_prog),
3113 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
3114 or1 = imm_a(nfp_prog);
3115 }
3116
3117 if (imm >> 32) {
3118 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
3119 emit_alu(nfp_prog, imm_b(nfp_prog),
3120 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
3121 or2 = imm_b(nfp_prog);
3122 }
3123
3124 emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
3125 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
3126
3127 return 0;
3128}
3129
46144839
JW
3130static int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3131{
3132 const struct bpf_insn *insn = &meta->insn;
3133 swreg tmp_reg;
3134
3135 tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog));
3136 emit_alu(nfp_prog, reg_none(),
3137 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
3138 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
3139
3140 return 0;
3141}
3142
cd7df56e
JK
3143static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3144{
3145 const struct bpf_insn *insn = &meta->insn;
3146 u64 imm = insn->imm; /* sign extend */
4987eacc 3147 u8 dst_gpr = insn->dst_reg * 2;
b3f868df 3148 swreg tmp_reg;
cd7df56e 3149
4987eacc
JK
3150 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3151 emit_alu(nfp_prog, imm_b(nfp_prog),
3152 reg_a(dst_gpr), ALU_OP_AND, tmp_reg);
3153 /* Upper word of the mask can only be 0 or ~0 from sign extension,
3154 * so either ignore it or OR the whole thing in.
3155 */
46144839 3156 if (is_mbpf_jmp64(meta) && imm >> 32) {
cd7df56e 3157 emit_alu(nfp_prog, reg_none(),
4987eacc 3158 reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog));
46144839 3159 }
4987eacc 3160 emit_br(nfp_prog, BR_BNE, insn->off, 0);
cd7df56e
JK
3161
3162 return 0;
3163}
3164
3165static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3166{
3167 const struct bpf_insn *insn = &meta->insn;
3168 u64 imm = insn->imm; /* sign extend */
46144839 3169 bool is_jmp32 = is_mbpf_jmp32(meta);
b3f868df 3170 swreg tmp_reg;
cd7df56e 3171
cd7df56e 3172 if (!imm) {
46144839
JW
3173 if (is_jmp32)
3174 emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE,
3175 reg_b(insn->dst_reg * 2));
3176 else
3177 emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
3178 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
cd7df56e 3179 emit_br(nfp_prog, BR_BNE, insn->off, 0);
82837370 3180 return 0;
cd7df56e
JK
3181 }
3182
3183 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3184 emit_alu(nfp_prog, reg_none(),
3185 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
3186 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3187
46144839
JW
3188 if (is_jmp32)
3189 return 0;
3190
cd7df56e
JK
3191 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
3192 emit_alu(nfp_prog, reg_none(),
3193 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
3194 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3195
3196 return 0;
3197}
3198
3199static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3200{
3201 const struct bpf_insn *insn = &meta->insn;
3202
cd7df56e
JK
3203 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
3204 ALU_OP_XOR, reg_b(insn->src_reg * 2));
46144839
JW
3205 if (is_mbpf_jmp64(meta)) {
3206 emit_alu(nfp_prog, imm_b(nfp_prog),
3207 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR,
3208 reg_b(insn->src_reg * 2 + 1));
3209 emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR,
3210 imm_b(nfp_prog));
3211 }
cd7df56e
JK
3212 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
3213
3214 return 0;
3215}
3216
cd7df56e
JK
3217static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3218{
3219 return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
3220}
3221
3222static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3223{
3224 return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
3225}
3226
389f263b
QM
3227static int
3228bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3229{
bdf4c66f 3230 u32 ret_tgt, stack_depth, offset_br;
389f263b
QM
3231 swreg tmp_reg;
3232
3233 stack_depth = round_up(nfp_prog->stack_frame_depth, STACK_FRAME_ALIGN);
3234 /* Space for saving the return address is accounted for by the callee,
3235 * so stack_depth can be zero for the main function.
3236 */
3237 if (stack_depth) {
3238 tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
3239 stack_imm(nfp_prog));
3240 emit_alu(nfp_prog, stack_reg(nfp_prog),
3241 stack_reg(nfp_prog), ALU_OP_ADD, tmp_reg);
3242 emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
3243 NFP_CSR_ACT_LM_ADDR0);
3244 }
3245
44549623
QM
3246 /* Two cases for jumping to the callee:
3247 *
3248 * - If callee uses and needs to save R6~R9 then:
389f263b
QM
3249 * 1. Put the start offset of the callee into imm_b(). This will
3250 * require a fixup step, as we do not necessarily know this
3251 * address yet.
3252 * 2. Put the return address from the callee to the caller into
3253 * register ret_reg().
3254 * 3. (After defer slots are consumed) Jump to the subroutine that
3255 * pushes the registers to the stack.
44549623
QM
3256 * The subroutine acts as a trampoline, and returns to the address in
3257 * imm_b(), i.e. jumps to the callee.
3258 *
3259 * - If callee does not need to save R6~R9 then just load return
3260 * address to the caller in ret_reg(), and jump to the callee
3261 * directly.
389f263b
QM
3262 *
3263 * Using ret_reg() to pass the return address to the callee is set here
3264 * as a convention. The callee can then push this address onto its
3265 * stack frame in its prologue. The advantages of passing the return
3266 * address through ret_reg(), instead of pushing it to the stack right
3267 * here, are the following:
3268 * - It looks cleaner.
3269	 * - If the called function is called multiple times, we get a lower
3270 * program size.
3271	 * - We save the two no-op instructions that would otherwise have to be
3272	 *   added just before the emit_br() when the stack depth is not zero.
3273 * - If we ever find a register to hold the return address during whole
3274 * execution of the callee, we will not have to push the return
3275 * address to the stack for leaf functions.
3276 */
44549623
QM
3277 if (!meta->jmp_dst) {
3278 pr_err("BUG: BPF-to-BPF call has no destination recorded\n");
3279 return -ELOOP;
3280 }
3281 if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) {
3282 ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
3283 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2,
3284 RELO_BR_GO_CALL_PUSH_REGS);
3285 offset_br = nfp_prog_current_offset(nfp_prog);
3286 wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL);
3287 } else {
3288 ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
e90287f3 3289 emit_br(nfp_prog, BR_UNC, meta->insn.imm, 1);
44549623
QM
3290 offset_br = nfp_prog_current_offset(nfp_prog);
3291 }
389f263b
QM
3292 wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL);
3293
3294 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
3295 return -EINVAL;
3296
3297 if (stack_depth) {
3298 tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
3299 stack_imm(nfp_prog));
3300 emit_alu(nfp_prog, stack_reg(nfp_prog),
3301 stack_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
3302 emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
3303 NFP_CSR_ACT_LM_ADDR0);
3304 wrp_nops(nfp_prog, 3);
3305 }
3306
bdf4c66f
QM
3307 meta->num_insns_after_br = nfp_prog_current_offset(nfp_prog);
3308 meta->num_insns_after_br -= offset_br;
3309
389f263b
QM
3310 return 0;
3311}
3312
3313static int helper_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2cb230bd
JK
3314{
3315 switch (meta->insn.imm) {
0d49eaf4
JK
3316 case BPF_FUNC_xdp_adjust_head:
3317 return adjust_head(nfp_prog, meta);
0c261593
JK
3318 case BPF_FUNC_xdp_adjust_tail:
3319 return adjust_tail(nfp_prog, meta);
77a3d311 3320 case BPF_FUNC_map_lookup_elem:
44d65a47 3321 case BPF_FUNC_map_update_elem:
bfee64de 3322 case BPF_FUNC_map_delete_elem:
fc448497 3323 return map_call_stack_common(nfp_prog, meta);
df4a37d8
JK
3324 case BPF_FUNC_get_prandom_u32:
3325 return nfp_get_prandom_u32(nfp_prog, meta);
9816dd35
JK
3326 case BPF_FUNC_perf_event_output:
3327 return nfp_perf_event_output(nfp_prog, meta);
2cb230bd
JK
3328 default:
3329 WARN_ONCE(1, "verifier allowed unsupported function\n");
3330 return -EOPNOTSUPP;
3331 }
3332}
3333
389f263b
QM
3334static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3335{
3336 if (is_mbpf_pseudo_call(meta))
3337 return bpf_to_bpf_call(nfp_prog, meta);
3338 else
3339 return helper_call(nfp_prog, meta);
3340}
3341
3342static bool nfp_is_main_function(struct nfp_insn_meta *meta)
3343{
3344 return meta->subprog_idx == 0;
3345}
3346
cd7df56e
JK
3347static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3348{
e84797fe 3349 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);
cd7df56e
JK
3350
3351 return 0;
3352}
3353
389f263b
QM
3354static int
3355nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3356{
44549623
QM
3357 if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) {
3358 /* Pop R6~R9 to the stack via related subroutine.
3359 * We loaded the return address to the caller into ret_reg().
3360		 * This means that the subroutine does not come back here; we
3361		 * make it jump back to the subprogram caller directly!
3362 */
3363 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1,
3364 RELO_BR_GO_CALL_POP_REGS);
3365 /* Pop return address from the stack. */
3366 wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
3367 } else {
3368 /* Pop return address from the stack. */
3369 wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
3370 /* Jump back to caller if no callee-saved registers were used
3371 * by the subprogram.
3372 */
3373 emit_rtn(nfp_prog, ret_reg(nfp_prog), 0);
3374 }
389f263b
QM
3375
3376 return 0;
3377}
3378
3379static int jmp_exit(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3380{
3381 if (nfp_is_main_function(meta))
3382 return goto_out(nfp_prog, meta);
3383 else
3384 return nfp_subprog_epilogue(nfp_prog, meta);
3385}
3386
cd7df56e
JK
3387static const instr_cb_t instr_cb[256] = {
3388 [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64,
3389 [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64,
3390 [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64,
3391 [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64,
3392 [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64,
3393 [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64,
3394 [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64,
3395 [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64,
3396 [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64,
3397 [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64,
3398 [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
3399 [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
d3d23fdb
JW
3400 [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64,
3401 [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64,
2a952b03
JW
3402 [BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64,
3403 [BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64,
254ef4d7 3404 [BPF_ALU64 | BPF_NEG] = neg_reg64,
991f5b36 3405 [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64,
cd7df56e 3406 [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
991f5b36 3407 [BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64,
cd7df56e 3408 [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64,
c217abcc 3409 [BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
f43d0f17 3410 [BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
cd7df56e
JK
3411 [BPF_ALU | BPF_MOV | BPF_X] = mov_reg,
3412 [BPF_ALU | BPF_MOV | BPF_K] = mov_imm,
3413 [BPF_ALU | BPF_XOR | BPF_X] = xor_reg,
3414 [BPF_ALU | BPF_XOR | BPF_K] = xor_imm,
3415 [BPF_ALU | BPF_AND | BPF_X] = and_reg,
3416 [BPF_ALU | BPF_AND | BPF_K] = and_imm,
3417 [BPF_ALU | BPF_OR | BPF_X] = or_reg,
3418 [BPF_ALU | BPF_OR | BPF_K] = or_imm,
3419 [BPF_ALU | BPF_ADD | BPF_X] = add_reg,
3420 [BPF_ALU | BPF_ADD | BPF_K] = add_imm,
3421 [BPF_ALU | BPF_SUB | BPF_X] = sub_reg,
3422 [BPF_ALU | BPF_SUB | BPF_K] = sub_imm,
d3d23fdb
JW
3423 [BPF_ALU | BPF_MUL | BPF_X] = mul_reg,
3424 [BPF_ALU | BPF_MUL | BPF_K] = mul_imm,
2a952b03
JW
3425 [BPF_ALU | BPF_DIV | BPF_X] = div_reg,
3426 [BPF_ALU | BPF_DIV | BPF_K] = div_imm,
254ef4d7 3427 [BPF_ALU | BPF_NEG] = neg_reg,
ac7a1717 3428 [BPF_ALU | BPF_LSH | BPF_X] = shl_reg,
cd7df56e 3429 [BPF_ALU | BPF_LSH | BPF_K] = shl_imm,
ac7a1717
JW
3430 [BPF_ALU | BPF_RSH | BPF_X] = shr_reg,
3431 [BPF_ALU | BPF_RSH | BPF_K] = shr_imm,
84708c13
JW
3432 [BPF_ALU | BPF_ARSH | BPF_X] = ashr_reg,
3433 [BPF_ALU | BPF_ARSH | BPF_K] = ashr_imm,
3119d1fd 3434 [BPF_ALU | BPF_END | BPF_X] = end_reg32,
cd7df56e
JK
3435 [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8,
3436 [BPF_LD | BPF_ABS | BPF_B] = data_ld1,
3437 [BPF_LD | BPF_ABS | BPF_H] = data_ld2,
3438 [BPF_LD | BPF_ABS | BPF_W] = data_ld4,
3439 [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1,
3440 [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2,
3441 [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4,
2ca71441
JK
3442 [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1,
3443 [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2,
cd7df56e 3444 [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4,
2ca71441 3445 [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8,
e663fe38
JK
3446 [BPF_STX | BPF_MEM | BPF_B] = mem_stx1,
3447 [BPF_STX | BPF_MEM | BPF_H] = mem_stx2,
19d0f54e 3448 [BPF_STX | BPF_MEM | BPF_W] = mem_stx4,
e663fe38 3449 [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8,
dcb0c27f
JK
3450 [BPF_STX | BPF_XADD | BPF_W] = mem_xadd4,
3451 [BPF_STX | BPF_XADD | BPF_DW] = mem_xadd8,
e663fe38
JK
3452 [BPF_ST | BPF_MEM | BPF_B] = mem_st1,
3453 [BPF_ST | BPF_MEM | BPF_H] = mem_st2,
3454 [BPF_ST | BPF_MEM | BPF_W] = mem_st4,
3455 [BPF_ST | BPF_MEM | BPF_DW] = mem_st8,
cd7df56e
JK
3456 [BPF_JMP | BPF_JA | BPF_K] = jump,
3457 [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
61dd8f00
JK
3458 [BPF_JMP | BPF_JGT | BPF_K] = cmp_imm,
3459 [BPF_JMP | BPF_JGE | BPF_K] = cmp_imm,
3460 [BPF_JMP | BPF_JLT | BPF_K] = cmp_imm,
3461 [BPF_JMP | BPF_JLE | BPF_K] = cmp_imm,
3462 [BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm,
3463 [BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm,
3464 [BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm,
3465 [BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm,
cd7df56e
JK
3466 [BPF_JMP | BPF_JSET | BPF_K] = jset_imm,
3467 [BPF_JMP | BPF_JNE | BPF_K] = jne_imm,
3468 [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg,
61dd8f00
JK
3469 [BPF_JMP | BPF_JGT | BPF_X] = cmp_reg,
3470 [BPF_JMP | BPF_JGE | BPF_X] = cmp_reg,
3471 [BPF_JMP | BPF_JLT | BPF_X] = cmp_reg,
3472 [BPF_JMP | BPF_JLE | BPF_X] = cmp_reg,
3473 [BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg,
3474 [BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg,
3475 [BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg,
3476 [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg,
cd7df56e
JK
3477 [BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
3478 [BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
46144839
JW
3479 [BPF_JMP32 | BPF_JEQ | BPF_K] = jeq32_imm,
3480 [BPF_JMP32 | BPF_JGT | BPF_K] = cmp_imm,
3481 [BPF_JMP32 | BPF_JGE | BPF_K] = cmp_imm,
3482 [BPF_JMP32 | BPF_JLT | BPF_K] = cmp_imm,
3483 [BPF_JMP32 | BPF_JLE | BPF_K] = cmp_imm,
3484	 [BPF_JMP32 | BPF_JSGT | BPF_K] = cmp_imm,
3485	 [BPF_JMP32 | BPF_JSGE | BPF_K] = cmp_imm,
3486	 [BPF_JMP32 | BPF_JSLT | BPF_K] = cmp_imm,
3487	 [BPF_JMP32 | BPF_JSLE | BPF_K] = cmp_imm,
3488	 [BPF_JMP32 | BPF_JSET | BPF_K] = jset_imm,
3489 [BPF_JMP32 | BPF_JNE | BPF_K] = jne_imm,
3490 [BPF_JMP32 | BPF_JEQ | BPF_X] = jeq_reg,
3491 [BPF_JMP32 | BPF_JGT | BPF_X] = cmp_reg,
3492 [BPF_JMP32 | BPF_JGE | BPF_X] = cmp_reg,
3493 [BPF_JMP32 | BPF_JLT | BPF_X] = cmp_reg,
3494 [BPF_JMP32 | BPF_JLE | BPF_X] = cmp_reg,
3495	 [BPF_JMP32 | BPF_JSGT | BPF_X] = cmp_reg,
3496	 [BPF_JMP32 | BPF_JSGE | BPF_X] = cmp_reg,
3497	 [BPF_JMP32 | BPF_JSLT | BPF_X] = cmp_reg,
3498	 [BPF_JMP32 | BPF_JSLE | BPF_X] = cmp_reg,
3499	 [BPF_JMP32 | BPF_JSET | BPF_X] = jset_reg,
3500 [BPF_JMP32 | BPF_JNE | BPF_X] = jne_reg,
2cb230bd 3501 [BPF_JMP | BPF_CALL] = call,
389f263b 3502 [BPF_JMP | BPF_EXIT] = jmp_exit,
cd7df56e
JK
3503};
3504
cd7df56e 3505/* --- Assembler logic --- */
2178f3f0
QM
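/* For a pseudo call whose callee needs the register-save subroutine, the
 * immed instruction following the call branch must still hold zero here;
 * patch it with the NFP offset of the call destination.
 */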
3506static int
3507nfp_fixup_immed_relo(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
3508 struct nfp_insn_meta *jmp_dst, u32 br_idx)
3509{
3510 if (immed_get_value(nfp_prog->prog[br_idx + 1])) {
3511 pr_err("BUG: failed to fix up callee register saving\n");
3512 return -EINVAL;
3513 }
3514
3515 immed_set_value(&nfp_prog->prog[br_idx + 1], jmp_dst->off);
3516
3517 return 0;
3518}
3519
cd7df56e
JK
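/* Resolve branch targets once every instruction's final NFP offset is known:
 * locate the branch that ends each jump's block, sanity check it, and point
 * it at the destination instruction recorded during nfp_bpf_jit_prepare().
 */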
3520static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
3521{
5b674140 3522 struct nfp_insn_meta *meta, *jmp_dst;
854dc87d 3523 u32 idx, br_idx;
2178f3f0 3524 int err;
cd7df56e 3525
854dc87d 3526 list_for_each_entry(meta, &nfp_prog->insns, l) {
91a87a58 3527 if (meta->flags & FLAG_INSN_SKIP_MASK)
cd7df56e 3528 continue;
46144839 3529 if (!is_mbpf_jmp(meta))
cd7df56e 3530 continue;
bdf4c66f
QM
3531 if (meta->insn.code == (BPF_JMP | BPF_EXIT) &&
3532 !nfp_is_main_function(meta))
3533 continue;
3534 if (is_mbpf_helper_call(meta))
3535 continue;
cd7df56e 3536
5b674140 3537 if (list_is_last(&meta->l, &nfp_prog->insns))
2314fe9e 3538 br_idx = nfp_prog->last_bpf_off;
5b674140 3539 else
2314fe9e 3540 br_idx = list_next_entry(meta, l)->off - 1;
854dc87d 3541
bdf4c66f
QM
3542		 /* For a BPF-to-BPF function call, a stack adjustment sequence is
3543		  * generated after the return instruction. Therefore, we must
3544		  * subtract the length of this sequence so that br_idx points
3545		  * to where the "branch" NFP instruction is expected to be.
3546 */
3547 if (is_mbpf_pseudo_call(meta))
3548 br_idx -= meta->num_insns_after_br;
3549
cd7df56e
JK
3550 if (!nfp_is_br(nfp_prog->prog[br_idx])) {
3551 pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
3552 br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
3553 return -ELOOP;
3554 }
bdf4c66f
QM
3555
3556 if (meta->insn.code == (BPF_JMP | BPF_EXIT))
3557 continue;
3558
cd7df56e 3559 /* Leave special branches for later */
2314fe9e 3560 if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
2178f3f0 3561 RELO_BR_REL && !is_mbpf_pseudo_call(meta))
cd7df56e
JK
3562 continue;
3563
5b674140
JW
3564 if (!meta->jmp_dst) {
3565 pr_err("Non-exit jump doesn't have destination info recorded!!\n");
cd7df56e
JK
3566 return -ELOOP;
3567 }
3568
5b674140 3569 jmp_dst = meta->jmp_dst;
cd7df56e 3570
91a87a58 3571 if (jmp_dst->flags & FLAG_INSN_SKIP_PREC_DEPENDENT) {
cd7df56e
JK
3572 pr_err("Branch landing on removed instruction!!\n");
3573 return -ELOOP;
3574 }
3575
44549623
QM
3576 if (is_mbpf_pseudo_call(meta) &&
3577 nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) {
2178f3f0
QM
3578 err = nfp_fixup_immed_relo(nfp_prog, meta,
3579 jmp_dst, br_idx);
3580 if (err)
3581 return err;
3582 }
3583
3584 if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
3585 RELO_BR_REL)
3586 continue;
3587
2314fe9e 3588 for (idx = meta->off; idx <= br_idx; idx++) {
cd7df56e
JK
3589 if (!nfp_is_br(nfp_prog->prog[idx]))
3590 continue;
5b674140 3591 br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
cd7df56e
JK
3592 }
3593 }
3594
cd7df56e
JK
3595 return 0;
3596}
3597
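/* Program prologue: load a 14-bit mask and AND it with the packet-vector
 * length word so that plen_reg() holds only the packet length bits.
 */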
3598static void nfp_intro(struct nfp_prog *nfp_prog)
3599{
18e53b6c
JK
3600 wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
3601 emit_alu(nfp_prog, plen_reg(nfp_prog),
3602 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
cd7df56e
JK
3603}
3604
389f263b
QM
3605static void
3606nfp_subprog_prologue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3607{
3608 /* Save return address into the stack. */
3609 wrp_mov(nfp_prog, reg_lm(0, 0), ret_reg(nfp_prog));
3610}
3611
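/* Entering a new subprogram: switch to its stack frame (depth rounded up to
 * a multiple of four bytes) and emit the prologue that spills the return
 * address.
 */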
3612static void
3613nfp_start_subprog(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3614{
3615 unsigned int depth = nfp_prog->subprog[meta->subprog_idx].stack_depth;
3616
3617 nfp_prog->stack_frame_depth = round_up(depth, 4);
3618 nfp_subprog_prologue(nfp_prog, meta);
3619}
3620
3621bool nfp_is_subprog_start(struct nfp_insn_meta *meta)
3622{
3623 return meta->flags & FLAG_INSN_IS_SUBPROG_START;
3624}
3625
e3b8baf0
JK
3626static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
3627{
3628 /* TC direct-action mode:
3629 * 0,1 ok NOT SUPPORTED[1]
3630 * 2 drop 0x22 -> drop, count as stat1
3631 * 4,5 nuke 0x02 -> drop
3632 * 7 redir 0x44 -> redir, count as stat2
3633 * * unspec 0x11 -> pass, count as stat0
3634 *
3635 * [1] We can't support OK and RECLASSIFY because we can't tell TC
3636 * the exact decision made. We are forced to support UNSPEC
3637 * to handle aborts so that's the only one we handle for passing
3638 * packets up the stack.
3639 */
3640 /* Target for aborts */
3641 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
3642
e84797fe 3643 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
e3b8baf0 3644
c000dfb5 3645 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
e3b8baf0
JK
3646 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
3647
3648 /* Target for normal exits */
3649 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
3650
3651 /* if R0 > 7 jump to abort */
3652 emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
3653 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
c000dfb5 3654 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
e3b8baf0
JK
3655
3656 wrp_immed(nfp_prog, reg_b(2), 0x41221211);
3657 wrp_immed(nfp_prog, reg_b(3), 0x41001211);
3658
3659 emit_shf(nfp_prog, reg_a(1),
3660 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
3661
3662 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3663 emit_shf(nfp_prog, reg_a(2),
3664 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
3665
3666 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3667 emit_shf(nfp_prog, reg_b(2),
3668 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
3669
e84797fe 3670 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
e3b8baf0
JK
3671
3672 emit_shf(nfp_prog, reg_b(2),
3673 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
3674 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
3675}
3676
6d677075
JK
3677static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
3678{
3679 /* XDP return codes:
3680 * 0 aborted 0x82 -> drop, count as stat3
3681 * 1 drop 0x22 -> drop, count as stat1
3682 * 2 pass 0x11 -> pass, count as stat0
3683 * 3 tx 0x44 -> redir, count as stat2
3684 * * unknown 0x82 -> drop, count as stat3
3685 */
3686 /* Target for aborts */
3687 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
3688
e84797fe 3689 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
6d677075 3690
c000dfb5 3691 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
6d677075
JK
3692 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
3693
3694 /* Target for normal exits */
3695 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
3696
3697 /* if R0 > 3 jump to abort */
3698 emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
3699 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
3700
3701 wrp_immed(nfp_prog, reg_b(2), 0x44112282);
3702
3703 emit_shf(nfp_prog, reg_a(1),
3704 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
3705
3706 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3707 emit_shf(nfp_prog, reg_b(2),
3708 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
3709
e84797fe 3710 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
6d677075 3711
c000dfb5 3712 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
6d677075
JK
3713 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
3714}
3715
44549623
QM
3716static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog)
3717{
3718 unsigned int idx;
3719
3720 for (idx = 1; idx < nfp_prog->subprog_cnt; idx++)
3721 if (nfp_prog->subprog[idx].needs_reg_push)
3722 return true;
3723
3724 return false;
3725}
3726
389f263b
QM
3727static void nfp_push_callee_registers(struct nfp_prog *nfp_prog)
3728{
3729 u8 reg;
3730
3731 /* Subroutine: Save all callee saved registers (R6 ~ R9).
3732 * imm_b() holds the return address.
3733 */
3734 nfp_prog->tgt_call_push_regs = nfp_prog_current_offset(nfp_prog);
3735 for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
3736 u8 adj = (reg - BPF_REG_0) * 2;
3737 u8 idx = (reg - BPF_REG_6) * 2;
3738
3739 /* The first slot in the stack frame is used to push the return
3740 * address in bpf_to_bpf_call(), start just after.
3741 */
3742 wrp_mov(nfp_prog, reg_lm(0, 1 + idx), reg_b(adj));
3743
3744 if (reg == BPF_REG_8)
3745 /* Prepare to jump back, last 3 insns use defer slots */
3746 emit_rtn(nfp_prog, imm_b(nfp_prog), 3);
3747
3748 wrp_mov(nfp_prog, reg_lm(0, 1 + idx + 1), reg_b(adj + 1));
3749 }
3750}
3751
3752static void nfp_pop_callee_registers(struct nfp_prog *nfp_prog)
3753{
3754 u8 reg;
3755
3756 /* Subroutine: Restore all callee saved registers (R6 ~ R9).
3757 * ret_reg() holds the return address.
3758 */
3759 nfp_prog->tgt_call_pop_regs = nfp_prog_current_offset(nfp_prog);
3760 for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
3761 u8 adj = (reg - BPF_REG_0) * 2;
3762 u8 idx = (reg - BPF_REG_6) * 2;
3763
3764 /* The first slot in the stack frame holds the return address,
3765 * start popping just after that.
3766 */
3767 wrp_mov(nfp_prog, reg_both(adj), reg_lm(0, 1 + idx));
3768
3769 if (reg == BPF_REG_8)
3770 /* Prepare to jump back, last 3 insns use defer slots */
3771 emit_rtn(nfp_prog, ret_reg(nfp_prog), 3);
3772
3773 wrp_mov(nfp_prog, reg_both(adj + 1), reg_lm(0, 1 + idx + 1));
3774 }
3775}
3776
cd7df56e
JK
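/* Program epilogue: emit the exit sequence for the program type, then, if
 * any subprogram saves callee registers, append the shared register
 * push/pop subroutines used by BPF-to-BPF calls.
 */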
3777static void nfp_outro(struct nfp_prog *nfp_prog)
3778{
012bb8a8
JK
3779 switch (nfp_prog->type) {
3780 case BPF_PROG_TYPE_SCHED_CLS:
e3b8baf0
JK
3781 nfp_outro_tc_da(nfp_prog);
3782 break;
012bb8a8 3783 case BPF_PROG_TYPE_XDP:
6d677075
JK
3784 nfp_outro_xdp(nfp_prog);
3785 break;
012bb8a8
JK
3786 default:
3787 WARN_ON(1);
cd7df56e 3788 }
389f263b 3789
44549623 3790 if (!nfp_prog_needs_callee_reg_save(nfp_prog))
389f263b
QM
3791 return;
3792
3793 nfp_push_callee_registers(nfp_prog);
3794 nfp_pop_callee_registers(nfp_prog);
cd7df56e
JK
3795}
3796
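/* Main translation loop: record each BPF instruction's NFP offset, invoke
 * its callback from instr_cb[] (or the double_cb of the previous
 * instruction), then emit the epilogue and fix up branch targets.
 */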
3797static int nfp_translate(struct nfp_prog *nfp_prog)
3798{
3799 struct nfp_insn_meta *meta;
389f263b 3800 unsigned int depth;
ff42bb9f 3801 int err;
cd7df56e 3802
389f263b
QM
3803 depth = nfp_prog->subprog[0].stack_depth;
3804 nfp_prog->stack_frame_depth = round_up(depth, 4);
3805
cd7df56e
JK
3806 nfp_intro(nfp_prog);
3807 if (nfp_prog->error)
3808 return nfp_prog->error;
3809
3810 list_for_each_entry(meta, &nfp_prog->insns, l) {
3811 instr_cb_t cb = instr_cb[meta->insn.code];
3812
3813 meta->off = nfp_prog_current_offset(nfp_prog);
3814
389f263b
QM
3815 if (nfp_is_subprog_start(meta)) {
3816 nfp_start_subprog(nfp_prog, meta);
3817 if (nfp_prog->error)
3818 return nfp_prog->error;
3819 }
3820
91a87a58 3821 if (meta->flags & FLAG_INSN_SKIP_MASK) {
cd7df56e
JK
3822 nfp_prog->n_translated++;
3823 continue;
3824 }
3825
3826 if (nfp_meta_has_prev(nfp_prog, meta) &&
3827 nfp_meta_prev(meta)->double_cb)
3828 cb = nfp_meta_prev(meta)->double_cb;
3829 if (!cb)
3830 return -ENOENT;
3831 err = cb(nfp_prog, meta);
3832 if (err)
3833 return err;
e8a4796e
JK
3834 if (nfp_prog->error)
3835 return nfp_prog->error;
cd7df56e
JK
3836
3837 nfp_prog->n_translated++;
3838 }
3839
854dc87d
JW
3840 nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
3841
cd7df56e
JK
3842 nfp_outro(nfp_prog);
3843 if (nfp_prog->error)
3844 return nfp_prog->error;
3845
ff42bb9f 3846 wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
1c03e03f
JK
3847 if (nfp_prog->error)
3848 return nfp_prog->error;
3849
cd7df56e
JK
3850 return nfp_fixup_branches(nfp_prog);
3851}
3852
cd7df56e
JK
3853/* --- Optimizations --- */
3854static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
3855{
3856 struct nfp_insn_meta *meta;
3857
3858 list_for_each_entry(meta, &nfp_prog->insns, l) {
3859 struct bpf_insn insn = meta->insn;
3860
3861 /* Programs converted from cBPF start with register xoring */
3862 if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
3863 insn.src_reg == insn.dst_reg)
3864 continue;
3865
3866 /* Programs start with R6 = R1 but we ignore the skb pointer */
3867 if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
3868 insn.src_reg == 1 && insn.dst_reg == 6)
91a87a58 3869 meta->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
cd7df56e
JK
3870
3871 /* Return as soon as something doesn't match */
91a87a58 3872 if (!(meta->flags & FLAG_INSN_SKIP_MASK))
cd7df56e
JK
3873 return;
3874 }
3875}
3876
6c59500c
JK
3877/* abs(insn.imm) will fit better into an unrestricted reg immediate -
3878 * convert add/sub of a negative number into a sub/add of a positive one.
3879 */
3880static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
3881{
3882 struct nfp_insn_meta *meta;
3883
3884 list_for_each_entry(meta, &nfp_prog->insns, l) {
3885 struct bpf_insn insn = meta->insn;
3886
91a87a58 3887 if (meta->flags & FLAG_INSN_SKIP_MASK)
6c59500c
JK
3888 continue;
3889
46144839 3890 if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta))
6c59500c
JK
3891 continue;
3892 if (BPF_SRC(insn.code) != BPF_K)
3893 continue;
3894 if (insn.imm >= 0)
3895 continue;
3896
46144839 3897 if (is_mbpf_jmp(meta)) {
7bdc97be
JK
3898 switch (BPF_OP(insn.code)) {
3899 case BPF_JGE:
3900 case BPF_JSGE:
3901 case BPF_JLT:
3902 case BPF_JSLT:
3903 meta->jump_neg_op = true;
3904 break;
3905 default:
3906 continue;
3907 }
3908 } else {
3909 if (BPF_OP(insn.code) == BPF_ADD)
3910 insn.code = BPF_CLASS(insn.code) | BPF_SUB;
3911 else if (BPF_OP(insn.code) == BPF_SUB)
3912 insn.code = BPF_CLASS(insn.code) | BPF_ADD;
3913 else
3914 continue;
6c59500c 3915
7bdc97be
JK
3916 meta->insn.code = insn.code | BPF_K;
3917 }
6c59500c
JK
3918
3919 meta->insn.imm = -insn.imm;
3920 }
3921}
3922
cd7df56e
JK
3923/* Remove masking after load since our load guarantees this is not needed */
3924static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
3925{
3926 struct nfp_insn_meta *meta1, *meta2;
3927 const s32 exp_mask[] = {
3928 [BPF_B] = 0x000000ffU,
3929 [BPF_H] = 0x0000ffffU,
3930 [BPF_W] = 0xffffffffU,
3931 };
3932
3933 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
3934 struct bpf_insn insn, next;
3935
3936 insn = meta1->insn;
3937 next = meta2->insn;
3938
3939 if (BPF_CLASS(insn.code) != BPF_LD)
3940 continue;
3941 if (BPF_MODE(insn.code) != BPF_ABS &&
3942 BPF_MODE(insn.code) != BPF_IND)
3943 continue;
3944
3945 if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
3946 continue;
3947
3948 if (!exp_mask[BPF_SIZE(insn.code)])
3949 continue;
3950 if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
3951 continue;
3952
3953 if (next.src_reg || next.dst_reg)
3954 continue;
3955
1266f5d6
JW
3956 if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
3957 continue;
3958
91a87a58 3959 meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
cd7df56e
JK
3960 }
3961}
3962
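/* Skip the pair of 64-bit shifts by 32 (in either order) that directly
 * follows a 32-bit classic packet load, e.g.:
 *
 *   r0 = *(u32 *)(pkt + off)
 *   r0 <<= 32
 *   r0 >>= 32
 *
 * as long as no jump lands on either shift instruction.
 */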
3963static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
3964{
3965 struct nfp_insn_meta *meta1, *meta2, *meta3;
3966
3967 nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
3968 struct bpf_insn insn, next1, next2;
3969
3970 insn = meta1->insn;
3971 next1 = meta2->insn;
3972 next2 = meta3->insn;
3973
3974 if (BPF_CLASS(insn.code) != BPF_LD)
3975 continue;
3976 if (BPF_MODE(insn.code) != BPF_ABS &&
3977 BPF_MODE(insn.code) != BPF_IND)
3978 continue;
3979 if (BPF_SIZE(insn.code) != BPF_W)
3980 continue;
3981
3982 if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
3983 next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
3984 !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
3985 next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
3986 continue;
3987
3988 if (next1.src_reg || next1.dst_reg ||
3989 next2.src_reg || next2.dst_reg)
3990 continue;
3991
3992 if (next1.imm != 0x20 || next2.imm != 0x20)
3993 continue;
3994
29fe46ef
JW
3995 if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
3996 meta3->flags & FLAG_INSN_IS_JUMP_DST)
3997 continue;
3998
91a87a58
JK
3999 meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
4000 meta3->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
cd7df56e
JK
4001 }
4002}
4003
6bc7103c
JW
4004/* load/store pair that forms a memory copy should look like the following:
4005 *
4006 * ld_width R, [addr_src + offset_src]
4007 * st_width [addr_dest + offset_dest], R
4008 *
4009 * The destination register of the load and the source register of the
4010 * store should be the same, and both should operate at the same width.
4011 * If either addr_src or addr_dest is the stack pointer, we don't do the
4012 * CPP optimization, as the stack is modelled by registers on the NFP.
4013 */
4014static bool
4015curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
4016 struct nfp_insn_meta *st_meta)
4017{
4018 struct bpf_insn *ld = &ld_meta->insn;
4019 struct bpf_insn *st = &st_meta->insn;
4020
4021 if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
4022 return false;
4023
cc0dff6d
JW
4024 if (ld_meta->ptr.type != PTR_TO_PACKET &&
4025 ld_meta->ptr.type != PTR_TO_MAP_VALUE)
6bc7103c
JW
4026 return false;
4027
4028 if (st_meta->ptr.type != PTR_TO_PACKET)
4029 return false;
4030
4031 if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
4032 return false;
4033
4034 if (ld->dst_reg != st->src_reg)
4035 return false;
4036
4037	 /* There is a jump to the store insn in this pair. */
4038 if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
4039 return false;
4040
4041 return true;
4042}
4043
4044/* Currently, we only support chaining load/store pairs if:
4045 *
4046 * - Their address base registers are the same.
4047 * - Their address offsets are in the same order.
4048 * - They operate at the same memory width.
4049 * - There is no jump into the middle of them.
4050 */
4051static bool
4052curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
4053 struct nfp_insn_meta *st_meta,
4054 struct bpf_insn *prev_ld,
4055 struct bpf_insn *prev_st)
4056{
4057 u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
4058 struct bpf_insn *ld = &ld_meta->insn;
4059 struct bpf_insn *st = &st_meta->insn;
4060 s16 prev_ld_off, prev_st_off;
4061
4062 /* This pair is the start pair. */
4063 if (!prev_ld)
4064 return true;
4065
4066 prev_size = BPF_LDST_BYTES(prev_ld);
4067 curr_size = BPF_LDST_BYTES(ld);
4068 prev_ld_base = prev_ld->src_reg;
4069 prev_st_base = prev_st->dst_reg;
4070 prev_ld_dst = prev_ld->dst_reg;
4071 prev_ld_off = prev_ld->off;
4072 prev_st_off = prev_st->off;
4073
4074 if (ld->dst_reg != prev_ld_dst)
4075 return false;
4076
4077 if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
4078 return false;
4079
4080 if (curr_size != prev_size)
4081 return false;
4082
4083	 /* There is a jump to the head of this pair. */
4084 if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
4085 return false;
4086
4087 /* Both in ascending order. */
4088 if (prev_ld_off + prev_size == ld->off &&
4089 prev_st_off + prev_size == st->off)
4090 return true;
4091
4092 /* Both in descending order. */
4093 if (ld->off + curr_size == prev_ld_off &&
4094 st->off + curr_size == prev_st_off)
4095 return true;
4096
4097 return false;
4098}
4099
4100/* Return TRUE if a cross memory access happens. A cross memory access means
4101 * the store area overlaps the load area such that a later load might read
4102 * the value written by a previous store; in this case we can't treat the
4103 * sequence as a memory copy.
4104 */
4105static bool
4106cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
4107 struct nfp_insn_meta *head_st_meta)
4108{
4109 s16 head_ld_off, head_st_off, ld_off;
4110
4111	 /* Different pointer types do not overlap. */
4112 if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
4113 return false;
4114
4115 /* load and store are both PTR_TO_PACKET, check ID info. */
4116 if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
4117 return true;
4118
4119 /* Canonicalize the offsets. Turn all of them against the original
4120 * base register.
4121 */
4122 head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
4123 head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
4124 ld_off = ld->off + head_ld_meta->ptr.off;
4125
4126 /* Ascending order cross. */
4127 if (ld_off > head_ld_off &&
4128 head_ld_off < head_st_off && ld_off >= head_st_off)
4129 return true;
4130
4131 /* Descending order cross. */
4132 if (ld_off < head_ld_off &&
4133 head_ld_off > head_st_off && ld_off <= head_st_off)
4134 return true;
4135
4136 return false;
4137}
4138
4139/* This pass tries to identify the following instruction sequences.
4140 *
4141 * load R, [regA + offA]
4142 * store [regB + offB], R
4143 * load R, [regA + offA + const_imm_A]
4144 * store [regB + offB + const_imm_A], R
4145 * load R, [regA + offA + 2 * const_imm_A]
4146 * store [regB + offB + 2 * const_imm_A], R
4147 * ...
4148 *
4149 * The above sequence is typically generated by the compiler when lowering
4150 * memcpy. The NFP prefers using CPP instructions to accelerate it.
4151 */
4152static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
4153{
4154 struct nfp_insn_meta *head_ld_meta = NULL;
4155 struct nfp_insn_meta *head_st_meta = NULL;
4156 struct nfp_insn_meta *meta1, *meta2;
4157 struct bpf_insn *prev_ld = NULL;
4158 struct bpf_insn *prev_st = NULL;
4159 u8 count = 0;
4160
4161 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
4162 struct bpf_insn *ld = &meta1->insn;
4163 struct bpf_insn *st = &meta2->insn;
4164
4165		 /* Reset record status if any of the following is true:
4166		  * - The current insn pair is not load/store.
4167		  * - The load/store pair doesn't chain with the previous one.
4168		  * - The chained load/store pair crosses the previous pair.
4169		  * - The chained load/store pair has a total memory copy size
4170		  *   beyond 128 bytes, which is the maximum length a
4171		  *   single NFP CPP command can transfer.
4172 */
4173 if (!curr_pair_is_memcpy(meta1, meta2) ||
4174 !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
4175 prev_st) ||
4176 (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
4177 head_st_meta) ||
4178 head_ld_meta->ldst_gather_len >= 128))) {
4179 if (!count)
4180 continue;
4181
4182 if (count > 1) {
4183 s16 prev_ld_off = prev_ld->off;
4184 s16 prev_st_off = prev_st->off;
4185 s16 head_ld_off = head_ld_meta->insn.off;
4186
4187 if (prev_ld_off < head_ld_off) {
4188 head_ld_meta->insn.off = prev_ld_off;
4189 head_st_meta->insn.off = prev_st_off;
4190 head_ld_meta->ldst_gather_len =
4191 -head_ld_meta->ldst_gather_len;
4192 }
4193
4194 head_ld_meta->paired_st = &head_st_meta->insn;
91a87a58
JK
4195 head_st_meta->flags |=
4196 FLAG_INSN_SKIP_PREC_DEPENDENT;
6bc7103c
JW
4197 } else {
4198 head_ld_meta->ldst_gather_len = 0;
4199 }
4200
4201			 /* If the chain is ended by a load/store pair then this
4202			  * could serve as the new head of the next chain.
4203 */
4204 if (curr_pair_is_memcpy(meta1, meta2)) {
4205 head_ld_meta = meta1;
4206 head_st_meta = meta2;
4207 head_ld_meta->ldst_gather_len =
4208 BPF_LDST_BYTES(ld);
4209 meta1 = nfp_meta_next(meta1);
4210 meta2 = nfp_meta_next(meta2);
4211 prev_ld = ld;
4212 prev_st = st;
4213 count = 1;
4214 } else {
4215 head_ld_meta = NULL;
4216 head_st_meta = NULL;
4217 prev_ld = NULL;
4218 prev_st = NULL;
4219 count = 0;
4220 }
4221
4222 continue;
4223 }
4224
4225 if (!head_ld_meta) {
4226 head_ld_meta = meta1;
4227 head_st_meta = meta2;
4228 } else {
91a87a58
JK
4229 meta1->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
4230 meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
6bc7103c
JW
4231 }
4232
4233 head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
4234 meta1 = nfp_meta_next(meta1);
4235 meta2 = nfp_meta_next(meta2);
4236 prev_ld = ld;
4237 prev_st = st;
4238 count++;
4239 }
4240}
4241
87b10ecd
JW
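/* Group packet loads that share the same verifier pointer (same id and
 * variable offset) into ranges of at most 64 bytes.  The first load of each
 * range is marked to prime the packet cache and the remaining loads in the
 * range are served from it.  Any jump destination, helper call, classic
 * load/store or packet write ends the current range.
 */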
4242static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
4243{
4244 struct nfp_insn_meta *meta, *range_node = NULL;
4245 s16 range_start = 0, range_end = 0;
4246 bool cache_avail = false;
4247 struct bpf_insn *insn;
4248 s32 range_ptr_off = 0;
4249 u32 range_ptr_id = 0;
4250
4251 list_for_each_entry(meta, &nfp_prog->insns, l) {
4252 if (meta->flags & FLAG_INSN_IS_JUMP_DST)
4253 cache_avail = false;
4254
91a87a58 4255 if (meta->flags & FLAG_INSN_SKIP_MASK)
87b10ecd
JW
4256 continue;
4257
4258 insn = &meta->insn;
4259
4260 if (is_mbpf_store_pkt(meta) ||
4261 insn->code == (BPF_JMP | BPF_CALL) ||
4262 is_mbpf_classic_store_pkt(meta) ||
4263 is_mbpf_classic_load(meta)) {
4264 cache_avail = false;
4265 continue;
4266 }
4267
4268 if (!is_mbpf_load(meta))
4269 continue;
4270
4271 if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
4272 cache_avail = false;
4273 continue;
4274 }
4275
4276 if (!cache_avail) {
4277 cache_avail = true;
4278 if (range_node)
4279 goto end_current_then_start_new;
4280 goto start_new;
4281 }
4282
4283 /* Check ID to make sure two reads share the same
4284 * variable offset against PTR_TO_PACKET, and check OFF
4285 * to make sure they also share the same constant
4286 * offset.
4287 *
4288 * OFFs don't really need to be the same, because they
4289 * are the constant offsets against PTR_TO_PACKET, so
4290 * for different OFFs, we could canonicalize them to
4291		 * offsets against the original packet pointer. We don't
4292 * support this.
4293 */
4294 if (meta->ptr.id == range_ptr_id &&
4295 meta->ptr.off == range_ptr_off) {
4296 s16 new_start = range_start;
4297 s16 end, off = insn->off;
4298 s16 new_end = range_end;
4299 bool changed = false;
4300
4301 if (off < range_start) {
4302 new_start = off;
4303 changed = true;
4304 }
4305
4306 end = off + BPF_LDST_BYTES(insn);
4307 if (end > range_end) {
4308 new_end = end;
4309 changed = true;
4310 }
4311
4312 if (!changed)
4313 continue;
4314
4315 if (new_end - new_start <= 64) {
4316 /* Install new range. */
4317 range_start = new_start;
4318 range_end = new_end;
4319 continue;
4320 }
4321 }
4322
4323end_current_then_start_new:
4324 range_node->pkt_cache.range_start = range_start;
4325 range_node->pkt_cache.range_end = range_end;
4326start_new:
4327 range_node = meta;
4328 range_node->pkt_cache.do_init = true;
4329 range_ptr_id = range_node->ptr.id;
4330 range_ptr_off = range_node->ptr.off;
4331 range_start = insn->off;
4332 range_end = insn->off + BPF_LDST_BYTES(insn);
4333 }
4334
4335 if (range_node) {
4336 range_node->pkt_cache.range_start = range_start;
4337 range_node->pkt_cache.range_end = range_end;
4338 }
4339
4340 list_for_each_entry(meta, &nfp_prog->insns, l) {
91a87a58 4341 if (meta->flags & FLAG_INSN_SKIP_MASK)
87b10ecd
JW
4342 continue;
4343
4344 if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
4345 if (meta->pkt_cache.do_init) {
4346 range_start = meta->pkt_cache.range_start;
4347 range_end = meta->pkt_cache.range_end;
4348 } else {
4349 meta->pkt_cache.range_start = range_start;
4350 meta->pkt_cache.range_end = range_end;
4351 }
4352 }
4353 }
4354}
4355
cd7df56e
JK
4356static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
4357{
cd7df56e
JK
4358 nfp_bpf_opt_reg_init(nfp_prog);
4359
6c59500c 4360 nfp_bpf_opt_neg_add_sub(nfp_prog);
cd7df56e
JK
4361 nfp_bpf_opt_ld_mask(nfp_prog);
4362 nfp_bpf_opt_ld_shift(nfp_prog);
6bc7103c 4363 nfp_bpf_opt_ldst_gather(nfp_prog);
87b10ecd 4364 nfp_bpf_opt_pkt_cache(nfp_prog);
cd7df56e
JK
4365
4366 return 0;
4367}
4368
b4264c96
JK
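/* Rewrite every 64-bit map pointer load (ld_imm64 with src_reg set to
 * BPF_PSEUDO_MAP_FD): the low word becomes the map ID (offload-neutral maps)
 * or the hardware table ID, and the high word is cleared.
 */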
4369static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
4370{
4371 struct nfp_insn_meta *meta1, *meta2;
4372 struct nfp_bpf_map *nfp_map;
4373 struct bpf_map *map;
ab01f4ac 4374 u32 id;
b4264c96
JK
4375
4376 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
91a87a58
JK
4377 if (meta1->flags & FLAG_INSN_SKIP_MASK ||
4378 meta2->flags & FLAG_INSN_SKIP_MASK)
b4264c96
JK
4379 continue;
4380
4381 if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
4382 meta1->insn.src_reg != BPF_PSEUDO_MAP_FD)
4383 continue;
4384
4385 map = (void *)(unsigned long)((u32)meta1->insn.imm |
4386 (u64)meta2->insn.imm << 32);
ab01f4ac
JK
4387 if (bpf_map_offload_neutral(map)) {
4388 id = map->id;
4389 } else {
4390 nfp_map = map_to_offmap(map)->dev_priv;
4391 id = nfp_map->tid;
4392 }
b4264c96 4393
ab01f4ac 4394 meta1->insn.imm = id;
b4264c96
JK
4395 meta2->insn.imm = 0;
4396 }
4397
4398 return 0;
4399}
4400
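/* Convert the program in place to ustore format: check that every
 * instruction is a valid no-ECC encoding, compute its ECC bits and store
 * the result as little-endian.
 */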
2314fe9e 4401static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
fd068ddc 4402{
2314fe9e 4403 __le64 *ustore = (__force __le64 *)prog;
fd068ddc
JK
4404 int i;
4405
2314fe9e 4406 for (i = 0; i < len; i++) {
fd068ddc
JK
4407 int err;
4408
2314fe9e 4409 err = nfp_ustore_check_valid_no_ecc(prog[i]);
fd068ddc
JK
4410 if (err)
4411 return err;
4412
2314fe9e 4413 ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i]));
fd068ddc
JK
4414 }
4415
4416 return 0;
4417}
4418
44a12ecc
JK
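/* Shrink the instruction buffer to the final program length; if the smaller
 * allocation fails we simply keep the oversized buffer.
 */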
4419static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog)
4420{
4421 void *prog;
4422
4423 prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL);
4424 if (!prog)
4425 return;
4426
4427 nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64);
4428 memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len);
4429 kvfree(nfp_prog->prog);
4430 nfp_prog->prog = prog;
4431}
4432
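/* Top-level JIT entry point: replace map pointers, run the optimization
 * passes, translate to NFP code and trim the instruction buffer.
 */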
c6c580d7 4433int nfp_bpf_jit(struct nfp_prog *nfp_prog)
cd7df56e 4434{
cd7df56e
JK
4435 int ret;
4436
b4264c96
JK
4437 ret = nfp_bpf_replace_map_ptrs(nfp_prog);
4438 if (ret)
4439 return ret;
4440
cd7df56e
JK
4441 ret = nfp_bpf_optimize(nfp_prog);
4442 if (ret)
9314c442 4443 return ret;
cd7df56e
JK
4444
4445 ret = nfp_translate(nfp_prog);
4446 if (ret) {
4447 pr_err("Translation failed with error %d (translated: %u)\n",
4448 ret, nfp_prog->n_translated);
9314c442 4449 return -EINVAL;
cd7df56e
JK
4450 }
4451
44a12ecc
JK
4452 nfp_bpf_prog_trim(nfp_prog);
4453
2314fe9e 4454 return ret;
cd7df56e 4455}
1549921d 4456
e2fc6114 4457void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog)
1549921d
JK
4458{
4459 struct nfp_insn_meta *meta;
4460
4461 /* Another pass to record jump information. */
4462 list_for_each_entry(meta, &nfp_prog->insns, l) {
e3b49dc6 4463 struct nfp_insn_meta *dst_meta;
1549921d 4464 u64 code = meta->insn.code;
e3b49dc6
QM
4465 unsigned int dst_idx;
4466 bool pseudo_call;
1549921d 4467
46144839 4468 if (!is_mbpf_jmp(meta))
e3b49dc6
QM
4469 continue;
4470 if (BPF_OP(code) == BPF_EXIT)
4471 continue;
4472 if (is_mbpf_helper_call(meta))
4473 continue;
1549921d 4474
e3b49dc6
QM
4475 /* If opcode is BPF_CALL at this point, this can only be a
4476 * BPF-to-BPF call (a.k.a pseudo call).
4477 */
4478 pseudo_call = BPF_OP(code) == BPF_CALL;
1549921d 4479
e3b49dc6
QM
4480 if (pseudo_call)
4481 dst_idx = meta->n + 1 + meta->insn.imm;
4482 else
4483 dst_idx = meta->n + 1 + meta->insn.off;
4484
e2fc6114 4485 dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx);
e3b49dc6
QM
4486
4487 if (pseudo_call)
4488 dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START;
4489
4490 dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
4491 meta->jmp_dst = dst_meta;
1549921d
JK
4492 }
4493}
2314fe9e 4494
74801e50
QM
4495bool nfp_bpf_supported_opcode(u8 code)
4496{
4497 return !!instr_cb[code];
4498}
4499
2314fe9e
JK
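/* Produce a per-vNIC copy of the program with all relocations resolved
 * against this vNIC's code offsets (program start, exit targets, helper
 * entry points), then compute the final ustore encoding.
 */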
4500void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
4501{
4502 unsigned int i;
4503 u64 *prog;
4504 int err;
4505
4506 prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64),
4507 GFP_KERNEL);
4508 if (!prog)
4509 return ERR_PTR(-ENOMEM);
4510
4511 for (i = 0; i < nfp_prog->prog_len; i++) {
4512 enum nfp_relo_type special;
77a3d311 4513 u32 val;
389f263b 4514 u16 off;
2314fe9e
JK
4515
4516 special = FIELD_GET(OP_RELO_TYPE, prog[i]);
4517 switch (special) {
4518 case RELO_NONE:
4519 continue;
4520 case RELO_BR_REL:
4521 br_add_offset(&prog[i], bv->start_off);
4522 break;
4523 case RELO_BR_GO_OUT:
4524 br_set_offset(&prog[i],
4525 nfp_prog->tgt_out + bv->start_off);
4526 break;
4527 case RELO_BR_GO_ABORT:
4528 br_set_offset(&prog[i],
4529 nfp_prog->tgt_abort + bv->start_off);
4530 break;
389f263b 4531 case RELO_BR_GO_CALL_PUSH_REGS:
44549623
QM
4532 if (!nfp_prog->tgt_call_push_regs) {
4533 pr_err("BUG: failed to detect subprogram registers needs\n");
4534 err = -EINVAL;
4535 goto err_free_prog;
4536 }
389f263b
QM
4537 off = nfp_prog->tgt_call_push_regs + bv->start_off;
4538 br_set_offset(&prog[i], off);
4539 break;
4540 case RELO_BR_GO_CALL_POP_REGS:
44549623
QM
4541 if (!nfp_prog->tgt_call_pop_regs) {
4542 pr_err("BUG: failed to detect subprogram registers needs\n");
4543 err = -EINVAL;
4544 goto err_free_prog;
4545 }
389f263b
QM
4546 off = nfp_prog->tgt_call_pop_regs + bv->start_off;
4547 br_set_offset(&prog[i], off);
4548 break;
2314fe9e
JK
4549 case RELO_BR_NEXT_PKT:
4550 br_set_offset(&prog[i], bv->tgt_done);
4551 break;
77a3d311
JK
4552 case RELO_BR_HELPER:
4553 val = br_get_offset(prog[i]);
4554 val -= BR_OFF_RELO;
4555 switch (val) {
4556 case BPF_FUNC_map_lookup_elem:
4557 val = nfp_prog->bpf->helpers.map_lookup;
4558 break;
44d65a47
JK
4559 case BPF_FUNC_map_update_elem:
4560 val = nfp_prog->bpf->helpers.map_update;
4561 break;
bfee64de
JK
4562 case BPF_FUNC_map_delete_elem:
4563 val = nfp_prog->bpf->helpers.map_delete;
4564 break;
9816dd35
JK
4565 case BPF_FUNC_perf_event_output:
4566 val = nfp_prog->bpf->helpers.perf_event_output;
4567 break;
77a3d311
JK
4568 default:
4569 pr_err("relocation of unknown helper %d\n",
4570 val);
4571 err = -EINVAL;
4572 goto err_free_prog;
4573 }
4574 br_set_offset(&prog[i], val);
4575 break;
4576 case RELO_IMMED_REL:
4577 immed_add_value(&prog[i], bv->start_off);
4578 break;
2314fe9e
JK
4579 }
4580
4581 prog[i] &= ~OP_RELO_TYPE;
4582 }
4583
4584 err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len);
4585 if (err)
4586 goto err_free_prog;
4587
4588 return prog;
4589
4590err_free_prog:
4591 kfree(prog);
4592 return ERR_PTR(err);
4593}