Commit | Line | Data |
---|---|---|
96de2506 JK |
1 | // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) |
2 | /* Copyright (C) 2016-2018 Netronome Systems, Inc. */ | |
cd7df56e JK |
3 | |
4 | #define pr_fmt(fmt) "NFP net bpf: " fmt | |
5 | ||
0d49eaf4 | 6 | #include <linux/bug.h> |
cd7df56e JK |
7 | #include <linux/bpf.h> |
8 | #include <linux/filter.h> | |
2a952b03 | 9 | #include <linux/kernel.h> |
cd7df56e | 10 | #include <linux/pkt_cls.h> |
2a952b03 | 11 | #include <linux/reciprocal_div.h> |
cd7df56e JK |
12 | #include <linux/unistd.h> |
13 | ||
d9ae7f2b JK |
14 | #include "main.h" |
15 | #include "../nfp_asm.h" | |
d985888f | 16 | #include "../nfp_net_ctrl.h" |
cd7df56e JK |
17 | |
/* --- NFP prog --- */
/* Foreach "multiple" entries macros provide pos and next<n> pointers.
 * It's safe to modify the next pointers (but not pos).
 *
 * Iteration stops as soon as any of the cursors reaches the list head,
 * i.e. walk2 visits overlapping (insn, insn+1) pairs and walk3 visits
 * overlapping (insn, insn+1, insn+2) triples of the instruction list.
 */
#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos))

#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l),				\
	     next2 = list_next_entry(next, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l &&				\
	     &(nfp_prog)->insns != &next2->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos),					\
	     next2 = nfp_meta_next(next))
cd7df56e JK |
41 | static bool |
42 | nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |
43 | { | |
44 | return meta->l.prev != &nfp_prog->insns; | |
45 | } | |
46 | ||
cd7df56e JK |
47 | static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) |
48 | { | |
e8a4796e JK |
49 | if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) { |
50 | pr_warn("instruction limit reached (%u NFP instructions)\n", | |
51 | nfp_prog->prog_len); | |
cd7df56e JK |
52 | nfp_prog->error = -ENOSPC; |
53 | return; | |
54 | } | |
55 | ||
56 | nfp_prog->prog[nfp_prog->prog_len] = insn; | |
57 | nfp_prog->prog_len++; | |
58 | } | |
59 | ||
/* Current emit position, in NFP instructions from the program start. */
static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
{
	return nfp_prog->prog_len;
}
64 | ||
0d49eaf4 JK |
/* Sanity-check that the emit position matches the expected offset @off;
 * warns (once) and returns false on mismatch.  Used to validate that
 * emitters produced exactly the predicted number of instructions.
 */
static bool
nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
{
	/* If there is a recorded error we may have dropped instructions;
	 * that doesn't have to be due to translator bug, and the translation
	 * will fail anyway, so just return OK.
	 */
	if (nfp_prog->error)
		return true;
	return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
}
76 | ||
/* --- Emitters --- */

/* Encode a CPP bus command instruction and append it to the program.
 * @areg/@breg are already-encoded restricted operands; @size is the
 * 0-based transfer count (callers pass length - 1); @indir selects
 * indirect-reference mode.  The signal bit is set whenever a context
 * swap is requested.
 */
static void
__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
	   bool indir)
{
	u64 insn;

	insn =	FIELD_PREP(OP_CMD_A_SRC, areg) |
		FIELD_PREP(OP_CMD_CTX, ctx) |
		FIELD_PREP(OP_CMD_B_SRC, breg) |
		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
		FIELD_PREP(OP_CMD_XFER, xfer) |
		FIELD_PREP(OP_CMD_CNT, size) |
		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
		FIELD_PREP(OP_CMD_INDIR, indir) |
		FIELD_PREP(OP_CMD_MODE, mode);

	nfp_prog_push(nfp_prog, insn);
}
98 | ||
/* Translate swreg operands to restricted encodings and emit a CPP
 * command.  Commands cannot swap operands or use LM extension; those
 * cases set nfp_prog->error instead of emitting.
 */
static void
emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
	if (err) {
		nfp_prog->error = err;
		return;
	}
	if (reg.swap) {
		pr_err("cmd can't swap arguments\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	if (reg.dst_lmextn || reg.src_lmextn) {
		pr_err("cmd can't use LMextn\n");
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
		   indir);
}
125 | ||
/* Emit a direct-reference CPP command (indir == false). */
static void
emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
}
132 | ||
9879a381 JW |
/* Emit an indirect-reference CPP command (indir == true); callers must
 * have primed the PREV_ALU override beforehand.
 */
static void
emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
}
139 | ||
cd7df56e JK |
/* Encode a branch instruction.  @addr is split into the low field and a
 * single "high" flag which is set whenever the address does not fit in
 * the low field.  @defer is the number of defer slots.
 */
static void
__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
	  enum br_ctx_signal_state css, u16 addr, u8 defer)
{
	u16 addr_lo, addr_hi;
	u64 insn;

	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
	addr_hi = addr != addr_lo;

	insn = OP_BR_BASE |
		FIELD_PREP(OP_BR_MASK, mask) |
		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
		FIELD_PREP(OP_BR_CSS, css) |
		FIELD_PREP(OP_BR_DEFBR, defer) |
		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);

	nfp_prog_push(nfp_prog, insn);
}
160 | ||
2314fe9e JK |
/* Emit a branch and tag the just-emitted instruction word with a
 * relocation type (stored in the OP_RELO_TYPE bits) so the address can
 * be fixed up later.  Unconditional branches support at most 2 defer
 * slots.
 */
static void
emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
	     enum nfp_relo_type relo)
{
	if (mask == BR_UNC && defer > 2) {
		pr_err("BUG: branch defer out of bounds %d\n", defer);
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_br(nfp_prog, mask,
		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
		  BR_CSS_NONE, addr, defer);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}
178 | ||
cd7df56e JK |
/* Emit a PC-relative branch (RELO_BR_REL relocation). */
static void
emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
{
	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
}
184 | ||
991f5b36 JW |
/* Encode a branch-on-bit instruction; @set selects branch-if-set vs
 * branch-if-clear.  Address split mirrors __emit_br().
 */
static void
__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
	      bool set, bool src_lmextn)
{
	u16 addr_lo, addr_hi;
	u64 insn;

	addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
	addr_hi = addr != addr_lo;

	insn = OP_BR_BIT_BASE |
		FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
		FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
		FIELD_PREP(OP_BR_BIT_BV, set) |
		FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
		FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
		FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
		FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
206 | ||
/* Emit a branch on bit @bit of @src, tagging the instruction with a
 * relocation type like emit_br_relo().
 */
static void
emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
		 u8 defer, bool set, enum nfp_relo_type relo)
{
	struct nfp_insn_re_regs reg;
	int err;

	/* NOTE: The bit to test is specified as a rotation amount, such that
	 * the bit to test will be placed on the MSB of the result when
	 * doing a rotate right.  For bit X, we need right rotate X + 1.
	 */
	bit += 1;

	err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
		      reg.src_lmextn);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}
232 | ||
/* Branch if bit @bit of @src is set (PC-relative relocation). */
static void
emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
{
	emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
}
238 | ||
389f263b QM |
/* Encode a branch-to-ALU-result (computed branch) instruction. */
static void
__emit_br_alu(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
	      u8 defer, bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_BR_ALU_BASE |
		FIELD_PREP(OP_BR_ALU_A_SRC, areg) |
		FIELD_PREP(OP_BR_ALU_B_SRC, breg) |
		FIELD_PREP(OP_BR_ALU_DEFBR, defer) |
		FIELD_PREP(OP_BR_ALU_IMM_HI, imm_hi) |
		FIELD_PREP(OP_BR_ALU_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_BR_ALU_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
255 | ||
/* Emit a return: computed branch to the address held in @base, with
 * @defer defer slots.
 */
static void emit_rtn(struct nfp_prog *nfp_prog, swreg base, u8 defer)
{
	struct nfp_insn_ur_regs reg;
	int err;

	err = swreg_to_unrestricted(reg_none(), base, reg_imm(0), &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_br_alu(nfp_prog, reg.areg, reg.breg, 0, defer, reg.dst_lmextn,
		      reg.src_lmextn);
}
270 | ||
cd7df56e JK |
/* Encode an immed (load immediate) instruction.  The low byte of the
 * immediate travels in the operands; @imm_hi carries the upper bits.
 */
static void
__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
	     enum immed_width width, bool invert,
	     enum immed_shift shift, bool wr_both,
	     bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_IMMED_BASE |
		FIELD_PREP(OP_IMMED_A_SRC, areg) |
		FIELD_PREP(OP_IMMED_B_SRC, breg) |
		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
		FIELD_PREP(OP_IMMED_WIDTH, width) |
		FIELD_PREP(OP_IMMED_INV, invert) |
		FIELD_PREP(OP_IMMED_SHIFT, shift) |
		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
292 | ||
/* Load a 16-bit immediate @imm into @dst.  The low byte is encoded as a
 * register-file immediate operand, the high byte in the instruction's
 * IMM field.  An immediate destination is rejected with -EFAULT.
 */
static void
emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
	   enum immed_width width, bool invert, enum immed_shift shift)
{
	struct nfp_insn_ur_regs reg;
	int err;

	if (swreg_type(dst) == NN_REG_IMM) {
		nfp_prog->error = -EFAULT;
		return;
	}

	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	/* Use reg.dst when destination is No-Dest. */
	__emit_immed(nfp_prog,
		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
		     reg.breg, imm >> 8, width, invert, shift,
		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}
317 | ||
/* Encode a shift instruction.  Left shifts are expressed by the HW as
 * 32 minus the shift amount (see comment below); out-of-range shift
 * amounts are rejected with -EFAULT.
 */
static void
__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   enum shf_sc sc, u8 shift,
	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
		nfp_prog->error = -EFAULT;
		return;
	}

	/* NFP shift instruction has something special. If shift direction is
	 * left then shift amount of 1 to 31 is specified as 32 minus the amount
	 * to shift.
	 *
	 * But no need to do this for indirect shift which has shift amount be
	 * 0. Even after we do this subtraction, shift amount 0 will be turned
	 * into 32 which will eventually be encoded the same as 0 because only
	 * low 5 bits are encoded, but shift amount be 32 will fail the
	 * FIELD_PREP check done later on shift mask (0x1f), due to 32 is out of
	 * mask range.
	 */
	if (sc == SHF_SC_L_SHF && shift)
		shift = 32 - shift;

	insn = OP_SHF_BASE |
		FIELD_PREP(OP_SHF_A_SRC, areg) |
		FIELD_PREP(OP_SHF_SC, sc) |
		FIELD_PREP(OP_SHF_B_SRC, breg) |
		FIELD_PREP(OP_SHF_I8, i8) |
		FIELD_PREP(OP_SHF_SW, sw) |
		FIELD_PREP(OP_SHF_DST, dst) |
		FIELD_PREP(OP_SHF_SHIFT, shift) |
		FIELD_PREP(OP_SHF_OP, op) |
		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
361 | ||
/* Translate swreg operands to restricted encodings and emit a shift. */
static void
emit_shf(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}
379 | ||
991f5b36 JW |
380 | static void |
381 | emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst, | |
382 | swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc) | |
383 | { | |
384 | if (sc == SHF_SC_R_ROT) { | |
385 | pr_err("indirect shift is not allowed on rotation\n"); | |
386 | nfp_prog->error = -EFAULT; | |
387 | return; | |
388 | } | |
389 | ||
390 | emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0); | |
391 | } | |
392 | ||
cd7df56e JK |
/* Encode an ALU instruction. */
static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_ALU_BASE |
		FIELD_PREP(OP_ALU_A_SRC, areg) |
		FIELD_PREP(OP_ALU_B_SRC, breg) |
		FIELD_PREP(OP_ALU_DST, dst) |
		FIELD_PREP(OP_ALU_SW, swap) |
		FIELD_PREP(OP_ALU_OP, op) |
		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
413 | ||
/* Translate swreg operands to unrestricted encodings and emit an ALU op. */
static void
emit_alu(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum alu_op op, swreg rreg)
{
	struct nfp_insn_ur_regs reg;
	int err;

	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}
431 | ||
d3d23fdb JW |
/* Encode one step of a multi-step multiply instruction. */
static void
__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg,
	   enum mul_type type, enum mul_step step, u16 breg, bool swap,
	   bool wr_both, bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_MUL_BASE |
		FIELD_PREP(OP_MUL_A_SRC, areg) |
		FIELD_PREP(OP_MUL_B_SRC, breg) |
		FIELD_PREP(OP_MUL_STEP, step) |
		FIELD_PREP(OP_MUL_DST_AB, dst_ab) |
		FIELD_PREP(OP_MUL_SW, swap) |
		FIELD_PREP(OP_MUL_TYPE, type) |
		FIELD_PREP(OP_MUL_WR_AB, wr_both) |
		FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
452 | ||
/* Emit one multiply step.  MUL_TYPE_START must use MUL_STEP_NONE; the
 * LAST/LAST_2 steps read the result back, so the left operand is then
 * encoded as a destination rather than a source.
 */
static void
emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type,
	 enum mul_step step, swreg rreg)
{
	struct nfp_insn_ur_regs reg;
	u16 areg;
	int err;

	if (type == MUL_TYPE_START && step != MUL_STEP_NONE) {
		nfp_prog->error = -EINVAL;
		return;
	}

	if (step == MUL_LAST || step == MUL_LAST_2) {
		/* When the step is LAST or LAST_2, the left source is used
		 * as the destination.
		 */
		err = swreg_to_unrestricted(lreg, reg_none(), rreg, &reg);
		areg = reg.dst;
	} else {
		err = swreg_to_unrestricted(reg_none(), lreg, rreg, &reg);
		areg = reg.areg;
	}

	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap,
		   reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}
485 | ||
cd7df56e JK |
/* Encode a load-field instruction: merge selected bytes (@bmask) of the
 * B operand into the destination, optionally shifted; @zero clears the
 * unselected bytes instead of preserving them.
 */
static void
__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
		bool zero, bool swap, bool wr_both,
		bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LDF_BASE |
		FIELD_PREP(OP_LDF_A_SRC, areg) |
		FIELD_PREP(OP_LDF_SC, sc) |
		FIELD_PREP(OP_LDF_B_SRC, breg) |
		FIELD_PREP(OP_LDF_I8, imm8) |
		FIELD_PREP(OP_LDF_SW, swap) |
		FIELD_PREP(OP_LDF_ZF, zero) |
		FIELD_PREP(OP_LDF_BMASK, bmask) |
		FIELD_PREP(OP_LDF_SHF, shift) |
		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
509 | ||
/* Emit a load-field from @src into @dst under byte mask @bmask. */
static void
emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
		  enum shf_sc sc, u8 shift, bool zero)
{
	struct nfp_insn_re_regs reg;
	int err;

	/* Note: ld_field is special as it uses one of the src regs as dst */
	err = swreg_to_restricted(dst, dst, src, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
			reg.i8, zero, reg.swap, reg.wr_both,
			reg.dst_lmextn, reg.src_lmextn);
}
528 | ||
/* Load-field variant that preserves the unselected destination bytes. */
static void
emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
	      enum shf_sc sc, u8 shift)
{
	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
}
535 | ||
2df03a50 JK |
/* Encode a local-CSR access.  @addr is a byte address; the instruction
 * field takes it in 4-byte units, hence the division.
 */
static void
__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
	    bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LCSR_BASE |
		FIELD_PREP(OP_LCSR_A_SRC, areg) |
		FIELD_PREP(OP_LCSR_B_SRC, breg) |
		FIELD_PREP(OP_LCSR_WRITE, wr) |
		FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}
552 | ||
/* Write @src to the local CSR at byte address @addr. */
static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
{
	struct nfp_insn_ur_regs reg;
	int err;

	/* This instruction takes immeds instead of reg_none() for the ignored
	 * operand, but we can't encode 2 immeds in one instr with our normal
	 * swreg infra so if param is an immed, we encode as reg_none() and
	 * copy the immed to both operands.
	 */
	if (swreg_type(src) == NN_REG_IMM) {
		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
		reg.breg = reg.areg;
	} else {
		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
	}
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
		    false, reg.src_lmextn);
}
577 | ||
df4a37d8 JK |
/* Issue a local-CSR read; the CSR value is read in a following
 * immed[gpr, 0] instruction which the caller must emit.
 */
static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
{
	__emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
}
583 | ||
1c03e03f JK |
/* Emit a no-op, encoded as an immed with no visible destination. */
static void emit_nop(struct nfp_prog *nfp_prog)
{
	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
}
588 | ||
cd7df56e JK |
589 | /* --- Wrappers --- */ |
590 | static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) | |
591 | { | |
592 | if (!(imm & 0xffff0000)) { | |
593 | *val = imm; | |
594 | *shift = IMMED_SHIFT_0B; | |
595 | } else if (!(imm & 0xff0000ff)) { | |
596 | *val = imm >> 8; | |
597 | *shift = IMMED_SHIFT_1B; | |
598 | } else if (!(imm & 0x0000ffff)) { | |
599 | *val = imm >> 16; | |
600 | *shift = IMMED_SHIFT_2B; | |
601 | } else { | |
602 | return false; | |
603 | } | |
604 | ||
605 | return true; | |
606 | } | |
607 | ||
/* Load an arbitrary 32-bit immediate into @dst: one instruction when
 * @imm (or its bitwise inverse) packs into value+shift form, otherwise
 * two instructions covering the low and high halves.
 */
static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
{
	enum immed_shift shift;
	u16 val;

	if (pack_immed(imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
	} else if (pack_immed(~imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
	} else {
		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
			   false, IMMED_SHIFT_0B);
		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
			   false, IMMED_SHIFT_2B);
	}
}
624 | ||
77a3d311 JK |
/* Load a relocatable immediate (must fit in 16 bits so a single
 * instruction carries it) and tag the instruction with @relo.
 */
static void
wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
	       enum nfp_relo_type relo)
{
	if (imm > 0xffff) {
		pr_err("relocation of a large immediate!\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}
639 | ||
cd7df56e JK |
/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
 * If the @imm is small enough encode it directly in operand and return
 * otherwise load @imm to a spare register and return its encoding.
 */
static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}
652 | ||
/* re_load_imm_any() - encode immediate or use tmp register (restricted)
 * If the @imm is small enough encode it directly in operand and return
 * otherwise load @imm to a spare register and return its encoding.
 */
static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}
665 | ||
ff42bb9f JK |
/* Emit @count no-op instructions. */
static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		emit_nop(nfp_prog);
}
671 | ||
c000dfb5 JK |
/* Register-to-register move, implemented as an ALU pass-through. */
static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
{
	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
}
676 | ||
cd7df56e JK |
/* Move by raw GPR index: read @src via bank B, write @dst to both banks. */
static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
{
	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
}
681 | ||
9879a381 JW |
/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
 * result to @dst from low end.  Bytes outside the field are zeroed
 * (ld_field with zero == true).
 */
static void
wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
		u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
	u8 mask = (1 << field_len) - 1;

	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
}
694 | ||
91ff69e8 JW |
/* wrp_reg_or_subpart() - load @field_len bytes from low end of @src, or the
 * result to @dst from offset, there is no change on the other bits of @dst.
 * A left shift by @offset bytes is encoded as 32 - offset * 8 per the
 * shift convention handled in __emit_shf().
 */
static void
wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
		   u8 field_len, u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
	u8 mask = ((1 << field_len) - 1) << offset;

	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
}
707 | ||
3dd43c33 JK |
/* Compute a 40-bit address from the GPR pair starting at @src_gpr plus
 * @offset: 32-bit add of the low word, then add-with-carry into the
 * high word.  Results land in the imm_a/imm_b scratch registers unless
 * @offset is zero, in which case the original pair is returned.
 */
static void
addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
	      swreg *rega, swreg *regb)
{
	if (offset == reg_imm(0)) {
		*rega = reg_a(src_gpr);
		*regb = reg_b(src_gpr + 1);
		return;
	}

	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
	emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
		 reg_imm(0));
	*rega = imm_a(nfp_prog);
	*regb = imm_b(nfp_prog);
}
724 | ||
9879a381 JW |
/* NFP has Command Push Pull bus which supports bulk memory operations.
 *
 * Emit a gathered load/store sequence: read up to |ldst_gather_len|
 * bytes from the source pointer into transfer registers, copy them to
 * the transfer-out registers, then write them to the destination of the
 * paired store, picking the cheapest write command for the length and
 * alignment.  Always returns 0; emit errors are recorded in
 * nfp_prog->error by the emitters.
 */
static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	bool descending_seq = meta->ldst_gather_len < 0;
	s16 len = abs(meta->ldst_gather_len);
	swreg src_base, off;
	bool src_40bit_addr;
	unsigned int i;
	u8 xfer_num;

	off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
	src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
	src_base = reg_a(meta->insn.src_reg * 2);
	xfer_num = round_up(len, 4) / 4;

	if (src_40bit_addr)
		addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base,
			      &off);

	/* Setup PREV_ALU fields to override memory read length. */
	if (len > 32)
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));

	/* Memory read from source addr into transfer-in registers. */
	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);

	/* Move from transfer-in to transfer-out. */
	for (i = 0; i < xfer_num; i++)
		wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));

	off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));

	if (len <= 8) {
		/* Use single direct_ref write8. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
			 CMD_CTX_SWAP);
	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
		/* Use single direct_ref write32. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
			 CMD_CTX_SWAP);
	} else if (len <= 32) {
		/* Use single indirect_ref write8. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       len - 1, CMD_CTX_SWAP);
	} else if (IS_ALIGNED(len, 4)) {
		/* Use single indirect_ref write32. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 1, CMD_CTX_SWAP);
	} else if (len <= 40) {
		/* Use one direct_ref write32 to write the first 32-bytes, then
		 * another direct_ref write8 to write the remaining bytes.
		 */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
			 CMD_CTX_SWAP);

		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
				      imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
			 CMD_CTX_SWAP);
	} else {
		/* Use one indirect_ref write32 to write 4-bytes aligned length,
		 * then another direct_ref write8 to write the remaining bytes.
		 */
		u8 new_off;

		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 2, CMD_CTX_SWAP);
		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
			 (len & 0x3) - 1, CMD_CTX_SWAP);
	}

	/* TODO: The following extra load is to make sure data flow be identical
	 * before and after we do memory copy optimization.
	 *
	 * The load destination register is not guaranteed to be dead, so we
	 * need to make sure it is loaded with the value the same as before
	 * this transformation.
	 *
	 * These extra loads could be removed once we have accurate register
	 * usage information.
	 */
	if (descending_seq)
		xfer_num = 0;
	else if (BPF_SIZE(meta->insn.code) != BPF_DW)
		xfer_num = xfer_num - 1;
	else
		xfer_num = xfer_num - 2;

	switch (BPF_SIZE(meta->insn.code)) {
	case BPF_B:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 1,
				IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
		break;
	case BPF_H:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 2, (len & 3) ^ 2);
		break;
	case BPF_W:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(0));
		break;
	case BPF_DW:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(xfer_num));
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
			reg_xfer(xfer_num + 1));
		break;
	}

	/* Sub-64-bit BPF loads zero-extend, so clear the high word. */
	if (BPF_SIZE(meta->insn.code) != BPF_DW)
		wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}
859 | ||
/* Emit a read of @size bytes of packet data at @offset into the GPR pair
 * starting at @dst_gpr.  Data is fetched through the packet pointer with a
 * byte-granular read8 command, then right-shifted so the wanted bytes land
 * in the low end of the register.  Note: result is big endian!
 * Always zeroes the upper word of the 64-bit destination when fewer than
 * 8 bytes are loaded.  Returns 0.
 */
static int
data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
{
	unsigned int i;
	u16 shift, sz;

	/* We load the value from the address indicated in @offset and then
	 * shift out the data we don't need.  Note: this is big endian!
	 */
	sz = max(size, 4);
	shift = size < 4 ? 4 - size : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);

	i = 0;
	if (shift)
		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	/* i never advanced (sub-word load) or only one word copied:
	 * clear the high word of the destination pair.
	 */
	if (i < 2)
		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);

	return 0;
}
888 | ||
/* Emit a host-endian memory read of @size bytes at address lreg + rreg into
 * the GPR pair starting at @dst_gpr, using a 32-bit byte-swapping read in
 * addressing mode @mode (32b or 40b).  Sub-word loads are merged in with
 * ld_field using a byte mask instead of a shift.  Always zeroes the upper
 * word of the 64-bit destination when fewer than 8 bytes are loaded.
 * Returns 0.
 */
static int
data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
		   swreg lreg, swreg rreg, int size, enum cmd_mode mode)
{
	unsigned int i;
	u8 mask, sz;

	/* We load the value from the address indicated in rreg + lreg and then
	 * mask out the data we don't need.  Note: this is little endian!
	 */
	sz = max(size, 4);
	mask = size < 4 ? GENMASK(size - 1, 0) : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);

	i = 0;
	if (mask)
		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
				  reg_xfer(0), SHF_SC_NONE, 0, true);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	/* Clear the high word when the copy loop didn't reach it. */
	if (i < 2)
		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);

	return 0;
}
918 | ||
3dd43c33 JK |
919 | static int |
920 | data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, | |
921 | u8 dst_gpr, u8 size) | |
922 | { | |
923 | return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset, | |
924 | size, CMD_MODE_32b); | |
925 | } | |
926 | ||
/* 40-bit addressing wrapper around data_ld_host_order(): splits the
 * base (GPR @src_gpr) + @offset into the A/B operand pair required by
 * 40-bit base+addr mode via addr40_offset().
 */
static int
data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
			  u8 dst_gpr, u8 size)
{
	swreg rega, regb;

	addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);

	return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
				  size, CMD_MODE_40b_BA);
}
938 | ||
0a793977 JK |
/* Emit an indirect packet load: compute the effective offset src + @offset,
 * bounds-check it against the packet length (aborting the program via the
 * GO_ABORT relocation on overflow), then load @size bytes into GPR 0.
 */
static int
construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
{
	swreg tmp_reg;

	/* Calculate the true offset (src_reg + imm) */
	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);

	/* Check packet length (size guaranteed to fit b/c it's u8) */
	emit_alu(nfp_prog, imm_a(nfp_prog),
		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
	emit_alu(nfp_prog, reg_none(),
		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);

	/* Load data */
	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
}
958 | ||
cd7df56e JK |
959 | static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) |
960 | { | |
0a793977 JK |
961 | swreg tmp_reg; |
962 | ||
963 | /* Check packet length */ | |
964 | tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); | |
965 | emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); | |
e84797fe | 966 | emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); |
0a793977 JK |
967 | |
968 | /* Load data */ | |
969 | tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); | |
970 | return data_ld(nfp_prog, tmp_reg, 0, size); | |
cd7df56e JK |
971 | } |
972 | ||
e663fe38 JK |
973 | static int |
974 | data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, | |
975 | u8 src_gpr, u8 size) | |
976 | { | |
977 | unsigned int i; | |
978 | ||
979 | for (i = 0; i * 4 < size; i++) | |
980 | wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i)); | |
981 | ||
982 | emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, | |
b556ddd9 | 983 | reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP); |
e663fe38 JK |
984 | |
985 | return 0; | |
986 | } | |
987 | ||
/* Emit a store of immediate @imm (@size == 4 or 8 bytes) to memory at
 * GPR @dst_gpr + @offset: load the immediate into one or two xfer
 * registers, then issue a byte-swapping write command.  Returns 0.
 */
static int
data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		   u64 imm, u8 size)
{
	wrp_immed(nfp_prog, reg_xfer(0), imm);
	if (size == 8)
		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);

	return 0;
}
1001 | ||
ee9133a8 JK |
/* lmem_step - per-slice callback used by mem_op_stack().  Called once for
 * each <= 4 byte slice of a stack access with: the GPR and byte offset
 * within it, the LMEM offset, the slice size, @first/@last marking the
 * first/last slice, @new_gpr set when this slice starts a new GPR,
 * @lm3 selecting LM index 3 over index 0, and @needs_inc requesting
 * post-increment LM addressing.  Returns 0 or -errno.
 */
typedef int
(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	     bool needs_inc);
a82b23fb JK |
1006 | |
/* lmem_step callback: load one <= 4 byte slice from LMEM offset @off into
 * byte @dst_byte of GPR @dst.  Whole-word slices are a plain move; partial
 * slices use ld_field with a shifted byte mask.  Indexes beyond what
 * ld_field can address are read via a temporary (imm_a) first.
 * Returns 0 or -EOPNOTSUPP on internal inconsistency.
 */
static int
wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	      bool needs_inc)
{
	/* Only post-increment on the word that finishes a GPR, and never on
	 * the last slice (nothing left to point at).
	 */
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, src_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog, reg_both(dst),
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	src_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	/* Shift distance/direction to align source byte with dest byte. */
	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes, if offset too large do RMW.
	 * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* If it's not the first part of the load and we start a new GPR
		 * that means we are loading a second part of the LMEM word into
		 * a new GPR. IOW we've already looked that LMEM word and
		 * therefore it has been loaded into imm_a().
		 */
		if (first || !new_gpr)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);

	if (should_inc)
		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));

	return 0;
}
ee9133a8 JK |
1075 | |
/* lmem_step callback: store one <= 4 byte slice from byte @src_byte of GPR
 * @src to LMEM offset @off.  Whole-word slices are a plain move; partial
 * slices use ld_field with a shifted byte mask.  Indexes beyond what
 * ld_field can address go through a read-modify-write via imm_a.
 * Returns 0 or -EOPNOTSUPP on internal inconsistency.
 */
static int
wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	       bool needs_inc)
{
	/* Only post-increment once per LMEM word, and never on the last
	 * slice (nothing left to point at).
	 */
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, dst_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog,
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
			reg_b(src));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	dst_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	/* Shift distance/direction to align source byte with dest byte. */
	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes, if offset too large do RMW.
	 * Because we RMV twice we waste 2 cycles on unaligned 8 byte writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* Only first and last LMEM locations are going to need RMW,
		 * the middle location will be overwritten fully.
		 */
		if (first || last)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);

	/* Write back / advance only when this LMEM word is complete. */
	if (new_gpr || last) {
		if (idx > RE_REG_LM_IDX_MAX)
			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
		if (should_inc)
			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
	}

	return 0;
}
1147 | ||
/* Perform a stack (LMEM) access of @size bytes through GPR @gpr by invoking
 * the @step callback (wrp_lmem_load/wrp_lmem_store) once per <= 4 byte
 * slice, choosing between LM index 0 (bottom 64B), a fixed LM index 3
 * window, or LM index 3 with post-increment depending on how much of the
 * offset is known at JIT time.  @clr_gpr requests zeroing of the upper
 * half of the destination 64-bit pair for sub-8-byte loads.
 * Returns 0 or the first error from @step.
 */
static int
mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
	     bool clr_gpr, lmem_step step)
{
	s32 off = nfp_prog->stack_frame_depth + meta->insn.off + ptr_off;
	bool first = true, last;
	bool needs_inc = false;
	swreg stack_off_reg;
	u8 prev_gpr = 255;	/* sentinel: no previous GPR yet */
	u32 gpr_byte = 0;
	bool lm3 = true;
	int ret;

	if (meta->ptr_not_const ||
	    meta->flags & FLAG_INSN_PTR_CALLER_STACK_FRAME) {
		/* Use of the last encountered ptr_off is OK, they all have
		 * the same alignment.  Depend on low bits of value being
		 * discarded when written to LMaddr register.
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	} else if (off + size <= 64) {
		/* We can reach bottom 64B with LMaddr0 */
		lm3 = false;
	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
		/* We have to set up a new pointer.  If we know the offset
		 * and the entire access falls into a single 32 byte aligned
		 * window we won't have to increment the LM pointer.
		 * The 32 byte alignment is important because offset is ORed in
		 * not added when doing *l$indexN[off].
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
						stack_imm(nfp_prog));
		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		off %= 32;
	} else {
		/* Access straddles 32B windows - point LM3 at the start and
		 * post-increment as we go.
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	}
	if (lm3) {
		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
		/* For size < 4 one slot will be filled by zeroing of upper. */
		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
	}

	if (clr_gpr && size < 8)
		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);

	/* Walk the access in slices that never cross a GPR boundary or a
	 * 4-byte LMEM word boundary.
	 */
	while (size) {
		u32 slice_end;
		u8 slice_size;

		slice_size = min(size, 4 - gpr_byte);
		slice_end = min(off + slice_size, round_up(off + 1, 4));
		slice_size = slice_end - off;

		last = slice_size == size;

		if (needs_inc)
			off %= 4;

		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
			   first, gpr != prev_gpr, last, lm3, needs_inc);
		if (ret)
			return ret;

		prev_gpr = gpr;
		first = false;

		gpr_byte += slice_size;
		if (gpr_byte >= 4) {
			gpr_byte -= 4;
			gpr++;
		}

		size -= slice_size;
		off += slice_size;
	}

	return 0;
}
1242 | ||
cd7df56e JK |
1243 | static void |
1244 | wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) | |
1245 | { | |
b3f868df | 1246 | swreg tmp_reg; |
cd7df56e JK |
1247 | |
1248 | if (alu_op == ALU_OP_AND) { | |
1249 | if (!imm) | |
1250 | wrp_immed(nfp_prog, reg_both(dst), 0); | |
1251 | if (!imm || !~imm) | |
1252 | return; | |
1253 | } | |
1254 | if (alu_op == ALU_OP_OR) { | |
1255 | if (!~imm) | |
1256 | wrp_immed(nfp_prog, reg_both(dst), ~0U); | |
1257 | if (!imm || !~imm) | |
1258 | return; | |
1259 | } | |
1260 | if (alu_op == ALU_OP_XOR) { | |
1261 | if (!~imm) | |
1262 | emit_alu(nfp_prog, reg_both(dst), reg_none(), | |
5d42ced1 | 1263 | ALU_OP_NOT, reg_b(dst)); |
cd7df56e JK |
1264 | if (!imm || !~imm) |
1265 | return; | |
1266 | } | |
1267 | ||
1268 | tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); | |
1269 | emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg); | |
1270 | } | |
1271 | ||
/* Emit a 64-bit ALU op with a sign-extended immediate as two independent
 * 32-bit halves (no carry between them).  When @skip is set the
 * instruction is marked as a no-op instead of emitting code.
 */
static int
wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op, bool skip)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	if (skip) {
		meta->flags |= FLAG_INSN_SKIP_NOOP;
		return 0;
	}

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);

	return 0;
}
1289 | ||
1290 | static int | |
1291 | wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, | |
1292 | enum alu_op alu_op) | |
1293 | { | |
1294 | u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; | |
1295 | ||
1296 | emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); | |
1297 | emit_alu(nfp_prog, reg_both(dst + 1), | |
1298 | reg_a(dst + 1), alu_op, reg_b(src + 1)); | |
1299 | ||
1300 | return 0; | |
1301 | } | |
1302 | ||
/* Emit a 32-bit ALU op with immediate on the low word of the destination
 * pair and zero the high word (BPF 32-bit ALU semantics).
 */
static int
wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);

	return 0;
}
1314 | ||
1315 | static int | |
1316 | wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, | |
1317 | enum alu_op alu_op) | |
1318 | { | |
1319 | u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; | |
1320 | ||
1321 | emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); | |
1322 | wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); | |
1323 | ||
1324 | return 0; | |
1325 | } | |
1326 | ||
/* Emit one flag-setting ALU op (result discarded via reg_none()) followed
 * by a conditional branch on @br_mask to instruction offset @off.
 */
static void
wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
		 enum br_mask br_mask, u16 off)
{
	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
	emit_br(nfp_prog, br_mask, off, 0);
}
1334 | ||
/* Emit a register-register test-and-branch: test the low words, and for
 * 64-bit jump instructions also test the high words with a second
 * test-and-branch to the same target.
 */
static int
wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     enum alu_op alu_op, enum br_mask br_mask)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
			 insn->src_reg * 2, br_mask, insn->off);
	if (is_mbpf_jmp64(meta))
		wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
				 insn->src_reg * 2 + 1, br_mask, insn->off);

	return 0;
}
1349 | ||
61dd8f00 JK |
1350 | static const struct jmp_code_map { |
1351 | enum br_mask br_mask; | |
1352 | bool swap; | |
1353 | } jmp_code_map[] = { | |
1354 | [BPF_JGT >> 4] = { BR_BLO, true }, | |
1355 | [BPF_JGE >> 4] = { BR_BHS, false }, | |
1356 | [BPF_JLT >> 4] = { BR_BLO, false }, | |
1357 | [BPF_JLE >> 4] = { BR_BHS, true }, | |
1358 | [BPF_JSGT >> 4] = { BR_BLT, true }, | |
1359 | [BPF_JSGE >> 4] = { BR_BGE, false }, | |
1360 | [BPF_JSLT >> 4] = { BR_BLT, false }, | |
1361 | [BPF_JSLE >> 4] = { BR_BGE, true }, | |
1362 | }; | |
1363 | ||
/* Look up the jmp_code_map entry for @meta's jump opcode.
 * Returns NULL (with a one-time warning) for opcodes with no table entry.
 */
static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
{
	unsigned int op;

	op = BPF_OP(meta->insn.code) >> 4;
	/* br_mask of 0 is BR_BEQ which we don't use in jump code table */
	if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
		      !jmp_code_map[op].br_mask,
		      "no code found for jump instruction"))
		return NULL;

	return &jmp_code_map[op];
}
1377 | ||
/* Emit a compare-immediate-and-branch: subtract (or add, for negated
 * immediates per meta->jump_neg_op) the immediate from dst, propagating
 * carry through the high words for 64-bit jumps, then branch on the mask
 * from the jump code table.  Returns 0, or -EINVAL for unknown opcodes.
 */
static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	const struct jmp_code_map *code;
	enum alu_op alu_op, carry_op;
	u8 reg = insn->dst_reg * 2;
	swreg tmp_reg;

	code = nfp_jmp_code_get(meta);
	if (!code)
		return -EINVAL;

	alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
	carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;

	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
	if (!code->swap)
		emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
	else
		emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));

	if (is_mbpf_jmp64(meta)) {
		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
		if (!code->swap)
			emit_alu(nfp_prog, reg_none(),
				 reg_a(reg + 1), carry_op, tmp_reg);
		else
			emit_alu(nfp_prog, reg_none(),
				 tmp_reg, carry_op, reg_a(reg + 1));
	}

	emit_br(nfp_prog, code->br_mask, insn->off, 0);

	return 0;
}
1414 | ||
61dd8f00 | 1415 | static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
cd7df56e JK |
1416 | { |
1417 | const struct bpf_insn *insn = &meta->insn; | |
61dd8f00 | 1418 | const struct jmp_code_map *code; |
26fa818d JK |
1419 | u8 areg, breg; |
1420 | ||
61dd8f00 JK |
1421 | code = nfp_jmp_code_get(meta); |
1422 | if (!code) | |
1423 | return -EINVAL; | |
1424 | ||
26fa818d JK |
1425 | areg = insn->dst_reg * 2; |
1426 | breg = insn->src_reg * 2; | |
cd7df56e | 1427 | |
61dd8f00 | 1428 | if (code->swap) { |
cd7df56e JK |
1429 | areg ^= breg; |
1430 | breg ^= areg; | |
1431 | areg ^= breg; | |
1432 | } | |
1433 | ||
1434 | emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); | |
46144839 JW |
1435 | if (is_mbpf_jmp64(meta)) |
1436 | emit_alu(nfp_prog, reg_none(), | |
1437 | reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); | |
61dd8f00 | 1438 | emit_br(nfp_prog, code->br_mask, insn->off, 0); |
cd7df56e JK |
1439 | |
1440 | return 0; | |
1441 | } | |
1442 | ||
3119d1fd JK |
1443 | static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out) |
1444 | { | |
1445 | emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in, | |
1446 | SHF_SC_R_ROT, 8); | |
1447 | emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out), | |
1448 | SHF_SC_R_ROT, 16); | |
1449 | } | |
1450 | ||
d3d23fdb JW |
/* Emit a 32x32 multiply of @lreg * @rreg using the NFP multi-step multiply
 * sequence, writing the low 32 bits to @dst_lo and, when @gen_high_half is
 * set, the high 32 bits to @dst_hi (otherwise @dst_hi is zeroed).
 * The MUL_STEP_1..4 / MUL_LAST ordering is mandated by the hardware.
 */
static void
wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
	    swreg rreg, bool gen_high_half)
{
	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg);
	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none());
	if (gen_high_half)
		emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2,
			 reg_none());
	else
		wrp_immed(nfp_prog, dst_hi, 0);
}
1467 | ||
/* Emit a 16x16 multiply of @lreg * @rreg (shorter hardware sequence than
 * 32x32), writing the 32-bit product to @dst_lo.  @dst_hi is not written
 * here; callers handle the high word.
 */
static void
wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
	    swreg rreg)
{
	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg);
	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none());
}
1477 | ||
/* Emit a BPF multiply: dst *= src-register or immediate (per
 * @ropnd_from_reg).  Picks the cheap 16x16 hardware sequence when the
 * verifier-tracked maxima of both operands fit in 16 bits, otherwise the
 * full 32x32 sequence; @gen_high_half requests the upper 32 product bits.
 */
static int
wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	bool gen_high_half, bool ropnd_from_reg)
{
	swreg multiplier, multiplicand, dst_hi, dst_lo;
	const struct bpf_insn *insn = &meta->insn;
	u32 lopnd_max, ropnd_max;
	u8 dst_reg;

	dst_reg = insn->dst_reg;
	multiplicand = reg_a(dst_reg * 2);
	dst_hi = reg_both(dst_reg * 2 + 1);
	dst_lo = reg_both(dst_reg * 2);
	lopnd_max = meta->umax_dst;
	if (ropnd_from_reg) {
		multiplier = reg_b(insn->src_reg * 2);
		ropnd_max = meta->umax_src;
	} else {
		u32 imm = insn->imm;

		multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
		ropnd_max = imm;
	}
	if (lopnd_max > U16_MAX || ropnd_max > U16_MAX)
		wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier,
			    gen_high_half);
	else
		wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier);

	return 0;
}
1509 | ||
2a952b03 JW |
/* Emit an unsigned divide of GPR @dst by constant @imm using the
 * multiply-by-reciprocal method (reciprocal_value_adv).  Special cases:
 * imm > U32_MAX yields 0; imm > 2^31 degenerates to "dst >= imm";
 * powers of two become a plain right shift.  Returns 0.
 */
static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
{
	/* NOTE(review): dst_b is initialized with reg_a(dst), not reg_b(dst)
	 * - looks like a copy-paste of dst_a.  It is only used as the shifter
	 * source operand below; confirm whether swreg bank selection makes
	 * this equivalent before changing it.
	 */
	swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst);
	struct reciprocal_value_adv rvalue;
	u8 pre_shift, exp;
	swreg magic;

	/* Divisor wider than 32 bits can never be <= a u32 dst: result is 0 */
	if (imm > U32_MAX) {
		wrp_immed(nfp_prog, dst_both, 0);
		return 0;
	}

	/* NOTE: because we are using "reciprocal_value_adv" which doesn't
	 * support "divisor > (1u << 31)", we need to JIT separate NFP sequence
	 * to handle such case which actually equals to the result of unsigned
	 * comparison "dst >= imm" which could be calculated using the following
	 * NFP sequence:
	 *
	 *  alu[--, dst, -, imm]
	 *  immed[imm, 0]
	 *  alu[dst, imm, +carry, 0]
	 *
	 */
	if (imm > 1U << 31) {
		swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));

		emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b);
		wrp_immed(nfp_prog, imm_a(nfp_prog), 0);
		emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C,
			 reg_imm(0));
		return 0;
	}

	rvalue = reciprocal_value_adv(imm, 32);
	exp = rvalue.exp;
	if (rvalue.is_wide_m && !(imm & 1)) {
		/* Even divisor with a wide magic number: shift out trailing
		 * zero bits first so a narrower magic number can be used.
		 */
		pre_shift = fls(imm & -imm) - 1;
		rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
	} else {
		pre_shift = 0;
	}
	magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
	if (imm == 1U << exp) {
		/* Power of two: a single right shift. */
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
			 SHF_SC_R_SHF, exp);
	} else if (rvalue.is_wide_m) {
		/* 33-bit magic number: dst = (((dst - hi) >> 1) + hi) >> (sh-1)
		 * where hi is the high 32 bits of dst * magic.
		 */
		wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
			    magic, true);
		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
			 imm_b(nfp_prog));
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
			 SHF_SC_R_SHF, 1);
		emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
			 imm_b(nfp_prog));
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
			 SHF_SC_R_SHF, rvalue.sh - 1);
	} else {
		/* Narrow magic number: optional pre-shift, multiply, shift. */
		if (pre_shift)
			emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
				 dst_b, SHF_SC_R_SHF, pre_shift);
		wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
		emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
			 dst_b, SHF_SC_R_SHF, rvalue.sh);
	}

	return 0;
}
1577 | ||
0d49eaf4 JK |
/* Emit the bpf_xdp_adjust_head() helper call.  When the verifier proved
 * the call always succeeds (adjust_head_location set), emit the short
 * 5-instruction unconditional sequence; otherwise emit the full sequence
 * that range-checks the new pointer against the FW datapath constraints
 * and the minimum Ethernet header length, returning -EINVAL in r0 on
 * failure.  reg_a(2 * 2) is BPF arg2 (the length delta).
 */
static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
	struct nfp_bpf_cap_adjust_head *adjust_head;
	u32 ret_einval, end;

	adjust_head = &nfp_prog->bpf->adjust_head;

	/* Optimized version - 5 vs 14 cycles */
	if (nfp_prog->adjust_head_location != UINT_MAX) {
		if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
			return -EINVAL;

		emit_alu(nfp_prog, pptr_reg(nfp_prog),
			 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
		emit_alu(nfp_prog, plen_reg(nfp_prog),
			 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
		emit_alu(nfp_prog, pv_len(nfp_prog),
			 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));

		/* Success return value: r0 = 0 */
		wrp_immed(nfp_prog, reg_both(0), 0);
		wrp_immed(nfp_prog, reg_both(1), 0);

		/* TODO: when adjust head is guaranteed to succeed we can
		 * also eliminate the following if (r0 == 0) branch.
		 */

		return 0;
	}

	/* Branch targets: 14 instructions to the -EINVAL tail, 2 more to end */
	ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
	end = ret_einval + 2;

	/* We need to use a temp because offset is just a part of the pkt ptr */
	emit_alu(nfp_prog, tmp,
		 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));

	/* Validate result will fit within FW datapath constraints */
	emit_alu(nfp_prog, reg_none(),
		 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);
	emit_alu(nfp_prog, reg_none(),
		 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
	emit_br(nfp_prog, BR_BLO, ret_einval, 0);

	/* Validate the length is at least ETH_HLEN */
	emit_alu(nfp_prog, tmp_len,
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, reg_none(),
		 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
	emit_br(nfp_prog, BR_BMI, ret_einval, 0);

	/* Load the ret code */
	wrp_immed(nfp_prog, reg_both(0), 0);
	wrp_immed(nfp_prog, reg_both(1), 0);

	/* Modify the packet metadata */
	emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);

	/* Skip over the -EINVAL ret code (defer 2: next two ALUs execute in
	 * the branch delay slots)
	 */
	emit_br(nfp_prog, BR_UNC, end, 2);

	emit_alu(nfp_prog, plen_reg(nfp_prog),
		 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
	emit_alu(nfp_prog, pv_len(nfp_prog),
		 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));

	/* return -EINVAL target */
	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
		return -EINVAL;

	wrp_immed(nfp_prog, reg_both(0), -22);	/* -EINVAL */
	wrp_immed(nfp_prog, reg_both(1), ~0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
		return -EINVAL;

	return 0;
}
1657 | ||
0c261593 JK |
/* Emit the bpf_xdp_adjust_tail() helper call: add the (negative) delta in
 * BPF arg2 (reg_a(2 * 2)) to the packet length, rejecting deltas that do
 * not shrink the packet (no carry on the add) or that would leave fewer
 * than ETH_HLEN bytes.  r0 is 0 on success, -EINVAL on failure.
 */
static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	u32 ret_einval, end;
	swreg plen, delta;

	BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN));

	plen = imm_a(nfp_prog);
	delta = reg_a(2 * 2);

	/* Branch targets: 9 instructions to the -EINVAL tail, 11 to end */
	ret_einval = nfp_prog_current_offset(nfp_prog) + 9;
	end = nfp_prog_current_offset(nfp_prog) + 11;

	/* Calculate resulting length */
	emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta);
	/* delta == 0 is not allowed by the kernel, add must overflow to make
	 * length smaller.
	 */
	emit_br(nfp_prog, BR_BCC, ret_einval, 0);

	/* if (new_len < 14) then -EINVAL */
	emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN));
	emit_br(nfp_prog, BR_BMI, ret_einval, 0);

	emit_alu(nfp_prog, plen_reg(nfp_prog),
		 plen_reg(nfp_prog), ALU_OP_ADD, delta);
	emit_alu(nfp_prog, pv_len(nfp_prog),
		 pv_len(nfp_prog), ALU_OP_ADD, delta);

	/* Success: r0 = 0 set in the two branch delay slots (defer 2) */
	emit_br(nfp_prog, BR_UNC, end, 2);
	wrp_immed(nfp_prog, reg_both(0), 0);
	wrp_immed(nfp_prog, reg_both(1), 0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
		return -EINVAL;

	wrp_immed(nfp_prog, reg_both(0), -22);	/* -EINVAL */
	wrp_immed(nfp_prog, reg_both(1), ~0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, end))
		return -EINVAL;

	return 0;
}
1702 | ||
77a3d311 | 1703 | static int |
fc448497 | 1704 | map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
77a3d311 | 1705 | { |
77a3d311 JK |
1706 | bool load_lm_ptr; |
1707 | u32 ret_tgt; | |
1708 | s64 lm_off; | |
77a3d311 JK |
1709 | |
1710 | /* We only have to reload LM0 if the key is not at start of stack */ | |
1a7e62e6 | 1711 | lm_off = nfp_prog->stack_frame_depth; |
2f46e0c1 JK |
1712 | lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off; |
1713 | load_lm_ptr = meta->arg2.var_off || lm_off; | |
77a3d311 JK |
1714 | |
1715 | /* Set LM0 to start of key */ | |
1716 | if (load_lm_ptr) | |
1717 | emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0); | |
44d65a47 JK |
1718 | if (meta->func_id == BPF_FUNC_map_update_elem) |
1719 | emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2); | |
77a3d311 | 1720 | |
fc448497 | 1721 | emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, |
77a3d311 JK |
1722 | 2, RELO_BR_HELPER); |
1723 | ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; | |
1724 | ||
1725 | /* Load map ID into A0 */ | |
b4264c96 | 1726 | wrp_mov(nfp_prog, reg_a(0), reg_a(2)); |
77a3d311 JK |
1727 | |
1728 | /* Load the return address into B0 */ | |
1729 | wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL); | |
1730 | ||
1731 | if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt)) | |
1732 | return -EINVAL; | |
1733 | ||
1734 | /* Reset the LM0 pointer */ | |
1735 | if (!load_lm_ptr) | |
1736 | return 0; | |
1737 | ||
9c9e5323 | 1738 | emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0); |
77a3d311 JK |
1739 | wrp_nops(nfp_prog, 3); |
1740 | ||
1741 | return 0; | |
1742 | } | |
1743 | ||
df4a37d8 JK |
/* Emit the bpf_get_prandom_u32() helper: read the pseudo random number
 * CSR twice to fill both halves of the 64 bit return register pair.
 */
static int
nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	__emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM);
	/* CSR value is read in following immed[gpr, 0] */
	emit_immed(nfp_prog, reg_both(0), 0,
		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
	emit_immed(nfp_prog, reg_both(1), 0,
		   IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
	return 0;
}
1755 | ||
9816dd35 JK |
/* Emit the bpf_perf_event_output() helper call: branch to the firmware
 * helper via relocation with the pointer type and return address set up
 * in the branch delay slots.
 */
static int
nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	swreg ptr_type;
	u32 ret_tgt;

	ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));

	/* +3 = the branch plus its two delay-slot instructions below */
	ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
		     2, RELO_BR_HELPER);

	/* Load ptr type into A1 */
	wrp_mov(nfp_prog, reg_a(1), ptr_type);

	/* Load the return address into B0 */
	wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);

	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
		return -EINVAL;

	return 0;
}
1780 | ||
d985888f JK |
/* Emit the RX queue selection sequence: validate the requested queue id
 * fits the FW field, then record it (or the out-of-range sentinel) in
 * the packet vector.
 */
static int
nfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	u32 jmp_tgt;

	jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5;

	/* Make sure the queue id fits into FW field */
	emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2),
		 ALU_OP_AND_NOT_B, reg_imm(0xff));
	emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2);

	/* Set the 'queue selected' bit and the queue value */
	emit_shf(nfp_prog, pv_qsel_set(nfp_prog),
		 pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1),
		 SHF_SC_L_SHF, PKT_VEL_QSEL_SET_BIT);
	emit_ld_field(nfp_prog,
		      pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2),
		      SHF_SC_NONE, 0);
	/* Delay slots end here, we will jump over next instruction if queue
	 * value fits into the field.
	 */
	emit_ld_field(nfp_prog,
		      pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX),
		      SHF_SC_NONE, 0);

	if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt))
		return -EINVAL;

	return 0;
}
1812 | ||
cd7df56e JK |
1813 | /* --- Callbacks --- */ |
1814 | static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |
1815 | { | |
1816 | const struct bpf_insn *insn = &meta->insn; | |
b14157ee JK |
1817 | u8 dst = insn->dst_reg * 2; |
1818 | u8 src = insn->src_reg * 2; | |
1819 | ||
1820 | if (insn->src_reg == BPF_REG_10) { | |
1821 | swreg stack_depth_reg; | |
cd7df56e | 1822 | |
b14157ee | 1823 | stack_depth_reg = ur_load_imm_any(nfp_prog, |
1a7e62e6 | 1824 | nfp_prog->stack_frame_depth, |
b14157ee | 1825 | stack_imm(nfp_prog)); |
1a7e62e6 QM |
1826 | emit_alu(nfp_prog, reg_both(dst), stack_reg(nfp_prog), |
1827 | ALU_OP_ADD, stack_depth_reg); | |
b14157ee JK |
1828 | wrp_immed(nfp_prog, reg_both(dst + 1), 0); |
1829 | } else { | |
1830 | wrp_reg_mov(nfp_prog, dst, src); | |
1831 | wrp_reg_mov(nfp_prog, dst + 1, src + 1); | |
1832 | } | |
cd7df56e JK |
1833 | |
1834 | return 0; | |
1835 | } | |
1836 | ||
1837 | static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |
1838 | { | |
1839 | u64 imm = meta->insn.imm; /* sign extend */ | |
1840 | ||
1841 | wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U); | |
1842 | wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32); | |
1843 | ||
1844 | return 0; | |
1845 | } | |
1846 | ||
/* 64 bit bitwise ALU callbacks: thin wrappers selecting the ALU opcode.
 * The _imm variants pass a skip flag for immediates that make the
 * operation a no-op (0 for XOR/OR, all-ones for AND).
 */
static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
}

static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
}

static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
}

static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
}

static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
}

static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
}
1876 | ||
/* BPF_ALU64 | BPF_ADD | BPF_X: 64 bit add of two register pairs.
 * Low word first so the high word can consume the carry (ALU_OP_ADD_C).
 */
static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
		 reg_b(insn->src_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
		 reg_b(insn->src_reg * 2 + 1));

	return 0;
}
1890 | ||
1891 | static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |
1892 | { | |
1893 | const struct bpf_insn *insn = &meta->insn; | |
1894 | u64 imm = insn->imm; /* sign extend */ | |
1895 | ||
1896 | wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U); | |
1897 | wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32); | |
1898 | ||
1899 | return 0; | |
1900 | } | |
1901 | ||
/* BPF_ALU64 | BPF_SUB | BPF_X: 64 bit subtract of two register pairs.
 * Low word first so the high word can consume the borrow (ALU_OP_SUB_C).
 */
static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
		 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
		 reg_b(insn->src_reg * 2));
	emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
		 reg_b(insn->src_reg * 2 + 1));

	return 0;
}
1915 | ||
1916 | static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |
1917 | { | |
1918 | const struct bpf_insn *insn = &meta->insn; | |
1919 | u64 imm = insn->imm; /* sign extend */ | |
1920 | ||
1921 | wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U); | |
1922 | wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32); | |
1923 | ||
1924 | return 0; | |
1925 | } | |
1926 | ||
d3d23fdb JW |
/* 64 bit multiply/divide callbacks.
 * wrp_mul() flags: (is_64bit, is_reg_src).
 */
static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_mul(nfp_prog, meta, true, true);
}

static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_mul(nfp_prog, meta, true, false);
}

static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm);
}

static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	/* NOTE: verifier hook has rejected cases for which verifier doesn't
	 * know whether the source operand is constant or not.
	 * umin_src == umax_src here, so the divisor is known at JIT time.
	 */
	return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src);
}
1951 | ||
254ef4d7 JW |
1952 | static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
1953 | { | |
1954 | const struct bpf_insn *insn = &meta->insn; | |
1955 | ||
1956 | emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0), | |
1957 | ALU_OP_SUB, reg_b(insn->dst_reg * 2)); | |
1958 | emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0), | |
1959 | ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1)); | |
1960 | ||
1961 | return 0; | |
1962 | } | |
1963 | ||
991f5b36 JW |
/* Pseudo code:
 * if shift_amt >= 32
 *	dst_high = dst_low << shift_amt[4:0]
 *	dst_low = 0;
 * else
 *	dst_high = (dst_high, dst_low) >> (32 - shift_amt)
 *	dst_low = dst_low << shift_amt
 *
 * The indirect shift will use the same logic at runtime.
 */
static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
	/* Shift by zero emits nothing */
	if (!shift_amt)
		return 0;

	if (shift_amt < 32) {
		/* Double-shift pulls the top bits of the low word into the
		 * high word; must run before the low word is clobbered.
		 */
		emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
			 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
			 32 - shift_amt);
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_L_SHF, shift_amt);
	} else if (shift_amt == 32) {
		wrp_reg_mov(nfp_prog, dst + 1, dst);
		wrp_immed(nfp_prog, reg_both(dst), 0);
	} else if (shift_amt > 32) {
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
		wrp_immed(nfp_prog, reg_both(dst), 0);
	}

	return 0;
}
1996 | ||
991f5b36 | 1997 | static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
cd7df56e JK |
1998 | { |
1999 | const struct bpf_insn *insn = &meta->insn; | |
3cae1319 JK |
2000 | u8 dst = insn->dst_reg * 2; |
2001 | ||
991f5b36 JW |
2002 | return __shl_imm64(nfp_prog, dst, insn->imm); |
2003 | } | |
2004 | ||
/* Indirect 64 bit left shift helpers.  Each emit_shf_indir() is preceded
 * by an ALU op whose result primes the shift amount for the indirect
 * shift (PREV_ALU).
 */
static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	/* Indirect double-shift amount is 32 - shift_amt */
	emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
		 reg_b(src));
	emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_R_DSHF);
}

/* NOTE: for indirect left shift, HIGH part should be calculated first. */
static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_L_SHF);
}

/* Full << for a runtime shift amount known to be < 32 */
static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	shl_reg64_lt32_high(nfp_prog, dst, src);
	shl_reg64_lt32_low(nfp_prog, dst, src);
}

/* Full << for a runtime shift amount known to be >= 32:
 * high = low << amt[4:0], low = 0.
 */
static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_L_SHF);
	wrp_immed(nfp_prog, reg_both(dst), 0);
}
2035 | ||
/* BPF_ALU64 | BPF_LSH | BPF_X: 64 bit left shift by register.
 * Uses verifier bounds on the shift amount to pick the cheapest
 * sequence; falls back to a runtime branch on bit 5 (>= 32).
 */
static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 umin, umax;
	u8 dst, src;

	dst = insn->dst_reg * 2;
	umin = meta->umin_src;
	umax = meta->umax_src;
	/* Exact amount known - use the immediate form */
	if (umin == umax)
		return __shl_imm64(nfp_prog, dst, umin);

	src = insn->src_reg * 2;
	if (umax < 32) {
		shl_reg64_lt32(nfp_prog, dst, src);
	} else if (umin >= 32) {
		shl_reg64_ge32(nfp_prog, dst, src);
	} else {
		/* Generate different instruction sequences depending on runtime
		 * value of shift amount.  The +7/+6 label offsets must match
		 * the instruction counts of the emitted sequences.
		 */
		u16 label_ge32, label_end;

		label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
		/* Bit 5 of the shift amount set means amount >= 32 */
		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);

		shl_reg64_lt32_high(nfp_prog, dst, src);
		label_end = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br(nfp_prog, BR_UNC, label_end, 2);
		/* shl_reg64_lt32_low packed in delay slot. */
		shl_reg64_lt32_low(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
			return -EINVAL;
		shl_reg64_ge32(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
			return -EINVAL;
	}

	return 0;
}
2078 | ||
/* Pseudo code:
 * if shift_amt >= 32
 *	dst_high = 0;
 *	dst_low = dst_high >> shift_amt[4:0]
 * else
 *	dst_high = dst_high >> shift_amt
 *	dst_low = (dst_high, dst_low) >> shift_amt
 *
 * The indirect shift will use the same logic at runtime.
 */
static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
	/* Shift by zero emits nothing */
	if (!shift_amt)
		return 0;

	if (shift_amt < 32) {
		/* Double-shift pulls low bits of the high word into the low
		 * word; must run before the high word is clobbered.
		 */
		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
	} else if (shift_amt == 32) {
		wrp_reg_mov(nfp_prog, dst, dst + 1);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	} else if (shift_amt > 32) {
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
		wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	}

	return 0;
}
2110 | ||
991f5b36 JW |
2111 | static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2112 | { | |
2113 | const struct bpf_insn *insn = &meta->insn; | |
2114 | u8 dst = insn->dst_reg * 2; | |
2115 | ||
2116 | return __shr_imm64(nfp_prog, dst, insn->imm); | |
2117 | } | |
2118 | ||
/* NOTE: for indirect right shift, LOW part should be calculated first. */
static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	/* ALU op primes the indirect shift amount from src */
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
		       reg_b(dst + 1), SHF_SC_R_SHF);
}

static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_R_DSHF);
}

/* Full >> for a runtime shift amount known to be < 32 */
static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	shr_reg64_lt32_low(nfp_prog, dst, src);
	shr_reg64_lt32_high(nfp_prog, dst, src);
}

/* Full >> for a runtime shift amount known to be >= 32:
 * low = high >> amt[4:0], high = 0.
 */
static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
		       reg_b(dst + 1), SHF_SC_R_SHF);
	wrp_immed(nfp_prog, reg_both(dst + 1), 0);
}
2147 | ||
/* BPF_ALU64 | BPF_RSH | BPF_X: 64 bit logical right shift by register.
 * Uses verifier bounds to pick the cheapest sequence; falls back to a
 * runtime branch on bit 5 (>= 32) of the shift amount.
 */
static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 umin, umax;
	u8 dst, src;

	dst = insn->dst_reg * 2;
	umin = meta->umin_src;
	umax = meta->umax_src;
	/* Exact amount known - use the immediate form */
	if (umin == umax)
		return __shr_imm64(nfp_prog, dst, umin);

	src = insn->src_reg * 2;
	if (umax < 32) {
		shr_reg64_lt32(nfp_prog, dst, src);
	} else if (umin >= 32) {
		shr_reg64_ge32(nfp_prog, dst, src);
	} else {
		/* Generate different instruction sequences depending on runtime
		 * value of shift amount.  Label offsets must match the emitted
		 * instruction counts.
		 */
		u16 label_ge32, label_end;

		label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
		shr_reg64_lt32_low(nfp_prog, dst, src);
		label_end = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br(nfp_prog, BR_UNC, label_end, 2);
		/* shr_reg64_lt32_high packed in delay slot. */
		shr_reg64_lt32_high(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
			return -EINVAL;
		shr_reg64_ge32(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
			return -EINVAL;
	}

	return 0;
}
2189 | ||
f43d0f17 JW |
/* Code logic is the same as __shr_imm64 except ashr requires signedness bit
 * told through PREV_ALU result.
 */
static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
	/* Shift by zero emits nothing */
	if (!shift_amt)
		return 0;

	if (shift_amt < 32) {
		emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
		/* Set signedness bit. */
		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
			 reg_imm(0));
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
	} else if (shift_amt == 32) {
		/* NOTE: this also helps setting signedness bit. */
		wrp_reg_mov(nfp_prog, dst, dst + 1);
		/* High word becomes pure sign bits */
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
	} else if (shift_amt > 32) {
		/* Set signedness bit before each arithmetic shift */
		emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
			 reg_imm(0));
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
			 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
		emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
			 reg_b(dst + 1), SHF_SC_R_SHF, 31);
	}

	return 0;
}
2222 | ||
c217abcc JW |
2223 | static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2224 | { | |
2225 | const struct bpf_insn *insn = &meta->insn; | |
2226 | u8 dst = insn->dst_reg * 2; | |
2227 | ||
2228 | return __ashr_imm64(nfp_prog, dst, insn->imm); | |
2229 | } | |
2230 | ||
static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	/* NOTE: the first insn will set both indirect shift amount (source A)
	 * and signedness bit (MSB of result).
	 */
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
	emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
		       reg_b(dst + 1), SHF_SC_R_SHF);
}

static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	/* NOTE: it is the same as logic shift because we don't need to shift in
	 * signedness bit when the shift amount is less than 32.
	 */
	return shr_reg64_lt32_low(nfp_prog, dst, src);
}

/* Full arithmetic >> for a runtime shift amount known to be < 32 */
static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	ashr_reg64_lt32_low(nfp_prog, dst, src);
	ashr_reg64_lt32_high(nfp_prog, dst, src);
}

/* Full arithmetic >> for a runtime shift amount known to be >= 32:
 * low = high >> amt[4:0] (arithmetic), high = sign bits only.
 */
static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
{
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
		       reg_b(dst + 1), SHF_SC_R_SHF);
	emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
		 reg_b(dst + 1), SHF_SC_R_SHF, 31);
}
2263 | ||
/* Like ashr_imm64, but need to use indirect shift. */
static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 umin, umax;
	u8 dst, src;

	dst = insn->dst_reg * 2;
	umin = meta->umin_src;
	umax = meta->umax_src;
	/* Exact amount known - use the immediate form */
	if (umin == umax)
		return __ashr_imm64(nfp_prog, dst, umin);

	src = insn->src_reg * 2;
	if (umax < 32) {
		ashr_reg64_lt32(nfp_prog, dst, src);
	} else if (umin >= 32) {
		ashr_reg64_ge32(nfp_prog, dst, src);
	} else {
		/* Runtime branch on bit 5 (>= 32) of the shift amount;
		 * label offsets must match the emitted instruction counts.
		 */
		u16 label_ge32, label_end;

		label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
		ashr_reg64_lt32_low(nfp_prog, dst, src);
		label_end = nfp_prog_current_offset(nfp_prog) + 6;
		emit_br(nfp_prog, BR_UNC, label_end, 2);
		/* ashr_reg64_lt32_high packed in delay slot. */
		ashr_reg64_lt32_high(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
			return -EINVAL;
		ashr_reg64_ge32(nfp_prog, dst, src);

		if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
			return -EINVAL;
	}

	return 0;
}
2303 | ||
cd7df56e JK |
2304 | static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2305 | { | |
2306 | const struct bpf_insn *insn = &meta->insn; | |
2307 | ||
2308 | wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); | |
2309 | wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); | |
2310 | ||
2311 | return 0; | |
2312 | } | |
2313 | ||
2314 | static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |
2315 | { | |
2316 | const struct bpf_insn *insn = &meta->insn; | |
2317 | ||
2318 | wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); | |
2319 | wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); | |
2320 | ||
2321 | return 0; | |
2322 | } | |
2323 | ||
/* 32 bit ALU callbacks: thin wrappers selecting the ALU opcode.
 * div_reg()/div_imm() reuse the 64 bit implementations.
 */
static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
}

static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR);
}

static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
}

static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND);
}

static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
}

static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR);
}

static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
}

static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD);
}

static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
}

static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB);
}

static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_mul(nfp_prog, meta, false, true);
}

static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_mul(nfp_prog, meta, false, false);
}

static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return div_reg64(nfp_prog, meta);
}

static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return div_imm64(nfp_prog, meta);
}
2393 | ||
254ef4d7 JW |
2394 | static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2395 | { | |
2396 | u8 dst = meta->insn.dst_reg * 2; | |
2397 | ||
2398 | emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst)); | |
2399 | wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); | |
2400 | ||
2401 | return 0; | |
2402 | } | |
2403 | ||
84708c13 JW |
/* 32 bit arithmetic right shift by a constant amount; the high word of
 * the 64 bit pair is always zeroed.
 */
static int __ashr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
	if (shift_amt) {
		/* Set signedness bit (MSB of result). */
		emit_alu(nfp_prog, reg_none(), reg_a(dst), ALU_OP_OR,
			 reg_imm(0));
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
			 reg_b(dst), SHF_SC_R_SHF, shift_amt);
	}
	wrp_immed(nfp_prog, reg_both(dst + 1), 0);

	return 0;
}
2417 | ||
/* BPF_ALU | BPF_ARSH | BPF_X: 32 bit arithmetic right shift by register,
 * using the indirect shift when the amount is not a known constant.
 */
static int ashr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 umin, umax;
	u8 dst, src;

	dst = insn->dst_reg * 2;
	umin = meta->umin_src;
	umax = meta->umax_src;
	/* Exact amount known - use the immediate form */
	if (umin == umax)
		return __ashr_imm(nfp_prog, dst, umin);

	src = insn->src_reg * 2;
	/* NOTE: the first insn will set both indirect shift amount (source A)
	 * and signedness bit (MSB of result).
	 */
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
		       reg_b(dst), SHF_SC_R_SHF);
	wrp_immed(nfp_prog, reg_both(dst + 1), 0);

	return 0;
}
2441 | ||
2442 | static int ashr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |
2443 | { | |
2444 | const struct bpf_insn *insn = &meta->insn; | |
2445 | u8 dst = insn->dst_reg * 2; | |
2446 | ||
2447 | return __ashr_imm(nfp_prog, dst, insn->imm); | |
2448 | } | |
2449 | ||
ac7a1717 JW |
/* 32 bit logical right shift by a constant amount; high word zeroed.
 * Shift by zero only zeroes the high word.
 */
static int __shr_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
	if (shift_amt)
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_R_SHF, shift_amt);
	wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	return 0;
}
2458 | ||
2459 | static int shr_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |
2460 | { | |
2461 | const struct bpf_insn *insn = &meta->insn; | |
2462 | u8 dst = insn->dst_reg * 2; | |
2463 | ||
2464 | return __shr_imm(nfp_prog, dst, insn->imm); | |
2465 | } | |
2466 | ||
/* BPF_ALU | BPF_RSH | BPF_X: 32 bit logical right shift by register,
 * using the indirect shift when the amount is not a known constant.
 */
static int shr_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 umin, umax;
	u8 dst, src;

	dst = insn->dst_reg * 2;
	umin = meta->umin_src;
	umax = meta->umax_src;
	/* Exact amount known - use the immediate form */
	if (umin == umax)
		return __shr_imm(nfp_prog, dst, umin);

	src = insn->src_reg * 2;
	/* ALU op primes the indirect shift amount from src */
	emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
	emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
		       reg_b(dst), SHF_SC_R_SHF);
	wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	return 0;
}
2486 | ||
/* Emit a 32-bit left shift of @dst by the constant @shift_amt.  The
 * upper word of the pair is cleared (BPF ALU32 zero-extension).
 */
static int __shl_imm(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
{
	/* A zero shift needs no shifter instruction. */
	if (shift_amt)
		emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
			 reg_b(dst), SHF_SC_L_SHF, shift_amt);
	wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	return 0;
}
2495 | ||
cd7df56e JK |
2496 | static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2497 | { | |
2498 | const struct bpf_insn *insn = &meta->insn; | |
ac7a1717 | 2499 | u8 dst = insn->dst_reg * 2; |
cd7df56e | 2500 | |
ac7a1717 JW |
2501 | return __shl_imm(nfp_prog, dst, insn->imm); |
2502 | } | |
cd7df56e | 2503 | |
ac7a1717 JW |
/* JIT BPF_ALU | BPF_LSH | BPF_X: 32-bit left shift by a register
 * amount, falling back to the immediate form when the amount is a
 * verifier-proven constant.
 */
static int shl_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 umin, umax;
	u8 dst, src;

	dst = insn->dst_reg * 2;
	umin = meta->umin_src;
	umax = meta->umax_src;
	if (umin == umax)
		return __shl_imm(nfp_prog, dst, umin);

	src = insn->src_reg * 2;
	/* Reuse the low-word stage of the 64-bit shift helper for the
	 * indirect shift of the low word.
	 */
	shl_reg64_lt32_low(nfp_prog, dst, src);
	/* ALU32 result: the upper word is always zero. */
	wrp_immed(nfp_prog, reg_both(dst + 1), 0);
	return 0;
}
2521 | ||
3119d1fd JK |
/* JIT a BPF endianness-conversion (BPF_END) instruction.  insn->imm
 * selects how many low bits of the 64-bit register pair take part in
 * the byte swap (16, 32 or 64).
 */
static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u8 gpr = insn->dst_reg * 2;

	switch (insn->imm) {
	case 16:
		/* Byte-swap the low 16 bits with two ld_field ops (rotate
		 * the bytes into place, then shift out the upper half),
		 * leaving only the swapped 16-bit value in the low word.
		 */
		emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
			      SHF_SC_R_ROT, 8);
		emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
			      SHF_SC_R_SHF, 16);

		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
		break;
	case 32:
		wrp_end32(nfp_prog, reg_a(gpr), gpr);
		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
		break;
	case 64:
		/* Swap the two words as well as the bytes within them.
		 * Stash the original high word first so it is not
		 * clobbered when the low word is swapped into its slot.
		 */
		wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));

		wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
		wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
		break;
	}

	return 0;
}
2550 | ||
cd7df56e JK |
/* Second half of a 16-byte BPF_LD | BPF_IMM | BPF_DW instruction.  The
 * previous insn carries the low 32 bits of the immediate, this one the
 * high 32 bits; load both halves into the destination register pair.
 */
static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	struct nfp_insn_meta *prev = nfp_meta_prev(meta);
	u32 imm_lo, imm_hi;
	u8 dst;

	dst = prev->insn.dst_reg * 2;
	imm_lo = prev->insn.imm;
	imm_hi = meta->insn.imm;

	wrp_immed(nfp_prog, reg_both(dst), imm_lo);

	/* mov is always 1 insn, load imm may be two, so try to use mov */
	if (imm_hi == imm_lo)
		wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
	else
		wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);

	return 0;
}
2571 | ||
/* First half of BPF_LD_IMM64: no code is emitted yet, just arrange for
 * imm_ld8_part2() to run once the second (high-immediate) insn of the
 * pair is seen.
 */
static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	meta->double_cb = imm_ld8_part2;
	return 0;
}
2577 | ||
/* Absolute packet loads (legacy BPF_LD | BPF_ABS) of 1, 2 and 4 bytes;
 * insn->imm is the fixed packet offset.
 */
static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ld(nfp_prog, meta->insn.imm, 1);
}

static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ld(nfp_prog, meta->insn.imm, 2);
}

static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ld(nfp_prog, meta->insn.imm, 4);
}
2592 | ||
/* Indirect packet loads (legacy BPF_LD | BPF_IND) of 1, 2 and 4 bytes;
 * the offset is src_reg plus the insn->imm displacement.
 */
static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
				     meta->insn.src_reg * 2, 1);
}

static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
				     meta->insn.src_reg * 2, 2);
}

static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return construct_data_ind_ld(nfp_prog, meta->insn.imm,
				     meta->insn.src_reg * 2, 4);
}
2610 | ||
a82b23fb JK |
2611 | static int |
2612 | mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, | |
d3488480 | 2613 | unsigned int size, unsigned int ptr_off) |
a82b23fb | 2614 | { |
d3488480 | 2615 | return mem_op_stack(nfp_prog, meta, size, ptr_off, |
b14157ee JK |
2616 | meta->insn.dst_reg * 2, meta->insn.src_reg * 2, |
2617 | true, wrp_lmem_load); | |
a82b23fb JK |
2618 | } |
2619 | ||
943c57b9 JK |
/* Load a field of the TC program context (struct __sk_buff).  Only the
 * fields with a direct datapath equivalent are supported, and each only
 * at its exact declared size; anything else is rejected.
 */
static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
		       u8 size)
{
	swreg dst = reg_both(meta->insn.dst_reg * 2);

	switch (meta->insn.off) {
	case offsetof(struct __sk_buff, len):
		if (size != FIELD_SIZEOF(struct __sk_buff, len))
			return -EOPNOTSUPP;
		wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
		break;
	case offsetof(struct __sk_buff, data):
		if (size != FIELD_SIZEOF(struct __sk_buff, data))
			return -EOPNOTSUPP;
		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
		break;
	case offsetof(struct __sk_buff, data_end):
		if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
			return -EOPNOTSUPP;
		/* data_end = packet pointer + packet length */
		emit_alu(nfp_prog, dst,
			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* Context fields are 32-bit values; clear the upper word. */
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}
2650 | ||
943c57b9 JK |
/* Load a field of the XDP program context (struct xdp_md).  Like the
 * skb variant, only fields with datapath equivalents are supported and
 * only at their exact size.
 */
static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
		       u8 size)
{
	swreg dst = reg_both(meta->insn.dst_reg * 2);

	switch (meta->insn.off) {
	case offsetof(struct xdp_md, data):
		if (size != FIELD_SIZEOF(struct xdp_md, data))
			return -EOPNOTSUPP;
		wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
		break;
	case offsetof(struct xdp_md, data_end):
		if (size != FIELD_SIZEOF(struct xdp_md, data_end))
			return -EOPNOTSUPP;
		/* data_end = packet pointer + packet length */
		emit_alu(nfp_prog, dst,
			 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
		break;
	default:
		return -EOPNOTSUPP;
	}

	/* Context fields are 32-bit values; clear the upper word. */
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}
2676 | ||
2ca71441 JK |
2677 | static int |
2678 | mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, | |
2679 | unsigned int size) | |
2680 | { | |
2681 | swreg tmp_reg; | |
2682 | ||
2683 | tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); | |
2684 | ||
3dd43c33 JK |
2685 | return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2, |
2686 | tmp_reg, meta->insn.dst_reg * 2, size); | |
2687 | } | |
2688 | ||
2689 | static int | |
2690 | mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, | |
2691 | unsigned int size) | |
2692 | { | |
2693 | swreg tmp_reg; | |
2694 | ||
2695 | tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); | |
2696 | ||
2697 | return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2, | |
2698 | tmp_reg, meta->insn.dst_reg * 2, size); | |
2ca71441 JK |
2699 | } |
2700 | ||
be759237 JW |
/* Prime the packet cache: read the byte range [range_start, range_end)
 * chosen by the verifier pass into transfer-in registers, so following
 * loads in the range can be served from registers instead of memory.
 */
static void
mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
			   struct nfp_insn_meta *meta)
{
	s16 range_start = meta->pkt_cache.range_start;
	s16 range_end = meta->pkt_cache.range_end;
	swreg src_base, off;
	u8 xfer_num, len;
	bool indir;

	off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
	src_base = reg_a(meta->insn.src_reg * 2);
	len = range_end - range_start;
	/* Number of transfer registers needed to hold the range. */
	xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;

	/* Ranges wider than 8 registers need the indirect-reference form
	 * of the command to encode the transfer count.
	 */
	indir = len > 8 * REG_WIDTH;
	/* Setup PREV_ALU for indirect mode. */
	if (indir)
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));

	/* Cache memory into transfer-in registers. */
	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
		     off, xfer_num - 1, CMD_CTX_SWAP, indir);
}
2726 | ||
91ff69e8 JW |
/* Serve an unaligned @size-byte load from the packet cache.  The value
 * may straddle up to three transfer registers; assemble it from
 * sub-register pieces (len_lo from the first, len_mid from the second,
 * the remainder from the third).
 */
static int
mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
				     struct nfp_insn_meta *meta,
				     unsigned int size)
{
	s16 range_start = meta->pkt_cache.range_start;
	s16 insn_off = meta->insn.off - range_start;	/* offset into cache */
	swreg dst_lo, dst_hi, src_lo, src_mid;
	u8 dst_gpr = meta->insn.dst_reg * 2;
	u8 len_lo = size, len_mid = 0;
	u8 idx = insn_off / REG_WIDTH;	/* first xfer register involved */
	u8 off = insn_off % REG_WIDTH;	/* byte offset within it */

	dst_hi = reg_both(dst_gpr + 1);
	dst_lo = reg_both(dst_gpr);
	src_lo = reg_xfer(idx);

	/* The read length could involve as many as three registers. */
	if (size > REG_WIDTH - off) {
		/* Calculate the part in the second register. */
		len_lo = REG_WIDTH - off;
		len_mid = size - len_lo;

		/* Calculate the part in the third register. */
		if (size > 2 * REG_WIDTH - off)
			len_mid = REG_WIDTH;
	}

	wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);

	if (!len_mid) {
		/* Whole value fit in one xfer register. */
		wrp_immed(nfp_prog, dst_hi, 0);
		return 0;
	}

	src_mid = reg_xfer(idx + 1);

	if (size <= REG_WIDTH) {
		/* Two registers: OR the second piece into the low word. */
		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
		wrp_immed(nfp_prog, dst_hi, 0);
	} else {
		/* Three registers: finish the low word from src_mid, then
		 * build the high word from the rest of src_mid and src_hi.
		 */
		swreg src_hi = reg_xfer(idx + 2);

		wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
				   REG_WIDTH - len_lo, len_lo);
		wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
				REG_WIDTH - len_lo);
		wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
				   len_lo);
	}

	return 0;
}
2780 | ||
be759237 JW |
/* Serve a register-aligned @size-byte load straight from the packet
 * cache's transfer registers.
 */
static int
mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
				   struct nfp_insn_meta *meta,
				   unsigned int size)
{
	swreg dst_lo, dst_hi, src_lo;
	u8 dst_gpr, idx;

	/* Index of the xfer register holding the requested offset. */
	idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
	dst_gpr = meta->insn.dst_reg * 2;
	dst_hi = reg_both(dst_gpr + 1);
	dst_lo = reg_both(dst_gpr);
	src_lo = reg_xfer(idx);

	if (size < REG_WIDTH) {
		/* Sub-word load: extract just the bytes we need. */
		wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
		wrp_immed(nfp_prog, dst_hi, 0);
	} else if (size == REG_WIDTH) {
		wrp_mov(nfp_prog, dst_lo, src_lo);
		wrp_immed(nfp_prog, dst_hi, 0);
	} else {
		/* 8-byte load spans two consecutive xfer registers. */
		swreg src_hi = reg_xfer(idx + 1);

		wrp_mov(nfp_prog, dst_lo, src_lo);
		wrp_mov(nfp_prog, dst_hi, src_hi);
	}

	return 0;
}
2810 | ||
2811 | static int | |
2812 | mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog, | |
2813 | struct nfp_insn_meta *meta, unsigned int size) | |
2814 | { | |
2815 | u8 off = meta->insn.off - meta->pkt_cache.range_start; | |
2816 | ||
91ff69e8 JW |
2817 | if (IS_ALIGNED(off, REG_WIDTH)) |
2818 | return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size); | |
be759237 | 2819 | |
91ff69e8 | 2820 | return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size); |
be759237 JW |
2821 | } |
2822 | ||
2ca71441 JK |
/* Dispatch a BPF_LDX of @size bytes based on the verifier-derived
 * pointer type of the source operand.
 */
static int
mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	unsigned int size)
{
	/* Loads that were merged into a multi-insn copy are emitted as a
	 * single memcpy sequence instead.
	 */
	if (meta->ldst_gather_len)
		return nfp_cpp_memcpy(nfp_prog, meta);

	if (meta->ptr.type == PTR_TO_CTX) {
		if (nfp_prog->type == BPF_PROG_TYPE_XDP)
			return mem_ldx_xdp(nfp_prog, meta, size);
		else
			return mem_ldx_skb(nfp_prog, meta, size);
	}

	if (meta->ptr.type == PTR_TO_PACKET) {
		/* Serve from the packet cache when one covers this access,
		 * priming it first if this insn is its designated
		 * initializer.
		 */
		if (meta->pkt_cache.range_end) {
			if (meta->pkt_cache.do_init)
				mem_ldx_data_init_pktcache(nfp_prog, meta);

			return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
		} else {
			return mem_ldx_data(nfp_prog, meta, size);
		}
	}

	if (meta->ptr.type == PTR_TO_STACK)
		return mem_ldx_stack(nfp_prog, meta, size,
				     meta->ptr.off + meta->ptr.var_off.value);

	if (meta->ptr.type == PTR_TO_MAP_VALUE)
		return mem_ldx_emem(nfp_prog, meta, size);

	return -EOPNOTSUPP;
}
2857 | ||
/* BPF_LDX handlers: one thin wrapper per access size. */
static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_ldx(nfp_prog, meta, 1);
}

static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_ldx(nfp_prog, meta, 2);
}

static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_ldx(nfp_prog, meta, 4);
}

static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_ldx(nfp_prog, meta, 8);
}
2877 | ||
e663fe38 JK |
2878 | static int |
2879 | mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, | |
2880 | unsigned int size) | |
2881 | { | |
2882 | u64 imm = meta->insn.imm; /* sign extend */ | |
2883 | swreg off_reg; | |
2884 | ||
2885 | off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); | |
2886 | ||
2887 | return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, | |
2888 | imm, size); | |
2889 | } | |
2890 | ||
2891 | static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, | |
2892 | unsigned int size) | |
19d0f54e | 2893 | { |
e663fe38 JK |
2894 | if (meta->ptr.type == PTR_TO_PACKET) |
2895 | return mem_st_data(nfp_prog, meta, size); | |
2896 | ||
46c50518 | 2897 | return -EOPNOTSUPP; |
19d0f54e JK |
2898 | } |
2899 | ||
e663fe38 JK |
/* BPF_ST handlers: one thin wrapper per access size. */
static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_st(nfp_prog, meta, 1);
}

static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_st(nfp_prog, meta, 2);
}

static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_st(nfp_prog, meta, 4);
}

static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_st(nfp_prog, meta, 8);
}
2919 | ||
2920 | static int | |
2921 | mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, | |
2922 | unsigned int size) | |
2923 | { | |
2924 | swreg off_reg; | |
2925 | ||
2926 | off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); | |
2927 | ||
2928 | return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg, | |
2929 | meta->insn.src_reg * 2, size); | |
2930 | } | |
2931 | ||
ee9133a8 JK |
2932 | static int |
2933 | mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, | |
d3488480 | 2934 | unsigned int size, unsigned int ptr_off) |
ee9133a8 | 2935 | { |
d3488480 | 2936 | return mem_op_stack(nfp_prog, meta, size, ptr_off, |
b14157ee JK |
2937 | meta->insn.src_reg * 2, meta->insn.dst_reg * 2, |
2938 | false, wrp_lmem_store); | |
ee9133a8 JK |
2939 | } |
2940 | ||
d985888f JK |
2941 | static int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2942 | { | |
2943 | switch (meta->insn.off) { | |
2944 | case offsetof(struct xdp_md, rx_queue_index): | |
2945 | return nfp_queue_select(nfp_prog, meta); | |
2946 | } | |
2947 | ||
2948 | WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */ | |
2949 | return -EOPNOTSUPP; | |
2950 | } | |
2951 | ||
e663fe38 JK |
2952 | static int |
2953 | mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, | |
2954 | unsigned int size) | |
2955 | { | |
2956 | if (meta->ptr.type == PTR_TO_PACKET) | |
2957 | return mem_stx_data(nfp_prog, meta, size); | |
2958 | ||
ee9133a8 | 2959 | if (meta->ptr.type == PTR_TO_STACK) |
d3488480 JK |
2960 | return mem_stx_stack(nfp_prog, meta, size, |
2961 | meta->ptr.off + meta->ptr.var_off.value); | |
ee9133a8 | 2962 | |
46c50518 | 2963 | return -EOPNOTSUPP; |
6d677075 JK |
2964 | } |
2965 | ||
e663fe38 JK |
/* BPF_STX handlers for 1- and 2-byte accesses. */
static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_stx(nfp_prog, meta, 1);
}

static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_stx(nfp_prog, meta, 2);
}
2975 | ||
6d677075 JK |
2976 | static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
2977 | { | |
d985888f JK |
2978 | if (meta->ptr.type == PTR_TO_CTX) |
2979 | if (nfp_prog->type == BPF_PROG_TYPE_XDP) | |
2980 | return mem_stx_xdp(nfp_prog, meta); | |
e663fe38 JK |
2981 | return mem_stx(nfp_prog, meta, 4); |
2982 | } | |
2ca71441 | 2983 | |
e663fe38 JK |
/* 8-byte BPF_STX wrapper. */
static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_stx(nfp_prog, meta, 8);
}
2988 | ||
dcb0c27f JK |
/* JIT BPF_XADD (atomic add to memory).  The NFP add_imm command can
 * carry a 16-bit operand; based on what the verifier learned about the
 * source value (xadd_maybe_16bit / xadd_over_16bit) emit the immediate
 * form, the full form, or both with a run-time dispatch between them.
 */
static int
mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
{
	u8 dst_gpr = meta->insn.dst_reg * 2;
	u8 src_gpr = meta->insn.src_reg * 2;
	unsigned int full_add, out;
	swreg addra, addrb, off;

	off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));

	/* We can fit 16 bits into command immediate, if we know the immediate
	 * is guaranteed to either always or never fit into 16 bit we only
	 * generate code to handle that particular case, otherwise generate
	 * code for both.
	 */
	out = nfp_prog_current_offset(nfp_prog);
	full_add = nfp_prog_current_offset(nfp_prog);

	/* Pre-compute the branch targets by adding the length of each
	 * conditionally emitted piece: full_add is the start of the full
	 * add sequence, out is the first insn after the whole construct.
	 * These counts must match the emission below exactly; the
	 * confirm_current_offset() calls verify that.
	 */
	if (meta->insn.off) {
		out += 2;
		full_add += 2;
	}
	if (meta->xadd_maybe_16bit) {
		out += 3;
		full_add += 3;
	}
	if (meta->xadd_over_16bit)
		out += 2 + is64;
	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
		out += 5;
		full_add += 5;
	}

	/* Generate the branch for choosing add_imm vs add */
	if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
		swreg max_imm = imm_a(nfp_prog);

		/* Branch to the full add if the source value exceeds
		 * 16 bits (0xffff - src borrows).
		 */
		wrp_immed(nfp_prog, max_imm, 0xffff);
		emit_alu(nfp_prog, reg_none(),
			 max_imm, ALU_OP_SUB, reg_b(src_gpr));
		emit_alu(nfp_prog, reg_none(),
			 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
		emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
		/* defer for add */
	}

	/* If insn has an offset add to the address */
	if (!meta->insn.off) {
		addra = reg_a(dst_gpr);
		addrb = reg_b(dst_gpr + 1);
	} else {
		emit_alu(nfp_prog, imma_a(nfp_prog),
			 reg_a(dst_gpr), ALU_OP_ADD, off);
		emit_alu(nfp_prog, imma_b(nfp_prog),
			 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
		addra = imma_a(nfp_prog);
		addrb = imma_b(nfp_prog);
	}

	/* Generate the add_imm if 16 bits are possible */
	if (meta->xadd_maybe_16bit) {
		swreg prev_alu = imm_a(nfp_prog);

		/* Build the indirect-reference word carrying the 16-bit
		 * addend and length override for the command.
		 */
		wrp_immed(nfp_prog, prev_alu,
			  FIELD_PREP(CMD_OVE_DATA, 2) |
			  CMD_OVE_LEN |
			  FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
		wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
		emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
			       addra, addrb, 0, CMD_CTX_NO_SWAP);

		if (meta->xadd_over_16bit)
			emit_br(nfp_prog, BR_UNC, out, 0);
	}

	if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
		return -EINVAL;

	/* Generate the add if 16 bits are not guaranteed */
	if (meta->xadd_over_16bit) {
		emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
			 addra, addrb, is64 << 2,
			 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);

		/* Addend travels through the transfer registers. */
		wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
		if (is64)
			wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
	}

	if (!nfp_prog_confirm_current_offset(nfp_prog, out))
		return -EINVAL;

	return 0;
}
3083 | ||
/* 4- and 8-byte BPF_XADD wrappers. */
static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_xadd(nfp_prog, meta, false);
}

static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return mem_xadd(nfp_prog, meta, true);
}
3093 | ||
cd7df56e JK |
/* JIT BPF_JA: unconditional branch to insn->off. */
static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);

	return 0;
}
3100 | ||
/* JIT BPF_JEQ against a 64-bit immediate: XOR each word with the
 * matching immediate half (skipping halves that are zero - XOR with
 * zero is the identity), OR the results, and branch if everything
 * cancelled out.
 */
static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	swreg or1, or2, tmp_reg;

	or1 = reg_a(insn->dst_reg * 2);
	or2 = reg_b(insn->dst_reg * 2 + 1);

	if (imm & ~0U) {
		tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
		emit_alu(nfp_prog, imm_a(nfp_prog),
			 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
		or1 = imm_a(nfp_prog);
	}

	if (imm >> 32) {
		tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
		emit_alu(nfp_prog, imm_b(nfp_prog),
			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
		or2 = imm_b(nfp_prog);
	}

	/* Branch taken only if both XOR results (or raw words) are zero. */
	emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
	emit_br(nfp_prog, BR_BEQ, insn->off, 0);

	return 0;
}
3129 | ||
46144839 JW |
3130 | static int jeq32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) |
3131 | { | |
3132 | const struct bpf_insn *insn = &meta->insn; | |
3133 | swreg tmp_reg; | |
3134 | ||
3135 | tmp_reg = ur_load_imm_any(nfp_prog, insn->imm, imm_b(nfp_prog)); | |
3136 | emit_alu(nfp_prog, reg_none(), | |
3137 | reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); | |
3138 | emit_br(nfp_prog, BR_BEQ, insn->off, 0); | |
3139 | ||
3140 | return 0; | |
3141 | } | |
3142 | ||
cd7df56e JK |
/* JIT BPF_JSET with an immediate: branch if (dst & imm) != 0.  Handles
 * both the 64-bit and the JMP32 variants.
 */
static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	u8 dst_gpr = insn->dst_reg * 2;
	swreg tmp_reg;

	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
	emit_alu(nfp_prog, imm_b(nfp_prog),
		 reg_a(dst_gpr), ALU_OP_AND, tmp_reg);
	/* Upper word of the mask can only be 0 or ~0 from sign extension,
	 * so either ignore it or OR the whole thing in.
	 */
	if (is_mbpf_jmp64(meta) && imm >> 32) {
		emit_alu(nfp_prog, reg_none(),
			 reg_a(dst_gpr + 1), ALU_OP_OR, imm_b(nfp_prog));
	}
	emit_br(nfp_prog, BR_BNE, insn->off, 0);

	return 0;
}
3164 | ||
/* JIT BPF_JNE with an immediate: branch if dst != imm, for both the
 * 64-bit and the JMP32 variants.
 */
static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	bool is_jmp32 = is_mbpf_jmp32(meta);
	swreg tmp_reg;

	if (!imm) {
		/* Comparing with zero only needs a flag-setting pass over
		 * the register: the low word alone for JMP32, the OR of
		 * both words for the 64-bit compare.
		 */
		if (is_jmp32)
			emit_alu(nfp_prog, reg_none(), reg_none(), ALU_OP_NONE,
				 reg_b(insn->dst_reg * 2));
		else
			emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
				 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
		emit_br(nfp_prog, BR_BNE, insn->off, 0);
		return 0;
	}

	/* Compare and branch on each word separately; either mismatch
	 * takes the branch.
	 */
	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
	emit_alu(nfp_prog, reg_none(),
		 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
	emit_br(nfp_prog, BR_BNE, insn->off, 0);

	if (is_jmp32)
		return 0;

	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
	emit_alu(nfp_prog, reg_none(),
		 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
	emit_br(nfp_prog, BR_BNE, insn->off, 0);

	return 0;
}
3198 | ||
/* JIT BPF_JEQ between two registers: XOR the words, OR the results,
 * branch if everything cancelled out.  JMP32 compares only the low
 * words.
 */
static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;

	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
		 ALU_OP_XOR, reg_b(insn->src_reg * 2));
	if (is_mbpf_jmp64(meta)) {
		emit_alu(nfp_prog, imm_b(nfp_prog),
			 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR,
			 reg_b(insn->src_reg * 2 + 1));
		emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR,
			 imm_b(nfp_prog));
	}
	emit_br(nfp_prog, BR_BEQ, insn->off, 0);

	return 0;
}
3216 | ||
cd7df56e JK |
/* Register-register JSET/JNE: both reduce to a flag-setting ALU op
 * (AND resp. XOR) followed by a branch-if-not-equal.
 */
static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
}

static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
}
3226 | ||
389f263b QM |
/* Emit the calling sequence for a BPF-to-BPF (pseudo) call: grow the
 * stack-pointer CSR by the caller's frame, load the return address into
 * ret_reg(), jump to the callee (directly, or via the push-regs
 * subroutine), then shrink the stack pointer again after the return.
 */
static int
bpf_to_bpf_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	u32 ret_tgt, stack_depth, offset_br;
	swreg tmp_reg;

	stack_depth = round_up(nfp_prog->stack_frame_depth, STACK_FRAME_ALIGN);
	/* Space for saving the return address is accounted for by the callee,
	 * so stack_depth can be zero for the main function.
	 */
	if (stack_depth) {
		tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
					  stack_imm(nfp_prog));
		emit_alu(nfp_prog, stack_reg(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, tmp_reg);
		emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
			    NFP_CSR_ACT_LM_ADDR0);
	}

	/* Two cases for jumping to the callee:
	 *
	 * - If callee uses and needs to save R6~R9 then:
	 *     1. Put the start offset of the callee into imm_b(). This will
	 *        require a fixup step, as we do not necessarily know this
	 *        address yet.
	 *     2. Put the return address from the callee to the caller into
	 *        register ret_reg().
	 *     3. (After defer slots are consumed) Jump to the subroutine that
	 *        pushes the registers to the stack.
	 *   The subroutine acts as a trampoline, and returns to the address in
	 *   imm_b(), i.e. jumps to the callee.
	 *
	 * - If callee does not need to save R6~R9 then just load return
	 *   address to the caller in ret_reg(), and jump to the callee
	 *   directly.
	 *
	 * Using ret_reg() to pass the return address to the callee is set here
	 * as a convention. The callee can then push this address onto its
	 * stack frame in its prologue. The advantages of passing the return
	 * address through ret_reg(), instead of pushing it to the stack right
	 * here, are the following:
	 * - It looks cleaner.
	 * - If the called function is called multiple time, we get a lower
	 *   program size.
	 * - We save two no-op instructions that should be added just before
	 *   the emit_br() when stack depth is not null otherwise.
	 * - If we ever find a register to hold the return address during whole
	 *   execution of the callee, we will not have to push the return
	 *   address to the stack for leaf functions.
	 */
	if (!meta->jmp_dst) {
		pr_err("BUG: BPF-to-BPF call has no destination recorded\n");
		return -ELOOP;
	}
	if (nfp_prog->subprog[meta->jmp_dst->subprog_idx].needs_reg_push) {
		ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
		emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2,
			     RELO_BR_GO_CALL_PUSH_REGS);
		offset_br = nfp_prog_current_offset(nfp_prog);
		wrp_immed_relo(nfp_prog, imm_b(nfp_prog), 0, RELO_IMMED_REL);
	} else {
		ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
		emit_br(nfp_prog, BR_UNC, meta->insn.imm, 1);
		offset_br = nfp_prog_current_offset(nfp_prog);
	}
	wrp_immed_relo(nfp_prog, ret_reg(nfp_prog), ret_tgt, RELO_IMMED_REL);

	/* ret_tgt must land exactly here - the callee returns to it. */
	if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
		return -EINVAL;

	if (stack_depth) {
		tmp_reg = ur_load_imm_any(nfp_prog, stack_depth,
					  stack_imm(nfp_prog));
		emit_alu(nfp_prog, stack_reg(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
		emit_csr_wr(nfp_prog, stack_reg(nfp_prog),
			    NFP_CSR_ACT_LM_ADDR0);
		wrp_nops(nfp_prog, 3);
	}

	/* Record the number of insns emitted after the branch so later
	 * fixup passes can relocate it.
	 */
	meta->num_insns_after_br = nfp_prog_current_offset(nfp_prog);
	meta->num_insns_after_br -= offset_br;

	return 0;
}
3312 | ||
3313 | static int helper_call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |
2cb230bd JK |
3314 | { |
3315 | switch (meta->insn.imm) { | |
0d49eaf4 JK |
3316 | case BPF_FUNC_xdp_adjust_head: |
3317 | return adjust_head(nfp_prog, meta); | |
0c261593 JK |
3318 | case BPF_FUNC_xdp_adjust_tail: |
3319 | return adjust_tail(nfp_prog, meta); | |
77a3d311 | 3320 | case BPF_FUNC_map_lookup_elem: |
44d65a47 | 3321 | case BPF_FUNC_map_update_elem: |
bfee64de | 3322 | case BPF_FUNC_map_delete_elem: |
fc448497 | 3323 | return map_call_stack_common(nfp_prog, meta); |
df4a37d8 JK |
3324 | case BPF_FUNC_get_prandom_u32: |
3325 | return nfp_get_prandom_u32(nfp_prog, meta); | |
9816dd35 JK |
3326 | case BPF_FUNC_perf_event_output: |
3327 | return nfp_perf_event_output(nfp_prog, meta); | |
2cb230bd JK |
3328 | default: |
3329 | WARN_ONCE(1, "verifier allowed unsupported function\n"); | |
3330 | return -EOPNOTSUPP; | |
3331 | } | |
3332 | } | |
3333 | ||
389f263b QM |
/* Translate a BPF_CALL insn: pseudo calls target another BPF subprogram,
 * everything else is a call to a BPF helper.
 */
static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return is_mbpf_pseudo_call(meta) ? bpf_to_bpf_call(nfp_prog, meta) :
					   helper_call(nfp_prog, meta);
}
3341 | ||
3342 | static bool nfp_is_main_function(struct nfp_insn_meta *meta) | |
3343 | { | |
3344 | return meta->subprog_idx == 0; | |
3345 | } | |
3346 | ||
cd7df56e JK |
/* Emit the unconditional branch to the program epilogue ("out" target).
 * The real offset is filled in later via the RELO_BR_GO_OUT relocation.
 */
static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);

	return 0;
}
3353 | ||
389f263b QM |
/* Emit the return-to-caller sequence of a BPF subprogram.
 * The emitted instruction order is significant (branch defer slots);
 * do not reorder.  Always returns 0.
 */
static int
nfp_subprog_epilogue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	if (nfp_prog->subprog[meta->subprog_idx].needs_reg_push) {
		/* Pop R6~R9 to the stack via related subroutine.
		 * We loaded the return address to the caller into ret_reg().
		 * This means that the subroutine does not come back here, we
		 * make it jump back to the subprogram caller directly!
		 */
		emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 1,
			     RELO_BR_GO_CALL_POP_REGS);
		/* Pop return address from the stack. */
		wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
	} else {
		/* Pop return address from the stack. */
		wrp_mov(nfp_prog, ret_reg(nfp_prog), reg_lm(0, 0));
		/* Jump back to caller if no callee-saved registers were used
		 * by the subprogram.
		 */
		emit_rtn(nfp_prog, ret_reg(nfp_prog), 0);
	}

	return 0;
}
3378 | ||
/* BPF_EXIT leaves the program when hit in the main function; in a
 * subprogram it instead returns to the caller.
 */
static int jmp_exit(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	if (!nfp_is_main_function(meta))
		return nfp_subprog_epilogue(nfp_prog, meta);

	return goto_out(nfp_prog, meta);
}
3386 | ||
cd7df56e JK |
3387 | static const instr_cb_t instr_cb[256] = { |
3388 | [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, | |
3389 | [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, | |
3390 | [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, | |
3391 | [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, | |
3392 | [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, | |
3393 | [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, | |
3394 | [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, | |
3395 | [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, | |
3396 | [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, | |
3397 | [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, | |
3398 | [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, | |
3399 | [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, | |
d3d23fdb JW |
3400 | [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64, |
3401 | [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64, | |
2a952b03 JW |
3402 | [BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64, |
3403 | [BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64, | |
254ef4d7 | 3404 | [BPF_ALU64 | BPF_NEG] = neg_reg64, |
991f5b36 | 3405 | [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64, |
cd7df56e | 3406 | [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, |
991f5b36 | 3407 | [BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64, |
cd7df56e | 3408 | [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, |
c217abcc | 3409 | [BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64, |
f43d0f17 | 3410 | [BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64, |
cd7df56e JK |
3411 | [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, |
3412 | [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, | |
3413 | [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, | |
3414 | [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, | |
3415 | [BPF_ALU | BPF_AND | BPF_X] = and_reg, | |
3416 | [BPF_ALU | BPF_AND | BPF_K] = and_imm, | |
3417 | [BPF_ALU | BPF_OR | BPF_X] = or_reg, | |
3418 | [BPF_ALU | BPF_OR | BPF_K] = or_imm, | |
3419 | [BPF_ALU | BPF_ADD | BPF_X] = add_reg, | |
3420 | [BPF_ALU | BPF_ADD | BPF_K] = add_imm, | |
3421 | [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, | |
3422 | [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, | |
d3d23fdb JW |
3423 | [BPF_ALU | BPF_MUL | BPF_X] = mul_reg, |
3424 | [BPF_ALU | BPF_MUL | BPF_K] = mul_imm, | |
2a952b03 JW |
3425 | [BPF_ALU | BPF_DIV | BPF_X] = div_reg, |
3426 | [BPF_ALU | BPF_DIV | BPF_K] = div_imm, | |
254ef4d7 | 3427 | [BPF_ALU | BPF_NEG] = neg_reg, |
ac7a1717 | 3428 | [BPF_ALU | BPF_LSH | BPF_X] = shl_reg, |
cd7df56e | 3429 | [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, |
ac7a1717 JW |
3430 | [BPF_ALU | BPF_RSH | BPF_X] = shr_reg, |
3431 | [BPF_ALU | BPF_RSH | BPF_K] = shr_imm, | |
84708c13 JW |
3432 | [BPF_ALU | BPF_ARSH | BPF_X] = ashr_reg, |
3433 | [BPF_ALU | BPF_ARSH | BPF_K] = ashr_imm, | |
3119d1fd | 3434 | [BPF_ALU | BPF_END | BPF_X] = end_reg32, |
cd7df56e JK |
3435 | [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, |
3436 | [BPF_LD | BPF_ABS | BPF_B] = data_ld1, | |
3437 | [BPF_LD | BPF_ABS | BPF_H] = data_ld2, | |
3438 | [BPF_LD | BPF_ABS | BPF_W] = data_ld4, | |
3439 | [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, | |
3440 | [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, | |
3441 | [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, | |
2ca71441 JK |
3442 | [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1, |
3443 | [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2, | |
cd7df56e | 3444 | [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, |
2ca71441 | 3445 | [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8, |
e663fe38 JK |
3446 | [BPF_STX | BPF_MEM | BPF_B] = mem_stx1, |
3447 | [BPF_STX | BPF_MEM | BPF_H] = mem_stx2, | |
19d0f54e | 3448 | [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, |
e663fe38 | 3449 | [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8, |
dcb0c27f JK |
3450 | [BPF_STX | BPF_XADD | BPF_W] = mem_xadd4, |
3451 | [BPF_STX | BPF_XADD | BPF_DW] = mem_xadd8, | |
e663fe38 JK |
3452 | [BPF_ST | BPF_MEM | BPF_B] = mem_st1, |
3453 | [BPF_ST | BPF_MEM | BPF_H] = mem_st2, | |
3454 | [BPF_ST | BPF_MEM | BPF_W] = mem_st4, | |
3455 | [BPF_ST | BPF_MEM | BPF_DW] = mem_st8, | |
cd7df56e JK |
3456 | [BPF_JMP | BPF_JA | BPF_K] = jump, |
3457 | [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, | |
61dd8f00 JK |
3458 | [BPF_JMP | BPF_JGT | BPF_K] = cmp_imm, |
3459 | [BPF_JMP | BPF_JGE | BPF_K] = cmp_imm, | |
3460 | [BPF_JMP | BPF_JLT | BPF_K] = cmp_imm, | |
3461 | [BPF_JMP | BPF_JLE | BPF_K] = cmp_imm, | |
3462 | [BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm, | |
3463 | [BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm, | |
3464 | [BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm, | |
3465 | [BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm, | |
cd7df56e JK |
3466 | [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, |
3467 | [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, | |
3468 | [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, | |
61dd8f00 JK |
3469 | [BPF_JMP | BPF_JGT | BPF_X] = cmp_reg, |
3470 | [BPF_JMP | BPF_JGE | BPF_X] = cmp_reg, | |
3471 | [BPF_JMP | BPF_JLT | BPF_X] = cmp_reg, | |
3472 | [BPF_JMP | BPF_JLE | BPF_X] = cmp_reg, | |
3473 | [BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg, | |
3474 | [BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg, | |
3475 | [BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg, | |
3476 | [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg, | |
cd7df56e JK |
3477 | [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, |
3478 | [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, | |
46144839 JW |
3479 | [BPF_JMP32 | BPF_JEQ | BPF_K] = jeq32_imm, |
3480 | [BPF_JMP32 | BPF_JGT | BPF_K] = cmp_imm, | |
3481 | [BPF_JMP32 | BPF_JGE | BPF_K] = cmp_imm, | |
3482 | [BPF_JMP32 | BPF_JLT | BPF_K] = cmp_imm, | |
3483 | [BPF_JMP32 | BPF_JLE | BPF_K] = cmp_imm, | |
3484 | [BPF_JMP32 | BPF_JSGT | BPF_K] =cmp_imm, | |
3485 | [BPF_JMP32 | BPF_JSGE | BPF_K] =cmp_imm, | |
3486 | [BPF_JMP32 | BPF_JSLT | BPF_K] =cmp_imm, | |
3487 | [BPF_JMP32 | BPF_JSLE | BPF_K] =cmp_imm, | |
3488 | [BPF_JMP32 | BPF_JSET | BPF_K] =jset_imm, | |
3489 | [BPF_JMP32 | BPF_JNE | BPF_K] = jne_imm, | |
3490 | [BPF_JMP32 | BPF_JEQ | BPF_X] = jeq_reg, | |
3491 | [BPF_JMP32 | BPF_JGT | BPF_X] = cmp_reg, | |
3492 | [BPF_JMP32 | BPF_JGE | BPF_X] = cmp_reg, | |
3493 | [BPF_JMP32 | BPF_JLT | BPF_X] = cmp_reg, | |
3494 | [BPF_JMP32 | BPF_JLE | BPF_X] = cmp_reg, | |
3495 | [BPF_JMP32 | BPF_JSGT | BPF_X] =cmp_reg, | |
3496 | [BPF_JMP32 | BPF_JSGE | BPF_X] =cmp_reg, | |
3497 | [BPF_JMP32 | BPF_JSLT | BPF_X] =cmp_reg, | |
3498 | [BPF_JMP32 | BPF_JSLE | BPF_X] =cmp_reg, | |
3499 | [BPF_JMP32 | BPF_JSET | BPF_X] =jset_reg, | |
3500 | [BPF_JMP32 | BPF_JNE | BPF_X] = jne_reg, | |
2cb230bd | 3501 | [BPF_JMP | BPF_CALL] = call, |
389f263b | 3502 | [BPF_JMP | BPF_EXIT] = jmp_exit, |
cd7df56e JK |
3503 | }; |
3504 | ||
cd7df56e | 3505 | /* --- Assembler logic --- */ |
2178f3f0 QM |
3506 | static int |
3507 | nfp_fixup_immed_relo(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, | |
3508 | struct nfp_insn_meta *jmp_dst, u32 br_idx) | |
3509 | { | |
3510 | if (immed_get_value(nfp_prog->prog[br_idx + 1])) { | |
3511 | pr_err("BUG: failed to fix up callee register saving\n"); | |
3512 | return -EINVAL; | |
3513 | } | |
3514 | ||
3515 | immed_set_value(&nfp_prog->prog[br_idx + 1], jmp_dst->off); | |
3516 | ||
3517 | return 0; | |
3518 | } | |
3519 | ||
cd7df56e JK |
/* Walk all translated jump instructions and patch the NFP branch at the
 * end of each translated block with its destination's real offset.
 * Returns 0 on success, -ELOOP/-EINVAL on internal inconsistencies.
 */
static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta, *jmp_dst;
	u32 idx, br_idx;
	int err;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		if (meta->flags & FLAG_INSN_SKIP_MASK)
			continue;
		if (!is_mbpf_jmp(meta))
			continue;
		/* Subprogram exits were translated as epilogues, not
		 * branches - nothing to fix up for them.
		 */
		if (meta->insn.code == (BPF_JMP | BPF_EXIT) &&
		    !nfp_is_main_function(meta))
			continue;
		if (is_mbpf_helper_call(meta))
			continue;

		/* br_idx = last NFP insn emitted for this BPF insn. */
		if (list_is_last(&meta->l, &nfp_prog->insns))
			br_idx = nfp_prog->last_bpf_off;
		else
			br_idx = list_next_entry(meta, l)->off - 1;

		/* For BPF-to-BPF function call, a stack adjustment sequence is
		 * generated after the return instruction. Therefore, we must
		 * withdraw the length of this sequence to have br_idx pointing
		 * to where the "branch" NFP instruction is expected to be.
		 */
		if (is_mbpf_pseudo_call(meta))
			br_idx -= meta->num_insns_after_br;

		if (!nfp_is_br(nfp_prog->prog[br_idx])) {
			pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
			       br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
			return -ELOOP;
		}

		/* Main-function exit was only sanity-checked above. */
		if (meta->insn.code == (BPF_JMP | BPF_EXIT))
			continue;

		/* Leave special branches for later */
		if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
		    RELO_BR_REL && !is_mbpf_pseudo_call(meta))
			continue;

		if (!meta->jmp_dst) {
			pr_err("Non-exit jump doesn't have destination info recorded!!\n");
			return -ELOOP;
		}

		jmp_dst = meta->jmp_dst;

		if (jmp_dst->flags & FLAG_INSN_SKIP_PREC_DEPENDENT) {
			pr_err("Branch landing on removed instruction!!\n");
			return -ELOOP;
		}

		/* Calls into reg-pushing subprograms carry the callee
		 * offset in the immed load after the branch.
		 */
		if (is_mbpf_pseudo_call(meta) &&
		    nfp_prog->subprog[jmp_dst->subprog_idx].needs_reg_push) {
			err = nfp_fixup_immed_relo(nfp_prog, meta,
						   jmp_dst, br_idx);
			if (err)
				return err;
		}

		if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
		    RELO_BR_REL)
			continue;

		/* Patch every branch emitted for this BPF insn. */
		for (idx = meta->off; idx <= br_idx; idx++) {
			if (!nfp_is_br(nfp_prog->prog[idx]))
				continue;
			br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
		}
	}

	return 0;
}
3597 | ||
/* Program prologue: initialize plen_reg with the packet length by
 * masking the packet-vector length word down to its low 14 bits.
 */
static void nfp_intro(struct nfp_prog *nfp_prog)
{
	wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
	emit_alu(nfp_prog, plen_reg(nfp_prog),
		 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
}
3604 | ||
389f263b QM |
/* Subprogram prologue: stash the return address (placed in ret_reg()
 * by bpf_to_bpf_call()) into the first slot of this stack frame.
 */
static void
nfp_subprog_prologue(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	/* Save return address into the stack. */
	wrp_mov(nfp_prog, reg_lm(0, 0), ret_reg(nfp_prog));
}
3611 | ||
3612 | static void | |
3613 | nfp_start_subprog(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) | |
3614 | { | |
3615 | unsigned int depth = nfp_prog->subprog[meta->subprog_idx].stack_depth; | |
3616 | ||
3617 | nfp_prog->stack_frame_depth = round_up(depth, 4); | |
3618 | nfp_subprog_prologue(nfp_prog, meta); | |
3619 | } | |
3620 | ||
3621 | bool nfp_is_subprog_start(struct nfp_insn_meta *meta) | |
3622 | { | |
3623 | return meta->flags & FLAG_INSN_IS_SUBPROG_START; | |
3624 | } | |
3625 | ||
e3b8baf0 JK |
/* Epilogue for TC direct-action programs: map the BPF return code in R0
 * to the NFP result word and branch to next-packet handling.  The
 * instruction order matters (branch defer slots); do not reorder.
 */
static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
{
	/* TC direct-action mode:
	 *   0,1   ok       NOT SUPPORTED[1]
	 *   2   drop  0x22 -> drop,  count as stat1
	 *   4,5 nuke  0x02 -> drop
	 *   7  redir  0x44 -> redir, count as stat2
	 *   * unspec  0x11 -> pass,  count as stat0
	 *
	 * [1] We can't support OK and RECLASSIFY because we can't tell TC
	 *     the exact decision made.  We are forced to support UNSPEC
	 *     to handle aborts so that's the only one we handle for passing
	 *     packets up the stack.
	 */
	/* Target for aborts */
	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);

	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);

	/* Target for normal exits */
	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);

	/* if R0 > 7 jump to abort */
	emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);

	/* NOTE(review): presumably per-verdict nibble lookup tables for the
	 * code mapping in the table above - confirm against the ABI.
	 */
	wrp_immed(nfp_prog, reg_b(2), 0x41221211);
	wrp_immed(nfp_prog, reg_b(3), 0x41001211);

	emit_shf(nfp_prog, reg_a(1),
		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);

	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
	emit_shf(nfp_prog, reg_a(2),
		 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);

	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
	emit_shf(nfp_prog, reg_b(2),
		 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);

	emit_shf(nfp_prog, reg_b(2),
		 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
}
3676 | ||
6d677075 JK |
/* Epilogue for XDP programs: map the XDP verdict in R0 to the NFP result
 * word and branch to next-packet handling.  Instruction order matters
 * (branch defer slots); do not reorder.
 */
static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
{
	/* XDP return codes:
	 *   0 aborted  0x82 -> drop,  count as stat3
	 *   1    drop  0x22 -> drop,  count as stat1
	 *   2    pass  0x11 -> pass,  count as stat0
	 *   3      tx  0x44 -> redir, count as stat2
	 *   * unknown  0x82 -> drop,  count as stat3
	 */
	/* Target for aborts */
	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);

	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);

	/* Target for normal exits */
	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);

	/* if R0 > 3 jump to abort */
	emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);

	/* NOTE(review): presumably a byte-per-verdict lookup table for the
	 * mapping in the table above - confirm against the ABI.
	 */
	wrp_immed(nfp_prog, reg_b(2), 0x44112282);

	emit_shf(nfp_prog, reg_a(1),
		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);

	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
	emit_shf(nfp_prog, reg_b(2),
		 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);

	emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);

	wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
}
3715 | ||
44549623 QM |
3716 | static bool nfp_prog_needs_callee_reg_save(struct nfp_prog *nfp_prog) |
3717 | { | |
3718 | unsigned int idx; | |
3719 | ||
3720 | for (idx = 1; idx < nfp_prog->subprog_cnt; idx++) | |
3721 | if (nfp_prog->subprog[idx].needs_reg_push) | |
3722 | return true; | |
3723 | ||
3724 | return false; | |
3725 | } | |
3726 | ||
389f263b QM |
/* Emit the shared subroutine saving callee-saved BPF registers R6~R9
 * (two 32-bit halves each) into this frame's local memory.  Emission
 * order is significant: the return jump uses 3 defer slots.
 */
static void nfp_push_callee_registers(struct nfp_prog *nfp_prog)
{
	u8 reg;

	/* Subroutine: Save all callee saved registers (R6 ~ R9).
	 * imm_b() holds the return address.
	 */
	nfp_prog->tgt_call_push_regs = nfp_prog_current_offset(nfp_prog);
	for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
		u8 adj = (reg - BPF_REG_0) * 2;	/* GPR pair of this BPF reg */
		u8 idx = (reg - BPF_REG_6) * 2;	/* LM slot pair for this reg */

		/* The first slot in the stack frame is used to push the return
		 * address in bpf_to_bpf_call(), start just after.
		 */
		wrp_mov(nfp_prog, reg_lm(0, 1 + idx), reg_b(adj));

		if (reg == BPF_REG_8)
			/* Prepare to jump back, last 3 insns use defer slots */
			emit_rtn(nfp_prog, imm_b(nfp_prog), 3);

		wrp_mov(nfp_prog, reg_lm(0, 1 + idx + 1), reg_b(adj + 1));
	}
}
3751 | ||
/* Emit the shared subroutine restoring callee-saved BPF registers R6~R9
 * from this frame's local memory.  Mirror image of
 * nfp_push_callee_registers(); the return jump uses 3 defer slots.
 */
static void nfp_pop_callee_registers(struct nfp_prog *nfp_prog)
{
	u8 reg;

	/* Subroutine: Restore all callee saved registers (R6 ~ R9).
	 * ret_reg() holds the return address.
	 */
	nfp_prog->tgt_call_pop_regs = nfp_prog_current_offset(nfp_prog);
	for (reg = BPF_REG_6; reg <= BPF_REG_9; reg++) {
		u8 adj = (reg - BPF_REG_0) * 2;	/* GPR pair of this BPF reg */
		u8 idx = (reg - BPF_REG_6) * 2;	/* LM slot pair for this reg */

		/* The first slot in the stack frame holds the return address,
		 * start popping just after that.
		 */
		wrp_mov(nfp_prog, reg_both(adj), reg_lm(0, 1 + idx));

		if (reg == BPF_REG_8)
			/* Prepare to jump back, last 3 insns use defer slots */
			emit_rtn(nfp_prog, ret_reg(nfp_prog), 3);

		wrp_mov(nfp_prog, reg_both(adj + 1), reg_lm(0, 1 + idx + 1));
	}
}
3776 | ||
cd7df56e JK |
3777 | static void nfp_outro(struct nfp_prog *nfp_prog) |
3778 | { | |
012bb8a8 JK |
3779 | switch (nfp_prog->type) { |
3780 | case BPF_PROG_TYPE_SCHED_CLS: | |
e3b8baf0 JK |
3781 | nfp_outro_tc_da(nfp_prog); |
3782 | break; | |
012bb8a8 | 3783 | case BPF_PROG_TYPE_XDP: |
6d677075 JK |
3784 | nfp_outro_xdp(nfp_prog); |
3785 | break; | |
012bb8a8 JK |
3786 | default: |
3787 | WARN_ON(1); | |
cd7df56e | 3788 | } |
389f263b | 3789 | |
44549623 | 3790 | if (!nfp_prog_needs_callee_reg_save(nfp_prog)) |
389f263b QM |
3791 | return; |
3792 | ||
3793 | nfp_push_callee_registers(nfp_prog); | |
3794 | nfp_pop_callee_registers(nfp_prog); | |
cd7df56e JK |
3795 | } |
3796 | ||
/* Main translation driver: emit prologue, translate every BPF insn via
 * the instr_cb table, emit the epilogue and nop padding, then fix up
 * branch offsets.  Returns 0 on success or a negative errno.
 */
static int nfp_translate(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta;
	unsigned int depth;
	int err;

	/* Start in the main function's stack frame. */
	depth = nfp_prog->subprog[0].stack_depth;
	nfp_prog->stack_frame_depth = round_up(depth, 4);

	nfp_intro(nfp_prog);
	if (nfp_prog->error)
		return nfp_prog->error;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		instr_cb_t cb = instr_cb[meta->insn.code];

		/* Record which NFP offset this BPF insn translates to. */
		meta->off = nfp_prog_current_offset(nfp_prog);

		if (nfp_is_subprog_start(meta)) {
			nfp_start_subprog(nfp_prog, meta);
			if (nfp_prog->error)
				return nfp_prog->error;
		}

		if (meta->flags & FLAG_INSN_SKIP_MASK) {
			nfp_prog->n_translated++;
			continue;
		}

		/* The previous insn may have claimed this one as part of
		 * a two-insn sequence via double_cb.
		 */
		if (nfp_meta_has_prev(nfp_prog, meta) &&
		    nfp_meta_prev(meta)->double_cb)
			cb = nfp_meta_prev(meta)->double_cb;
		if (!cb)
			return -ENOENT;
		err = cb(nfp_prog, meta);
		if (err)
			return err;
		if (nfp_prog->error)
			return nfp_prog->error;

		nfp_prog->n_translated++;
	}

	nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;

	nfp_outro(nfp_prog);
	if (nfp_prog->error)
		return nfp_prog->error;

	/* Pad the end with nops (see NFP_USTORE_PREFETCH_WINDOW). */
	wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
	if (nfp_prog->error)
		return nfp_prog->error;

	return nfp_fixup_branches(nfp_prog);
}
3852 | ||
cd7df56e JK |
3853 | /* --- Optimizations --- */ |
/* Skip the register-initialization prologue of cBPF-converted programs:
 * leading "xor r,r" insns and the "r6 = r1" ctx save (the skb pointer is
 * not used on NFP).  Stops at the first insn that does not match.
 */
static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		struct bpf_insn insn = meta->insn;

		/* Programs converted from cBPF start with register xoring */
		if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
		    insn.src_reg == insn.dst_reg)
			continue;

		/* Programs start with R6 = R1 but we ignore the skb pointer */
		if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
		    insn.src_reg == 1 && insn.dst_reg == 6)
			meta->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;

		/* Return as soon as something doesn't match */
		if (!(meta->flags & FLAG_INSN_SKIP_MASK))
			return;
	}
}
3876 | ||
6c59500c JK |
3877 | /* abs(insn.imm) will fit better into unrestricted reg immediate - |
3878 | * convert add/sub of a negative number into a sub/add of a positive one. | |
3879 | */ | |
3880 | static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog) | |
3881 | { | |
3882 | struct nfp_insn_meta *meta; | |
3883 | ||
3884 | list_for_each_entry(meta, &nfp_prog->insns, l) { | |
3885 | struct bpf_insn insn = meta->insn; | |
3886 | ||
91a87a58 | 3887 | if (meta->flags & FLAG_INSN_SKIP_MASK) |
6c59500c JK |
3888 | continue; |
3889 | ||
46144839 | 3890 | if (!is_mbpf_alu(meta) && !is_mbpf_jmp(meta)) |
6c59500c JK |
3891 | continue; |
3892 | if (BPF_SRC(insn.code) != BPF_K) | |
3893 | continue; | |
3894 | if (insn.imm >= 0) | |
3895 | continue; | |
3896 | ||
46144839 | 3897 | if (is_mbpf_jmp(meta)) { |
7bdc97be JK |
3898 | switch (BPF_OP(insn.code)) { |
3899 | case BPF_JGE: | |
3900 | case BPF_JSGE: | |
3901 | case BPF_JLT: | |
3902 | case BPF_JSLT: | |
3903 | meta->jump_neg_op = true; | |
3904 | break; | |
3905 | default: | |
3906 | continue; | |
3907 | } | |
3908 | } else { | |
3909 | if (BPF_OP(insn.code) == BPF_ADD) | |
3910 | insn.code = BPF_CLASS(insn.code) | BPF_SUB; | |
3911 | else if (BPF_OP(insn.code) == BPF_SUB) | |
3912 | insn.code = BPF_CLASS(insn.code) | BPF_ADD; | |
3913 | else | |
3914 | continue; | |
6c59500c | 3915 | |
7bdc97be JK |
3916 | meta->insn.code = insn.code | BPF_K; |
3917 | } | |
6c59500c JK |
3918 | |
3919 | meta->insn.imm = -insn.imm; | |
3920 | } | |
3921 | } | |
3922 | ||
cd7df56e JK |
3923 | /* Remove masking after load since our load guarantees this is not needed */ |
static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta1, *meta2;
	/* Mask that would be redundant for each LD_ABS/LD_IND width. */
	const s32 exp_mask[] = {
		[BPF_B] = 0x000000ffU,
		[BPF_H] = 0x0000ffffU,
		[BPF_W] = 0xffffffffU,
	};

	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
		struct bpf_insn insn, next;

		insn = meta1->insn;
		next = meta2->insn;

		if (BPF_CLASS(insn.code) != BPF_LD)
			continue;
		if (BPF_MODE(insn.code) != BPF_ABS &&
		    BPF_MODE(insn.code) != BPF_IND)
			continue;

		if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
			continue;

		/* Only drop the AND when the mask matches the load width. */
		if (!exp_mask[BPF_SIZE(insn.code)])
			continue;
		if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
			continue;

		/* LD_ABS/LD_IND operate on R0 implicitly. */
		if (next.src_reg || next.dst_reg)
			continue;

		/* Can't remove an insn some jump lands on. */
		if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
			continue;

		meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
	}
}
3962 | ||
/* Mark for removal the pair of opposite 64-bit shifts by 32 that follows
 * a word-sized LD_ABS/LD_IND - the load already produces the value the
 * shift pair would.
 */
static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta1, *meta2, *meta3;

	nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
		struct bpf_insn insn, next1, next2;

		insn = meta1->insn;
		next1 = meta2->insn;
		next2 = meta3->insn;

		if (BPF_CLASS(insn.code) != BPF_LD)
			continue;
		if (BPF_MODE(insn.code) != BPF_ABS &&
		    BPF_MODE(insn.code) != BPF_IND)
			continue;
		if (BPF_SIZE(insn.code) != BPF_W)
			continue;

		/* Shifts must be a lsh/rsh pair, in either order. */
		if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
		      next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
		    !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
		      next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
			continue;

		/* LD_ABS/LD_IND operate on R0 implicitly. */
		if (next1.src_reg || next1.dst_reg ||
		    next2.src_reg || next2.dst_reg)
			continue;

		/* Both shifts must be by exactly 32 bits. */
		if (next1.imm != 0x20 || next2.imm != 0x20)
			continue;

		/* Can't remove insns some jump lands on. */
		if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
		    meta3->flags & FLAG_INSN_IS_JUMP_DST)
			continue;

		meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
		meta3->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
	}
}
4003 | ||
6bc7103c JW |
4004 | /* load/store pair that forms a memory copy should look like the following: | |
4005 | * | |
4006 | * ld_width R, [addr_src + offset_src] | |
4007 | * st_width [addr_dest + offset_dest], R | |
4008 | * | |
4009 | * The destination register of load and source register of store should | |
4010 | * be the same, load and store should also perform at the same width. | |
4011 | * If either of addr_src or addr_dest is stack pointer, we don't do the | |
4012 | * CPP optimization as stack is modelled by registers on NFP. | |
4013 | */ | |
4014 | static bool | |
4015 | curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta, | |
4016 | struct nfp_insn_meta *st_meta) | |
4017 | { | |
4018 | struct bpf_insn *ld = &ld_meta->insn; | |
4019 | struct bpf_insn *st = &st_meta->insn; | |
4020 | ||
4021 | if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta)) | |
4022 | return false; | |
4023 | ||
cc0dff6d JW |
4024 | if (ld_meta->ptr.type != PTR_TO_PACKET && |
4025 | ld_meta->ptr.type != PTR_TO_MAP_VALUE) | |
6bc7103c JW |
4026 | return false; |
4027 | ||
4028 | if (st_meta->ptr.type != PTR_TO_PACKET) | |
4029 | return false; | |
4030 | ||
4031 | if (BPF_SIZE(ld->code) != BPF_SIZE(st->code)) | |
4032 | return false; | |
4033 | ||
4034 | if (ld->dst_reg != st->src_reg) | |
4035 | return false; | |
4036 | ||
4037 | /* There is jump to the store insn in this pair. */ | |
4038 | if (st_meta->flags & FLAG_INSN_IS_JUMP_DST) | |
4039 | return false; | |
4040 | ||
4041 | return true; | |
4042 | } | |
4043 | ||
4044 | /* Currently, we only support chaining load/store pairs if: | |
4045 | * | |
4046 | * - Their address base registers are the same. | |
4047 | * - Their address offsets are in the same order. | |
4048 | * - They operate at the same memory width. | |
4049 | * - There is no jump into the middle of them. | |
4050 | */ | |
4051 | static bool | |
4052 | curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta, | |
4053 | struct nfp_insn_meta *st_meta, | |
4054 | struct bpf_insn *prev_ld, | |
4055 | struct bpf_insn *prev_st) | |
4056 | { | |
4057 | u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst; | |
4058 | struct bpf_insn *ld = &ld_meta->insn; | |
4059 | struct bpf_insn *st = &st_meta->insn; | |
4060 | s16 prev_ld_off, prev_st_off; | |
4061 | ||
4062 | /* This pair is the start pair. */ | |
4063 | if (!prev_ld) | |
4064 | return true; | |
4065 | ||
4066 | prev_size = BPF_LDST_BYTES(prev_ld); | |
4067 | curr_size = BPF_LDST_BYTES(ld); | |
4068 | prev_ld_base = prev_ld->src_reg; | |
4069 | prev_st_base = prev_st->dst_reg; | |
4070 | prev_ld_dst = prev_ld->dst_reg; | |
4071 | prev_ld_off = prev_ld->off; | |
4072 | prev_st_off = prev_st->off; | |
4073 | ||
4074 | if (ld->dst_reg != prev_ld_dst) | |
4075 | return false; | |
4076 | ||
4077 | if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base) | |
4078 | return false; | |
4079 | ||
4080 | if (curr_size != prev_size) | |
4081 | return false; | |
4082 | ||
4083 | /* There is jump to the head of this pair. */ | |
4084 | if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST) | |
4085 | return false; | |
4086 | ||
4087 | /* Both in ascending order. */ | |
4088 | if (prev_ld_off + prev_size == ld->off && | |
4089 | prev_st_off + prev_size == st->off) | |
4090 | return true; | |
4091 | ||
4092 | /* Both in descending order. */ | |
4093 | if (ld->off + curr_size == prev_ld_off && | |
4094 | st->off + curr_size == prev_st_off) | |
4095 | return true; | |
4096 | ||
4097 | return false; | |
4098 | } | |
4099 | ||
4100 | /* Return TRUE if cross memory access happens. Cross memory access means | |
4101 | * store area is overlapping with load area that a later load might load | |
4102 | * the value from previous store, for this case we can't treat the sequence | |
4103 | * as an memory copy. | |
4104 | */ | |
4105 | static bool | |
4106 | cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta, | |
4107 | struct nfp_insn_meta *head_st_meta) | |
4108 | { | |
4109 | s16 head_ld_off, head_st_off, ld_off; | |
4110 | ||
4111 | /* Different pointer types does not overlap. */ | |
4112 | if (head_ld_meta->ptr.type != head_st_meta->ptr.type) | |
4113 | return false; | |
4114 | ||
4115 | /* load and store are both PTR_TO_PACKET, check ID info. */ | |
4116 | if (head_ld_meta->ptr.id != head_st_meta->ptr.id) | |
4117 | return true; | |
4118 | ||
4119 | /* Canonicalize the offsets. Turn all of them against the original | |
4120 | * base register. | |
4121 | */ | |
4122 | head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off; | |
4123 | head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off; | |
4124 | ld_off = ld->off + head_ld_meta->ptr.off; | |
4125 | ||
4126 | /* Ascending order cross. */ | |
4127 | if (ld_off > head_ld_off && | |
4128 | head_ld_off < head_st_off && ld_off >= head_st_off) | |
4129 | return true; | |
4130 | ||
4131 | /* Descending order cross. */ | |
4132 | if (ld_off < head_ld_off && | |
4133 | head_ld_off > head_st_off && ld_off <= head_st_off) | |
4134 | return true; | |
4135 | ||
4136 | return false; | |
4137 | } | |
4138 | ||
/* This pass tries to identify the following instruction sequences.
 *
 * load R, [regA + offA]
 * store [regB + offB], R
 * load R, [regA + offA + const_imm_A]
 * store [regB + offB + const_imm_A], R
 * load R, [regA + offA + 2 * const_imm_A]
 * store [regB + offB + 2 * const_imm_A], R
 * ...
 *
 * The above sequence is typically generated by the compiler when lowering
 * memcpy.  NFP prefers using CPP instructions to accelerate it, so the
 * total copy length is accumulated on the head load's meta and the
 * remaining pairs are flagged to be skipped at translation time.
 */
static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *head_ld_meta = NULL;
	struct nfp_insn_meta *head_st_meta = NULL;
	struct nfp_insn_meta *meta1, *meta2;
	struct bpf_insn *prev_ld = NULL;
	struct bpf_insn *prev_st = NULL;
	u8 count = 0;

	nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
		struct bpf_insn *ld = &meta1->insn;
		struct bpf_insn *st = &meta2->insn;

		/* Reset record status if any of the following is true:
		 * - The current insn pair is not load/store.
		 * - The load/store pair doesn't chain with previous one.
		 * - The chained load/store pair crossed with previous pair.
		 * - The chained load/store pair has a total size of memory
		 *   copy beyond 128 bytes which is the maximum length a
		 *   single NFP CPP command can transfer.
		 */
		if (!curr_pair_is_memcpy(meta1, meta2) ||
		    !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
						   prev_st) ||
		    (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
						       head_st_meta) ||
				      head_ld_meta->ldst_gather_len >= 128))) {
			if (!count)
				continue;

			if (count > 1) {
				s16 prev_ld_off = prev_ld->off;
				s16 prev_st_off = prev_st->off;
				s16 head_ld_off = head_ld_meta->insn.off;

				/* Descending chain: rewrite the head pair to
				 * the lowest offsets and negate the gathered
				 * length to record the direction.
				 */
				if (prev_ld_off < head_ld_off) {
					head_ld_meta->insn.off = prev_ld_off;
					head_st_meta->insn.off = prev_st_off;
					head_ld_meta->ldst_gather_len =
						-head_ld_meta->ldst_gather_len;
				}

				head_ld_meta->paired_st = &head_st_meta->insn;
				head_st_meta->flags |=
					FLAG_INSN_SKIP_PREC_DEPENDENT;
			} else {
				/* Chain of a single pair - nothing gathered. */
				head_ld_meta->ldst_gather_len = 0;
			}

			/* If the chain is ended by a load/store pair then this
			 * could serve as the new head of the next chain.
			 */
			if (curr_pair_is_memcpy(meta1, meta2)) {
				head_ld_meta = meta1;
				head_st_meta = meta2;
				head_ld_meta->ldst_gather_len =
					BPF_LDST_BYTES(ld);
				meta1 = nfp_meta_next(meta1);
				meta2 = nfp_meta_next(meta2);
				prev_ld = ld;
				prev_st = st;
				count = 1;
			} else {
				head_ld_meta = NULL;
				head_st_meta = NULL;
				prev_ld = NULL;
				prev_st = NULL;
				count = 0;
			}

			continue;
		}

		if (!head_ld_meta) {
			head_ld_meta = meta1;
			head_st_meta = meta2;
		} else {
			/* Non-head pairs are folded into the head pair and
			 * skipped at translation time.
			 */
			meta1->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
			meta2->flags |= FLAG_INSN_SKIP_PREC_DEPENDENT;
		}

		head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
		meta1 = nfp_meta_next(meta1);
		meta2 = nfp_meta_next(meta2);
		prev_ld = ld;
		prev_st = st;
		count++;
	}
}
4241 | ||
/* Group nearby packet loads that share the same packet-pointer base into
 * ranges of at most 64 bytes.  The head load of each range is flagged with
 * pkt_cache.do_init; follower loads inherit the head's range so they can
 * read from the cached packet data instead of issuing separate accesses.
 */
static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
{
	struct nfp_insn_meta *meta, *range_node = NULL;
	s16 range_start = 0, range_end = 0;
	bool cache_avail = false;
	struct bpf_insn *insn;
	s32 range_ptr_off = 0;
	u32 range_ptr_id = 0;

	list_for_each_entry(meta, &nfp_prog->insns, l) {
		/* A jump target may be reached with stale packet data. */
		if (meta->flags & FLAG_INSN_IS_JUMP_DST)
			cache_avail = false;

		if (meta->flags & FLAG_INSN_SKIP_MASK)
			continue;

		insn = &meta->insn;

		/* Packet stores and helper calls may modify packet data,
		 * invalidating any cached contents.
		 */
		if (is_mbpf_store_pkt(meta) ||
		    insn->code == (BPF_JMP | BPF_CALL) ||
		    is_mbpf_classic_store_pkt(meta) ||
		    is_mbpf_classic_load(meta)) {
			cache_avail = false;
			continue;
		}

		if (!is_mbpf_load(meta))
			continue;

		/* Only plain packet loads participate; loads already folded
		 * into a ldst_gather copy are excluded.
		 */
		if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
			cache_avail = false;
			continue;
		}

		if (!cache_avail) {
			cache_avail = true;
			if (range_node)
				goto end_current_then_start_new;
			goto start_new;
		}

		/* Check ID to make sure two reads share the same
		 * variable offset against PTR_TO_PACKET, and check OFF
		 * to make sure they also share the same constant
		 * offset.
		 *
		 * OFFs don't really need to be the same, because they
		 * are the constant offsets against PTR_TO_PACKET, so
		 * for different OFFs, we could canonicalize them to
		 * offsets against original packet pointer. We don't
		 * support this.
		 */
		if (meta->ptr.id == range_ptr_id &&
		    meta->ptr.off == range_ptr_off) {
			s16 new_start = range_start;
			s16 end, off = insn->off;
			s16 new_end = range_end;
			bool changed = false;

			if (off < range_start) {
				new_start = off;
				changed = true;
			}

			end = off + BPF_LDST_BYTES(insn);
			if (end > range_end) {
				new_end = end;
				changed = true;
			}

			if (!changed)
				continue;

			/* 64 bytes is the cache capacity; a wider range ends
			 * the current one and starts a new one below.
			 */
			if (new_end - new_start <= 64) {
				/* Install new range. */
				range_start = new_start;
				range_end = new_end;
				continue;
			}
		}

end_current_then_start_new:
		range_node->pkt_cache.range_start = range_start;
		range_node->pkt_cache.range_end = range_end;
start_new:
		range_node = meta;
		range_node->pkt_cache.do_init = true;
		range_ptr_id = range_node->ptr.id;
		range_ptr_off = range_node->ptr.off;
		range_start = insn->off;
		range_end = insn->off + BPF_LDST_BYTES(insn);
	}

	/* Close the last open range, if any. */
	if (range_node) {
		range_node->pkt_cache.range_start = range_start;
		range_node->pkt_cache.range_end = range_end;
	}

	/* Second pass: propagate each head load's range to the follower
	 * loads belonging to the same cache range.
	 */
	list_for_each_entry(meta, &nfp_prog->insns, l) {
		if (meta->flags & FLAG_INSN_SKIP_MASK)
			continue;

		if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
			if (meta->pkt_cache.do_init) {
				range_start = meta->pkt_cache.range_start;
				range_end = meta->pkt_cache.range_end;
			} else {
				meta->pkt_cache.range_start = range_start;
				meta->pkt_cache.range_end = range_end;
			}
		}
	}
}
4355 | ||
/* Run the instruction-level optimization passes in order.  Ordering is
 * relevant: ldst_gather runs before pkt_cache, which explicitly excludes
 * loads with a non-zero ldst_gather_len.
 *
 * Return: always 0; the int return keeps the call site uniform with the
 * other JIT stages.
 */
static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
{
	nfp_bpf_opt_reg_init(nfp_prog);

	nfp_bpf_opt_neg_add_sub(nfp_prog);
	nfp_bpf_opt_ld_mask(nfp_prog);
	nfp_bpf_opt_ld_shift(nfp_prog);
	nfp_bpf_opt_ldst_gather(nfp_prog);
	nfp_bpf_opt_pkt_cache(nfp_prog);

	return 0;
}
4368 | ||
b4264c96 JK |
4369 | static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog) |
4370 | { | |
4371 | struct nfp_insn_meta *meta1, *meta2; | |
4372 | struct nfp_bpf_map *nfp_map; | |
4373 | struct bpf_map *map; | |
ab01f4ac | 4374 | u32 id; |
b4264c96 JK |
4375 | |
4376 | nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { | |
91a87a58 JK |
4377 | if (meta1->flags & FLAG_INSN_SKIP_MASK || |
4378 | meta2->flags & FLAG_INSN_SKIP_MASK) | |
b4264c96 JK |
4379 | continue; |
4380 | ||
4381 | if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) || | |
4382 | meta1->insn.src_reg != BPF_PSEUDO_MAP_FD) | |
4383 | continue; | |
4384 | ||
4385 | map = (void *)(unsigned long)((u32)meta1->insn.imm | | |
4386 | (u64)meta2->insn.imm << 32); | |
ab01f4ac JK |
4387 | if (bpf_map_offload_neutral(map)) { |
4388 | id = map->id; | |
4389 | } else { | |
4390 | nfp_map = map_to_offmap(map)->dev_priv; | |
4391 | id = nfp_map->tid; | |
4392 | } | |
b4264c96 | 4393 | |
ab01f4ac | 4394 | meta1->insn.imm = id; |
b4264c96 JK |
4395 | meta2->insn.imm = 0; |
4396 | } | |
4397 | ||
4398 | return 0; | |
4399 | } | |
4400 | ||
2314fe9e | 4401 | static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) |
fd068ddc | 4402 | { |
2314fe9e | 4403 | __le64 *ustore = (__force __le64 *)prog; |
fd068ddc JK |
4404 | int i; |
4405 | ||
2314fe9e | 4406 | for (i = 0; i < len; i++) { |
fd068ddc JK |
4407 | int err; |
4408 | ||
2314fe9e | 4409 | err = nfp_ustore_check_valid_no_ecc(prog[i]); |
fd068ddc JK |
4410 | if (err) |
4411 | return err; | |
4412 | ||
2314fe9e | 4413 | ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i])); |
fd068ddc JK |
4414 | } |
4415 | ||
4416 | return 0; | |
4417 | } | |
4418 | ||
44a12ecc JK |
4419 | static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog) |
4420 | { | |
4421 | void *prog; | |
4422 | ||
4423 | prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL); | |
4424 | if (!prog) | |
4425 | return; | |
4426 | ||
4427 | nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64); | |
4428 | memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len); | |
4429 | kvfree(nfp_prog->prog); | |
4430 | nfp_prog->prog = prog; | |
4431 | } | |
4432 | ||
c6c580d7 | 4433 | int nfp_bpf_jit(struct nfp_prog *nfp_prog) |
cd7df56e | 4434 | { |
cd7df56e JK |
4435 | int ret; |
4436 | ||
b4264c96 JK |
4437 | ret = nfp_bpf_replace_map_ptrs(nfp_prog); |
4438 | if (ret) | |
4439 | return ret; | |
4440 | ||
cd7df56e JK |
4441 | ret = nfp_bpf_optimize(nfp_prog); |
4442 | if (ret) | |
9314c442 | 4443 | return ret; |
cd7df56e JK |
4444 | |
4445 | ret = nfp_translate(nfp_prog); | |
4446 | if (ret) { | |
4447 | pr_err("Translation failed with error %d (translated: %u)\n", | |
4448 | ret, nfp_prog->n_translated); | |
9314c442 | 4449 | return -EINVAL; |
cd7df56e JK |
4450 | } |
4451 | ||
44a12ecc JK |
4452 | nfp_bpf_prog_trim(nfp_prog); |
4453 | ||
2314fe9e | 4454 | return ret; |
cd7df56e | 4455 | } |
1549921d | 4456 | |
e2fc6114 | 4457 | void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog) |
1549921d JK |
4458 | { |
4459 | struct nfp_insn_meta *meta; | |
4460 | ||
4461 | /* Another pass to record jump information. */ | |
4462 | list_for_each_entry(meta, &nfp_prog->insns, l) { | |
e3b49dc6 | 4463 | struct nfp_insn_meta *dst_meta; |
1549921d | 4464 | u64 code = meta->insn.code; |
e3b49dc6 QM |
4465 | unsigned int dst_idx; |
4466 | bool pseudo_call; | |
1549921d | 4467 | |
46144839 | 4468 | if (!is_mbpf_jmp(meta)) |
e3b49dc6 QM |
4469 | continue; |
4470 | if (BPF_OP(code) == BPF_EXIT) | |
4471 | continue; | |
4472 | if (is_mbpf_helper_call(meta)) | |
4473 | continue; | |
1549921d | 4474 | |
e3b49dc6 QM |
4475 | /* If opcode is BPF_CALL at this point, this can only be a |
4476 | * BPF-to-BPF call (a.k.a pseudo call). | |
4477 | */ | |
4478 | pseudo_call = BPF_OP(code) == BPF_CALL; | |
1549921d | 4479 | |
e3b49dc6 QM |
4480 | if (pseudo_call) |
4481 | dst_idx = meta->n + 1 + meta->insn.imm; | |
4482 | else | |
4483 | dst_idx = meta->n + 1 + meta->insn.off; | |
4484 | ||
e2fc6114 | 4485 | dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_idx); |
e3b49dc6 QM |
4486 | |
4487 | if (pseudo_call) | |
4488 | dst_meta->flags |= FLAG_INSN_IS_SUBPROG_START; | |
4489 | ||
4490 | dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; | |
4491 | meta->jmp_dst = dst_meta; | |
1549921d JK |
4492 | } |
4493 | } | |
2314fe9e | 4494 | |
74801e50 QM |
4495 | bool nfp_bpf_supported_opcode(u8 code) |
4496 | { | |
4497 | return !!instr_cb[code]; | |
4498 | } | |
4499 | ||
/* Duplicate the translated program and resolve per-vNIC relocations:
 * branch targets, helper call addresses and relocated immediates.  The
 * copy is then validated and converted to ustore format with ECC.
 *
 * Return: newly allocated relocated program, or ERR_PTR() on failure.
 * The caller owns (and must kfree()) the returned buffer.
 */
void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
{
	unsigned int i;
	u64 *prog;
	int err;

	prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64),
		       GFP_KERNEL);
	if (!prog)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < nfp_prog->prog_len; i++) {
		enum nfp_relo_type special;
		u32 val;
		u16 off;

		special = FIELD_GET(OP_RELO_TYPE, prog[i]);
		switch (special) {
		case RELO_NONE:
			continue;
		case RELO_BR_REL:
			/* Relative branch: bias by the vNIC start offset. */
			br_add_offset(&prog[i], bv->start_off);
			break;
		case RELO_BR_GO_OUT:
			br_set_offset(&prog[i],
				      nfp_prog->tgt_out + bv->start_off);
			break;
		case RELO_BR_GO_ABORT:
			br_set_offset(&prog[i],
				      nfp_prog->tgt_abort + bv->start_off);
			break;
		case RELO_BR_GO_CALL_PUSH_REGS:
			if (!nfp_prog->tgt_call_push_regs) {
				pr_err("BUG: failed to detect subprogram registers needs\n");
				err = -EINVAL;
				goto err_free_prog;
			}
			off = nfp_prog->tgt_call_push_regs + bv->start_off;
			br_set_offset(&prog[i], off);
			break;
		case RELO_BR_GO_CALL_POP_REGS:
			if (!nfp_prog->tgt_call_pop_regs) {
				pr_err("BUG: failed to detect subprogram registers needs\n");
				err = -EINVAL;
				goto err_free_prog;
			}
			off = nfp_prog->tgt_call_pop_regs + bv->start_off;
			br_set_offset(&prog[i], off);
			break;
		case RELO_BR_NEXT_PKT:
			br_set_offset(&prog[i], bv->tgt_done);
			break;
		case RELO_BR_HELPER:
			/* The helper ID was stashed in the branch offset,
			 * biased by BR_OFF_RELO; map it to the firmware's
			 * helper entry point.
			 */
			val = br_get_offset(prog[i]);
			val -= BR_OFF_RELO;
			switch (val) {
			case BPF_FUNC_map_lookup_elem:
				val = nfp_prog->bpf->helpers.map_lookup;
				break;
			case BPF_FUNC_map_update_elem:
				val = nfp_prog->bpf->helpers.map_update;
				break;
			case BPF_FUNC_map_delete_elem:
				val = nfp_prog->bpf->helpers.map_delete;
				break;
			case BPF_FUNC_perf_event_output:
				val = nfp_prog->bpf->helpers.perf_event_output;
				break;
			default:
				pr_err("relocation of unknown helper %d\n",
				       val);
				err = -EINVAL;
				goto err_free_prog;
			}
			br_set_offset(&prog[i], val);
			break;
		case RELO_IMMED_REL:
			immed_add_value(&prog[i], bv->start_off);
			break;
		}

		/* Clear the relocation marker bits. */
		prog[i] &= ~OP_RELO_TYPE;
	}

	err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len);
	if (err)
		goto err_free_prog;

	return prog;

err_free_prog:
	kfree(prog);
	return ERR_PTR(err);
}