Commit | Line | Data |
---|---|---|
156d0e29 NR |
1 | /* |
2 | * bpf_jit_comp64.c: eBPF JIT compiler | |
3 | * | |
4 | * Copyright 2016 Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> | |
5 | * IBM Corporation | |
6 | * | |
7 | * Based on the powerpc classic BPF JIT compiler by Matt Evans | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * as published by the Free Software Foundation; version 2 | |
12 | * of the License. | |
13 | */ | |
14 | #include <linux/moduleloader.h> | |
15 | #include <asm/cacheflush.h> | |
16 | #include <linux/netdevice.h> | |
17 | #include <linux/filter.h> | |
18 | #include <linux/if_vlan.h> | |
19 | #include <asm/kprobes.h> | |
ce076141 | 20 | #include <linux/bpf.h> |
156d0e29 NR |
21 | |
22 | #include "bpf_jit64.h" | |
23 | ||
24 | int bpf_jit_enable __read_mostly; | |
25 | ||
26 | static void bpf_jit_fill_ill_insns(void *area, unsigned int size) | |
27 | { | |
6acdc9a6 | 28 | memset32(area, BREAKPOINT_INSTRUCTION, size/4); |
156d0e29 NR |
29 | } |
30 | ||
/*
 * Issue a write memory barrier and then flush the instruction cache over
 * the address range delimited by @start and @end.
 */
static inline void bpf_flush_icache(void *start, void *end)
{
	const unsigned long from = (unsigned long)start;
	const unsigned long to = (unsigned long)end;

	smp_wmb();
	flush_icache_range(from, to);
}
36 | ||
37 | static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i) | |
38 | { | |
39 | return (ctx->seen & (1 << (31 - b2p[i]))); | |
40 | } | |
41 | ||
42 | static inline void bpf_set_seen_register(struct codegen_context *ctx, int i) | |
43 | { | |
44 | ctx->seen |= (1 << (31 - b2p[i])); | |
45 | } | |
46 | ||
47 | static inline bool bpf_has_stack_frame(struct codegen_context *ctx) | |
48 | { | |
49 | /* | |
50 | * We only need a stack frame if: | |
51 | * - we call other functions (kernel helpers), or | |
52 | * - the bpf program uses its stack area | |
53 | * The latter condition is deduced from the usage of BPF_REG_FP | |
54 | */ | |
55 | return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP); | |
56 | } | |
57 | ||
7b847f52 NR |
58 | /* |
59 | * When not setting up our own stackframe, the redzone usage is: | |
60 | * | |
61 | * [ prev sp ] <------------- | |
62 | * [ ... ] | | |
63 | * sp (r1) ---> [ stack pointer ] -------------- | |
64 | * [ nv gpr save area ] 8*8 | |
65 | * [ tail_call_cnt ] 8 | |
66 | * [ local_tmp_var ] 8 | |
67 | * [ unused red zone ] 208 bytes protected | |
68 | */ | |
69 | static int bpf_jit_stack_local(struct codegen_context *ctx) | |
70 | { | |
71 | if (bpf_has_stack_frame(ctx)) | |
ac0761eb | 72 | return STACK_FRAME_MIN_SIZE + ctx->stack_size; |
7b847f52 NR |
73 | else |
74 | return -(BPF_PPC_STACK_SAVE + 16); | |
75 | } | |
76 | ||
ce076141 NR |
/*
 * Return the r1-relative offset of the tail_call_cnt slot, which sits
 * 8 bytes above the local_tmp_var slot given by bpf_jit_stack_local().
 */
static int bpf_jit_stack_tailcallcnt(struct codegen_context *ctx)
{
	const int local_off = bpf_jit_stack_local(ctx);

	return local_off + 8;
}
81 | ||
7b847f52 NR |
82 | static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) |
83 | { | |
84 | if (reg >= BPF_PPC_NVR_MIN && reg < 32) | |
ac0761eb SD |
85 | return (bpf_has_stack_frame(ctx) ? |
86 | (BPF_PPC_STACKFRAME + ctx->stack_size) : 0) | |
87 | - (8 * (32 - reg)); | |
7b847f52 NR |
88 | |
89 | pr_err("BPF JIT is asking about unknown registers"); | |
90 | BUG(); | |
91 | } | |
92 | ||
156d0e29 NR |
93 | static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx) |
94 | { | |
95 | /* | |
96 | * Load skb->len and skb->data_len | |
97 | * r3 points to skb | |
98 | */ | |
99 | PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len)); | |
100 | PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len)); | |
101 | /* header_len = len - data_len */ | |
102 | PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]); | |
103 | ||
104 | /* skb->data pointer */ | |
105 | PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data)); | |
106 | } | |
107 | ||
ce076141 | 108 | static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) |
156d0e29 | 109 | { |
ce076141 NR |
110 | int i; |
111 | ||
156d0e29 | 112 | /* |
ce076141 NR |
113 | * Initialize tail_call_cnt if we do tail calls. |
114 | * Otherwise, put in NOPs so that it can be skipped when we are | |
115 | * invoked through a tail call. | |
156d0e29 | 116 | */ |
ce076141 NR |
117 | if (ctx->seen & SEEN_TAILCALL) { |
118 | PPC_LI(b2p[TMP_REG_1], 0); | |
119 | /* this goes in the redzone */ | |
120 | PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8)); | |
121 | } else { | |
122 | PPC_NOP(); | |
123 | PPC_NOP(); | |
124 | } | |
156d0e29 | 125 | |
ce076141 | 126 | #define BPF_TAILCALL_PROLOGUE_SIZE 8 |
156d0e29 | 127 | |
7b847f52 | 128 | if (bpf_has_stack_frame(ctx)) { |
156d0e29 NR |
129 | /* |
130 | * We need a stack frame, but we don't necessarily need to | |
131 | * save/restore LR unless we call other functions | |
132 | */ | |
133 | if (ctx->seen & SEEN_FUNC) { | |
134 | EMIT(PPC_INST_MFLR | __PPC_RT(R0)); | |
135 | PPC_BPF_STL(0, 1, PPC_LR_STKOFF); | |
136 | } | |
137 | ||
ac0761eb | 138 | PPC_BPF_STLU(1, 1, -(BPF_PPC_STACKFRAME + ctx->stack_size)); |
156d0e29 NR |
139 | } |
140 | ||
141 | /* | |
142 | * Back up non-volatile regs -- BPF registers 6-10 | |
143 | * If we haven't created our own stack frame, we save these | |
144 | * in the protected zone below the previous stack frame | |
145 | */ | |
146 | for (i = BPF_REG_6; i <= BPF_REG_10; i++) | |
147 | if (bpf_is_seen_register(ctx, i)) | |
7b847f52 | 148 | PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); |
156d0e29 NR |
149 | |
150 | /* | |
151 | * Save additional non-volatile regs if we cache skb | |
152 | * Also, setup skb data | |
153 | */ | |
154 | if (ctx->seen & SEEN_SKB) { | |
155 | PPC_BPF_STL(b2p[SKB_HLEN_REG], 1, | |
7b847f52 | 156 | bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); |
156d0e29 | 157 | PPC_BPF_STL(b2p[SKB_DATA_REG], 1, |
7b847f52 | 158 | bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); |
156d0e29 NR |
159 | bpf_jit_emit_skb_loads(image, ctx); |
160 | } | |
161 | ||
162 | /* Setup frame pointer to point to the bpf stack area */ | |
163 | if (bpf_is_seen_register(ctx, BPF_REG_FP)) | |
164 | PPC_ADDI(b2p[BPF_REG_FP], 1, | |
ac0761eb | 165 | STACK_FRAME_MIN_SIZE + ctx->stack_size); |
156d0e29 NR |
166 | } |
167 | ||
ce076141 | 168 | static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx) |
156d0e29 NR |
169 | { |
170 | int i; | |
156d0e29 | 171 | |
156d0e29 NR |
172 | /* Restore NVRs */ |
173 | for (i = BPF_REG_6; i <= BPF_REG_10; i++) | |
174 | if (bpf_is_seen_register(ctx, i)) | |
7b847f52 | 175 | PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); |
156d0e29 NR |
176 | |
177 | /* Restore non-volatile registers used for skb cache */ | |
178 | if (ctx->seen & SEEN_SKB) { | |
179 | PPC_BPF_LL(b2p[SKB_HLEN_REG], 1, | |
7b847f52 | 180 | bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); |
156d0e29 | 181 | PPC_BPF_LL(b2p[SKB_DATA_REG], 1, |
7b847f52 | 182 | bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); |
156d0e29 NR |
183 | } |
184 | ||
185 | /* Tear down our stack frame */ | |
7b847f52 | 186 | if (bpf_has_stack_frame(ctx)) { |
ac0761eb | 187 | PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size); |
156d0e29 NR |
188 | if (ctx->seen & SEEN_FUNC) { |
189 | PPC_BPF_LL(0, 1, PPC_LR_STKOFF); | |
190 | PPC_MTLR(0); | |
191 | } | |
192 | } | |
ce076141 NR |
193 | } |
194 | ||
195 | static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) | |
196 | { | |
197 | bpf_jit_emit_common_epilogue(image, ctx); | |
198 | ||
199 | /* Move result to r3 */ | |
200 | PPC_MR(3, b2p[BPF_REG_0]); | |
156d0e29 NR |
201 | |
202 | PPC_BLR(); | |
203 | } | |
204 | ||
ce076141 NR |
205 | static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func) |
206 | { | |
207 | #ifdef PPC64_ELF_ABI_v1 | |
208 | /* func points to the function descriptor */ | |
209 | PPC_LI64(b2p[TMP_REG_2], func); | |
210 | /* Load actual entry point from function descriptor */ | |
211 | PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); | |
212 | /* ... and move it to LR */ | |
213 | PPC_MTLR(b2p[TMP_REG_1]); | |
214 | /* | |
215 | * Load TOC from function descriptor at offset 8. | |
216 | * We can clobber r2 since we get called through a | |
217 | * function pointer (so caller will save/restore r2) | |
218 | * and since we don't use a TOC ourself. | |
219 | */ | |
220 | PPC_BPF_LL(2, b2p[TMP_REG_2], 8); | |
221 | #else | |
222 | /* We can clobber r12 */ | |
223 | PPC_FUNC_ADDR(12, func); | |
224 | PPC_MTLR(12); | |
225 | #endif | |
226 | PPC_BLRL(); | |
227 | } | |
228 | ||
229 | static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) | |
230 | { | |
231 | /* | |
232 | * By now, the eBPF program has already setup parameters in r3, r4 and r5 | |
233 | * r3/BPF_REG_1 - pointer to ctx -- passed as is to the next bpf program | |
234 | * r4/BPF_REG_2 - pointer to bpf_array | |
235 | * r5/BPF_REG_3 - index in bpf_array | |
236 | */ | |
237 | int b2p_bpf_array = b2p[BPF_REG_2]; | |
238 | int b2p_index = b2p[BPF_REG_3]; | |
239 | ||
240 | /* | |
241 | * if (index >= array->map.max_entries) | |
242 | * goto out; | |
243 | */ | |
244 | PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)); | |
245 | PPC_CMPLW(b2p_index, b2p[TMP_REG_1]); | |
246 | PPC_BCC(COND_GE, out); | |
247 | ||
248 | /* | |
249 | * if (tail_call_cnt > MAX_TAIL_CALL_CNT) | |
250 | * goto out; | |
251 | */ | |
252 | PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); | |
253 | PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT); | |
254 | PPC_BCC(COND_GT, out); | |
255 | ||
256 | /* | |
257 | * tail_call_cnt++; | |
258 | */ | |
259 | PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1); | |
260 | PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); | |
261 | ||
262 | /* prog = array->ptrs[index]; */ | |
263 | PPC_MULI(b2p[TMP_REG_1], b2p_index, 8); | |
264 | PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array); | |
265 | PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); | |
266 | ||
267 | /* | |
268 | * if (prog == NULL) | |
269 | * goto out; | |
270 | */ | |
271 | PPC_CMPLDI(b2p[TMP_REG_1], 0); | |
272 | PPC_BCC(COND_EQ, out); | |
273 | ||
274 | /* goto *(prog->bpf_func + prologue_size); */ | |
275 | PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); | |
276 | #ifdef PPC64_ELF_ABI_v1 | |
277 | /* skip past the function descriptor */ | |
278 | PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], | |
279 | FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE); | |
280 | #else | |
281 | PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE); | |
282 | #endif | |
283 | PPC_MTCTR(b2p[TMP_REG_1]); | |
284 | ||
285 | /* tear down stack, restore NVRs, ... */ | |
286 | bpf_jit_emit_common_epilogue(image, ctx); | |
287 | ||
288 | PPC_BCTR(); | |
289 | /* out: */ | |
290 | } | |
291 | ||
156d0e29 NR |
292 | /* Assemble the body code between the prologue & epilogue */ |
293 | static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, | |
294 | struct codegen_context *ctx, | |
295 | u32 *addrs) | |
296 | { | |
297 | const struct bpf_insn *insn = fp->insnsi; | |
298 | int flen = fp->len; | |
299 | int i; | |
300 | ||
301 | /* Start of epilogue code - will only be valid 2nd pass onwards */ | |
302 | u32 exit_addr = addrs[flen]; | |
303 | ||
304 | for (i = 0; i < flen; i++) { | |
305 | u32 code = insn[i].code; | |
306 | u32 dst_reg = b2p[insn[i].dst_reg]; | |
307 | u32 src_reg = b2p[insn[i].src_reg]; | |
308 | s16 off = insn[i].off; | |
309 | s32 imm = insn[i].imm; | |
310 | u64 imm64; | |
311 | u8 *func; | |
312 | u32 true_cond; | |
156d0e29 NR |
313 | |
314 | /* | |
315 | * addrs[] maps a BPF bytecode address into a real offset from | |
316 | * the start of the body code. | |
317 | */ | |
318 | addrs[i] = ctx->idx * 4; | |
319 | ||
320 | /* | |
321 | * As an optimization, we note down which non-volatile registers | |
322 | * are used so that we can only save/restore those in our | |
323 | * prologue and epilogue. We do this here regardless of whether | |
324 | * the actual BPF instruction uses src/dst registers or not | |
325 | * (for instance, BPF_CALL does not use them). The expectation | |
326 | * is that those instructions will have src_reg/dst_reg set to | |
327 | * 0. Even otherwise, we just lose some prologue/epilogue | |
328 | * optimization but everything else should work without | |
329 | * any issues. | |
330 | */ | |
7b847f52 | 331 | if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32) |
156d0e29 | 332 | bpf_set_seen_register(ctx, insn[i].dst_reg); |
7b847f52 | 333 | if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32) |
156d0e29 NR |
334 | bpf_set_seen_register(ctx, insn[i].src_reg); |
335 | ||
336 | switch (code) { | |
337 | /* | |
338 | * Arithmetic operations: ADD/SUB/MUL/DIV/MOD/NEG | |
339 | */ | |
340 | case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */ | |
341 | case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */ | |
342 | PPC_ADD(dst_reg, dst_reg, src_reg); | |
343 | goto bpf_alu32_trunc; | |
344 | case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */ | |
345 | case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */ | |
346 | PPC_SUB(dst_reg, dst_reg, src_reg); | |
347 | goto bpf_alu32_trunc; | |
348 | case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */ | |
349 | case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */ | |
350 | case BPF_ALU64 | BPF_ADD | BPF_K: /* dst += imm */ | |
351 | case BPF_ALU64 | BPF_SUB | BPF_K: /* dst -= imm */ | |
352 | if (BPF_OP(code) == BPF_SUB) | |
353 | imm = -imm; | |
354 | if (imm) { | |
355 | if (imm >= -32768 && imm < 32768) | |
356 | PPC_ADDI(dst_reg, dst_reg, IMM_L(imm)); | |
357 | else { | |
358 | PPC_LI32(b2p[TMP_REG_1], imm); | |
359 | PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]); | |
360 | } | |
361 | } | |
362 | goto bpf_alu32_trunc; | |
363 | case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */ | |
364 | case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */ | |
365 | if (BPF_CLASS(code) == BPF_ALU) | |
366 | PPC_MULW(dst_reg, dst_reg, src_reg); | |
367 | else | |
368 | PPC_MULD(dst_reg, dst_reg, src_reg); | |
369 | goto bpf_alu32_trunc; | |
370 | case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */ | |
371 | case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */ | |
372 | if (imm >= -32768 && imm < 32768) | |
373 | PPC_MULI(dst_reg, dst_reg, IMM_L(imm)); | |
374 | else { | |
375 | PPC_LI32(b2p[TMP_REG_1], imm); | |
376 | if (BPF_CLASS(code) == BPF_ALU) | |
377 | PPC_MULW(dst_reg, dst_reg, | |
378 | b2p[TMP_REG_1]); | |
379 | else | |
380 | PPC_MULD(dst_reg, dst_reg, | |
381 | b2p[TMP_REG_1]); | |
382 | } | |
383 | goto bpf_alu32_trunc; | |
384 | case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */ | |
385 | case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */ | |
386 | PPC_CMPWI(src_reg, 0); | |
387 | PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12); | |
388 | PPC_LI(b2p[BPF_REG_0], 0); | |
389 | PPC_JMP(exit_addr); | |
390 | if (BPF_OP(code) == BPF_MOD) { | |
391 | PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg); | |
392 | PPC_MULW(b2p[TMP_REG_1], src_reg, | |
393 | b2p[TMP_REG_1]); | |
394 | PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); | |
395 | } else | |
396 | PPC_DIVWU(dst_reg, dst_reg, src_reg); | |
397 | goto bpf_alu32_trunc; | |
398 | case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ | |
399 | case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ | |
400 | PPC_CMPDI(src_reg, 0); | |
401 | PPC_BCC_SHORT(COND_NE, (ctx->idx * 4) + 12); | |
402 | PPC_LI(b2p[BPF_REG_0], 0); | |
403 | PPC_JMP(exit_addr); | |
404 | if (BPF_OP(code) == BPF_MOD) { | |
405 | PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg); | |
406 | PPC_MULD(b2p[TMP_REG_1], src_reg, | |
407 | b2p[TMP_REG_1]); | |
408 | PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); | |
409 | } else | |
410 | PPC_DIVD(dst_reg, dst_reg, src_reg); | |
411 | break; | |
412 | case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ | |
413 | case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ | |
414 | case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */ | |
415 | case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */ | |
416 | if (imm == 0) | |
417 | return -EINVAL; | |
418 | else if (imm == 1) | |
419 | goto bpf_alu32_trunc; | |
420 | ||
421 | PPC_LI32(b2p[TMP_REG_1], imm); | |
422 | switch (BPF_CLASS(code)) { | |
423 | case BPF_ALU: | |
424 | if (BPF_OP(code) == BPF_MOD) { | |
425 | PPC_DIVWU(b2p[TMP_REG_2], dst_reg, | |
426 | b2p[TMP_REG_1]); | |
427 | PPC_MULW(b2p[TMP_REG_1], | |
428 | b2p[TMP_REG_1], | |
429 | b2p[TMP_REG_2]); | |
430 | PPC_SUB(dst_reg, dst_reg, | |
431 | b2p[TMP_REG_1]); | |
432 | } else | |
433 | PPC_DIVWU(dst_reg, dst_reg, | |
434 | b2p[TMP_REG_1]); | |
435 | break; | |
436 | case BPF_ALU64: | |
437 | if (BPF_OP(code) == BPF_MOD) { | |
438 | PPC_DIVD(b2p[TMP_REG_2], dst_reg, | |
439 | b2p[TMP_REG_1]); | |
440 | PPC_MULD(b2p[TMP_REG_1], | |
441 | b2p[TMP_REG_1], | |
442 | b2p[TMP_REG_2]); | |
443 | PPC_SUB(dst_reg, dst_reg, | |
444 | b2p[TMP_REG_1]); | |
445 | } else | |
446 | PPC_DIVD(dst_reg, dst_reg, | |
447 | b2p[TMP_REG_1]); | |
448 | break; | |
449 | } | |
450 | goto bpf_alu32_trunc; | |
451 | case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */ | |
452 | case BPF_ALU64 | BPF_NEG: /* dst = -dst */ | |
453 | PPC_NEG(dst_reg, dst_reg); | |
454 | goto bpf_alu32_trunc; | |
455 | ||
456 | /* | |
457 | * Logical operations: AND/OR/XOR/[A]LSH/[A]RSH | |
458 | */ | |
459 | case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */ | |
460 | case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */ | |
461 | PPC_AND(dst_reg, dst_reg, src_reg); | |
462 | goto bpf_alu32_trunc; | |
463 | case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */ | |
464 | case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */ | |
465 | if (!IMM_H(imm)) | |
466 | PPC_ANDI(dst_reg, dst_reg, IMM_L(imm)); | |
467 | else { | |
468 | /* Sign-extended */ | |
469 | PPC_LI32(b2p[TMP_REG_1], imm); | |
470 | PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]); | |
471 | } | |
472 | goto bpf_alu32_trunc; | |
473 | case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */ | |
474 | case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */ | |
475 | PPC_OR(dst_reg, dst_reg, src_reg); | |
476 | goto bpf_alu32_trunc; | |
477 | case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */ | |
478 | case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */ | |
479 | if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { | |
480 | /* Sign-extended */ | |
481 | PPC_LI32(b2p[TMP_REG_1], imm); | |
482 | PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]); | |
483 | } else { | |
484 | if (IMM_L(imm)) | |
485 | PPC_ORI(dst_reg, dst_reg, IMM_L(imm)); | |
486 | if (IMM_H(imm)) | |
487 | PPC_ORIS(dst_reg, dst_reg, IMM_H(imm)); | |
488 | } | |
489 | goto bpf_alu32_trunc; | |
490 | case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */ | |
491 | case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */ | |
492 | PPC_XOR(dst_reg, dst_reg, src_reg); | |
493 | goto bpf_alu32_trunc; | |
494 | case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */ | |
495 | case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */ | |
496 | if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) { | |
497 | /* Sign-extended */ | |
498 | PPC_LI32(b2p[TMP_REG_1], imm); | |
499 | PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]); | |
500 | } else { | |
501 | if (IMM_L(imm)) | |
502 | PPC_XORI(dst_reg, dst_reg, IMM_L(imm)); | |
503 | if (IMM_H(imm)) | |
504 | PPC_XORIS(dst_reg, dst_reg, IMM_H(imm)); | |
505 | } | |
506 | goto bpf_alu32_trunc; | |
507 | case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */ | |
508 | /* slw clears top 32 bits */ | |
509 | PPC_SLW(dst_reg, dst_reg, src_reg); | |
510 | break; | |
511 | case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */ | |
512 | PPC_SLD(dst_reg, dst_reg, src_reg); | |
513 | break; | |
514 | case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */ | |
515 | /* with imm 0, we still need to clear top 32 bits */ | |
516 | PPC_SLWI(dst_reg, dst_reg, imm); | |
517 | break; | |
518 | case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */ | |
519 | if (imm != 0) | |
520 | PPC_SLDI(dst_reg, dst_reg, imm); | |
521 | break; | |
522 | case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */ | |
523 | PPC_SRW(dst_reg, dst_reg, src_reg); | |
524 | break; | |
525 | case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */ | |
526 | PPC_SRD(dst_reg, dst_reg, src_reg); | |
527 | break; | |
528 | case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */ | |
529 | PPC_SRWI(dst_reg, dst_reg, imm); | |
530 | break; | |
531 | case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */ | |
532 | if (imm != 0) | |
533 | PPC_SRDI(dst_reg, dst_reg, imm); | |
534 | break; | |
535 | case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */ | |
536 | PPC_SRAD(dst_reg, dst_reg, src_reg); | |
537 | break; | |
538 | case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */ | |
539 | if (imm != 0) | |
540 | PPC_SRADI(dst_reg, dst_reg, imm); | |
541 | break; | |
542 | ||
543 | /* | |
544 | * MOV | |
545 | */ | |
546 | case BPF_ALU | BPF_MOV | BPF_X: /* (u32) dst = src */ | |
547 | case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */ | |
548 | PPC_MR(dst_reg, src_reg); | |
549 | goto bpf_alu32_trunc; | |
550 | case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */ | |
551 | case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */ | |
552 | PPC_LI32(dst_reg, imm); | |
553 | if (imm < 0) | |
554 | goto bpf_alu32_trunc; | |
555 | break; | |
556 | ||
557 | bpf_alu32_trunc: | |
558 | /* Truncate to 32-bits */ | |
559 | if (BPF_CLASS(code) == BPF_ALU) | |
560 | PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31); | |
561 | break; | |
562 | ||
563 | /* | |
564 | * BPF_FROM_BE/LE | |
565 | */ | |
566 | case BPF_ALU | BPF_END | BPF_FROM_LE: | |
567 | case BPF_ALU | BPF_END | BPF_FROM_BE: | |
568 | #ifdef __BIG_ENDIAN__ | |
569 | if (BPF_SRC(code) == BPF_FROM_BE) | |
570 | goto emit_clear; | |
571 | #else /* !__BIG_ENDIAN__ */ | |
572 | if (BPF_SRC(code) == BPF_FROM_LE) | |
573 | goto emit_clear; | |
574 | #endif | |
575 | switch (imm) { | |
576 | case 16: | |
577 | /* Rotate 8 bits left & mask with 0x0000ff00 */ | |
578 | PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23); | |
579 | /* Rotate 8 bits right & insert LSB to reg */ | |
580 | PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31); | |
581 | /* Move result back to dst_reg */ | |
582 | PPC_MR(dst_reg, b2p[TMP_REG_1]); | |
583 | break; | |
584 | case 32: | |
585 | /* | |
586 | * Rotate word left by 8 bits: | |
587 | * 2 bytes are already in their final position | |
588 | * -- byte 2 and 4 (of bytes 1, 2, 3 and 4) | |
589 | */ | |
590 | PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31); | |
591 | /* Rotate 24 bits and insert byte 1 */ | |
592 | PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7); | |
593 | /* Rotate 24 bits and insert byte 3 */ | |
594 | PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23); | |
595 | PPC_MR(dst_reg, b2p[TMP_REG_1]); | |
596 | break; | |
597 | case 64: | |
598 | /* | |
599 | * Way easier and faster(?) to store the value | |
600 | * into stack and then use ldbrx | |
601 | * | |
156d0e29 NR |
602 | * ctx->seen will be reliable in pass2, but |
603 | * the instructions generated will remain the | |
604 | * same across all passes | |
605 | */ | |
7b847f52 NR |
606 | PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx)); |
607 | PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)); | |
156d0e29 NR |
608 | PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]); |
609 | break; | |
610 | } | |
611 | break; | |
612 | ||
613 | emit_clear: | |
614 | switch (imm) { | |
615 | case 16: | |
616 | /* zero-extend 16 bits into 64 bits */ | |
617 | PPC_RLDICL(dst_reg, dst_reg, 0, 48); | |
618 | break; | |
619 | case 32: | |
620 | /* zero-extend 32 bits into 64 bits */ | |
621 | PPC_RLDICL(dst_reg, dst_reg, 0, 32); | |
622 | break; | |
623 | case 64: | |
624 | /* nop */ | |
625 | break; | |
626 | } | |
627 | break; | |
628 | ||
629 | /* | |
630 | * BPF_ST(X) | |
631 | */ | |
632 | case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */ | |
633 | case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */ | |
634 | if (BPF_CLASS(code) == BPF_ST) { | |
635 | PPC_LI(b2p[TMP_REG_1], imm); | |
636 | src_reg = b2p[TMP_REG_1]; | |
637 | } | |
638 | PPC_STB(src_reg, dst_reg, off); | |
639 | break; | |
640 | case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */ | |
641 | case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */ | |
642 | if (BPF_CLASS(code) == BPF_ST) { | |
643 | PPC_LI(b2p[TMP_REG_1], imm); | |
644 | src_reg = b2p[TMP_REG_1]; | |
645 | } | |
646 | PPC_STH(src_reg, dst_reg, off); | |
647 | break; | |
648 | case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */ | |
649 | case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */ | |
650 | if (BPF_CLASS(code) == BPF_ST) { | |
651 | PPC_LI32(b2p[TMP_REG_1], imm); | |
652 | src_reg = b2p[TMP_REG_1]; | |
653 | } | |
654 | PPC_STW(src_reg, dst_reg, off); | |
655 | break; | |
656 | case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */ | |
657 | case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */ | |
658 | if (BPF_CLASS(code) == BPF_ST) { | |
659 | PPC_LI32(b2p[TMP_REG_1], imm); | |
660 | src_reg = b2p[TMP_REG_1]; | |
661 | } | |
662 | PPC_STD(src_reg, dst_reg, off); | |
663 | break; | |
664 | ||
665 | /* | |
666 | * BPF_STX XADD (atomic_add) | |
667 | */ | |
668 | /* *(u32 *)(dst + off) += src */ | |
669 | case BPF_STX | BPF_XADD | BPF_W: | |
670 | /* Get EA into TMP_REG_1 */ | |
671 | PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); | |
672 | /* error if EA is not word-aligned */ | |
673 | PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x03); | |
674 | PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + 12); | |
675 | PPC_LI(b2p[BPF_REG_0], 0); | |
676 | PPC_JMP(exit_addr); | |
677 | /* load value from memory into TMP_REG_2 */ | |
678 | PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); | |
679 | /* add value from src_reg into this */ | |
680 | PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); | |
681 | /* store result back */ | |
682 | PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); | |
683 | /* we're done if this succeeded */ | |
684 | PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); | |
685 | /* otherwise, let's try once more */ | |
686 | PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); | |
687 | PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); | |
688 | PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); | |
689 | /* exit if the store was not successful */ | |
690 | PPC_LI(b2p[BPF_REG_0], 0); | |
691 | PPC_BCC(COND_NE, exit_addr); | |
692 | break; | |
693 | /* *(u64 *)(dst + off) += src */ | |
694 | case BPF_STX | BPF_XADD | BPF_DW: | |
695 | PPC_ADDI(b2p[TMP_REG_1], dst_reg, off); | |
696 | /* error if EA is not doubleword-aligned */ | |
697 | PPC_ANDI(b2p[TMP_REG_2], b2p[TMP_REG_1], 0x07); | |
698 | PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (3*4)); | |
699 | PPC_LI(b2p[BPF_REG_0], 0); | |
700 | PPC_JMP(exit_addr); | |
701 | PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); | |
702 | PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); | |
703 | PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); | |
704 | PPC_BCC_SHORT(COND_EQ, (ctx->idx * 4) + (7*4)); | |
705 | PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0); | |
706 | PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg); | |
707 | PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]); | |
708 | PPC_LI(b2p[BPF_REG_0], 0); | |
709 | PPC_BCC(COND_NE, exit_addr); | |
710 | break; | |
711 | ||
712 | /* | |
713 | * BPF_LDX | |
714 | */ | |
715 | /* dst = *(u8 *)(ul) (src + off) */ | |
716 | case BPF_LDX | BPF_MEM | BPF_B: | |
717 | PPC_LBZ(dst_reg, src_reg, off); | |
718 | break; | |
719 | /* dst = *(u16 *)(ul) (src + off) */ | |
720 | case BPF_LDX | BPF_MEM | BPF_H: | |
721 | PPC_LHZ(dst_reg, src_reg, off); | |
722 | break; | |
723 | /* dst = *(u32 *)(ul) (src + off) */ | |
724 | case BPF_LDX | BPF_MEM | BPF_W: | |
725 | PPC_LWZ(dst_reg, src_reg, off); | |
726 | break; | |
727 | /* dst = *(u64 *)(ul) (src + off) */ | |
728 | case BPF_LDX | BPF_MEM | BPF_DW: | |
729 | PPC_LD(dst_reg, src_reg, off); | |
730 | break; | |
731 | ||
732 | /* | |
733 | * Doubleword load | |
734 | * 16 byte instruction that uses two 'struct bpf_insn' | |
735 | */ | |
736 | case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */ | |
737 | imm64 = ((u64)(u32) insn[i].imm) | | |
738 | (((u64)(u32) insn[i+1].imm) << 32); | |
739 | /* Adjust for two bpf instructions */ | |
740 | addrs[++i] = ctx->idx * 4; | |
741 | PPC_LI64(dst_reg, imm64); | |
742 | break; | |
743 | ||
744 | /* | |
745 | * Return/Exit | |
746 | */ | |
747 | case BPF_JMP | BPF_EXIT: | |
748 | /* | |
749 | * If this isn't the very last instruction, branch to | |
750 | * the epilogue. If we _are_ the last instruction, | |
751 | * we'll just fall through to the epilogue. | |
752 | */ | |
753 | if (i != flen - 1) | |
754 | PPC_JMP(exit_addr); | |
755 | /* else fall through to the epilogue */ | |
756 | break; | |
757 | ||
758 | /* | |
759 | * Call kernel helper | |
760 | */ | |
761 | case BPF_JMP | BPF_CALL: | |
762 | ctx->seen |= SEEN_FUNC; | |
763 | func = (u8 *) __bpf_call_base + imm; | |
764 | ||
765 | /* Save skb pointer if we need to re-cache skb data */ | |
87338c8e DB |
766 | if ((ctx->seen & SEEN_SKB) && |
767 | bpf_helper_changes_pkt_data(func)) | |
7b847f52 | 768 | PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); |
156d0e29 NR |
769 | |
770 | bpf_jit_emit_func_call(image, ctx, (u64)func); | |
771 | ||
772 | /* move return value from r3 to BPF_REG_0 */ | |
773 | PPC_MR(b2p[BPF_REG_0], 3); | |
774 | ||
775 | /* refresh skb cache */ | |
87338c8e DB |
776 | if ((ctx->seen & SEEN_SKB) && |
777 | bpf_helper_changes_pkt_data(func)) { | |
156d0e29 | 778 | /* reload skb pointer to r3 */ |
7b847f52 | 779 | PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); |
156d0e29 NR |
780 | bpf_jit_emit_skb_loads(image, ctx); |
781 | } | |
782 | break; | |
783 | ||
784 | /* | |
785 | * Jumps and branches | |
786 | */ | |
787 | case BPF_JMP | BPF_JA: | |
788 | PPC_JMP(addrs[i + 1 + off]); | |
789 | break; | |
790 | ||
791 | case BPF_JMP | BPF_JGT | BPF_K: | |
792 | case BPF_JMP | BPF_JGT | BPF_X: | |
793 | case BPF_JMP | BPF_JSGT | BPF_K: | |
794 | case BPF_JMP | BPF_JSGT | BPF_X: | |
795 | true_cond = COND_GT; | |
796 | goto cond_branch; | |
20dbf5cc DB |
797 | case BPF_JMP | BPF_JLT | BPF_K: |
798 | case BPF_JMP | BPF_JLT | BPF_X: | |
799 | case BPF_JMP | BPF_JSLT | BPF_K: | |
800 | case BPF_JMP | BPF_JSLT | BPF_X: | |
801 | true_cond = COND_LT; | |
802 | goto cond_branch; | |
156d0e29 NR |
803 | case BPF_JMP | BPF_JGE | BPF_K: |
804 | case BPF_JMP | BPF_JGE | BPF_X: | |
805 | case BPF_JMP | BPF_JSGE | BPF_K: | |
806 | case BPF_JMP | BPF_JSGE | BPF_X: | |
807 | true_cond = COND_GE; | |
808 | goto cond_branch; | |
20dbf5cc DB |
809 | case BPF_JMP | BPF_JLE | BPF_K: |
810 | case BPF_JMP | BPF_JLE | BPF_X: | |
811 | case BPF_JMP | BPF_JSLE | BPF_K: | |
812 | case BPF_JMP | BPF_JSLE | BPF_X: | |
813 | true_cond = COND_LE; | |
814 | goto cond_branch; | |
156d0e29 NR |
815 | case BPF_JMP | BPF_JEQ | BPF_K: |
816 | case BPF_JMP | BPF_JEQ | BPF_X: | |
817 | true_cond = COND_EQ; | |
818 | goto cond_branch; | |
819 | case BPF_JMP | BPF_JNE | BPF_K: | |
820 | case BPF_JMP | BPF_JNE | BPF_X: | |
821 | true_cond = COND_NE; | |
822 | goto cond_branch; | |
823 | case BPF_JMP | BPF_JSET | BPF_K: | |
824 | case BPF_JMP | BPF_JSET | BPF_X: | |
825 | true_cond = COND_NE; | |
826 | /* Fall through */ | |
827 | ||
828 | cond_branch: | |
829 | switch (code) { | |
830 | case BPF_JMP | BPF_JGT | BPF_X: | |
20dbf5cc | 831 | case BPF_JMP | BPF_JLT | BPF_X: |
156d0e29 | 832 | case BPF_JMP | BPF_JGE | BPF_X: |
20dbf5cc | 833 | case BPF_JMP | BPF_JLE | BPF_X: |
156d0e29 NR |
834 | case BPF_JMP | BPF_JEQ | BPF_X: |
835 | case BPF_JMP | BPF_JNE | BPF_X: | |
836 | /* unsigned comparison */ | |
837 | PPC_CMPLD(dst_reg, src_reg); | |
838 | break; | |
839 | case BPF_JMP | BPF_JSGT | BPF_X: | |
20dbf5cc | 840 | case BPF_JMP | BPF_JSLT | BPF_X: |
156d0e29 | 841 | case BPF_JMP | BPF_JSGE | BPF_X: |
20dbf5cc | 842 | case BPF_JMP | BPF_JSLE | BPF_X: |
156d0e29 NR |
843 | /* signed comparison */ |
844 | PPC_CMPD(dst_reg, src_reg); | |
845 | break; | |
846 | case BPF_JMP | BPF_JSET | BPF_X: | |
847 | PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, src_reg); | |
848 | break; | |
849 | case BPF_JMP | BPF_JNE | BPF_K: | |
850 | case BPF_JMP | BPF_JEQ | BPF_K: | |
851 | case BPF_JMP | BPF_JGT | BPF_K: | |
20dbf5cc | 852 | case BPF_JMP | BPF_JLT | BPF_K: |
156d0e29 | 853 | case BPF_JMP | BPF_JGE | BPF_K: |
20dbf5cc | 854 | case BPF_JMP | BPF_JLE | BPF_K: |
156d0e29 NR |
855 | /* |
856 | * Need sign-extended load, so only positive | |
857 | * values can be used as imm in cmpldi | |
858 | */ | |
859 | if (imm >= 0 && imm < 32768) | |
860 | PPC_CMPLDI(dst_reg, imm); | |
861 | else { | |
862 | /* sign-extending load */ | |
863 | PPC_LI32(b2p[TMP_REG_1], imm); | |
864 | /* ... but unsigned comparison */ | |
865 | PPC_CMPLD(dst_reg, b2p[TMP_REG_1]); | |
866 | } | |
867 | break; | |
868 | case BPF_JMP | BPF_JSGT | BPF_K: | |
20dbf5cc | 869 | case BPF_JMP | BPF_JSLT | BPF_K: |
156d0e29 | 870 | case BPF_JMP | BPF_JSGE | BPF_K: |
20dbf5cc | 871 | case BPF_JMP | BPF_JSLE | BPF_K: |
156d0e29 NR |
872 | /* |
873 | * signed comparison, so any 16-bit value | |
874 | * can be used in cmpdi | |
875 | */ | |
876 | if (imm >= -32768 && imm < 32768) | |
877 | PPC_CMPDI(dst_reg, imm); | |
878 | else { | |
879 | PPC_LI32(b2p[TMP_REG_1], imm); | |
880 | PPC_CMPD(dst_reg, b2p[TMP_REG_1]); | |
881 | } | |
882 | break; | |
883 | case BPF_JMP | BPF_JSET | BPF_K: | |
884 | /* andi does not sign-extend the immediate */ | |
885 | if (imm >= 0 && imm < 32768) | |
886 | /* PPC_ANDI is _only/always_ dot-form */ | |
887 | PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm); | |
888 | else { | |
889 | PPC_LI32(b2p[TMP_REG_1], imm); | |
890 | PPC_AND_DOT(b2p[TMP_REG_1], dst_reg, | |
891 | b2p[TMP_REG_1]); | |
892 | } | |
893 | break; | |
894 | } | |
895 | PPC_BCC(true_cond, addrs[i + 1 + off]); | |
896 | break; | |
897 | ||
898 | /* | |
899 | * Loads from packet header/data | |
900 | * Assume 32-bit input value in imm and X (src_reg) | |
901 | */ | |
902 | ||
903 | /* Absolute loads */ | |
904 | case BPF_LD | BPF_W | BPF_ABS: | |
905 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word); | |
906 | goto common_load_abs; | |
907 | case BPF_LD | BPF_H | BPF_ABS: | |
908 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half); | |
909 | goto common_load_abs; | |
910 | case BPF_LD | BPF_B | BPF_ABS: | |
911 | func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte); | |
912 | common_load_abs: | |
913 | /* | |
914 | * Load from [imm] | |
915 | * Load into r4, which can just be passed onto | |
916 | * skb load helpers as the second parameter | |
917 | */ | |
918 | PPC_LI32(4, imm); | |
919 | goto common_load; | |
920 | ||
921 | /* Indirect loads */ | |
922 | case BPF_LD | BPF_W | BPF_IND: | |
923 | func = (u8 *)sk_load_word; | |
924 | goto common_load_ind; | |
925 | case BPF_LD | BPF_H | BPF_IND: | |
926 | func = (u8 *)sk_load_half; | |
927 | goto common_load_ind; | |
928 | case BPF_LD | BPF_B | BPF_IND: | |
929 | func = (u8 *)sk_load_byte; | |
930 | common_load_ind: | |
931 | /* | |
932 | * Load from [src_reg + imm] | |
933 | * Treat src_reg as a 32-bit value | |
934 | */ | |
935 | PPC_EXTSW(4, src_reg); | |
936 | if (imm) { | |
937 | if (imm >= -32768 && imm < 32768) | |
938 | PPC_ADDI(4, 4, IMM_L(imm)); | |
939 | else { | |
940 | PPC_LI32(b2p[TMP_REG_1], imm); | |
941 | PPC_ADD(4, 4, b2p[TMP_REG_1]); | |
942 | } | |
943 | } | |
944 | ||
945 | common_load: | |
946 | ctx->seen |= SEEN_SKB; | |
947 | ctx->seen |= SEEN_FUNC; | |
948 | bpf_jit_emit_func_call(image, ctx, (u64)func); | |
949 | ||
950 | /* | |
951 | * Helper returns 'lt' condition on error, and an | |
952 | * appropriate return value in BPF_REG_0 | |
953 | */ | |
954 | PPC_BCC(COND_LT, exit_addr); | |
955 | break; | |
956 | ||
957 | /* | |
ce076141 | 958 | * Tail call |
156d0e29 | 959 | */ |
71189fa9 | 960 | case BPF_JMP | BPF_TAIL_CALL: |
ce076141 NR |
961 | ctx->seen |= SEEN_TAILCALL; |
962 | bpf_jit_emit_tail_call(image, ctx, addrs[i + 1]); | |
963 | break; | |
156d0e29 NR |
964 | |
965 | default: | |
966 | /* | |
967 | * The filter contains something cruel & unusual. | |
968 | * We don't handle it, but also there shouldn't be | |
969 | * anything missing from our list. | |
970 | */ | |
971 | pr_err_ratelimited("eBPF filter opcode %04x (@%d) unsupported\n", | |
972 | code, i); | |
973 | return -ENOTSUPP; | |
974 | } | |
975 | } | |
976 | ||
977 | /* Set end-of-body-code address for exit. */ | |
978 | addrs[i] = ctx->idx * 4; | |
979 | ||
980 | return 0; | |
981 | } | |
982 | ||
156d0e29 NR |
983 | struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) |
984 | { | |
985 | u32 proglen; | |
986 | u32 alloclen; | |
987 | u8 *image = NULL; | |
988 | u32 *code_base; | |
989 | u32 *addrs; | |
990 | struct codegen_context cgctx; | |
991 | int pass; | |
992 | int flen; | |
993 | struct bpf_binary_header *bpf_hdr; | |
b7b7013c NR |
994 | struct bpf_prog *org_fp = fp; |
995 | struct bpf_prog *tmp_fp; | |
996 | bool bpf_blinded = false; | |
156d0e29 NR |
997 | |
998 | if (!bpf_jit_enable) | |
b7b7013c NR |
999 | return org_fp; |
1000 | ||
1001 | tmp_fp = bpf_jit_blind_constants(org_fp); | |
1002 | if (IS_ERR(tmp_fp)) | |
1003 | return org_fp; | |
1004 | ||
1005 | if (tmp_fp != org_fp) { | |
1006 | bpf_blinded = true; | |
1007 | fp = tmp_fp; | |
1008 | } | |
156d0e29 NR |
1009 | |
1010 | flen = fp->len; | |
1011 | addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL); | |
b7b7013c NR |
1012 | if (addrs == NULL) { |
1013 | fp = org_fp; | |
1014 | goto out; | |
1015 | } | |
1016 | ||
1017 | memset(&cgctx, 0, sizeof(struct codegen_context)); | |
156d0e29 | 1018 | |
ac0761eb SD |
1019 | /* Make sure that the stack is quadword aligned. */ |
1020 | cgctx.stack_size = round_up(fp->aux->stack_depth, 16); | |
1021 | ||
156d0e29 | 1022 | /* Scouting faux-generate pass 0 */ |
b7b7013c | 1023 | if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) { |
156d0e29 | 1024 | /* We hit something illegal or unsupported. */ |
b7b7013c | 1025 | fp = org_fp; |
156d0e29 | 1026 | goto out; |
b7b7013c | 1027 | } |
156d0e29 NR |
1028 | |
1029 | /* | |
1030 | * Pretend to build prologue, given the features we've seen. This will | |
1031 | * update ctgtx.idx as it pretends to output instructions, then we can | |
1032 | * calculate total size from idx. | |
1033 | */ | |
1034 | bpf_jit_build_prologue(0, &cgctx); | |
1035 | bpf_jit_build_epilogue(0, &cgctx); | |
1036 | ||
1037 | proglen = cgctx.idx * 4; | |
1038 | alloclen = proglen + FUNCTION_DESCR_SIZE; | |
1039 | ||
1040 | bpf_hdr = bpf_jit_binary_alloc(alloclen, &image, 4, | |
1041 | bpf_jit_fill_ill_insns); | |
b7b7013c NR |
1042 | if (!bpf_hdr) { |
1043 | fp = org_fp; | |
156d0e29 | 1044 | goto out; |
b7b7013c | 1045 | } |
156d0e29 NR |
1046 | |
1047 | code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); | |
1048 | ||
1049 | /* Code generation passes 1-2 */ | |
1050 | for (pass = 1; pass < 3; pass++) { | |
1051 | /* Now build the prologue, body code & epilogue for real. */ | |
1052 | cgctx.idx = 0; | |
1053 | bpf_jit_build_prologue(code_base, &cgctx); | |
1054 | bpf_jit_build_body(fp, code_base, &cgctx, addrs); | |
1055 | bpf_jit_build_epilogue(code_base, &cgctx); | |
1056 | ||
1057 | if (bpf_jit_enable > 1) | |
1058 | pr_info("Pass %d: shrink = %d, seen = 0x%x\n", pass, | |
1059 | proglen - (cgctx.idx * 4), cgctx.seen); | |
1060 | } | |
1061 | ||
1062 | if (bpf_jit_enable > 1) | |
1063 | /* | |
1064 | * Note that we output the base address of the code_base | |
1065 | * rather than image, since opcodes are in code_base. | |
1066 | */ | |
1067 | bpf_jit_dump(flen, proglen, pass, code_base); | |
1068 | ||
156d0e29 | 1069 | #ifdef PPC64_ELF_ABI_v1 |
052de33c DB |
1070 | /* Function descriptor nastiness: Address + TOC */ |
1071 | ((u64 *)image)[0] = (u64)code_base; | |
1072 | ((u64 *)image)[1] = local_paca->kernel_toc; | |
156d0e29 | 1073 | #endif |
052de33c DB |
1074 | |
1075 | fp->bpf_func = (void *)image; | |
1076 | fp->jited = 1; | |
783d28dd | 1077 | fp->jited_len = alloclen; |
156d0e29 | 1078 | |
10528b9c | 1079 | bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); |
156d0e29 NR |
1080 | |
1081 | out: | |
1082 | kfree(addrs); | |
b7b7013c NR |
1083 | |
1084 | if (bpf_blinded) | |
1085 | bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); | |
1086 | ||
156d0e29 NR |
1087 | return fp; |
1088 | } | |
1089 | ||
74451e66 | 1090 | /* Overriding bpf_jit_free() as we don't set images read-only. */ |
156d0e29 NR |
1091 | void bpf_jit_free(struct bpf_prog *fp) |
1092 | { | |
1093 | unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; | |
1094 | struct bpf_binary_header *bpf_hdr = (void *)addr; | |
1095 | ||
1096 | if (fp->jited) | |
1097 | bpf_jit_binary_free(bpf_hdr); | |
1098 | ||
1099 | bpf_prog_unlock_free(fp); | |
1100 | } |