// SPDX-License-Identifier: GPL-2.0-only
/*
 * BPF JIT compiler for ARM64
 *
 * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
 */

#define pr_fmt(fmt) "bpf_jit: " fmt

#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/slab.h>

#include <asm/asm-extable.h>
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
#include <asm/debug-monitors.h>
#include <asm/insn.h>
#include <asm/patching.h>
#include <asm/set_memory.h>

#include "bpf_jit.h"

#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
#define FP_BOTTOM (MAX_BPF_JIT_REG + 4)
#define ARENA_VM_START (MAX_BPF_JIT_REG + 5)

#define check_imm(bits, imm) do {				\
	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
			i, imm, imm);				\
		return -EINVAL;					\
	}							\
} while (0)
#define check_imm19(imm) check_imm(19, imm)
#define check_imm26(imm) check_imm(26, imm)
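
/*
 * Note on the ranges: for imm > 0 the macro requires imm >> bits to be zero,
 * and for imm < 0 it requires ~imm >> bits to be zero, so check_imm(bits, imm)
 * accepts exactly the interval [-2^bits, 2^bits - 1]. check_imm19() and
 * check_imm26() guard the 19-bit (conditional branch/CBZ-style) and 26-bit
 * (B/BL) branch-offset fields, with offsets counted in instructions, not
 * bytes.
 */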

/* Map BPF registers to A64 registers */
static const int bpf2a64[] = {
	/* return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = A64_R(7),
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = A64_R(0),
	[BPF_REG_2] = A64_R(1),
	[BPF_REG_3] = A64_R(2),
	[BPF_REG_4] = A64_R(3),
	[BPF_REG_5] = A64_R(4),
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = A64_R(19),
	[BPF_REG_7] = A64_R(20),
	[BPF_REG_8] = A64_R(21),
	[BPF_REG_9] = A64_R(22),
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = A64_R(25),
	/* temporary registers for BPF JIT */
	[TMP_REG_1] = A64_R(10),
	[TMP_REG_2] = A64_R(11),
	[TMP_REG_3] = A64_R(12),
	/* tail_call_cnt */
	[TCALL_CNT] = A64_R(26),
	/* temporary register for blinding constants */
	[BPF_REG_AX] = A64_R(9),
	[FP_BOTTOM] = A64_R(27),
	/* callee saved register for kern_vm_start address */
	[ARENA_VM_START] = A64_R(28),
};

struct jit_ctx {
	const struct bpf_prog *prog;
	int idx;
	int epilogue_offset;
	int *offset;
	int exentry_idx;
	__le32 *image;
	__le32 *ro_image;
	u32 stack_size;
	int fpb_offset;
	u64 user_vm_start;
};

struct bpf_plt {
	u32 insn_ldr; /* load target */
	u32 insn_br;  /* branch to target */
	u64 target;   /* target value */
};

#define PLT_TARGET_SIZE   sizeof_field(struct bpf_plt, target)
#define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)

static inline void emit(const u32 insn, struct jit_ctx *ctx)
{
	if (ctx->image != NULL)
		ctx->image[ctx->idx] = cpu_to_le32(insn);

	ctx->idx++;
}

static inline void emit_a64_mov_i(const int is64, const int reg,
				  const s32 val, struct jit_ctx *ctx)
{
	u16 hi = val >> 16;
	u16 lo = val & 0xffff;

	if (hi & 0x8000) {
		if (hi == 0xffff) {
			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
		} else {
			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
			if (lo != 0xffff)
				emit(A64_MOVK(is64, reg, lo, 0), ctx);
		}
	} else {
		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
		if (hi)
			emit(A64_MOVK(is64, reg, hi, 16), ctx);
	}
}
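
/*
 * Illustration (not part of the original source): for val = 0xfffffffe,
 * hi = 0xffff and lo = 0xfffe, so the single instruction
 * MOVN reg, #0x0001 (reg = ~0x0001) suffices; for val = 0x00abcdef the
 * other path emits MOVZ reg, #0xcdef followed by MOVK reg, #0x00ab, lsl #16.
 */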

static int i64_i16_blocks(const u64 val, bool inverse)
{
	return (((val >>  0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
	       (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
}
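
/*
 * i64_i16_blocks() counts how many 16-bit halfwords of val differ from the
 * background pattern (0xffff when inverse, 0x0000 otherwise), i.e. how many
 * MOVN/MOVZ + MOVK instructions the corresponding strategy would need.
 */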

static inline void emit_a64_mov_i64(const int reg, const u64 val,
				    struct jit_ctx *ctx)
{
	u64 nrm_tmp = val, rev_tmp = ~val;
	bool inverse;
	int shift;

	if (!(nrm_tmp >> 32))
		return emit_a64_mov_i(0, reg, (u32)val, ctx);

	inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
	shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
				(fls64(nrm_tmp) - 1)), 16), 0);
	if (inverse)
		emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
	else
		emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
	shift -= 16;
	while (shift >= 0) {
		if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
			emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
		shift -= 16;
	}
}
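
/*
 * Illustration (not part of the original source): for
 * val = 0xffffffffffffabcd only one halfword differs from 0xffff, so the
 * inverse strategy wins and the constant is materialized by the single
 * instruction MOVN reg, #0x5432 (since ~val = 0x5432).
 */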

static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
{
	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
		emit(insn, ctx);
}

/*
 * Kernel addresses in the vmalloc space use at most 48 bits, and the
 * remaining high bits are guaranteed to be 1. So we can compose the address
 * with a fixed-length movn/movk/movk sequence.
 */
static inline void emit_addr_mov_i64(const int reg, const u64 val,
				     struct jit_ctx *ctx)
{
	u64 tmp = val;
	int shift = 0;

	emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx);
	while (shift < 32) {
		tmp >>= 16;
		shift += 16;
		emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
	}
}
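
/*
 * Note: the sequence above is always exactly three instructions (one MOVN
 * plus MOVKs for bits 16-31 and 32-47); MOVN leaves bits 63:48 all ones,
 * which is only correct under the vmalloc-address assumption stated above
 * (e.g. a hypothetical address like 0xffff800010001000 round-trips exactly).
 */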

static inline void emit_call(u64 target, struct jit_ctx *ctx)
{
	u8 tmp = bpf2a64[TMP_REG_1];

	emit_addr_mov_i64(tmp, target, ctx);
	emit(A64_BLR(tmp), ctx);
}

static inline int bpf2a64_offset(int bpf_insn, int off,
				 const struct jit_ctx *ctx)
{
	/* BPF JMP offset is relative to the next instruction */
	bpf_insn++;
	/*
	 * Whereas arm64 branch instructions encode the offset
	 * from the branch itself, so we must subtract 1 from the
	 * instruction offset.
	 */
	return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
}
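
/*
 * Worked example (not part of the original source): after the increment,
 * ctx->offset[bpf_insn] - 1 is the native index of the arm64 branch itself
 * (the last instruction emitted for the BPF jump), while
 * ctx->offset[bpf_insn + off] is the native index of the jump target, so the
 * returned value is the branch-relative distance in arm64 instructions.
 */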

static void jit_fill_hole(void *area, unsigned int size)
{
	__le32 *ptr;
	/* We are guaranteed to have aligned memory. */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
}

int bpf_arch_text_invalidate(void *dst, size_t len)
{
	if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len))
		return -EINVAL;

	return 0;
}

static inline int epilogue_offset(const struct jit_ctx *ctx)
{
	int to = ctx->epilogue_offset;
	int from = ctx->idx;

	return to - from;
}

static bool is_addsub_imm(u32 imm)
{
	/* Either imm12 or shifted imm12. */
	return !(imm & ~0xfff) || !(imm & ~0xfff000);
}
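
/*
 * Illustration (not part of the original source): 0xabc and 0xabc000 are
 * both encodable (a 12-bit immediate, optionally shifted left by 12),
 * whereas 0xabc00 is rejected because its set bits straddle the two 12-bit
 * windows.
 */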

/*
 * There are 3 types of AArch64 LDR/STR (immediate) instruction:
 * Post-index, Pre-index, Unsigned offset.
 *
 * For BPF ldr/str, the "unsigned offset" type is sufficient.
 *
 * "Unsigned offset" type LDR(immediate) format:
 *
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 1|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * "Unsigned offset" type STR(immediate) format:
 *    3                   2                   1                   0
 *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * |x x|1 1 1 0 0 1 0 0|         imm12         |    Rn   |    Rt   |
 * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 * scale
 *
 * The offset is calculated from imm12 and scale in the following way:
 *
 * offset = (u64)imm12 << scale
 */
static bool is_lsi_offset(int offset, int scale)
{
	if (offset < 0)
		return false;

	if (offset > (0xFFF << scale))
		return false;

	if (offset & ((1 << scale) - 1))
		return false;

	return true;
}
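
/*
 * Example (not part of the original source): with scale = 3 (a 64-bit
 * access), valid offsets are the multiples of 8 in [0, 0xfff << 3], i.e.
 * 0 through 32760.
 */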

/* generated prologue:
 *      bti c // if CONFIG_ARM64_BTI_KERNEL
 *      mov x9, lr
 *      nop  // POKE_OFFSET
 *      paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
 *      stp x29, lr, [sp, #-16]!
 *      mov x29, sp
 *      stp x19, x20, [sp, #-16]!
 *      stp x21, x22, [sp, #-16]!
 *      stp x25, x26, [sp, #-16]!
 *      stp x27, x28, [sp, #-16]!
 *      mov x25, sp
 *      mov tcc, #0
 *      // PROLOGUE_OFFSET
 */

#define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
#define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)

/* Offset of nop instruction in bpf prog entry to be poked */
#define POKE_OFFSET (BTI_INSNS + 1)

/* Tail call offset to jump into */
#define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 8)

static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf,
			  bool is_exception_cb, u64 arena_vm_start)
{
	const struct bpf_prog *prog = ctx->prog;
	const bool is_main_prog = !bpf_is_subprog(prog);
	const u8 r6 = bpf2a64[BPF_REG_6];
	const u8 r7 = bpf2a64[BPF_REG_7];
	const u8 r8 = bpf2a64[BPF_REG_8];
	const u8 r9 = bpf2a64[BPF_REG_9];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 tcc = bpf2a64[TCALL_CNT];
	const u8 fpb = bpf2a64[FP_BOTTOM];
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const int idx0 = ctx->idx;
	int cur_offset;

	/*
	 * BPF prog stack layout
	 *
	 *                         high
	 * original A64_SP =>   0:+-----+ BPF prologue
	 *                        |FP/LR|
	 * current A64_FP =>  -16:+-----+
	 *                        | ... | callee saved registers
	 * BPF fp register => -64:+-----+ <= (BPF_FP)
	 *                        |     |
	 *                        | ... | BPF prog stack
	 *                        |     |
	 *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
	 *                        |RSVD | padding
	 * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
	 *                        |     |
	 *                        | ... | Function call stack
	 *                        |     |
	 *                        +-----+
	 *                          low
	 *
	 */

	/* bpf function may be invoked by 3 instruction types:
	 * 1. bl, attached via freplace to bpf prog via short jump
	 * 2. br, attached via freplace to bpf prog via long jump
	 * 3. blr, working as a function pointer, used by emit_call.
	 * So BTI_JC should be used here to support both br and blr.
	 */
	emit_bti(A64_BTI_JC, ctx);

	emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
	emit(A64_NOP, ctx);

	if (!is_exception_cb) {
		/* Sign lr */
		if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
			emit(A64_PACIASP, ctx);
		/* Save FP and LR registers to stay aligned with the ARM64 AAPCS */
		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
		emit(A64_MOV(1, A64_FP, A64_SP), ctx);

		/* Save callee-saved registers */
		emit(A64_PUSH(r6, r7, A64_SP), ctx);
		emit(A64_PUSH(r8, r9, A64_SP), ctx);
		emit(A64_PUSH(fp, tcc, A64_SP), ctx);
		emit(A64_PUSH(fpb, A64_R(28), A64_SP), ctx);
	} else {
		/*
		 * Exception callback receives FP of Main Program as third
		 * parameter
		 */
		emit(A64_MOV(1, A64_FP, A64_R(2)), ctx);
		/*
		 * Main Program already pushed the frame record and the
		 * callee-saved registers. The exception callback will not push
		 * anything and re-use the main program's stack.
		 *
		 * 10 registers are on the stack
		 */
		emit(A64_SUB_I(1, A64_SP, A64_FP, 80), ctx);
	}

	/* Set up BPF prog stack base register */
	emit(A64_MOV(1, fp, A64_SP), ctx);

	if (!ebpf_from_cbpf && is_main_prog) {
		/* Initialize tail_call_cnt */
		emit(A64_MOVZ(1, tcc, 0, 0), ctx);

		cur_offset = ctx->idx - idx0;
		if (cur_offset != PROLOGUE_OFFSET) {
			pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
				    cur_offset, PROLOGUE_OFFSET);
			return -1;
		}

		/* BTI landing pad for the tail call, done with a BR */
		emit_bti(A64_BTI_J, ctx);
	}

	/*
	 * Program acting as exception boundary should save all ARM64
	 * Callee-saved registers as the exception callback needs to recover
	 * all ARM64 Callee-saved registers in its epilogue.
	 */
	if (prog->aux->exception_boundary) {
		/*
		 * As we are pushing two more registers, BPF_FP should be moved
		 * 16 bytes
		 */
		emit(A64_SUB_I(1, fp, fp, 16), ctx);
		emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
	}

	emit(A64_SUB_I(1, fpb, fp, ctx->fpb_offset), ctx);

	/* Stack must be multiples of 16B */
	ctx->stack_size = round_up(prog->aux->stack_depth, 16);

	/* Set up function call stack */
	emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	if (arena_vm_start)
		emit_a64_mov_i64(arena_vm_base, arena_vm_start, ctx);

	return 0;
}

static int out_offset = -1; /* initialized on the first pass of build_body() */
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
	const u8 r2 = bpf2a64[BPF_REG_2];
	const u8 r3 = bpf2a64[BPF_REG_3];

	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 prg = bpf2a64[TMP_REG_2];
	const u8 tcc = bpf2a64[TCALL_CNT];
	const int idx0 = ctx->idx;
#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset))
	size_t off;

	/* if (index >= array->map.max_entries)
	 *     goto out;
	 */
	off = offsetof(struct bpf_array, map.max_entries);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_LDR32(tmp, r2, tmp), ctx);
	emit(A64_MOV(0, r3, r3), ctx);
	emit(A64_CMP(0, r3, tmp), ctx);
	emit(A64_B_(A64_COND_CS, jmp_offset), ctx);

	/*
	 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
	 *     goto out;
	 * tail_call_cnt++;
	 */
	emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
	emit(A64_CMP(1, tcc, tmp), ctx);
	emit(A64_B_(A64_COND_CS, jmp_offset), ctx);
	emit(A64_ADD_I(1, tcc, tcc, 1), ctx);

	/* prog = array->ptrs[index];
	 * if (prog == NULL)
	 *     goto out;
	 */
	off = offsetof(struct bpf_array, ptrs);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_ADD(1, tmp, r2, tmp), ctx);
	emit(A64_LSL(1, prg, r3, 3), ctx);
	emit(A64_LDR64(prg, tmp, prg), ctx);
	emit(A64_CBZ(1, prg, jmp_offset), ctx);

	/* goto *(prog->bpf_func + prologue_offset); */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_a64_mov_i64(tmp, off, ctx);
	emit(A64_LDR64(tmp, prg, tmp), ctx);
	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
	emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
	emit(A64_BR(tmp), ctx);

	/* out: */
	if (out_offset == -1)
		out_offset = cur_offset;
	if (cur_offset != out_offset) {
		pr_err_once("tail_call out_offset = %d, expected %d!\n",
			    cur_offset, out_offset);
		return -1;
	}
	return 0;
#undef cur_offset
#undef jmp_offset
}
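
/*
 * Note: out_offset is latched on the first pass, and the sequence above must
 * JIT to the same number of instructions on every pass, otherwise the
 * forward branches to "out" would be mis-sized; the cur_offset comparison
 * enforces that invariant.
 */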

#ifdef CONFIG_ARM64_LSE_ATOMICS
static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
	const s16 off = insn->off;
	u8 reg = dst;

	if (off || arena) {
		if (off) {
			emit_a64_mov_i(1, tmp, off, ctx);
			emit(A64_ADD(1, tmp, tmp, dst), ctx);
			reg = tmp;
		}
		if (arena) {
			emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
			reg = tmp;
		}
	}

	switch (insn->imm) {
	/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
	case BPF_ADD:
		emit(A64_STADD(isdw, reg, src), ctx);
		break;
	case BPF_AND:
		emit(A64_MVN(isdw, tmp2, src), ctx);
		emit(A64_STCLR(isdw, reg, tmp2), ctx);
		break;
	case BPF_OR:
		emit(A64_STSET(isdw, reg, src), ctx);
		break;
	case BPF_XOR:
		emit(A64_STEOR(isdw, reg, src), ctx);
		break;
	/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
	case BPF_ADD | BPF_FETCH:
		emit(A64_LDADDAL(isdw, src, reg, src), ctx);
		break;
	case BPF_AND | BPF_FETCH:
		emit(A64_MVN(isdw, tmp2, src), ctx);
		emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
		break;
	case BPF_OR | BPF_FETCH:
		emit(A64_LDSETAL(isdw, src, reg, src), ctx);
		break;
	case BPF_XOR | BPF_FETCH:
		emit(A64_LDEORAL(isdw, src, reg, src), ctx);
		break;
	/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
	case BPF_XCHG:
		emit(A64_SWPAL(isdw, src, reg, src), ctx);
		break;
	/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
	case BPF_CMPXCHG:
		emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
		break;
	default:
		pr_err_once("unknown atomic op code %02x\n", insn->imm);
		return -EINVAL;
	}

	return 0;
}
#else
static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	return -EINVAL;
}
#endif

static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 code = insn->code;
	const u8 dst = bpf2a64[insn->dst_reg];
	const u8 src = bpf2a64[insn->src_reg];
	const u8 tmp = bpf2a64[TMP_REG_1];
	const u8 tmp2 = bpf2a64[TMP_REG_2];
	const u8 tmp3 = bpf2a64[TMP_REG_3];
	const int i = insn - ctx->prog->insnsi;
	const s32 imm = insn->imm;
	const s16 off = insn->off;
	const bool isdw = BPF_SIZE(code) == BPF_DW;
	u8 reg;
	s32 jmp_offset;

	if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
		/* ll_sc based atomics don't support unsafe pointers yet. */
		pr_err_once("unknown atomic opcode %02x\n", code);
		return -EINVAL;
	}

	if (!off) {
		reg = dst;
	} else {
		emit_a64_mov_i(1, tmp, off, ctx);
		emit(A64_ADD(1, tmp, tmp, dst), ctx);
		reg = tmp;
	}

	if (imm == BPF_ADD || imm == BPF_AND ||
	    imm == BPF_OR || imm == BPF_XOR) {
		/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
		emit(A64_LDXR(isdw, tmp2, reg), ctx);
		if (imm == BPF_ADD)
			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_AND)
			emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
		else if (imm == BPF_OR)
			emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
		else
			emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
		emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
	} else if (imm == (BPF_ADD | BPF_FETCH) ||
		   imm == (BPF_AND | BPF_FETCH) ||
		   imm == (BPF_OR | BPF_FETCH) ||
		   imm == (BPF_XOR | BPF_FETCH)) {
		/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
		const u8 ax = bpf2a64[BPF_REG_AX];

		emit(A64_MOV(isdw, ax, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		if (imm == (BPF_ADD | BPF_FETCH))
			emit(A64_ADD(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_AND | BPF_FETCH))
			emit(A64_AND(isdw, tmp2, src, ax), ctx);
		else if (imm == (BPF_OR | BPF_FETCH))
			emit(A64_ORR(isdw, tmp2, src, ax), ctx);
		else
			emit(A64_EOR(isdw, tmp2, src, ax), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		jmp_offset = -3;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_XCHG) {
		/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
		emit(A64_MOV(isdw, tmp2, src), ctx);
		emit(A64_LDXR(isdw, src, reg), ctx);
		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
		jmp_offset = -2;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else if (imm == BPF_CMPXCHG) {
		/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
		const u8 r0 = bpf2a64[BPF_REG_0];

		emit(A64_MOV(isdw, tmp2, r0), ctx);
		emit(A64_LDXR(isdw, r0, reg), ctx);
		emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
		jmp_offset = 4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
		emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
		jmp_offset = -4;
		check_imm19(jmp_offset);
		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
		emit(A64_DMB_ISH, ctx);
	} else {
		pr_err_once("unknown atomic op code %02x\n", imm);
		return -EINVAL;
	}

	return 0;
}
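
/*
 * Worked example (not part of the original source): for a plain BPF_ADD the
 * loop body is LDXR; ADD; STXR; CBNZ, so the CBNZ offset of -3 instructions
 * branches back to the LDXR to retry whenever the exclusive store fails.
 */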

void dummy_tramp(void);

asm (
"	.pushsection .text, \"ax\", @progbits\n"
"	.global dummy_tramp\n"
"	.type dummy_tramp, %function\n"
"dummy_tramp:"
#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
"	bti j\n" /* dummy_tramp is called via "br x10" */
#endif
"	mov x10, x30\n"
"	mov x30, x9\n"
"	ret x10\n"
"	.size dummy_tramp, .-dummy_tramp\n"
"	.popsection\n"
);

/* build a plt initialized like this:
 *
 * plt:
 *      ldr tmp, target
 *      br tmp
 * target:
 *      .quad dummy_tramp
 *
 * when a long jump trampoline is attached, target is filled with the
 * trampoline address, and when the trampoline is removed, target is
 * restored to dummy_tramp address.
 */
static void build_plt(struct jit_ctx *ctx)
{
	const u8 tmp = bpf2a64[TMP_REG_1];
	struct bpf_plt *plt = NULL;

	/* make sure target is 64-bit aligned */
	if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
		emit(A64_NOP, ctx);

	plt = (struct bpf_plt *)(ctx->image + ctx->idx);
	/* plt is called via bl, no BTI needed here */
	emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
	emit(A64_BR(tmp), ctx);

	if (ctx->image)
		plt->target = (u64)&dummy_tramp;
}
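
/*
 * Note: the LDR (literal) above loads from PC + 2 * AARCH64_INSN_SIZE, which
 * is exactly PLT_TARGET_OFFSET into struct bpf_plt; the alignment NOP keeps
 * the 64-bit target naturally aligned so it can be patched atomically when a
 * trampoline is attached or removed.
 */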

static void build_epilogue(struct jit_ctx *ctx, bool is_exception_cb)
{
	const u8 r0 = bpf2a64[BPF_REG_0];
	const u8 r6 = bpf2a64[BPF_REG_6];
	const u8 r7 = bpf2a64[BPF_REG_7];
	const u8 r8 = bpf2a64[BPF_REG_8];
	const u8 r9 = bpf2a64[BPF_REG_9];
	const u8 fp = bpf2a64[BPF_REG_FP];
	const u8 fpb = bpf2a64[FP_BOTTOM];

	/* We're done with BPF stack */
	emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);

	/*
	 * Program acting as exception boundary pushes R23 and R24 in addition
	 * to BPF callee-saved registers. Exception callback uses the boundary
	 * program's stack frame, so recover these extra registers in the above
	 * two cases.
	 */
	if (ctx->prog->aux->exception_boundary || is_exception_cb)
		emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx);

	/* Restore x27 and x28 */
	emit(A64_POP(fpb, A64_R(28), A64_SP), ctx);
	/* Restore fp (x25) and x26 */
	emit(A64_POP(fp, A64_R(26), A64_SP), ctx);

	/* Restore callee-saved register */
	emit(A64_POP(r8, r9, A64_SP), ctx);
	emit(A64_POP(r6, r7, A64_SP), ctx);

	/* Restore FP/LR registers */
	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);

	/* Set return value */
	emit(A64_MOV(1, A64_R(0), r0), ctx);

	/* Authenticate lr */
	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
		emit(A64_AUTIASP, ctx);

	emit(A64_RET(A64_LR), ctx);
}

#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
#define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */

bool ex_handler_bpf(const struct exception_table_entry *ex,
		    struct pt_regs *regs)
{
	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);

	if (dst_reg != DONT_CLEAR)
		regs->regs[dst_reg] = 0;
	regs->pc = (unsigned long)&ex->fixup - offset;
	return true;
}

/* For accesses to BTF pointers, add an entry to the exception table */
static int add_exception_handler(const struct bpf_insn *insn,
				 struct jit_ctx *ctx,
				 int dst_reg)
{
	off_t ins_offset;
	off_t fixup_offset;
	unsigned long pc;
	struct exception_table_entry *ex;

	if (!ctx->image)
		/* First pass */
		return 0;

	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
	    BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
	    BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
		return 0;

	if (!ctx->prog->aux->extable ||
	    WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
		return -EINVAL;

	ex = &ctx->prog->aux->extable[ctx->exentry_idx];
	pc = (unsigned long)&ctx->ro_image[ctx->idx - 1];

	/*
	 * This is the relative offset of the instruction that may fault from
	 * the exception table itself. This will be written to the exception
	 * table and if this instruction faults, the destination register will
	 * be set to '0' and the execution will jump to the next instruction.
	 */
	ins_offset = pc - (long)&ex->insn;
	if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
		return -ERANGE;

	/*
	 * Since the extable follows the program, the fixup offset is always
	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
	 * to keep things simple, and put the destination register in the upper
	 * bits. We don't need to worry about buildtime or runtime sort
	 * modifying the upper bits because the table is already sorted, and
	 * isn't part of the main exception table.
	 *
	 * The fixup_offset is set to the next instruction from the instruction
	 * that may fault. The execution will jump to this after handling the
	 * fault.
	 */
	fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
		return -ERANGE;

	/*
	 * The offsets above have been calculated using the RO buffer but we
	 * need to use the R/W buffer for writes.
	 * switch ex to rw buffer for writing.
	 */
	ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);

	ex->insn = ins_offset;

	if (BPF_CLASS(insn->code) != BPF_LDX)
		dst_reg = DONT_CLEAR;

	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

	ex->type = EX_TYPE_BPF;

	ctx->exentry_idx++;
	return 0;
}
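
/*
 * Layout recap (not part of the original source): bits 26:0 of ex->fixup
 * hold the positive distance from the instruction following the faulting one
 * to the fixup word, and bits 31:27 hold the arm64 register to clear;
 * ex_handler_bpf() above unpacks both with FIELD_GET() and resumes execution
 * right after the faulting instruction.
 */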
849 | ||
30d3d94c ZSL |
850 | /* JITs an eBPF instruction. |
851 | * Returns: | |
852 | * 0 - successfully JITed an 8-byte eBPF instruction. | |
853 | * >0 - successfully JITed a 16-byte eBPF instruction. | |
854 | * <0 - failed to JIT. | |
855 | */ | |
8c11ea5c DB |
856 | static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, |
857 | bool extra_pass) | |
e54bcde3 ZSL |
858 | { |
859 | const u8 code = insn->code; | |
339af577 PM |
860 | u8 dst = bpf2a64[insn->dst_reg]; |
861 | u8 src = bpf2a64[insn->src_reg]; | |
e54bcde3 ZSL |
862 | const u8 tmp = bpf2a64[TMP_REG_1]; |
863 | const u8 tmp2 = bpf2a64[TMP_REG_2]; | |
5b3d19b9 XK |
864 | const u8 fp = bpf2a64[BPF_REG_FP]; |
865 | const u8 fpb = bpf2a64[FP_BOTTOM]; | |
339af577 | 866 | const u8 arena_vm_base = bpf2a64[ARENA_VM_START]; |
e54bcde3 ZSL |
867 | const s16 off = insn->off; |
868 | const s32 imm = insn->imm; | |
869 | const int i = insn - ctx->prog->insnsi; | |
654b65a0 JW |
870 | const bool is64 = BPF_CLASS(code) == BPF_ALU64 || |
871 | BPF_CLASS(code) == BPF_JMP; | |
1902472b | 872 | u8 jmp_cond; |
e54bcde3 | 873 | s32 jmp_offset; |
fd49591c | 874 | u32 a64_insn; |
5b3d19b9 XK |
875 | u8 src_adj; |
876 | u8 dst_adj; | |
877 | int off_adj; | |
80083428 | 878 | int ret; |
cc88f540 | 879 | bool sign_extend; |
e54bcde3 ZSL |
880 | |
881 | switch (code) { | |
882 | /* dst = src */ | |
883 | case BPF_ALU | BPF_MOV | BPF_X: | |
884 | case BPF_ALU64 | BPF_MOV | BPF_X: | |
4dd31243 PM |
885 | if (insn_is_cast_user(insn)) { |
886 | emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits | |
887 | emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx); | |
888 | emit(A64_LSL(1, dst, dst, 32), ctx); | |
889 | emit(A64_CBZ(1, tmp, 2), ctx); | |
890 | emit(A64_ORR(1, tmp, dst, tmp), ctx); | |
891 | emit(A64_MOV(1, dst, tmp), ctx); | |
892 | break; | |
7a4c3222 PM |
893 | } else if (insn_is_mov_percpu_addr(insn)) { |
894 | if (dst != src) | |
895 | emit(A64_MOV(1, dst, src), ctx); | |
896 | if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN)) | |
897 | emit(A64_MRS_TPIDR_EL2(tmp), ctx); | |
898 | else | |
899 | emit(A64_MRS_TPIDR_EL1(tmp), ctx); | |
900 | emit(A64_ADD(1, dst, dst, tmp), ctx); | |
901 | break; | |
4dd31243 | 902 | } |
bb0a1d6b XK |
903 | switch (insn->off) { |
904 | case 0: | |
905 | emit(A64_MOV(is64, dst, src), ctx); | |
906 | break; | |
907 | case 8: | |
908 | emit(A64_SXTB(is64, dst, src), ctx); | |
909 | break; | |
910 | case 16: | |
911 | emit(A64_SXTH(is64, dst, src), ctx); | |
912 | break; | |
913 | case 32: | |
914 | emit(A64_SXTW(is64, dst, src), ctx); | |
915 | break; | |
916 | } | |
e54bcde3 ZSL |
917 | break; |
918 | /* dst = dst OP src */ | |
919 | case BPF_ALU | BPF_ADD | BPF_X: | |
920 | case BPF_ALU64 | BPF_ADD | BPF_X: | |
921 | emit(A64_ADD(is64, dst, dst, src), ctx); | |
922 | break; | |
923 | case BPF_ALU | BPF_SUB | BPF_X: | |
924 | case BPF_ALU64 | BPF_SUB | BPF_X: | |
925 | emit(A64_SUB(is64, dst, dst, src), ctx); | |
926 | break; | |
927 | case BPF_ALU | BPF_AND | BPF_X: | |
928 | case BPF_ALU64 | BPF_AND | BPF_X: | |
929 | emit(A64_AND(is64, dst, dst, src), ctx); | |
930 | break; | |
931 | case BPF_ALU | BPF_OR | BPF_X: | |
932 | case BPF_ALU64 | BPF_OR | BPF_X: | |
933 | emit(A64_ORR(is64, dst, dst, src), ctx); | |
934 | break; | |
935 | case BPF_ALU | BPF_XOR | BPF_X: | |
936 | case BPF_ALU64 | BPF_XOR | BPF_X: | |
937 | emit(A64_EOR(is64, dst, dst, src), ctx); | |
938 | break; | |
939 | case BPF_ALU | BPF_MUL | BPF_X: | |
940 | case BPF_ALU64 | BPF_MUL | BPF_X: | |
941 | emit(A64_MUL(is64, dst, dst, src), ctx); | |
942 | break; | |
943 | case BPF_ALU | BPF_DIV | BPF_X: | |
944 | case BPF_ALU64 | BPF_DIV | BPF_X: | |
68b18191 XK |
945 | if (!off) |
946 | emit(A64_UDIV(is64, dst, dst, src), ctx); | |
947 | else | |
948 | emit(A64_SDIV(is64, dst, dst, src), ctx); | |
119220d8 | 949 | break; |
e54bcde3 ZSL |
950 | case BPF_ALU | BPF_MOD | BPF_X: |
951 | case BPF_ALU64 | BPF_MOD | BPF_X: | |
68b18191 XK |
952 | if (!off) |
953 | emit(A64_UDIV(is64, tmp, dst, src), ctx); | |
954 | else | |
955 | emit(A64_SDIV(is64, tmp, dst, src), ctx); | |
119220d8 | 956 | emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); |
e54bcde3 | 957 | break; |
d65a634a ZSL |
958 | case BPF_ALU | BPF_LSH | BPF_X: |
959 | case BPF_ALU64 | BPF_LSH | BPF_X: | |
960 | emit(A64_LSLV(is64, dst, dst, src), ctx); | |
961 | break; | |
962 | case BPF_ALU | BPF_RSH | BPF_X: | |
963 | case BPF_ALU64 | BPF_RSH | BPF_X: | |
964 | emit(A64_LSRV(is64, dst, dst, src), ctx); | |
965 | break; | |
966 | case BPF_ALU | BPF_ARSH | BPF_X: | |
967 | case BPF_ALU64 | BPF_ARSH | BPF_X: | |
968 | emit(A64_ASRV(is64, dst, dst, src), ctx); | |
969 | break; | |
e54bcde3 ZSL |
970 | /* dst = -dst */ |
971 | case BPF_ALU | BPF_NEG: | |
972 | case BPF_ALU64 | BPF_NEG: | |
973 | emit(A64_NEG(is64, dst, dst), ctx); | |
974 | break; | |
975 | /* dst = BSWAP##imm(dst) */ | |
976 | case BPF_ALU | BPF_END | BPF_FROM_LE: | |
977 | case BPF_ALU | BPF_END | BPF_FROM_BE: | |
1104247f | 978 | case BPF_ALU64 | BPF_END | BPF_FROM_LE: |
e54bcde3 | 979 | #ifdef CONFIG_CPU_BIG_ENDIAN |
1104247f | 980 | if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE) |
d63903bb | 981 | goto emit_bswap_uxt; |
e54bcde3 | 982 | #else /* !CONFIG_CPU_BIG_ENDIAN */ |
1104247f | 983 | if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE) |
d63903bb | 984 | goto emit_bswap_uxt; |
e54bcde3 ZSL |
985 | #endif |
986 | switch (imm) { | |
987 | case 16: | |
988 | emit(A64_REV16(is64, dst, dst), ctx); | |
d63903bb XW |
989 | /* zero-extend 16 bits into 64 bits */ |
990 | emit(A64_UXTH(is64, dst, dst), ctx); | |
e54bcde3 ZSL |
991 | break; |
992 | case 32: | |
a51cd6bf | 993 | emit(A64_REV32(0, dst, dst), ctx); |
d63903bb | 994 | /* upper 32 bits already cleared */ |
e54bcde3 ZSL |
995 | break; |
996 | case 64: | |
997 | emit(A64_REV64(dst, dst), ctx); | |
998 | break; | |
999 | } | |
1000 | break; | |
d63903bb XW |
1001 | emit_bswap_uxt: |
1002 | switch (imm) { | |
1003 | case 16: | |
1004 | /* zero-extend 16 bits into 64 bits */ | |
1005 | emit(A64_UXTH(is64, dst, dst), ctx); | |
1006 | break; | |
1007 | case 32: | |
1008 | /* zero-extend 32 bits into 64 bits */ | |
1009 | emit(A64_UXTW(is64, dst, dst), ctx); | |
1010 | break; | |
1011 | case 64: | |
1012 | /* nop */ | |
1013 | break; | |
1014 | } | |
1015 | break; | |
e54bcde3 ZSL |
1016 | /* dst = imm */ |
1017 | case BPF_ALU | BPF_MOV | BPF_K: | |
1018 | case BPF_ALU64 | BPF_MOV | BPF_K: | |
1019 | emit_a64_mov_i(is64, dst, imm, ctx); | |
1020 | break; | |
1021 | /* dst = dst OP imm */ | |
1022 | case BPF_ALU | BPF_ADD | BPF_K: | |
1023 | case BPF_ALU64 | BPF_ADD | BPF_K: | |
fd868f14 LN |
1024 | if (is_addsub_imm(imm)) { |
1025 | emit(A64_ADD_I(is64, dst, dst, imm), ctx); | |
1026 | } else if (is_addsub_imm(-imm)) { | |
1027 | emit(A64_SUB_I(is64, dst, dst, -imm), ctx); | |
1028 | } else { | |
1029 | emit_a64_mov_i(is64, tmp, imm, ctx); | |
1030 | emit(A64_ADD(is64, dst, dst, tmp), ctx); | |
1031 | } | |
e54bcde3 ZSL |
1032 | break; |
1033 | case BPF_ALU | BPF_SUB | BPF_K: | |
1034 | case BPF_ALU64 | BPF_SUB | BPF_K: | |
fd868f14 LN |
1035 | if (is_addsub_imm(imm)) { |
1036 | emit(A64_SUB_I(is64, dst, dst, imm), ctx); | |
1037 | } else if (is_addsub_imm(-imm)) { | |
1038 | emit(A64_ADD_I(is64, dst, dst, -imm), ctx); | |
1039 | } else { | |
1040 | emit_a64_mov_i(is64, tmp, imm, ctx); | |
1041 | emit(A64_SUB(is64, dst, dst, tmp), ctx); | |
1042 | } | |
e54bcde3 ZSL |
1043 | break; |
1044 | case BPF_ALU | BPF_AND | BPF_K: | |
1045 | case BPF_ALU64 | BPF_AND | BPF_K: | |
fd49591c LN |
1046 | a64_insn = A64_AND_I(is64, dst, dst, imm); |
1047 | if (a64_insn != AARCH64_BREAK_FAULT) { | |
1048 | emit(a64_insn, ctx); | |
1049 | } else { | |
1050 | emit_a64_mov_i(is64, tmp, imm, ctx); | |
1051 | emit(A64_AND(is64, dst, dst, tmp), ctx); | |
1052 | } | |
e54bcde3 ZSL |
1053 | break; |
1054 | case BPF_ALU | BPF_OR | BPF_K: | |
1055 | case BPF_ALU64 | BPF_OR | BPF_K: | |
fd49591c LN |
1056 | a64_insn = A64_ORR_I(is64, dst, dst, imm); |
1057 | if (a64_insn != AARCH64_BREAK_FAULT) { | |
1058 | emit(a64_insn, ctx); | |
1059 | } else { | |
1060 | emit_a64_mov_i(is64, tmp, imm, ctx); | |
1061 | emit(A64_ORR(is64, dst, dst, tmp), ctx); | |
1062 | } | |
e54bcde3 ZSL |
1063 | break; |
1064 | case BPF_ALU | BPF_XOR | BPF_K: | |
1065 | case BPF_ALU64 | BPF_XOR | BPF_K: | |
fd49591c LN |
1066 | a64_insn = A64_EOR_I(is64, dst, dst, imm); |
1067 | if (a64_insn != AARCH64_BREAK_FAULT) { | |
1068 | emit(a64_insn, ctx); | |
1069 | } else { | |
1070 | emit_a64_mov_i(is64, tmp, imm, ctx); | |
1071 | emit(A64_EOR(is64, dst, dst, tmp), ctx); | |
1072 | } | |
e54bcde3 ZSL |
1073 | break; |
1074 | case BPF_ALU | BPF_MUL | BPF_K: | |
1075 | case BPF_ALU64 | BPF_MUL | BPF_K: | |
e54bcde3 ZSL |
1076 | emit_a64_mov_i(is64, tmp, imm, ctx); |
1077 | emit(A64_MUL(is64, dst, dst, tmp), ctx); | |
1078 | break; | |
1079 | case BPF_ALU | BPF_DIV | BPF_K: | |
1080 | case BPF_ALU64 | BPF_DIV | BPF_K: | |
e54bcde3 | 1081 | emit_a64_mov_i(is64, tmp, imm, ctx); |
68b18191 XK |
1082 | if (!off) |
1083 | emit(A64_UDIV(is64, dst, dst, tmp), ctx); | |
1084 | else | |
1085 | emit(A64_SDIV(is64, dst, dst, tmp), ctx); | |
e54bcde3 ZSL |
1086 | break; |
1087 | case BPF_ALU | BPF_MOD | BPF_K: | |
1088 | case BPF_ALU64 | BPF_MOD | BPF_K: | |
e54bcde3 | 1089 | emit_a64_mov_i(is64, tmp2, imm, ctx); |
68b18191 XK |
1090 | if (!off) |
1091 | emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); | |
1092 | else | |
1093 | emit(A64_SDIV(is64, tmp, dst, tmp2), ctx); | |
504792e0 | 1094 | emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx); |
e54bcde3 ZSL |
1095 | break; |
1096 | case BPF_ALU | BPF_LSH | BPF_K: | |
1097 | case BPF_ALU64 | BPF_LSH | BPF_K: | |
1098 | emit(A64_LSL(is64, dst, dst, imm), ctx); | |
1099 | break; | |
1100 | case BPF_ALU | BPF_RSH | BPF_K: | |
1101 | case BPF_ALU64 | BPF_RSH | BPF_K: | |
1102 | emit(A64_LSR(is64, dst, dst, imm), ctx); | |
1103 | break; | |
1104 | case BPF_ALU | BPF_ARSH | BPF_K: | |
1105 | case BPF_ALU64 | BPF_ARSH | BPF_K: | |
1106 | emit(A64_ASR(is64, dst, dst, imm), ctx); | |
1107 | break; | |
1108 | ||
e54bcde3 ZSL |
1109 | /* JUMP off */ |
1110 | case BPF_JMP | BPF_JA: | |
c32b6ee5 XK |
1111 | case BPF_JMP32 | BPF_JA: |
1112 | if (BPF_CLASS(code) == BPF_JMP) | |
1113 | jmp_offset = bpf2a64_offset(i, off, ctx); | |
1114 | else | |
1115 | jmp_offset = bpf2a64_offset(i, imm, ctx); | |
e54bcde3 ZSL |
1116 | check_imm26(jmp_offset); |
1117 | emit(A64_B(jmp_offset), ctx); | |
1118 | break; | |
1119 | /* IF (dst COND src) JUMP off */ | |
1120 | case BPF_JMP | BPF_JEQ | BPF_X: | |
1121 | case BPF_JMP | BPF_JGT | BPF_X: | |
c362b2f3 | 1122 | case BPF_JMP | BPF_JLT | BPF_X: |
e54bcde3 | 1123 | case BPF_JMP | BPF_JGE | BPF_X: |
c362b2f3 | 1124 | case BPF_JMP | BPF_JLE | BPF_X: |
e54bcde3 ZSL |
1125 | case BPF_JMP | BPF_JNE | BPF_X: |
1126 | case BPF_JMP | BPF_JSGT | BPF_X: | |
c362b2f3 | 1127 | case BPF_JMP | BPF_JSLT | BPF_X: |
e54bcde3 | 1128 | case BPF_JMP | BPF_JSGE | BPF_X: |
c362b2f3 | 1129 | case BPF_JMP | BPF_JSLE | BPF_X: |
654b65a0 JW |
1130 | case BPF_JMP32 | BPF_JEQ | BPF_X: |
1131 | case BPF_JMP32 | BPF_JGT | BPF_X: | |
1132 | case BPF_JMP32 | BPF_JLT | BPF_X: | |
1133 | case BPF_JMP32 | BPF_JGE | BPF_X: | |
1134 | case BPF_JMP32 | BPF_JLE | BPF_X: | |
1135 | case BPF_JMP32 | BPF_JNE | BPF_X: | |
1136 | case BPF_JMP32 | BPF_JSGT | BPF_X: | |
1137 | case BPF_JMP32 | BPF_JSLT | BPF_X: | |
1138 | case BPF_JMP32 | BPF_JSGE | BPF_X: | |
1139 | case BPF_JMP32 | BPF_JSLE | BPF_X: | |
1140 | emit(A64_CMP(is64, dst, src), ctx); | |
e54bcde3 | 1141 | emit_cond_jmp: |
32f6865c | 1142 | jmp_offset = bpf2a64_offset(i, off, ctx); |
e54bcde3 ZSL |
1143 | check_imm19(jmp_offset); |
1144 | switch (BPF_OP(code)) { | |
1145 | case BPF_JEQ: | |
1146 | jmp_cond = A64_COND_EQ; | |
1147 | break; | |
1148 | case BPF_JGT: | |
1149 | jmp_cond = A64_COND_HI; | |
1150 | break; | |
c362b2f3 DB |
1151 | case BPF_JLT: |
1152 | jmp_cond = A64_COND_CC; | |
1153 | break; | |
e54bcde3 ZSL |
1154 | case BPF_JGE: |
1155 | jmp_cond = A64_COND_CS; | |
1156 | break; | |
c362b2f3 DB |
1157 | case BPF_JLE: |
1158 | jmp_cond = A64_COND_LS; | |
1159 | break; | |
98397fc5 | 1160 | case BPF_JSET: |
e54bcde3 ZSL |
1161 | case BPF_JNE: |
1162 | jmp_cond = A64_COND_NE; | |
1163 | break; | |
1164 | case BPF_JSGT: | |
1165 | jmp_cond = A64_COND_GT; | |
1166 | break; | |
c362b2f3 DB |
1167 | case BPF_JSLT: |
1168 | jmp_cond = A64_COND_LT; | |
1169 | break; | |
e54bcde3 ZSL |
1170 | case BPF_JSGE: |
1171 | jmp_cond = A64_COND_GE; | |
1172 | break; | |
c362b2f3 DB |
1173 | case BPF_JSLE: |
1174 | jmp_cond = A64_COND_LE; | |
1175 | break; | |
e54bcde3 ZSL |
1176 | default: |
1177 | return -EFAULT; | |
1178 | } | |
1179 | emit(A64_B_(jmp_cond, jmp_offset), ctx); | |
1180 | break; | |
1181 | case BPF_JMP | BPF_JSET | BPF_X: | |
654b65a0 JW |
1182 | case BPF_JMP32 | BPF_JSET | BPF_X: |
1183 | emit(A64_TST(is64, dst, src), ctx); | |
e54bcde3 ZSL |
1184 | goto emit_cond_jmp; |
1185 | /* IF (dst COND imm) JUMP off */ | |
1186 | case BPF_JMP | BPF_JEQ | BPF_K: | |
1187 | case BPF_JMP | BPF_JGT | BPF_K: | |
c362b2f3 | 1188 | case BPF_JMP | BPF_JLT | BPF_K: |
e54bcde3 | 1189 | case BPF_JMP | BPF_JGE | BPF_K: |
c362b2f3 | 1190 | case BPF_JMP | BPF_JLE | BPF_K: |
e54bcde3 ZSL |
1191 | case BPF_JMP | BPF_JNE | BPF_K: |
1192 | case BPF_JMP | BPF_JSGT | BPF_K: | |
c362b2f3 | 1193 | case BPF_JMP | BPF_JSLT | BPF_K: |
e54bcde3 | 1194 | case BPF_JMP | BPF_JSGE | BPF_K: |
c362b2f3 | 1195 | case BPF_JMP | BPF_JSLE | BPF_K: |
654b65a0 JW |
1196 | case BPF_JMP32 | BPF_JEQ | BPF_K: |
1197 | case BPF_JMP32 | BPF_JGT | BPF_K: | |
1198 | case BPF_JMP32 | BPF_JLT | BPF_K: | |
1199 | case BPF_JMP32 | BPF_JGE | BPF_K: | |
1200 | case BPF_JMP32 | BPF_JLE | BPF_K: | |
1201 | case BPF_JMP32 | BPF_JNE | BPF_K: | |
1202 | case BPF_JMP32 | BPF_JSGT | BPF_K: | |
1203 | case BPF_JMP32 | BPF_JSLT | BPF_K: | |
1204 | case BPF_JMP32 | BPF_JSGE | BPF_K: | |
1205 | case BPF_JMP32 | BPF_JSLE | BPF_K: | |
fd868f14 LN |
1206 | if (is_addsub_imm(imm)) { |
1207 | emit(A64_CMP_I(is64, dst, imm), ctx); | |
1208 | } else if (is_addsub_imm(-imm)) { | |
1209 | emit(A64_CMN_I(is64, dst, -imm), ctx); | |
1210 | } else { | |
1211 | emit_a64_mov_i(is64, tmp, imm, ctx); | |
1212 | emit(A64_CMP(is64, dst, tmp), ctx); | |
1213 | } | |
e54bcde3 ZSL |
1214 | goto emit_cond_jmp; |
1215 | case BPF_JMP | BPF_JSET | BPF_K: | |
654b65a0 | 1216 | case BPF_JMP32 | BPF_JSET | BPF_K: |
fd49591c LN |
1217 | a64_insn = A64_TST_I(is64, dst, imm); |
1218 | if (a64_insn != AARCH64_BREAK_FAULT) { | |
1219 | emit(a64_insn, ctx); | |
1220 | } else { | |
1221 | emit_a64_mov_i(is64, tmp, imm, ctx); | |
1222 | emit(A64_TST(is64, dst, tmp), ctx); | |
1223 | } | |
e54bcde3 ZSL |
1224 | goto emit_cond_jmp; |
1225 | /* function call */ | |
1226 | case BPF_JMP | BPF_CALL: | |
1227 | { | |
1228 | const u8 r0 = bpf2a64[BPF_REG_0]; | |
8c11ea5c DB |
1229 | bool func_addr_fixed; |
1230 | u64 func_addr; | |
75fe4c0b PM |
1231 | u32 cpu_offset; |
1232 | ||
1233 | /* Implement helper call to bpf_get_smp_processor_id() inline */ | |
1234 | if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { | |
1235 | cpu_offset = offsetof(struct thread_info, cpu); | |
1236 | ||
1237 | emit(A64_MRS_SP_EL0(tmp), ctx); | |
1238 | if (is_lsi_offset(cpu_offset, 2)) { | |
1239 | emit(A64_LDR32I(r0, tmp, cpu_offset), ctx); | |
1240 | } else { | |
1241 | emit_a64_mov_i(1, tmp2, cpu_offset, ctx); | |
1242 | emit(A64_LDR32(r0, tmp, tmp2), ctx); | |
1243 | } | |
1244 | break; | |
1245 | } | |
e54bcde3 | 1246 | |
8c11ea5c DB |
1247 | ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, |
1248 | &func_addr, &func_addr_fixed); | |
1249 | if (ret < 0) | |
1250 | return ret; | |
efc9909f | 1251 | emit_call(func_addr, ctx); |
e54bcde3 | 1252 | emit(A64_MOV(1, r0, A64_R(0)), ctx); |
e54bcde3 ZSL |
1253 | break; |
1254 | } | |
ddb55992 | 1255 | /* tail call */ |
71189fa9 | 1256 | case BPF_JMP | BPF_TAIL_CALL: |
ddb55992 ZSL |
1257 | if (emit_bpf_tail_call(ctx)) |
1258 | return -EFAULT; | |
1259 | break; | |
e54bcde3 ZSL |
1260 | /* function return */ |
1261 | case BPF_JMP | BPF_EXIT: | |
51c9fbb1 ZSL |
1262 | /* Optimization: when last instruction is EXIT, |
1263 | simply fallthrough to epilogue. */ | |
e54bcde3 ZSL |
1264 | if (i == ctx->prog->len - 1) |
1265 | break; | |
1266 | jmp_offset = epilogue_offset(ctx); | |
1267 | check_imm26(jmp_offset); | |
1268 | emit(A64_B(jmp_offset), ctx); | |
1269 | break; | |
1270 | ||
30d3d94c ZSL |
1271 | /* dst = imm64 */ |
1272 | case BPF_LD | BPF_IMM | BPF_DW: | |
1273 | { | |
1274 | const struct bpf_insn insn1 = insn[1]; | |
1275 | u64 imm64; | |
1276 | ||
1e4df6b7 | 1277 | imm64 = (u64)insn1.imm << 32 | (u32)imm; |
e4a41c2c HT |
1278 | if (bpf_pseudo_func(insn)) |
1279 | emit_addr_mov_i64(dst, imm64, ctx); | |
1280 | else | |
1281 | emit_a64_mov_i64(dst, imm64, ctx); | |
30d3d94c ZSL |
1282 | |
1283 | return 1; | |
1284 | } | |
1285 | ||
cc88f540 | 1286 | /* LDX: dst = (u64)*(unsigned size *)(src + off) */ |
e54bcde3 ZSL |
1287 | case BPF_LDX | BPF_MEM | BPF_W: |
1288 | case BPF_LDX | BPF_MEM | BPF_H: | |
1289 | case BPF_LDX | BPF_MEM | BPF_B: | |
1290 | case BPF_LDX | BPF_MEM | BPF_DW: | |
80083428 JPB |
1291 | case BPF_LDX | BPF_PROBE_MEM | BPF_DW: |
1292 | case BPF_LDX | BPF_PROBE_MEM | BPF_W: | |
1293 | case BPF_LDX | BPF_PROBE_MEM | BPF_H: | |
1294 | case BPF_LDX | BPF_PROBE_MEM | BPF_B: | |
cc88f540 XK |
1295 | /* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */ |
1296 | case BPF_LDX | BPF_MEMSX | BPF_B: | |
1297 | case BPF_LDX | BPF_MEMSX | BPF_H: | |
1298 | case BPF_LDX | BPF_MEMSX | BPF_W: | |
1299 | case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: | |
1300 | case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: | |
1301 | case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: | |
339af577 PM |
1302 | case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: |
1303 | case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: | |
1304 | case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: | |
1305 | case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: | |
1306 | if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { | |
1307 | emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx); | |
1308 | src = tmp2; | |
1309 | } | |
1310 | if (ctx->fpb_offset > 0 && src == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { | |
5b3d19b9 XK |
1311 | src_adj = fpb; |
1312 | off_adj = off + ctx->fpb_offset; | |
1313 | } else { | |
1314 | src_adj = src; | |
1315 | off_adj = off; | |
1316 | } | |
cc88f540 XK |
1317 | sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX || |
1318 | BPF_MODE(insn->code) == BPF_PROBE_MEMSX); | |
e54bcde3 ZSL |
1319 | switch (BPF_SIZE(code)) { |
1320 | case BPF_W: | |
5b3d19b9 | 1321 | if (is_lsi_offset(off_adj, 2)) { |
cc88f540 XK |
1322 | if (sign_extend) |
1323 | emit(A64_LDRSWI(dst, src_adj, off_adj), ctx); | |
1324 | else | |
1325 | emit(A64_LDR32I(dst, src_adj, off_adj), ctx); | |
7db6c0f1 XK |
1326 | } else { |
1327 | emit_a64_mov_i(1, tmp, off, ctx); | |
cc88f540 | 1328 | if (sign_extend) |
114b5b3b | 1329 | emit(A64_LDRSW(dst, src, tmp), ctx); |
cc88f540 XK |
1330 | else |
1331 | emit(A64_LDR32(dst, src, tmp), ctx); | |
7db6c0f1 | 1332 | } |
e54bcde3 ZSL |
1333 | break; |
1334 | case BPF_H: | |
5b3d19b9 | 1335 | if (is_lsi_offset(off_adj, 1)) { |
cc88f540 XK |
1336 | if (sign_extend) |
1337 | emit(A64_LDRSHI(dst, src_adj, off_adj), ctx); | |
1338 | else | |
1339 | emit(A64_LDRHI(dst, src_adj, off_adj), ctx); | |
7db6c0f1 XK |
1340 | } else { |
1341 | emit_a64_mov_i(1, tmp, off, ctx); | |
cc88f540 XK |
1342 | if (sign_extend) |
1343 | emit(A64_LDRSH(dst, src, tmp), ctx); | |
1344 | else | |
1345 | emit(A64_LDRH(dst, src, tmp), ctx); | |
7db6c0f1 | 1346 | } |
e54bcde3 ZSL |
1347 | break; |
1348 | case BPF_B: | |
5b3d19b9 | 1349 | if (is_lsi_offset(off_adj, 0)) { |
cc88f540 XK |
1350 | if (sign_extend) |
1351 | emit(A64_LDRSBI(dst, src_adj, off_adj), ctx); | |
1352 | else | |
1353 | emit(A64_LDRBI(dst, src_adj, off_adj), ctx); | |
7db6c0f1 XK |
1354 | } else { |
1355 | emit_a64_mov_i(1, tmp, off, ctx); | |
cc88f540 XK |
1356 | if (sign_extend) |
1357 | emit(A64_LDRSB(dst, src, tmp), ctx); | |
1358 | else | |
1359 | emit(A64_LDRB(dst, src, tmp), ctx); | |
7db6c0f1 | 1360 | } |
e54bcde3 ZSL |
1361 | break; |
1362 | case BPF_DW: | |
5b3d19b9 XK |
1363 | if (is_lsi_offset(off_adj, 3)) { |
1364 | emit(A64_LDR64I(dst, src_adj, off_adj), ctx); | |
7db6c0f1 XK |
1365 | } else { |
1366 | emit_a64_mov_i(1, tmp, off, ctx); | |
1367 | emit(A64_LDR64(dst, src, tmp), ctx); | |
1368 | } | |
e54bcde3 ZSL |
1369 | break; |
1370 | } | |
80083428 JPB |
1371 | |
1372 | ret = add_exception_handler(insn, ctx, dst); | |
1373 | if (ret) | |
1374 | return ret; | |
e54bcde3 ZSL |
1375 | break; |
1376 | ||
f5e81d11 DB |
1377 | /* speculation barrier */ |
1378 | case BPF_ST | BPF_NOSPEC: | |
1379 | /* | |
1380 | * Nothing required here. | |
1381 | * | |
1382 | * In case of arm64, we rely on the firmware mitigation of | |
1383 | * Speculative Store Bypass as controlled via the ssbd kernel | |
1384 | * parameter. Whenever the mitigation is enabled, it works | |
1385 | * for all of the kernel code with no need to provide any | |
1386 | * additional instructions. | |
1387 | */ | |
1388 | break; | |
1389 | ||
e54bcde3 ZSL |
1390 | /* ST: *(size *)(dst + off) = imm */ |
1391 | case BPF_ST | BPF_MEM | BPF_W: | |
1392 | case BPF_ST | BPF_MEM | BPF_H: | |
1393 | case BPF_ST | BPF_MEM | BPF_B: | |
1394 | case BPF_ST | BPF_MEM | BPF_DW: | |
339af577 PM |
1395 | case BPF_ST | BPF_PROBE_MEM32 | BPF_B: |
1396 | case BPF_ST | BPF_PROBE_MEM32 | BPF_H: | |
1397 | case BPF_ST | BPF_PROBE_MEM32 | BPF_W: | |
1398 | case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: | |
1399 | if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { | |
1400 | emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); | |
1401 | dst = tmp2; | |
1402 | } | |
1403 | if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { | |
5b3d19b9 XK |
1404 | dst_adj = fpb; |
1405 | off_adj = off + ctx->fpb_offset; | |
1406 | } else { | |
1407 | dst_adj = dst; | |
1408 | off_adj = off; | |
1409 | } | |
df849ba3 | 1410 | /* Load imm to a register then store it */ |
df849ba3 YS |
1411 | emit_a64_mov_i(1, tmp, imm, ctx); |
1412 | switch (BPF_SIZE(code)) { | |
1413 | case BPF_W: | |
5b3d19b9 XK |
1414 | if (is_lsi_offset(off_adj, 2)) { |
1415 | emit(A64_STR32I(tmp, dst_adj, off_adj), ctx); | |
7db6c0f1 XK |
1416 | } else { |
1417 | emit_a64_mov_i(1, tmp2, off, ctx); | |
1418 | emit(A64_STR32(tmp, dst, tmp2), ctx); | |
1419 | } | |
df849ba3 YS |
1420 | break; |
1421 | case BPF_H: | |
5b3d19b9 XK |
1422 | if (is_lsi_offset(off_adj, 1)) { |
1423 | emit(A64_STRHI(tmp, dst_adj, off_adj), ctx); | |
7db6c0f1 XK |
1424 | } else { |
1425 | emit_a64_mov_i(1, tmp2, off, ctx); | |
1426 | emit(A64_STRH(tmp, dst, tmp2), ctx); | |
1427 | } | |
df849ba3 YS |
1428 | break; |
1429 | case BPF_B: | |
5b3d19b9 XK |
1430 | if (is_lsi_offset(off_adj, 0)) { |
1431 | emit(A64_STRBI(tmp, dst_adj, off_adj), ctx); | |
7db6c0f1 XK |
1432 | } else { |
1433 | emit_a64_mov_i(1, tmp2, off, ctx); | |
1434 | emit(A64_STRB(tmp, dst, tmp2), ctx); | |
1435 | } | |
df849ba3 YS |
1436 | break; |
1437 | case BPF_DW: | |
5b3d19b9 XK |
1438 | if (is_lsi_offset(off_adj, 3)) { |
1439 | emit(A64_STR64I(tmp, dst_adj, off_adj), ctx); | |
7db6c0f1 XK |
1440 | } else { |
1441 | emit_a64_mov_i(1, tmp2, off, ctx); | |
1442 | emit(A64_STR64(tmp, dst, tmp2), ctx); | |
1443 | } | |
df849ba3 YS |
1444 | break; |
1445 | } | |
339af577 PM |
1446 | |
1447 | ret = add_exception_handler(insn, ctx, dst); | |
1448 | if (ret) | |
1449 | return ret; | |
df849ba3 | 1450 | break; |
e54bcde3 ZSL |
1451 | |
1452 | /* STX: *(size *)(dst + off) = src */ | |
1453 | case BPF_STX | BPF_MEM | BPF_W: | |
1454 | case BPF_STX | BPF_MEM | BPF_H: | |
1455 | case BPF_STX | BPF_MEM | BPF_B: | |
1456 | case BPF_STX | BPF_MEM | BPF_DW: | |
339af577 PM |
1457 | case BPF_STX | BPF_PROBE_MEM32 | BPF_B: |
1458 | case BPF_STX | BPF_PROBE_MEM32 | BPF_H: | |
1459 | case BPF_STX | BPF_PROBE_MEM32 | BPF_W: | |
1460 | case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: | |
1461 | if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { | |
1462 | emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx); | |
1463 | dst = tmp2; | |
1464 | } | |
1465 | if (ctx->fpb_offset > 0 && dst == fp && BPF_MODE(insn->code) != BPF_PROBE_MEM32) { | |
5b3d19b9 XK |
1466 | dst_adj = fpb; |
1467 | off_adj = off + ctx->fpb_offset; | |
1468 | } else { | |
1469 | dst_adj = dst; | |
1470 | off_adj = off; | |
1471 | } | |
e54bcde3 ZSL |
1472 | switch (BPF_SIZE(code)) { |
1473 | case BPF_W: | |
5b3d19b9 XK |
1474 | if (is_lsi_offset(off_adj, 2)) { |
1475 | emit(A64_STR32I(src, dst_adj, off_adj), ctx); | |
7db6c0f1 XK |
1476 | } else { |
1477 | emit_a64_mov_i(1, tmp, off, ctx); | |
1478 | emit(A64_STR32(src, dst, tmp), ctx); | |
1479 | } | |
e54bcde3 ZSL |
1480 | break; |
1481 | case BPF_H: | |
5b3d19b9 XK |
1482 | if (is_lsi_offset(off_adj, 1)) { |
1483 | emit(A64_STRHI(src, dst_adj, off_adj), ctx); | |
7db6c0f1 XK |
1484 | } else { |
1485 | emit_a64_mov_i(1, tmp, off, ctx); | |
1486 | emit(A64_STRH(src, dst, tmp), ctx); | |
1487 | } | |
e54bcde3 ZSL |
1488 | break; |
1489 | case BPF_B: | |
5b3d19b9 XK |
1490 | if (is_lsi_offset(off_adj, 0)) { |
1491 | emit(A64_STRBI(src, dst_adj, off_adj), ctx); | |
7db6c0f1 XK |
1492 | } else { |
1493 | emit_a64_mov_i(1, tmp, off, ctx); | |
1494 | emit(A64_STRB(src, dst, tmp), ctx); | |
1495 | } | |
e54bcde3 ZSL |
1496 | break; |
1497 | case BPF_DW: | |
5b3d19b9 XK |
1498 | if (is_lsi_offset(off_adj, 3)) { |
1499 | emit(A64_STR64I(src, dst_adj, off_adj), ctx); | |
7db6c0f1 XK |
1500 | } else { |
1501 | emit_a64_mov_i(1, tmp, off, ctx); | |
1502 | emit(A64_STR64(src, dst, tmp), ctx); | |
1503 | } | |
e54bcde3 ZSL |
1504 | break; |
1505 | } | |
339af577 PM |
1506 | |
1507 | ret = add_exception_handler(insn, ctx, dst); | |
1508 | if (ret) | |
1509 | return ret; | |
e54bcde3 | 1510 | break; |
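/* Editor's sketch (illustrative, not part of the kernel source; SRC/DST
 * stand for the A64 registers the BPF regs are mapped to): when the offset
 * fits no immediate form, e.g. BPF_STX | BPF_MEM | BPF_DW with
 * off == 0x7001 (not 8-byte aligned, outside the simm9 range), the
 * fallback path materializes the offset in a scratch register first:
 *
 *	mov	x10, #0x7001		// emit_a64_mov_i(1, tmp, off, ctx)
 *	str	xSRC, [xDST, x10]	// A64_STR64(src, dst, tmp)
 */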
34b8ab09 | 1511 | |
91c960b0 BJ |
1512 | case BPF_STX | BPF_ATOMIC | BPF_W: |
1513 | case BPF_STX | BPF_ATOMIC | BPF_DW: | |
e612b5c1 PM |
1514 | case BPF_STX | BPF_PROBE_ATOMIC | BPF_W: |
1515 | case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW: | |
1902472b HT |
1516 | if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) |
1517 | ret = emit_lse_atomic(insn, ctx); | |
1518 | else | |
1519 | ret = emit_ll_sc_atomic(insn, ctx); | |
1520 | if (ret) | |
1521 | return ret; | |
e612b5c1 PM |
1522 | |
1523 | ret = add_exception_handler(insn, ctx, dst); | |
1524 | if (ret) | |
1525 | return ret; | |
85f68fe8 | 1526 | break; |
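/* Editor's note (illustrative encodings, not part of the kernel source):
 * for a plain BPF_ADD atomic on a u32, emit_lse_atomic() can use a single
 * LSE instruction,
 *
 *	stadd	wSRC, [xDST]
 *
 * whereas emit_ll_sc_atomic() falls back to an exclusive-access loop:
 *
 *	1: ldxr	w10, [xDST]
 *	   add	w10, w10, wSRC
 *	   stxr	w11, w10, [xDST]
 *	   cbnz	w11, 1b
 */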
e54bcde3 | 1527 | |
e54bcde3 ZSL |
1528 | default: |
1529 | pr_err_once("unknown opcode %02x\n", code); | |
1530 | return -EINVAL; | |
1531 | } | |
1532 | ||
1533 | return 0; | |
1534 | } | |
1535 | ||
5b3d19b9 XK |
1536 | /* |
1537 | * Return 0 if FP may change at runtime; otherwise find the minimum negative
1538 | * offset to FP, convert it to a positive number, and align it down to 8 bytes.
1539 | */ | |
1540 | static int find_fpb_offset(struct bpf_prog *prog) | |
1541 | { | |
1542 | int i; | |
1543 | int offset = 0; | |
1544 | ||
1545 | for (i = 0; i < prog->len; i++) { | |
1546 | const struct bpf_insn *insn = &prog->insnsi[i]; | |
1547 | const u8 class = BPF_CLASS(insn->code); | |
1548 | const u8 mode = BPF_MODE(insn->code); | |
1549 | const u8 src = insn->src_reg; | |
1550 | const u8 dst = insn->dst_reg; | |
1551 | const s32 imm = insn->imm; | |
1552 | const s16 off = insn->off; | |
1553 | ||
1554 | switch (class) { | |
1555 | case BPF_STX: | |
1556 | case BPF_ST: | |
1557 | /* fp holds atomic operation result */ | |
1558 | if (class == BPF_STX && mode == BPF_ATOMIC && | |
1559 | ((imm == BPF_XCHG || | |
1560 | imm == (BPF_FETCH | BPF_ADD) || | |
1561 | imm == (BPF_FETCH | BPF_AND) || | |
1562 | imm == (BPF_FETCH | BPF_XOR) || | |
1563 | imm == (BPF_FETCH | BPF_OR)) && | |
1564 | src == BPF_REG_FP)) | |
1565 | return 0; | |
1566 | ||
1567 | if (mode == BPF_MEM && dst == BPF_REG_FP && | |
1568 | off < offset) | |
1569 | offset = insn->off; | |
1570 | break; | |
1571 | ||
1572 | case BPF_JMP32: | |
1573 | case BPF_JMP: | |
1574 | break; | |
1575 | ||
1576 | case BPF_LDX: | |
1577 | case BPF_LD: | |
1578 | /* fp holds load result */ | |
1579 | if (dst == BPF_REG_FP) | |
1580 | return 0; | |
1581 | ||
1582 | if (class == BPF_LDX && mode == BPF_MEM && | |
1583 | src == BPF_REG_FP && off < offset) | |
1584 | offset = off; | |
1585 | break; | |
1586 | ||
1587 | case BPF_ALU: | |
1588 | case BPF_ALU64: | |
1589 | default: | |
1590 | /* fp holds ALU result */ | |
1591 | if (dst == BPF_REG_FP) | |
1592 | return 0; | |
1593 | } | |
1594 | } | |
1595 | ||
1596 | if (offset < 0) { | |
1597 | /* | |
1598 | * 'offset' can safely be converted to a positive 'int', since insn->off
1599 | * is 's16'
1600 | */ | |
1601 | offset = -offset; | |
1602 | /* align down to 8 bytes */ | |
1603 | offset = ALIGN_DOWN(offset, 8); | |
1604 | } | |
1605 | ||
1606 | return offset; | |
1607 | } | |
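/* Editor's example (illustrative only; assumes the prologue sets
 * FP_BOTTOM = FP - fpb_offset): if the lowest FP-relative access in the
 * program is *(u64 *)(fp - 20), the minimum negative offset is -20, so
 * find_fpb_offset() returns ALIGN_DOWN(20, 8) == 16. An access at fp - 20
 * then becomes fpb - 4, keeping most stack accesses within the
 * immediate-offset range of a single load/store.
 */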
1608 | ||
8c11ea5c | 1609 | static int build_body(struct jit_ctx *ctx, bool extra_pass) |
e54bcde3 ZSL |
1610 | { |
1611 | const struct bpf_prog *prog = ctx->prog; | |
1612 | int i; | |
1613 | ||
32f6865c IA |
1614 | /* |
1615 | * - offset[0] - offset of the end of prologue,
1616 | * start of the 1st instruction. | |
1617 | * - offset[1] - offset of the end of 1st instruction, | |
1618 | * start of the 2nd instruction | |
1619 | * [....] | |
1620 | * - offset[3] - offset of the end of 3rd instruction, | |
1621 | * start of 4th instruction | |
1622 | */ | |
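/* Editor's example (illustrative only): for a three-insn program where the
 * 2nd BPF insn expands to two A64 insns and the others to one each,
 * ctx->offset relative to the end of the prologue would be { 0, 1, 3, 4 },
 * with offset[prog->len] marking the end of the program.
 */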
e54bcde3 ZSL |
1623 | for (i = 0; i < prog->len; i++) { |
1624 | const struct bpf_insn *insn = &prog->insnsi[i]; | |
1625 | int ret; | |
1626 | ||
32f6865c IA |
1627 | if (ctx->image == NULL) |
1628 | ctx->offset[i] = ctx->idx; | |
8c11ea5c | 1629 | ret = build_insn(insn, ctx, extra_pass); |
30d3d94c ZSL |
1630 | if (ret > 0) { |
1631 | i++; | |
ddc665a4 DB |
1632 | if (ctx->image == NULL) |
1633 | ctx->offset[i] = ctx->idx; | |
30d3d94c ZSL |
1634 | continue; |
1635 | } | |
e54bcde3 ZSL |
1636 | if (ret) |
1637 | return ret; | |
1638 | } | |
32f6865c IA |
1639 | /* |
1640 | * offset is allocated with prog->len + 1 so fill in | |
1641 | * the last element with the offset after the last | |
1642 | * instruction (end of program) | |
1643 | */ | |
1644 | if (ctx->image == NULL) | |
1645 | ctx->offset[i] = ctx->idx; | |
e54bcde3 ZSL |
1646 | |
1647 | return 0; | |
1648 | } | |
1649 | ||
42ff712b ZSL |
1650 | static int validate_code(struct jit_ctx *ctx) |
1651 | { | |
1652 | int i; | |
1653 | ||
1654 | for (i = 0; i < ctx->idx; i++) { | |
1655 | u32 a64_insn = le32_to_cpu(ctx->image[i]); | |
1656 | ||
1657 | if (a64_insn == AARCH64_BREAK_FAULT) | |
1658 | return -1; | |
1659 | } | |
efc9909f XK |
1660 | return 0; |
1661 | } | |
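/* Editor's note: the aarch64 instruction encoders return
 * AARCH64_BREAK_FAULT (a BRK encoding) when handed impossible operands, so
 * a single scan of the image for that value catches any emission failure.
 */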
1662 | ||
1663 | static int validate_ctx(struct jit_ctx *ctx) | |
1664 | { | |
1665 | if (validate_code(ctx)) | |
1666 | return -1; | |
42ff712b | 1667 | |
80083428 JPB |
1668 | if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) |
1669 | return -1; | |
1670 | ||
42ff712b ZSL |
1671 | return 0; |
1672 | } | |
1673 | ||
e54bcde3 ZSL |
1674 | static inline void bpf_flush_icache(void *start, void *end) |
1675 | { | |
1676 | flush_icache_range((unsigned long)start, (unsigned long)end); | |
1677 | } | |
1678 | ||
db496944 AS |
1679 | struct arm64_jit_data { |
1680 | struct bpf_binary_header *header; | |
1dad391d PM |
1681 | u8 *ro_image; |
1682 | struct bpf_binary_header *ro_header; | |
db496944 AS |
1683 | struct jit_ctx ctx; |
1684 | }; | |
1685 | ||
d1c55ab5 | 1686 | struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) |
e54bcde3 | 1687 | { |
b2ad54e1 | 1688 | int image_size, prog_size, extable_size, extable_align, extable_offset; |
26eb042e | 1689 | struct bpf_prog *tmp, *orig_prog = prog; |
b569c1c6 | 1690 | struct bpf_binary_header *header; |
1dad391d | 1691 | struct bpf_binary_header *ro_header; |
db496944 | 1692 | struct arm64_jit_data *jit_data; |
56ea6a8b | 1693 | bool was_classic = bpf_prog_was_classic(prog); |
26eb042e | 1694 | bool tmp_blinded = false; |
db496944 | 1695 | bool extra_pass = false; |
e54bcde3 | 1696 | struct jit_ctx ctx; |
339af577 | 1697 | u64 arena_vm_start; |
b569c1c6 | 1698 | u8 *image_ptr; |
1dad391d | 1699 | u8 *ro_image_ptr; |
e54bcde3 | 1700 | |
60b58afc | 1701 | if (!prog->jit_requested) |
26eb042e DB |
1702 | return orig_prog; |
1703 | ||
1704 | tmp = bpf_jit_blind_constants(prog); | |
1705 | /* If blinding was requested and we failed during blinding, | |
1706 | * we must fall back to the interpreter. | |
1707 | */ | |
1708 | if (IS_ERR(tmp)) | |
1709 | return orig_prog; | |
1710 | if (tmp != prog) { | |
1711 | tmp_blinded = true; | |
1712 | prog = tmp; | |
1713 | } | |
e54bcde3 | 1714 | |
339af577 | 1715 | arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); |
db496944 AS |
1716 | jit_data = prog->aux->jit_data; |
1717 | if (!jit_data) { | |
1718 | jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); | |
1719 | if (!jit_data) { | |
1720 | prog = orig_prog; | |
1721 | goto out; | |
1722 | } | |
1723 | prog->aux->jit_data = jit_data; | |
1724 | } | |
1725 | if (jit_data->ctx.offset) { | |
1726 | ctx = jit_data->ctx; | |
1dad391d PM |
1727 | ro_image_ptr = jit_data->ro_image; |
1728 | ro_header = jit_data->ro_header; | |
db496944 | 1729 | header = jit_data->header; |
1dad391d PM |
1730 | image_ptr = (void *)header + ((void *)ro_image_ptr |
1731 | - (void *)ro_header); | |
db496944 | 1732 | extra_pass = true; |
80083428 | 1733 | prog_size = sizeof(u32) * ctx.idx; |
db496944 AS |
1734 | goto skip_init_ctx; |
1735 | } | |
e54bcde3 ZSL |
1736 | memset(&ctx, 0, sizeof(ctx)); |
1737 | ctx.prog = prog; | |
1738 | ||
19f68ed6 | 1739 | ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL); |
26eb042e DB |
1740 | if (ctx.offset == NULL) { |
1741 | prog = orig_prog; | |
db496944 | 1742 | goto out_off; |
26eb042e | 1743 | } |
e54bcde3 | 1744 | |
5b3d19b9 | 1745 | ctx.fpb_offset = find_fpb_offset(prog); |
4dd31243 | 1746 | ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); |
5b3d19b9 | 1747 | |
68e4f238 HT |
1748 | /* |
1749 | * 1. Initial fake pass to compute ctx->idx and ctx->offset. | |
1750 | * | |
1751 | * BPF line info needs ctx->offset[i] to be the offset of | |
1752 | * instruction[i] in jited image, so build prologue first. | |
1753 | */ | |
339af577 PM |
1754 | if (build_prologue(&ctx, was_classic, prog->aux->exception_cb, |
1755 | arena_vm_start)) { | |
26eb042e DB |
1756 | prog = orig_prog; |
1757 | goto out_off; | |
1758 | } | |
e54bcde3 | 1759 | |
68e4f238 | 1760 | if (build_body(&ctx, extra_pass)) { |
ddb55992 ZSL |
1761 | prog = orig_prog; |
1762 | goto out_off; | |
1763 | } | |
51c9fbb1 ZSL |
1764 | |
1765 | ctx.epilogue_offset = ctx.idx; | |
22fc0e80 | 1766 | build_epilogue(&ctx, prog->aux->exception_cb); |
b2ad54e1 | 1767 | build_plt(&ctx); |
e54bcde3 | 1768 | |
b2ad54e1 | 1769 | extable_align = __alignof__(struct exception_table_entry); |
80083428 JPB |
1770 | extable_size = prog->aux->num_exentries * |
1771 | sizeof(struct exception_table_entry); | |
1772 | ||
e54bcde3 | 1773 | /* Now we know the actual image size. */ |
80083428 | 1774 | prog_size = sizeof(u32) * ctx.idx; |
b2ad54e1 XK |
1775 | /* also allocate space for plt target */ |
1776 | extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align); | |
1777 | image_size = extable_offset + extable_size; | |
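/* Editor's example (illustrative, assuming an 8-byte plt target): with
 * ctx.idx == 100 emitted instructions and two exception entries, this gives
 * prog_size == 400, extable_offset == round_up(408, extable_align), and a
 * final image laid out as [ insns (incl. plt ldr/br) | plt target |
 * padding | extable ].
 */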
1dad391d PM |
1778 | ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, |
1779 | sizeof(u32), &header, &image_ptr, | |
1780 | jit_fill_hole); | |
1781 | if (!ro_header) { | |
26eb042e DB |
1782 | prog = orig_prog; |
1783 | goto out_off; | |
1784 | } | |
e54bcde3 ZSL |
1785 | |
1786 | /* 2. Now, the actual pass. */ | |
1787 | ||
1dad391d PM |
1788 | /* |
1789 | * Use the image(RW) for writing the JITed instructions. But also save | |
1790 | * the ro_image(RX) for calculating the offsets in the image. The RW | |
1791 | * image will later be copied to the RX image, from where the program
1792 | * will run. bpf_jit_binary_pack_finalize() performs this copy as the
1793 | * final step.
1794 | */ | |
425e1ed7 | 1795 | ctx.image = (__le32 *)image_ptr; |
1dad391d | 1796 | ctx.ro_image = (__le32 *)ro_image_ptr; |
80083428 | 1797 | if (extable_size) |
1dad391d | 1798 | prog->aux->extable = (void *)ro_image_ptr + extable_offset; |
db496944 | 1799 | skip_init_ctx: |
e54bcde3 | 1800 | ctx.idx = 0; |
80083428 | 1801 | ctx.exentry_idx = 0; |
b569c1c6 | 1802 | |
339af577 | 1803 | build_prologue(&ctx, was_classic, prog->aux->exception_cb, arena_vm_start); |
e54bcde3 | 1804 | |
8c11ea5c | 1805 | if (build_body(&ctx, extra_pass)) { |
26eb042e | 1806 | prog = orig_prog; |
1dad391d | 1807 | goto out_free_hdr; |
60ef0494 | 1808 | } |
e54bcde3 | 1809 | |
22fc0e80 | 1810 | build_epilogue(&ctx, prog->aux->exception_cb); |
b2ad54e1 | 1811 | build_plt(&ctx); |
e54bcde3 | 1812 | |
42ff712b | 1813 | /* 3. Extra pass to validate JITed code. */ |
efc9909f | 1814 | if (validate_ctx(&ctx)) { |
26eb042e | 1815 | prog = orig_prog; |
1dad391d | 1816 | goto out_free_hdr; |
42ff712b ZSL |
1817 | } |
1818 | ||
e54bcde3 ZSL |
1819 | /* And we're done. */ |
1820 | if (bpf_jit_enable > 1) | |
80083428 | 1821 | bpf_jit_dump(prog->len, prog_size, 2, ctx.image); |
e54bcde3 | 1822 | |
db496944 AS |
1823 | if (!prog->is_func || extra_pass) { |
1824 | if (extra_pass && ctx.idx != jit_data->ctx.idx) { | |
1825 | pr_err_once("multi-func JIT bug %d != %d\n", | |
1826 | ctx.idx, jit_data->ctx.idx); | |
db496944 AS |
1827 | prog->bpf_func = NULL; |
1828 | prog->jited = 0; | |
10f3b29c | 1829 | prog->jited_len = 0; |
1dad391d PM |
1830 | goto out_free_hdr; |
1831 | } | |
1832 | if (WARN_ON(bpf_jit_binary_pack_finalize(prog, ro_header, | |
1833 | header))) { | |
1834 | /* ro_header has been freed */ | |
1835 | ro_header = NULL; | |
1836 | prog = orig_prog; | |
db496944 AS |
1837 | goto out_off; |
1838 | } | |
1dad391d PM |
1839 | /* |
1840 | * The instructions have now been copied to the ROX region from | |
1841 | * where they will execute. Now the data cache has to be cleaned to | |
1842 | * the PoU and the I-cache has to be invalidated for the VAs. | |
1843 | */ | |
1844 | bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx); | |
db496944 AS |
1845 | } else { |
1846 | jit_data->ctx = ctx; | |
1dad391d | 1847 | jit_data->ro_image = ro_image_ptr; |
db496944 | 1848 | jit_data->header = header; |
1dad391d | 1849 | jit_data->ro_header = ro_header; |
db496944 | 1850 | } |
1dad391d PM |
1851 | |
1852 | prog->bpf_func = (void *)ctx.ro_image; | |
a91263d5 | 1853 | prog->jited = 1; |
80083428 | 1854 | prog->jited_len = prog_size; |
26eb042e | 1855 | |
db496944 | 1856 | if (!prog->is_func || extra_pass) { |
dda7596c HT |
1857 | int i; |
1858 | ||
1859 | /* offset[prog->len] is the size of program */ | |
1860 | for (i = 0; i <= prog->len; i++) | |
1861 | ctx.offset[i] *= AARCH64_INSN_SIZE; | |
32f6865c | 1862 | bpf_prog_fill_jited_linfo(prog, ctx.offset + 1); |
26eb042e | 1863 | out_off: |
19f68ed6 | 1864 | kvfree(ctx.offset); |
db496944 AS |
1865 | kfree(jit_data); |
1866 | prog->aux->jit_data = NULL; | |
1867 | } | |
26eb042e DB |
1868 | out: |
1869 | if (tmp_blinded) | |
1870 | bpf_jit_prog_release_other(prog, prog == orig_prog ? | |
1871 | tmp : orig_prog); | |
d1c55ab5 | 1872 | return prog; |
1dad391d PM |
1873 | |
1874 | out_free_hdr: | |
1875 | if (header) { | |
1876 | bpf_arch_text_copy(&ro_header->size, &header->size, | |
1877 | sizeof(header->size)); | |
1878 | bpf_jit_binary_pack_free(ro_header, header); | |
1879 | } | |
1880 | goto out_off; | |
e54bcde3 | 1881 | } |
91fc957c | 1882 | |
b5e975d2 HT |
1883 | bool bpf_jit_supports_kfunc_call(void) |
1884 | { | |
1885 | return true; | |
1886 | } | |
1887 | ||
1dad391d PM |
1888 | void *bpf_arch_text_copy(void *dst, void *src, size_t len) |
1889 | { | |
1890 | if (!aarch64_insn_copy(dst, src, len)) | |
1891 | return ERR_PTR(-EINVAL); | |
1892 | return dst; | |
1893 | } | |
1894 | ||
5d63ae90 LB |
1895 | u64 bpf_jit_alloc_exec_limit(void) |
1896 | { | |
b89ddf4c | 1897 | return VMALLOC_END - VMALLOC_START; |
5d63ae90 LB |
1898 | } |
1899 | ||
d4609a5d JS |
1900 | /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ |
1901 | bool bpf_jit_supports_subprog_tailcalls(void) | |
1902 | { | |
1903 | return true; | |
1904 | } | |
b2ad54e1 | 1905 | |
efc9909f XK |
1906 | static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l, |
1907 | int args_off, int retval_off, int run_ctx_off, | |
1908 | bool save_ret) | |
1909 | { | |
aada4766 | 1910 | __le32 *branch; |
efc9909f XK |
1911 | u64 enter_prog; |
1912 | u64 exit_prog; | |
1913 | struct bpf_prog *p = l->link.prog; | |
1914 | int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); | |
1915 | ||
271de525 MKL |
1916 | enter_prog = (u64)bpf_trampoline_enter(p); |
1917 | exit_prog = (u64)bpf_trampoline_exit(p); | |
efc9909f XK |
1918 | |
1919 | if (l->cookie == 0) { | |
1920 | /* if cookie is zero, one instruction is enough to store it */ | |
1921 | emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx); | |
1922 | } else { | |
1923 | emit_a64_mov_i64(A64_R(10), l->cookie, ctx); | |
1924 | emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off), | |
1925 | ctx); | |
1926 | } | |
1927 | ||
1928 | /* save p to callee saved register x19 to avoid loading p with mov_i64 | |
1929 | * each time. | |
1930 | */ | |
1931 | emit_addr_mov_i64(A64_R(19), (const u64)p, ctx); | |
1932 | ||
1933 | /* arg1: prog */ | |
1934 | emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); | |
1935 | /* arg2: &run_ctx */ | |
1936 | emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx); | |
1937 | ||
1938 | emit_call(enter_prog, ctx); | |
1939 | ||
dc7d7447 XK |
1940 | /* save return value to callee saved register x20 */ |
1941 | emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx); | |
1942 | ||
efc9909f XK |
1943 | /* if (__bpf_prog_enter(prog) == 0) |
1944 | * goto skip_exec_of_prog; | |
1945 | */ | |
1946 | branch = ctx->image + ctx->idx; | |
1947 | emit(A64_NOP, ctx); | |
1948 | ||
efc9909f XK |
1949 | emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx); |
1950 | if (!p->jited) | |
1951 | emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx); | |
1952 | ||
1953 | emit_call((const u64)p->bpf_func, ctx); | |
1954 | ||
1955 | if (save_ret) | |
1956 | emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); | |
1957 | ||
1958 | if (ctx->image) { | |
1959 | int offset = &ctx->image[ctx->idx] - branch; | |
aada4766 | 1960 | *branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset)); |
efc9909f XK |
1961 | } |
1962 | ||
1963 | /* arg1: prog */ | |
1964 | emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx); | |
1965 | /* arg2: start time */ | |
1966 | emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx); | |
1967 | /* arg3: &run_ctx */ | |
1968 | emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx); | |
1969 | ||
1970 | emit_call(exit_prog, ctx); | |
1971 | } | |
1972 | ||
1973 | static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl, | |
1974 | int args_off, int retval_off, int run_ctx_off, | |
aada4766 | 1975 | __le32 **branches) |
efc9909f XK |
1976 | { |
1977 | int i; | |
1978 | ||
1979 | /* The first fmod_ret program will receive a garbage return value. | |
1980 | * Set this to 0 to avoid confusing the program. | |
1981 | */ | |
1982 | emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx); | |
1983 | for (i = 0; i < tl->nr_links; i++) { | |
1984 | invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off, | |
1985 | run_ctx_off, true); | |
1986 | /* if (*(u64 *)(sp + retval_off) != 0) | |
1987 | * goto do_fexit; | |
1988 | */ | |
1989 | emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx); | |
1990 | /* Save the location of the branch, and generate a nop.
1991 | * This nop will be replaced with a cbnz later. | |
1992 | */ | |
1993 | branches[i] = ctx->image + ctx->idx; | |
1994 | emit(A64_NOP, ctx); | |
1995 | } | |
1996 | } | |
1997 | ||
90564f1e | 1998 | static void save_args(struct jit_ctx *ctx, int args_off, int nregs) |
efc9909f XK |
1999 | { |
2000 | int i; | |
2001 | ||
90564f1e | 2002 | for (i = 0; i < nregs; i++) { |
efc9909f XK |
2003 | emit(A64_STR64I(i, A64_SP, args_off), ctx); |
2004 | args_off += 8; | |
2005 | } | |
2006 | } | |
2007 | ||
90564f1e | 2008 | static void restore_args(struct jit_ctx *ctx, int args_off, int nregs) |
efc9909f XK |
2009 | { |
2010 | int i; | |
2011 | ||
90564f1e | 2012 | for (i = 0; i < nregs; i++) { |
efc9909f XK |
2013 | emit(A64_LDR64I(i, A64_SP, args_off), ctx); |
2014 | args_off += 8; | |
2015 | } | |
2016 | } | |
2017 | ||
2018 | /* Based on the x86's implementation of arch_prepare_bpf_trampoline(). | |
2019 | * | |
2020 | * bpf prog and function entry before bpf trampoline hooked: | |
2021 | * mov x9, lr | |
2022 | * nop | |
2023 | * | |
2024 | * bpf prog and function entry after bpf trampoline hooked: | |
2025 | * mov x9, lr | |
2026 | * bl <bpf_trampoline or plt> | |
2027 | * | |
2028 | */ | |
2029 | static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, | |
7a3d9a15 | 2030 | struct bpf_tramp_links *tlinks, void *func_addr, |
90564f1e | 2031 | int nregs, u32 flags) |
efc9909f XK |
2032 | { |
2033 | int i; | |
2034 | int stack_size; | |
2035 | int retaddr_off; | |
2036 | int regs_off; | |
2037 | int retval_off; | |
2038 | int args_off; | |
90564f1e | 2039 | int nregs_off; |
efc9909f XK |
2040 | int ip_off; |
2041 | int run_ctx_off; | |
2042 | struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; | |
2043 | struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; | |
2044 | struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; | |
2045 | bool save_ret; | |
aada4766 | 2046 | __le32 **branches = NULL; |
efc9909f XK |
2047 | |
2048 | /* trampoline stack layout: | |
2049 | * [ parent ip ] | |
2050 | * [ FP ] | |
2051 | * SP + retaddr_off [ self ip ] | |
2052 | * [ FP ] | |
2053 | * | |
2054 | * [ padding ] align SP to multiples of 16 | |
2055 | * | |
2056 | * [ x20 ] callee saved reg x20 | |
2057 | * SP + regs_off [ x19 ] callee saved reg x19 | |
2058 | * | |
2059 | * SP + retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or | |
2060 | * BPF_TRAMP_F_RET_FENTRY_RET | |
2061 | * | |
90564f1e | 2062 | * [ arg reg N ] |
efc9909f | 2063 | * [ ... ] |
90564f1e | 2064 | * SP + args_off [ arg reg 1 ] |
efc9909f | 2065 | * |
90564f1e | 2066 | * SP + nregs_off [ arg regs count ] |
efc9909f XK |
2067 | * |
2068 | * SP + ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag | |
2069 | * | |
2070 | * SP + run_ctx_off [ bpf_tramp_run_ctx ] | |
2071 | */ | |
2072 | ||
2073 | stack_size = 0; | |
2074 | run_ctx_off = stack_size; | |
2075 | /* room for bpf_tramp_run_ctx */ | |
2076 | stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8); | |
2077 | ||
2078 | ip_off = stack_size; | |
2079 | /* room for IP address argument */ | |
2080 | if (flags & BPF_TRAMP_F_IP_ARG) | |
2081 | stack_size += 8; | |
2082 | ||
90564f1e | 2083 | nregs_off = stack_size; |
efc9909f XK |
2084 | /* room for args count */ |
2085 | stack_size += 8; | |
2086 | ||
2087 | args_off = stack_size; | |
2088 | /* room for args */ | |
90564f1e | 2089 | stack_size += nregs * 8; |
efc9909f XK |
2090 | |
2091 | /* room for return value */ | |
2092 | retval_off = stack_size; | |
2093 | save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); | |
2094 | if (save_ret) | |
2095 | stack_size += 8; | |
2096 | ||
2097 | /* room for callee saved registers, currently x19 and x20 are used */ | |
2098 | regs_off = stack_size; | |
2099 | stack_size += 16; | |
2100 | ||
2101 | /* round up to multiples of 16 to avoid SPAlignmentFault */ | |
2102 | stack_size = round_up(stack_size, 16); | |
2103 | ||
2104 | /* the return address is located above FP */
2105 | retaddr_off = stack_size + 8; | |
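/* Editor's example (illustrative only): for a traced function with two u64
 * args, no BPF_TRAMP_F_IP_ARG and save_ret set, the offsets work out to
 * run_ctx_off == 0, nregs_off == round_up(sizeof(struct bpf_tramp_run_ctx), 8),
 * args_off == nregs_off + 8, retval_off == args_off + 16 and
 * regs_off == retval_off + 8, with stack_size rounded up to 16 and the
 * return address sitting at SP + stack_size + 8 once both frames are pushed.
 */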
2106 | ||
2107 | /* bpf trampoline may be invoked by 3 instruction types: | |
2108 | * 1. bl, attached to bpf prog or kernel function via short jump | |
2109 | * 2. br, attached to bpf prog or kernel function via long jump | |
2110 | * 3. blr, working as a function pointer, used by struct_ops. | |
2111 | * So BTI_JC should be used here to support both br and blr.
2112 | */ | |
2113 | emit_bti(A64_BTI_JC, ctx); | |
2114 | ||
2115 | /* frame for parent function */ | |
2116 | emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx); | |
2117 | emit(A64_MOV(1, A64_FP, A64_SP), ctx); | |
2118 | ||
2119 | /* frame for patched function */ | |
2120 | emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); | |
2121 | emit(A64_MOV(1, A64_FP, A64_SP), ctx); | |
2122 | ||
2123 | /* allocate stack space */ | |
2124 | emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx); | |
2125 | ||
2126 | if (flags & BPF_TRAMP_F_IP_ARG) { | |
2127 | /* save ip address of the traced function */ | |
7a3d9a15 | 2128 | emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx); |
efc9909f XK |
2129 | emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx); |
2130 | } | |
2131 | ||
90564f1e FR |
2132 | /* save arg regs count */
2133 | emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx); | |
2134 | emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx); | |
efc9909f | 2135 | |
90564f1e FR |
2136 | /* save arg regs */ |
2137 | save_args(ctx, args_off, nregs); | |
efc9909f XK |
2138 | |
2139 | /* save callee saved registers */ | |
2140 | emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx); | |
2141 | emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx); | |
2142 | ||
2143 | if (flags & BPF_TRAMP_F_CALL_ORIG) { | |
2144 | emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); | |
2145 | emit_call((const u64)__bpf_tramp_enter, ctx); | |
2146 | } | |
2147 | ||
2148 | for (i = 0; i < fentry->nr_links; i++) | |
2149 | invoke_bpf_prog(ctx, fentry->links[i], args_off, | |
2150 | retval_off, run_ctx_off, | |
2151 | flags & BPF_TRAMP_F_RET_FENTRY_RET); | |
2152 | ||
2153 | if (fmod_ret->nr_links) { | |
aada4766 | 2154 | branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *), |
efc9909f XK |
2155 | GFP_KERNEL); |
2156 | if (!branches) | |
2157 | return -ENOMEM; | |
2158 | ||
2159 | invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off, | |
2160 | run_ctx_off, branches); | |
2161 | } | |
2162 | ||
2163 | if (flags & BPF_TRAMP_F_CALL_ORIG) { | |
90564f1e | 2164 | restore_args(ctx, args_off, nregs); |
efc9909f XK |
2165 | /* call original func */ |
2166 | emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx); | |
738a96c4 XK |
2167 | emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx); |
2168 | emit(A64_RET(A64_R(10)), ctx); | |
efc9909f XK |
2169 | /* store return value */ |
2170 | emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); | |
2171 | /* reserve a nop for bpf_tramp_image_put */ | |
96b0f5ad | 2172 | im->ip_after_call = ctx->ro_image + ctx->idx; |
efc9909f XK |
2173 | emit(A64_NOP, ctx); |
2174 | } | |
2175 | ||
2176 | /* update the branches saved in invoke_bpf_mod_ret with cbnz */ | |
2177 | for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { | |
2178 | int offset = &ctx->image[ctx->idx] - branches[i]; | |
aada4766 | 2179 | *branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset)); |
efc9909f XK |
2180 | } |
2181 | ||
2182 | for (i = 0; i < fexit->nr_links; i++) | |
2183 | invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off, | |
2184 | run_ctx_off, false); | |
2185 | ||
2186 | if (flags & BPF_TRAMP_F_CALL_ORIG) { | |
96b0f5ad | 2187 | im->ip_epilogue = ctx->ro_image + ctx->idx; |
efc9909f XK |
2188 | emit_addr_mov_i64(A64_R(0), (const u64)im, ctx); |
2189 | emit_call((const u64)__bpf_tramp_exit, ctx); | |
2190 | } | |
2191 | ||
2192 | if (flags & BPF_TRAMP_F_RESTORE_REGS) | |
90564f1e | 2193 | restore_args(ctx, args_off, nregs); |
efc9909f XK |
2194 | |
2195 | /* restore callee saved register x19 and x20 */ | |
2196 | emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx); | |
2197 | emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx); | |
2198 | ||
2199 | if (save_ret) | |
2200 | emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx); | |
2201 | ||
2202 | /* reset SP */ | |
2203 | emit(A64_MOV(1, A64_SP, A64_FP), ctx); | |
2204 | ||
2205 | /* pop frames */ | |
2206 | emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx); | |
2207 | emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx); | |
2208 | ||
2209 | if (flags & BPF_TRAMP_F_SKIP_FRAME) { | |
2210 | /* skip patched function, return to parent */ | |
2211 | emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); | |
2212 | emit(A64_RET(A64_R(9)), ctx); | |
2213 | } else { | |
2214 | /* return to patched function */ | |
2215 | emit(A64_MOV(1, A64_R(10), A64_LR), ctx); | |
2216 | emit(A64_MOV(1, A64_LR, A64_R(9)), ctx); | |
2217 | emit(A64_RET(A64_R(10)), ctx); | |
2218 | } | |
2219 | ||
efc9909f XK |
2220 | kfree(branches); |
2221 | ||
2222 | return ctx->idx; | |
2223 | } | |
2224 | ||
96d1b7c0 | 2225 | static int btf_func_model_nregs(const struct btf_func_model *m) |
efc9909f | 2226 | { |
90564f1e | 2227 | int nregs = m->nr_args; |
96d1b7c0 | 2228 | int i; |
efc9909f | 2229 | |
90564f1e | 2230 | /* extra registers needed for struct argument */ |
eb707dde | 2231 | for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { |
90564f1e | 2232 | /* The arg_size is at most 16 bytes, enforced by the verifier. */ |
eb707dde | 2233 | if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) |
90564f1e | 2234 | nregs += (m->arg_size[i] + 7) / 8 - 1; |
eb707dde YS |
2235 | } |
2236 | ||
96d1b7c0 SL |
2237 | return nregs; |
2238 | } | |
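/* Editor's example (illustrative only): a traced function taking
 * (u64, struct { u64 a, b; }) has nr_args == 2, but the 16-byte struct
 * argument occupies two registers, so btf_func_model_nregs() returns 3.
 */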
2239 | ||
2240 | int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, | |
2241 | struct bpf_tramp_links *tlinks, void *func_addr) | |
2242 | { | |
2243 | struct jit_ctx ctx = { | |
2244 | .image = NULL, | |
2245 | .idx = 0, | |
2246 | }; | |
2247 | struct bpf_tramp_image im; | |
2248 | int nregs, ret; | |
2249 | ||
2250 | nregs = btf_func_model_nregs(m); | |
90564f1e FR |
2251 | /* the first 8 registers are used for arguments */ |
2252 | if (nregs > 8) | |
2253 | return -ENOTSUPP; | |
2254 | ||
96d1b7c0 | 2255 | ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags); |
efc9909f XK |
2256 | if (ret < 0) |
2257 | return ret; | |
2258 | ||
96d1b7c0 SL |
2259 | return ret * AARCH64_INSN_SIZE;
2260 | } | |
efc9909f | 2261 | |
96b0f5ad PM |
2262 | void *arch_alloc_bpf_trampoline(unsigned int size) |
2263 | { | |
2264 | return bpf_prog_pack_alloc(size, jit_fill_hole); | |
2265 | } | |
2266 | ||
2267 | void arch_free_bpf_trampoline(void *image, unsigned int size) | |
2268 | { | |
2269 | bpf_prog_pack_free(image, size); | |
2270 | } | |
2271 | ||
c733239f | 2272 | int arch_protect_bpf_trampoline(void *image, unsigned int size) |
96b0f5ad | 2273 | { |
c733239f | 2274 | return 0; |
96b0f5ad PM |
2275 | } |
2276 | ||
96b0f5ad PM |
2277 | int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image, |
2278 | void *ro_image_end, const struct btf_func_model *m, | |
96d1b7c0 SL |
2279 | u32 flags, struct bpf_tramp_links *tlinks, |
2280 | void *func_addr) | |
2281 | { | |
2282 | int ret, nregs; | |
96b0f5ad PM |
2283 | void *image, *tmp; |
2284 | u32 size = ro_image_end - ro_image; | |
2285 | ||
2286 | /* image doesn't need to be in module memory range, so we can | |
2287 | * use kvmalloc. | |
2288 | */ | |
2289 | image = kvmalloc(size, GFP_KERNEL); | |
2290 | if (!image) | |
2291 | return -ENOMEM; | |
2292 | ||
96d1b7c0 SL |
2293 | struct jit_ctx ctx = { |
2294 | .image = image, | |
96b0f5ad | 2295 | .ro_image = ro_image, |
96d1b7c0 SL |
2296 | .idx = 0, |
2297 | }; | |
2298 | ||
2299 | nregs = btf_func_model_nregs(m); | |
2300 | /* the first 8 registers are used for arguments */ | |
2301 | if (nregs > 8) {
2302 | ret = -ENOTSUPP;	/* don't leak the RW image allocated above */
2303 | goto out;
2304 | }
2303 | ||
96b0f5ad | 2304 | jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image)); |
7a3d9a15 | 2305 | ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags); |
efc9909f | 2306 | |
96b0f5ad | 2307 | if (ret > 0 && validate_code(&ctx) < 0) { |
efc9909f | 2308 | ret = -EINVAL; |
96b0f5ad PM |
2309 | goto out; |
2310 | } | |
efc9909f XK |
2311 | |
2312 | if (ret > 0) | |
2313 | ret *= AARCH64_INSN_SIZE; | |
2314 | ||
96b0f5ad PM |
2315 | tmp = bpf_arch_text_copy(ro_image, image, size); |
2316 | if (IS_ERR(tmp)) { | |
2317 | ret = PTR_ERR(tmp); | |
2318 | goto out; | |
2319 | } | |
2320 | ||
2321 | bpf_flush_icache(ro_image, ro_image + size); | |
2322 | out: | |
2323 | kvfree(image); | |
efc9909f XK |
2324 | return ret; |
2325 | } | |
2326 | ||
b2ad54e1 XK |
2327 | static bool is_long_jump(void *ip, void *target) |
2328 | { | |
2329 | long offset; | |
2330 | ||
2331 | /* NULL target means this is a NOP */ | |
2332 | if (!target) | |
2333 | return false; | |
2334 | ||
2335 | offset = (long)target - (long)ip; | |
2336 | return offset < -SZ_128M || offset >= SZ_128M; | |
2337 | } | |
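/* Editor's note (architectural background): A64 B/BL instructions encode a
 * signed 26-bit word offset, i.e. +/- 2^27 bytes == +/- 128 MB, which is
 * why SZ_128M is the boundary between short and long jumps here.
 */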
2338 | ||
2339 | static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip, | |
2340 | void *addr, void *plt, u32 *insn) | |
2341 | { | |
2342 | void *target; | |
2343 | ||
2344 | if (!addr) { | |
2345 | *insn = aarch64_insn_gen_nop(); | |
2346 | return 0; | |
2347 | } | |
2348 | ||
2349 | if (is_long_jump(ip, addr)) | |
2350 | target = plt; | |
2351 | else | |
2352 | target = addr; | |
2353 | ||
2354 | *insn = aarch64_insn_gen_branch_imm((unsigned long)ip, | |
2355 | (unsigned long)target, | |
2356 | type); | |
2357 | ||
2358 | return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT; | |
2359 | } | |
2360 | ||
2361 | /* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf | |
2362 | * trampoline with the branch instruction from @ip to @new_addr. If @old_addr | |
2363 | * or @new_addr is NULL, the old or new instruction is NOP. | |
2364 | * | |
2365 | * When @ip is the bpf prog entry, a bpf trampoline is being attached or | |
2366 | * detached. Since bpf trampoline and bpf prog are allocated separately with | |
2367 | * vmalloc, the address distance may exceed 128MB, the maximum branch range. | |
2368 | * So long jumps must be handled.
2369 | * | |
2370 | * When a bpf prog is constructed, a plt pointing to empty trampoline | |
2371 | * dummy_tramp is placed at the end: | |
2372 | * | |
2373 | * bpf_prog: | |
2374 | * mov x9, lr | |
2375 | * nop // patchsite | |
2376 | * ... | |
2377 | * ret | |
2378 | * | |
2379 | * plt: | |
2380 | * ldr x10, target | |
2381 | * br x10 | |
2382 | * target: | |
2383 | * .quad dummy_tramp // plt target | |
2384 | * | |
2385 | * This is also the state when no trampoline is attached. | |
2386 | * | |
2387 | * When a short-jump bpf trampoline is attached, the patchsite is patched | |
2388 | * to a bl instruction to the trampoline directly: | |
2389 | * | |
2390 | * bpf_prog: | |
2391 | * mov x9, lr | |
2392 | * bl <short-jump bpf trampoline address> // patchsite | |
2393 | * ... | |
2394 | * ret | |
2395 | * | |
2396 | * plt: | |
2397 | * ldr x10, target | |
2398 | * br x10 | |
2399 | * target: | |
2400 | * .quad dummy_tramp // plt target | |
2401 | * | |
2402 | * When a long-jump bpf trampoline is attached, the plt target is filled with | |
2403 | * the trampoline address and the patchsite is patched to a bl instruction to | |
2404 | * the plt: | |
2405 | * | |
2406 | * bpf_prog: | |
2407 | * mov x9, lr | |
2408 | * bl plt // patchsite | |
2409 | * ... | |
2410 | * ret | |
2411 | * | |
2412 | * plt: | |
2413 | * ldr x10, target | |
2414 | * br x10 | |
2415 | * target: | |
2416 | * .quad <long-jump bpf trampoline address> // plt target | |
2417 | * | |
2418 | * The dummy_tramp is used to prevent another CPU from jumping to unknown | |
2419 | * locations during the patching process, which simplifies patching.
2420 | */ | |
2421 | int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, | |
2422 | void *old_addr, void *new_addr) | |
2423 | { | |
2424 | int ret; | |
2425 | u32 old_insn; | |
2426 | u32 new_insn; | |
2427 | u32 replaced; | |
2428 | struct bpf_plt *plt = NULL; | |
2429 | unsigned long size = 0UL; | |
2430 | unsigned long offset = ~0UL; | |
2431 | enum aarch64_insn_branch_type branch_type; | |
2432 | char namebuf[KSYM_NAME_LEN]; | |
2433 | void *image = NULL; | |
2434 | u64 plt_target = 0ULL; | |
2435 | bool poking_bpf_entry; | |
2436 | ||
2437 | if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf)) | |
2438 | /* Only poking bpf text is supported. Since kernel function | |
2439 | * entry is set up by ftrace, we rely on ftrace to poke kernel
2440 | * functions. | |
2441 | */ | |
2442 | return -ENOTSUPP; | |
2443 | ||
2444 | image = ip - offset; | |
2445 | /* zero offset means we're poking bpf prog entry */ | |
2446 | poking_bpf_entry = (offset == 0UL); | |
2447 | ||
2448 | /* bpf prog entry, find plt and the real patchsite */ | |
2449 | if (poking_bpf_entry) { | |
2450 | /* plt locates at the end of bpf prog */ | |
2451 | plt = image + size - PLT_TARGET_OFFSET; | |
2452 | ||
2453 | /* skip to the nop instruction in bpf prog entry: | |
2454 | * bti c // if BTI enabled | |
2455 | * mov x9, x30 | |
2456 | * nop | |
2457 | */ | |
2458 | ip = image + POKE_OFFSET * AARCH64_INSN_SIZE; | |
2459 | } | |
2460 | ||
2461 | /* long jump is only possible at bpf prog entry */ | |
2462 | if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) && | |
2463 | !poking_bpf_entry)) | |
2464 | return -EINVAL; | |
2465 | ||
2466 | if (poke_type == BPF_MOD_CALL) | |
2467 | branch_type = AARCH64_INSN_BRANCH_LINK; | |
2468 | else | |
2469 | branch_type = AARCH64_INSN_BRANCH_NOLINK; | |
2470 | ||
2471 | if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0) | |
2472 | return -EFAULT; | |
2473 | ||
2474 | if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0) | |
2475 | return -EFAULT; | |
2476 | ||
2477 | if (is_long_jump(ip, new_addr)) | |
2478 | plt_target = (u64)new_addr; | |
2479 | else if (is_long_jump(ip, old_addr)) | |
2480 | /* if the old target is a long jump and the new target is not, | |
2481 | * restore the plt target to dummy_tramp, so there is always a | |
2482 | * legal and harmless address stored in plt target, and we'll | |
2483 | * never jump from plt to an unknown place. | |
2484 | */ | |
2485 | plt_target = (u64)&dummy_tramp; | |
2486 | ||
2487 | if (plt_target) { | |
2488 | /* non-zero plt_target indicates we're patching a bpf prog, | |
2489 | * which is read only. | |
2490 | */ | |
2491 | if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1)) | |
2492 | return -EFAULT; | |
2493 | WRITE_ONCE(plt->target, plt_target); | |
2494 | set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1); | |
2495 | /* since plt target points to either the new trampoline | |
2496 | * or dummy_tramp, even if another CPU reads the old plt | |
2497 | * target value before fetching the bl instruction to plt, | |
2498 | * it will be brought back by dummy_tramp, so no barrier is | |
2499 | * required here. | |
2500 | */ | |
2501 | } | |
2502 | ||
2503 | /* if the old target and the new target are both long jumps, no | |
2504 | * patching is required | |
2505 | */ | |
2506 | if (old_insn == new_insn) | |
2507 | return 0; | |
2508 | ||
2509 | mutex_lock(&text_mutex); | |
2510 | if (aarch64_insn_read(ip, &replaced)) { | |
2511 | ret = -EFAULT; | |
2512 | goto out; | |
2513 | } | |
2514 | ||
2515 | if (replaced != old_insn) { | |
2516 | ret = -EFAULT; | |
2517 | goto out; | |
2518 | } | |
2519 | ||
2520 | /* We call aarch64_insn_patch_text_nosync() to replace the instruction
2521 | * atomically, so no other CPUs will fetch a half-new and half-old
2522 | * instruction. But there is a chance that another CPU executes the
2523 | * old instruction after the patching operation finishes (e.g., | |
2524 | * pipeline not flushed, or icache not synchronized yet). | |
2525 | * | |
2526 | * 1. when a new trampoline is attached, it is not a problem for | |
2527 | * different CPUs to jump to different trampolines temporarily. | |
2528 | * | |
2529 | * 2. when an old trampoline is freed, we must wait for all other
2530 | * CPUs to exit the trampoline and make sure it is no longer
2531 | * reachable. Since bpf_tramp_image_put() already uses percpu_ref and
2532 | * task-based RCU for that synchronization, there is no need to call
2533 | * the sync variant here; see bpf_tramp_image_put() for details.
2534 | */ | |
2535 | ret = aarch64_insn_patch_text_nosync(ip, new_insn); | |
2536 | out: | |
2537 | mutex_unlock(&text_mutex); | |
2538 | ||
2539 | return ret; | |
2540 | } | |
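/* Editor's sketch (hypothetical caller, for illustration only): attaching a
 * trampoline to a bpf prog entry boils down to
 *
 *	bpf_arch_text_poke(ip, BPF_MOD_CALL, NULL, tramp_addr);
 *
 * while detaching swaps old_addr and new_addr, restoring the original nop.
 */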
18a45f12 HT |
2541 | |
2542 | bool bpf_jit_supports_ptr_xchg(void) | |
2543 | { | |
2544 | return true; | |
2545 | } | |
22fc0e80 PM |
2546 | |
2547 | bool bpf_jit_supports_exceptions(void) | |
2548 | { | |
2549 | /* We unwind through both kernel frames (starting from within the
2550 | * bpf_throw call) and BPF frames. Therefore we require the FP unwinder
2551 | * to be enabled to walk kernel frames and reach BPF frames in the stack
2552 | * trace. The ARM64 kernel is always compiled with CONFIG_FRAME_POINTER=y.
2553 | */ | |
2554 | return true; | |
2555 | } | |
1dad391d | 2556 | |
4dd31243 PM |
2557 | bool bpf_jit_supports_arena(void) |
2558 | { | |
2559 | return true; | |
2560 | } | |
2561 | ||
e612b5c1 PM |
2562 | bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena) |
2563 | { | |
2564 | if (!in_arena) | |
2565 | return true; | |
2566 | switch (insn->code) { | |
2567 | case BPF_STX | BPF_ATOMIC | BPF_W: | |
2568 | case BPF_STX | BPF_ATOMIC | BPF_DW: | |
2569 | if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) | |
2570 | return false; | |
2571 | } | |
2572 | return true; | |
2573 | } | |
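/* Editor's note (rationale as understood, hedged): the LL/SC fallback
 * expands to a multi-instruction exclusive loop, which does not map cleanly
 * onto the single-entry exception-table fixup that arena accesses rely on,
 * so arena atomics are only claimed when single-instruction LSE atomics
 * are available.
 */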
2574 | ||
7a4c3222 PM |
2575 | bool bpf_jit_supports_percpu_insn(void) |
2576 | { | |
2577 | return true; | |
2578 | } | |
2579 | ||
75fe4c0b PM |
2580 | bool bpf_jit_inlines_helper_call(s32 imm) |
2581 | { | |
2582 | switch (imm) { | |
2583 | case BPF_FUNC_get_smp_processor_id: | |
2584 | return true; | |
2585 | default: | |
2586 | return false; | |
2587 | } | |
2588 | } | |
2589 | ||
1dad391d PM |
2590 | void bpf_jit_free(struct bpf_prog *prog) |
2591 | { | |
2592 | if (prog->jited) { | |
2593 | struct arm64_jit_data *jit_data = prog->aux->jit_data; | |
2594 | struct bpf_binary_header *hdr; | |
2595 | ||
2596 | /* | |
2597 | * If we fail the final pass of JIT (from jit_subprogs), | |
2598 | * the program may not be finalized yet. Call finalize here | |
2599 | * before freeing it. | |
2600 | */ | |
2601 | if (jit_data) { | |
2602 | bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size, | |
2603 | sizeof(jit_data->header->size)); | |
2604 | kfree(jit_data); | |
2605 | } | |
2606 | hdr = bpf_jit_binary_pack_hdr(prog); | |
2607 | bpf_jit_binary_pack_free(hdr, NULL); | |
2608 | WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); | |
2609 | } | |
2610 | ||
2611 | bpf_prog_unlock_free(prog); | |
2612 | } |