1 // SPDX-License-Identifier: GPL-2.0
3 * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
5 * Author: Wang YanQing (udknight@gmail.com)
6 * The code based on code and ideas from:
7 * Eric Dumazet (eric.dumazet@gmail.com)
9 * Shubham Bansal <illusionist.neo@gmail.com>
12 #include <linux/netdevice.h>
13 #include <linux/filter.h>
14 #include <linux/if_vlan.h>
15 #include <asm/cacheflush.h>
16 #include <asm/set_memory.h>
17 #include <asm/nospec-branch.h>
18 #include <linux/bpf.h>
21 * eBPF prog stack layout:
24 * original ESP => +-----+
25 * | | callee saved registers
27 * | ... | eBPF JIT scratch space
28 * BPF_FP,IA32_EBP => +-----+
29 * | ... | eBPF prog stack
31 * |RSVD | JIT scratchpad
32 * current ESP => +-----+
34 * | ... | Function call stack
39 * The callee saved registers:
42 * original ESP => +------------------+ \
44 * current EBP => +------------------+ } callee saved registers
46 * +------------------+ /
/*
 * Copy @len low-order bytes of @bytes into the JIT image at @ptr and
 * return the advanced pointer.
 * NOTE(review): the body is elided in this listing (inner numbering skips
 * from 50 to 63); only the signature is visible here.
 */
50 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
/*
 * Instruction-emission helpers. They rely on the local variables 'prog'
 * (current output cursor) and 'cnt' (bytes emitted so far) being in scope
 * at the expansion site.
 */
/* Emit 'len' low-order bytes of 'bytes' and advance the byte count. */
63 #define EMIT(bytes, len) \
64 do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
/* Emit 1..4 opcode/operand bytes packed little-endian into one u32. */
66 #define EMIT1(b1) EMIT(b1, 1)
67 #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
68 #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
69 #define EMIT4(b1, b2, b3, b4) \
70 EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
/* Same as above, followed by a 32-bit immediate/displacement. */
72 #define EMIT1_off32(b1, off) \
73 do { EMIT1(b1); EMIT(off, 4); } while (0)
74 #define EMIT2_off32(b1, b2, off) \
75 do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
76 #define EMIT3_off32(b1, b2, b3, off) \
77 do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
78 #define EMIT4_off32(b1, b2, b3, b4, off) \
79 do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
/*
 * Relative displacement from the end of a jump instruction of length
 * 'jmp_insn_len' (at current offset 'cnt') to byte offset 'label'.
 */
81 #define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)
/*
 * Return true when @value fits in a sign-extended 8-bit immediate
 * (imm8 field of an IA32 instruction).
 * NOTE(review): the braces of this function were lost in the source
 * listing; the definition is restored here, logic unchanged.
 */
static bool is_imm8(int value)
{
	return value <= 127 && value >= -128;
}
88 static bool is_simm32(s64 value)
90 return value == (s64) (s32) value;
/* Byte offset of an eBPF register's scratch slot relative to BPF_FP/EBP. */
93 #define STACK_OFFSET(k) (k)
/* Pseudo-register index used for the tail-call counter slot. */
94 #define TCALL_CNT (MAX_BPF_JIT_REG + 0) /* Tail Call Count */
/* IA32 register encodings (3-bit reg field values used in ModRM bytes). */
96 #define IA32_EAX (0x0)
97 #define IA32_EBX (0x3)
98 #define IA32_ECX (0x1)
99 #define IA32_EDX (0x2)
100 #define IA32_ESI (0x6)
101 #define IA32_EDI (0x7)
102 #define IA32_EBP (0x5)
103 #define IA32_ESP (0x4)
/*
 * Short-form (rel8) conditional jump opcodes.
 * NOTE(review): several entries (e.g. JB/JE/JG/JL) are elided in this
 * listing — the inner numbering skips values.
 */
106 * List of x86 cond jumps opcodes (. + s8)
107 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
110 #define IA32_JAE 0x73
112 #define IA32_JNE 0x75
113 #define IA32_JBE 0x76
116 #define IA32_JGE 0x7D
117 #define IA32_JLE 0x7E
121 * Map eBPF registers to IA32 32bit registers or stack scratch space.
123 * 1. All the registers, R0-R10, are mapped to scratch space on stack.
124 * 2. We need two 64 bit temp registers to do complex operations on eBPF
126 * 3. For performance reason, the BPF_REG_AX for blinding constant, is
127 * mapped to real hardware register pair, IA32_ESI and IA32_EDI.
129 * As the eBPF registers are all 64 bit registers and IA32 has only 32 bit
130 * registers, we have to map each eBPF registers with two IA32 32 bit regs
131 * or scratch memory space and we have to build eBPF 64 bit register from those.
133 * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
/*
 * eBPF -> IA32 register map: each 64-bit eBPF register becomes a pair of
 * 32-bit locations {low, high}. All entries except BPF_REG_AX are stack
 * scratch-slot offsets; BPF_REG_AX lives in the ESI:EDI register pair.
 * NOTE(review): the closing "};" of this initializer is elided in this
 * listing (inner numbering skips).
 */
135 static const u8 bpf2ia32[][2] = {
136 /* Return value from in-kernel function, and exit value from eBPF */
137 [BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},
139 /* The arguments from eBPF program to in-kernel function */
140 /* Stored on stack scratch space */
141 [BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
142 [BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
143 [BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
144 [BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
145 [BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},
147 /* Callee saved registers that in-kernel function will preserve */
148 /* Stored on stack scratch space */
149 [BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
150 [BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
151 [BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
152 [BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},
154 /* Read only Frame Pointer to access Stack */
155 [BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},
157 /* Temporary register for blinding constants. */
158 [BPF_REG_AX] = {IA32_ESI, IA32_EDI},
160 /* Tail call count. Stored on stack scratch space. */
161 [TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
/* Shorthands: dst[]/src[] are {low, high} pairs from bpf2ia32. */
164 #define dst_lo dst[0]
165 #define dst_hi dst[1]
166 #define src_lo src[0]
167 #define src_hi src[1]
169 #define STACK_ALIGNMENT 8
171 * Stack space for BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4,
172 * BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9,
173 * BPF_REG_FP, BPF_REG_AX and Tail call counts.
/* 12 mapped slots x 8 bytes each = 96 bytes of scratch space. */
175 #define SCRATCH_SIZE 96
177 /* Total stack size used in JITed code */
/*
 * NOTE(review): the middle of this macro (presumably the program's
 * stack_depth term plus SCRATCH_SIZE) is elided in this listing.
 */
178 #define _STACK_SIZE \
181 + 4 /* Extra space for skb_copy_bits buffer */)
183 #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
185 /* Get the offset of eBPF REGISTERs stored on scratch space. */
186 #define STACK_VAR(off) (off)
188 /* Offset of skb_copy_bits buffer */
189 #define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
191 /* Encode 'dst_reg' register into IA32 opcode 'byte' */
192 static u8 add_1reg(u8 byte, u32 dst_reg)
194 return byte + dst_reg;
197 /* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
198 static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
200 return byte + dst_reg + (src_reg << 3);
/*
 * Fill unused space in the JIT image with int3 (0xcc) breakpoint
 * instructions so that a stray jump into the hole traps instead of
 * executing garbage.
 * NOTE(review): braces restored; they were lost in the source listing.
 */
static void jit_fill_hole(void *area, unsigned int size)
{
	/* Fill whole space with int3 instructions */
	memset(area, 0xcc, size);
}
/*
 * Emit code to load the 32-bit immediate 'val' into 'dst': a stack
 * scratch slot at [ebp+dst] when 'dstk', otherwise a hardware register.
 * A zero value is materialized with a cheaper xor.
 * NOTE(review): several lines of this body (conditions, braces, epilog)
 * are elided in this listing — inner numbering skips.
 */
209 static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
/* val == 0 case: xor eax,eax then spill eax to the stack slot */
218 EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
219 /* mov dword ptr [ebp+off],eax */
220 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
/* non-zero: mov dword ptr [ebp+off],imm32 */
223 EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
224 STACK_VAR(dst), val);
/* register destination: xor dst,dst or mov dst,imm32 */
228 EMIT2(0x33, add_2reg(0xC0, dst, dst));
230 EMIT2_off32(0xC7, add_1reg(0xC0, dst),
/*
 * Emit a 32-bit move from 'src' to 'dst'; either side may be a stack
 * scratch slot ('sstk'/'dstk') or a hardware register. A stacked source
 * is staged through EAX.
 * NOTE(review): parts of the body (branch structure, closing braces) are
 * elided in this listing.
 */
236 /* dst = imm (4 bytes)*/
237 static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
238 bool sstk, u8 **pprog)
/* sreg: EAX when the source must first be loaded from the stack */
242 u8 sreg = sstk ? IA32_EAX : src;
245 /* mov eax,dword ptr [ebp+off] */
246 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
248 /* mov dword ptr [ebp+off],eax */
249 EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
/* register-to-register move */
252 EMIT2(0x89, add_2reg(0xC0, dst, sreg));
/*
 * Emit a 64-bit register move as two 32-bit moves. For a 32-bit (ALU32)
 * move the high half of dst is zeroed instead, matching eBPF semantics.
 * NOTE(review): the is64 branch/else structure is elided in this listing.
 */
258 static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
259 const u8 src[], bool dstk,
260 bool sstk, u8 **pprog)
262 emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
264 /* complete 8 byte move */
265 emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
267 /* zero out high 4 bytes */
268 emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
/*
 * Emit a 64-bit immediate move: low half gets 'val', high half gets its
 * sign extension ('hi' is presumably ~0 when is64 and bit 31 of val is
 * set, else 0 — the assignment lines are elided in this listing).
 */
271 /* Sign extended move */
272 static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
273 const u32 val, bool dstk, u8 **pprog)
277 if (is64 && (val & (1<<31)))
279 emit_ia32_mov_i(dst_lo, val, dstk, pprog);
280 emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
/*
 * 32-bit multiply: dst = dst * src. The multiplicand is loaded into EAX,
 * the multiplier into ECX (when stacked), then a one-operand MUL is used;
 * the 32-bit product in EAX is written back to dst.
 * NOTE(review): branch structure/braces are elided in this listing.
 */
284 * ALU operation (32 bit)
287 static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
288 bool sstk, u8 **pprog)
292 u8 sreg = sstk ? IA32_ECX : src;
295 /* mov ecx,dword ptr [ebp+off] */
296 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
299 /* mov eax,dword ptr [ebp+off] */
300 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
303 EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
/* mul sreg: EDX:EAX = EAX * sreg (EDX is clobbered) */
306 EMIT2(0xF7, add_1reg(0xE0, sreg));
309 /* mov dword ptr [ebp+off],eax */
310 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
/* register destination: mov dst,eax */
314 EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
/*
 * BPF_TO_LE conversion: x86 is little-endian, so this only truncates —
 * val==16 zero-extends the low 16 bits and clears the high dword,
 * val==32 clears the high dword, val==64 is a no-op.
 * NOTE(review): the switch structure and some opcode bytes are elided in
 * this listing.
 */
319 static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
320 bool dstk, u8 **pprog)
324 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
325 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
/* Load both halves from the stack unless this is the val==64 no-op. */
327 if (dstk && val != 64) {
328 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
330 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
336 * Emit 'movzwl eax,ax' to zero extend 16-bit
340 EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
341 /* xor dreg_hi,dreg_hi */
342 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
/* val==32 case: only the high dword is cleared */
345 /* xor dreg_hi,dreg_hi */
346 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
/* Write the result back to the stack slots. */
353 if (dstk && val != 64) {
354 /* mov dword ptr [ebp+off],dreg_lo */
355 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
357 /* mov dword ptr [ebp+off],dreg_hi */
358 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
/*
 * BPF_TO_BE conversion: byte-swap the low 16 bits (val==16), the low 32
 * bits (val==32), or the full 64 bits (val==64, which swaps both dwords
 * and exchanges them via ECX).
 * NOTE(review): the switch labels and some opcode prefixes are elided in
 * this listing.
 */
364 static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
365 bool dstk, u8 **pprog)
369 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
370 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
/* Load both halves from their stack slots when dstk. */
373 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
375 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
380 /* Emit 'ror %ax, 8' to swap lower 2 bytes */
382 EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);
/* zero-extend the swapped 16 bits, clear the high dword */
385 EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
387 /* xor dreg_hi,dreg_hi */
388 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
391 /* Emit 'bswap eax' to swap lower 4 bytes */
393 EMIT1(add_1reg(0xC8, dreg_lo));
395 /* xor dreg_hi,dreg_hi */
396 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
/* val==64: bswap both halves, then swap them through ECX */
399 /* Emit 'bswap eax' to swap lower 4 bytes */
401 EMIT1(add_1reg(0xC8, dreg_lo));
403 /* Emit 'bswap edx' to swap lower 4 bytes */
405 EMIT1(add_1reg(0xC8, dreg_hi));
407 /* mov ecx,dreg_hi */
408 EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
409 /* mov dreg_hi,dreg_lo */
410 EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
411 /* mov dreg_lo,ecx */
412 EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
/* Store the result back to the stack slots when dstk. */
417 /* mov dword ptr [ebp+off],dreg_lo */
418 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
420 /* mov dword ptr [ebp+off],dreg_hi */
421 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
/*
 * 32-bit unsigned divide/modulo: dividend in EAX, divisor in ECX,
 * EDX zeroed, then DIV ECX; EAX holds the quotient and EDX the
 * remainder — which one is written back depends on 'op'.
 * NOTE(review): the op dispatch and braces are elided in this listing.
 */
428 * ALU operation (32 bit)
429 * dst = dst (div|mod) src
431 static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
432 bool dstk, bool sstk, u8 **pprog)
438 /* mov ecx,dword ptr [ebp+off] */
439 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
441 else if (src != IA32_ECX)
443 EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
446 /* mov eax,dword ptr [ebp+off] */
447 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
451 EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
/* xor edx,edx — zero-extend the dividend for unsigned DIV */
454 EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
/* div ecx */
456 EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));
/* BPF_MOD result: store EDX (remainder) */
460 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
463 EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
/* BPF_DIV result: store EAX (quotient) */
466 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
469 EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
/*
 * 32-bit variable shift: shift count is placed in CL, then a D3-group
 * shift (opcode extension 'b2' selected from 'op', elided here) is
 * applied to dreg.
 * NOTE(review): the op->b2 switch is elided in this listing.
 */
475 * ALU operation (32 bit)
476 * dst = dst (shift) src
478 static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
479 bool dstk, bool sstk, u8 **pprog)
483 u8 dreg = dstk ? IA32_EAX : dst;
487 /* mov eax,dword ptr [ebp+off] */
488 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
491 /* mov ecx,dword ptr [ebp+off] */
492 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
493 else if (src != IA32_ECX)
495 EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
/* shift dreg by CL using the selected D3 /r form */
507 EMIT2(0xD3, add_1reg(b2, dreg));
510 /* mov dword ptr [ebp+off],dreg */
511 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
/*
 * 32-bit ALU op on one half of a 64-bit value. For is64, the high-half
 * pass ('hi') uses the carrying forms (ADC 0x11 / SBB 0x19) so that
 * add/sub propagate across the 64-bit boundary.
 * NOTE(review): case labels and braces are elided in this listing.
 */
516 * ALU operation (32 bit)
519 static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
520 const u8 dst, const u8 src, bool dstk,
521 bool sstk, u8 **pprog)
525 u8 sreg = sstk ? IA32_EAX : src;
526 u8 dreg = dstk ? IA32_EDX : dst;
529 /* mov eax,dword ptr [ebp+off] */
530 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
533 /* mov eax,dword ptr [ebp+off] */
534 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));
536 switch (BPF_OP(op)) {
537 /* dst = dst + src */
/* adc (with carry, for the high half of a 64-bit add) */
540 EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
/* add */
542 EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
544 /* dst = dst - src */
/* sbb (with borrow, for the high half of a 64-bit sub) */
547 EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
/* sub */
549 EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
551 /* dst = dst | src */
553 EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
555 /* dst = dst & src */
557 EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
559 /* dst = dst ^ src */
561 EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
566 /* mov dword ptr [ebp+off],dreg */
567 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
/*
 * 64-bit ALU op: run the 32-bit op on the low halves, then on the high
 * halves (with hi=true so add/sub carry); for ALU32 ops zero the high
 * half instead.
 * NOTE(review): the is64 branch structure is elided in this listing.
 */
572 /* ALU operation (64 bit) */
573 static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
574 const u8 dst[], const u8 src[],
575 bool dstk, bool sstk,
580 emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
582 emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
585 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
/*
 * 32-bit ALU op with an immediate. Small immediates use the 0x83
 * sign-extended imm8 forms; otherwise the immediate is materialized in
 * EDX (sreg) and the register-register form is used. High-half passes
 * use the carrying variants (ADC/SBB), as in emit_ia32_alu_r.
 * NOTE(review): case labels, is_imm8() guards and braces are elided in
 * this listing.
 */
590 * ALU operation (32 bit)
593 static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
594 const u8 dst, const s32 val, bool dstk,
599 u8 dreg = dstk ? IA32_EAX : dst;
603 /* mov eax,dword ptr [ebp+off] */
604 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
/* mov edx,imm32 — stage a large immediate in EDX */
608 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);
611 /* dst = dst + val */
/* adc imm8 / adc reg (high half) */
615 EMIT3(0x83, add_1reg(0xD0, dreg), val);
617 EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
/* add imm8 / add reg (low half) */
620 EMIT3(0x83, add_1reg(0xC0, dreg), val);
622 EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
625 /* dst = dst - val */
/* sbb imm8 / sbb reg (high half) */
629 EMIT3(0x83, add_1reg(0xD8, dreg), val);
631 EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
/* sub imm8 / sub reg (low half) */
634 EMIT3(0x83, add_1reg(0xE8, dreg), val);
636 EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
639 /* dst = dst | val */
642 EMIT3(0x83, add_1reg(0xC8, dreg), val);
644 EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
646 /* dst = dst & val */
649 EMIT3(0x83, add_1reg(0xE0, dreg), val);
651 EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
653 /* dst = dst ^ val */
656 EMIT3(0x83, add_1reg(0xF0, dreg), val);
658 EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
/* neg dreg (BPF_NEG) */
661 EMIT2(0xF7, add_1reg(0xD8, dreg));
666 /* mov dword ptr [ebp+off],dreg */
667 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
/*
 * 64-bit ALU op with immediate: low half gets 'val', high half gets its
 * sign extension 'hi' (assignment lines elided in this listing); for
 * ALU32 ops the high half is zeroed instead.
 */
672 /* ALU operation (64 bit) */
673 static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
674 const u8 dst[], const u32 val,
675 bool dstk, u8 **pprog)
680 if (is64 && (val & (1<<31)))
683 emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
685 emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
687 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
/*
 * 64-bit negate. Despite the "~dst" comment this computes arithmetic
 * negation through ECX: 0 - lo with the borrow propagated into the high
 * half via SBB.
 * NOTE(review): dstk branch structure and epilog braces are elided in
 * this listing; the 0x2B (sub r32,r/m32) vs 0x19 (sbb) operand order
 * should be confirmed against the full source.
 */
692 /* dst = ~dst (64 bit) */
693 static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
697 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
698 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
/* load both halves from the stack when dstk */
701 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
703 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
/* xor ecx,ecx — ECX = 0 */
708 EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
709 /* sub dreg_lo,ecx */
710 EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX));
711 /* mov dreg_lo,ecx */
712 EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
/* xor ecx,ecx again for the high half */
715 EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
716 /* sbb dreg_hi,ecx */
717 EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX));
718 /* mov dreg_hi,ecx */
719 EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX));
722 /* mov dword ptr [ebp+off],dreg_lo */
723 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
725 /* mov dword ptr [ebp+off],dreg_hi */
726 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
/*
 * 64-bit left shift by a runtime count (in ECX). Three cases selected by
 * compares at runtime: count < 32 (shift both halves, OR the spilled
 * bits into the high half), 32 <= count < 64 (low -> high, low = 0),
 * count >= 64 (result = 0).
 * NOTE(review): the function-local 'static int jmp_label*' cache the
 * label offsets across emission passes — presumably the multi-pass JIT
 * converges on identical sizes; confirm against the full source. Label
 * definition points ("if (jmp_labelN == -1) jmp_labelN = cnt;") are
 * elided in this listing.
 */
732 /* dst = dst << src */
733 static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
734 bool dstk, bool sstk, u8 **pprog)
738 static int jmp_label1 = -1;
739 static int jmp_label2 = -1;
740 static int jmp_label3 = -1;
741 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
742 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
/* load dst halves from the stack when dstk */
745 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
747 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
752 /* mov ecx,dword ptr [ebp+off] */
753 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
757 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
/* cmp ecx,32 */
760 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
761 /* Jumps when >= 32 */
762 if (is_imm8(jmp_label(jmp_label1, 2)))
763 EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
765 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
/* count < 32: shl hi,cl; save lo; shl lo,cl */
769 EMIT2(0xD3, add_1reg(0xE0, dreg_hi));
770 /* mov ebx,dreg_lo */
771 EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
773 EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
775 /* IA32_ECX = -IA32_ECX + 32 */
777 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
779 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
/* shr ebx,cl — bits shifted out of lo, then OR into hi */
782 EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
784 EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
/* jmp to done (label3) */
787 if (is_imm8(jmp_label(jmp_label3, 2)))
788 EMIT2(0xEB, jmp_label(jmp_label3, 2));
790 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
/* label1: count >= 32 */
793 if (jmp_label1 == -1)
/* cmp ecx,64 */
797 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
798 /* Jumps when >= 64 */
799 if (is_imm8(jmp_label(jmp_label2, 2)))
800 EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
802 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
/* 32 <= count < 64: sub ecx,32; shl lo,cl; hi = lo; lo = 0 */
806 EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
808 EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
809 /* mov dreg_hi,dreg_lo */
810 EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
812 /* xor dreg_lo,dreg_lo */
813 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
/* jmp to done (label3) */
816 if (is_imm8(jmp_label(jmp_label3, 2)))
817 EMIT2(0xEB, jmp_label(jmp_label3, 2));
819 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
/* label2: count >= 64 — result is zero */
822 if (jmp_label2 == -1)
824 /* xor dreg_lo,dreg_lo */
825 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
826 /* xor dreg_hi,dreg_hi */
827 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
/* label3: done */
829 if (jmp_label3 == -1)
/* store halves back to the stack when dstk */
833 /* mov dword ptr [ebp+off],dreg_lo */
834 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
836 /* mov dword ptr [ebp+off],dreg_hi */
837 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
/*
 * 64-bit arithmetic right shift by a runtime count (in ECX). Cases:
 * count < 32 (shr lo / sar hi, OR spilled high bits into lo),
 * 32 <= count < 64 (lo = hi >> (count-32), hi = sign), count >= 64
 * (both halves become the sign, via sar hi,31).
 * NOTE(review): same static-label caching pattern as emit_ia32_lsh_r64;
 * label definition points are elided in this listing.
 */
844 /* dst = dst >> src (signed)*/
845 static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
846 bool dstk, bool sstk, u8 **pprog)
850 static int jmp_label1 = -1;
851 static int jmp_label2 = -1;
852 static int jmp_label3 = -1;
853 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
854 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
/* load dst halves from the stack when dstk */
857 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
859 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
864 /* mov ecx,dword ptr [ebp+off] */
865 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
869 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
/* cmp ecx,32 */
872 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
873 /* Jumps when >= 32 */
874 if (is_imm8(jmp_label(jmp_label1, 2)))
875 EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
877 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
/* count < 32 */
880 /* lshr dreg_lo,cl */
881 EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
882 /* mov ebx,dreg_hi */
883 EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
884 /* ashr dreg_hi,cl */
885 EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
887 /* IA32_ECX = -IA32_ECX + 32 */
889 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
891 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
/* shl ebx,cl — bits that crossed from hi into lo */
894 EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
896 EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
/* jmp to done (label3) */
899 if (is_imm8(jmp_label(jmp_label3, 2)))
900 EMIT2(0xEB, jmp_label(jmp_label3, 2));
902 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
/* label1: count >= 32 */
905 if (jmp_label1 == -1)
/* cmp ecx,64 */
909 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
910 /* Jumps when >= 64 */
911 if (is_imm8(jmp_label(jmp_label2, 2)))
912 EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
914 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
/* 32 <= count < 64: sub ecx,32; lo = hi >> cl; hi = sign */
918 EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
919 /* ashr dreg_hi,cl */
920 EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
921 /* mov dreg_lo,dreg_hi */
922 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
924 /* ashr dreg_hi,imm8 */
925 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
/* jmp to done (label3) */
928 if (is_imm8(jmp_label(jmp_label3, 2)))
929 EMIT2(0xEB, jmp_label(jmp_label3, 2));
931 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
/* label2: count >= 64 — both halves become the sign bit */
934 if (jmp_label2 == -1)
936 /* ashr dreg_hi,imm8 */
937 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
938 /* mov dreg_lo,dreg_hi */
939 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
/* label3: done */
941 if (jmp_label3 == -1)
/* store halves back to the stack when dstk */
945 /* mov dword ptr [ebp+off],dreg_lo */
946 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
948 /* mov dword ptr [ebp+off],dreg_hi */
949 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
/*
 * 64-bit logical right shift by a runtime count (in ECX). Cases:
 * count < 32 (shr both halves, OR spilled high bits into lo),
 * 32 <= count < 64 (lo = hi >> (count-32), hi = 0), count >= 64
 * (result = 0).
 * NOTE(review): same static-label caching pattern as emit_ia32_lsh_r64;
 * label definition points are elided in this listing.
 */
956 /* dst = dst >> src */
957 static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
958 bool sstk, u8 **pprog)
962 static int jmp_label1 = -1;
963 static int jmp_label2 = -1;
964 static int jmp_label3 = -1;
965 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
966 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
/* load dst halves from the stack when dstk */
969 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
971 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
976 /* mov ecx,dword ptr [ebp+off] */
977 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
981 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
/* cmp ecx,32 */
984 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
985 /* Jumps when >= 32 */
986 if (is_imm8(jmp_label(jmp_label1, 2)))
987 EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
989 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
/* count < 32 */
992 /* lshr dreg_lo,cl */
993 EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
994 /* mov ebx,dreg_hi */
995 EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
/* shr dreg_hi,cl */
997 EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
999 /* IA32_ECX = -IA32_ECX + 32 */
1001 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1003 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
/* shl ebx,cl — bits that crossed from hi into lo */
1006 EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1007 /* or dreg_lo,ebx */
1008 EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
/* jmp to done (label3) */
1011 if (is_imm8(jmp_label(jmp_label3, 2)))
1012 EMIT2(0xEB, jmp_label(jmp_label3, 2));
1014 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
/* label1: count >= 32 */
1017 if (jmp_label1 == -1)
/* cmp ecx,64 */
1020 EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
1021 /* Jumps when >= 64 */
1022 if (is_imm8(jmp_label(jmp_label2, 2)))
1023 EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
1025 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
/* 32 <= count < 64: sub ecx,32; lo = hi >> cl; hi = 0 */
1029 EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
1030 /* shr dreg_hi,cl */
1031 EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
1032 /* mov dreg_lo,dreg_hi */
1033 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1034 /* xor dreg_hi,dreg_hi */
1035 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
/* jmp to done (label3) */
1038 if (is_imm8(jmp_label(jmp_label3, 2)))
1039 EMIT2(0xEB, jmp_label(jmp_label3, 2));
1041 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
/* label2: count >= 64 — result is zero */
1044 if (jmp_label2 == -1)
1046 /* xor dreg_lo,dreg_lo */
1047 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1048 /* xor dreg_hi,dreg_hi */
1049 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
/* label3: done */
1051 if (jmp_label3 == -1)
/* store halves back to the stack when dstk */
1055 /* mov dword ptr [ebp+off],dreg_lo */
1056 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1058 /* mov dword ptr [ebp+off],dreg_hi */
1059 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
/*
 * 64-bit left shift by a compile-time constant. The three count ranges
 * (< 32, 32..63, >= 64) are resolved at JIT time, so no runtime
 * compares/jumps are emitted.
 * NOTE(review): the range guards ("if (val < 32) ... else if ...") are
 * elided in this listing.
 */
1066 /* dst = dst << val */
1067 static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
1068 bool dstk, u8 **pprog)
1072 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1073 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
/* load dst halves from the stack when dstk */
1076 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1078 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1081 /* Do LSH operation */
/* val < 32 */
1083 /* shl dreg_hi,imm8 */
1084 EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val);
1085 /* mov ebx,dreg_lo */
1086 EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
1087 /* shl dreg_lo,imm8 */
1088 EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
1090 /* IA32_ECX = 32 - val */
/* movzx/neg/add sequence computing the complementary count in ECX */
1094 EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1096 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1098 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
/* shr ebx,cl — bits shifted out of lo, OR into hi */
1101 EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
1102 /* or dreg_hi,ebx */
1103 EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
1104 } else if (val >= 32 && val < 64) {
1105 u32 value = val - 32;
1107 /* shl dreg_lo,imm8 */
1108 EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
1109 /* mov dreg_hi,dreg_lo */
1110 EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
1111 /* xor dreg_lo,dreg_lo */
1112 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
/* val >= 64: result is zero */
1114 /* xor dreg_lo,dreg_lo */
1115 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1116 /* xor dreg_hi,dreg_hi */
1117 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
/* store halves back to the stack when dstk */
1121 /* mov dword ptr [ebp+off],dreg_lo */
1122 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1124 /* mov dword ptr [ebp+off],dreg_hi */
1125 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
/*
 * 64-bit logical right shift by a compile-time constant; count ranges
 * (< 32, 32..63, >= 64) resolved at JIT time.
 * NOTE(review): the range guards are elided in this listing.
 */
1131 /* dst = dst >> val */
1132 static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
1133 bool dstk, u8 **pprog)
1137 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1138 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
/* load dst halves from the stack when dstk */
1141 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1143 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1147 /* Do RSH operation */
/* val < 32 */
1149 /* shr dreg_lo,imm8 */
1150 EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
1151 /* mov ebx,dreg_hi */
1152 EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
1153 /* shr dreg_hi,imm8 */
1154 EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
1156 /* IA32_ECX = 32 - val */
/* movzx/neg/add sequence computing the complementary count in ECX */
1160 EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1162 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1164 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
/* shl ebx,cl — bits crossing from hi into lo */
1167 EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1168 /* or dreg_lo,ebx */
1169 EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1170 } else if (val >= 32 && val < 64) {
1171 u32 value = val - 32;
1173 /* shr dreg_hi,imm8 */
1174 EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
1175 /* mov dreg_lo,dreg_hi */
1176 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1177 /* xor dreg_hi,dreg_hi */
1178 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
/* val >= 64: result is zero */
1180 /* xor dreg_lo,dreg_lo */
1181 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1182 /* xor dreg_hi,dreg_hi */
1183 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
/* store halves back to the stack when dstk */
1187 /* mov dword ptr [ebp+off],dreg_lo */
1188 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1190 /* mov dword ptr [ebp+off],dreg_hi */
1191 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
/*
 * 64-bit arithmetic right shift by a compile-time constant; count ranges
 * (< 32, 32..63, >= 64) resolved at JIT time. For >= 64 both halves
 * become the sign (hi = sar 31, lo = hi).
 * NOTE(review): the range guards are elided in this listing.
 */
1197 /* dst = dst >> val (signed) */
1198 static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
1199 bool dstk, u8 **pprog)
1203 u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1204 u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
/* load dst halves from the stack when dstk */
1207 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1209 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1212 /* Do RSH operation */
/* val < 32 */
1214 /* shr dreg_lo,imm8 */
1215 EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
1216 /* mov ebx,dreg_hi */
1217 EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
1218 /* ashr dreg_hi,imm8 */
1219 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
1221 /* IA32_ECX = 32 - val */
/* movzx/neg/add sequence computing the complementary count in ECX */
1225 EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1227 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1229 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
/* shl ebx,cl — bits crossing from hi into lo */
1232 EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1233 /* or dreg_lo,ebx */
1234 EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1235 } else if (val >= 32 && val < 64) {
1236 u32 value = val - 32;
1238 /* ashr dreg_hi,imm8 */
1239 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
1240 /* mov dreg_lo,dreg_hi */
1241 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1243 /* ashr dreg_hi,imm8 */
1244 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
/* val >= 64: both halves become the sign */
1246 /* ashr dreg_hi,imm8 */
1247 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1248 /* mov dreg_lo,dreg_hi */
1249 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
/* store halves back to the stack when dstk */
1253 /* mov dword ptr [ebp+off],dreg_lo */
1254 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1256 /* mov dword ptr [ebp+off],dreg_hi */
1257 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
/*
 * 64x64 multiply keeping the low 64 bits, built from three 32-bit MULs:
 * dst_hi*src_lo and dst_lo*src_hi contribute to the high word (summed in
 * ECX), then dst_lo*src_lo provides the low word (EAX) and its EDX
 * overflow is added into ECX. Result is EAX (lo) : ECX (hi).
 * NOTE(review): dstk/sstk branch structure is elided in this listing.
 */
1263 static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
1264 bool sstk, u8 **pprog)
/* Pass 1: EAX = dst_hi */
1270 /* mov eax,dword ptr [ebp+off] */
1271 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1274 /* mov eax,dst_hi */
1275 EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));
1278 /* mul dword ptr [ebp+off] */
1279 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
1282 EMIT2(0xF7, add_1reg(0xE0, src_lo));
/* mov ecx,eax — first partial high word */
1285 EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
/* Pass 2: EAX = dst_lo, multiply by src_hi */
1288 /* mov eax,dword ptr [ebp+off] */
1289 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1292 /* mov eax,dst_lo */
1293 EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1296 /* mul dword ptr [ebp+off] */
1297 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
1300 EMIT2(0xF7, add_1reg(0xE0, src_hi));
/* add ecx,eax — accumulate second partial high word */
1303 EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
/* Pass 3: EAX = dst_lo, multiply by src_lo */
1306 /* mov eax,dword ptr [ebp+off] */
1307 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1310 /* mov eax,dst_lo */
1311 EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1314 /* mul dword ptr [ebp+off] */
1315 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
1318 EMIT2(0xF7, add_1reg(0xE0, src_lo));
/* add ecx,edx — overflow of the low-word product */
1321 EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
/* Store EAX:ECX back into dst */
1324 /* mov dword ptr [ebp+off],eax */
1325 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1327 /* mov dword ptr [ebp+off],ecx */
1328 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
1331 /* mov dst_lo,eax */
1332 EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
1333 /* mov dst_hi,ecx */
1334 EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
/*
 * 64-bit multiply by a sign-extended 32-bit immediate, same three-MUL
 * scheme as emit_ia32_mul_r64 with the immediate pair (val, hi) staged
 * in EAX for each pass. Result is EAX (lo) : ECX (hi).
 * NOTE(review): dstk branch structure is elided in this listing.
 */
1340 static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
1341 bool dstk, u8 **pprog)
/* hi = sign extension of val */
1347 hi = val & (1<<31) ? (u32)~0 : 0;
/* Pass 1: dst_hi * val -> partial high word in ECX */
1348 /* movl eax,imm32 */
1349 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1351 /* mul dword ptr [ebp+off] */
1352 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
1355 EMIT2(0xF7, add_1reg(0xE0, dst_hi));
/* mov ecx,eax */
1358 EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
/* Pass 2: dst_lo * hi -> accumulate into ECX */
1360 /* movl eax,imm32 */
1361 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
1363 /* mul dword ptr [ebp+off] */
1364 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1367 EMIT2(0xF7, add_1reg(0xE0, dst_lo));
/* add ecx,eax */
1369 EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
/* Pass 3: dst_lo * val -> low word in EAX, overflow into ECX */
1371 /* movl eax,imm32 */
1372 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1374 /* mul dword ptr [ebp+off] */
1375 EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1378 EMIT2(0xF7, add_1reg(0xE0, dst_lo));
/* add ecx,edx */
1381 EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
/* Store EAX:ECX back into dst */
1384 /* mov dword ptr [ebp+off],eax */
1385 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1387 /* mov dword ptr [ebp+off],ecx */
1388 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
1391 /* mov dword ptr [ebp+off],eax */
1392 EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
1393 /* mov dword ptr [ebp+off],ecx */
1394 EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
1400 static int bpf_size_to_x86_bytes(int bpf_size)
1402 if (bpf_size == BPF_W)
1404 else if (bpf_size == BPF_H)
1406 else if (bpf_size == BPF_B)
1408 else if (bpf_size == BPF_DW)
1409 return 4; /* imm32 */
/*
 * Per-program JIT state carried across emission passes.
 * NOTE(review): the closing brace (and any further members) are elided
 * in this listing.
 */
1414 struct jit_context {
1415 int cleanup_addr; /* Epilogue code offset */
1418 /* Maximum number of bytes emitted while JITing one eBPF insn */
1419 #define BPF_MAX_INSN_SIZE 128
/* Extra slack appended to the image sizing estimate. */
1420 #define BPF_INSN_SAFETY 64
/* Exact byte length of the emitted prologue; checked by BUILD_BUG_ON. */
1422 #define PROLOGUE_SIZE 35
/*
 * Emit the fixed-size function prologue: allocate the JIT stack frame,
 * point EBP at the scratch area, store the frame pointer into the
 * BPF_REG_FP slots, spill the context argument (EAX) into BPF_REG_1's
 * slots, and zero the tail-call counter.
 * NOTE(review): the callee-saved register pushes and frame setup between
 * inner lines 1435 and 1448 are elided in this listing.
 */
1425 * Emit prologue code for BPF program and check it's size.
1426 * bpf_tail_call helper will skip it while jumping into another program.
1428 static void emit_prologue(u8 **pprog, u32 stack_depth)
1432 const u8 *r1 = bpf2ia32[BPF_REG_1];
1433 const u8 fplo = bpf2ia32[BPF_REG_FP][0];
1434 const u8 fphi = bpf2ia32[BPF_REG_FP][1];
1435 const u8 *tcc = bpf2ia32[TCALL_CNT];
1448 /* sub esp,STACK_SIZE */
1449 EMIT2_off32(0x81, 0xEC, STACK_SIZE);
1450 /* sub ebp,SCRATCH_SIZE+4+12*/
1451 EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16);
/* xor ebx,ebx — EBX = 0, used below for high words and tcc */
1453 EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
1455 /* Set up BPF prog stack base register */
1456 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
1457 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));
1459 /* Move BPF_CTX (EAX) to BPF_REG_R1 */
1460 /* mov dword ptr [ebp+off],eax */
1461 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1462 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));
1464 /* Initialize Tail Count */
1465 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
1466 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
/* Compile-time check that the prologue stayed exactly PROLOGUE_SIZE. */
1468 BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
/*
 * Emit the epilogue: load BPF_REG_0 into EDX:EAX (the 64-bit return
 * value convention on IA32), rewind EBP past the scratch area, restore
 * the callee-saved registers spilled by the prologue, then leave/ret.
 */
1472 /* Emit epilogue code for BPF program */
1473 static void emit_epilogue(u8 **pprog, u32 stack_depth)
1476 const u8 *r0 = bpf2ia32[BPF_REG_0];
1479 /* mov eax,dword ptr [ebp+off]*/
1480 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
1481 /* mov edx,dword ptr [ebp+off]*/
1482 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
1484 /* add ebp,SCRATCH_SIZE+4+12*/
/* undo the matching "sub ebp" done in emit_prologue */
1485 EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16);
1487 /* mov ebx,dword ptr [ebp-12]*/
1488 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
1489 /* mov esi,dword ptr [ebp-8]*/
1490 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
1491 /* mov edi,dword ptr [ebp-4]*/
1492 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);
1494 EMIT1(0xC9); /* leave */
1495 EMIT1(0xC3); /* ret */
1500 * Generate the following code:
1501 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
1502 * if (index >= array->map.max_entries)
1504 * if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
1506 * prog = array->ptrs[index];
1509 * goto *(prog->bpf_func + prologue_size);
1512 static void emit_bpf_tail_call(u8 **pprog)
/*
 * Emit the bpf_tail_call() sequence: bounds-check the index against
 * array->map.max_entries, enforce the MAX_TAIL_CALL_CNT limit on the 64-bit
 * counter kept in the tcc stack slots, load the target prog pointer, and
 * jump past the target's prologue with ctx back in EAX. All out-of-line
 * exits branch forward to jmp_label1 (the fall-through point recorded on the
 * first pass).
 *
 * NOTE(review): sampled listing — the opening brace, prog/cnt locals, some
 * branch bodies and the label bookkeeping at the end are not visible.
 */
1516 	const u8 *r1 = bpf2ia32[BPF_REG_1];
1517 	const u8 *r2 = bpf2ia32[BPF_REG_2];
1518 	const u8 *r3 = bpf2ia32[BPF_REG_3];
1519 	const u8 *tcc = bpf2ia32[TCALL_CNT];
/* Static across invocations: resolved on a later JIT pass (-1 = unknown). */
1521 	static int jmp_label1 = -1;
1524 	 * if (index >= array->map.max_entries)
/* EAX = array (BPF arg2), EDX = index (BPF arg3, low 32 bits). */
1527 	/* mov eax,dword ptr [ebp+off] */
1528 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
1529 	/* mov edx,dword ptr [ebp+off] */
1530 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));
1532 	/* cmp dword ptr [eax+off],edx */
1533 	EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
1534 	      offsetof(struct bpf_array, map.max_entries));
/* max_entries <= index → out of range, bail out. */
1536 	EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));
1539 	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
/* 64-bit limit split into 32-bit halves; counter loaded into EBX:ECX. */
1542 	lo = (u32)MAX_TAIL_CALL_CNT;
1543 	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
1544 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1545 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
/* Compare high half first, then low half (64-bit compare on 32-bit regs). */
1548 	EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
1551 	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);
1554 	EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
/* tail_call_cnt++ as a 64-bit add: add ecx,1 / adc ebx,0. */
1557 	EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
1559 	EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);
/* Store the incremented counter back into its stack slots. */
1561 	/* mov dword ptr [ebp+off],eax */
1562 	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1563 	/* mov dword ptr [ebp+off],edx */
1564 	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1566 	/* prog = array->ptrs[index]; */
1567 	/* mov edx, [eax + edx * 4 + offsetof(...)] */
1568 	EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));
/* test edx,edx — NULL slot means no program installed, fall through. */
1575 	EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
1577 	EMIT2(IA32_JE, jmp_label(jmp_label1, 2));
1579 	/* goto *(prog->bpf_func + prologue_size); */
1580 	/* mov edx, dword ptr [edx + 32] */
1581 	EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
1582 	      offsetof(struct bpf_prog, bpf_func));
/* Skip the target's prologue; relies on PROLOGUE_SIZE being exact. */
1583 	/* add edx,prologue_size */
1584 	EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);
1586 	/* mov eax,dword ptr [ebp+off] */
1587 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1590 	 * Now we're ready to jump into next BPF program:
1591 	 * eax == ctx (1st arg)
1592 	 * edx == prog->bpf_func + prologue_size
/* Indirect jump through EDX via retpoline (Spectre v2 mitigation macro). */
1594 	RETPOLINE_EDX_BPF_JIT();
/* First pass: record the fall-through offset so later passes can encode the
 * forward branches above. (Assignment body sampled out of this listing.) */
1596 	if (jmp_label1 == -1)
1603 /* Push the scratch stack register on top of the stack. */
1604 static inline void emit_push_r64(const u8 src[], u8 **pprog)
/*
 * Push a 64-bit BPF register (stored in its two 32-bit stack slots) onto the
 * machine stack, high half first so the value lies in memory low-half-first.
 *
 * NOTE(review): sampled listing — the actual "push ecx" emissions between the
 * two loads, plus braces/locals, are not visible here.
 */
1609 	/* mov ecx,dword ptr [ebp+off] */
1610 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
1614 	/* mov ecx,dword ptr [ebp+off] */
1615 	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
1622 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1623 		  int oldproglen, struct jit_context *ctx)
/*
 * Translate every eBPF instruction of @bpf_prog into IA32 machine code.
 * Each insn is emitted into the on-stack 'temp' buffer first, then copied
 * into @image (when non-NULL) at the offset recorded in @addrs[] by earlier
 * passes. Returns the total emitted length (negative errno on failure —
 * return paths are sampled out of this listing).
 *
 * NOTE(review): this listing is sampled; many braces, 'break's, case labels
 * and emitted instructions are missing. Comments describe visible lines only.
 */
1625 	struct bpf_insn *insn = bpf_prog->insnsi;
1626 	int insn_cnt = bpf_prog->len;
1627 	bool seen_exit = false;
/* Scratch buffer for one insn; SAFETY slack tolerates slight overshoot
 * before the size check at the bottom of the loop. */
1628 	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
1633 	emit_prologue(&prog, bpf_prog->aux->stack_depth);
1635 	for (i = 0; i < insn_cnt; i++, insn++) {
1636 		const s32 imm32 = insn->imm;
1637 		const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
/* BPF_REG_AX lives in real registers; every other BPF reg lives in the
 * per-register stack slots, hence the dstk/sstk "is on stack" flags. */
1638 		const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true;
1639 		const bool sstk = insn->src_reg == BPF_REG_AX ? false : true;
1640 		const u8 code = insn->code;
1641 		const u8 *dst = bpf2ia32[insn->dst_reg];
1642 		const u8 *src = bpf2ia32[insn->src_reg];
1643 		const u8 *r0 = bpf2ia32[BPF_REG_0];
1650 		/* ALU operations */
/* MOV: register-to-register or sign-extended-immediate moves, 32/64 bit. */
1652 		case BPF_ALU | BPF_MOV | BPF_K:
1653 		case BPF_ALU | BPF_MOV | BPF_X:
1654 		case BPF_ALU64 | BPF_MOV | BPF_K:
1655 		case BPF_ALU64 | BPF_MOV | BPF_X:
1656 			switch (BPF_SRC(code)) {
1658 				emit_ia32_mov_r64(is64, dst, src, dstk,
1662 				/* Sign-extend immediate value to dst reg */
1663 				emit_ia32_mov_i64(is64, dst, imm32,
1668 		/* dst = dst + src/imm */
1669 		/* dst = dst - src/imm */
1670 		/* dst = dst | src/imm */
1671 		/* dst = dst & src/imm */
1672 		/* dst = dst ^ src/imm */
1673 		/* dst = dst * src/imm */
1674 		/* dst = dst << src */
1675 		/* dst = dst >> src */
1676 		case BPF_ALU | BPF_ADD | BPF_K:
1677 		case BPF_ALU | BPF_ADD | BPF_X:
1678 		case BPF_ALU | BPF_SUB | BPF_K:
1679 		case BPF_ALU | BPF_SUB | BPF_X:
1680 		case BPF_ALU | BPF_OR | BPF_K:
1681 		case BPF_ALU | BPF_OR | BPF_X:
1682 		case BPF_ALU | BPF_AND | BPF_K:
1683 		case BPF_ALU | BPF_AND | BPF_X:
1684 		case BPF_ALU | BPF_XOR | BPF_K:
1685 		case BPF_ALU | BPF_XOR | BPF_X:
1686 		case BPF_ALU64 | BPF_ADD | BPF_K:
1687 		case BPF_ALU64 | BPF_ADD | BPF_X:
1688 		case BPF_ALU64 | BPF_SUB | BPF_K:
1689 		case BPF_ALU64 | BPF_SUB | BPF_X:
1690 		case BPF_ALU64 | BPF_OR | BPF_K:
1691 		case BPF_ALU64 | BPF_OR | BPF_X:
1692 		case BPF_ALU64 | BPF_AND | BPF_K:
1693 		case BPF_ALU64 | BPF_AND | BPF_X:
1694 		case BPF_ALU64 | BPF_XOR | BPF_K:
1695 		case BPF_ALU64 | BPF_XOR | BPF_X:
1696 			switch (BPF_SRC(code)) {
1698 				emit_ia32_alu_r64(is64, BPF_OP(code), dst,
1699 						  src, dstk, sstk, &prog);
1702 				emit_ia32_alu_i64(is64, BPF_OP(code), dst,
1703 						  imm32, dstk, &prog);
/* 32-bit multiply: K-form materializes imm32 in ECX first, then reuses the
 * register path; high half of dst is zeroed (32-bit ALU semantics). */
1707 		case BPF_ALU | BPF_MUL | BPF_K:
1708 		case BPF_ALU | BPF_MUL | BPF_X:
1709 			switch (BPF_SRC(code)) {
1711 				emit_ia32_mul_r(dst_lo, src_lo, dstk,
1716 				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1718 				emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
1722 			emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
/* 32-bit variable shifts (and ARSH by imm via ECX). */
1724 		case BPF_ALU | BPF_LSH | BPF_X:
1725 		case BPF_ALU | BPF_RSH | BPF_X:
1726 		case BPF_ALU | BPF_ARSH | BPF_K:
1727 		case BPF_ALU | BPF_ARSH | BPF_X:
1728 			switch (BPF_SRC(code)) {
1730 				emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
1735 				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1737 				emit_ia32_shift_r(BPF_OP(code), dst_lo,
1738 						  IA32_ECX, dstk, false,
1742 			emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1744 		/* dst = dst / src(imm) */
1745 		/* dst = dst % src(imm) */
1746 		case BPF_ALU | BPF_DIV | BPF_K:
1747 		case BPF_ALU | BPF_DIV | BPF_X:
1748 		case BPF_ALU | BPF_MOD | BPF_K:
1749 		case BPF_ALU | BPF_MOD | BPF_X:
1750 			switch (BPF_SRC(code)) {
1752 				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1753 						    src_lo, dstk, sstk, &prog);
1757 				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1759 				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1760 						    IA32_ECX, dstk, false,
1764 			emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
/* 64-bit div/mod not implemented by this JIT (falls to the error path —
 * body sampled out of this listing). */
1766 		case BPF_ALU64 | BPF_DIV | BPF_K:
1767 		case BPF_ALU64 | BPF_DIV | BPF_X:
1768 		case BPF_ALU64 | BPF_MOD | BPF_K:
1769 		case BPF_ALU64 | BPF_MOD | BPF_X:
1771 		/* dst = dst >> imm */
1772 		/* dst = dst << imm */
1773 		case BPF_ALU | BPF_RSH | BPF_K:
1774 		case BPF_ALU | BPF_LSH | BPF_K:
/* Shift counts >= 32 are rejected for 32-bit ops (UB on x86 otherwise). */
1775 			if (unlikely(imm32 > 31))
1778 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
1779 			emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
1781 			emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1783 		/* dst = dst << imm */
1784 		case BPF_ALU64 | BPF_LSH | BPF_K:
1785 			if (unlikely(imm32 > 63))
1787 			emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
1789 		/* dst = dst >> imm */
1790 		case BPF_ALU64 | BPF_RSH | BPF_K:
1791 			if (unlikely(imm32 > 63))
1793 			emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
1795 		/* dst = dst << src */
1796 		case BPF_ALU64 | BPF_LSH | BPF_X:
1797 			emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
1799 		/* dst = dst >> src */
1800 		case BPF_ALU64 | BPF_RSH | BPF_X:
1801 			emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
1803 		/* dst = dst >> src (signed) */
1804 		case BPF_ALU64 | BPF_ARSH | BPF_X:
1805 			emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
1807 		/* dst = dst >> imm (signed) */
1808 		case BPF_ALU64 | BPF_ARSH | BPF_K:
1809 			if (unlikely(imm32 > 63))
1811 			emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
/* 32-bit negate: emitted as (0 - dst) via the ALU-immediate helper. */
1814 		case BPF_ALU | BPF_NEG:
1815 			emit_ia32_alu_i(is64, false, BPF_OP(code),
1816 					dst_lo, 0, dstk, &prog);
1817 			emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1819 		/* dst = ~dst (64 bit) */
1820 		case BPF_ALU64 | BPF_NEG:
1821 			emit_ia32_neg64(dst, dstk, &prog);
1823 		/* dst = dst * src/imm */
1824 		case BPF_ALU64 | BPF_MUL | BPF_X:
1825 		case BPF_ALU64 | BPF_MUL | BPF_K:
1826 			switch (BPF_SRC(code)) {
1828 				emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
1831 				emit_ia32_mul_i64(dst, imm32, dstk, &prog);
1835 		/* dst = htole(dst) */
1836 		case BPF_ALU | BPF_END | BPF_FROM_LE:
1837 			emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
1839 		/* dst = htobe(dst) */
1840 		case BPF_ALU | BPF_END | BPF_FROM_BE:
1841 			emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
/* 64-bit immediate load: two 32-bit moves (lo from this insn, hi from the
 * paired second insn — extraction lines sampled out). */
1844 		case BPF_LD | BPF_IMM | BPF_DW: {
1848 			emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
1849 			emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
1854 		/* ST: *(u8*)(dst_reg + off) = imm */
1855 		case BPF_ST | BPF_MEM | BPF_H:
1856 		case BPF_ST | BPF_MEM | BPF_B:
1857 		case BPF_ST | BPF_MEM | BPF_W:
1858 		case BPF_ST | BPF_MEM | BPF_DW:
/* Get dst address into EAX: from its stack slot if spilled, else reg move. */
1860 				/* mov eax,dword ptr [ebp+off] */
1861 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1864 				/* mov eax,dst_lo */
1865 				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
/* Pick the mov-immediate opcode for the access width. */
1867 			switch (BPF_SIZE(code)) {
1869 				EMIT(0xC6, 1); break;
1871 				EMIT2(0x66, 0xC7); break;
1874 				EMIT(0xC7, 1); break;
/* disp8 vs disp32 addressing depending on insn->off magnitude. */
1877 			if (is_imm8(insn->off))
1878 				EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
1880 				EMIT1_off32(add_1reg(0x80, IA32_EAX),
1882 			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
/* DW: store sign-extension of imm32 as the high 32-bit word at off+4. */
1884 			if (BPF_SIZE(code) == BPF_DW) {
1887 				hi = imm32 & (1<<31) ? (u32)~0 : 0;
1888 				EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
1894 		/* STX: *(u8*)(dst_reg + off) = src_reg */
1895 		case BPF_STX | BPF_MEM | BPF_B:
1896 		case BPF_STX | BPF_MEM | BPF_H:
1897 		case BPF_STX | BPF_MEM | BPF_W:
1898 		case BPF_STX | BPF_MEM | BPF_DW:
/* dst address -> EAX, src value -> EDX (each from stack slot or register). */
1900 				/* mov eax,dword ptr [ebp+off] */
1901 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1904 				/* mov eax,dst_lo */
1905 				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1908 				/* mov edx,dword ptr [ebp+off] */
1909 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1912 				/* mov edx,src_lo */
1913 				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
1915 			switch (BPF_SIZE(code)) {
1917 				EMIT(0x88, 1); break;
1919 				EMIT2(0x66, 0x89); break;
1922 				EMIT(0x89, 1); break;
1925 			if (is_imm8(insn->off))
1926 				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1929 				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
/* DW: second 32-bit store of src_hi (via EDI) at off+4. */
1932 			if (BPF_SIZE(code) == BPF_DW) {
1934 					/* mov edi,dword ptr [ebp+off] */
1935 					EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
1939 					/* mov edi,src_hi */
1940 					EMIT2(0x8B, add_2reg(0xC0, src_hi,
1943 				if (is_imm8(insn->off + 4)) {
1944 					EMIT2(add_2reg(0x40, IA32_EAX,
1948 					EMIT1(add_2reg(0x80, IA32_EAX,
1950 					EMIT(insn->off + 4, 4);
1955 		/* LDX: dst_reg = *(u8*)(src_reg + off) */
1956 		case BPF_LDX | BPF_MEM | BPF_B:
1957 		case BPF_LDX | BPF_MEM | BPF_H:
1958 		case BPF_LDX | BPF_MEM | BPF_W:
1959 		case BPF_LDX | BPF_MEM | BPF_DW:
1961 				/* mov eax,dword ptr [ebp+off] */
1962 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1965 				/* mov eax,dword ptr [ebp+off] */
1966 				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
/* movzx for B/H (zero-extension), plain mov for W/DW low word, into EDX. */
1968 			switch (BPF_SIZE(code)) {
1970 				EMIT2(0x0F, 0xB6); break;
1972 				EMIT2(0x0F, 0xB7); break;
1975 				EMIT(0x8B, 1); break;
1978 			if (is_imm8(insn->off))
1979 				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1982 				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
/* Store loaded value into dst_lo (stack slot or register). */
1986 				/* mov dword ptr [ebp+off],edx */
1987 				EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
1990 				/* mov dst_lo,edx */
1991 				EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
/* High half: zeroed for sub-64-bit loads, loaded from off+4 for DW. */
1992 			switch (BPF_SIZE(code)) {
1997 					EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
2001 					EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0);
2006 					     add_2reg(0x80, IA32_EAX, IA32_EDX),
2010 					      add_2reg(0x40, IA32_EBP,
2015 					      add_2reg(0xC0, dst_hi, IA32_EDX));
/* Helper call: args R1..R5 go via EDX:EAX (R1) plus four 64-bit pushes. */
2022 		case BPF_JMP | BPF_CALL:
2024 			const u8 *r1 = bpf2ia32[BPF_REG_1];
2025 			const u8 *r2 = bpf2ia32[BPF_REG_2];
2026 			const u8 *r3 = bpf2ia32[BPF_REG_3];
2027 			const u8 *r4 = bpf2ia32[BPF_REG_4];
2028 			const u8 *r5 = bpf2ia32[BPF_REG_5];
/* BPF-to-BPF (pseudo) calls are not supported by this JIT. */
2030 			if (insn->src_reg == BPF_PSEUDO_CALL)
2033 			func = (u8 *) __bpf_call_base + imm32;
2034 			jmp_offset = func - (image + addrs[i]);
2036 			if (!imm32 || !is_simm32(jmp_offset)) {
2037 				pr_err("unsupported BPF func %d addr %p image %p\n",
2038 				       imm32, func, image);
2042 			/* mov eax,dword ptr [ebp+off] */
2043 			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2045 			/* mov edx,dword ptr [ebp+off] */
2046 			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
/* Push R5..R2 right-to-left so they read in order on the stack. */
2049 			emit_push_r64(r5, &prog);
2050 			emit_push_r64(r4, &prog);
2051 			emit_push_r64(r3, &prog);
2052 			emit_push_r64(r2, &prog);
/* +9 compensates for the bytes emitted between addrs[i] and the call. */
2054 			EMIT1_off32(0xE8, jmp_offset + 9);
/* Save 64-bit return value EDX:EAX into R0's stack slots. */
2056 			/* mov dword ptr [ebp+off],eax */
2057 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
2059 			/* mov dword ptr [ebp+off],edx */
2060 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
/* Pop the 4 x 8-byte pushed arguments: add esp,32. */
2064 			EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
2067 		case BPF_JMP | BPF_TAIL_CALL:
2068 			emit_bpf_tail_call(&prog);
/* 64-bit compare-and-branch, register form: compare high halves then low. */
2072 		case BPF_JMP | BPF_JEQ | BPF_X:
2073 		case BPF_JMP | BPF_JNE | BPF_X:
2074 		case BPF_JMP | BPF_JGT | BPF_X:
2075 		case BPF_JMP | BPF_JLT | BPF_X:
2076 		case BPF_JMP | BPF_JGE | BPF_X:
2077 		case BPF_JMP | BPF_JLE | BPF_X:
2078 		case BPF_JMP | BPF_JSGT | BPF_X:
2079 		case BPF_JMP | BPF_JSLE | BPF_X:
2080 		case BPF_JMP | BPF_JSLT | BPF_X:
2081 		case BPF_JMP | BPF_JSGE | BPF_X: {
2082 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2083 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2084 			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2085 			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2088 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2090 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2095 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2097 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
2101 			/* cmp dreg_hi,sreg_hi */
2102 			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2104 			/* cmp dreg_lo,sreg_lo */
2105 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
/* JSET reg: (dst & src) != 0 — AND both halves, OR them, test result. */
2108 		case BPF_JMP | BPF_JSET | BPF_X: {
2109 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2110 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2111 			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2112 			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2115 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2117 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2122 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2124 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
2127 			/* and dreg_lo,sreg_lo */
2128 			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2129 			/* and dreg_hi,sreg_hi */
2130 			EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2131 			/* or dreg_lo,dreg_hi */
2132 			EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
/* JSET imm: same as above with sign-extended imm materialized in EBX:ECX. */
2135 		case BPF_JMP | BPF_JSET | BPF_K: {
2137 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2138 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2139 			u8 sreg_lo = IA32_ECX;
2140 			u8 sreg_hi = IA32_EBX;
2143 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2145 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
/* Sign-extend imm32 to 64 bits: hi = all-ones iff the sign bit is set. */
2148 			hi = imm32 & (1<<31) ? (u32)~0 : 0;
2151 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2153 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2155 			/* and dreg_lo,sreg_lo */
2156 			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2157 			/* and dreg_hi,sreg_hi */
2158 			EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2159 			/* or dreg_lo,dreg_hi */
2160 			EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
/* Compare-and-branch, immediate form: imm sign-extended into EBX:ECX. */
2163 		case BPF_JMP | BPF_JEQ | BPF_K:
2164 		case BPF_JMP | BPF_JNE | BPF_K:
2165 		case BPF_JMP | BPF_JGT | BPF_K:
2166 		case BPF_JMP | BPF_JLT | BPF_K:
2167 		case BPF_JMP | BPF_JGE | BPF_K:
2168 		case BPF_JMP | BPF_JLE | BPF_K:
2169 		case BPF_JMP | BPF_JSGT | BPF_K:
2170 		case BPF_JMP | BPF_JSLE | BPF_K:
2171 		case BPF_JMP | BPF_JSLT | BPF_K:
2172 		case BPF_JMP | BPF_JSGE | BPF_K: {
2174 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2175 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2176 			u8 sreg_lo = IA32_ECX;
2177 			u8 sreg_hi = IA32_EBX;
2180 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2182 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2186 			hi = imm32 & (1<<31) ? (u32)~0 : 0;
2188 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2190 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2192 			/* cmp dreg_hi,sreg_hi */
2193 			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2195 			/* cmp dreg_lo,sreg_lo */
2196 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2198 emit_cond_jmp:		/* Convert BPF opcode to x86 */
2199 			switch (BPF_OP(code)) {
2205 				jmp_cond = IA32_JNE;
2208 				/* GT is unsigned '>', JA in x86 */
2212 				/* LT is unsigned '<', JB in x86 */
2216 				/* GE is unsigned '>=', JAE in x86 */
2217 				jmp_cond = IA32_JAE;
2220 				/* LE is unsigned '<=', JBE in x86 */
2221 				jmp_cond = IA32_JBE;
2224 				/* Signed '>', GT in x86 */
2228 				/* Signed '<', LT in x86 */
2232 				/* Signed '>=', GE in x86 */
2233 				jmp_cond = IA32_JGE;
2236 				/* Signed '<=', LE in x86 */
2237 				jmp_cond = IA32_JLE;
2239 			default: /* to silence GCC warning */
/* Branch target offset from the per-insn address table built by passes. */
2242 			jmp_offset = addrs[i + insn->off] - addrs[i];
2243 			if (is_imm8(jmp_offset)) {
2244 				EMIT2(jmp_cond, jmp_offset);
2245 			} else if (is_simm32(jmp_offset)) {
/* Jcc rel32 form: 0x0F prefix, condition opcode + 0x10. */
2246 				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2248 				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2254 		case BPF_JMP | BPF_JA:
2255 			if (insn->off == -1)
2256 				/* -1 jmp instructions will always jump
2257 				 * backwards two bytes. Explicitly handling
2258 				 * this case avoids wasting too many passes
2259 				 * when there are long sequences of replaced
2264 				jmp_offset = addrs[i + insn->off] - addrs[i];
2267 				/* Optimize out nop jumps */
2270 			if (is_imm8(jmp_offset)) {
2271 				EMIT2(0xEB, jmp_offset);
2272 			} else if (is_simm32(jmp_offset)) {
2273 				EMIT1_off32(0xE9, jmp_offset);
2275 				pr_err("jmp gen bug %llx\n", jmp_offset);
/* Legacy packet-access loads via bpf_load_pointer() (classic BPF compat). */
2280 		case BPF_LD | BPF_ABS | BPF_W:
2281 		case BPF_LD | BPF_ABS | BPF_H:
2282 		case BPF_LD | BPF_ABS | BPF_B:
2283 		case BPF_LD | BPF_IND | BPF_W:
2284 		case BPF_LD | BPF_IND | BPF_H:
2285 		case BPF_LD | BPF_IND | BPF_B:
2288 			const u8 *r6 = bpf2ia32[BPF_REG_6];
2290 			/* Setting up first argument */
2291 			/* mov eax,dword ptr [ebp+off] */
2292 			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2295 			/* Setting up second argument */
2296 			if (BPF_MODE(code) == BPF_ABS) {
2297 				/* mov %edx, imm32 */
2298 				EMIT1_off32(0xBA, imm32);
/* IND: offset = src_reg + imm32 (imm added with add r/m32,imm8 or imm32). */
2301 					/* mov edx,dword ptr [ebp+off] */
2302 					EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
2306 					/* mov edx,src_lo */
2307 					EMIT2(0x8B, add_2reg(0xC0, src_lo,
2312 					EMIT3(0x83, 0xC2, imm32);
2314 					/* add %edx,imm32 */
2315 					EMIT2_off32(0x81, 0xC2, imm32);
2319 			/* Setting up third argument */
2320 			switch (BPF_SIZE(code)) {
2336 			EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
2339 			EMIT2(0x8B, add_2reg(0xC0, IA32_EBP, IA32_EBX));
2341 			EMIT3(0x83, add_1reg(0xC0, IA32_EBX), SKB_BUFFER);
2345 			/* Setting up function pointer to call */
2347 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX),
2348 				    (unsigned int)bpf_load_pointer);
/* Indirect call through EBX, then pop the pushed arg (add esp,4). */
2350 			EMIT2(0xFF, add_1reg(0xD0, IA32_EBX));
2352 			EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 4);
/* xor edx,edx — upper half of the 64-bit result is always zero here. */
2354 			EMIT2(0x33, add_2reg(0xC0, IA32_EDX, IA32_EDX));
2356 			/* mov dword ptr [ebp+off],eax */
2357 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
2359 			/* mov dword ptr [ebp+off],edx */
2360 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
2364 			 * Check if return address is NULL or not.
2365 			 * If NULL then jump to epilogue else continue
2366 			 * to load the value from retn address
/* cmp eax,0 then Jcc rel32 to ctx->cleanup_addr (the shared epilogue). */
2368 			EMIT3(0x83, add_1reg(0xF8, IA32_EAX), 0);
2369 			jmp_offset = ctx->cleanup_addr - addrs[i];
2371 			switch (BPF_SIZE(code)) {
2383 			EMIT2_off32(0x0F, IA32_JE + 0x10, jmp_offset);
2384 			/* Load value from the address */
2385 			switch (BPF_SIZE(code)) {
2389 				/* Emit 'bswap eax' */
2390 				EMIT2(0x0F, add_1reg(0xC8, IA32_EAX));
/* H: movzx word then rotate by 8 = 16-bit byte swap into host order. */
2393 				EMIT3(0x0F, 0xB7, 0x0);
2395 				EMIT3(0xC1, add_1reg(0xC8, IA32_EAX), 8);
2398 				EMIT3(0x0F, 0xB6, 0x0);
2402 			/* mov dword ptr [ebp+off],eax */
2403 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
2407 		/* STX XADD: lock *(u32 *)(dst + off) += src */
2408 		case BPF_STX | BPF_XADD | BPF_W:
2409 		/* STX XADD: lock *(u64 *)(dst + off) += src */
2410 		case BPF_STX | BPF_XADD | BPF_DW:
2412 		case BPF_JMP | BPF_EXIT:
/* EXIT: branch to the shared epilogue at cleanup_addr (seen_exit tracking
 * and the jump emission are sampled out of this listing). */
2414 				jmp_offset = ctx->cleanup_addr - addrs[i];
2418 			/* Update cleanup_addr */
2419 			ctx->cleanup_addr = proglen;
2420 			emit_epilogue(&prog, bpf_prog->aux->stack_depth);
2423 			pr_info_once("*** NOT YET: opcode %02x ***\n", code);
2427 			 * This error will be seen if new instruction was added
2428 			 * to interpreter, but not to JIT or if there is junk in
2431 			pr_err("bpf_jit: unknown opcode %02x\n", code);
/* Per-insn sanity: one insn must never exceed the temp buffer budget. */
2436 		if (ilen > BPF_MAX_INSN_SIZE) {
2437 			pr_err("bpf_jit: fatal insn size error\n");
/* Image must only ever shrink between passes; growing means a sizing bug. */
2442 			if (unlikely(proglen + ilen > oldproglen)) {
2443 				pr_err("bpf_jit: fatal error\n");
2446 			memcpy(image + proglen, temp, ilen);
2455 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
2457 struct bpf_binary_header *header = NULL;
2458 struct bpf_prog *tmp, *orig_prog = prog;
2459 int proglen, oldproglen = 0;
2460 struct jit_context ctx = {};
2461 bool tmp_blinded = false;
2467 if (!prog->jit_requested)
2470 tmp = bpf_jit_blind_constants(prog);
2472 * If blinding was requested and we failed during blinding,
2473 * we must fall back to the interpreter.
2482 addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
2489 * Before first pass, make a rough estimation of addrs[]
2490 * each BPF instruction is translated to less than 64 bytes
2492 for (proglen = 0, i = 0; i < prog->len; i++) {
2496 ctx.cleanup_addr = proglen;
2499 * JITed image shrinks with every pass and the loop iterates
2500 * until the image stops shrinking. Very large BPF programs
2501 * may converge on the last pass. In such case do one more
2502 * pass to emit the final image.
2504 for (pass = 0; pass < 20 || image; pass++) {
2505 proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
2510 bpf_jit_binary_free(header);
2515 if (proglen != oldproglen) {
2516 pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
2517 proglen, oldproglen);
2522 if (proglen == oldproglen) {
2523 header = bpf_jit_binary_alloc(proglen, &image,
2530 oldproglen = proglen;
2534 if (bpf_jit_enable > 1)
2535 bpf_jit_dump(prog->len, proglen, pass + 1, image);
2538 bpf_jit_binary_lock_ro(header);
2539 prog->bpf_func = (void *)image;
2541 prog->jited_len = proglen;
2550 bpf_jit_prog_release_other(prog, prog == orig_prog ?