bpf, x86_32: add eBPF JIT compiler for ia32
[linux-2.6-block.git] / arch / x86 / net / bpf_jit_comp32.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
4  *
5  * Author: Wang YanQing (udknight@gmail.com)
6  * The code based on code and ideas from:
7  * Eric Dumazet (eric.dumazet@gmail.com)
8  * and from:
9  * Shubham Bansal <illusionist.neo@gmail.com>
10  */
11
12 #include <linux/netdevice.h>
13 #include <linux/filter.h>
14 #include <linux/if_vlan.h>
15 #include <asm/cacheflush.h>
16 #include <asm/set_memory.h>
17 #include <asm/nospec-branch.h>
18 #include <linux/bpf.h>
19
20 /*
21  * eBPF prog stack layout:
22  *
23  *                         high
24  * original ESP =>        +-----+
25  *                        |     | callee saved registers
26  *                        +-----+
27  *                        | ... | eBPF JIT scratch space
28  * BPF_FP,IA32_EBP  =>    +-----+
29  *                        | ... | eBPF prog stack
30  *                        +-----+
31  *                        |RSVD | JIT scratchpad
32  * current ESP =>         +-----+
33  *                        |     |
34  *                        | ... | Function call stack
35  *                        |     |
36  *                        +-----+
37  *                          low
38  *
39  * The callee saved registers:
40  *
41  *                                high
42  * original ESP =>        +------------------+ \
43  *                        |        ebp       | |
44  * current EBP =>         +------------------+ } callee saved registers
45  *                        |    ebx,esi,edi   | |
46  *                        +------------------+ /
47  *                                low
48  */
49
50 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
51 {
52         if (len == 1)
53                 *ptr = bytes;
54         else if (len == 2)
55                 *(u16 *)ptr = bytes;
56         else {
57                 *(u32 *)ptr = bytes;
58                 barrier();
59         }
60         return ptr + len;
61 }
62
/*
 * Emit 'len' (1, 2 or 4) bytes of machine code held in 'bytes' and
 * advance the output; 'prog' and 'cnt' must be in scope at the call
 * site.  Arguments are parenthesized so compound expressions expand
 * safely.
 */
#define EMIT(bytes, len) \
	do { prog = emit_code(prog, (bytes), (len)); cnt += (len); } while (0)

#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)	\
	EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

/* Opcode byte(s) followed by a 32-bit immediate or displacement. */
#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)

/*
 * Relative displacement from the end of a jump instruction of length
 * 'jmp_insn_len' (currently at byte offset 'cnt') to the byte offset
 * 'label' within the same emitted sequence.
 */
#define jmp_label(label, jmp_insn_len) ((label) - (cnt) - (jmp_insn_len))
82
/* True when 'value' fits in a sign-extended 8-bit immediate. */
static bool is_imm8(int value)
{
	return value >= -128 && value <= 127;
}
87
88 static bool is_simm32(s64 value)
89 {
90         return value == (s64) (s32) value;
91 }
92
/* Byte offset of an eBPF register's scratch slot (relative to IA32_EBP). */
#define STACK_OFFSET(k)	(k)
#define TCALL_CNT	(MAX_BPF_JIT_REG + 0)	/* Tail Call Count */

/* IA32 register numbers as encoded in the ModR/M reg and r/m fields. */
#define IA32_EAX	(0x0)
#define IA32_EBX	(0x3)
#define IA32_ECX	(0x1)
#define IA32_EDX	(0x2)
#define IA32_ESI	(0x6)
#define IA32_EDI	(0x7)
#define IA32_EBP	(0x5)
#define IA32_ESP	(0x4)

/*
 * List of x86 cond jumps opcodes (. + s8)
 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
 */
#define IA32_JB  0x72	/* unsigned <  */
#define IA32_JAE 0x73	/* unsigned >= */
#define IA32_JE  0x74	/* ==          */
#define IA32_JNE 0x75	/* !=          */
#define IA32_JBE 0x76	/* unsigned <= */
#define IA32_JA  0x77	/* unsigned >  */
#define IA32_JL  0x7C	/* signed <    */
#define IA32_JGE 0x7D	/* signed >=   */
#define IA32_JLE 0x7E	/* signed <=   */
#define IA32_JG  0x7F	/* signed >    */
/*
 * Map eBPF registers to IA32 32bit registers or stack scratch space.
 *
 * 1. All the registers, R0-R10, are mapped to scratch space on stack.
 * 2. We need two 64 bit temp registers to do complex operations on eBPF
 *    registers.
 * 3. For performance reason, the BPF_REG_AX for blinding constant, is
 *    mapped to real hardware register pair, IA32_ESI and IA32_EDI.
 *
 * As the eBPF registers are all 64 bit registers and IA32 has only 32 bit
 * registers, we have to map each eBPF registers with two IA32 32 bit regs
 * or scratch memory space and we have to build eBPF 64 bit register from those.
 *
 * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
 *
 * Each table entry is a {low, high} pair: either two STACK_OFFSET() byte
 * offsets into the scratch area or two IA32 register numbers.
 */
static const u8 bpf2ia32[][2] = {
	/* Return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},

	/* The arguments from eBPF program to in-kernel function */
	/* Stored on stack scratch space */
	[BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
	[BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
	[BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
	[BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
	[BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},

	/* Callee saved registers that in-kernel function will preserve */
	/* Stored on stack scratch space */
	[BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
	[BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
	[BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
	[BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},

	/* Read only Frame Pointer to access Stack */
	[BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},

	/* Temporary register for blinding constants. */
	[BPF_REG_AX] = {IA32_ESI, IA32_EDI},

	/* Tail call count. Stored on stack scratch space. */
	[TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
};
163
/* Accessors for the low/high 32-bit half of an eBPF register mapping. */
#define dst_lo	dst[0]
#define dst_hi	dst[1]
#define src_lo	src[0]
#define src_hi	src[1]

#define STACK_ALIGNMENT	8
/*
 * Stack space for BPF_REG_0 through BPF_REG_9, BPF_REG_FP and the tail
 * call count: 12 pairs of 32-bit slots, matching the STACK_OFFSET()
 * values in bpf2ia32[] (BPF_REG_AX lives in ESI/EDI, not on the stack).
 */
#define SCRATCH_SIZE 96

/* Total stack size used in JITed code */
/*
 * NOTE(review): _STACK_SIZE is a reserved identifier (leading underscore
 * followed by an uppercase letter) but cannot be renamed here without
 * touching users outside this view.  The stray '+ +' is a harmless unary
 * plus; the value is stack_depth + SCRATCH_SIZE + 4.
 */
#define _STACK_SIZE \
	(stack_depth + \
	 + SCRATCH_SIZE + \
	 + 4 /* Extra space for skb_copy_bits buffer */)

#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)

/* Get the offset of eBPF REGISTERs stored on scratch space. */
#define STACK_VAR(off) (off)

/* Offset of skb_copy_bits buffer */
#define SKB_BUFFER STACK_VAR(SCRATCH_SIZE)
190
191 /* Encode 'dst_reg' register into IA32 opcode 'byte' */
192 static u8 add_1reg(u8 byte, u32 dst_reg)
193 {
194         return byte + dst_reg;
195 }
196
197 /* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
198 static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
199 {
200         return byte + dst_reg + (src_reg << 3);
201 }
202
/*
 * Pad unused JIT image space with 0xcc (int3) so a stray jump into the
 * hole traps instead of executing leftover bytes.
 */
static void jit_fill_hole(void *area, unsigned int size)
{
	memset(area, 0xcc, size);
}
208
/*
 * Load the 32-bit immediate 'val' into 'dst'.  When 'dstk' is set,
 * 'dst' is a scratch-slot offset rather than a register number.
 * val == 0 is specialized to a shorter xor-based sequence.
 */
static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
				   u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (dstk) {
		if (val == 0) {
			/* xor eax,eax */
			EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
			/* mov dword ptr [ebp+off],eax */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(dst));
		} else {
			/* mov dword ptr [ebp+off],imm32 */
			EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
				    STACK_VAR(dst), val);
		}
	} else {
		if (val == 0)
			/* xor dst,dst */
			EMIT2(0x33, add_2reg(0xC0, dst, dst));
		else
			/* mov dst,imm32 */
			EMIT2_off32(0xC7, add_1reg(0xC0, dst),
				    val);
	}
	*pprog = prog;
}
235
/*
 * dst = src (4 bytes).  Either operand may live in a scratch slot
 * ('sstk'/'dstk') or in a register; eax is used as the intermediate
 * when the source is on the stack.
 */
static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	/* Source operand: eax when it was loaded from the stack. */
	u8 sreg = sstk ? IA32_EAX : src;

	if (sstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
	if (dstk)
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
	else
		/* mov dst,sreg */
		EMIT2(0x89, add_2reg(0xC0, dst, sreg));

	*pprog = prog;
}
256
/* dst = src (64 bit when 'is64', else 32 bit with the high word cleared) */
static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
				     const u8 src[], bool dstk,
				     bool sstk, u8 **pprog)
{
	/* The low 32 bits are copied unconditionally. */
	emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
	if (is64)
		/* complete 8 byte move */
		emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
	else
		/* zero out high 4 bytes */
		emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
}
270
271 /* Sign extended move */
272 static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
273                                      const u32 val, bool dstk, u8 **pprog)
274 {
275         u32 hi = 0;
276
277         if (is64 && (val & (1<<31)))
278                 hi = (u32)~0;
279         emit_ia32_mov_i(dst_lo, val, dstk, pprog);
280         emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
281 }
282
/*
 * ALU operation (32 bit)
 * dst = dst * src
 *
 * Uses the one-operand 'mul', which multiplies eax by the operand into
 * edx:eax; only the low 32 bits (eax) are written back.  Clobbers eax,
 * edx and (when the source is on the stack) ecx.
 */
static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_ECX : src;

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));


	/* mul sreg: edx:eax = eax * sreg */
	EMIT2(0xF7, add_1reg(0xE0, sreg));

	if (dstk)
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov dst,eax */
		EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));

	*pprog = prog;
}
318
/*
 * BPF_END | BPF_TO_LE on the 64-bit pair 'dst'.  IA32 is little-endian,
 * so no byte swap is needed: only the bits above 'val' (16 or 32) are
 * cleared, and val == 64 is a complete no-op.
 */
static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
					 bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	/* val == 64 emits nothing, so skip the load/store round trip. */
	if (dstk && val != 64) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/*
		 * Emit 'movzwl eax,ax' to zero extend 16-bit
		 * into 64 bit
		 */
		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* nop */
		break;
	}

	if (dstk && val != 64) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
363
/*
 * BPF_END | BPF_TO_BE on the 64-bit pair 'dst': byte-swap the low 16,
 * low 32 or all 64 bits.  The 16/32-bit forms zero the high word; the
 * 64-bit form swaps both words and exchanges them via ecx.
 */
static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
				       bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/* Emit 'ror %ax, 8' to swap lower 2 bytes */
		EMIT1(0x66);
		EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);

		/* movzwl dreg_lo,dreg_lo: zero-extend the swapped 16 bits */
		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));

		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		/* Emit 'bswap eax' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* Emit 'bswap eax' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		/* Emit 'bswap edx' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_hi));

		/* Exchange the two swapped halves through ecx. */
		/* mov ecx,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
		/* mov dreg_hi,dreg_lo */
		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
		/* mov dreg_lo,ecx */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));

		break;
	}
	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
426
/*
 * ALU operation (32 bit)
 * dst = dst (div|mod) src
 *
 * Emits an unsigned 32-bit division edx:eax / ecx: the quotient lands
 * in eax and the remainder in edx, so BPF_MOD stores edx and any other
 * op stores eax.  Uses eax, ecx and edx as scratch.
 */
static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
				       bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));

	/* xor edx,edx */
	EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
	/* div ecx */
	EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));

	if (op == BPF_MOD) {
		if (dstk)
			/* mov dword ptr [ebp+off],edx (remainder) */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
			      STACK_VAR(dst));
		else
			/* mov dst,edx (remainder) */
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
	} else {
		if (dstk)
			/* mov dword ptr [ebp+off],eax (quotient) */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(dst));
		else
			/* mov dst,eax (quotient) */
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
	}
	*pprog = prog;
}
473
/*
 * ALU operation (32 bit)
 * dst = dst (shift) src
 *
 * The shift count is placed in cl; 'b2' selects the /digit of the 0xD3
 * shift group: shl (0xE0), shr (0xE8) or sar (0xF8).
 */
static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
				     bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;
	u8 b2;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	switch (op) {
	case BPF_LSH:
		b2 = 0xE0; break;
	case BPF_RSH:
		b2 = 0xE8; break;
	case BPF_ARSH:
		b2 = 0xF8; break;
	default:
		/*
		 * Unknown shift op: return without updating *pprog, so
		 * the bytes emitted above are discarded.
		 */
		return;
	}
	/* shl/shr/sar dreg,cl */
	EMIT2(0xD3, add_1reg(b2, dreg));

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
	*pprog = prog;
}
514
/*
 * ALU operation (32 bit)
 * dst = dst (op) src
 *
 * 'hi' selects the high word of a 64-bit pair: ADD/SUB then use
 * adc/sbb, which consume the carry/borrow of the low-word op -- the
 * caller must emit the low-word op immediately before (see
 * emit_ia32_alu_r64).
 */
static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_EAX : src;
	u8 dreg = dstk ? IA32_EDX : dst;

	if (sstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));

	if (dstk)
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));

	switch (BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		if (hi && is64)
			/* adc dreg,sreg */
			EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		else
			/* add dreg,sreg */
			EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst - src */
	case BPF_SUB:
		if (hi && is64)
			/* sbb dreg,sreg */
			EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		else
			/* sub dreg,sreg */
			EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst | src */
	case BPF_OR:
		EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & src */
	case BPF_AND:
		EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}
571
572 /* ALU operation (64 bit) */
573 static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
574                                      const u8 dst[], const u8 src[],
575                                      bool dstk,  bool sstk,
576                                      u8 **pprog)
577 {
578         u8 *prog = *pprog;
579
580         emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
581         if (is64)
582                 emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
583                                 &prog);
584         else
585                 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
586         *pprog = prog;
587 }
588
/*
 * ALU operation (32 bit)
 * dst = dst (op) val
 *
 * 'hi' selects the high word of a 64-bit pair: ADD/SUB then use
 * adc/sbb, relying on the low-word op having been emitted immediately
 * before.  Immediates that do not fit in a signed byte are staged in
 * edx.  Clobbers eax (when dstk) and edx.
 */
static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const s32 val, bool dstk,
				   u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;
	u8 sreg = IA32_EDX;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (!is_imm8(val))
		/* mov edx,imm32*/
		EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);

	switch (op) {
	/* dst = dst + val */
	case BPF_ADD:
		if (hi && is64) {
			if (is_imm8(val))
				/* adc dreg,imm8 */
				EMIT3(0x83, add_1reg(0xD0, dreg), val);
			else
				/* adc dreg,edx */
				EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				/* add dreg,imm8 */
				EMIT3(0x83, add_1reg(0xC0, dreg), val);
			else
				/* add dreg,edx */
				EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst - val */
	case BPF_SUB:
		if (hi && is64) {
			if (is_imm8(val))
				/* sbb dreg,imm8 */
				EMIT3(0x83, add_1reg(0xD8, dreg), val);
			else
				/* sbb dreg,edx */
				EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				/* sub dreg,imm8 */
				EMIT3(0x83, add_1reg(0xE8, dreg), val);
			else
				/* sub dreg,edx */
				EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst | val */
	case BPF_OR:
		if (is_imm8(val))
			/* or dreg,imm8 */
			EMIT3(0x83, add_1reg(0xC8, dreg), val);
		else
			/* or dreg,edx */
			EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & val */
	case BPF_AND:
		if (is_imm8(val))
			/* and dreg,imm8 */
			EMIT3(0x83, add_1reg(0xE0, dreg), val);
		else
			/* and dreg,edx */
			EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ val */
	case BPF_XOR:
		if (is_imm8(val))
			/* xor dreg,imm8 */
			EMIT3(0x83, add_1reg(0xF0, dreg), val);
		else
			/* xor dreg,edx */
			EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	case BPF_NEG:
		/* neg dreg ('val' is ignored for negation) */
		EMIT2(0xF7, add_1reg(0xD8, dreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}
671
672 /* ALU operation (64 bit) */
673 static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
674                                      const u8 dst[], const u32 val,
675                                      bool dstk, u8 **pprog)
676 {
677         u8 *prog = *pprog;
678         u32 hi = 0;
679
680         if (is64 && (val & (1<<31)))
681                 hi = (u32)~0;
682
683         emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
684         if (is64)
685                 emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
686         else
687                 emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
688
689         *pprog = prog;
690 }
691
692 /* dst = ~dst (64 bit) */
693 static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
694 {
695         u8 *prog = *pprog;
696         int cnt = 0;
697         u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
698         u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
699
700         if (dstk) {
701                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
702                       STACK_VAR(dst_lo));
703                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
704                       STACK_VAR(dst_hi));
705         }
706
707         /* xor ecx,ecx */
708         EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
709         /* sub dreg_lo,ecx */
710         EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX));
711         /* mov dreg_lo,ecx */
712         EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
713
714         /* xor ecx,ecx */
715         EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
716         /* sbb dreg_hi,ecx */
717         EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX));
718         /* mov dreg_hi,ecx */
719         EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX));
720
721         if (dstk) {
722                 /* mov dword ptr [ebp+off],dreg_lo */
723                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
724                       STACK_VAR(dst_lo));
725                 /* mov dword ptr [ebp+off],dreg_hi */
726                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
727                       STACK_VAR(dst_hi));
728         }
729         *pprog = prog;
730 }
731
/*
 * dst = dst << src (64 bit).  The shift count comes from the low word
 * of 'src'; three paths are emitted: count < 32, 32 <= count < 64, and
 * count >= 64 (result is zero).
 *
 * NOTE(review): jmp_label1..3 are function-local statics recording the
 * byte offset of each branch target within this fixed-length sequence.
 * They are -1 (unresolved) the first time this runs and are filled in
 * for subsequent passes -- this relies on the JIT making multiple
 * convergence passes over the program; confirm against the driver loop.
 */
static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
				     bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	static int jmp_label1 = -1;
	static int jmp_label2 = -1;
	static int jmp_label3 = -1;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src_lo));
	else
		/* mov ecx,src_lo */
		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

	/* cmp ecx,32 */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
	/* Jumps when >= 32 */
	if (is_imm8(jmp_label(jmp_label1, 2)))
		EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
	else
		EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));

	/* < 32 */
	/* shl dreg_hi,cl */
	EMIT2(0xD3, add_1reg(0xE0, dreg_hi));
	/* mov ebx,dreg_lo */
	EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
	/* shl dreg_lo,cl */
	EMIT2(0xD3, add_1reg(0xE0, dreg_lo));

	/* IA32_ECX = -IA32_ECX + 32 */
	/* neg ecx */
	EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
	/* add ecx,32 */
	EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);

	/* Bits shifted out of the low word move into the high word. */
	/* shr ebx,cl */
	EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
	/* or dreg_hi,ebx */
	EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));

	/* goto out; */
	if (is_imm8(jmp_label(jmp_label3, 2)))
		EMIT2(0xEB, jmp_label(jmp_label3, 2));
	else
		EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));

	/* >= 32 */
	if (jmp_label1 == -1)
		jmp_label1 = cnt;

	/* cmp ecx,64 */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
	/* Jumps when >= 64 */
	if (is_imm8(jmp_label(jmp_label2, 2)))
		EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
	else
		EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));

	/* >= 32 && < 64 */
	/* sub ecx,32 */
	EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
	/* shl dreg_lo,cl */
	EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
	/* mov dreg_hi,dreg_lo */
	EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));

	/* xor dreg_lo,dreg_lo */
	EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));

	/* goto out; */
	if (is_imm8(jmp_label(jmp_label3, 2)))
		EMIT2(0xEB, jmp_label(jmp_label3, 2));
	else
		EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));

	/* >= 64: the whole value is shifted out; result is zero. */
	if (jmp_label2 == -1)
		jmp_label2 = cnt;
	/* xor dreg_lo,dreg_lo */
	EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
	/* xor dreg_hi,dreg_hi */
	EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));

	/* All three paths join here, before the optional write-back. */
	if (jmp_label3 == -1)
		jmp_label3 = cnt;

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	/* out: */
	*pprog = prog;
}
843
844 /* dst = dst >> src (signed)*/
845 static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
846                                       bool dstk, bool sstk, u8 **pprog)
847 {
848         u8 *prog = *pprog;
849         int cnt = 0;
850         static int jmp_label1 = -1;
851         static int jmp_label2 = -1;
852         static int jmp_label3 = -1;
853         u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
854         u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
855
856         if (dstk) {
857                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
858                       STACK_VAR(dst_lo));
859                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
860                       STACK_VAR(dst_hi));
861         }
862
863         if (sstk)
864                 /* mov ecx,dword ptr [ebp+off] */
865                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
866                       STACK_VAR(src_lo));
867         else
868                 /* mov ecx,src_lo */
869                 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
870
871         /* cmp ecx,32 */
872         EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
873         /* Jumps when >= 32 */
874         if (is_imm8(jmp_label(jmp_label1, 2)))
875                 EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
876         else
877                 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
878
879         /* < 32 */
880         /* lshr dreg_lo,cl */
881         EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
882         /* mov ebx,dreg_hi */
883         EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
884         /* ashr dreg_hi,cl */
885         EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
886
887         /* IA32_ECX = -IA32_ECX + 32 */
888         /* neg ecx */
889         EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
890         /* add ecx,32 */
891         EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
892
893         /* shl ebx,cl */
894         EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
895         /* or dreg_lo,ebx */
896         EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
897
898         /* goto out; */
899         if (is_imm8(jmp_label(jmp_label3, 2)))
900                 EMIT2(0xEB, jmp_label(jmp_label3, 2));
901         else
902                 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
903
904         /* >= 32 */
905         if (jmp_label1 == -1)
906                 jmp_label1 = cnt;
907
908         /* cmp ecx,64 */
909         EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
910         /* Jumps when >= 64 */
911         if (is_imm8(jmp_label(jmp_label2, 2)))
912                 EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
913         else
914                 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
915
916         /* >= 32 && < 64 */
917         /* sub ecx,32 */
918         EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
919         /* ashr dreg_hi,cl */
920         EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
921         /* mov dreg_lo,dreg_hi */
922         EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
923
924         /* ashr dreg_hi,imm8 */
925         EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
926
927         /* goto out; */
928         if (is_imm8(jmp_label(jmp_label3, 2)))
929                 EMIT2(0xEB, jmp_label(jmp_label3, 2));
930         else
931                 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
932
933         /* >= 64 */
934         if (jmp_label2 == -1)
935                 jmp_label2 = cnt;
936         /* ashr dreg_hi,imm8 */
937         EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
938         /* mov dreg_lo,dreg_hi */
939         EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
940
941         if (jmp_label3 == -1)
942                 jmp_label3 = cnt;
943
944         if (dstk) {
945                 /* mov dword ptr [ebp+off],dreg_lo */
946                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
947                       STACK_VAR(dst_lo));
948                 /* mov dword ptr [ebp+off],dreg_hi */
949                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
950                       STACK_VAR(dst_hi));
951         }
952         /* out: */
953         *pprog = prog;
954 }
955
956 /* dst = dst >> src */
957 static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
958                                      bool sstk, u8 **pprog)
959 {
960         u8 *prog = *pprog;
961         int cnt = 0;
962         static int jmp_label1 = -1;
963         static int jmp_label2 = -1;
964         static int jmp_label3 = -1;
965         u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
966         u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
967
968         if (dstk) {
969                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
970                       STACK_VAR(dst_lo));
971                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
972                       STACK_VAR(dst_hi));
973         }
974
975         if (sstk)
976                 /* mov ecx,dword ptr [ebp+off] */
977                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
978                       STACK_VAR(src_lo));
979         else
980                 /* mov ecx,src_lo */
981                 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
982
983         /* cmp ecx,32 */
984         EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
985         /* Jumps when >= 32 */
986         if (is_imm8(jmp_label(jmp_label1, 2)))
987                 EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
988         else
989                 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label1, 6));
990
991         /* < 32 */
992         /* lshr dreg_lo,cl */
993         EMIT2(0xD3, add_1reg(0xE8, dreg_lo));
994         /* mov ebx,dreg_hi */
995         EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
996         /* shr dreg_hi,cl */
997         EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
998
999         /* IA32_ECX = -IA32_ECX + 32 */
1000         /* neg ecx */
1001         EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1002         /* add ecx,32 */
1003         EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1004
1005         /* shl ebx,cl */
1006         EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1007         /* or dreg_lo,ebx */
1008         EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1009
1010         /* goto out; */
1011         if (is_imm8(jmp_label(jmp_label3, 2)))
1012                 EMIT2(0xEB, jmp_label(jmp_label3, 2));
1013         else
1014                 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
1015
1016         /* >= 32 */
1017         if (jmp_label1 == -1)
1018                 jmp_label1 = cnt;
1019         /* cmp ecx,64 */
1020         EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 64);
1021         /* Jumps when >= 64 */
1022         if (is_imm8(jmp_label(jmp_label2, 2)))
1023                 EMIT2(IA32_JAE, jmp_label(jmp_label2, 2));
1024         else
1025                 EMIT2_off32(0x0F, IA32_JAE + 0x10, jmp_label(jmp_label2, 6));
1026
1027         /* >= 32 && < 64 */
1028         /* sub ecx,32 */
1029         EMIT3(0x83, add_1reg(0xE8, IA32_ECX), 32);
1030         /* shr dreg_hi,cl */
1031         EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
1032         /* mov dreg_lo,dreg_hi */
1033         EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1034         /* xor dreg_hi,dreg_hi */
1035         EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1036
1037         /* goto out; */
1038         if (is_imm8(jmp_label(jmp_label3, 2)))
1039                 EMIT2(0xEB, jmp_label(jmp_label3, 2));
1040         else
1041                 EMIT1_off32(0xE9, jmp_label(jmp_label3, 5));
1042
1043         /* >= 64 */
1044         if (jmp_label2 == -1)
1045                 jmp_label2 = cnt;
1046         /* xor dreg_lo,dreg_lo */
1047         EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1048         /* xor dreg_hi,dreg_hi */
1049         EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1050
1051         if (jmp_label3 == -1)
1052                 jmp_label3 = cnt;
1053
1054         if (dstk) {
1055                 /* mov dword ptr [ebp+off],dreg_lo */
1056                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1057                       STACK_VAR(dst_lo));
1058                 /* mov dword ptr [ebp+off],dreg_hi */
1059                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1060                       STACK_VAR(dst_hi));
1061         }
1062         /* out: */
1063         *pprog = prog;
1064 }
1065
1066 /* dst = dst << val */
1067 static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
1068                                      bool dstk, u8 **pprog)
1069 {
1070         u8 *prog = *pprog;
1071         int cnt = 0;
1072         u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1073         u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1074
1075         if (dstk) {
1076                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1077                       STACK_VAR(dst_lo));
1078                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1079                       STACK_VAR(dst_hi));
1080         }
1081         /* Do LSH operation */
1082         if (val < 32) {
1083                 /* shl dreg_hi,imm8 */
1084                 EMIT3(0xC1, add_1reg(0xE0, dreg_hi), val);
1085                 /* mov ebx,dreg_lo */
1086                 EMIT2(0x8B, add_2reg(0xC0, dreg_lo, IA32_EBX));
1087                 /* shl dreg_lo,imm8 */
1088                 EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
1089
1090                 /* IA32_ECX = 32 - val */
1091                 /* mov ecx,val */
1092                 EMIT2(0xB1, val);
1093                 /* movzx ecx,ecx */
1094                 EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1095                 /* neg ecx */
1096                 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1097                 /* add ecx,32 */
1098                 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1099
1100                 /* shr ebx,cl */
1101                 EMIT2(0xD3, add_1reg(0xE8, IA32_EBX));
1102                 /* or dreg_hi,ebx */
1103                 EMIT2(0x09, add_2reg(0xC0, dreg_hi, IA32_EBX));
1104         } else if (val >= 32 && val < 64) {
1105                 u32 value = val - 32;
1106
1107                 /* shl dreg_lo,imm8 */
1108                 EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
1109                 /* mov dreg_hi,dreg_lo */
1110                 EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
1111                 /* xor dreg_lo,dreg_lo */
1112                 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1113         } else {
1114                 /* xor dreg_lo,dreg_lo */
1115                 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1116                 /* xor dreg_hi,dreg_hi */
1117                 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1118         }
1119
1120         if (dstk) {
1121                 /* mov dword ptr [ebp+off],dreg_lo */
1122                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1123                       STACK_VAR(dst_lo));
1124                 /* mov dword ptr [ebp+off],dreg_hi */
1125                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1126                       STACK_VAR(dst_hi));
1127         }
1128         *pprog = prog;
1129 }
1130
1131 /* dst = dst >> val */
1132 static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
1133                                      bool dstk, u8 **pprog)
1134 {
1135         u8 *prog = *pprog;
1136         int cnt = 0;
1137         u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1138         u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1139
1140         if (dstk) {
1141                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1142                       STACK_VAR(dst_lo));
1143                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1144                       STACK_VAR(dst_hi));
1145         }
1146
1147         /* Do RSH operation */
1148         if (val < 32) {
1149                 /* shr dreg_lo,imm8 */
1150                 EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
1151                 /* mov ebx,dreg_hi */
1152                 EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
1153                 /* shr dreg_hi,imm8 */
1154                 EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
1155
1156                 /* IA32_ECX = 32 - val */
1157                 /* mov ecx,val */
1158                 EMIT2(0xB1, val);
1159                 /* movzx ecx,ecx */
1160                 EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1161                 /* neg ecx */
1162                 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1163                 /* add ecx,32 */
1164                 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1165
1166                 /* shl ebx,cl */
1167                 EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1168                 /* or dreg_lo,ebx */
1169                 EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1170         } else if (val >= 32 && val < 64) {
1171                 u32 value = val - 32;
1172
1173                 /* shr dreg_hi,imm8 */
1174                 EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
1175                 /* mov dreg_lo,dreg_hi */
1176                 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1177                 /* xor dreg_hi,dreg_hi */
1178                 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1179         } else {
1180                 /* xor dreg_lo,dreg_lo */
1181                 EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
1182                 /* xor dreg_hi,dreg_hi */
1183                 EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
1184         }
1185
1186         if (dstk) {
1187                 /* mov dword ptr [ebp+off],dreg_lo */
1188                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1189                       STACK_VAR(dst_lo));
1190                 /* mov dword ptr [ebp+off],dreg_hi */
1191                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1192                       STACK_VAR(dst_hi));
1193         }
1194         *pprog = prog;
1195 }
1196
1197 /* dst = dst >> val (signed) */
1198 static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
1199                                       bool dstk, u8 **pprog)
1200 {
1201         u8 *prog = *pprog;
1202         int cnt = 0;
1203         u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1204         u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1205
1206         if (dstk) {
1207                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1208                       STACK_VAR(dst_lo));
1209                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1210                       STACK_VAR(dst_hi));
1211         }
1212         /* Do RSH operation */
1213         if (val < 32) {
1214                 /* shr dreg_lo,imm8 */
1215                 EMIT3(0xC1, add_1reg(0xE8, dreg_lo), val);
1216                 /* mov ebx,dreg_hi */
1217                 EMIT2(0x8B, add_2reg(0xC0, dreg_hi, IA32_EBX));
1218                 /* ashr dreg_hi,imm8 */
1219                 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
1220
1221                 /* IA32_ECX = 32 - val */
1222                 /* mov ecx,val */
1223                 EMIT2(0xB1, val);
1224                 /* movzx ecx,ecx */
1225                 EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
1226                 /* neg ecx */
1227                 EMIT2(0xF7, add_1reg(0xD8, IA32_ECX));
1228                 /* add ecx,32 */
1229                 EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 32);
1230
1231                 /* shl ebx,cl */
1232                 EMIT2(0xD3, add_1reg(0xE0, IA32_EBX));
1233                 /* or dreg_lo,ebx */
1234                 EMIT2(0x09, add_2reg(0xC0, dreg_lo, IA32_EBX));
1235         } else if (val >= 32 && val < 64) {
1236                 u32 value = val - 32;
1237
1238                 /* ashr dreg_hi,imm8 */
1239                 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
1240                 /* mov dreg_lo,dreg_hi */
1241                 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1242
1243                 /* ashr dreg_hi,imm8 */
1244                 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1245         } else {
1246                 /* ashr dreg_hi,imm8 */
1247                 EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1248                 /* mov dreg_lo,dreg_hi */
1249                 EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1250         }
1251
1252         if (dstk) {
1253                 /* mov dword ptr [ebp+off],dreg_lo */
1254                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1255                       STACK_VAR(dst_lo));
1256                 /* mov dword ptr [ebp+off],dreg_hi */
1257                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1258                       STACK_VAR(dst_hi));
1259         }
1260         *pprog = prog;
1261 }
1262
/*
 * 64x64 -> 64 bit multiply: dst = dst * src.
 *
 * Built from three 32x32->64 MULs (the dst_hi*src_hi product only
 * contributes to bits >= 64 and is dropped):
 *   lo(result) = lo(dst_lo * src_lo)
 *   hi(result) = lo(dst_hi * src_lo) + lo(dst_lo * src_hi)
 *                + hi(dst_lo * src_lo)
 * The high word is accumulated in ECX; one-operand MUL writes EDX:EAX,
 * so EAX, ECX and EDX are all clobbered.
 */
static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
				     bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_hi));
	else
		/* mov eax,dst_hi */
		EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));

	/* edx:eax = dst_hi * src_lo */
	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
	else
		/* mul src_lo */
		EMIT2(0xF7, add_1reg(0xE0, src_lo));

	/* Keep the low 32 bits of dst_hi*src_lo as the hi accumulator. */
	/* mov ecx,eax */
	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
	else
		/* mov eax,dst_lo */
		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

	/* edx:eax = dst_lo * src_hi */
	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
	else
		/* mul src_hi */
		EMIT2(0xF7, add_1reg(0xE0, src_hi));

	/* Accumulate low 32 bits of dst_lo*src_hi. */
	/* add ecx,eax */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
	else
		/* mov eax,dst_lo */
		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

	/* edx:eax = dst_lo * src_lo */
	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
	else
		/* mul src_lo */
		EMIT2(0xF7, add_1reg(0xE0, src_lo));

	/* Accumulate the carry out of the low product into the hi word. */
	/* add ecx,edx */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));

	/* Result: eax = lo, ecx = hi. */
	if (dstk) {
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],ecx */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(dst_hi));
	} else {
		/* mov dst_lo,eax */
		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
		/* mov dst_hi,ecx */
		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
	}

	*pprog = prog;
}
1339
/*
 * 64x64 -> 64 bit multiply by an immediate: dst = dst * (s64)imm.
 *
 * The 32-bit immediate is sign-extended to 64 bits (val:hi), then the
 * same three-MUL scheme as emit_ia32_mul_r64 is used, accumulating the
 * high word in ECX. Clobbers EAX, ECX and EDX.
 */
static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u32 hi;

	/* hi = sign extension of val (all ones iff the sign bit is set) */
	hi = val & (1<<31) ? (u32)~0 : 0;
	/* movl eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
	/* edx:eax = val * dst_hi */
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
	else
		/* mul dst_hi */
		EMIT2(0xF7, add_1reg(0xE0, dst_hi));

	/* Keep the low 32 bits of val*dst_hi as the hi accumulator. */
	/* mov ecx,eax */
	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	/* movl eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
	/* edx:eax = hi * dst_lo */
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
	else
		/* mul dst_lo */
		EMIT2(0xF7, add_1reg(0xE0, dst_lo));
	/* add ecx,eax */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	/* movl eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
	/* edx:eax = val * dst_lo */
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
	else
		/* mul dst_lo */
		EMIT2(0xF7, add_1reg(0xE0, dst_lo));

	/* Accumulate the carry out of the low product into the hi word. */
	/* add ecx,edx */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));

	/* Result: eax = lo, ecx = hi. */
	if (dstk) {
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],ecx */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(dst_hi));
	} else {
		/* mov dst_lo,eax */
		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
		/* mov dst_hi,ecx */
		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
	}

	*pprog = prog;
}
1399
1400 static int bpf_size_to_x86_bytes(int bpf_size)
1401 {
1402         if (bpf_size == BPF_W)
1403                 return 4;
1404         else if (bpf_size == BPF_H)
1405                 return 2;
1406         else if (bpf_size == BPF_B)
1407                 return 1;
1408         else if (bpf_size == BPF_DW)
1409                 return 4; /* imm32 */
1410         else
1411                 return 0;
1412 }
1413
/* Per-program state carried between do_jit() passes. */
struct jit_context {
	int cleanup_addr; /* Epilogue code offset */
};
1417
1418 /* Maximum number of bytes emitted while JITing one eBPF insn */
1419 #define BPF_MAX_INSN_SIZE       128
1420 #define BPF_INSN_SAFETY         64
1421
1422 #define PROLOGUE_SIZE 35
1423
1424 /*
1425  * Emit prologue code for BPF program and check it's size.
1426  * bpf_tail_call helper will skip it while jumping into another program.
1427  */
1428 static void emit_prologue(u8 **pprog, u32 stack_depth)
1429 {
1430         u8 *prog = *pprog;
1431         int cnt = 0;
1432         const u8 *r1 = bpf2ia32[BPF_REG_1];
1433         const u8 fplo = bpf2ia32[BPF_REG_FP][0];
1434         const u8 fphi = bpf2ia32[BPF_REG_FP][1];
1435         const u8 *tcc = bpf2ia32[TCALL_CNT];
1436
1437         /* push ebp */
1438         EMIT1(0x55);
1439         /* mov ebp,esp */
1440         EMIT2(0x89, 0xE5);
1441         /* push edi */
1442         EMIT1(0x57);
1443         /* push esi */
1444         EMIT1(0x56);
1445         /* push ebx */
1446         EMIT1(0x53);
1447
1448         /* sub esp,STACK_SIZE */
1449         EMIT2_off32(0x81, 0xEC, STACK_SIZE);
1450         /* sub ebp,SCRATCH_SIZE+4+12*/
1451         EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 16);
1452         /* xor ebx,ebx */
1453         EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
1454
1455         /* Set up BPF prog stack base register */
1456         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
1457         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));
1458
1459         /* Move BPF_CTX (EAX) to BPF_REG_R1 */
1460         /* mov dword ptr [ebp+off],eax */
1461         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1462         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));
1463
1464         /* Initialize Tail Count */
1465         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
1466         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1467
1468         BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
1469         *pprog = prog;
1470 }
1471
/*
 * Emit epilogue code for BPF program.
 *
 * Loads BPF_REG_0 into EDX:EAX (the 64-bit return value), undoes the
 * prologue's EBP displacement so EBP again points at the saved frame,
 * restores the callee-saved registers from their pushed slots, and
 * returns.
 */
static void emit_epilogue(u8 **pprog, u32 stack_depth)
{
	u8 *prog = *pprog;
	const u8 *r0 = bpf2ia32[BPF_REG_0];
	int cnt = 0;

	/* mov eax,dword ptr [ebp+off]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
	/* mov edx,dword ptr [ebp+off]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));

	/* add ebp,SCRATCH_SIZE+4+12: reverse of the prologue's sub */
	EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 16);

	/* mov ebx,dword ptr [ebp-12]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
	/* mov esi,dword ptr [ebp-8]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
	/* mov edi,dword ptr [ebp-4]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);

	EMIT1(0xC9); /* leave */
	EMIT1(0xC3); /* ret */
	*pprog = prog;
}
1498
1499 /*
1500  * Generate the following code:
1501  * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
1502  *   if (index >= array->map.max_entries)
1503  *     goto out;
1504  *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
1505  *     goto out;
1506  *   prog = array->ptrs[index];
1507  *   if (prog == NULL)
1508  *     goto out;
1509  *   goto *(prog->bpf_func + prologue_size);
1510  * out:
1511  */
1512 static void emit_bpf_tail_call(u8 **pprog)
1513 {
1514         u8 *prog = *pprog;
1515         int cnt = 0;
1516         const u8 *r1 = bpf2ia32[BPF_REG_1];
1517         const u8 *r2 = bpf2ia32[BPF_REG_2];
1518         const u8 *r3 = bpf2ia32[BPF_REG_3];
1519         const u8 *tcc = bpf2ia32[TCALL_CNT];
1520         u32 lo, hi;
1521         static int jmp_label1 = -1;
1522
1523         /*
1524          * if (index >= array->map.max_entries)
1525          *     goto out;
1526          */
1527         /* mov eax,dword ptr [ebp+off] */
1528         EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
1529         /* mov edx,dword ptr [ebp+off] */
1530         EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));
1531
1532         /* cmp dword ptr [eax+off],edx */
1533         EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
1534               offsetof(struct bpf_array, map.max_entries));
1535         /* jbe out */
1536         EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));
1537
1538         /*
1539          * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
1540          *     goto out;
1541          */
1542         lo = (u32)MAX_TAIL_CALL_CNT;
1543         hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
1544         EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1545         EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1546
1547         /* cmp edx,hi */
1548         EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
1549         EMIT2(IA32_JNE, 3);
1550         /* cmp ecx,lo */
1551         EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);
1552
1553         /* ja out */
1554         EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
1555
1556         /* add eax,0x1 */
1557         EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
1558         /* adc ebx,0x0 */
1559         EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);
1560
1561         /* mov dword ptr [ebp+off],eax */
1562         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1563         /* mov dword ptr [ebp+off],edx */
1564         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1565
1566         /* prog = array->ptrs[index]; */
1567         /* mov edx, [eax + edx * 4 + offsetof(...)] */
1568         EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));
1569
1570         /*
1571          * if (prog == NULL)
1572          *     goto out;
1573          */
1574         /* test edx,edx */
1575         EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
1576         /* je out */
1577         EMIT2(IA32_JE, jmp_label(jmp_label1, 2));
1578
1579         /* goto *(prog->bpf_func + prologue_size); */
1580         /* mov edx, dword ptr [edx + 32] */
1581         EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
1582               offsetof(struct bpf_prog, bpf_func));
1583         /* add edx,prologue_size */
1584         EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);
1585
1586         /* mov eax,dword ptr [ebp+off] */
1587         EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1588
1589         /*
1590          * Now we're ready to jump into next BPF program:
1591          * eax == ctx (1st arg)
1592          * edx == prog->bpf_func + prologue_size
1593          */
1594         RETPOLINE_EDX_BPF_JIT();
1595
1596         if (jmp_label1 == -1)
1597                 jmp_label1 = cnt;
1598
1599         /* out: */
1600         *pprog = prog;
1601 }
1602
/*
 * Push the scratch stack register on top of the stack.
 *
 * Loads a 64-bit BPF register from its scratch-stack slot and pushes it
 * as a function-call argument: high dword first, then low, so the value
 * ends up in little-endian order on the stack. Clobbers ECX.
 */
static inline void emit_push_r64(const u8 src[], u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
	/* push ecx */
	EMIT1(0x51);

	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
	/* push ecx */
	EMIT1(0x51);

	*pprog = prog;
}
1621
1622 static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1623                   int oldproglen, struct jit_context *ctx)
1624 {
1625         struct bpf_insn *insn = bpf_prog->insnsi;
1626         int insn_cnt = bpf_prog->len;
1627         bool seen_exit = false;
1628         u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
1629         int i, cnt = 0;
1630         int proglen = 0;
1631         u8 *prog = temp;
1632
1633         emit_prologue(&prog, bpf_prog->aux->stack_depth);
1634
1635         for (i = 0; i < insn_cnt; i++, insn++) {
1636                 const s32 imm32 = insn->imm;
1637                 const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1638                 const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true;
1639                 const bool sstk = insn->src_reg == BPF_REG_AX ? false : true;
1640                 const u8 code = insn->code;
1641                 const u8 *dst = bpf2ia32[insn->dst_reg];
1642                 const u8 *src = bpf2ia32[insn->src_reg];
1643                 const u8 *r0 = bpf2ia32[BPF_REG_0];
1644                 s64 jmp_offset;
1645                 u8 jmp_cond;
1646                 int ilen;
1647                 u8 *func;
1648
1649                 switch (code) {
1650                 /* ALU operations */
1651                 /* dst = src */
1652                 case BPF_ALU | BPF_MOV | BPF_K:
1653                 case BPF_ALU | BPF_MOV | BPF_X:
1654                 case BPF_ALU64 | BPF_MOV | BPF_K:
1655                 case BPF_ALU64 | BPF_MOV | BPF_X:
1656                         switch (BPF_SRC(code)) {
1657                         case BPF_X:
1658                                 emit_ia32_mov_r64(is64, dst, src, dstk,
1659                                                   sstk, &prog);
1660                                 break;
1661                         case BPF_K:
1662                                 /* Sign-extend immediate value to dst reg */
1663                                 emit_ia32_mov_i64(is64, dst, imm32,
1664                                                   dstk, &prog);
1665                                 break;
1666                         }
1667                         break;
1668                 /* dst = dst + src/imm */
1669                 /* dst = dst - src/imm */
1670                 /* dst = dst | src/imm */
1671                 /* dst = dst & src/imm */
1672                 /* dst = dst ^ src/imm */
1673                 /* dst = dst * src/imm */
1674                 /* dst = dst << src */
1675                 /* dst = dst >> src */
1676                 case BPF_ALU | BPF_ADD | BPF_K:
1677                 case BPF_ALU | BPF_ADD | BPF_X:
1678                 case BPF_ALU | BPF_SUB | BPF_K:
1679                 case BPF_ALU | BPF_SUB | BPF_X:
1680                 case BPF_ALU | BPF_OR | BPF_K:
1681                 case BPF_ALU | BPF_OR | BPF_X:
1682                 case BPF_ALU | BPF_AND | BPF_K:
1683                 case BPF_ALU | BPF_AND | BPF_X:
1684                 case BPF_ALU | BPF_XOR | BPF_K:
1685                 case BPF_ALU | BPF_XOR | BPF_X:
1686                 case BPF_ALU64 | BPF_ADD | BPF_K:
1687                 case BPF_ALU64 | BPF_ADD | BPF_X:
1688                 case BPF_ALU64 | BPF_SUB | BPF_K:
1689                 case BPF_ALU64 | BPF_SUB | BPF_X:
1690                 case BPF_ALU64 | BPF_OR | BPF_K:
1691                 case BPF_ALU64 | BPF_OR | BPF_X:
1692                 case BPF_ALU64 | BPF_AND | BPF_K:
1693                 case BPF_ALU64 | BPF_AND | BPF_X:
1694                 case BPF_ALU64 | BPF_XOR | BPF_K:
1695                 case BPF_ALU64 | BPF_XOR | BPF_X:
1696                         switch (BPF_SRC(code)) {
1697                         case BPF_X:
1698                                 emit_ia32_alu_r64(is64, BPF_OP(code), dst,
1699                                                   src, dstk, sstk, &prog);
1700                                 break;
1701                         case BPF_K:
1702                                 emit_ia32_alu_i64(is64, BPF_OP(code), dst,
1703                                                   imm32, dstk, &prog);
1704                                 break;
1705                         }
1706                         break;
1707                 case BPF_ALU | BPF_MUL | BPF_K:
1708                 case BPF_ALU | BPF_MUL | BPF_X:
1709                         switch (BPF_SRC(code)) {
1710                         case BPF_X:
1711                                 emit_ia32_mul_r(dst_lo, src_lo, dstk,
1712                                                 sstk, &prog);
1713                                 break;
1714                         case BPF_K:
1715                                 /* mov ecx,imm32*/
1716                                 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1717                                             imm32);
1718                                 emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
1719                                                 false, &prog);
1720                                 break;
1721                         }
1722                         emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1723                         break;
1724                 case BPF_ALU | BPF_LSH | BPF_X:
1725                 case BPF_ALU | BPF_RSH | BPF_X:
1726                 case BPF_ALU | BPF_ARSH | BPF_K:
1727                 case BPF_ALU | BPF_ARSH | BPF_X:
1728                         switch (BPF_SRC(code)) {
1729                         case BPF_X:
1730                                 emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
1731                                                   dstk, sstk, &prog);
1732                                 break;
1733                         case BPF_K:
1734                                 /* mov ecx,imm32*/
1735                                 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1736                                             imm32);
1737                                 emit_ia32_shift_r(BPF_OP(code), dst_lo,
1738                                                   IA32_ECX, dstk, false,
1739                                                   &prog);
1740                                 break;
1741                         }
1742                         emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1743                         break;
1744                 /* dst = dst / src(imm) */
1745                 /* dst = dst % src(imm) */
1746                 case BPF_ALU | BPF_DIV | BPF_K:
1747                 case BPF_ALU | BPF_DIV | BPF_X:
1748                 case BPF_ALU | BPF_MOD | BPF_K:
1749                 case BPF_ALU | BPF_MOD | BPF_X:
1750                         switch (BPF_SRC(code)) {
1751                         case BPF_X:
1752                                 emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1753                                                     src_lo, dstk, sstk, &prog);
1754                                 break;
1755                         case BPF_K:
1756                                 /* mov ecx,imm32*/
1757                                 EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1758                                             imm32);
1759                                 emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1760                                                     IA32_ECX, dstk, false,
1761                                                     &prog);
1762                                 break;
1763                         }
1764                         emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1765                         break;
1766                 case BPF_ALU64 | BPF_DIV | BPF_K:
1767                 case BPF_ALU64 | BPF_DIV | BPF_X:
1768                 case BPF_ALU64 | BPF_MOD | BPF_K:
1769                 case BPF_ALU64 | BPF_MOD | BPF_X:
1770                         goto notyet;
1771                 /* dst = dst >> imm */
1772                 /* dst = dst << imm */
1773                 case BPF_ALU | BPF_RSH | BPF_K:
1774                 case BPF_ALU | BPF_LSH | BPF_K:
1775                         if (unlikely(imm32 > 31))
1776                                 return -EINVAL;
1777                         /* mov ecx,imm32*/
1778                         EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
1779                         emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
1780                                           false, &prog);
1781                         emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1782                         break;
1783                 /* dst = dst << imm */
1784                 case BPF_ALU64 | BPF_LSH | BPF_K:
1785                         if (unlikely(imm32 > 63))
1786                                 return -EINVAL;
1787                         emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
1788                         break;
1789                 /* dst = dst >> imm */
1790                 case BPF_ALU64 | BPF_RSH | BPF_K:
1791                         if (unlikely(imm32 > 63))
1792                                 return -EINVAL;
1793                         emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
1794                         break;
1795                 /* dst = dst << src */
1796                 case BPF_ALU64 | BPF_LSH | BPF_X:
1797                         emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
1798                         break;
1799                 /* dst = dst >> src */
1800                 case BPF_ALU64 | BPF_RSH | BPF_X:
1801                         emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
1802                         break;
1803                 /* dst = dst >> src (signed) */
1804                 case BPF_ALU64 | BPF_ARSH | BPF_X:
1805                         emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
1806                         break;
1807                 /* dst = dst >> imm (signed) */
1808                 case BPF_ALU64 | BPF_ARSH | BPF_K:
1809                         if (unlikely(imm32 > 63))
1810                                 return -EINVAL;
1811                         emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
1812                         break;
1813                 /* dst = ~dst */
1814                 case BPF_ALU | BPF_NEG:
1815                         emit_ia32_alu_i(is64, false, BPF_OP(code),
1816                                         dst_lo, 0, dstk, &prog);
1817                         emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1818                         break;
1819                 /* dst = ~dst (64 bit) */
1820                 case BPF_ALU64 | BPF_NEG:
1821                         emit_ia32_neg64(dst, dstk, &prog);
1822                         break;
1823                 /* dst = dst * src/imm */
1824                 case BPF_ALU64 | BPF_MUL | BPF_X:
1825                 case BPF_ALU64 | BPF_MUL | BPF_K:
1826                         switch (BPF_SRC(code)) {
1827                         case BPF_X:
1828                                 emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
1829                                 break;
1830                         case BPF_K:
1831                                 emit_ia32_mul_i64(dst, imm32, dstk, &prog);
1832                                 break;
1833                         }
1834                         break;
1835                 /* dst = htole(dst) */
1836                 case BPF_ALU | BPF_END | BPF_FROM_LE:
1837                         emit_ia32_to_le_r64(dst, imm32, dstk, &prog);
1838                         break;
1839                 /* dst = htobe(dst) */
1840                 case BPF_ALU | BPF_END | BPF_FROM_BE:
1841                         emit_ia32_to_be_r64(dst, imm32, dstk, &prog);
1842                         break;
1843                 /* dst = imm64 */
1844                 case BPF_LD | BPF_IMM | BPF_DW: {
1845                         s32 hi, lo = imm32;
1846
1847                         hi = insn[1].imm;
1848                         emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
1849                         emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
1850                         insn++;
1851                         i++;
1852                         break;
1853                 }
1854                 /* ST: *(u8*)(dst_reg + off) = imm */
1855                 case BPF_ST | BPF_MEM | BPF_H:
1856                 case BPF_ST | BPF_MEM | BPF_B:
1857                 case BPF_ST | BPF_MEM | BPF_W:
1858                 case BPF_ST | BPF_MEM | BPF_DW:
1859                         if (dstk)
1860                                 /* mov eax,dword ptr [ebp+off] */
1861                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1862                                       STACK_VAR(dst_lo));
1863                         else
1864                                 /* mov eax,dst_lo */
1865                                 EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1866
1867                         switch (BPF_SIZE(code)) {
1868                         case BPF_B:
1869                                 EMIT(0xC6, 1); break;
1870                         case BPF_H:
1871                                 EMIT2(0x66, 0xC7); break;
1872                         case BPF_W:
1873                         case BPF_DW:
1874                                 EMIT(0xC7, 1); break;
1875                         }
1876
1877                         if (is_imm8(insn->off))
1878                                 EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
1879                         else
1880                                 EMIT1_off32(add_1reg(0x80, IA32_EAX),
1881                                             insn->off);
1882                         EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
1883
1884                         if (BPF_SIZE(code) == BPF_DW) {
1885                                 u32 hi;
1886
1887                                 hi = imm32 & (1<<31) ? (u32)~0 : 0;
1888                                 EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
1889                                             insn->off + 4);
1890                                 EMIT(hi, 4);
1891                         }
1892                         break;
1893
1894                 /* STX: *(u8*)(dst_reg + off) = src_reg */
1895                 case BPF_STX | BPF_MEM | BPF_B:
1896                 case BPF_STX | BPF_MEM | BPF_H:
1897                 case BPF_STX | BPF_MEM | BPF_W:
1898                 case BPF_STX | BPF_MEM | BPF_DW:
1899                         if (dstk)
1900                                 /* mov eax,dword ptr [ebp+off] */
1901                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1902                                       STACK_VAR(dst_lo));
1903                         else
1904                                 /* mov eax,dst_lo */
1905                                 EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1906
1907                         if (sstk)
1908                                 /* mov edx,dword ptr [ebp+off] */
1909                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1910                                       STACK_VAR(src_lo));
1911                         else
1912                                 /* mov edx,src_lo */
1913                                 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
1914
1915                         switch (BPF_SIZE(code)) {
1916                         case BPF_B:
1917                                 EMIT(0x88, 1); break;
1918                         case BPF_H:
1919                                 EMIT2(0x66, 0x89); break;
1920                         case BPF_W:
1921                         case BPF_DW:
1922                                 EMIT(0x89, 1); break;
1923                         }
1924
1925                         if (is_imm8(insn->off))
1926                                 EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1927                                       insn->off);
1928                         else
1929                                 EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1930                                             insn->off);
1931
1932                         if (BPF_SIZE(code) == BPF_DW) {
1933                                 if (sstk)
1934                                         /* mov edi,dword ptr [ebp+off] */
1935                                         EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
1936                                                              IA32_EDX),
1937                                               STACK_VAR(src_hi));
1938                                 else
1939                                         /* mov edi,src_hi */
1940                                         EMIT2(0x8B, add_2reg(0xC0, src_hi,
1941                                                              IA32_EDX));
1942                                 EMIT1(0x89);
1943                                 if (is_imm8(insn->off + 4)) {
1944                                         EMIT2(add_2reg(0x40, IA32_EAX,
1945                                                        IA32_EDX),
1946                                               insn->off + 4);
1947                                 } else {
1948                                         EMIT1(add_2reg(0x80, IA32_EAX,
1949                                                        IA32_EDX));
1950                                         EMIT(insn->off + 4, 4);
1951                                 }
1952                         }
1953                         break;
1954
1955                 /* LDX: dst_reg = *(u8*)(src_reg + off) */
1956                 case BPF_LDX | BPF_MEM | BPF_B:
1957                 case BPF_LDX | BPF_MEM | BPF_H:
1958                 case BPF_LDX | BPF_MEM | BPF_W:
1959                 case BPF_LDX | BPF_MEM | BPF_DW:
1960                         if (sstk)
1961                                 /* mov eax,dword ptr [ebp+off] */
1962                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1963                                       STACK_VAR(src_lo));
1964                         else
1965                                 /* mov eax,dword ptr [ebp+off] */
1966                                 EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
1967
1968                         switch (BPF_SIZE(code)) {
1969                         case BPF_B:
1970                                 EMIT2(0x0F, 0xB6); break;
1971                         case BPF_H:
1972                                 EMIT2(0x0F, 0xB7); break;
1973                         case BPF_W:
1974                         case BPF_DW:
1975                                 EMIT(0x8B, 1); break;
1976                         }
1977
1978                         if (is_imm8(insn->off))
1979                                 EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1980                                       insn->off);
1981                         else
1982                                 EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1983                                             insn->off);
1984
1985                         if (dstk)
1986                                 /* mov dword ptr [ebp+off],edx */
1987                                 EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
1988                                       STACK_VAR(dst_lo));
1989                         else
1990                                 /* mov dst_lo,edx */
1991                                 EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
1992                         switch (BPF_SIZE(code)) {
1993                         case BPF_B:
1994                         case BPF_H:
1995                         case BPF_W:
1996                                 if (dstk) {
1997                                         EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
1998                                               STACK_VAR(dst_hi));
1999                                         EMIT(0x0, 4);
2000                                 } else {
2001                                         EMIT3(0xC7, add_1reg(0xC0, dst_hi), 0);
2002                                 }
2003                                 break;
2004                         case BPF_DW:
2005                                 EMIT2_off32(0x8B,
2006                                             add_2reg(0x80, IA32_EAX, IA32_EDX),
2007                                             insn->off + 4);
2008                                 if (dstk)
2009                                         EMIT3(0x89,
2010                                               add_2reg(0x40, IA32_EBP,
2011                                                        IA32_EDX),
2012                                               STACK_VAR(dst_hi));
2013                                 else
2014                                         EMIT2(0x89,
2015                                               add_2reg(0xC0, dst_hi, IA32_EDX));
2016                                 break;
2017                         default:
2018                                 break;
2019                         }
2020                         break;
2021                 /* call */
2022                 case BPF_JMP | BPF_CALL:
2023                 {
2024                         const u8 *r1 = bpf2ia32[BPF_REG_1];
2025                         const u8 *r2 = bpf2ia32[BPF_REG_2];
2026                         const u8 *r3 = bpf2ia32[BPF_REG_3];
2027                         const u8 *r4 = bpf2ia32[BPF_REG_4];
2028                         const u8 *r5 = bpf2ia32[BPF_REG_5];
2029
2030                         if (insn->src_reg == BPF_PSEUDO_CALL)
2031                                 goto notyet;
2032
2033                         func = (u8 *) __bpf_call_base + imm32;
2034                         jmp_offset = func - (image + addrs[i]);
2035
2036                         if (!imm32 || !is_simm32(jmp_offset)) {
2037                                 pr_err("unsupported BPF func %d addr %p image %p\n",
2038                                        imm32, func, image);
2039                                 return -EINVAL;
2040                         }
2041
2042                         /* mov eax,dword ptr [ebp+off] */
2043                         EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2044                               STACK_VAR(r1[0]));
2045                         /* mov edx,dword ptr [ebp+off] */
2046                         EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2047                               STACK_VAR(r1[1]));
2048
2049                         emit_push_r64(r5, &prog);
2050                         emit_push_r64(r4, &prog);
2051                         emit_push_r64(r3, &prog);
2052                         emit_push_r64(r2, &prog);
2053
2054                         EMIT1_off32(0xE8, jmp_offset + 9);
2055
2056                         /* mov dword ptr [ebp+off],eax */
2057                         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
2058                               STACK_VAR(r0[0]));
2059                         /* mov dword ptr [ebp+off],edx */
2060                         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
2061                               STACK_VAR(r0[1]));
2062
2063                         /* add esp,32 */
2064                         EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
2065                         break;
2066                 }
2067                 case BPF_JMP | BPF_TAIL_CALL:
2068                         emit_bpf_tail_call(&prog);
2069                         break;
2070
2071                 /* cond jump */
2072                 case BPF_JMP | BPF_JEQ | BPF_X:
2073                 case BPF_JMP | BPF_JNE | BPF_X:
2074                 case BPF_JMP | BPF_JGT | BPF_X:
2075                 case BPF_JMP | BPF_JLT | BPF_X:
2076                 case BPF_JMP | BPF_JGE | BPF_X:
2077                 case BPF_JMP | BPF_JLE | BPF_X:
2078                 case BPF_JMP | BPF_JSGT | BPF_X:
2079                 case BPF_JMP | BPF_JSLE | BPF_X:
2080                 case BPF_JMP | BPF_JSLT | BPF_X:
2081                 case BPF_JMP | BPF_JSGE | BPF_X: {
2082                         u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2083                         u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2084                         u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2085                         u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2086
2087                         if (dstk) {
2088                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2089                                       STACK_VAR(dst_lo));
2090                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2091                                       STACK_VAR(dst_hi));
2092                         }
2093
2094                         if (sstk) {
2095                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2096                                       STACK_VAR(src_lo));
2097                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
2098                                       STACK_VAR(src_hi));
2099                         }
2100
2101                         /* cmp dreg_hi,sreg_hi */
2102                         EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2103                         EMIT2(IA32_JNE, 2);
2104                         /* cmp dreg_lo,sreg_lo */
2105                         EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2106                         goto emit_cond_jmp;
2107                 }
2108                 case BPF_JMP | BPF_JSET | BPF_X: {
2109                         u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2110                         u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2111                         u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2112                         u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2113
2114                         if (dstk) {
2115                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2116                                       STACK_VAR(dst_lo));
2117                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2118                                       STACK_VAR(dst_hi));
2119                         }
2120
2121                         if (sstk) {
2122                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2123                                       STACK_VAR(src_lo));
2124                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX),
2125                                       STACK_VAR(src_hi));
2126                         }
2127                         /* and dreg_lo,sreg_lo */
2128                         EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2129                         /* and dreg_hi,sreg_hi */
2130                         EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2131                         /* or dreg_lo,dreg_hi */
2132                         EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2133                         goto emit_cond_jmp;
2134                 }
2135                 case BPF_JMP | BPF_JSET | BPF_K: {
2136                         u32 hi;
2137                         u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2138                         u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2139                         u8 sreg_lo = IA32_ECX;
2140                         u8 sreg_hi = IA32_EBX;
2141
2142                         if (dstk) {
2143                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2144                                       STACK_VAR(dst_lo));
2145                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2146                                       STACK_VAR(dst_hi));
2147                         }
2148                         hi = imm32 & (1<<31) ? (u32)~0 : 0;
2149
2150                         /* mov ecx,imm32 */
2151                         EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2152                         /* mov ebx,imm32 */
2153                         EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2154
2155                         /* and dreg_lo,sreg_lo */
2156                         EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2157                         /* and dreg_hi,sreg_hi */
2158                         EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2159                         /* or dreg_lo,dreg_hi */
2160                         EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2161                         goto emit_cond_jmp;
2162                 }
2163                 case BPF_JMP | BPF_JEQ | BPF_K:
2164                 case BPF_JMP | BPF_JNE | BPF_K:
2165                 case BPF_JMP | BPF_JGT | BPF_K:
2166                 case BPF_JMP | BPF_JLT | BPF_K:
2167                 case BPF_JMP | BPF_JGE | BPF_K:
2168                 case BPF_JMP | BPF_JLE | BPF_K:
2169                 case BPF_JMP | BPF_JSGT | BPF_K:
2170                 case BPF_JMP | BPF_JSLE | BPF_K:
2171                 case BPF_JMP | BPF_JSLT | BPF_K:
2172                 case BPF_JMP | BPF_JSGE | BPF_K: {
2173                         u32 hi;
2174                         u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2175                         u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2176                         u8 sreg_lo = IA32_ECX;
2177                         u8 sreg_hi = IA32_EBX;
2178
2179                         if (dstk) {
2180                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2181                                       STACK_VAR(dst_lo));
2182                                 EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2183                                       STACK_VAR(dst_hi));
2184                         }
2185
2186                         hi = imm32 & (1<<31) ? (u32)~0 : 0;
2187                         /* mov ecx,imm32 */
2188                         EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2189                         /* mov ebx,imm32 */
2190                         EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2191
2192                         /* cmp dreg_hi,sreg_hi */
2193                         EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2194                         EMIT2(IA32_JNE, 2);
2195                         /* cmp dreg_lo,sreg_lo */
2196                         EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2197
2198 emit_cond_jmp:          /* Convert BPF opcode to x86 */
2199                         switch (BPF_OP(code)) {
2200                         case BPF_JEQ:
2201                                 jmp_cond = IA32_JE;
2202                                 break;
2203                         case BPF_JSET:
2204                         case BPF_JNE:
2205                                 jmp_cond = IA32_JNE;
2206                                 break;
2207                         case BPF_JGT:
2208                                 /* GT is unsigned '>', JA in x86 */
2209                                 jmp_cond = IA32_JA;
2210                                 break;
2211                         case BPF_JLT:
2212                                 /* LT is unsigned '<', JB in x86 */
2213                                 jmp_cond = IA32_JB;
2214                                 break;
2215                         case BPF_JGE:
2216                                 /* GE is unsigned '>=', JAE in x86 */
2217                                 jmp_cond = IA32_JAE;
2218                                 break;
2219                         case BPF_JLE:
2220                                 /* LE is unsigned '<=', JBE in x86 */
2221                                 jmp_cond = IA32_JBE;
2222                                 break;
2223                         case BPF_JSGT:
2224                                 /* Signed '>', GT in x86 */
2225                                 jmp_cond = IA32_JG;
2226                                 break;
2227                         case BPF_JSLT:
2228                                 /* Signed '<', LT in x86 */
2229                                 jmp_cond = IA32_JL;
2230                                 break;
2231                         case BPF_JSGE:
2232                                 /* Signed '>=', GE in x86 */
2233                                 jmp_cond = IA32_JGE;
2234                                 break;
2235                         case BPF_JSLE:
2236                                 /* Signed '<=', LE in x86 */
2237                                 jmp_cond = IA32_JLE;
2238                                 break;
2239                         default: /* to silence GCC warning */
2240                                 return -EFAULT;
2241                         }
2242                         jmp_offset = addrs[i + insn->off] - addrs[i];
2243                         if (is_imm8(jmp_offset)) {
2244                                 EMIT2(jmp_cond, jmp_offset);
2245                         } else if (is_simm32(jmp_offset)) {
2246                                 EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2247                         } else {
2248                                 pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2249                                 return -EFAULT;
2250                         }
2251
2252                         break;
2253                 }
2254                 case BPF_JMP | BPF_JA:
2255                         if (insn->off == -1)
2256                                 /* -1 jmp instructions will always jump
2257                                  * backwards two bytes. Explicitly handling
2258                                  * this case avoids wasting too many passes
2259                                  * when there are long sequences of replaced
2260                                  * dead code.
2261                                  */
2262                                 jmp_offset = -2;
2263                         else
2264                                 jmp_offset = addrs[i + insn->off] - addrs[i];
2265
2266                         if (!jmp_offset)
2267                                 /* Optimize out nop jumps */
2268                                 break;
2269 emit_jmp:
2270                         if (is_imm8(jmp_offset)) {
2271                                 EMIT2(0xEB, jmp_offset);
2272                         } else if (is_simm32(jmp_offset)) {
2273                                 EMIT1_off32(0xE9, jmp_offset);
2274                         } else {
2275                                 pr_err("jmp gen bug %llx\n", jmp_offset);
2276                                 return -EFAULT;
2277                         }
2278                         break;
2279
2280                 case BPF_LD | BPF_ABS | BPF_W:
2281                 case BPF_LD | BPF_ABS | BPF_H:
2282                 case BPF_LD | BPF_ABS | BPF_B:
2283                 case BPF_LD | BPF_IND | BPF_W:
2284                 case BPF_LD | BPF_IND | BPF_H:
2285                 case BPF_LD | BPF_IND | BPF_B:
2286                 {
2287                         int size;
2288                         const u8 *r6 = bpf2ia32[BPF_REG_6];
2289
2290                         /* Setting up first argument */
2291                         /* mov eax,dword ptr [ebp+off] */
2292                         EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2293                               STACK_VAR(r6[0]));
2294
2295                         /* Setting up second argument */
2296                         if (BPF_MODE(code) == BPF_ABS) {
2297                                 /* mov %edx, imm32 */
2298                                 EMIT1_off32(0xBA, imm32);
2299                         } else {
2300                                 if (sstk)
2301                                         /* mov edx,dword ptr [ebp+off] */
2302                                         EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
2303                                                              IA32_EDX),
2304                                               STACK_VAR(src_lo));
2305                                 else
2306                                         /* mov edx,src_lo */
2307                                         EMIT2(0x8B, add_2reg(0xC0, src_lo,
2308                                                              IA32_EDX));
2309                                 if (imm32) {
2310                                         if (is_imm8(imm32))
2311                                                 /* add %edx,imm8 */
2312                                                 EMIT3(0x83, 0xC2, imm32);
2313                                         else
2314                                                 /* add %edx,imm32 */
2315                                                 EMIT2_off32(0x81, 0xC2, imm32);
2316                                 }
2317                         }
2318
2319                         /* Setting up third argument */
2320                         switch (BPF_SIZE(code)) {
2321                         case BPF_W:
2322                                 size = 4;
2323                                 break;
2324                         case BPF_H:
2325                                 size = 2;
2326                                 break;
2327                         case BPF_B:
2328                                 size = 1;
2329                                 break;
2330                         default:
2331                                 return -EINVAL;
2332                         }
2333                         /* mov ecx,val */
2334                         EMIT2(0xB1, size);
2335                         /* movzx ecx,ecx */
2336                         EMIT3(0x0F, 0xB6, add_2reg(0xC0, IA32_ECX, IA32_ECX));
2337
2338                         /* mov ebx,ebp */
2339                         EMIT2(0x8B, add_2reg(0xC0, IA32_EBP, IA32_EBX));
2340                         /* add %ebx,imm8 */
2341                         EMIT3(0x83, add_1reg(0xC0, IA32_EBX), SKB_BUFFER);
2342                         /* push ebx */
2343                         EMIT1(0x53);
2344
2345                         /* Setting up function pointer to call */
2346                         /* mov ebx,imm32*/
2347                         EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX),
2348                                     (unsigned int)bpf_load_pointer);
2349
2350                         EMIT2(0xFF, add_1reg(0xD0, IA32_EBX));
2351                         /* add %esp,4 */
2352                         EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 4);
2353                         /* xor edx,edx */
2354                         EMIT2(0x33, add_2reg(0xC0, IA32_EDX, IA32_EDX));
2355
2356                         /* mov dword ptr [ebp+off],eax */
2357                         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
2358                               STACK_VAR(r0[0]));
2359                         /* mov dword ptr [ebp+off],edx */
2360                         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
2361                               STACK_VAR(r0[1]));
2362
2363                         /*
2364                          * Check if return address is NULL or not.
2365                          * If NULL then jump to epilogue else continue
2366                          * to load the value from retn address
2367                          */
2368                         EMIT3(0x83, add_1reg(0xF8, IA32_EAX), 0);
2369                         jmp_offset = ctx->cleanup_addr - addrs[i];
2370
2371                         switch (BPF_SIZE(code)) {
2372                         case BPF_W:
2373                                 jmp_offset += 7;
2374                                 break;
2375                         case BPF_H:
2376                                 jmp_offset += 10;
2377                                 break;
2378                         case BPF_B:
2379                                 jmp_offset += 6;
2380                                 break;
2381                         }
2382
2383                         EMIT2_off32(0x0F, IA32_JE + 0x10, jmp_offset);
2384                         /* Load value from the address */
2385                         switch (BPF_SIZE(code)) {
2386                         case BPF_W:
2387                                 /* mov eax,[eax] */
2388                                 EMIT2(0x8B, 0x0);
2389                                 /* Emit 'bswap eax' */
2390                                 EMIT2(0x0F, add_1reg(0xC8, IA32_EAX));
2391                                 break;
2392                         case BPF_H:
2393                                 EMIT3(0x0F, 0xB7, 0x0);
2394                                 EMIT1(0x66);
2395                                 EMIT3(0xC1, add_1reg(0xC8, IA32_EAX), 8);
2396                                 break;
2397                         case BPF_B:
2398                                 EMIT3(0x0F, 0xB6, 0x0);
2399                                 break;
2400                         }
2401
2402                         /* mov dword ptr [ebp+off],eax */
2403                         EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
2404                               STACK_VAR(r0[0]));
2405                         break;
2406                 }
2407                 /* STX XADD: lock *(u32 *)(dst + off) += src */
2408                 case BPF_STX | BPF_XADD | BPF_W:
2409                 /* STX XADD: lock *(u64 *)(dst + off) += src */
2410                 case BPF_STX | BPF_XADD | BPF_DW:
2411                         goto notyet;
2412                 case BPF_JMP | BPF_EXIT:
2413                         if (seen_exit) {
2414                                 jmp_offset = ctx->cleanup_addr - addrs[i];
2415                                 goto emit_jmp;
2416                         }
2417                         seen_exit = true;
2418                         /* Update cleanup_addr */
2419                         ctx->cleanup_addr = proglen;
2420                         emit_epilogue(&prog, bpf_prog->aux->stack_depth);
2421                         break;
2422 notyet:
2423                         pr_info_once("*** NOT YET: opcode %02x ***\n", code);
2424                         return -EFAULT;
2425                 default:
2426                         /*
2427                          * This error will be seen if new instruction was added
2428                          * to interpreter, but not to JIT or if there is junk in
2429                          * bpf_prog
2430                          */
2431                         pr_err("bpf_jit: unknown opcode %02x\n", code);
2432                         return -EINVAL;
2433                 }
2434
2435                 ilen = prog - temp;
2436                 if (ilen > BPF_MAX_INSN_SIZE) {
2437                         pr_err("bpf_jit: fatal insn size error\n");
2438                         return -EFAULT;
2439                 }
2440
2441                 if (image) {
2442                         if (unlikely(proglen + ilen > oldproglen)) {
2443                                 pr_err("bpf_jit: fatal error\n");
2444                                 return -EFAULT;
2445                         }
2446                         memcpy(image + proglen, temp, ilen);
2447                 }
2448                 proglen += ilen;
2449                 addrs[i] = proglen;
2450                 prog = temp;
2451         }
2452         return proglen;
2453 }
2454
2455 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
2456 {
2457         struct bpf_binary_header *header = NULL;
2458         struct bpf_prog *tmp, *orig_prog = prog;
2459         int proglen, oldproglen = 0;
2460         struct jit_context ctx = {};
2461         bool tmp_blinded = false;
2462         u8 *image = NULL;
2463         int *addrs;
2464         int pass;
2465         int i;
2466
2467         if (!prog->jit_requested)
2468                 return orig_prog;
2469
2470         tmp = bpf_jit_blind_constants(prog);
2471         /*
2472          * If blinding was requested and we failed during blinding,
2473          * we must fall back to the interpreter.
2474          */
2475         if (IS_ERR(tmp))
2476                 return orig_prog;
2477         if (tmp != prog) {
2478                 tmp_blinded = true;
2479                 prog = tmp;
2480         }
2481
2482         addrs = kmalloc(prog->len * sizeof(*addrs), GFP_KERNEL);
2483         if (!addrs) {
2484                 prog = orig_prog;
2485                 goto out;
2486         }
2487
2488         /*
2489          * Before first pass, make a rough estimation of addrs[]
2490          * each BPF instruction is translated to less than 64 bytes
2491          */
2492         for (proglen = 0, i = 0; i < prog->len; i++) {
2493                 proglen += 64;
2494                 addrs[i] = proglen;
2495         }
2496         ctx.cleanup_addr = proglen;
2497
2498         /*
2499          * JITed image shrinks with every pass and the loop iterates
2500          * until the image stops shrinking. Very large BPF programs
2501          * may converge on the last pass. In such case do one more
2502          * pass to emit the final image.
2503          */
2504         for (pass = 0; pass < 20 || image; pass++) {
2505                 proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
2506                 if (proglen <= 0) {
2507 out_image:
2508                         image = NULL;
2509                         if (header)
2510                                 bpf_jit_binary_free(header);
2511                         prog = orig_prog;
2512                         goto out_addrs;
2513                 }
2514                 if (image) {
2515                         if (proglen != oldproglen) {
2516                                 pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
2517                                        proglen, oldproglen);
2518                                 goto out_image;
2519                         }
2520                         break;
2521                 }
2522                 if (proglen == oldproglen) {
2523                         header = bpf_jit_binary_alloc(proglen, &image,
2524                                                       1, jit_fill_hole);
2525                         if (!header) {
2526                                 prog = orig_prog;
2527                                 goto out_addrs;
2528                         }
2529                 }
2530                 oldproglen = proglen;
2531                 cond_resched();
2532         }
2533
2534         if (bpf_jit_enable > 1)
2535                 bpf_jit_dump(prog->len, proglen, pass + 1, image);
2536
2537         if (image) {
2538                 bpf_jit_binary_lock_ro(header);
2539                 prog->bpf_func = (void *)image;
2540                 prog->jited = 1;
2541                 prog->jited_len = proglen;
2542         } else {
2543                 prog = orig_prog;
2544         }
2545
2546 out_addrs:
2547         kfree(addrs);
2548 out:
2549         if (tmp_blinded)
2550                 bpf_jit_prog_release_other(prog, prog == orig_prog ?
2551                                            tmp : orig_prog);
2552         return prog;
2553 }