sparc64: Add eBPF JIT.
[linux-block.git] / arch / sparc / net / bpf_jit_comp_32.c
1 #include <linux/moduleloader.h>
2 #include <linux/workqueue.h>
3 #include <linux/netdevice.h>
4 #include <linux/filter.h>
5 #include <linux/cache.h>
6 #include <linux/if_vlan.h>
7
8 #include <asm/cacheflush.h>
9 #include <asm/ptrace.h>
10
11 #include "bpf_jit_32.h"
12
13 int bpf_jit_enable __read_mostly;
14
/* Return true when @value, read as a 32-bit two's-complement number,
 * lies in [-4096, 4095] and therefore fits a signed 13-bit immediate.
 */
static inline bool is_simm13(unsigned int value)
{
        /* Adding 2^12 maps the accepted range onto [0, 0x1fff]. */
        unsigned int biased = value + 0x1000;

        return biased <= 0x1fff;
}
19
/* Bits of the 'seen' mask recording which resources the filter being
 * compiled uses; the prologue/epilogue are emitted according to them.
 */
20 #define SEEN_DATAREF 1 /* might call external helpers */
21 #define SEEN_XREG    2 /* r_X is used */
22 #define SEEN_MEM     4 /* use mem[] for temporary storage */
23
/* Instruction-word field encoders.  Each one places its argument at the
 * bit position of the named field; the results are OR-ed together to
 * build one 32-bit instruction word.
 *
 *   S13    - keeps the low 13 bits of X (the immediate field)
 *   IMMED  - bit 13, set when the immediate form (S13) is used instead of RS2
 *   RD/RS1/RS2 - register number fields at bits 25, 14 and 0
 *   OP/OP2/OP3 - opcode fields at bits 30, 22 and 19
 *   COND   - branch condition field at bit 25 (same position as RD)
 *   F1/F2/F3   - combine OP with the secondary opcode field of each format
 *
 * NOTE(review): OP(2) and OP(3) shift a plain int constant into bit 31;
 * confirm this is acceptable for the compiler flags used by the build.
 */
24 #define S13(X)          ((X) & 0x1fff)
25 #define IMMED           0x00002000
26 #define RD(X)           ((X) << 25)
27 #define RS1(X)          ((X) << 14)
28 #define RS2(X)          ((X))
29 #define OP(X)           ((X) << 30)
30 #define OP2(X)          ((X) << 22)
31 #define OP3(X)          ((X) << 19)
32 #define COND(X)         ((X) << 25)
33 #define F1(X)           OP(X)
34 #define F2(X, Y)        (OP(X) | OP2(Y))
35 #define F3(X, Y)        (OP(X) | OP3(Y))
36
/* The sixteen branch condition values (0x0 - 0xf), already shifted into
 * the COND field so they can be OR-ed into a branch template.
 */
37 #define CONDN           COND(0x0)
38 #define CONDE           COND(0x1)
39 #define CONDLE          COND(0x2)
40 #define CONDL           COND(0x3)
41 #define CONDLEU         COND(0x4)
42 #define CONDCS          COND(0x5)
43 #define CONDNEG         COND(0x6)
44 #define CONDVC          COND(0x7)
45 #define CONDA           COND(0x8)
46 #define CONDNE          COND(0x9)
47 #define CONDG           COND(0xa)
48 #define CONDGE          COND(0xb)
49 #define CONDGU          COND(0xc)
50 #define CONDCC          COND(0xd)
51 #define CONDPOS         COND(0xe)
52 #define CONDVS          COND(0xf)
53
/* Unsigned-comparison aliases: ">= unsigned" is CONDCC, "< unsigned" is CONDCS. */
54 #define CONDGEU         CONDCC
55 #define CONDLU          CONDCS
56
/* Convert a byte displacement X into the 22-bit branch displacement field:
 * the value is divided by 4 (shift right by 2) and truncated to 22 bits.
 */
57 #define WDISP22(X)      (((X) >> 2) & 0x3fffff)
58
/* Conditional-branch instruction templates (format 2, op2 = 2) with the
 * condition already filled in.  OR in WDISP22(displacement) to complete one.
 */
59 #define BA              (F2(0, 2) | CONDA)
60 #define BGU             (F2(0, 2) | CONDGU)
61 #define BLEU            (F2(0, 2) | CONDLEU)
62 #define BGEU            (F2(0, 2) | CONDGEU)
63 #define BLU             (F2(0, 2) | CONDLU)
64 #define BE              (F2(0, 2) | CONDE)
65 #define BNE             (F2(0, 2) | CONDNE)
66
/* Branch-if-equal used after comparing a loaded pointer against zero;
 * here it is the same encoding as BE.
 */
67 #define BE_PTR          BE
68
/* Two-instruction constant load, used as a pair by emit_set_const():
 *   SETHI(K, REG) encodes "sethi %hi(K), REG" - bits 31..10 of K go into
 *                 the 22-bit immediate field;
 *   OR_LO(K, REG) encodes "or REG, %lo(K), REG" - the low 10 bits of K are
 *                 OR-ed in as an immediate.
 */
69 #define SETHI(K, REG)   \
70         (F2(0, 0x4) | RD(REG) | (((K) >> 10) & 0x3fffff))
71 #define OR_LO(K, REG)   \
72         (F3(2, 0x02) | IMMED | RS1(REG) | ((K) & 0x3ff) | RD(REG))
73
/* Opcode templates with no operands filled in.  The arithmetic, logic,
 * shift, JMPL and %y access templates are format 3 with op = 2; CALL is
 * format 1 and BR is format 2.  Callers OR in RS1/RS2 (or IMMED | S13)
 * and RD.  The "CC" variants are distinct opcodes (op3 differs by 0x10
 * from the plain form).
 */
74 #define ADD             F3(2, 0x00)
75 #define AND             F3(2, 0x01)
76 #define ANDCC           F3(2, 0x11)
77 #define OR              F3(2, 0x02)
78 #define XOR             F3(2, 0x03)
79 #define SUB             F3(2, 0x04)
80 #define SUBCC           F3(2, 0x14)
81 #define MUL             F3(2, 0x0a)     /* umul */
82 #define DIV             F3(2, 0x0e)     /* udiv */
83 #define SLL             F3(2, 0x25)
84 #define SRL             F3(2, 0x26)
85 #define JMPL            F3(2, 0x38)
86 #define CALL            F1(1)
87 #define BR              F2(0, 0x01)
88 #define RD_Y            F3(2, 0x28)
89 #define WR_Y            F3(2, 0x30)
90
/* Load/store opcode templates (format 3, op = 3), register-offset form. */
91 #define LD32            F3(3, 0x00)
92 #define LD8             F3(3, 0x01)
93 #define LD16            F3(3, 0x02)
94 #define LD64            F3(3, 0x0b)
95 #define ST32            F3(3, 0x04)
96
/* Pointers are loaded with the 32-bit load in this JIT. */
97 #define LDPTR           LD32
/* Base amount, in bytes, of the stack frame; the BPF scratch words
 * (BPF_MEMWORDS * sizeof(u32)) are added on top of it when allocating.
 */
98 #define BASE_STACKFRAME 96
99
/* Immediate-offset forms of the templates above (IMMED bit set); the
 * offset is supplied through S13().
 */
100 #define LD32I           (LD32 | IMMED)
101 #define LD8I            (LD8 | IMMED)
102 #define LD16I           (LD16 | IMMED)
103 #define LD64I           (LD64 | IMMED)
104 #define LDPTRI          (LDPTR | IMMED)
105 #define ST32I           (ST32 | IMMED)
106
/* Basic instruction emitters.  Every emit_*() macro appends instruction
 * words through a u32 pointer named 'prog' that must be in scope at the
 * point of use, and advances it.
 */

/* No-op, encoded as "sethi 0, %g0". */
107 #define emit_nop()              \
108 do {                            \
109         *prog++ = SETHI(0, G0); \
110 } while (0)
111
/* r_A = 0 - r_A */
112 #define emit_neg()                                      \
113 do {    /* sub %g0, r_A, r_A */                         \
114         *prog++ = SUB | RS1(G0) | RS2(r_A) | RD(r_A);   \
115 } while (0)
116
/* TO = FROM */
117 #define emit_reg_move(FROM, TO)                         \
118 do {    /* or %g0, FROM, TO */                          \
119         *prog++ = OR | RS1(G0) | RS2(FROM) | RD(TO);    \
120 } while (0)
121
/* REG = 0 */
122 #define emit_clear(REG)                                 \
123 do {    /* or %g0, %g0, REG */                          \
124         *prog++ = OR | RS1(G0) | RS2(G0) | RD(REG);     \
125 } while (0)
126
/* REG = K, always as the two-instruction sethi/or pair (8 bytes of code).
 * K is expanded twice, so it must be free of side effects.
 */
127 #define emit_set_const(K, REG)                                  \
128 do {    /* sethi %hi(K), REG */                                 \
129         *prog++ = SETHI(K, REG);                                \
130         /* or REG, %lo(K), REG */                               \
131         *prog++ = OR_LO(K, REG);                                \
132 } while (0)
133
134         /* Emit
135          *
136          *      OP      r_A, r_X, r_A
137          */
/* Also records SEEN_XREG in the 'seen' variable of the enclosing scope. */
138 #define emit_alu_X(OPCODE)                                      \
139 do {                                                            \
140         seen |= SEEN_XREG;                                      \
141         *prog++ = OPCODE | RS1(r_A) | RS2(r_X) | RD(r_A);       \
142 } while (0)
143
144         /* Emit either:
145          *
146          *      OP      r_A, K, r_A
147          *
148          * or
149          *
150          *      sethi   %hi(K), r_TMP
151          *      or      r_TMP, %lo(K), r_TMP
152          *      OP      r_A, r_TMP, r_A
153          *
154          * depending upon whether K fits in a signed 13-bit
155          * immediate instruction field.  Emit nothing if K
156          * is zero, unless OPCODE is AND or MUL.
157          */
/* K and OPCODE are expanded several times; pass side-effect-free
 * expressions.  The long form clobbers r_TMP.
 */
158 #define emit_alu_K(OPCODE, K)                                   \
159 do {                                                            \
160         if (K || OPCODE == AND || OPCODE == MUL) {              \
161                 unsigned int _insn = OPCODE;                    \
162                 _insn |= RS1(r_A) | RD(r_A);                    \
163                 if (is_simm13(K)) {                             \
164                         *prog++ = _insn | IMMED | S13(K);       \
165                 } else {                                        \
166                         emit_set_const(K, r_TMP);               \
167                         *prog++ = _insn | RS2(r_TMP);           \
168                 }                                               \
169         }                                                       \
170 } while (0)
171
/* DEST = K.  Emits a single "or %g0, K, DEST" when K fits a signed 13-bit
 * immediate, otherwise the two-instruction sethi/or sequence from
 * emit_set_const().  K is expanded more than once.
 */
172 #define emit_loadimm(K, DEST)                                           \
173 do {                                                                    \
174         if (is_simm13(K)) {                                             \
175                 /* or %g0, K, DEST */                                   \
176                 *prog++ = OR | IMMED | RS1(G0) | S13(K) | RD(DEST);     \
177         } else {                                                        \
178                 emit_set_const(K, DEST);                                \
179         }                                                               \
180 } while (0)
181
/* Structure-field loads: DEST = ((STRUCT *)BASE)->FIELD, where BASE and
 * DEST are register numbers.  The field offset is taken with offsetof()
 * and encoded through S13(), which masks it to 13 bits without a range
 * check.  Except for __emit_load8(), each variant has a BUILD_BUG_ON that
 * fails the build if the field size does not match the load width.
 */
182 #define emit_loadptr(BASE, STRUCT, FIELD, DEST)                         \
183 do {    unsigned int _off = offsetof(STRUCT, FIELD);                    \
184         BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(void *));    \
185         *prog++ = LDPTRI | RS1(BASE) | S13(_off) | RD(DEST);            \
186 } while (0)
187
188 #define emit_load32(BASE, STRUCT, FIELD, DEST)                          \
189 do {    unsigned int _off = offsetof(STRUCT, FIELD);                    \
190         BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u32));       \
191         *prog++ = LD32I | RS1(BASE) | S13(_off) | RD(DEST);             \
192 } while (0)
193
194 #define emit_load16(BASE, STRUCT, FIELD, DEST)                          \
195 do {    unsigned int _off = offsetof(STRUCT, FIELD);                    \
196         BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u16));       \
197         *prog++ = LD16I | RS1(BASE) | S13(_off) | RD(DEST);             \
198 } while (0)
199
/* Byte load without the field-size check, for callers that pass something
 * FIELD_SIZEOF cannot be applied to.
 */
200 #define __emit_load8(BASE, STRUCT, FIELD, DEST)                         \
201 do {    unsigned int _off = offsetof(STRUCT, FIELD);                    \
202         *prog++ = LD8I | RS1(BASE) | S13(_off) | RD(DEST);              \
203 } while (0)
204
/* Size-checked byte load built on __emit_load8(). */
205 #define emit_load8(BASE, STRUCT, FIELD, DEST)                           \
206 do {    BUILD_BUG_ON(FIELD_SIZEOF(STRUCT, FIELD) != sizeof(u8));        \
207         __emit_load8(BASE, STRUCT, FIELD, DEST);                        \
208 } while (0)
209
/* Access to the BPF scratch memory words.  A slot is addressed relative
 * to SP at byte offset (BIAS - OFF); callers pass OFF as the word index
 * times 4.  emit_ldmem loads the slot into DEST, emit_stmem stores SRC
 * into it.
 */
210 #define BIAS (-4)
211
212 #define emit_ldmem(OFF, DEST)                                           \
213 do {    *prog++ = LD32I | RS1(SP) | S13(BIAS - (OFF)) | RD(DEST);       \
214 } while (0)
215
216 #define emit_stmem(OFF, SRC)                                            \
217 do {    *prog++ = ST32I | RS1(SP) | S13(BIAS - (OFF)) | RD(SRC);        \
218 } while (0)
219
/* REG = current CPU number.  With CONFIG_SMP it is read from the 'cpu'
 * field of struct thread_info addressed through G6; otherwise REG is
 * simply cleared to 0.
 */
220 #ifdef CONFIG_SMP
221 #define emit_load_cpu(REG)                                              \
222         emit_load32(G6, struct thread_info, cpu, REG)
223 #else
224 #define emit_load_cpu(REG)      emit_clear(REG)
225 #endif
226
/* Convenience wrappers: load a struct sk_buff field using r_SKB as the
 * base register.
 */
227 #define emit_skb_loadptr(FIELD, DEST) \
228         emit_loadptr(r_SKB, struct sk_buff, FIELD, DEST)
229 #define emit_skb_load32(FIELD, DEST) \
230         emit_load32(r_SKB, struct sk_buff, FIELD, DEST)
231 #define emit_skb_load16(FIELD, DEST) \
232         emit_load16(r_SKB, struct sk_buff, FIELD, DEST)
233 #define __emit_skb_load8(FIELD, DEST) \
234         __emit_load8(r_SKB, struct sk_buff, FIELD, DEST)
235 #define emit_skb_load8(FIELD, DEST) \
236         emit_load8(r_SKB, struct sk_buff, FIELD, DEST)
237
/* Control-transfer and %y emitters.  Besides 'prog', emit_call() and
 * emit_branch() read 'image', 'addrs' and 'i' from the enclosing scope.
 */

/* jmpl BASE + IMM_OFF, LREG */
238 #define emit_jmpl(BASE, IMM_OFF, LREG) \
239         *prog++ = (JMPL | IMMED | RS1(BASE) | S13(IMM_OFF) | RD(LREG))
240
/* Call FUNC followed by a nop.  The call's own address is taken to be
 * image + addrs[i] - 8, i.e. the call and the nop are assumed to be the
 * last two instructions emitted for BPF instruction i.
 */
241 #define emit_call(FUNC)                                 \
242 do {    void *_here = image + addrs[i] - 8;             \
243         unsigned int _off = (void *)(FUNC) - _here;     \
244         *prog++ = CALL | (((_off) >> 2) & 0x3fffffff);  \
245         emit_nop();                                     \
246 } while (0)
247
/* Branch to DEST (an offset on the same scale as addrs[]).  The branch is
 * assumed to sit at addrs[i] - 8; the caller emits the following
 * instruction itself and adjusts DEST when the branch is placed earlier.
 */
248 #define emit_branch(BR_OPC, DEST)                       \
249 do {    unsigned int _here = addrs[i] - 8;              \
250         *prog++ = BR_OPC | WDISP22((DEST) - _here);     \
251 } while (0)
252
/* Branch with an explicit byte displacement OFF relative to the branch. */
253 #define emit_branch_off(BR_OPC, OFF)                    \
254 do {    *prog++ = BR_OPC | WDISP22(OFF);                \
255 } while (0)
256
/* Unconditional branch to DEST. */
257 #define emit_jump(DEST)         emit_branch(BA, DEST)
258
/* Read %y into REG / write REG to %y (immediate operand 0).  These expand
 * to a bare assignment, not a do/while block.
 */
259 #define emit_read_y(REG)        *prog++ = RD_Y | RD(REG)
260 #define emit_write_y(REG)       *prog++ = WR_Y | IMMED | RS1(REG) | S13(0)
261
/* Compare/test emitters.  Each appends one subcc/andcc instruction through
 * the caller's 'prog' pointer with %g0 as destination, so only the
 * condition codes, consumed by a following branch, are of interest.
 *
 *   emit_cmp(R1, R2)    - subcc R1, R2, %g0
 *   emit_cmpi(R1, IMM)  - subcc R1, IMM, %g0   (IMM masked to 13 bits)
 *   emit_btst(R1, R2)   - andcc R1, R2, %g0
 *   emit_btsti(R1, IMM) - andcc R1, IMM, %g0   (IMM masked to 13 bits)
 *
 * None of the expansions ends in a semicolon; the caller supplies it.
 * That keeps all four usable as the body of an unbraced if/else.
 */
#define emit_cmp(R1, R2) \
        *prog++ = (SUBCC | RS1(R1) | RS2(R2) | RD(G0))

#define emit_cmpi(R1, IMM) \
        *prog++ = (SUBCC | IMMED | RS1(R1) | S13(IMM) | RD(G0))

#define emit_btst(R1, R2) \
        *prog++ = (ANDCC | RS1(R1) | RS2(R2) | RD(G0))

#define emit_btsti(R1, IMM) \
        *prog++ = (ANDCC | IMMED | RS1(R1) | S13(IMM) | RD(G0))
273
/* Three-operand arithmetic emitters: R3 = R1 <op> R2, or R3 = R1 <op> IMM
 * for the "i" variants (IMM is masked to 13 bits by S13()).  Each expands
 * to a bare assignment through 'prog'; the caller supplies the semicolon.
 */
274 #define emit_sub(R1, R2, R3) \
275         *prog++ = (SUB | RS1(R1) | RS2(R2) | RD(R3))
276
277 #define emit_subi(R1, IMM, R3) \
278         *prog++ = (SUB | IMMED | RS1(R1) | S13(IMM) | RD(R3))
279
280 #define emit_add(R1, R2, R3) \
281         *prog++ = (ADD | RS1(R1) | RS2(R2) | RD(R3))
282
283 #define emit_addi(R1, IMM, R3) \
284         *prog++ = (ADD | IMMED | RS1(R1) | S13(IMM) | RD(R3))
285
286 #define emit_and(R1, R2, R3) \
287         *prog++ = (AND | RS1(R1) | RS2(R2) | RD(R3))
288
289 #define emit_andi(R1, IMM, R3) \
290         *prog++ = (AND | IMMED | RS1(R1) | S13(IMM) | RD(R3))
291
/* Stack frame adjustment: SP -= SZ to allocate, SP += SZ to release.
 * SZ goes through S13(), so it must fit the 13-bit immediate.
 */
292 #define emit_alloc_stack(SZ) \
293         *prog++ = (SUB | IMMED | RS1(SP) | S13(SZ) | RD(SP))
294
295 #define emit_release_stack(SZ) \
296         *prog++ = (ADD | IMMED | RS1(SP) | S13(SZ) | RD(SP))
297
298 /* A note about branch offset calculations.  The addrs[] array,
299  * indexed by BPF instruction, records the address after all the
300  * sparc instructions emitted for that BPF instruction.
301  *
302  * The most common case is to emit a branch at the end of such
303  * a code sequence.  So this would be two instructions, the
304  * branch and its delay slot.
305  *
306  * Therefore by default the branch emitters calculate the branch
307  * offset field as:
308  *
309  *      destination - (addrs[i] - 8)
310  *
311  * This "addrs[i] - 8" is the address of the branch itself or
312  * what "." would be in assembler notation.  The "8" part is
313  * how we take into consideration the branch and its delay
314  * slot mentioned above.
315  *
316  * Sometimes we need to emit a branch earlier in the code
317  * sequence.  And in these situations we adjust "destination"
318  * to accommodate this difference.  For example, if we needed
319  * to emit a branch (and its delay slot) right before the
320  * final instruction emitted for a BPF opcode, we'd use
321  * "destination + 4" instead of just plain "destination" above.
322  *
323  * This is why you see all of these funny emit_branch() and
324  * emit_jump() calls with adjusted offsets.
325  */
326
327 void bpf_jit_compile(struct bpf_prog *fp)
328 {
329         unsigned int cleanup_addr, proglen, oldproglen = 0;
330         u32 temp[8], *prog, *func, seen = 0, pass;
331         const struct sock_filter *filter = fp->insns;
332         int i, flen = fp->len, pc_ret0 = -1;
333         unsigned int *addrs;
334         void *image;
335
336         if (!bpf_jit_enable)
337                 return;
338
339         addrs = kmalloc(flen * sizeof(*addrs), GFP_KERNEL);
340         if (addrs == NULL)
341                 return;
342
343         /* Before first pass, make a rough estimation of addrs[]
344          * each bpf instruction is translated to less than 64 bytes
345          */
346         for (proglen = 0, i = 0; i < flen; i++) {
347                 proglen += 64;
348                 addrs[i] = proglen;
349         }
350         cleanup_addr = proglen; /* epilogue address */
351         image = NULL;
352         for (pass = 0; pass < 10; pass++) {
353                 u8 seen_or_pass0 = (pass == 0) ? (SEEN_XREG | SEEN_DATAREF | SEEN_MEM) : seen;
354
355                 /* no prologue/epilogue for trivial filters (RET something) */
356                 proglen = 0;
357                 prog = temp;
358
359                 /* Prologue */
360                 if (seen_or_pass0) {
361                         if (seen_or_pass0 & SEEN_MEM) {
362                                 unsigned int sz = BASE_STACKFRAME;
363                                 sz += BPF_MEMWORDS * sizeof(u32);
364                                 emit_alloc_stack(sz);
365                         }
366
367                         /* Make sure we dont leek kernel memory. */
368                         if (seen_or_pass0 & SEEN_XREG)
369                                 emit_clear(r_X);
370
371                         /* If this filter needs to access skb data,
372                          * load %o4 and %o5 with:
373                          *  %o4 = skb->len - skb->data_len
374                          *  %o5 = skb->data
375                          * And also back up %o7 into r_saved_O7 so we can
376                          * invoke the stubs using 'call'.
377                          */
378                         if (seen_or_pass0 & SEEN_DATAREF) {
379                                 emit_load32(r_SKB, struct sk_buff, len, r_HEADLEN);
380                                 emit_load32(r_SKB, struct sk_buff, data_len, r_TMP);
381                                 emit_sub(r_HEADLEN, r_TMP, r_HEADLEN);
382                                 emit_loadptr(r_SKB, struct sk_buff, data, r_SKB_DATA);
383                         }
384                 }
385                 emit_reg_move(O7, r_saved_O7);
386
387                 /* Make sure we dont leak kernel information to the user. */
388                 if (bpf_needs_clear_a(&filter[0]))
389                         emit_clear(r_A); /* A = 0 */
390
391                 for (i = 0; i < flen; i++) {
392                         unsigned int K = filter[i].k;
393                         unsigned int t_offset;
394                         unsigned int f_offset;
395                         u32 t_op, f_op;
396                         u16 code = bpf_anc_helper(&filter[i]);
397                         int ilen;
398
399                         switch (code) {
400                         case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */
401                                 emit_alu_X(ADD);
402                                 break;
403                         case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */
404                                 emit_alu_K(ADD, K);
405                                 break;
406                         case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */
407                                 emit_alu_X(SUB);
408                                 break;
409                         case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
410                                 emit_alu_K(SUB, K);
411                                 break;
412                         case BPF_ALU | BPF_AND | BPF_X: /* A &= X */
413                                 emit_alu_X(AND);
414                                 break;
415                         case BPF_ALU | BPF_AND | BPF_K: /* A &= K */
416                                 emit_alu_K(AND, K);
417                                 break;
418                         case BPF_ALU | BPF_OR | BPF_X:  /* A |= X */
419                                 emit_alu_X(OR);
420                                 break;
421                         case BPF_ALU | BPF_OR | BPF_K:  /* A |= K */
422                                 emit_alu_K(OR, K);
423                                 break;
424                         case BPF_ANC | SKF_AD_ALU_XOR_X: /* A ^= X; */
425                         case BPF_ALU | BPF_XOR | BPF_X:
426                                 emit_alu_X(XOR);
427                                 break;
428                         case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
429                                 emit_alu_K(XOR, K);
430                                 break;
431                         case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X */
432                                 emit_alu_X(SLL);
433                                 break;
434                         case BPF_ALU | BPF_LSH | BPF_K: /* A <<= K */
435                                 emit_alu_K(SLL, K);
436                                 break;
437                         case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X */
438                                 emit_alu_X(SRL);
439                                 break;
440                         case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K */
441                                 emit_alu_K(SRL, K);
442                                 break;
443                         case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */
444                                 emit_alu_X(MUL);
445                                 break;
446                         case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
447                                 emit_alu_K(MUL, K);
448                                 break;
449                         case BPF_ALU | BPF_DIV | BPF_K: /* A /= K with K != 0*/
450                                 if (K == 1)
451                                         break;
452                                 emit_write_y(G0);
453                                 /* The Sparc v8 architecture requires
454                                  * three instructions between a %y
455                                  * register write and the first use.
456                                  */
457                                 emit_nop();
458                                 emit_nop();
459                                 emit_nop();
460                                 emit_alu_K(DIV, K);
461                                 break;
462                         case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
463                                 emit_cmpi(r_X, 0);
464                                 if (pc_ret0 > 0) {
465                                         t_offset = addrs[pc_ret0 - 1];
466                                         emit_branch(BE, t_offset + 20);
467                                         emit_nop(); /* delay slot */
468                                 } else {
469                                         emit_branch_off(BNE, 16);
470                                         emit_nop();
471                                         emit_jump(cleanup_addr + 20);
472                                         emit_clear(r_A);
473                                 }
474                                 emit_write_y(G0);
475                                 /* The Sparc v8 architecture requires
476                                  * three instructions between a %y
477                                  * register write and the first use.
478                                  */
479                                 emit_nop();
480                                 emit_nop();
481                                 emit_nop();
482                                 emit_alu_X(DIV);
483                                 break;
484                         case BPF_ALU | BPF_NEG:
485                                 emit_neg();
486                                 break;
487                         case BPF_RET | BPF_K:
488                                 if (!K) {
489                                         if (pc_ret0 == -1)
490                                                 pc_ret0 = i;
491                                         emit_clear(r_A);
492                                 } else {
493                                         emit_loadimm(K, r_A);
494                                 }
495                                 /* Fallthrough */
496                         case BPF_RET | BPF_A:
497                                 if (seen_or_pass0) {
498                                         if (i != flen - 1) {
499                                                 emit_jump(cleanup_addr);
500                                                 emit_nop();
501                                                 break;
502                                         }
503                                         if (seen_or_pass0 & SEEN_MEM) {
504                                                 unsigned int sz = BASE_STACKFRAME;
505                                                 sz += BPF_MEMWORDS * sizeof(u32);
506                                                 emit_release_stack(sz);
507                                         }
508                                 }
509                                 /* jmpl %r_saved_O7 + 8, %g0 */
510                                 emit_jmpl(r_saved_O7, 8, G0);
511                                 emit_reg_move(r_A, O0); /* delay slot */
512                                 break;
513                         case BPF_MISC | BPF_TAX:
514                                 seen |= SEEN_XREG;
515                                 emit_reg_move(r_A, r_X);
516                                 break;
517                         case BPF_MISC | BPF_TXA:
518                                 seen |= SEEN_XREG;
519                                 emit_reg_move(r_X, r_A);
520                                 break;
521                         case BPF_ANC | SKF_AD_CPU:
522                                 emit_load_cpu(r_A);
523                                 break;
524                         case BPF_ANC | SKF_AD_PROTOCOL:
525                                 emit_skb_load16(protocol, r_A);
526                                 break;
527                         case BPF_ANC | SKF_AD_PKTTYPE:
528                                 __emit_skb_load8(__pkt_type_offset, r_A);
529                                 emit_andi(r_A, PKT_TYPE_MAX, r_A);
530                                 emit_alu_K(SRL, 5);
531                                 break;
532                         case BPF_ANC | SKF_AD_IFINDEX:
533                                 emit_skb_loadptr(dev, r_A);
534                                 emit_cmpi(r_A, 0);
535                                 emit_branch(BE_PTR, cleanup_addr + 4);
536                                 emit_nop();
537                                 emit_load32(r_A, struct net_device, ifindex, r_A);
538                                 break;
539                         case BPF_ANC | SKF_AD_MARK:
540                                 emit_skb_load32(mark, r_A);
541                                 break;
542                         case BPF_ANC | SKF_AD_QUEUE:
543                                 emit_skb_load16(queue_mapping, r_A);
544                                 break;
545                         case BPF_ANC | SKF_AD_HATYPE:
546                                 emit_skb_loadptr(dev, r_A);
547                                 emit_cmpi(r_A, 0);
548                                 emit_branch(BE_PTR, cleanup_addr + 4);
549                                 emit_nop();
550                                 emit_load16(r_A, struct net_device, type, r_A);
551                                 break;
552                         case BPF_ANC | SKF_AD_RXHASH:
553                                 emit_skb_load32(hash, r_A);
554                                 break;
555                         case BPF_ANC | SKF_AD_VLAN_TAG:
556                         case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
557                                 emit_skb_load16(vlan_tci, r_A);
558                                 if (code != (BPF_ANC | SKF_AD_VLAN_TAG)) {
559                                         emit_alu_K(SRL, 12);
560                                         emit_andi(r_A, 1, r_A);
561                                 } else {
562                                         emit_loadimm(~VLAN_TAG_PRESENT, r_TMP);
563                                         emit_and(r_A, r_TMP, r_A);
564                                 }
565                                 break;
566                         case BPF_LD | BPF_W | BPF_LEN:
567                                 emit_skb_load32(len, r_A);
568                                 break;
569                         case BPF_LDX | BPF_W | BPF_LEN:
570                                 emit_skb_load32(len, r_X);
571                                 break;
572                         case BPF_LD | BPF_IMM:
573                                 emit_loadimm(K, r_A);
574                                 break;
575                         case BPF_LDX | BPF_IMM:
576                                 emit_loadimm(K, r_X);
577                                 break;
578                         case BPF_LD | BPF_MEM:
579                                 seen |= SEEN_MEM;
580                                 emit_ldmem(K * 4, r_A);
581                                 break;
582                         case BPF_LDX | BPF_MEM:
583                                 seen |= SEEN_MEM | SEEN_XREG;
584                                 emit_ldmem(K * 4, r_X);
585                                 break;
586                         case BPF_ST:
587                                 seen |= SEEN_MEM;
588                                 emit_stmem(K * 4, r_A);
589                                 break;
590                         case BPF_STX:
591                                 seen |= SEEN_MEM | SEEN_XREG;
592                                 emit_stmem(K * 4, r_X);
593                                 break;
594
595 #define CHOOSE_LOAD_FUNC(K, func) \
596         ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
597
598                         case BPF_LD | BPF_W | BPF_ABS:
599                                 func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_word);
600 common_load:                    seen |= SEEN_DATAREF;
601                                 emit_loadimm(K, r_OFF);
602                                 emit_call(func);
603                                 break;
604                         case BPF_LD | BPF_H | BPF_ABS:
605                                 func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_half);
606                                 goto common_load;
607                         case BPF_LD | BPF_B | BPF_ABS:
608                                 func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte);
609                                 goto common_load;
610                         case BPF_LDX | BPF_B | BPF_MSH:
611                                 func = CHOOSE_LOAD_FUNC(K, bpf_jit_load_byte_msh);
612                                 goto common_load;
613                         case BPF_LD | BPF_W | BPF_IND:
614                                 func = bpf_jit_load_word;
615 common_load_ind:                seen |= SEEN_DATAREF | SEEN_XREG;
616                                 if (K) {
617                                         if (is_simm13(K)) {
618                                                 emit_addi(r_X, K, r_OFF);
619                                         } else {
620                                                 emit_loadimm(K, r_TMP);
621                                                 emit_add(r_X, r_TMP, r_OFF);
622                                         }
623                                 } else {
624                                         emit_reg_move(r_X, r_OFF);
625                                 }
626                                 emit_call(func);
627                                 break;
628                         case BPF_LD | BPF_H | BPF_IND:
629                                 func = bpf_jit_load_half;
630                                 goto common_load_ind;
631                         case BPF_LD | BPF_B | BPF_IND:
632                                 func = bpf_jit_load_byte;
633                                 goto common_load_ind;
634                         case BPF_JMP | BPF_JA:
635                                 emit_jump(addrs[i + K]);
636                                 emit_nop();
637                                 break;
638
639 #define COND_SEL(CODE, TOP, FOP)        \
640         case CODE:                      \
641                 t_op = TOP;             \
642                 f_op = FOP;             \
643                 goto cond_branch
644
645                         COND_SEL(BPF_JMP | BPF_JGT | BPF_K, BGU, BLEU);
646                         COND_SEL(BPF_JMP | BPF_JGE | BPF_K, BGEU, BLU);
647                         COND_SEL(BPF_JMP | BPF_JEQ | BPF_K, BE, BNE);
648                         COND_SEL(BPF_JMP | BPF_JSET | BPF_K, BNE, BE);
649                         COND_SEL(BPF_JMP | BPF_JGT | BPF_X, BGU, BLEU);
650                         COND_SEL(BPF_JMP | BPF_JGE | BPF_X, BGEU, BLU);
651                         COND_SEL(BPF_JMP | BPF_JEQ | BPF_X, BE, BNE);
652                         COND_SEL(BPF_JMP | BPF_JSET | BPF_X, BNE, BE);
653
654 cond_branch:                    f_offset = addrs[i + filter[i].jf];
655                                 t_offset = addrs[i + filter[i].jt];
656
657                                 /* same targets, can avoid doing the test :) */
658                                 if (filter[i].jt == filter[i].jf) {
659                                         emit_jump(t_offset);
660                                         emit_nop();
661                                         break;
662                                 }
663
664                                 switch (code) {
665                                 case BPF_JMP | BPF_JGT | BPF_X:
666                                 case BPF_JMP | BPF_JGE | BPF_X:
667                                 case BPF_JMP | BPF_JEQ | BPF_X:
668                                         seen |= SEEN_XREG;
669                                         emit_cmp(r_A, r_X);
670                                         break;
671                                 case BPF_JMP | BPF_JSET | BPF_X:
672                                         seen |= SEEN_XREG;
673                                         emit_btst(r_A, r_X);
674                                         break;
675                                 case BPF_JMP | BPF_JEQ | BPF_K:
676                                 case BPF_JMP | BPF_JGT | BPF_K:
677                                 case BPF_JMP | BPF_JGE | BPF_K:
678                                         if (is_simm13(K)) {
679                                                 emit_cmpi(r_A, K);
680                                         } else {
681                                                 emit_loadimm(K, r_TMP);
682                                                 emit_cmp(r_A, r_TMP);
683                                         }
684                                         break;
685                                 case BPF_JMP | BPF_JSET | BPF_K:
686                                         if (is_simm13(K)) {
687                                                 emit_btsti(r_A, K);
688                                         } else {
689                                                 emit_loadimm(K, r_TMP);
690                                                 emit_btst(r_A, r_TMP);
691                                         }
692                                         break;
693                                 }
694                                 if (filter[i].jt != 0) {
695                                         if (filter[i].jf)
696                                                 t_offset += 8;
697                                         emit_branch(t_op, t_offset);
698                                         emit_nop(); /* delay slot */
699                                         if (filter[i].jf) {
700                                                 emit_jump(f_offset);
701                                                 emit_nop();
702                                         }
703                                         break;
704                                 }
705                                 emit_branch(f_op, f_offset);
706                                 emit_nop(); /* delay slot */
707                                 break;
708
709                         default:
710                                 /* hmm, too complex filter, give up with jit compiler */
711                                 goto out;
712                         }
713                         ilen = (void *) prog - (void *) temp;
714                         if (image) {
715                                 if (unlikely(proglen + ilen > oldproglen)) {
716                                         pr_err("bpb_jit_compile fatal error\n");
717                                         kfree(addrs);
718                                         module_memfree(image);
719                                         return;
720                                 }
721                                 memcpy(image + proglen, temp, ilen);
722                         }
723                         proglen += ilen;
724                         addrs[i] = proglen;
725                         prog = temp;
726                 }
727                 /* last bpf instruction is always a RET :
728                  * use it to give the cleanup instruction(s) addr
729                  */
730                 cleanup_addr = proglen - 8; /* jmpl; mov r_A,%o0; */
731                 if (seen_or_pass0 & SEEN_MEM)
732                         cleanup_addr -= 4; /* add %sp, X, %sp; */
733
734                 if (image) {
735                         if (proglen != oldproglen)
736                                 pr_err("bpb_jit_compile proglen=%u != oldproglen=%u\n",
737                                        proglen, oldproglen);
738                         break;
739                 }
740                 if (proglen == oldproglen) {
741                         image = module_alloc(proglen);
742                         if (!image)
743                                 goto out;
744                 }
745                 oldproglen = proglen;
746         }
747
748         if (bpf_jit_enable > 1)
749                 bpf_jit_dump(flen, proglen, pass + 1, image);
750
751         if (image) {
752                 fp->bpf_func = (void *)image;
753                 fp->jited = 1;
754         }
755 out:
756         kfree(addrs);
757         return;
758 }
759
760 void bpf_jit_free(struct bpf_prog *fp)
761 {
762         if (fp->jited)
763                 module_memfree(fp->bpf_func);
764
765         bpf_prog_unlock_free(fp);
766 }