net: filter: rename sk_chk_filter() -> bpf_check_classic()
kernel/bpf/core.c
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *      Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *      Jay Schulist <jschlst@samba.org>
 *      Alexei Starovoitov <ast@plumgrid.com>
 *      Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */
#include <linux/filter.h>
#include <linux/skbuff.h>
#include <asm/unaligned.h>

/* Registers */
#define BPF_R0  regs[BPF_REG_0]
#define BPF_R1  regs[BPF_REG_1]
#define BPF_R2  regs[BPF_REG_2]
#define BPF_R3  regs[BPF_REG_3]
#define BPF_R4  regs[BPF_REG_4]
#define BPF_R5  regs[BPF_REG_5]
#define BPF_R6  regs[BPF_REG_6]
#define BPF_R7  regs[BPF_REG_7]
#define BPF_R8  regs[BPF_REG_8]
#define BPF_R9  regs[BPF_REG_9]
#define BPF_R10 regs[BPF_REG_10]
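/* Calling convention of the internal BPF registers: R0 holds the return
 * value, R1-R5 carry function arguments and are scratched across calls,
 * R6-R9 are callee-saved, and R10 is the read-only frame pointer.
 */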

/* Named registers */
#define DST     regs[insn->dst_reg]
#define SRC     regs[insn->src_reg]
#define FP      regs[BPF_REG_FP]
#define ARG1    regs[BPF_REG_ARG1]
#define CTX     regs[BPF_REG_CTX]
#define IMM     insn->imm

/* No hurry in this branch
 *
 * Exported for the bpf jit load helper.
 */
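/* Handles the classic BPF "negative offset" loads: k encodes an offset
 * relative to the SKF_NET_OFF (network header) or SKF_LL_OFF (link-layer
 * header) magic bases rather than relative to skb->data.
 */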
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, unsigned int size)
{
        u8 *ptr = NULL;

        if (k >= SKF_NET_OFF)
                ptr = skb_network_header(skb) + k - SKF_NET_OFF;
        else if (k >= SKF_LL_OFF)
                ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
        if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
                return ptr;

        return NULL;
}

/* Base function for offset calculation. Needs to go into .text section,
 * therefore keeping it non-static as well; will also be used by JITs
 * anyway later on, so do not let the compiler omit it.
 */
noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
        return 0;
}

/**
 *      __sk_run_filter - run a filter on a given context
 *      @ctx: buffer to run the filter on
 *      @insn: filter to apply
 *
 * Decode and apply filter instructions to the skb->data. Return length to
 * keep, 0 for none. @ctx is the data we are operating on, @insn is the
 * array of filter instructions.
 */
static unsigned int __sk_run_filter(void *ctx, const struct bpf_insn *insn)
{
        u64 stack[MAX_BPF_STACK / sizeof(u64)];
        u64 regs[MAX_BPF_REG], tmp;
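        /* Instruction dispatch uses gcc's computed-goto extension: the
         * table below maps every opcode byte to the address of its handler
         * label (&&label), and "goto *jumptable[insn->code]" jumps straight
         * to that handler instead of going through a central switch.
         */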
        static const void *jumptable[256] = {
                [0 ... 255] = &&default_label,
                /* Now overwrite non-defaults ... */
                /* 32 bit ALU operations */
                [BPF_ALU | BPF_ADD | BPF_X] = &&ALU_ADD_X,
                [BPF_ALU | BPF_ADD | BPF_K] = &&ALU_ADD_K,
                [BPF_ALU | BPF_SUB | BPF_X] = &&ALU_SUB_X,
                [BPF_ALU | BPF_SUB | BPF_K] = &&ALU_SUB_K,
                [BPF_ALU | BPF_AND | BPF_X] = &&ALU_AND_X,
                [BPF_ALU | BPF_AND | BPF_K] = &&ALU_AND_K,
                [BPF_ALU | BPF_OR | BPF_X]  = &&ALU_OR_X,
                [BPF_ALU | BPF_OR | BPF_K]  = &&ALU_OR_K,
                [BPF_ALU | BPF_LSH | BPF_X] = &&ALU_LSH_X,
                [BPF_ALU | BPF_LSH | BPF_K] = &&ALU_LSH_K,
                [BPF_ALU | BPF_RSH | BPF_X] = &&ALU_RSH_X,
                [BPF_ALU | BPF_RSH | BPF_K] = &&ALU_RSH_K,
                [BPF_ALU | BPF_XOR | BPF_X] = &&ALU_XOR_X,
                [BPF_ALU | BPF_XOR | BPF_K] = &&ALU_XOR_K,
                [BPF_ALU | BPF_MUL | BPF_X] = &&ALU_MUL_X,
                [BPF_ALU | BPF_MUL | BPF_K] = &&ALU_MUL_K,
                [BPF_ALU | BPF_MOV | BPF_X] = &&ALU_MOV_X,
                [BPF_ALU | BPF_MOV | BPF_K] = &&ALU_MOV_K,
                [BPF_ALU | BPF_DIV | BPF_X] = &&ALU_DIV_X,
                [BPF_ALU | BPF_DIV | BPF_K] = &&ALU_DIV_K,
                [BPF_ALU | BPF_MOD | BPF_X] = &&ALU_MOD_X,
                [BPF_ALU | BPF_MOD | BPF_K] = &&ALU_MOD_K,
                [BPF_ALU | BPF_NEG] = &&ALU_NEG,
                [BPF_ALU | BPF_END | BPF_TO_BE] = &&ALU_END_TO_BE,
                [BPF_ALU | BPF_END | BPF_TO_LE] = &&ALU_END_TO_LE,
                /* 64 bit ALU operations */
                [BPF_ALU64 | BPF_ADD | BPF_X] = &&ALU64_ADD_X,
                [BPF_ALU64 | BPF_ADD | BPF_K] = &&ALU64_ADD_K,
                [BPF_ALU64 | BPF_SUB | BPF_X] = &&ALU64_SUB_X,
                [BPF_ALU64 | BPF_SUB | BPF_K] = &&ALU64_SUB_K,
                [BPF_ALU64 | BPF_AND | BPF_X] = &&ALU64_AND_X,
                [BPF_ALU64 | BPF_AND | BPF_K] = &&ALU64_AND_K,
                [BPF_ALU64 | BPF_OR | BPF_X] = &&ALU64_OR_X,
                [BPF_ALU64 | BPF_OR | BPF_K] = &&ALU64_OR_K,
                [BPF_ALU64 | BPF_LSH | BPF_X] = &&ALU64_LSH_X,
                [BPF_ALU64 | BPF_LSH | BPF_K] = &&ALU64_LSH_K,
                [BPF_ALU64 | BPF_RSH | BPF_X] = &&ALU64_RSH_X,
                [BPF_ALU64 | BPF_RSH | BPF_K] = &&ALU64_RSH_K,
                [BPF_ALU64 | BPF_XOR | BPF_X] = &&ALU64_XOR_X,
                [BPF_ALU64 | BPF_XOR | BPF_K] = &&ALU64_XOR_K,
                [BPF_ALU64 | BPF_MUL | BPF_X] = &&ALU64_MUL_X,
                [BPF_ALU64 | BPF_MUL | BPF_K] = &&ALU64_MUL_K,
                [BPF_ALU64 | BPF_MOV | BPF_X] = &&ALU64_MOV_X,
                [BPF_ALU64 | BPF_MOV | BPF_K] = &&ALU64_MOV_K,
                [BPF_ALU64 | BPF_ARSH | BPF_X] = &&ALU64_ARSH_X,
                [BPF_ALU64 | BPF_ARSH | BPF_K] = &&ALU64_ARSH_K,
                [BPF_ALU64 | BPF_DIV | BPF_X] = &&ALU64_DIV_X,
                [BPF_ALU64 | BPF_DIV | BPF_K] = &&ALU64_DIV_K,
                [BPF_ALU64 | BPF_MOD | BPF_X] = &&ALU64_MOD_X,
                [BPF_ALU64 | BPF_MOD | BPF_K] = &&ALU64_MOD_K,
                [BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
                /* Call instruction */
                [BPF_JMP | BPF_CALL] = &&JMP_CALL,
                /* Jumps */
                [BPF_JMP | BPF_JA] = &&JMP_JA,
                [BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
                [BPF_JMP | BPF_JEQ | BPF_K] = &&JMP_JEQ_K,
                [BPF_JMP | BPF_JNE | BPF_X] = &&JMP_JNE_X,
                [BPF_JMP | BPF_JNE | BPF_K] = &&JMP_JNE_K,
                [BPF_JMP | BPF_JGT | BPF_X] = &&JMP_JGT_X,
                [BPF_JMP | BPF_JGT | BPF_K] = &&JMP_JGT_K,
                [BPF_JMP | BPF_JGE | BPF_X] = &&JMP_JGE_X,
                [BPF_JMP | BPF_JGE | BPF_K] = &&JMP_JGE_K,
                [BPF_JMP | BPF_JSGT | BPF_X] = &&JMP_JSGT_X,
                [BPF_JMP | BPF_JSGT | BPF_K] = &&JMP_JSGT_K,
                [BPF_JMP | BPF_JSGE | BPF_X] = &&JMP_JSGE_X,
                [BPF_JMP | BPF_JSGE | BPF_K] = &&JMP_JSGE_K,
                [BPF_JMP | BPF_JSET | BPF_X] = &&JMP_JSET_X,
                [BPF_JMP | BPF_JSET | BPF_K] = &&JMP_JSET_K,
                /* Program return */
                [BPF_JMP | BPF_EXIT] = &&JMP_EXIT,
                /* Store instructions */
                [BPF_STX | BPF_MEM | BPF_B] = &&STX_MEM_B,
                [BPF_STX | BPF_MEM | BPF_H] = &&STX_MEM_H,
                [BPF_STX | BPF_MEM | BPF_W] = &&STX_MEM_W,
                [BPF_STX | BPF_MEM | BPF_DW] = &&STX_MEM_DW,
                [BPF_STX | BPF_XADD | BPF_W] = &&STX_XADD_W,
                [BPF_STX | BPF_XADD | BPF_DW] = &&STX_XADD_DW,
                [BPF_ST | BPF_MEM | BPF_B] = &&ST_MEM_B,
                [BPF_ST | BPF_MEM | BPF_H] = &&ST_MEM_H,
                [BPF_ST | BPF_MEM | BPF_W] = &&ST_MEM_W,
                [BPF_ST | BPF_MEM | BPF_DW] = &&ST_MEM_DW,
                /* Load instructions */
                [BPF_LDX | BPF_MEM | BPF_B] = &&LDX_MEM_B,
                [BPF_LDX | BPF_MEM | BPF_H] = &&LDX_MEM_H,
                [BPF_LDX | BPF_MEM | BPF_W] = &&LDX_MEM_W,
                [BPF_LDX | BPF_MEM | BPF_DW] = &&LDX_MEM_DW,
                [BPF_LD | BPF_ABS | BPF_W] = &&LD_ABS_W,
                [BPF_LD | BPF_ABS | BPF_H] = &&LD_ABS_H,
                [BPF_LD | BPF_ABS | BPF_B] = &&LD_ABS_B,
                [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W,
                [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H,
                [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
        };
        void *ptr;
        int off;

#define CONT     ({ insn++; goto select_insn; })
#define CONT_JMP ({ insn++; goto select_insn; })
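/* CONT and CONT_JMP are identical today; CONT_JMP marks the continuations
 * taken after a successful branch, so that the two dispatch paths could be
 * tuned separately later without touching every handler.
 */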

        FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)];
        ARG1 = (u64) (unsigned long) ctx;

        /* Registers used in classic BPF programs need to be reset first. */
        regs[BPF_REG_A] = 0;
        regs[BPF_REG_X] = 0;

select_insn:
        goto *jumptable[insn->code];

        /* ALU */
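        /* The ALU() macro below expands into four handlers per arithmetic
         * opcode: 64-bit and 32-bit variants, each with a register (BPF_X)
         * and an immediate (BPF_K) source operand.
         */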
#define ALU(OPCODE, OP)                 \
        ALU64_##OPCODE##_X:             \
                DST = DST OP SRC;       \
                CONT;                   \
        ALU_##OPCODE##_X:               \
                DST = (u32) DST OP (u32) SRC;   \
                CONT;                   \
        ALU64_##OPCODE##_K:             \
                DST = DST OP IMM;       \
                CONT;                   \
        ALU_##OPCODE##_K:               \
                DST = (u32) DST OP (u32) IMM;   \
                CONT;

        ALU(ADD,  +)
        ALU(SUB,  -)
        ALU(AND,  &)
        ALU(OR,   |)
        ALU(LSH, <<)
        ALU(RSH, >>)
        ALU(XOR,  ^)
        ALU(MUL,  *)
#undef ALU
        ALU_NEG:
                DST = (u32) -DST;
                CONT;
        ALU64_NEG:
                DST = -DST;
                CONT;
        ALU_MOV_X:
                DST = (u32) SRC;
                CONT;
        ALU_MOV_K:
                DST = (u32) IMM;
                CONT;
        ALU64_MOV_X:
                DST = SRC;
                CONT;
        ALU64_MOV_K:
                DST = IMM;
                CONT;
        ALU64_ARSH_X:
                (*(s64 *) &DST) >>= SRC;
                CONT;
        ALU64_ARSH_K:
                (*(s64 *) &DST) >>= IMM;
                CONT;
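        /* do_div() divides its first argument in place and returns the
         * remainder, which is why DIV keeps the divided value and MOD
         * routes the dividend through tmp to pick up the remainder instead.
         */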
        ALU64_MOD_X:
                if (unlikely(SRC == 0))
                        return 0;
                tmp = DST;
                DST = do_div(tmp, SRC);
                CONT;
        ALU_MOD_X:
                if (unlikely(SRC == 0))
                        return 0;
                tmp = (u32) DST;
                DST = do_div(tmp, (u32) SRC);
                CONT;
        ALU64_MOD_K:
                tmp = DST;
                DST = do_div(tmp, IMM);
                CONT;
        ALU_MOD_K:
                tmp = (u32) DST;
                DST = do_div(tmp, (u32) IMM);
                CONT;
        ALU64_DIV_X:
                if (unlikely(SRC == 0))
                        return 0;
                do_div(DST, SRC);
                CONT;
        ALU_DIV_X:
                if (unlikely(SRC == 0))
                        return 0;
                tmp = (u32) DST;
                do_div(tmp, (u32) SRC);
                DST = (u32) tmp;
                CONT;
        ALU64_DIV_K:
                do_div(DST, IMM);
                CONT;
        ALU_DIV_K:
                tmp = (u32) DST;
                do_div(tmp, (u32) IMM);
                DST = (u32) tmp;
                CONT;
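        /* BPF_END converts the low 16, 32 or 64 bits of DST (selected by
         * the immediate) to the requested byte order; when the host already
         * uses that byte order this reduces to zero-extending that width.
         */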
        ALU_END_TO_BE:
                switch (IMM) {
                case 16:
                        DST = (__force u16) cpu_to_be16(DST);
                        break;
                case 32:
                        DST = (__force u32) cpu_to_be32(DST);
                        break;
                case 64:
                        DST = (__force u64) cpu_to_be64(DST);
                        break;
                }
                CONT;
        ALU_END_TO_LE:
                switch (IMM) {
                case 16:
                        DST = (__force u16) cpu_to_le16(DST);
                        break;
                case 32:
                        DST = (__force u32) cpu_to_le32(DST);
                        break;
                case 64:
                        DST = (__force u64) cpu_to_le64(DST);
                        break;
                }
                CONT;

        /* CALL */
        JMP_CALL:
                /* Function call scratches BPF_R1-BPF_R5 registers,
                 * preserves BPF_R6-BPF_R9, and stores return value
                 * into BPF_R0.
                 */
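                /* insn->imm holds the helper's address as an offset
                 * relative to __bpf_call_base, so adding the two yields
                 * the function pointer to invoke.
                 */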
                BPF_R0 = (__bpf_call_base + insn->imm)(BPF_R1, BPF_R2, BPF_R3,
                                                       BPF_R4, BPF_R5);
                CONT;

        /* JMP */
        JMP_JA:
                insn += insn->off;
                CONT;
        JMP_JEQ_X:
                if (DST == SRC) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JEQ_K:
                if (DST == IMM) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JNE_X:
                if (DST != SRC) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JNE_K:
                if (DST != IMM) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JGT_X:
                if (DST > SRC) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JGT_K:
                if (DST > IMM) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JGE_X:
                if (DST >= SRC) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JGE_K:
                if (DST >= IMM) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JSGT_X:
                if (((s64) DST) > ((s64) SRC)) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JSGT_K:
                if (((s64) DST) > ((s64) IMM)) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JSGE_X:
                if (((s64) DST) >= ((s64) SRC)) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JSGE_K:
                if (((s64) DST) >= ((s64) IMM)) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JSET_X:
                if (DST & SRC) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_JSET_K:
                if (DST & IMM) {
                        insn += insn->off;
                        CONT_JMP;
                }
                CONT;
        JMP_EXIT:
                return BPF_R0;

        /* STX and ST and LDX */
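        /* LDST() expands into three handlers per access width: STX stores a
         * register, ST stores an immediate, and LDX loads into a register,
         * all addressed as *(SIZE *)(base register + 16-bit offset).
         */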
#define LDST(SIZEOP, SIZE)                                              \
        STX_MEM_##SIZEOP:                                               \
                *(SIZE *)(unsigned long) (DST + insn->off) = SRC;       \
                CONT;                                                   \
        ST_MEM_##SIZEOP:                                                \
                *(SIZE *)(unsigned long) (DST + insn->off) = IMM;       \
                CONT;                                                   \
        LDX_MEM_##SIZEOP:                                               \
                DST = *(SIZE *)(unsigned long) (SRC + insn->off);       \
                CONT;

        LDST(B,   u8)
        LDST(H,  u16)
        LDST(W,  u32)
        LDST(DW, u64)
#undef LDST
        STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */
                atomic_add((u32) SRC, (atomic_t *)(unsigned long)
                           (DST + insn->off));
                CONT;
        STX_XADD_DW: /* lock xadd *(u64 *)(dst_reg + off16) += src_reg */
                atomic64_add((u64) SRC, (atomic64_t *)(unsigned long)
                             (DST + insn->off));
                CONT;
        LD_ABS_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + imm32)) */
                off = IMM;
load_word:
                /* BPF_LD + BPF_ABS and BPF_LD + BPF_IND insns only
                 * appear in programs where ctx == skb. All programs
                 * keep 'ctx' in regs[BPF_REG_CTX] == BPF_R6;
                 * sk_convert_filter() saves it in BPF_R6, and the
                 * internal BPF verifier will check that BPF_R6 ==
                 * ctx.
                 *
                 * BPF_ABS and BPF_IND are wrappers of function calls,
                 * so they scratch BPF_R1-BPF_R5 registers, preserve
                 * BPF_R6-BPF_R9, and store return value into BPF_R0.
                 *
                 * Implicit input:
                 *   ctx == skb == BPF_R6 == CTX
                 *
                 * Explicit input:
                 *   SRC == any register
                 *   IMM == 32-bit immediate
                 *
                 * Output:
                 *   BPF_R0 - 8/16/32-bit skb data converted to cpu endianness
                 */

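                /* bpf_load_pointer() returns a pointer straight into the
                 * skb when the data sits in the linear area, copies it into
                 * &tmp otherwise, and handles negative SKF_* offsets via
                 * the helper above; NULL means the load falls outside the
                 * packet.
                 */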
                ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 4, &tmp);
                if (likely(ptr != NULL)) {
                        BPF_R0 = get_unaligned_be32(ptr);
                        CONT;
                }

                return 0;
        LD_ABS_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + imm32)) */
                off = IMM;
load_half:
                ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 2, &tmp);
                if (likely(ptr != NULL)) {
                        BPF_R0 = get_unaligned_be16(ptr);
                        CONT;
                }

                return 0;
        LD_ABS_B: /* BPF_R0 = *(u8 *) (skb->data + imm32) */
                off = IMM;
load_byte:
                ptr = bpf_load_pointer((struct sk_buff *) (unsigned long) CTX, off, 1, &tmp);
                if (likely(ptr != NULL)) {
                        BPF_R0 = *(u8 *)ptr;
                        CONT;
                }

                return 0;
        LD_IND_W: /* BPF_R0 = ntohl(*(u32 *) (skb->data + src_reg + imm32)) */
                off = IMM + SRC;
                goto load_word;
        LD_IND_H: /* BPF_R0 = ntohs(*(u16 *) (skb->data + src_reg + imm32)) */
                off = IMM + SRC;
                goto load_half;
        LD_IND_B: /* BPF_R0 = *(u8 *) (skb->data + src_reg + imm32) */
                off = IMM + SRC;
                goto load_byte;

        default_label:
                /* If we ever reach this, we have a bug somewhere. */
                WARN_RATELIMIT(1, "unknown opcode %02x\n", insn->code);
                return 0;
}

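/* Weak stub overridden by architectures that provide an internal BPF JIT;
 * when no JIT is built in, fp->bpf_func simply keeps pointing at
 * __sk_run_filter.
 */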
void __weak bpf_int_jit_compile(struct sk_filter *prog)
{
}

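/* Typical life cycle of an internal BPF program: sk_convert_filter()
 * translates a classic filter into this format, sk_filter_select_runtime()
 * below picks the interpreter or a JIT image, SK_RUN_FILTER() executes it,
 * and sk_filter_free() releases it again.
 */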
/**
 *      sk_filter_select_runtime - select execution runtime for BPF program
 *      @fp: sk_filter populated with internal BPF program
 *
 * Try to JIT the internal BPF program; if no JIT is available, fall back to
 * the interpreter. The BPF program will be executed via the SK_RUN_FILTER()
 * macro.
 */
void sk_filter_select_runtime(struct sk_filter *fp)
{
        fp->bpf_func = (void *) __sk_run_filter;

        /* Probe if internal BPF can be JITed */
        bpf_int_jit_compile(fp);
}
EXPORT_SYMBOL_GPL(sk_filter_select_runtime);

/* Free internal BPF program */
void sk_filter_free(struct sk_filter *fp)
{
        bpf_jit_free(fp);
}
EXPORT_SYMBOL_GPL(sk_filter_free);