/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <asm/cmpxchg.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <linux/bpf_trace.h>

/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to correct size returned by
 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);

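/* Callers that do not need a trim limit go through the sk_filter() wrapper;
 * at this point that wrapper lives in include/linux/filter.h and is roughly
 * the following sketch (cap == 1, i.e. never trim the packet below one byte):
 *
 *	static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 *	{
 *		return sk_filter_trim_cap(sk, skb, 1);
 *	}
 */
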
BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb)
{
	return skb_get_poff(skb);
}

BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_0(__get_raw_cpu_id)
{
	return raw_smp_processor_id();
}

static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
	.func		= __get_raw_cpu_id,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
};

static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
			      struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	switch (skb_field) {
	case SKF_AD_MARK:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_PKTTYPE:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
		break;

	case SKF_AD_QUEUE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_VLAN_TAG:
	case SKF_AD_VLAN_TAG_PRESENT:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);

		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, vlan_tci));
		if (skb_field == SKF_AD_VLAN_TAG) {
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
						~VLAN_TAG_PRESENT);
		} else {
			/* dst_reg >>= 12 */
			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
			/* dst_reg &= 1 */
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
		}
		break;
	}

	return insn - insn_buf;
}

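/* As an illustration of the mapping performed here, a classic ancillary
 * load of the mark, e.g.
 *
 *	BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + SKF_AD_MARK)
 *
 * is dispatched through convert_bpf_extensions() below into the
 * SKF_AD_MARK case above, which emits a single eBPF load, roughly
 *
 *	BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
 *		    offsetof(struct sk_buff, mark))
 */
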
static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;
	u32 cnt;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);

		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_proto));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(__skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(__get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
			bpf_user_rnd_init_once();
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}

/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: allocated 'struct bpf_prog' or NULL
 *	@new_len: pointer to store length of converted program
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 * style extended BPF (eBPF).
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   bpf_convert_filter(old_prog, old_len, NULL, &new_len)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *    jump offsets, 2nd pass remapping:
 *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
 */
static int bpf_convert_filter(struct sock_filter *prog, int len,
			      struct bpf_prog *new_prog, int *new_len)
{
	int new_flen = 0, pass = 0, target, i, stack_off;
	struct bpf_insn *new_insn, *first_insn = NULL;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		first_insn = new_prog->insnsi;
		addrs = kcalloc(len, sizeof(*addrs),
				GFP_KERNEL | __GFP_NOWARN);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = first_insn;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_prog) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In eBPF case it's done by the compiler, here we need to
		 * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
		 */
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
	} else {
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[6] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - first_insn;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;

			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		if (target >= len || target < 0)			\
			goto err;					\
		insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0;	\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		insn->off -= insn - tmp_insns;				\
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert some jumps when 'jump_true' is next insn. */
			if (fp->jt == 0) {
				switch (BPF_OP(fp->code)) {
				case BPF_JEQ:
					insn->code = BPF_JMP | BPF_JNE | bpf_src;
					break;
				case BPF_JGT:
					insn->code = BPF_JMP | BPF_JLE | bpf_src;
					break;
				case BPF_JGE:
					insn->code = BPF_JMP | BPF_JLT | bpf_src;
					break;
				default:
					goto jmp_rest;
				}

				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}
jmp_rest:
			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B:
			/* tmp = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			*insn++ = BPF_LD_ABS(BPF_B, fp->k);
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;

		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K)
				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
							0, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -stack_off);
			/* check_load_and_stores() verifies that classic BPF can
			 * load from stack only after write, so tracking
			 * stack_depth for ST|STX insns is enough
			 */
			if (new_prog && new_prog->aux->stack_depth < stack_off)
				new_prog->aux->stack_depth = stack_off;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -stack_off);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields. */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - first_insn;
		return 0;
	}

	pass++;
	if (new_flen != new_insn - first_insn) {
		new_flen = new_insn - first_insn;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}

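/* A concrete example of the remapping above: the one-instruction classic
 * program "accept everything",
 *
 *	BPF_STMT(BPF_RET | BPF_K, 0xffffffff)
 *
 * is, apart from the three prologue instructions emitted at the top of
 * each converted program, rewritten to roughly
 *
 *	BPF_MOV32_RAW(BPF_K, BPF_REG_0, 0, 0xffffffff),
 *	BPF_EXIT_INSN(),
 */
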
/* Security:
 *
 * As we don't want to clear the mem[] array for each packet going through
 * __bpf_prog_run(), we check that filters loaded by the user never try to
 * read a cell if not previously written, and we check all branches to be
 * sure a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}

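/* For example, check_load_and_stores() rejects a program that reads
 * scratch cell 0 before anything has been stored into it,
 *
 *	BPF_STMT(BPF_LD | BPF_MEM, 0),
 *	BPF_STMT(BPF_RET | BPF_A, 0),
 *
 * while prefixing it with BPF_STMT(BPF_ST, 0) makes the same load legal.
 */
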
static bool chk_code_allowed(u16 code_to_probe)
{
	static const bool codes[] = {
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_K] = true,
		[BPF_ALU | BPF_ADD | BPF_X] = true,
		[BPF_ALU | BPF_SUB | BPF_K] = true,
		[BPF_ALU | BPF_SUB | BPF_X] = true,
		[BPF_ALU | BPF_MUL | BPF_K] = true,
		[BPF_ALU | BPF_MUL | BPF_X] = true,
		[BPF_ALU | BPF_DIV | BPF_K] = true,
		[BPF_ALU | BPF_DIV | BPF_X] = true,
		[BPF_ALU | BPF_MOD | BPF_K] = true,
		[BPF_ALU | BPF_MOD | BPF_X] = true,
		[BPF_ALU | BPF_AND | BPF_K] = true,
		[BPF_ALU | BPF_AND | BPF_X] = true,
		[BPF_ALU | BPF_OR | BPF_K] = true,
		[BPF_ALU | BPF_OR | BPF_X] = true,
		[BPF_ALU | BPF_XOR | BPF_K] = true,
		[BPF_ALU | BPF_XOR | BPF_X] = true,
		[BPF_ALU | BPF_LSH | BPF_K] = true,
		[BPF_ALU | BPF_LSH | BPF_X] = true,
		[BPF_ALU | BPF_RSH | BPF_K] = true,
		[BPF_ALU | BPF_RSH | BPF_X] = true,
		[BPF_ALU | BPF_NEG] = true,
		/* Load instructions */
		[BPF_LD | BPF_W | BPF_ABS] = true,
		[BPF_LD | BPF_H | BPF_ABS] = true,
		[BPF_LD | BPF_B | BPF_ABS] = true,
		[BPF_LD | BPF_W | BPF_LEN] = true,
		[BPF_LD | BPF_W | BPF_IND] = true,
		[BPF_LD | BPF_H | BPF_IND] = true,
		[BPF_LD | BPF_B | BPF_IND] = true,
		[BPF_LD | BPF_IMM] = true,
		[BPF_LD | BPF_MEM] = true,
		[BPF_LDX | BPF_W | BPF_LEN] = true,
		[BPF_LDX | BPF_B | BPF_MSH] = true,
		[BPF_LDX | BPF_IMM] = true,
		[BPF_LDX | BPF_MEM] = true,
		/* Store instructions */
		[BPF_ST] = true,
		[BPF_STX] = true,
		/* Misc instructions */
		[BPF_MISC | BPF_TAX] = true,
		[BPF_MISC | BPF_TXA] = true,
		/* Return instructions */
		[BPF_RET | BPF_K] = true,
		[BPF_RET | BPF_A] = true,
		/* Jump instructions */
		[BPF_JMP | BPF_JA] = true,
		[BPF_JMP | BPF_JEQ | BPF_K] = true,
		[BPF_JMP | BPF_JEQ | BPF_X] = true,
		[BPF_JMP | BPF_JGE | BPF_K] = true,
		[BPF_JMP | BPF_JGE | BPF_X] = true,
		[BPF_JMP | BPF_JGT | BPF_K] = true,
		[BPF_JMP | BPF_JGT | BPF_X] = true,
		[BPF_JMP | BPF_JSET | BPF_K] = true,
		[BPF_JMP | BPF_JSET | BPF_X] = true,
	};

	if (code_to_probe >= ARRAY_SIZE(codes))
		return false;

	return codes[code_to_probe];
}

static bool bpf_check_basics_ok(const struct sock_filter *filter,
				unsigned int flen)
{
	if (filter == NULL)
		return false;
	if (flen == 0 || flen > BPF_MAXINSNS)
		return false;

	return true;
}

/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int bpf_check_classic(const struct sock_filter *filter,
			     unsigned int flen)
{
	bool anc_found;
	int pc;

	/* Check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		const struct sock_filter *ftest = &filter[pc];

		/* May we actually operate on this code? */
		if (!chk_code_allowed(ftest->code))
			return -EINVAL;

		/* Some instructions need special checks */
		switch (ftest->code) {
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
			/* Check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
			if (ftest->k >= 32)
				return -EINVAL;
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* Check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JA:
			/* Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen - pc - 1))
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* Both conditionals must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_LD | BPF_W | BPF_ABS:
		case BPF_LD | BPF_H | BPF_ABS:
		case BPF_LD | BPF_B | BPF_ABS:
			anc_found = false;
			if (bpf_anc_helper(ftest) & BPF_ANC)
				anc_found = true;
			/* Ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
	}

	/* Last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_RET | BPF_K:
	case BPF_RET | BPF_A:
		return check_load_and_stores(filter, flen);
	}

	return -EINVAL;
}

static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
				      const struct sock_fprog *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;

	fkprog->filter = kmemdup(fp->insns, fsize,
				 GFP_KERNEL | __GFP_NOWARN);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void bpf_release_orig_filter(struct bpf_prog *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

static void __bpf_prog_release(struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
		bpf_prog_put(prog);
	} else {
		bpf_release_orig_filter(prog);
		bpf_prog_free(prog);
	}
}

static void __sk_filter_release(struct sk_filter *fp)
{
	__bpf_prog_release(fp->prog);
	kfree(fp);
}

/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	__sk_filter_release(fp);
}

/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (refcount_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	atomic_sub(filter_size, &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

/* try to charge the socket memory if there is space available
 * return true on success
 */
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	/* same check as in sock_kmalloc() */
	if (filter_size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
		atomic_add(filter_size, &sk->sk_omem_alloc);
		return true;
	}
	return false;
}

bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	if (!refcount_inc_not_zero(&fp->refcnt))
		return false;

	if (!__sk_filter_charge(sk, fp)) {
		sk_filter_release(fp);
		return false;
	}
	return true;
}

static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
	struct sock_filter *old_prog;
	struct bpf_prog *old_fp;
	int err, new_len, old_len = fp->len;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the internal BPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct bpf_insn));

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL | __GFP_NOWARN);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
	err = bpf_convert_filter(old_prog, old_len, fp, &new_len);
	if (err)
		/* 2nd bpf_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by krealloc().
		 */
		goto out_err_free;

	fp = bpf_prog_select_runtime(fp, &err);
	if (err)
		goto out_err_free;

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	__bpf_prog_release(fp);
	return ERR_PTR(err);
}

static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
					   bpf_aux_classic_check_t trans)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = 0;

	err = bpf_check_classic(fp->insns, fp->len);
	if (err) {
		__bpf_prog_release(fp);
		return ERR_PTR(err);
	}

	/* There might be additional checks and transformations
	 * needed on classic filters, f.e. in case of seccomp.
	 */
	if (trans) {
		err = trans(fp->insns, fp->len);
		if (err) {
			__bpf_prog_release(fp);
			return ERR_PTR(err);
		}
	}

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = bpf_migrate_filter(fp);

	return fp;
}

/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, NULL);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create);

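/* A minimal in-kernel usage sketch (identifiers below are illustrative
 * only): build a classic "accept everything" program and turn it into an
 * unattached filter,
 *
 *	static struct sock_filter accept_all[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
 *	};
 *	static struct sock_fprog_kern accept_all_fprog = {
 *		.len	= ARRAY_SIZE(accept_all),
 *		.filter	= accept_all,
 *	};
 *	struct bpf_prog *prog;
 *	int err = bpf_prog_create(&prog, &accept_all_fprog);
 *
 * with the program later released again via bpf_prog_destroy(prog).
 */
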
/**
 *	bpf_prog_create_from_user - create an unattached filter from user buffer
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *	@trans: post-classic verifier transformation handler
 *	@save_orig: save classic BPF program
 *
 * This function effectively does the same as bpf_prog_create(), only
 * that it builds up its insns buffer from user space provided buffer.
 * It also allows for passing a bpf_aux_classic_check_t handler.
 */
int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
			      bpf_aux_classic_check_t trans, bool save_orig)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		__bpf_prog_free(fp);
		return -EFAULT;
	}

	fp->len = fprog->len;
	fp->orig_prog = NULL;

	if (save_orig) {
		err = bpf_prog_store_orig_filter(fp, fprog);
		if (err) {
			__bpf_prog_free(fp);
			return -ENOMEM;
		}
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, trans);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);

void bpf_prog_destroy(struct bpf_prog *fp)
{
	__bpf_prog_release(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);

static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;

	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	fp->prog = prog;

	if (!__sk_filter_charge(sk, fp)) {
		kfree(fp);
		return -ENOMEM;
	}
	refcount_set(&fp->refcnt, 1);

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}

static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
	struct bpf_prog *old_prog;
	int err;

	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
		return -ENOMEM;

	if (sk_unhashed(sk) && sk->sk_reuseport) {
		err = reuseport_alloc(sk);
		if (err)
			return err;
	} else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
		/* The socket wasn't bound with SO_REUSEPORT */
		return -EINVAL;
	}

	old_prog = reuseport_attach_prog(sk, prog);
	if (old_prog)
		bpf_prog_destroy(old_prog);

	return 0;
}

static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return ERR_PTR(-EINVAL);

	prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!prog)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
		__bpf_prog_free(prog);
		return ERR_PTR(-EFAULT);
	}

	prog->len = fprog->len;

	err = bpf_prog_store_orig_filter(prog, fprog);
	if (err) {
		__bpf_prog_free(prog);
		return ERR_PTR(-ENOMEM);
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	return bpf_prepare_filter(prog, NULL);
}

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		__bpf_prog_release(prog);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);

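/* From user space this path is reached via setsockopt(); an illustrative
 * sketch, with error handling omitted:
 *
 *	struct sock_filter code[] = {
 *		{ BPF_RET | BPF_K, 0, 0, 0xffffffff },
 *	};
 *	struct sock_fprog fprog = {
 *		.len	= 1,
 *		.filter	= code,
 *	};
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
 */
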
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __reuseport_attach_prog(prog, sk);
	if (err < 0) {
		__bpf_prog_release(prog);
		return err;
	}

	return 0;
}

static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
}

int sk_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog = __get_bpf(ufd, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		bpf_prog_put(prog);
		return err;
	}

	return 0;
}

int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog = __get_bpf(ufd, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __reuseport_attach_prog(prog, sk);
	if (err < 0) {
		bpf_prog_put(prog);
		return err;
	}

	return 0;
}

struct bpf_scratchpad {
	union {
		__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
		u8     buff[MAX_BPF_STACK];
	};
};

static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);

static inline int __bpf_try_make_writable(struct sk_buff *skb,
					  unsigned int write_len)
{
	return skb_ensure_writable(skb, write_len);
}

static inline int bpf_try_make_writable(struct sk_buff *skb,
					unsigned int write_len)
{
	int err = __bpf_try_make_writable(skb, write_len);

	bpf_compute_data_pointers(skb);
	return err;
}

static int bpf_try_make_head_writable(struct sk_buff *skb)
{
	return bpf_try_make_writable(skb, skb_headlen(skb));
}

static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
	   const void *, from, u32, len, u64, flags)
{
	void *ptr;

	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
		return -EINVAL;
	if (unlikely(offset > 0xffff))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + len)))
		return -EFAULT;

	ptr = skb->data + offset;
	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpull_rcsum(skb, ptr, len, offset);

	memcpy(ptr, from, len);

	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpush_rcsum(skb, ptr, len, offset);
	if (flags & BPF_F_INVALIDATE_HASH)
		skb_clear_hash(skb);

	return 0;
}

static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
	.func		= bpf_skb_store_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_MEM,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

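/* On the BPF program side, an illustrative sketch of a direct rewrite with
 * this helper (the field and offset are assumptions of the example, not
 * something this file defines):
 *
 *	__be16 new_proto = bpf_htons(ETH_P_IP);
 *	bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_proto),
 *			    &new_proto, sizeof(new_proto), 0);
 */
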
BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
	   void *, to, u32, len)
{
	void *ptr;

	if (unlikely(offset > 0xffff))
		goto err_clear;

	ptr = skb_header_pointer(skb, offset, len, to);
	if (unlikely(!ptr))
		goto err_clear;
	if (ptr != to)
		memcpy(to, ptr, len);

	return 0;
err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
	.func		= bpf_skb_load_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
{
	/* Idea is the following: should the needed direct read/write
	 * test fail during runtime, we can pull in more data and redo
	 * again, since implicitly, we invalidate previous checks here.
	 *
	 * Or, since we know how much we need to make read/writeable,
	 * this can be done once at the program beginning for direct
	 * access case. By this we overcome limitations of only current
	 * headroom being accessible.
	 */
	return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
}

static const struct bpf_func_proto bpf_skb_pull_data_proto = {
	.func		= bpf_skb_pull_data,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
	   u64, from, u64, to, u64, flags)
{
	__sum16 *ptr;

	if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
		return -EINVAL;
	if (unlikely(offset > 0xffff || offset & 1))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
		return -EFAULT;

	ptr = (__sum16 *)(skb->data + offset);
	switch (flags & BPF_F_HDR_FIELD_MASK) {
	case 0:
		if (unlikely(from != 0))
			return -EINVAL;

		csum_replace_by_diff(ptr, to);
		break;
	case 2:
		csum_replace2(ptr, from, to);
		break;
	case 4:
		csum_replace4(ptr, from, to);
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
	.func		= bpf_l3_csum_replace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
	   u64, from, u64, to, u64, flags)
{
	bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
	bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
	bool do_mforce = flags & BPF_F_MARK_ENFORCE;
	__sum16 *ptr;

	if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
			       BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
		return -EINVAL;
	if (unlikely(offset > 0xffff || offset & 1))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
		return -EFAULT;

	ptr = (__sum16 *)(skb->data + offset);
	if (is_mmzero && !do_mforce && !*ptr)
		return 0;

	switch (flags & BPF_F_HDR_FIELD_MASK) {
	case 0:
		if (unlikely(from != 0))
			return -EINVAL;

		inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
		break;
	case 2:
		inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
		break;
	case 4:
		inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
		break;
	default:
		return -EINVAL;
	}

	if (is_mmzero && !*ptr)
		*ptr = CSUM_MANGLED_0;
	return 0;
}

static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
	.func		= bpf_l4_csum_replace,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
	.arg4_type	= ARG_ANYTHING,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
	   __be32 *, to, u32, to_size, __wsum, seed)
{
	struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
	u32 diff_size = from_size + to_size;
	int i, j = 0;

	/* This is quite flexible, some examples:
	 *
	 *   from_size == 0, to_size > 0,  seed := csum --> pushing data
	 *   from_size > 0,  to_size == 0, seed := csum --> pulling data
	 *   from_size > 0,  to_size > 0,  seed := 0    --> diffing data
	 *
	 * Even for diffing, from_size and to_size don't need to be equal.
	 */
	if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
		     diff_size > sizeof(sp->diff)))
		return -EINVAL;

	for (i = 0; i < from_size / sizeof(__be32); i++, j++)
		sp->diff[j] = ~from[i];
	for (i = 0; i < to_size / sizeof(__be32); i++, j++)
		sp->diff[j] = to[i];

	return csum_partial(sp->diff, diff_size, seed);
}

static const struct bpf_func_proto bpf_csum_diff_proto = {
	.func		= bpf_csum_diff,
	.gpl_only	= false,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg5_type	= ARG_ANYTHING,
};

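/* Illustrative sketch of the diffing mode from the program side: compute
 * the checksum delta for a rewritten 4-byte field and apply it through
 * bpf_l4_csum_replace() above (old_ip/new_ip/csum_off are assumptions of
 * the example):
 *
 *	__wsum diff = bpf_csum_diff(&old_ip, sizeof(old_ip),
 *				    &new_ip, sizeof(new_ip), 0);
 *	bpf_l4_csum_replace(skb, csum_off, 0, diff, BPF_F_PSEUDO_HDR);
 */
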
36bbef52
DB
1654BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
1655{
1656 /* The interface is to be used in combination with bpf_csum_diff()
1657 * for direct packet writes. csum rotation for alignment as well
1658 * as emulating csum_sub() can be done from the eBPF program.
1659 */
1660 if (skb->ip_summed == CHECKSUM_COMPLETE)
1661 return (skb->csum = csum_add(skb->csum, csum));
1662
1663 return -ENOTSUPP;
1664}
1665
1666static const struct bpf_func_proto bpf_csum_update_proto = {
1667 .func = bpf_csum_update,
1668 .gpl_only = false,
1669 .ret_type = RET_INTEGER,
1670 .arg1_type = ARG_PTR_TO_CTX,
1671 .arg2_type = ARG_ANYTHING,
1672};
1673
a70b506e
DB
1674static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
1675{
a70b506e
DB
1676 return dev_forward_skb(dev, skb);
1677}
1678
4e3264d2
MKL
1679static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
1680 struct sk_buff *skb)
1681{
1682 int ret = ____dev_forward_skb(dev, skb);
1683
1684 if (likely(!ret)) {
1685 skb->dev = dev;
1686 ret = netif_rx(skb);
1687 }
1688
1689 return ret;
1690}
1691
a70b506e
DB
1692static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
1693{
1694 int ret;
1695
1696 if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
1697 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
1698 kfree_skb(skb);
1699 return -ENETDOWN;
1700 }
1701
1702 skb->dev = dev;
1703
1704 __this_cpu_inc(xmit_recursion);
1705 ret = dev_queue_xmit(skb);
1706 __this_cpu_dec(xmit_recursion);
1707
1708 return ret;
1709}
1710
4e3264d2
MKL
1711static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
1712 u32 flags)
1713{
1714 /* skb->mac_len is not set on normal egress */
1715 unsigned int mlen = skb->network_header - skb->mac_header;
1716
1717 __skb_pull(skb, mlen);
1718
1719 /* At ingress, the mac header has already been pulled once.
1720 * At egress, skb_pospull_rcsum has to be done in case that
1721 * the skb is originated from ingress (i.e. a forwarded skb)
1722 * to ensure that rcsum starts at net header.
1723 */
1724 if (!skb_at_tc_ingress(skb))
1725 skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
1726 skb_pop_mac_header(skb);
1727 skb_reset_mac_len(skb);
1728 return flags & BPF_F_INGRESS ?
1729 __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
1730}
1731
1732static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
1733 u32 flags)
1734{
3a0af8fd
TG
1735 /* Verify that a link layer header is carried */
1736 if (unlikely(skb->mac_header >= skb->network_header)) {
1737 kfree_skb(skb);
1738 return -ERANGE;
1739 }
1740
4e3264d2
MKL
1741 bpf_push_mac_rcsum(skb);
1742 return flags & BPF_F_INGRESS ?
1743 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
1744}
1745
1746static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
1747 u32 flags)
1748{
c491680f 1749 if (dev_is_mac_header_xmit(dev))
4e3264d2 1750 return __bpf_redirect_common(skb, dev, flags);
c491680f
DB
1751 else
1752 return __bpf_redirect_no_mac(skb, dev, flags);
4e3264d2
MKL
1753}
1754
f3694e00 1755BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
3896d655 1756{
3896d655 1757 struct net_device *dev;
36bbef52
DB
1758 struct sk_buff *clone;
1759 int ret;
3896d655 1760
781c53bc
DB
1761 if (unlikely(flags & ~(BPF_F_INGRESS)))
1762 return -EINVAL;
1763
3896d655
AS
1764 dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
1765 if (unlikely(!dev))
1766 return -EINVAL;
1767
36bbef52
DB
1768 clone = skb_clone(skb, GFP_ATOMIC);
1769 if (unlikely(!clone))
3896d655
AS
1770 return -ENOMEM;
1771
36bbef52
DB
 1772 /* For direct write, we need to keep the invariant that the skbs
 1773 * we're dealing with are uncloned. Should uncloning fail
 1774 * here, we need to free the just-generated clone so that a
 1775 * later attempt can unclone again.
1776 */
1777 ret = bpf_try_make_head_writable(skb);
1778 if (unlikely(ret)) {
1779 kfree_skb(clone);
1780 return -ENOMEM;
1781 }
1782
4e3264d2 1783 return __bpf_redirect(clone, dev, flags);
3896d655
AS
1784}
1785
577c50aa 1786static const struct bpf_func_proto bpf_clone_redirect_proto = {
3896d655
AS
1787 .func = bpf_clone_redirect,
1788 .gpl_only = false,
1789 .ret_type = RET_INTEGER,
1790 .arg1_type = ARG_PTR_TO_CTX,
1791 .arg2_type = ARG_ANYTHING,
1792 .arg3_type = ARG_ANYTHING,
1793};
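/* Usage sketch (illustrative only, not part of this file): mirroring each
 * packet to the ingress path of another device while letting the original
 * continue through the stack. Ifindex 2 is a placeholder; the helper
 * declaration follows the era's bpf_helpers.h style.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>

static int (*bpf_clone_redirect)(void *ctx, __u32 ifindex, __u64 flags) =
	(void *) BPF_FUNC_clone_redirect;

__attribute__((section("classifier"), used))
int mirror_to_if2(struct __sk_buff *skb)
{
	/* The clone is injected on ifindex 2's ingress; errors are ignored
	 * here, the original packet proceeds either way.
	 */
	bpf_clone_redirect(skb, 2, BPF_F_INGRESS);
	return TC_ACT_OK;
}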
1794
27b29f63
AS
1795struct redirect_info {
1796 u32 ifindex;
1797 u32 flags;
97f91a7c 1798 struct bpf_map *map;
11393cc9 1799 struct bpf_map *map_to_flush;
7c300131 1800 unsigned long map_owner;
27b29f63
AS
1801};
1802
1803static DEFINE_PER_CPU(struct redirect_info, redirect_info);
781c53bc 1804
f3694e00 1805BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
27b29f63
AS
1806{
1807 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
1808
781c53bc
DB
1809 if (unlikely(flags & ~(BPF_F_INGRESS)))
1810 return TC_ACT_SHOT;
1811
27b29f63
AS
1812 ri->ifindex = ifindex;
1813 ri->flags = flags;
781c53bc 1814
27b29f63
AS
1815 return TC_ACT_REDIRECT;
1816}
1817
1818int skb_do_redirect(struct sk_buff *skb)
1819{
1820 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
1821 struct net_device *dev;
1822
1823 dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
1824 ri->ifindex = 0;
1825 if (unlikely(!dev)) {
1826 kfree_skb(skb);
1827 return -EINVAL;
1828 }
1829
4e3264d2 1830 return __bpf_redirect(skb, dev, ri->flags);
27b29f63
AS
1831}
1832
577c50aa 1833static const struct bpf_func_proto bpf_redirect_proto = {
27b29f63
AS
1834 .func = bpf_redirect,
1835 .gpl_only = false,
1836 .ret_type = RET_INTEGER,
1837 .arg1_type = ARG_ANYTHING,
1838 .arg2_type = ARG_ANYTHING,
1839};
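/* Usage sketch (illustrative only, not part of this file): bpf_redirect()
 * only records the target in the per-CPU redirect_info above, so the tc
 * program must return the helper's result (TC_ACT_REDIRECT) for
 * skb_do_redirect() to run. Ifindex 2 is a placeholder.
 */
#include <linux/bpf.h>

static int (*bpf_redirect)(__u32 ifindex, __u64 flags) =
	(void *) BPF_FUNC_redirect;

__attribute__((section("classifier"), used))
int redirect_to_if2(struct __sk_buff *skb)
{
	/* Egress of ifindex 2; pass BPF_F_INGRESS to hit its ingress path. */
	return bpf_redirect(2, 0);
}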
1840
34f79502
JF
1841BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
1842 struct bpf_map *, map, u32, key, u64, flags)
174a79ff 1843{
34f79502 1844 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
174a79ff 1845
bfa64075 1846 /* If the user passes invalid input, drop the packet. */
174a79ff 1847 if (unlikely(flags))
bfa64075 1848 return SK_DROP;
174a79ff 1849
34f79502
JF
1850 tcb->bpf.key = key;
1851 tcb->bpf.flags = flags;
1852 tcb->bpf.map = map;
174a79ff 1853
bfa64075 1854 return SK_PASS;
174a79ff
JF
1855}
1856
34f79502 1857struct sock *do_sk_redirect_map(struct sk_buff *skb)
174a79ff 1858{
34f79502 1859 struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
174a79ff
JF
1860 struct sock *sk = NULL;
1861
34f79502
JF
1862 if (tcb->bpf.map) {
1863 sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
174a79ff 1864
34f79502
JF
1865 tcb->bpf.key = 0;
1866 tcb->bpf.map = NULL;
174a79ff
JF
1867 }
1868
1869 return sk;
1870}
1871
1872static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
1873 .func = bpf_sk_redirect_map,
1874 .gpl_only = false,
1875 .ret_type = RET_INTEGER,
34f79502
JF
1876 .arg1_type = ARG_PTR_TO_CTX,
1877 .arg2_type = ARG_CONST_MAP_PTR,
174a79ff 1878 .arg3_type = ARG_ANYTHING,
34f79502 1879 .arg4_type = ARG_ANYTHING,
174a79ff
JF
1880};
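/* Usage sketch (illustrative only, not part of this file): an SK_SKB
 * verdict program steering skbs to the socket stored at index 0 of a
 * sockmap. Map name, sizes and the index are placeholders; the map layout
 * follows the samples/bpf bpf_map_def convention.
 */
#include <linux/bpf.h>

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

static int (*bpf_sk_redirect_map)(void *ctx, void *map, __u32 key,
				  __u64 flags) =
	(void *) BPF_FUNC_sk_redirect_map;

__attribute__((section("maps"), used))
struct bpf_map_def sock_map = {
	.type		= BPF_MAP_TYPE_SOCKMAP,
	.key_size	= sizeof(int),
	.value_size	= sizeof(int),
	.max_entries	= 2,
};

__attribute__((section("sk_skb"), used))
int verdict_prog(struct __sk_buff *skb)
{
	/* Returns SK_PASS on success; flags must currently be zero. */
	return bpf_sk_redirect_map(skb, &sock_map, 0, 0);
}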
1881
f3694e00 1882BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
8d20aabe 1883{
f3694e00 1884 return task_get_classid(skb);
8d20aabe
DB
1885}
1886
1887static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
1888 .func = bpf_get_cgroup_classid,
1889 .gpl_only = false,
1890 .ret_type = RET_INTEGER,
1891 .arg1_type = ARG_PTR_TO_CTX,
1892};
1893
f3694e00 1894BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
c46646d0 1895{
f3694e00 1896 return dst_tclassid(skb);
c46646d0
DB
1897}
1898
1899static const struct bpf_func_proto bpf_get_route_realm_proto = {
1900 .func = bpf_get_route_realm,
1901 .gpl_only = false,
1902 .ret_type = RET_INTEGER,
1903 .arg1_type = ARG_PTR_TO_CTX,
1904};
1905
f3694e00 1906BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
13c5c240
DB
1907{
1908 /* If skb_clear_hash() was called due to mangling, we can
1909 * trigger SW recalculation here. Later access to hash
1910 * can then use the inline skb->hash via context directly
1911 * instead of calling this helper again.
1912 */
f3694e00 1913 return skb_get_hash(skb);
13c5c240
DB
1914}
1915
1916static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
1917 .func = bpf_get_hash_recalc,
1918 .gpl_only = false,
1919 .ret_type = RET_INTEGER,
1920 .arg1_type = ARG_PTR_TO_CTX,
1921};
1922
7a4b28c6
DB
1923BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
1924{
 1925 /* After all direct packet writes, this can be used once for
1926 * triggering a lazy recalc on next skb_get_hash() invocation.
1927 */
1928 skb_clear_hash(skb);
1929 return 0;
1930}
1931
1932static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
1933 .func = bpf_set_hash_invalid,
1934 .gpl_only = false,
1935 .ret_type = RET_INTEGER,
1936 .arg1_type = ARG_PTR_TO_CTX,
1937};
1938
ded092cd
DB
1939BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
1940{
 1941 /* Set the user-specified hash as L4(+), so that it gets returned
1942 * on skb_get_hash() call unless BPF prog later on triggers a
1943 * skb_clear_hash().
1944 */
1945 __skb_set_sw_hash(skb, hash, true);
1946 return 0;
1947}
1948
1949static const struct bpf_func_proto bpf_set_hash_proto = {
1950 .func = bpf_set_hash,
1951 .gpl_only = false,
1952 .ret_type = RET_INTEGER,
1953 .arg1_type = ARG_PTR_TO_CTX,
1954 .arg2_type = ARG_ANYTHING,
1955};
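/* Usage sketch (illustrative only, not part of this file): after rewriting
 * headers a tc program can either install its own L4 hash or invalidate
 * the stored one so the next skb_get_hash() recomputes it. The mark test
 * and the hash value are placeholders.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>

static int (*bpf_set_hash)(void *ctx, __u32 hash) =
	(void *) BPF_FUNC_set_hash;
static int (*bpf_set_hash_invalid)(void *ctx) =
	(void *) BPF_FUNC_set_hash_invalid;

__attribute__((section("classifier"), used))
int rehash(struct __sk_buff *skb)
{
	if (skb->mark)
		bpf_set_hash(skb, 0xcafe);	/* trust our own hash */
	else
		bpf_set_hash_invalid(skb);	/* lazy SW recalc later */
	return TC_ACT_OK;
}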
1956
f3694e00
DB
1957BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
1958 u16, vlan_tci)
4e10df9a 1959{
db58ba45 1960 int ret;
4e10df9a
AS
1961
1962 if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
1963 vlan_proto != htons(ETH_P_8021AD)))
1964 vlan_proto = htons(ETH_P_8021Q);
1965
8065694e 1966 bpf_push_mac_rcsum(skb);
db58ba45 1967 ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
8065694e
DB
1968 bpf_pull_mac_rcsum(skb);
1969
6aaae2b6 1970 bpf_compute_data_pointers(skb);
db58ba45 1971 return ret;
4e10df9a
AS
1972}
1973
1974const struct bpf_func_proto bpf_skb_vlan_push_proto = {
1975 .func = bpf_skb_vlan_push,
1976 .gpl_only = false,
1977 .ret_type = RET_INTEGER,
1978 .arg1_type = ARG_PTR_TO_CTX,
1979 .arg2_type = ARG_ANYTHING,
1980 .arg3_type = ARG_ANYTHING,
1981};
4d9c5c53 1982EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
4e10df9a 1983
f3694e00 1984BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
4e10df9a 1985{
db58ba45 1986 int ret;
4e10df9a 1987
8065694e 1988 bpf_push_mac_rcsum(skb);
db58ba45 1989 ret = skb_vlan_pop(skb);
8065694e
DB
1990 bpf_pull_mac_rcsum(skb);
1991
6aaae2b6 1992 bpf_compute_data_pointers(skb);
db58ba45 1993 return ret;
4e10df9a
AS
1994}
1995
1996const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
1997 .func = bpf_skb_vlan_pop,
1998 .gpl_only = false,
1999 .ret_type = RET_INTEGER,
2000 .arg1_type = ARG_PTR_TO_CTX,
2001};
4d9c5c53 2002EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
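/* Usage sketch (illustrative only, not part of this file): pushing an
 * 802.1Q tag from tc. Any packet pointers derived before the call are
 * invalidated (see bpf_compute_data_pointers() above) and must be
 * reloaded. VLAN ID 42 is a placeholder; bpf_htons assumes a
 * little-endian host.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>

#define bpf_htons(x) __builtin_bswap16(x)	/* little-endian host assumed */

static int (*bpf_skb_vlan_push)(void *ctx, __be16 proto, __u16 vlan_tci) =
	(void *) BPF_FUNC_skb_vlan_push;

__attribute__((section("classifier"), used))
int push_vlan42(struct __sk_buff *skb)
{
	if (bpf_skb_vlan_push(skb, bpf_htons(ETH_P_8021Q), 42))
		return TC_ACT_SHOT;
	return TC_ACT_OK;
}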
4e10df9a 2003
6578171a
DB
2004static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
2005{
2006 /* Caller already did skb_cow() with len as headroom,
2007 * so no need to do it here.
2008 */
2009 skb_push(skb, len);
2010 memmove(skb->data, skb->data + len, off);
2011 memset(skb->data + off, 0, len);
2012
2013 /* No skb_postpush_rcsum(skb, skb->data + off, len)
2014 * needed here as it does not change the skb->csum
2015 * result for checksum complete when summing over
2016 * zeroed blocks.
2017 */
2018 return 0;
2019}
2020
2021static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
2022{
2023 /* skb_ensure_writable() is not needed here, as we're
2024 * already working on an uncloned skb.
2025 */
2026 if (unlikely(!pskb_may_pull(skb, off + len)))
2027 return -ENOMEM;
2028
2029 skb_postpull_rcsum(skb, skb->data + off, len);
2030 memmove(skb->data + len, skb->data, off);
2031 __skb_pull(skb, len);
2032
2033 return 0;
2034}
2035
2036static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
2037{
2038 bool trans_same = skb->transport_header == skb->network_header;
2039 int ret;
2040
2041 /* There's no need for __skb_push()/__skb_pull() pair to
2042 * get to the start of the mac header as we're guaranteed
2043 * to always start from here under eBPF.
2044 */
2045 ret = bpf_skb_generic_push(skb, off, len);
2046 if (likely(!ret)) {
2047 skb->mac_header -= len;
2048 skb->network_header -= len;
2049 if (trans_same)
2050 skb->transport_header = skb->network_header;
2051 }
2052
2053 return ret;
2054}
2055
2056static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
2057{
2058 bool trans_same = skb->transport_header == skb->network_header;
2059 int ret;
2060
2061 /* Same here, __skb_push()/__skb_pull() pair not needed. */
2062 ret = bpf_skb_generic_pop(skb, off, len);
2063 if (likely(!ret)) {
2064 skb->mac_header += len;
2065 skb->network_header += len;
2066 if (trans_same)
2067 skb->transport_header = skb->network_header;
2068 }
2069
2070 return ret;
2071}
2072
2073static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
2074{
2075 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
0daf4349 2076 u32 off = skb_mac_header_len(skb);
6578171a
DB
2077 int ret;
2078
2079 ret = skb_cow(skb, len_diff);
2080 if (unlikely(ret < 0))
2081 return ret;
2082
2083 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
2084 if (unlikely(ret < 0))
2085 return ret;
2086
2087 if (skb_is_gso(skb)) {
880388aa
DM
2088 /* SKB_GSO_TCPV4 needs to be changed into
2089 * SKB_GSO_TCPV6.
6578171a
DB
2090 */
2091 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
2092 skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4;
2093 skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;
2094 }
2095
2096 /* Due to IPv6 header, MSS needs to be downgraded. */
2097 skb_shinfo(skb)->gso_size -= len_diff;
2098 /* Header must be checked, and gso_segs recomputed. */
2099 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2100 skb_shinfo(skb)->gso_segs = 0;
2101 }
2102
2103 skb->protocol = htons(ETH_P_IPV6);
2104 skb_clear_hash(skb);
2105
2106 return 0;
2107}
2108
2109static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
2110{
2111 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
0daf4349 2112 u32 off = skb_mac_header_len(skb);
6578171a
DB
2113 int ret;
2114
2115 ret = skb_unclone(skb, GFP_ATOMIC);
2116 if (unlikely(ret < 0))
2117 return ret;
2118
2119 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
2120 if (unlikely(ret < 0))
2121 return ret;
2122
2123 if (skb_is_gso(skb)) {
880388aa
DM
2124 /* SKB_GSO_TCPV6 needs to be changed into
2125 * SKB_GSO_TCPV4.
6578171a
DB
2126 */
2127 if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
2128 skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6;
2129 skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4;
2130 }
2131
2132 /* Due to IPv4 header, MSS can be upgraded. */
2133 skb_shinfo(skb)->gso_size += len_diff;
2134 /* Header must be checked, and gso_segs recomputed. */
2135 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2136 skb_shinfo(skb)->gso_segs = 0;
2137 }
2138
2139 skb->protocol = htons(ETH_P_IP);
2140 skb_clear_hash(skb);
2141
2142 return 0;
2143}
2144
2145static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
2146{
2147 __be16 from_proto = skb->protocol;
2148
2149 if (from_proto == htons(ETH_P_IP) &&
2150 to_proto == htons(ETH_P_IPV6))
2151 return bpf_skb_proto_4_to_6(skb);
2152
2153 if (from_proto == htons(ETH_P_IPV6) &&
2154 to_proto == htons(ETH_P_IP))
2155 return bpf_skb_proto_6_to_4(skb);
2156
2157 return -ENOTSUPP;
2158}
2159
f3694e00
DB
2160BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
2161 u64, flags)
6578171a 2162{
6578171a
DB
2163 int ret;
2164
2165 if (unlikely(flags))
2166 return -EINVAL;
2167
2168 /* General idea is that this helper does the basic groundwork
 2169 * needed for changing the protocol, and the eBPF program fills in
 2170 * the rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
 2171 * and other helpers, rather than passing a raw buffer here.
 2172 *
 2173 * The rationale is to keep this minimal and without a need to
 2174 * deal with raw packet data. E.g. even if we passed buffers
 2175 * here, the program would still need to call the bpf_lX_csum_replace()
 2176 * helpers anyway. Plus, this way we also keep separation of
 2177 * concerns, since e.g. bpf_skb_store_bytes() should only take
 2178 * care of stores.
2179 *
2180 * Currently, additional options and extension header space are
2181 * not supported, but flags register is reserved so we can adapt
2182 * that. For offloads, we mark packet as dodgy, so that headers
2183 * need to be verified first.
2184 */
2185 ret = bpf_skb_proto_xlat(skb, proto);
6aaae2b6 2186 bpf_compute_data_pointers(skb);
6578171a
DB
2187 return ret;
2188}
2189
2190static const struct bpf_func_proto bpf_skb_change_proto_proto = {
2191 .func = bpf_skb_change_proto,
2192 .gpl_only = false,
2193 .ret_type = RET_INTEGER,
2194 .arg1_type = ARG_PTR_TO_CTX,
2195 .arg2_type = ARG_ANYTHING,
2196 .arg3_type = ARG_ANYTHING,
2197};
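/* Usage sketch (illustrative only, not part of this file): a 6-to-4 style
 * translator flips the protocol first; the program is then expected to
 * write the new IPv4 header itself via bpf_skb_store_bytes() and the csum
 * helpers, which is omitted here. bpf_htons assumes a little-endian host.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>

#define bpf_htons(x) __builtin_bswap16(x)	/* little-endian host assumed */

static int (*bpf_skb_change_proto)(void *ctx, __be16 proto, __u64 flags) =
	(void *) BPF_FUNC_skb_change_proto;

__attribute__((section("classifier"), used))
int xlat_6to4(struct __sk_buff *skb)
{
	if (skb->protocol != bpf_htons(ETH_P_IPV6))
		return TC_ACT_OK;
	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
		return TC_ACT_SHOT;
	/* Network header bytes still have to be rewritten by the program. */
	return TC_ACT_OK;
}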
2198
f3694e00 2199BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
d2485c42 2200{
d2485c42 2201 /* We only allow a restricted subset to be changed for now. */
45c7fffa
DB
2202 if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
2203 !skb_pkt_type_ok(pkt_type)))
d2485c42
DB
2204 return -EINVAL;
2205
2206 skb->pkt_type = pkt_type;
2207 return 0;
2208}
2209
2210static const struct bpf_func_proto bpf_skb_change_type_proto = {
2211 .func = bpf_skb_change_type,
2212 .gpl_only = false,
2213 .ret_type = RET_INTEGER,
2214 .arg1_type = ARG_PTR_TO_CTX,
2215 .arg2_type = ARG_ANYTHING,
2216};
2217
2be7e212
DB
2218static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
2219{
2220 switch (skb->protocol) {
2221 case htons(ETH_P_IP):
2222 return sizeof(struct iphdr);
2223 case htons(ETH_P_IPV6):
2224 return sizeof(struct ipv6hdr);
2225 default:
2226 return ~0U;
2227 }
2228}
2229
2230static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
2231{
2232 u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
2233 int ret;
2234
2235 ret = skb_cow(skb, len_diff);
2236 if (unlikely(ret < 0))
2237 return ret;
2238
2239 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
2240 if (unlikely(ret < 0))
2241 return ret;
2242
2243 if (skb_is_gso(skb)) {
2244 /* Due to header grow, MSS needs to be downgraded. */
2245 skb_shinfo(skb)->gso_size -= len_diff;
2246 /* Header must be checked, and gso_segs recomputed. */
2247 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2248 skb_shinfo(skb)->gso_segs = 0;
2249 }
2250
2251 return 0;
2252}
2253
2254static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
2255{
2256 u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
2257 int ret;
2258
2259 ret = skb_unclone(skb, GFP_ATOMIC);
2260 if (unlikely(ret < 0))
2261 return ret;
2262
2263 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
2264 if (unlikely(ret < 0))
2265 return ret;
2266
2267 if (skb_is_gso(skb)) {
2268 /* Due to header shrink, MSS can be upgraded. */
2269 skb_shinfo(skb)->gso_size += len_diff;
2270 /* Header must be checked, and gso_segs recomputed. */
2271 skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
2272 skb_shinfo(skb)->gso_segs = 0;
2273 }
2274
2275 return 0;
2276}
2277
2278static u32 __bpf_skb_max_len(const struct sk_buff *skb)
2279{
2280 return skb->dev->mtu + skb->dev->hard_header_len;
2281}
2282
2283static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
2284{
2285 bool trans_same = skb->transport_header == skb->network_header;
2286 u32 len_cur, len_diff_abs = abs(len_diff);
2287 u32 len_min = bpf_skb_net_base_len(skb);
2288 u32 len_max = __bpf_skb_max_len(skb);
2289 __be16 proto = skb->protocol;
2290 bool shrink = len_diff < 0;
2291 int ret;
2292
2293 if (unlikely(len_diff_abs > 0xfffU))
2294 return -EFAULT;
2295 if (unlikely(proto != htons(ETH_P_IP) &&
2296 proto != htons(ETH_P_IPV6)))
2297 return -ENOTSUPP;
2298
2299 len_cur = skb->len - skb_network_offset(skb);
2300 if (skb_transport_header_was_set(skb) && !trans_same)
2301 len_cur = skb_network_header_len(skb);
2302 if ((shrink && (len_diff_abs >= len_cur ||
2303 len_cur - len_diff_abs < len_min)) ||
2304 (!shrink && (skb->len + len_diff_abs > len_max &&
2305 !skb_is_gso(skb))))
2306 return -ENOTSUPP;
2307
2308 ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
2309 bpf_skb_net_grow(skb, len_diff_abs);
2310
6aaae2b6 2311 bpf_compute_data_pointers(skb);
e4a6a342 2312 return ret;
2be7e212
DB
2313}
2314
2315BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
2316 u32, mode, u64, flags)
2317{
2318 if (unlikely(flags))
2319 return -EINVAL;
2320 if (likely(mode == BPF_ADJ_ROOM_NET))
2321 return bpf_skb_adjust_net(skb, len_diff);
2322
2323 return -ENOTSUPP;
2324}
2325
2326static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
2327 .func = bpf_skb_adjust_room,
2328 .gpl_only = false,
2329 .ret_type = RET_INTEGER,
2330 .arg1_type = ARG_PTR_TO_CTX,
2331 .arg2_type = ARG_ANYTHING,
2332 .arg3_type = ARG_ANYTHING,
2333 .arg4_type = ARG_ANYTHING,
2334};
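/* Usage sketch (illustrative only, not part of this file): opening 8 bytes
 * of zeroed space right behind the fixed IPv4/IPv6 header, e.g. for an
 * extra encapsulation header that the program then fills in via
 * bpf_skb_store_bytes(). The 8-byte size is a placeholder.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>

static int (*bpf_skb_adjust_room)(void *ctx, __s32 len_diff, __u32 mode,
				  __u64 flags) =
	(void *) BPF_FUNC_skb_adjust_room;

__attribute__((section("classifier"), used))
int grow_net_room(struct __sk_buff *skb)
{
	if (bpf_skb_adjust_room(skb, 8, BPF_ADJ_ROOM_NET, 0))
		return TC_ACT_SHOT;
	/* Packet pointers are invalidated here; reload before writing. */
	return TC_ACT_OK;
}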
2335
5293efe6
DB
2336static u32 __bpf_skb_min_len(const struct sk_buff *skb)
2337{
2338 u32 min_len = skb_network_offset(skb);
2339
2340 if (skb_transport_header_was_set(skb))
2341 min_len = skb_transport_offset(skb);
2342 if (skb->ip_summed == CHECKSUM_PARTIAL)
2343 min_len = skb_checksum_start_offset(skb) +
2344 skb->csum_offset + sizeof(__sum16);
2345 return min_len;
2346}
2347
5293efe6
DB
2348static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
2349{
2350 unsigned int old_len = skb->len;
2351 int ret;
2352
2353 ret = __skb_grow_rcsum(skb, new_len);
2354 if (!ret)
2355 memset(skb->data + old_len, 0, new_len - old_len);
2356 return ret;
2357}
2358
2359static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
2360{
2361 return __skb_trim_rcsum(skb, new_len);
2362}
2363
f3694e00
DB
2364BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
2365 u64, flags)
5293efe6 2366{
5293efe6
DB
2367 u32 max_len = __bpf_skb_max_len(skb);
2368 u32 min_len = __bpf_skb_min_len(skb);
5293efe6
DB
2369 int ret;
2370
2371 if (unlikely(flags || new_len > max_len || new_len < min_len))
2372 return -EINVAL;
2373 if (skb->encapsulation)
2374 return -ENOTSUPP;
2375
2376 /* The basic idea of this helper is that it's performing the
 2377 * needed work to either grow or trim an skb, and the eBPF program
 2378 * rewrites the rest via helpers like bpf_skb_store_bytes(),
 2379 * bpf_lX_csum_replace() and others rather than passing a raw
 2380 * buffer here. This one is a slow path helper and intended
 2381 * for replies with control messages.
 2382 *
 2383 * Like in bpf_skb_change_proto(), we want to keep this rather
 2384 * minimal and without protocol specifics so that we can
 2385 * separate concerns: bpf_skb_store_bytes() should be the only
 2386 * one responsible for writing buffers.
 2387 *
 2388 * It's really expected to be a slow path operation here for
 2389 * control message replies, so we're implicitly linearizing,
 2390 * uncloning and dropping offloads from the skb here.
2391 */
2392 ret = __bpf_try_make_writable(skb, skb->len);
2393 if (!ret) {
2394 if (new_len > skb->len)
2395 ret = bpf_skb_grow_rcsum(skb, new_len);
2396 else if (new_len < skb->len)
2397 ret = bpf_skb_trim_rcsum(skb, new_len);
2398 if (!ret && skb_is_gso(skb))
2399 skb_gso_reset(skb);
2400 }
2401
6aaae2b6 2402 bpf_compute_data_pointers(skb);
5293efe6
DB
2403 return ret;
2404}
2405
2406static const struct bpf_func_proto bpf_skb_change_tail_proto = {
2407 .func = bpf_skb_change_tail,
2408 .gpl_only = false,
2409 .ret_type = RET_INTEGER,
2410 .arg1_type = ARG_PTR_TO_CTX,
2411 .arg2_type = ARG_ANYTHING,
2412 .arg3_type = ARG_ANYTHING,
2413};
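/* Usage sketch (illustrative only, not part of this file): trimming a
 * packet down before rewriting it into a short control-message style
 * reply. The 64-byte target length is a placeholder.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>

static int (*bpf_skb_change_tail)(void *ctx, __u32 new_len, __u64 flags) =
	(void *) BPF_FUNC_skb_change_tail;

__attribute__((section("classifier"), used))
int trim_reply(struct __sk_buff *skb)
{
	if (skb->len > 64 && bpf_skb_change_tail(skb, 64, 0))
		return TC_ACT_SHOT;
	return TC_ACT_OK;
}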
2414
3a0af8fd
TG
2415BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
2416 u64, flags)
2417{
2418 u32 max_len = __bpf_skb_max_len(skb);
2419 u32 new_len = skb->len + head_room;
2420 int ret;
2421
2422 if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
2423 new_len < skb->len))
2424 return -EINVAL;
2425
2426 ret = skb_cow(skb, head_room);
2427 if (likely(!ret)) {
2428 /* Idea for this helper is that we currently only
 2429 * allow expanding on the mac header. This means that
 2430 * skb->protocol, the network header, etc., stay as is.
2431 * Compared to bpf_skb_change_tail(), we're more
2432 * flexible due to not needing to linearize or
2433 * reset GSO. Intention for this helper is to be
2434 * used by an L3 skb that needs to push mac header
2435 * for redirection into L2 device.
2436 */
2437 __skb_push(skb, head_room);
2438 memset(skb->data, 0, head_room);
2439 skb_reset_mac_header(skb);
2440 }
2441
6aaae2b6 2442 bpf_compute_data_pointers(skb);
3a0af8fd
TG
 2443 return ret;
2444}
2445
2446static const struct bpf_func_proto bpf_skb_change_head_proto = {
2447 .func = bpf_skb_change_head,
2448 .gpl_only = false,
2449 .ret_type = RET_INTEGER,
2450 .arg1_type = ARG_PTR_TO_CTX,
2451 .arg2_type = ARG_ANYTHING,
2452 .arg3_type = ARG_ANYTHING,
2453};
2454
de8f3a83
DB
2455static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
2456{
2457 return xdp_data_meta_unsupported(xdp) ? 0 :
2458 xdp->data - xdp->data_meta;
2459}
2460
17bedab2
MKL
2461BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
2462{
de8f3a83
DB
2463 unsigned long metalen = xdp_get_metalen(xdp);
2464 void *data_start = xdp->data_hard_start + metalen;
17bedab2
MKL
2465 void *data = xdp->data + offset;
2466
de8f3a83 2467 if (unlikely(data < data_start ||
17bedab2
MKL
2468 data > xdp->data_end - ETH_HLEN))
2469 return -EINVAL;
2470
de8f3a83
DB
2471 if (metalen)
2472 memmove(xdp->data_meta + offset,
2473 xdp->data_meta, metalen);
2474 xdp->data_meta += offset;
17bedab2
MKL
2475 xdp->data = data;
2476
2477 return 0;
2478}
2479
2480static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
2481 .func = bpf_xdp_adjust_head,
2482 .gpl_only = false,
2483 .ret_type = RET_INTEGER,
2484 .arg1_type = ARG_PTR_TO_CTX,
2485 .arg2_type = ARG_ANYTHING,
2486};
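/* Usage sketch (illustrative only, not part of this file): an XDP program
 * popping a fixed 4-byte outer tag by moving xdp->data forward; a negative
 * offset would instead reserve headroom for encapsulation. The program
 * would normally rebuild the Ethernet header afterwards, omitted here.
 */
#include <linux/bpf.h>

static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
	(void *) BPF_FUNC_xdp_adjust_head;

__attribute__((section("xdp"), used))
int pop_outer_tag(struct xdp_md *ctx)
{
	if (bpf_xdp_adjust_head(ctx, 4))
		return XDP_ABORTED;
	return XDP_PASS;
}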
2487
de8f3a83
DB
2488BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
2489{
2490 void *meta = xdp->data_meta + offset;
2491 unsigned long metalen = xdp->data - meta;
2492
2493 if (xdp_data_meta_unsupported(xdp))
2494 return -ENOTSUPP;
2495 if (unlikely(meta < xdp->data_hard_start ||
2496 meta > xdp->data))
2497 return -EINVAL;
2498 if (unlikely((metalen & (sizeof(__u32) - 1)) ||
2499 (metalen > 32)))
2500 return -EACCES;
2501
2502 xdp->data_meta = meta;
2503
2504 return 0;
2505}
2506
2507static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
2508 .func = bpf_xdp_adjust_meta,
2509 .gpl_only = false,
2510 .ret_type = RET_INTEGER,
2511 .arg1_type = ARG_PTR_TO_CTX,
2512 .arg2_type = ARG_ANYTHING,
2513};
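/* Usage sketch (illustrative only, not part of this file): reserving four
 * bytes of metadata in front of the payload and storing a value there that
 * a later tc program can read back through data_meta. The value 0xcafe is
 * a placeholder; metalen must stay 4-byte aligned and at most 32 bytes.
 */
#include <linux/bpf.h>

static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) =
	(void *) BPF_FUNC_xdp_adjust_meta;

__attribute__((section("xdp"), used))
int mark_in_meta(struct xdp_md *ctx)
{
	__u32 *meta;

	/* Negative offset grows the meta area towards data_hard_start. */
	if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
		return XDP_PASS;

	meta = (void *)(long)ctx->data_meta;
	if ((void *)(meta + 1) > (void *)(long)ctx->data)
		return XDP_PASS;	/* bounds check the verifier demands */

	*meta = 0xcafe;
	return XDP_PASS;
}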
2514
11393cc9
JF
2515static int __bpf_tx_xdp(struct net_device *dev,
2516 struct bpf_map *map,
2517 struct xdp_buff *xdp,
2518 u32 index)
814abfab 2519{
11393cc9
JF
2520 int err;
2521
2522 if (!dev->netdev_ops->ndo_xdp_xmit) {
11393cc9 2523 return -EOPNOTSUPP;
814abfab 2524 }
11393cc9
JF
2525
2526 err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
2527 if (err)
2528 return err;
9c270af3
JDB
2529 dev->netdev_ops->ndo_xdp_flush(dev);
2530 return 0;
2531}
2532
2533static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
2534 struct bpf_map *map,
2535 struct xdp_buff *xdp,
2536 u32 index)
2537{
2538 int err;
2539
2540 if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
2541 struct net_device *dev = fwd;
2542
2543 if (!dev->netdev_ops->ndo_xdp_xmit)
2544 return -EOPNOTSUPP;
2545
2546 err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
2547 if (err)
2548 return err;
11393cc9 2549 __dev_map_insert_ctx(map, index);
9c270af3
JDB
2550
2551 } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
2552 struct bpf_cpu_map_entry *rcpu = fwd;
2553
2554 err = cpu_map_enqueue(rcpu, xdp, dev_rx);
2555 if (err)
2556 return err;
2557 __cpu_map_insert_ctx(map, index);
2558 }
e4a8e817 2559 return 0;
814abfab
JF
2560}
2561
11393cc9
JF
2562void xdp_do_flush_map(void)
2563{
2564 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2565 struct bpf_map *map = ri->map_to_flush;
2566
11393cc9 2567 ri->map_to_flush = NULL;
9c270af3
JDB
2568 if (map) {
2569 switch (map->map_type) {
2570 case BPF_MAP_TYPE_DEVMAP:
2571 __dev_map_flush(map);
2572 break;
2573 case BPF_MAP_TYPE_CPUMAP:
2574 __cpu_map_flush(map);
2575 break;
2576 default:
2577 break;
2578 }
2579 }
11393cc9
JF
2580}
2581EXPORT_SYMBOL_GPL(xdp_do_flush_map);
2582
9c270af3
JDB
2583static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
2584{
2585 switch (map->map_type) {
2586 case BPF_MAP_TYPE_DEVMAP:
2587 return __dev_map_lookup_elem(map, index);
2588 case BPF_MAP_TYPE_CPUMAP:
2589 return __cpu_map_lookup_elem(map, index);
2590 default:
2591 return NULL;
2592 }
2593}
2594
7c300131
DB
2595static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
2596 unsigned long aux)
2597{
2598 return (unsigned long)xdp_prog->aux != aux;
2599}
2600
e4a8e817
DB
2601static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
2602 struct bpf_prog *xdp_prog)
97f91a7c
JF
2603{
2604 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
7c300131 2605 unsigned long map_owner = ri->map_owner;
97f91a7c 2606 struct bpf_map *map = ri->map;
11393cc9 2607 u32 index = ri->ifindex;
9c270af3 2608 void *fwd = NULL;
4c03bdd7 2609 int err;
97f91a7c
JF
2610
2611 ri->ifindex = 0;
2612 ri->map = NULL;
7c300131 2613 ri->map_owner = 0;
109980b8 2614
7c300131 2615 if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
96c5508e
JDB
2616 err = -EFAULT;
2617 map = NULL;
2618 goto err;
2619 }
97f91a7c 2620
9c270af3 2621 fwd = __xdp_map_lookup_elem(map, index);
4c03bdd7
JDB
2622 if (!fwd) {
2623 err = -EINVAL;
f5836ca5 2624 goto err;
4c03bdd7 2625 }
e4a8e817 2626 if (ri->map_to_flush && ri->map_to_flush != map)
11393cc9
JF
2627 xdp_do_flush_map();
2628
9c270af3 2629 err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
f5836ca5
JDB
2630 if (unlikely(err))
2631 goto err;
2632
2633 ri->map_to_flush = map;
59a30896 2634 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
f5836ca5
JDB
2635 return 0;
2636err:
59a30896 2637 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
97f91a7c
JF
2638 return err;
2639}
2640
5acaee0a
JF
2641int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
2642 struct bpf_prog *xdp_prog)
814abfab
JF
2643{
2644 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
5acaee0a 2645 struct net_device *fwd;
eb48d682 2646 u32 index = ri->ifindex;
4c03bdd7 2647 int err;
814abfab 2648
97f91a7c
JF
2649 if (ri->map)
2650 return xdp_do_redirect_map(dev, xdp, xdp_prog);
2651
eb48d682 2652 fwd = dev_get_by_index_rcu(dev_net(dev), index);
814abfab 2653 ri->ifindex = 0;
5acaee0a 2654 if (unlikely(!fwd)) {
4c03bdd7 2655 err = -EINVAL;
f5836ca5 2656 goto err;
814abfab
JF
2657 }
2658
4c03bdd7 2659 err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
f5836ca5
JDB
2660 if (unlikely(err))
2661 goto err;
2662
2663 _trace_xdp_redirect(dev, xdp_prog, index);
2664 return 0;
2665err:
2666 _trace_xdp_redirect_err(dev, xdp_prog, index, err);
4c03bdd7 2667 return err;
814abfab
JF
2668}
2669EXPORT_SYMBOL_GPL(xdp_do_redirect);
2670
9c270af3
JDB
2671static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
2672{
2673 unsigned int len;
2674
2675 if (unlikely(!(fwd->flags & IFF_UP)))
2676 return -ENETDOWN;
2677
2678 len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
2679 if (skb->len > len)
2680 return -EMSGSIZE;
2681
2682 return 0;
2683}
2684
c060bc61
XS
2685static int xdp_do_generic_redirect_map(struct net_device *dev,
2686 struct sk_buff *skb,
2687 struct bpf_prog *xdp_prog)
6103aa96
JF
2688{
2689 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
7c300131 2690 unsigned long map_owner = ri->map_owner;
96c5508e
JDB
2691 struct bpf_map *map = ri->map;
2692 struct net_device *fwd = NULL;
eb48d682 2693 u32 index = ri->ifindex;
2facaad6 2694 int err = 0;
6103aa96 2695
6103aa96 2696 ri->ifindex = 0;
96c5508e 2697 ri->map = NULL;
7c300131 2698 ri->map_owner = 0;
96c5508e 2699
9c270af3
JDB
2700 if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
2701 err = -EFAULT;
2702 map = NULL;
2703 goto err;
96c5508e 2704 }
9c270af3 2705 fwd = __xdp_map_lookup_elem(map, index);
2facaad6
JDB
2706 if (unlikely(!fwd)) {
2707 err = -EINVAL;
f5836ca5 2708 goto err;
6103aa96
JF
2709 }
2710
9c270af3
JDB
2711 if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
2712 if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
2713 goto err;
2714 skb->dev = fwd;
2715 } else {
2716 /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
2717 err = -EBADRQC;
f5836ca5 2718 goto err;
2facaad6 2719 }
6103aa96 2720
9c270af3
JDB
2721 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
2722 return 0;
2723err:
2724 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
2725 return err;
2726}
2727
2728int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
2729 struct bpf_prog *xdp_prog)
2730{
2731 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2732 u32 index = ri->ifindex;
2733 struct net_device *fwd;
2734 int err = 0;
2735
2736 if (ri->map)
2737 return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
2738
2739 ri->ifindex = 0;
2740 fwd = dev_get_by_index_rcu(dev_net(dev), index);
2741 if (unlikely(!fwd)) {
2742 err = -EINVAL;
f5836ca5 2743 goto err;
2facaad6
JDB
2744 }
2745
9c270af3
JDB
2746 if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
2747 goto err;
2748
2facaad6 2749 skb->dev = fwd;
9c270af3 2750 _trace_xdp_redirect(dev, xdp_prog, index);
f5836ca5
JDB
2751 return 0;
2752err:
9c270af3 2753 _trace_xdp_redirect_err(dev, xdp_prog, index, err);
2facaad6 2754 return err;
6103aa96
JF
2755}
2756EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
2757
814abfab
JF
2758BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
2759{
2760 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2761
2762 if (unlikely(flags))
2763 return XDP_ABORTED;
2764
2765 ri->ifindex = ifindex;
2766 ri->flags = flags;
109980b8 2767 ri->map = NULL;
7c300131 2768 ri->map_owner = 0;
e4a8e817 2769
814abfab
JF
2770 return XDP_REDIRECT;
2771}
2772
2773static const struct bpf_func_proto bpf_xdp_redirect_proto = {
2774 .func = bpf_xdp_redirect,
2775 .gpl_only = false,
2776 .ret_type = RET_INTEGER,
2777 .arg1_type = ARG_ANYTHING,
2778 .arg2_type = ARG_ANYTHING,
2779};
2780
109980b8 2781BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
7c300131 2782 unsigned long, map_owner)
e4a8e817
DB
2783{
2784 struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2785
2786 if (unlikely(flags))
2787 return XDP_ABORTED;
2788
2789 ri->ifindex = ifindex;
2790 ri->flags = flags;
2791 ri->map = map;
109980b8 2792 ri->map_owner = map_owner;
e4a8e817
DB
2793
2794 return XDP_REDIRECT;
2795}
2796
109980b8
DB
2797/* Note, arg4 is hidden from users and populated by the verifier
2798 * with the right pointer.
2799 */
e4a8e817
DB
2800static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
2801 .func = bpf_xdp_redirect_map,
2802 .gpl_only = false,
2803 .ret_type = RET_INTEGER,
2804 .arg1_type = ARG_CONST_MAP_PTR,
2805 .arg2_type = ARG_ANYTHING,
2806 .arg3_type = ARG_ANYTHING,
2807};
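/* Usage sketch (illustrative only, not part of this file): redirecting
 * every frame to the device stored at index 0 of a devmap; the hidden
 * map_owner argument is filled in by the verifier, so the program passes
 * only three arguments. Map name and size are placeholders; the layout
 * follows the samples/bpf bpf_map_def convention.
 */
#include <linux/bpf.h>

struct bpf_map_def {
	unsigned int type;
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
	unsigned int map_flags;
};

static int (*bpf_redirect_map)(void *map, __u32 key, __u64 flags) =
	(void *) BPF_FUNC_redirect_map;

__attribute__((section("maps"), used))
struct bpf_map_def tx_port = {
	.type		= BPF_MAP_TYPE_DEVMAP,
	.key_size	= sizeof(int),
	.value_size	= sizeof(int),
	.max_entries	= 1,
};

__attribute__((section("xdp"), used))
int xdp_redirect_to_map(struct xdp_md *ctx)
{
	return bpf_redirect_map(&tx_port, 0, 0);
}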
2808
17bedab2 2809bool bpf_helper_changes_pkt_data(void *func)
4e10df9a 2810{
36bbef52
DB
2811 if (func == bpf_skb_vlan_push ||
2812 func == bpf_skb_vlan_pop ||
2813 func == bpf_skb_store_bytes ||
2814 func == bpf_skb_change_proto ||
3a0af8fd 2815 func == bpf_skb_change_head ||
36bbef52 2816 func == bpf_skb_change_tail ||
2be7e212 2817 func == bpf_skb_adjust_room ||
36bbef52 2818 func == bpf_skb_pull_data ||
41703a73 2819 func == bpf_clone_redirect ||
36bbef52 2820 func == bpf_l3_csum_replace ||
17bedab2 2821 func == bpf_l4_csum_replace ||
de8f3a83
DB
2822 func == bpf_xdp_adjust_head ||
2823 func == bpf_xdp_adjust_meta)
3697649f
DB
2824 return true;
2825
4e10df9a
AS
2826 return false;
2827}
2828
555c8a86 2829static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
aa7145c1 2830 unsigned long off, unsigned long len)
555c8a86 2831{
aa7145c1 2832 void *ptr = skb_header_pointer(skb, off, len, dst_buff);
555c8a86
DB
2833
2834 if (unlikely(!ptr))
2835 return len;
2836 if (ptr != dst_buff)
2837 memcpy(dst_buff, ptr, len);
2838
2839 return 0;
2840}
2841
f3694e00
DB
2842BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
2843 u64, flags, void *, meta, u64, meta_size)
555c8a86 2844{
555c8a86 2845 u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
555c8a86
DB
2846
2847 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
2848 return -EINVAL;
2849 if (unlikely(skb_size > skb->len))
2850 return -EFAULT;
2851
2852 return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
2853 bpf_skb_copy);
2854}
2855
2856static const struct bpf_func_proto bpf_skb_event_output_proto = {
2857 .func = bpf_skb_event_output,
2858 .gpl_only = true,
2859 .ret_type = RET_INTEGER,
2860 .arg1_type = ARG_PTR_TO_CTX,
2861 .arg2_type = ARG_CONST_MAP_PTR,
2862 .arg3_type = ARG_ANYTHING,
39f19ebb
AS
2863 .arg4_type = ARG_PTR_TO_MEM,
2864 .arg5_type = ARG_CONST_SIZE,
555c8a86
DB
2865};
2866
c6c33454
DB
2867static unsigned short bpf_tunnel_key_af(u64 flags)
2868{
2869 return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
2870}
2871
f3694e00
DB
2872BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
2873 u32, size, u64, flags)
d3aa45ce 2874{
c6c33454
DB
2875 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
2876 u8 compat[sizeof(struct bpf_tunnel_key)];
074f528e
DB
2877 void *to_orig = to;
2878 int err;
d3aa45ce 2879
074f528e
DB
2880 if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
2881 err = -EINVAL;
2882 goto err_clear;
2883 }
2884 if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
2885 err = -EPROTO;
2886 goto err_clear;
2887 }
c6c33454 2888 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
074f528e 2889 err = -EINVAL;
c6c33454 2890 switch (size) {
4018ab18 2891 case offsetof(struct bpf_tunnel_key, tunnel_label):
c0e760c9 2892 case offsetof(struct bpf_tunnel_key, tunnel_ext):
4018ab18 2893 goto set_compat;
c6c33454
DB
2894 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
2895 /* Fixup deprecated structure layouts here, so we have
2896 * a common path later on.
2897 */
2898 if (ip_tunnel_info_af(info) != AF_INET)
074f528e 2899 goto err_clear;
4018ab18 2900set_compat:
c6c33454
DB
2901 to = (struct bpf_tunnel_key *)compat;
2902 break;
2903 default:
074f528e 2904 goto err_clear;
c6c33454
DB
2905 }
2906 }
d3aa45ce
AS
2907
2908 to->tunnel_id = be64_to_cpu(info->key.tun_id);
c6c33454
DB
2909 to->tunnel_tos = info->key.tos;
2910 to->tunnel_ttl = info->key.ttl;
2911
4018ab18 2912 if (flags & BPF_F_TUNINFO_IPV6) {
c6c33454
DB
2913 memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
2914 sizeof(to->remote_ipv6));
4018ab18
DB
2915 to->tunnel_label = be32_to_cpu(info->key.label);
2916 } else {
c6c33454 2917 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
4018ab18 2918 }
c6c33454
DB
2919
2920 if (unlikely(size != sizeof(struct bpf_tunnel_key)))
074f528e 2921 memcpy(to_orig, to, size);
d3aa45ce
AS
2922
2923 return 0;
074f528e
DB
2924err_clear:
2925 memset(to_orig, 0, size);
2926 return err;
d3aa45ce
AS
2927}
2928
577c50aa 2929static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
d3aa45ce
AS
2930 .func = bpf_skb_get_tunnel_key,
2931 .gpl_only = false,
2932 .ret_type = RET_INTEGER,
2933 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
2934 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
2935 .arg3_type = ARG_CONST_SIZE,
d3aa45ce
AS
2936 .arg4_type = ARG_ANYTHING,
2937};
2938
f3694e00 2939BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
14ca0751 2940{
14ca0751 2941 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
074f528e 2942 int err;
14ca0751
DB
2943
2944 if (unlikely(!info ||
074f528e
DB
2945 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
2946 err = -ENOENT;
2947 goto err_clear;
2948 }
2949 if (unlikely(size < info->options_len)) {
2950 err = -ENOMEM;
2951 goto err_clear;
2952 }
14ca0751
DB
2953
2954 ip_tunnel_info_opts_get(to, info);
074f528e
DB
2955 if (size > info->options_len)
2956 memset(to + info->options_len, 0, size - info->options_len);
14ca0751
DB
2957
2958 return info->options_len;
074f528e
DB
2959err_clear:
2960 memset(to, 0, size);
2961 return err;
14ca0751
DB
2962}
2963
2964static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
2965 .func = bpf_skb_get_tunnel_opt,
2966 .gpl_only = false,
2967 .ret_type = RET_INTEGER,
2968 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
2969 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
2970 .arg3_type = ARG_CONST_SIZE,
14ca0751
DB
2971};
2972
d3aa45ce
AS
2973static struct metadata_dst __percpu *md_dst;
2974
f3694e00
DB
2975BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
2976 const struct bpf_tunnel_key *, from, u32, size, u64, flags)
d3aa45ce 2977{
d3aa45ce 2978 struct metadata_dst *md = this_cpu_ptr(md_dst);
c6c33454 2979 u8 compat[sizeof(struct bpf_tunnel_key)];
d3aa45ce
AS
2980 struct ip_tunnel_info *info;
2981
22080870
DB
2982 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
2983 BPF_F_DONT_FRAGMENT)))
d3aa45ce 2984 return -EINVAL;
c6c33454
DB
2985 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
2986 switch (size) {
4018ab18 2987 case offsetof(struct bpf_tunnel_key, tunnel_label):
c0e760c9 2988 case offsetof(struct bpf_tunnel_key, tunnel_ext):
c6c33454
DB
2989 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
2990 /* Fixup deprecated structure layouts here, so we have
2991 * a common path later on.
2992 */
2993 memcpy(compat, from, size);
2994 memset(compat + size, 0, sizeof(compat) - size);
f3694e00 2995 from = (const struct bpf_tunnel_key *) compat;
c6c33454
DB
2996 break;
2997 default:
2998 return -EINVAL;
2999 }
3000 }
c0e760c9
DB
3001 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
3002 from->tunnel_ext))
4018ab18 3003 return -EINVAL;
d3aa45ce
AS
3004
3005 skb_dst_drop(skb);
3006 dst_hold((struct dst_entry *) md);
3007 skb_dst_set(skb, (struct dst_entry *) md);
3008
3009 info = &md->u.tun_info;
3010 info->mode = IP_TUNNEL_INFO_TX;
c6c33454 3011
db3c6139 3012 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
22080870
DB
3013 if (flags & BPF_F_DONT_FRAGMENT)
3014 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
792f3dd6
WT
3015 if (flags & BPF_F_ZERO_CSUM_TX)
3016 info->key.tun_flags &= ~TUNNEL_CSUM;
22080870 3017
d3aa45ce 3018 info->key.tun_id = cpu_to_be64(from->tunnel_id);
c6c33454
DB
3019 info->key.tos = from->tunnel_tos;
3020 info->key.ttl = from->tunnel_ttl;
3021
3022 if (flags & BPF_F_TUNINFO_IPV6) {
3023 info->mode |= IP_TUNNEL_INFO_IPV6;
3024 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
3025 sizeof(from->remote_ipv6));
4018ab18
DB
3026 info->key.label = cpu_to_be32(from->tunnel_label) &
3027 IPV6_FLOWLABEL_MASK;
c6c33454
DB
3028 } else {
3029 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
3030 }
d3aa45ce
AS
3031
3032 return 0;
3033}
3034
577c50aa 3035static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
d3aa45ce
AS
3036 .func = bpf_skb_set_tunnel_key,
3037 .gpl_only = false,
3038 .ret_type = RET_INTEGER,
3039 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
3040 .arg2_type = ARG_PTR_TO_MEM,
3041 .arg3_type = ARG_CONST_SIZE,
d3aa45ce
AS
3042 .arg4_type = ARG_ANYTHING,
3043};
3044
f3694e00
DB
3045BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
3046 const u8 *, from, u32, size)
14ca0751 3047{
14ca0751
DB
3048 struct ip_tunnel_info *info = skb_tunnel_info(skb);
3049 const struct metadata_dst *md = this_cpu_ptr(md_dst);
3050
3051 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
3052 return -EINVAL;
fca5fdf6 3053 if (unlikely(size > IP_TUNNEL_OPTS_MAX))
14ca0751
DB
3054 return -ENOMEM;
3055
3056 ip_tunnel_info_opts_set(info, from, size);
3057
3058 return 0;
3059}
3060
3061static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
3062 .func = bpf_skb_set_tunnel_opt,
3063 .gpl_only = false,
3064 .ret_type = RET_INTEGER,
3065 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
3066 .arg2_type = ARG_PTR_TO_MEM,
3067 .arg3_type = ARG_CONST_SIZE,
14ca0751
DB
3068};
3069
3070static const struct bpf_func_proto *
3071bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
d3aa45ce
AS
3072{
3073 if (!md_dst) {
d66f2b91
JK
3074 struct metadata_dst __percpu *tmp;
3075
3076 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
3077 METADATA_IP_TUNNEL,
3078 GFP_KERNEL);
3079 if (!tmp)
d3aa45ce 3080 return NULL;
d66f2b91
JK
3081 if (cmpxchg(&md_dst, NULL, tmp))
3082 metadata_dst_free_percpu(tmp);
d3aa45ce 3083 }
14ca0751
DB
3084
3085 switch (which) {
3086 case BPF_FUNC_skb_set_tunnel_key:
3087 return &bpf_skb_set_tunnel_key_proto;
3088 case BPF_FUNC_skb_set_tunnel_opt:
3089 return &bpf_skb_set_tunnel_opt_proto;
3090 default:
3091 return NULL;
3092 }
d3aa45ce
AS
3093}
3094
f3694e00
DB
3095BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
3096 u32, idx)
4a482f34 3097{
4a482f34
MKL
3098 struct bpf_array *array = container_of(map, struct bpf_array, map);
3099 struct cgroup *cgrp;
3100 struct sock *sk;
4a482f34 3101
2d48c5f9 3102 sk = skb_to_full_sk(skb);
4a482f34
MKL
3103 if (!sk || !sk_fullsock(sk))
3104 return -ENOENT;
f3694e00 3105 if (unlikely(idx >= array->map.max_entries))
4a482f34
MKL
3106 return -E2BIG;
3107
f3694e00 3108 cgrp = READ_ONCE(array->ptrs[idx]);
4a482f34
MKL
3109 if (unlikely(!cgrp))
3110 return -EAGAIN;
3111
54fd9c2d 3112 return sk_under_cgroup_hierarchy(sk, cgrp);
4a482f34
MKL
3113}
3114
747ea55e
DB
3115static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
3116 .func = bpf_skb_under_cgroup,
4a482f34
MKL
3117 .gpl_only = false,
3118 .ret_type = RET_INTEGER,
3119 .arg1_type = ARG_PTR_TO_CTX,
3120 .arg2_type = ARG_CONST_MAP_PTR,
3121 .arg3_type = ARG_ANYTHING,
3122};
4a482f34 3123
4de16969
DB
3124static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
3125 unsigned long off, unsigned long len)
3126{
3127 memcpy(dst_buff, src_buff + off, len);
3128 return 0;
3129}
3130
f3694e00
DB
3131BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
3132 u64, flags, void *, meta, u64, meta_size)
4de16969 3133{
4de16969 3134 u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
4de16969
DB
3135
3136 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
3137 return -EINVAL;
3138 if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
3139 return -EFAULT;
3140
9c471370
MKL
3141 return bpf_event_output(map, flags, meta, meta_size, xdp->data,
3142 xdp_size, bpf_xdp_copy);
4de16969
DB
3143}
3144
3145static const struct bpf_func_proto bpf_xdp_event_output_proto = {
3146 .func = bpf_xdp_event_output,
3147 .gpl_only = true,
3148 .ret_type = RET_INTEGER,
3149 .arg1_type = ARG_PTR_TO_CTX,
3150 .arg2_type = ARG_CONST_MAP_PTR,
3151 .arg3_type = ARG_ANYTHING,
39f19ebb
AS
3152 .arg4_type = ARG_PTR_TO_MEM,
3153 .arg5_type = ARG_CONST_SIZE,
4de16969
DB
3154};
3155
91b8270f
CF
3156BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
3157{
3158 return skb->sk ? sock_gen_cookie(skb->sk) : 0;
3159}
3160
3161static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
3162 .func = bpf_get_socket_cookie,
3163 .gpl_only = false,
3164 .ret_type = RET_INTEGER,
3165 .arg1_type = ARG_PTR_TO_CTX,
3166};
3167
6acc5c29
CF
3168BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
3169{
3170 struct sock *sk = sk_to_full_sk(skb->sk);
3171 kuid_t kuid;
3172
3173 if (!sk || !sk_fullsock(sk))
3174 return overflowuid;
3175 kuid = sock_net_uid(sock_net(sk), sk);
3176 return from_kuid_munged(sock_net(sk)->user_ns, kuid);
3177}
3178
3179static const struct bpf_func_proto bpf_get_socket_uid_proto = {
3180 .func = bpf_get_socket_uid,
3181 .gpl_only = false,
3182 .ret_type = RET_INTEGER,
3183 .arg1_type = ARG_PTR_TO_CTX,
3184};
3185
8c4b4c7e
LB
3186BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3187 int, level, int, optname, char *, optval, int, optlen)
3188{
3189 struct sock *sk = bpf_sock->sk;
3190 int ret = 0;
3191 int val;
3192
3193 if (!sk_fullsock(sk))
3194 return -EINVAL;
3195
3196 if (level == SOL_SOCKET) {
3197 if (optlen != sizeof(int))
3198 return -EINVAL;
3199 val = *((int *)optval);
3200
3201 /* Only some socketops are supported */
3202 switch (optname) {
3203 case SO_RCVBUF:
3204 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
3205 sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
3206 break;
3207 case SO_SNDBUF:
3208 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
3209 sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
3210 break;
3211 case SO_MAX_PACING_RATE:
3212 sk->sk_max_pacing_rate = val;
3213 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
3214 sk->sk_max_pacing_rate);
3215 break;
3216 case SO_PRIORITY:
3217 sk->sk_priority = val;
3218 break;
3219 case SO_RCVLOWAT:
3220 if (val < 0)
3221 val = INT_MAX;
3222 sk->sk_rcvlowat = val ? : 1;
3223 break;
3224 case SO_MARK:
3225 sk->sk_mark = val;
3226 break;
3227 default:
3228 ret = -EINVAL;
3229 }
a5192c52 3230#ifdef CONFIG_INET
8c4b4c7e
LB
3231 } else if (level == SOL_TCP &&
3232 sk->sk_prot->setsockopt == tcp_setsockopt) {
91b5b21c
LB
3233 if (optname == TCP_CONGESTION) {
3234 char name[TCP_CA_NAME_MAX];
ebfa00c5 3235 bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
91b5b21c
LB
3236
3237 strncpy(name, optval, min_t(long, optlen,
3238 TCP_CA_NAME_MAX-1));
3239 name[TCP_CA_NAME_MAX-1] = 0;
ebfa00c5 3240 ret = tcp_set_congestion_control(sk, name, false, reinit);
91b5b21c 3241 } else {
fc747810
LB
3242 struct tcp_sock *tp = tcp_sk(sk);
3243
3244 if (optlen != sizeof(int))
3245 return -EINVAL;
3246
3247 val = *((int *)optval);
3248 /* Only some options are supported */
3249 switch (optname) {
3250 case TCP_BPF_IW:
3251 if (val <= 0 || tp->data_segs_out > 0)
3252 ret = -EINVAL;
3253 else
3254 tp->snd_cwnd = val;
3255 break;
13bf9641
LB
3256 case TCP_BPF_SNDCWND_CLAMP:
3257 if (val <= 0) {
3258 ret = -EINVAL;
3259 } else {
3260 tp->snd_cwnd_clamp = val;
3261 tp->snd_ssthresh = val;
3262 }
6d3f06a0 3263 break;
fc747810
LB
3264 default:
3265 ret = -EINVAL;
3266 }
91b5b21c 3267 }
91b5b21c 3268#endif
8c4b4c7e
LB
3269 } else {
3270 ret = -EINVAL;
3271 }
3272 return ret;
3273}
3274
3275static const struct bpf_func_proto bpf_setsockopt_proto = {
3276 .func = bpf_setsockopt,
cd86d1fd 3277 .gpl_only = false,
8c4b4c7e
LB
3278 .ret_type = RET_INTEGER,
3279 .arg1_type = ARG_PTR_TO_CTX,
3280 .arg2_type = ARG_ANYTHING,
3281 .arg3_type = ARG_ANYTHING,
3282 .arg4_type = ARG_PTR_TO_MEM,
3283 .arg5_type = ARG_CONST_SIZE,
3284};
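/* Usage sketch (illustrative only, not part of this file): a sock_ops
 * program clamping the congestion window once a connection reaches the
 * established callbacks. The clamp value is a placeholder; SOL_TCP is
 * defined locally since no libc headers are available to BPF programs.
 */
#include <linux/bpf.h>

#ifndef SOL_TCP
#define SOL_TCP 6	/* IPPROTO_TCP */
#endif

static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
			     int optlen) =
	(void *) BPF_FUNC_setsockopt;

__attribute__((section("sockops"), used))
int tune_sock(struct bpf_sock_ops *ops)
{
	int clamp = 100;

	if (ops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
	    ops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
		bpf_setsockopt(ops, SOL_TCP, TCP_BPF_SNDCWND_CLAMP,
			       &clamp, sizeof(clamp));
	return 1;
}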
3285
cd86d1fd
LB
3286BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3287 int, level, int, optname, char *, optval, int, optlen)
3288{
3289 struct sock *sk = bpf_sock->sk;
cd86d1fd
LB
3290
3291 if (!sk_fullsock(sk))
3292 goto err_clear;
3293
3294#ifdef CONFIG_INET
3295 if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
3296 if (optname == TCP_CONGESTION) {
3297 struct inet_connection_sock *icsk = inet_csk(sk);
3298
3299 if (!icsk->icsk_ca_ops || optlen <= 1)
3300 goto err_clear;
3301 strncpy(optval, icsk->icsk_ca_ops->name, optlen);
3302 optval[optlen - 1] = 0;
3303 } else {
3304 goto err_clear;
3305 }
3306 } else {
3307 goto err_clear;
3308 }
aa2bc739 3309 return 0;
cd86d1fd
LB
3310#endif
3311err_clear:
3312 memset(optval, 0, optlen);
3313 return -EINVAL;
3314}
3315
3316static const struct bpf_func_proto bpf_getsockopt_proto = {
3317 .func = bpf_getsockopt,
3318 .gpl_only = false,
3319 .ret_type = RET_INTEGER,
3320 .arg1_type = ARG_PTR_TO_CTX,
3321 .arg2_type = ARG_ANYTHING,
3322 .arg3_type = ARG_ANYTHING,
3323 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
3324 .arg5_type = ARG_CONST_SIZE,
3325};
3326
d4052c4a 3327static const struct bpf_func_proto *
2492d3b8 3328bpf_base_func_proto(enum bpf_func_id func_id)
89aa0758
AS
3329{
3330 switch (func_id) {
3331 case BPF_FUNC_map_lookup_elem:
3332 return &bpf_map_lookup_elem_proto;
3333 case BPF_FUNC_map_update_elem:
3334 return &bpf_map_update_elem_proto;
3335 case BPF_FUNC_map_delete_elem:
3336 return &bpf_map_delete_elem_proto;
03e69b50
DB
3337 case BPF_FUNC_get_prandom_u32:
3338 return &bpf_get_prandom_u32_proto;
c04167ce 3339 case BPF_FUNC_get_smp_processor_id:
80b48c44 3340 return &bpf_get_raw_smp_processor_id_proto;
2d0e30c3
DB
3341 case BPF_FUNC_get_numa_node_id:
3342 return &bpf_get_numa_node_id_proto;
04fd61ab
AS
3343 case BPF_FUNC_tail_call:
3344 return &bpf_tail_call_proto;
17ca8cbf
DB
3345 case BPF_FUNC_ktime_get_ns:
3346 return &bpf_ktime_get_ns_proto;
0756ea3e 3347 case BPF_FUNC_trace_printk:
1be7f75d
AS
3348 if (capable(CAP_SYS_ADMIN))
3349 return bpf_get_trace_printk_proto();
89aa0758
AS
3350 default:
3351 return NULL;
3352 }
3353}
3354
ae2cf1c4
DA
3355static const struct bpf_func_proto *
3356sock_filter_func_proto(enum bpf_func_id func_id)
3357{
3358 switch (func_id) {
3359 /* inet and inet6 sockets are created in a process
3360 * context so there is always a valid uid/gid
3361 */
3362 case BPF_FUNC_get_current_uid_gid:
3363 return &bpf_get_current_uid_gid_proto;
3364 default:
3365 return bpf_base_func_proto(func_id);
3366 }
3367}
3368
2492d3b8
DB
3369static const struct bpf_func_proto *
3370sk_filter_func_proto(enum bpf_func_id func_id)
3371{
3372 switch (func_id) {
3373 case BPF_FUNC_skb_load_bytes:
3374 return &bpf_skb_load_bytes_proto;
91b8270f
CF
3375 case BPF_FUNC_get_socket_cookie:
3376 return &bpf_get_socket_cookie_proto;
6acc5c29
CF
3377 case BPF_FUNC_get_socket_uid:
3378 return &bpf_get_socket_uid_proto;
2492d3b8
DB
3379 default:
3380 return bpf_base_func_proto(func_id);
3381 }
3382}
3383
608cd71a
AS
3384static const struct bpf_func_proto *
3385tc_cls_act_func_proto(enum bpf_func_id func_id)
3386{
3387 switch (func_id) {
3388 case BPF_FUNC_skb_store_bytes:
3389 return &bpf_skb_store_bytes_proto;
05c74e5e
DB
3390 case BPF_FUNC_skb_load_bytes:
3391 return &bpf_skb_load_bytes_proto;
36bbef52
DB
3392 case BPF_FUNC_skb_pull_data:
3393 return &bpf_skb_pull_data_proto;
7d672345
DB
3394 case BPF_FUNC_csum_diff:
3395 return &bpf_csum_diff_proto;
36bbef52
DB
3396 case BPF_FUNC_csum_update:
3397 return &bpf_csum_update_proto;
91bc4822
AS
3398 case BPF_FUNC_l3_csum_replace:
3399 return &bpf_l3_csum_replace_proto;
3400 case BPF_FUNC_l4_csum_replace:
3401 return &bpf_l4_csum_replace_proto;
3896d655
AS
3402 case BPF_FUNC_clone_redirect:
3403 return &bpf_clone_redirect_proto;
8d20aabe
DB
3404 case BPF_FUNC_get_cgroup_classid:
3405 return &bpf_get_cgroup_classid_proto;
4e10df9a
AS
3406 case BPF_FUNC_skb_vlan_push:
3407 return &bpf_skb_vlan_push_proto;
3408 case BPF_FUNC_skb_vlan_pop:
3409 return &bpf_skb_vlan_pop_proto;
6578171a
DB
3410 case BPF_FUNC_skb_change_proto:
3411 return &bpf_skb_change_proto_proto;
d2485c42
DB
3412 case BPF_FUNC_skb_change_type:
3413 return &bpf_skb_change_type_proto;
2be7e212
DB
3414 case BPF_FUNC_skb_adjust_room:
3415 return &bpf_skb_adjust_room_proto;
5293efe6
DB
3416 case BPF_FUNC_skb_change_tail:
3417 return &bpf_skb_change_tail_proto;
d3aa45ce
AS
3418 case BPF_FUNC_skb_get_tunnel_key:
3419 return &bpf_skb_get_tunnel_key_proto;
3420 case BPF_FUNC_skb_set_tunnel_key:
14ca0751
DB
3421 return bpf_get_skb_set_tunnel_proto(func_id);
3422 case BPF_FUNC_skb_get_tunnel_opt:
3423 return &bpf_skb_get_tunnel_opt_proto;
3424 case BPF_FUNC_skb_set_tunnel_opt:
3425 return bpf_get_skb_set_tunnel_proto(func_id);
27b29f63
AS
3426 case BPF_FUNC_redirect:
3427 return &bpf_redirect_proto;
c46646d0
DB
3428 case BPF_FUNC_get_route_realm:
3429 return &bpf_get_route_realm_proto;
13c5c240
DB
3430 case BPF_FUNC_get_hash_recalc:
3431 return &bpf_get_hash_recalc_proto;
7a4b28c6
DB
3432 case BPF_FUNC_set_hash_invalid:
3433 return &bpf_set_hash_invalid_proto;
ded092cd
DB
3434 case BPF_FUNC_set_hash:
3435 return &bpf_set_hash_proto;
bd570ff9 3436 case BPF_FUNC_perf_event_output:
555c8a86 3437 return &bpf_skb_event_output_proto;
80b48c44
DB
3438 case BPF_FUNC_get_smp_processor_id:
3439 return &bpf_get_smp_processor_id_proto;
747ea55e
DB
3440 case BPF_FUNC_skb_under_cgroup:
3441 return &bpf_skb_under_cgroup_proto;
91b8270f
CF
3442 case BPF_FUNC_get_socket_cookie:
3443 return &bpf_get_socket_cookie_proto;
6acc5c29
CF
3444 case BPF_FUNC_get_socket_uid:
3445 return &bpf_get_socket_uid_proto;
608cd71a 3446 default:
2492d3b8 3447 return bpf_base_func_proto(func_id);
608cd71a
AS
3448 }
3449}
3450
6a773a15
BB
3451static const struct bpf_func_proto *
3452xdp_func_proto(enum bpf_func_id func_id)
3453{
4de16969
DB
3454 switch (func_id) {
3455 case BPF_FUNC_perf_event_output:
3456 return &bpf_xdp_event_output_proto;
669dc4d7
DB
3457 case BPF_FUNC_get_smp_processor_id:
3458 return &bpf_get_smp_processor_id_proto;
205c3807
DB
3459 case BPF_FUNC_csum_diff:
3460 return &bpf_csum_diff_proto;
17bedab2
MKL
3461 case BPF_FUNC_xdp_adjust_head:
3462 return &bpf_xdp_adjust_head_proto;
de8f3a83
DB
3463 case BPF_FUNC_xdp_adjust_meta:
3464 return &bpf_xdp_adjust_meta_proto;
814abfab
JF
3465 case BPF_FUNC_redirect:
3466 return &bpf_xdp_redirect_proto;
97f91a7c 3467 case BPF_FUNC_redirect_map:
e4a8e817 3468 return &bpf_xdp_redirect_map_proto;
4de16969 3469 default:
2492d3b8 3470 return bpf_base_func_proto(func_id);
4de16969 3471 }
6a773a15
BB
3472}
3473
3a0af8fd
TG
3474static const struct bpf_func_proto *
3475lwt_inout_func_proto(enum bpf_func_id func_id)
3476{
3477 switch (func_id) {
3478 case BPF_FUNC_skb_load_bytes:
3479 return &bpf_skb_load_bytes_proto;
3480 case BPF_FUNC_skb_pull_data:
3481 return &bpf_skb_pull_data_proto;
3482 case BPF_FUNC_csum_diff:
3483 return &bpf_csum_diff_proto;
3484 case BPF_FUNC_get_cgroup_classid:
3485 return &bpf_get_cgroup_classid_proto;
3486 case BPF_FUNC_get_route_realm:
3487 return &bpf_get_route_realm_proto;
3488 case BPF_FUNC_get_hash_recalc:
3489 return &bpf_get_hash_recalc_proto;
3490 case BPF_FUNC_perf_event_output:
3491 return &bpf_skb_event_output_proto;
3492 case BPF_FUNC_get_smp_processor_id:
3493 return &bpf_get_smp_processor_id_proto;
3494 case BPF_FUNC_skb_under_cgroup:
3495 return &bpf_skb_under_cgroup_proto;
3496 default:
2492d3b8 3497 return bpf_base_func_proto(func_id);
3a0af8fd
TG
3498 }
3499}
3500
8c4b4c7e
LB
3501static const struct bpf_func_proto *
3502 sock_ops_func_proto(enum bpf_func_id func_id)
3503{
3504 switch (func_id) {
3505 case BPF_FUNC_setsockopt:
3506 return &bpf_setsockopt_proto;
cd86d1fd
LB
3507 case BPF_FUNC_getsockopt:
3508 return &bpf_getsockopt_proto;
174a79ff
JF
3509 case BPF_FUNC_sock_map_update:
3510 return &bpf_sock_map_update_proto;
8c4b4c7e
LB
3511 default:
3512 return bpf_base_func_proto(func_id);
3513 }
3514}
3515
b005fd18
JF
3516static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id)
3517{
3518 switch (func_id) {
8a31db56
JF
3519 case BPF_FUNC_skb_store_bytes:
3520 return &bpf_skb_store_bytes_proto;
b005fd18
JF
3521 case BPF_FUNC_skb_load_bytes:
3522 return &bpf_skb_load_bytes_proto;
8a31db56
JF
3523 case BPF_FUNC_skb_pull_data:
3524 return &bpf_skb_pull_data_proto;
3525 case BPF_FUNC_skb_change_tail:
3526 return &bpf_skb_change_tail_proto;
3527 case BPF_FUNC_skb_change_head:
3528 return &bpf_skb_change_head_proto;
b005fd18
JF
3529 case BPF_FUNC_get_socket_cookie:
3530 return &bpf_get_socket_cookie_proto;
3531 case BPF_FUNC_get_socket_uid:
3532 return &bpf_get_socket_uid_proto;
174a79ff
JF
3533 case BPF_FUNC_sk_redirect_map:
3534 return &bpf_sk_redirect_map_proto;
b005fd18
JF
3535 default:
3536 return bpf_base_func_proto(func_id);
3537 }
3538}
3539
3a0af8fd
TG
3540static const struct bpf_func_proto *
3541lwt_xmit_func_proto(enum bpf_func_id func_id)
3542{
3543 switch (func_id) {
3544 case BPF_FUNC_skb_get_tunnel_key:
3545 return &bpf_skb_get_tunnel_key_proto;
3546 case BPF_FUNC_skb_set_tunnel_key:
3547 return bpf_get_skb_set_tunnel_proto(func_id);
3548 case BPF_FUNC_skb_get_tunnel_opt:
3549 return &bpf_skb_get_tunnel_opt_proto;
3550 case BPF_FUNC_skb_set_tunnel_opt:
3551 return bpf_get_skb_set_tunnel_proto(func_id);
3552 case BPF_FUNC_redirect:
3553 return &bpf_redirect_proto;
3554 case BPF_FUNC_clone_redirect:
3555 return &bpf_clone_redirect_proto;
3556 case BPF_FUNC_skb_change_tail:
3557 return &bpf_skb_change_tail_proto;
3558 case BPF_FUNC_skb_change_head:
3559 return &bpf_skb_change_head_proto;
3560 case BPF_FUNC_skb_store_bytes:
3561 return &bpf_skb_store_bytes_proto;
3562 case BPF_FUNC_csum_update:
3563 return &bpf_csum_update_proto;
3564 case BPF_FUNC_l3_csum_replace:
3565 return &bpf_l3_csum_replace_proto;
3566 case BPF_FUNC_l4_csum_replace:
3567 return &bpf_l4_csum_replace_proto;
3568 case BPF_FUNC_set_hash_invalid:
3569 return &bpf_set_hash_invalid_proto;
3570 default:
3571 return lwt_inout_func_proto(func_id);
3572 }
3573}
3574
f96da094
DB
3575static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
3576 struct bpf_insn_access_aux *info)
23994631 3577{
f96da094 3578 const int size_default = sizeof(__u32);
23994631 3579
9bac3d6d
AS
3580 if (off < 0 || off >= sizeof(struct __sk_buff))
3581 return false;
62c7989b 3582
4936e352 3583 /* The verifier guarantees that size > 0. */
9bac3d6d
AS
3584 if (off % size != 0)
3585 return false;
62c7989b
DB
3586
3587 switch (off) {
f96da094
DB
3588 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
3589 if (off + size > offsetofend(struct __sk_buff, cb[4]))
62c7989b
DB
3590 return false;
3591 break;
8a31db56
JF
3592 case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
3593 case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
3594 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
3595 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
f96da094 3596 case bpf_ctx_range(struct __sk_buff, data):
de8f3a83 3597 case bpf_ctx_range(struct __sk_buff, data_meta):
f96da094
DB
3598 case bpf_ctx_range(struct __sk_buff, data_end):
3599 if (size != size_default)
23994631 3600 return false;
31fd8581
YS
3601 break;
3602 default:
f96da094 3603 /* Only narrow read access allowed for now. */
31fd8581 3604 if (type == BPF_WRITE) {
f96da094 3605 if (size != size_default)
31fd8581
YS
3606 return false;
3607 } else {
f96da094
DB
3608 bpf_ctx_record_field_size(info, size_default);
3609 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
23994631 3610 return false;
31fd8581 3611 }
62c7989b 3612 }
9bac3d6d
AS
3613
3614 return true;
3615}
3616
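bpf_skb_is_valid_access() lets most __sk_buff fields be read with loads narrower than the 4-byte size_default (tracked through bpf_ctx_record_field_size() and bpf_ctx_narrow_access_ok()), while writes have to use the full width. A fragment of a hypothetical BPF-C program illustrating the distinction, assuming a program type that is allowed to write mark at all (e.g. tc):

	__u8 b = *(__u8 *)&skb->mark;	/* 1-byte read: accepted as a narrow load */
	*(__u8 *)&skb->mark = 0;	/* 1-byte write: rejected, writes must use
					 * the full 4-byte (size_default) width */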
d691f9e8 3617static bool sk_filter_is_valid_access(int off, int size,
19de99f7 3618 enum bpf_access_type type,
23994631 3619 struct bpf_insn_access_aux *info)
d691f9e8 3620{
db58ba45 3621 switch (off) {
f96da094
DB
3622 case bpf_ctx_range(struct __sk_buff, tc_classid):
3623 case bpf_ctx_range(struct __sk_buff, data):
de8f3a83 3624 case bpf_ctx_range(struct __sk_buff, data_meta):
f96da094 3625 case bpf_ctx_range(struct __sk_buff, data_end):
8a31db56 3626 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
045efa82 3627 return false;
db58ba45 3628 }
045efa82 3629
d691f9e8
AS
3630 if (type == BPF_WRITE) {
3631 switch (off) {
f96da094 3632 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
d691f9e8
AS
3633 break;
3634 default:
3635 return false;
3636 }
3637 }
3638
f96da094 3639 return bpf_skb_is_valid_access(off, size, type, info);
d691f9e8
AS
3640}
3641
3a0af8fd
TG
3642static bool lwt_is_valid_access(int off, int size,
3643 enum bpf_access_type type,
23994631 3644 struct bpf_insn_access_aux *info)
3a0af8fd
TG
3645{
3646 switch (off) {
f96da094 3647 case bpf_ctx_range(struct __sk_buff, tc_classid):
8a31db56 3648 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
de8f3a83 3649 case bpf_ctx_range(struct __sk_buff, data_meta):
3a0af8fd
TG
3650 return false;
3651 }
3652
3653 if (type == BPF_WRITE) {
3654 switch (off) {
f96da094
DB
3655 case bpf_ctx_range(struct __sk_buff, mark):
3656 case bpf_ctx_range(struct __sk_buff, priority):
3657 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
3a0af8fd
TG
3658 break;
3659 default:
3660 return false;
3661 }
3662 }
3663
f96da094
DB
3664 switch (off) {
3665 case bpf_ctx_range(struct __sk_buff, data):
3666 info->reg_type = PTR_TO_PACKET;
3667 break;
3668 case bpf_ctx_range(struct __sk_buff, data_end):
3669 info->reg_type = PTR_TO_PACKET_END;
3670 break;
3671 }
3672
3673 return bpf_skb_is_valid_access(off, size, type, info);
3a0af8fd
TG
3674}
3675
61023658
DA
3676static bool sock_filter_is_valid_access(int off, int size,
3677 enum bpf_access_type type,
23994631 3678 struct bpf_insn_access_aux *info)
61023658
DA
3679{
3680 if (type == BPF_WRITE) {
3681 switch (off) {
3682 case offsetof(struct bpf_sock, bound_dev_if):
482dca93 3683 case offsetof(struct bpf_sock, mark):
482dca93
DA
3684 case offsetof(struct bpf_sock, priority):
3685 break;
61023658
DA
3686 default:
3687 return false;
3688 }
3689 }
3690
3691 if (off < 0 || off + size > sizeof(struct bpf_sock))
3692 return false;
61023658
DA
3693 /* The verifier guarantees that size > 0. */
3694 if (off % size != 0)
3695 return false;
61023658
DA
3696 if (size != sizeof(__u32))
3697 return false;
3698
3699 return true;
3700}
3701
047b0ecd
DB
3702static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
3703 const struct bpf_prog *prog, int drop_verdict)
36bbef52
DB
3704{
3705 struct bpf_insn *insn = insn_buf;
3706
3707 if (!direct_write)
3708 return 0;
3709
3710 /* if (!skb->cloned)
3711 * goto start;
3712 *
 3714	 * (Fast path; otherwise we conservatively assume we might be
 3715	 * a clone and do the rest in the helper.)
3715 */
3716 *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
3717 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
3718 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
3719
3720 /* ret = bpf_skb_pull_data(skb, 0); */
3721 *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
3722 *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
3723 *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
3724 BPF_FUNC_skb_pull_data);
3725 /* if (!ret)
3726 * goto restore;
3727 * return TC_ACT_SHOT;
3728 */
3729 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
047b0ecd 3730 *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
36bbef52
DB
3731 *insn++ = BPF_EXIT_INSN();
3732
3733 /* restore: */
3734 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
3735 /* start: */
3736 *insn++ = prog->insnsi[0];
3737
3738 return insn - insn_buf;
3739}
3740
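Written as rough C-style pseudocode, the instructions emitted above amount to the following at program entry (the cloned bit is read from the kernel sk_buff, which a BPF program could not do on its own; drop_verdict is TC_ACT_SHOT or SK_DROP depending on the caller):

	if (skb->cloned) {
		if (bpf_skb_pull_data(skb, 0))
			return drop_verdict;
	}
	/* fall through to the program's original first instruction */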
047b0ecd
DB
3741static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
3742 const struct bpf_prog *prog)
3743{
3744 return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
3745}
3746
d691f9e8 3747static bool tc_cls_act_is_valid_access(int off, int size,
19de99f7 3748 enum bpf_access_type type,
23994631 3749 struct bpf_insn_access_aux *info)
d691f9e8
AS
3750{
3751 if (type == BPF_WRITE) {
3752 switch (off) {
f96da094
DB
3753 case bpf_ctx_range(struct __sk_buff, mark):
3754 case bpf_ctx_range(struct __sk_buff, tc_index):
3755 case bpf_ctx_range(struct __sk_buff, priority):
3756 case bpf_ctx_range(struct __sk_buff, tc_classid):
3757 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
d691f9e8
AS
3758 break;
3759 default:
3760 return false;
3761 }
3762 }
19de99f7 3763
f96da094
DB
3764 switch (off) {
3765 case bpf_ctx_range(struct __sk_buff, data):
3766 info->reg_type = PTR_TO_PACKET;
3767 break;
de8f3a83
DB
3768 case bpf_ctx_range(struct __sk_buff, data_meta):
3769 info->reg_type = PTR_TO_PACKET_META;
3770 break;
f96da094
DB
3771 case bpf_ctx_range(struct __sk_buff, data_end):
3772 info->reg_type = PTR_TO_PACKET_END;
3773 break;
8a31db56
JF
3774 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
3775 return false;
f96da094
DB
3776 }
3777
3778 return bpf_skb_is_valid_access(off, size, type, info);
d691f9e8
AS
3779}
3780
1afaf661 3781static bool __is_valid_xdp_access(int off, int size)
6a773a15
BB
3782{
3783 if (off < 0 || off >= sizeof(struct xdp_md))
3784 return false;
3785 if (off % size != 0)
3786 return false;
6088b582 3787 if (size != sizeof(__u32))
6a773a15
BB
3788 return false;
3789
3790 return true;
3791}
3792
3793static bool xdp_is_valid_access(int off, int size,
3794 enum bpf_access_type type,
23994631 3795 struct bpf_insn_access_aux *info)
6a773a15
BB
3796{
3797 if (type == BPF_WRITE)
3798 return false;
3799
3800 switch (off) {
3801 case offsetof(struct xdp_md, data):
23994631 3802 info->reg_type = PTR_TO_PACKET;
6a773a15 3803 break;
de8f3a83
DB
3804 case offsetof(struct xdp_md, data_meta):
3805 info->reg_type = PTR_TO_PACKET_META;
3806 break;
6a773a15 3807 case offsetof(struct xdp_md, data_end):
23994631 3808 info->reg_type = PTR_TO_PACKET_END;
6a773a15
BB
3809 break;
3810 }
3811
1afaf661 3812 return __is_valid_xdp_access(off, size);
6a773a15
BB
3813}
3814
3815void bpf_warn_invalid_xdp_action(u32 act)
3816{
9beb8bed
DB
3817 const u32 act_max = XDP_REDIRECT;
3818
3819 WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
3820 act > act_max ? "Illegal" : "Driver unsupported",
3821 act);
6a773a15
BB
3822}
3823EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
3824
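A minimal, hypothetical XDP program that only returns a well-known verdict and therefore never trips this warning (SEC() as used in the kernel's BPF samples/selftests is assumed to be available):

	#include <linux/bpf.h>

	SEC("xdp")
	int xdp_drop_all(struct xdp_md *ctx)
	{
		/* a value above XDP_REDIRECT would be reported as "Illegal";
		 * a known verdict the driver cannot handle is reported as
		 * "Driver unsupported" by bpf_warn_invalid_xdp_action()
		 */
		return XDP_DROP;
	}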
40304b2a
LB
3825static bool __is_valid_sock_ops_access(int off, int size)
3826{
3827 if (off < 0 || off >= sizeof(struct bpf_sock_ops))
3828 return false;
3829 /* The verifier guarantees that size > 0. */
3830 if (off % size != 0)
3831 return false;
3832 if (size != sizeof(__u32))
3833 return false;
3834
3835 return true;
3836}
3837
3838static bool sock_ops_is_valid_access(int off, int size,
3839 enum bpf_access_type type,
3840 struct bpf_insn_access_aux *info)
3841{
3842 if (type == BPF_WRITE) {
3843 switch (off) {
3844 case offsetof(struct bpf_sock_ops, op) ...
3845 offsetof(struct bpf_sock_ops, replylong[3]):
3846 break;
3847 default:
3848 return false;
3849 }
3850 }
3851
3852 return __is_valid_sock_ops_access(off, size);
3853}
3854
8a31db56
JF
3855static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
3856 const struct bpf_prog *prog)
3857{
047b0ecd 3858 return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
8a31db56
JF
3859}
3860
b005fd18
JF
3861static bool sk_skb_is_valid_access(int off, int size,
3862 enum bpf_access_type type,
3863 struct bpf_insn_access_aux *info)
3864{
de8f3a83
DB
3865 switch (off) {
3866 case bpf_ctx_range(struct __sk_buff, tc_classid):
3867 case bpf_ctx_range(struct __sk_buff, data_meta):
3868 return false;
3869 }
3870
8a31db56
JF
3871 if (type == BPF_WRITE) {
3872 switch (off) {
8a31db56
JF
3873 case bpf_ctx_range(struct __sk_buff, tc_index):
3874 case bpf_ctx_range(struct __sk_buff, priority):
3875 break;
3876 default:
3877 return false;
3878 }
3879 }
3880
b005fd18 3881 switch (off) {
f7e9cb1e 3882 case bpf_ctx_range(struct __sk_buff, mark):
8a31db56 3883 return false;
b005fd18
JF
3884 case bpf_ctx_range(struct __sk_buff, data):
3885 info->reg_type = PTR_TO_PACKET;
3886 break;
3887 case bpf_ctx_range(struct __sk_buff, data_end):
3888 info->reg_type = PTR_TO_PACKET_END;
3889 break;
3890 }
3891
3892 return bpf_skb_is_valid_access(off, size, type, info);
3893}
3894
2492d3b8
DB
3895static u32 bpf_convert_ctx_access(enum bpf_access_type type,
3896 const struct bpf_insn *si,
3897 struct bpf_insn *insn_buf,
f96da094 3898 struct bpf_prog *prog, u32 *target_size)
9bac3d6d
AS
3899{
3900 struct bpf_insn *insn = insn_buf;
6b8cc1d1 3901 int off;
9bac3d6d 3902
6b8cc1d1 3903 switch (si->off) {
9bac3d6d 3904 case offsetof(struct __sk_buff, len):
6b8cc1d1 3905 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
3906 bpf_target_off(struct sk_buff, len, 4,
3907 target_size));
9bac3d6d
AS
3908 break;
3909
0b8c707d 3910 case offsetof(struct __sk_buff, protocol):
6b8cc1d1 3911 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
3912 bpf_target_off(struct sk_buff, protocol, 2,
3913 target_size));
0b8c707d
DB
3914 break;
3915
27cd5452 3916 case offsetof(struct __sk_buff, vlan_proto):
6b8cc1d1 3917 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
3918 bpf_target_off(struct sk_buff, vlan_proto, 2,
3919 target_size));
27cd5452
MS
3920 break;
3921
bcad5718 3922 case offsetof(struct __sk_buff, priority):
754f1e6a 3923 if (type == BPF_WRITE)
6b8cc1d1 3924 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
3925 bpf_target_off(struct sk_buff, priority, 4,
3926 target_size));
754f1e6a 3927 else
6b8cc1d1 3928 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
3929 bpf_target_off(struct sk_buff, priority, 4,
3930 target_size));
bcad5718
DB
3931 break;
3932
37e82c2f 3933 case offsetof(struct __sk_buff, ingress_ifindex):
6b8cc1d1 3934 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
3935 bpf_target_off(struct sk_buff, skb_iif, 4,
3936 target_size));
37e82c2f
AS
3937 break;
3938
3939 case offsetof(struct __sk_buff, ifindex):
f035a515 3940 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
6b8cc1d1 3941 si->dst_reg, si->src_reg,
37e82c2f 3942 offsetof(struct sk_buff, dev));
6b8cc1d1
DB
3943 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
3944 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
f96da094
DB
3945 bpf_target_off(struct net_device, ifindex, 4,
3946 target_size));
37e82c2f
AS
3947 break;
3948
ba7591d8 3949 case offsetof(struct __sk_buff, hash):
6b8cc1d1 3950 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
3951 bpf_target_off(struct sk_buff, hash, 4,
3952 target_size));
ba7591d8
DB
3953 break;
3954
9bac3d6d 3955 case offsetof(struct __sk_buff, mark):
d691f9e8 3956 if (type == BPF_WRITE)
6b8cc1d1 3957 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
3958 bpf_target_off(struct sk_buff, mark, 4,
3959 target_size));
d691f9e8 3960 else
6b8cc1d1 3961 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
3962 bpf_target_off(struct sk_buff, mark, 4,
3963 target_size));
d691f9e8 3964 break;
9bac3d6d
AS
3965
3966 case offsetof(struct __sk_buff, pkt_type):
f96da094
DB
3967 *target_size = 1;
3968 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
3969 PKT_TYPE_OFFSET());
3970 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
3971#ifdef __BIG_ENDIAN_BITFIELD
3972 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
3973#endif
3974 break;
9bac3d6d
AS
3975
3976 case offsetof(struct __sk_buff, queue_mapping):
f96da094
DB
3977 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
3978 bpf_target_off(struct sk_buff, queue_mapping, 2,
3979 target_size));
3980 break;
c2497395 3981
c2497395 3982 case offsetof(struct __sk_buff, vlan_present):
c2497395 3983 case offsetof(struct __sk_buff, vlan_tci):
f96da094
DB
3984 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
3985
3986 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
3987 bpf_target_off(struct sk_buff, vlan_tci, 2,
3988 target_size));
3989 if (si->off == offsetof(struct __sk_buff, vlan_tci)) {
3990 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg,
3991 ~VLAN_TAG_PRESENT);
3992 } else {
3993 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12);
3994 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
3995 }
3996 break;
d691f9e8
AS
3997
3998 case offsetof(struct __sk_buff, cb[0]) ...
f96da094 3999 offsetofend(struct __sk_buff, cb[4]) - 1:
d691f9e8 4000 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
62c7989b
DB
4001 BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
4002 offsetof(struct qdisc_skb_cb, data)) %
4003 sizeof(__u64));
d691f9e8 4004
ff936a04 4005 prog->cb_access = 1;
6b8cc1d1
DB
4006 off = si->off;
4007 off -= offsetof(struct __sk_buff, cb[0]);
4008 off += offsetof(struct sk_buff, cb);
4009 off += offsetof(struct qdisc_skb_cb, data);
d691f9e8 4010 if (type == BPF_WRITE)
62c7989b 4011 *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
6b8cc1d1 4012 si->src_reg, off);
d691f9e8 4013 else
62c7989b 4014 *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
6b8cc1d1 4015 si->src_reg, off);
d691f9e8
AS
4016 break;
4017
045efa82 4018 case offsetof(struct __sk_buff, tc_classid):
6b8cc1d1
DB
4019 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);
4020
4021 off = si->off;
4022 off -= offsetof(struct __sk_buff, tc_classid);
4023 off += offsetof(struct sk_buff, cb);
4024 off += offsetof(struct qdisc_skb_cb, tc_classid);
f96da094 4025 *target_size = 2;
09c37a2c 4026 if (type == BPF_WRITE)
6b8cc1d1
DB
4027 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
4028 si->src_reg, off);
09c37a2c 4029 else
6b8cc1d1
DB
4030 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
4031 si->src_reg, off);
045efa82
DB
4032 break;
4033
db58ba45 4034 case offsetof(struct __sk_buff, data):
f035a515 4035 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
6b8cc1d1 4036 si->dst_reg, si->src_reg,
db58ba45
AS
4037 offsetof(struct sk_buff, data));
4038 break;
4039
de8f3a83
DB
4040 case offsetof(struct __sk_buff, data_meta):
4041 off = si->off;
4042 off -= offsetof(struct __sk_buff, data_meta);
4043 off += offsetof(struct sk_buff, cb);
4044 off += offsetof(struct bpf_skb_data_end, data_meta);
4045 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
4046 si->src_reg, off);
4047 break;
4048
db58ba45 4049 case offsetof(struct __sk_buff, data_end):
6b8cc1d1
DB
4050 off = si->off;
4051 off -= offsetof(struct __sk_buff, data_end);
4052 off += offsetof(struct sk_buff, cb);
4053 off += offsetof(struct bpf_skb_data_end, data_end);
4054 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
4055 si->src_reg, off);
db58ba45
AS
4056 break;
4057
d691f9e8
AS
4058 case offsetof(struct __sk_buff, tc_index):
4059#ifdef CONFIG_NET_SCHED
d691f9e8 4060 if (type == BPF_WRITE)
6b8cc1d1 4061 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
4062 bpf_target_off(struct sk_buff, tc_index, 2,
4063 target_size));
d691f9e8 4064 else
6b8cc1d1 4065 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
4066 bpf_target_off(struct sk_buff, tc_index, 2,
4067 target_size));
d691f9e8 4068#else
2ed46ce4 4069 *target_size = 2;
d691f9e8 4070 if (type == BPF_WRITE)
6b8cc1d1 4071 *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
d691f9e8 4072 else
6b8cc1d1 4073 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
b1d9fc41
DB
4074#endif
4075 break;
4076
4077 case offsetof(struct __sk_buff, napi_id):
4078#if defined(CONFIG_NET_RX_BUSY_POLL)
b1d9fc41 4079 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
4080 bpf_target_off(struct sk_buff, napi_id, 4,
4081 target_size));
b1d9fc41
DB
4082 *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
4083 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
4084#else
2ed46ce4 4085 *target_size = 4;
b1d9fc41 4086 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
d691f9e8 4087#endif
6b8cc1d1 4088 break;
8a31db56
JF
4089 case offsetof(struct __sk_buff, family):
4090 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
4091
4092 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4093 si->dst_reg, si->src_reg,
4094 offsetof(struct sk_buff, sk));
4095 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
4096 bpf_target_off(struct sock_common,
4097 skc_family,
4098 2, target_size));
4099 break;
4100 case offsetof(struct __sk_buff, remote_ip4):
4101 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
4102
4103 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4104 si->dst_reg, si->src_reg,
4105 offsetof(struct sk_buff, sk));
4106 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4107 bpf_target_off(struct sock_common,
4108 skc_daddr,
4109 4, target_size));
4110 break;
4111 case offsetof(struct __sk_buff, local_ip4):
4112 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
4113 skc_rcv_saddr) != 4);
4114
4115 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4116 si->dst_reg, si->src_reg,
4117 offsetof(struct sk_buff, sk));
4118 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4119 bpf_target_off(struct sock_common,
4120 skc_rcv_saddr,
4121 4, target_size));
4122 break;
4123 case offsetof(struct __sk_buff, remote_ip6[0]) ...
4124 offsetof(struct __sk_buff, remote_ip6[3]):
4125#if IS_ENABLED(CONFIG_IPV6)
4126 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
4127 skc_v6_daddr.s6_addr32[0]) != 4);
4128
4129 off = si->off;
4130 off -= offsetof(struct __sk_buff, remote_ip6[0]);
4131
4132 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4133 si->dst_reg, si->src_reg,
4134 offsetof(struct sk_buff, sk));
4135 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4136 offsetof(struct sock_common,
4137 skc_v6_daddr.s6_addr32[0]) +
4138 off);
4139#else
4140 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
4141#endif
4142 break;
4143 case offsetof(struct __sk_buff, local_ip6[0]) ...
4144 offsetof(struct __sk_buff, local_ip6[3]):
4145#if IS_ENABLED(CONFIG_IPV6)
4146 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
4147 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
4148
4149 off = si->off;
4150 off -= offsetof(struct __sk_buff, local_ip6[0]);
4151
4152 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4153 si->dst_reg, si->src_reg,
4154 offsetof(struct sk_buff, sk));
4155 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4156 offsetof(struct sock_common,
4157 skc_v6_rcv_saddr.s6_addr32[0]) +
4158 off);
4159#else
4160 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
4161#endif
4162 break;
4163
4164 case offsetof(struct __sk_buff, remote_port):
4165 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
4166
4167 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4168 si->dst_reg, si->src_reg,
4169 offsetof(struct sk_buff, sk));
4170 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
4171 bpf_target_off(struct sock_common,
4172 skc_dport,
4173 2, target_size));
4174#ifndef __BIG_ENDIAN_BITFIELD
4175 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
4176#endif
4177 break;
4178
4179 case offsetof(struct __sk_buff, local_port):
4180 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
4181
4182 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4183 si->dst_reg, si->src_reg,
4184 offsetof(struct sk_buff, sk));
4185 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
4186 bpf_target_off(struct sock_common,
4187 skc_num, 2, target_size));
4188 break;
9bac3d6d
AS
4189 }
4190
4191 return insn - insn_buf;
89aa0758
AS
4192}
4193
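The cb[0] ... cb[4] case above redirects __sk_buff->cb[] accesses into the qdisc_skb_cb data area embedded in skb->cb. As a sketch (kernel context assumed, the function name is illustrative), a 4-byte read of cb[1] ends up equivalent to:

	static u32 sk_buff_cb1_read_sketch(const struct sk_buff *skb)
	{
		/* off = 4 == si->off - offsetof(struct __sk_buff, cb[0]) */
		return *(u32 *)(qdisc_skb_cb(skb)->data + 4);
	}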
61023658 4194static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
6b8cc1d1 4195 const struct bpf_insn *si,
61023658 4196 struct bpf_insn *insn_buf,
f96da094 4197 struct bpf_prog *prog, u32 *target_size)
61023658
DA
4198{
4199 struct bpf_insn *insn = insn_buf;
4200
6b8cc1d1 4201 switch (si->off) {
61023658
DA
4202 case offsetof(struct bpf_sock, bound_dev_if):
4203 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
4204
4205 if (type == BPF_WRITE)
6b8cc1d1 4206 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
61023658
DA
4207 offsetof(struct sock, sk_bound_dev_if));
4208 else
6b8cc1d1 4209 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
61023658
DA
4210 offsetof(struct sock, sk_bound_dev_if));
4211 break;
aa4c1037 4212
482dca93
DA
4213 case offsetof(struct bpf_sock, mark):
4214 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
4215
4216 if (type == BPF_WRITE)
4217 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
4218 offsetof(struct sock, sk_mark));
4219 else
4220 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4221 offsetof(struct sock, sk_mark));
4222 break;
4223
4224 case offsetof(struct bpf_sock, priority):
4225 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
4226
4227 if (type == BPF_WRITE)
4228 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
4229 offsetof(struct sock, sk_priority));
4230 else
4231 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4232 offsetof(struct sock, sk_priority));
4233 break;
4234
aa4c1037
DA
4235 case offsetof(struct bpf_sock, family):
4236 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
4237
6b8cc1d1 4238 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
aa4c1037
DA
4239 offsetof(struct sock, sk_family));
4240 break;
4241
4242 case offsetof(struct bpf_sock, type):
6b8cc1d1 4243 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
aa4c1037 4244 offsetof(struct sock, __sk_flags_offset));
6b8cc1d1
DB
4245 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
4246 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
aa4c1037
DA
4247 break;
4248
4249 case offsetof(struct bpf_sock, protocol):
6b8cc1d1 4250 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
aa4c1037 4251 offsetof(struct sock, __sk_flags_offset));
6b8cc1d1
DB
4252 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
4253 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
aa4c1037 4254 break;
61023658
DA
4255 }
4256
4257 return insn - insn_buf;
4258}
4259
6b8cc1d1
DB
4260static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
4261 const struct bpf_insn *si,
374fb54e 4262 struct bpf_insn *insn_buf,
f96da094 4263 struct bpf_prog *prog, u32 *target_size)
374fb54e
DB
4264{
4265 struct bpf_insn *insn = insn_buf;
4266
6b8cc1d1 4267 switch (si->off) {
374fb54e 4268 case offsetof(struct __sk_buff, ifindex):
374fb54e 4269 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
6b8cc1d1 4270 si->dst_reg, si->src_reg,
374fb54e 4271 offsetof(struct sk_buff, dev));
6b8cc1d1 4272 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
f96da094
DB
4273 bpf_target_off(struct net_device, ifindex, 4,
4274 target_size));
374fb54e
DB
4275 break;
4276 default:
f96da094
DB
4277 return bpf_convert_ctx_access(type, si, insn_buf, prog,
4278 target_size);
374fb54e
DB
4279 }
4280
4281 return insn - insn_buf;
4282}
4283
6b8cc1d1
DB
4284static u32 xdp_convert_ctx_access(enum bpf_access_type type,
4285 const struct bpf_insn *si,
6a773a15 4286 struct bpf_insn *insn_buf,
f96da094 4287 struct bpf_prog *prog, u32 *target_size)
6a773a15
BB
4288{
4289 struct bpf_insn *insn = insn_buf;
4290
6b8cc1d1 4291 switch (si->off) {
6a773a15 4292 case offsetof(struct xdp_md, data):
f035a515 4293 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
6b8cc1d1 4294 si->dst_reg, si->src_reg,
6a773a15
BB
4295 offsetof(struct xdp_buff, data));
4296 break;
de8f3a83
DB
4297 case offsetof(struct xdp_md, data_meta):
4298 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
4299 si->dst_reg, si->src_reg,
4300 offsetof(struct xdp_buff, data_meta));
4301 break;
6a773a15 4302 case offsetof(struct xdp_md, data_end):
f035a515 4303 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
6b8cc1d1 4304 si->dst_reg, si->src_reg,
6a773a15
BB
4305 offsetof(struct xdp_buff, data_end));
4306 break;
02dd3291
JDB
4307 case offsetof(struct xdp_md, ingress_ifindex):
4308 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
4309 si->dst_reg, si->src_reg,
4310 offsetof(struct xdp_buff, rxq));
4311 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
4312 si->dst_reg, si->dst_reg,
4313 offsetof(struct xdp_rxq_info, dev));
4314 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
daaf24c6 4315 offsetof(struct net_device, ifindex));
02dd3291
JDB
4316 break;
4317 case offsetof(struct xdp_md, rx_queue_index):
4318 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
4319 si->dst_reg, si->src_reg,
4320 offsetof(struct xdp_buff, rxq));
4321 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
daaf24c6
JDB
4322 offsetof(struct xdp_rxq_info,
4323 queue_index));
02dd3291 4324 break;
6a773a15
BB
4325 }
4326
4327 return insn - insn_buf;
4328}
4329
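Each xdp_md field is rewritten into the matching xdp_buff member; the ifindex and queue fields take an extra hop through the rxq info. A sketch of the equivalent reads (kernel context assumed, function names are illustrative):

	static u32 xdp_rx_queue_index_sketch(const struct xdp_buff *xdp)
	{
		return xdp->rxq->queue_index;	/* xdp_md->rx_queue_index */
	}

	static int xdp_ingress_ifindex_sketch(const struct xdp_buff *xdp)
	{
		return xdp->rxq->dev->ifindex;	/* xdp_md->ingress_ifindex */
	}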
40304b2a
LB
4330static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
4331 const struct bpf_insn *si,
4332 struct bpf_insn *insn_buf,
f96da094
DB
4333 struct bpf_prog *prog,
4334 u32 *target_size)
40304b2a
LB
4335{
4336 struct bpf_insn *insn = insn_buf;
4337 int off;
4338
4339 switch (si->off) {
4340 case offsetof(struct bpf_sock_ops, op) ...
4341 offsetof(struct bpf_sock_ops, replylong[3]):
4342 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
4343 FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
4344 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
4345 FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
4346 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
4347 FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
4348 off = si->off;
4349 off -= offsetof(struct bpf_sock_ops, op);
4350 off += offsetof(struct bpf_sock_ops_kern, op);
4351 if (type == BPF_WRITE)
4352 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
4353 off);
4354 else
4355 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4356 off);
4357 break;
4358
4359 case offsetof(struct bpf_sock_ops, family):
4360 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
4361
4362 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4363 struct bpf_sock_ops_kern, sk),
4364 si->dst_reg, si->src_reg,
4365 offsetof(struct bpf_sock_ops_kern, sk));
4366 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
4367 offsetof(struct sock_common, skc_family));
4368 break;
4369
4370 case offsetof(struct bpf_sock_ops, remote_ip4):
4371 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
4372
4373 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4374 struct bpf_sock_ops_kern, sk),
4375 si->dst_reg, si->src_reg,
4376 offsetof(struct bpf_sock_ops_kern, sk));
4377 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4378 offsetof(struct sock_common, skc_daddr));
4379 break;
4380
4381 case offsetof(struct bpf_sock_ops, local_ip4):
4382 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
4383
4384 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4385 struct bpf_sock_ops_kern, sk),
4386 si->dst_reg, si->src_reg,
4387 offsetof(struct bpf_sock_ops_kern, sk));
4388 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4389 offsetof(struct sock_common,
4390 skc_rcv_saddr));
4391 break;
4392
4393 case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
4394 offsetof(struct bpf_sock_ops, remote_ip6[3]):
4395#if IS_ENABLED(CONFIG_IPV6)
4396 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
4397 skc_v6_daddr.s6_addr32[0]) != 4);
4398
4399 off = si->off;
4400 off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
4401 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4402 struct bpf_sock_ops_kern, sk),
4403 si->dst_reg, si->src_reg,
4404 offsetof(struct bpf_sock_ops_kern, sk));
4405 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4406 offsetof(struct sock_common,
4407 skc_v6_daddr.s6_addr32[0]) +
4408 off);
4409#else
4410 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
4411#endif
4412 break;
4413
4414 case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
4415 offsetof(struct bpf_sock_ops, local_ip6[3]):
4416#if IS_ENABLED(CONFIG_IPV6)
4417 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
4418 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
4419
4420 off = si->off;
4421 off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
4422 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4423 struct bpf_sock_ops_kern, sk),
4424 si->dst_reg, si->src_reg,
4425 offsetof(struct bpf_sock_ops_kern, sk));
4426 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4427 offsetof(struct sock_common,
4428 skc_v6_rcv_saddr.s6_addr32[0]) +
4429 off);
4430#else
4431 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
4432#endif
4433 break;
4434
4435 case offsetof(struct bpf_sock_ops, remote_port):
4436 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
4437
4438 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4439 struct bpf_sock_ops_kern, sk),
4440 si->dst_reg, si->src_reg,
4441 offsetof(struct bpf_sock_ops_kern, sk));
4442 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
4443 offsetof(struct sock_common, skc_dport));
4444#ifndef __BIG_ENDIAN_BITFIELD
4445 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
4446#endif
4447 break;
4448
4449 case offsetof(struct bpf_sock_ops, local_port):
4450 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
4451
4452 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4453 struct bpf_sock_ops_kern, sk),
4454 si->dst_reg, si->src_reg,
4455 offsetof(struct bpf_sock_ops_kern, sk));
4456 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
4457 offsetof(struct sock_common, skc_num));
4458 break;
f19397a5
LB
4459
4460 case offsetof(struct bpf_sock_ops, is_fullsock):
4461 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
4462 struct bpf_sock_ops_kern,
4463 is_fullsock),
4464 si->dst_reg, si->src_reg,
4465 offsetof(struct bpf_sock_ops_kern,
4466 is_fullsock));
4467 break;
4468
4469/* Helper macro for adding read access to tcp_sock fields. */
4470#define SOCK_OPS_GET_TCP32(FIELD_NAME) \
4471 do { \
4472 BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, FIELD_NAME) != 4); \
4473 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
4474 struct bpf_sock_ops_kern, \
4475 is_fullsock), \
4476 si->dst_reg, si->src_reg, \
4477 offsetof(struct bpf_sock_ops_kern, \
4478 is_fullsock)); \
4479 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2); \
4480 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
4481 struct bpf_sock_ops_kern, sk),\
4482 si->dst_reg, si->src_reg, \
4483 offsetof(struct bpf_sock_ops_kern, sk));\
4484 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, \
4485 offsetof(struct tcp_sock, FIELD_NAME)); \
4486 } while (0)
4487
4488 case offsetof(struct bpf_sock_ops, snd_cwnd):
4489 SOCK_OPS_GET_TCP32(snd_cwnd);
4490 break;
4491
4492 case offsetof(struct bpf_sock_ops, srtt_us):
4493 SOCK_OPS_GET_TCP32(srtt_us);
4494 break;
40304b2a
LB
4495 }
4496 return insn - insn_buf;
4497}
4498
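SOCK_OPS_GET_TCP32() guards the tcp_sock load behind is_fullsock, so a read of, say, bpf_sock_ops->snd_cwnd behaves roughly like this sketch (kernel context assumed, the function name is illustrative):

	static u32 sock_ops_snd_cwnd_sketch(const struct bpf_sock_ops_kern *skops)
	{
		if (!skops->is_fullsock)
			return 0;	/* dst_reg keeps the 0 it just loaded */
		return tcp_sk(skops->sk)->snd_cwnd;
	}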
8108a775
JF
4499static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
4500 const struct bpf_insn *si,
4501 struct bpf_insn *insn_buf,
4502 struct bpf_prog *prog, u32 *target_size)
4503{
4504 struct bpf_insn *insn = insn_buf;
4505 int off;
4506
4507 switch (si->off) {
4508 case offsetof(struct __sk_buff, data_end):
4509 off = si->off;
4510 off -= offsetof(struct __sk_buff, data_end);
4511 off += offsetof(struct sk_buff, cb);
4512 off += offsetof(struct tcp_skb_cb, bpf.data_end);
4513 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
4514 si->src_reg, off);
4515 break;
4516 default:
4517 return bpf_convert_ctx_access(type, si, insn_buf, prog,
4518 target_size);
4519 }
4520
4521 return insn - insn_buf;
4522}
4523
7de16e3a 4524const struct bpf_verifier_ops sk_filter_verifier_ops = {
4936e352
DB
4525 .get_func_proto = sk_filter_func_proto,
4526 .is_valid_access = sk_filter_is_valid_access,
2492d3b8 4527 .convert_ctx_access = bpf_convert_ctx_access,
89aa0758
AS
4528};
4529
7de16e3a 4530const struct bpf_prog_ops sk_filter_prog_ops = {
61f3c964 4531 .test_run = bpf_prog_test_run_skb,
7de16e3a
JK
4532};
4533
4534const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
4936e352
DB
4535 .get_func_proto = tc_cls_act_func_proto,
4536 .is_valid_access = tc_cls_act_is_valid_access,
374fb54e 4537 .convert_ctx_access = tc_cls_act_convert_ctx_access,
36bbef52 4538 .gen_prologue = tc_cls_act_prologue,
7de16e3a
JK
4539};
4540
4541const struct bpf_prog_ops tc_cls_act_prog_ops = {
1cf1cae9 4542 .test_run = bpf_prog_test_run_skb,
608cd71a
AS
4543};
4544
7de16e3a 4545const struct bpf_verifier_ops xdp_verifier_ops = {
6a773a15
BB
4546 .get_func_proto = xdp_func_proto,
4547 .is_valid_access = xdp_is_valid_access,
4548 .convert_ctx_access = xdp_convert_ctx_access,
7de16e3a
JK
4549};
4550
4551const struct bpf_prog_ops xdp_prog_ops = {
1cf1cae9 4552 .test_run = bpf_prog_test_run_xdp,
6a773a15
BB
4553};
4554
7de16e3a 4555const struct bpf_verifier_ops cg_skb_verifier_ops = {
966789fb 4556 .get_func_proto = sk_filter_func_proto,
0e33661d 4557 .is_valid_access = sk_filter_is_valid_access,
2492d3b8 4558 .convert_ctx_access = bpf_convert_ctx_access,
7de16e3a
JK
4559};
4560
4561const struct bpf_prog_ops cg_skb_prog_ops = {
1cf1cae9 4562 .test_run = bpf_prog_test_run_skb,
0e33661d
DM
4563};
4564
7de16e3a 4565const struct bpf_verifier_ops lwt_inout_verifier_ops = {
3a0af8fd
TG
4566 .get_func_proto = lwt_inout_func_proto,
4567 .is_valid_access = lwt_is_valid_access,
2492d3b8 4568 .convert_ctx_access = bpf_convert_ctx_access,
7de16e3a
JK
4569};
4570
4571const struct bpf_prog_ops lwt_inout_prog_ops = {
1cf1cae9 4572 .test_run = bpf_prog_test_run_skb,
3a0af8fd
TG
4573};
4574
7de16e3a 4575const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
3a0af8fd
TG
4576 .get_func_proto = lwt_xmit_func_proto,
4577 .is_valid_access = lwt_is_valid_access,
2492d3b8 4578 .convert_ctx_access = bpf_convert_ctx_access,
3a0af8fd 4579 .gen_prologue = tc_cls_act_prologue,
7de16e3a
JK
4580};
4581
4582const struct bpf_prog_ops lwt_xmit_prog_ops = {
1cf1cae9 4583 .test_run = bpf_prog_test_run_skb,
3a0af8fd
TG
4584};
4585
7de16e3a 4586const struct bpf_verifier_ops cg_sock_verifier_ops = {
ae2cf1c4 4587 .get_func_proto = sock_filter_func_proto,
61023658
DA
4588 .is_valid_access = sock_filter_is_valid_access,
4589 .convert_ctx_access = sock_filter_convert_ctx_access,
4590};
4591
7de16e3a
JK
4592const struct bpf_prog_ops cg_sock_prog_ops = {
4593};
4594
4595const struct bpf_verifier_ops sock_ops_verifier_ops = {
8c4b4c7e 4596 .get_func_proto = sock_ops_func_proto,
40304b2a
LB
4597 .is_valid_access = sock_ops_is_valid_access,
4598 .convert_ctx_access = sock_ops_convert_ctx_access,
4599};
4600
7de16e3a
JK
4601const struct bpf_prog_ops sock_ops_prog_ops = {
4602};
4603
4604const struct bpf_verifier_ops sk_skb_verifier_ops = {
b005fd18
JF
4605 .get_func_proto = sk_skb_func_proto,
4606 .is_valid_access = sk_skb_is_valid_access,
8108a775 4607 .convert_ctx_access = sk_skb_convert_ctx_access,
8a31db56 4608 .gen_prologue = sk_skb_prologue,
b005fd18
JF
4609};
4610
7de16e3a
JK
4611const struct bpf_prog_ops sk_skb_prog_ops = {
4612};
4613
8ced425e 4614int sk_detach_filter(struct sock *sk)
55b33325
PE
4615{
4616 int ret = -ENOENT;
4617 struct sk_filter *filter;
4618
d59577b6
VB
4619 if (sock_flag(sk, SOCK_FILTER_LOCKED))
4620 return -EPERM;
4621
8ced425e
HFS
4622 filter = rcu_dereference_protected(sk->sk_filter,
4623 lockdep_sock_is_held(sk));
55b33325 4624 if (filter) {
a9b3cd7f 4625 RCU_INIT_POINTER(sk->sk_filter, NULL);
46bcf14f 4626 sk_filter_uncharge(sk, filter);
55b33325
PE
4627 ret = 0;
4628 }
a3ea269b 4629
55b33325
PE
4630 return ret;
4631}
8ced425e 4632EXPORT_SYMBOL_GPL(sk_detach_filter);
a8fc9277 4633
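sk_detach_filter() is what the SO_DETACH_FILTER socket option (and the aliased SO_DETACH_BPF) ends up calling. A hypothetical user-space snippet, with fd assumed to be a socket that had a filter attached:

	#include <sys/socket.h>

	int val = 0;

	if (setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val)) < 0)
		perror("SO_DETACH_FILTER");	/* errno is EPERM when
						 * SOCK_FILTER_LOCKED is set,
						 * ENOENT when nothing is attached */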
a3ea269b
DB
4634int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
4635 unsigned int len)
a8fc9277 4636{
a3ea269b 4637 struct sock_fprog_kern *fprog;
a8fc9277 4638 struct sk_filter *filter;
a3ea269b 4639 int ret = 0;
a8fc9277
PE
4640
4641 lock_sock(sk);
4642 filter = rcu_dereference_protected(sk->sk_filter,
8ced425e 4643 lockdep_sock_is_held(sk));
a8fc9277
PE
4644 if (!filter)
4645 goto out;
a3ea269b
DB
4646
4647	 /* We're copying the filter that was originally attached,
93d08b69
DB
4648 * so no conversion/decode needed anymore. eBPF programs that
4649 * have no original program cannot be dumped through this.
a3ea269b 4650 */
93d08b69 4651 ret = -EACCES;
7ae457c1 4652 fprog = filter->prog->orig_prog;
93d08b69
DB
4653 if (!fprog)
4654 goto out;
a3ea269b
DB
4655
4656 ret = fprog->len;
a8fc9277 4657 if (!len)
a3ea269b 4658	 /* User space only enquires the number of filter blocks. */
a8fc9277 4659 goto out;
a3ea269b 4660
a8fc9277 4661 ret = -EINVAL;
a3ea269b 4662 if (len < fprog->len)
a8fc9277
PE
4663 goto out;
4664
4665 ret = -EFAULT;
009937e7 4666 if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
a3ea269b 4667 goto out;
a8fc9277 4668
a3ea269b
DB
4669	 /* The API expects the number of filter blocks to be returned
4670	 * here, not the number of bytes.
4671	 */
4672 ret = fprog->len;
a8fc9277
PE
4673out:
4674 release_sock(sk);
4675 return ret;
4676}
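sk_get_filter() backs the SO_GET_FILTER getsockopt(): a first call with an optlen of zero reports the number of classic filter blocks, and a second call with a large enough buffer copies the originally attached program out. Note that optlen is counted in sock_filter blocks here, not bytes. A hypothetical user-space helper (fd is assumed to be a socket with a classic filter attached):

	#include <sys/socket.h>
	#include <linux/filter.h>
	#include <stdlib.h>

	static struct sock_filter *dump_classic_filter(int fd, socklen_t *nblocks)
	{
		struct sock_filter *insns;
		socklen_t len = 0;

		/* first call: optlen 0, the kernel reports the block count */
		if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &len) < 0)
			return NULL;

		insns = calloc(len, sizeof(*insns));
		if (!insns)
			return NULL;

		/* second call: optlen is interpreted as filter blocks, not bytes */
		if (getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, insns, &len) < 0) {
			free(insns);
			return NULL;
		}

		*nblocks = len;
		return insns;
	}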