// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/atomic.h>
#include <linux/bpf_verifier.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <linux/skmsg.h>
#include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/udp.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include <linux/inetdevice.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
#include <net/net_namespace.h>
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>
#include <net/lwtunnel.h>
#include <net/ipv6_stubs.h>
#include <net/bpf_sk_storage.h>
#include <net/transp_v6.h>
#include <linux/btf_ids.h>
#include <net/tls.h>
#include <net/xdp.h>
#include <net/mptcp.h>
#include <net/netfilter/nf_conntrack_bpf.h>

static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);

int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
	if (in_compat_syscall()) {
		struct compat_sock_fprog f32;

		if (len != sizeof(f32))
			return -EINVAL;
		if (copy_from_sockptr(&f32, src, sizeof(f32)))
			return -EFAULT;
		memset(dst, 0, sizeof(*dst));
		dst->len = f32.len;
		dst->filter = compat_ptr(f32.filter);
	} else {
		if (len != sizeof(*dst))
			return -EINVAL;
		if (copy_from_sockptr(dst, src, sizeof(*dst)))
			return -EFAULT;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);

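/* Illustrative note (added here, not in the original source): 32-bit compat
 * callers hand in a compat_sock_fprog whose filter pointer is a u32, so the
 * helper above widens it with compat_ptr(); a native caller's buffer is
 * copied verbatim. A hypothetical sockopt handler would use it roughly as:
 *
 *	struct sock_fprog fprog;
 *
 *	ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
 *	if (ret)
 *		return ret;
 *	// fprog.len / fprog.filter are now valid for either ABI
 */
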
/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to the correct size returned
 * by the program. If pkt_len is 0 we toss the packet. If skb->len is
 * smaller than pkt_len we keep the whole skb->data. This is the socket
 * level wrapper to bpf_prog_run. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);

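/* Note (added for clarity, not part of the original file): to the best of my
 * knowledge the common entry point sk_filter(sk, skb) in <linux/filter.h> is
 * a thin wrapper around this function with cap == 1, i.e. the program may
 * never trim the packet below one byte:
 *
 *	if (sk_filter(sk, skb))
 *		goto drop;	// typical caller pattern in protocol code
 */
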
BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
{
	return skb_get_poff(skb);
}

BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u8 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return *(u8 *)(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return tmp;
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return *(u8 *)ptr;
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len,
					 offset);
}

BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	__be16 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return get_unaligned_be16(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be16_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be16(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len,
					  offset);
}

BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	__be32 tmp, *ptr;
	const int len = sizeof(tmp);

	if (likely(offset >= 0)) {
		if (headlen - offset >= len)
			return get_unaligned_be32(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be32_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be32(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len,
					  offset);
}

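/* Background sketch (added note, not part of the original source): a negative
 * offset in the helpers above selects the classic BPF ancillary ranges handled
 * by bpf_internal_load_pointer_neg_helper(), e.g. SKF_NET_OFF-relative loads
 * read from the network header. A classic filter might use it as:
 *
 *	// load the byte at network header + 9 (IPv4 protocol field)
 *	BPF_STMT(BPF_LD | BPF_B | BPF_ABS, SKF_NET_OFF + 9),
 */
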
9bac3d6d
AS
293static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
294 struct bpf_insn *insn_buf)
295{
296 struct bpf_insn *insn = insn_buf;
297
298 switch (skb_field) {
299 case SKF_AD_MARK:
c593642c 300 BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);
9bac3d6d
AS
301
302 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
303 offsetof(struct sk_buff, mark));
304 break;
305
306 case SKF_AD_PKTTYPE:
fba84957 307 *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET);
9bac3d6d
AS
308 *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
309#ifdef __BIG_ENDIAN_BITFIELD
310 *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
311#endif
312 break;
313
314 case SKF_AD_QUEUE:
c593642c 315 BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2);
9bac3d6d
AS
316
317 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
318 offsetof(struct sk_buff, queue_mapping));
319 break;
c2497395 320
c2497395 321 case SKF_AD_VLAN_TAG:
c593642c 322 BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);
c2497395
AS
323
324 /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
325 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
326 offsetof(struct sk_buff, vlan_tci));
9c212255
MM
327 break;
328 case SKF_AD_VLAN_TAG_PRESENT:
354259fa
ED
329 BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_all) != 4);
330 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
331 offsetof(struct sk_buff, vlan_all));
332 *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
333 *insn++ = BPF_ALU32_IMM(BPF_MOV, dst_reg, 1);
c2497395 334 break;
9bac3d6d
AS
335 }
336
337 return insn - insn_buf;
338}
339
bd4cf0ed 340static bool convert_bpf_extensions(struct sock_filter *fp,
2695fb55 341 struct bpf_insn **insnp)
bd4cf0ed 342{
2695fb55 343 struct bpf_insn *insn = *insnp;
9bac3d6d 344 u32 cnt;
bd4cf0ed
AS
345
346 switch (fp->k) {
347 case SKF_AD_OFF + SKF_AD_PROTOCOL:
c593642c 348 BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2);
0b8c707d
DB
349
350 /* A = *(u16 *) (CTX + offsetof(protocol)) */
351 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
352 offsetof(struct sk_buff, protocol));
353 /* A = ntohs(A) [emitting a nop or swap16] */
354 *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
bd4cf0ed
AS
355 break;
356
357 case SKF_AD_OFF + SKF_AD_PKTTYPE:
9bac3d6d
AS
358 cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
359 insn += cnt - 1;
bd4cf0ed
AS
360 break;
361
362 case SKF_AD_OFF + SKF_AD_IFINDEX:
363 case SKF_AD_OFF + SKF_AD_HATYPE:
c593642c
PB
364 BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
365 BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2);
f8f6d679 366
f035a515 367 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
f8f6d679
DB
368 BPF_REG_TMP, BPF_REG_CTX,
369 offsetof(struct sk_buff, dev));
370 /* if (tmp != 0) goto pc + 1 */
371 *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
372 *insn++ = BPF_EXIT_INSN();
373 if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
374 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
375 offsetof(struct net_device, ifindex));
376 else
377 *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
378 offsetof(struct net_device, type));
bd4cf0ed
AS
379 break;
380
381 case SKF_AD_OFF + SKF_AD_MARK:
9bac3d6d
AS
382 cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
383 insn += cnt - 1;
bd4cf0ed
AS
384 break;
385
386 case SKF_AD_OFF + SKF_AD_RXHASH:
c593642c 387 BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);
bd4cf0ed 388
9739eef1
AS
389 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
390 offsetof(struct sk_buff, hash));
bd4cf0ed
AS
391 break;
392
393 case SKF_AD_OFF + SKF_AD_QUEUE:
9bac3d6d
AS
394 cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
395 insn += cnt - 1;
bd4cf0ed
AS
396 break;
397
398 case SKF_AD_OFF + SKF_AD_VLAN_TAG:
c2497395
AS
399 cnt = convert_skb_access(SKF_AD_VLAN_TAG,
400 BPF_REG_A, BPF_REG_CTX, insn);
401 insn += cnt - 1;
402 break;
bd4cf0ed 403
c2497395
AS
404 case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
405 cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
406 BPF_REG_A, BPF_REG_CTX, insn);
407 insn += cnt - 1;
bd4cf0ed
AS
408 break;
409
27cd5452 410 case SKF_AD_OFF + SKF_AD_VLAN_TPID:
c593642c 411 BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != 2);
27cd5452
MS
412
413 /* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
414 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
415 offsetof(struct sk_buff, vlan_proto));
416 /* A = ntohs(A) [emitting a nop or swap16] */
417 *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
418 break;
419
bd4cf0ed
AS
420 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
421 case SKF_AD_OFF + SKF_AD_NLATTR:
422 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
423 case SKF_AD_OFF + SKF_AD_CPU:
4cd3675e 424 case SKF_AD_OFF + SKF_AD_RANDOM:
e430f34e 425 /* arg1 = CTX */
f8f6d679 426 *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
bd4cf0ed 427 /* arg2 = A */
f8f6d679 428 *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
bd4cf0ed 429 /* arg3 = X */
f8f6d679 430 *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
e430f34e 431 /* Emit call(arg1=CTX, arg2=A, arg3=X) */
bd4cf0ed
AS
432 switch (fp->k) {
433 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
b390134c 434 *insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
bd4cf0ed
AS
435 break;
436 case SKF_AD_OFF + SKF_AD_NLATTR:
b390134c 437 *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
bd4cf0ed
AS
438 break;
439 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
b390134c 440 *insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
bd4cf0ed
AS
441 break;
442 case SKF_AD_OFF + SKF_AD_CPU:
b390134c 443 *insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
bd4cf0ed 444 break;
4cd3675e 445 case SKF_AD_OFF + SKF_AD_RANDOM:
3ad00405
DB
446 *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
447 bpf_user_rnd_init_once();
4cd3675e 448 break;
bd4cf0ed
AS
449 }
450 break;
451
452 case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
9739eef1
AS
453 /* A ^= X */
454 *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
bd4cf0ed
AS
455 break;
456
457 default:
458 /* This is just a dummy call to avoid letting the compiler
459 * evict __bpf_call_base() as an optimization. Placed here
460 * where no-one bothers.
461 */
462 BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
463 return false;
464 }
465
466 *insnp = insn;
467 return true;
468}
469
e0cea7ce
DB
470static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
471{
472 const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
473 int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
474 bool endian = BPF_SIZE(fp->code) == BPF_H ||
475 BPF_SIZE(fp->code) == BPF_W;
476 bool indirect = BPF_MODE(fp->code) == BPF_IND;
477 const int ip_align = NET_IP_ALIGN;
478 struct bpf_insn *insn = *insnp;
479 int offset = fp->k;
480
481 if (!indirect &&
482 ((unaligned_ok && offset >= 0) ||
483 (!unaligned_ok && offset >= 0 &&
484 offset + ip_align >= 0 &&
485 offset + ip_align % size == 0))) {
59ee4129
DB
486 bool ldx_off_ok = offset <= S16_MAX;
487
e0cea7ce 488 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
d8f3e978
DM
489 if (offset)
490 *insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
59ee4129
DB
491 *insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
492 size, 2 + endian + (!ldx_off_ok * 2));
493 if (ldx_off_ok) {
494 *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
495 BPF_REG_D, offset);
496 } else {
497 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
498 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
499 *insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
500 BPF_REG_TMP, 0);
501 }
e0cea7ce
DB
502 if (endian)
503 *insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
504 *insn++ = BPF_JMP_A(8);
505 }
506
507 *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
508 *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
509 *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
510 if (!indirect) {
511 *insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
512 } else {
513 *insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
514 if (fp->k)
515 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
516 }
517
518 switch (BPF_SIZE(fp->code)) {
519 case BPF_B:
520 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
521 break;
522 case BPF_H:
523 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
524 break;
525 case BPF_W:
526 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
527 break;
528 default:
529 return false;
530 }
531
532 *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
533 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
534 *insn = BPF_EXIT_INSN();
535
536 *insnp = insn;
537 return true;
538}
539
/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: allocated 'struct bpf_prog' or NULL
 *	@new_len: pointer to store length of converted program
 *	@seen_ld_abs: bool whether we've seen ld_abs/ind
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 * style extended BPF (eBPF).
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *	bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
 *
 * 2) Second pass to do the actual remapping, itself run twice: the first
 *    run finds the new jump offsets, the second run emits the remapped
 *    instructions:
 *	bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
 */
d9e12f42 559static int bpf_convert_filter(struct sock_filter *prog, int len,
e0cea7ce
DB
560 struct bpf_prog *new_prog, int *new_len,
561 bool *seen_ld_abs)
bd4cf0ed 562{
50bbfed9
AS
563 int new_flen = 0, pass = 0, target, i, stack_off;
564 struct bpf_insn *new_insn, *first_insn = NULL;
bd4cf0ed
AS
565 struct sock_filter *fp;
566 int *addrs = NULL;
567 u8 bpf_src;
568
569 BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
30743837 570 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
bd4cf0ed 571
6f9a093b 572 if (len <= 0 || len > BPF_MAXINSNS)
bd4cf0ed
AS
573 return -EINVAL;
574
575 if (new_prog) {
50bbfed9 576 first_insn = new_prog->insnsi;
658da937
DB
577 addrs = kcalloc(len, sizeof(*addrs),
578 GFP_KERNEL | __GFP_NOWARN);
bd4cf0ed
AS
579 if (!addrs)
580 return -ENOMEM;
581 }
582
583do_pass:
50bbfed9 584 new_insn = first_insn;
bd4cf0ed
AS
585 fp = prog;
586
8b614aeb 587 /* Classic BPF related prologue emission. */
50bbfed9 588 if (new_prog) {
8b614aeb
DB
589 /* Classic BPF expects A and X to be reset first. These need
590 * to be guaranteed to be the first two instructions.
591 */
1d621674
DB
592 *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
593 *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In the eBPF case this is done by the compiler; here we
		 * need to do it ourselves. Initial CTX is present in
		 * BPF_REG_ARG1.
		 */
599 *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
e0cea7ce
DB
600 if (*seen_ld_abs) {
601 /* For packet access in classic BPF, cache skb->data
602 * in callee-saved BPF R8 and skb->len - skb->data_len
603 * (headlen) in BPF R9. Since classic BPF is read-only
604 * on CTX, we only need to cache it once.
605 */
606 *new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
607 BPF_REG_D, BPF_REG_CTX,
608 offsetof(struct sk_buff, data));
609 *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
610 offsetof(struct sk_buff, len));
611 *new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
612 offsetof(struct sk_buff, data_len));
613 *new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
614 }
8b614aeb
DB
615 } else {
616 new_insn += 3;
617 }
bd4cf0ed
AS
618
619 for (i = 0; i < len; fp++, i++) {
e0cea7ce 620 struct bpf_insn tmp_insns[32] = { };
2695fb55 621 struct bpf_insn *insn = tmp_insns;
bd4cf0ed
AS
622
623 if (addrs)
50bbfed9 624 addrs[i] = new_insn - first_insn;
bd4cf0ed
AS
625
626 switch (fp->code) {
627 /* All arithmetic insns and skb loads map as-is. */
628 case BPF_ALU | BPF_ADD | BPF_X:
629 case BPF_ALU | BPF_ADD | BPF_K:
630 case BPF_ALU | BPF_SUB | BPF_X:
631 case BPF_ALU | BPF_SUB | BPF_K:
632 case BPF_ALU | BPF_AND | BPF_X:
633 case BPF_ALU | BPF_AND | BPF_K:
634 case BPF_ALU | BPF_OR | BPF_X:
635 case BPF_ALU | BPF_OR | BPF_K:
636 case BPF_ALU | BPF_LSH | BPF_X:
637 case BPF_ALU | BPF_LSH | BPF_K:
638 case BPF_ALU | BPF_RSH | BPF_X:
639 case BPF_ALU | BPF_RSH | BPF_K:
640 case BPF_ALU | BPF_XOR | BPF_X:
641 case BPF_ALU | BPF_XOR | BPF_K:
642 case BPF_ALU | BPF_MUL | BPF_X:
643 case BPF_ALU | BPF_MUL | BPF_K:
644 case BPF_ALU | BPF_DIV | BPF_X:
645 case BPF_ALU | BPF_DIV | BPF_K:
646 case BPF_ALU | BPF_MOD | BPF_X:
647 case BPF_ALU | BPF_MOD | BPF_K:
648 case BPF_ALU | BPF_NEG:
649 case BPF_LD | BPF_ABS | BPF_W:
650 case BPF_LD | BPF_ABS | BPF_H:
651 case BPF_LD | BPF_ABS | BPF_B:
652 case BPF_LD | BPF_IND | BPF_W:
653 case BPF_LD | BPF_IND | BPF_H:
654 case BPF_LD | BPF_IND | BPF_B:
655 /* Check for overloaded BPF extension and
656 * directly convert it if found, otherwise
657 * just move on with mapping.
658 */
659 if (BPF_CLASS(fp->code) == BPF_LD &&
660 BPF_MODE(fp->code) == BPF_ABS &&
661 convert_bpf_extensions(fp, &insn))
662 break;
e0cea7ce
DB
663 if (BPF_CLASS(fp->code) == BPF_LD &&
664 convert_bpf_ld_abs(fp, &insn)) {
665 *seen_ld_abs = true;
666 break;
667 }
bd4cf0ed 668
68fda450 669 if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
f6b1b3bf 670 fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
68fda450 671 *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
f6b1b3bf
DB
672 /* Error with exception code on div/mod by 0.
673 * For cBPF programs, this was always return 0.
674 */
675 *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
676 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
677 *insn++ = BPF_EXIT_INSN();
678 }
68fda450 679
f8f6d679 680 *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
bd4cf0ed
AS
681 break;
682
f8f6d679
DB
683 /* Jump transformation cannot use BPF block macros
684 * everywhere as offset calculation and target updates
685 * require a bit more work than the rest, i.e. jump
686 * opcodes map as-is, but offsets need adjustment.
687 */
688
689#define BPF_EMIT_JMP \
bd4cf0ed 690 do { \
050fad7c
DB
691 const s32 off_min = S16_MIN, off_max = S16_MAX; \
692 s32 off; \
693 \
bd4cf0ed
AS
694 if (target >= len || target < 0) \
695 goto err; \
050fad7c 696 off = addrs ? addrs[target] - addrs[i] - 1 : 0; \
bd4cf0ed 697 /* Adjust pc relative offset for 2nd or 3rd insn. */ \
050fad7c
DB
698 off -= insn - tmp_insns; \
699 /* Reject anything not fitting into insn->off. */ \
700 if (off < off_min || off > off_max) \
701 goto err; \
702 insn->off = off; \
bd4cf0ed
AS
703 } while (0)
704
f8f6d679
DB
705 case BPF_JMP | BPF_JA:
706 target = i + fp->k + 1;
707 insn->code = fp->code;
708 BPF_EMIT_JMP;
bd4cf0ed
AS
709 break;
710
711 case BPF_JMP | BPF_JEQ | BPF_K:
712 case BPF_JMP | BPF_JEQ | BPF_X:
713 case BPF_JMP | BPF_JSET | BPF_K:
714 case BPF_JMP | BPF_JSET | BPF_X:
715 case BPF_JMP | BPF_JGT | BPF_K:
716 case BPF_JMP | BPF_JGT | BPF_X:
717 case BPF_JMP | BPF_JGE | BPF_K:
718 case BPF_JMP | BPF_JGE | BPF_X:
719 if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
720 /* BPF immediates are signed, zero extend
721 * immediate into tmp register and use it
722 * in compare insn.
723 */
f8f6d679 724 *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
bd4cf0ed 725
e430f34e
AS
726 insn->dst_reg = BPF_REG_A;
727 insn->src_reg = BPF_REG_TMP;
bd4cf0ed
AS
728 bpf_src = BPF_X;
729 } else {
e430f34e 730 insn->dst_reg = BPF_REG_A;
bd4cf0ed
AS
731 insn->imm = fp->k;
732 bpf_src = BPF_SRC(fp->code);
19539ce7 733 insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
1da177e4 734 }
bd4cf0ed
AS
735
736 /* Common case where 'jump_false' is next insn. */
737 if (fp->jf == 0) {
738 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
739 target = i + fp->jt + 1;
f8f6d679 740 BPF_EMIT_JMP;
bd4cf0ed 741 break;
1da177e4 742 }
bd4cf0ed 743
92b31a9a
DB
744 /* Convert some jumps when 'jump_true' is next insn. */
745 if (fp->jt == 0) {
746 switch (BPF_OP(fp->code)) {
747 case BPF_JEQ:
748 insn->code = BPF_JMP | BPF_JNE | bpf_src;
749 break;
750 case BPF_JGT:
751 insn->code = BPF_JMP | BPF_JLE | bpf_src;
752 break;
753 case BPF_JGE:
754 insn->code = BPF_JMP | BPF_JLT | bpf_src;
755 break;
756 default:
757 goto jmp_rest;
758 }
759
bd4cf0ed 760 target = i + fp->jf + 1;
f8f6d679 761 BPF_EMIT_JMP;
bd4cf0ed 762 break;
0b05b2a4 763 }
92b31a9a 764jmp_rest:
bd4cf0ed
AS
765 /* Other jumps are mapped into two insns: Jxx and JA. */
766 target = i + fp->jt + 1;
767 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
f8f6d679 768 BPF_EMIT_JMP;
bd4cf0ed
AS
769 insn++;
770
771 insn->code = BPF_JMP | BPF_JA;
772 target = i + fp->jf + 1;
f8f6d679 773 BPF_EMIT_JMP;
bd4cf0ed
AS
774 break;
775
		/* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
e0cea7ce
DB
777 case BPF_LDX | BPF_MSH | BPF_B: {
778 struct sock_filter tmp = {
779 .code = BPF_LD | BPF_ABS | BPF_B,
780 .k = fp->k,
781 };
782
783 *seen_ld_abs = true;
784
785 /* X = A */
786 *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
1268e253 787 /* A = BPF_R0 = *(u8 *) (skb->data + K) */
e0cea7ce
DB
788 convert_bpf_ld_abs(&tmp, &insn);
789 insn++;
9739eef1 790 /* A &= 0xf */
f8f6d679 791 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
9739eef1 792 /* A <<= 2 */
f8f6d679 793 *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
e0cea7ce
DB
794 /* tmp = X */
795 *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
9739eef1 796 /* X = A */
f8f6d679 797 *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
9739eef1 798 /* A = tmp */
f8f6d679 799 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
bd4cf0ed 800 break;
e0cea7ce 801 }
		/* RET_K is remapped into 2 insns. RET_A case doesn't need an
		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
		 */
bd4cf0ed
AS
805 case BPF_RET | BPF_A:
806 case BPF_RET | BPF_K:
6205b9cf
DB
807 if (BPF_RVAL(fp->code) == BPF_K)
808 *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
809 0, fp->k);
9739eef1 810 *insn = BPF_EXIT_INSN();
bd4cf0ed
AS
811 break;
812
813 /* Store to stack. */
814 case BPF_ST:
815 case BPF_STX:
50bbfed9 816 stack_off = fp->k * 4 + 4;
f8f6d679
DB
817 *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
818 BPF_ST ? BPF_REG_A : BPF_REG_X,
50bbfed9
AS
819 -stack_off);
820 /* check_load_and_stores() verifies that classic BPF can
821 * load from stack only after write, so tracking
822 * stack_depth for ST|STX insns is enough
823 */
824 if (new_prog && new_prog->aux->stack_depth < stack_off)
825 new_prog->aux->stack_depth = stack_off;
bd4cf0ed
AS
826 break;
827
828 /* Load from stack. */
829 case BPF_LD | BPF_MEM:
830 case BPF_LDX | BPF_MEM:
50bbfed9 831 stack_off = fp->k * 4 + 4;
f8f6d679
DB
832 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
833 BPF_REG_A : BPF_REG_X, BPF_REG_FP,
50bbfed9 834 -stack_off);
bd4cf0ed
AS
835 break;
836
837 /* A = K or X = K */
838 case BPF_LD | BPF_IMM:
839 case BPF_LDX | BPF_IMM:
f8f6d679
DB
840 *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
841 BPF_REG_A : BPF_REG_X, fp->k);
bd4cf0ed
AS
842 break;
843
844 /* X = A */
845 case BPF_MISC | BPF_TAX:
f8f6d679 846 *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
bd4cf0ed
AS
847 break;
848
849 /* A = X */
850 case BPF_MISC | BPF_TXA:
f8f6d679 851 *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
bd4cf0ed
AS
852 break;
853
854 /* A = skb->len or X = skb->len */
855 case BPF_LD | BPF_W | BPF_LEN:
856 case BPF_LDX | BPF_W | BPF_LEN:
f8f6d679
DB
857 *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
858 BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
859 offsetof(struct sk_buff, len));
bd4cf0ed
AS
860 break;
861
f8f6d679 862 /* Access seccomp_data fields. */
bd4cf0ed 863 case BPF_LDX | BPF_ABS | BPF_W:
9739eef1
AS
864 /* A = *(u32 *) (ctx + K) */
865 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
bd4cf0ed
AS
866 break;
867
ca9f1fd2 868 /* Unknown instruction. */
1da177e4 869 default:
bd4cf0ed 870 goto err;
1da177e4 871 }
bd4cf0ed
AS
872
873 insn++;
874 if (new_prog)
875 memcpy(new_insn, tmp_insns,
876 sizeof(*insn) * (insn - tmp_insns));
bd4cf0ed 877 new_insn += insn - tmp_insns;
1da177e4
LT
878 }
879
bd4cf0ed
AS
880 if (!new_prog) {
881 /* Only calculating new length. */
50bbfed9 882 *new_len = new_insn - first_insn;
e0cea7ce
DB
883 if (*seen_ld_abs)
884 *new_len += 4; /* Prologue bits. */
bd4cf0ed
AS
885 return 0;
886 }
887
888 pass++;
50bbfed9
AS
889 if (new_flen != new_insn - first_insn) {
890 new_flen = new_insn - first_insn;
bd4cf0ed
AS
891 if (pass > 2)
892 goto err;
bd4cf0ed
AS
893 goto do_pass;
894 }
895
896 kfree(addrs);
897 BUG_ON(*new_len != new_flen);
1da177e4 898 return 0;
bd4cf0ed
AS
899err:
900 kfree(addrs);
901 return -EINVAL;
1da177e4
LT
902}
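/* A minimal sketch of the intended calling pattern (see bpf_migrate_filter()
 * below for the real in-tree user); the variable names here are illustrative
 * only:
 *
 *	bool seen_ld_abs = false;
 *	int new_len, err;
 *
 *	err = bpf_convert_filter(old_insns, old_len, NULL, &new_len, &seen_ld_abs);
 *	// ... allocate a struct bpf_prog with room for new_len insns ...
 *	err = bpf_convert_filter(old_insns, old_len, new_prog, &new_len, &seen_ld_abs);
 */
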
903
/* Security:
 *
 * As we don't want to clear the mem[] array for each packet going through
 * __bpf_prog_run(), we check that a filter loaded by a user never tries to
 * read a cell that was not previously written, and we check all branches
 * to be sure a malicious user doesn't try to abuse us.
 */
ec31a05c 911static int check_load_and_stores(const struct sock_filter *filter, int flen)
2d5311e4 912{
34805931 913 u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
2d5311e4
ED
914 int pc, ret = 0;
915
916 BUILD_BUG_ON(BPF_MEMWORDS > 16);
34805931 917
99e72a0f 918 masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
2d5311e4
ED
919 if (!masks)
920 return -ENOMEM;
34805931 921
2d5311e4
ED
922 memset(masks, 0xff, flen * sizeof(*masks));
923
924 for (pc = 0; pc < flen; pc++) {
925 memvalid &= masks[pc];
926
927 switch (filter[pc].code) {
34805931
DB
928 case BPF_ST:
929 case BPF_STX:
2d5311e4
ED
930 memvalid |= (1 << filter[pc].k);
931 break;
34805931
DB
932 case BPF_LD | BPF_MEM:
933 case BPF_LDX | BPF_MEM:
2d5311e4
ED
934 if (!(memvalid & (1 << filter[pc].k))) {
935 ret = -EINVAL;
936 goto error;
937 }
938 break;
34805931
DB
939 case BPF_JMP | BPF_JA:
940 /* A jump must set masks on target */
2d5311e4
ED
941 masks[pc + 1 + filter[pc].k] &= memvalid;
942 memvalid = ~0;
943 break;
34805931
DB
944 case BPF_JMP | BPF_JEQ | BPF_K:
945 case BPF_JMP | BPF_JEQ | BPF_X:
946 case BPF_JMP | BPF_JGE | BPF_K:
947 case BPF_JMP | BPF_JGE | BPF_X:
948 case BPF_JMP | BPF_JGT | BPF_K:
949 case BPF_JMP | BPF_JGT | BPF_X:
950 case BPF_JMP | BPF_JSET | BPF_K:
951 case BPF_JMP | BPF_JSET | BPF_X:
952 /* A jump must set masks on targets */
2d5311e4
ED
953 masks[pc + 1 + filter[pc].jt] &= memvalid;
954 masks[pc + 1 + filter[pc].jf] &= memvalid;
955 memvalid = ~0;
956 break;
957 }
958 }
959error:
960 kfree(masks);
961 return ret;
962}
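/* Example (illustrative, not from the original source) of a program this
 * check rejects: reading scratch memory before it was ever written.
 *
 *	BPF_STMT(BPF_LD | BPF_MEM, 0),		// load mem[0], never stored to
 *	BPF_STMT(BPF_RET | BPF_A, 0),
 *
 * check_load_and_stores() returns -EINVAL for it because slot 0 is read
 * while its "written" bit is still clear.
 */
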
963
34805931
DB
964static bool chk_code_allowed(u16 code_to_probe)
965{
966 static const bool codes[] = {
967 /* 32 bit ALU operations */
968 [BPF_ALU | BPF_ADD | BPF_K] = true,
969 [BPF_ALU | BPF_ADD | BPF_X] = true,
970 [BPF_ALU | BPF_SUB | BPF_K] = true,
971 [BPF_ALU | BPF_SUB | BPF_X] = true,
972 [BPF_ALU | BPF_MUL | BPF_K] = true,
973 [BPF_ALU | BPF_MUL | BPF_X] = true,
974 [BPF_ALU | BPF_DIV | BPF_K] = true,
975 [BPF_ALU | BPF_DIV | BPF_X] = true,
976 [BPF_ALU | BPF_MOD | BPF_K] = true,
977 [BPF_ALU | BPF_MOD | BPF_X] = true,
978 [BPF_ALU | BPF_AND | BPF_K] = true,
979 [BPF_ALU | BPF_AND | BPF_X] = true,
980 [BPF_ALU | BPF_OR | BPF_K] = true,
981 [BPF_ALU | BPF_OR | BPF_X] = true,
982 [BPF_ALU | BPF_XOR | BPF_K] = true,
983 [BPF_ALU | BPF_XOR | BPF_X] = true,
984 [BPF_ALU | BPF_LSH | BPF_K] = true,
985 [BPF_ALU | BPF_LSH | BPF_X] = true,
986 [BPF_ALU | BPF_RSH | BPF_K] = true,
987 [BPF_ALU | BPF_RSH | BPF_X] = true,
988 [BPF_ALU | BPF_NEG] = true,
989 /* Load instructions */
990 [BPF_LD | BPF_W | BPF_ABS] = true,
991 [BPF_LD | BPF_H | BPF_ABS] = true,
992 [BPF_LD | BPF_B | BPF_ABS] = true,
993 [BPF_LD | BPF_W | BPF_LEN] = true,
994 [BPF_LD | BPF_W | BPF_IND] = true,
995 [BPF_LD | BPF_H | BPF_IND] = true,
996 [BPF_LD | BPF_B | BPF_IND] = true,
997 [BPF_LD | BPF_IMM] = true,
998 [BPF_LD | BPF_MEM] = true,
999 [BPF_LDX | BPF_W | BPF_LEN] = true,
1000 [BPF_LDX | BPF_B | BPF_MSH] = true,
1001 [BPF_LDX | BPF_IMM] = true,
1002 [BPF_LDX | BPF_MEM] = true,
1003 /* Store instructions */
1004 [BPF_ST] = true,
1005 [BPF_STX] = true,
1006 /* Misc instructions */
1007 [BPF_MISC | BPF_TAX] = true,
1008 [BPF_MISC | BPF_TXA] = true,
1009 /* Return instructions */
1010 [BPF_RET | BPF_K] = true,
1011 [BPF_RET | BPF_A] = true,
1012 /* Jump instructions */
1013 [BPF_JMP | BPF_JA] = true,
1014 [BPF_JMP | BPF_JEQ | BPF_K] = true,
1015 [BPF_JMP | BPF_JEQ | BPF_X] = true,
1016 [BPF_JMP | BPF_JGE | BPF_K] = true,
1017 [BPF_JMP | BPF_JGE | BPF_X] = true,
1018 [BPF_JMP | BPF_JGT | BPF_K] = true,
1019 [BPF_JMP | BPF_JGT | BPF_X] = true,
1020 [BPF_JMP | BPF_JSET | BPF_K] = true,
1021 [BPF_JMP | BPF_JSET | BPF_X] = true,
1022 };
1023
1024 if (code_to_probe >= ARRAY_SIZE(codes))
1025 return false;
1026
1027 return codes[code_to_probe];
1028}
1029
f7bd9e36
DB
1030static bool bpf_check_basics_ok(const struct sock_filter *filter,
1031 unsigned int flen)
1032{
1033 if (filter == NULL)
1034 return false;
1035 if (flen == 0 || flen > BPF_MAXINSNS)
1036 return false;
1037
1038 return true;
1039}
1040
/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through, kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int bpf_check_classic(const struct sock_filter *filter,
			     unsigned int flen)
1da177e4 1057{
aa1113d9 1058 bool anc_found;
34805931 1059 int pc;
1da177e4 1060
34805931 1061 /* Check the filter code now */
1da177e4 1062 for (pc = 0; pc < flen; pc++) {
ec31a05c 1063 const struct sock_filter *ftest = &filter[pc];
93699863 1064
34805931
DB
1065 /* May we actually operate on this code? */
1066 if (!chk_code_allowed(ftest->code))
cba328fc 1067 return -EINVAL;
34805931 1068
93699863 1069 /* Some instructions need special checks */
34805931
DB
1070 switch (ftest->code) {
1071 case BPF_ALU | BPF_DIV | BPF_K:
1072 case BPF_ALU | BPF_MOD | BPF_K:
1073 /* Check for division by zero */
b6069a95
ED
1074 if (ftest->k == 0)
1075 return -EINVAL;
1076 break;
229394e8
RV
1077 case BPF_ALU | BPF_LSH | BPF_K:
1078 case BPF_ALU | BPF_RSH | BPF_K:
1079 if (ftest->k >= 32)
1080 return -EINVAL;
1081 break;
34805931
DB
1082 case BPF_LD | BPF_MEM:
1083 case BPF_LDX | BPF_MEM:
1084 case BPF_ST:
1085 case BPF_STX:
1086 /* Check for invalid memory addresses */
93699863
KK
1087 if (ftest->k >= BPF_MEMWORDS)
1088 return -EINVAL;
1089 break;
34805931
DB
1090 case BPF_JMP | BPF_JA:
1091 /* Note, the large ftest->k might cause loops.
93699863
KK
1092 * Compare this with conditional jumps below,
1093 * where offsets are limited. --ANK (981016)
1094 */
34805931 1095 if (ftest->k >= (unsigned int)(flen - pc - 1))
93699863 1096 return -EINVAL;
01f2f3f6 1097 break;
34805931
DB
1098 case BPF_JMP | BPF_JEQ | BPF_K:
1099 case BPF_JMP | BPF_JEQ | BPF_X:
1100 case BPF_JMP | BPF_JGE | BPF_K:
1101 case BPF_JMP | BPF_JGE | BPF_X:
1102 case BPF_JMP | BPF_JGT | BPF_K:
1103 case BPF_JMP | BPF_JGT | BPF_X:
1104 case BPF_JMP | BPF_JSET | BPF_K:
1105 case BPF_JMP | BPF_JSET | BPF_X:
1106 /* Both conditionals must be safe */
e35bedf3 1107 if (pc + ftest->jt + 1 >= flen ||
93699863
KK
1108 pc + ftest->jf + 1 >= flen)
1109 return -EINVAL;
cba328fc 1110 break;
34805931
DB
1111 case BPF_LD | BPF_W | BPF_ABS:
1112 case BPF_LD | BPF_H | BPF_ABS:
1113 case BPF_LD | BPF_B | BPF_ABS:
aa1113d9 1114 anc_found = false;
34805931
DB
1115 if (bpf_anc_helper(ftest) & BPF_ANC)
1116 anc_found = true;
1117 /* Ancillary operation unknown or unsupported */
aa1113d9
DB
1118 if (anc_found == false && ftest->k >= SKF_AD_OFF)
1119 return -EINVAL;
01f2f3f6
HPP
1120 }
1121 }
93699863 1122
34805931 1123 /* Last instruction must be a RET code */
01f2f3f6 1124 switch (filter[flen - 1].code) {
34805931
DB
1125 case BPF_RET | BPF_K:
1126 case BPF_RET | BPF_A:
2d5311e4 1127 return check_load_and_stores(filter, flen);
cba328fc 1128 }
34805931 1129
cba328fc 1130 return -EINVAL;
1da177e4
LT
1131}
1132
7ae457c1
AS
1133static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
1134 const struct sock_fprog *fprog)
a3ea269b 1135{
009937e7 1136 unsigned int fsize = bpf_classic_proglen(fprog);
a3ea269b
DB
1137 struct sock_fprog_kern *fkprog;
1138
1139 fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
1140 if (!fp->orig_prog)
1141 return -ENOMEM;
1142
1143 fkprog = fp->orig_prog;
1144 fkprog->len = fprog->len;
658da937
DB
1145
1146 fkprog->filter = kmemdup(fp->insns, fsize,
1147 GFP_KERNEL | __GFP_NOWARN);
a3ea269b
DB
1148 if (!fkprog->filter) {
1149 kfree(fp->orig_prog);
1150 return -ENOMEM;
1151 }
1152
1153 return 0;
1154}
1155
7ae457c1 1156static void bpf_release_orig_filter(struct bpf_prog *fp)
a3ea269b
DB
1157{
1158 struct sock_fprog_kern *fprog = fp->orig_prog;
1159
1160 if (fprog) {
1161 kfree(fprog->filter);
1162 kfree(fprog);
1163 }
1164}
1165
7ae457c1
AS
1166static void __bpf_prog_release(struct bpf_prog *prog)
1167{
24701ece 1168 if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
89aa0758
AS
1169 bpf_prog_put(prog);
1170 } else {
1171 bpf_release_orig_filter(prog);
1172 bpf_prog_free(prog);
1173 }
7ae457c1
AS
1174}
1175
34c5bd66
PN
1176static void __sk_filter_release(struct sk_filter *fp)
1177{
7ae457c1
AS
1178 __bpf_prog_release(fp->prog);
1179 kfree(fp);
34c5bd66
PN
1180}
1181
47e958ea 1182/**
46bcf14f 1183 * sk_filter_release_rcu - Release a socket filter by rcu_head
47e958ea
PE
1184 * @rcu: rcu_head that contains the sk_filter to free
1185 */
fbc907f0 1186static void sk_filter_release_rcu(struct rcu_head *rcu)
47e958ea
PE
1187{
1188 struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
1189
34c5bd66 1190 __sk_filter_release(fp);
47e958ea 1191}
fbc907f0
DB
1192
1193/**
1194 * sk_filter_release - release a socket filter
1195 * @fp: filter to remove
1196 *
1197 * Remove a filter from a socket and release its resources.
1198 */
1199static void sk_filter_release(struct sk_filter *fp)
1200{
4c355cdf 1201 if (refcount_dec_and_test(&fp->refcnt))
fbc907f0
DB
1202 call_rcu(&fp->rcu, sk_filter_release_rcu);
1203}
1204
1205void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
1206{
7ae457c1 1207 u32 filter_size = bpf_prog_size(fp->prog->len);
fbc907f0 1208
278571ba
AS
1209 atomic_sub(filter_size, &sk->sk_omem_alloc);
1210 sk_filter_release(fp);
fbc907f0 1211}
47e958ea 1212
278571ba
AS
1213/* try to charge the socket memory if there is space available
1214 * return true on success
1215 */
4c355cdf 1216static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
bd4cf0ed 1217{
7ae457c1 1218 u32 filter_size = bpf_prog_size(fp->prog->len);
7de6d09f 1219 int optmem_max = READ_ONCE(sysctl_optmem_max);
278571ba
AS
1220
1221 /* same check as in sock_kmalloc() */
7de6d09f
KI
1222 if (filter_size <= optmem_max &&
1223 atomic_read(&sk->sk_omem_alloc) + filter_size < optmem_max) {
278571ba
AS
1224 atomic_add(filter_size, &sk->sk_omem_alloc);
1225 return true;
bd4cf0ed 1226 }
278571ba 1227 return false;
bd4cf0ed
AS
1228}
1229
4c355cdf
RE
1230bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1231{
eefca20e
ED
1232 if (!refcount_inc_not_zero(&fp->refcnt))
1233 return false;
1234
1235 if (!__sk_filter_charge(sk, fp)) {
1236 sk_filter_release(fp);
1237 return false;
1238 }
1239 return true;
4c355cdf
RE
1240}
1241
7ae457c1 1242static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
bd4cf0ed
AS
1243{
1244 struct sock_filter *old_prog;
7ae457c1 1245 struct bpf_prog *old_fp;
34805931 1246 int err, new_len, old_len = fp->len;
e0cea7ce 1247 bool seen_ld_abs = false;
bd4cf0ed 1248
	/* We are free to overwrite insns et al right here as it won't be used at
	 * this point in time anymore internally after the migration to the eBPF
	 * instruction representation.
	 */
1253 BUILD_BUG_ON(sizeof(struct sock_filter) !=
2695fb55 1254 sizeof(struct bpf_insn));
bd4cf0ed 1255
bd4cf0ed
AS
1256 /* Conversion cannot happen on overlapping memory areas,
1257 * so we need to keep the user BPF around until the 2nd
1258 * pass. At this time, the user BPF is stored in fp->insns.
1259 */
1260 old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
658da937 1261 GFP_KERNEL | __GFP_NOWARN);
bd4cf0ed
AS
1262 if (!old_prog) {
1263 err = -ENOMEM;
1264 goto out_err;
1265 }
1266
1267 /* 1st pass: calculate the new program length. */
e0cea7ce
DB
1268 err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
1269 &seen_ld_abs);
bd4cf0ed
AS
1270 if (err)
1271 goto out_err_free;
1272
1273 /* Expand fp for appending the new filter representation. */
1274 old_fp = fp;
60a3b225 1275 fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
bd4cf0ed
AS
1276 if (!fp) {
1277 /* The old_fp is still around in case we couldn't
1278 * allocate new memory, so uncharge on that one.
1279 */
1280 fp = old_fp;
1281 err = -ENOMEM;
1282 goto out_err_free;
1283 }
1284
bd4cf0ed
AS
1285 fp->len = new_len;
1286
2695fb55 1287 /* 2nd pass: remap sock_filter insns into bpf_insn insns. */
e0cea7ce
DB
1288 err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
1289 &seen_ld_abs);
bd4cf0ed 1290 if (err)
8fb575ca 1291 /* 2nd bpf_convert_filter() can fail only if it fails
bd4cf0ed
AS
1292 * to allocate memory, remapping must succeed. Note,
1293 * that at this time old_fp has already been released
278571ba 1294 * by krealloc().
bd4cf0ed
AS
1295 */
1296 goto out_err_free;
1297
d1c55ab5 1298 fp = bpf_prog_select_runtime(fp, &err);
290af866
AS
1299 if (err)
1300 goto out_err_free;
5fe821a9 1301
bd4cf0ed
AS
1302 kfree(old_prog);
1303 return fp;
1304
1305out_err_free:
1306 kfree(old_prog);
1307out_err:
7ae457c1 1308 __bpf_prog_release(fp);
bd4cf0ed
AS
1309 return ERR_PTR(err);
1310}
1311
ac67eb2c
DB
1312static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
1313 bpf_aux_classic_check_t trans)
302d6637
JP
1314{
1315 int err;
1316
bd4cf0ed 1317 fp->bpf_func = NULL;
a91263d5 1318 fp->jited = 0;
302d6637 1319
4df95ff4 1320 err = bpf_check_classic(fp->insns, fp->len);
418c96ac 1321 if (err) {
7ae457c1 1322 __bpf_prog_release(fp);
bd4cf0ed 1323 return ERR_PTR(err);
418c96ac 1324 }
302d6637 1325
4ae92bc7
NS
1326 /* There might be additional checks and transformations
1327 * needed on classic filters, f.e. in case of seccomp.
1328 */
1329 if (trans) {
1330 err = trans(fp->insns, fp->len);
1331 if (err) {
1332 __bpf_prog_release(fp);
1333 return ERR_PTR(err);
1334 }
1335 }
1336
bd4cf0ed
AS
1337 /* Probe if we can JIT compile the filter and if so, do
1338 * the compilation of the filter.
1339 */
302d6637 1340 bpf_jit_compile(fp);
bd4cf0ed 1341
06edc59c
CH
1342 /* JIT compiler couldn't process this filter, so do the eBPF translation
1343 * for the optimized interpreter.
bd4cf0ed 1344 */
5fe821a9 1345 if (!fp->jited)
7ae457c1 1346 fp = bpf_migrate_filter(fp);
bd4cf0ed
AS
1347
1348 return fp;
302d6637
JP
1349}
1350
/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter,
 * a negative errno code is returned. On success the return is zero.
 */
7ae457c1 1361int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
302d6637 1362{
009937e7 1363 unsigned int fsize = bpf_classic_proglen(fprog);
7ae457c1 1364 struct bpf_prog *fp;
302d6637
JP
1365
1366 /* Make sure new filter is there and in the right amounts. */
f7bd9e36 1367 if (!bpf_check_basics_ok(fprog->filter, fprog->len))
302d6637
JP
1368 return -EINVAL;
1369
60a3b225 1370 fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
302d6637
JP
1371 if (!fp)
1372 return -ENOMEM;
a3ea269b 1373
302d6637
JP
1374 memcpy(fp->insns, fprog->filter, fsize);
1375
302d6637 1376 fp->len = fprog->len;
a3ea269b
DB
1377 /* Since unattached filters are not copied back to user
1378 * space through sk_get_filter(), we do not need to hold
1379 * a copy here, and can spare us the work.
1380 */
1381 fp->orig_prog = NULL;
302d6637 1382
7ae457c1 1383 /* bpf_prepare_filter() already takes care of freeing
bd4cf0ed
AS
1384 * memory in case something goes wrong.
1385 */
4ae92bc7 1386 fp = bpf_prepare_filter(fp, NULL);
bd4cf0ed
AS
1387 if (IS_ERR(fp))
1388 return PTR_ERR(fp);
302d6637
JP
1389
1390 *pfp = fp;
1391 return 0;
302d6637 1392}
7ae457c1 1393EXPORT_SYMBOL_GPL(bpf_prog_create);
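/* Illustrative usage sketch (added note, not part of this file): an in-kernel
 * user such as a driver can build a classic filter and create an unattached
 * program roughly like this. The single "accept everything" instruction below
 * is only an example; real callers supply their own filter.
 *
 *	struct sock_filter insns[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),
 *	};
 *	struct sock_fprog_kern fprog = {
 *		.len	= ARRAY_SIZE(insns),
 *		.filter	= insns,
 *	};
 *	struct bpf_prog *prog;
 *	int err = bpf_prog_create(&prog, &fprog);
 *
 *	// on success, use prog and release it later with bpf_prog_destroy()
 */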
302d6637 1394
/**
 *	bpf_prog_create_from_user - create an unattached filter from user buffer
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *	@trans: post-classic verifier transformation handler
 *	@save_orig: save classic BPF program
 *
 * This function effectively does the same as bpf_prog_create(), only
 * that it builds up its insns buffer from a user space provided buffer.
 * It also allows for passing a bpf_aux_classic_check_t handler.
 */
1406int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
bab18991 1407 bpf_aux_classic_check_t trans, bool save_orig)
ac67eb2c
DB
1408{
1409 unsigned int fsize = bpf_classic_proglen(fprog);
1410 struct bpf_prog *fp;
bab18991 1411 int err;
ac67eb2c
DB
1412
1413 /* Make sure new filter is there and in the right amounts. */
f7bd9e36 1414 if (!bpf_check_basics_ok(fprog->filter, fprog->len))
ac67eb2c
DB
1415 return -EINVAL;
1416
1417 fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1418 if (!fp)
1419 return -ENOMEM;
1420
1421 if (copy_from_user(fp->insns, fprog->filter, fsize)) {
1422 __bpf_prog_free(fp);
1423 return -EFAULT;
1424 }
1425
1426 fp->len = fprog->len;
ac67eb2c
DB
1427 fp->orig_prog = NULL;
1428
bab18991
DB
1429 if (save_orig) {
1430 err = bpf_prog_store_orig_filter(fp, fprog);
1431 if (err) {
1432 __bpf_prog_free(fp);
1433 return -ENOMEM;
1434 }
1435 }
1436
ac67eb2c
DB
1437 /* bpf_prepare_filter() already takes care of freeing
1438 * memory in case something goes wrong.
1439 */
1440 fp = bpf_prepare_filter(fp, trans);
1441 if (IS_ERR(fp))
1442 return PTR_ERR(fp);
1443
1444 *pfp = fp;
1445 return 0;
1446}
2ea273d7 1447EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
ac67eb2c 1448
7ae457c1 1449void bpf_prog_destroy(struct bpf_prog *fp)
302d6637 1450{
7ae457c1 1451 __bpf_prog_release(fp);
302d6637 1452}
7ae457c1 1453EXPORT_SYMBOL_GPL(bpf_prog_destroy);
302d6637 1454
8ced425e 1455static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
49b31e57
DB
1456{
1457 struct sk_filter *fp, *old_fp;
1458
1459 fp = kmalloc(sizeof(*fp), GFP_KERNEL);
1460 if (!fp)
1461 return -ENOMEM;
1462
1463 fp->prog = prog;
49b31e57 1464
4c355cdf 1465 if (!__sk_filter_charge(sk, fp)) {
49b31e57
DB
1466 kfree(fp);
1467 return -ENOMEM;
1468 }
4c355cdf 1469 refcount_set(&fp->refcnt, 1);
49b31e57 1470
8ced425e
HFS
1471 old_fp = rcu_dereference_protected(sk->sk_filter,
1472 lockdep_sock_is_held(sk));
49b31e57 1473 rcu_assign_pointer(sk->sk_filter, fp);
8ced425e 1474
49b31e57
DB
1475 if (old_fp)
1476 sk_filter_uncharge(sk, old_fp);
1477
1478 return 0;
1479}
1480
538950a1
CG
1481static
1482struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
1da177e4 1483{
009937e7 1484 unsigned int fsize = bpf_classic_proglen(fprog);
7ae457c1 1485 struct bpf_prog *prog;
1da177e4
LT
1486 int err;
1487
d59577b6 1488 if (sock_flag(sk, SOCK_FILTER_LOCKED))
538950a1 1489 return ERR_PTR(-EPERM);
d59577b6 1490
1da177e4 1491 /* Make sure new filter is there and in the right amounts. */
f7bd9e36 1492 if (!bpf_check_basics_ok(fprog->filter, fprog->len))
538950a1 1493 return ERR_PTR(-EINVAL);
1da177e4 1494
f7bd9e36 1495 prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
7ae457c1 1496 if (!prog)
538950a1 1497 return ERR_PTR(-ENOMEM);
a3ea269b 1498
7ae457c1 1499 if (copy_from_user(prog->insns, fprog->filter, fsize)) {
c0d1379a 1500 __bpf_prog_free(prog);
538950a1 1501 return ERR_PTR(-EFAULT);
1da177e4
LT
1502 }
1503
7ae457c1 1504 prog->len = fprog->len;
1da177e4 1505
7ae457c1 1506 err = bpf_prog_store_orig_filter(prog, fprog);
a3ea269b 1507 if (err) {
c0d1379a 1508 __bpf_prog_free(prog);
538950a1 1509 return ERR_PTR(-ENOMEM);
a3ea269b
DB
1510 }
1511
7ae457c1 1512 /* bpf_prepare_filter() already takes care of freeing
bd4cf0ed
AS
1513 * memory in case something goes wrong.
1514 */
538950a1
CG
1515 return bpf_prepare_filter(prog, NULL);
1516}
1517
/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter, a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
538950a1
CG
1529{
1530 struct bpf_prog *prog = __get_filter(fprog, sk);
1531 int err;
1532
7ae457c1
AS
1533 if (IS_ERR(prog))
1534 return PTR_ERR(prog);
1535
8ced425e 1536 err = __sk_attach_prog(prog, sk);
49b31e57 1537 if (err < 0) {
7ae457c1 1538 __bpf_prog_release(prog);
49b31e57 1539 return err;
278571ba
AS
1540 }
1541
d3904b73 1542 return 0;
1da177e4 1543}
8ced425e 1544EXPORT_SYMBOL_GPL(sk_attach_filter);
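/* Illustrative userspace path (added note, not in the original file): this is
 * the function ultimately reached when an application does
 *
 *	struct sock_fprog fprog = { .len = n, .filter = insns };
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
 *
 * with the sock_setsockopt() code copying fprog in and calling
 * sk_attach_filter() under the socket lock.
 */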
1da177e4 1545
538950a1 1546int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
89aa0758 1547{
538950a1 1548 struct bpf_prog *prog = __get_filter(fprog, sk);
49b31e57 1549 int err;
89aa0758 1550
538950a1
CG
1551 if (IS_ERR(prog))
1552 return PTR_ERR(prog);
1553
7de6d09f 1554 if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max))
8217ca65
MKL
1555 err = -ENOMEM;
1556 else
1557 err = reuseport_attach_prog(sk, prog);
1558
1559 if (err)
538950a1 1560 __bpf_prog_release(prog);
538950a1 1561
8217ca65 1562 return err;
538950a1
CG
1563}
1564
1565static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
1566{
89aa0758 1567 if (sock_flag(sk, SOCK_FILTER_LOCKED))
538950a1 1568 return ERR_PTR(-EPERM);
89aa0758 1569
113214be 1570 return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
538950a1
CG
1571}
1572
1573int sk_attach_bpf(u32 ufd, struct sock *sk)
1574{
1575 struct bpf_prog *prog = __get_bpf(ufd, sk);
1576 int err;
1577
1578 if (IS_ERR(prog))
1579 return PTR_ERR(prog);
1580
8ced425e 1581 err = __sk_attach_prog(prog, sk);
49b31e57 1582 if (err < 0) {
89aa0758 1583 bpf_prog_put(prog);
49b31e57 1584 return err;
89aa0758
AS
1585 }
1586
89aa0758
AS
1587 return 0;
1588}
1589
538950a1
CG
1590int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1591{
8217ca65 1592 struct bpf_prog *prog;
538950a1
CG
1593 int err;
1594
8217ca65
MKL
1595 if (sock_flag(sk, SOCK_FILTER_LOCKED))
1596 return -EPERM;
1597
1598 prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
45586c70 1599 if (PTR_ERR(prog) == -EINVAL)
8217ca65 1600 prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
538950a1
CG
1601 if (IS_ERR(prog))
1602 return PTR_ERR(prog);
1603
8217ca65
MKL
1604 if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
1605 /* Like other non BPF_PROG_TYPE_SOCKET_FILTER
1606 * bpf prog (e.g. sockmap). It depends on the
1607 * limitation imposed by bpf_prog_load().
1608 * Hence, sysctl_optmem_max is not checked.
1609 */
1610 if ((sk->sk_type != SOCK_STREAM &&
1611 sk->sk_type != SOCK_DGRAM) ||
1612 (sk->sk_protocol != IPPROTO_UDP &&
1613 sk->sk_protocol != IPPROTO_TCP) ||
1614 (sk->sk_family != AF_INET &&
1615 sk->sk_family != AF_INET6)) {
1616 err = -ENOTSUPP;
1617 goto err_prog_put;
1618 }
1619 } else {
1620 /* BPF_PROG_TYPE_SOCKET_FILTER */
7de6d09f 1621 if (bpf_prog_size(prog->len) > READ_ONCE(sysctl_optmem_max)) {
8217ca65
MKL
1622 err = -ENOMEM;
1623 goto err_prog_put;
1624 }
538950a1
CG
1625 }
1626
8217ca65
MKL
1627 err = reuseport_attach_prog(sk, prog);
1628err_prog_put:
1629 if (err)
1630 bpf_prog_put(prog);
1631
1632 return err;
1633}
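
/* Illustrative userspace path (added note, not in the original file): a
 * reuseport group selects sockets with an eBPF program attached via
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
 *		   &prog_fd, sizeof(prog_fd));
 *
 * where prog_fd refers to a program loaded with bpf(BPF_PROG_LOAD, ...) of
 * type BPF_PROG_TYPE_SOCKET_FILTER or BPF_PROG_TYPE_SK_REUSEPORT, matching
 * the checks above.
 */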
1634
1635void sk_reuseport_prog_free(struct bpf_prog *prog)
1636{
1637 if (!prog)
1638 return;
1639
1640 if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
1641 bpf_prog_put(prog);
1642 else
1643 bpf_prog_destroy(prog);
538950a1
CG
1644}
1645
21cafc1d
DB
1646struct bpf_scratchpad {
1647 union {
1648 __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
1649 u8 buff[MAX_BPF_STACK];
1650 };
1651};
1652
1653static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
91bc4822 1654
5293efe6
DB
1655static inline int __bpf_try_make_writable(struct sk_buff *skb,
1656 unsigned int write_len)
1657{
1658 return skb_ensure_writable(skb, write_len);
1659}
1660
db58ba45
AS
1661static inline int bpf_try_make_writable(struct sk_buff *skb,
1662 unsigned int write_len)
1663{
5293efe6 1664 int err = __bpf_try_make_writable(skb, write_len);
db58ba45 1665
6aaae2b6 1666 bpf_compute_data_pointers(skb);
db58ba45
AS
1667 return err;
1668}
1669
36bbef52
DB
1670static int bpf_try_make_head_writable(struct sk_buff *skb)
1671{
1672 return bpf_try_make_writable(skb, skb_headlen(skb));
1673}
1674
a2bfe6bf
DB
1675static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
1676{
1677 if (skb_at_tc_ingress(skb))
1678 skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1679}
1680
8065694e
DB
1681static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
1682{
1683 if (skb_at_tc_ingress(skb))
1684 skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1685}
1686
f3694e00
DB
1687BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
1688 const void *, from, u32, len, u64, flags)
608cd71a 1689{
608cd71a
AS
1690 void *ptr;
1691
8afd54c8 1692 if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
781c53bc 1693 return -EINVAL;
45969b41 1694 if (unlikely(offset > INT_MAX))
608cd71a 1695 return -EFAULT;
db58ba45 1696 if (unlikely(bpf_try_make_writable(skb, offset + len)))
608cd71a
AS
1697 return -EFAULT;
1698
0ed661d5 1699 ptr = skb->data + offset;
781c53bc 1700 if (flags & BPF_F_RECOMPUTE_CSUM)
479ffccc 1701 __skb_postpull_rcsum(skb, ptr, len, offset);
608cd71a
AS
1702
1703 memcpy(ptr, from, len);
1704
781c53bc 1705 if (flags & BPF_F_RECOMPUTE_CSUM)
479ffccc 1706 __skb_postpush_rcsum(skb, ptr, len, offset);
8afd54c8
DB
1707 if (flags & BPF_F_INVALIDATE_HASH)
1708 skb_clear_hash(skb);
f8ffad69 1709
608cd71a
AS
1710 return 0;
1711}
1712
577c50aa 1713static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
608cd71a
AS
1714 .func = bpf_skb_store_bytes,
1715 .gpl_only = false,
1716 .ret_type = RET_INTEGER,
1717 .arg1_type = ARG_PTR_TO_CTX,
1718 .arg2_type = ARG_ANYTHING,
216e3cd2 1719 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
39f19ebb 1720 .arg4_type = ARG_CONST_SIZE,
91bc4822
AS
1721 .arg5_type = ARG_ANYTHING,
1722};
1723
f3694e00
DB
1724BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
1725 void *, to, u32, len)
05c74e5e 1726{
05c74e5e
DB
1727 void *ptr;
1728
45969b41 1729 if (unlikely(offset > INT_MAX))
074f528e 1730 goto err_clear;
05c74e5e
DB
1731
1732 ptr = skb_header_pointer(skb, offset, len, to);
1733 if (unlikely(!ptr))
074f528e 1734 goto err_clear;
05c74e5e
DB
1735 if (ptr != to)
1736 memcpy(to, ptr, len);
1737
1738 return 0;
074f528e
DB
1739err_clear:
1740 memset(to, 0, len);
1741 return -EFAULT;
05c74e5e
DB
1742}
1743
577c50aa 1744static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
05c74e5e
DB
1745 .func = bpf_skb_load_bytes,
1746 .gpl_only = false,
1747 .ret_type = RET_INTEGER,
1748 .arg1_type = ARG_PTR_TO_CTX,
1749 .arg2_type = ARG_ANYTHING,
39f19ebb
AS
1750 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1751 .arg4_type = ARG_CONST_SIZE,
05c74e5e
DB
1752};
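/* Illustrative sketch (not part of this file's build): bpf_skb_load_bytes()
 * copies from anywhere in the skb (linear data or frags) into a program
 * buffer, so it works even where direct packet access would fail. The
 * offsets below assume an untagged IPv4 frame.
 *
 *	SEC("tc")
 *	int drop_udp(struct __sk_buff *skb)
 *	{
 *		__u8 proto;
 *
 *		if (bpf_skb_load_bytes(skb, ETH_HLEN + offsetof(struct iphdr, protocol),
 *				       &proto, sizeof(proto)) < 0)
 *			return TC_ACT_OK;
 *		return proto == IPPROTO_UDP ? TC_ACT_SHOT : TC_ACT_OK;
 *	}
 */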
1753
089b19a9
SF
1754BPF_CALL_4(bpf_flow_dissector_load_bytes,
1755 const struct bpf_flow_dissector *, ctx, u32, offset,
1756 void *, to, u32, len)
1757{
1758 void *ptr;
1759
1760 if (unlikely(offset > 0xffff))
1761 goto err_clear;
1762
1763 if (unlikely(!ctx->skb))
1764 goto err_clear;
1765
1766 ptr = skb_header_pointer(ctx->skb, offset, len, to);
1767 if (unlikely(!ptr))
1768 goto err_clear;
1769 if (ptr != to)
1770 memcpy(to, ptr, len);
1771
1772 return 0;
1773err_clear:
1774 memset(to, 0, len);
1775 return -EFAULT;
1776}
1777
1778static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
1779 .func = bpf_flow_dissector_load_bytes,
1780 .gpl_only = false,
1781 .ret_type = RET_INTEGER,
1782 .arg1_type = ARG_PTR_TO_CTX,
1783 .arg2_type = ARG_ANYTHING,
1784 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1785 .arg4_type = ARG_CONST_SIZE,
1786};
1787
4e1ec56c
DB
1788BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
1789 u32, offset, void *, to, u32, len, u32, start_header)
1790{
3eee1f75 1791 u8 *end = skb_tail_pointer(skb);
0f5d82f1 1792 u8 *start, *ptr;
4e1ec56c 1793
0f5d82f1 1794 if (unlikely(offset > 0xffff))
4e1ec56c
DB
1795 goto err_clear;
1796
1797 switch (start_header) {
1798 case BPF_HDR_START_MAC:
0f5d82f1
YZ
1799 if (unlikely(!skb_mac_header_was_set(skb)))
1800 goto err_clear;
1801 start = skb_mac_header(skb);
4e1ec56c
DB
1802 break;
1803 case BPF_HDR_START_NET:
0f5d82f1 1804 start = skb_network_header(skb);
4e1ec56c
DB
1805 break;
1806 default:
1807 goto err_clear;
1808 }
1809
0f5d82f1
YZ
1810 ptr = start + offset;
1811
1812 if (likely(ptr + len <= end)) {
4e1ec56c
DB
1813 memcpy(to, ptr, len);
1814 return 0;
1815 }
1816
1817err_clear:
1818 memset(to, 0, len);
1819 return -EFAULT;
1820}
1821
1822static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
1823 .func = bpf_skb_load_bytes_relative,
1824 .gpl_only = false,
1825 .ret_type = RET_INTEGER,
1826 .arg1_type = ARG_PTR_TO_CTX,
1827 .arg2_type = ARG_ANYTHING,
1828 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1829 .arg4_type = ARG_CONST_SIZE,
1830 .arg5_type = ARG_ANYTHING,
1831};
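/* Illustrative sketch (not part of this file's build): the _relative variant
 * takes its offset from the mac or network header instead of skb->data,
 * which is useful e.g. in cgroup/skb programs where the mac header may not
 * be part of the accessible data. Program and field names are assumptions.
 *
 *	SEC("cgroup_skb/ingress")
 *	int snoop_saddr(struct __sk_buff *skb)
 *	{
 *		__u32 saddr;
 *
 *		if (bpf_skb_load_bytes_relative(skb, offsetof(struct iphdr, saddr),
 *						&saddr, sizeof(saddr),
 *						BPF_HDR_START_NET) < 0)
 *			return 1;
 *		return 1;
 *	}
 */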
1832
36bbef52
DB
1833BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
1834{
1835 /* Idea is the following: should the needed direct read/write
 1836	 * test fail during runtime, we can pull in more data and redo
 1837	 * the test, since implicitly we invalidate the previous checks here.
 1838	 *
 1839	 * Or, since we know how much we need to make readable/writable,
 1840	 * this can be done once at the program beginning for the direct
 1841	 * access case. By this we overcome limitations of only current
1842 * headroom being accessible.
1843 */
1844 return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
1845}
1846
1847static const struct bpf_func_proto bpf_skb_pull_data_proto = {
1848 .func = bpf_skb_pull_data,
1849 .gpl_only = false,
1850 .ret_type = RET_INTEGER,
1851 .arg1_type = ARG_PTR_TO_CTX,
1852 .arg2_type = ARG_ANYTHING,
1853};
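/* Illustrative sketch (not part of this file's build): typical use of
 * bpf_skb_pull_data() as a fallback when a direct-access bounds check would
 * fail because the bytes are not yet in the linear area. The data pointers
 * have to be re-read and re-checked after the pull.
 *
 *	SEC("tc")
 *	int parse_ip(struct __sk_buff *skb)
 *	{
 *		__u32 need = ETH_HLEN + sizeof(struct iphdr);
 *		void *data, *data_end;
 *
 *		if (bpf_skb_pull_data(skb, need) < 0)
 *			return TC_ACT_OK;
 *		data = (void *)(long)skb->data;
 *		data_end = (void *)(long)skb->data_end;
 *		if (data + need > data_end)
 *			return TC_ACT_OK;
 *		// Ethernet + IPv4 headers can now be parsed via direct access.
 *		return TC_ACT_OK;
 *	}
 */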
1854
46f8bc92
MKL
1855BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
1856{
46f8bc92
MKL
1857 return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
1858}
1859
1860static const struct bpf_func_proto bpf_sk_fullsock_proto = {
1861 .func = bpf_sk_fullsock,
1862 .gpl_only = false,
1863 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
1864 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
1865};
1866
0ea488ff
JF
1867static inline int sk_skb_try_make_writable(struct sk_buff *skb,
1868 unsigned int write_len)
1869{
16137b09 1870 return __bpf_try_make_writable(skb, write_len);
0ea488ff
JF
1871}
1872
1873BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
1874{
1875 /* Idea is the following: should the needed direct read/write
 1876	 * test fail during runtime, we can pull in more data and redo
 1877	 * the test, since implicitly we invalidate the previous checks here.
 1878	 *
 1879	 * Or, since we know how much we need to make readable/writable,
 1880	 * this can be done once at the program beginning for the direct
 1881	 * access case. By this we overcome limitations of only current
1882 * headroom being accessible.
1883 */
1884 return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
1885}
1886
1887static const struct bpf_func_proto sk_skb_pull_data_proto = {
1888 .func = sk_skb_pull_data,
1889 .gpl_only = false,
1890 .ret_type = RET_INTEGER,
1891 .arg1_type = ARG_PTR_TO_CTX,
1892 .arg2_type = ARG_ANYTHING,
1893};
1894
f3694e00
DB
1895BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
1896 u64, from, u64, to, u64, flags)
91bc4822 1897{
0ed661d5 1898 __sum16 *ptr;
91bc4822 1899
781c53bc
DB
1900 if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
1901 return -EINVAL;
0ed661d5 1902 if (unlikely(offset > 0xffff || offset & 1))
91bc4822 1903 return -EFAULT;
0ed661d5 1904 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
91bc4822
AS
1905 return -EFAULT;
1906
0ed661d5 1907 ptr = (__sum16 *)(skb->data + offset);
781c53bc 1908 switch (flags & BPF_F_HDR_FIELD_MASK) {
8050c0f0
DB
1909 case 0:
1910 if (unlikely(from != 0))
1911 return -EINVAL;
1912
1913 csum_replace_by_diff(ptr, to);
1914 break;
91bc4822
AS
1915 case 2:
1916 csum_replace2(ptr, from, to);
1917 break;
1918 case 4:
1919 csum_replace4(ptr, from, to);
1920 break;
1921 default:
1922 return -EINVAL;
1923 }
1924
91bc4822
AS
1925 return 0;
1926}
1927
577c50aa 1928static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
91bc4822
AS
1929 .func = bpf_l3_csum_replace,
1930 .gpl_only = false,
1931 .ret_type = RET_INTEGER,
1932 .arg1_type = ARG_PTR_TO_CTX,
1933 .arg2_type = ARG_ANYTHING,
1934 .arg3_type = ARG_ANYTHING,
1935 .arg4_type = ARG_ANYTHING,
1936 .arg5_type = ARG_ANYTHING,
1937};
1938
f3694e00
DB
1939BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
1940 u64, from, u64, to, u64, flags)
91bc4822 1941{
781c53bc 1942 bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
2f72959a 1943 bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
d1b662ad 1944 bool do_mforce = flags & BPF_F_MARK_ENFORCE;
0ed661d5 1945 __sum16 *ptr;
91bc4822 1946
d1b662ad
DB
1947 if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
1948 BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
781c53bc 1949 return -EINVAL;
0ed661d5 1950 if (unlikely(offset > 0xffff || offset & 1))
91bc4822 1951 return -EFAULT;
0ed661d5 1952 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
91bc4822
AS
1953 return -EFAULT;
1954
0ed661d5 1955 ptr = (__sum16 *)(skb->data + offset);
d1b662ad 1956 if (is_mmzero && !do_mforce && !*ptr)
2f72959a 1957 return 0;
91bc4822 1958
781c53bc 1959 switch (flags & BPF_F_HDR_FIELD_MASK) {
7d672345
DB
1960 case 0:
1961 if (unlikely(from != 0))
1962 return -EINVAL;
1963
1964 inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1965 break;
91bc4822
AS
1966 case 2:
1967 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
1968 break;
1969 case 4:
1970 inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
1971 break;
1972 default:
1973 return -EINVAL;
1974 }
1975
2f72959a
DB
1976 if (is_mmzero && !*ptr)
1977 *ptr = CSUM_MANGLED_0;
91bc4822
AS
1978 return 0;
1979}
1980
577c50aa 1981static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
91bc4822
AS
1982 .func = bpf_l4_csum_replace,
1983 .gpl_only = false,
1984 .ret_type = RET_INTEGER,
1985 .arg1_type = ARG_PTR_TO_CTX,
1986 .arg2_type = ARG_ANYTHING,
1987 .arg3_type = ARG_ANYTHING,
1988 .arg4_type = ARG_ANYTHING,
1989 .arg5_type = ARG_ANYTHING,
608cd71a
AS
1990};
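/* Illustrative sketch (not part of this file's build): a DNAT-style rewrite
 * of the IPv4 destination address from a tc program, fixing up both the IP
 * header checksum and the TCP checksum (which covers the pseudo header).
 * Offsets assume an untagged IPv4/TCP packet; NEW_IP is a placeholder.
 *
 *	const __u32 ip_dst_off   = ETH_HLEN + offsetof(struct iphdr, daddr);
 *	const __u32 ip_csum_off  = ETH_HLEN + offsetof(struct iphdr, check);
 *	const __u32 tcp_csum_off = ETH_HLEN + sizeof(struct iphdr) +
 *				   offsetof(struct tcphdr, check);
 *	__be32 old_ip, new_ip = NEW_IP;
 *
 *	bpf_skb_load_bytes(skb, ip_dst_off, &old_ip, sizeof(old_ip));
 *	bpf_l4_csum_replace(skb, tcp_csum_off, old_ip, new_ip,
 *			    BPF_F_PSEUDO_HDR | sizeof(new_ip));
 *	bpf_l3_csum_replace(skb, ip_csum_off, old_ip, new_ip, sizeof(new_ip));
 *	bpf_skb_store_bytes(skb, ip_dst_off, &new_ip, sizeof(new_ip), 0);
 */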
1991
f3694e00
DB
1992BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
1993 __be32 *, to, u32, to_size, __wsum, seed)
7d672345 1994{
21cafc1d 1995 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
f3694e00 1996 u32 diff_size = from_size + to_size;
7d672345
DB
1997 int i, j = 0;
1998
1999 /* This is quite flexible, some examples:
2000 *
2001 * from_size == 0, to_size > 0, seed := csum --> pushing data
2002 * from_size > 0, to_size == 0, seed := csum --> pulling data
2003 * from_size > 0, to_size > 0, seed := 0 --> diffing data
2004 *
2005 * Even for diffing, from_size and to_size don't need to be equal.
2006 */
2007 if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
2008 diff_size > sizeof(sp->diff)))
2009 return -EINVAL;
2010
2011 for (i = 0; i < from_size / sizeof(__be32); i++, j++)
2012 sp->diff[j] = ~from[i];
2013 for (i = 0; i < to_size / sizeof(__be32); i++, j++)
2014 sp->diff[j] = to[i];
2015
2016 return csum_partial(sp->diff, diff_size, seed);
2017}
2018
577c50aa 2019static const struct bpf_func_proto bpf_csum_diff_proto = {
7d672345
DB
2020 .func = bpf_csum_diff,
2021 .gpl_only = false,
36bbef52 2022 .pkt_access = true,
7d672345 2023 .ret_type = RET_INTEGER,
216e3cd2 2024 .arg1_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
39f19ebb 2025 .arg2_type = ARG_CONST_SIZE_OR_ZERO,
216e3cd2 2026 .arg3_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
39f19ebb 2027 .arg4_type = ARG_CONST_SIZE_OR_ZERO,
7d672345
DB
2028 .arg5_type = ARG_ANYTHING,
2029};
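/* Illustrative sketch (not part of this file's build): bpf_csum_diff() can
 * compute the checksum delta for a block of rewritten bytes, and that delta
 * can then be folded into an L4 checksum via bpf_l4_csum_replace() with a
 * field size of 0 (the csum_replace_by_diff() case above). For example,
 * after replacing an IPv6 destination address ('tcp_csum_off' as in the
 * earlier sketch):
 *
 *	struct in6_addr old_ip6, new_ip6;	// filled in elsewhere
 *	__s64 diff;
 *
 *	diff = bpf_csum_diff((__be32 *)&old_ip6, sizeof(old_ip6),
 *			     (__be32 *)&new_ip6, sizeof(new_ip6), 0);
 *	bpf_l4_csum_replace(skb, tcp_csum_off, 0, diff, BPF_F_PSEUDO_HDR);
 */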
2030
36bbef52
DB
2031BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
2032{
2033 /* The interface is to be used in combination with bpf_csum_diff()
2034 * for direct packet writes. csum rotation for alignment as well
2035 * as emulating csum_sub() can be done from the eBPF program.
2036 */
2037 if (skb->ip_summed == CHECKSUM_COMPLETE)
2038 return (skb->csum = csum_add(skb->csum, csum));
2039
2040 return -ENOTSUPP;
2041}
2042
2043static const struct bpf_func_proto bpf_csum_update_proto = {
2044 .func = bpf_csum_update,
2045 .gpl_only = false,
2046 .ret_type = RET_INTEGER,
2047 .arg1_type = ARG_PTR_TO_CTX,
2048 .arg2_type = ARG_ANYTHING,
2049};
2050
7cdec54f
DB
2051BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
2052{
2053 /* The interface is to be used in combination with bpf_skb_adjust_room()
2054 * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET
2055 * is passed as flags, for example.
2056 */
2057 switch (level) {
2058 case BPF_CSUM_LEVEL_INC:
2059 __skb_incr_checksum_unnecessary(skb);
2060 break;
2061 case BPF_CSUM_LEVEL_DEC:
2062 __skb_decr_checksum_unnecessary(skb);
2063 break;
2064 case BPF_CSUM_LEVEL_RESET:
2065 __skb_reset_checksum_unnecessary(skb);
2066 break;
2067 case BPF_CSUM_LEVEL_QUERY:
2068 return skb->ip_summed == CHECKSUM_UNNECESSARY ?
2069 skb->csum_level : -EACCES;
2070 default:
2071 return -EINVAL;
2072 }
2073
2074 return 0;
2075}
2076
2077static const struct bpf_func_proto bpf_csum_level_proto = {
2078 .func = bpf_csum_level,
2079 .gpl_only = false,
2080 .ret_type = RET_INTEGER,
2081 .arg1_type = ARG_PTR_TO_CTX,
2082 .arg2_type = ARG_ANYTHING,
2083};
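/* Illustrative sketch (not part of this file's build): when a program decaps
 * a tunnel header via bpf_skb_adjust_room() with BPF_F_ADJ_ROOM_NO_CSUM_RESET,
 * it can keep CHECKSUM_UNNECESSARY consistent by dropping one csum level.
 * 'tunnel_hdr_len' is a placeholder for the removed header length.
 *
 *	bpf_skb_adjust_room(skb, -(__s32)tunnel_hdr_len, BPF_ADJ_ROOM_MAC,
 *			    BPF_F_ADJ_ROOM_NO_CSUM_RESET);
 *	bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC);
 */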
2084
a70b506e
DB
2085static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
2086{
5f7d5728 2087 return dev_forward_skb_nomtu(dev, skb);
a70b506e
DB
2088}
2089
4e3264d2
MKL
2090static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
2091 struct sk_buff *skb)
2092{
5f7d5728 2093 int ret = ____dev_forward_skb(dev, skb, false);
4e3264d2
MKL
2094
2095 if (likely(!ret)) {
2096 skb->dev = dev;
2097 ret = netif_rx(skb);
2098 }
2099
2100 return ret;
2101}
2102
a70b506e
DB
2103static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
2104{
2105 int ret;
2106
97cdcf37 2107 if (dev_xmit_recursion()) {
a70b506e
DB
2108 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2109 kfree_skb(skb);
2110 return -ENETDOWN;
2111 }
2112
2113 skb->dev = dev;
de799101 2114 skb_clear_tstamp(skb);
a70b506e 2115
97cdcf37 2116 dev_xmit_recursion_inc();
a70b506e 2117 ret = dev_queue_xmit(skb);
97cdcf37 2118 dev_xmit_recursion_dec();
a70b506e
DB
2119
2120 return ret;
2121}
2122
4e3264d2
MKL
2123static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
2124 u32 flags)
2125{
e7c87bd6 2126 unsigned int mlen = skb_network_offset(skb);
4e3264d2 2127
114039b3
SF
2128 if (unlikely(skb->len <= mlen)) {
2129 kfree_skb(skb);
2130 return -ERANGE;
2131 }
2132
e7c87bd6
WB
2133 if (mlen) {
2134 __skb_pull(skb, mlen);
4e3264d2 2135
e7c87bd6
WB
2136 /* At ingress, the mac header has already been pulled once.
 2137	 * At egress, skb_postpull_rcsum() has to be done in case the
 2138	 * skb originated from ingress (i.e. a forwarded skb)
2139 * to ensure that rcsum starts at net header.
2140 */
2141 if (!skb_at_tc_ingress(skb))
2142 skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
2143 }
4e3264d2
MKL
2144 skb_pop_mac_header(skb);
2145 skb_reset_mac_len(skb);
2146 return flags & BPF_F_INGRESS ?
2147 __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
2148}
2149
2150static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
2151 u32 flags)
2152{
3a0af8fd 2153 /* Verify that a link layer header is carried */
114039b3 2154 if (unlikely(skb->mac_header >= skb->network_header || skb->len == 0)) {
3a0af8fd
TG
2155 kfree_skb(skb);
2156 return -ERANGE;
2157 }
2158
4e3264d2
MKL
2159 bpf_push_mac_rcsum(skb);
2160 return flags & BPF_F_INGRESS ?
2161 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
2162}
2163
2164static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
2165 u32 flags)
2166{
c491680f 2167 if (dev_is_mac_header_xmit(dev))
4e3264d2 2168 return __bpf_redirect_common(skb, dev, flags);
c491680f
DB
2169 else
2170 return __bpf_redirect_no_mac(skb, dev, flags);
4e3264d2
MKL
2171}
2172
b4ab3141 2173#if IS_ENABLED(CONFIG_IPV6)
ba452c9e
THJ
2174static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb,
2175 struct net_device *dev, struct bpf_nh_params *nh)
b4ab3141 2176{
b4ab3141
DB
2177 u32 hh_len = LL_RESERVED_SPACE(dev);
2178 const struct in6_addr *nexthop;
ba452c9e 2179 struct dst_entry *dst = NULL;
b4ab3141
DB
2180 struct neighbour *neigh;
2181
2182 if (dev_xmit_recursion()) {
2183 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2184 goto out_drop;
2185 }
2186
2187 skb->dev = dev;
de799101 2188 skb_clear_tstamp(skb);
b4ab3141
DB
2189
2190 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
a1e975e1
VA
2191 skb = skb_expand_head(skb, hh_len);
2192 if (!skb)
b4ab3141 2193 return -ENOMEM;
b4ab3141
DB
2194 }
2195
2196 rcu_read_lock_bh();
ba452c9e
THJ
2197 if (!nh) {
2198 dst = skb_dst(skb);
2199 nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst),
2200 &ipv6_hdr(skb)->daddr);
2201 } else {
2202 nexthop = &nh->ipv6_nh;
2203 }
b4ab3141
DB
2204 neigh = ip_neigh_gw6(dev, nexthop);
2205 if (likely(!IS_ERR(neigh))) {
2206 int ret;
2207
2208 sock_confirm_neigh(skb, neigh);
2209 dev_xmit_recursion_inc();
2210 ret = neigh_output(neigh, skb, false);
2211 dev_xmit_recursion_dec();
2212 rcu_read_unlock_bh();
2213 return ret;
2214 }
2215 rcu_read_unlock_bh();
ba452c9e 2216 if (dst)
a1e975e1 2217 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
b4ab3141
DB
2218out_drop:
2219 kfree_skb(skb);
2220 return -ENETDOWN;
2221}
2222
ba452c9e
THJ
2223static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
2224 struct bpf_nh_params *nh)
b4ab3141
DB
2225{
2226 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
2227 struct net *net = dev_net(dev);
2228 int err, ret = NET_XMIT_DROP;
b4ab3141 2229
ba452c9e
THJ
2230 if (!nh) {
2231 struct dst_entry *dst;
2232 struct flowi6 fl6 = {
2233 .flowi6_flags = FLOWI_FLAG_ANYSRC,
2234 .flowi6_mark = skb->mark,
2235 .flowlabel = ip6_flowinfo(ip6h),
2236 .flowi6_oif = dev->ifindex,
2237 .flowi6_proto = ip6h->nexthdr,
2238 .daddr = ip6h->daddr,
2239 .saddr = ip6h->saddr,
2240 };
2241
2242 dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL);
2243 if (IS_ERR(dst))
2244 goto out_drop;
b4ab3141 2245
ba452c9e
THJ
2246 skb_dst_set(skb, dst);
2247 } else if (nh->nh_family != AF_INET6) {
2248 goto out_drop;
2249 }
b4ab3141 2250
ba452c9e 2251 err = bpf_out_neigh_v6(net, skb, dev, nh);
b4ab3141
DB
2252 if (unlikely(net_xmit_eval(err)))
2253 dev->stats.tx_errors++;
2254 else
2255 ret = NET_XMIT_SUCCESS;
2256 goto out_xmit;
2257out_drop:
2258 dev->stats.tx_errors++;
2259 kfree_skb(skb);
2260out_xmit:
2261 return ret;
2262}
2263#else
ba452c9e
THJ
2264static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev,
2265 struct bpf_nh_params *nh)
b4ab3141
DB
2266{
2267 kfree_skb(skb);
2268 return NET_XMIT_DROP;
2269}
2270#endif /* CONFIG_IPV6 */
2271
2272#if IS_ENABLED(CONFIG_INET)
ba452c9e
THJ
2273static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb,
2274 struct net_device *dev, struct bpf_nh_params *nh)
b4ab3141 2275{
b4ab3141
DB
2276 u32 hh_len = LL_RESERVED_SPACE(dev);
2277 struct neighbour *neigh;
2278 bool is_v6gw = false;
2279
2280 if (dev_xmit_recursion()) {
2281 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2282 goto out_drop;
2283 }
2284
2285 skb->dev = dev;
de799101 2286 skb_clear_tstamp(skb);
b4ab3141
DB
2287
2288 if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) {
a1e975e1
VA
2289 skb = skb_expand_head(skb, hh_len);
2290 if (!skb)
b4ab3141 2291 return -ENOMEM;
b4ab3141
DB
2292 }
2293
2294 rcu_read_lock_bh();
ba452c9e
THJ
2295 if (!nh) {
2296 struct dst_entry *dst = skb_dst(skb);
2297 struct rtable *rt = container_of(dst, struct rtable, dst);
2298
2299 neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
2300 } else if (nh->nh_family == AF_INET6) {
2301 neigh = ip_neigh_gw6(dev, &nh->ipv6_nh);
2302 is_v6gw = true;
2303 } else if (nh->nh_family == AF_INET) {
2304 neigh = ip_neigh_gw4(dev, nh->ipv4_nh);
2305 } else {
2306 rcu_read_unlock_bh();
2307 goto out_drop;
2308 }
2309
b4ab3141
DB
2310 if (likely(!IS_ERR(neigh))) {
2311 int ret;
2312
2313 sock_confirm_neigh(skb, neigh);
2314 dev_xmit_recursion_inc();
2315 ret = neigh_output(neigh, skb, is_v6gw);
2316 dev_xmit_recursion_dec();
2317 rcu_read_unlock_bh();
2318 return ret;
2319 }
2320 rcu_read_unlock_bh();
2321out_drop:
2322 kfree_skb(skb);
2323 return -ENETDOWN;
2324}
2325
ba452c9e
THJ
2326static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
2327 struct bpf_nh_params *nh)
b4ab3141
DB
2328{
2329 const struct iphdr *ip4h = ip_hdr(skb);
2330 struct net *net = dev_net(dev);
2331 int err, ret = NET_XMIT_DROP;
b4ab3141 2332
ba452c9e
THJ
2333 if (!nh) {
2334 struct flowi4 fl4 = {
2335 .flowi4_flags = FLOWI_FLAG_ANYSRC,
2336 .flowi4_mark = skb->mark,
2337 .flowi4_tos = RT_TOS(ip4h->tos),
2338 .flowi4_oif = dev->ifindex,
2339 .flowi4_proto = ip4h->protocol,
2340 .daddr = ip4h->daddr,
2341 .saddr = ip4h->saddr,
2342 };
2343 struct rtable *rt;
2344
2345 rt = ip_route_output_flow(net, &fl4, NULL);
2346 if (IS_ERR(rt))
2347 goto out_drop;
2348 if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) {
2349 ip_rt_put(rt);
2350 goto out_drop;
2351 }
b4ab3141 2352
ba452c9e
THJ
2353 skb_dst_set(skb, &rt->dst);
2354 }
b4ab3141 2355
ba452c9e 2356 err = bpf_out_neigh_v4(net, skb, dev, nh);
b4ab3141
DB
2357 if (unlikely(net_xmit_eval(err)))
2358 dev->stats.tx_errors++;
2359 else
2360 ret = NET_XMIT_SUCCESS;
2361 goto out_xmit;
2362out_drop:
2363 dev->stats.tx_errors++;
2364 kfree_skb(skb);
2365out_xmit:
2366 return ret;
2367}
2368#else
ba452c9e
THJ
2369static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev,
2370 struct bpf_nh_params *nh)
b4ab3141
DB
2371{
2372 kfree_skb(skb);
2373 return NET_XMIT_DROP;
2374}
2375#endif /* CONFIG_INET */
2376
ba452c9e
THJ
2377static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev,
2378 struct bpf_nh_params *nh)
b4ab3141
DB
2379{
2380 struct ethhdr *ethh = eth_hdr(skb);
2381
2382 if (unlikely(skb->mac_header >= skb->network_header))
2383 goto out;
2384 bpf_push_mac_rcsum(skb);
2385 if (is_multicast_ether_addr(ethh->h_dest))
2386 goto out;
2387
2388 skb_pull(skb, sizeof(*ethh));
2389 skb_unset_mac_header(skb);
2390 skb_reset_network_header(skb);
2391
2392 if (skb->protocol == htons(ETH_P_IP))
ba452c9e 2393 return __bpf_redirect_neigh_v4(skb, dev, nh);
b4ab3141 2394 else if (skb->protocol == htons(ETH_P_IPV6))
ba452c9e 2395 return __bpf_redirect_neigh_v6(skb, dev, nh);
b4ab3141
DB
2396out:
2397 kfree_skb(skb);
2398 return -ENOTSUPP;
2399}
2400
2401/* Internal, non-exposed redirect flags. */
2402enum {
9aa1206e
DB
2403 BPF_F_NEIGH = (1ULL << 1),
2404 BPF_F_PEER = (1ULL << 2),
ba452c9e
THJ
2405 BPF_F_NEXTHOP = (1ULL << 3),
2406#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP)
b4ab3141
DB
2407};
2408
f3694e00 2409BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
3896d655 2410{
3896d655 2411 struct net_device *dev;
36bbef52
DB
2412 struct sk_buff *clone;
2413 int ret;
3896d655 2414
b4ab3141 2415 if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
781c53bc
DB
2416 return -EINVAL;
2417
3896d655
AS
2418 dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
2419 if (unlikely(!dev))
2420 return -EINVAL;
2421
36bbef52
DB
2422 clone = skb_clone(skb, GFP_ATOMIC);
2423 if (unlikely(!clone))
3896d655
AS
2424 return -ENOMEM;
2425
36bbef52
DB
2426 /* For direct write, we need to keep the invariant that the skbs
2427 * we're dealing with need to be uncloned. Should uncloning fail
2428 * here, we need to free the just generated clone to unclone once
2429 * again.
2430 */
2431 ret = bpf_try_make_head_writable(skb);
2432 if (unlikely(ret)) {
2433 kfree_skb(clone);
2434 return -ENOMEM;
2435 }
2436
4e3264d2 2437 return __bpf_redirect(clone, dev, flags);
3896d655
AS
2438}
2439
577c50aa 2440static const struct bpf_func_proto bpf_clone_redirect_proto = {
3896d655
AS
2441 .func = bpf_clone_redirect,
2442 .gpl_only = false,
2443 .ret_type = RET_INTEGER,
2444 .arg1_type = ARG_PTR_TO_CTX,
2445 .arg2_type = ARG_ANYTHING,
2446 .arg3_type = ARG_ANYTHING,
2447};
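/* Illustrative sketch (not part of this file's build): mirroring traffic to
 * another device while letting the original packet continue on its way. The
 * ifindex is an assumption for the example.
 *
 *	SEC("tc")
 *	int mirror(struct __sk_buff *skb)
 *	{
 *		// clone towards ifindex 5, egress side (no BPF_F_INGRESS)
 *		bpf_clone_redirect(skb, 5, 0);
 *		return TC_ACT_OK;
 *	}
 */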
2448
0b19cc0a
TM
2449DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
2450EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
781c53bc 2451
27b29f63
AS
2452int skb_do_redirect(struct sk_buff *skb)
2453{
0b19cc0a 2454 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
9aa1206e 2455 struct net *net = dev_net(skb->dev);
27b29f63 2456 struct net_device *dev;
b4ab3141 2457 u32 flags = ri->flags;
27b29f63 2458
9aa1206e 2459 dev = dev_get_by_index_rcu(net, ri->tgt_index);
4b55cf29 2460 ri->tgt_index = 0;
9aa1206e
DB
2461 ri->flags = 0;
2462 if (unlikely(!dev))
2463 goto out_drop;
2464 if (flags & BPF_F_PEER) {
2465 const struct net_device_ops *ops = dev->netdev_ops;
2466
2467 if (unlikely(!ops->ndo_get_peer_dev ||
2468 !skb_at_tc_ingress(skb)))
2469 goto out_drop;
2470 dev = ops->ndo_get_peer_dev(dev);
2471 if (unlikely(!dev ||
5f7d5728 2472 !(dev->flags & IFF_UP) ||
9aa1206e
DB
2473 net_eq(net, dev_net(dev))))
2474 goto out_drop;
2475 skb->dev = dev;
2476 return -EAGAIN;
27b29f63 2477 }
b4ab3141 2478 return flags & BPF_F_NEIGH ?
ba452c9e
THJ
2479 __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ?
2480 &ri->nh : NULL) :
b4ab3141 2481 __bpf_redirect(skb, dev, flags);
9aa1206e
DB
2482out_drop:
2483 kfree_skb(skb);
2484 return -EINVAL;
b4ab3141
DB
2485}
2486
2487BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
2488{
2489 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2490
2491 if (unlikely(flags & (~(BPF_F_INGRESS) | BPF_F_REDIRECT_INTERNAL)))
2492 return TC_ACT_SHOT;
2493
2494 ri->flags = flags;
2495 ri->tgt_index = ifindex;
2496
2497 return TC_ACT_REDIRECT;
27b29f63
AS
2498}
2499
577c50aa 2500static const struct bpf_func_proto bpf_redirect_proto = {
27b29f63
AS
2501 .func = bpf_redirect,
2502 .gpl_only = false,
2503 .ret_type = RET_INTEGER,
2504 .arg1_type = ARG_ANYTHING,
2505 .arg2_type = ARG_ANYTHING,
2506};
2507
9aa1206e
DB
2508BPF_CALL_2(bpf_redirect_peer, u32, ifindex, u64, flags)
2509{
2510 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2511
2512 if (unlikely(flags))
2513 return TC_ACT_SHOT;
2514
2515 ri->flags = BPF_F_PEER;
2516 ri->tgt_index = ifindex;
2517
2518 return TC_ACT_REDIRECT;
2519}
2520
2521static const struct bpf_func_proto bpf_redirect_peer_proto = {
2522 .func = bpf_redirect_peer,
2523 .gpl_only = false,
2524 .ret_type = RET_INTEGER,
2525 .arg1_type = ARG_ANYTHING,
2526 .arg2_type = ARG_ANYTHING,
2527};
2528
ba452c9e
THJ
2529BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params,
2530 int, plen, u64, flags)
b4ab3141
DB
2531{
2532 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
2533
ba452c9e 2534 if (unlikely((plen && plen < sizeof(*params)) || flags))
b4ab3141
DB
2535 return TC_ACT_SHOT;
2536
ba452c9e 2537 ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0);
b4ab3141
DB
2538 ri->tgt_index = ifindex;
2539
ba452c9e
THJ
2540 BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params));
2541 if (plen)
2542 memcpy(&ri->nh, params, sizeof(ri->nh));
2543
b4ab3141
DB
2544 return TC_ACT_REDIRECT;
2545}
2546
2547static const struct bpf_func_proto bpf_redirect_neigh_proto = {
2548 .func = bpf_redirect_neigh,
2549 .gpl_only = false,
2550 .ret_type = RET_INTEGER,
2551 .arg1_type = ARG_ANYTHING,
216e3cd2 2552 .arg2_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY,
ba452c9e
THJ
2553 .arg3_type = ARG_CONST_SIZE_OR_ZERO,
2554 .arg4_type = ARG_ANYTHING,
b4ab3141
DB
2555};
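/* Illustrative sketch (not part of this file's build): the redirect flavours
 * as seen from a tc program. bpf_redirect() transmits an already fully built
 * L2 frame, bpf_redirect_neigh() takes an L3 packet and lets the stack fill
 * in the Ethernet header via neighbour resolution, and bpf_redirect_peer()
 * crosses over to the peer netns device (e.g. the other end of a veth pair).
 * The IFINDEX values are assumptions.
 *
 *	return bpf_redirect(IFINDEX, 0);		// frame ready to transmit
 *	return bpf_redirect_neigh(IFINDEX, NULL, 0, 0);	// resolve nexthop from dst
 *	return bpf_redirect_peer(IFINDEX, 0);		// ingress on the peer device
 */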
2556
604326b4 2557BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
2a100317
JF
2558{
2559 msg->apply_bytes = bytes;
2560 return 0;
2561}
2562
2563static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
2564 .func = bpf_msg_apply_bytes,
2565 .gpl_only = false,
2566 .ret_type = RET_INTEGER,
2567 .arg1_type = ARG_PTR_TO_CTX,
2568 .arg2_type = ARG_ANYTHING,
2569};
2570
604326b4 2571BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
91843d54
JF
2572{
2573 msg->cork_bytes = bytes;
2574 return 0;
2575}
2576
2577static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
2578 .func = bpf_msg_cork_bytes,
2579 .gpl_only = false,
2580 .ret_type = RET_INTEGER,
2581 .arg1_type = ARG_PTR_TO_CTX,
2582 .arg2_type = ARG_ANYTHING,
2583};
2584
604326b4
DB
2585BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
2586 u32, end, u64, flags)
015632bb 2587{
604326b4
DB
2588 u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
2589 u32 first_sge, last_sge, i, shift, bytes_sg_total;
2590 struct scatterlist *sge;
2591 u8 *raw, *to, *from;
015632bb
JF
2592 struct page *page;
2593
2594 if (unlikely(flags || end <= start))
2595 return -EINVAL;
2596
2597 /* First find the starting scatterlist element */
604326b4 2598 i = msg->sg.start;
015632bb 2599 do {
6562e29c 2600 offset += len;
604326b4 2601 len = sk_msg_elem(msg, i)->length;
015632bb
JF
2602 if (start < offset + len)
2603 break;
604326b4
DB
2604 sk_msg_iter_var_next(i);
2605 } while (i != msg->sg.end);
015632bb
JF
2606
2607 if (unlikely(start >= offset + len))
2608 return -EINVAL;
2609
604326b4 2610 first_sge = i;
5b24109b
DB
2611 /* The start may point into the sg element so we need to also
2612 * account for the headroom.
2613 */
2614 bytes_sg_total = start - offset + bytes;
5a8fb33e 2615 if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len)
015632bb 2616 goto out;
015632bb
JF
2617
2618 /* At this point we need to linearize multiple scatterlist
2619 * elements or a single shared page. Either way we need to
2620 * copy into a linear buffer exclusively owned by BPF. Then
2621 * place the buffer in the scatterlist and fixup the original
2622 * entries by removing the entries now in the linear buffer
2623 * and shifting the remaining entries. For now we do not try
2624 * to copy partial entries to avoid complexity of running out
2625 * of sg_entry slots. The downside is reading a single byte
2626 * will copy the entire sg entry.
2627 */
2628 do {
604326b4
DB
2629 copy += sk_msg_elem(msg, i)->length;
2630 sk_msg_iter_var_next(i);
5b24109b 2631 if (bytes_sg_total <= copy)
015632bb 2632 break;
604326b4
DB
2633 } while (i != msg->sg.end);
2634 last_sge = i;
015632bb 2635
5b24109b 2636 if (unlikely(bytes_sg_total > copy))
015632bb
JF
2637 return -EINVAL;
2638
4c3d795c
TD
2639 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2640 get_order(copy));
015632bb
JF
2641 if (unlikely(!page))
2642 return -ENOMEM;
015632bb 2643
604326b4
DB
2644 raw = page_address(page);
2645 i = first_sge;
015632bb 2646 do {
604326b4
DB
2647 sge = sk_msg_elem(msg, i);
2648 from = sg_virt(sge);
2649 len = sge->length;
2650 to = raw + poffset;
015632bb
JF
2651
2652 memcpy(to, from, len);
9db39f4d 2653 poffset += len;
604326b4
DB
2654 sge->length = 0;
2655 put_page(sg_page(sge));
015632bb 2656
604326b4
DB
2657 sk_msg_iter_var_next(i);
2658 } while (i != last_sge);
015632bb 2659
604326b4 2660 sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
015632bb
JF
2661
2662 /* To repair sg ring we need to shift entries. If we only
2663 * had a single entry though we can just replace it and
2664 * be done. Otherwise walk the ring and shift the entries.
2665 */
604326b4
DB
2666 WARN_ON_ONCE(last_sge == first_sge);
2667 shift = last_sge > first_sge ?
2668 last_sge - first_sge - 1 :
031097d9 2669 NR_MSG_FRAG_IDS - first_sge + last_sge - 1;
015632bb
JF
2670 if (!shift)
2671 goto out;
2672
604326b4
DB
2673 i = first_sge;
2674 sk_msg_iter_var_next(i);
015632bb 2675 do {
604326b4 2676 u32 move_from;
015632bb 2677
031097d9
JK
2678 if (i + shift >= NR_MSG_FRAG_IDS)
2679 move_from = i + shift - NR_MSG_FRAG_IDS;
015632bb
JF
2680 else
2681 move_from = i + shift;
604326b4 2682 if (move_from == msg->sg.end)
015632bb
JF
2683 break;
2684
604326b4
DB
2685 msg->sg.data[i] = msg->sg.data[move_from];
2686 msg->sg.data[move_from].length = 0;
2687 msg->sg.data[move_from].page_link = 0;
2688 msg->sg.data[move_from].offset = 0;
2689 sk_msg_iter_var_next(i);
015632bb 2690 } while (1);
604326b4
DB
2691
2692 msg->sg.end = msg->sg.end - shift > msg->sg.end ?
031097d9 2693 msg->sg.end - shift + NR_MSG_FRAG_IDS :
604326b4 2694 msg->sg.end - shift;
015632bb 2695out:
604326b4 2696 msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
015632bb 2697 msg->data_end = msg->data + bytes;
015632bb
JF
2698 return 0;
2699}
2700
2701static const struct bpf_func_proto bpf_msg_pull_data_proto = {
2702 .func = bpf_msg_pull_data,
2703 .gpl_only = false,
2704 .ret_type = RET_INTEGER,
2705 .arg1_type = ARG_PTR_TO_CTX,
2706 .arg2_type = ARG_ANYTHING,
2707 .arg3_type = ARG_ANYTHING,
2708 .arg4_type = ARG_ANYTHING,
2709};
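/* Illustrative sketch (not part of this file's build): an sk_msg verdict
 * program has to call bpf_msg_pull_data() before it can read a byte range
 * through msg->data/msg->data_end, since the payload may be spread across
 * several scatterlist entries.
 *
 *	SEC("sk_msg")
 *	int msg_verdict(struct sk_msg_md *msg)
 *	{
 *		void *data, *data_end;
 *
 *		if (bpf_msg_pull_data(msg, 0, 4, 0) < 0)
 *			return SK_PASS;
 *		data = msg->data;
 *		data_end = msg->data_end;
 *		if (data + 4 > data_end)
 *			return SK_PASS;
 *		// first four payload bytes are now directly accessible
 *		return SK_PASS;
 *	}
 */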
2710
6fff607e
JF
2711BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
2712 u32, len, u64, flags)
2713{
2714 struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
6562e29c 2715 u32 new, i = 0, l = 0, space, copy = 0, offset = 0;
6fff607e
JF
2716 u8 *raw, *to, *from;
2717 struct page *page;
2718
2719 if (unlikely(flags))
2720 return -EINVAL;
2721
4a11678f
FM
2722 if (unlikely(len == 0))
2723 return 0;
2724
6fff607e
JF
2725 /* First find the starting scatterlist element */
2726 i = msg->sg.start;
2727 do {
6562e29c 2728 offset += l;
6fff607e
JF
2729 l = sk_msg_elem(msg, i)->length;
2730
2731 if (start < offset + l)
2732 break;
6fff607e
JF
2733 sk_msg_iter_var_next(i);
2734 } while (i != msg->sg.end);
2735
2736 if (start >= offset + l)
2737 return -EINVAL;
2738
2739 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2740
 2741	/* If no space is available we will fall back to copy; we need at
 2742	 * least one scatterlist elem available to push data into when
 2743	 * start aligns to the beginning of an element, or two when it
 2744	 * falls inside an element. We handle the start equals offset
 2745	 * case because it's the common case for inserting a
 2746	 * header.
2747 */
2748 if (!space || (space == 1 && start != offset))
2749 copy = msg->sg.data[i].length;
2750
2751 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2752 get_order(copy + len));
2753 if (unlikely(!page))
2754 return -ENOMEM;
2755
2756 if (copy) {
2757 int front, back;
2758
2759 raw = page_address(page);
2760
2761 psge = sk_msg_elem(msg, i);
2762 front = start - offset;
2763 back = psge->length - front;
2764 from = sg_virt(psge);
2765
2766 if (front)
2767 memcpy(raw, from, front);
2768
2769 if (back) {
2770 from += front;
2771 to = raw + front + len;
2772
2773 memcpy(to, from, back);
2774 }
2775
2776 put_page(sg_page(psge));
2777 } else if (start - offset) {
2778 psge = sk_msg_elem(msg, i);
2779 rsge = sk_msg_elem_cpy(msg, i);
2780
2781 psge->length = start - offset;
2782 rsge.length -= psge->length;
2783 rsge.offset += start;
2784
2785 sk_msg_iter_var_next(i);
2786 sg_unmark_end(psge);
cf21e9ba 2787 sg_unmark_end(&rsge);
6fff607e
JF
2788 sk_msg_iter_next(msg, end);
2789 }
2790
2791 /* Slot(s) to place newly allocated data */
2792 new = i;
2793
2794 /* Shift one or two slots as needed */
2795 if (!copy) {
2796 sge = sk_msg_elem_cpy(msg, i);
2797
2798 sk_msg_iter_var_next(i);
2799 sg_unmark_end(&sge);
2800 sk_msg_iter_next(msg, end);
2801
2802 nsge = sk_msg_elem_cpy(msg, i);
2803 if (rsge.length) {
2804 sk_msg_iter_var_next(i);
2805 nnsge = sk_msg_elem_cpy(msg, i);
2806 }
2807
2808 while (i != msg->sg.end) {
2809 msg->sg.data[i] = sge;
2810 sge = nsge;
2811 sk_msg_iter_var_next(i);
2812 if (rsge.length) {
2813 nsge = nnsge;
2814 nnsge = sk_msg_elem_cpy(msg, i);
2815 } else {
2816 nsge = sk_msg_elem_cpy(msg, i);
2817 }
2818 }
2819 }
2820
2821 /* Place newly allocated data buffer */
2822 sk_mem_charge(msg->sk, len);
2823 msg->sg.size += len;
5a8fb33e 2824 __clear_bit(new, msg->sg.copy);
6fff607e
JF
2825 sg_set_page(&msg->sg.data[new], page, len + copy, 0);
2826 if (rsge.length) {
2827 get_page(sg_page(&rsge));
2828 sk_msg_iter_var_next(new);
2829 msg->sg.data[new] = rsge;
2830 }
2831
2832 sk_msg_compute_data_pointers(msg);
2833 return 0;
2834}
2835
2836static const struct bpf_func_proto bpf_msg_push_data_proto = {
2837 .func = bpf_msg_push_data,
2838 .gpl_only = false,
2839 .ret_type = RET_INTEGER,
2840 .arg1_type = ARG_PTR_TO_CTX,
2841 .arg2_type = ARG_ANYTHING,
2842 .arg3_type = ARG_ANYTHING,
2843 .arg4_type = ARG_ANYTHING,
2844};
2845
7246d8ed
JF
2846static void sk_msg_shift_left(struct sk_msg *msg, int i)
2847{
2848 int prev;
2849
2850 do {
2851 prev = i;
2852 sk_msg_iter_var_next(i);
2853 msg->sg.data[prev] = msg->sg.data[i];
2854 } while (i != msg->sg.end);
2855
2856 sk_msg_iter_prev(msg, end);
2857}
2858
2859static void sk_msg_shift_right(struct sk_msg *msg, int i)
2860{
2861 struct scatterlist tmp, sge;
2862
2863 sk_msg_iter_next(msg, end);
2864 sge = sk_msg_elem_cpy(msg, i);
2865 sk_msg_iter_var_next(i);
2866 tmp = sk_msg_elem_cpy(msg, i);
2867
2868 while (i != msg->sg.end) {
2869 msg->sg.data[i] = sge;
2870 sk_msg_iter_var_next(i);
2871 sge = tmp;
2872 tmp = sk_msg_elem_cpy(msg, i);
2873 }
2874}
2875
2876BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
2877 u32, len, u64, flags)
2878{
6562e29c 2879 u32 i = 0, l = 0, space, offset = 0;
7246d8ed
JF
2880 u64 last = start + len;
2881 int pop;
2882
2883 if (unlikely(flags))
2884 return -EINVAL;
2885
2886 /* First find the starting scatterlist element */
2887 i = msg->sg.start;
2888 do {
6562e29c 2889 offset += l;
7246d8ed
JF
2890 l = sk_msg_elem(msg, i)->length;
2891
2892 if (start < offset + l)
2893 break;
7246d8ed
JF
2894 sk_msg_iter_var_next(i);
2895 } while (i != msg->sg.end);
2896
2897 /* Bounds checks: start and pop must be inside message */
2898 if (start >= offset + l || last >= msg->sg.size)
2899 return -EINVAL;
2900
2901 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2902
2903 pop = len;
2904 /* --------------| offset
2905 * -| start |-------- len -------|
2906 *
2907 * |----- a ----|-------- pop -------|----- b ----|
2908 * |______________________________________________| length
2909 *
2910 *
2911 * a: region at front of scatter element to save
2912 * b: region at back of scatter element to save when length > A + pop
 2913	 * pop: region to pop from the element; same as the input 'pop', it
 2914	 * will be decremented below per iteration.
 2915	 *
 2916	 * Two top-level cases to handle when start != offset: first, B is
 2917	 * non-zero; second, B is zero, corresponding to a pop that spans
 2918	 * more than one element.
 2919	 *
 2920	 * Then, if B is non-zero AND there is no space, allocate space and
 2921	 * compact the A and B regions into a page. If there is space, shift
 2922	 * the ring to the right, freeing the next element in the ring to
 2923	 * place B, leaving A untouched except to reduce its length.
2924 */
2925 if (start != offset) {
2926 struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
2927 int a = start;
2928 int b = sge->length - pop - a;
2929
2930 sk_msg_iter_var_next(i);
2931
2932 if (pop < sge->length - a) {
2933 if (space) {
2934 sge->length = a;
2935 sk_msg_shift_right(msg, i);
2936 nsge = sk_msg_elem(msg, i);
2937 get_page(sg_page(sge));
2938 sg_set_page(nsge,
2939 sg_page(sge),
2940 b, sge->offset + pop + a);
2941 } else {
2942 struct page *page, *orig;
2943 u8 *to, *from;
2944
2945 page = alloc_pages(__GFP_NOWARN |
2946 __GFP_COMP | GFP_ATOMIC,
2947 get_order(a + b));
2948 if (unlikely(!page))
2949 return -ENOMEM;
2950
2951 sge->length = a;
2952 orig = sg_page(sge);
2953 from = sg_virt(sge);
2954 to = page_address(page);
2955 memcpy(to, from, a);
2956 memcpy(to + a, from + a + pop, b);
2957 sg_set_page(sge, page, a + b, 0);
2958 put_page(orig);
2959 }
2960 pop = 0;
2961 } else if (pop >= sge->length - a) {
7246d8ed 2962 pop -= (sge->length - a);
3e104c23 2963 sge->length = a;
7246d8ed
JF
2964 }
2965 }
2966
2967 /* From above the current layout _must_ be as follows,
2968 *
2969 * -| offset
2970 * -| start
2971 *
2972 * |---- pop ---|---------------- b ------------|
2973 * |____________________________________________| length
2974 *
2975 * Offset and start of the current msg elem are equal because in the
2976 * previous case we handled offset != start and either consumed the
2977 * entire element and advanced to the next element OR pop == 0.
2978 *
 2979	 * Two cases to handle here: first, pop is less than the length,
 2980	 * leaving some remainder b above. Simply adjust the element's layout
 2981	 * in this case. Second, pop >= length of the element so that b = 0;
 2982	 * in this case advance to the next element, decrementing pop.
2983 */
2984 while (pop) {
2985 struct scatterlist *sge = sk_msg_elem(msg, i);
2986
2987 if (pop < sge->length) {
2988 sge->length -= pop;
2989 sge->offset += pop;
2990 pop = 0;
2991 } else {
2992 pop -= sge->length;
2993 sk_msg_shift_left(msg, i);
2994 }
2995 sk_msg_iter_var_next(i);
2996 }
2997
2998 sk_mem_uncharge(msg->sk, len - pop);
2999 msg->sg.size -= (len - pop);
3000 sk_msg_compute_data_pointers(msg);
3001 return 0;
3002}
3003
3004static const struct bpf_func_proto bpf_msg_pop_data_proto = {
3005 .func = bpf_msg_pop_data,
3006 .gpl_only = false,
3007 .ret_type = RET_INTEGER,
3008 .arg1_type = ARG_PTR_TO_CTX,
3009 .arg2_type = ARG_ANYTHING,
3010 .arg3_type = ARG_ANYTHING,
3011 .arg4_type = ARG_ANYTHING,
3012};
3013
5a52ae4e
DB
3014#ifdef CONFIG_CGROUP_NET_CLASSID
3015BPF_CALL_0(bpf_get_cgroup_classid_curr)
3016{
3017 return __task_get_classid(current);
3018}
3019
bed89185 3020const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
5a52ae4e
DB
3021 .func = bpf_get_cgroup_classid_curr,
3022 .gpl_only = false,
3023 .ret_type = RET_INTEGER,
3024};
b426ce83
DB
3025
3026BPF_CALL_1(bpf_skb_cgroup_classid, const struct sk_buff *, skb)
3027{
3028 struct sock *sk = skb_to_full_sk(skb);
3029
3030 if (!sk || !sk_fullsock(sk))
3031 return 0;
3032
3033 return sock_cgroup_classid(&sk->sk_cgrp_data);
3034}
3035
3036static const struct bpf_func_proto bpf_skb_cgroup_classid_proto = {
3037 .func = bpf_skb_cgroup_classid,
3038 .gpl_only = false,
3039 .ret_type = RET_INTEGER,
3040 .arg1_type = ARG_PTR_TO_CTX,
3041};
5a52ae4e
DB
3042#endif
3043
f3694e00 3044BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
8d20aabe 3045{
f3694e00 3046 return task_get_classid(skb);
8d20aabe
DB
3047}
3048
3049static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
3050 .func = bpf_get_cgroup_classid,
3051 .gpl_only = false,
3052 .ret_type = RET_INTEGER,
3053 .arg1_type = ARG_PTR_TO_CTX,
3054};
3055
f3694e00 3056BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
c46646d0 3057{
f3694e00 3058 return dst_tclassid(skb);
c46646d0
DB
3059}
3060
3061static const struct bpf_func_proto bpf_get_route_realm_proto = {
3062 .func = bpf_get_route_realm,
3063 .gpl_only = false,
3064 .ret_type = RET_INTEGER,
3065 .arg1_type = ARG_PTR_TO_CTX,
3066};
3067
f3694e00 3068BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
13c5c240
DB
3069{
3070 /* If skb_clear_hash() was called due to mangling, we can
3071 * trigger SW recalculation here. Later access to hash
3072 * can then use the inline skb->hash via context directly
3073 * instead of calling this helper again.
3074 */
f3694e00 3075 return skb_get_hash(skb);
13c5c240
DB
3076}
3077
3078static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
3079 .func = bpf_get_hash_recalc,
3080 .gpl_only = false,
3081 .ret_type = RET_INTEGER,
3082 .arg1_type = ARG_PTR_TO_CTX,
3083};
3084
7a4b28c6
DB
3085BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
3086{
3087 /* After all direct packet write, this can be used once for
3088 * triggering a lazy recalc on next skb_get_hash() invocation.
3089 */
3090 skb_clear_hash(skb);
3091 return 0;
3092}
3093
3094static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
3095 .func = bpf_set_hash_invalid,
3096 .gpl_only = false,
3097 .ret_type = RET_INTEGER,
3098 .arg1_type = ARG_PTR_TO_CTX,
3099};
3100
ded092cd
DB
3101BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
3102{
3103 /* Set user specified hash as L4(+), so that it gets returned
3104 * on skb_get_hash() call unless BPF prog later on triggers a
3105 * skb_clear_hash().
3106 */
3107 __skb_set_sw_hash(skb, hash, true);
3108 return 0;
3109}
3110
3111static const struct bpf_func_proto bpf_set_hash_proto = {
3112 .func = bpf_set_hash,
3113 .gpl_only = false,
3114 .ret_type = RET_INTEGER,
3115 .arg1_type = ARG_PTR_TO_CTX,
3116 .arg2_type = ARG_ANYTHING,
3117};
3118
f3694e00
DB
3119BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
3120 u16, vlan_tci)
4e10df9a 3121{
db58ba45 3122 int ret;
4e10df9a
AS
3123
3124 if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
3125 vlan_proto != htons(ETH_P_8021AD)))
3126 vlan_proto = htons(ETH_P_8021Q);
3127
8065694e 3128 bpf_push_mac_rcsum(skb);
db58ba45 3129 ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
8065694e
DB
3130 bpf_pull_mac_rcsum(skb);
3131
6aaae2b6 3132 bpf_compute_data_pointers(skb);
db58ba45 3133 return ret;
4e10df9a
AS
3134}
3135
93731ef0 3136static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
4e10df9a
AS
3137 .func = bpf_skb_vlan_push,
3138 .gpl_only = false,
3139 .ret_type = RET_INTEGER,
3140 .arg1_type = ARG_PTR_TO_CTX,
3141 .arg2_type = ARG_ANYTHING,
3142 .arg3_type = ARG_ANYTHING,
3143};
3144
f3694e00 3145BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
4e10df9a 3146{
db58ba45 3147 int ret;
4e10df9a 3148
8065694e 3149 bpf_push_mac_rcsum(skb);
db58ba45 3150 ret = skb_vlan_pop(skb);
8065694e
DB
3151 bpf_pull_mac_rcsum(skb);
3152
6aaae2b6 3153 bpf_compute_data_pointers(skb);
db58ba45 3154 return ret;
4e10df9a
AS
3155}
3156
93731ef0 3157static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
4e10df9a
AS
3158 .func = bpf_skb_vlan_pop,
3159 .gpl_only = false,
3160 .ret_type = RET_INTEGER,
3161 .arg1_type = ARG_PTR_TO_CTX,
3162};
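/* Illustrative sketch (not part of this file's build): pushing and popping a
 * VLAN tag from a tc program. The VLAN ID is an assumption.
 *
 *	// tag the packet with VLAN 100 before sending it out
 *	bpf_skb_vlan_push(skb, bpf_htons(ETH_P_8021Q), 100);
 *
 *	// or strip an existing tag on ingress
 *	bpf_skb_vlan_pop(skb);
 *
 * Both helpers may reallocate skb data, so any direct packet pointers held
 * by the program must be re-read and re-validated afterwards (see the
 * bpf_compute_data_pointers() calls above).
 */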
3163
6578171a
DB
3164static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
3165{
3166 /* Caller already did skb_cow() with len as headroom,
3167 * so no need to do it here.
3168 */
3169 skb_push(skb, len);
3170 memmove(skb->data, skb->data + len, off);
3171 memset(skb->data + off, 0, len);
3172
3173 /* No skb_postpush_rcsum(skb, skb->data + off, len)
3174 * needed here as it does not change the skb->csum
3175 * result for checksum complete when summing over
3176 * zeroed blocks.
3177 */
3178 return 0;
3179}
3180
3181static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
3182{
3183 /* skb_ensure_writable() is not needed here, as we're
3184 * already working on an uncloned skb.
3185 */
3186 if (unlikely(!pskb_may_pull(skb, off + len)))
3187 return -ENOMEM;
3188
3189 skb_postpull_rcsum(skb, skb->data + off, len);
3190 memmove(skb->data + len, skb->data, off);
3191 __skb_pull(skb, len);
3192
3193 return 0;
3194}
3195
3196static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
3197{
3198 bool trans_same = skb->transport_header == skb->network_header;
3199 int ret;
3200
3201 /* There's no need for __skb_push()/__skb_pull() pair to
3202 * get to the start of the mac header as we're guaranteed
3203 * to always start from here under eBPF.
3204 */
3205 ret = bpf_skb_generic_push(skb, off, len);
3206 if (likely(!ret)) {
3207 skb->mac_header -= len;
3208 skb->network_header -= len;
3209 if (trans_same)
3210 skb->transport_header = skb->network_header;
3211 }
3212
3213 return ret;
3214}
3215
3216static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
3217{
3218 bool trans_same = skb->transport_header == skb->network_header;
3219 int ret;
3220
3221 /* Same here, __skb_push()/__skb_pull() pair not needed. */
3222 ret = bpf_skb_generic_pop(skb, off, len);
3223 if (likely(!ret)) {
3224 skb->mac_header += len;
3225 skb->network_header += len;
3226 if (trans_same)
3227 skb->transport_header = skb->network_header;
3228 }
3229
3230 return ret;
3231}
3232
3233static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
3234{
3235 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
0daf4349 3236 u32 off = skb_mac_header_len(skb);
6578171a
DB
3237 int ret;
3238
3239 ret = skb_cow(skb, len_diff);
3240 if (unlikely(ret < 0))
3241 return ret;
3242
3243 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
3244 if (unlikely(ret < 0))
3245 return ret;
3246
3247 if (skb_is_gso(skb)) {
d02f51cb
DA
3248 struct skb_shared_info *shinfo = skb_shinfo(skb);
3249
0bc919d3 3250 /* SKB_GSO_TCPV4 needs to be changed into SKB_GSO_TCPV6. */
d02f51cb
DA
3251 if (shinfo->gso_type & SKB_GSO_TCPV4) {
3252 shinfo->gso_type &= ~SKB_GSO_TCPV4;
3253 shinfo->gso_type |= SKB_GSO_TCPV6;
6578171a 3254 }
6578171a
DB
3255 }
3256
3257 skb->protocol = htons(ETH_P_IPV6);
3258 skb_clear_hash(skb);
3259
3260 return 0;
3261}
3262
3263static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
3264{
3265 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
0daf4349 3266 u32 off = skb_mac_header_len(skb);
6578171a
DB
3267 int ret;
3268
3269 ret = skb_unclone(skb, GFP_ATOMIC);
3270 if (unlikely(ret < 0))
3271 return ret;
3272
3273 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
3274 if (unlikely(ret < 0))
3275 return ret;
3276
3277 if (skb_is_gso(skb)) {
d02f51cb
DA
3278 struct skb_shared_info *shinfo = skb_shinfo(skb);
3279
0bc919d3 3280 /* SKB_GSO_TCPV6 needs to be changed into SKB_GSO_TCPV4. */
d02f51cb
DA
3281 if (shinfo->gso_type & SKB_GSO_TCPV6) {
3282 shinfo->gso_type &= ~SKB_GSO_TCPV6;
3283 shinfo->gso_type |= SKB_GSO_TCPV4;
6578171a 3284 }
6578171a
DB
3285 }
3286
3287 skb->protocol = htons(ETH_P_IP);
3288 skb_clear_hash(skb);
3289
3290 return 0;
3291}
3292
3293static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
3294{
3295 __be16 from_proto = skb->protocol;
3296
3297 if (from_proto == htons(ETH_P_IP) &&
3298 to_proto == htons(ETH_P_IPV6))
3299 return bpf_skb_proto_4_to_6(skb);
3300
3301 if (from_proto == htons(ETH_P_IPV6) &&
3302 to_proto == htons(ETH_P_IP))
3303 return bpf_skb_proto_6_to_4(skb);
3304
3305 return -ENOTSUPP;
3306}
3307
f3694e00
DB
3308BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
3309 u64, flags)
6578171a 3310{
6578171a
DB
3311 int ret;
3312
3313 if (unlikely(flags))
3314 return -EINVAL;
3315
3316 /* General idea is that this helper does the basic groundwork
 3317	 * needed for changing the protocol, and the eBPF program fills the
3318 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
3319 * and other helpers, rather than passing a raw buffer here.
3320 *
3321 * The rationale is to keep this minimal and without a need to
3322 * deal with raw packet data. F.e. even if we would pass buffers
3323 * here, the program still needs to call the bpf_lX_csum_replace()
3324 * helpers anyway. Plus, this way we keep also separation of
3325 * concerns, since f.e. bpf_skb_store_bytes() should only take
3326 * care of stores.
3327 *
3328 * Currently, additional options and extension header space are
3329 * not supported, but flags register is reserved so we can adapt
3330 * that. For offloads, we mark packet as dodgy, so that headers
3331 * need to be verified first.
3332 */
3333 ret = bpf_skb_proto_xlat(skb, proto);
6aaae2b6 3334 bpf_compute_data_pointers(skb);
6578171a
DB
3335 return ret;
3336}
3337
3338static const struct bpf_func_proto bpf_skb_change_proto_proto = {
3339 .func = bpf_skb_change_proto,
3340 .gpl_only = false,
3341 .ret_type = RET_INTEGER,
3342 .arg1_type = ARG_PTR_TO_CTX,
3343 .arg2_type = ARG_ANYTHING,
3344 .arg3_type = ARG_ANYTHING,
3345};
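/* Illustrative sketch (not part of this file's build): a NAT64-style
 * translation driven from tc. bpf_skb_change_proto() only resizes and
 * re-labels the packet; the program still has to write the new IP header
 * itself, e.g. with bpf_skb_store_bytes(). 'ip6h' stands for a locally
 * prepared IPv6 header and an untagged Ethernet frame is assumed.
 *
 *	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0) < 0)
 *		return TC_ACT_SHOT;
 *	if (bpf_skb_store_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h), 0) < 0)
 *		return TC_ACT_SHOT;
 */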
3346
f3694e00 3347BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
d2485c42 3348{
d2485c42 3349 /* We only allow a restricted subset to be changed for now. */
45c7fffa
DB
3350 if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
3351 !skb_pkt_type_ok(pkt_type)))
d2485c42
DB
3352 return -EINVAL;
3353
3354 skb->pkt_type = pkt_type;
3355 return 0;
3356}
3357
3358static const struct bpf_func_proto bpf_skb_change_type_proto = {
3359 .func = bpf_skb_change_type,
3360 .gpl_only = false,
3361 .ret_type = RET_INTEGER,
3362 .arg1_type = ARG_PTR_TO_CTX,
3363 .arg2_type = ARG_ANYTHING,
3364};
3365
2be7e212
DB
3366static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
3367{
3368 switch (skb->protocol) {
3369 case htons(ETH_P_IP):
3370 return sizeof(struct iphdr);
3371 case htons(ETH_P_IPV6):
3372 return sizeof(struct ipv6hdr);
3373 default:
3374 return ~0U;
3375 }
3376}
3377
868d5235
WB
3378#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
3379 BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3380
3381#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
3382 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
3383 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
58dfc900 3384 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
d01b59c9 3385 BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
58dfc900
AM
3386 BPF_F_ADJ_ROOM_ENCAP_L2( \
3387 BPF_ADJ_ROOM_ENCAP_L2_MASK))
2278f6cc
WB
3388
3389static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
3390 u64 flags)
2be7e212 3391{
58dfc900 3392 u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
868d5235 3393 bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
62b31b42 3394 u16 mac_len = 0, inner_net = 0, inner_trans = 0;
868d5235 3395 unsigned int gso_type = SKB_GSO_DODGY;
2be7e212
DB
3396 int ret;
3397
2278f6cc
WB
3398 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3399 /* udp gso_size delineates datagrams, only allow if fixed */
3400 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3401 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3402 return -ENOTSUPP;
3403 }
d02f51cb 3404
908adce6 3405 ret = skb_cow_head(skb, len_diff);
2be7e212
DB
3406 if (unlikely(ret < 0))
3407 return ret;
3408
868d5235
WB
3409 if (encap) {
3410 if (skb->protocol != htons(ETH_P_IP) &&
3411 skb->protocol != htons(ETH_P_IPV6))
3412 return -ENOTSUPP;
3413
3414 if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
3415 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3416 return -EINVAL;
3417
3418 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
3419 flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3420 return -EINVAL;
3421
d01b59c9
XH
3422 if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH &&
3423 inner_mac_len < ETH_HLEN)
3424 return -EINVAL;
3425
868d5235
WB
3426 if (skb->encapsulation)
3427 return -EALREADY;
3428
3429 mac_len = skb->network_header - skb->mac_header;
3430 inner_net = skb->network_header;
58dfc900
AM
3431 if (inner_mac_len > len_diff)
3432 return -EINVAL;
868d5235
WB
3433 inner_trans = skb->transport_header;
3434 }
3435
2be7e212
DB
3436 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
3437 if (unlikely(ret < 0))
3438 return ret;
3439
868d5235 3440 if (encap) {
58dfc900 3441 skb->inner_mac_header = inner_net - inner_mac_len;
868d5235
WB
3442 skb->inner_network_header = inner_net;
3443 skb->inner_transport_header = inner_trans;
d01b59c9
XH
3444
3445 if (flags & BPF_F_ADJ_ROOM_ENCAP_L2_ETH)
3446 skb_set_inner_protocol(skb, htons(ETH_P_TEB));
3447 else
3448 skb_set_inner_protocol(skb, skb->protocol);
868d5235
WB
3449
3450 skb->encapsulation = 1;
3451 skb_set_network_header(skb, mac_len);
3452
3453 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3454 gso_type |= SKB_GSO_UDP_TUNNEL;
3455 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
3456 gso_type |= SKB_GSO_GRE;
3457 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3458 gso_type |= SKB_GSO_IPXIP6;
58dfc900 3459 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
868d5235
WB
3460 gso_type |= SKB_GSO_IPXIP4;
3461
3462 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
3463 flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
3464 int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
3465 sizeof(struct ipv6hdr) :
3466 sizeof(struct iphdr);
3467
3468 skb_set_transport_header(skb, mac_len + nh_len);
3469 }
1b00e0df
WB
3470
3471 /* Match skb->protocol to new outer l3 protocol */
3472 if (skb->protocol == htons(ETH_P_IP) &&
3473 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3474 skb->protocol = htons(ETH_P_IPV6);
3475 else if (skb->protocol == htons(ETH_P_IPV6) &&
3476 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3477 skb->protocol = htons(ETH_P_IP);
868d5235
WB
3478 }
3479
2be7e212 3480 if (skb_is_gso(skb)) {
d02f51cb
DA
3481 struct skb_shared_info *shinfo = skb_shinfo(skb);
3482
2be7e212 3483 /* Due to header grow, MSS needs to be downgraded. */
2278f6cc
WB
3484 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3485 skb_decrease_gso_size(shinfo, len_diff);
3486
2be7e212 3487 /* Header must be checked, and gso_segs recomputed. */
868d5235 3488 shinfo->gso_type |= gso_type;
d02f51cb 3489 shinfo->gso_segs = 0;
2be7e212
DB
3490 }
3491
3492 return 0;
3493}
3494
2278f6cc
WB
3495static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
3496 u64 flags)
2be7e212 3497{
2be7e212
DB
3498 int ret;
3499
836e66c2
DB
3500 if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
3501 BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
43537b8e
WB
3502 return -EINVAL;
3503
2278f6cc
WB
3504 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3505 /* udp gso_size delineates datagrams, only allow if fixed */
3506 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3507 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3508 return -ENOTSUPP;
3509 }
d02f51cb 3510
2be7e212
DB
3511 ret = skb_unclone(skb, GFP_ATOMIC);
3512 if (unlikely(ret < 0))
3513 return ret;
3514
3515 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
3516 if (unlikely(ret < 0))
3517 return ret;
3518
3519 if (skb_is_gso(skb)) {
d02f51cb
DA
3520 struct skb_shared_info *shinfo = skb_shinfo(skb);
3521
2be7e212 3522 /* Due to header shrink, MSS can be upgraded. */
2278f6cc
WB
3523 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3524 skb_increase_gso_size(shinfo, len_diff);
3525
2be7e212 3526 /* Header must be checked, and gso_segs recomputed. */
d02f51cb
DA
3527 shinfo->gso_type |= SKB_GSO_DODGY;
3528 shinfo->gso_segs = 0;
2be7e212
DB
3529 }
3530
3531 return 0;
3532}
3533
6306c118 3534#define BPF_SKB_MAX_LEN SKB_MAX_ALLOC
2be7e212 3535
18ebe16d
JF
3536BPF_CALL_4(sk_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3537 u32, mode, u64, flags)
3538{
3539 u32 len_diff_abs = abs(len_diff);
3540 bool shrink = len_diff < 0;
3541 int ret = 0;
3542
3543 if (unlikely(flags || mode))
3544 return -EINVAL;
3545 if (unlikely(len_diff_abs > 0xfffU))
3546 return -EFAULT;
3547
3548 if (!shrink) {
3549 ret = skb_cow(skb, len_diff);
3550 if (unlikely(ret < 0))
3551 return ret;
3552 __skb_push(skb, len_diff_abs);
3553 memset(skb->data, 0, len_diff_abs);
3554 } else {
3555 if (unlikely(!pskb_may_pull(skb, len_diff_abs)))
3556 return -ENOMEM;
3557 __skb_pull(skb, len_diff_abs);
3558 }
18ebe16d
JF
3559 if (tls_sw_has_ctx_rx(skb->sk)) {
3560 struct strp_msg *rxm = strp_msg(skb);
3561
3562 rxm->full_len += len_diff;
3563 }
3564 return ret;
3565}
3566
3567static const struct bpf_func_proto sk_skb_adjust_room_proto = {
3568 .func = sk_skb_adjust_room,
3569 .gpl_only = false,
3570 .ret_type = RET_INTEGER,
3571 .arg1_type = ARG_PTR_TO_CTX,
3572 .arg2_type = ARG_ANYTHING,
3573 .arg3_type = ARG_ANYTHING,
3574 .arg4_type = ARG_ANYTHING,
3575};
3576
14aa3192
WB
3577BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3578 u32, mode, u64, flags)
2be7e212 3579{
2be7e212
DB
3580 u32 len_cur, len_diff_abs = abs(len_diff);
3581 u32 len_min = bpf_skb_net_base_len(skb);
6306c118 3582 u32 len_max = BPF_SKB_MAX_LEN;
2be7e212
DB
3583 __be16 proto = skb->protocol;
3584 bool shrink = len_diff < 0;
14aa3192 3585 u32 off;
2be7e212
DB
3586 int ret;
3587
836e66c2
DB
3588 if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK |
3589 BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
14aa3192 3590 return -EINVAL;
2be7e212
DB
3591 if (unlikely(len_diff_abs > 0xfffU))
3592 return -EFAULT;
3593 if (unlikely(proto != htons(ETH_P_IP) &&
3594 proto != htons(ETH_P_IPV6)))
3595 return -ENOTSUPP;
3596
14aa3192
WB
3597 off = skb_mac_header_len(skb);
3598 switch (mode) {
3599 case BPF_ADJ_ROOM_NET:
3600 off += bpf_skb_net_base_len(skb);
3601 break;
3602 case BPF_ADJ_ROOM_MAC:
3603 break;
3604 default:
3605 return -ENOTSUPP;
3606 }
3607
2be7e212 3608 len_cur = skb->len - skb_network_offset(skb);
2be7e212
DB
3609 if ((shrink && (len_diff_abs >= len_cur ||
3610 len_cur - len_diff_abs < len_min)) ||
3611 (!shrink && (skb->len + len_diff_abs > len_max &&
3612 !skb_is_gso(skb))))
3613 return -ENOTSUPP;
3614
2278f6cc
WB
3615 ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
3616 bpf_skb_net_grow(skb, off, len_diff_abs, flags);
836e66c2
DB
3617 if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET))
3618 __skb_reset_checksum_unnecessary(skb);
2be7e212 3619
6aaae2b6 3620 bpf_compute_data_pointers(skb);
e4a6a342 3621 return ret;
2be7e212
DB
3622}
3623
2be7e212
DB
3624static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
3625 .func = bpf_skb_adjust_room,
3626 .gpl_only = false,
3627 .ret_type = RET_INTEGER,
3628 .arg1_type = ARG_PTR_TO_CTX,
3629 .arg2_type = ARG_ANYTHING,
3630 .arg3_type = ARG_ANYTHING,
3631 .arg4_type = ARG_ANYTHING,
3632};
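/*
 * Illustrative sketch only -- not part of net/core/filter.c. A minimal TC
 * (BPF_PROG_TYPE_SCHED_CLS) program showing how the bpf_skb_adjust_room()
 * helper implemented above might be called to reserve room for an outer
 * IPv4 header. Assumes a separate BPF object built with libbpf's
 * bpf_helpers.h; the program name is hypothetical.
 */
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int reserve_outer_ipv4(struct __sk_buff *skb)
{
	/* Open up sizeof(struct iphdr) bytes of room below the L3 header;
	 * without BPF_F_ADJ_ROOM_FIXED_GSO the MSS is lowered for GSO skbs,
	 * matching bpf_skb_net_grow() above.
	 */
	if (bpf_skb_adjust_room(skb, sizeof(struct iphdr),
				BPF_ADJ_ROOM_NET, 0))
		return TC_ACT_SHOT;

	/* A real program would now write the outer header with
	 * bpf_skb_store_bytes() and fix up checksums.
	 */
	return TC_ACT_OK;
}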
3633
5293efe6
DB
3634static u32 __bpf_skb_min_len(const struct sk_buff *skb)
3635{
3636 u32 min_len = skb_network_offset(skb);
3637
3638 if (skb_transport_header_was_set(skb))
3639 min_len = skb_transport_offset(skb);
3640 if (skb->ip_summed == CHECKSUM_PARTIAL)
3641 min_len = skb_checksum_start_offset(skb) +
3642 skb->csum_offset + sizeof(__sum16);
3643 return min_len;
3644}
3645
5293efe6
DB
3646static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
3647{
3648 unsigned int old_len = skb->len;
3649 int ret;
3650
3651 ret = __skb_grow_rcsum(skb, new_len);
3652 if (!ret)
3653 memset(skb->data + old_len, 0, new_len - old_len);
3654 return ret;
3655}
3656
3657static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
3658{
3659 return __skb_trim_rcsum(skb, new_len);
3660}
3661
0ea488ff
JF
3662static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
3663 u64 flags)
5293efe6 3664{
6306c118 3665 u32 max_len = BPF_SKB_MAX_LEN;
5293efe6 3666 u32 min_len = __bpf_skb_min_len(skb);
5293efe6
DB
3667 int ret;
3668
3669 if (unlikely(flags || new_len > max_len || new_len < min_len))
3670 return -EINVAL;
3671 if (skb->encapsulation)
3672 return -ENOTSUPP;
3673
 3674 /* The basic idea of this helper is that it performs the
 3675 * work needed to either grow or trim an skb, and the eBPF
 3676 * program rewrites the rest via helpers like
 3677 * bpf_skb_store_bytes(), bpf_lX_csum_replace() and others,
 3678 * rather than passing a raw buffer here. This is a slow-path
 3679 * helper intended for replies with control messages.
 3680 *
 3681 * Like in bpf_skb_change_proto(), we want to keep this rather
 3682 * minimal and without protocol specifics so that concerns stay
 3683 * separated: bpf_skb_store_bytes() should be the only helper
 3684 * responsible for writing buffers.
 3685 *
 3686 * This is really expected to be a slow-path operation for
 3687 * control message replies, so we implicitly linearize, unclone
 3688 * and drop offloads from the skb here.
 3689 */
3690 ret = __bpf_try_make_writable(skb, skb->len);
3691 if (!ret) {
3692 if (new_len > skb->len)
3693 ret = bpf_skb_grow_rcsum(skb, new_len);
3694 else if (new_len < skb->len)
3695 ret = bpf_skb_trim_rcsum(skb, new_len);
3696 if (!ret && skb_is_gso(skb))
3697 skb_gso_reset(skb);
3698 }
0ea488ff
JF
3699 return ret;
3700}
3701
3702BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3703 u64, flags)
3704{
3705 int ret = __bpf_skb_change_tail(skb, new_len, flags);
5293efe6 3706
6aaae2b6 3707 bpf_compute_data_pointers(skb);
5293efe6
DB
3708 return ret;
3709}
3710
3711static const struct bpf_func_proto bpf_skb_change_tail_proto = {
3712 .func = bpf_skb_change_tail,
3713 .gpl_only = false,
3714 .ret_type = RET_INTEGER,
3715 .arg1_type = ARG_PTR_TO_CTX,
3716 .arg2_type = ARG_ANYTHING,
3717 .arg3_type = ARG_ANYTHING,
3718};
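/*
 * Illustrative sketch only -- not part of this file. One way a TC program
 * could use the bpf_skb_change_tail() helper above: pad short replies up to
 * a minimum length (the grown area is zeroed by bpf_skb_grow_rcsum()).
 * Program name is hypothetical; assumes libbpf's bpf_helpers.h.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int pad_to_min_len(struct __sk_buff *skb)
{
	const __u32 min_len = 64;

	/* flags must be 0; the helper linearizes and drops GSO state. */
	if (skb->len < min_len && bpf_skb_change_tail(skb, min_len, 0))
		return TC_ACT_SHOT;
	return TC_ACT_OK;
}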
3719
0ea488ff 3720BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3a0af8fd 3721 u64, flags)
0ea488ff 3722{
16137b09 3723 return __bpf_skb_change_tail(skb, new_len, flags);
0ea488ff
JF
3724}
3725
3726static const struct bpf_func_proto sk_skb_change_tail_proto = {
3727 .func = sk_skb_change_tail,
3728 .gpl_only = false,
3729 .ret_type = RET_INTEGER,
3730 .arg1_type = ARG_PTR_TO_CTX,
3731 .arg2_type = ARG_ANYTHING,
3732 .arg3_type = ARG_ANYTHING,
3733};
3734
3735static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
3736 u64 flags)
3a0af8fd 3737{
6306c118 3738 u32 max_len = BPF_SKB_MAX_LEN;
3a0af8fd
TG
3739 u32 new_len = skb->len + head_room;
3740 int ret;
3741
3742 if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
3743 new_len < skb->len))
3744 return -EINVAL;
3745
3746 ret = skb_cow(skb, head_room);
3747 if (likely(!ret)) {
 3748 /* The idea behind this helper is that we currently only
 3749 * allow expanding the mac header. This means that
 3750 * skb->protocol, the network header, etc. stay as is.
 3751 * Compared to bpf_skb_change_tail(), we're more
 3752 * flexible since we don't need to linearize or
 3753 * reset GSO. The intention is for this helper to be
 3754 * used by an L3 skb that needs to push a mac header
 3755 * for redirection into an L2 device.
 3756 */
3757 __skb_push(skb, head_room);
3758 memset(skb->data, 0, head_room);
3759 skb_reset_mac_header(skb);
84316ca4 3760 skb_reset_mac_len(skb);
3a0af8fd
TG
3761 }
3762
0ea488ff
JF
3763 return ret;
3764}
3765
3766BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
3767 u64, flags)
3768{
3769 int ret = __bpf_skb_change_head(skb, head_room, flags);
3770
6aaae2b6 3771 bpf_compute_data_pointers(skb);
0ea488ff 3772 return ret;
3a0af8fd
TG
3773}
3774
3775static const struct bpf_func_proto bpf_skb_change_head_proto = {
3776 .func = bpf_skb_change_head,
3777 .gpl_only = false,
3778 .ret_type = RET_INTEGER,
3779 .arg1_type = ARG_PTR_TO_CTX,
3780 .arg2_type = ARG_ANYTHING,
3781 .arg3_type = ARG_ANYTHING,
3782};
3783
0ea488ff
JF
3784BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
3785 u64, flags)
3786{
16137b09 3787 return __bpf_skb_change_head(skb, head_room, flags);
0ea488ff
JF
3788}
3789
3790static const struct bpf_func_proto sk_skb_change_head_proto = {
3791 .func = sk_skb_change_head,
3792 .gpl_only = false,
3793 .ret_type = RET_INTEGER,
3794 .arg1_type = ARG_PTR_TO_CTX,
3795 .arg2_type = ARG_ANYTHING,
3796 .arg3_type = ARG_ANYTHING,
3797};
0165cc81
LB
3798
3799BPF_CALL_1(bpf_xdp_get_buff_len, struct xdp_buff*, xdp)
3800{
3801 return xdp_get_buff_len(xdp);
3802}
3803
3804static const struct bpf_func_proto bpf_xdp_get_buff_len_proto = {
3805 .func = bpf_xdp_get_buff_len,
3806 .gpl_only = false,
3807 .ret_type = RET_INTEGER,
3808 .arg1_type = ARG_PTR_TO_CTX,
3809};
3810
d9917302
EC
3811BTF_ID_LIST_SINGLE(bpf_xdp_get_buff_len_bpf_ids, struct, xdp_buff)
3812
3813const struct bpf_func_proto bpf_xdp_get_buff_len_trace_proto = {
3814 .func = bpf_xdp_get_buff_len,
3815 .gpl_only = false,
3816 .arg1_type = ARG_PTR_TO_BTF_ID,
3817 .arg1_btf_id = &bpf_xdp_get_buff_len_bpf_ids[0],
3818};
3819
de8f3a83
DB
3820static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
3821{
3822 return xdp_data_meta_unsupported(xdp) ? 0 :
3823 xdp->data - xdp->data_meta;
3824}
3825
17bedab2
MKL
3826BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
3827{
6dfb970d 3828 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
de8f3a83 3829 unsigned long metalen = xdp_get_metalen(xdp);
97e19cce 3830 void *data_start = xdp_frame_end + metalen;
17bedab2
MKL
3831 void *data = xdp->data + offset;
3832
de8f3a83 3833 if (unlikely(data < data_start ||
3834 data > xdp->data_end - ETH_HLEN))
3835 return -EINVAL;
3836
de8f3a83
DB
3837 if (metalen)
3838 memmove(xdp->data_meta + offset,
3839 xdp->data_meta, metalen);
3840 xdp->data_meta += offset;
17bedab2
MKL
3841 xdp->data = data;
3842
3843 return 0;
3844}
3845
3846static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
3847 .func = bpf_xdp_adjust_head,
3848 .gpl_only = false,
3849 .ret_type = RET_INTEGER,
3850 .arg1_type = ARG_PTR_TO_CTX,
3851 .arg2_type = ARG_ANYTHING,
3852};
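/*
 * Illustrative sketch only -- not part of this file. An XDP program can use
 * bpf_xdp_adjust_head() above to claim headroom in front of the frame, e.g.
 * before pushing an encapsulation header. The 8-byte size and program name
 * are hypothetical; assumes libbpf's bpf_helpers.h.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int push_custom_hdr(struct xdp_md *ctx)
{
	void *data, *data_end;

	/* A negative offset grows headroom; metadata (if any) is moved too. */
	if (bpf_xdp_adjust_head(ctx, -8))
		return XDP_DROP;

	/* Pointers must be reloaded and re-validated after the adjustment. */
	data = (void *)(long)ctx->data;
	data_end = (void *)(long)ctx->data_end;
	if (data + 8 > data_end)
		return XDP_DROP;

	__builtin_memset(data, 0, 8);	/* a real program writes a header here */
	return XDP_PASS;
}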
3853
3f364222
LB
3854static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
3855 void *buf, unsigned long len, bool flush)
3856{
3857 unsigned long ptr_len, ptr_off = 0;
3858 skb_frag_t *next_frag, *end_frag;
3859 struct skb_shared_info *sinfo;
3860 void *src, *dst;
3861 u8 *ptr_buf;
3862
3863 if (likely(xdp->data_end - xdp->data >= off + len)) {
3864 src = flush ? buf : xdp->data + off;
3865 dst = flush ? xdp->data + off : buf;
3866 memcpy(dst, src, len);
3867 return;
3868 }
3869
3870 sinfo = xdp_get_shared_info_from_buff(xdp);
3871 end_frag = &sinfo->frags[sinfo->nr_frags];
3872 next_frag = &sinfo->frags[0];
3873
3874 ptr_len = xdp->data_end - xdp->data;
3875 ptr_buf = xdp->data;
3876
3877 while (true) {
3878 if (off < ptr_off + ptr_len) {
3879 unsigned long copy_off = off - ptr_off;
3880 unsigned long copy_len = min(len, ptr_len - copy_off);
3881
3882 src = flush ? buf : ptr_buf + copy_off;
3883 dst = flush ? ptr_buf + copy_off : buf;
3884 memcpy(dst, src, copy_len);
3885
3886 off += copy_len;
3887 len -= copy_len;
3888 buf += copy_len;
3889 }
3890
3891 if (!len || next_frag == end_frag)
3892 break;
3893
3894 ptr_off += ptr_len;
3895 ptr_buf = skb_frag_address(next_frag);
3896 ptr_len = skb_frag_size(next_frag);
3897 next_frag++;
3898 }
3899}
3900
3901static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
3902{
3903 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
3904 u32 size = xdp->data_end - xdp->data;
3905 void *addr = xdp->data;
3906 int i;
3907
3908 if (unlikely(offset > 0xffff || len > 0xffff))
3909 return ERR_PTR(-EFAULT);
3910
3911 if (offset + len > xdp_get_buff_len(xdp))
3912 return ERR_PTR(-EINVAL);
3913
3914 if (offset < size) /* linear area */
3915 goto out;
3916
3917 offset -= size;
3918 for (i = 0; i < sinfo->nr_frags; i++) { /* paged area */
3919 u32 frag_size = skb_frag_size(&sinfo->frags[i]);
3920
3921 if (offset < frag_size) {
3922 addr = skb_frag_address(&sinfo->frags[i]);
3923 size = frag_size;
3924 break;
3925 }
3926 offset -= frag_size;
3927 }
3928out:
bbd52178 3929 return offset + len <= size ? addr + offset : NULL;
3f364222
LB
3930}
3931
3932BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
3933 void *, buf, u32, len)
3934{
3935 void *ptr;
3936
3937 ptr = bpf_xdp_pointer(xdp, offset, len);
3938 if (IS_ERR(ptr))
3939 return PTR_ERR(ptr);
3940
3941 if (!ptr)
3942 bpf_xdp_copy_buf(xdp, offset, buf, len, false);
3943 else
3944 memcpy(buf, ptr, len);
3945
3946 return 0;
3947}
3948
3949static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
3950 .func = bpf_xdp_load_bytes,
3951 .gpl_only = false,
3952 .ret_type = RET_INTEGER,
3953 .arg1_type = ARG_PTR_TO_CTX,
3954 .arg2_type = ARG_ANYTHING,
3955 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
3956 .arg4_type = ARG_CONST_SIZE,
3957};
3958
3959BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
3960 void *, buf, u32, len)
3961{
3962 void *ptr;
3963
3964 ptr = bpf_xdp_pointer(xdp, offset, len);
3965 if (IS_ERR(ptr))
3966 return PTR_ERR(ptr);
3967
3968 if (!ptr)
3969 bpf_xdp_copy_buf(xdp, offset, buf, len, true);
3970 else
3971 memcpy(ptr, buf, len);
3972
3973 return 0;
3974}
3975
3976static const struct bpf_func_proto bpf_xdp_store_bytes_proto = {
3977 .func = bpf_xdp_store_bytes,
3978 .gpl_only = false,
3979 .ret_type = RET_INTEGER,
3980 .arg1_type = ARG_PTR_TO_CTX,
3981 .arg2_type = ARG_ANYTHING,
3982 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
3983 .arg4_type = ARG_CONST_SIZE,
3984};
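/*
 * Illustrative sketch only -- not part of this file. bpf_xdp_load_bytes()
 * and bpf_xdp_store_bytes() (wrappers around bpf_xdp_pointer() and
 * bpf_xdp_copy_buf() above) let an XDP program access bytes even when they
 * straddle the linear area and the frags of a multi-buffer frame. The
 * program name is hypothetical; assumes libbpf's bpf_helpers.h.
 */
#include <linux/bpf.h>
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int rewrite_eth_header(struct xdp_md *ctx)
{
	__u8 buf[sizeof(struct ethhdr)];

	if (bpf_xdp_load_bytes(ctx, 0, buf, sizeof(buf)))
		return XDP_DROP;

	/* ... inspect or modify buf here ... */

	if (bpf_xdp_store_bytes(ctx, 0, buf, sizeof(buf)))
		return XDP_DROP;
	return XDP_PASS;
}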
3985
bf25146a
EC
3986static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
3987{
3988 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
3989 skb_frag_t *frag = &sinfo->frags[sinfo->nr_frags - 1];
3990 struct xdp_rxq_info *rxq = xdp->rxq;
3991 unsigned int tailroom;
3992
3993 if (!rxq->frag_size || rxq->frag_size > xdp->frame_sz)
3994 return -EOPNOTSUPP;
3995
3996 tailroom = rxq->frag_size - skb_frag_size(frag) - skb_frag_off(frag);
3997 if (unlikely(offset > tailroom))
3998 return -EINVAL;
3999
4000 memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
4001 skb_frag_size_add(frag, offset);
4002 sinfo->xdp_frags_size += offset;
4003
4004 return 0;
4005}
4006
4007static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
4008{
4009 struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
4010 int i, n_frags_free = 0, len_free = 0;
4011
4012 if (unlikely(offset > (int)xdp_get_buff_len(xdp) - ETH_HLEN))
4013 return -EINVAL;
4014
4015 for (i = sinfo->nr_frags - 1; i >= 0 && offset > 0; i--) {
4016 skb_frag_t *frag = &sinfo->frags[i];
4017 int shrink = min_t(int, offset, skb_frag_size(frag));
4018
4019 len_free += shrink;
4020 offset -= shrink;
4021
4022 if (skb_frag_size(frag) == shrink) {
4023 struct page *page = skb_frag_page(frag);
4024
4025 __xdp_return(page_address(page), &xdp->rxq->mem,
4026 false, NULL);
4027 n_frags_free++;
4028 } else {
4029 skb_frag_size_sub(frag, shrink);
4030 break;
4031 }
4032 }
4033 sinfo->nr_frags -= n_frags_free;
4034 sinfo->xdp_frags_size -= len_free;
4035
4036 if (unlikely(!sinfo->nr_frags)) {
4037 xdp_buff_clear_frags_flag(xdp);
4038 xdp->data_end -= offset;
4039 }
4040
4041 return 0;
4042}
4043
b32cc5b9
NS
4044BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
4045{
c8741e2b 4046 void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
b32cc5b9
NS
4047 void *data_end = xdp->data_end + offset;
4048
bf25146a
EC
4049 if (unlikely(xdp_buff_has_frags(xdp))) { /* non-linear xdp buff */
4050 if (offset < 0)
4051 return bpf_xdp_frags_shrink_tail(xdp, -offset);
4052
4053 return bpf_xdp_frags_increase_tail(xdp, offset);
4054 }
4055
c8741e2b
JDB
4056 /* Notice that xdp_data_hard_end have reserved some tailroom */
4057 if (unlikely(data_end > data_hard_end))
b32cc5b9
NS
4058 return -EINVAL;
4059
c8741e2b
JDB
4060 /* ALL drivers MUST init xdp->frame_sz, chicken check below */
4061 if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
4062 WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
4063 return -EINVAL;
4064 }
4065
b32cc5b9
NS
4066 if (unlikely(data_end < xdp->data + ETH_HLEN))
4067 return -EINVAL;
4068
ddb47d51
JDB
4069 /* Clear memory area on grow, can contain uninit kernel memory */
4070 if (offset > 0)
4071 memset(xdp->data_end, 0, offset);
4072
b32cc5b9
NS
4073 xdp->data_end = data_end;
4074
4075 return 0;
4076}
4077
4078static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
4079 .func = bpf_xdp_adjust_tail,
4080 .gpl_only = false,
4081 .ret_type = RET_INTEGER,
4082 .arg1_type = ARG_PTR_TO_CTX,
4083 .arg2_type = ARG_ANYTHING,
4084};
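/*
 * Illustrative sketch only -- not part of this file. A minimal use of
 * bpf_xdp_adjust_tail() above: trim a frame down to a fixed sample size.
 * A negative offset shrinks the tail and is frag-aware on multi-buffer
 * frames. The 128-byte budget and program name are made-up example values.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int trim_to_sample(struct xdp_md *ctx)
{
	long pkt_len = bpf_xdp_get_buff_len(ctx);
	const long keep = 128;

	if (pkt_len > keep && bpf_xdp_adjust_tail(ctx, keep - pkt_len))
		return XDP_ABORTED;
	return XDP_PASS;
}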
4085
de8f3a83
DB
4086BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
4087{
97e19cce 4088 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
de8f3a83
DB
4089 void *meta = xdp->data_meta + offset;
4090 unsigned long metalen = xdp->data - meta;
4091
4092 if (xdp_data_meta_unsupported(xdp))
4093 return -ENOTSUPP;
97e19cce 4094 if (unlikely(meta < xdp_frame_end ||
4095 meta > xdp->data))
4096 return -EINVAL;
7445cf31 4097 if (unlikely(xdp_metalen_invalid(metalen)))
de8f3a83
DB
4098 return -EACCES;
4099
4100 xdp->data_meta = meta;
4101
4102 return 0;
4103}
4104
4105static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
4106 .func = bpf_xdp_adjust_meta,
4107 .gpl_only = false,
4108 .ret_type = RET_INTEGER,
4109 .arg1_type = ARG_PTR_TO_CTX,
4110 .arg2_type = ARG_ANYTHING,
4111};
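/*
 * Illustrative sketch only -- not part of this file. bpf_xdp_adjust_meta()
 * above reserves space between data_meta and data that a later TC program
 * (or the driver) can read back as skb metadata. Not every driver supports
 * metadata, hence the graceful XDP_PASS fallback. Names and the mark value
 * are hypothetical.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("xdp")
int tag_with_meta(struct xdp_md *ctx)
{
	__u32 *meta;
	void *data;

	if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
		return XDP_PASS;	/* driver does not support metadata */

	meta = (void *)(long)ctx->data_meta;
	data = (void *)(long)ctx->data;
	if ((void *)(meta + 1) > data)
		return XDP_PASS;

	*meta = 0xcafe;			/* e.g. a mark consumed later at TC */
	return XDP_PASS;
}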
4112
d1e91173
MT
4113/**
4114 * DOC: xdp redirect
4115 *
4116 * XDP_REDIRECT works by a three-step process, implemented in the functions
782347b6
THJ
4117 * below:
4118 *
4119 * 1. The bpf_redirect() and bpf_redirect_map() helpers will lookup the target
4120 * of the redirect and store it (along with some other metadata) in a per-CPU
4121 * struct bpf_redirect_info.
4122 *
4123 * 2. When the program returns the XDP_REDIRECT return code, the driver will
4124 * call xdp_do_redirect() which will use the information in struct
4125 * bpf_redirect_info to actually enqueue the frame into a map type-specific
4126 * bulk queue structure.
4127 *
4128 * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
4129 * which will flush all the different bulk queues, thus completing the
4130 * redirect.
d1e91173
MT
4131 */
4132/*
782347b6
THJ
4133 * Pointers to the map entries will be kept around for this whole sequence of
4134 * steps, protected by RCU. However, there is no top-level rcu_read_lock() in
4135 * the core code; instead, the RCU protection relies on everything happening
4136 * inside a single NAPI poll sequence, which means it's between a pair of calls
4137 * to local_bh_disable()/local_bh_enable().
4138 *
4139 * The map entries are marked as __rcu and the map code makes sure to
4140 * dereference those pointers with rcu_dereference_check() in a way that works
 4141 * for both sections that hold an rcu_read_lock() and sections that are
4142 * called from NAPI without a separate rcu_read_lock(). The code below does not
4143 * use RCU annotations, but relies on those in the map code.
4144 */
1d233886 4145void xdp_do_flush(void)
11393cc9 4146{
1d233886 4147 __dev_flush();
332f22a6
BT
4148 __cpu_map_flush();
4149 __xsk_map_flush();
11393cc9 4150}
1d233886 4151EXPORT_SYMBOL_GPL(xdp_do_flush);
11393cc9 4152
e624d4ed
HL
4153void bpf_clear_redirect_map(struct bpf_map *map)
4154{
4155 struct bpf_redirect_info *ri;
4156 int cpu;
4157
4158 for_each_possible_cpu(cpu) {
4159 ri = per_cpu_ptr(&bpf_redirect_info, cpu);
4160 /* Avoid polluting remote cacheline due to writes if
4161 * not needed. Once we pass this test, we need the
4162 * cmpxchg() to make sure it hasn't been changed in
4163 * the meantime by remote CPU.
4164 */
4165 if (unlikely(READ_ONCE(ri->map) == map))
4166 cmpxchg(&ri->map, map, NULL);
4167 }
4168}
4169
879af96f
JM
4170DEFINE_STATIC_KEY_FALSE(bpf_master_redirect_enabled_key);
4171EXPORT_SYMBOL_GPL(bpf_master_redirect_enabled_key);
4172
4173u32 xdp_master_redirect(struct xdp_buff *xdp)
4174{
4175 struct net_device *master, *slave;
4176 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4177
4178 master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev);
4179 slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp);
4180 if (slave && slave != xdp->rxq->dev) {
4181 /* The target device is different from the receiving device, so
4182 * redirect it to the new device.
4183 * Using XDP_REDIRECT gets the correct behaviour from XDP enabled
4184 * drivers to unmap the packet from their rx ring.
4185 */
4186 ri->tgt_index = slave->ifindex;
4187 ri->map_id = INT_MAX;
4188 ri->map_type = BPF_MAP_TYPE_UNSPEC;
4189 return XDP_REDIRECT;
4190 }
4191 return XDP_TX;
4192}
4193EXPORT_SYMBOL_GPL(xdp_master_redirect);
4194
1372d34c
THJ
4195static inline int __xdp_do_redirect_xsk(struct bpf_redirect_info *ri,
4196 struct net_device *dev,
4197 struct xdp_buff *xdp,
4198 struct bpf_prog *xdp_prog)
97f91a7c 4199{
ee75aef2 4200 enum bpf_map_type map_type = ri->map_type;
43e74c02 4201 void *fwd = ri->tgt_value;
ee75aef2 4202 u32 map_id = ri->map_id;
4c03bdd7 4203 int err;
97f91a7c 4204
ee75aef2
BT
4205 ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
4206 ri->map_type = BPF_MAP_TYPE_UNSPEC;
97f91a7c 4207
1372d34c
THJ
4208 err = __xsk_map_redirect(fwd, xdp);
4209 if (unlikely(err))
4210 goto err;
4211
4212 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
4213 return 0;
4214err:
4215 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
4216 return err;
4217}
4218
4219static __always_inline int __xdp_do_redirect_frame(struct bpf_redirect_info *ri,
4220 struct net_device *dev,
4221 struct xdp_frame *xdpf,
4222 struct bpf_prog *xdp_prog)
4223{
4224 enum bpf_map_type map_type = ri->map_type;
4225 void *fwd = ri->tgt_value;
4226 u32 map_id = ri->map_id;
4227 struct bpf_map *map;
4228 int err;
4229
4230 ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
4231 ri->map_type = BPF_MAP_TYPE_UNSPEC;
d53ad5d8 4232
d53ad5d8
THJ
4233 if (unlikely(!xdpf)) {
4234 err = -EOVERFLOW;
4235 goto err;
4236 }
4237
ee75aef2
BT
4238 switch (map_type) {
4239 case BPF_MAP_TYPE_DEVMAP:
4240 fallthrough;
4241 case BPF_MAP_TYPE_DEVMAP_HASH:
e624d4ed
HL
4242 map = READ_ONCE(ri->map);
4243 if (unlikely(map)) {
4244 WRITE_ONCE(ri->map, NULL);
d53ad5d8 4245 err = dev_map_enqueue_multi(xdpf, dev, map,
4246 ri->flags & BPF_F_EXCLUDE_INGRESS);
4247 } else {
d53ad5d8 4248 err = dev_map_enqueue(fwd, xdpf, dev);
e624d4ed 4249 }
ee75aef2
BT
4250 break;
4251 case BPF_MAP_TYPE_CPUMAP:
d53ad5d8 4252 err = cpu_map_enqueue(fwd, xdpf, dev);
ee75aef2
BT
4253 break;
4254 case BPF_MAP_TYPE_UNSPEC:
4255 if (map_id == INT_MAX) {
4256 fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
4257 if (unlikely(!fwd)) {
4258 err = -EINVAL;
4259 break;
4260 }
d53ad5d8 4261 err = dev_xdp_enqueue(fwd, xdpf, dev);
ee75aef2 4262 break;
1d233886 4263 }
ee75aef2
BT
4264 fallthrough;
4265 default:
4266 err = -EBADRQC;
1d233886
THJ
4267 }
4268
f5836ca5
JDB
4269 if (unlikely(err))
4270 goto err;
4271
ee75aef2 4272 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
f5836ca5
JDB
4273 return 0;
4274err:
ee75aef2 4275 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
97f91a7c
JF
4276 return err;
4277}
1372d34c
THJ
4278
4279int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
4280 struct bpf_prog *xdp_prog)
4281{
4282 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4283 enum bpf_map_type map_type = ri->map_type;
4284
ab0db463
LB
4285 /* XDP_REDIRECT is not fully supported yet for xdp frags since
4286 * not all XDP capable drivers can map non-linear xdp_frame in
4287 * ndo_xdp_xmit.
4288 */
4289 if (unlikely(xdp_buff_has_frags(xdp) &&
4290 map_type != BPF_MAP_TYPE_CPUMAP))
4291 return -EOPNOTSUPP;
4292
1372d34c
THJ
4293 if (map_type == BPF_MAP_TYPE_XSKMAP)
4294 return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
4295
4296 return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
4297 xdp_prog);
4298}
814abfab
JF
4299EXPORT_SYMBOL_GPL(xdp_do_redirect);
4300
1372d34c
THJ
4301int xdp_do_redirect_frame(struct net_device *dev, struct xdp_buff *xdp,
4302 struct xdp_frame *xdpf, struct bpf_prog *xdp_prog)
4303{
4304 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4305 enum bpf_map_type map_type = ri->map_type;
4306
4307 if (map_type == BPF_MAP_TYPE_XSKMAP)
4308 return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
4309
4310 return __xdp_do_redirect_frame(ri, dev, xdpf, xdp_prog);
4311}
4312EXPORT_SYMBOL_GPL(xdp_do_redirect_frame);
4313
c060bc61
XS
4314static int xdp_do_generic_redirect_map(struct net_device *dev,
4315 struct sk_buff *skb,
02671e23 4316 struct xdp_buff *xdp,
f6069b9a 4317 struct bpf_prog *xdp_prog,
ee75aef2
BT
4318 void *fwd,
4319 enum bpf_map_type map_type, u32 map_id)
6103aa96 4320{
0b19cc0a 4321 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
e624d4ed 4322 struct bpf_map *map;
ee75aef2 4323 int err;
6d5fc195 4324
ee75aef2
BT
4325 switch (map_type) {
4326 case BPF_MAP_TYPE_DEVMAP:
4327 fallthrough;
4328 case BPF_MAP_TYPE_DEVMAP_HASH:
e624d4ed
HL
4329 map = READ_ONCE(ri->map);
4330 if (unlikely(map)) {
4331 WRITE_ONCE(ri->map, NULL);
4332 err = dev_map_redirect_multi(dev, skb, xdp_prog, map,
4333 ri->flags & BPF_F_EXCLUDE_INGRESS);
4334 } else {
4335 err = dev_map_generic_redirect(fwd, skb, xdp_prog);
4336 }
6d5fc195 4337 if (unlikely(err))
9c270af3 4338 goto err;
ee75aef2
BT
4339 break;
4340 case BPF_MAP_TYPE_XSKMAP:
4341 err = xsk_generic_rcv(fwd, xdp);
02671e23
BT
4342 if (err)
4343 goto err;
4344 consume_skb(skb);
ee75aef2 4345 break;
11941f8a
KKD
4346 case BPF_MAP_TYPE_CPUMAP:
4347 err = cpu_map_generic_redirect(fwd, skb);
4348 if (unlikely(err))
4349 goto err;
4350 break;
ee75aef2 4351 default:
9c270af3 4352 err = -EBADRQC;
f5836ca5 4353 goto err;
2facaad6 4354 }
6103aa96 4355
ee75aef2 4356 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index);
9c270af3
JDB
4357 return 0;
4358err:
ee75aef2 4359 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map_type, map_id, ri->tgt_index, err);
9c270af3
JDB
4360 return err;
4361}
4362
4363int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
02671e23 4364 struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
9c270af3 4365{
0b19cc0a 4366 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
ee75aef2
BT
4367 enum bpf_map_type map_type = ri->map_type;
4368 void *fwd = ri->tgt_value;
4369 u32 map_id = ri->map_id;
4370 int err;
2facaad6 4371
ee75aef2
BT
4372 ri->map_id = 0; /* Valid map id idr range: [1,INT_MAX[ */
4373 ri->map_type = BPF_MAP_TYPE_UNSPEC;
9c270af3 4374
ee75aef2
BT
4375 if (map_type == BPF_MAP_TYPE_UNSPEC && map_id == INT_MAX) {
4376 fwd = dev_get_by_index_rcu(dev_net(dev), ri->tgt_index);
4377 if (unlikely(!fwd)) {
4378 err = -EINVAL;
4379 goto err;
4380 }
4381
4382 err = xdp_ok_fwd_dev(fwd, skb->len);
4383 if (unlikely(err))
4384 goto err;
4385
4386 skb->dev = fwd;
4387 _trace_xdp_redirect(dev, xdp_prog, ri->tgt_index);
4388 generic_xdp_tx(skb, xdp_prog);
4389 return 0;
4390 }
4391
4392 return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, fwd, map_type, map_id);
f5836ca5 4393err:
ee75aef2 4394 _trace_xdp_redirect_err(dev, xdp_prog, ri->tgt_index, err);
2facaad6 4395 return err;
6103aa96 4396}
6103aa96 4397
814abfab
JF
4398BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
4399{
0b19cc0a 4400 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
814abfab
JF
4401
4402 if (unlikely(flags))
4403 return XDP_ABORTED;
4404
ee75aef2
BT
4405 /* NB! Map type UNSPEC and map_id == INT_MAX (never generated
4406 * by map_idr) is used for ifindex based XDP redirect.
4407 */
4b55cf29 4408 ri->tgt_index = ifindex;
ee75aef2
BT
4409 ri->map_id = INT_MAX;
4410 ri->map_type = BPF_MAP_TYPE_UNSPEC;
e4a8e817 4411
814abfab
JF
4412 return XDP_REDIRECT;
4413}
4414
4415static const struct bpf_func_proto bpf_xdp_redirect_proto = {
4416 .func = bpf_xdp_redirect,
4417 .gpl_only = false,
4418 .ret_type = RET_INTEGER,
4419 .arg1_type = ARG_ANYTHING,
4420 .arg2_type = ARG_ANYTHING,
4421};
4422
32637e33 4423BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u64, key,
f6069b9a 4424 u64, flags)
e4a8e817 4425{
32637e33 4426 return map->ops->map_redirect(map, key, flags);
e4a8e817
DB
4427}
4428
4429static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
4430 .func = bpf_xdp_redirect_map,
4431 .gpl_only = false,
4432 .ret_type = RET_INTEGER,
4433 .arg1_type = ARG_CONST_MAP_PTR,
4434 .arg2_type = ARG_ANYTHING,
4435 .arg3_type = ARG_ANYTHING,
4436};
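/*
 * Illustrative sketch only -- not part of this file. The three-step
 * redirect described in the "xdp redirect" DOC comment starts with a call
 * like this one: bpf_redirect_map() records the target in the per-CPU
 * bpf_redirect_info, and the driver later completes the redirect via
 * xdp_do_redirect()/xdp_do_flush(). Map and program names are hypothetical.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_DEVMAP);
	__uint(max_entries, 8);
	__type(key, __u32);
	__type(value, __u32);	/* target ifindex */
} tx_ports SEC(".maps");

SEC("xdp")
int redirect_to_port0(struct xdp_md *ctx)
{
	/* The low bits of the flags argument are returned if the lookup fails. */
	return bpf_redirect_map(&tx_ports, 0, XDP_PASS);
}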
4437
555c8a86 4438static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
aa7145c1 4439 unsigned long off, unsigned long len)
555c8a86 4440{
aa7145c1 4441 void *ptr = skb_header_pointer(skb, off, len, dst_buff);
555c8a86
DB
4442
4443 if (unlikely(!ptr))
4444 return len;
4445 if (ptr != dst_buff)
4446 memcpy(dst_buff, ptr, len);
4447
4448 return 0;
4449}
4450
f3694e00
DB
4451BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
4452 u64, flags, void *, meta, u64, meta_size)
555c8a86 4453{
555c8a86 4454 u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
555c8a86
DB
4455
4456 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
4457 return -EINVAL;
a7658e1a 4458 if (unlikely(!skb || skb_size > skb->len))
555c8a86
DB
4459 return -EFAULT;
4460
4461 return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
4462 bpf_skb_copy);
4463}
4464
4465static const struct bpf_func_proto bpf_skb_event_output_proto = {
4466 .func = bpf_skb_event_output,
4467 .gpl_only = true,
4468 .ret_type = RET_INTEGER,
4469 .arg1_type = ARG_PTR_TO_CTX,
4470 .arg2_type = ARG_CONST_MAP_PTR,
4471 .arg3_type = ARG_ANYTHING,
216e3cd2 4472 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
1728a4f2 4473 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
555c8a86
DB
4474};
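/*
 * Illustrative sketch only -- not part of this file. From the BPF side this
 * helper is invoked as bpf_perf_event_output(); for skb programs the upper
 * 32 bits of the flags (BPF_F_CTXLEN_MASK) ask bpf_skb_copy() above to
 * append that many packet bytes to the sample. Map and program names are
 * hypothetical.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(int));
	__uint(value_size, sizeof(__u32));
} events SEC(".maps");

SEC("tc")
int sample_headers(struct __sk_buff *skb)
{
	__u64 sample_len = skb->len < 64 ? skb->len : 64;
	__u64 flags = BPF_F_CURRENT_CPU | (sample_len << 32);
	__u32 cookie = 0xfeed;

	bpf_perf_event_output(skb, &events, flags, &cookie, sizeof(cookie));
	return TC_ACT_OK;
}

char _license[] SEC("license") = "GPL";	/* the helper is GPL-only */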
4475
9436ef6e 4476BTF_ID_LIST_SINGLE(bpf_skb_output_btf_ids, struct, sk_buff)
c9a0f3b8 4477
a7658e1a
AS
4478const struct bpf_func_proto bpf_skb_output_proto = {
4479 .func = bpf_skb_event_output,
4480 .gpl_only = true,
4481 .ret_type = RET_INTEGER,
4482 .arg1_type = ARG_PTR_TO_BTF_ID,
9436ef6e 4483 .arg1_btf_id = &bpf_skb_output_btf_ids[0],
a7658e1a
AS
4484 .arg2_type = ARG_CONST_MAP_PTR,
4485 .arg3_type = ARG_ANYTHING,
216e3cd2 4486 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
a7658e1a 4487 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
a7658e1a
AS
4488};
4489
c6c33454
DB
4490static unsigned short bpf_tunnel_key_af(u64 flags)
4491{
4492 return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
4493}
4494
f3694e00
DB
4495BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
4496 u32, size, u64, flags)
d3aa45ce 4497{
c6c33454
DB
4498 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
4499 u8 compat[sizeof(struct bpf_tunnel_key)];
074f528e
DB
4500 void *to_orig = to;
4501 int err;
d3aa45ce 4502
44c51472
SL
4503 if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6 |
4504 BPF_F_TUNINFO_FLAGS)))) {
4505 err = -EINVAL;
4506 goto err_clear;
4507 }
4508 if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
4509 err = -EPROTO;
4510 goto err_clear;
4511 }
c6c33454 4512 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
074f528e 4513 err = -EINVAL;
c6c33454 4514 switch (size) {
26101f5a 4515 case offsetof(struct bpf_tunnel_key, local_ipv6[0]):
4018ab18 4516 case offsetof(struct bpf_tunnel_key, tunnel_label):
c0e760c9 4517 case offsetof(struct bpf_tunnel_key, tunnel_ext):
4018ab18 4518 goto set_compat;
c6c33454
DB
4519 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
4520 /* Fixup deprecated structure layouts here, so we have
4521 * a common path later on.
4522 */
4523 if (ip_tunnel_info_af(info) != AF_INET)
074f528e 4524 goto err_clear;
4018ab18 4525set_compat:
c6c33454
DB
4526 to = (struct bpf_tunnel_key *)compat;
4527 break;
4528 default:
074f528e 4529 goto err_clear;
c6c33454
DB
4530 }
4531 }
d3aa45ce
AS
4532
4533 to->tunnel_id = be64_to_cpu(info->key.tun_id);
c6c33454
DB
4534 to->tunnel_tos = info->key.tos;
4535 to->tunnel_ttl = info->key.ttl;
44c51472
SL
4536 if (flags & BPF_F_TUNINFO_FLAGS)
4537 to->tunnel_flags = info->key.tun_flags;
4538 else
4539 to->tunnel_ext = 0;
c6c33454 4540
4018ab18 4541 if (flags & BPF_F_TUNINFO_IPV6) {
c6c33454
DB
4542 memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
4543 sizeof(to->remote_ipv6));
26101f5a
KF
4544 memcpy(to->local_ipv6, &info->key.u.ipv6.dst,
4545 sizeof(to->local_ipv6));
4018ab18
DB
4546 to->tunnel_label = be32_to_cpu(info->key.label);
4547 } else {
c6c33454 4548 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
1fbc2e0c 4549 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
26101f5a
KF
4550 to->local_ipv4 = be32_to_cpu(info->key.u.ipv4.dst);
4551 memset(&to->local_ipv6[1], 0, sizeof(__u32) * 3);
1fbc2e0c 4552 to->tunnel_label = 0;
4018ab18 4553 }
c6c33454
DB
4554
4555 if (unlikely(size != sizeof(struct bpf_tunnel_key)))
074f528e 4556 memcpy(to_orig, to, size);
d3aa45ce
AS
4557
4558 return 0;
074f528e
DB
4559err_clear:
4560 memset(to_orig, 0, size);
4561 return err;
d3aa45ce
AS
4562}
4563
577c50aa 4564static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
d3aa45ce
AS
4565 .func = bpf_skb_get_tunnel_key,
4566 .gpl_only = false,
4567 .ret_type = RET_INTEGER,
4568 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
4569 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
4570 .arg3_type = ARG_CONST_SIZE,
d3aa45ce
AS
4571 .arg4_type = ARG_ANYTHING,
4572};
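/*
 * Illustrative sketch only -- not part of this file. On the ingress side of
 * a collect_md tunnel device, a TC program can read the receive tunnel
 * metadata through bpf_skb_get_tunnel_key() above. The tunnel id and peer
 * address checked here are made-up example values.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int check_tunnel_src(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {};

	if (bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0))
		return TC_ACT_OK;	/* no tunnel metadata attached */

	/* Addresses are returned in host byte order. */
	if (key.tunnel_id != 42 || key.remote_ipv4 != 0xac100164)
		return TC_ACT_SHOT;
	return TC_ACT_OK;
}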
4573
f3694e00 4574BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
14ca0751 4575{
14ca0751 4576 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
074f528e 4577 int err;
14ca0751
DB
4578
4579 if (unlikely(!info ||
074f528e
DB
4580 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
4581 err = -ENOENT;
4582 goto err_clear;
4583 }
4584 if (unlikely(size < info->options_len)) {
4585 err = -ENOMEM;
4586 goto err_clear;
4587 }
14ca0751
DB
4588
4589 ip_tunnel_info_opts_get(to, info);
074f528e
DB
4590 if (size > info->options_len)
4591 memset(to + info->options_len, 0, size - info->options_len);
14ca0751
DB
4592
4593 return info->options_len;
074f528e
DB
4594err_clear:
4595 memset(to, 0, size);
4596 return err;
14ca0751
DB
4597}
4598
4599static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
4600 .func = bpf_skb_get_tunnel_opt,
4601 .gpl_only = false,
4602 .ret_type = RET_INTEGER,
4603 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
4604 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
4605 .arg3_type = ARG_CONST_SIZE,
14ca0751
DB
4606};
4607
d3aa45ce
AS
4608static struct metadata_dst __percpu *md_dst;
4609
f3694e00
DB
4610BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
4611 const struct bpf_tunnel_key *, from, u32, size, u64, flags)
d3aa45ce 4612{
d3aa45ce 4613 struct metadata_dst *md = this_cpu_ptr(md_dst);
c6c33454 4614 u8 compat[sizeof(struct bpf_tunnel_key)];
d3aa45ce
AS
4615 struct ip_tunnel_info *info;
4616
22080870 4617 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
77a5196a 4618 BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
d3aa45ce 4619 return -EINVAL;
c6c33454
DB
4620 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
4621 switch (size) {
26101f5a 4622 case offsetof(struct bpf_tunnel_key, local_ipv6[0]):
4018ab18 4623 case offsetof(struct bpf_tunnel_key, tunnel_label):
c0e760c9 4624 case offsetof(struct bpf_tunnel_key, tunnel_ext):
c6c33454
DB
4625 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
4626 /* Fixup deprecated structure layouts here, so we have
4627 * a common path later on.
4628 */
4629 memcpy(compat, from, size);
4630 memset(compat + size, 0, sizeof(compat) - size);
f3694e00 4631 from = (const struct bpf_tunnel_key *) compat;
c6c33454
DB
4632 break;
4633 default:
4634 return -EINVAL;
4635 }
4636 }
c0e760c9
DB
4637 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
4638 from->tunnel_ext))
4018ab18 4639 return -EINVAL;
d3aa45ce
AS
4640
4641 skb_dst_drop(skb);
4642 dst_hold((struct dst_entry *) md);
4643 skb_dst_set(skb, (struct dst_entry *) md);
4644
4645 info = &md->u.tun_info;
5540fbf4 4646 memset(info, 0, sizeof(*info));
d3aa45ce 4647 info->mode = IP_TUNNEL_INFO_TX;
c6c33454 4648
db3c6139 4649 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
22080870
DB
4650 if (flags & BPF_F_DONT_FRAGMENT)
4651 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
792f3dd6
WT
4652 if (flags & BPF_F_ZERO_CSUM_TX)
4653 info->key.tun_flags &= ~TUNNEL_CSUM;
77a5196a
WT
4654 if (flags & BPF_F_SEQ_NUMBER)
4655 info->key.tun_flags |= TUNNEL_SEQ;
22080870 4656
d3aa45ce 4657 info->key.tun_id = cpu_to_be64(from->tunnel_id);
c6c33454
DB
4658 info->key.tos = from->tunnel_tos;
4659 info->key.ttl = from->tunnel_ttl;
4660
4661 if (flags & BPF_F_TUNINFO_IPV6) {
4662 info->mode |= IP_TUNNEL_INFO_IPV6;
4663 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
4664 sizeof(from->remote_ipv6));
26101f5a
KF
4665 memcpy(&info->key.u.ipv6.src, from->local_ipv6,
4666 sizeof(from->local_ipv6));
4018ab18
DB
4667 info->key.label = cpu_to_be32(from->tunnel_label) &
4668 IPV6_FLOWLABEL_MASK;
c6c33454
DB
4669 } else {
4670 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
26101f5a 4671 info->key.u.ipv4.src = cpu_to_be32(from->local_ipv4);
b8fff748 4672 info->key.flow_flags = FLOWI_FLAG_ANYSRC;
c6c33454 4673 }
d3aa45ce
AS
4674
4675 return 0;
4676}
4677
577c50aa 4678static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
d3aa45ce
AS
4679 .func = bpf_skb_set_tunnel_key,
4680 .gpl_only = false,
4681 .ret_type = RET_INTEGER,
4682 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 4683 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
39f19ebb 4684 .arg3_type = ARG_CONST_SIZE,
d3aa45ce
AS
4685 .arg4_type = ARG_ANYTHING,
4686};
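/*
 * Illustrative sketch only -- not part of this file. The egress counterpart:
 * attaching TX tunnel metadata with bpf_skb_set_tunnel_key() above so that a
 * collect_md tunnel device encapsulates the packet. Tunnel id, destination
 * and program name are made-up example values.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int set_tunnel_dst(struct __sk_buff *skb)
{
	struct bpf_tunnel_key key = {};

	key.tunnel_id = 42;
	key.remote_ipv4 = 0xac100164;	/* 172.16.1.100, host byte order */
	key.tunnel_ttl = 64;

	if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX))
		return TC_ACT_SHOT;
	return TC_ACT_OK;
}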
4687
f3694e00
DB
4688BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
4689 const u8 *, from, u32, size)
14ca0751 4690{
14ca0751
DB
4691 struct ip_tunnel_info *info = skb_tunnel_info(skb);
4692 const struct metadata_dst *md = this_cpu_ptr(md_dst);
4693
4694 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
4695 return -EINVAL;
fca5fdf6 4696 if (unlikely(size > IP_TUNNEL_OPTS_MAX))
14ca0751
DB
4697 return -ENOMEM;
4698
256c87c1 4699 ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
14ca0751
DB
4700
4701 return 0;
4702}
4703
4704static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
4705 .func = bpf_skb_set_tunnel_opt,
4706 .gpl_only = false,
4707 .ret_type = RET_INTEGER,
4708 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 4709 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
39f19ebb 4710 .arg3_type = ARG_CONST_SIZE,
14ca0751
DB
4711};
4712
4713static const struct bpf_func_proto *
4714bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
d3aa45ce
AS
4715{
4716 if (!md_dst) {
d66f2b91
JK
4717 struct metadata_dst __percpu *tmp;
4718
4719 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
4720 METADATA_IP_TUNNEL,
4721 GFP_KERNEL);
4722 if (!tmp)
d3aa45ce 4723 return NULL;
d66f2b91
JK
4724 if (cmpxchg(&md_dst, NULL, tmp))
4725 metadata_dst_free_percpu(tmp);
d3aa45ce 4726 }
14ca0751
DB
4727
4728 switch (which) {
4729 case BPF_FUNC_skb_set_tunnel_key:
4730 return &bpf_skb_set_tunnel_key_proto;
4731 case BPF_FUNC_skb_set_tunnel_opt:
4732 return &bpf_skb_set_tunnel_opt_proto;
4733 default:
4734 return NULL;
4735 }
d3aa45ce
AS
4736}
4737
f3694e00
DB
4738BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
4739 u32, idx)
4a482f34 4740{
4a482f34
MKL
4741 struct bpf_array *array = container_of(map, struct bpf_array, map);
4742 struct cgroup *cgrp;
4743 struct sock *sk;
4a482f34 4744
2d48c5f9 4745 sk = skb_to_full_sk(skb);
4a482f34
MKL
4746 if (!sk || !sk_fullsock(sk))
4747 return -ENOENT;
f3694e00 4748 if (unlikely(idx >= array->map.max_entries))
4a482f34
MKL
4749 return -E2BIG;
4750
f3694e00 4751 cgrp = READ_ONCE(array->ptrs[idx]);
4a482f34
MKL
4752 if (unlikely(!cgrp))
4753 return -EAGAIN;
4754
54fd9c2d 4755 return sk_under_cgroup_hierarchy(sk, cgrp);
4a482f34
MKL
4756}
4757
747ea55e
DB
4758static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
4759 .func = bpf_skb_under_cgroup,
4a482f34
MKL
4760 .gpl_only = false,
4761 .ret_type = RET_INTEGER,
4762 .arg1_type = ARG_PTR_TO_CTX,
4763 .arg2_type = ARG_CONST_MAP_PTR,
4764 .arg3_type = ARG_ANYTHING,
4765};
4a482f34 4766
cb20b08e 4767#ifdef CONFIG_SOCK_CGROUP_DATA
f307fa2c
AI
4768static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
4769{
4770 struct cgroup *cgrp;
4771
a5fa25ad
MKL
4772 sk = sk_to_full_sk(sk);
4773 if (!sk || !sk_fullsock(sk))
4774 return 0;
4775
f307fa2c
AI
4776 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4777 return cgroup_id(cgrp);
4778}
4779
cb20b08e
DB
4780BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
4781{
a5fa25ad 4782 return __bpf_sk_cgroup_id(skb->sk);
cb20b08e
DB
4783}
4784
4785static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
4786 .func = bpf_skb_cgroup_id,
4787 .gpl_only = false,
4788 .ret_type = RET_INTEGER,
4789 .arg1_type = ARG_PTR_TO_CTX,
4790};
77236281 4791
f307fa2c
AI
4792static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
4793 int ancestor_level)
77236281 4794{
77236281
AI
4795 struct cgroup *ancestor;
4796 struct cgroup *cgrp;
4797
a5fa25ad
MKL
4798 sk = sk_to_full_sk(sk);
4799 if (!sk || !sk_fullsock(sk))
4800 return 0;
4801
77236281
AI
4802 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4803 ancestor = cgroup_ancestor(cgrp, ancestor_level);
4804 if (!ancestor)
4805 return 0;
4806
74321038 4807 return cgroup_id(ancestor);
77236281
AI
4808}
4809
f307fa2c
AI
4810BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
4811 ancestor_level)
4812{
a5fa25ad 4813 return __bpf_sk_ancestor_cgroup_id(skb->sk, ancestor_level);
f307fa2c
AI
4814}
4815
77236281
AI
4816static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
4817 .func = bpf_skb_ancestor_cgroup_id,
4818 .gpl_only = false,
4819 .ret_type = RET_INTEGER,
4820 .arg1_type = ARG_PTR_TO_CTX,
4821 .arg2_type = ARG_ANYTHING,
4822};
f307fa2c
AI
4823
4824BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk)
4825{
4826 return __bpf_sk_cgroup_id(sk);
4827}
4828
4829static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
4830 .func = bpf_sk_cgroup_id,
4831 .gpl_only = false,
4832 .ret_type = RET_INTEGER,
a5fa25ad 4833 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
f307fa2c
AI
4834};
4835
4836BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
4837{
4838 return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
4839}
4840
4841static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
4842 .func = bpf_sk_ancestor_cgroup_id,
4843 .gpl_only = false,
4844 .ret_type = RET_INTEGER,
a5fa25ad 4845 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
f307fa2c
AI
4846 .arg2_type = ARG_ANYTHING,
4847};
cb20b08e
DB
4848#endif
4849
3f364222 4850static unsigned long bpf_xdp_copy(void *dst, const void *ctx,
4de16969
DB
4851 unsigned long off, unsigned long len)
4852{
d9917302 4853 struct xdp_buff *xdp = (struct xdp_buff *)ctx;
d9917302 4854
3f364222 4855 bpf_xdp_copy_buf(xdp, off, dst, len, false);
4de16969
DB
4856 return 0;
4857}
4858
f3694e00
DB
4859BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
4860 u64, flags, void *, meta, u64, meta_size)
4de16969 4861{
4de16969 4862 u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
4de16969
DB
4863
4864 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
4865 return -EINVAL;
d9917302
EC
4866
4867 if (unlikely(!xdp || xdp_size > xdp_get_buff_len(xdp)))
4de16969
DB
4868 return -EFAULT;
4869
d9917302 4870 return bpf_event_output(map, flags, meta, meta_size, xdp,
9c471370 4871 xdp_size, bpf_xdp_copy);
4de16969
DB
4872}
4873
4874static const struct bpf_func_proto bpf_xdp_event_output_proto = {
4875 .func = bpf_xdp_event_output,
4876 .gpl_only = true,
4877 .ret_type = RET_INTEGER,
4878 .arg1_type = ARG_PTR_TO_CTX,
4879 .arg2_type = ARG_CONST_MAP_PTR,
4880 .arg3_type = ARG_ANYTHING,
216e3cd2 4881 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
1728a4f2 4882 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
4de16969
DB
4883};
4884
9436ef6e 4885BTF_ID_LIST_SINGLE(bpf_xdp_output_btf_ids, struct, xdp_buff)
c9a0f3b8 4886
d831ee84
EC
4887const struct bpf_func_proto bpf_xdp_output_proto = {
4888 .func = bpf_xdp_event_output,
4889 .gpl_only = true,
4890 .ret_type = RET_INTEGER,
4891 .arg1_type = ARG_PTR_TO_BTF_ID,
9436ef6e 4892 .arg1_btf_id = &bpf_xdp_output_btf_ids[0],
d831ee84
EC
4893 .arg2_type = ARG_CONST_MAP_PTR,
4894 .arg3_type = ARG_ANYTHING,
216e3cd2 4895 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
d831ee84 4896 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
d831ee84
EC
4897};
4898
91b8270f
CF
4899BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
4900{
92acdc58 4901 return skb->sk ? __sock_gen_cookie(skb->sk) : 0;
91b8270f
CF
4902}
4903
4904static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
4905 .func = bpf_get_socket_cookie,
4906 .gpl_only = false,
4907 .ret_type = RET_INTEGER,
4908 .arg1_type = ARG_PTR_TO_CTX,
4909};
4910
d692f113
AI
4911BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4912{
92acdc58 4913 return __sock_gen_cookie(ctx->sk);
d692f113
AI
4914}
4915
4916static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
4917 .func = bpf_get_socket_cookie_sock_addr,
4918 .gpl_only = false,
4919 .ret_type = RET_INTEGER,
4920 .arg1_type = ARG_PTR_TO_CTX,
4921};
4922
0e53d9e5
DB
4923BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
4924{
92acdc58 4925 return __sock_gen_cookie(ctx);
0e53d9e5
DB
4926}
4927
4928static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
4929 .func = bpf_get_socket_cookie_sock,
4930 .gpl_only = false,
4931 .ret_type = RET_INTEGER,
4932 .arg1_type = ARG_PTR_TO_CTX,
4933};
4934
c5dbb89f
FR
4935BPF_CALL_1(bpf_get_socket_ptr_cookie, struct sock *, sk)
4936{
4937 return sk ? sock_gen_cookie(sk) : 0;
4938}
4939
4940const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto = {
4941 .func = bpf_get_socket_ptr_cookie,
4942 .gpl_only = false,
4943 .ret_type = RET_INTEGER,
4944 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
4945};
4946
d692f113
AI
4947BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
4948{
92acdc58 4949 return __sock_gen_cookie(ctx->sk);
d692f113
AI
4950}
4951
4952static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
4953 .func = bpf_get_socket_cookie_sock_ops,
4954 .gpl_only = false,
4955 .ret_type = RET_INTEGER,
4956 .arg1_type = ARG_PTR_TO_CTX,
4957};
4958
f318903c
DB
4959static u64 __bpf_get_netns_cookie(struct sock *sk)
4960{
3d368ab8
ED
4961 const struct net *net = sk ? sock_net(sk) : &init_net;
4962
4963 return net->net_cookie;
f318903c
DB
4964}
4965
4966BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
4967{
4968 return __bpf_get_netns_cookie(ctx);
4969}
4970
4971static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = {
4972 .func = bpf_get_netns_cookie_sock,
4973 .gpl_only = false,
4974 .ret_type = RET_INTEGER,
4975 .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
4976};
4977
4978BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4979{
4980 return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
4981}
4982
4983static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
4984 .func = bpf_get_netns_cookie_sock_addr,
4985 .gpl_only = false,
4986 .ret_type = RET_INTEGER,
4987 .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
4988};
4989
6cf1770d
XL
4990BPF_CALL_1(bpf_get_netns_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
4991{
4992 return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
4993}
4994
4995static const struct bpf_func_proto bpf_get_netns_cookie_sock_ops_proto = {
4996 .func = bpf_get_netns_cookie_sock_ops,
4997 .gpl_only = false,
4998 .ret_type = RET_INTEGER,
4999 .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
5000};
5001
fab60e29
XL
5002BPF_CALL_1(bpf_get_netns_cookie_sk_msg, struct sk_msg *, ctx)
5003{
5004 return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
5005}
5006
5007static const struct bpf_func_proto bpf_get_netns_cookie_sk_msg_proto = {
5008 .func = bpf_get_netns_cookie_sk_msg,
5009 .gpl_only = false,
5010 .ret_type = RET_INTEGER,
5011 .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
5012};
5013
6acc5c29
CF
5014BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
5015{
5016 struct sock *sk = sk_to_full_sk(skb->sk);
5017 kuid_t kuid;
5018
5019 if (!sk || !sk_fullsock(sk))
5020 return overflowuid;
5021 kuid = sock_net_uid(sock_net(sk), sk);
5022 return from_kuid_munged(sock_net(sk)->user_ns, kuid);
5023}
5024
5025static const struct bpf_func_proto bpf_get_socket_uid_proto = {
5026 .func = bpf_get_socket_uid,
5027 .gpl_only = false,
5028 .ret_type = RET_INTEGER,
5029 .arg1_type = ARG_PTR_TO_CTX,
5030};
5031
65ddc82d
MKL
5032static int sol_socket_sockopt(struct sock *sk, int optname,
5033 char *optval, int *optlen,
5034 bool getopt)
29003875
MKL
5035{
5036 switch (optname) {
7e41df5d 5037 case SO_REUSEADDR:
29003875
MKL
5038 case SO_SNDBUF:
5039 case SO_RCVBUF:
5040 case SO_KEEPALIVE:
5041 case SO_PRIORITY:
5042 case SO_REUSEPORT:
5043 case SO_RCVLOWAT:
5044 case SO_MARK:
5045 case SO_MAX_PACING_RATE:
5046 case SO_BINDTOIFINDEX:
5047 case SO_TXREHASH:
65ddc82d 5048 if (*optlen != sizeof(int))
29003875
MKL
5049 return -EINVAL;
5050 break;
5051 case SO_BINDTODEVICE:
5052 break;
5053 default:
8c4b4c7e 5054 return -EINVAL;
29003875 5055 }
8c4b4c7e 5056
65ddc82d
MKL
5057 if (getopt) {
5058 if (optname == SO_BINDTODEVICE)
8c4b4c7e 5059 return -EINVAL;
65ddc82d
MKL
5060 return sk_getsockopt(sk, SOL_SOCKET, optname,
5061 KERNEL_SOCKPTR(optval),
5062 KERNEL_SOCKPTR(optlen));
5063 }
70c58997 5064
29003875 5065 return sk_setsockopt(sk, SOL_SOCKET, optname,
65ddc82d 5066 KERNEL_SOCKPTR(optval), *optlen);
29003875 5067}
70c58997 5068
57db31a1
MKL
5069static int bpf_sol_tcp_setsockopt(struct sock *sk, int optname,
5070 char *optval, int optlen)
5071{
5072 struct tcp_sock *tp = tcp_sk(sk);
5073 unsigned long timeout;
5074 int val;
70c58997 5075
57db31a1
MKL
5076 if (optlen != sizeof(int))
5077 return -EINVAL;
6f5c39fa 5078
57db31a1 5079 val = *(int *)optval;
6f5c39fa 5080
57db31a1
MKL
5081 /* Only some options are supported */
5082 switch (optname) {
5083 case TCP_BPF_IW:
5084 if (val <= 0 || tp->data_segs_out > tp->syn_data)
5085 return -EINVAL;
5086 tcp_snd_cwnd_set(tp, val);
5087 break;
5088 case TCP_BPF_SNDCWND_CLAMP:
5089 if (val <= 0)
5090 return -EINVAL;
5091 tp->snd_cwnd_clamp = val;
5092 tp->snd_ssthresh = val;
5093 break;
5094 case TCP_BPF_DELACK_MAX:
5095 timeout = usecs_to_jiffies(val);
5096 if (timeout > TCP_DELACK_MAX ||
5097 timeout < TCP_TIMEOUT_MIN)
5098 return -EINVAL;
5099 inet_csk(sk)->icsk_delack_max = timeout;
5100 break;
5101 case TCP_BPF_RTO_MIN:
5102 timeout = usecs_to_jiffies(val);
5103 if (timeout > TCP_RTO_MIN ||
5104 timeout < TCP_TIMEOUT_MIN)
6f9bd3d7 5105 return -EINVAL;
57db31a1
MKL
5106 inet_csk(sk)->icsk_rto_min = timeout;
5107 break;
5108 default:
5109 return -EINVAL;
5110 }
6f9bd3d7 5111
57db31a1
MKL
5112 return 0;
5113}
6f9bd3d7 5114
1e7d217f
MKL
5115static int sol_tcp_sockopt_congestion(struct sock *sk, char *optval,
5116 int *optlen, bool getopt)
5117{
061ff040
MKL
5118 struct tcp_sock *tp;
5119 int ret;
5120
1e7d217f
MKL
5121 if (*optlen < 2)
5122 return -EINVAL;
5123
5124 if (getopt) {
5125 if (!inet_csk(sk)->icsk_ca_ops)
5126 return -EINVAL;
5127 /* BPF expects NULL-terminated tcp-cc string */
5128 optval[--(*optlen)] = '\0';
5129 return do_tcp_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
5130 KERNEL_SOCKPTR(optval),
5131 KERNEL_SOCKPTR(optlen));
5132 }
5133
5134 /* "cdg" is the only cc that alloc a ptr
5135 * in inet_csk_ca area. The bpf-tcp-cc may
5136 * overwrite this ptr after switching to cdg.
5137 */
5138 if (*optlen >= sizeof("cdg") - 1 && !strncmp("cdg", optval, *optlen))
5139 return -ENOTSUPP;
5140
061ff040
MKL
 5141 /* This stops the loop
 5142 *
 5143 * .init => bpf_setsockopt(tcp_cc) => .init =>
 5144 * bpf_setsockopt(tcp_cc) => .init => ....
 5145 *
 5146 * The second bpf_setsockopt(tcp_cc) is not allowed
 5147 * in order to break the loop when both .init
 5148 * callbacks are the same bpf prog.
 5149 *
 5150 * This applies even if the second bpf_setsockopt(tcp_cc)
 5151 * does not cause a loop. It means only the first
 5152 * '.init' can call bpf_setsockopt(TCP_CONGESTION) to
 5153 * pick a fallback cc (e.g. the peer does not support ECN)
 5154 * and the second '.init' cannot fall back to
 5155 * another.
 5156 */
5157 tp = tcp_sk(sk);
5158 if (tp->bpf_chg_cc_inprogress)
5159 return -EBUSY;
5160
5161 tp->bpf_chg_cc_inprogress = 1;
5162 ret = do_tcp_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
1e7d217f 5163 KERNEL_SOCKPTR(optval), *optlen);
061ff040
MKL
5164 tp->bpf_chg_cc_inprogress = 0;
5165 return ret;
1e7d217f
MKL
5166}
5167
273b7f0f
MKL
5168static int sol_tcp_sockopt(struct sock *sk, int optname,
5169 char *optval, int *optlen,
5170 bool getopt)
0c751f70
MKL
5171{
5172 if (sk->sk_prot->setsockopt != tcp_setsockopt)
5173 return -EINVAL;
91b5b21c 5174
0c751f70 5175 switch (optname) {
7e41df5d
MKL
5176 case TCP_NODELAY:
5177 case TCP_MAXSEG:
0c751f70
MKL
5178 case TCP_KEEPIDLE:
5179 case TCP_KEEPINTVL:
5180 case TCP_KEEPCNT:
5181 case TCP_SYNCNT:
5182 case TCP_WINDOW_CLAMP:
7e41df5d 5183 case TCP_THIN_LINEAR_TIMEOUTS:
0c751f70
MKL
5184 case TCP_USER_TIMEOUT:
5185 case TCP_NOTSENT_LOWAT:
5186 case TCP_SAVE_SYN:
273b7f0f 5187 if (*optlen != sizeof(int))
0c751f70
MKL
5188 return -EINVAL;
5189 break;
5190 case TCP_CONGESTION:
1e7d217f 5191 return sol_tcp_sockopt_congestion(sk, optval, optlen, getopt);
273b7f0f
MKL
5192 case TCP_SAVED_SYN:
5193 if (*optlen < 1)
5194 return -EINVAL;
0c751f70
MKL
5195 break;
5196 default:
273b7f0f
MKL
5197 if (getopt)
5198 return -EINVAL;
5199 return bpf_sol_tcp_setsockopt(sk, optname, optval, *optlen);
5200 }
5201
5202 if (getopt) {
5203 if (optname == TCP_SAVED_SYN) {
fc747810
LB
5204 struct tcp_sock *tp = tcp_sk(sk);
5205
273b7f0f
MKL
5206 if (!tp->saved_syn ||
5207 *optlen > tcp_saved_syn_len(tp->saved_syn))
fc747810 5208 return -EINVAL;
273b7f0f
MKL
5209 memcpy(optval, tp->saved_syn->data, *optlen);
5210 /* It cannot free tp->saved_syn here because it
5211 * does not know if the user space still needs it.
5212 */
5213 return 0;
5214 }
fc747810 5215
273b7f0f
MKL
5216 return do_tcp_getsockopt(sk, SOL_TCP, optname,
5217 KERNEL_SOCKPTR(optval),
5218 KERNEL_SOCKPTR(optlen));
8c4b4c7e 5219 }
0c751f70
MKL
5220
5221 return do_tcp_setsockopt(sk, SOL_TCP, optname,
273b7f0f 5222 KERNEL_SOCKPTR(optval), *optlen);
8c4b4c7e
LB
5223}
5224
fd969f25
MKL
5225static int sol_ip_sockopt(struct sock *sk, int optname,
5226 char *optval, int *optlen,
5227 bool getopt)
9113d7e4 5228{
ee7f1e13
MKL
5229 if (sk->sk_family != AF_INET)
5230 return -EINVAL;
5231
5232 switch (optname) {
5233 case IP_TOS:
fd969f25 5234 if (*optlen != sizeof(int))
ee7f1e13
MKL
5235 return -EINVAL;
5236 break;
5237 default:
5238 return -EINVAL;
5239 }
5240
fd969f25
MKL
5241 if (getopt)
5242 return do_ip_getsockopt(sk, SOL_IP, optname,
5243 KERNEL_SOCKPTR(optval),
5244 KERNEL_SOCKPTR(optlen));
5245
ee7f1e13 5246 return do_ip_setsockopt(sk, SOL_IP, optname,
fd969f25 5247 KERNEL_SOCKPTR(optval), *optlen);
9113d7e4
SF
5248}
5249
38566ec0
MKL
5250static int sol_ipv6_sockopt(struct sock *sk, int optname,
5251 char *optval, int *optlen,
5252 bool getopt)
cd86d1fd 5253{
75b64b68
MKL
5254 if (sk->sk_family != AF_INET6)
5255 return -EINVAL;
beecf11b 5256
75b64b68
MKL
5257 switch (optname) {
5258 case IPV6_TCLASS:
7e41df5d 5259 case IPV6_AUTOFLOWLABEL:
38566ec0 5260 if (*optlen != sizeof(int))
75b64b68
MKL
5261 return -EINVAL;
5262 break;
5263 default:
5264 return -EINVAL;
5265 }
bcd6f4a8 5266
38566ec0
MKL
5267 if (getopt)
5268 return ipv6_bpf_stub->ipv6_getsockopt(sk, SOL_IPV6, optname,
5269 KERNEL_SOCKPTR(optval),
5270 KERNEL_SOCKPTR(optlen));
1edb6e03 5271
75b64b68 5272 return ipv6_bpf_stub->ipv6_setsockopt(sk, SOL_IPV6, optname,
38566ec0 5273 KERNEL_SOCKPTR(optval), *optlen);
75b64b68 5274}
cd86d1fd 5275
9113d7e4
SF
5276static int __bpf_setsockopt(struct sock *sk, int level, int optname,
5277 char *optval, int optlen)
8c4b4c7e 5278{
8c4b4c7e
LB
5279 if (!sk_fullsock(sk))
5280 return -EINVAL;
1e215300 5281
75b64b68 5282 if (level == SOL_SOCKET)
65ddc82d 5283 return sol_socket_sockopt(sk, optname, optval, &optlen, false);
75b64b68 5284 else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP)
fd969f25 5285 return sol_ip_sockopt(sk, optname, optval, &optlen, false);
75b64b68 5286 else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
38566ec0 5287 return sol_ipv6_sockopt(sk, optname, optval, &optlen, false);
75b64b68 5288 else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
273b7f0f 5289 return sol_tcp_sockopt(sk, optname, optval, &optlen, false);
6f5c39fa 5290
75b64b68 5291 return -EINVAL;
8c4b4c7e 5292}
6f5c39fa 5293
9113d7e4 5294static int _bpf_setsockopt(struct sock *sk, int level, int optname,
beecf11b 5295 char *optval, int optlen)
9113d7e4
SF
5296{
5297 if (sk_fullsock(sk))
5298 sock_owned_by_me(sk);
5299 return __bpf_setsockopt(sk, level, optname, optval, optlen);
5300}
6f9bd3d7 5301
9113d7e4
SF
5302static int __bpf_getsockopt(struct sock *sk, int level, int optname,
5303 char *optval, int optlen)
cd86d1fd 5304{
38566ec0 5305 int err, saved_optlen = optlen;
6f9bd3d7 5306
38566ec0
MKL
5307 if (!sk_fullsock(sk)) {
5308 err = -EINVAL;
5309 goto done;
cd86d1fd 5310 }
beecf11b 5311
38566ec0 5312 if (level == SOL_SOCKET)
65ddc82d 5313 err = sol_socket_sockopt(sk, optname, optval, &optlen, true);
38566ec0 5314 else if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP)
273b7f0f 5315 err = sol_tcp_sockopt(sk, optname, optval, &optlen, true);
38566ec0 5316 else if (IS_ENABLED(CONFIG_INET) && level == SOL_IP)
fd969f25 5317 err = sol_ip_sockopt(sk, optname, optval, &optlen, true);
38566ec0
MKL
5318 else if (IS_ENABLED(CONFIG_IPV6) && level == SOL_IPV6)
5319 err = sol_ipv6_sockopt(sk, optname, optval, &optlen, true);
5320 else
5321 err = -EINVAL;
65ddc82d 5322
38566ec0 5323done:
65ddc82d
MKL
5324 if (err)
5325 optlen = 0;
5326 if (optlen < saved_optlen)
5327 memset(optval + optlen, 0, saved_optlen - optlen);
5328 return err;
cd86d1fd
LB
5329}
5330
9113d7e4
SF
5331static int _bpf_getsockopt(struct sock *sk, int level, int optname,
5332 char *optval, int optlen)
5333{
5334 if (sk_fullsock(sk))
5335 sock_owned_by_me(sk);
5336 return __bpf_getsockopt(sk, level, optname, optval, optlen);
5337}
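/* Note (added for readability, not in the original source): the _bpf_*
 * wrappers assert via sock_owned_by_me() that the caller owns the socket
 * lock for full sockets before delegating, while the bpf_unlocked_sk_*
 * helpers below call __bpf_{set,get}sockopt() directly for contexts that
 * manage locking themselves. __bpf_getsockopt() also zeroes the tail of
 * optval beyond what the option filled in (see the saved_optlen handling
 * above), so no stale kernel memory is exposed to the program.
 */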
5338
3cee6fb8
MKL
5339BPF_CALL_5(bpf_sk_setsockopt, struct sock *, sk, int, level,
5340 int, optname, char *, optval, int, optlen)
5341{
5342 return _bpf_setsockopt(sk, level, optname, optval, optlen);
5343}
5344
5345const struct bpf_func_proto bpf_sk_setsockopt_proto = {
5346 .func = bpf_sk_setsockopt,
5347 .gpl_only = false,
5348 .ret_type = RET_INTEGER,
5349 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5350 .arg2_type = ARG_ANYTHING,
5351 .arg3_type = ARG_ANYTHING,
216e3cd2 5352 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
3cee6fb8
MKL
5353 .arg5_type = ARG_CONST_SIZE,
5354};
5355
5356BPF_CALL_5(bpf_sk_getsockopt, struct sock *, sk, int, level,
5357 int, optname, char *, optval, int, optlen)
5358{
5359 return _bpf_getsockopt(sk, level, optname, optval, optlen);
5360}
5361
5362const struct bpf_func_proto bpf_sk_getsockopt_proto = {
5363 .func = bpf_sk_getsockopt,
5364 .gpl_only = false,
5365 .ret_type = RET_INTEGER,
5366 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5367 .arg2_type = ARG_ANYTHING,
5368 .arg3_type = ARG_ANYTHING,
5369 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
5370 .arg5_type = ARG_CONST_SIZE,
5371};
5372
9113d7e4
SF
5373BPF_CALL_5(bpf_unlocked_sk_setsockopt, struct sock *, sk, int, level,
5374 int, optname, char *, optval, int, optlen)
5375{
5376 return __bpf_setsockopt(sk, level, optname, optval, optlen);
5377}
5378
5379const struct bpf_func_proto bpf_unlocked_sk_setsockopt_proto = {
5380 .func = bpf_unlocked_sk_setsockopt,
5381 .gpl_only = false,
5382 .ret_type = RET_INTEGER,
5383 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5384 .arg2_type = ARG_ANYTHING,
5385 .arg3_type = ARG_ANYTHING,
5386 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
5387 .arg5_type = ARG_CONST_SIZE,
5388};
5389
5390BPF_CALL_5(bpf_unlocked_sk_getsockopt, struct sock *, sk, int, level,
5391 int, optname, char *, optval, int, optlen)
5392{
5393 return __bpf_getsockopt(sk, level, optname, optval, optlen);
5394}
5395
5396const struct bpf_func_proto bpf_unlocked_sk_getsockopt_proto = {
5397 .func = bpf_unlocked_sk_getsockopt,
5398 .gpl_only = false,
5399 .ret_type = RET_INTEGER,
5400 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
5401 .arg2_type = ARG_ANYTHING,
5402 .arg3_type = ARG_ANYTHING,
5403 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
5404 .arg5_type = ARG_CONST_SIZE,
5405};
5406
beecf11b
SF
5407BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
5408 int, level, int, optname, char *, optval, int, optlen)
5409{
5cdc744c 5410 return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen);
beecf11b
SF
5411}
5412
5413static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
5414 .func = bpf_sock_addr_setsockopt,
5415 .gpl_only = false,
5416 .ret_type = RET_INTEGER,
5417 .arg1_type = ARG_PTR_TO_CTX,
5418 .arg2_type = ARG_ANYTHING,
5419 .arg3_type = ARG_ANYTHING,
216e3cd2 5420 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
beecf11b
SF
5421 .arg5_type = ARG_CONST_SIZE,
5422};
5423
5424BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx,
5425 int, level, int, optname, char *, optval, int, optlen)
5426{
5427 return _bpf_getsockopt(ctx->sk, level, optname, optval, optlen);
5428}
5429
5430static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
5431 .func = bpf_sock_addr_getsockopt,
5432 .gpl_only = false,
5433 .ret_type = RET_INTEGER,
5434 .arg1_type = ARG_PTR_TO_CTX,
5435 .arg2_type = ARG_ANYTHING,
5436 .arg3_type = ARG_ANYTHING,
5437 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
5438 .arg5_type = ARG_CONST_SIZE,
5439};
5440
5441BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
5442 int, level, int, optname, char *, optval, int, optlen)
5443{
5cdc744c 5444 return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
beecf11b
SF
5445}
5446
5447static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
5448 .func = bpf_sock_ops_setsockopt,
5449 .gpl_only = false,
5450 .ret_type = RET_INTEGER,
5451 .arg1_type = ARG_PTR_TO_CTX,
5452 .arg2_type = ARG_ANYTHING,
5453 .arg3_type = ARG_ANYTHING,
216e3cd2 5454 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
beecf11b
SF
5455 .arg5_type = ARG_CONST_SIZE,
5456};
5457
0813a841
MKL
5458static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock,
5459 int optname, const u8 **start)
5460{
5461 struct sk_buff *syn_skb = bpf_sock->syn_skb;
5462 const u8 *hdr_start;
5463 int ret;
5464
5465 if (syn_skb) {
5466 /* sk is a request_sock here */
5467
5468 if (optname == TCP_BPF_SYN) {
5469 hdr_start = syn_skb->data;
5470 ret = tcp_hdrlen(syn_skb);
267cf9fa 5471 } else if (optname == TCP_BPF_SYN_IP) {
0813a841
MKL
5472 hdr_start = skb_network_header(syn_skb);
5473 ret = skb_network_header_len(syn_skb) +
5474 tcp_hdrlen(syn_skb);
267cf9fa
MKL
5475 } else {
5476 /* optname == TCP_BPF_SYN_MAC */
5477 hdr_start = skb_mac_header(syn_skb);
5478 ret = skb_mac_header_len(syn_skb) +
5479 skb_network_header_len(syn_skb) +
5480 tcp_hdrlen(syn_skb);
0813a841
MKL
5481 }
5482 } else {
5483 struct sock *sk = bpf_sock->sk;
5484 struct saved_syn *saved_syn;
5485
5486 if (sk->sk_state == TCP_NEW_SYN_RECV)
 5487			/* synack retransmit. bpf_sock->syn_skb will
 5488			 * not be available, so fall back to
 5489			 * saved_syn (if it was saved).
 5490			 */
5491 saved_syn = inet_reqsk(sk)->saved_syn;
5492 else
5493 saved_syn = tcp_sk(sk)->saved_syn;
5494
5495 if (!saved_syn)
5496 return -ENOENT;
5497
5498 if (optname == TCP_BPF_SYN) {
5499 hdr_start = saved_syn->data +
267cf9fa 5500 saved_syn->mac_hdrlen +
0813a841
MKL
5501 saved_syn->network_hdrlen;
5502 ret = saved_syn->tcp_hdrlen;
267cf9fa
MKL
5503 } else if (optname == TCP_BPF_SYN_IP) {
5504 hdr_start = saved_syn->data +
5505 saved_syn->mac_hdrlen;
5506 ret = saved_syn->network_hdrlen +
5507 saved_syn->tcp_hdrlen;
0813a841 5508 } else {
267cf9fa
MKL
5509 /* optname == TCP_BPF_SYN_MAC */
5510
5511 /* TCP_SAVE_SYN may not have saved the mac hdr */
5512 if (!saved_syn->mac_hdrlen)
5513 return -ENOENT;
5514
0813a841 5515 hdr_start = saved_syn->data;
267cf9fa
MKL
5516 ret = saved_syn->mac_hdrlen +
5517 saved_syn->network_hdrlen +
0813a841
MKL
5518 saved_syn->tcp_hdrlen;
5519 }
5520 }
5521
5522 *start = hdr_start;
5523 return ret;
5524}
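/* Note (added for readability, not in the original source): depending on
 * optname, bpf_sock_ops_get_syn() points *start at a different span of the
 * received SYN: TCP_BPF_SYN covers the TCP header only, TCP_BPF_SYN_IP the
 * network + TCP headers, and TCP_BPF_SYN_MAC the MAC + network + TCP
 * headers. The data comes either from the in-flight syn_skb or, on synack
 * retransmit, from the saved_syn copy (if one was saved).
 */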
5525
beecf11b
SF
5526BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
5527 int, level, int, optname, char *, optval, int, optlen)
5528{
0813a841 5529 if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
267cf9fa 5530 optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
0813a841
MKL
5531 int ret, copy_len = 0;
5532 const u8 *start;
5533
5534 ret = bpf_sock_ops_get_syn(bpf_sock, optname, &start);
5535 if (ret > 0) {
5536 copy_len = ret;
5537 if (optlen < copy_len) {
5538 copy_len = optlen;
5539 ret = -ENOSPC;
5540 }
5541
5542 memcpy(optval, start, copy_len);
5543 }
5544
5545 /* Zero out unused buffer at the end */
5546 memset(optval + copy_len, 0, optlen - copy_len);
5547
5548 return ret;
5549 }
5550
beecf11b
SF
5551 return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen);
5552}
5553
5554static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = {
5555 .func = bpf_sock_ops_getsockopt,
cd86d1fd
LB
5556 .gpl_only = false,
5557 .ret_type = RET_INTEGER,
5558 .arg1_type = ARG_PTR_TO_CTX,
5559 .arg2_type = ARG_ANYTHING,
5560 .arg3_type = ARG_ANYTHING,
5561 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
5562 .arg5_type = ARG_CONST_SIZE,
5563};
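/* Usage sketch (added for illustration, not part of the original source;
 * the buffer size is illustrative): a sockops program reads the SYN headers
 * handled above through bpf_getsockopt() at SOL_TCP. On success the return
 * value is the number of bytes copied; -ENOSPC indicates a partial copy
 * into a too-small buffer:
 *
 *	char syn[64];
 *	int len;
 *
 *	len = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, syn, sizeof(syn));
 *	if (len > 0)
 *		... the first len bytes of syn[] hold the SYN's IP + TCP headers ...
 */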
5564
b13d8807
LB
5565BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
5566 int, argval)
5567{
5568 struct sock *sk = bpf_sock->sk;
5569 int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
5570
a7dcdf6e 5571 if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
b13d8807
LB
5572 return -EINVAL;
5573
725721a6 5574 tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
b13d8807
LB
5575
5576 return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
b13d8807
LB
5577}
5578
5579static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
5580 .func = bpf_sock_ops_cb_flags_set,
5581 .gpl_only = false,
5582 .ret_type = RET_INTEGER,
5583 .arg1_type = ARG_PTR_TO_CTX,
5584 .arg2_type = ARG_ANYTHING,
5585};
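/* Usage sketch (added for illustration, not part of the original source):
 * a sockops program enables additional callbacks on an established socket
 * with this helper; the return value carries any requested bits the kernel
 * did not recognize (0 means everything was accepted):
 *
 *	bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_RTO_CB_FLAG |
 *					 BPF_SOCK_OPS_RETRANS_CB_FLAG);
 */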
5586
d74bad4e
AI
5587const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
5588EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
5589
5590BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
5591 int, addr_len)
5592{
5593#ifdef CONFIG_INET
5594 struct sock *sk = ctx->sk;
8086fbaf 5595 u32 flags = BIND_FROM_BPF;
d74bad4e
AI
5596 int err;
5597
d74bad4e 5598 err = -EINVAL;
ba024f25
TH
5599 if (addr_len < offsetofend(struct sockaddr, sa_family))
5600 return err;
d74bad4e
AI
5601 if (addr->sa_family == AF_INET) {
5602 if (addr_len < sizeof(struct sockaddr_in))
5603 return err;
8086fbaf
SF
5604 if (((struct sockaddr_in *)addr)->sin_port == htons(0))
5605 flags |= BIND_FORCE_ADDRESS_NO_PORT;
5606 return __inet_bind(sk, addr, addr_len, flags);
d74bad4e
AI
5607#if IS_ENABLED(CONFIG_IPV6)
5608 } else if (addr->sa_family == AF_INET6) {
5609 if (addr_len < SIN6_LEN_RFC2133)
5610 return err;
8086fbaf
SF
5611 if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0))
5612 flags |= BIND_FORCE_ADDRESS_NO_PORT;
d74bad4e
AI
5613 /* ipv6_bpf_stub cannot be NULL, since it's called from
5614 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
5615 */
8086fbaf 5616 return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags);
d74bad4e
AI
5617#endif /* CONFIG_IPV6 */
5618 }
5619#endif /* CONFIG_INET */
5620
5621 return -EAFNOSUPPORT;
5622}
5623
5624static const struct bpf_func_proto bpf_bind_proto = {
5625 .func = bpf_bind,
5626 .gpl_only = false,
5627 .ret_type = RET_INTEGER,
5628 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 5629 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
d74bad4e
AI
5630 .arg3_type = ARG_CONST_SIZE,
5631};
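/* Usage sketch (added for illustration, not part of the original source;
 * the address is illustrative): a cgroup/connect4 program can pin the
 * source address before the connect proceeds. Leaving sin_port at 0 sets
 * BIND_FORCE_ADDRESS_NO_PORT above, so a local port is only picked at
 * connect time:
 *
 *	struct sockaddr_in addr = {
 *		.sin_family = AF_INET,
 *		.sin_addr.s_addr = bpf_htonl(0x0a000001),	(10.0.0.1)
 *	};
 *
 *	bpf_bind(ctx, (struct sockaddr *)&addr, sizeof(addr));
 */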
5632
12bed760 5633#ifdef CONFIG_XFRM
94151f5a
EB
5634
5635#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
5636 (IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
5637
5638struct metadata_dst __percpu *xfrm_bpf_md_dst;
5639EXPORT_SYMBOL_GPL(xfrm_bpf_md_dst);
5640
5641#endif
5642
12bed760
EB
5643BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
5644 struct bpf_xfrm_state *, to, u32, size, u64, flags)
5645{
5646 const struct sec_path *sp = skb_sec_path(skb);
5647 const struct xfrm_state *x;
5648
5649 if (!sp || unlikely(index >= sp->len || flags))
5650 goto err_clear;
5651
5652 x = sp->xvec[index];
5653
5654 if (unlikely(size != sizeof(struct bpf_xfrm_state)))
5655 goto err_clear;
5656
5657 to->reqid = x->props.reqid;
5658 to->spi = x->id.spi;
5659 to->family = x->props.family;
1fbc2e0c
DB
5660 to->ext = 0;
5661
12bed760
EB
5662 if (to->family == AF_INET6) {
5663 memcpy(to->remote_ipv6, x->props.saddr.a6,
5664 sizeof(to->remote_ipv6));
5665 } else {
5666 to->remote_ipv4 = x->props.saddr.a4;
1fbc2e0c 5667 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
12bed760
EB
5668 }
5669
5670 return 0;
5671err_clear:
5672 memset(to, 0, size);
5673 return -EINVAL;
5674}
5675
5676static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
5677 .func = bpf_skb_get_xfrm_state,
5678 .gpl_only = false,
5679 .ret_type = RET_INTEGER,
5680 .arg1_type = ARG_PTR_TO_CTX,
5681 .arg2_type = ARG_ANYTHING,
5682 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
5683 .arg4_type = ARG_CONST_SIZE,
5684 .arg5_type = ARG_ANYTHING,
5685};
5686#endif
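/* Usage sketch (added for illustration, not part of the original source):
 * a TC (cls_bpf) program can inspect the xfrm state an skb was
 * decapsulated from; the helper returns 0 on success:
 *
 *	struct bpf_xfrm_state xs;
 *
 *	if (!bpf_skb_get_xfrm_state(skb, 0, &xs, sizeof(xs), 0))
 *		... xs.spi / xs.reqid identify the SA at sec_path index 0 ...
 */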
5687
87f5fc7e
DA
5688#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
5689static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
5690 const struct neighbour *neigh,
e1850ea9 5691 const struct net_device *dev, u32 mtu)
87f5fc7e
DA
5692{
5693 memcpy(params->dmac, neigh->ha, ETH_ALEN);
5694 memcpy(params->smac, dev->dev_addr, ETH_ALEN);
5695 params->h_vlan_TCI = 0;
5696 params->h_vlan_proto = 0;
e1850ea9
JDB
5697 if (mtu)
5698 params->mtu_result = mtu; /* union with tot_len */
87f5fc7e 5699
4c79579b 5700 return 0;
87f5fc7e
DA
5701}
5702#endif
5703
5704#if IS_ENABLED(CONFIG_INET)
5705static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4f74fede 5706 u32 flags, bool check_mtu)
87f5fc7e 5707{
eba618ab 5708 struct fib_nh_common *nhc;
87f5fc7e
DA
5709 struct in_device *in_dev;
5710 struct neighbour *neigh;
5711 struct net_device *dev;
5712 struct fib_result res;
87f5fc7e 5713 struct flowi4 fl4;
e1850ea9 5714 u32 mtu = 0;
87f5fc7e
DA
5715 int err;
5716
5717 dev = dev_get_by_index_rcu(net, params->ifindex);
5718 if (unlikely(!dev))
5719 return -ENODEV;
5720
5721 /* verify forwarding is enabled on this interface */
5722 in_dev = __in_dev_get_rcu(dev);
5723 if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
4c79579b 5724 return BPF_FIB_LKUP_RET_FWD_DISABLED;
87f5fc7e
DA
5725
5726 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
5727 fl4.flowi4_iif = 1;
5728 fl4.flowi4_oif = params->ifindex;
5729 } else {
5730 fl4.flowi4_iif = params->ifindex;
5731 fl4.flowi4_oif = 0;
5732 }
5733 fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
5734 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
5735 fl4.flowi4_flags = 0;
5736
5737 fl4.flowi4_proto = params->l4_protocol;
5738 fl4.daddr = params->ipv4_dst;
5739 fl4.saddr = params->ipv4_src;
5740 fl4.fl4_sport = params->sport;
5741 fl4.fl4_dport = params->dport;
1869e226 5742 fl4.flowi4_multipath_hash = 0;
87f5fc7e
DA
5743
5744 if (flags & BPF_FIB_LOOKUP_DIRECT) {
5745 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
5746 struct fib_table *tb;
5747
5748 tb = fib_get_table(net, tbid);
5749 if (unlikely(!tb))
4c79579b 5750 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e
DA
5751
5752 err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
5753 } else {
5754 fl4.flowi4_mark = 0;
5755 fl4.flowi4_secid = 0;
5756 fl4.flowi4_tun_key.tun_id = 0;
5757 fl4.flowi4_uid = sock_net_uid(net, NULL);
5758
5759 err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
5760 }
5761
4c79579b
DA
5762 if (err) {
5763 /* map fib lookup errors to RTN_ type */
5764 if (err == -EINVAL)
5765 return BPF_FIB_LKUP_RET_BLACKHOLE;
5766 if (err == -EHOSTUNREACH)
5767 return BPF_FIB_LKUP_RET_UNREACHABLE;
5768 if (err == -EACCES)
5769 return BPF_FIB_LKUP_RET_PROHIBIT;
5770
5771 return BPF_FIB_LKUP_RET_NOT_FWDED;
5772 }
5773
5774 if (res.type != RTN_UNICAST)
5775 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e 5776
5481d73f 5777 if (fib_info_num_path(res.fi) > 1)
87f5fc7e
DA
5778 fib_select_path(net, &res, &fl4, NULL);
5779
4f74fede
DA
5780 if (check_mtu) {
5781 mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
e1850ea9
JDB
5782 if (params->tot_len > mtu) {
5783 params->mtu_result = mtu; /* union with tot_len */
4c79579b 5784 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
e1850ea9 5785 }
4f74fede
DA
5786 }
5787
eba618ab 5788 nhc = res.nhc;
87f5fc7e
DA
5789
5790 /* do not handle lwt encaps right now */
eba618ab 5791 if (nhc->nhc_lwtstate)
4c79579b 5792 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
87f5fc7e 5793
eba618ab 5794 dev = nhc->nhc_dev;
87f5fc7e
DA
5795
5796 params->rt_metric = res.fi->fib_priority;
d1c362e1 5797 params->ifindex = dev->ifindex;
87f5fc7e
DA
5798
5799 /* xdp and cls_bpf programs are run in RCU-bh so
5800 * rcu_read_lock_bh is not needed here
5801 */
6f5f68d0
DA
5802 if (likely(nhc->nhc_gw_family != AF_INET6)) {
5803 if (nhc->nhc_gw_family)
5804 params->ipv4_dst = nhc->nhc_gw.ipv4;
5805
5806 neigh = __ipv4_neigh_lookup_noref(dev,
5807 (__force u32)params->ipv4_dst);
5808 } else {
5809 struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
5810
5811 params->family = AF_INET6;
5812 *dst = nhc->nhc_gw.ipv6;
5813 neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
5814 }
5815
4c79579b
DA
5816 if (!neigh)
5817 return BPF_FIB_LKUP_RET_NO_NEIGH;
87f5fc7e 5818
e1850ea9 5819 return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
87f5fc7e
DA
5820}
5821#endif
5822
5823#if IS_ENABLED(CONFIG_IPV6)
5824static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4f74fede 5825 u32 flags, bool check_mtu)
87f5fc7e
DA
5826{
5827 struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
5828 struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
e55449e7 5829 struct fib6_result res = {};
87f5fc7e
DA
5830 struct neighbour *neigh;
5831 struct net_device *dev;
5832 struct inet6_dev *idev;
87f5fc7e
DA
5833 struct flowi6 fl6;
5834 int strict = 0;
effda4dd 5835 int oif, err;
e1850ea9 5836 u32 mtu = 0;
87f5fc7e
DA
5837
5838 /* link local addresses are never forwarded */
5839 if (rt6_need_strict(dst) || rt6_need_strict(src))
4c79579b 5840 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e
DA
5841
5842 dev = dev_get_by_index_rcu(net, params->ifindex);
5843 if (unlikely(!dev))
5844 return -ENODEV;
5845
5846 idev = __in6_dev_get_safely(dev);
56f0f84e 5847 if (unlikely(!idev || !idev->cnf.forwarding))
4c79579b 5848 return BPF_FIB_LKUP_RET_FWD_DISABLED;
87f5fc7e
DA
5849
5850 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
5851 fl6.flowi6_iif = 1;
5852 oif = fl6.flowi6_oif = params->ifindex;
5853 } else {
5854 oif = fl6.flowi6_iif = params->ifindex;
5855 fl6.flowi6_oif = 0;
5856 strict = RT6_LOOKUP_F_HAS_SADDR;
5857 }
bd3a08aa 5858 fl6.flowlabel = params->flowinfo;
87f5fc7e
DA
5859 fl6.flowi6_scope = 0;
5860 fl6.flowi6_flags = 0;
5861 fl6.mp_hash = 0;
5862
5863 fl6.flowi6_proto = params->l4_protocol;
5864 fl6.daddr = *dst;
5865 fl6.saddr = *src;
5866 fl6.fl6_sport = params->sport;
5867 fl6.fl6_dport = params->dport;
5868
5869 if (flags & BPF_FIB_LOOKUP_DIRECT) {
5870 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
5871 struct fib6_table *tb;
5872
5873 tb = ipv6_stub->fib6_get_table(net, tbid);
5874 if (unlikely(!tb))
4c79579b 5875 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e 5876
effda4dd
DA
5877 err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
5878 strict);
87f5fc7e
DA
5879 } else {
5880 fl6.flowi6_mark = 0;
5881 fl6.flowi6_secid = 0;
5882 fl6.flowi6_tun_key.tun_id = 0;
5883 fl6.flowi6_uid = sock_net_uid(net, NULL);
5884
effda4dd 5885 err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
87f5fc7e
DA
5886 }
5887
effda4dd 5888 if (unlikely(err || IS_ERR_OR_NULL(res.f6i) ||
b1d40991 5889 res.f6i == net->ipv6.fib6_null_entry))
4c79579b
DA
5890 return BPF_FIB_LKUP_RET_NOT_FWDED;
5891
7d21fec9
DA
5892 switch (res.fib6_type) {
5893 /* only unicast is forwarded */
5894 case RTN_UNICAST:
5895 break;
5896 case RTN_BLACKHOLE:
5897 return BPF_FIB_LKUP_RET_BLACKHOLE;
5898 case RTN_UNREACHABLE:
5899 return BPF_FIB_LKUP_RET_UNREACHABLE;
5900 case RTN_PROHIBIT:
5901 return BPF_FIB_LKUP_RET_PROHIBIT;
5902 default:
4c79579b 5903 return BPF_FIB_LKUP_RET_NOT_FWDED;
7d21fec9 5904 }
87f5fc7e 5905
b1d40991
DA
5906 ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
5907 fl6.flowi6_oif != 0, NULL, strict);
87f5fc7e 5908
4f74fede 5909 if (check_mtu) {
b748f260 5910 mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
e1850ea9
JDB
5911 if (params->tot_len > mtu) {
5912 params->mtu_result = mtu; /* union with tot_len */
4c79579b 5913 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
e1850ea9 5914 }
4f74fede
DA
5915 }
5916
b1d40991 5917 if (res.nh->fib_nh_lws)
4c79579b 5918 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
87f5fc7e 5919
b1d40991
DA
5920 if (res.nh->fib_nh_gw_family)
5921 *dst = res.nh->fib_nh_gw6;
87f5fc7e 5922
b1d40991
DA
5923 dev = res.nh->fib_nh_dev;
5924 params->rt_metric = res.f6i->fib6_metric;
d1c362e1 5925 params->ifindex = dev->ifindex;
87f5fc7e
DA
5926
5927 /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
71df5777 5928 * not needed here.
87f5fc7e 5929 */
71df5777 5930 neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
4c79579b
DA
5931 if (!neigh)
5932 return BPF_FIB_LKUP_RET_NO_NEIGH;
87f5fc7e 5933
e1850ea9 5934 return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
87f5fc7e
DA
5935}
5936#endif
5937
5938BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
5939 struct bpf_fib_lookup *, params, int, plen, u32, flags)
5940{
5941 if (plen < sizeof(*params))
5942 return -EINVAL;
5943
9ce64f19
DA
5944 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
5945 return -EINVAL;
5946
87f5fc7e
DA
5947 switch (params->family) {
5948#if IS_ENABLED(CONFIG_INET)
5949 case AF_INET:
5950 return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
4f74fede 5951 flags, true);
87f5fc7e
DA
5952#endif
5953#if IS_ENABLED(CONFIG_IPV6)
5954 case AF_INET6:
5955 return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
4f74fede 5956 flags, true);
87f5fc7e
DA
5957#endif
5958 }
bcece5dc 5959 return -EAFNOSUPPORT;
87f5fc7e
DA
5960}
5961
5962static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
5963 .func = bpf_xdp_fib_lookup,
5964 .gpl_only = true,
5965 .ret_type = RET_INTEGER,
5966 .arg1_type = ARG_PTR_TO_CTX,
5967 .arg2_type = ARG_PTR_TO_MEM,
5968 .arg3_type = ARG_CONST_SIZE,
5969 .arg4_type = ARG_ANYTHING,
5970};
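/* Usage sketch (added for illustration, not part of the original source;
 * only the success path is shown): the typical XDP forwarding pattern fills
 * a bpf_fib_lookup from the parsed packet and, on BPF_FIB_LKUP_RET_SUCCESS,
 * rewrites the Ethernet addresses from the result before redirecting:
 *
 *	struct bpf_fib_lookup p = { .family  = AF_INET,
 *				    .ifindex = ctx->ingress_ifindex, ... };
 *
 *	if (bpf_fib_lookup(ctx, &p, sizeof(p), 0) == BPF_FIB_LKUP_RET_SUCCESS) {
 *		__builtin_memcpy(eth->h_dest, p.dmac, ETH_ALEN);
 *		__builtin_memcpy(eth->h_source, p.smac, ETH_ALEN);
 *		return bpf_redirect(p.ifindex, 0);
 *	}
 */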
5971
5972BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
5973 struct bpf_fib_lookup *, params, int, plen, u32, flags)
5974{
4f74fede 5975 struct net *net = dev_net(skb->dev);
4c79579b 5976 int rc = -EAFNOSUPPORT;
2c0a10af 5977 bool check_mtu = false;
4f74fede 5978
87f5fc7e
DA
5979 if (plen < sizeof(*params))
5980 return -EINVAL;
5981
9ce64f19
DA
5982 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
5983 return -EINVAL;
5984
2c0a10af
JDB
5985 if (params->tot_len)
5986 check_mtu = true;
5987
87f5fc7e
DA
5988 switch (params->family) {
5989#if IS_ENABLED(CONFIG_INET)
5990 case AF_INET:
2c0a10af 5991 rc = bpf_ipv4_fib_lookup(net, params, flags, check_mtu);
4f74fede 5992 break;
87f5fc7e
DA
5993#endif
5994#if IS_ENABLED(CONFIG_IPV6)
5995 case AF_INET6:
2c0a10af 5996 rc = bpf_ipv6_fib_lookup(net, params, flags, check_mtu);
4f74fede 5997 break;
87f5fc7e
DA
5998#endif
5999 }
4f74fede 6000
2c0a10af 6001 if (rc == BPF_FIB_LKUP_RET_SUCCESS && !check_mtu) {
4f74fede
DA
6002 struct net_device *dev;
6003
2c0a10af
JDB
 6004		/* When tot_len isn't provided by the user, check the skb
 6005		 * against the MTU of the net_device resulting from the FIB lookup
 6006		 */
4c79579b 6007 dev = dev_get_by_index_rcu(net, params->ifindex);
4f74fede 6008 if (!is_skb_forwardable(dev, skb))
4c79579b 6009 rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
e1850ea9
JDB
6010
6011 params->mtu_result = dev->mtu; /* union with tot_len */
4f74fede
DA
6012 }
6013
4c79579b 6014 return rc;
87f5fc7e
DA
6015}
6016
6017static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
6018 .func = bpf_skb_fib_lookup,
6019 .gpl_only = true,
6020 .ret_type = RET_INTEGER,
6021 .arg1_type = ARG_PTR_TO_CTX,
6022 .arg2_type = ARG_PTR_TO_MEM,
6023 .arg3_type = ARG_CONST_SIZE,
6024 .arg4_type = ARG_ANYTHING,
6025};
6026
34b2021c
JDB
6027static struct net_device *__dev_via_ifindex(struct net_device *dev_curr,
6028 u32 ifindex)
6029{
6030 struct net *netns = dev_net(dev_curr);
6031
 6032	/* Non-redirect use-cases can use ifindex=0 and save an ifindex lookup */
6033 if (ifindex == 0)
6034 return dev_curr;
6035
6036 return dev_get_by_index_rcu(netns, ifindex);
6037}
6038
6039BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
6040 u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
6041{
6042 int ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
6043 struct net_device *dev = skb->dev;
6044 int skb_len, dev_len;
6045 int mtu;
6046
6047 if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
6048 return -EINVAL;
6049
e5e35e75 6050 if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len)))
34b2021c
JDB
6051 return -EINVAL;
6052
6053 dev = __dev_via_ifindex(dev, ifindex);
6054 if (unlikely(!dev))
6055 return -ENODEV;
6056
6057 mtu = READ_ONCE(dev->mtu);
6058
6059 dev_len = mtu + dev->hard_header_len;
e5e35e75
JDB
6060
 6061	/* If set, use *mtu_len as input; it is an L3 length like iph->tot_len (as in fib_lookup) */
6062 skb_len = *mtu_len ? *mtu_len + dev->hard_header_len : skb->len;
6063
6064 skb_len += len_diff; /* minus result pass check */
34b2021c
JDB
6065 if (skb_len <= dev_len) {
6066 ret = BPF_MTU_CHK_RET_SUCCESS;
6067 goto out;
6068 }
 6069	/* At this point, skb->len exceeds the MTU, but as it includes the length
 6070	 * of all segments, it can still be below the MTU. The SKB can possibly get
 6071	 * re-segmented in the transmit path (see validate_xmit_skb). Thus, the user
 6072	 * must choose whether the segs are to be MTU checked.
 6073	 */
6074 if (skb_is_gso(skb)) {
6075 ret = BPF_MTU_CHK_RET_SUCCESS;
6076
6077 if (flags & BPF_MTU_CHK_SEGS &&
6078 !skb_gso_validate_network_len(skb, mtu))
6079 ret = BPF_MTU_CHK_RET_SEGS_TOOBIG;
6080 }
6081out:
6082 /* BPF verifier guarantees valid pointer */
6083 *mtu_len = mtu;
6084
6085 return ret;
6086}
6087
6088BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
6089 u32, ifindex, u32 *, mtu_len, s32, len_diff, u64, flags)
6090{
6091 struct net_device *dev = xdp->rxq->dev;
6092 int xdp_len = xdp->data_end - xdp->data;
6093 int ret = BPF_MTU_CHK_RET_SUCCESS;
6094 int mtu, dev_len;
6095
6096 /* XDP variant doesn't support multi-buffer segment check (yet) */
6097 if (unlikely(flags))
6098 return -EINVAL;
6099
6100 dev = __dev_via_ifindex(dev, ifindex);
6101 if (unlikely(!dev))
6102 return -ENODEV;
6103
6104 mtu = READ_ONCE(dev->mtu);
6105
6106 /* Add L2-header as dev MTU is L3 size */
6107 dev_len = mtu + dev->hard_header_len;
6108
e5e35e75
JDB
 6109	/* If set, use *mtu_len as input; it is an L3 length like iph->tot_len (as in fib_lookup) */
6110 if (*mtu_len)
6111 xdp_len = *mtu_len + dev->hard_header_len;
6112
34b2021c
JDB
6113 xdp_len += len_diff; /* minus result pass check */
6114 if (xdp_len > dev_len)
6115 ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
6116
6117 /* BPF verifier guarantees valid pointer */
6118 *mtu_len = mtu;
6119
6120 return ret;
6121}
6122
6123static const struct bpf_func_proto bpf_skb_check_mtu_proto = {
6124 .func = bpf_skb_check_mtu,
6125 .gpl_only = true,
6126 .ret_type = RET_INTEGER,
6127 .arg1_type = ARG_PTR_TO_CTX,
6128 .arg2_type = ARG_ANYTHING,
6129 .arg3_type = ARG_PTR_TO_INT,
6130 .arg4_type = ARG_ANYTHING,
6131 .arg5_type = ARG_ANYTHING,
6132};
6133
6134static const struct bpf_func_proto bpf_xdp_check_mtu_proto = {
6135 .func = bpf_xdp_check_mtu,
6136 .gpl_only = true,
6137 .ret_type = RET_INTEGER,
6138 .arg1_type = ARG_PTR_TO_CTX,
6139 .arg2_type = ARG_ANYTHING,
6140 .arg3_type = ARG_PTR_TO_INT,
6141 .arg4_type = ARG_ANYTHING,
6142 .arg5_type = ARG_ANYTHING,
6143};
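/* Usage sketch (added for illustration, not part of the original source):
 * before growing a packet by len_diff bytes (e.g. for encapsulation), a TC
 * or XDP program can ask whether the result would still fit the device MTU.
 * ifindex 0 means the current device; mtu must start at 0 so the packet
 * length itself is used as input:
 *
 *	__u32 mtu = 0;
 *
 *	if (bpf_check_mtu(ctx, 0, &mtu, len_diff, 0) != BPF_MTU_CHK_RET_SUCCESS)
 *		... drop or fragment; mtu now holds the device MTU ...
 */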
6144
fe94cc29
MX
6145#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
6146static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
6147{
6148 int err;
6149 struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
6150
bb986a50 6151 if (!seg6_validate_srh(srh, len, false))
fe94cc29
MX
6152 return -EINVAL;
6153
6154 switch (type) {
6155 case BPF_LWT_ENCAP_SEG6_INLINE:
6156 if (skb->protocol != htons(ETH_P_IPV6))
6157 return -EBADMSG;
6158
6159 err = seg6_do_srh_inline(skb, srh);
6160 break;
6161 case BPF_LWT_ENCAP_SEG6:
6162 skb_reset_inner_headers(skb);
6163 skb->encapsulation = 1;
6164 err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
6165 break;
6166 default:
6167 return -EINVAL;
6168 }
6169
6170 bpf_compute_data_pointers(skb);
6171 if (err)
6172 return err;
6173
fe94cc29
MX
6174 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
6175
6176 return seg6_lookup_nexthop(skb, NULL, 0);
6177}
6178#endif /* CONFIG_IPV6_SEG6_BPF */
6179
3e0bd37c
PO
6180#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
6181static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
6182 bool ingress)
6183{
52f27877 6184 return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
3e0bd37c
PO
6185}
6186#endif
6187
6188BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
fe94cc29
MX
6189 u32, len)
6190{
6191 switch (type) {
6192#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
6193 case BPF_LWT_ENCAP_SEG6:
6194 case BPF_LWT_ENCAP_SEG6_INLINE:
6195 return bpf_push_seg6_encap(skb, type, hdr, len);
3e0bd37c
PO
6196#endif
6197#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
6198 case BPF_LWT_ENCAP_IP:
6199 return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
fe94cc29
MX
6200#endif
6201 default:
6202 return -EINVAL;
6203 }
6204}
6205
3e0bd37c
PO
6206BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
6207 void *, hdr, u32, len)
6208{
6209 switch (type) {
6210#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
6211 case BPF_LWT_ENCAP_IP:
6212 return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
fe94cc29
MX
6213#endif
6214 default:
6215 return -EINVAL;
6216 }
6217}
6218
3e0bd37c
PO
6219static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
6220 .func = bpf_lwt_in_push_encap,
6221 .gpl_only = false,
6222 .ret_type = RET_INTEGER,
6223 .arg1_type = ARG_PTR_TO_CTX,
6224 .arg2_type = ARG_ANYTHING,
216e3cd2 6225 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
3e0bd37c
PO
6226 .arg4_type = ARG_CONST_SIZE
6227};
6228
6229static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
6230 .func = bpf_lwt_xmit_push_encap,
fe94cc29
MX
6231 .gpl_only = false,
6232 .ret_type = RET_INTEGER,
6233 .arg1_type = ARG_PTR_TO_CTX,
6234 .arg2_type = ARG_ANYTHING,
216e3cd2 6235 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
fe94cc29
MX
6236 .arg4_type = ARG_CONST_SIZE
6237};
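/* Usage sketch (added for illustration, not part of the original source;
 * the SRH buffer is illustrative): from an LWT BPF program, a pre-built
 * Segment Routing header can be attached via the bpf_lwt_push_encap()
 * helper, which lands in the functions above (the SEG6 encap types are
 * handled on the input side):
 *
 *	struct ipv6_sr_hdr *srh = ... pointer to a prepared SRH of srh_len bytes ...;
 *
 *	bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_SEG6, srh, srh_len);
 */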
6238
61d76980 6239#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
fe94cc29
MX
6240BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
6241 const void *, from, u32, len)
6242{
fe94cc29
MX
6243 struct seg6_bpf_srh_state *srh_state =
6244 this_cpu_ptr(&seg6_bpf_srh_states);
486cdf21 6245 struct ipv6_sr_hdr *srh = srh_state->srh;
fe94cc29 6246 void *srh_tlvs, *srh_end, *ptr;
fe94cc29
MX
6247 int srhoff = 0;
6248
486cdf21 6249 if (srh == NULL)
fe94cc29
MX
6250 return -EINVAL;
6251
fe94cc29
MX
6252 srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
6253 srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
6254
6255 ptr = skb->data + offset;
6256 if (ptr >= srh_tlvs && ptr + len <= srh_end)
486cdf21 6257 srh_state->valid = false;
fe94cc29
MX
6258 else if (ptr < (void *)&srh->flags ||
6259 ptr + len > (void *)&srh->segments)
6260 return -EFAULT;
6261
6262 if (unlikely(bpf_try_make_writable(skb, offset + len)))
6263 return -EFAULT;
486cdf21
MX
6264 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
6265 return -EINVAL;
6266 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
fe94cc29
MX
6267
6268 memcpy(skb->data + offset, from, len);
6269 return 0;
fe94cc29
MX
6270}
6271
6272static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
6273 .func = bpf_lwt_seg6_store_bytes,
6274 .gpl_only = false,
6275 .ret_type = RET_INTEGER,
6276 .arg1_type = ARG_PTR_TO_CTX,
6277 .arg2_type = ARG_ANYTHING,
216e3cd2 6278 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
fe94cc29
MX
6279 .arg4_type = ARG_CONST_SIZE
6280};
6281
486cdf21 6282static void bpf_update_srh_state(struct sk_buff *skb)
fe94cc29 6283{
fe94cc29
MX
6284 struct seg6_bpf_srh_state *srh_state =
6285 this_cpu_ptr(&seg6_bpf_srh_states);
fe94cc29 6286 int srhoff = 0;
fe94cc29 6287
486cdf21
MX
6288 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
6289 srh_state->srh = NULL;
6290 } else {
6291 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
6292 srh_state->hdrlen = srh_state->srh->hdrlen << 3;
6293 srh_state->valid = true;
fe94cc29 6294 }
486cdf21
MX
6295}
6296
6297BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
6298 u32, action, void *, param, u32, param_len)
6299{
6300 struct seg6_bpf_srh_state *srh_state =
6301 this_cpu_ptr(&seg6_bpf_srh_states);
6302 int hdroff = 0;
6303 int err;
fe94cc29
MX
6304
6305 switch (action) {
6306 case SEG6_LOCAL_ACTION_END_X:
486cdf21
MX
6307 if (!seg6_bpf_has_valid_srh(skb))
6308 return -EBADMSG;
fe94cc29
MX
6309 if (param_len != sizeof(struct in6_addr))
6310 return -EINVAL;
6311 return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
6312 case SEG6_LOCAL_ACTION_END_T:
486cdf21
MX
6313 if (!seg6_bpf_has_valid_srh(skb))
6314 return -EBADMSG;
fe94cc29
MX
6315 if (param_len != sizeof(int))
6316 return -EINVAL;
6317 return seg6_lookup_nexthop(skb, NULL, *(int *)param);
486cdf21
MX
6318 case SEG6_LOCAL_ACTION_END_DT6:
6319 if (!seg6_bpf_has_valid_srh(skb))
6320 return -EBADMSG;
fe94cc29
MX
6321 if (param_len != sizeof(int))
6322 return -EINVAL;
486cdf21
MX
6323
6324 if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
6325 return -EBADMSG;
6326 if (!pskb_pull(skb, hdroff))
6327 return -EBADMSG;
6328
6329 skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
6330 skb_reset_network_header(skb);
6331 skb_reset_transport_header(skb);
6332 skb->encapsulation = 0;
6333
6334 bpf_compute_data_pointers(skb);
6335 bpf_update_srh_state(skb);
fe94cc29
MX
6336 return seg6_lookup_nexthop(skb, NULL, *(int *)param);
6337 case SEG6_LOCAL_ACTION_END_B6:
486cdf21
MX
6338 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
6339 return -EBADMSG;
fe94cc29
MX
6340 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
6341 param, param_len);
6342 if (!err)
486cdf21
MX
6343 bpf_update_srh_state(skb);
6344
fe94cc29
MX
6345 return err;
6346 case SEG6_LOCAL_ACTION_END_B6_ENCAP:
486cdf21
MX
6347 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
6348 return -EBADMSG;
fe94cc29
MX
6349 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
6350 param, param_len);
6351 if (!err)
486cdf21
MX
6352 bpf_update_srh_state(skb);
6353
fe94cc29
MX
6354 return err;
6355 default:
6356 return -EINVAL;
6357 }
fe94cc29
MX
6358}
6359
6360static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
6361 .func = bpf_lwt_seg6_action,
6362 .gpl_only = false,
6363 .ret_type = RET_INTEGER,
6364 .arg1_type = ARG_PTR_TO_CTX,
6365 .arg2_type = ARG_ANYTHING,
216e3cd2 6366 .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
fe94cc29
MX
6367 .arg4_type = ARG_CONST_SIZE
6368};
6369
6370BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
6371 s32, len)
6372{
fe94cc29
MX
6373 struct seg6_bpf_srh_state *srh_state =
6374 this_cpu_ptr(&seg6_bpf_srh_states);
486cdf21 6375 struct ipv6_sr_hdr *srh = srh_state->srh;
fe94cc29 6376 void *srh_end, *srh_tlvs, *ptr;
fe94cc29
MX
6377 struct ipv6hdr *hdr;
6378 int srhoff = 0;
6379 int ret;
6380
486cdf21 6381 if (unlikely(srh == NULL))
fe94cc29 6382 return -EINVAL;
fe94cc29
MX
6383
6384 srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
6385 ((srh->first_segment + 1) << 4));
6386 srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
6387 srh_state->hdrlen);
6388 ptr = skb->data + offset;
6389
6390 if (unlikely(ptr < srh_tlvs || ptr > srh_end))
6391 return -EFAULT;
6392 if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
6393 return -EFAULT;
6394
6395 if (len > 0) {
6396 ret = skb_cow_head(skb, len);
6397 if (unlikely(ret < 0))
6398 return ret;
6399
6400 ret = bpf_skb_net_hdr_push(skb, offset, len);
6401 } else {
6402 ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
6403 }
6404
6405 bpf_compute_data_pointers(skb);
6406 if (unlikely(ret < 0))
6407 return ret;
6408
6409 hdr = (struct ipv6hdr *)skb->data;
6410 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
6411
486cdf21
MX
6412 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
6413 return -EINVAL;
6414 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
fe94cc29 6415 srh_state->hdrlen += len;
486cdf21 6416 srh_state->valid = false;
fe94cc29 6417 return 0;
fe94cc29
MX
6418}
6419
6420static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
6421 .func = bpf_lwt_seg6_adjust_srh,
6422 .gpl_only = false,
6423 .ret_type = RET_INTEGER,
6424 .arg1_type = ARG_PTR_TO_CTX,
6425 .arg2_type = ARG_ANYTHING,
6426 .arg3_type = ARG_ANYTHING,
6427};
61d76980 6428#endif /* CONFIG_IPV6_SEG6_BPF */
fe94cc29 6429
df3f94a0
AB
6430#ifdef CONFIG_INET
6431static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
c8123ead 6432 int dif, int sdif, u8 family, u8 proto)
6acc9b43 6433{
4461568a 6434 struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;
6acc9b43
JS
6435 bool refcounted = false;
6436 struct sock *sk = NULL;
6437
6438 if (family == AF_INET) {
6439 __be32 src4 = tuple->ipv4.saddr;
6440 __be32 dst4 = tuple->ipv4.daddr;
6acc9b43
JS
6441
6442 if (proto == IPPROTO_TCP)
4461568a 6443 sk = __inet_lookup(net, hinfo, NULL, 0,
6acc9b43
JS
6444 src4, tuple->ipv4.sport,
6445 dst4, tuple->ipv4.dport,
6446 dif, sdif, &refcounted);
6447 else
6448 sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
6449 dst4, tuple->ipv4.dport,
ba6aac15 6450 dif, sdif, net->ipv4.udp_table, NULL);
8a615c6b 6451#if IS_ENABLED(CONFIG_IPV6)
6acc9b43
JS
6452 } else {
6453 struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
6454 struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
6acc9b43
JS
6455
6456 if (proto == IPPROTO_TCP)
4461568a 6457 sk = __inet6_lookup(net, hinfo, NULL, 0,
6acc9b43 6458 src6, tuple->ipv6.sport,
cac6cc2f 6459 dst6, ntohs(tuple->ipv6.dport),
6acc9b43 6460 dif, sdif, &refcounted);
8a615c6b
JS
6461 else if (likely(ipv6_bpf_stub))
6462 sk = ipv6_bpf_stub->udp6_lib_lookup(net,
6463 src6, tuple->ipv6.sport,
cac6cc2f 6464 dst6, tuple->ipv6.dport,
8a615c6b 6465 dif, sdif,
ba6aac15 6466 net->ipv4.udp_table, NULL);
6acc9b43
JS
6467#endif
6468 }
6469
6470 if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
6471 WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
6472 sk = NULL;
6473 }
6474 return sk;
6475}
6476
edbf8c01 6477/* bpf_skc_lookup performs the core lookup for different types of sockets,
6acc9b43 6478 * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
6acc9b43 6479 */
edbf8c01
LB
6480static struct sock *
6481__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6482 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
6483 u64 flags)
6acc9b43 6484{
6acc9b43 6485 struct sock *sk = NULL;
6acc9b43 6486 struct net *net;
2064a132 6487 u8 family;
c8123ead 6488 int sdif;
6acc9b43 6489
9b28ae24
LB
6490 if (len == sizeof(tuple->ipv4))
6491 family = AF_INET;
6492 else if (len == sizeof(tuple->ipv6))
6493 family = AF_INET6;
6494 else
6495 return NULL;
6496
2064a132 6497 if (unlikely(flags || !((s32)netns_id < 0 || netns_id <= S32_MAX)))
6acc9b43
JS
6498 goto out;
6499
c8123ead
NH
6500 if (family == AF_INET)
6501 sdif = inet_sdif(skb);
6acc9b43 6502 else
c8123ead
NH
6503 sdif = inet6_sdif(skb);
6504
f71c6143
JS
6505 if ((s32)netns_id < 0) {
6506 net = caller_net;
4cc1feeb 6507 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
f71c6143 6508 } else {
6acc9b43
JS
6509 net = get_net_ns_by_id(caller_net, netns_id);
6510 if (unlikely(!net))
6511 goto out;
c8123ead 6512 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
6acc9b43 6513 put_net(net);
6acc9b43
JS
6514 }
6515
edbf8c01
LB
6516out:
6517 return sk;
6518}
6519
6520static struct sock *
6521__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6522 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
6523 u64 flags)
6524{
6525 struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
6526 ifindex, proto, netns_id, flags);
6527
f7355a6c 6528 if (sk) {
3046a827
JM
6529 struct sock *sk2 = sk_to_full_sk(sk);
6530
6531 /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
6532 * sock refcnt is decremented to prevent a request_sock leak.
6533 */
6534 if (!sk_fullsock(sk2))
6535 sk2 = NULL;
6536 if (sk2 != sk) {
2e012c74 6537 sock_gen_put(sk);
3046a827
JM
6538 /* Ensure there is no need to bump sk2 refcnt */
6539 if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
6540 WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
6541 return NULL;
6542 }
6543 sk = sk2;
f7355a6c
MKL
6544 }
6545 }
edbf8c01
LB
6546
6547 return sk;
6acc9b43
JS
6548}
6549
edbf8c01
LB
6550static struct sock *
6551bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6552 u8 proto, u64 netns_id, u64 flags)
c8123ead
NH
6553{
6554 struct net *caller_net;
6555 int ifindex;
6556
6557 if (skb->dev) {
6558 caller_net = dev_net(skb->dev);
6559 ifindex = skb->dev->ifindex;
6560 } else {
6561 caller_net = sock_net(skb->sk);
6562 ifindex = 0;
6563 }
6564
edbf8c01
LB
6565 return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
6566 netns_id, flags);
c8123ead
NH
6567}
6568
edbf8c01
LB
6569static struct sock *
6570bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
6571 u8 proto, u64 netns_id, u64 flags)
6572{
6573 struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
6574 flags);
6575
f7355a6c 6576 if (sk) {
3046a827
JM
6577 struct sock *sk2 = sk_to_full_sk(sk);
6578
6579 /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
6580 * sock refcnt is decremented to prevent a request_sock leak.
6581 */
6582 if (!sk_fullsock(sk2))
6583 sk2 = NULL;
6584 if (sk2 != sk) {
2e012c74 6585 sock_gen_put(sk);
3046a827
JM
6586 /* Ensure there is no need to bump sk2 refcnt */
6587 if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
6588 WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
6589 return NULL;
6590 }
6591 sk = sk2;
f7355a6c
MKL
6592 }
6593 }
edbf8c01
LB
6594
6595 return sk;
6596}
6597
6598BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
6599 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6600{
6601 return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
6602 netns_id, flags);
6603}
6604
6605static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
6606 .func = bpf_skc_lookup_tcp,
6607 .gpl_only = false,
6608 .pkt_access = true,
6609 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6610 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 6611 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
edbf8c01
LB
6612 .arg3_type = ARG_CONST_SIZE,
6613 .arg4_type = ARG_ANYTHING,
6614 .arg5_type = ARG_ANYTHING,
6615};
6616
6acc9b43
JS
6617BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
6618 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6619{
edbf8c01
LB
6620 return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
6621 netns_id, flags);
6acc9b43
JS
6622}
6623
6624static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
6625 .func = bpf_sk_lookup_tcp,
6626 .gpl_only = false,
6627 .pkt_access = true,
6628 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6629 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 6630 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
6acc9b43
JS
6631 .arg3_type = ARG_CONST_SIZE,
6632 .arg4_type = ARG_ANYTHING,
6633 .arg5_type = ARG_ANYTHING,
6634};
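/* Usage sketch (added for illustration, not part of the original source;
 * the tuple fields are illustrative): any refcounted socket returned by the
 * lookup helpers must be released with bpf_sk_release() before the program
 * exits. Passing sizeof(t.ipv4) selects an IPv4 lookup:
 *
 *	struct bpf_sock_tuple t = { .ipv4 = { ... saddr/daddr/sport/dport ... } };
 *	struct bpf_sock *sk;
 *
 *	sk = bpf_sk_lookup_tcp(skb, &t, sizeof(t.ipv4), BPF_F_CURRENT_NETNS, 0);
 *	if (sk) {
 *		... use sk ...
 *		bpf_sk_release(sk);
 *	}
 */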
6635
6636BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
6637 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6638{
edbf8c01
LB
6639 return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
6640 netns_id, flags);
6acc9b43
JS
6641}
6642
6643static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
6644 .func = bpf_sk_lookup_udp,
6645 .gpl_only = false,
6646 .pkt_access = true,
6647 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6648 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 6649 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
6acc9b43
JS
6650 .arg3_type = ARG_CONST_SIZE,
6651 .arg4_type = ARG_ANYTHING,
6652 .arg5_type = ARG_ANYTHING,
6653};
6654
6655BPF_CALL_1(bpf_sk_release, struct sock *, sk)
6656{
a5fa25ad 6657 if (sk && sk_is_refcounted(sk))
6acc9b43
JS
6658 sock_gen_put(sk);
6659 return 0;
6660}
6661
6662static const struct bpf_func_proto bpf_sk_release_proto = {
6663 .func = bpf_sk_release,
6664 .gpl_only = false,
6665 .ret_type = RET_INTEGER,
8f14852e 6666 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON | OBJ_RELEASE,
6acc9b43 6667};
c8123ead
NH
6668
6669BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
6670 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6671{
6672 struct net *caller_net = dev_net(ctx->rxq->dev);
6673 int ifindex = ctx->rxq->dev->ifindex;
6674
edbf8c01
LB
6675 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
6676 ifindex, IPPROTO_UDP, netns_id,
6677 flags);
c8123ead
NH
6678}
6679
6680static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
6681 .func = bpf_xdp_sk_lookup_udp,
6682 .gpl_only = false,
6683 .pkt_access = true,
6684 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6685 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 6686 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
c8123ead
NH
6687 .arg3_type = ARG_CONST_SIZE,
6688 .arg4_type = ARG_ANYTHING,
6689 .arg5_type = ARG_ANYTHING,
6690};
6691
edbf8c01
LB
6692BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
6693 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6694{
6695 struct net *caller_net = dev_net(ctx->rxq->dev);
6696 int ifindex = ctx->rxq->dev->ifindex;
6697
6698 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
6699 ifindex, IPPROTO_TCP, netns_id,
6700 flags);
6701}
6702
6703static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
6704 .func = bpf_xdp_skc_lookup_tcp,
6705 .gpl_only = false,
6706 .pkt_access = true,
6707 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6708 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 6709 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
edbf8c01
LB
6710 .arg3_type = ARG_CONST_SIZE,
6711 .arg4_type = ARG_ANYTHING,
6712 .arg5_type = ARG_ANYTHING,
6713};
6714
c8123ead
NH
6715BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
6716 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
6717{
6718 struct net *caller_net = dev_net(ctx->rxq->dev);
6719 int ifindex = ctx->rxq->dev->ifindex;
6720
edbf8c01
LB
6721 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
6722 ifindex, IPPROTO_TCP, netns_id,
6723 flags);
c8123ead
NH
6724}
6725
6726static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
6727 .func = bpf_xdp_sk_lookup_tcp,
6728 .gpl_only = false,
6729 .pkt_access = true,
6730 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6731 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 6732 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
c8123ead
NH
6733 .arg3_type = ARG_CONST_SIZE,
6734 .arg4_type = ARG_ANYTHING,
6735 .arg5_type = ARG_ANYTHING,
6736};
6c49e65e 6737
edbf8c01
LB
6738BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
6739 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6740{
6741 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
6742 sock_net(ctx->sk), 0,
6743 IPPROTO_TCP, netns_id, flags);
6744}
6745
6746static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
6747 .func = bpf_sock_addr_skc_lookup_tcp,
6748 .gpl_only = false,
6749 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
6750 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 6751 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
edbf8c01
LB
6752 .arg3_type = ARG_CONST_SIZE,
6753 .arg4_type = ARG_ANYTHING,
6754 .arg5_type = ARG_ANYTHING,
6755};
6756
6c49e65e
AI
6757BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
6758 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6759{
edbf8c01
LB
6760 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
6761 sock_net(ctx->sk), 0, IPPROTO_TCP,
6762 netns_id, flags);
6c49e65e
AI
6763}
6764
6765static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
6766 .func = bpf_sock_addr_sk_lookup_tcp,
6767 .gpl_only = false,
6768 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6769 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 6770 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
6c49e65e
AI
6771 .arg3_type = ARG_CONST_SIZE,
6772 .arg4_type = ARG_ANYTHING,
6773 .arg5_type = ARG_ANYTHING,
6774};
6775
6776BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
6777 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
6778{
edbf8c01
LB
6779 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
6780 sock_net(ctx->sk), 0, IPPROTO_UDP,
6781 netns_id, flags);
6c49e65e
AI
6782}
6783
6784static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
6785 .func = bpf_sock_addr_sk_lookup_udp,
6786 .gpl_only = false,
6787 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6788 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 6789 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
6c49e65e
AI
6790 .arg3_type = ARG_CONST_SIZE,
6791 .arg4_type = ARG_ANYTHING,
6792 .arg5_type = ARG_ANYTHING,
6793};
6794
655a51e5
MKL
6795bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
6796 struct bpf_insn_access_aux *info)
6797{
c2cb5e82
SF
6798 if (off < 0 || off >= offsetofend(struct bpf_tcp_sock,
6799 icsk_retransmits))
655a51e5
MKL
6800 return false;
6801
6802 if (off % size != 0)
6803 return false;
6804
6805 switch (off) {
6806 case offsetof(struct bpf_tcp_sock, bytes_received):
6807 case offsetof(struct bpf_tcp_sock, bytes_acked):
6808 return size == sizeof(__u64);
6809 default:
6810 return size == sizeof(__u32);
6811 }
6812}
6813
6814u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
6815 const struct bpf_insn *si,
6816 struct bpf_insn *insn_buf,
6817 struct bpf_prog *prog, u32 *target_size)
6818{
6819 struct bpf_insn *insn = insn_buf;
6820
6821#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
6822 do { \
c593642c
PB
6823 BUILD_BUG_ON(sizeof_field(struct tcp_sock, FIELD) > \
6824 sizeof_field(struct bpf_tcp_sock, FIELD)); \
655a51e5
MKL
6825 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
6826 si->dst_reg, si->src_reg, \
6827 offsetof(struct tcp_sock, FIELD)); \
6828 } while (0)
6829
c2cb5e82
SF
6830#define BPF_INET_SOCK_GET_COMMON(FIELD) \
6831 do { \
c593642c 6832 BUILD_BUG_ON(sizeof_field(struct inet_connection_sock, \
c2cb5e82 6833 FIELD) > \
c593642c 6834 sizeof_field(struct bpf_tcp_sock, FIELD)); \
c2cb5e82
SF
6835 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
6836 struct inet_connection_sock, \
6837 FIELD), \
6838 si->dst_reg, si->src_reg, \
6839 offsetof( \
6840 struct inet_connection_sock, \
6841 FIELD)); \
6842 } while (0)
6843
655a51e5
MKL
6844 if (insn > insn_buf)
6845 return insn - insn_buf;
6846
6847 switch (si->off) {
6848 case offsetof(struct bpf_tcp_sock, rtt_min):
c593642c 6849 BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
655a51e5
MKL
6850 sizeof(struct minmax));
6851 BUILD_BUG_ON(sizeof(struct minmax) <
6852 sizeof(struct minmax_sample));
6853
6854 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
6855 offsetof(struct tcp_sock, rtt_min) +
6856 offsetof(struct minmax_sample, v));
6857 break;
2377b81d
SF
6858 case offsetof(struct bpf_tcp_sock, snd_cwnd):
6859 BPF_TCP_SOCK_GET_COMMON(snd_cwnd);
6860 break;
6861 case offsetof(struct bpf_tcp_sock, srtt_us):
6862 BPF_TCP_SOCK_GET_COMMON(srtt_us);
6863 break;
6864 case offsetof(struct bpf_tcp_sock, snd_ssthresh):
6865 BPF_TCP_SOCK_GET_COMMON(snd_ssthresh);
6866 break;
6867 case offsetof(struct bpf_tcp_sock, rcv_nxt):
6868 BPF_TCP_SOCK_GET_COMMON(rcv_nxt);
6869 break;
6870 case offsetof(struct bpf_tcp_sock, snd_nxt):
6871 BPF_TCP_SOCK_GET_COMMON(snd_nxt);
6872 break;
6873 case offsetof(struct bpf_tcp_sock, snd_una):
6874 BPF_TCP_SOCK_GET_COMMON(snd_una);
6875 break;
6876 case offsetof(struct bpf_tcp_sock, mss_cache):
6877 BPF_TCP_SOCK_GET_COMMON(mss_cache);
6878 break;
6879 case offsetof(struct bpf_tcp_sock, ecn_flags):
6880 BPF_TCP_SOCK_GET_COMMON(ecn_flags);
6881 break;
6882 case offsetof(struct bpf_tcp_sock, rate_delivered):
6883 BPF_TCP_SOCK_GET_COMMON(rate_delivered);
6884 break;
6885 case offsetof(struct bpf_tcp_sock, rate_interval_us):
6886 BPF_TCP_SOCK_GET_COMMON(rate_interval_us);
6887 break;
6888 case offsetof(struct bpf_tcp_sock, packets_out):
6889 BPF_TCP_SOCK_GET_COMMON(packets_out);
6890 break;
6891 case offsetof(struct bpf_tcp_sock, retrans_out):
6892 BPF_TCP_SOCK_GET_COMMON(retrans_out);
6893 break;
6894 case offsetof(struct bpf_tcp_sock, total_retrans):
6895 BPF_TCP_SOCK_GET_COMMON(total_retrans);
6896 break;
6897 case offsetof(struct bpf_tcp_sock, segs_in):
6898 BPF_TCP_SOCK_GET_COMMON(segs_in);
6899 break;
6900 case offsetof(struct bpf_tcp_sock, data_segs_in):
6901 BPF_TCP_SOCK_GET_COMMON(data_segs_in);
6902 break;
6903 case offsetof(struct bpf_tcp_sock, segs_out):
6904 BPF_TCP_SOCK_GET_COMMON(segs_out);
6905 break;
6906 case offsetof(struct bpf_tcp_sock, data_segs_out):
6907 BPF_TCP_SOCK_GET_COMMON(data_segs_out);
6908 break;
6909 case offsetof(struct bpf_tcp_sock, lost_out):
6910 BPF_TCP_SOCK_GET_COMMON(lost_out);
6911 break;
6912 case offsetof(struct bpf_tcp_sock, sacked_out):
6913 BPF_TCP_SOCK_GET_COMMON(sacked_out);
6914 break;
6915 case offsetof(struct bpf_tcp_sock, bytes_received):
6916 BPF_TCP_SOCK_GET_COMMON(bytes_received);
6917 break;
6918 case offsetof(struct bpf_tcp_sock, bytes_acked):
6919 BPF_TCP_SOCK_GET_COMMON(bytes_acked);
6920 break;
0357746d
SF
6921 case offsetof(struct bpf_tcp_sock, dsack_dups):
6922 BPF_TCP_SOCK_GET_COMMON(dsack_dups);
6923 break;
6924 case offsetof(struct bpf_tcp_sock, delivered):
6925 BPF_TCP_SOCK_GET_COMMON(delivered);
6926 break;
6927 case offsetof(struct bpf_tcp_sock, delivered_ce):
6928 BPF_TCP_SOCK_GET_COMMON(delivered_ce);
6929 break;
c2cb5e82
SF
6930 case offsetof(struct bpf_tcp_sock, icsk_retransmits):
6931 BPF_INET_SOCK_GET_COMMON(icsk_retransmits);
6932 break;
655a51e5
MKL
6933 }
6934
6935 return insn - insn_buf;
6936}
6937
6938BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
6939{
655a51e5
MKL
6940 if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
6941 return (unsigned long)sk;
6942
6943 return (unsigned long)NULL;
6944}
6945
0d01da6a 6946const struct bpf_func_proto bpf_tcp_sock_proto = {
655a51e5
MKL
6947 .func = bpf_tcp_sock,
6948 .gpl_only = false,
6949 .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
6950 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
6951};
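/* Usage sketch (added for illustration, not part of the original source):
 * given a socket pointer (e.g. from bpf_sk_lookup_tcp() or skb->sk),
 * bpf_tcp_sock() exposes the read-only TCP state that
 * bpf_tcp_sock_convert_ctx_access() above maps onto struct tcp_sock:
 *
 *	struct bpf_tcp_sock *tp = bpf_tcp_sock(sk);
 *
 *	if (tp)
 *		... tp->snd_cwnd, tp->srtt_us, tp->bytes_acked ...
 */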
6952
dbafd7dd
MKL
6953BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
6954{
6955 sk = sk_to_full_sk(sk);
6956
6957 if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
6958 return (unsigned long)sk;
6959
6960 return (unsigned long)NULL;
6961}
6962
6963static const struct bpf_func_proto bpf_get_listener_sock_proto = {
6964 .func = bpf_get_listener_sock,
6965 .gpl_only = false,
6966 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6967 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
6968};
6969
f7c917ba 6970BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
6971{
6972 unsigned int iphdr_len;
6973
d7bf2ebe
THJ
6974 switch (skb_protocol(skb, true)) {
6975 case cpu_to_be16(ETH_P_IP):
f7c917ba 6976 iphdr_len = sizeof(struct iphdr);
d7bf2ebe
THJ
6977 break;
6978 case cpu_to_be16(ETH_P_IPV6):
f7c917ba 6979 iphdr_len = sizeof(struct ipv6hdr);
d7bf2ebe
THJ
6980 break;
6981 default:
f7c917ba 6982 return 0;
d7bf2ebe 6983 }
f7c917ba 6984
6985 if (skb_headlen(skb) < iphdr_len)
6986 return 0;
6987
6988 if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len))
6989 return 0;
6990
6991 return INET_ECN_set_ce(skb);
6992}
6993
fada7fdc
JL
6994bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
6995 struct bpf_insn_access_aux *info)
6996{
6997 if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id))
6998 return false;
6999
7000 if (off % size != 0)
7001 return false;
7002
7003 switch (off) {
7004 default:
7005 return size == sizeof(__u32);
7006 }
7007}
7008
7009u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
7010 const struct bpf_insn *si,
7011 struct bpf_insn *insn_buf,
7012 struct bpf_prog *prog, u32 *target_size)
7013{
7014 struct bpf_insn *insn = insn_buf;
7015
7016#define BPF_XDP_SOCK_GET(FIELD) \
7017 do { \
c593642c
PB
7018 BUILD_BUG_ON(sizeof_field(struct xdp_sock, FIELD) > \
7019 sizeof_field(struct bpf_xdp_sock, FIELD)); \
fada7fdc
JL
7020 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
7021 si->dst_reg, si->src_reg, \
7022 offsetof(struct xdp_sock, FIELD)); \
7023 } while (0)
7024
7025 switch (si->off) {
7026 case offsetof(struct bpf_xdp_sock, queue_id):
7027 BPF_XDP_SOCK_GET(queue_id);
7028 break;
7029 }
7030
7031 return insn - insn_buf;
7032}
7033
f7c917ba 7034static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
7035 .func = bpf_skb_ecn_set_ce,
7036 .gpl_only = false,
7037 .ret_type = RET_INTEGER,
7038 .arg1_type = ARG_PTR_TO_CTX,
7039};
39904084
LB
7040
BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
	   struct tcphdr *, th, u32, th_len)
{
#ifdef CONFIG_SYN_COOKIES
	u32 cookie;
	int ret;

	if (unlikely(!sk || th_len < sizeof(*th)))
		return -EINVAL;

	/* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
	if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
		return -EINVAL;

	if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
		return -EINVAL;

	if (!th->ack || th->rst || th->syn)
		return -ENOENT;

	if (unlikely(iph_len < sizeof(struct iphdr)))
		return -EINVAL;

	if (tcp_synq_no_recent_overflow(sk))
		return -ENOENT;

	cookie = ntohl(th->ack_seq) - 1;

	/* Both struct iphdr and struct ipv6hdr have the version field at the
	 * same offset so we can cast to the shorter header (struct iphdr).
	 */
	switch (((struct iphdr *)iph)->version) {
	case 4:
		if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
			return -EINVAL;

		ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
		break;

#if IS_BUILTIN(CONFIG_IPV6)
	case 6:
		if (unlikely(iph_len < sizeof(struct ipv6hdr)))
			return -EINVAL;

		if (sk->sk_family != AF_INET6)
			return -EINVAL;

		ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
		break;
#endif /* CONFIG_IPV6 */

	default:
		return -EPROTONOSUPPORT;
	}

	if (ret > 0)
		return 0;

	return -ENOENT;
#else
	return -ENOTSUPP;
#endif
}

static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
	.func		= bpf_tcp_check_syncookie,
	.gpl_only	= true,
	.pkt_access	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.arg2_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type	= ARG_PTR_TO_MEM | MEM_RDONLY,
	.arg5_type	= ARG_CONST_SIZE,
};
7116
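/* Usage note: bpf_tcp_check_syncookie() is typically called from an XDP or tc
 * program after parsing the IP and TCP headers of an ACK, with the listening
 * socket obtained from a socket lookup. A rough sketch (illustrative only,
 * tuple setup omitted):
 *
 *	sk = bpf_skc_lookup_tcp(ctx, &tuple, sizeof(tuple.ipv4),
 *				BPF_F_CURRENT_NETNS, 0);
 *	if (sk && bpf_tcp_check_syncookie(sk, iph, sizeof(*iph),
 *					  th, sizeof(*th)) == 0)
 *		... the ACK carries a valid SYN cookie ...
 *	if (sk)
 *		bpf_sk_release(sk);
 */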
7117BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
7118 struct tcphdr *, th, u32, th_len)
7119{
7120#ifdef CONFIG_SYN_COOKIES
7121 u32 cookie;
7122 u16 mss;
7123
c0df236e 7124 if (unlikely(!sk || th_len < sizeof(*th) || th_len != th->doff * 4))
7125 return -EINVAL;
7126
7127 if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
7128 return -EINVAL;
7129
f2e383b5 7130 if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
7131 return -ENOENT;
7132
7133 if (!th->syn || th->ack || th->fin || th->rst)
7134 return -EINVAL;
7135
7136 if (unlikely(iph_len < sizeof(struct iphdr)))
7137 return -EINVAL;
7138
7139 /* Both struct iphdr and struct ipv6hdr have the version field at the
7140 * same offset so we can cast to the shorter header (struct iphdr).
7141 */
7142 switch (((struct iphdr *)iph)->version) {
7143 case 4:
81ee0eb6 7144 if (sk->sk_family == AF_INET6 && ipv6_only_sock(sk))
7145 return -EINVAL;
7146
7147 mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
7148 break;
7149
7150#if IS_BUILTIN(CONFIG_IPV6)
7151 case 6:
7152 if (unlikely(iph_len < sizeof(struct ipv6hdr)))
7153 return -EINVAL;
7154
7155 if (sk->sk_family != AF_INET6)
7156 return -EINVAL;
7157
7158 mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
7159 break;
7160#endif /* CONFIG_IPV6 */
7161
7162 default:
7163 return -EPROTONOSUPPORT;
7164 }
0741be35 7165 if (mss == 0)
7166 return -ENOENT;
7167
7168 return cookie | ((u64)mss << 32);
7169#else
7170 return -EOPNOTSUPP;
7171#endif /* CONFIG_SYN_COOKIES */
7172}
7173
7174static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
7175 .func = bpf_tcp_gen_syncookie,
7176 .gpl_only = true, /* __cookie_v*_init_sequence() is GPL */
7177 .pkt_access = true,
7178 .ret_type = RET_INTEGER,
c0df236e 7179 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
216e3cd2 7180 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
70d66244 7181 .arg3_type = ARG_CONST_SIZE,
216e3cd2 7182 .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY,
7183 .arg5_type = ARG_CONST_SIZE,
7184};
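/* Usage note: on success bpf_tcp_gen_syncookie() packs its result as
 * cookie | ((u64)mss << 32) (see above), so a program unpacks it like this
 * (illustrative sketch):
 *
 *	s64 val = bpf_tcp_gen_syncookie(sk, iph, sizeof(*iph),
 *					th, th->doff * 4);
 *	if (val >= 0) {
 *		__u32 cookie = (__u32)val;
 *		__u16 mss = val >> 32;
 *		... build the SYN-ACK with seq = cookie and advertise mss ...
 *	}
 */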
7185
7186BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
7187{
27e5203b 7188 if (!sk || flags != 0)
7189 return -EINVAL;
7190 if (!skb_at_tc_ingress(skb))
7191 return -EOPNOTSUPP;
7192 if (unlikely(dev_net(skb->dev) != sock_net(sk)))
7193 return -ENETUNREACH;
8e368dc7 7194 if (unlikely(sk_fullsock(sk) && sk->sk_reuseport))
cf7fbe66 7195 return -ESOCKTNOSUPPORT;
7196 if (sk_is_refcounted(sk) &&
7197 unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
7198 return -ENOENT;
7199
7200 skb_orphan(skb);
7201 skb->sk = sk;
7202 skb->destructor = sock_pfree;
7203
7204 return 0;
7205}
7206
7207static const struct bpf_func_proto bpf_sk_assign_proto = {
7208 .func = bpf_sk_assign,
7209 .gpl_only = false,
7210 .ret_type = RET_INTEGER,
7211 .arg1_type = ARG_PTR_TO_CTX,
27e5203b 7212 .arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
7213 .arg3_type = ARG_ANYTHING,
7214};
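/* Usage note: bpf_sk_assign() implements tproxy-style socket steering at tc
 * ingress: the skb is orphaned and re-owned by the chosen socket, with
 * sock_pfree() as destructor so any reference taken above is dropped later.
 * Typical pattern (illustrative only, lookup details omitted):
 *
 *	sk = bpf_skc_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
 *				BPF_F_CURRENT_NETNS, 0);
 *	if (sk) {
 *		err = bpf_sk_assign(skb, sk, 0);
 *		bpf_sk_release(sk);
 *	}
 */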
7215
7216static const u8 *bpf_search_tcp_opt(const u8 *op, const u8 *opend,
7217 u8 search_kind, const u8 *magic,
7218 u8 magic_len, bool *eol)
7219{
7220 u8 kind, kind_len;
7221
7222 *eol = false;
7223
7224 while (op < opend) {
7225 kind = op[0];
7226
7227 if (kind == TCPOPT_EOL) {
7228 *eol = true;
7229 return ERR_PTR(-ENOMSG);
7230 } else if (kind == TCPOPT_NOP) {
7231 op++;
7232 continue;
7233 }
7234
7235 if (opend - op < 2 || opend - op < op[1] || op[1] < 2)
7236 /* Something is wrong in the received header.
7237 * Follow the TCP stack's tcp_parse_options()
7238 * and just bail here.
7239 */
7240 return ERR_PTR(-EFAULT);
7241
7242 kind_len = op[1];
7243 if (search_kind == kind) {
7244 if (!magic_len)
7245 return op;
7246
7247 if (magic_len > kind_len - 2)
7248 return ERR_PTR(-ENOMSG);
7249
7250 if (!memcmp(&op[2], magic, magic_len))
7251 return op;
7252 }
7253
7254 op += kind_len;
7255 }
7256
7257 return ERR_PTR(-ENOMSG);
7258}
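/* Note: bpf_search_tcp_opt() walks the TCP options as kind/length TLVs
 * between op and opend, skipping TCPOPT_NOP, stopping at TCPOPT_EOL
 * (reported via *eol) and bailing with -EFAULT on a malformed length. For an
 * experimental option it additionally compares magic_len bytes right after
 * the kind/length pair. E.g. the (made-up) byte sequence
 *
 *	01 01 fe 06 f9 89 aa bb
 *
 * (NOP, NOP, kind 254, len 6, 2-byte magic f9 89, 2 data bytes) is found by
 * search_kind == 254, magic == {0xf9, 0x89}, magic_len == 2.
 */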
7259
7260BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7261 void *, search_res, u32, len, u64, flags)
7262{
7263 bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN;
7264 const u8 *op, *opend, *magic, *search = search_res;
7265 u8 search_kind, search_len, copy_len, magic_len;
7266 int ret;
7267
 7268	/* Two bytes is the minimal option length, except for TCPOPT_NOP and
 7269	 * TCPOPT_EOL, which are useless for the bpf prog to learn and which
 7270	 * this helper therefore also disallows loading.
 7271	 */
7272 if (len < 2 || flags & ~BPF_LOAD_HDR_OPT_TCP_SYN)
7273 return -EINVAL;
7274
7275 search_kind = search[0];
7276 search_len = search[1];
7277
7278 if (search_len > len || search_kind == TCPOPT_NOP ||
7279 search_kind == TCPOPT_EOL)
7280 return -EINVAL;
7281
7282 if (search_kind == TCPOPT_EXP || search_kind == 253) {
7283 /* 16 or 32 bit magic. +2 for kind and kind length */
7284 if (search_len != 4 && search_len != 6)
7285 return -EINVAL;
7286 magic = &search[2];
7287 magic_len = search_len - 2;
7288 } else {
7289 if (search_len)
7290 return -EINVAL;
7291 magic = NULL;
7292 magic_len = 0;
7293 }
7294
7295 if (load_syn) {
7296 ret = bpf_sock_ops_get_syn(bpf_sock, TCP_BPF_SYN, &op);
7297 if (ret < 0)
7298 return ret;
7299
7300 opend = op + ret;
7301 op += sizeof(struct tcphdr);
7302 } else {
7303 if (!bpf_sock->skb ||
7304 bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB)
7305 /* This bpf_sock->op cannot call this helper */
7306 return -EPERM;
7307
7308 opend = bpf_sock->skb_data_end;
7309 op = bpf_sock->skb->data + sizeof(struct tcphdr);
7310 }
7311
7312 op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len,
7313 &eol);
7314 if (IS_ERR(op))
7315 return PTR_ERR(op);
7316
7317 copy_len = op[1];
7318 ret = copy_len;
7319 if (copy_len > len) {
7320 ret = -ENOSPC;
7321 copy_len = len;
7322 }
7323
7324 memcpy(search_res, op, copy_len);
7325 return ret;
7326}
7327
7328static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = {
7329 .func = bpf_sock_ops_load_hdr_opt,
7330 .gpl_only = false,
7331 .ret_type = RET_INTEGER,
7332 .arg1_type = ARG_PTR_TO_CTX,
7333 .arg2_type = ARG_PTR_TO_MEM,
7334 .arg3_type = ARG_CONST_SIZE,
7335 .arg4_type = ARG_ANYTHING,
7336};
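/* Usage note: from a sockops program this is the bpf_load_hdr_opt() helper.
 * The search key is passed in search_res itself: byte 0 is the option kind
 * and, for experimental kinds (TCPOPT_EXP or 253), bytes 2.. carry the magic
 * to match. Illustrative sketch (example kind/magic bytes only):
 *
 *	__u8 opt[8] = {};
 *	opt[0] = 254;			(experimental kind)
 *	opt[1] = 4;			(kind + len + 2-byte magic)
 *	opt[2] = 0xf9; opt[3] = 0x89;
 *	err = bpf_load_hdr_opt(skops, opt, sizeof(opt), 0);
 *	... err > 0: option copied into opt[]; -ENOMSG: not present ...
 */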
7337
7338BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7339 const void *, from, u32, len, u64, flags)
7340{
7341 u8 new_kind, new_kind_len, magic_len = 0, *opend;
7342 const u8 *op, *new_op, *magic = NULL;
7343 struct sk_buff *skb;
7344 bool eol;
7345
7346 if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB)
7347 return -EPERM;
7348
7349 if (len < 2 || flags)
7350 return -EINVAL;
7351
7352 new_op = from;
7353 new_kind = new_op[0];
7354 new_kind_len = new_op[1];
7355
7356 if (new_kind_len > len || new_kind == TCPOPT_NOP ||
7357 new_kind == TCPOPT_EOL)
7358 return -EINVAL;
7359
7360 if (new_kind_len > bpf_sock->remaining_opt_len)
7361 return -ENOSPC;
7362
7363 /* 253 is another experimental kind */
7364 if (new_kind == TCPOPT_EXP || new_kind == 253) {
7365 if (new_kind_len < 4)
7366 return -EINVAL;
 7367		/* Also match on the 2-byte magic.
 7368		 * RFC 6994: the magic can be 2 or 4 bytes.
 7369		 * Hence, matching on only 2 bytes is the
 7370		 * conservative choice, but it is the right
 7371		 * thing to do for the 'search-for-duplication'
 7372		 * purpose.
 7373		 */
7374 magic = &new_op[2];
7375 magic_len = 2;
7376 }
7377
7378 /* Check for duplication */
7379 skb = bpf_sock->skb;
7380 op = skb->data + sizeof(struct tcphdr);
7381 opend = bpf_sock->skb_data_end;
7382
7383 op = bpf_search_tcp_opt(op, opend, new_kind, magic, magic_len,
7384 &eol);
7385 if (!IS_ERR(op))
7386 return -EEXIST;
7387
7388 if (PTR_ERR(op) != -ENOMSG)
7389 return PTR_ERR(op);
7390
7391 if (eol)
 7392		/* TCPOPT_EOL has been hit. Treat it as if no more
 7393		 * header options can be written.
 7394		 */
7395 return -ENOSPC;
7396
7397 /* No duplication found. Store the header option. */
7398 memcpy(opend, from, new_kind_len);
7399
7400 bpf_sock->remaining_opt_len -= new_kind_len;
7401 bpf_sock->skb_data_end += new_kind_len;
7402
7403 return 0;
7404}
7405
7406static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = {
7407 .func = bpf_sock_ops_store_hdr_opt,
7408 .gpl_only = false,
7409 .ret_type = RET_INTEGER,
7410 .arg1_type = ARG_PTR_TO_CTX,
216e3cd2 7411 .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY,
7412 .arg3_type = ARG_CONST_SIZE,
7413 .arg4_type = ARG_ANYTHING,
7414};
7415
7416BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
7417 u32, len, u64, flags)
7418{
7419 if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB)
7420 return -EPERM;
7421
7422 if (flags || len < 2)
7423 return -EINVAL;
7424
7425 if (len > bpf_sock->remaining_opt_len)
7426 return -ENOSPC;
7427
7428 bpf_sock->remaining_opt_len -= len;
7429
7430 return 0;
7431}
7432
7433static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
7434 .func = bpf_sock_ops_reserve_hdr_opt,
7435 .gpl_only = false,
7436 .ret_type = RET_INTEGER,
7437 .arg1_type = ARG_PTR_TO_CTX,
7438 .arg2_type = ARG_ANYTHING,
7439 .arg3_type = ARG_ANYTHING,
7440};
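/* Usage note: bpf_sock_ops_reserve_hdr_opt() and bpf_sock_ops_store_hdr_opt()
 * above pair up across sockops callbacks (illustrative sketch):
 *
 *	at BPF_SOCK_OPS_HDR_OPT_LEN_CB:
 *		bpf_reserve_hdr_opt(skops, opt_len, 0);
 *	at BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
 *		bpf_store_hdr_opt(skops, opt, opt_len, 0);
 *
 * bpf_store_hdr_opt() refuses to write a duplicate: an option of the same
 * kind (and, for experimental kinds, the same leading 2-byte magic) that is
 * already present makes it return -EEXIST.
 */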
7441
BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
	   u64, tstamp, u32, tstamp_type)
{
	/* skb_clear_delivery_time() is done for inet protocol */
	if (skb->protocol != htons(ETH_P_IP) &&
	    skb->protocol != htons(ETH_P_IPV6))
		return -EOPNOTSUPP;

	switch (tstamp_type) {
	case BPF_SKB_TSTAMP_DELIVERY_MONO:
		if (!tstamp)
			return -EINVAL;
		skb->tstamp = tstamp;
		skb->mono_delivery_time = 1;
		break;
	case BPF_SKB_TSTAMP_UNSPEC:
		if (tstamp)
			return -EINVAL;
		skb->tstamp = 0;
		skb->mono_delivery_time = 0;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

static const struct bpf_func_proto bpf_skb_set_tstamp_proto = {
	.func		= bpf_skb_set_tstamp,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_ANYTHING,
};
7478
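/* Usage note: bpf_skb_set_tstamp() is the BPF-side way to set an earliest
 * departure time on egress, e.g. for fq-based pacing from a tc program
 * (illustrative sketch; delay_ns is an assumed variable):
 *
 *	__u64 now = bpf_ktime_get_ns();		(CLOCK_MONOTONIC)
 *	bpf_skb_set_tstamp(skb, now + delay_ns, BPF_SKB_TSTAMP_DELIVERY_MONO);
 *
 * Passing BPF_SKB_TSTAMP_UNSPEC with tstamp == 0 clears the timestamp again
 * (see the switch above).
 */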
7479#ifdef CONFIG_SYN_COOKIES
7480BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv4, struct iphdr *, iph,
7481 struct tcphdr *, th, u32, th_len)
7482{
7483 u32 cookie;
7484 u16 mss;
7485
7486 if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
7487 return -EINVAL;
7488
7489 mss = tcp_parse_mss_option(th, 0) ?: TCP_MSS_DEFAULT;
7490 cookie = __cookie_v4_init_sequence(iph, th, &mss);
7491
7492 return cookie | ((u64)mss << 32);
7493}
7494
7495static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
7496 .func = bpf_tcp_raw_gen_syncookie_ipv4,
7497 .gpl_only = true, /* __cookie_v4_init_sequence() is GPL */
7498 .pkt_access = true,
7499 .ret_type = RET_INTEGER,
7500 .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7501 .arg1_size = sizeof(struct iphdr),
7502 .arg2_type = ARG_PTR_TO_MEM,
7503 .arg3_type = ARG_CONST_SIZE,
7504};
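/* Usage note: unlike bpf_tcp_gen_syncookie(), the raw variant above needs no
 * listening socket, which makes it usable from an XDP-based synproxy. The
 * return value uses the same packing, cookie | ((u64)mss << 32), so it
 * unpacks exactly as in the earlier sketch.
 */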
7505
7506BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
7507 struct tcphdr *, th, u32, th_len)
7508{
7509#if IS_BUILTIN(CONFIG_IPV6)
7510 const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
7511 sizeof(struct ipv6hdr);
7512 u32 cookie;
7513 u16 mss;
7514
7515 if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
7516 return -EINVAL;
7517
7518 mss = tcp_parse_mss_option(th, 0) ?: mss_clamp;
7519 cookie = __cookie_v6_init_sequence(iph, th, &mss);
7520
7521 return cookie | ((u64)mss << 32);
7522#else
7523 return -EPROTONOSUPPORT;
7524#endif
7525}
7526
7527static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
7528 .func = bpf_tcp_raw_gen_syncookie_ipv6,
7529 .gpl_only = true, /* __cookie_v6_init_sequence() is GPL */
7530 .pkt_access = true,
7531 .ret_type = RET_INTEGER,
7532 .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7533 .arg1_size = sizeof(struct ipv6hdr),
7534 .arg2_type = ARG_PTR_TO_MEM,
7535 .arg3_type = ARG_CONST_SIZE,
7536};
7537
7538BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
7539 struct tcphdr *, th)
7540{
7541 u32 cookie = ntohl(th->ack_seq) - 1;
7542
7543 if (__cookie_v4_check(iph, th, cookie) > 0)
7544 return 0;
7545
7546 return -EACCES;
7547}
7548
7549static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = {
7550 .func = bpf_tcp_raw_check_syncookie_ipv4,
7551 .gpl_only = true, /* __cookie_v4_check is GPL */
7552 .pkt_access = true,
7553 .ret_type = RET_INTEGER,
7554 .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7555 .arg1_size = sizeof(struct iphdr),
7556 .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7557 .arg2_size = sizeof(struct tcphdr),
7558};
7559
7560BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph,
7561 struct tcphdr *, th)
7562{
7563#if IS_BUILTIN(CONFIG_IPV6)
7564 u32 cookie = ntohl(th->ack_seq) - 1;
7565
7566 if (__cookie_v6_check(iph, th, cookie) > 0)
7567 return 0;
7568
7569 return -EACCES;
7570#else
7571 return -EPROTONOSUPPORT;
7572#endif
7573}
7574
7575static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv6_proto = {
7576 .func = bpf_tcp_raw_check_syncookie_ipv6,
7577 .gpl_only = true, /* __cookie_v6_check is GPL */
7578 .pkt_access = true,
7579 .ret_type = RET_INTEGER,
7580 .arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7581 .arg1_size = sizeof(struct ipv6hdr),
7582 .arg2_type = ARG_PTR_TO_FIXED_SIZE_MEM,
7583 .arg2_size = sizeof(struct tcphdr),
7584};
7585#endif /* CONFIG_SYN_COOKIES */
7586
df3f94a0 7587#endif /* CONFIG_INET */
6acc9b43 7588
7589bool bpf_helper_changes_pkt_data(void *func)
7590{
7591 if (func == bpf_skb_vlan_push ||
7592 func == bpf_skb_vlan_pop ||
7593 func == bpf_skb_store_bytes ||
7594 func == bpf_skb_change_proto ||
7595 func == bpf_skb_change_head ||
0ea488ff 7596 func == sk_skb_change_head ||
fe94cc29 7597 func == bpf_skb_change_tail ||
0ea488ff 7598 func == sk_skb_change_tail ||
fe94cc29 7599 func == bpf_skb_adjust_room ||
18ebe16d 7600 func == sk_skb_adjust_room ||
fe94cc29 7601 func == bpf_skb_pull_data ||
0ea488ff 7602 func == sk_skb_pull_data ||
7603 func == bpf_clone_redirect ||
7604 func == bpf_l3_csum_replace ||
7605 func == bpf_l4_csum_replace ||
7606 func == bpf_xdp_adjust_head ||
7607 func == bpf_xdp_adjust_meta ||
7608 func == bpf_msg_pull_data ||
6fff607e 7609 func == bpf_msg_push_data ||
7246d8ed 7610 func == bpf_msg_pop_data ||
fe94cc29 7611 func == bpf_xdp_adjust_tail ||
61d76980 7612#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
7613 func == bpf_lwt_seg6_store_bytes ||
7614 func == bpf_lwt_seg6_adjust_srh ||
61d76980 7615 func == bpf_lwt_seg6_action ||
7616#endif
7617#ifdef CONFIG_INET
7618 func == bpf_sock_ops_store_hdr_opt ||
61d76980 7619#endif
7620 func == bpf_lwt_in_push_encap ||
7621 func == bpf_lwt_xmit_push_encap)
7622 return true;
7623
7624 return false;
7625}
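/* Note: the verifier consults bpf_helper_changes_pkt_data() to know which
 * helpers may move or reallocate packet data; after a call to any of them,
 * all packet pointers held by the program are invalidated and must be
 * re-derived. Illustrative BPF-side consequence:
 *
 *	void *data     = (void *)(long)skb->data;
 *	void *data_end = (void *)(long)skb->data_end;
 *	bpf_skb_pull_data(skb, 0);
 *	... data/data_end above are now stale; reload them from skb and
 *	    re-check bounds before dereferencing ...
 */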
7626
6890896b 7627const struct bpf_func_proto bpf_event_output_data_proto __weak;
f7c6cb1d 7628const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
89aa0758 7629
ae2cf1c4 7630static const struct bpf_func_proto *
5e43f899 7631sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
ae2cf1c4 7632{
bed89185
SF
7633 const struct bpf_func_proto *func_proto;
7634
7635 func_proto = cgroup_common_func_proto(func_id, prog);
7636 if (func_proto)
7637 return func_proto;
7638
7639 func_proto = cgroup_current_func_proto(func_id, prog);
7640 if (func_proto)
7641 return func_proto;
7642
ae2cf1c4 7643 switch (func_id) {
0e53d9e5
DB
7644 case BPF_FUNC_get_socket_cookie:
7645 return &bpf_get_socket_cookie_sock_proto;
f318903c
DB
7646 case BPF_FUNC_get_netns_cookie:
7647 return &bpf_get_netns_cookie_sock_proto;
fcf752ea
DB
7648 case BPF_FUNC_perf_event_output:
7649 return &bpf_event_output_data_proto;
f7c6cb1d
SF
7650 case BPF_FUNC_sk_storage_get:
7651 return &bpf_sk_storage_get_cg_sock_proto;
5e0bc308
DB
7652 case BPF_FUNC_ktime_get_coarse_ns:
7653 return &bpf_ktime_get_coarse_ns_proto;
ae2cf1c4
DA
7654 default:
7655 return bpf_base_func_proto(func_id);
7656 }
7657}
7658
4fbac77d
AI
7659static const struct bpf_func_proto *
7660sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7661{
bed89185
SF
7662 const struct bpf_func_proto *func_proto;
7663
7664 func_proto = cgroup_common_func_proto(func_id, prog);
7665 if (func_proto)
7666 return func_proto;
7667
7668 func_proto = cgroup_current_func_proto(func_id, prog);
7669 if (func_proto)
7670 return func_proto;
7671
4fbac77d 7672 switch (func_id) {
d74bad4e
AI
7673 case BPF_FUNC_bind:
7674 switch (prog->expected_attach_type) {
7675 case BPF_CGROUP_INET4_CONNECT:
7676 case BPF_CGROUP_INET6_CONNECT:
7677 return &bpf_bind_proto;
7678 default:
7679 return NULL;
7680 }
d692f113
AI
7681 case BPF_FUNC_get_socket_cookie:
7682 return &bpf_get_socket_cookie_sock_addr_proto;
f318903c
DB
7683 case BPF_FUNC_get_netns_cookie:
7684 return &bpf_get_netns_cookie_sock_addr_proto;
fcf752ea
DB
7685 case BPF_FUNC_perf_event_output:
7686 return &bpf_event_output_data_proto;
6c49e65e
AI
7687#ifdef CONFIG_INET
7688 case BPF_FUNC_sk_lookup_tcp:
7689 return &bpf_sock_addr_sk_lookup_tcp_proto;
7690 case BPF_FUNC_sk_lookup_udp:
7691 return &bpf_sock_addr_sk_lookup_udp_proto;
7692 case BPF_FUNC_sk_release:
7693 return &bpf_sk_release_proto;
edbf8c01
LB
7694 case BPF_FUNC_skc_lookup_tcp:
7695 return &bpf_sock_addr_skc_lookup_tcp_proto;
6c49e65e 7696#endif /* CONFIG_INET */
fb85c4a7
SF
7697 case BPF_FUNC_sk_storage_get:
7698 return &bpf_sk_storage_get_proto;
7699 case BPF_FUNC_sk_storage_delete:
7700 return &bpf_sk_storage_delete_proto;
beecf11b
SF
7701 case BPF_FUNC_setsockopt:
7702 switch (prog->expected_attach_type) {
427167c0
SF
7703 case BPF_CGROUP_INET4_BIND:
7704 case BPF_CGROUP_INET6_BIND:
beecf11b
SF
7705 case BPF_CGROUP_INET4_CONNECT:
7706 case BPF_CGROUP_INET6_CONNECT:
4c3384d7
SF
7707 case BPF_CGROUP_UDP4_RECVMSG:
7708 case BPF_CGROUP_UDP6_RECVMSG:
62476cc1
SF
7709 case BPF_CGROUP_UDP4_SENDMSG:
7710 case BPF_CGROUP_UDP6_SENDMSG:
073f4ec1
SF
7711 case BPF_CGROUP_INET4_GETPEERNAME:
7712 case BPF_CGROUP_INET6_GETPEERNAME:
7713 case BPF_CGROUP_INET4_GETSOCKNAME:
7714 case BPF_CGROUP_INET6_GETSOCKNAME:
beecf11b
SF
7715 return &bpf_sock_addr_setsockopt_proto;
7716 default:
7717 return NULL;
7718 }
7719 case BPF_FUNC_getsockopt:
7720 switch (prog->expected_attach_type) {
427167c0
SF
7721 case BPF_CGROUP_INET4_BIND:
7722 case BPF_CGROUP_INET6_BIND:
beecf11b
SF
7723 case BPF_CGROUP_INET4_CONNECT:
7724 case BPF_CGROUP_INET6_CONNECT:
4c3384d7
SF
7725 case BPF_CGROUP_UDP4_RECVMSG:
7726 case BPF_CGROUP_UDP6_RECVMSG:
62476cc1
SF
7727 case BPF_CGROUP_UDP4_SENDMSG:
7728 case BPF_CGROUP_UDP6_SENDMSG:
073f4ec1
SF
7729 case BPF_CGROUP_INET4_GETPEERNAME:
7730 case BPF_CGROUP_INET6_GETPEERNAME:
7731 case BPF_CGROUP_INET4_GETSOCKNAME:
7732 case BPF_CGROUP_INET6_GETSOCKNAME:
beecf11b
SF
7733 return &bpf_sock_addr_getsockopt_proto;
7734 default:
7735 return NULL;
7736 }
4fbac77d 7737 default:
1df8f55a 7738 return bpf_sk_base_func_proto(func_id);
4fbac77d
AI
7739 }
7740}
7741
2492d3b8 7742static const struct bpf_func_proto *
5e43f899 7743sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2492d3b8
DB
7744{
7745 switch (func_id) {
7746 case BPF_FUNC_skb_load_bytes:
7747 return &bpf_skb_load_bytes_proto;
4e1ec56c
DB
7748 case BPF_FUNC_skb_load_bytes_relative:
7749 return &bpf_skb_load_bytes_relative_proto;
91b8270f
CF
7750 case BPF_FUNC_get_socket_cookie:
7751 return &bpf_get_socket_cookie_proto;
6acc5c29
CF
7752 case BPF_FUNC_get_socket_uid:
7753 return &bpf_get_socket_uid_proto;
7c4b90d7
AZ
7754 case BPF_FUNC_perf_event_output:
7755 return &bpf_skb_event_output_proto;
2492d3b8 7756 default:
1df8f55a 7757 return bpf_sk_base_func_proto(func_id);
2492d3b8
DB
7758 }
7759}
7760
6ac99e8f
MKL
7761const struct bpf_func_proto bpf_sk_storage_get_proto __weak;
7762const struct bpf_func_proto bpf_sk_storage_delete_proto __weak;
7763
cd339431
RG
7764static const struct bpf_func_proto *
7765cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7766{
bed89185
SF
7767 const struct bpf_func_proto *func_proto;
7768
7769 func_proto = cgroup_common_func_proto(func_id, prog);
7770 if (func_proto)
7771 return func_proto;
7772
cd339431 7773 switch (func_id) {
46f8bc92
MKL
7774 case BPF_FUNC_sk_fullsock:
7775 return &bpf_sk_fullsock_proto;
6ac99e8f
MKL
7776 case BPF_FUNC_sk_storage_get:
7777 return &bpf_sk_storage_get_proto;
7778 case BPF_FUNC_sk_storage_delete:
7779 return &bpf_sk_storage_delete_proto;
7c4b90d7
AZ
7780 case BPF_FUNC_perf_event_output:
7781 return &bpf_skb_event_output_proto;
4ecabd55
RG
7782#ifdef CONFIG_SOCK_CGROUP_DATA
7783 case BPF_FUNC_skb_cgroup_id:
7784 return &bpf_skb_cgroup_id_proto;
06d3e4c9
AI
7785 case BPF_FUNC_skb_ancestor_cgroup_id:
7786 return &bpf_skb_ancestor_cgroup_id_proto;
f307fa2c
AI
7787 case BPF_FUNC_sk_cgroup_id:
7788 return &bpf_sk_cgroup_id_proto;
7789 case BPF_FUNC_sk_ancestor_cgroup_id:
7790 return &bpf_sk_ancestor_cgroup_id_proto;
4ecabd55 7791#endif
655a51e5 7792#ifdef CONFIG_INET
d56c2f95
AI
7793 case BPF_FUNC_sk_lookup_tcp:
7794 return &bpf_sk_lookup_tcp_proto;
7795 case BPF_FUNC_sk_lookup_udp:
7796 return &bpf_sk_lookup_udp_proto;
7797 case BPF_FUNC_sk_release:
7798 return &bpf_sk_release_proto;
7799 case BPF_FUNC_skc_lookup_tcp:
7800 return &bpf_skc_lookup_tcp_proto;
655a51e5
MKL
7801 case BPF_FUNC_tcp_sock:
7802 return &bpf_tcp_sock_proto;
dbafd7dd
MKL
7803 case BPF_FUNC_get_listener_sock:
7804 return &bpf_get_listener_sock_proto;
f7c917ba 7805 case BPF_FUNC_skb_ecn_set_ce:
7806 return &bpf_skb_ecn_set_ce_proto;
655a51e5 7807#endif
cd339431
RG
7808 default:
7809 return sk_filter_func_proto(func_id, prog);
7810 }
7811}
7812
608cd71a 7813static const struct bpf_func_proto *
5e43f899 7814tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
608cd71a
AS
7815{
7816 switch (func_id) {
7817 case BPF_FUNC_skb_store_bytes:
7818 return &bpf_skb_store_bytes_proto;
05c74e5e
DB
7819 case BPF_FUNC_skb_load_bytes:
7820 return &bpf_skb_load_bytes_proto;
4e1ec56c
DB
7821 case BPF_FUNC_skb_load_bytes_relative:
7822 return &bpf_skb_load_bytes_relative_proto;
36bbef52
DB
7823 case BPF_FUNC_skb_pull_data:
7824 return &bpf_skb_pull_data_proto;
7d672345
DB
7825 case BPF_FUNC_csum_diff:
7826 return &bpf_csum_diff_proto;
36bbef52
DB
7827 case BPF_FUNC_csum_update:
7828 return &bpf_csum_update_proto;
7cdec54f
DB
7829 case BPF_FUNC_csum_level:
7830 return &bpf_csum_level_proto;
91bc4822
AS
7831 case BPF_FUNC_l3_csum_replace:
7832 return &bpf_l3_csum_replace_proto;
7833 case BPF_FUNC_l4_csum_replace:
7834 return &bpf_l4_csum_replace_proto;
3896d655
AS
7835 case BPF_FUNC_clone_redirect:
7836 return &bpf_clone_redirect_proto;
8d20aabe
DB
7837 case BPF_FUNC_get_cgroup_classid:
7838 return &bpf_get_cgroup_classid_proto;
4e10df9a
AS
7839 case BPF_FUNC_skb_vlan_push:
7840 return &bpf_skb_vlan_push_proto;
7841 case BPF_FUNC_skb_vlan_pop:
7842 return &bpf_skb_vlan_pop_proto;
6578171a
DB
7843 case BPF_FUNC_skb_change_proto:
7844 return &bpf_skb_change_proto_proto;
d2485c42
DB
7845 case BPF_FUNC_skb_change_type:
7846 return &bpf_skb_change_type_proto;
2be7e212
DB
7847 case BPF_FUNC_skb_adjust_room:
7848 return &bpf_skb_adjust_room_proto;
5293efe6
DB
7849 case BPF_FUNC_skb_change_tail:
7850 return &bpf_skb_change_tail_proto;
6f3f65d8
LC
7851 case BPF_FUNC_skb_change_head:
7852 return &bpf_skb_change_head_proto;
d3aa45ce
AS
7853 case BPF_FUNC_skb_get_tunnel_key:
7854 return &bpf_skb_get_tunnel_key_proto;
7855 case BPF_FUNC_skb_set_tunnel_key:
14ca0751
DB
7856 return bpf_get_skb_set_tunnel_proto(func_id);
7857 case BPF_FUNC_skb_get_tunnel_opt:
7858 return &bpf_skb_get_tunnel_opt_proto;
7859 case BPF_FUNC_skb_set_tunnel_opt:
7860 return bpf_get_skb_set_tunnel_proto(func_id);
27b29f63
AS
7861 case BPF_FUNC_redirect:
7862 return &bpf_redirect_proto;
b4ab3141
DB
7863 case BPF_FUNC_redirect_neigh:
7864 return &bpf_redirect_neigh_proto;
9aa1206e
DB
7865 case BPF_FUNC_redirect_peer:
7866 return &bpf_redirect_peer_proto;
c46646d0
DB
7867 case BPF_FUNC_get_route_realm:
7868 return &bpf_get_route_realm_proto;
13c5c240
DB
7869 case BPF_FUNC_get_hash_recalc:
7870 return &bpf_get_hash_recalc_proto;
7a4b28c6
DB
7871 case BPF_FUNC_set_hash_invalid:
7872 return &bpf_set_hash_invalid_proto;
ded092cd
DB
7873 case BPF_FUNC_set_hash:
7874 return &bpf_set_hash_proto;
bd570ff9 7875 case BPF_FUNC_perf_event_output:
555c8a86 7876 return &bpf_skb_event_output_proto;
80b48c44
DB
7877 case BPF_FUNC_get_smp_processor_id:
7878 return &bpf_get_smp_processor_id_proto;
747ea55e
DB
7879 case BPF_FUNC_skb_under_cgroup:
7880 return &bpf_skb_under_cgroup_proto;
91b8270f
CF
7881 case BPF_FUNC_get_socket_cookie:
7882 return &bpf_get_socket_cookie_proto;
6acc5c29
CF
7883 case BPF_FUNC_get_socket_uid:
7884 return &bpf_get_socket_uid_proto;
cb20b08e
DB
7885 case BPF_FUNC_fib_lookup:
7886 return &bpf_skb_fib_lookup_proto;
34b2021c
JDB
7887 case BPF_FUNC_check_mtu:
7888 return &bpf_skb_check_mtu_proto;
46f8bc92
MKL
7889 case BPF_FUNC_sk_fullsock:
7890 return &bpf_sk_fullsock_proto;
6ac99e8f
MKL
7891 case BPF_FUNC_sk_storage_get:
7892 return &bpf_sk_storage_get_proto;
7893 case BPF_FUNC_sk_storage_delete:
7894 return &bpf_sk_storage_delete_proto;
12bed760
EB
7895#ifdef CONFIG_XFRM
7896 case BPF_FUNC_skb_get_xfrm_state:
7897 return &bpf_skb_get_xfrm_state_proto;
7898#endif
b426ce83
DB
7899#ifdef CONFIG_CGROUP_NET_CLASSID
7900 case BPF_FUNC_skb_cgroup_classid:
7901 return &bpf_skb_cgroup_classid_proto;
7902#endif
cb20b08e
DB
7903#ifdef CONFIG_SOCK_CGROUP_DATA
7904 case BPF_FUNC_skb_cgroup_id:
7905 return &bpf_skb_cgroup_id_proto;
77236281
AI
7906 case BPF_FUNC_skb_ancestor_cgroup_id:
7907 return &bpf_skb_ancestor_cgroup_id_proto;
cb20b08e 7908#endif
df3f94a0 7909#ifdef CONFIG_INET
6acc9b43
JS
7910 case BPF_FUNC_sk_lookup_tcp:
7911 return &bpf_sk_lookup_tcp_proto;
7912 case BPF_FUNC_sk_lookup_udp:
7913 return &bpf_sk_lookup_udp_proto;
7914 case BPF_FUNC_sk_release:
7915 return &bpf_sk_release_proto;
655a51e5
MKL
7916 case BPF_FUNC_tcp_sock:
7917 return &bpf_tcp_sock_proto;
dbafd7dd
MKL
7918 case BPF_FUNC_get_listener_sock:
7919 return &bpf_get_listener_sock_proto;
edbf8c01
LB
7920 case BPF_FUNC_skc_lookup_tcp:
7921 return &bpf_skc_lookup_tcp_proto;
39904084
LB
7922 case BPF_FUNC_tcp_check_syncookie:
7923 return &bpf_tcp_check_syncookie_proto;
315a2029
PO
7924 case BPF_FUNC_skb_ecn_set_ce:
7925 return &bpf_skb_ecn_set_ce_proto;
70d66244
PP
7926 case BPF_FUNC_tcp_gen_syncookie:
7927 return &bpf_tcp_gen_syncookie_proto;
cf7fbe66
JS
7928 case BPF_FUNC_sk_assign:
7929 return &bpf_sk_assign_proto;
9bb984f2
MKL
7930 case BPF_FUNC_skb_set_tstamp:
7931 return &bpf_skb_set_tstamp_proto;
9a4cf073
MM
7932#ifdef CONFIG_SYN_COOKIES
7933 case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
7934 return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
7935 case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
7936 return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
7937 case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
7938 return &bpf_tcp_raw_check_syncookie_ipv4_proto;
7939 case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
7940 return &bpf_tcp_raw_check_syncookie_ipv6_proto;
7941#endif
df3f94a0 7942#endif
608cd71a 7943 default:
1df8f55a 7944 return bpf_sk_base_func_proto(func_id);
608cd71a
AS
7945 }
7946}
7947
6a773a15 7948static const struct bpf_func_proto *
5e43f899 7949xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6a773a15 7950{
4de16969
DB
7951 switch (func_id) {
7952 case BPF_FUNC_perf_event_output:
7953 return &bpf_xdp_event_output_proto;
669dc4d7
DB
7954 case BPF_FUNC_get_smp_processor_id:
7955 return &bpf_get_smp_processor_id_proto;
205c3807
DB
7956 case BPF_FUNC_csum_diff:
7957 return &bpf_csum_diff_proto;
17bedab2
MKL
7958 case BPF_FUNC_xdp_adjust_head:
7959 return &bpf_xdp_adjust_head_proto;
de8f3a83
DB
7960 case BPF_FUNC_xdp_adjust_meta:
7961 return &bpf_xdp_adjust_meta_proto;
814abfab
JF
7962 case BPF_FUNC_redirect:
7963 return &bpf_xdp_redirect_proto;
97f91a7c 7964 case BPF_FUNC_redirect_map:
e4a8e817 7965 return &bpf_xdp_redirect_map_proto;
b32cc5b9
NS
7966 case BPF_FUNC_xdp_adjust_tail:
7967 return &bpf_xdp_adjust_tail_proto;
0165cc81
LB
7968 case BPF_FUNC_xdp_get_buff_len:
7969 return &bpf_xdp_get_buff_len_proto;
3f364222
LB
7970 case BPF_FUNC_xdp_load_bytes:
7971 return &bpf_xdp_load_bytes_proto;
7972 case BPF_FUNC_xdp_store_bytes:
7973 return &bpf_xdp_store_bytes_proto;
87f5fc7e
DA
7974 case BPF_FUNC_fib_lookup:
7975 return &bpf_xdp_fib_lookup_proto;
34b2021c
JDB
7976 case BPF_FUNC_check_mtu:
7977 return &bpf_xdp_check_mtu_proto;
c8123ead
NH
7978#ifdef CONFIG_INET
7979 case BPF_FUNC_sk_lookup_udp:
7980 return &bpf_xdp_sk_lookup_udp_proto;
7981 case BPF_FUNC_sk_lookup_tcp:
7982 return &bpf_xdp_sk_lookup_tcp_proto;
7983 case BPF_FUNC_sk_release:
7984 return &bpf_sk_release_proto;
edbf8c01
LB
7985 case BPF_FUNC_skc_lookup_tcp:
7986 return &bpf_xdp_skc_lookup_tcp_proto;
39904084
LB
7987 case BPF_FUNC_tcp_check_syncookie:
7988 return &bpf_tcp_check_syncookie_proto;
70d66244
PP
7989 case BPF_FUNC_tcp_gen_syncookie:
7990 return &bpf_tcp_gen_syncookie_proto;
33bf9885
MM
7991#ifdef CONFIG_SYN_COOKIES
7992 case BPF_FUNC_tcp_raw_gen_syncookie_ipv4:
7993 return &bpf_tcp_raw_gen_syncookie_ipv4_proto;
7994 case BPF_FUNC_tcp_raw_gen_syncookie_ipv6:
7995 return &bpf_tcp_raw_gen_syncookie_ipv6_proto;
7996 case BPF_FUNC_tcp_raw_check_syncookie_ipv4:
7997 return &bpf_tcp_raw_check_syncookie_ipv4_proto;
7998 case BPF_FUNC_tcp_raw_check_syncookie_ipv6:
7999 return &bpf_tcp_raw_check_syncookie_ipv6_proto;
8000#endif
c8123ead 8001#endif
4de16969 8002 default:
1df8f55a 8003 return bpf_sk_base_func_proto(func_id);
4de16969 8004 }
578ce69f
THJ
8005
8006#if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)
8007 /* The nf_conn___init type is used in the NF_CONNTRACK kfuncs. The
8008 * kfuncs are defined in two different modules, and we want to be able
 8009	 * to use them interchangeably with the same BTF type ID. Because modules
8010 * can't de-duplicate BTF IDs between each other, we need the type to be
8011 * referenced in the vmlinux BTF or the verifier will get confused about
8012 * the different types. So we add this dummy type reference which will
8013 * be included in vmlinux BTF, allowing both modules to refer to the
8014 * same type ID.
8015 */
8016 BTF_TYPE_EMIT(struct nf_conn___init);
8017#endif
6a773a15
BB
8018}
8019
604326b4
DB
8020const struct bpf_func_proto bpf_sock_map_update_proto __weak;
8021const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
8022
8c4b4c7e 8023static const struct bpf_func_proto *
5e43f899 8024sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8c4b4c7e 8025{
bed89185
SF
8026 const struct bpf_func_proto *func_proto;
8027
8028 func_proto = cgroup_common_func_proto(func_id, prog);
8029 if (func_proto)
8030 return func_proto;
8031
8c4b4c7e
LB
8032 switch (func_id) {
8033 case BPF_FUNC_setsockopt:
beecf11b 8034 return &bpf_sock_ops_setsockopt_proto;
cd86d1fd 8035 case BPF_FUNC_getsockopt:
beecf11b 8036 return &bpf_sock_ops_getsockopt_proto;
b13d8807
LB
8037 case BPF_FUNC_sock_ops_cb_flags_set:
8038 return &bpf_sock_ops_cb_flags_set_proto;
174a79ff
JF
8039 case BPF_FUNC_sock_map_update:
8040 return &bpf_sock_map_update_proto;
81110384
JF
8041 case BPF_FUNC_sock_hash_update:
8042 return &bpf_sock_hash_update_proto;
d692f113
AI
8043 case BPF_FUNC_get_socket_cookie:
8044 return &bpf_get_socket_cookie_sock_ops_proto;
a5a3a828 8045 case BPF_FUNC_perf_event_output:
fcf752ea 8046 return &bpf_event_output_data_proto;
1314ef56
SF
8047 case BPF_FUNC_sk_storage_get:
8048 return &bpf_sk_storage_get_proto;
8049 case BPF_FUNC_sk_storage_delete:
8050 return &bpf_sk_storage_delete_proto;
6cf1770d
XL
8051 case BPF_FUNC_get_netns_cookie:
8052 return &bpf_get_netns_cookie_sock_ops_proto;
1314ef56 8053#ifdef CONFIG_INET
0813a841
MKL
8054 case BPF_FUNC_load_hdr_opt:
8055 return &bpf_sock_ops_load_hdr_opt_proto;
8056 case BPF_FUNC_store_hdr_opt:
8057 return &bpf_sock_ops_store_hdr_opt_proto;
8058 case BPF_FUNC_reserve_hdr_opt:
8059 return &bpf_sock_ops_reserve_hdr_opt_proto;
1314ef56
SF
8060 case BPF_FUNC_tcp_sock:
8061 return &bpf_tcp_sock_proto;
8062#endif /* CONFIG_INET */
8c4b4c7e 8063 default:
1df8f55a 8064 return bpf_sk_base_func_proto(func_id);
8c4b4c7e
LB
8065 }
8066}
8067
604326b4
DB
8068const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
8069const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
8070
5e43f899
AI
8071static const struct bpf_func_proto *
8072sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4f738adb
JF
8073{
8074 switch (func_id) {
8075 case BPF_FUNC_msg_redirect_map:
8076 return &bpf_msg_redirect_map_proto;
81110384
JF
8077 case BPF_FUNC_msg_redirect_hash:
8078 return &bpf_msg_redirect_hash_proto;
2a100317
JF
8079 case BPF_FUNC_msg_apply_bytes:
8080 return &bpf_msg_apply_bytes_proto;
91843d54
JF
8081 case BPF_FUNC_msg_cork_bytes:
8082 return &bpf_msg_cork_bytes_proto;
015632bb
JF
8083 case BPF_FUNC_msg_pull_data:
8084 return &bpf_msg_pull_data_proto;
6fff607e
JF
8085 case BPF_FUNC_msg_push_data:
8086 return &bpf_msg_push_data_proto;
7246d8ed
JF
8087 case BPF_FUNC_msg_pop_data:
8088 return &bpf_msg_pop_data_proto;
abe3cac8
JF
8089 case BPF_FUNC_perf_event_output:
8090 return &bpf_event_output_data_proto;
8091 case BPF_FUNC_get_current_uid_gid:
8092 return &bpf_get_current_uid_gid_proto;
8093 case BPF_FUNC_get_current_pid_tgid:
8094 return &bpf_get_current_pid_tgid_proto;
13d70f5a
JF
8095 case BPF_FUNC_sk_storage_get:
8096 return &bpf_sk_storage_get_proto;
8097 case BPF_FUNC_sk_storage_delete:
8098 return &bpf_sk_storage_delete_proto;
fab60e29
XL
8099 case BPF_FUNC_get_netns_cookie:
8100 return &bpf_get_netns_cookie_sk_msg_proto;
abe3cac8
JF
8101#ifdef CONFIG_CGROUPS
8102 case BPF_FUNC_get_current_cgroup_id:
8103 return &bpf_get_current_cgroup_id_proto;
8104 case BPF_FUNC_get_current_ancestor_cgroup_id:
8105 return &bpf_get_current_ancestor_cgroup_id_proto;
8106#endif
8107#ifdef CONFIG_CGROUP_NET_CLASSID
8108 case BPF_FUNC_get_cgroup_classid:
8109 return &bpf_get_cgroup_classid_curr_proto;
8110#endif
4f738adb 8111 default:
1df8f55a 8112 return bpf_sk_base_func_proto(func_id);
4f738adb
JF
8113 }
8114}
8115
604326b4
DB
8116const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
8117const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
8118
5e43f899
AI
8119static const struct bpf_func_proto *
8120sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
b005fd18
JF
8121{
8122 switch (func_id) {
8a31db56
JF
8123 case BPF_FUNC_skb_store_bytes:
8124 return &bpf_skb_store_bytes_proto;
b005fd18
JF
8125 case BPF_FUNC_skb_load_bytes:
8126 return &bpf_skb_load_bytes_proto;
8a31db56 8127 case BPF_FUNC_skb_pull_data:
0ea488ff 8128 return &sk_skb_pull_data_proto;
8a31db56 8129 case BPF_FUNC_skb_change_tail:
0ea488ff 8130 return &sk_skb_change_tail_proto;
8a31db56 8131 case BPF_FUNC_skb_change_head:
0ea488ff 8132 return &sk_skb_change_head_proto;
18ebe16d
JF
8133 case BPF_FUNC_skb_adjust_room:
8134 return &sk_skb_adjust_room_proto;
b005fd18
JF
8135 case BPF_FUNC_get_socket_cookie:
8136 return &bpf_get_socket_cookie_proto;
8137 case BPF_FUNC_get_socket_uid:
8138 return &bpf_get_socket_uid_proto;
174a79ff
JF
8139 case BPF_FUNC_sk_redirect_map:
8140 return &bpf_sk_redirect_map_proto;
81110384
JF
8141 case BPF_FUNC_sk_redirect_hash:
8142 return &bpf_sk_redirect_hash_proto;
7c4b90d7
AZ
8143 case BPF_FUNC_perf_event_output:
8144 return &bpf_skb_event_output_proto;
df3f94a0 8145#ifdef CONFIG_INET
6acc9b43
JS
8146 case BPF_FUNC_sk_lookup_tcp:
8147 return &bpf_sk_lookup_tcp_proto;
8148 case BPF_FUNC_sk_lookup_udp:
8149 return &bpf_sk_lookup_udp_proto;
8150 case BPF_FUNC_sk_release:
8151 return &bpf_sk_release_proto;
edbf8c01
LB
8152 case BPF_FUNC_skc_lookup_tcp:
8153 return &bpf_skc_lookup_tcp_proto;
df3f94a0 8154#endif
b005fd18 8155 default:
1df8f55a 8156 return bpf_sk_base_func_proto(func_id);
b005fd18
JF
8157 }
8158}
8159
d58e468b
PP
8160static const struct bpf_func_proto *
8161flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8162{
8163 switch (func_id) {
8164 case BPF_FUNC_skb_load_bytes:
089b19a9 8165 return &bpf_flow_dissector_load_bytes_proto;
d58e468b 8166 default:
1df8f55a 8167 return bpf_sk_base_func_proto(func_id);
d58e468b
PP
8168 }
8169}
8170
cd3092c7
MX
8171static const struct bpf_func_proto *
8172lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8173{
8174 switch (func_id) {
8175 case BPF_FUNC_skb_load_bytes:
8176 return &bpf_skb_load_bytes_proto;
8177 case BPF_FUNC_skb_pull_data:
8178 return &bpf_skb_pull_data_proto;
8179 case BPF_FUNC_csum_diff:
8180 return &bpf_csum_diff_proto;
8181 case BPF_FUNC_get_cgroup_classid:
8182 return &bpf_get_cgroup_classid_proto;
8183 case BPF_FUNC_get_route_realm:
8184 return &bpf_get_route_realm_proto;
8185 case BPF_FUNC_get_hash_recalc:
8186 return &bpf_get_hash_recalc_proto;
8187 case BPF_FUNC_perf_event_output:
8188 return &bpf_skb_event_output_proto;
8189 case BPF_FUNC_get_smp_processor_id:
8190 return &bpf_get_smp_processor_id_proto;
8191 case BPF_FUNC_skb_under_cgroup:
8192 return &bpf_skb_under_cgroup_proto;
8193 default:
1df8f55a 8194 return bpf_sk_base_func_proto(func_id);
cd3092c7
MX
8195 }
8196}
8197
8198static const struct bpf_func_proto *
8199lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8200{
8201 switch (func_id) {
8202 case BPF_FUNC_lwt_push_encap:
3e0bd37c 8203 return &bpf_lwt_in_push_encap_proto;
cd3092c7
MX
8204 default:
8205 return lwt_out_func_proto(func_id, prog);
8206 }
8207}
8208
3a0af8fd 8209static const struct bpf_func_proto *
5e43f899 8210lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3a0af8fd
TG
8211{
8212 switch (func_id) {
8213 case BPF_FUNC_skb_get_tunnel_key:
8214 return &bpf_skb_get_tunnel_key_proto;
8215 case BPF_FUNC_skb_set_tunnel_key:
8216 return bpf_get_skb_set_tunnel_proto(func_id);
8217 case BPF_FUNC_skb_get_tunnel_opt:
8218 return &bpf_skb_get_tunnel_opt_proto;
8219 case BPF_FUNC_skb_set_tunnel_opt:
8220 return bpf_get_skb_set_tunnel_proto(func_id);
8221 case BPF_FUNC_redirect:
8222 return &bpf_redirect_proto;
8223 case BPF_FUNC_clone_redirect:
8224 return &bpf_clone_redirect_proto;
8225 case BPF_FUNC_skb_change_tail:
8226 return &bpf_skb_change_tail_proto;
8227 case BPF_FUNC_skb_change_head:
8228 return &bpf_skb_change_head_proto;
8229 case BPF_FUNC_skb_store_bytes:
8230 return &bpf_skb_store_bytes_proto;
8231 case BPF_FUNC_csum_update:
8232 return &bpf_csum_update_proto;
7cdec54f
DB
8233 case BPF_FUNC_csum_level:
8234 return &bpf_csum_level_proto;
3a0af8fd
TG
8235 case BPF_FUNC_l3_csum_replace:
8236 return &bpf_l3_csum_replace_proto;
8237 case BPF_FUNC_l4_csum_replace:
8238 return &bpf_l4_csum_replace_proto;
8239 case BPF_FUNC_set_hash_invalid:
8240 return &bpf_set_hash_invalid_proto;
3e0bd37c
PO
8241 case BPF_FUNC_lwt_push_encap:
8242 return &bpf_lwt_xmit_push_encap_proto;
3a0af8fd 8243 default:
cd3092c7 8244 return lwt_out_func_proto(func_id, prog);
3a0af8fd
TG
8245 }
8246}
8247
004d4b27
MX
8248static const struct bpf_func_proto *
8249lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8250{
8251 switch (func_id) {
61d76980 8252#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
004d4b27
MX
8253 case BPF_FUNC_lwt_seg6_store_bytes:
8254 return &bpf_lwt_seg6_store_bytes_proto;
8255 case BPF_FUNC_lwt_seg6_action:
8256 return &bpf_lwt_seg6_action_proto;
8257 case BPF_FUNC_lwt_seg6_adjust_srh:
8258 return &bpf_lwt_seg6_adjust_srh_proto;
61d76980 8259#endif
004d4b27
MX
8260 default:
8261 return lwt_out_func_proto(func_id, prog);
3a0af8fd
TG
8262 }
8263}
8264
f96da094 8265static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
5e43f899 8266 const struct bpf_prog *prog,
f96da094 8267 struct bpf_insn_access_aux *info)
23994631 8268{
f96da094 8269 const int size_default = sizeof(__u32);
23994631 8270
9bac3d6d
AS
8271 if (off < 0 || off >= sizeof(struct __sk_buff))
8272 return false;
62c7989b 8273
4936e352 8274 /* The verifier guarantees that size > 0. */
9bac3d6d
AS
8275 if (off % size != 0)
8276 return false;
62c7989b
DB
8277
8278 switch (off) {
f96da094
DB
8279 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
8280 if (off + size > offsetofend(struct __sk_buff, cb[4]))
62c7989b
DB
8281 return false;
8282 break;
8a31db56
JF
8283 case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
8284 case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
8285 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
8286 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
f96da094 8287 case bpf_ctx_range(struct __sk_buff, data):
de8f3a83 8288 case bpf_ctx_range(struct __sk_buff, data_meta):
f96da094
DB
8289 case bpf_ctx_range(struct __sk_buff, data_end):
8290 if (size != size_default)
23994631 8291 return false;
31fd8581 8292 break;
b7df9ada 8293 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
089b19a9 8294 return false;
f64c4ace
VF
8295 case bpf_ctx_range(struct __sk_buff, hwtstamp):
8296 if (type == BPF_WRITE || size != sizeof(__u64))
8297 return false;
8298 break;
f11216b2
VD
8299 case bpf_ctx_range(struct __sk_buff, tstamp):
8300 if (size != sizeof(__u64))
8301 return false;
8302 break;
46f8bc92
MKL
8303 case offsetof(struct __sk_buff, sk):
8304 if (type == BPF_WRITE || size != sizeof(__u64))
8305 return false;
8306 info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
8307 break;
9bb984f2 8308 case offsetof(struct __sk_buff, tstamp_type):
8d21ec0e 8309 return false;
9bb984f2 8310 case offsetofend(struct __sk_buff, tstamp_type) ... offsetof(struct __sk_buff, hwtstamp) - 1:
f64c4ace
VF
8311 /* Explicitly prohibit access to padding in __sk_buff. */
8312 return false;
31fd8581 8313 default:
f96da094 8314 /* Only narrow read access allowed for now. */
31fd8581 8315 if (type == BPF_WRITE) {
f96da094 8316 if (size != size_default)
31fd8581
YS
8317 return false;
8318 } else {
f96da094
DB
8319 bpf_ctx_record_field_size(info, size_default);
8320 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
23994631 8321 return false;
31fd8581 8322 }
62c7989b 8323 }
9bac3d6d
AS
8324
8325 return true;
8326}
8327
d691f9e8 8328static bool sk_filter_is_valid_access(int off, int size,
19de99f7 8329 enum bpf_access_type type,
5e43f899 8330 const struct bpf_prog *prog,
23994631 8331 struct bpf_insn_access_aux *info)
d691f9e8 8332{
db58ba45 8333 switch (off) {
f96da094
DB
8334 case bpf_ctx_range(struct __sk_buff, tc_classid):
8335 case bpf_ctx_range(struct __sk_buff, data):
de8f3a83 8336 case bpf_ctx_range(struct __sk_buff, data_meta):
f96da094 8337 case bpf_ctx_range(struct __sk_buff, data_end):
8a31db56 8338 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
f11216b2 8339 case bpf_ctx_range(struct __sk_buff, tstamp):
e3da08d0 8340 case bpf_ctx_range(struct __sk_buff, wire_len):
f64c4ace 8341 case bpf_ctx_range(struct __sk_buff, hwtstamp):
045efa82 8342 return false;
db58ba45 8343 }
045efa82 8344
d691f9e8
AS
8345 if (type == BPF_WRITE) {
8346 switch (off) {
f96da094 8347 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
d691f9e8
AS
8348 break;
8349 default:
8350 return false;
8351 }
8352 }
8353
5e43f899 8354 return bpf_skb_is_valid_access(off, size, type, prog, info);
d691f9e8
AS
8355}
8356
b39b5f41
SL
8357static bool cg_skb_is_valid_access(int off, int size,
8358 enum bpf_access_type type,
8359 const struct bpf_prog *prog,
8360 struct bpf_insn_access_aux *info)
8361{
8362 switch (off) {
8363 case bpf_ctx_range(struct __sk_buff, tc_classid):
8364 case bpf_ctx_range(struct __sk_buff, data_meta):
e3da08d0 8365 case bpf_ctx_range(struct __sk_buff, wire_len):
b39b5f41 8366 return false;
ab21c1b5
DB
8367 case bpf_ctx_range(struct __sk_buff, data):
8368 case bpf_ctx_range(struct __sk_buff, data_end):
2c78ee89 8369 if (!bpf_capable())
ab21c1b5
DB
8370 return false;
8371 break;
b39b5f41 8372 }
ab21c1b5 8373
b39b5f41
SL
8374 if (type == BPF_WRITE) {
8375 switch (off) {
8376 case bpf_ctx_range(struct __sk_buff, mark):
8377 case bpf_ctx_range(struct __sk_buff, priority):
8378 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
8379 break;
f11216b2 8380 case bpf_ctx_range(struct __sk_buff, tstamp):
2c78ee89 8381 if (!bpf_capable())
f11216b2
VD
8382 return false;
8383 break;
b39b5f41
SL
8384 default:
8385 return false;
8386 }
8387 }
8388
8389 switch (off) {
8390 case bpf_ctx_range(struct __sk_buff, data):
8391 info->reg_type = PTR_TO_PACKET;
8392 break;
8393 case bpf_ctx_range(struct __sk_buff, data_end):
8394 info->reg_type = PTR_TO_PACKET_END;
8395 break;
8396 }
8397
8398 return bpf_skb_is_valid_access(off, size, type, prog, info);
8399}
8400
3a0af8fd
TG
8401static bool lwt_is_valid_access(int off, int size,
8402 enum bpf_access_type type,
5e43f899 8403 const struct bpf_prog *prog,
23994631 8404 struct bpf_insn_access_aux *info)
3a0af8fd
TG
8405{
8406 switch (off) {
f96da094 8407 case bpf_ctx_range(struct __sk_buff, tc_classid):
8a31db56 8408 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
de8f3a83 8409 case bpf_ctx_range(struct __sk_buff, data_meta):
f11216b2 8410 case bpf_ctx_range(struct __sk_buff, tstamp):
e3da08d0 8411 case bpf_ctx_range(struct __sk_buff, wire_len):
f64c4ace 8412 case bpf_ctx_range(struct __sk_buff, hwtstamp):
3a0af8fd
TG
8413 return false;
8414 }
8415
8416 if (type == BPF_WRITE) {
8417 switch (off) {
f96da094
DB
8418 case bpf_ctx_range(struct __sk_buff, mark):
8419 case bpf_ctx_range(struct __sk_buff, priority):
8420 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
3a0af8fd
TG
8421 break;
8422 default:
8423 return false;
8424 }
8425 }
8426
f96da094
DB
8427 switch (off) {
8428 case bpf_ctx_range(struct __sk_buff, data):
8429 info->reg_type = PTR_TO_PACKET;
8430 break;
8431 case bpf_ctx_range(struct __sk_buff, data_end):
8432 info->reg_type = PTR_TO_PACKET_END;
8433 break;
8434 }
8435
5e43f899 8436 return bpf_skb_is_valid_access(off, size, type, prog, info);
3a0af8fd
TG
8437}
8438
aac3fc32
AI
8439/* Attach type specific accesses */
8440static bool __sock_filter_check_attach_type(int off,
8441 enum bpf_access_type access_type,
8442 enum bpf_attach_type attach_type)
61023658 8443{
aac3fc32
AI
8444 switch (off) {
8445 case offsetof(struct bpf_sock, bound_dev_if):
8446 case offsetof(struct bpf_sock, mark):
8447 case offsetof(struct bpf_sock, priority):
8448 switch (attach_type) {
8449 case BPF_CGROUP_INET_SOCK_CREATE:
f5836749 8450 case BPF_CGROUP_INET_SOCK_RELEASE:
aac3fc32
AI
8451 goto full_access;
8452 default:
8453 return false;
8454 }
8455 case bpf_ctx_range(struct bpf_sock, src_ip4):
8456 switch (attach_type) {
8457 case BPF_CGROUP_INET4_POST_BIND:
8458 goto read_only;
8459 default:
8460 return false;
8461 }
8462 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
8463 switch (attach_type) {
8464 case BPF_CGROUP_INET6_POST_BIND:
8465 goto read_only;
8466 default:
8467 return false;
8468 }
8469 case bpf_ctx_range(struct bpf_sock, src_port):
8470 switch (attach_type) {
8471 case BPF_CGROUP_INET4_POST_BIND:
8472 case BPF_CGROUP_INET6_POST_BIND:
8473 goto read_only;
61023658
DA
8474 default:
8475 return false;
8476 }
8477 }
aac3fc32
AI
8478read_only:
8479 return access_type == BPF_READ;
8480full_access:
8481 return true;
8482}
8483
46f8bc92
MKL
8484bool bpf_sock_common_is_valid_access(int off, int size,
8485 enum bpf_access_type type,
aac3fc32
AI
8486 struct bpf_insn_access_aux *info)
8487{
aac3fc32 8488 switch (off) {
46f8bc92
MKL
8489 case bpf_ctx_range_till(struct bpf_sock, type, priority):
8490 return false;
8491 default:
8492 return bpf_sock_is_valid_access(off, size, type, info);
aac3fc32 8493 }
aac3fc32
AI
8494}
8495
c64b7983
JS
8496bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
8497 struct bpf_insn_access_aux *info)
aac3fc32 8498{
aa65d696 8499 const int size_default = sizeof(__u32);
4421a582 8500 int field_size;
aa65d696 8501
aac3fc32 8502 if (off < 0 || off >= sizeof(struct bpf_sock))
61023658 8503 return false;
61023658
DA
8504 if (off % size != 0)
8505 return false;
aa65d696
MKL
8506
8507 switch (off) {
8508 case offsetof(struct bpf_sock, state):
8509 case offsetof(struct bpf_sock, family):
8510 case offsetof(struct bpf_sock, type):
8511 case offsetof(struct bpf_sock, protocol):
aa65d696 8512 case offsetof(struct bpf_sock, src_port):
c3c16f2e 8513 case offsetof(struct bpf_sock, rx_queue_mapping):
aa65d696
MKL
8514 case bpf_ctx_range(struct bpf_sock, src_ip4):
8515 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
8516 case bpf_ctx_range(struct bpf_sock, dst_ip4):
8517 case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
8518 bpf_ctx_record_field_size(info, size_default);
8519 return bpf_ctx_narrow_access_ok(off, size, size_default);
4421a582
JS
8520 case bpf_ctx_range(struct bpf_sock, dst_port):
8521 field_size = size == size_default ?
8522 size_default : sizeof_field(struct bpf_sock, dst_port);
8523 bpf_ctx_record_field_size(info, field_size);
8524 return bpf_ctx_narrow_access_ok(off, size, field_size);
8525 case offsetofend(struct bpf_sock, dst_port) ...
8526 offsetof(struct bpf_sock, dst_ip4) - 1:
8527 return false;
aa65d696
MKL
8528 }
8529
8530 return size == size_default;
61023658
DA
8531}
8532
c64b7983
JS
8533static bool sock_filter_is_valid_access(int off, int size,
8534 enum bpf_access_type type,
8535 const struct bpf_prog *prog,
8536 struct bpf_insn_access_aux *info)
8537{
8538 if (!bpf_sock_is_valid_access(off, size, type, info))
8539 return false;
8540 return __sock_filter_check_attach_type(off, type,
8541 prog->expected_attach_type);
8542}
8543
b09928b9
DB
8544static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
8545 const struct bpf_prog *prog)
8546{
8547 /* Neither direct read nor direct write requires any preliminary
8548 * action.
8549 */
8550 return 0;
8551}
8552
047b0ecd
DB
8553static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
8554 const struct bpf_prog *prog, int drop_verdict)
36bbef52
DB
8555{
8556 struct bpf_insn *insn = insn_buf;
8557
8558 if (!direct_write)
8559 return 0;
8560
8561 /* if (!skb->cloned)
8562 * goto start;
8563 *
8564 * (Fast-path, otherwise approximation that we might be
8565 * a clone, do the rest in helper.)
8566 */
fba84957 8567 *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET);
36bbef52
DB
8568 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
8569 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
8570
8571 /* ret = bpf_skb_pull_data(skb, 0); */
8572 *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
8573 *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
8574 *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
8575 BPF_FUNC_skb_pull_data);
8576 /* if (!ret)
8577 * goto restore;
8578 * return TC_ACT_SHOT;
8579 */
8580 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
047b0ecd 8581 *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
36bbef52
DB
8582 *insn++ = BPF_EXIT_INSN();
8583
8584 /* restore: */
8585 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
8586 /* start: */
8587 *insn++ = prog->insnsi[0];
8588
8589 return insn - insn_buf;
8590}
8591
e0cea7ce
DB
8592static int bpf_gen_ld_abs(const struct bpf_insn *orig,
8593 struct bpf_insn *insn_buf)
8594{
8595 bool indirect = BPF_MODE(orig->code) == BPF_IND;
8596 struct bpf_insn *insn = insn_buf;
8597
e0cea7ce
DB
8598 if (!indirect) {
8599 *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
8600 } else {
8601 *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg);
8602 if (orig->imm)
8603 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
8604 }
e6a18d36
DB
8605 /* We're guaranteed here that CTX is in R6. */
8606 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
e0cea7ce
DB
8607
8608 switch (BPF_SIZE(orig->code)) {
8609 case BPF_B:
8610 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache);
8611 break;
8612 case BPF_H:
8613 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache);
8614 break;
8615 case BPF_W:
8616 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache);
8617 break;
8618 }
8619
8620 *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2);
8621 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
8622 *insn++ = BPF_EXIT_INSN();
8623
8624 return insn - insn_buf;
8625}
8626
047b0ecd
DB
8627static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
8628 const struct bpf_prog *prog)
8629{
8630 return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
8631}
8632
d691f9e8 8633static bool tc_cls_act_is_valid_access(int off, int size,
19de99f7 8634 enum bpf_access_type type,
5e43f899 8635 const struct bpf_prog *prog,
23994631 8636 struct bpf_insn_access_aux *info)
d691f9e8
AS
8637{
8638 if (type == BPF_WRITE) {
8639 switch (off) {
f96da094
DB
8640 case bpf_ctx_range(struct __sk_buff, mark):
8641 case bpf_ctx_range(struct __sk_buff, tc_index):
8642 case bpf_ctx_range(struct __sk_buff, priority):
8643 case bpf_ctx_range(struct __sk_buff, tc_classid):
8644 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
f11216b2 8645 case bpf_ctx_range(struct __sk_buff, tstamp):
74e31ca8 8646 case bpf_ctx_range(struct __sk_buff, queue_mapping):
d691f9e8
AS
8647 break;
8648 default:
8649 return false;
8650 }
8651 }
19de99f7 8652
f96da094
DB
8653 switch (off) {
8654 case bpf_ctx_range(struct __sk_buff, data):
8655 info->reg_type = PTR_TO_PACKET;
8656 break;
de8f3a83
DB
8657 case bpf_ctx_range(struct __sk_buff, data_meta):
8658 info->reg_type = PTR_TO_PACKET_META;
8659 break;
f96da094
DB
8660 case bpf_ctx_range(struct __sk_buff, data_end):
8661 info->reg_type = PTR_TO_PACKET_END;
8662 break;
8a31db56
JF
8663 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
8664 return false;
9bb984f2 8665 case offsetof(struct __sk_buff, tstamp_type):
8666 /* The convert_ctx_access() on reading and writing
8667 * __sk_buff->tstamp depends on whether the bpf prog
8668 * has used __sk_buff->tstamp_type or not.
8669 * Thus, we need to set prog->tstamp_type_access
8670 * earlier during is_valid_access() here.
8671 */
9bb984f2 8672 ((struct bpf_prog *)prog)->tstamp_type_access = 1;
8d21ec0e 8673 return size == sizeof(__u8);
f96da094
DB
8674 }
8675
5e43f899 8676 return bpf_skb_is_valid_access(off, size, type, prog, info);
d691f9e8
AS
8677}
8678
864b656f
DX
8679DEFINE_MUTEX(nf_conn_btf_access_lock);
8680EXPORT_SYMBOL_GPL(nf_conn_btf_access_lock);
8681
6728aea7
KKD
8682int (*nfct_btf_struct_access)(struct bpf_verifier_log *log,
8683 const struct bpf_reg_state *reg,
8684 int off, int size, enum bpf_access_type atype,
8685 u32 *next_btf_id, enum bpf_type_flag *flag);
5a090aa3 8686EXPORT_SYMBOL_GPL(nfct_btf_struct_access);
864b656f
DX
8687
8688static int tc_cls_act_btf_struct_access(struct bpf_verifier_log *log,
6728aea7
KKD
8689 const struct bpf_reg_state *reg,
8690 int off, int size, enum bpf_access_type atype,
8691 u32 *next_btf_id, enum bpf_type_flag *flag)
864b656f
DX
8692{
8693 int ret = -EACCES;
8694
8695 if (atype == BPF_READ)
6728aea7 8696 return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
864b656f
DX
8697
8698 mutex_lock(&nf_conn_btf_access_lock);
5a090aa3 8699 if (nfct_btf_struct_access)
6728aea7 8700 ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
864b656f
DX
8701 mutex_unlock(&nf_conn_btf_access_lock);
8702
8703 return ret;
8704}
8705
1afaf661 8706static bool __is_valid_xdp_access(int off, int size)
6a773a15
BB
8707{
8708 if (off < 0 || off >= sizeof(struct xdp_md))
8709 return false;
8710 if (off % size != 0)
8711 return false;
6088b582 8712 if (size != sizeof(__u32))
6a773a15
BB
8713 return false;
8714
8715 return true;
8716}
8717
8718static bool xdp_is_valid_access(int off, int size,
8719 enum bpf_access_type type,
5e43f899 8720 const struct bpf_prog *prog,
23994631 8721 struct bpf_insn_access_aux *info)
6a773a15 8722{
64b59025
DA
8723 if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
8724 switch (off) {
8725 case offsetof(struct xdp_md, egress_ifindex):
8726 return false;
8727 }
8728 }
8729
0d830032
JK
8730 if (type == BPF_WRITE) {
8731 if (bpf_prog_is_dev_bound(prog->aux)) {
8732 switch (off) {
8733 case offsetof(struct xdp_md, rx_queue_index):
8734 return __is_valid_xdp_access(off, size);
8735 }
8736 }
6a773a15 8737 return false;
0d830032 8738 }
6a773a15
BB
8739
8740 switch (off) {
8741 case offsetof(struct xdp_md, data):
23994631 8742 info->reg_type = PTR_TO_PACKET;
6a773a15 8743 break;
de8f3a83
DB
8744 case offsetof(struct xdp_md, data_meta):
8745 info->reg_type = PTR_TO_PACKET_META;
8746 break;
6a773a15 8747 case offsetof(struct xdp_md, data_end):
23994631 8748 info->reg_type = PTR_TO_PACKET_END;
6a773a15
BB
8749 break;
8750 }
8751
1afaf661 8752 return __is_valid_xdp_access(off, size);
6a773a15
BB
8753}
8754
c8064e5b 8755void bpf_warn_invalid_xdp_action(struct net_device *dev, struct bpf_prog *prog, u32 act)
6a773a15 8756{
9beb8bed
DB
8757 const u32 act_max = XDP_REDIRECT;
8758
c8064e5b 8759 pr_warn_once("%s XDP return value %u on prog %s (id %d) dev %s, expect packet loss!\n",
2cbad989 8760 act > act_max ? "Illegal" : "Driver unsupported",
c8064e5b 8761 act, prog->aux->name, prog->aux->id, dev ? dev->name : "N/A");
6a773a15
BB
8762}
8763EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
8764
864b656f 8765static int xdp_btf_struct_access(struct bpf_verifier_log *log,
6728aea7
KKD
8766 const struct bpf_reg_state *reg,
8767 int off, int size, enum bpf_access_type atype,
8768 u32 *next_btf_id, enum bpf_type_flag *flag)
864b656f
DX
8769{
8770 int ret = -EACCES;
8771
8772 if (atype == BPF_READ)
6728aea7 8773 return btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
864b656f
DX
8774
8775 mutex_lock(&nf_conn_btf_access_lock);
5a090aa3 8776 if (nfct_btf_struct_access)
6728aea7 8777 ret = nfct_btf_struct_access(log, reg, off, size, atype, next_btf_id, flag);
864b656f
DX
8778 mutex_unlock(&nf_conn_btf_access_lock);
8779
8780 return ret;
8781}
8782
4fbac77d
AI
8783static bool sock_addr_is_valid_access(int off, int size,
8784 enum bpf_access_type type,
8785 const struct bpf_prog *prog,
8786 struct bpf_insn_access_aux *info)
8787{
8788 const int size_default = sizeof(__u32);
8789
8790 if (off < 0 || off >= sizeof(struct bpf_sock_addr))
8791 return false;
8792 if (off % size != 0)
8793 return false;
8794
 8795	 /* Disallow access to IPv6 fields from IPv4 context and vice
 8796	 * versa.
8797 */
8798 switch (off) {
8799 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
8800 switch (prog->expected_attach_type) {
8801 case BPF_CGROUP_INET4_BIND:
d74bad4e 8802 case BPF_CGROUP_INET4_CONNECT:
1b66d253
DB
8803 case BPF_CGROUP_INET4_GETPEERNAME:
8804 case BPF_CGROUP_INET4_GETSOCKNAME:
1cedee13 8805 case BPF_CGROUP_UDP4_SENDMSG:
983695fa 8806 case BPF_CGROUP_UDP4_RECVMSG:
4fbac77d
AI
8807 break;
8808 default:
8809 return false;
8810 }
8811 break;
8812 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
8813 switch (prog->expected_attach_type) {
8814 case BPF_CGROUP_INET6_BIND:
d74bad4e 8815 case BPF_CGROUP_INET6_CONNECT:
1b66d253
DB
8816 case BPF_CGROUP_INET6_GETPEERNAME:
8817 case BPF_CGROUP_INET6_GETSOCKNAME:
1cedee13 8818 case BPF_CGROUP_UDP6_SENDMSG:
983695fa 8819 case BPF_CGROUP_UDP6_RECVMSG:
1cedee13
AI
8820 break;
8821 default:
8822 return false;
8823 }
8824 break;
8825 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
8826 switch (prog->expected_attach_type) {
8827 case BPF_CGROUP_UDP4_SENDMSG:
8828 break;
8829 default:
8830 return false;
8831 }
8832 break;
8833 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
8834 msg_src_ip6[3]):
8835 switch (prog->expected_attach_type) {
8836 case BPF_CGROUP_UDP6_SENDMSG:
4fbac77d
AI
8837 break;
8838 default:
8839 return false;
8840 }
8841 break;
8842 }
8843
8844 switch (off) {
8845 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
8846 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
1cedee13
AI
8847 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
8848 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
8849 msg_src_ip6[3]):
7aebfa1b 8850 case bpf_ctx_range(struct bpf_sock_addr, user_port):
4fbac77d
AI
8851 if (type == BPF_READ) {
8852 bpf_ctx_record_field_size(info, size_default);
d4ecfeb1
SF
8853
8854 if (bpf_ctx_wide_access_ok(off, size,
8855 struct bpf_sock_addr,
8856 user_ip6))
8857 return true;
8858
8859 if (bpf_ctx_wide_access_ok(off, size,
8860 struct bpf_sock_addr,
8861 msg_src_ip6))
8862 return true;
8863
4fbac77d
AI
8864 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
8865 return false;
8866 } else {
b4399546
SF
8867 if (bpf_ctx_wide_access_ok(off, size,
8868 struct bpf_sock_addr,
8869 user_ip6))
600c70ba
SF
8870 return true;
8871
b4399546
SF
8872 if (bpf_ctx_wide_access_ok(off, size,
8873 struct bpf_sock_addr,
8874 msg_src_ip6))
600c70ba
SF
8875 return true;
8876
4fbac77d
AI
8877 if (size != size_default)
8878 return false;
8879 }
8880 break;
fb85c4a7
SF
8881 case offsetof(struct bpf_sock_addr, sk):
8882 if (type != BPF_READ)
8883 return false;
8884 if (size != sizeof(__u64))
8885 return false;
8886 info->reg_type = PTR_TO_SOCKET;
8887 break;
4fbac77d
AI
8888 default:
8889 if (type == BPF_READ) {
8890 if (size != size_default)
8891 return false;
8892 } else {
8893 return false;
8894 }
8895 }
8896
8897 return true;
8898}
8899
44f0e430
LB
8900static bool sock_ops_is_valid_access(int off, int size,
8901 enum bpf_access_type type,
5e43f899 8902 const struct bpf_prog *prog,
44f0e430 8903 struct bpf_insn_access_aux *info)
40304b2a 8904{
44f0e430
LB
8905 const int size_default = sizeof(__u32);
8906
40304b2a
LB
8907 if (off < 0 || off >= sizeof(struct bpf_sock_ops))
8908 return false;
44f0e430 8909
40304b2a
LB
8910 /* The verifier guarantees that size > 0. */
8911 if (off % size != 0)
8912 return false;
40304b2a 8913
40304b2a
LB
8914 if (type == BPF_WRITE) {
8915 switch (off) {
2585cd62 8916 case offsetof(struct bpf_sock_ops, reply):
6f9bd3d7 8917 case offsetof(struct bpf_sock_ops, sk_txhash):
44f0e430
LB
8918 if (size != size_default)
8919 return false;
40304b2a
LB
8920 break;
8921 default:
8922 return false;
8923 }
44f0e430
LB
8924 } else {
8925 switch (off) {
8926 case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
8927 bytes_acked):
8928 if (size != sizeof(__u64))
8929 return false;
8930 break;
1314ef56
SF
8931 case offsetof(struct bpf_sock_ops, sk):
8932 if (size != sizeof(__u64))
8933 return false;
8934 info->reg_type = PTR_TO_SOCKET_OR_NULL;
8935 break;
0813a841
MKL
8936 case offsetof(struct bpf_sock_ops, skb_data):
8937 if (size != sizeof(__u64))
8938 return false;
8939 info->reg_type = PTR_TO_PACKET;
8940 break;
8941 case offsetof(struct bpf_sock_ops, skb_data_end):
8942 if (size != sizeof(__u64))
8943 return false;
8944 info->reg_type = PTR_TO_PACKET_END;
8945 break;
8946 case offsetof(struct bpf_sock_ops, skb_tcp_flags):
8947 bpf_ctx_record_field_size(info, size_default);
8948 return bpf_ctx_narrow_access_ok(off, size,
8949 size_default);
9bb05349
MKL
8950 case offsetof(struct bpf_sock_ops, skb_hwtstamp):
8951 if (size != sizeof(__u64))
8952 return false;
8953 break;
44f0e430
LB
8954 default:
8955 if (size != size_default)
8956 return false;
8957 break;
8958 }
40304b2a
LB
8959 }
8960
44f0e430 8961 return true;
40304b2a
LB
8962}
8963
8a31db56
JF
8964static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
8965 const struct bpf_prog *prog)
8966{
047b0ecd 8967 return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
8a31db56
JF
8968}
8969
b005fd18
JF
8970static bool sk_skb_is_valid_access(int off, int size,
8971 enum bpf_access_type type,
5e43f899 8972 const struct bpf_prog *prog,
b005fd18
JF
8973 struct bpf_insn_access_aux *info)
8974{
de8f3a83
DB
8975 switch (off) {
8976 case bpf_ctx_range(struct __sk_buff, tc_classid):
8977 case bpf_ctx_range(struct __sk_buff, data_meta):
f11216b2 8978 case bpf_ctx_range(struct __sk_buff, tstamp):
e3da08d0 8979 case bpf_ctx_range(struct __sk_buff, wire_len):
f64c4ace 8980 case bpf_ctx_range(struct __sk_buff, hwtstamp):
de8f3a83
DB
8981 return false;
8982 }
8983
8a31db56
JF
8984 if (type == BPF_WRITE) {
8985 switch (off) {
8a31db56
JF
8986 case bpf_ctx_range(struct __sk_buff, tc_index):
8987 case bpf_ctx_range(struct __sk_buff, priority):
8988 break;
8989 default:
8990 return false;
8991 }
8992 }
8993
b005fd18 8994 switch (off) {
f7e9cb1e 8995 case bpf_ctx_range(struct __sk_buff, mark):
8a31db56 8996 return false;
b005fd18
JF
8997 case bpf_ctx_range(struct __sk_buff, data):
8998 info->reg_type = PTR_TO_PACKET;
8999 break;
9000 case bpf_ctx_range(struct __sk_buff, data_end):
9001 info->reg_type = PTR_TO_PACKET_END;
9002 break;
9003 }
9004
5e43f899 9005 return bpf_skb_is_valid_access(off, size, type, prog, info);
b005fd18
JF
9006}
9007
4f738adb
JF
9008static bool sk_msg_is_valid_access(int off, int size,
9009 enum bpf_access_type type,
5e43f899 9010 const struct bpf_prog *prog,
4f738adb
JF
9011 struct bpf_insn_access_aux *info)
9012{
9013 if (type == BPF_WRITE)
9014 return false;
9015
bc1b4f01
JF
9016 if (off % size != 0)
9017 return false;
9018
4f738adb
JF
9019 switch (off) {
9020 case offsetof(struct sk_msg_md, data):
9021 info->reg_type = PTR_TO_PACKET;
303def35
JF
9022 if (size != sizeof(__u64))
9023 return false;
4f738adb
JF
9024 break;
9025 case offsetof(struct sk_msg_md, data_end):
9026 info->reg_type = PTR_TO_PACKET_END;
303def35
JF
9027 if (size != sizeof(__u64))
9028 return false;
4f738adb 9029 break;
13d70f5a
JF
9030 case offsetof(struct sk_msg_md, sk):
9031 if (size != sizeof(__u64))
9032 return false;
9033 info->reg_type = PTR_TO_SOCKET;
9034 break;
bc1b4f01
JF
9035 case bpf_ctx_range(struct sk_msg_md, family):
9036 case bpf_ctx_range(struct sk_msg_md, remote_ip4):
9037 case bpf_ctx_range(struct sk_msg_md, local_ip4):
9038 case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]):
9039 case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]):
9040 case bpf_ctx_range(struct sk_msg_md, remote_port):
9041 case bpf_ctx_range(struct sk_msg_md, local_port):
9042 case bpf_ctx_range(struct sk_msg_md, size):
303def35
JF
9043 if (size != sizeof(__u32))
9044 return false;
bc1b4f01
JF
9045 break;
9046 default:
4f738adb 9047 return false;
bc1b4f01 9048 }
4f738adb
JF
9049 return true;
9050}
9051
d58e468b
PP
9052static bool flow_dissector_is_valid_access(int off, int size,
9053 enum bpf_access_type type,
9054 const struct bpf_prog *prog,
9055 struct bpf_insn_access_aux *info)
9056{
089b19a9
SF
9057 const int size_default = sizeof(__u32);
9058
9059 if (off < 0 || off >= sizeof(struct __sk_buff))
9060 return false;
9061
2ee7fba0
SF
9062 if (type == BPF_WRITE)
9063 return false;
d58e468b
PP
9064
9065 switch (off) {
9066 case bpf_ctx_range(struct __sk_buff, data):
089b19a9
SF
9067 if (size != size_default)
9068 return false;
d58e468b 9069 info->reg_type = PTR_TO_PACKET;
089b19a9 9070 return true;
d58e468b 9071 case bpf_ctx_range(struct __sk_buff, data_end):
089b19a9
SF
9072 if (size != size_default)
9073 return false;
d58e468b 9074 info->reg_type = PTR_TO_PACKET_END;
089b19a9 9075 return true;
b7df9ada 9076 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
089b19a9
SF
9077 if (size != sizeof(__u64))
9078 return false;
d58e468b 9079 info->reg_type = PTR_TO_FLOW_KEYS;
089b19a9 9080 return true;
2ee7fba0 9081 default:
d58e468b
PP
9082 return false;
9083 }
089b19a9 9084}
d58e468b 9085
089b19a9
SF
9086static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
9087 const struct bpf_insn *si,
9088 struct bpf_insn *insn_buf,
9089 struct bpf_prog *prog,
9090 u32 *target_size)
9091
9092{
9093 struct bpf_insn *insn = insn_buf;
9094
9095 switch (si->off) {
9096 case offsetof(struct __sk_buff, data):
9097 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
9098 si->dst_reg, si->src_reg,
9099 offsetof(struct bpf_flow_dissector, data));
9100 break;
9101
9102 case offsetof(struct __sk_buff, data_end):
9103 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
9104 si->dst_reg, si->src_reg,
9105 offsetof(struct bpf_flow_dissector, data_end));
9106 break;
9107
9108 case offsetof(struct __sk_buff, flow_keys):
9109 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys),
9110 si->dst_reg, si->src_reg,
9111 offsetof(struct bpf_flow_dissector, flow_keys));
9112 break;
9113 }
9114
9115 return insn - insn_buf;
d58e468b
PP
9116}
9117
9bb984f2
MKL
9118static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
9119 struct bpf_insn *insn)
8d21ec0e
MKL
9120{
9121 __u8 value_reg = si->dst_reg;
9122 __u8 skb_reg = si->src_reg;
9bb984f2 9123 /* AX is needed because src_reg and dst_reg could be the same */
8d21ec0e
MKL
9124 __u8 tmp_reg = BPF_REG_AX;
9125
9126 *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
3b5d4ddf 9127 PKT_VLAN_PRESENT_OFFSET);
9bb984f2
MKL
9128 *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
9129 SKB_MONO_DELIVERY_TIME_MASK, 2);
9130 *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
8d21ec0e 9131 *insn++ = BPF_JMP_A(1);
9bb984f2 9132 *insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
8d21ec0e 9133
8d21ec0e
MKL
9134 return insn;
9135}
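/* Illustrative sketch (added for clarity, not part of the original source):
 * the instruction sequence emitted above behaves roughly like
 *
 *	u8 flags = *(u8 *)(skb + PKT_VLAN_PRESENT_OFFSET);
 *	if (flags & SKB_MONO_DELIVERY_TIME_MASK)
 *		value_reg = BPF_SKB_TSTAMP_DELIVERY_MONO;
 *	else
 *		value_reg = BPF_SKB_TSTAMP_UNSPEC;
 */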
9136
9bb05349 9137static struct bpf_insn *bpf_convert_shinfo_access(__u8 dst_reg, __u8 skb_reg,
cf62089b
WB
9138 struct bpf_insn *insn)
9139{
9140 /* si->dst_reg = skb_shinfo(SKB); */
9141#ifdef NET_SKBUFF_DATA_USES_OFFSET
9142 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
9bb05349 9143 BPF_REG_AX, skb_reg,
cf62089b
WB
9144 offsetof(struct sk_buff, end));
9145 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
9bb05349 9146 dst_reg, skb_reg,
cf62089b 9147 offsetof(struct sk_buff, head));
9bb05349 9148 *insn++ = BPF_ALU64_REG(BPF_ADD, dst_reg, BPF_REG_AX);
cf62089b
WB
9149#else
9150 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
9bb05349 9151 dst_reg, skb_reg,
cf62089b
WB
9152 offsetof(struct sk_buff, end));
9153#endif
9154
9155 return insn;
9156}
9157
8d21ec0e
MKL
9158static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
9159 const struct bpf_insn *si,
7449197d
MKL
9160 struct bpf_insn *insn)
9161{
9162 __u8 value_reg = si->dst_reg;
9163 __u8 skb_reg = si->src_reg;
9164
9165#ifdef CONFIG_NET_CLS_ACT
9bb984f2 9166 /* If the tstamp_type is read,
539de932 9167 * the bpf prog is aware the tstamp could have delivery time.
9bb984f2 9168 * Thus, read skb->tstamp as is if tstamp_type_access is true.
539de932 9169 */
9bb984f2 9170 if (!prog->tstamp_type_access) {
539de932 9171 /* AX is needed because src_reg and dst_reg could be the same */
8d21ec0e
MKL
9172 __u8 tmp_reg = BPF_REG_AX;
9173
3b5d4ddf 9174 *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
8d21ec0e 9175 *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
539de932
MKL
9176 TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
9177 *insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
9178 TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
9179 /* skb->tc_at_ingress && skb->mono_delivery_time,
9180 * read 0 as the (rcv) timestamp.
9181 */
8d21ec0e
MKL
9182 *insn++ = BPF_MOV64_IMM(value_reg, 0);
9183 *insn++ = BPF_JMP_A(1);
9184 }
7449197d
MKL
9185#endif
9186
9187 *insn++ = BPF_LDX_MEM(BPF_DW, value_reg, skb_reg,
9188 offsetof(struct sk_buff, tstamp));
9189 return insn;
9190}
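/* Illustrative sketch (added for clarity, not part of the original source):
 * when tstamp_type_access is not set and CONFIG_NET_CLS_ACT is enabled, the
 * read path above is roughly
 *
 *	u8 flags = *(u8 *)(skb + PKT_VLAN_PRESENT_OFFSET);
 *	if ((flags & (TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK)) ==
 *	    (TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK))
 *		value_reg = 0;		   ingress + mono delivery time
 *	else
 *		value_reg = skb->tstamp;
 */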
9191
8d21ec0e
MKL
9192static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
9193 const struct bpf_insn *si,
7449197d
MKL
9194 struct bpf_insn *insn)
9195{
9196 __u8 value_reg = si->src_reg;
9197 __u8 skb_reg = si->dst_reg;
9198
9199#ifdef CONFIG_NET_CLS_ACT
9bb984f2 9200 /* If the tstamp_type is read,
9d90db97 9201 * the bpf prog is aware the tstamp could have delivery time.
9bb984f2 9202 * Thus, write skb->tstamp as is if tstamp_type_access is true.
9d90db97
MKL
9203 * Otherwise, writing at ingress will have to clear the
9204 * mono_delivery_time bit also.
9205 */
9bb984f2 9206 if (!prog->tstamp_type_access) {
8d21ec0e
MKL
9207 __u8 tmp_reg = BPF_REG_AX;
9208
3b5d4ddf 9209 *insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, PKT_VLAN_PRESENT_OFFSET);
9d90db97
MKL
9210 /* Writing __sk_buff->tstamp as ingress, goto <clear> */
9211 *insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
9212 /* goto <store> */
9213 *insn++ = BPF_JMP_A(2);
9214 /* <clear>: mono_delivery_time */
9215 *insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
9216 *insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, PKT_VLAN_PRESENT_OFFSET);
8d21ec0e 9217 }
7449197d
MKL
9218#endif
9219
9d90db97 9220 /* <store>: skb->tstamp = tstamp */
7449197d
MKL
9221 *insn++ = BPF_STX_MEM(BPF_DW, skb_reg, value_reg,
9222 offsetof(struct sk_buff, tstamp));
9223 return insn;
9224}
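/* Illustrative sketch (added for clarity, not part of the original source):
 * when tstamp_type_access is not set and CONFIG_NET_CLS_ACT is enabled, the
 * write path above is roughly
 *
 *	u8 flags = *(u8 *)(skb + PKT_VLAN_PRESENT_OFFSET);
 *	if (flags & TC_AT_INGRESS_MASK) {
 *		flags &= ~SKB_MONO_DELIVERY_TIME_MASK;
 *		*(u8 *)(skb + PKT_VLAN_PRESENT_OFFSET) = flags;
 *	}
 *	skb->tstamp = value_reg;
 */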
9225
2492d3b8
DB
9226static u32 bpf_convert_ctx_access(enum bpf_access_type type,
9227 const struct bpf_insn *si,
9228 struct bpf_insn *insn_buf,
f96da094 9229 struct bpf_prog *prog, u32 *target_size)
9bac3d6d
AS
9230{
9231 struct bpf_insn *insn = insn_buf;
6b8cc1d1 9232 int off;
9bac3d6d 9233
6b8cc1d1 9234 switch (si->off) {
9bac3d6d 9235 case offsetof(struct __sk_buff, len):
6b8cc1d1 9236 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
9237 bpf_target_off(struct sk_buff, len, 4,
9238 target_size));
9bac3d6d
AS
9239 break;
9240
0b8c707d 9241 case offsetof(struct __sk_buff, protocol):
6b8cc1d1 9242 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
9243 bpf_target_off(struct sk_buff, protocol, 2,
9244 target_size));
0b8c707d
DB
9245 break;
9246
27cd5452 9247 case offsetof(struct __sk_buff, vlan_proto):
6b8cc1d1 9248 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
9249 bpf_target_off(struct sk_buff, vlan_proto, 2,
9250 target_size));
27cd5452
MS
9251 break;
9252
bcad5718 9253 case offsetof(struct __sk_buff, priority):
754f1e6a 9254 if (type == BPF_WRITE)
6b8cc1d1 9255 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
9256 bpf_target_off(struct sk_buff, priority, 4,
9257 target_size));
754f1e6a 9258 else
6b8cc1d1 9259 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
9260 bpf_target_off(struct sk_buff, priority, 4,
9261 target_size));
bcad5718
DB
9262 break;
9263
37e82c2f 9264 case offsetof(struct __sk_buff, ingress_ifindex):
6b8cc1d1 9265 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
9266 bpf_target_off(struct sk_buff, skb_iif, 4,
9267 target_size));
37e82c2f
AS
9268 break;
9269
9270 case offsetof(struct __sk_buff, ifindex):
f035a515 9271 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
6b8cc1d1 9272 si->dst_reg, si->src_reg,
37e82c2f 9273 offsetof(struct sk_buff, dev));
6b8cc1d1
DB
9274 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9275 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
f96da094
DB
9276 bpf_target_off(struct net_device, ifindex, 4,
9277 target_size));
37e82c2f
AS
9278 break;
9279
ba7591d8 9280 case offsetof(struct __sk_buff, hash):
6b8cc1d1 9281 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
9282 bpf_target_off(struct sk_buff, hash, 4,
9283 target_size));
ba7591d8
DB
9284 break;
9285
9bac3d6d 9286 case offsetof(struct __sk_buff, mark):
d691f9e8 9287 if (type == BPF_WRITE)
6b8cc1d1 9288 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
9289 bpf_target_off(struct sk_buff, mark, 4,
9290 target_size));
d691f9e8 9291 else
6b8cc1d1 9292 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
9293 bpf_target_off(struct sk_buff, mark, 4,
9294 target_size));
d691f9e8 9295 break;
9bac3d6d
AS
9296
9297 case offsetof(struct __sk_buff, pkt_type):
f96da094
DB
9298 *target_size = 1;
9299 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
fba84957 9300 PKT_TYPE_OFFSET);
f96da094
DB
9301 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
9302#ifdef __BIG_ENDIAN_BITFIELD
9303 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
9304#endif
9305 break;
9bac3d6d
AS
9306
9307 case offsetof(struct __sk_buff, queue_mapping):
74e31ca8
JDB
9308 if (type == BPF_WRITE) {
9309 *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
9310 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
9311 bpf_target_off(struct sk_buff,
9312 queue_mapping,
9313 2, target_size));
9314 } else {
9315 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9316 bpf_target_off(struct sk_buff,
9317 queue_mapping,
9318 2, target_size));
9319 }
f96da094 9320 break;
c2497395 9321
c2497395 9322 case offsetof(struct __sk_buff, vlan_present):
354259fa
ED
9323 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9324 bpf_target_off(struct sk_buff,
9325 vlan_all, 4, target_size));
9326 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9327 *insn++ = BPF_ALU32_IMM(BPF_MOV, si->dst_reg, 1);
9c212255 9328 break;
f96da094 9329
9c212255 9330 case offsetof(struct __sk_buff, vlan_tci):
f96da094
DB
9331 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9332 bpf_target_off(struct sk_buff, vlan_tci, 2,
9333 target_size));
f96da094 9334 break;
d691f9e8
AS
9335
9336 case offsetof(struct __sk_buff, cb[0]) ...
f96da094 9337 offsetofend(struct __sk_buff, cb[4]) - 1:
c593642c 9338 BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < 20);
62c7989b
DB
9339 BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
9340 offsetof(struct qdisc_skb_cb, data)) %
9341 sizeof(__u64));
d691f9e8 9342
ff936a04 9343 prog->cb_access = 1;
6b8cc1d1
DB
9344 off = si->off;
9345 off -= offsetof(struct __sk_buff, cb[0]);
9346 off += offsetof(struct sk_buff, cb);
9347 off += offsetof(struct qdisc_skb_cb, data);
d691f9e8 9348 if (type == BPF_WRITE)
62c7989b 9349 *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
6b8cc1d1 9350 si->src_reg, off);
d691f9e8 9351 else
62c7989b 9352 *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
6b8cc1d1 9353 si->src_reg, off);
d691f9e8
AS
9354 break;
9355
045efa82 9356 case offsetof(struct __sk_buff, tc_classid):
c593642c 9357 BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, tc_classid) != 2);
6b8cc1d1
DB
9358
9359 off = si->off;
9360 off -= offsetof(struct __sk_buff, tc_classid);
9361 off += offsetof(struct sk_buff, cb);
9362 off += offsetof(struct qdisc_skb_cb, tc_classid);
f96da094 9363 *target_size = 2;
09c37a2c 9364 if (type == BPF_WRITE)
6b8cc1d1
DB
9365 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
9366 si->src_reg, off);
09c37a2c 9367 else
6b8cc1d1
DB
9368 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
9369 si->src_reg, off);
045efa82
DB
9370 break;
9371
db58ba45 9372 case offsetof(struct __sk_buff, data):
f035a515 9373 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
6b8cc1d1 9374 si->dst_reg, si->src_reg,
db58ba45
AS
9375 offsetof(struct sk_buff, data));
9376 break;
9377
de8f3a83
DB
9378 case offsetof(struct __sk_buff, data_meta):
9379 off = si->off;
9380 off -= offsetof(struct __sk_buff, data_meta);
9381 off += offsetof(struct sk_buff, cb);
9382 off += offsetof(struct bpf_skb_data_end, data_meta);
9383 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
9384 si->src_reg, off);
9385 break;
9386
db58ba45 9387 case offsetof(struct __sk_buff, data_end):
6b8cc1d1
DB
9388 off = si->off;
9389 off -= offsetof(struct __sk_buff, data_end);
9390 off += offsetof(struct sk_buff, cb);
9391 off += offsetof(struct bpf_skb_data_end, data_end);
9392 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
9393 si->src_reg, off);
db58ba45
AS
9394 break;
9395
d691f9e8
AS
9396 case offsetof(struct __sk_buff, tc_index):
9397#ifdef CONFIG_NET_SCHED
d691f9e8 9398 if (type == BPF_WRITE)
6b8cc1d1 9399 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
9400 bpf_target_off(struct sk_buff, tc_index, 2,
9401 target_size));
d691f9e8 9402 else
6b8cc1d1 9403 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
9404 bpf_target_off(struct sk_buff, tc_index, 2,
9405 target_size));
d691f9e8 9406#else
2ed46ce4 9407 *target_size = 2;
d691f9e8 9408 if (type == BPF_WRITE)
6b8cc1d1 9409 *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
d691f9e8 9410 else
6b8cc1d1 9411 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
b1d9fc41
DB
9412#endif
9413 break;
9414
9415 case offsetof(struct __sk_buff, napi_id):
9416#if defined(CONFIG_NET_RX_BUSY_POLL)
b1d9fc41 9417 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
9418 bpf_target_off(struct sk_buff, napi_id, 4,
9419 target_size));
b1d9fc41
DB
9420 *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
9421 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
9422#else
2ed46ce4 9423 *target_size = 4;
b1d9fc41 9424 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
d691f9e8 9425#endif
6b8cc1d1 9426 break;
8a31db56 9427 case offsetof(struct __sk_buff, family):
c593642c 9428 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
8a31db56
JF
9429
9430 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
9431 si->dst_reg, si->src_reg,
9432 offsetof(struct sk_buff, sk));
9433 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9434 bpf_target_off(struct sock_common,
9435 skc_family,
9436 2, target_size));
9437 break;
9438 case offsetof(struct __sk_buff, remote_ip4):
c593642c 9439 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
8a31db56
JF
9440
9441 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
9442 si->dst_reg, si->src_reg,
9443 offsetof(struct sk_buff, sk));
9444 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9445 bpf_target_off(struct sock_common,
9446 skc_daddr,
9447 4, target_size));
9448 break;
9449 case offsetof(struct __sk_buff, local_ip4):
c593642c 9450 BUILD_BUG_ON(sizeof_field(struct sock_common,
8a31db56
JF
9451 skc_rcv_saddr) != 4);
9452
9453 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
9454 si->dst_reg, si->src_reg,
9455 offsetof(struct sk_buff, sk));
9456 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9457 bpf_target_off(struct sock_common,
9458 skc_rcv_saddr,
9459 4, target_size));
9460 break;
9461 case offsetof(struct __sk_buff, remote_ip6[0]) ...
9462 offsetof(struct __sk_buff, remote_ip6[3]):
9463#if IS_ENABLED(CONFIG_IPV6)
c593642c 9464 BUILD_BUG_ON(sizeof_field(struct sock_common,
8a31db56
JF
9465 skc_v6_daddr.s6_addr32[0]) != 4);
9466
9467 off = si->off;
9468 off -= offsetof(struct __sk_buff, remote_ip6[0]);
9469
9470 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
9471 si->dst_reg, si->src_reg,
9472 offsetof(struct sk_buff, sk));
9473 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9474 offsetof(struct sock_common,
9475 skc_v6_daddr.s6_addr32[0]) +
9476 off);
9477#else
9478 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9479#endif
9480 break;
9481 case offsetof(struct __sk_buff, local_ip6[0]) ...
9482 offsetof(struct __sk_buff, local_ip6[3]):
9483#if IS_ENABLED(CONFIG_IPV6)
c593642c 9484 BUILD_BUG_ON(sizeof_field(struct sock_common,
8a31db56
JF
9485 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
9486
9487 off = si->off;
9488 off -= offsetof(struct __sk_buff, local_ip6[0]);
9489
9490 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
9491 si->dst_reg, si->src_reg,
9492 offsetof(struct sk_buff, sk));
9493 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9494 offsetof(struct sock_common,
9495 skc_v6_rcv_saddr.s6_addr32[0]) +
9496 off);
9497#else
9498 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9499#endif
9500 break;
9501
9502 case offsetof(struct __sk_buff, remote_port):
c593642c 9503 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
8a31db56
JF
9504
9505 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
9506 si->dst_reg, si->src_reg,
9507 offsetof(struct sk_buff, sk));
9508 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9509 bpf_target_off(struct sock_common,
9510 skc_dport,
9511 2, target_size));
9512#ifndef __BIG_ENDIAN_BITFIELD
9513 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
9514#endif
9515 break;
9516
9517 case offsetof(struct __sk_buff, local_port):
c593642c 9518 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
8a31db56
JF
9519
9520 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
9521 si->dst_reg, si->src_reg,
9522 offsetof(struct sk_buff, sk));
9523 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9524 bpf_target_off(struct sock_common,
9525 skc_num, 2, target_size));
9526 break;
d58e468b 9527
f11216b2 9528 case offsetof(struct __sk_buff, tstamp):
c593642c 9529 BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8);
f11216b2
VD
9530
9531 if (type == BPF_WRITE)
8d21ec0e 9532 insn = bpf_convert_tstamp_write(prog, si, insn);
f11216b2 9533 else
8d21ec0e
MKL
9534 insn = bpf_convert_tstamp_read(prog, si, insn);
9535 break;
9536
9bb984f2
MKL
9537 case offsetof(struct __sk_buff, tstamp_type):
9538 insn = bpf_convert_tstamp_type_read(si, insn);
e3da08d0
PP
9539 break;
9540
d9ff286a 9541 case offsetof(struct __sk_buff, gso_segs):
9bb05349 9542 insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn);
d9ff286a
ED
9543 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
9544 si->dst_reg, si->dst_reg,
9545 bpf_target_off(struct skb_shared_info,
9546 gso_segs, 2,
9547 target_size));
9548 break;
cf62089b 9549 case offsetof(struct __sk_buff, gso_size):
9bb05349 9550 insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn);
cf62089b
WB
9551 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size),
9552 si->dst_reg, si->dst_reg,
9553 bpf_target_off(struct skb_shared_info,
9554 gso_size, 2,
9555 target_size));
9556 break;
e3da08d0 9557 case offsetof(struct __sk_buff, wire_len):
c593642c 9558 BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4);
e3da08d0
PP
9559
9560 off = si->off;
9561 off -= offsetof(struct __sk_buff, wire_len);
9562 off += offsetof(struct sk_buff, cb);
9563 off += offsetof(struct qdisc_skb_cb, pkt_len);
9564 *target_size = 4;
9565 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
46f8bc92
MKL
9566 break;
9567
9568 case offsetof(struct __sk_buff, sk):
9569 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
9570 si->dst_reg, si->src_reg,
9571 offsetof(struct sk_buff, sk));
9572 break;
f64c4ace
VF
9573 case offsetof(struct __sk_buff, hwtstamp):
9574 BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8);
9575 BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0);
9576
9bb05349 9577 insn = bpf_convert_shinfo_access(si->dst_reg, si->src_reg, insn);
f64c4ace
VF
9578 *insn++ = BPF_LDX_MEM(BPF_DW,
9579 si->dst_reg, si->dst_reg,
9580 bpf_target_off(struct skb_shared_info,
9581 hwtstamps, 8,
9582 target_size));
9583 break;
9bac3d6d
AS
9584 }
9585
9586 return insn - insn_buf;
89aa0758
AS
9587}
9588
c64b7983
JS
9589u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
9590 const struct bpf_insn *si,
9591 struct bpf_insn *insn_buf,
9592 struct bpf_prog *prog, u32 *target_size)
61023658
DA
9593{
9594 struct bpf_insn *insn = insn_buf;
aac3fc32 9595 int off;
61023658 9596
6b8cc1d1 9597 switch (si->off) {
61023658 9598 case offsetof(struct bpf_sock, bound_dev_if):
c593642c 9599 BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4);
61023658
DA
9600
9601 if (type == BPF_WRITE)
6b8cc1d1 9602 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
61023658
DA
9603 offsetof(struct sock, sk_bound_dev_if));
9604 else
6b8cc1d1 9605 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
61023658
DA
9606 offsetof(struct sock, sk_bound_dev_if));
9607 break;
aa4c1037 9608
482dca93 9609 case offsetof(struct bpf_sock, mark):
c593642c 9610 BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4);
482dca93
DA
9611
9612 if (type == BPF_WRITE)
9613 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
9614 offsetof(struct sock, sk_mark));
9615 else
9616 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9617 offsetof(struct sock, sk_mark));
9618 break;
9619
9620 case offsetof(struct bpf_sock, priority):
c593642c 9621 BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4);
482dca93
DA
9622
9623 if (type == BPF_WRITE)
9624 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
9625 offsetof(struct sock, sk_priority));
9626 else
9627 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9628 offsetof(struct sock, sk_priority));
9629 break;
9630
aa4c1037 9631 case offsetof(struct bpf_sock, family):
aa65d696
MKL
9632 *insn++ = BPF_LDX_MEM(
9633 BPF_FIELD_SIZEOF(struct sock_common, skc_family),
9634 si->dst_reg, si->src_reg,
9635 bpf_target_off(struct sock_common,
9636 skc_family,
c593642c 9637 sizeof_field(struct sock_common,
aa65d696
MKL
9638 skc_family),
9639 target_size));
aa4c1037
DA
9640 break;
9641
9642 case offsetof(struct bpf_sock, type):
bf976514
MM
9643 *insn++ = BPF_LDX_MEM(
9644 BPF_FIELD_SIZEOF(struct sock, sk_type),
9645 si->dst_reg, si->src_reg,
9646 bpf_target_off(struct sock, sk_type,
9647 sizeof_field(struct sock, sk_type),
9648 target_size));
aa4c1037
DA
9649 break;
9650
9651 case offsetof(struct bpf_sock, protocol):
bf976514
MM
9652 *insn++ = BPF_LDX_MEM(
9653 BPF_FIELD_SIZEOF(struct sock, sk_protocol),
9654 si->dst_reg, si->src_reg,
9655 bpf_target_off(struct sock, sk_protocol,
9656 sizeof_field(struct sock, sk_protocol),
9657 target_size));
aa4c1037 9658 break;
aac3fc32
AI
9659
9660 case offsetof(struct bpf_sock, src_ip4):
9661 *insn++ = BPF_LDX_MEM(
9662 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9663 bpf_target_off(struct sock_common, skc_rcv_saddr,
c593642c 9664 sizeof_field(struct sock_common,
aac3fc32
AI
9665 skc_rcv_saddr),
9666 target_size));
9667 break;
9668
aa65d696
MKL
9669 case offsetof(struct bpf_sock, dst_ip4):
9670 *insn++ = BPF_LDX_MEM(
9671 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9672 bpf_target_off(struct sock_common, skc_daddr,
c593642c 9673 sizeof_field(struct sock_common,
aa65d696
MKL
9674 skc_daddr),
9675 target_size));
9676 break;
9677
aac3fc32
AI
9678 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
9679#if IS_ENABLED(CONFIG_IPV6)
9680 off = si->off;
9681 off -= offsetof(struct bpf_sock, src_ip6[0]);
9682 *insn++ = BPF_LDX_MEM(
9683 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9684 bpf_target_off(
9685 struct sock_common,
9686 skc_v6_rcv_saddr.s6_addr32[0],
c593642c 9687 sizeof_field(struct sock_common,
aac3fc32
AI
9688 skc_v6_rcv_saddr.s6_addr32[0]),
9689 target_size) + off);
9690#else
9691 (void)off;
9692 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9693#endif
9694 break;
9695
aa65d696
MKL
9696 case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
9697#if IS_ENABLED(CONFIG_IPV6)
9698 off = si->off;
9699 off -= offsetof(struct bpf_sock, dst_ip6[0]);
9700 *insn++ = BPF_LDX_MEM(
9701 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
9702 bpf_target_off(struct sock_common,
9703 skc_v6_daddr.s6_addr32[0],
c593642c 9704 sizeof_field(struct sock_common,
aa65d696
MKL
9705 skc_v6_daddr.s6_addr32[0]),
9706 target_size) + off);
9707#else
9708 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9709 *target_size = 4;
9710#endif
9711 break;
9712
aac3fc32
AI
9713 case offsetof(struct bpf_sock, src_port):
9714 *insn++ = BPF_LDX_MEM(
9715 BPF_FIELD_SIZEOF(struct sock_common, skc_num),
9716 si->dst_reg, si->src_reg,
9717 bpf_target_off(struct sock_common, skc_num,
c593642c 9718 sizeof_field(struct sock_common,
aac3fc32
AI
9719 skc_num),
9720 target_size));
9721 break;
aa65d696
MKL
9722
9723 case offsetof(struct bpf_sock, dst_port):
9724 *insn++ = BPF_LDX_MEM(
9725 BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
9726 si->dst_reg, si->src_reg,
9727 bpf_target_off(struct sock_common, skc_dport,
c593642c 9728 sizeof_field(struct sock_common,
aa65d696
MKL
9729 skc_dport),
9730 target_size));
9731 break;
9732
9733 case offsetof(struct bpf_sock, state):
9734 *insn++ = BPF_LDX_MEM(
9735 BPF_FIELD_SIZEOF(struct sock_common, skc_state),
9736 si->dst_reg, si->src_reg,
9737 bpf_target_off(struct sock_common, skc_state,
c593642c 9738 sizeof_field(struct sock_common,
aa65d696
MKL
9739 skc_state),
9740 target_size));
9741 break;
c3c16f2e 9742 case offsetof(struct bpf_sock, rx_queue_mapping):
4e1beecc 9743#ifdef CONFIG_SOCK_RX_QUEUE_MAPPING
c3c16f2e
AN
9744 *insn++ = BPF_LDX_MEM(
9745 BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
9746 si->dst_reg, si->src_reg,
9747 bpf_target_off(struct sock, sk_rx_queue_mapping,
9748 sizeof_field(struct sock,
9749 sk_rx_queue_mapping),
9750 target_size));
9751 *insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING,
9752 1);
9753 *insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
9754#else
9755 *insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
9756 *target_size = 2;
9757#endif
9758 break;
61023658
DA
9759 }
9760
9761 return insn - insn_buf;
9762}
9763
6b8cc1d1
DB
9764static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
9765 const struct bpf_insn *si,
374fb54e 9766 struct bpf_insn *insn_buf,
f96da094 9767 struct bpf_prog *prog, u32 *target_size)
374fb54e
DB
9768{
9769 struct bpf_insn *insn = insn_buf;
9770
6b8cc1d1 9771 switch (si->off) {
374fb54e 9772 case offsetof(struct __sk_buff, ifindex):
374fb54e 9773 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
6b8cc1d1 9774 si->dst_reg, si->src_reg,
374fb54e 9775 offsetof(struct sk_buff, dev));
6b8cc1d1 9776 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
f96da094
DB
9777 bpf_target_off(struct net_device, ifindex, 4,
9778 target_size));
374fb54e
DB
9779 break;
9780 default:
f96da094
DB
9781 return bpf_convert_ctx_access(type, si, insn_buf, prog,
9782 target_size);
374fb54e
DB
9783 }
9784
9785 return insn - insn_buf;
9786}
9787
6b8cc1d1
DB
9788static u32 xdp_convert_ctx_access(enum bpf_access_type type,
9789 const struct bpf_insn *si,
6a773a15 9790 struct bpf_insn *insn_buf,
f96da094 9791 struct bpf_prog *prog, u32 *target_size)
6a773a15
BB
9792{
9793 struct bpf_insn *insn = insn_buf;
9794
6b8cc1d1 9795 switch (si->off) {
6a773a15 9796 case offsetof(struct xdp_md, data):
f035a515 9797 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
6b8cc1d1 9798 si->dst_reg, si->src_reg,
6a773a15
BB
9799 offsetof(struct xdp_buff, data));
9800 break;
de8f3a83
DB
9801 case offsetof(struct xdp_md, data_meta):
9802 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
9803 si->dst_reg, si->src_reg,
9804 offsetof(struct xdp_buff, data_meta));
9805 break;
6a773a15 9806 case offsetof(struct xdp_md, data_end):
f035a515 9807 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
6b8cc1d1 9808 si->dst_reg, si->src_reg,
6a773a15
BB
9809 offsetof(struct xdp_buff, data_end));
9810 break;
02dd3291
JDB
9811 case offsetof(struct xdp_md, ingress_ifindex):
9812 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
9813 si->dst_reg, si->src_reg,
9814 offsetof(struct xdp_buff, rxq));
9815 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
9816 si->dst_reg, si->dst_reg,
9817 offsetof(struct xdp_rxq_info, dev));
9818 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
daaf24c6 9819 offsetof(struct net_device, ifindex));
02dd3291
JDB
9820 break;
9821 case offsetof(struct xdp_md, rx_queue_index):
9822 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
9823 si->dst_reg, si->src_reg,
9824 offsetof(struct xdp_buff, rxq));
9825 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
daaf24c6
JDB
9826 offsetof(struct xdp_rxq_info,
9827 queue_index));
02dd3291 9828 break;
64b59025
DA
9829 case offsetof(struct xdp_md, egress_ifindex):
9830 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
9831 si->dst_reg, si->src_reg,
9832 offsetof(struct xdp_buff, txq));
9833 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
9834 si->dst_reg, si->dst_reg,
9835 offsetof(struct xdp_txq_info, dev));
9836 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9837 offsetof(struct net_device, ifindex));
9838 break;
6a773a15
BB
9839 }
9840
9841 return insn - insn_buf;
9842}
9843
4fbac77d
AI
9844/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of
9845 * context Structure, F is Field in context structure that contains a pointer
9846 * to Nested Structure of type NS that has the field NF.
9847 *
 9848 * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to the caller to
 9849 * make sure that SIZE is not greater than the actual size of S.F.NF.
9850 *
9851 * If offset OFF is provided, the load happens from that offset relative to
9852 * offset of NF.
9853 */
9854#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \
9855 do { \
9856 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \
9857 si->src_reg, offsetof(S, F)); \
9858 *insn++ = BPF_LDX_MEM( \
9859 SIZE, si->dst_reg, si->dst_reg, \
c593642c 9860 bpf_target_off(NS, NF, sizeof_field(NS, NF), \
4fbac77d
AI
9861 target_size) \
9862 + OFF); \
9863 } while (0)
9864
9865#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \
9866 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \
9867 BPF_FIELD_SIZEOF(NS, NF), 0)
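/* Illustrative example (added for clarity, not part of the original source):
 * SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, struct sockaddr,
 * uaddr, sa_family), as used for user_family below, expands to roughly
 *
 *	dst_reg = ((struct bpf_sock_addr_kern *)src_reg)->uaddr;
 *	dst_reg = ((struct sockaddr *)dst_reg)->sa_family;
 */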
9868
9869/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to
9870 * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation.
9871 *
4fbac77d
AI
9872 * In addition it uses Temporary Field TF (member of struct S) as the 3rd
9873 * "register" since two registers available in convert_ctx_access are not
 9874 * enough: we can override neither SRC, since it contains the value to store,
 9875 * nor DST, since it contains the pointer to context that may be used by later
 9876 * instructions. But we need a temporary place to save the pointer to the
 9877 * nested structure whose field we want to store to.
9878 */
600c70ba 9879#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \
4fbac77d
AI
9880 do { \
9881 int tmp_reg = BPF_REG_9; \
9882 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
9883 --tmp_reg; \
9884 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
9885 --tmp_reg; \
9886 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \
9887 offsetof(S, TF)); \
9888 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
9889 si->dst_reg, offsetof(S, F)); \
600c70ba 9890 *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
c593642c 9891 bpf_target_off(NS, NF, sizeof_field(NS, NF), \
4fbac77d
AI
9892 target_size) \
9893 + OFF); \
9894 *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
9895 offsetof(S, TF)); \
9896 } while (0)
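/* Illustrative sketch (added for clarity, not part of the original source):
 * the store variant above is roughly
 *
 *	ctx->TF = tmp_reg;		   save the borrowed register
 *	tmp_reg = ctx->F;		   pointer to nested struct NS
 *	*(SIZE *)((void *)tmp_reg + offsetof(NS, NF) + OFF) = src_reg;
 *	tmp_reg = ctx->TF;		   restore the borrowed register
 */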
9897
9898#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
9899 TF) \
9900 do { \
9901 if (type == BPF_WRITE) { \
600c70ba
SF
9902 SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \
9903 OFF, TF); \
4fbac77d
AI
9904 } else { \
9905 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
9906 S, NS, F, NF, SIZE, OFF); \
9907 } \
9908 } while (0)
9909
9910#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
9911 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
9912 S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
9913
9914static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
9915 const struct bpf_insn *si,
9916 struct bpf_insn *insn_buf,
9917 struct bpf_prog *prog, u32 *target_size)
9918{
7aebfa1b 9919 int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port);
4fbac77d 9920 struct bpf_insn *insn = insn_buf;
4fbac77d
AI
9921
9922 switch (si->off) {
9923 case offsetof(struct bpf_sock_addr, user_family):
9924 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9925 struct sockaddr, uaddr, sa_family);
9926 break;
9927
9928 case offsetof(struct bpf_sock_addr, user_ip4):
9929 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9930 struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
9931 sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
9932 break;
9933
9934 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
9935 off = si->off;
9936 off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
9937 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9938 struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
9939 sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
9940 tmp_reg);
9941 break;
9942
9943 case offsetof(struct bpf_sock_addr, user_port):
 9944	 /* To get the port we need to know sa_family first and then treat
 9945	 * sockaddr as either sockaddr_in or sockaddr_in6.
 9946	 * Though we can simplify, since the port field has the same offset
 9947	 * and size in both structures.
 9948	 * Here we check this invariant and use just one of the
 9949	 * structures if it holds.
9950 */
9951 BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
9952 offsetof(struct sockaddr_in6, sin6_port));
c593642c
PB
9953 BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
9954 sizeof_field(struct sockaddr_in6, sin6_port));
7aebfa1b
AI
9955 /* Account for sin6_port being smaller than user_port. */
9956 port_size = min(port_size, BPF_LDST_BYTES(si));
9957 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9958 struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
9959 sin6_port, bytes_to_bpf_size(port_size), 0, tmp_reg);
4fbac77d
AI
9960 break;
9961
9962 case offsetof(struct bpf_sock_addr, family):
9963 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9964 struct sock, sk, sk_family);
9965 break;
9966
9967 case offsetof(struct bpf_sock_addr, type):
bf976514
MM
9968 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9969 struct sock, sk, sk_type);
4fbac77d
AI
9970 break;
9971
9972 case offsetof(struct bpf_sock_addr, protocol):
bf976514
MM
9973 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
9974 struct sock, sk, sk_protocol);
4fbac77d 9975 break;
1cedee13
AI
9976
9977 case offsetof(struct bpf_sock_addr, msg_src_ip4):
9978 /* Treat t_ctx as struct in_addr for msg_src_ip4. */
9979 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9980 struct bpf_sock_addr_kern, struct in_addr, t_ctx,
9981 s_addr, BPF_SIZE(si->code), 0, tmp_reg);
9982 break;
9983
9984 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
9985 msg_src_ip6[3]):
9986 off = si->off;
9987 off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]);
9988 /* Treat t_ctx as struct in6_addr for msg_src_ip6. */
9989 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
9990 struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
9991 s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
9992 break;
fb85c4a7
SF
9993 case offsetof(struct bpf_sock_addr, sk):
9994 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, sk),
9995 si->dst_reg, si->src_reg,
9996 offsetof(struct bpf_sock_addr_kern, sk));
9997 break;
4fbac77d
AI
9998 }
9999
10000 return insn - insn_buf;
10001}
10002
40304b2a
LB
10003static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
10004 const struct bpf_insn *si,
10005 struct bpf_insn *insn_buf,
f96da094
DB
10006 struct bpf_prog *prog,
10007 u32 *target_size)
40304b2a
LB
10008{
10009 struct bpf_insn *insn = insn_buf;
10010 int off;
10011
9b1f3d6e
MKL
10012/* Helper macro for adding read access to tcp_sock or sock fields. */
10013#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
10014 do { \
fd09af01 10015 int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2; \
c593642c
PB
10016 BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
10017 sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
fd09af01
JF
10018 if (si->dst_reg == reg || si->src_reg == reg) \
10019 reg--; \
10020 if (si->dst_reg == reg || si->src_reg == reg) \
10021 reg--; \
10022 if (si->dst_reg == si->src_reg) { \
10023 *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \
10024 offsetof(struct bpf_sock_ops_kern, \
10025 temp)); \
10026 fullsock_reg = reg; \
10027 jmp += 2; \
10028 } \
9b1f3d6e
MKL
10029 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10030 struct bpf_sock_ops_kern, \
10031 is_fullsock), \
fd09af01 10032 fullsock_reg, si->src_reg, \
9b1f3d6e
MKL
10033 offsetof(struct bpf_sock_ops_kern, \
10034 is_fullsock)); \
fd09af01
JF
10035 *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
10036 if (si->dst_reg == si->src_reg) \
10037 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10038 offsetof(struct bpf_sock_ops_kern, \
10039 temp)); \
9b1f3d6e
MKL
10040 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10041 struct bpf_sock_ops_kern, sk),\
10042 si->dst_reg, si->src_reg, \
10043 offsetof(struct bpf_sock_ops_kern, sk));\
10044 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
10045 OBJ_FIELD), \
10046 si->dst_reg, si->dst_reg, \
10047 offsetof(OBJ, OBJ_FIELD)); \
fd09af01
JF
10048 if (si->dst_reg == si->src_reg) { \
10049 *insn++ = BPF_JMP_A(1); \
10050 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10051 offsetof(struct bpf_sock_ops_kern, \
10052 temp)); \
10053 } \
9b1f3d6e
MKL
10054 } while (0)
10055
84f44df6
JF
10056#define SOCK_OPS_GET_SK() \
10057 do { \
10058 int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1; \
10059 if (si->dst_reg == reg || si->src_reg == reg) \
10060 reg--; \
10061 if (si->dst_reg == reg || si->src_reg == reg) \
10062 reg--; \
10063 if (si->dst_reg == si->src_reg) { \
10064 *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \
10065 offsetof(struct bpf_sock_ops_kern, \
10066 temp)); \
10067 fullsock_reg = reg; \
10068 jmp += 2; \
10069 } \
10070 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10071 struct bpf_sock_ops_kern, \
10072 is_fullsock), \
10073 fullsock_reg, si->src_reg, \
10074 offsetof(struct bpf_sock_ops_kern, \
10075 is_fullsock)); \
10076 *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
10077 if (si->dst_reg == si->src_reg) \
10078 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10079 offsetof(struct bpf_sock_ops_kern, \
10080 temp)); \
10081 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10082 struct bpf_sock_ops_kern, sk),\
10083 si->dst_reg, si->src_reg, \
10084 offsetof(struct bpf_sock_ops_kern, sk));\
10085 if (si->dst_reg == si->src_reg) { \
10086 *insn++ = BPF_JMP_A(1); \
10087 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
10088 offsetof(struct bpf_sock_ops_kern, \
10089 temp)); \
10090 } \
10091 } while (0)
10092
9b1f3d6e
MKL
10093#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
10094 SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
10095
10096/* Helper macro for adding write access to tcp_sock or sock fields.
 10097 * The macro is called with two registers, dst_reg, which contains a pointer
 10098 * to ctx (context), and src_reg, which contains the value that should be
 10099 * stored. However, we need an additional register since we cannot overwrite
 10100 * dst_reg because it may be used later in the program.
 10101 * Instead we "borrow" one of the other registers. We first save its value
10102 * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
10103 * it at the end of the macro.
10104 */
10105#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
10106 do { \
10107 int reg = BPF_REG_9; \
c593642c
PB
10108 BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
10109 sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
9b1f3d6e
MKL
10110 if (si->dst_reg == reg || si->src_reg == reg) \
10111 reg--; \
10112 if (si->dst_reg == reg || si->src_reg == reg) \
10113 reg--; \
10114 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
10115 offsetof(struct bpf_sock_ops_kern, \
10116 temp)); \
10117 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10118 struct bpf_sock_ops_kern, \
10119 is_fullsock), \
10120 reg, si->dst_reg, \
10121 offsetof(struct bpf_sock_ops_kern, \
10122 is_fullsock)); \
10123 *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
10124 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
10125 struct bpf_sock_ops_kern, sk),\
10126 reg, si->dst_reg, \
10127 offsetof(struct bpf_sock_ops_kern, sk));\
10128 *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
10129 reg, si->src_reg, \
10130 offsetof(OBJ, OBJ_FIELD)); \
10131 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
10132 offsetof(struct bpf_sock_ops_kern, \
10133 temp)); \
10134 } while (0)
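/* Illustrative sketch (added for clarity, not part of the original source):
 * SOCK_OPS_SET_FIELD() above emits roughly
 *
 *	ctx->temp = reg;		   save the borrowed register
 *	reg = ctx->is_fullsock;
 *	if (reg != 0) {
 *		reg = ctx->sk;
 *		((OBJ *)reg)->OBJ_FIELD = src_reg;
 *	}
 *	reg = ctx->temp;		   restore the borrowed register
 */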
10135
10136#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
10137 do { \
10138 if (TYPE == BPF_WRITE) \
10139 SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
10140 else \
10141 SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
10142 } while (0)
10143
9b1f3d6e
MKL
10144 if (insn > insn_buf)
10145 return insn - insn_buf;
10146
40304b2a 10147 switch (si->off) {
c9985d09
MKL
10148 case offsetof(struct bpf_sock_ops, op):
10149 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
10150 op),
10151 si->dst_reg, si->src_reg,
10152 offsetof(struct bpf_sock_ops_kern, op));
10153 break;
10154
10155 case offsetof(struct bpf_sock_ops, replylong[0]) ...
40304b2a 10156 offsetof(struct bpf_sock_ops, replylong[3]):
c593642c
PB
10157 BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) !=
10158 sizeof_field(struct bpf_sock_ops_kern, reply));
10159 BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) !=
10160 sizeof_field(struct bpf_sock_ops_kern, replylong));
40304b2a 10161 off = si->off;
c9985d09
MKL
10162 off -= offsetof(struct bpf_sock_ops, replylong[0]);
10163 off += offsetof(struct bpf_sock_ops_kern, replylong[0]);
40304b2a
LB
10164 if (type == BPF_WRITE)
10165 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
10166 off);
10167 else
10168 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
10169 off);
10170 break;
10171
10172 case offsetof(struct bpf_sock_ops, family):
c593642c 10173 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
40304b2a
LB
10174
10175 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10176 struct bpf_sock_ops_kern, sk),
10177 si->dst_reg, si->src_reg,
10178 offsetof(struct bpf_sock_ops_kern, sk));
10179 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10180 offsetof(struct sock_common, skc_family));
10181 break;
10182
10183 case offsetof(struct bpf_sock_ops, remote_ip4):
c593642c 10184 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
40304b2a
LB
10185
10186 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10187 struct bpf_sock_ops_kern, sk),
10188 si->dst_reg, si->src_reg,
10189 offsetof(struct bpf_sock_ops_kern, sk));
10190 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10191 offsetof(struct sock_common, skc_daddr));
10192 break;
10193
10194 case offsetof(struct bpf_sock_ops, local_ip4):
c593642c 10195 BUILD_BUG_ON(sizeof_field(struct sock_common,
303def35 10196 skc_rcv_saddr) != 4);
40304b2a
LB
10197
10198 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10199 struct bpf_sock_ops_kern, sk),
10200 si->dst_reg, si->src_reg,
10201 offsetof(struct bpf_sock_ops_kern, sk));
10202 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10203 offsetof(struct sock_common,
10204 skc_rcv_saddr));
10205 break;
10206
10207 case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
10208 offsetof(struct bpf_sock_ops, remote_ip6[3]):
10209#if IS_ENABLED(CONFIG_IPV6)
c593642c 10210 BUILD_BUG_ON(sizeof_field(struct sock_common,
40304b2a
LB
10211 skc_v6_daddr.s6_addr32[0]) != 4);
10212
10213 off = si->off;
10214 off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
10215 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10216 struct bpf_sock_ops_kern, sk),
10217 si->dst_reg, si->src_reg,
10218 offsetof(struct bpf_sock_ops_kern, sk));
10219 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10220 offsetof(struct sock_common,
10221 skc_v6_daddr.s6_addr32[0]) +
10222 off);
10223#else
10224 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
10225#endif
10226 break;
10227
10228 case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
10229 offsetof(struct bpf_sock_ops, local_ip6[3]):
10230#if IS_ENABLED(CONFIG_IPV6)
c593642c 10231 BUILD_BUG_ON(sizeof_field(struct sock_common,
40304b2a
LB
10232 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
10233
10234 off = si->off;
10235 off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
10236 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10237 struct bpf_sock_ops_kern, sk),
10238 si->dst_reg, si->src_reg,
10239 offsetof(struct bpf_sock_ops_kern, sk));
10240 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10241 offsetof(struct sock_common,
10242 skc_v6_rcv_saddr.s6_addr32[0]) +
10243 off);
10244#else
10245 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
10246#endif
10247 break;
10248
10249 case offsetof(struct bpf_sock_ops, remote_port):
c593642c 10250 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
40304b2a
LB
10251
10252 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10253 struct bpf_sock_ops_kern, sk),
10254 si->dst_reg, si->src_reg,
10255 offsetof(struct bpf_sock_ops_kern, sk));
10256 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10257 offsetof(struct sock_common, skc_dport));
10258#ifndef __BIG_ENDIAN_BITFIELD
10259 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
10260#endif
10261 break;
10262
10263 case offsetof(struct bpf_sock_ops, local_port):
c593642c 10264 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
40304b2a
LB
10265
10266 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10267 struct bpf_sock_ops_kern, sk),
10268 si->dst_reg, si->src_reg,
10269 offsetof(struct bpf_sock_ops_kern, sk));
10270 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10271 offsetof(struct sock_common, skc_num));
10272 break;
f19397a5
LB
10273
10274 case offsetof(struct bpf_sock_ops, is_fullsock):
10275 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10276 struct bpf_sock_ops_kern,
10277 is_fullsock),
10278 si->dst_reg, si->src_reg,
10279 offsetof(struct bpf_sock_ops_kern,
10280 is_fullsock));
10281 break;
10282
44f0e430 10283 case offsetof(struct bpf_sock_ops, state):
c593642c 10284 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
44f0e430
LB
10285
10286 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10287 struct bpf_sock_ops_kern, sk),
10288 si->dst_reg, si->src_reg,
10289 offsetof(struct bpf_sock_ops_kern, sk));
10290 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
10291 offsetof(struct sock_common, skc_state));
10292 break;
10293
10294 case offsetof(struct bpf_sock_ops, rtt_min):
c593642c 10295 BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
44f0e430
LB
10296 sizeof(struct minmax));
10297 BUILD_BUG_ON(sizeof(struct minmax) <
10298 sizeof(struct minmax_sample));
10299
10300 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
10301 struct bpf_sock_ops_kern, sk),
10302 si->dst_reg, si->src_reg,
10303 offsetof(struct bpf_sock_ops_kern, sk));
10304 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10305 offsetof(struct tcp_sock, rtt_min) +
c593642c 10306 sizeof_field(struct minmax_sample, t));
44f0e430
LB
10307 break;
10308
b13d8807
LB
10309 case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
10310 SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
10311 struct tcp_sock);
10312 break;
44f0e430 10313
44f0e430 10314 case offsetof(struct bpf_sock_ops, sk_txhash):
6f9bd3d7
LB
10315 SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
10316 struct sock, type);
44f0e430 10317 break;
2377b81d
SF
10318 case offsetof(struct bpf_sock_ops, snd_cwnd):
10319 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd);
10320 break;
10321 case offsetof(struct bpf_sock_ops, srtt_us):
10322 SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us);
10323 break;
10324 case offsetof(struct bpf_sock_ops, snd_ssthresh):
10325 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh);
10326 break;
10327 case offsetof(struct bpf_sock_ops, rcv_nxt):
10328 SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt);
10329 break;
10330 case offsetof(struct bpf_sock_ops, snd_nxt):
10331 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt);
10332 break;
10333 case offsetof(struct bpf_sock_ops, snd_una):
10334 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una);
10335 break;
10336 case offsetof(struct bpf_sock_ops, mss_cache):
10337 SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache);
10338 break;
10339 case offsetof(struct bpf_sock_ops, ecn_flags):
10340 SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags);
10341 break;
10342 case offsetof(struct bpf_sock_ops, rate_delivered):
10343 SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered);
10344 break;
10345 case offsetof(struct bpf_sock_ops, rate_interval_us):
10346 SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us);
10347 break;
10348 case offsetof(struct bpf_sock_ops, packets_out):
10349 SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out);
10350 break;
10351 case offsetof(struct bpf_sock_ops, retrans_out):
10352 SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out);
10353 break;
10354 case offsetof(struct bpf_sock_ops, total_retrans):
10355 SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans);
10356 break;
10357 case offsetof(struct bpf_sock_ops, segs_in):
10358 SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in);
10359 break;
10360 case offsetof(struct bpf_sock_ops, data_segs_in):
10361 SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in);
10362 break;
10363 case offsetof(struct bpf_sock_ops, segs_out):
10364 SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out);
10365 break;
10366 case offsetof(struct bpf_sock_ops, data_segs_out):
10367 SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out);
10368 break;
10369 case offsetof(struct bpf_sock_ops, lost_out):
10370 SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out);
10371 break;
10372 case offsetof(struct bpf_sock_ops, sacked_out):
10373 SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out);
10374 break;
10375 case offsetof(struct bpf_sock_ops, bytes_received):
10376 SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received);
10377 break;
10378 case offsetof(struct bpf_sock_ops, bytes_acked):
10379 SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked);
10380 break;
1314ef56 10381 case offsetof(struct bpf_sock_ops, sk):
84f44df6 10382 SOCK_OPS_GET_SK();
1314ef56 10383 break;
0813a841
MKL
10384 case offsetof(struct bpf_sock_ops, skb_data_end):
10385 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
10386 skb_data_end),
10387 si->dst_reg, si->src_reg,
10388 offsetof(struct bpf_sock_ops_kern,
10389 skb_data_end));
10390 break;
10391 case offsetof(struct bpf_sock_ops, skb_data):
10392 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
10393 skb),
10394 si->dst_reg, si->src_reg,
10395 offsetof(struct bpf_sock_ops_kern,
10396 skb));
10397 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
10398 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
10399 si->dst_reg, si->dst_reg,
10400 offsetof(struct sk_buff, data));
10401 break;
10402 case offsetof(struct bpf_sock_ops, skb_len):
10403 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
10404 skb),
10405 si->dst_reg, si->src_reg,
10406 offsetof(struct bpf_sock_ops_kern,
10407 skb));
10408 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
10409 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
10410 si->dst_reg, si->dst_reg,
10411 offsetof(struct sk_buff, len));
10412 break;
10413 case offsetof(struct bpf_sock_ops, skb_tcp_flags):
10414 off = offsetof(struct sk_buff, cb);
10415 off += offsetof(struct tcp_skb_cb, tcp_flags);
10416 *target_size = sizeof_field(struct tcp_skb_cb, tcp_flags);
10417 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
10418 skb),
10419 si->dst_reg, si->src_reg,
10420 offsetof(struct bpf_sock_ops_kern,
10421 skb));
10422 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
10423 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_skb_cb,
10424 tcp_flags),
10425 si->dst_reg, si->dst_reg, off);
10426 break;
9bb05349
MKL
10427 case offsetof(struct bpf_sock_ops, skb_hwtstamp): {
10428 struct bpf_insn *jmp_on_null_skb;
10429
10430 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
10431 skb),
10432 si->dst_reg, si->src_reg,
10433 offsetof(struct bpf_sock_ops_kern,
10434 skb));
10435 /* Reserve one insn to test skb == NULL */
10436 jmp_on_null_skb = insn++;
10437 insn = bpf_convert_shinfo_access(si->dst_reg, si->dst_reg, insn);
10438 *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
10439 bpf_target_off(struct skb_shared_info,
10440 hwtstamps, 8,
10441 target_size));
10442 *jmp_on_null_skb = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0,
10443 insn - jmp_on_null_skb - 1);
10444 break;
10445 }
40304b2a
LB
10446 }
10447 return insn - insn_buf;
10448}
10449
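/* Illustrative sketch (not part of this file): a hypothetical sockops
 * program whose field accesses are rewritten by
 * sock_ops_convert_ctx_access() above; reading skops->srtt_us, for
 * instance, becomes a load of bpf_sock_ops_kern->sk followed by a load
 * of tcp_sock->srtt_us. Assumes the usual libbpf SEC() conventions.
 *
 *	SEC("sockops")
 *	int log_rtt(struct bpf_sock_ops *skops)
 *	{
 *		if (skops->op == BPF_SOCK_OPS_RTT_CB)
 *			bpf_printk("srtt_us=%u cwnd=%u",
 *				   skops->srtt_us, skops->snd_cwnd);
 *		return 1;
 *	}
 */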
16137b09
CW
10450/* data_end = skb->data + skb_headlen() */
10451static struct bpf_insn *bpf_convert_data_end_access(const struct bpf_insn *si,
10452 struct bpf_insn *insn)
10453{
b2c46181
JM
10454 int reg;
10455 int temp_reg_off = offsetof(struct sk_buff, cb) +
10456 offsetof(struct sk_skb_cb, temp_reg);
10457
10458 if (si->src_reg == si->dst_reg) {
10459		/* We need an extra register; choose and save one. */
10460 reg = BPF_REG_9;
10461 if (si->src_reg == reg || si->dst_reg == reg)
10462 reg--;
10463 if (si->src_reg == reg || si->dst_reg == reg)
10464 reg--;
10465 *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, temp_reg_off);
10466 } else {
10467 reg = si->dst_reg;
10468 }
10469
10470 /* reg = skb->data */
16137b09 10471 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
b2c46181 10472 reg, si->src_reg,
16137b09
CW
10473 offsetof(struct sk_buff, data));
10474 /* AX = skb->len */
10475 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
10476 BPF_REG_AX, si->src_reg,
10477 offsetof(struct sk_buff, len));
b2c46181
JM
10478 /* reg = skb->data + skb->len */
10479 *insn++ = BPF_ALU64_REG(BPF_ADD, reg, BPF_REG_AX);
16137b09
CW
10480 /* AX = skb->data_len */
10481 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data_len),
10482 BPF_REG_AX, si->src_reg,
10483 offsetof(struct sk_buff, data_len));
b2c46181
JM
10484
10485 /* reg = skb->data + skb->len - skb->data_len */
10486 *insn++ = BPF_ALU64_REG(BPF_SUB, reg, BPF_REG_AX);
10487
10488 if (si->src_reg == si->dst_reg) {
10489 /* Restore the saved register */
10490 *insn++ = BPF_MOV64_REG(BPF_REG_AX, si->src_reg);
10491 *insn++ = BPF_MOV64_REG(si->dst_reg, reg);
10492 *insn++ = BPF_LDX_MEM(BPF_DW, reg, BPF_REG_AX, temp_reg_off);
10493 }
16137b09
CW
10494
10495 return insn;
10496}
10497
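/* Worked example for the rewrite above (illustration only): with 500
 * bytes in the linear area and 1000 bytes in page fragments,
 * skb->len == 1500 and skb->data_len == 1000, so the generated
 * instructions compute data_end = skb->data + 1500 - 1000 =
 * skb->data + 500, i.e. skb->data + skb_headlen(skb).
 */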
8108a775
JF
10498static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
10499 const struct bpf_insn *si,
10500 struct bpf_insn *insn_buf,
10501 struct bpf_prog *prog, u32 *target_size)
10502{
10503 struct bpf_insn *insn = insn_buf;
e0dc3b93 10504 int off;
8108a775
JF
10505
10506 switch (si->off) {
10507 case offsetof(struct __sk_buff, data_end):
16137b09 10508 insn = bpf_convert_data_end_access(si, insn);
8108a775 10509 break;
e0dc3b93
JF
10510 case offsetof(struct __sk_buff, cb[0]) ...
10511 offsetofend(struct __sk_buff, cb[4]) - 1:
10512 BUILD_BUG_ON(sizeof_field(struct sk_skb_cb, data) < 20);
10513 BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
10514 offsetof(struct sk_skb_cb, data)) %
10515 sizeof(__u64));
10516
10517 prog->cb_access = 1;
10518 off = si->off;
10519 off -= offsetof(struct __sk_buff, cb[0]);
10520 off += offsetof(struct sk_buff, cb);
10521 off += offsetof(struct sk_skb_cb, data);
10522 if (type == BPF_WRITE)
10523 *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
10524 si->src_reg, off);
10525 else
10526 *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
10527 si->src_reg, off);
10528 break;
10529
10530
8108a775
JF
10531 default:
10532 return bpf_convert_ctx_access(type, si, insn_buf, prog,
10533 target_size);
10534 }
10535
10536 return insn - insn_buf;
10537}
10538
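/* Illustrative sketch (not part of this file): a hypothetical SK_SKB
 * stream verdict program whose data/data_end/cb[] accesses are rewritten
 * by sk_skb_convert_ctx_access() above; cb[] reads and writes land in
 * sk_skb_cb::data inside skb->cb.
 *
 *	SEC("sk_skb/stream_verdict")
 *	int verdict(struct __sk_buff *skb)
 *	{
 *		void *data = (void *)(long)skb->data;
 *		void *data_end = (void *)(long)skb->data_end;
 *
 *		if (data + 1 > data_end)
 *			return SK_DROP;
 *		skb->cb[0] = skb->len;	// per-program scratch space
 *		return SK_PASS;
 *	}
 */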
4f738adb
JF
10539static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
10540 const struct bpf_insn *si,
10541 struct bpf_insn *insn_buf,
10542 struct bpf_prog *prog, u32 *target_size)
10543{
10544 struct bpf_insn *insn = insn_buf;
720e7f38 10545#if IS_ENABLED(CONFIG_IPV6)
303def35 10546 int off;
720e7f38 10547#endif
4f738adb 10548
7a69c0f2
JF
10549	/* convert_ctx_access relies on sg being the first member of struct sk_msg */
10550 BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0);
10551
4f738adb
JF
10552 switch (si->off) {
10553 case offsetof(struct sk_msg_md, data):
604326b4 10554 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
4f738adb 10555 si->dst_reg, si->src_reg,
604326b4 10556 offsetof(struct sk_msg, data));
4f738adb
JF
10557 break;
10558 case offsetof(struct sk_msg_md, data_end):
604326b4 10559 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end),
4f738adb 10560 si->dst_reg, si->src_reg,
604326b4 10561 offsetof(struct sk_msg, data_end));
4f738adb 10562 break;
303def35 10563 case offsetof(struct sk_msg_md, family):
c593642c 10564 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
303def35
JF
10565
10566 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 10567 struct sk_msg, sk),
303def35 10568 si->dst_reg, si->src_reg,
604326b4 10569 offsetof(struct sk_msg, sk));
303def35
JF
10570 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10571 offsetof(struct sock_common, skc_family));
10572 break;
10573
10574 case offsetof(struct sk_msg_md, remote_ip4):
c593642c 10575 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
303def35
JF
10576
10577 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 10578 struct sk_msg, sk),
303def35 10579 si->dst_reg, si->src_reg,
604326b4 10580 offsetof(struct sk_msg, sk));
303def35
JF
10581 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10582 offsetof(struct sock_common, skc_daddr));
10583 break;
10584
10585 case offsetof(struct sk_msg_md, local_ip4):
c593642c 10586 BUILD_BUG_ON(sizeof_field(struct sock_common,
303def35
JF
10587 skc_rcv_saddr) != 4);
10588
10589 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 10590 struct sk_msg, sk),
303def35 10591 si->dst_reg, si->src_reg,
604326b4 10592 offsetof(struct sk_msg, sk));
303def35
JF
10593 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10594 offsetof(struct sock_common,
10595 skc_rcv_saddr));
10596 break;
10597
10598 case offsetof(struct sk_msg_md, remote_ip6[0]) ...
10599 offsetof(struct sk_msg_md, remote_ip6[3]):
10600#if IS_ENABLED(CONFIG_IPV6)
c593642c 10601 BUILD_BUG_ON(sizeof_field(struct sock_common,
303def35
JF
10602 skc_v6_daddr.s6_addr32[0]) != 4);
10603
10604 off = si->off;
10605 off -= offsetof(struct sk_msg_md, remote_ip6[0]);
10606 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 10607 struct sk_msg, sk),
303def35 10608 si->dst_reg, si->src_reg,
604326b4 10609 offsetof(struct sk_msg, sk));
303def35
JF
10610 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10611 offsetof(struct sock_common,
10612 skc_v6_daddr.s6_addr32[0]) +
10613 off);
10614#else
10615 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
10616#endif
10617 break;
10618
10619 case offsetof(struct sk_msg_md, local_ip6[0]) ...
10620 offsetof(struct sk_msg_md, local_ip6[3]):
10621#if IS_ENABLED(CONFIG_IPV6)
c593642c 10622 BUILD_BUG_ON(sizeof_field(struct sock_common,
303def35
JF
10623 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
10624
10625 off = si->off;
10626 off -= offsetof(struct sk_msg_md, local_ip6[0]);
10627 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 10628 struct sk_msg, sk),
303def35 10629 si->dst_reg, si->src_reg,
604326b4 10630 offsetof(struct sk_msg, sk));
303def35
JF
10631 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
10632 offsetof(struct sock_common,
10633 skc_v6_rcv_saddr.s6_addr32[0]) +
10634 off);
10635#else
10636 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
10637#endif
10638 break;
10639
10640 case offsetof(struct sk_msg_md, remote_port):
c593642c 10641 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
303def35
JF
10642
10643 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 10644 struct sk_msg, sk),
303def35 10645 si->dst_reg, si->src_reg,
604326b4 10646 offsetof(struct sk_msg, sk));
303def35
JF
10647 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10648 offsetof(struct sock_common, skc_dport));
10649#ifndef __BIG_ENDIAN_BITFIELD
10650 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
10651#endif
10652 break;
10653
10654 case offsetof(struct sk_msg_md, local_port):
c593642c 10655 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
303def35
JF
10656
10657 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 10658 struct sk_msg, sk),
303def35 10659 si->dst_reg, si->src_reg,
604326b4 10660 offsetof(struct sk_msg, sk));
303def35
JF
10661 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
10662 offsetof(struct sock_common, skc_num));
10663 break;
3bdbd022
JF
10664
10665 case offsetof(struct sk_msg_md, size):
10666 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size),
10667 si->dst_reg, si->src_reg,
10668 offsetof(struct sk_msg_sg, size));
10669 break;
13d70f5a
JF
10670
10671 case offsetof(struct sk_msg_md, sk):
10672 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, sk),
10673 si->dst_reg, si->src_reg,
10674 offsetof(struct sk_msg, sk));
10675 break;
4f738adb
JF
10676 }
10677
10678 return insn - insn_buf;
10679}
10680
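/* Illustrative sketch (not part of this file): a hypothetical SK_MSG
 * program relying on the conversions above; data/data_end come straight
 * from struct sk_msg, while size is read from the embedded sk_msg_sg.
 *
 *	SEC("sk_msg")
 *	int msg_prog(struct sk_msg_md *msg)
 *	{
 *		void *data = (void *)(long)msg->data;
 *		void *data_end = (void *)(long)msg->data_end;
 *
 *		if (data + 1 > data_end)
 *			return SK_PASS;
 *		return msg->size > 4096 ? SK_DROP : SK_PASS;
 *	}
 */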
7de16e3a 10681const struct bpf_verifier_ops sk_filter_verifier_ops = {
4936e352
DB
10682 .get_func_proto = sk_filter_func_proto,
10683 .is_valid_access = sk_filter_is_valid_access,
2492d3b8 10684 .convert_ctx_access = bpf_convert_ctx_access,
e0cea7ce 10685 .gen_ld_abs = bpf_gen_ld_abs,
89aa0758
AS
10686};
10687
7de16e3a 10688const struct bpf_prog_ops sk_filter_prog_ops = {
61f3c964 10689 .test_run = bpf_prog_test_run_skb,
7de16e3a
JK
10690};
10691
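/* The const ops tables in this file pair a struct bpf_verifier_ops,
 * consulted at program load time (get_func_proto for helper lookup,
 * is_valid_access for bounding context reads/writes, convert_ctx_access
 * for rewriting those accesses into real field loads), with a
 * struct bpf_prog_ops that currently only carries the BPF_PROG_TEST_RUN
 * hook where one exists.
 */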
10692const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
4936e352
DB
10693 .get_func_proto = tc_cls_act_func_proto,
10694 .is_valid_access = tc_cls_act_is_valid_access,
374fb54e 10695 .convert_ctx_access = tc_cls_act_convert_ctx_access,
36bbef52 10696 .gen_prologue = tc_cls_act_prologue,
e0cea7ce 10697 .gen_ld_abs = bpf_gen_ld_abs,
864b656f 10698 .btf_struct_access = tc_cls_act_btf_struct_access,
7de16e3a
JK
10699};
10700
10701const struct bpf_prog_ops tc_cls_act_prog_ops = {
1cf1cae9 10702 .test_run = bpf_prog_test_run_skb,
608cd71a
AS
10703};
10704
7de16e3a 10705const struct bpf_verifier_ops xdp_verifier_ops = {
6a773a15
BB
10706 .get_func_proto = xdp_func_proto,
10707 .is_valid_access = xdp_is_valid_access,
10708 .convert_ctx_access = xdp_convert_ctx_access,
b09928b9 10709 .gen_prologue = bpf_noop_prologue,
864b656f 10710 .btf_struct_access = xdp_btf_struct_access,
7de16e3a
JK
10711};
10712
10713const struct bpf_prog_ops xdp_prog_ops = {
1cf1cae9 10714 .test_run = bpf_prog_test_run_xdp,
6a773a15
BB
10715};
10716
7de16e3a 10717const struct bpf_verifier_ops cg_skb_verifier_ops = {
cd339431 10718 .get_func_proto = cg_skb_func_proto,
b39b5f41 10719 .is_valid_access = cg_skb_is_valid_access,
2492d3b8 10720 .convert_ctx_access = bpf_convert_ctx_access,
7de16e3a
JK
10721};
10722
10723const struct bpf_prog_ops cg_skb_prog_ops = {
1cf1cae9 10724 .test_run = bpf_prog_test_run_skb,
0e33661d
DM
10725};
10726
cd3092c7
MX
10727const struct bpf_verifier_ops lwt_in_verifier_ops = {
10728 .get_func_proto = lwt_in_func_proto,
3a0af8fd 10729 .is_valid_access = lwt_is_valid_access,
2492d3b8 10730 .convert_ctx_access = bpf_convert_ctx_access,
7de16e3a
JK
10731};
10732
cd3092c7
MX
10733const struct bpf_prog_ops lwt_in_prog_ops = {
10734 .test_run = bpf_prog_test_run_skb,
10735};
10736
10737const struct bpf_verifier_ops lwt_out_verifier_ops = {
10738 .get_func_proto = lwt_out_func_proto,
3a0af8fd 10739 .is_valid_access = lwt_is_valid_access,
2492d3b8 10740 .convert_ctx_access = bpf_convert_ctx_access,
7de16e3a
JK
10741};
10742
cd3092c7 10743const struct bpf_prog_ops lwt_out_prog_ops = {
1cf1cae9 10744 .test_run = bpf_prog_test_run_skb,
3a0af8fd
TG
10745};
10746
7de16e3a 10747const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
3a0af8fd
TG
10748 .get_func_proto = lwt_xmit_func_proto,
10749 .is_valid_access = lwt_is_valid_access,
2492d3b8 10750 .convert_ctx_access = bpf_convert_ctx_access,
3a0af8fd 10751 .gen_prologue = tc_cls_act_prologue,
7de16e3a
JK
10752};
10753
10754const struct bpf_prog_ops lwt_xmit_prog_ops = {
1cf1cae9 10755 .test_run = bpf_prog_test_run_skb,
3a0af8fd
TG
10756};
10757
004d4b27
MX
10758const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
10759 .get_func_proto = lwt_seg6local_func_proto,
10760 .is_valid_access = lwt_is_valid_access,
10761 .convert_ctx_access = bpf_convert_ctx_access,
10762};
10763
10764const struct bpf_prog_ops lwt_seg6local_prog_ops = {
10765 .test_run = bpf_prog_test_run_skb,
10766};
10767
7de16e3a 10768const struct bpf_verifier_ops cg_sock_verifier_ops = {
ae2cf1c4 10769 .get_func_proto = sock_filter_func_proto,
61023658 10770 .is_valid_access = sock_filter_is_valid_access,
c64b7983 10771 .convert_ctx_access = bpf_sock_convert_ctx_access,
61023658
DA
10772};
10773
7de16e3a
JK
10774const struct bpf_prog_ops cg_sock_prog_ops = {
10775};
10776
4fbac77d
AI
10777const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
10778 .get_func_proto = sock_addr_func_proto,
10779 .is_valid_access = sock_addr_is_valid_access,
10780 .convert_ctx_access = sock_addr_convert_ctx_access,
10781};
10782
10783const struct bpf_prog_ops cg_sock_addr_prog_ops = {
10784};
10785
7de16e3a 10786const struct bpf_verifier_ops sock_ops_verifier_ops = {
8c4b4c7e 10787 .get_func_proto = sock_ops_func_proto,
40304b2a
LB
10788 .is_valid_access = sock_ops_is_valid_access,
10789 .convert_ctx_access = sock_ops_convert_ctx_access,
10790};
10791
7de16e3a
JK
10792const struct bpf_prog_ops sock_ops_prog_ops = {
10793};
10794
10795const struct bpf_verifier_ops sk_skb_verifier_ops = {
b005fd18
JF
10796 .get_func_proto = sk_skb_func_proto,
10797 .is_valid_access = sk_skb_is_valid_access,
8108a775 10798 .convert_ctx_access = sk_skb_convert_ctx_access,
8a31db56 10799 .gen_prologue = sk_skb_prologue,
b005fd18
JF
10800};
10801
7de16e3a
JK
10802const struct bpf_prog_ops sk_skb_prog_ops = {
10803};
10804
4f738adb
JF
10805const struct bpf_verifier_ops sk_msg_verifier_ops = {
10806 .get_func_proto = sk_msg_func_proto,
10807 .is_valid_access = sk_msg_is_valid_access,
10808 .convert_ctx_access = sk_msg_convert_ctx_access,
b09928b9 10809 .gen_prologue = bpf_noop_prologue,
4f738adb
JF
10810};
10811
10812const struct bpf_prog_ops sk_msg_prog_ops = {
10813};
10814
d58e468b
PP
10815const struct bpf_verifier_ops flow_dissector_verifier_ops = {
10816 .get_func_proto = flow_dissector_func_proto,
10817 .is_valid_access = flow_dissector_is_valid_access,
089b19a9 10818 .convert_ctx_access = flow_dissector_convert_ctx_access,
d58e468b
PP
10819};
10820
10821const struct bpf_prog_ops flow_dissector_prog_ops = {
b7a1848e 10822 .test_run = bpf_prog_test_run_flow_dissector,
d58e468b
PP
10823};
10824
8ced425e 10825int sk_detach_filter(struct sock *sk)
55b33325
PE
10826{
10827 int ret = -ENOENT;
10828 struct sk_filter *filter;
10829
d59577b6
VB
10830 if (sock_flag(sk, SOCK_FILTER_LOCKED))
10831 return -EPERM;
10832
8ced425e
HFS
10833 filter = rcu_dereference_protected(sk->sk_filter,
10834 lockdep_sock_is_held(sk));
55b33325 10835 if (filter) {
a9b3cd7f 10836 RCU_INIT_POINTER(sk->sk_filter, NULL);
46bcf14f 10837 sk_filter_uncharge(sk, filter);
55b33325
PE
10838 ret = 0;
10839 }
a3ea269b 10840
55b33325
PE
10841 return ret;
10842}
8ced425e 10843EXPORT_SYMBOL_GPL(sk_detach_filter);
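
/* Illustrative user-space sketch (not part of this file): a classic
 * filter attached with SO_ATTACH_FILTER is removed through this path
 * when the application issues SO_DETACH_FILTER; the option value is
 * ignored.
 *
 *	int dummy = 0;
 *
 *	if (setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER,
 *		       &dummy, sizeof(dummy)) == -1)
 *		perror("SO_DETACH_FILTER");
 */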
a8fc9277 10844
4ff09db1 10845int sk_get_filter(struct sock *sk, sockptr_t optval, unsigned int len)
a8fc9277 10846{
a3ea269b 10847 struct sock_fprog_kern *fprog;
a8fc9277 10848 struct sk_filter *filter;
a3ea269b 10849 int ret = 0;
a8fc9277 10850
2c5b6bf5 10851 sockopt_lock_sock(sk);
a8fc9277 10852 filter = rcu_dereference_protected(sk->sk_filter,
8ced425e 10853 lockdep_sock_is_held(sk));
a8fc9277
PE
10854 if (!filter)
10855 goto out;
a3ea269b
DB
10856
10857	/* We're copying the filter that was originally attached,
93d08b69
DB
10858 * so no conversion/decode needed anymore. eBPF programs that
10859 * have no original program cannot be dumped through this.
a3ea269b 10860 */
93d08b69 10861 ret = -EACCES;
7ae457c1 10862 fprog = filter->prog->orig_prog;
93d08b69
DB
10863 if (!fprog)
10864 goto out;
a3ea269b
DB
10865
10866 ret = fprog->len;
a8fc9277 10867 if (!len)
a3ea269b 10868		/* User space only enquires about the number of filter blocks. */
a8fc9277 10869 goto out;
a3ea269b 10870
a8fc9277 10871 ret = -EINVAL;
a3ea269b 10872 if (len < fprog->len)
a8fc9277
PE
10873 goto out;
10874
10875 ret = -EFAULT;
4ff09db1 10876 if (copy_to_sockptr(optval, fprog->filter, bpf_classic_proglen(fprog)))
a3ea269b 10877 goto out;
a8fc9277 10878
a3ea269b
DB
10879	/* The API expects the number of filter blocks to be returned
10880	 * rather than the number of bytes.
10881	 */
10882 ret = fprog->len;
a8fc9277 10883out:
2c5b6bf5 10884 sockopt_release_sock(sk);
a8fc9277
PE
10885 return ret;
10886}
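
/* Illustrative user-space sketch (not part of this file, error handling
 * omitted): the originally attached classic filter can be read back with
 * getsockopt(SO_GET_FILTER). Note that optlen is counted in filter
 * blocks, not bytes; a first call with *optlen == 0 only reports the
 * number of blocks.
 *
 *	socklen_t optlen = 0;
 *	struct sock_filter *insns;
 *
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, NULL, &optlen);
 *	insns = calloc(optlen, sizeof(*insns));
 *	getsockopt(fd, SOL_SOCKET, SO_GET_FILTER, insns, &optlen);
 */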
2dbb9b9e
MKL
10887
10888#ifdef CONFIG_INET
2dbb9b9e
MKL
10889static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
10890 struct sock_reuseport *reuse,
10891 struct sock *sk, struct sk_buff *skb,
d5e4ddae 10892 struct sock *migrating_sk,
2dbb9b9e
MKL
10893 u32 hash)
10894{
10895 reuse_kern->skb = skb;
10896 reuse_kern->sk = sk;
10897 reuse_kern->selected_sk = NULL;
d5e4ddae 10898 reuse_kern->migrating_sk = migrating_sk;
2dbb9b9e
MKL
10899 reuse_kern->data_end = skb->data + skb_headlen(skb);
10900 reuse_kern->hash = hash;
10901 reuse_kern->reuseport_id = reuse->reuseport_id;
10902 reuse_kern->bind_inany = reuse->bind_inany;
10903}
10904
10905struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
10906 struct bpf_prog *prog, struct sk_buff *skb,
d5e4ddae 10907 struct sock *migrating_sk,
2dbb9b9e
MKL
10908 u32 hash)
10909{
10910 struct sk_reuseport_kern reuse_kern;
10911 enum sk_action action;
10912
d5e4ddae 10913 bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, migrating_sk, hash);
fb7dd8bc 10914 action = bpf_prog_run(prog, &reuse_kern);
2dbb9b9e
MKL
10915
10916 if (action == SK_PASS)
10917 return reuse_kern.selected_sk;
10918 else
10919 return ERR_PTR(-ECONNREFUSED);
10920}
10921
10922BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
10923 struct bpf_map *, map, void *, key, u32, flags)
10924{
9fed9000 10925 bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
2dbb9b9e
MKL
10926 struct sock_reuseport *reuse;
10927 struct sock *selected_sk;
10928
10929 selected_sk = map->ops->map_lookup_elem(map, key);
10930 if (!selected_sk)
10931 return -ENOENT;
10932
10933 reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
9fed9000 10934 if (!reuse) {
64d85290
JS
10935 /* Lookup in sock_map can return TCP ESTABLISHED sockets. */
10936 if (sk_is_refcounted(selected_sk))
10937 sock_put(selected_sk);
10938
9fed9000
JS
10939		/* reuseport_array only has sockets with a non-NULL sk_reuseport_cb.
10940		 * The only (!reuse) case here is that the sk has already been
10941 * unhashed (e.g. by close()), so treat it as -ENOENT.
10942 *
10943 * Other maps (e.g. sock_map) do not provide this guarantee and
10944 * the sk may never be in the reuseport group to begin with.
2dbb9b9e 10945 */
9fed9000
JS
10946 return is_sockarray ? -ENOENT : -EINVAL;
10947 }
2dbb9b9e
MKL
10948
10949 if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
035ff358 10950 struct sock *sk = reuse_kern->sk;
2dbb9b9e 10951
2dbb9b9e
MKL
10952 if (sk->sk_protocol != selected_sk->sk_protocol)
10953 return -EPROTOTYPE;
10954 else if (sk->sk_family != selected_sk->sk_family)
10955 return -EAFNOSUPPORT;
10956
10957 /* Catch all. Likely bound to a different sockaddr. */
10958 return -EBADFD;
10959 }
10960
10961 reuse_kern->selected_sk = selected_sk;
10962
10963 return 0;
10964}
10965
10966static const struct bpf_func_proto sk_select_reuseport_proto = {
10967 .func = sk_select_reuseport,
10968 .gpl_only = false,
10969 .ret_type = RET_INTEGER,
10970 .arg1_type = ARG_PTR_TO_CTX,
10971 .arg2_type = ARG_CONST_MAP_PTR,
10972 .arg3_type = ARG_PTR_TO_MAP_KEY,
10973 .arg4_type = ARG_ANYTHING,
10974};
10975
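/* Illustrative sketch (not part of this file): a hypothetical
 * SK_REUSEPORT program that uses the helper above to pick the target
 * socket out of a BPF_MAP_TYPE_REUSEPORT_SOCKARRAY. The map name and the
 * two-socket split are made up for the example.
 *
 *	SEC("sk_reuseport")
 *	int select_by_hash(struct sk_reuseport_md *reuse_md)
 *	{
 *		__u32 index = reuse_md->hash % 2;
 *
 *		if (bpf_sk_select_reuseport(reuse_md, &reuseport_array,
 *					    &index, 0))
 *			return SK_DROP;
 *		return SK_PASS;
 *	}
 */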
10976BPF_CALL_4(sk_reuseport_load_bytes,
10977 const struct sk_reuseport_kern *, reuse_kern, u32, offset,
10978 void *, to, u32, len)
10979{
10980 return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
10981}
10982
10983static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
10984 .func = sk_reuseport_load_bytes,
10985 .gpl_only = false,
10986 .ret_type = RET_INTEGER,
10987 .arg1_type = ARG_PTR_TO_CTX,
10988 .arg2_type = ARG_ANYTHING,
10989 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
10990 .arg4_type = ARG_CONST_SIZE,
10991};
10992
10993BPF_CALL_5(sk_reuseport_load_bytes_relative,
10994 const struct sk_reuseport_kern *, reuse_kern, u32, offset,
10995 void *, to, u32, len, u32, start_header)
10996{
10997 return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
10998 len, start_header);
10999}
11000
11001static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
11002 .func = sk_reuseport_load_bytes_relative,
11003 .gpl_only = false,
11004 .ret_type = RET_INTEGER,
11005 .arg1_type = ARG_PTR_TO_CTX,
11006 .arg2_type = ARG_ANYTHING,
11007 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
11008 .arg4_type = ARG_CONST_SIZE,
11009 .arg5_type = ARG_ANYTHING,
11010};
11011
11012static const struct bpf_func_proto *
11013sk_reuseport_func_proto(enum bpf_func_id func_id,
11014 const struct bpf_prog *prog)
11015{
11016 switch (func_id) {
11017 case BPF_FUNC_sk_select_reuseport:
11018 return &sk_select_reuseport_proto;
11019 case BPF_FUNC_skb_load_bytes:
11020 return &sk_reuseport_load_bytes_proto;
11021 case BPF_FUNC_skb_load_bytes_relative:
11022 return &sk_reuseport_load_bytes_relative_proto;
e0610476
KI
11023 case BPF_FUNC_get_socket_cookie:
11024 return &bpf_get_socket_ptr_cookie_proto;
5e0bc308
DB
11025 case BPF_FUNC_ktime_get_coarse_ns:
11026 return &bpf_ktime_get_coarse_ns_proto;
2dbb9b9e
MKL
11027 default:
11028 return bpf_base_func_proto(func_id);
11029 }
11030}
11031
11032static bool
11033sk_reuseport_is_valid_access(int off, int size,
11034 enum bpf_access_type type,
11035 const struct bpf_prog *prog,
11036 struct bpf_insn_access_aux *info)
11037{
11038 const u32 size_default = sizeof(__u32);
11039
11040 if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
11041 off % size || type != BPF_READ)
11042 return false;
11043
11044 switch (off) {
11045 case offsetof(struct sk_reuseport_md, data):
11046 info->reg_type = PTR_TO_PACKET;
11047 return size == sizeof(__u64);
11048
11049 case offsetof(struct sk_reuseport_md, data_end):
11050 info->reg_type = PTR_TO_PACKET_END;
11051 return size == sizeof(__u64);
11052
11053 case offsetof(struct sk_reuseport_md, hash):
11054 return size == size_default;
11055
e0610476
KI
11056 case offsetof(struct sk_reuseport_md, sk):
11057 info->reg_type = PTR_TO_SOCKET;
11058 return size == sizeof(__u64);
11059
d5e4ddae
KI
11060 case offsetof(struct sk_reuseport_md, migrating_sk):
11061 info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
11062 return size == sizeof(__u64);
11063
2dbb9b9e 11064 /* Fields that allow narrowing */
2c238177 11065 case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
c593642c 11066 if (size < sizeof_field(struct sk_buff, protocol))
2dbb9b9e 11067 return false;
df561f66 11068 fallthrough;
2c238177
IL
11069 case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
11070 case bpf_ctx_range(struct sk_reuseport_md, bind_inany):
11071 case bpf_ctx_range(struct sk_reuseport_md, len):
2dbb9b9e
MKL
11072 bpf_ctx_record_field_size(info, size_default);
11073 return bpf_ctx_narrow_access_ok(off, size, size_default);
11074
11075 default:
11076 return false;
11077 }
11078}
11079
11080#define SK_REUSEPORT_LOAD_FIELD(F) ({ \
11081 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
11082 si->dst_reg, si->src_reg, \
11083 bpf_target_off(struct sk_reuseport_kern, F, \
c593642c 11084 sizeof_field(struct sk_reuseport_kern, F), \
2dbb9b9e
MKL
11085 target_size)); \
11086 })
11087
11088#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD) \
11089 SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
11090 struct sk_buff, \
11091 skb, \
11092 SKB_FIELD)
11093
bf976514
MM
11094#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \
11095 SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
11096 struct sock, \
11097 sk, \
11098 SK_FIELD)
2dbb9b9e
MKL
11099
11100static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
11101 const struct bpf_insn *si,
11102 struct bpf_insn *insn_buf,
11103 struct bpf_prog *prog,
11104 u32 *target_size)
11105{
11106 struct bpf_insn *insn = insn_buf;
11107
11108 switch (si->off) {
11109 case offsetof(struct sk_reuseport_md, data):
11110 SK_REUSEPORT_LOAD_SKB_FIELD(data);
11111 break;
11112
11113 case offsetof(struct sk_reuseport_md, len):
11114 SK_REUSEPORT_LOAD_SKB_FIELD(len);
11115 break;
11116
11117 case offsetof(struct sk_reuseport_md, eth_protocol):
11118 SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
11119 break;
11120
11121 case offsetof(struct sk_reuseport_md, ip_protocol):
bf976514 11122 SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
2dbb9b9e
MKL
11123 break;
11124
11125 case offsetof(struct sk_reuseport_md, data_end):
11126 SK_REUSEPORT_LOAD_FIELD(data_end);
11127 break;
11128
11129 case offsetof(struct sk_reuseport_md, hash):
11130 SK_REUSEPORT_LOAD_FIELD(hash);
11131 break;
11132
11133 case offsetof(struct sk_reuseport_md, bind_inany):
11134 SK_REUSEPORT_LOAD_FIELD(bind_inany);
11135 break;
e0610476
KI
11136
11137 case offsetof(struct sk_reuseport_md, sk):
11138 SK_REUSEPORT_LOAD_FIELD(sk);
11139 break;
d5e4ddae
KI
11140
11141 case offsetof(struct sk_reuseport_md, migrating_sk):
11142 SK_REUSEPORT_LOAD_FIELD(migrating_sk);
11143 break;
2dbb9b9e
MKL
11144 }
11145
11146 return insn - insn_buf;
11147}
11148
11149const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
11150 .get_func_proto = sk_reuseport_func_proto,
11151 .is_valid_access = sk_reuseport_is_valid_access,
11152 .convert_ctx_access = sk_reuseport_convert_ctx_access,
11153};
11154
11155const struct bpf_prog_ops sk_reuseport_prog_ops = {
11156};
7e6897f9 11157
1559b4aa
JS
11158DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
11159EXPORT_SYMBOL(bpf_sk_lookup_enabled);
7e6897f9 11160
e9ddbb77
JS
11161BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
11162 struct sock *, sk, u64, flags)
7e6897f9 11163{
e9ddbb77
JS
11164 if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
11165 BPF_SK_LOOKUP_F_NO_REUSEPORT)))
11166 return -EINVAL;
11167 if (unlikely(sk && sk_is_refcounted(sk)))
11168 return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
40a34121
JF
11169 if (unlikely(sk && sk_is_tcp(sk) && sk->sk_state != TCP_LISTEN))
11170 return -ESOCKTNOSUPPORT; /* only accept TCP socket in LISTEN */
11171 if (unlikely(sk && sk_is_udp(sk) && sk->sk_state != TCP_CLOSE))
11172 return -ESOCKTNOSUPPORT; /* only accept UDP socket in CLOSE */
e9ddbb77
JS
11173
11174 /* Check if socket is suitable for packet L3/L4 protocol */
11175 if (sk && sk->sk_protocol != ctx->protocol)
11176 return -EPROTOTYPE;
11177 if (sk && sk->sk_family != ctx->family &&
11178 (sk->sk_family == AF_INET || ipv6_only_sock(sk)))
11179 return -EAFNOSUPPORT;
11180
11181 if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
11182 return -EEXIST;
11183
11184 /* Select socket as lookup result */
11185 ctx->selected_sk = sk;
11186 ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
11187 return 0;
7e6897f9 11188}
af7ec138 11189
e9ddbb77
JS
11190static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
11191 .func = bpf_sk_lookup_assign,
11192 .gpl_only = false,
11193 .ret_type = RET_INTEGER,
11194 .arg1_type = ARG_PTR_TO_CTX,
11195 .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL,
11196 .arg3_type = ARG_ANYTHING,
af7ec138
YS
11197};
11198
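/* Illustrative sketch (not part of this file): a hypothetical SK_LOOKUP
 * program steering new connections to a socket pulled from a sockmap and
 * handed over with bpf_sk_assign() above. The map name is made up.
 *
 *	SEC("sk_lookup")
 *	int redirect_lookup(struct bpf_sk_lookup *ctx)
 *	{
 *		struct bpf_sock *sk;
 *		__u32 key = 0;
 *		long err;
 *
 *		sk = bpf_map_lookup_elem(&dest_socks, &key);
 *		if (!sk)
 *			return SK_DROP;
 *		err = bpf_sk_assign(ctx, sk, 0);
 *		bpf_sk_release(sk);
 *		return err ? SK_DROP : SK_PASS;
 *	}
 */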
e9ddbb77
JS
11199static const struct bpf_func_proto *
11200sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
11201{
11202 switch (func_id) {
11203 case BPF_FUNC_perf_event_output:
11204 return &bpf_event_output_data_proto;
11205 case BPF_FUNC_sk_assign:
11206 return &bpf_sk_lookup_assign_proto;
11207 case BPF_FUNC_sk_release:
11208 return &bpf_sk_release_proto;
11209 default:
1df8f55a 11210 return bpf_sk_base_func_proto(func_id);
e9ddbb77
JS
11211 }
11212}
af7ec138 11213
e9ddbb77
JS
11214static bool sk_lookup_is_valid_access(int off, int size,
11215 enum bpf_access_type type,
11216 const struct bpf_prog *prog,
11217 struct bpf_insn_access_aux *info)
11218{
11219 if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
11220 return false;
11221 if (off % size != 0)
11222 return false;
11223 if (type != BPF_READ)
11224 return false;
11225
11226 switch (off) {
11227 case offsetof(struct bpf_sk_lookup, sk):
11228 info->reg_type = PTR_TO_SOCKET_OR_NULL;
11229 return size == sizeof(__u64);
af7ec138 11230
e9ddbb77
JS
11231 case bpf_ctx_range(struct bpf_sk_lookup, family):
11232 case bpf_ctx_range(struct bpf_sk_lookup, protocol):
11233 case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
11234 case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
11235 case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
11236 case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
e9ddbb77 11237 case bpf_ctx_range(struct bpf_sk_lookup, local_port):
f8931565 11238 case bpf_ctx_range(struct bpf_sk_lookup, ingress_ifindex):
e9ddbb77
JS
11239 bpf_ctx_record_field_size(info, sizeof(__u32));
11240 return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
11241
058ec4a7
JS
11242 case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
11243 /* Allow 4-byte access to 2-byte field for backward compatibility */
11244 if (size == sizeof(__u32))
11245 return true;
11246 bpf_ctx_record_field_size(info, sizeof(__be16));
11247 return bpf_ctx_narrow_access_ok(off, size, sizeof(__be16));
11248
11249 case offsetofend(struct bpf_sk_lookup, remote_port) ...
11250 offsetof(struct bpf_sk_lookup, local_ip4) - 1:
11251 /* Allow access to zero padding for backward compatibility */
11252 bpf_ctx_record_field_size(info, sizeof(__u16));
11253 return bpf_ctx_narrow_access_ok(off, size, sizeof(__u16));
11254
e9ddbb77
JS
11255 default:
11256 return false;
11257 }
11258}
11259
11260static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
11261 const struct bpf_insn *si,
11262 struct bpf_insn *insn_buf,
11263 struct bpf_prog *prog,
11264 u32 *target_size)
af7ec138 11265{
e9ddbb77
JS
11266 struct bpf_insn *insn = insn_buf;
11267
11268 switch (si->off) {
11269 case offsetof(struct bpf_sk_lookup, sk):
11270 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
11271 offsetof(struct bpf_sk_lookup_kern, selected_sk));
11272 break;
af7ec138 11273
e9ddbb77
JS
11274 case offsetof(struct bpf_sk_lookup, family):
11275 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11276 bpf_target_off(struct bpf_sk_lookup_kern,
11277 family, 2, target_size));
11278 break;
11279
11280 case offsetof(struct bpf_sk_lookup, protocol):
11281 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11282 bpf_target_off(struct bpf_sk_lookup_kern,
11283 protocol, 2, target_size));
11284 break;
11285
11286 case offsetof(struct bpf_sk_lookup, remote_ip4):
11287 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
11288 bpf_target_off(struct bpf_sk_lookup_kern,
11289 v4.saddr, 4, target_size));
11290 break;
11291
11292 case offsetof(struct bpf_sk_lookup, local_ip4):
11293 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
11294 bpf_target_off(struct bpf_sk_lookup_kern,
11295 v4.daddr, 4, target_size));
11296 break;
11297
11298 case bpf_ctx_range_till(struct bpf_sk_lookup,
11299 remote_ip6[0], remote_ip6[3]): {
11300#if IS_ENABLED(CONFIG_IPV6)
11301 int off = si->off;
11302
11303 off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
11304 off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
11305 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
11306 offsetof(struct bpf_sk_lookup_kern, v6.saddr));
11307 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
11308 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
11309#else
11310 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
11311#endif
11312 break;
11313 }
11314 case bpf_ctx_range_till(struct bpf_sk_lookup,
11315 local_ip6[0], local_ip6[3]): {
11316#if IS_ENABLED(CONFIG_IPV6)
11317 int off = si->off;
11318
11319 off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
11320 off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
11321 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
11322 offsetof(struct bpf_sk_lookup_kern, v6.daddr));
11323 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
11324 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
11325#else
11326 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
11327#endif
11328 break;
af7ec138 11329 }
e9ddbb77
JS
11330 case offsetof(struct bpf_sk_lookup, remote_port):
11331 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11332 bpf_target_off(struct bpf_sk_lookup_kern,
11333 sport, 2, target_size));
11334 break;
11335
058ec4a7
JS
11336 case offsetofend(struct bpf_sk_lookup, remote_port):
11337 *target_size = 2;
11338 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
11339 break;
11340
e9ddbb77
JS
11341 case offsetof(struct bpf_sk_lookup, local_port):
11342 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
11343 bpf_target_off(struct bpf_sk_lookup_kern,
11344 dport, 2, target_size));
11345 break;
f8931565
MP
11346
11347 case offsetof(struct bpf_sk_lookup, ingress_ifindex):
11348 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
11349 bpf_target_off(struct bpf_sk_lookup_kern,
11350 ingress_ifindex, 4, target_size));
11351 break;
e9ddbb77
JS
11352 }
11353
11354 return insn - insn_buf;
af7ec138 11355}
e9ddbb77
JS
11356
11357const struct bpf_prog_ops sk_lookup_prog_ops = {
7c32e8f8 11358 .test_run = bpf_prog_test_run_sk_lookup,
e9ddbb77
JS
11359};
11360
11361const struct bpf_verifier_ops sk_lookup_verifier_ops = {
11362 .get_func_proto = sk_lookup_func_proto,
11363 .is_valid_access = sk_lookup_is_valid_access,
11364 .convert_ctx_access = sk_lookup_convert_ctx_access,
11365};
11366
2dbb9b9e 11367#endif /* CONFIG_INET */
7e6897f9 11368
6a64037d 11369DEFINE_BPF_DISPATCHER(xdp)
7e6897f9
BT
11370
11371void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
11372{
6a64037d 11373 bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
7e6897f9 11374}
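
/* The xdp dispatcher above replaces the indirect call into the attached
 * XDP program with a patchable block of direct calls, avoiding retpoline
 * overhead. bpf_prog_change_xdp() is invoked when the program installed
 * on a device changes so the dispatcher is repatched accordingly.
 */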
af7ec138 11375
9e2ad638 11376BTF_ID_LIST_GLOBAL(btf_sock_ids, MAX_BTF_SOCK_TYPE)
bc4f0548 11377#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
af7ec138
YS
11378BTF_SOCK_TYPE_xxx
11379#undef BTF_SOCK_TYPE
af7ec138 11380
af7ec138
YS
11381BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
11382{
11383	/* The tcp6_sock type is not emitted in DWARF (and hence BTF);
11384	 * trigger explicit type generation here.
11385	 */
11386 BTF_TYPE_EMIT(struct tcp6_sock);
8c33dadc 11387 if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
af7ec138
YS
11388 sk->sk_family == AF_INET6)
11389 return (unsigned long)sk;
11390
11391 return (unsigned long)NULL;
11392}
11393
11394const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
11395 .func = bpf_skc_to_tcp6_sock,
11396 .gpl_only = false,
11397 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1df8f55a 11398 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
af7ec138
YS
11399 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
11400};
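
/* Illustrative sketch (not part of this file): these bpf_skc_to_*()
 * casting helpers let BTF-aware programs view a struct sock as a more
 * specific socket type, e.g. from a hypothetical tracing program
 * (fragment; srtt_us is a local in the sketch):
 *
 *	struct tcp6_sock *tp6 = bpf_skc_to_tcp6_sock(sk);
 *
 *	if (tp6)
 *		srtt_us = tp6->tcp.srtt_us;
 *
 * A NULL return means the socket did not match the requested type.
 */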
478cfbdf
YS
11401
11402BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
11403{
8c33dadc 11404 if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
478cfbdf
YS
11405 return (unsigned long)sk;
11406
11407 return (unsigned long)NULL;
11408}
11409
11410const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
11411 .func = bpf_skc_to_tcp_sock,
11412 .gpl_only = false,
11413 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1df8f55a 11414 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
478cfbdf
YS
11415 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
11416};
11417
11418BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
11419{
d82a532a
YS
11420 /* BTF types for tcp_timewait_sock and inet_timewait_sock are not
11421 * generated if CONFIG_INET=n. Trigger an explicit generation here.
11422 */
11423 BTF_TYPE_EMIT(struct inet_timewait_sock);
11424 BTF_TYPE_EMIT(struct tcp_timewait_sock);
11425
6b207d66 11426#ifdef CONFIG_INET
8c33dadc 11427 if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
478cfbdf 11428 return (unsigned long)sk;
6b207d66 11429#endif
478cfbdf
YS
11430
11431#if IS_BUILTIN(CONFIG_IPV6)
8c33dadc 11432 if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
478cfbdf
YS
11433 return (unsigned long)sk;
11434#endif
11435
11436 return (unsigned long)NULL;
11437}
11438
11439const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
11440 .func = bpf_skc_to_tcp_timewait_sock,
11441 .gpl_only = false,
11442 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1df8f55a 11443 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
478cfbdf
YS
11444 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
11445};
11446
11447BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
11448{
6b207d66 11449#ifdef CONFIG_INET
8c33dadc 11450 if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
478cfbdf 11451 return (unsigned long)sk;
6b207d66 11452#endif
478cfbdf
YS
11453
11454#if IS_BUILTIN(CONFIG_IPV6)
8c33dadc 11455 if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
478cfbdf
YS
11456 return (unsigned long)sk;
11457#endif
11458
11459 return (unsigned long)NULL;
11460}
11461
11462const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
11463 .func = bpf_skc_to_tcp_request_sock,
11464 .gpl_only = false,
11465 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1df8f55a 11466 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
478cfbdf
YS
11467 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
11468};
0d4fad3e
YS
11469
11470BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
11471{
11472	/* The udp6_sock type is not emitted in DWARF (and hence BTF);
11473	 * trigger explicit type generation here.
11474	 */
11475 BTF_TYPE_EMIT(struct udp6_sock);
8c33dadc 11476 if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
0d4fad3e
YS
11477 sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
11478 return (unsigned long)sk;
11479
11480 return (unsigned long)NULL;
11481}
11482
11483const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
11484 .func = bpf_skc_to_udp6_sock,
11485 .gpl_only = false,
11486 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1df8f55a 11487 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
0d4fad3e
YS
11488 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
11489};
1df8f55a 11490
9eeb3aa3
HC
11491BPF_CALL_1(bpf_skc_to_unix_sock, struct sock *, sk)
11492{
11493	/* The unix_sock type is not emitted in DWARF (and hence BTF);
11494	 * trigger explicit type generation here.
11495	 */
11496 BTF_TYPE_EMIT(struct unix_sock);
11497 if (sk && sk_fullsock(sk) && sk->sk_family == AF_UNIX)
11498 return (unsigned long)sk;
11499
11500 return (unsigned long)NULL;
11501}
11502
11503const struct bpf_func_proto bpf_skc_to_unix_sock_proto = {
11504 .func = bpf_skc_to_unix_sock,
11505 .gpl_only = false,
11506 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11507 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
11508 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UNIX],
11509};
11510
3bc253c2
GT
11511BPF_CALL_1(bpf_skc_to_mptcp_sock, struct sock *, sk)
11512{
11513 BTF_TYPE_EMIT(struct mptcp_sock);
11514 return (unsigned long)bpf_mptcp_sock_from_subflow(sk);
11515}
11516
11517const struct bpf_func_proto bpf_skc_to_mptcp_sock_proto = {
11518 .func = bpf_skc_to_mptcp_sock,
11519 .gpl_only = false,
11520 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11521 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
11522 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_MPTCP],
11523};
11524
b60da495
FR
11525BPF_CALL_1(bpf_sock_from_file, struct file *, file)
11526{
11527 return (unsigned long)sock_from_file(file);
11528}
11529
11530BTF_ID_LIST(bpf_sock_from_file_btf_ids)
11531BTF_ID(struct, socket)
11532BTF_ID(struct, file)
11533
11534const struct bpf_func_proto bpf_sock_from_file_proto = {
11535 .func = bpf_sock_from_file,
11536 .gpl_only = false,
11537 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
11538 .ret_btf_id = &bpf_sock_from_file_btf_ids[0],
11539 .arg1_type = ARG_PTR_TO_BTF_ID,
11540 .arg1_btf_id = &bpf_sock_from_file_btf_ids[1],
11541};
11542
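/* Illustrative sketch (not part of this file): bpf_sock_from_file() is
 * aimed at BTF-enabled (e.g. tracing/iterator) programs that hold a
 * struct file pointer, such as this hypothetical task_file iterator:
 *
 *	SEC("iter/task_file")
 *	int dump_socks(struct bpf_iter__task_file *ctx)
 *	{
 *		struct file *file = ctx->file;
 *		struct socket *sock;
 *
 *		if (!file)
 *			return 0;
 *		sock = bpf_sock_from_file(file);
 *		if (sock && sock->sk)
 *			bpf_printk("fd %u is a socket, family %d",
 *				   ctx->fd, sock->sk->sk_family);
 *		return 0;
 *	}
 */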
1df8f55a
MKL
11543static const struct bpf_func_proto *
11544bpf_sk_base_func_proto(enum bpf_func_id func_id)
11545{
11546 const struct bpf_func_proto *func;
11547
11548 switch (func_id) {
11549 case BPF_FUNC_skc_to_tcp6_sock:
11550 func = &bpf_skc_to_tcp6_sock_proto;
11551 break;
11552 case BPF_FUNC_skc_to_tcp_sock:
11553 func = &bpf_skc_to_tcp_sock_proto;
11554 break;
11555 case BPF_FUNC_skc_to_tcp_timewait_sock:
11556 func = &bpf_skc_to_tcp_timewait_sock_proto;
11557 break;
11558 case BPF_FUNC_skc_to_tcp_request_sock:
11559 func = &bpf_skc_to_tcp_request_sock_proto;
11560 break;
11561 case BPF_FUNC_skc_to_udp6_sock:
11562 func = &bpf_skc_to_udp6_sock_proto;
11563 break;
9eeb3aa3
HC
11564 case BPF_FUNC_skc_to_unix_sock:
11565 func = &bpf_skc_to_unix_sock_proto;
11566 break;
3bc253c2
GT
11567 case BPF_FUNC_skc_to_mptcp_sock:
11568 func = &bpf_skc_to_mptcp_sock_proto;
11569 break;
5e0bc308
DB
11570 case BPF_FUNC_ktime_get_coarse_ns:
11571 return &bpf_ktime_get_coarse_ns_proto;
1df8f55a
MKL
11572 default:
11573 return bpf_base_func_proto(func_id);
11574 }
11575
11576 if (!perfmon_capable())
11577 return NULL;
11578
11579 return func;
11580}