bpf: Change bpf_sk_storage_*() to accept ARG_PTR_TO_BTF_ID_SOCK_COMMON
[linux-2.6-block.git] net/core/filter.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Linux Socket Filter - Kernel level socket filtering
 *
 * Based on the design of the Berkeley Packet Filter. The new
 * internal format has been designed by PLUMgrid:
 *
 *	Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
 *
 * Authors:
 *
 *	Jay Schulist <jschlst@samba.org>
 *	Alexei Starovoitov <ast@plumgrid.com>
 *	Daniel Borkmann <dborkman@redhat.com>
 *
 * Andi Kleen - Fix a few bad bugs and races.
 * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/sock_diag.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_packet.h>
#include <linux/if_arp.h>
#include <linux/gfp.h>
#include <net/inet_common.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/netlink.h>
#include <linux/skbuff.h>
#include <linux/skmsg.h>
#include <net/sock.h>
#include <net/flow_dissector.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/uaccess.h>
#include <asm/unaligned.h>
#include <asm/cmpxchg.h>
#include <linux/filter.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <net/sch_generic.h>
#include <net/cls_cgroup.h>
#include <net/dst_metadata.h>
#include <net/dst.h>
#include <net/sock_reuseport.h>
#include <net/busy_poll.h>
#include <net/tcp.h>
#include <net/xfrm.h>
#include <net/udp.h>
#include <linux/bpf_trace.h>
#include <net/xdp_sock.h>
#include <linux/inetdevice.h>
#include <net/inet_hashtables.h>
#include <net/inet6_hashtables.h>
#include <net/ip_fib.h>
#include <net/nexthop.h>
#include <net/flow.h>
#include <net/arp.h>
#include <net/ipv6.h>
#include <net/net_namespace.h>
#include <linux/seg6_local.h>
#include <net/seg6.h>
#include <net/seg6_local.h>
#include <net/lwtunnel.h>
#include <net/ipv6_stubs.h>
#include <net/bpf_sk_storage.h>
#include <net/transp_v6.h>
#include <linux/btf_ids.h>

static const struct bpf_func_proto *
bpf_sk_base_func_proto(enum bpf_func_id func_id);

int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len)
{
	if (in_compat_syscall()) {
		struct compat_sock_fprog f32;

		if (len != sizeof(f32))
			return -EINVAL;
		if (copy_from_sockptr(&f32, src, sizeof(f32)))
			return -EFAULT;
		memset(dst, 0, sizeof(*dst));
		dst->len = f32.len;
		dst->filter = compat_ptr(f32.filter);
	} else {
		if (len != sizeof(*dst))
			return -EINVAL;
		if (copy_from_sockptr(dst, src, sizeof(*dst)))
			return -EFAULT;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user);

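/* Editor's note: a minimal illustrative sketch (not part of this file) of how
 * a setsockopt() handler is expected to consume the helper above; roughly what
 * net/core/sock.c does for SO_ATTACH_FILTER:
 *
 *	case SO_ATTACH_FILTER: {
 *		struct sock_fprog fprog;
 *
 *		ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
 *		if (!ret)
 *			ret = sk_attach_filter(&fprog, sk);
 *		break;
 *	}
 */
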
/**
 *	sk_filter_trim_cap - run a packet through a socket filter
 *	@sk: sock associated with &sk_buff
 *	@skb: buffer to filter
 *	@cap: limit on how short the eBPF program may trim the packet
 *
 * Run the eBPF program and then cut skb->data to correct size returned by
 * the program. If pkt_len is 0 we toss packet. If skb->len is smaller
 * than pkt_len we keep whole skb->data. This is the socket level
 * wrapper to BPF_PROG_RUN. It returns 0 if the packet should
 * be accepted or -EPERM if the packet should be tossed.
 *
 */
int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
	int err;
	struct sk_filter *filter;

	/*
	 * If the skb was allocated from pfmemalloc reserves, only
	 * allow SOCK_MEMALLOC sockets to use it as this socket is
	 * helping free memory
	 */
	if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
		return -ENOMEM;
	}
	err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
	if (err)
		return err;

	err = security_sock_rcv_skb(sk, skb);
	if (err)
		return err;

	rcu_read_lock();
	filter = rcu_dereference(sk->sk_filter);
	if (filter) {
		struct sock *save_sk = skb->sk;
		unsigned int pkt_len;

		skb->sk = sk;
		pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
		skb->sk = save_sk;
		err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
	}
	rcu_read_unlock();

	return err;
}
EXPORT_SYMBOL(sk_filter_trim_cap);

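/* Editor's note (illustrative, assumes the inline wrapper defined in
 * include/linux/filter.h): most callers use sk_filter(), which simply caps
 * trimming at one byte:
 *
 *	static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 *	{
 *		return sk_filter_trim_cap(sk, skb, 1);
 *	}
 */
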
BPF_CALL_1(bpf_skb_get_pay_offset, struct sk_buff *, skb)
{
	return skb_get_poff(skb);
}

BPF_CALL_3(bpf_skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_3(bpf_skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
{
	struct nlattr *nla;

	if (skb_is_nonlinear(skb))
		return 0;

	if (skb->len < sizeof(struct nlattr))
		return 0;

	if (a > skb->len - sizeof(struct nlattr))
		return 0;

	nla = (struct nlattr *) &skb->data[a];
	if (nla->nla_len > skb->len - a)
		return 0;

	nla = nla_find_nested(nla, x);
	if (nla)
		return (void *) nla - (void *) skb->data;

	return 0;
}

BPF_CALL_4(bpf_skb_load_helper_8, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u8 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return *(u8 *)(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return tmp;
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return *(u8 *)ptr;
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_8_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_8(skb, skb->data, skb->len - skb->data_len,
					 offset);
}

BPF_CALL_4(bpf_skb_load_helper_16, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u16 tmp, *ptr;
	const int len = sizeof(tmp);

	if (offset >= 0) {
		if (headlen - offset >= len)
			return get_unaligned_be16(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be16_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be16(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_16_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_16(skb, skb->data, skb->len - skb->data_len,
					  offset);
}

BPF_CALL_4(bpf_skb_load_helper_32, const struct sk_buff *, skb, const void *,
	   data, int, headlen, int, offset)
{
	u32 tmp, *ptr;
	const int len = sizeof(tmp);

	if (likely(offset >= 0)) {
		if (headlen - offset >= len)
			return get_unaligned_be32(data + offset);
		if (!skb_copy_bits(skb, offset, &tmp, sizeof(tmp)))
			return be32_to_cpu(tmp);
	} else {
		ptr = bpf_internal_load_pointer_neg_helper(skb, offset, len);
		if (likely(ptr))
			return get_unaligned_be32(ptr);
	}

	return -EFAULT;
}

BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
	   int, offset)
{
	return ____bpf_skb_load_helper_32(skb, skb->data, skb->len - skb->data_len,
					  offset);
}

static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
			      struct bpf_insn *insn_buf)
{
	struct bpf_insn *insn = insn_buf;

	switch (skb_field) {
	case SKF_AD_MARK:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, mark) != 4);

		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
				      offsetof(struct sk_buff, mark));
		break;

	case SKF_AD_PKTTYPE:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
#ifdef __BIG_ENDIAN_BITFIELD
		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
#endif
		break;

	case SKF_AD_QUEUE:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, queue_mapping) != 2);

		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, queue_mapping));
		break;

	case SKF_AD_VLAN_TAG:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_tci) != 2);

		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
				      offsetof(struct sk_buff, vlan_tci));
		break;
	case SKF_AD_VLAN_TAG_PRESENT:
		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_VLAN_PRESENT_OFFSET());
		if (PKT_VLAN_PRESENT_BIT)
			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, PKT_VLAN_PRESENT_BIT);
		if (PKT_VLAN_PRESENT_BIT < 7)
			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
		break;
	}

	return insn - insn_buf;
}

static bool convert_bpf_extensions(struct sock_filter *fp,
				   struct bpf_insn **insnp)
{
	struct bpf_insn *insn = *insnp;
	u32 cnt;

	switch (fp->k) {
	case SKF_AD_OFF + SKF_AD_PROTOCOL:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, protocol) != 2);

		/* A = *(u16 *) (CTX + offsetof(protocol)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, protocol));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PKTTYPE:
		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_IFINDEX:
	case SKF_AD_OFF + SKF_AD_HATYPE:
		BUILD_BUG_ON(sizeof_field(struct net_device, ifindex) != 4);
		BUILD_BUG_ON(sizeof_field(struct net_device, type) != 2);

		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
				      BPF_REG_TMP, BPF_REG_CTX,
				      offsetof(struct sk_buff, dev));
		/* if (tmp != 0) goto pc + 1 */
		*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
		*insn++ = BPF_EXIT_INSN();
		if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, ifindex));
		else
			*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
					    offsetof(struct net_device, type));
		break;

	case SKF_AD_OFF + SKF_AD_MARK:
		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_RXHASH:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, hash) != 4);

		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
				    offsetof(struct sk_buff, hash));
		break;

	case SKF_AD_OFF + SKF_AD_QUEUE:
		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
					 BPF_REG_A, BPF_REG_CTX, insn);
		insn += cnt - 1;
		break;

	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
		BUILD_BUG_ON(sizeof_field(struct sk_buff, vlan_proto) != 2);

		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
				      offsetof(struct sk_buff, vlan_proto));
		/* A = ntohs(A) [emitting a nop or swap16] */
		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
		break;

	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
	case SKF_AD_OFF + SKF_AD_NLATTR:
	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
	case SKF_AD_OFF + SKF_AD_CPU:
	case SKF_AD_OFF + SKF_AD_RANDOM:
		/* arg1 = CTX */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
		/* arg2 = A */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
		/* arg3 = X */
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
		/* Emit call(arg1=CTX, arg2=A, arg3=X) */
		switch (fp->k) {
		case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
			*insn = BPF_EMIT_CALL(bpf_skb_get_pay_offset);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR:
			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr);
			break;
		case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
			*insn = BPF_EMIT_CALL(bpf_skb_get_nlattr_nest);
			break;
		case SKF_AD_OFF + SKF_AD_CPU:
			*insn = BPF_EMIT_CALL(bpf_get_raw_cpu_id);
			break;
		case SKF_AD_OFF + SKF_AD_RANDOM:
			*insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
			bpf_user_rnd_init_once();
			break;
		}
		break;

	case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
		/* A ^= X */
		*insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
		break;

	default:
		/* This is just a dummy call to avoid letting the compiler
		 * evict __bpf_call_base() as an optimization. Placed here
		 * where no-one bothers.
		 */
		BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
		return false;
	}

	*insnp = insn;
	return true;
}

static bool convert_bpf_ld_abs(struct sock_filter *fp, struct bpf_insn **insnp)
{
	const bool unaligned_ok = IS_BUILTIN(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS);
	int size = bpf_size_to_bytes(BPF_SIZE(fp->code));
	bool endian = BPF_SIZE(fp->code) == BPF_H ||
		      BPF_SIZE(fp->code) == BPF_W;
	bool indirect = BPF_MODE(fp->code) == BPF_IND;
	const int ip_align = NET_IP_ALIGN;
	struct bpf_insn *insn = *insnp;
	int offset = fp->k;

	if (!indirect &&
	    ((unaligned_ok && offset >= 0) ||
	     (!unaligned_ok && offset >= 0 &&
	      offset + ip_align >= 0 &&
	      offset + ip_align % size == 0))) {
		bool ldx_off_ok = offset <= S16_MAX;

		*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_H);
		if (offset)
			*insn++ = BPF_ALU64_IMM(BPF_SUB, BPF_REG_TMP, offset);
		*insn++ = BPF_JMP_IMM(BPF_JSLT, BPF_REG_TMP,
				      size, 2 + endian + (!ldx_off_ok * 2));
		if (ldx_off_ok) {
			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
					      BPF_REG_D, offset);
		} else {
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_D);
			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_TMP, offset);
			*insn++ = BPF_LDX_MEM(BPF_SIZE(fp->code), BPF_REG_A,
					      BPF_REG_TMP, 0);
		}
		if (endian)
			*insn++ = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, size * 8);
		*insn++ = BPF_JMP_A(8);
	}

	*insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
	*insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_D);
	*insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_H);
	if (!indirect) {
		*insn++ = BPF_MOV64_IMM(BPF_REG_ARG4, offset);
	} else {
		*insn++ = BPF_MOV64_REG(BPF_REG_ARG4, BPF_REG_X);
		if (fp->k)
			*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG4, offset);
	}

	switch (BPF_SIZE(fp->code)) {
	case BPF_B:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8);
		break;
	case BPF_H:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16);
		break;
	case BPF_W:
		*insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32);
		break;
	default:
		return false;
	}

	*insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_A, 0, 2);
	*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
	*insn = BPF_EXIT_INSN();

	*insnp = insn;
	return true;
}

/**
 *	bpf_convert_filter - convert filter program
 *	@prog: the user passed filter program
 *	@len: the length of the user passed filter program
 *	@new_prog: allocated 'struct bpf_prog' or NULL
 *	@new_len: pointer to store length of converted program
 *	@seen_ld_abs: bool whether we've seen ld_abs/ind
 *
 * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
 * style extended BPF (eBPF).
 * Conversion workflow:
 *
 * 1) First pass for calculating the new program length:
 *   bpf_convert_filter(old_prog, old_len, NULL, &new_len, &seen_ld_abs)
 *
 * 2) 2nd pass to remap in two passes: 1st pass finds new
 *    jump offsets, 2nd pass remapping:
 *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len, &seen_ld_abs)
 */
static int bpf_convert_filter(struct sock_filter *prog, int len,
			      struct bpf_prog *new_prog, int *new_len,
			      bool *seen_ld_abs)
{
	int new_flen = 0, pass = 0, target, i, stack_off;
	struct bpf_insn *new_insn, *first_insn = NULL;
	struct sock_filter *fp;
	int *addrs = NULL;
	u8 bpf_src;

	BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);

	if (len <= 0 || len > BPF_MAXINSNS)
		return -EINVAL;

	if (new_prog) {
		first_insn = new_prog->insnsi;
		addrs = kcalloc(len, sizeof(*addrs),
				GFP_KERNEL | __GFP_NOWARN);
		if (!addrs)
			return -ENOMEM;
	}

do_pass:
	new_insn = first_insn;
	fp = prog;

	/* Classic BPF related prologue emission. */
	if (new_prog) {
		/* Classic BPF expects A and X to be reset first. These need
		 * to be guaranteed to be the first two instructions.
		 */
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
		*new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);

		/* All programs must keep CTX in callee saved BPF_REG_CTX.
		 * In eBPF case it's done by the compiler, here we need to
		 * do this ourself. Initial CTX is present in BPF_REG_ARG1.
		 */
		*new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
		if (*seen_ld_abs) {
			/* For packet access in classic BPF, cache skb->data
			 * in callee-saved BPF R8 and skb->len - skb->data_len
			 * (headlen) in BPF R9. Since classic BPF is read-only
			 * on CTX, we only need to cache it once.
			 */
			*new_insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
						  BPF_REG_D, BPF_REG_CTX,
						  offsetof(struct sk_buff, data));
			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_H, BPF_REG_CTX,
						  offsetof(struct sk_buff, len));
			*new_insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_TMP, BPF_REG_CTX,
						  offsetof(struct sk_buff, data_len));
			*new_insn++ = BPF_ALU32_REG(BPF_SUB, BPF_REG_H, BPF_REG_TMP);
		}
	} else {
		new_insn += 3;
	}

	for (i = 0; i < len; fp++, i++) {
		struct bpf_insn tmp_insns[32] = { };
		struct bpf_insn *insn = tmp_insns;

		if (addrs)
			addrs[i] = new_insn - first_insn;

		switch (fp->code) {
		/* All arithmetic insns and skb loads map as-is. */
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_NEG:
		case BPF_LD | BPF_ABS | BPF_W:
		case BPF_LD | BPF_ABS | BPF_H:
		case BPF_LD | BPF_ABS | BPF_B:
		case BPF_LD | BPF_IND | BPF_W:
		case BPF_LD | BPF_IND | BPF_H:
		case BPF_LD | BPF_IND | BPF_B:
			/* Check for overloaded BPF extension and
			 * directly convert it if found, otherwise
			 * just move on with mapping.
			 */
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    BPF_MODE(fp->code) == BPF_ABS &&
			    convert_bpf_extensions(fp, &insn))
				break;
			if (BPF_CLASS(fp->code) == BPF_LD &&
			    convert_bpf_ld_abs(fp, &insn)) {
				*seen_ld_abs = true;
				break;
			}

			if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
			    fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
				*insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
				/* Error with exception code on div/mod by 0.
				 * For cBPF programs, this was always return 0.
				 */
				*insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
				*insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
				*insn++ = BPF_EXIT_INSN();
			}

			*insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
			break;

		/* Jump transformation cannot use BPF block macros
		 * everywhere as offset calculation and target updates
		 * require a bit more work than the rest, i.e. jump
		 * opcodes map as-is, but offsets need adjustment.
		 */

#define BPF_EMIT_JMP							\
	do {								\
		const s32 off_min = S16_MIN, off_max = S16_MAX;		\
		s32 off;						\
									\
		if (target >= len || target < 0)			\
			goto err;					\
		off = addrs ? addrs[target] - addrs[i] - 1 : 0;		\
		/* Adjust pc relative offset for 2nd or 3rd insn. */	\
		off -= insn - tmp_insns;				\
		/* Reject anything not fitting into insn->off. */	\
		if (off < off_min || off > off_max)			\
			goto err;					\
		insn->off = off;					\
	} while (0)

		case BPF_JMP | BPF_JA:
			target = i + fp->k + 1;
			insn->code = fp->code;
			BPF_EMIT_JMP;
			break;

		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
			if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
				/* BPF immediates are signed, zero extend
				 * immediate into tmp register and use it
				 * in compare insn.
				 */
				*insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);

				insn->dst_reg = BPF_REG_A;
				insn->src_reg = BPF_REG_TMP;
				bpf_src = BPF_X;
			} else {
				insn->dst_reg = BPF_REG_A;
				insn->imm = fp->k;
				bpf_src = BPF_SRC(fp->code);
				insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
			}

			/* Common case where 'jump_false' is next insn. */
			if (fp->jf == 0) {
				insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
				target = i + fp->jt + 1;
				BPF_EMIT_JMP;
				break;
			}

			/* Convert some jumps when 'jump_true' is next insn. */
			if (fp->jt == 0) {
				switch (BPF_OP(fp->code)) {
				case BPF_JEQ:
					insn->code = BPF_JMP | BPF_JNE | bpf_src;
					break;
				case BPF_JGT:
					insn->code = BPF_JMP | BPF_JLE | bpf_src;
					break;
				case BPF_JGE:
					insn->code = BPF_JMP | BPF_JLT | bpf_src;
					break;
				default:
					goto jmp_rest;
				}

				target = i + fp->jf + 1;
				BPF_EMIT_JMP;
				break;
			}
jmp_rest:
			/* Other jumps are mapped into two insns: Jxx and JA. */
			target = i + fp->jt + 1;
			insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
			BPF_EMIT_JMP;
			insn++;

			insn->code = BPF_JMP | BPF_JA;
			target = i + fp->jf + 1;
			BPF_EMIT_JMP;
			break;

		/* ldxb 4 * ([14] & 0xf) is remaped into 6 insns. */
		case BPF_LDX | BPF_MSH | BPF_B: {
			struct sock_filter tmp = {
				.code	= BPF_LD | BPF_ABS | BPF_B,
				.k	= fp->k,
			};

			*seen_ld_abs = true;

			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = BPF_R0 = *(u8 *) (skb->data + K) */
			convert_bpf_ld_abs(&tmp, &insn);
			insn++;
			/* A &= 0xf */
			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
			/* A <<= 2 */
			*insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
			/* tmp = X */
			*insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_X);
			/* X = A */
			*insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			/* A = tmp */
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
			break;
		}
		/* RET_K is remaped into 2 insns. RET_A case doesn't need an
		 * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
		 */
		case BPF_RET | BPF_A:
		case BPF_RET | BPF_K:
			if (BPF_RVAL(fp->code) == BPF_K)
				*insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
							0, fp->k);
			*insn = BPF_EXIT_INSN();
			break;

		/* Store to stack. */
		case BPF_ST:
		case BPF_STX:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
					    BPF_ST ? BPF_REG_A : BPF_REG_X,
					    -stack_off);
			/* check_load_and_stores() verifies that classic BPF can
			 * load from stack only after write, so tracking
			 * stack_depth for ST|STX insns is enough
			 */
			if (new_prog && new_prog->aux->stack_depth < stack_off)
				new_prog->aux->stack_depth = stack_off;
			break;

		/* Load from stack. */
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			stack_off = fp->k * 4 + 4;
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_FP,
					    -stack_off);
			break;

		/* A = K or X = K */
		case BPF_LD | BPF_IMM:
		case BPF_LDX | BPF_IMM:
			*insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
					      BPF_REG_A : BPF_REG_X, fp->k);
			break;

		/* X = A */
		case BPF_MISC | BPF_TAX:
			*insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
			break;

		/* A = X */
		case BPF_MISC | BPF_TXA:
			*insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
			break;

		/* A = skb->len or X = skb->len */
		case BPF_LD | BPF_W | BPF_LEN:
		case BPF_LDX | BPF_W | BPF_LEN:
			*insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
					    BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
					    offsetof(struct sk_buff, len));
			break;

		/* Access seccomp_data fields. */
		case BPF_LDX | BPF_ABS | BPF_W:
			/* A = *(u32 *) (ctx + K) */
			*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
			break;

		/* Unknown instruction. */
		default:
			goto err;
		}

		insn++;
		if (new_prog)
			memcpy(new_insn, tmp_insns,
			       sizeof(*insn) * (insn - tmp_insns));
		new_insn += insn - tmp_insns;
	}

	if (!new_prog) {
		/* Only calculating new length. */
		*new_len = new_insn - first_insn;
		if (*seen_ld_abs)
			*new_len += 4; /* Prologue bits. */
		return 0;
	}

	pass++;
	if (new_flen != new_insn - first_insn) {
		new_flen = new_insn - first_insn;
		if (pass > 2)
			goto err;
		goto do_pass;
	}

	kfree(addrs);
	BUG_ON(*new_len != new_flen);
	return 0;
err:
	kfree(addrs);
	return -EINVAL;
}

/* Security:
 *
 * As we dont want to clear mem[] array for each packet going through
 * __bpf_prog_run(), we check that filter loaded by user never try to read
 * a cell if not previously written, and we check all branches to be sure
 * a malicious user doesn't try to abuse us.
 */
static int check_load_and_stores(const struct sock_filter *filter, int flen)
{
	u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
	int pc, ret = 0;

	BUILD_BUG_ON(BPF_MEMWORDS > 16);

	masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
	if (!masks)
		return -ENOMEM;

	memset(masks, 0xff, flen * sizeof(*masks));

	for (pc = 0; pc < flen; pc++) {
		memvalid &= masks[pc];

		switch (filter[pc].code) {
		case BPF_ST:
		case BPF_STX:
			memvalid |= (1 << filter[pc].k);
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
			if (!(memvalid & (1 << filter[pc].k))) {
				ret = -EINVAL;
				goto error;
			}
			break;
		case BPF_JMP | BPF_JA:
			/* A jump must set masks on target */
			masks[pc + 1 + filter[pc].k] &= memvalid;
			memvalid = ~0;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* A jump must set masks on targets */
			masks[pc + 1 + filter[pc].jt] &= memvalid;
			masks[pc + 1 + filter[pc].jf] &= memvalid;
			memvalid = ~0;
			break;
		}
	}
error:
	kfree(masks);
	return ret;
}

static bool chk_code_allowed(u16 code_to_probe)
{
	static const bool codes[] = {
		/* 32 bit ALU operations */
		[BPF_ALU | BPF_ADD | BPF_K] = true,
		[BPF_ALU | BPF_ADD | BPF_X] = true,
		[BPF_ALU | BPF_SUB | BPF_K] = true,
		[BPF_ALU | BPF_SUB | BPF_X] = true,
		[BPF_ALU | BPF_MUL | BPF_K] = true,
		[BPF_ALU | BPF_MUL | BPF_X] = true,
		[BPF_ALU | BPF_DIV | BPF_K] = true,
		[BPF_ALU | BPF_DIV | BPF_X] = true,
		[BPF_ALU | BPF_MOD | BPF_K] = true,
		[BPF_ALU | BPF_MOD | BPF_X] = true,
		[BPF_ALU | BPF_AND | BPF_K] = true,
		[BPF_ALU | BPF_AND | BPF_X] = true,
		[BPF_ALU | BPF_OR | BPF_K] = true,
		[BPF_ALU | BPF_OR | BPF_X] = true,
		[BPF_ALU | BPF_XOR | BPF_K] = true,
		[BPF_ALU | BPF_XOR | BPF_X] = true,
		[BPF_ALU | BPF_LSH | BPF_K] = true,
		[BPF_ALU | BPF_LSH | BPF_X] = true,
		[BPF_ALU | BPF_RSH | BPF_K] = true,
		[BPF_ALU | BPF_RSH | BPF_X] = true,
		[BPF_ALU | BPF_NEG] = true,
		/* Load instructions */
		[BPF_LD | BPF_W | BPF_ABS] = true,
		[BPF_LD | BPF_H | BPF_ABS] = true,
		[BPF_LD | BPF_B | BPF_ABS] = true,
		[BPF_LD | BPF_W | BPF_LEN] = true,
		[BPF_LD | BPF_W | BPF_IND] = true,
		[BPF_LD | BPF_H | BPF_IND] = true,
		[BPF_LD | BPF_B | BPF_IND] = true,
		[BPF_LD | BPF_IMM] = true,
		[BPF_LD | BPF_MEM] = true,
		[BPF_LDX | BPF_W | BPF_LEN] = true,
		[BPF_LDX | BPF_B | BPF_MSH] = true,
		[BPF_LDX | BPF_IMM] = true,
		[BPF_LDX | BPF_MEM] = true,
		/* Store instructions */
		[BPF_ST] = true,
		[BPF_STX] = true,
		/* Misc instructions */
		[BPF_MISC | BPF_TAX] = true,
		[BPF_MISC | BPF_TXA] = true,
		/* Return instructions */
		[BPF_RET | BPF_K] = true,
		[BPF_RET | BPF_A] = true,
		/* Jump instructions */
		[BPF_JMP | BPF_JA] = true,
		[BPF_JMP | BPF_JEQ | BPF_K] = true,
		[BPF_JMP | BPF_JEQ | BPF_X] = true,
		[BPF_JMP | BPF_JGE | BPF_K] = true,
		[BPF_JMP | BPF_JGE | BPF_X] = true,
		[BPF_JMP | BPF_JGT | BPF_K] = true,
		[BPF_JMP | BPF_JGT | BPF_X] = true,
		[BPF_JMP | BPF_JSET | BPF_K] = true,
		[BPF_JMP | BPF_JSET | BPF_X] = true,
	};

	if (code_to_probe >= ARRAY_SIZE(codes))
		return false;

	return codes[code_to_probe];
}

static bool bpf_check_basics_ok(const struct sock_filter *filter,
				unsigned int flen)
{
	if (filter == NULL)
		return false;
	if (flen == 0 || flen > BPF_MAXINSNS)
		return false;

	return true;
}

/**
 *	bpf_check_classic - verify socket filter code
 *	@filter: filter to verify
 *	@flen: length of filter
 *
 * Check the user's filter code. If we let some ugly
 * filter code slip through kaboom! The filter must contain
 * no references or jumps that are out of range, no illegal
 * instructions, and must end with a RET instruction.
 *
 * All jumps are forward as they are not signed.
 *
 * Returns 0 if the rule set is legal or -EINVAL if not.
 */
static int bpf_check_classic(const struct sock_filter *filter,
			     unsigned int flen)
{
	bool anc_found;
	int pc;

	/* Check the filter code now */
	for (pc = 0; pc < flen; pc++) {
		const struct sock_filter *ftest = &filter[pc];

		/* May we actually operate on this code? */
		if (!chk_code_allowed(ftest->code))
			return -EINVAL;

		/* Some instructions need special checks */
		switch (ftest->code) {
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_K:
			/* Check for division by zero */
			if (ftest->k == 0)
				return -EINVAL;
			break;
		case BPF_ALU | BPF_LSH | BPF_K:
		case BPF_ALU | BPF_RSH | BPF_K:
			if (ftest->k >= 32)
				return -EINVAL;
			break;
		case BPF_LD | BPF_MEM:
		case BPF_LDX | BPF_MEM:
		case BPF_ST:
		case BPF_STX:
			/* Check for invalid memory addresses */
			if (ftest->k >= BPF_MEMWORDS)
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JA:
			/* Note, the large ftest->k might cause loops.
			 * Compare this with conditional jumps below,
			 * where offsets are limited. --ANK (981016)
			 */
			if (ftest->k >= (unsigned int)(flen - pc - 1))
				return -EINVAL;
			break;
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JEQ | BPF_X:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_X:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_X:
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP | BPF_JSET | BPF_X:
			/* Both conditionals must be safe */
			if (pc + ftest->jt + 1 >= flen ||
			    pc + ftest->jf + 1 >= flen)
				return -EINVAL;
			break;
		case BPF_LD | BPF_W | BPF_ABS:
		case BPF_LD | BPF_H | BPF_ABS:
		case BPF_LD | BPF_B | BPF_ABS:
			anc_found = false;
			if (bpf_anc_helper(ftest) & BPF_ANC)
				anc_found = true;
			/* Ancillary operation unknown or unsupported */
			if (anc_found == false && ftest->k >= SKF_AD_OFF)
				return -EINVAL;
		}
	}

	/* Last instruction must be a RET code */
	switch (filter[flen - 1].code) {
	case BPF_RET | BPF_K:
	case BPF_RET | BPF_A:
		return check_load_and_stores(filter, flen);
	}

	return -EINVAL;
}

static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
				      const struct sock_fprog *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct sock_fprog_kern *fkprog;

	fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
	if (!fp->orig_prog)
		return -ENOMEM;

	fkprog = fp->orig_prog;
	fkprog->len = fprog->len;

	fkprog->filter = kmemdup(fp->insns, fsize,
				 GFP_KERNEL | __GFP_NOWARN);
	if (!fkprog->filter) {
		kfree(fp->orig_prog);
		return -ENOMEM;
	}

	return 0;
}

static void bpf_release_orig_filter(struct bpf_prog *fp)
{
	struct sock_fprog_kern *fprog = fp->orig_prog;

	if (fprog) {
		kfree(fprog->filter);
		kfree(fprog);
	}
}

static void __bpf_prog_release(struct bpf_prog *prog)
{
	if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
		bpf_prog_put(prog);
	} else {
		bpf_release_orig_filter(prog);
		bpf_prog_free(prog);
	}
}

static void __sk_filter_release(struct sk_filter *fp)
{
	__bpf_prog_release(fp->prog);
	kfree(fp);
}

/**
 *	sk_filter_release_rcu - Release a socket filter by rcu_head
 *	@rcu: rcu_head that contains the sk_filter to free
 */
static void sk_filter_release_rcu(struct rcu_head *rcu)
{
	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);

	__sk_filter_release(fp);
}

/**
 *	sk_filter_release - release a socket filter
 *	@fp: filter to remove
 *
 *	Remove a filter from a socket and release its resources.
 */
static void sk_filter_release(struct sk_filter *fp)
{
	if (refcount_dec_and_test(&fp->refcnt))
		call_rcu(&fp->rcu, sk_filter_release_rcu);
}

void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	atomic_sub(filter_size, &sk->sk_omem_alloc);
	sk_filter_release(fp);
}

/* try to charge the socket memory if there is space available
 * return true on success
 */
static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	u32 filter_size = bpf_prog_size(fp->prog->len);

	/* same check as in sock_kmalloc() */
	if (filter_size <= sysctl_optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
		atomic_add(filter_size, &sk->sk_omem_alloc);
		return true;
	}
	return false;
}

bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
{
	if (!refcount_inc_not_zero(&fp->refcnt))
		return false;

	if (!__sk_filter_charge(sk, fp)) {
		sk_filter_release(fp);
		return false;
	}
	return true;
}

static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
{
	struct sock_filter *old_prog;
	struct bpf_prog *old_fp;
	int err, new_len, old_len = fp->len;
	bool seen_ld_abs = false;

	/* We are free to overwrite insns et al right here as it
	 * won't be used at this point in time anymore internally
	 * after the migration to the internal BPF instruction
	 * representation.
	 */
	BUILD_BUG_ON(sizeof(struct sock_filter) !=
		     sizeof(struct bpf_insn));

	/* Conversion cannot happen on overlapping memory areas,
	 * so we need to keep the user BPF around until the 2nd
	 * pass. At this time, the user BPF is stored in fp->insns.
	 */
	old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
			   GFP_KERNEL | __GFP_NOWARN);
	if (!old_prog) {
		err = -ENOMEM;
		goto out_err;
	}

	/* 1st pass: calculate the new program length. */
	err = bpf_convert_filter(old_prog, old_len, NULL, &new_len,
				 &seen_ld_abs);
	if (err)
		goto out_err_free;

	/* Expand fp for appending the new filter representation. */
	old_fp = fp;
	fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
	if (!fp) {
		/* The old_fp is still around in case we couldn't
		 * allocate new memory, so uncharge on that one.
		 */
		fp = old_fp;
		err = -ENOMEM;
		goto out_err_free;
	}

	fp->len = new_len;

	/* 2nd pass: remap sock_filter insns into bpf_insn insns. */
	err = bpf_convert_filter(old_prog, old_len, fp, &new_len,
				 &seen_ld_abs);
	if (err)
		/* 2nd bpf_convert_filter() can fail only if it fails
		 * to allocate memory, remapping must succeed. Note,
		 * that at this time old_fp has already been released
		 * by krealloc().
		 */
		goto out_err_free;

	fp = bpf_prog_select_runtime(fp, &err);
	if (err)
		goto out_err_free;

	kfree(old_prog);
	return fp;

out_err_free:
	kfree(old_prog);
out_err:
	__bpf_prog_release(fp);
	return ERR_PTR(err);
}

static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
					   bpf_aux_classic_check_t trans)
{
	int err;

	fp->bpf_func = NULL;
	fp->jited = 0;

	err = bpf_check_classic(fp->insns, fp->len);
	if (err) {
		__bpf_prog_release(fp);
		return ERR_PTR(err);
	}

	/* There might be additional checks and transformations
	 * needed on classic filters, f.e. in case of seccomp.
	 */
	if (trans) {
		err = trans(fp->insns, fp->len);
		if (err) {
			__bpf_prog_release(fp);
			return ERR_PTR(err);
		}
	}

	/* Probe if we can JIT compile the filter and if so, do
	 * the compilation of the filter.
	 */
	bpf_jit_compile(fp);

	/* JIT compiler couldn't process this filter, so do the
	 * internal BPF translation for the optimized interpreter.
	 */
	if (!fp->jited)
		fp = bpf_migrate_filter(fp);

	return fp;
}

/**
 *	bpf_prog_create - create an unattached filter
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *
 * Create a filter independent of any socket. We first run some
 * sanity checks on it to make sure it does not explode on us later.
 * If an error occurs or there is insufficient memory for the filter
 * a negative errno code is returned. On success the return is zero.
 */
int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	memcpy(fp->insns, fprog->filter, fsize);

	fp->len = fprog->len;
	/* Since unattached filters are not copied back to user
	 * space through sk_get_filter(), we do not need to hold
	 * a copy here, and can spare us the work.
	 */
	fp->orig_prog = NULL;

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, NULL);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create);

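/* Editor's note: an illustrative, hedged sketch of in-kernel usage (all names
 * local to the example; roughly the pattern used by drivers that build an
 * unattached classic filter):
 *
 *	struct sock_filter accept_all[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 0xffffffff),	// accept whole packet
 *	};
 *	struct sock_fprog_kern fprog = {
 *		.len	= ARRAY_SIZE(accept_all),
 *		.filter	= accept_all,
 *	};
 *	struct bpf_prog *prog;
 *
 *	if (!bpf_prog_create(&prog, &fprog)) {
 *		u32 verdict = BPF_PROG_RUN(prog, skb);
 *		// ... use the verdict ...
 *		bpf_prog_destroy(prog);
 *	}
 */
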
/**
 *	bpf_prog_create_from_user - create an unattached filter from user buffer
 *	@pfp: the unattached filter that is created
 *	@fprog: the filter program
 *	@trans: post-classic verifier transformation handler
 *	@save_orig: save classic BPF program
 *
 * This function effectively does the same as bpf_prog_create(), only
 * that it builds up its insns buffer from user space provided buffer.
 * It also allows for passing a bpf_aux_classic_check_t handler.
 */
int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
			      bpf_aux_classic_check_t trans, bool save_orig)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *fp;
	int err;

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return -EINVAL;

	fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!fp)
		return -ENOMEM;

	if (copy_from_user(fp->insns, fprog->filter, fsize)) {
		__bpf_prog_free(fp);
		return -EFAULT;
	}

	fp->len = fprog->len;
	fp->orig_prog = NULL;

	if (save_orig) {
		err = bpf_prog_store_orig_filter(fp, fprog);
		if (err) {
			__bpf_prog_free(fp);
			return -ENOMEM;
		}
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	fp = bpf_prepare_filter(fp, trans);
	if (IS_ERR(fp))
		return PTR_ERR(fp);

	*pfp = fp;
	return 0;
}
EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);

void bpf_prog_destroy(struct bpf_prog *fp)
{
	__bpf_prog_release(fp);
}
EXPORT_SYMBOL_GPL(bpf_prog_destroy);

static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
{
	struct sk_filter *fp, *old_fp;

	fp = kmalloc(sizeof(*fp), GFP_KERNEL);
	if (!fp)
		return -ENOMEM;

	fp->prog = prog;

	if (!__sk_filter_charge(sk, fp)) {
		kfree(fp);
		return -ENOMEM;
	}
	refcount_set(&fp->refcnt, 1);

	old_fp = rcu_dereference_protected(sk->sk_filter,
					   lockdep_sock_is_held(sk));
	rcu_assign_pointer(sk->sk_filter, fp);

	if (old_fp)
		sk_filter_uncharge(sk, old_fp);

	return 0;
}

static
struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
{
	unsigned int fsize = bpf_classic_proglen(fprog);
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	/* Make sure new filter is there and in the right amounts. */
	if (!bpf_check_basics_ok(fprog->filter, fprog->len))
		return ERR_PTR(-EINVAL);

	prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
	if (!prog)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(prog->insns, fprog->filter, fsize)) {
		__bpf_prog_free(prog);
		return ERR_PTR(-EFAULT);
	}

	prog->len = fprog->len;

	err = bpf_prog_store_orig_filter(prog, fprog);
	if (err) {
		__bpf_prog_free(prog);
		return ERR_PTR(-ENOMEM);
	}

	/* bpf_prepare_filter() already takes care of freeing
	 * memory in case something goes wrong.
	 */
	return bpf_prepare_filter(prog, NULL);
}

/**
 *	sk_attach_filter - attach a socket filter
 *	@fprog: the filter program
 *	@sk: the socket to use
 *
 * Attach the user's filter code. We first run some sanity checks on
 * it to make sure it does not explode on us later. If an error
 * occurs or there is insufficient memory for the filter a negative
 * errno code is returned. On success the return is zero.
 */
int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		__bpf_prog_release(prog);
		return err;
	}

	return 0;
}
EXPORT_SYMBOL_GPL(sk_attach_filter);

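/* Editor's note: a hedged user-space sketch (not part of this file, assumes
 * <linux/filter.h> and <sys/socket.h>) of the path that ends up here; a
 * process attaches a classic filter with setsockopt(SO_ATTACH_FILTER):
 *
 *	struct sock_filter code[] = {
 *		{ BPF_RET | BPF_K, 0, 0, 0xffffffff },	// accept whole packet
 *	};
 *	struct sock_fprog bpf = {
 *		.len	= sizeof(code) / sizeof(code[0]),
 *		.filter	= code,
 *	};
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf));
 */
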
int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
{
	struct bpf_prog *prog = __get_filter(fprog, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (bpf_prog_size(prog->len) > sysctl_optmem_max)
		err = -ENOMEM;
	else
		err = reuseport_attach_prog(sk, prog);

	if (err)
		__bpf_prog_release(prog);

	return err;
}

static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
{
	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return ERR_PTR(-EPERM);

	return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
}

int sk_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog = __get_bpf(ufd, sk);
	int err;

	if (IS_ERR(prog))
		return PTR_ERR(prog);

	err = __sk_attach_prog(prog, sk);
	if (err < 0) {
		bpf_prog_put(prog);
		return err;
	}

	return 0;
}

int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
{
	struct bpf_prog *prog;
	int err;

	if (sock_flag(sk, SOCK_FILTER_LOCKED))
		return -EPERM;

	prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
	if (PTR_ERR(prog) == -EINVAL)
		prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT);
	if (IS_ERR(prog))
		return PTR_ERR(prog);

	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT) {
		/* Like other non BPF_PROG_TYPE_SOCKET_FILTER
		 * bpf prog (e.g. sockmap).  It depends on the
		 * limitation imposed by bpf_prog_load().
		 * Hence, sysctl_optmem_max is not checked.
		 */
		if ((sk->sk_type != SOCK_STREAM &&
		     sk->sk_type != SOCK_DGRAM) ||
		    (sk->sk_protocol != IPPROTO_UDP &&
		     sk->sk_protocol != IPPROTO_TCP) ||
		    (sk->sk_family != AF_INET &&
		     sk->sk_family != AF_INET6)) {
			err = -ENOTSUPP;
			goto err_prog_put;
		}
	} else {
		/* BPF_PROG_TYPE_SOCKET_FILTER */
		if (bpf_prog_size(prog->len) > sysctl_optmem_max) {
			err = -ENOMEM;
			goto err_prog_put;
		}
	}

	err = reuseport_attach_prog(sk, prog);
err_prog_put:
	if (err)
		bpf_prog_put(prog);

	return err;
}

void sk_reuseport_prog_free(struct bpf_prog *prog)
{
	if (!prog)
		return;

	if (prog->type == BPF_PROG_TYPE_SK_REUSEPORT)
		bpf_prog_put(prog);
	else
		bpf_prog_destroy(prog);
}

struct bpf_scratchpad {
	union {
		__be32 diff[MAX_BPF_STACK / sizeof(__be32)];
		u8     buff[MAX_BPF_STACK];
	};
};

static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);

static inline int __bpf_try_make_writable(struct sk_buff *skb,
					  unsigned int write_len)
{
	return skb_ensure_writable(skb, write_len);
}

static inline int bpf_try_make_writable(struct sk_buff *skb,
					unsigned int write_len)
{
	int err = __bpf_try_make_writable(skb, write_len);

	bpf_compute_data_pointers(skb);
	return err;
}

static int bpf_try_make_head_writable(struct sk_buff *skb)
{
	return bpf_try_make_writable(skb, skb_headlen(skb));
}

static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
{
	if (skb_at_tc_ingress(skb))
		skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
}

BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
	   const void *, from, u32, len, u64, flags)
{
	void *ptr;

	if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
		return -EINVAL;
	if (unlikely(offset > 0xffff))
		return -EFAULT;
	if (unlikely(bpf_try_make_writable(skb, offset + len)))
		return -EFAULT;

	ptr = skb->data + offset;
	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpull_rcsum(skb, ptr, len, offset);

	memcpy(ptr, from, len);

	if (flags & BPF_F_RECOMPUTE_CSUM)
		__skb_postpush_rcsum(skb, ptr, len, offset);
	if (flags & BPF_F_INVALIDATE_HASH)
		skb_clear_hash(skb);

	return 0;
}

static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
	.func		= bpf_skb_store_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_MEM,
	.arg4_type	= ARG_CONST_SIZE,
	.arg5_type	= ARG_ANYTHING,
};

BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
	   void *, to, u32, len)
{
	void *ptr;

	if (unlikely(offset > 0xffff))
		goto err_clear;

	ptr = skb_header_pointer(skb, offset, len, to);
	if (unlikely(!ptr))
		goto err_clear;
	if (ptr != to)
		memcpy(to, ptr, len);

	return 0;
err_clear:
	memset(to, 0, len);
	return -EFAULT;
}

static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
	.func		= bpf_skb_load_bytes,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_ANYTHING,
	.arg3_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg4_type	= ARG_CONST_SIZE,
};

089b19a9
SF
1749BPF_CALL_4(bpf_flow_dissector_load_bytes,
1750 const struct bpf_flow_dissector *, ctx, u32, offset,
1751 void *, to, u32, len)
1752{
1753 void *ptr;
1754
1755 if (unlikely(offset > 0xffff))
1756 goto err_clear;
1757
1758 if (unlikely(!ctx->skb))
1759 goto err_clear;
1760
1761 ptr = skb_header_pointer(ctx->skb, offset, len, to);
1762 if (unlikely(!ptr))
1763 goto err_clear;
1764 if (ptr != to)
1765 memcpy(to, ptr, len);
1766
1767 return 0;
1768err_clear:
1769 memset(to, 0, len);
1770 return -EFAULT;
1771}
1772
1773static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = {
1774 .func = bpf_flow_dissector_load_bytes,
1775 .gpl_only = false,
1776 .ret_type = RET_INTEGER,
1777 .arg1_type = ARG_PTR_TO_CTX,
1778 .arg2_type = ARG_ANYTHING,
1779 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1780 .arg4_type = ARG_CONST_SIZE,
1781};
1782
4e1ec56c
DB
1783BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb,
1784 u32, offset, void *, to, u32, len, u32, start_header)
1785{
3eee1f75 1786 u8 *end = skb_tail_pointer(skb);
0f5d82f1 1787 u8 *start, *ptr;
4e1ec56c 1788
0f5d82f1 1789 if (unlikely(offset > 0xffff))
4e1ec56c
DB
1790 goto err_clear;
1791
1792 switch (start_header) {
1793 case BPF_HDR_START_MAC:
0f5d82f1
YZ
1794 if (unlikely(!skb_mac_header_was_set(skb)))
1795 goto err_clear;
1796 start = skb_mac_header(skb);
4e1ec56c
DB
1797 break;
1798 case BPF_HDR_START_NET:
0f5d82f1 1799 start = skb_network_header(skb);
4e1ec56c
DB
1800 break;
1801 default:
1802 goto err_clear;
1803 }
1804
0f5d82f1
YZ
1805 ptr = start + offset;
1806
1807 if (likely(ptr + len <= end)) {
4e1ec56c
DB
1808 memcpy(to, ptr, len);
1809 return 0;
1810 }
1811
1812err_clear:
1813 memset(to, 0, len);
1814 return -EFAULT;
1815}
1816
1817static const struct bpf_func_proto bpf_skb_load_bytes_relative_proto = {
1818 .func = bpf_skb_load_bytes_relative,
1819 .gpl_only = false,
1820 .ret_type = RET_INTEGER,
1821 .arg1_type = ARG_PTR_TO_CTX,
1822 .arg2_type = ARG_ANYTHING,
1823 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
1824 .arg4_type = ARG_CONST_SIZE,
1825 .arg5_type = ARG_ANYTHING,
1826};
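/* Usage sketch (illustrative only): read the IPv4 protocol field without
 * knowing how long the preceding L2 header is, by anchoring the offset at
 * the network header via BPF_HDR_START_NET:
 *
 *	__u8 ipproto;
 *
 *	if (bpf_skb_load_bytes_relative(skb, offsetof(struct iphdr, protocol),
 *					&ipproto, sizeof(ipproto),
 *					BPF_HDR_START_NET))
 *		return TC_ACT_OK;
 */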
1827
36bbef52
DB
1828BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
1829{
1830 /* Idea is the following: should the needed direct read/write
 1831	 * test fail during runtime, we can pull in more data and redo
 1832	 * the check, since implicitly, we invalidate previous checks here.
 1833	 *
 1834	 * Or, since we know how much we need to make read/writable,
 1835	 * this can be done once at the program beginning for the direct
 1836	 * access case. By this we overcome the limitation of only the
 1837	 * current headroom being accessible.
1838 */
1839 return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
1840}
1841
1842static const struct bpf_func_proto bpf_skb_pull_data_proto = {
1843 .func = bpf_skb_pull_data,
1844 .gpl_only = false,
1845 .ret_type = RET_INTEGER,
1846 .arg1_type = ARG_PTR_TO_CTX,
1847 .arg2_type = ARG_ANYTHING,
1848};
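/* Usage sketch (illustrative only): make the first 60 bytes directly
 * accessible before parsing; the data/data_end pointers must be reloaded
 * after the pull since bpf_compute_data_pointers() above invalidates them:
 *
 *	void *data = (void *)(long)skb->data;
 *	void *data_end = (void *)(long)skb->data_end;
 *
 *	if (data + 60 > data_end) {
 *		if (bpf_skb_pull_data(skb, 60))
 *			return TC_ACT_OK;
 *		data = (void *)(long)skb->data;
 *		data_end = (void *)(long)skb->data_end;
 *		if (data + 60 > data_end)
 *			return TC_ACT_OK;
 *	}
 */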
1849
46f8bc92
MKL
1850BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
1851{
46f8bc92
MKL
1852 return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
1853}
1854
1855static const struct bpf_func_proto bpf_sk_fullsock_proto = {
1856 .func = bpf_sk_fullsock,
1857 .gpl_only = false,
1858 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
1859 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
1860};
1861
0ea488ff
JF
1862static inline int sk_skb_try_make_writable(struct sk_buff *skb,
1863 unsigned int write_len)
1864{
1865 int err = __bpf_try_make_writable(skb, write_len);
1866
1867 bpf_compute_data_end_sk_skb(skb);
1868 return err;
1869}
1870
1871BPF_CALL_2(sk_skb_pull_data, struct sk_buff *, skb, u32, len)
1872{
1873 /* Idea is the following: should the needed direct read/write
1874 * test fail during runtime, we can pull in more data and redo
1875 * again, since implicitly, we invalidate previous checks here.
1876 *
1877 * Or, since we know how much we need to make read/writeable,
1878 * this can be done once at the program beginning for direct
1879 * access case. By this we overcome limitations of only current
1880 * headroom being accessible.
1881 */
1882 return sk_skb_try_make_writable(skb, len ? : skb_headlen(skb));
1883}
1884
1885static const struct bpf_func_proto sk_skb_pull_data_proto = {
1886 .func = sk_skb_pull_data,
1887 .gpl_only = false,
1888 .ret_type = RET_INTEGER,
1889 .arg1_type = ARG_PTR_TO_CTX,
1890 .arg2_type = ARG_ANYTHING,
1891};
1892
f3694e00
DB
1893BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
1894 u64, from, u64, to, u64, flags)
91bc4822 1895{
0ed661d5 1896 __sum16 *ptr;
91bc4822 1897
781c53bc
DB
1898 if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
1899 return -EINVAL;
0ed661d5 1900 if (unlikely(offset > 0xffff || offset & 1))
91bc4822 1901 return -EFAULT;
0ed661d5 1902 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
91bc4822
AS
1903 return -EFAULT;
1904
0ed661d5 1905 ptr = (__sum16 *)(skb->data + offset);
781c53bc 1906 switch (flags & BPF_F_HDR_FIELD_MASK) {
8050c0f0
DB
1907 case 0:
1908 if (unlikely(from != 0))
1909 return -EINVAL;
1910
1911 csum_replace_by_diff(ptr, to);
1912 break;
91bc4822
AS
1913 case 2:
1914 csum_replace2(ptr, from, to);
1915 break;
1916 case 4:
1917 csum_replace4(ptr, from, to);
1918 break;
1919 default:
1920 return -EINVAL;
1921 }
1922
91bc4822
AS
1923 return 0;
1924}
1925
577c50aa 1926static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
91bc4822
AS
1927 .func = bpf_l3_csum_replace,
1928 .gpl_only = false,
1929 .ret_type = RET_INTEGER,
1930 .arg1_type = ARG_PTR_TO_CTX,
1931 .arg2_type = ARG_ANYTHING,
1932 .arg3_type = ARG_ANYTHING,
1933 .arg4_type = ARG_ANYTHING,
1934 .arg5_type = ARG_ANYTHING,
1935};
1936
f3694e00
DB
1937BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
1938 u64, from, u64, to, u64, flags)
91bc4822 1939{
781c53bc 1940 bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
2f72959a 1941 bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
d1b662ad 1942 bool do_mforce = flags & BPF_F_MARK_ENFORCE;
0ed661d5 1943 __sum16 *ptr;
91bc4822 1944
d1b662ad
DB
1945 if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
1946 BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
781c53bc 1947 return -EINVAL;
0ed661d5 1948 if (unlikely(offset > 0xffff || offset & 1))
91bc4822 1949 return -EFAULT;
0ed661d5 1950 if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
91bc4822
AS
1951 return -EFAULT;
1952
0ed661d5 1953 ptr = (__sum16 *)(skb->data + offset);
d1b662ad 1954 if (is_mmzero && !do_mforce && !*ptr)
2f72959a 1955 return 0;
91bc4822 1956
781c53bc 1957 switch (flags & BPF_F_HDR_FIELD_MASK) {
7d672345
DB
1958 case 0:
1959 if (unlikely(from != 0))
1960 return -EINVAL;
1961
1962 inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1963 break;
91bc4822
AS
1964 case 2:
1965 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
1966 break;
1967 case 4:
1968 inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
1969 break;
1970 default:
1971 return -EINVAL;
1972 }
1973
2f72959a
DB
1974 if (is_mmzero && !*ptr)
1975 *ptr = CSUM_MANGLED_0;
91bc4822
AS
1976 return 0;
1977}
1978
577c50aa 1979static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
91bc4822
AS
1980 .func = bpf_l4_csum_replace,
1981 .gpl_only = false,
1982 .ret_type = RET_INTEGER,
1983 .arg1_type = ARG_PTR_TO_CTX,
1984 .arg2_type = ARG_ANYTHING,
1985 .arg3_type = ARG_ANYTHING,
1986 .arg4_type = ARG_ANYTHING,
1987 .arg5_type = ARG_ANYTHING,
608cd71a
AS
1988};
1989
f3694e00
DB
1990BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
1991 __be32 *, to, u32, to_size, __wsum, seed)
7d672345 1992{
21cafc1d 1993 struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
f3694e00 1994 u32 diff_size = from_size + to_size;
7d672345
DB
1995 int i, j = 0;
1996
1997 /* This is quite flexible, some examples:
1998 *
1999 * from_size == 0, to_size > 0, seed := csum --> pushing data
2000 * from_size > 0, to_size == 0, seed := csum --> pulling data
2001 * from_size > 0, to_size > 0, seed := 0 --> diffing data
2002 *
2003 * Even for diffing, from_size and to_size don't need to be equal.
2004 */
2005 if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
2006 diff_size > sizeof(sp->diff)))
2007 return -EINVAL;
2008
2009 for (i = 0; i < from_size / sizeof(__be32); i++, j++)
2010 sp->diff[j] = ~from[i];
2011 for (i = 0; i < to_size / sizeof(__be32); i++, j++)
2012 sp->diff[j] = to[i];
2013
2014 return csum_partial(sp->diff, diff_size, seed);
2015}
2016
577c50aa 2017static const struct bpf_func_proto bpf_csum_diff_proto = {
7d672345
DB
2018 .func = bpf_csum_diff,
2019 .gpl_only = false,
36bbef52 2020 .pkt_access = true,
7d672345 2021 .ret_type = RET_INTEGER,
db1ac496 2022 .arg1_type = ARG_PTR_TO_MEM_OR_NULL,
39f19ebb 2023 .arg2_type = ARG_CONST_SIZE_OR_ZERO,
db1ac496 2024 .arg3_type = ARG_PTR_TO_MEM_OR_NULL,
39f19ebb 2025 .arg4_type = ARG_CONST_SIZE_OR_ZERO,
7d672345
DB
2026 .arg5_type = ARG_ANYTHING,
2027};
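/* Usage sketch (illustrative only): compute the checksum delta for an
 * IPv6 address rewrite and feed it into the L4 checksum via the size-0
 * (diff) mode of bpf_l4_csum_replace(); l4_csum_off is assumed to be
 * computed by the program:
 *
 *	struct in6_addr old_ip = {}, new_ip = {};	// filled by the program
 *	__s64 diff;
 *
 *	diff = bpf_csum_diff((__be32 *)&old_ip, sizeof(old_ip),
 *			     (__be32 *)&new_ip, sizeof(new_ip), 0);
 *	bpf_l4_csum_replace(skb, l4_csum_off, 0, diff, BPF_F_PSEUDO_HDR);
 */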
2028
36bbef52
DB
2029BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
2030{
2031 /* The interface is to be used in combination with bpf_csum_diff()
2032 * for direct packet writes. csum rotation for alignment as well
2033 * as emulating csum_sub() can be done from the eBPF program.
2034 */
2035 if (skb->ip_summed == CHECKSUM_COMPLETE)
2036 return (skb->csum = csum_add(skb->csum, csum));
2037
2038 return -ENOTSUPP;
2039}
2040
2041static const struct bpf_func_proto bpf_csum_update_proto = {
2042 .func = bpf_csum_update,
2043 .gpl_only = false,
2044 .ret_type = RET_INTEGER,
2045 .arg1_type = ARG_PTR_TO_CTX,
2046 .arg2_type = ARG_ANYTHING,
2047};
2048
7cdec54f
DB
2049BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
2050{
2051 /* The interface is to be used in combination with bpf_skb_adjust_room()
2052 * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET
2053 * is passed as flags, for example.
2054 */
2055 switch (level) {
2056 case BPF_CSUM_LEVEL_INC:
2057 __skb_incr_checksum_unnecessary(skb);
2058 break;
2059 case BPF_CSUM_LEVEL_DEC:
2060 __skb_decr_checksum_unnecessary(skb);
2061 break;
2062 case BPF_CSUM_LEVEL_RESET:
2063 __skb_reset_checksum_unnecessary(skb);
2064 break;
2065 case BPF_CSUM_LEVEL_QUERY:
2066 return skb->ip_summed == CHECKSUM_UNNECESSARY ?
2067 skb->csum_level : -EACCES;
2068 default:
2069 return -EINVAL;
2070 }
2071
2072 return 0;
2073}
2074
2075static const struct bpf_func_proto bpf_csum_level_proto = {
2076 .func = bpf_csum_level,
2077 .gpl_only = false,
2078 .ret_type = RET_INTEGER,
2079 .arg1_type = ARG_PTR_TO_CTX,
2080 .arg2_type = ARG_ANYTHING,
2081};
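/* Usage sketch (illustrative only): a decap program that strips a tunnel
 * header with BPF_F_ADJ_ROOM_NO_CSUM_RESET can drop one checksum level
 * itself instead of having the whole CHECKSUM_UNNECESSARY state cleared;
 * encap_len is assumed to be known to the program:
 *
 *	bpf_skb_adjust_room(skb, -encap_len, BPF_ADJ_ROOM_MAC,
 *			    BPF_F_ADJ_ROOM_NO_CSUM_RESET);
 *	bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC);
 */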
2082
a70b506e
DB
2083static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
2084{
a70b506e
DB
2085 return dev_forward_skb(dev, skb);
2086}
2087
4e3264d2
MKL
2088static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
2089 struct sk_buff *skb)
2090{
2091 int ret = ____dev_forward_skb(dev, skb);
2092
2093 if (likely(!ret)) {
2094 skb->dev = dev;
2095 ret = netif_rx(skb);
2096 }
2097
2098 return ret;
2099}
2100
a70b506e
DB
2101static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
2102{
2103 int ret;
2104
97cdcf37 2105 if (dev_xmit_recursion()) {
a70b506e
DB
2106 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
2107 kfree_skb(skb);
2108 return -ENETDOWN;
2109 }
2110
2111 skb->dev = dev;
5133498f 2112 skb->tstamp = 0;
a70b506e 2113
97cdcf37 2114 dev_xmit_recursion_inc();
a70b506e 2115 ret = dev_queue_xmit(skb);
97cdcf37 2116 dev_xmit_recursion_dec();
a70b506e
DB
2117
2118 return ret;
2119}
2120
4e3264d2
MKL
2121static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
2122 u32 flags)
2123{
e7c87bd6 2124 unsigned int mlen = skb_network_offset(skb);
4e3264d2 2125
e7c87bd6
WB
2126 if (mlen) {
2127 __skb_pull(skb, mlen);
4e3264d2 2128
e7c87bd6
WB
2129 /* At ingress, the mac header has already been pulled once.
 2130	 * At egress, skb_postpull_rcsum() has to be done in case
 2131	 * the skb originated from ingress (i.e. a forwarded skb)
2132 * to ensure that rcsum starts at net header.
2133 */
2134 if (!skb_at_tc_ingress(skb))
2135 skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
2136 }
4e3264d2
MKL
2137 skb_pop_mac_header(skb);
2138 skb_reset_mac_len(skb);
2139 return flags & BPF_F_INGRESS ?
2140 __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
2141}
2142
2143static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
2144 u32 flags)
2145{
3a0af8fd
TG
2146 /* Verify that a link layer header is carried */
2147 if (unlikely(skb->mac_header >= skb->network_header)) {
2148 kfree_skb(skb);
2149 return -ERANGE;
2150 }
2151
4e3264d2
MKL
2152 bpf_push_mac_rcsum(skb);
2153 return flags & BPF_F_INGRESS ?
2154 __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
2155}
2156
2157static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
2158 u32 flags)
2159{
c491680f 2160 if (dev_is_mac_header_xmit(dev))
4e3264d2 2161 return __bpf_redirect_common(skb, dev, flags);
c491680f
DB
2162 else
2163 return __bpf_redirect_no_mac(skb, dev, flags);
4e3264d2
MKL
2164}
2165
f3694e00 2166BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
3896d655 2167{
3896d655 2168 struct net_device *dev;
36bbef52
DB
2169 struct sk_buff *clone;
2170 int ret;
3896d655 2171
781c53bc
DB
2172 if (unlikely(flags & ~(BPF_F_INGRESS)))
2173 return -EINVAL;
2174
3896d655
AS
2175 dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
2176 if (unlikely(!dev))
2177 return -EINVAL;
2178
36bbef52
DB
2179 clone = skb_clone(skb, GFP_ATOMIC);
2180 if (unlikely(!clone))
3896d655
AS
2181 return -ENOMEM;
2182
36bbef52
DB
2183 /* For direct write, we need to keep the invariant that the skbs
2184 * we're dealing with need to be uncloned. Should uncloning fail
2185 * here, we need to free the just generated clone to unclone once
2186 * again.
2187 */
2188 ret = bpf_try_make_head_writable(skb);
2189 if (unlikely(ret)) {
2190 kfree_skb(clone);
2191 return -ENOMEM;
2192 }
2193
4e3264d2 2194 return __bpf_redirect(clone, dev, flags);
3896d655
AS
2195}
2196
577c50aa 2197static const struct bpf_func_proto bpf_clone_redirect_proto = {
3896d655
AS
2198 .func = bpf_clone_redirect,
2199 .gpl_only = false,
2200 .ret_type = RET_INTEGER,
2201 .arg1_type = ARG_PTR_TO_CTX,
2202 .arg2_type = ARG_ANYTHING,
2203 .arg3_type = ARG_ANYTHING,
2204};
2205
0b19cc0a
TM
2206DEFINE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
2207EXPORT_PER_CPU_SYMBOL_GPL(bpf_redirect_info);
781c53bc 2208
f3694e00 2209BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
27b29f63 2210{
0b19cc0a 2211 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
27b29f63 2212
781c53bc
DB
2213 if (unlikely(flags & ~(BPF_F_INGRESS)))
2214 return TC_ACT_SHOT;
2215
27b29f63 2216 ri->flags = flags;
4b55cf29 2217 ri->tgt_index = ifindex;
781c53bc 2218
27b29f63
AS
2219 return TC_ACT_REDIRECT;
2220}
2221
2222int skb_do_redirect(struct sk_buff *skb)
2223{
0b19cc0a 2224 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
27b29f63
AS
2225 struct net_device *dev;
2226
4b55cf29
THJ
2227 dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->tgt_index);
2228 ri->tgt_index = 0;
27b29f63
AS
2229 if (unlikely(!dev)) {
2230 kfree_skb(skb);
2231 return -EINVAL;
2232 }
2233
4e3264d2 2234 return __bpf_redirect(skb, dev, ri->flags);
27b29f63
AS
2235}
2236
577c50aa 2237static const struct bpf_func_proto bpf_redirect_proto = {
27b29f63
AS
2238 .func = bpf_redirect,
2239 .gpl_only = false,
2240 .ret_type = RET_INTEGER,
2241 .arg1_type = ARG_ANYTHING,
2242 .arg2_type = ARG_ANYTHING,
2243};
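/* Usage sketch (illustrative only): unlike bpf_clone_redirect(), the
 * actual forwarding happens after the program returns, driven by the
 * per-CPU bpf_redirect_info state filled in above:
 *
 *	SEC("tc")
 *	int redirect_to_peer(struct __sk_buff *skb)
 *	{
 *		const __u32 peer_ifindex = 4;	// hypothetical target device
 *
 *		return bpf_redirect(peer_ifindex, BPF_F_INGRESS);
 *	}
 */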
2244
604326b4 2245BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes)
2a100317
JF
2246{
2247 msg->apply_bytes = bytes;
2248 return 0;
2249}
2250
2251static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
2252 .func = bpf_msg_apply_bytes,
2253 .gpl_only = false,
2254 .ret_type = RET_INTEGER,
2255 .arg1_type = ARG_PTR_TO_CTX,
2256 .arg2_type = ARG_ANYTHING,
2257};
2258
604326b4 2259BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg *, msg, u32, bytes)
91843d54
JF
2260{
2261 msg->cork_bytes = bytes;
2262 return 0;
2263}
2264
2265static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
2266 .func = bpf_msg_cork_bytes,
2267 .gpl_only = false,
2268 .ret_type = RET_INTEGER,
2269 .arg1_type = ARG_PTR_TO_CTX,
2270 .arg2_type = ARG_ANYTHING,
2271};
2272
604326b4
DB
2273BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start,
2274 u32, end, u64, flags)
015632bb 2275{
604326b4
DB
2276 u32 len = 0, offset = 0, copy = 0, poffset = 0, bytes = end - start;
2277 u32 first_sge, last_sge, i, shift, bytes_sg_total;
2278 struct scatterlist *sge;
2279 u8 *raw, *to, *from;
015632bb
JF
2280 struct page *page;
2281
2282 if (unlikely(flags || end <= start))
2283 return -EINVAL;
2284
2285 /* First find the starting scatterlist element */
604326b4 2286 i = msg->sg.start;
015632bb 2287 do {
6562e29c 2288 offset += len;
604326b4 2289 len = sk_msg_elem(msg, i)->length;
015632bb
JF
2290 if (start < offset + len)
2291 break;
604326b4
DB
2292 sk_msg_iter_var_next(i);
2293 } while (i != msg->sg.end);
015632bb
JF
2294
2295 if (unlikely(start >= offset + len))
2296 return -EINVAL;
2297
604326b4 2298 first_sge = i;
5b24109b
DB
2299 /* The start may point into the sg element so we need to also
2300 * account for the headroom.
2301 */
2302 bytes_sg_total = start - offset + bytes;
163ab96b 2303 if (!test_bit(i, &msg->sg.copy) && bytes_sg_total <= len)
015632bb 2304 goto out;
015632bb
JF
2305
2306 /* At this point we need to linearize multiple scatterlist
2307 * elements or a single shared page. Either way we need to
2308 * copy into a linear buffer exclusively owned by BPF. Then
2309 * place the buffer in the scatterlist and fixup the original
2310 * entries by removing the entries now in the linear buffer
2311 * and shifting the remaining entries. For now we do not try
2312 * to copy partial entries to avoid complexity of running out
2313 * of sg_entry slots. The downside is reading a single byte
2314 * will copy the entire sg entry.
2315 */
2316 do {
604326b4
DB
2317 copy += sk_msg_elem(msg, i)->length;
2318 sk_msg_iter_var_next(i);
5b24109b 2319 if (bytes_sg_total <= copy)
015632bb 2320 break;
604326b4
DB
2321 } while (i != msg->sg.end);
2322 last_sge = i;
015632bb 2323
5b24109b 2324 if (unlikely(bytes_sg_total > copy))
015632bb
JF
2325 return -EINVAL;
2326
4c3d795c
TD
2327 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2328 get_order(copy));
015632bb
JF
2329 if (unlikely(!page))
2330 return -ENOMEM;
015632bb 2331
604326b4
DB
2332 raw = page_address(page);
2333 i = first_sge;
015632bb 2334 do {
604326b4
DB
2335 sge = sk_msg_elem(msg, i);
2336 from = sg_virt(sge);
2337 len = sge->length;
2338 to = raw + poffset;
015632bb
JF
2339
2340 memcpy(to, from, len);
9db39f4d 2341 poffset += len;
604326b4
DB
2342 sge->length = 0;
2343 put_page(sg_page(sge));
015632bb 2344
604326b4
DB
2345 sk_msg_iter_var_next(i);
2346 } while (i != last_sge);
015632bb 2347
604326b4 2348 sg_set_page(&msg->sg.data[first_sge], page, copy, 0);
015632bb
JF
2349
2350 /* To repair sg ring we need to shift entries. If we only
2351 * had a single entry though we can just replace it and
2352 * be done. Otherwise walk the ring and shift the entries.
2353 */
604326b4
DB
2354 WARN_ON_ONCE(last_sge == first_sge);
2355 shift = last_sge > first_sge ?
2356 last_sge - first_sge - 1 :
031097d9 2357 NR_MSG_FRAG_IDS - first_sge + last_sge - 1;
015632bb
JF
2358 if (!shift)
2359 goto out;
2360
604326b4
DB
2361 i = first_sge;
2362 sk_msg_iter_var_next(i);
015632bb 2363 do {
604326b4 2364 u32 move_from;
015632bb 2365
031097d9
JK
2366 if (i + shift >= NR_MSG_FRAG_IDS)
2367 move_from = i + shift - NR_MSG_FRAG_IDS;
015632bb
JF
2368 else
2369 move_from = i + shift;
604326b4 2370 if (move_from == msg->sg.end)
015632bb
JF
2371 break;
2372
604326b4
DB
2373 msg->sg.data[i] = msg->sg.data[move_from];
2374 msg->sg.data[move_from].length = 0;
2375 msg->sg.data[move_from].page_link = 0;
2376 msg->sg.data[move_from].offset = 0;
2377 sk_msg_iter_var_next(i);
015632bb 2378 } while (1);
604326b4
DB
2379
2380 msg->sg.end = msg->sg.end - shift > msg->sg.end ?
031097d9 2381 msg->sg.end - shift + NR_MSG_FRAG_IDS :
604326b4 2382 msg->sg.end - shift;
015632bb 2383out:
604326b4 2384 msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset;
015632bb 2385 msg->data_end = msg->data + bytes;
015632bb
JF
2386 return 0;
2387}
2388
2389static const struct bpf_func_proto bpf_msg_pull_data_proto = {
2390 .func = bpf_msg_pull_data,
2391 .gpl_only = false,
2392 .ret_type = RET_INTEGER,
2393 .arg1_type = ARG_PTR_TO_CTX,
2394 .arg2_type = ARG_ANYTHING,
2395 .arg3_type = ARG_ANYTHING,
2396 .arg4_type = ARG_ANYTHING,
2397};
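/* Usage sketch (illustrative only): an sk_msg program that wants to parse
 * the first 20 bytes of application data makes them contiguous and
 * verifier-visible first:
 *
 *	SEC("sk_msg")
 *	int msg_parser(struct sk_msg_md *msg)
 *	{
 *		void *data, *data_end;
 *
 *		if (bpf_msg_pull_data(msg, 0, 20, 0))
 *			return SK_PASS;
 *		data = msg->data;
 *		data_end = msg->data_end;
 *		if (data + 20 > data_end)
 *			return SK_PASS;
 *		// parse the 20-byte application header via data here
 *		return SK_PASS;
 *	}
 */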
2398
6fff607e
JF
2399BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start,
2400 u32, len, u64, flags)
2401{
2402 struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge;
6562e29c 2403 u32 new, i = 0, l = 0, space, copy = 0, offset = 0;
6fff607e
JF
2404 u8 *raw, *to, *from;
2405 struct page *page;
2406
2407 if (unlikely(flags))
2408 return -EINVAL;
2409
2410 /* First find the starting scatterlist element */
2411 i = msg->sg.start;
2412 do {
6562e29c 2413 offset += l;
6fff607e
JF
2414 l = sk_msg_elem(msg, i)->length;
2415
2416 if (start < offset + l)
2417 break;
6fff607e
JF
2418 sk_msg_iter_var_next(i);
2419 } while (i != msg->sg.end);
2420
2421 if (start >= offset + l)
2422 return -EINVAL;
2423
2424 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2425
 2426	/* If no space is available we will fall back to copy; we need at
 2427	 * least one scatterlist elem available to push data into
 2428	 * when start aligns to the beginning of an element, or two
 2429	 * when it falls inside an element. We handle the start equals
 2430	 * offset case because it's the common case for inserting a
2431 * header.
2432 */
2433 if (!space || (space == 1 && start != offset))
2434 copy = msg->sg.data[i].length;
2435
2436 page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP,
2437 get_order(copy + len));
2438 if (unlikely(!page))
2439 return -ENOMEM;
2440
2441 if (copy) {
2442 int front, back;
2443
2444 raw = page_address(page);
2445
2446 psge = sk_msg_elem(msg, i);
2447 front = start - offset;
2448 back = psge->length - front;
2449 from = sg_virt(psge);
2450
2451 if (front)
2452 memcpy(raw, from, front);
2453
2454 if (back) {
2455 from += front;
2456 to = raw + front + len;
2457
2458 memcpy(to, from, back);
2459 }
2460
2461 put_page(sg_page(psge));
2462 } else if (start - offset) {
2463 psge = sk_msg_elem(msg, i);
2464 rsge = sk_msg_elem_cpy(msg, i);
2465
2466 psge->length = start - offset;
2467 rsge.length -= psge->length;
2468 rsge.offset += start;
2469
2470 sk_msg_iter_var_next(i);
2471 sg_unmark_end(psge);
cf21e9ba 2472 sg_unmark_end(&rsge);
6fff607e
JF
2473 sk_msg_iter_next(msg, end);
2474 }
2475
2476 /* Slot(s) to place newly allocated data */
2477 new = i;
2478
2479 /* Shift one or two slots as needed */
2480 if (!copy) {
2481 sge = sk_msg_elem_cpy(msg, i);
2482
2483 sk_msg_iter_var_next(i);
2484 sg_unmark_end(&sge);
2485 sk_msg_iter_next(msg, end);
2486
2487 nsge = sk_msg_elem_cpy(msg, i);
2488 if (rsge.length) {
2489 sk_msg_iter_var_next(i);
2490 nnsge = sk_msg_elem_cpy(msg, i);
2491 }
2492
2493 while (i != msg->sg.end) {
2494 msg->sg.data[i] = sge;
2495 sge = nsge;
2496 sk_msg_iter_var_next(i);
2497 if (rsge.length) {
2498 nsge = nnsge;
2499 nnsge = sk_msg_elem_cpy(msg, i);
2500 } else {
2501 nsge = sk_msg_elem_cpy(msg, i);
2502 }
2503 }
2504 }
2505
2506 /* Place newly allocated data buffer */
2507 sk_mem_charge(msg->sk, len);
2508 msg->sg.size += len;
163ab96b 2509 __clear_bit(new, &msg->sg.copy);
6fff607e
JF
2510 sg_set_page(&msg->sg.data[new], page, len + copy, 0);
2511 if (rsge.length) {
2512 get_page(sg_page(&rsge));
2513 sk_msg_iter_var_next(new);
2514 msg->sg.data[new] = rsge;
2515 }
2516
2517 sk_msg_compute_data_pointers(msg);
2518 return 0;
2519}
2520
2521static const struct bpf_func_proto bpf_msg_push_data_proto = {
2522 .func = bpf_msg_push_data,
2523 .gpl_only = false,
2524 .ret_type = RET_INTEGER,
2525 .arg1_type = ARG_PTR_TO_CTX,
2526 .arg2_type = ARG_ANYTHING,
2527 .arg3_type = ARG_ANYTHING,
2528 .arg4_type = ARG_ANYTHING,
2529};
2530
7246d8ed
JF
2531static void sk_msg_shift_left(struct sk_msg *msg, int i)
2532{
2533 int prev;
2534
2535 do {
2536 prev = i;
2537 sk_msg_iter_var_next(i);
2538 msg->sg.data[prev] = msg->sg.data[i];
2539 } while (i != msg->sg.end);
2540
2541 sk_msg_iter_prev(msg, end);
2542}
2543
2544static void sk_msg_shift_right(struct sk_msg *msg, int i)
2545{
2546 struct scatterlist tmp, sge;
2547
2548 sk_msg_iter_next(msg, end);
2549 sge = sk_msg_elem_cpy(msg, i);
2550 sk_msg_iter_var_next(i);
2551 tmp = sk_msg_elem_cpy(msg, i);
2552
2553 while (i != msg->sg.end) {
2554 msg->sg.data[i] = sge;
2555 sk_msg_iter_var_next(i);
2556 sge = tmp;
2557 tmp = sk_msg_elem_cpy(msg, i);
2558 }
2559}
2560
2561BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start,
2562 u32, len, u64, flags)
2563{
6562e29c 2564 u32 i = 0, l = 0, space, offset = 0;
7246d8ed
JF
2565 u64 last = start + len;
2566 int pop;
2567
2568 if (unlikely(flags))
2569 return -EINVAL;
2570
2571 /* First find the starting scatterlist element */
2572 i = msg->sg.start;
2573 do {
6562e29c 2574 offset += l;
7246d8ed
JF
2575 l = sk_msg_elem(msg, i)->length;
2576
2577 if (start < offset + l)
2578 break;
7246d8ed
JF
2579 sk_msg_iter_var_next(i);
2580 } while (i != msg->sg.end);
2581
2582 /* Bounds checks: start and pop must be inside message */
2583 if (start >= offset + l || last >= msg->sg.size)
2584 return -EINVAL;
2585
2586 space = MAX_MSG_FRAGS - sk_msg_elem_used(msg);
2587
2588 pop = len;
2589 /* --------------| offset
2590 * -| start |-------- len -------|
2591 *
2592 * |----- a ----|-------- pop -------|----- b ----|
2593 * |______________________________________________| length
2594 *
2595 *
2596 * a: region at front of scatter element to save
 2597	 * b: region at back of scatter element to save when length > a + pop
 2598	 * pop: region to pop from element; same as input 'pop', it will be
 2599	 *      decremented below per iteration.
 2600	 *
 2601	 * Two top-level cases to handle when start != offset: first, B is
 2602	 * non-zero, and second, B is zero, corresponding to when a pop includes
 2603	 * more than one element.
 2604	 *
 2605	 * Then if B is non-zero AND there is no space, allocate space and
 2606	 * compact the A, B regions into a page. If there is space, shift the
 2607	 * ring to the right, freeing the next element in the ring to place B,
 2608	 * leaving A untouched except to reduce its length.
2609 */
2610 if (start != offset) {
2611 struct scatterlist *nsge, *sge = sk_msg_elem(msg, i);
2612 int a = start;
2613 int b = sge->length - pop - a;
2614
2615 sk_msg_iter_var_next(i);
2616
2617 if (pop < sge->length - a) {
2618 if (space) {
2619 sge->length = a;
2620 sk_msg_shift_right(msg, i);
2621 nsge = sk_msg_elem(msg, i);
2622 get_page(sg_page(sge));
2623 sg_set_page(nsge,
2624 sg_page(sge),
2625 b, sge->offset + pop + a);
2626 } else {
2627 struct page *page, *orig;
2628 u8 *to, *from;
2629
2630 page = alloc_pages(__GFP_NOWARN |
2631 __GFP_COMP | GFP_ATOMIC,
2632 get_order(a + b));
2633 if (unlikely(!page))
2634 return -ENOMEM;
2635
2636 sge->length = a;
2637 orig = sg_page(sge);
2638 from = sg_virt(sge);
2639 to = page_address(page);
2640 memcpy(to, from, a);
2641 memcpy(to + a, from + a + pop, b);
2642 sg_set_page(sge, page, a + b, 0);
2643 put_page(orig);
2644 }
2645 pop = 0;
2646 } else if (pop >= sge->length - a) {
7246d8ed 2647 pop -= (sge->length - a);
3e104c23 2648 sge->length = a;
7246d8ed
JF
2649 }
2650 }
2651
2652 /* From above the current layout _must_ be as follows,
2653 *
2654 * -| offset
2655 * -| start
2656 *
2657 * |---- pop ---|---------------- b ------------|
2658 * |____________________________________________| length
2659 *
2660 * Offset and start of the current msg elem are equal because in the
2661 * previous case we handled offset != start and either consumed the
2662 * entire element and advanced to the next element OR pop == 0.
2663 *
 2664	 * Two cases to handle here: first, pop is less than the length,
 2665	 * leaving some remainder b above. Simply adjust the element's layout
 2666	 * in this case. Or pop >= length of the element so that b = 0. In this
 2667	 * case advance to the next element, decrementing pop.
2668 */
2669 while (pop) {
2670 struct scatterlist *sge = sk_msg_elem(msg, i);
2671
2672 if (pop < sge->length) {
2673 sge->length -= pop;
2674 sge->offset += pop;
2675 pop = 0;
2676 } else {
2677 pop -= sge->length;
2678 sk_msg_shift_left(msg, i);
2679 }
2680 sk_msg_iter_var_next(i);
2681 }
2682
2683 sk_mem_uncharge(msg->sk, len - pop);
2684 msg->sg.size -= (len - pop);
2685 sk_msg_compute_data_pointers(msg);
2686 return 0;
2687}
2688
2689static const struct bpf_func_proto bpf_msg_pop_data_proto = {
2690 .func = bpf_msg_pop_data,
2691 .gpl_only = false,
2692 .ret_type = RET_INTEGER,
2693 .arg1_type = ARG_PTR_TO_CTX,
2694 .arg2_type = ARG_ANYTHING,
2695 .arg3_type = ARG_ANYTHING,
2696 .arg4_type = ARG_ANYTHING,
2697};
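/* Usage sketch (illustrative only): together with bpf_msg_push_data()
 * above, this allows inserting and stripping a small application framing
 * header from an sk_msg program; both helpers keep msg->data/data_end in
 * sync via sk_msg_compute_data_pointers():
 *
 *	// make room for a 4-byte header at the start of the message
 *	bpf_msg_push_data(msg, 0, 4, 0);
 *	// ... re-validate msg->data/data_end and write the header ...
 *
 *	// and on the receive side, strip those 4 bytes again
 *	bpf_msg_pop_data(msg, 0, 4, 0);
 */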
2698
5a52ae4e
DB
2699#ifdef CONFIG_CGROUP_NET_CLASSID
2700BPF_CALL_0(bpf_get_cgroup_classid_curr)
2701{
2702 return __task_get_classid(current);
2703}
2704
2705static const struct bpf_func_proto bpf_get_cgroup_classid_curr_proto = {
2706 .func = bpf_get_cgroup_classid_curr,
2707 .gpl_only = false,
2708 .ret_type = RET_INTEGER,
2709};
2710#endif
2711
f3694e00 2712BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
8d20aabe 2713{
f3694e00 2714 return task_get_classid(skb);
8d20aabe
DB
2715}
2716
2717static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
2718 .func = bpf_get_cgroup_classid,
2719 .gpl_only = false,
2720 .ret_type = RET_INTEGER,
2721 .arg1_type = ARG_PTR_TO_CTX,
2722};
2723
f3694e00 2724BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
c46646d0 2725{
f3694e00 2726 return dst_tclassid(skb);
c46646d0
DB
2727}
2728
2729static const struct bpf_func_proto bpf_get_route_realm_proto = {
2730 .func = bpf_get_route_realm,
2731 .gpl_only = false,
2732 .ret_type = RET_INTEGER,
2733 .arg1_type = ARG_PTR_TO_CTX,
2734};
2735
f3694e00 2736BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
13c5c240
DB
2737{
2738 /* If skb_clear_hash() was called due to mangling, we can
2739 * trigger SW recalculation here. Later access to hash
2740 * can then use the inline skb->hash via context directly
2741 * instead of calling this helper again.
2742 */
f3694e00 2743 return skb_get_hash(skb);
13c5c240
DB
2744}
2745
2746static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
2747 .func = bpf_get_hash_recalc,
2748 .gpl_only = false,
2749 .ret_type = RET_INTEGER,
2750 .arg1_type = ARG_PTR_TO_CTX,
2751};
2752
7a4b28c6
DB
2753BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
2754{
2755 /* After all direct packet write, this can be used once for
2756 * triggering a lazy recalc on next skb_get_hash() invocation.
2757 */
2758 skb_clear_hash(skb);
2759 return 0;
2760}
2761
2762static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
2763 .func = bpf_set_hash_invalid,
2764 .gpl_only = false,
2765 .ret_type = RET_INTEGER,
2766 .arg1_type = ARG_PTR_TO_CTX,
2767};
2768
ded092cd
DB
2769BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
2770{
2771 /* Set user specified hash as L4(+), so that it gets returned
2772 * on skb_get_hash() call unless BPF prog later on triggers a
2773 * skb_clear_hash().
2774 */
2775 __skb_set_sw_hash(skb, hash, true);
2776 return 0;
2777}
2778
2779static const struct bpf_func_proto bpf_set_hash_proto = {
2780 .func = bpf_set_hash,
2781 .gpl_only = false,
2782 .ret_type = RET_INTEGER,
2783 .arg1_type = ARG_PTR_TO_CTX,
2784 .arg2_type = ARG_ANYTHING,
2785};
2786
f3694e00
DB
2787BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
2788 u16, vlan_tci)
4e10df9a 2789{
db58ba45 2790 int ret;
4e10df9a
AS
2791
2792 if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
2793 vlan_proto != htons(ETH_P_8021AD)))
2794 vlan_proto = htons(ETH_P_8021Q);
2795
8065694e 2796 bpf_push_mac_rcsum(skb);
db58ba45 2797 ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
8065694e
DB
2798 bpf_pull_mac_rcsum(skb);
2799
6aaae2b6 2800 bpf_compute_data_pointers(skb);
db58ba45 2801 return ret;
4e10df9a
AS
2802}
2803
93731ef0 2804static const struct bpf_func_proto bpf_skb_vlan_push_proto = {
4e10df9a
AS
2805 .func = bpf_skb_vlan_push,
2806 .gpl_only = false,
2807 .ret_type = RET_INTEGER,
2808 .arg1_type = ARG_PTR_TO_CTX,
2809 .arg2_type = ARG_ANYTHING,
2810 .arg3_type = ARG_ANYTHING,
2811};
2812
f3694e00 2813BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
4e10df9a 2814{
db58ba45 2815 int ret;
4e10df9a 2816
8065694e 2817 bpf_push_mac_rcsum(skb);
db58ba45 2818 ret = skb_vlan_pop(skb);
8065694e
DB
2819 bpf_pull_mac_rcsum(skb);
2820
6aaae2b6 2821 bpf_compute_data_pointers(skb);
db58ba45 2822 return ret;
4e10df9a
AS
2823}
2824
93731ef0 2825static const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
4e10df9a
AS
2826 .func = bpf_skb_vlan_pop,
2827 .gpl_only = false,
2828 .ret_type = RET_INTEGER,
2829 .arg1_type = ARG_PTR_TO_CTX,
2830};
2831
6578171a
DB
2832static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
2833{
2834 /* Caller already did skb_cow() with len as headroom,
2835 * so no need to do it here.
2836 */
2837 skb_push(skb, len);
2838 memmove(skb->data, skb->data + len, off);
2839 memset(skb->data + off, 0, len);
2840
2841 /* No skb_postpush_rcsum(skb, skb->data + off, len)
2842 * needed here as it does not change the skb->csum
2843 * result for checksum complete when summing over
2844 * zeroed blocks.
2845 */
2846 return 0;
2847}
2848
2849static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
2850{
2851 /* skb_ensure_writable() is not needed here, as we're
2852 * already working on an uncloned skb.
2853 */
2854 if (unlikely(!pskb_may_pull(skb, off + len)))
2855 return -ENOMEM;
2856
2857 skb_postpull_rcsum(skb, skb->data + off, len);
2858 memmove(skb->data + len, skb->data, off);
2859 __skb_pull(skb, len);
2860
2861 return 0;
2862}
2863
2864static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
2865{
2866 bool trans_same = skb->transport_header == skb->network_header;
2867 int ret;
2868
2869 /* There's no need for __skb_push()/__skb_pull() pair to
2870 * get to the start of the mac header as we're guaranteed
2871 * to always start from here under eBPF.
2872 */
2873 ret = bpf_skb_generic_push(skb, off, len);
2874 if (likely(!ret)) {
2875 skb->mac_header -= len;
2876 skb->network_header -= len;
2877 if (trans_same)
2878 skb->transport_header = skb->network_header;
2879 }
2880
2881 return ret;
2882}
2883
2884static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
2885{
2886 bool trans_same = skb->transport_header == skb->network_header;
2887 int ret;
2888
2889 /* Same here, __skb_push()/__skb_pull() pair not needed. */
2890 ret = bpf_skb_generic_pop(skb, off, len);
2891 if (likely(!ret)) {
2892 skb->mac_header += len;
2893 skb->network_header += len;
2894 if (trans_same)
2895 skb->transport_header = skb->network_header;
2896 }
2897
2898 return ret;
2899}
2900
2901static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
2902{
2903 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
0daf4349 2904 u32 off = skb_mac_header_len(skb);
6578171a
DB
2905 int ret;
2906
4c3024de 2907 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
d02f51cb
DA
2908 return -ENOTSUPP;
2909
6578171a
DB
2910 ret = skb_cow(skb, len_diff);
2911 if (unlikely(ret < 0))
2912 return ret;
2913
2914 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
2915 if (unlikely(ret < 0))
2916 return ret;
2917
2918 if (skb_is_gso(skb)) {
d02f51cb
DA
2919 struct skb_shared_info *shinfo = skb_shinfo(skb);
2920
880388aa
DM
2921 /* SKB_GSO_TCPV4 needs to be changed into
2922 * SKB_GSO_TCPV6.
6578171a 2923 */
d02f51cb
DA
2924 if (shinfo->gso_type & SKB_GSO_TCPV4) {
2925 shinfo->gso_type &= ~SKB_GSO_TCPV4;
2926 shinfo->gso_type |= SKB_GSO_TCPV6;
6578171a
DB
2927 }
2928
2929 /* Due to IPv6 header, MSS needs to be downgraded. */
d02f51cb 2930 skb_decrease_gso_size(shinfo, len_diff);
6578171a 2931 /* Header must be checked, and gso_segs recomputed. */
d02f51cb
DA
2932 shinfo->gso_type |= SKB_GSO_DODGY;
2933 shinfo->gso_segs = 0;
6578171a
DB
2934 }
2935
2936 skb->protocol = htons(ETH_P_IPV6);
2937 skb_clear_hash(skb);
2938
2939 return 0;
2940}
2941
2942static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
2943{
2944 const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
0daf4349 2945 u32 off = skb_mac_header_len(skb);
6578171a
DB
2946 int ret;
2947
4c3024de 2948 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb))
d02f51cb
DA
2949 return -ENOTSUPP;
2950
6578171a
DB
2951 ret = skb_unclone(skb, GFP_ATOMIC);
2952 if (unlikely(ret < 0))
2953 return ret;
2954
2955 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
2956 if (unlikely(ret < 0))
2957 return ret;
2958
2959 if (skb_is_gso(skb)) {
d02f51cb
DA
2960 struct skb_shared_info *shinfo = skb_shinfo(skb);
2961
880388aa
DM
2962 /* SKB_GSO_TCPV6 needs to be changed into
2963 * SKB_GSO_TCPV4.
6578171a 2964 */
d02f51cb
DA
2965 if (shinfo->gso_type & SKB_GSO_TCPV6) {
2966 shinfo->gso_type &= ~SKB_GSO_TCPV6;
2967 shinfo->gso_type |= SKB_GSO_TCPV4;
6578171a
DB
2968 }
2969
2970 /* Due to IPv4 header, MSS can be upgraded. */
d02f51cb 2971 skb_increase_gso_size(shinfo, len_diff);
6578171a 2972 /* Header must be checked, and gso_segs recomputed. */
d02f51cb
DA
2973 shinfo->gso_type |= SKB_GSO_DODGY;
2974 shinfo->gso_segs = 0;
6578171a
DB
2975 }
2976
2977 skb->protocol = htons(ETH_P_IP);
2978 skb_clear_hash(skb);
2979
2980 return 0;
2981}
2982
2983static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
2984{
2985 __be16 from_proto = skb->protocol;
2986
2987 if (from_proto == htons(ETH_P_IP) &&
2988 to_proto == htons(ETH_P_IPV6))
2989 return bpf_skb_proto_4_to_6(skb);
2990
2991 if (from_proto == htons(ETH_P_IPV6) &&
2992 to_proto == htons(ETH_P_IP))
2993 return bpf_skb_proto_6_to_4(skb);
2994
2995 return -ENOTSUPP;
2996}
2997
f3694e00
DB
2998BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
2999 u64, flags)
6578171a 3000{
6578171a
DB
3001 int ret;
3002
3003 if (unlikely(flags))
3004 return -EINVAL;
3005
3006 /* General idea is that this helper does the basic groundwork
3007 * needed for changing the protocol, and eBPF program fills the
3008 * rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
3009 * and other helpers, rather than passing a raw buffer here.
3010 *
3011 * The rationale is to keep this minimal and without a need to
 3012	 * deal with raw packet data. F.e. even if we passed buffers
 3013	 * here, the program would still need to call the bpf_lX_csum_replace()
 3014	 * helpers anyway. Plus, this way we also keep separation of
3015 * concerns, since f.e. bpf_skb_store_bytes() should only take
3016 * care of stores.
3017 *
3018 * Currently, additional options and extension header space are
3019 * not supported, but flags register is reserved so we can adapt
3020 * that. For offloads, we mark packet as dodgy, so that headers
3021 * need to be verified first.
3022 */
3023 ret = bpf_skb_proto_xlat(skb, proto);
6aaae2b6 3024 bpf_compute_data_pointers(skb);
6578171a
DB
3025 return ret;
3026}
3027
3028static const struct bpf_func_proto bpf_skb_change_proto_proto = {
3029 .func = bpf_skb_change_proto,
3030 .gpl_only = false,
3031 .ret_type = RET_INTEGER,
3032 .arg1_type = ARG_PTR_TO_CTX,
3033 .arg2_type = ARG_ANYTHING,
3034 .arg3_type = ARG_ANYTHING,
3035};
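/* Usage sketch (illustrative only): the helper only resizes and retags
 * the skb, so an IPv4->IPv6 translator is expected to write the new IPv6
 * header itself afterwards (bpf_htons() from <bpf/bpf_endian.h> assumed):
 *
 *	struct ipv6hdr ip6 = {};	// filled in by the program
 *
 *	if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
 *		return TC_ACT_SHOT;
 *	if (bpf_skb_store_bytes(skb, ETH_HLEN, &ip6, sizeof(ip6), 0))
 *		return TC_ACT_SHOT;
 *	// L4 checksum fix-up via bpf_l4_csum_replace() is still required
 */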
3036
f3694e00 3037BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
d2485c42 3038{
d2485c42 3039 /* We only allow a restricted subset to be changed for now. */
45c7fffa
DB
3040 if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
3041 !skb_pkt_type_ok(pkt_type)))
d2485c42
DB
3042 return -EINVAL;
3043
3044 skb->pkt_type = pkt_type;
3045 return 0;
3046}
3047
3048static const struct bpf_func_proto bpf_skb_change_type_proto = {
3049 .func = bpf_skb_change_type,
3050 .gpl_only = false,
3051 .ret_type = RET_INTEGER,
3052 .arg1_type = ARG_PTR_TO_CTX,
3053 .arg2_type = ARG_ANYTHING,
3054};
3055
2be7e212
DB
3056static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
3057{
3058 switch (skb->protocol) {
3059 case htons(ETH_P_IP):
3060 return sizeof(struct iphdr);
3061 case htons(ETH_P_IPV6):
3062 return sizeof(struct ipv6hdr);
3063 default:
3064 return ~0U;
3065 }
3066}
3067
868d5235
WB
3068#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
3069 BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3070
3071#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
3072 BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
3073 BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
58dfc900
AM
3074 BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
3075 BPF_F_ADJ_ROOM_ENCAP_L2( \
3076 BPF_ADJ_ROOM_ENCAP_L2_MASK))
2278f6cc
WB
3077
3078static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
3079 u64 flags)
2be7e212 3080{
58dfc900 3081 u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
868d5235 3082 bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
62b31b42 3083 u16 mac_len = 0, inner_net = 0, inner_trans = 0;
868d5235 3084 unsigned int gso_type = SKB_GSO_DODGY;
2be7e212
DB
3085 int ret;
3086
2278f6cc
WB
3087 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3088 /* udp gso_size delineates datagrams, only allow if fixed */
3089 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3090 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3091 return -ENOTSUPP;
3092 }
d02f51cb 3093
908adce6 3094 ret = skb_cow_head(skb, len_diff);
2be7e212
DB
3095 if (unlikely(ret < 0))
3096 return ret;
3097
868d5235
WB
3098 if (encap) {
3099 if (skb->protocol != htons(ETH_P_IP) &&
3100 skb->protocol != htons(ETH_P_IPV6))
3101 return -ENOTSUPP;
3102
3103 if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 &&
3104 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3105 return -EINVAL;
3106
3107 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE &&
3108 flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3109 return -EINVAL;
3110
3111 if (skb->encapsulation)
3112 return -EALREADY;
3113
3114 mac_len = skb->network_header - skb->mac_header;
3115 inner_net = skb->network_header;
58dfc900
AM
3116 if (inner_mac_len > len_diff)
3117 return -EINVAL;
868d5235
WB
3118 inner_trans = skb->transport_header;
3119 }
3120
2be7e212
DB
3121 ret = bpf_skb_net_hdr_push(skb, off, len_diff);
3122 if (unlikely(ret < 0))
3123 return ret;
3124
868d5235 3125 if (encap) {
58dfc900 3126 skb->inner_mac_header = inner_net - inner_mac_len;
868d5235
WB
3127 skb->inner_network_header = inner_net;
3128 skb->inner_transport_header = inner_trans;
3129 skb_set_inner_protocol(skb, skb->protocol);
3130
3131 skb->encapsulation = 1;
3132 skb_set_network_header(skb, mac_len);
3133
3134 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
3135 gso_type |= SKB_GSO_UDP_TUNNEL;
3136 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE)
3137 gso_type |= SKB_GSO_GRE;
3138 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3139 gso_type |= SKB_GSO_IPXIP6;
58dfc900 3140 else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
868d5235
WB
3141 gso_type |= SKB_GSO_IPXIP4;
3142
3143 if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
3144 flags & BPF_F_ADJ_ROOM_ENCAP_L4_UDP) {
3145 int nh_len = flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 ?
3146 sizeof(struct ipv6hdr) :
3147 sizeof(struct iphdr);
3148
3149 skb_set_transport_header(skb, mac_len + nh_len);
3150 }
1b00e0df
WB
3151
3152 /* Match skb->protocol to new outer l3 protocol */
3153 if (skb->protocol == htons(ETH_P_IP) &&
3154 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
3155 skb->protocol = htons(ETH_P_IPV6);
3156 else if (skb->protocol == htons(ETH_P_IPV6) &&
3157 flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
3158 skb->protocol = htons(ETH_P_IP);
868d5235
WB
3159 }
3160
2be7e212 3161 if (skb_is_gso(skb)) {
d02f51cb
DA
3162 struct skb_shared_info *shinfo = skb_shinfo(skb);
3163
2be7e212 3164 /* Due to header grow, MSS needs to be downgraded. */
2278f6cc
WB
3165 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3166 skb_decrease_gso_size(shinfo, len_diff);
3167
2be7e212 3168 /* Header must be checked, and gso_segs recomputed. */
868d5235 3169 shinfo->gso_type |= gso_type;
d02f51cb 3170 shinfo->gso_segs = 0;
2be7e212
DB
3171 }
3172
3173 return 0;
3174}
3175
2278f6cc
WB
3176static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
3177 u64 flags)
2be7e212 3178{
2be7e212
DB
3179 int ret;
3180
836e66c2
DB
3181 if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
3182 BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
43537b8e
WB
3183 return -EINVAL;
3184
2278f6cc
WB
3185 if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
3186 /* udp gso_size delineates datagrams, only allow if fixed */
3187 if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) ||
3188 !(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3189 return -ENOTSUPP;
3190 }
d02f51cb 3191
2be7e212
DB
3192 ret = skb_unclone(skb, GFP_ATOMIC);
3193 if (unlikely(ret < 0))
3194 return ret;
3195
3196 ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
3197 if (unlikely(ret < 0))
3198 return ret;
3199
3200 if (skb_is_gso(skb)) {
d02f51cb
DA
3201 struct skb_shared_info *shinfo = skb_shinfo(skb);
3202
2be7e212 3203 /* Due to header shrink, MSS can be upgraded. */
2278f6cc
WB
3204 if (!(flags & BPF_F_ADJ_ROOM_FIXED_GSO))
3205 skb_increase_gso_size(shinfo, len_diff);
3206
2be7e212 3207 /* Header must be checked, and gso_segs recomputed. */
d02f51cb
DA
3208 shinfo->gso_type |= SKB_GSO_DODGY;
3209 shinfo->gso_segs = 0;
2be7e212
DB
3210 }
3211
3212 return 0;
3213}
3214
3215static u32 __bpf_skb_max_len(const struct sk_buff *skb)
3216{
0c6bc6e5
JF
3217 return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
3218 SKB_MAX_ALLOC;
2be7e212
DB
3219}
3220
14aa3192
WB
3221BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
3222 u32, mode, u64, flags)
2be7e212 3223{
2be7e212
DB
3224 u32 len_cur, len_diff_abs = abs(len_diff);
3225 u32 len_min = bpf_skb_net_base_len(skb);
3226 u32 len_max = __bpf_skb_max_len(skb);
3227 __be16 proto = skb->protocol;
3228 bool shrink = len_diff < 0;
14aa3192 3229 u32 off;
2be7e212
DB
3230 int ret;
3231
836e66c2
DB
3232 if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK |
3233 BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
14aa3192 3234 return -EINVAL;
2be7e212
DB
3235 if (unlikely(len_diff_abs > 0xfffU))
3236 return -EFAULT;
3237 if (unlikely(proto != htons(ETH_P_IP) &&
3238 proto != htons(ETH_P_IPV6)))
3239 return -ENOTSUPP;
3240
14aa3192
WB
3241 off = skb_mac_header_len(skb);
3242 switch (mode) {
3243 case BPF_ADJ_ROOM_NET:
3244 off += bpf_skb_net_base_len(skb);
3245 break;
3246 case BPF_ADJ_ROOM_MAC:
3247 break;
3248 default:
3249 return -ENOTSUPP;
3250 }
3251
2be7e212 3252 len_cur = skb->len - skb_network_offset(skb);
2be7e212
DB
3253 if ((shrink && (len_diff_abs >= len_cur ||
3254 len_cur - len_diff_abs < len_min)) ||
3255 (!shrink && (skb->len + len_diff_abs > len_max &&
3256 !skb_is_gso(skb))))
3257 return -ENOTSUPP;
3258
2278f6cc
WB
3259 ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
3260 bpf_skb_net_grow(skb, off, len_diff_abs, flags);
836e66c2
DB
3261 if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET))
3262 __skb_reset_checksum_unnecessary(skb);
2be7e212 3263
6aaae2b6 3264 bpf_compute_data_pointers(skb);
e4a6a342 3265 return ret;
2be7e212
DB
3266}
3267
2be7e212
DB
3268static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
3269 .func = bpf_skb_adjust_room,
3270 .gpl_only = false,
3271 .ret_type = RET_INTEGER,
3272 .arg1_type = ARG_PTR_TO_CTX,
3273 .arg2_type = ARG_ANYTHING,
3274 .arg3_type = ARG_ANYTHING,
3275 .arg4_type = ARG_ANYTHING,
3276};
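/* Usage sketch (illustrative only): reserve room for an outer IPv4 plus
 * base GRE header in front of the inner network header; the encap flags
 * take care of the GSO and inner-header bookkeeping, and the new outer
 * headers are then written with bpf_skb_store_bytes():
 *
 *	__u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO |
 *		      BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 |
 *		      BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
 *	__u32 room = sizeof(struct iphdr) + 4;	// outer IPv4 + base GRE
 *
 *	if (bpf_skb_adjust_room(skb, room, BPF_ADJ_ROOM_MAC, flags))
 *		return TC_ACT_SHOT;
 */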
3277
5293efe6
DB
3278static u32 __bpf_skb_min_len(const struct sk_buff *skb)
3279{
3280 u32 min_len = skb_network_offset(skb);
3281
3282 if (skb_transport_header_was_set(skb))
3283 min_len = skb_transport_offset(skb);
3284 if (skb->ip_summed == CHECKSUM_PARTIAL)
3285 min_len = skb_checksum_start_offset(skb) +
3286 skb->csum_offset + sizeof(__sum16);
3287 return min_len;
3288}
3289
5293efe6
DB
3290static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
3291{
3292 unsigned int old_len = skb->len;
3293 int ret;
3294
3295 ret = __skb_grow_rcsum(skb, new_len);
3296 if (!ret)
3297 memset(skb->data + old_len, 0, new_len - old_len);
3298 return ret;
3299}
3300
3301static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
3302{
3303 return __skb_trim_rcsum(skb, new_len);
3304}
3305
0ea488ff
JF
3306static inline int __bpf_skb_change_tail(struct sk_buff *skb, u32 new_len,
3307 u64 flags)
5293efe6 3308{
5293efe6
DB
3309 u32 max_len = __bpf_skb_max_len(skb);
3310 u32 min_len = __bpf_skb_min_len(skb);
5293efe6
DB
3311 int ret;
3312
3313 if (unlikely(flags || new_len > max_len || new_len < min_len))
3314 return -EINVAL;
3315 if (skb->encapsulation)
3316 return -ENOTSUPP;
3317
3318 /* The basic idea of this helper is that it's performing the
3319 * needed work to either grow or trim an skb, and eBPF program
3320 * rewrites the rest via helpers like bpf_skb_store_bytes(),
3321 * bpf_lX_csum_replace() and others rather than passing a raw
3322 * buffer here. This one is a slow path helper and intended
3323 * for replies with control messages.
3324 *
3325 * Like in bpf_skb_change_proto(), we want to keep this rather
3326 * minimal and without protocol specifics so that we are able
3327 * to separate concerns as in bpf_skb_store_bytes() should only
3328 * be the one responsible for writing buffers.
3329 *
3330 * It's really expected to be a slow path operation here for
3331 * control message replies, so we're implicitly linearizing,
 3332	 * uncloning and dropping offloads from the skb by this.
3333 */
3334 ret = __bpf_try_make_writable(skb, skb->len);
3335 if (!ret) {
3336 if (new_len > skb->len)
3337 ret = bpf_skb_grow_rcsum(skb, new_len);
3338 else if (new_len < skb->len)
3339 ret = bpf_skb_trim_rcsum(skb, new_len);
3340 if (!ret && skb_is_gso(skb))
3341 skb_gso_reset(skb);
3342 }
0ea488ff
JF
3343 return ret;
3344}
3345
3346BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3347 u64, flags)
3348{
3349 int ret = __bpf_skb_change_tail(skb, new_len, flags);
5293efe6 3350
6aaae2b6 3351 bpf_compute_data_pointers(skb);
5293efe6
DB
3352 return ret;
3353}
3354
3355static const struct bpf_func_proto bpf_skb_change_tail_proto = {
3356 .func = bpf_skb_change_tail,
3357 .gpl_only = false,
3358 .ret_type = RET_INTEGER,
3359 .arg1_type = ARG_PTR_TO_CTX,
3360 .arg2_type = ARG_ANYTHING,
3361 .arg3_type = ARG_ANYTHING,
3362};
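/* Usage sketch (illustrative only): trim a request down to its headers
 * before turning it into a minimal reply, assuming an Ethernet + IPv4 +
 * ICMP layout; the payload can then be regrown and written with
 * bpf_skb_store_bytes():
 *
 *	__u32 hdrs = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct icmphdr);
 *
 *	if (bpf_skb_change_tail(skb, hdrs, 0))
 *		return TC_ACT_SHOT;
 */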
3363
0ea488ff 3364BPF_CALL_3(sk_skb_change_tail, struct sk_buff *, skb, u32, new_len,
3a0af8fd 3365 u64, flags)
0ea488ff
JF
3366{
3367 int ret = __bpf_skb_change_tail(skb, new_len, flags);
3368
3369 bpf_compute_data_end_sk_skb(skb);
3370 return ret;
3371}
3372
3373static const struct bpf_func_proto sk_skb_change_tail_proto = {
3374 .func = sk_skb_change_tail,
3375 .gpl_only = false,
3376 .ret_type = RET_INTEGER,
3377 .arg1_type = ARG_PTR_TO_CTX,
3378 .arg2_type = ARG_ANYTHING,
3379 .arg3_type = ARG_ANYTHING,
3380};
3381
3382static inline int __bpf_skb_change_head(struct sk_buff *skb, u32 head_room,
3383 u64 flags)
3a0af8fd
TG
3384{
3385 u32 max_len = __bpf_skb_max_len(skb);
3386 u32 new_len = skb->len + head_room;
3387 int ret;
3388
3389 if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
3390 new_len < skb->len))
3391 return -EINVAL;
3392
3393 ret = skb_cow(skb, head_room);
3394 if (likely(!ret)) {
3395 /* Idea for this helper is that we currently only
 3396	 * allow expanding on the mac header. This means that
 3397	 * skb->protocol, network header, etc., stay as is.
3398 * Compared to bpf_skb_change_tail(), we're more
3399 * flexible due to not needing to linearize or
3400 * reset GSO. Intention for this helper is to be
3401 * used by an L3 skb that needs to push mac header
3402 * for redirection into L2 device.
3403 */
3404 __skb_push(skb, head_room);
3405 memset(skb->data, 0, head_room);
3406 skb_reset_mac_header(skb);
3407 }
3408
0ea488ff
JF
3409 return ret;
3410}
3411
3412BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
3413 u64, flags)
3414{
3415 int ret = __bpf_skb_change_head(skb, head_room, flags);
3416
6aaae2b6 3417 bpf_compute_data_pointers(skb);
0ea488ff 3418 return ret;
3a0af8fd
TG
3419}
3420
3421static const struct bpf_func_proto bpf_skb_change_head_proto = {
3422 .func = bpf_skb_change_head,
3423 .gpl_only = false,
3424 .ret_type = RET_INTEGER,
3425 .arg1_type = ARG_PTR_TO_CTX,
3426 .arg2_type = ARG_ANYTHING,
3427 .arg3_type = ARG_ANYTHING,
3428};
3429
0ea488ff
JF
3430BPF_CALL_3(sk_skb_change_head, struct sk_buff *, skb, u32, head_room,
3431 u64, flags)
3432{
3433 int ret = __bpf_skb_change_head(skb, head_room, flags);
3434
3435 bpf_compute_data_end_sk_skb(skb);
3436 return ret;
3437}
3438
3439static const struct bpf_func_proto sk_skb_change_head_proto = {
3440 .func = sk_skb_change_head,
3441 .gpl_only = false,
3442 .ret_type = RET_INTEGER,
3443 .arg1_type = ARG_PTR_TO_CTX,
3444 .arg2_type = ARG_ANYTHING,
3445 .arg3_type = ARG_ANYTHING,
3446};
de8f3a83
DB
3447static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
3448{
3449 return xdp_data_meta_unsupported(xdp) ? 0 :
3450 xdp->data - xdp->data_meta;
3451}
3452
17bedab2
MKL
3453BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
3454{
6dfb970d 3455 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
de8f3a83 3456 unsigned long metalen = xdp_get_metalen(xdp);
97e19cce 3457 void *data_start = xdp_frame_end + metalen;
17bedab2
MKL
3458 void *data = xdp->data + offset;
3459
de8f3a83 3460 if (unlikely(data < data_start ||
17bedab2
MKL
3461 data > xdp->data_end - ETH_HLEN))
3462 return -EINVAL;
3463
de8f3a83
DB
3464 if (metalen)
3465 memmove(xdp->data_meta + offset,
3466 xdp->data_meta, metalen);
3467 xdp->data_meta += offset;
17bedab2
MKL
3468 xdp->data = data;
3469
3470 return 0;
3471}
3472
3473static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
3474 .func = bpf_xdp_adjust_head,
3475 .gpl_only = false,
3476 .ret_type = RET_INTEGER,
3477 .arg1_type = ARG_PTR_TO_CTX,
3478 .arg2_type = ARG_ANYTHING,
3479};
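/* Usage sketch (illustrative only): an XDP program strips a fixed-size
 * outer header by moving xdp->data forward (a negative offset would grow
 * headroom for encapsulation instead); packet pointers must be re-derived
 * and re-validated afterwards:
 *
 *	SEC("xdp")
 *	int xdp_decap(struct xdp_md *ctx)
 *	{
 *		const int outer_len = 8;	// hypothetical encap size
 *		void *data, *data_end;
 *
 *		if (bpf_xdp_adjust_head(ctx, outer_len))
 *			return XDP_DROP;
 *		data = (void *)(long)ctx->data;
 *		data_end = (void *)(long)ctx->data_end;
 *		if (data + ETH_HLEN > data_end)
 *			return XDP_DROP;
 *		return XDP_PASS;
 *	}
 */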
3480
b32cc5b9
NS
3481BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
3482{
c8741e2b 3483 void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
b32cc5b9
NS
3484 void *data_end = xdp->data_end + offset;
3485
c8741e2b
JDB
 3486	/* Notice that xdp_data_hard_end has reserved some tailroom */
3487 if (unlikely(data_end > data_hard_end))
b32cc5b9
NS
3488 return -EINVAL;
3489
c8741e2b
JDB
3490 /* ALL drivers MUST init xdp->frame_sz, chicken check below */
3491 if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
3492 WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
3493 return -EINVAL;
3494 }
3495
b32cc5b9
NS
3496 if (unlikely(data_end < xdp->data + ETH_HLEN))
3497 return -EINVAL;
3498
ddb47d51
JDB
3499 /* Clear memory area on grow, can contain uninit kernel memory */
3500 if (offset > 0)
3501 memset(xdp->data_end, 0, offset);
3502
b32cc5b9
NS
3503 xdp->data_end = data_end;
3504
3505 return 0;
3506}
3507
3508static const struct bpf_func_proto bpf_xdp_adjust_tail_proto = {
3509 .func = bpf_xdp_adjust_tail,
3510 .gpl_only = false,
3511 .ret_type = RET_INTEGER,
3512 .arg1_type = ARG_PTR_TO_CTX,
3513 .arg2_type = ARG_ANYTHING,
3514};
3515
de8f3a83
DB
3516BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
3517{
97e19cce 3518 void *xdp_frame_end = xdp->data_hard_start + sizeof(struct xdp_frame);
de8f3a83
DB
3519 void *meta = xdp->data_meta + offset;
3520 unsigned long metalen = xdp->data - meta;
3521
3522 if (xdp_data_meta_unsupported(xdp))
3523 return -ENOTSUPP;
97e19cce 3524 if (unlikely(meta < xdp_frame_end ||
de8f3a83
DB
3525 meta > xdp->data))
3526 return -EINVAL;
3527 if (unlikely((metalen & (sizeof(__u32) - 1)) ||
3528 (metalen > 32)))
3529 return -EACCES;
3530
3531 xdp->data_meta = meta;
3532
3533 return 0;
3534}
3535
3536static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
3537 .func = bpf_xdp_adjust_meta,
3538 .gpl_only = false,
3539 .ret_type = RET_INTEGER,
3540 .arg1_type = ARG_PTR_TO_CTX,
3541 .arg2_type = ARG_ANYTHING,
3542};
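/* Usage sketch (illustrative only): grow the metadata area by 4 bytes
 * (negative offset) and stash a value there; a later tc program or the
 * driver building the skb can read it back through data_meta:
 *
 *	__u32 *mark;
 *
 *	if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*mark)))
 *		return XDP_PASS;
 *	mark = (void *)(long)ctx->data_meta;
 *	if ((void *)(mark + 1) > (void *)(long)ctx->data)
 *		return XDP_PASS;
 *	*mark = 0xcafe;		// hypothetical value consumed later
 */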
3543
9c270af3 3544static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
1170beaa 3545 struct bpf_map *map, struct xdp_buff *xdp)
9c270af3 3546{
1b1a251c 3547 switch (map->map_type) {
6f9d451a 3548 case BPF_MAP_TYPE_DEVMAP:
1170beaa
BT
3549 case BPF_MAP_TYPE_DEVMAP_HASH:
3550 return dev_map_enqueue(fwd, xdp, dev_rx);
3551 case BPF_MAP_TYPE_CPUMAP:
3552 return cpu_map_enqueue(fwd, xdp, dev_rx);
3553 case BPF_MAP_TYPE_XSKMAP:
3554 return __xsk_map_redirect(fwd, xdp);
1b1a251c 3555 default:
0a29275b 3556 return -EBADRQC;
9c270af3 3557 }
e4a8e817 3558 return 0;
814abfab
JF
3559}
3560
1d233886 3561void xdp_do_flush(void)
11393cc9 3562{
1d233886 3563 __dev_flush();
332f22a6
BT
3564 __cpu_map_flush();
3565 __xsk_map_flush();
11393cc9 3566}
1d233886 3567EXPORT_SYMBOL_GPL(xdp_do_flush);
11393cc9 3568
2a68d85f 3569static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
9c270af3
JDB
3570{
3571 switch (map->map_type) {
3572 case BPF_MAP_TYPE_DEVMAP:
3573 return __dev_map_lookup_elem(map, index);
6f9d451a
THJ
3574 case BPF_MAP_TYPE_DEVMAP_HASH:
3575 return __dev_map_hash_lookup_elem(map, index);
9c270af3
JDB
3576 case BPF_MAP_TYPE_CPUMAP:
3577 return __cpu_map_lookup_elem(map, index);
1b1a251c
BT
3578 case BPF_MAP_TYPE_XSKMAP:
3579 return __xsk_map_lookup_elem(map, index);
9c270af3
JDB
3580 default:
3581 return NULL;
3582 }
3583}
3584
f6069b9a 3585void bpf_clear_redirect_map(struct bpf_map *map)
7c300131 3586{
f6069b9a
DB
3587 struct bpf_redirect_info *ri;
3588 int cpu;
3589
3590 for_each_possible_cpu(cpu) {
3591 ri = per_cpu_ptr(&bpf_redirect_info, cpu);
 3592		/* Avoid polluting the remote cacheline with writes if
 3593		 * not needed. Once we pass this test, we still need the
 3594		 * cmpxchg() to make sure the map hasn't been changed in
 3595		 * the meantime by a remote CPU.
3596 */
3597 if (unlikely(READ_ONCE(ri->map) == map))
3598 cmpxchg(&ri->map, map, NULL);
3599 }
7c300131
DB
3600}
3601
1d233886
THJ
3602int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
3603 struct bpf_prog *xdp_prog)
97f91a7c 3604{
1d233886
THJ
3605 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
3606 struct bpf_map *map = READ_ONCE(ri->map);
4b55cf29 3607 u32 index = ri->tgt_index;
43e74c02 3608 void *fwd = ri->tgt_value;
4c03bdd7 3609 int err;
97f91a7c 3610
4b55cf29 3611 ri->tgt_index = 0;
43e74c02 3612 ri->tgt_value = NULL;
f6069b9a 3613 WRITE_ONCE(ri->map, NULL);
97f91a7c 3614
1d233886
THJ
3615 if (unlikely(!map)) {
3616 fwd = dev_get_by_index_rcu(dev_net(dev), index);
3617 if (unlikely(!fwd)) {
3618 err = -EINVAL;
3619 goto err;
3620 }
3621
3622 err = dev_xdp_enqueue(fwd, xdp, dev);
3623 } else {
3624 err = __bpf_tx_xdp_map(dev, fwd, map, xdp);
3625 }
3626
f5836ca5
JDB
3627 if (unlikely(err))
3628 goto err;
3629
59a30896 3630 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
f5836ca5
JDB
3631 return 0;
3632err:
59a30896 3633 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
97f91a7c
JF
3634 return err;
3635}
814abfab
JF
3636EXPORT_SYMBOL_GPL(xdp_do_redirect);
3637
c060bc61
XS
3638static int xdp_do_generic_redirect_map(struct net_device *dev,
3639 struct sk_buff *skb,
02671e23 3640 struct xdp_buff *xdp,
f6069b9a
DB
3641 struct bpf_prog *xdp_prog,
3642 struct bpf_map *map)
6103aa96 3643{
0b19cc0a 3644 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
4b55cf29 3645 u32 index = ri->tgt_index;
43e74c02 3646 void *fwd = ri->tgt_value;
2facaad6 3647 int err = 0;
6103aa96 3648
4b55cf29 3649 ri->tgt_index = 0;
43e74c02 3650 ri->tgt_value = NULL;
f6069b9a 3651 WRITE_ONCE(ri->map, NULL);
96c5508e 3652
6f9d451a
THJ
3653 if (map->map_type == BPF_MAP_TYPE_DEVMAP ||
3654 map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) {
6d5fc195
TM
3655 struct bpf_dtab_netdev *dst = fwd;
3656
3657 err = dev_map_generic_redirect(dst, skb, xdp_prog);
3658 if (unlikely(err))
9c270af3 3659 goto err;
02671e23
BT
3660 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
3661 struct xdp_sock *xs = fwd;
3662
3663 err = xsk_generic_rcv(xs, xdp);
3664 if (err)
3665 goto err;
3666 consume_skb(skb);
9c270af3
JDB
3667 } else {
3668 /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
3669 err = -EBADRQC;
f5836ca5 3670 goto err;
2facaad6 3671 }
6103aa96 3672
9c270af3
JDB
3673 _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
3674 return 0;
3675err:
3676 _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
3677 return err;
3678}
3679
3680int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
02671e23 3681 struct xdp_buff *xdp, struct bpf_prog *xdp_prog)
9c270af3 3682{
0b19cc0a 3683 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
f6069b9a 3684 struct bpf_map *map = READ_ONCE(ri->map);
4b55cf29 3685 u32 index = ri->tgt_index;
9c270af3
JDB
3686 struct net_device *fwd;
3687 int err = 0;
3688
f6069b9a
DB
3689 if (map)
3690 return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog,
3691 map);
4b55cf29 3692 ri->tgt_index = 0;
9c270af3
JDB
3693 fwd = dev_get_by_index_rcu(dev_net(dev), index);
3694 if (unlikely(!fwd)) {
3695 err = -EINVAL;
f5836ca5 3696 goto err;
2facaad6
JDB
3697 }
3698
d8d7218a
TM
3699 err = xdp_ok_fwd_dev(fwd, skb->len);
3700 if (unlikely(err))
9c270af3
JDB
3701 goto err;
3702
2facaad6 3703 skb->dev = fwd;
9c270af3 3704 _trace_xdp_redirect(dev, xdp_prog, index);
02671e23 3705 generic_xdp_tx(skb, xdp_prog);
f5836ca5
JDB
3706 return 0;
3707err:
9c270af3 3708 _trace_xdp_redirect_err(dev, xdp_prog, index, err);
2facaad6 3709 return err;
6103aa96 3710}
6103aa96 3711
814abfab
JF
3712BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
3713{
0b19cc0a 3714 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
814abfab
JF
3715
3716 if (unlikely(flags))
3717 return XDP_ABORTED;
3718
814abfab 3719 ri->flags = flags;
4b55cf29 3720 ri->tgt_index = ifindex;
43e74c02 3721 ri->tgt_value = NULL;
f6069b9a 3722 WRITE_ONCE(ri->map, NULL);
e4a8e817 3723
814abfab
JF
3724 return XDP_REDIRECT;
3725}
3726
3727static const struct bpf_func_proto bpf_xdp_redirect_proto = {
3728 .func = bpf_xdp_redirect,
3729 .gpl_only = false,
3730 .ret_type = RET_INTEGER,
3731 .arg1_type = ARG_ANYTHING,
3732 .arg2_type = ARG_ANYTHING,
3733};
3734
f6069b9a
DB
3735BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
3736 u64, flags)
e4a8e817 3737{
0b19cc0a 3738 struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
e4a8e817 3739
43e74c02
THJ
3740 /* Lower bits of the flags are used as return code on lookup failure */
3741 if (unlikely(flags > XDP_TX))
e4a8e817
DB
3742 return XDP_ABORTED;
3743
43e74c02
THJ
3744 ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
3745 if (unlikely(!ri->tgt_value)) {
3746 /* If the lookup fails we want to clear out the state in the
3747 * redirect_info struct completely, so that if an eBPF program
3748 * performs multiple lookups, the last one always takes
3749 * precedence.
3750 */
3751 WRITE_ONCE(ri->map, NULL);
3752 return flags;
3753 }
3754
e4a8e817 3755 ri->flags = flags;
4b55cf29 3756 ri->tgt_index = ifindex;
f6069b9a 3757 WRITE_ONCE(ri->map, map);
e4a8e817
DB
3758
3759 return XDP_REDIRECT;
3760}
3761
3762static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
3763 .func = bpf_xdp_redirect_map,
3764 .gpl_only = false,
3765 .ret_type = RET_INTEGER,
3766 .arg1_type = ARG_CONST_MAP_PTR,
3767 .arg2_type = ARG_ANYTHING,
3768 .arg3_type = ARG_ANYTHING,
3769};
3770
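/* Editor's example (sketch, not part of this file): forwarding frames
 * through a BPF_MAP_TYPE_DEVMAP with bpf_redirect_map(), which is backed
 * by bpf_xdp_redirect_map() above.  On a lookup miss the lower bits of
 * the flags argument are returned as the XDP verdict, so passing
 * XDP_PASS lets unmatched frames continue up the stack.  Map contents
 * (ifindex values) would be populated from user space; names are
 * illustrative.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_DEVMAP);
 *		__uint(max_entries, 64);
 *		__type(key, __u32);
 *		__type(value, __u32);
 *	} tx_port SEC(".maps");
 *
 *	SEC("xdp")
 *	int xdp_redirect_map_prog(struct xdp_md *ctx)
 *	{
 *		__u32 key = ctx->rx_queue_index % 64;
 *
 *		return bpf_redirect_map(&tx_port, key, XDP_PASS);
 *	}
 */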
555c8a86 3771static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
aa7145c1 3772 unsigned long off, unsigned long len)
555c8a86 3773{
aa7145c1 3774 void *ptr = skb_header_pointer(skb, off, len, dst_buff);
555c8a86
DB
3775
3776 if (unlikely(!ptr))
3777 return len;
3778 if (ptr != dst_buff)
3779 memcpy(dst_buff, ptr, len);
3780
3781 return 0;
3782}
3783
f3694e00
DB
3784BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
3785 u64, flags, void *, meta, u64, meta_size)
555c8a86 3786{
555c8a86 3787 u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
555c8a86
DB
3788
3789 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
3790 return -EINVAL;
a7658e1a 3791 if (unlikely(!skb || skb_size > skb->len))
555c8a86
DB
3792 return -EFAULT;
3793
3794 return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
3795 bpf_skb_copy);
3796}
3797
3798static const struct bpf_func_proto bpf_skb_event_output_proto = {
3799 .func = bpf_skb_event_output,
3800 .gpl_only = true,
3801 .ret_type = RET_INTEGER,
3802 .arg1_type = ARG_PTR_TO_CTX,
3803 .arg2_type = ARG_CONST_MAP_PTR,
3804 .arg3_type = ARG_ANYTHING,
39f19ebb 3805 .arg4_type = ARG_PTR_TO_MEM,
1728a4f2 3806 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
555c8a86
DB
3807};
3808
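/* Editor's example (sketch, not part of this file): emitting an event
 * from a TC classifier with bpf_perf_event_output(), which is backed by
 * bpf_skb_event_output() here.  Encoding a length in the upper 32 bits
 * of the flags (BPF_F_CTXLEN_MASK) appends that many packet bytes after
 * the user-supplied metadata.  Assumes <linux/bpf.h>, <linux/pkt_cls.h>
 * and <bpf/bpf_helpers.h>; names are illustrative.
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 *		__uint(key_size, sizeof(int));
 *		__uint(value_size, sizeof(int));
 *	} events SEC(".maps");
 *
 *	struct event { __u32 len; };
 *
 *	SEC("classifier")
 *	int report(struct __sk_buff *skb)
 *	{
 *		struct event e = { .len = skb->len };
 *		__u64 flags = BPF_F_CURRENT_CPU | ((__u64)64 << 32);
 *
 *		// Also copy the first 64 bytes of the packet.
 *		bpf_perf_event_output(skb, &events, flags, &e, sizeof(e));
 *		return TC_ACT_OK;
 *	}
 */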
9436ef6e 3809BTF_ID_LIST_SINGLE(bpf_skb_output_btf_ids, struct, sk_buff)
c9a0f3b8 3810
a7658e1a
AS
3811const struct bpf_func_proto bpf_skb_output_proto = {
3812 .func = bpf_skb_event_output,
3813 .gpl_only = true,
3814 .ret_type = RET_INTEGER,
3815 .arg1_type = ARG_PTR_TO_BTF_ID,
9436ef6e 3816 .arg1_btf_id = &bpf_skb_output_btf_ids[0],
a7658e1a
AS
3817 .arg2_type = ARG_CONST_MAP_PTR,
3818 .arg3_type = ARG_ANYTHING,
3819 .arg4_type = ARG_PTR_TO_MEM,
3820 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
a7658e1a
AS
3821};
3822
c6c33454
DB
3823static unsigned short bpf_tunnel_key_af(u64 flags)
3824{
3825 return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
3826}
3827
f3694e00
DB
3828BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
3829 u32, size, u64, flags)
d3aa45ce 3830{
c6c33454
DB
3831 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
3832 u8 compat[sizeof(struct bpf_tunnel_key)];
074f528e
DB
3833 void *to_orig = to;
3834 int err;
d3aa45ce 3835
074f528e
DB
3836 if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
3837 err = -EINVAL;
3838 goto err_clear;
3839 }
3840 if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
3841 err = -EPROTO;
3842 goto err_clear;
3843 }
c6c33454 3844 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
074f528e 3845 err = -EINVAL;
c6c33454 3846 switch (size) {
4018ab18 3847 case offsetof(struct bpf_tunnel_key, tunnel_label):
c0e760c9 3848 case offsetof(struct bpf_tunnel_key, tunnel_ext):
4018ab18 3849 goto set_compat;
c6c33454
DB
3850 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
3851 /* Fixup deprecated structure layouts here, so we have
3852 * a common path later on.
3853 */
3854 if (ip_tunnel_info_af(info) != AF_INET)
074f528e 3855 goto err_clear;
4018ab18 3856set_compat:
c6c33454
DB
3857 to = (struct bpf_tunnel_key *)compat;
3858 break;
3859 default:
074f528e 3860 goto err_clear;
c6c33454
DB
3861 }
3862 }
d3aa45ce
AS
3863
3864 to->tunnel_id = be64_to_cpu(info->key.tun_id);
c6c33454
DB
3865 to->tunnel_tos = info->key.tos;
3866 to->tunnel_ttl = info->key.ttl;
1fbc2e0c 3867 to->tunnel_ext = 0;
c6c33454 3868
4018ab18 3869 if (flags & BPF_F_TUNINFO_IPV6) {
c6c33454
DB
3870 memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
3871 sizeof(to->remote_ipv6));
4018ab18
DB
3872 to->tunnel_label = be32_to_cpu(info->key.label);
3873 } else {
c6c33454 3874 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
1fbc2e0c
DB
3875 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
3876 to->tunnel_label = 0;
4018ab18 3877 }
c6c33454
DB
3878
3879 if (unlikely(size != sizeof(struct bpf_tunnel_key)))
074f528e 3880 memcpy(to_orig, to, size);
d3aa45ce
AS
3881
3882 return 0;
074f528e
DB
3883err_clear:
3884 memset(to_orig, 0, size);
3885 return err;
d3aa45ce
AS
3886}
3887
577c50aa 3888static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
d3aa45ce
AS
3889 .func = bpf_skb_get_tunnel_key,
3890 .gpl_only = false,
3891 .ret_type = RET_INTEGER,
3892 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
3893 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
3894 .arg3_type = ARG_CONST_SIZE,
d3aa45ce
AS
3895 .arg4_type = ARG_ANYTHING,
3896};
3897
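/* Editor's example (sketch, not part of this file): reading receive-side
 * tunnel metadata from a TC program attached to a collect_md ("external")
 * tunnel device, e.g. a VXLAN netdev.  Assumes <linux/bpf.h>,
 * <linux/pkt_cls.h> and <bpf/bpf_helpers.h>.
 *
 *	SEC("classifier")
 *	int get_tunnel(struct __sk_buff *skb)
 *	{
 *		struct bpf_tunnel_key key = {};
 *
 *		if (bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0))
 *			return TC_ACT_OK;	// no tunnel metadata attached
 *
 *		bpf_printk("tunnel id %llu from %x\n",
 *			   key.tunnel_id, key.remote_ipv4);
 *		return TC_ACT_OK;
 *	}
 */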
f3694e00 3898BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
14ca0751 3899{
14ca0751 3900 const struct ip_tunnel_info *info = skb_tunnel_info(skb);
074f528e 3901 int err;
14ca0751
DB
3902
3903 if (unlikely(!info ||
074f528e
DB
3904 !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
3905 err = -ENOENT;
3906 goto err_clear;
3907 }
3908 if (unlikely(size < info->options_len)) {
3909 err = -ENOMEM;
3910 goto err_clear;
3911 }
14ca0751
DB
3912
3913 ip_tunnel_info_opts_get(to, info);
074f528e
DB
3914 if (size > info->options_len)
3915 memset(to + info->options_len, 0, size - info->options_len);
14ca0751
DB
3916
3917 return info->options_len;
074f528e
DB
3918err_clear:
3919 memset(to, 0, size);
3920 return err;
14ca0751
DB
3921}
3922
3923static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
3924 .func = bpf_skb_get_tunnel_opt,
3925 .gpl_only = false,
3926 .ret_type = RET_INTEGER,
3927 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
3928 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
3929 .arg3_type = ARG_CONST_SIZE,
14ca0751
DB
3930};
3931
d3aa45ce
AS
3932static struct metadata_dst __percpu *md_dst;
3933
f3694e00
DB
3934BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
3935 const struct bpf_tunnel_key *, from, u32, size, u64, flags)
d3aa45ce 3936{
d3aa45ce 3937 struct metadata_dst *md = this_cpu_ptr(md_dst);
c6c33454 3938 u8 compat[sizeof(struct bpf_tunnel_key)];
d3aa45ce
AS
3939 struct ip_tunnel_info *info;
3940
22080870 3941 if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
77a5196a 3942 BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
d3aa45ce 3943 return -EINVAL;
c6c33454
DB
3944 if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
3945 switch (size) {
4018ab18 3946 case offsetof(struct bpf_tunnel_key, tunnel_label):
c0e760c9 3947 case offsetof(struct bpf_tunnel_key, tunnel_ext):
c6c33454
DB
3948 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
3949 /* Fixup deprecated structure layouts here, so we have
3950 * a common path later on.
3951 */
3952 memcpy(compat, from, size);
3953 memset(compat + size, 0, sizeof(compat) - size);
f3694e00 3954 from = (const struct bpf_tunnel_key *) compat;
c6c33454
DB
3955 break;
3956 default:
3957 return -EINVAL;
3958 }
3959 }
c0e760c9
DB
3960 if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
3961 from->tunnel_ext))
4018ab18 3962 return -EINVAL;
d3aa45ce
AS
3963
3964 skb_dst_drop(skb);
3965 dst_hold((struct dst_entry *) md);
3966 skb_dst_set(skb, (struct dst_entry *) md);
3967
3968 info = &md->u.tun_info;
5540fbf4 3969 memset(info, 0, sizeof(*info));
d3aa45ce 3970 info->mode = IP_TUNNEL_INFO_TX;
c6c33454 3971
db3c6139 3972 info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
22080870
DB
3973 if (flags & BPF_F_DONT_FRAGMENT)
3974 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
792f3dd6
WT
3975 if (flags & BPF_F_ZERO_CSUM_TX)
3976 info->key.tun_flags &= ~TUNNEL_CSUM;
77a5196a
WT
3977 if (flags & BPF_F_SEQ_NUMBER)
3978 info->key.tun_flags |= TUNNEL_SEQ;
22080870 3979
d3aa45ce 3980 info->key.tun_id = cpu_to_be64(from->tunnel_id);
c6c33454
DB
3981 info->key.tos = from->tunnel_tos;
3982 info->key.ttl = from->tunnel_ttl;
3983
3984 if (flags & BPF_F_TUNINFO_IPV6) {
3985 info->mode |= IP_TUNNEL_INFO_IPV6;
3986 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
3987 sizeof(from->remote_ipv6));
4018ab18
DB
3988 info->key.label = cpu_to_be32(from->tunnel_label) &
3989 IPV6_FLOWLABEL_MASK;
c6c33454
DB
3990 } else {
3991 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
3992 }
d3aa45ce
AS
3993
3994 return 0;
3995}
3996
577c50aa 3997static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
d3aa45ce
AS
3998 .func = bpf_skb_set_tunnel_key,
3999 .gpl_only = false,
4000 .ret_type = RET_INTEGER,
4001 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
4002 .arg2_type = ARG_PTR_TO_MEM,
4003 .arg3_type = ARG_CONST_SIZE,
d3aa45ce
AS
4004 .arg4_type = ARG_ANYTHING,
4005};
4006
f3694e00
DB
4007BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
4008 const u8 *, from, u32, size)
14ca0751 4009{
14ca0751
DB
4010 struct ip_tunnel_info *info = skb_tunnel_info(skb);
4011 const struct metadata_dst *md = this_cpu_ptr(md_dst);
4012
4013 if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
4014 return -EINVAL;
fca5fdf6 4015 if (unlikely(size > IP_TUNNEL_OPTS_MAX))
14ca0751
DB
4016 return -ENOMEM;
4017
256c87c1 4018 ip_tunnel_info_opts_set(info, from, size, TUNNEL_OPTIONS_PRESENT);
14ca0751
DB
4019
4020 return 0;
4021}
4022
4023static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
4024 .func = bpf_skb_set_tunnel_opt,
4025 .gpl_only = false,
4026 .ret_type = RET_INTEGER,
4027 .arg1_type = ARG_PTR_TO_CTX,
39f19ebb
AS
4028 .arg2_type = ARG_PTR_TO_MEM,
4029 .arg3_type = ARG_CONST_SIZE,
14ca0751
DB
4030};
4031
4032static const struct bpf_func_proto *
4033bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
d3aa45ce
AS
4034{
4035 if (!md_dst) {
d66f2b91
JK
4036 struct metadata_dst __percpu *tmp;
4037
4038 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
4039 METADATA_IP_TUNNEL,
4040 GFP_KERNEL);
4041 if (!tmp)
d3aa45ce 4042 return NULL;
d66f2b91
JK
4043 if (cmpxchg(&md_dst, NULL, tmp))
4044 metadata_dst_free_percpu(tmp);
d3aa45ce 4045 }
14ca0751
DB
4046
4047 switch (which) {
4048 case BPF_FUNC_skb_set_tunnel_key:
4049 return &bpf_skb_set_tunnel_key_proto;
4050 case BPF_FUNC_skb_set_tunnel_opt:
4051 return &bpf_skb_set_tunnel_opt_proto;
4052 default:
4053 return NULL;
4054 }
d3aa45ce
AS
4055}
4056
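/* Editor's example (sketch, not part of this file): setting transmit
 * tunnel metadata from a TC egress program on a collect_md tunnel
 * device.  bpf_skb_set_tunnel_key() attaches the per-CPU metadata_dst
 * allocated above to the skb, so the tunnel driver encapsulates with
 * these parameters.  Addresses and IDs are illustrative.
 *
 *	SEC("classifier")
 *	int set_tunnel(struct __sk_buff *skb)
 *	{
 *		struct bpf_tunnel_key key = {
 *			.tunnel_id	= 42,
 *			.remote_ipv4	= 0x0a000001,	// 10.0.0.1
 *			.tunnel_ttl	= 64,
 *		};
 *
 *		if (bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 *					   BPF_F_ZERO_CSUM_TX))
 *			return TC_ACT_SHOT;
 *		return TC_ACT_OK;
 *	}
 */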
f3694e00
DB
4057BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
4058 u32, idx)
4a482f34 4059{
4a482f34
MKL
4060 struct bpf_array *array = container_of(map, struct bpf_array, map);
4061 struct cgroup *cgrp;
4062 struct sock *sk;
4a482f34 4063
2d48c5f9 4064 sk = skb_to_full_sk(skb);
4a482f34
MKL
4065 if (!sk || !sk_fullsock(sk))
4066 return -ENOENT;
f3694e00 4067 if (unlikely(idx >= array->map.max_entries))
4a482f34
MKL
4068 return -E2BIG;
4069
f3694e00 4070 cgrp = READ_ONCE(array->ptrs[idx]);
4a482f34
MKL
4071 if (unlikely(!cgrp))
4072 return -EAGAIN;
4073
54fd9c2d 4074 return sk_under_cgroup_hierarchy(sk, cgrp);
4a482f34
MKL
4075}
4076
747ea55e
DB
4077static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
4078 .func = bpf_skb_under_cgroup,
4a482f34
MKL
4079 .gpl_only = false,
4080 .ret_type = RET_INTEGER,
4081 .arg1_type = ARG_PTR_TO_CTX,
4082 .arg2_type = ARG_CONST_MAP_PTR,
4083 .arg3_type = ARG_ANYTHING,
4084};
4a482f34 4085
cb20b08e 4086#ifdef CONFIG_SOCK_CGROUP_DATA
f307fa2c
AI
4087static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
4088{
4089 struct cgroup *cgrp;
4090
a5fa25ad
MKL
4091 sk = sk_to_full_sk(sk);
4092 if (!sk || !sk_fullsock(sk))
4093 return 0;
4094
f307fa2c
AI
4095 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4096 return cgroup_id(cgrp);
4097}
4098
cb20b08e
DB
4099BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
4100{
a5fa25ad 4101 return __bpf_sk_cgroup_id(skb->sk);
cb20b08e
DB
4102}
4103
4104static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
4105 .func = bpf_skb_cgroup_id,
4106 .gpl_only = false,
4107 .ret_type = RET_INTEGER,
4108 .arg1_type = ARG_PTR_TO_CTX,
4109};
77236281 4110
f307fa2c
AI
4111static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
4112 int ancestor_level)
77236281 4113{
77236281
AI
4114 struct cgroup *ancestor;
4115 struct cgroup *cgrp;
4116
a5fa25ad
MKL
4117 sk = sk_to_full_sk(sk);
4118 if (!sk || !sk_fullsock(sk))
4119 return 0;
4120
77236281
AI
4121 cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
4122 ancestor = cgroup_ancestor(cgrp, ancestor_level);
4123 if (!ancestor)
4124 return 0;
4125
74321038 4126 return cgroup_id(ancestor);
77236281
AI
4127}
4128
f307fa2c
AI
4129BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
4130 ancestor_level)
4131{
a5fa25ad 4132 return __bpf_sk_ancestor_cgroup_id(skb->sk, ancestor_level);
f307fa2c
AI
4133}
4134
77236281
AI
4135static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
4136 .func = bpf_skb_ancestor_cgroup_id,
4137 .gpl_only = false,
4138 .ret_type = RET_INTEGER,
4139 .arg1_type = ARG_PTR_TO_CTX,
4140 .arg2_type = ARG_ANYTHING,
4141};
f307fa2c
AI
4142
4143BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk)
4144{
4145 return __bpf_sk_cgroup_id(sk);
4146}
4147
4148static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
4149 .func = bpf_sk_cgroup_id,
4150 .gpl_only = false,
4151 .ret_type = RET_INTEGER,
a5fa25ad 4152 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
f307fa2c
AI
4153};
4154
4155BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
4156{
4157 return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
4158}
4159
4160static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
4161 .func = bpf_sk_ancestor_cgroup_id,
4162 .gpl_only = false,
4163 .ret_type = RET_INTEGER,
a5fa25ad 4164 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
f307fa2c
AI
4165 .arg2_type = ARG_ANYTHING,
4166};
cb20b08e
DB
4167#endif
4168
4de16969
DB
4169static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
4170 unsigned long off, unsigned long len)
4171{
4172 memcpy(dst_buff, src_buff + off, len);
4173 return 0;
4174}
4175
f3694e00
DB
4176BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
4177 u64, flags, void *, meta, u64, meta_size)
4de16969 4178{
4de16969 4179 u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
4de16969
DB
4180
4181 if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
4182 return -EINVAL;
d831ee84
EC
4183 if (unlikely(!xdp ||
4184 xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
4de16969
DB
4185 return -EFAULT;
4186
9c471370
MKL
4187 return bpf_event_output(map, flags, meta, meta_size, xdp->data,
4188 xdp_size, bpf_xdp_copy);
4de16969
DB
4189}
4190
4191static const struct bpf_func_proto bpf_xdp_event_output_proto = {
4192 .func = bpf_xdp_event_output,
4193 .gpl_only = true,
4194 .ret_type = RET_INTEGER,
4195 .arg1_type = ARG_PTR_TO_CTX,
4196 .arg2_type = ARG_CONST_MAP_PTR,
4197 .arg3_type = ARG_ANYTHING,
39f19ebb 4198 .arg4_type = ARG_PTR_TO_MEM,
1728a4f2 4199 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
4de16969
DB
4200};
4201
9436ef6e 4202BTF_ID_LIST_SINGLE(bpf_xdp_output_btf_ids, struct, xdp_buff)
c9a0f3b8 4203
d831ee84
EC
4204const struct bpf_func_proto bpf_xdp_output_proto = {
4205 .func = bpf_xdp_event_output,
4206 .gpl_only = true,
4207 .ret_type = RET_INTEGER,
4208 .arg1_type = ARG_PTR_TO_BTF_ID,
9436ef6e 4209 .arg1_btf_id = &bpf_xdp_output_btf_ids[0],
d831ee84
EC
4210 .arg2_type = ARG_CONST_MAP_PTR,
4211 .arg3_type = ARG_ANYTHING,
4212 .arg4_type = ARG_PTR_TO_MEM,
4213 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
d831ee84
EC
4214};
4215
91b8270f
CF
4216BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
4217{
4218 return skb->sk ? sock_gen_cookie(skb->sk) : 0;
4219}
4220
4221static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
4222 .func = bpf_get_socket_cookie,
4223 .gpl_only = false,
4224 .ret_type = RET_INTEGER,
4225 .arg1_type = ARG_PTR_TO_CTX,
4226};
4227
d692f113
AI
4228BPF_CALL_1(bpf_get_socket_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4229{
4230 return sock_gen_cookie(ctx->sk);
4231}
4232
4233static const struct bpf_func_proto bpf_get_socket_cookie_sock_addr_proto = {
4234 .func = bpf_get_socket_cookie_sock_addr,
4235 .gpl_only = false,
4236 .ret_type = RET_INTEGER,
4237 .arg1_type = ARG_PTR_TO_CTX,
4238};
4239
0e53d9e5
DB
4240BPF_CALL_1(bpf_get_socket_cookie_sock, struct sock *, ctx)
4241{
4242 return sock_gen_cookie(ctx);
4243}
4244
4245static const struct bpf_func_proto bpf_get_socket_cookie_sock_proto = {
4246 .func = bpf_get_socket_cookie_sock,
4247 .gpl_only = false,
4248 .ret_type = RET_INTEGER,
4249 .arg1_type = ARG_PTR_TO_CTX,
4250};
4251
d692f113
AI
4252BPF_CALL_1(bpf_get_socket_cookie_sock_ops, struct bpf_sock_ops_kern *, ctx)
4253{
4254 return sock_gen_cookie(ctx->sk);
4255}
4256
4257static const struct bpf_func_proto bpf_get_socket_cookie_sock_ops_proto = {
4258 .func = bpf_get_socket_cookie_sock_ops,
4259 .gpl_only = false,
4260 .ret_type = RET_INTEGER,
4261 .arg1_type = ARG_PTR_TO_CTX,
4262};
4263
f318903c
DB
4264static u64 __bpf_get_netns_cookie(struct sock *sk)
4265{
4266#ifdef CONFIG_NET_NS
4267 return net_gen_cookie(sk ? sk->sk_net.net : &init_net);
4268#else
4269 return 0;
4270#endif
4271}
4272
4273BPF_CALL_1(bpf_get_netns_cookie_sock, struct sock *, ctx)
4274{
4275 return __bpf_get_netns_cookie(ctx);
4276}
4277
4278static const struct bpf_func_proto bpf_get_netns_cookie_sock_proto = {
4279 .func = bpf_get_netns_cookie_sock,
4280 .gpl_only = false,
4281 .ret_type = RET_INTEGER,
4282 .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
4283};
4284
4285BPF_CALL_1(bpf_get_netns_cookie_sock_addr, struct bpf_sock_addr_kern *, ctx)
4286{
4287 return __bpf_get_netns_cookie(ctx ? ctx->sk : NULL);
4288}
4289
4290static const struct bpf_func_proto bpf_get_netns_cookie_sock_addr_proto = {
4291 .func = bpf_get_netns_cookie_sock_addr,
4292 .gpl_only = false,
4293 .ret_type = RET_INTEGER,
4294 .arg1_type = ARG_PTR_TO_CTX_OR_NULL,
4295};
4296
6acc5c29
CF
4297BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
4298{
4299 struct sock *sk = sk_to_full_sk(skb->sk);
4300 kuid_t kuid;
4301
4302 if (!sk || !sk_fullsock(sk))
4303 return overflowuid;
4304 kuid = sock_net_uid(sock_net(sk), sk);
4305 return from_kuid_munged(sock_net(sk)->user_ns, kuid);
4306}
4307
4308static const struct bpf_func_proto bpf_get_socket_uid_proto = {
4309 .func = bpf_get_socket_uid,
4310 .gpl_only = false,
4311 .ret_type = RET_INTEGER,
4312 .arg1_type = ARG_PTR_TO_CTX,
4313};
4314
beecf11b 4315static int _bpf_setsockopt(struct sock *sk, int level, int optname,
5cdc744c 4316 char *optval, int optlen)
8c4b4c7e 4317{
70c58997 4318 char devname[IFNAMSIZ];
f9bcf968 4319 int val, valbool;
70c58997
FF
4320 struct net *net;
4321 int ifindex;
8c4b4c7e 4322 int ret = 0;
8c4b4c7e
LB
4323
4324 if (!sk_fullsock(sk))
4325 return -EINVAL;
4326
beecf11b
SF
4327 sock_owned_by_me(sk);
4328
8c4b4c7e 4329 if (level == SOL_SOCKET) {
70c58997 4330 if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
8c4b4c7e
LB
4331 return -EINVAL;
4332 val = *((int *)optval);
f9bcf968 4333 valbool = val ? 1 : 0;
8c4b4c7e
LB
4334
 4335		/* Only some socket options are supported */
4336 switch (optname) {
4337 case SO_RCVBUF:
c9e45767 4338 val = min_t(u32, val, sysctl_rmem_max);
8c4b4c7e 4339 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
ebb3b78d
ED
4340 WRITE_ONCE(sk->sk_rcvbuf,
4341 max_t(int, val * 2, SOCK_MIN_RCVBUF));
8c4b4c7e
LB
4342 break;
4343 case SO_SNDBUF:
c9e45767 4344 val = min_t(u32, val, sysctl_wmem_max);
8c4b4c7e 4345 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
e292f05e
ED
4346 WRITE_ONCE(sk->sk_sndbuf,
4347 max_t(int, val * 2, SOCK_MIN_SNDBUF));
8c4b4c7e 4348 break;
76a9ebe8 4349 case SO_MAX_PACING_RATE: /* 32bit version */
e224c390
YC
4350 if (val != ~0U)
4351 cmpxchg(&sk->sk_pacing_status,
4352 SK_PACING_NONE,
4353 SK_PACING_NEEDED);
76a9ebe8 4354 sk->sk_max_pacing_rate = (val == ~0U) ? ~0UL : val;
8c4b4c7e
LB
4355 sk->sk_pacing_rate = min(sk->sk_pacing_rate,
4356 sk->sk_max_pacing_rate);
4357 break;
4358 case SO_PRIORITY:
4359 sk->sk_priority = val;
4360 break;
4361 case SO_RCVLOWAT:
4362 if (val < 0)
4363 val = INT_MAX;
eac66402 4364 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
8c4b4c7e
LB
4365 break;
4366 case SO_MARK:
f4924f24
PO
4367 if (sk->sk_mark != val) {
4368 sk->sk_mark = val;
4369 sk_dst_reset(sk);
4370 }
8c4b4c7e 4371 break;
70c58997 4372 case SO_BINDTODEVICE:
70c58997
FF
4373 optlen = min_t(long, optlen, IFNAMSIZ - 1);
4374 strncpy(devname, optval, optlen);
4375 devname[optlen] = 0;
4376
4377 ifindex = 0;
4378 if (devname[0] != '\0') {
4379 struct net_device *dev;
4380
4381 ret = -ENODEV;
4382
4383 net = sock_net(sk);
4384 dev = dev_get_by_name(net, devname);
4385 if (!dev)
4386 break;
4387 ifindex = dev->ifindex;
4388 dev_put(dev);
4389 }
4390 ret = sock_bindtoindex(sk, ifindex, false);
70c58997 4391 break;
f9bcf968
DY
4392 case SO_KEEPALIVE:
4393 if (sk->sk_prot->keepalive)
4394 sk->sk_prot->keepalive(sk, valbool);
4395 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
4396 break;
8c4b4c7e
LB
4397 default:
4398 ret = -EINVAL;
4399 }
a5192c52 4400#ifdef CONFIG_INET
6f5c39fa
NS
4401 } else if (level == SOL_IP) {
4402 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
4403 return -EINVAL;
4404
4405 val = *((int *)optval);
4406 /* Only some options are supported */
4407 switch (optname) {
4408 case IP_TOS:
4409 if (val < -1 || val > 0xff) {
4410 ret = -EINVAL;
4411 } else {
4412 struct inet_sock *inet = inet_sk(sk);
4413
4414 if (val == -1)
4415 val = 0;
4416 inet->tos = val;
4417 }
4418 break;
4419 default:
4420 ret = -EINVAL;
4421 }
6f9bd3d7
LB
4422#if IS_ENABLED(CONFIG_IPV6)
4423 } else if (level == SOL_IPV6) {
4424 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
4425 return -EINVAL;
4426
4427 val = *((int *)optval);
4428 /* Only some options are supported */
4429 switch (optname) {
4430 case IPV6_TCLASS:
4431 if (val < -1 || val > 0xff) {
4432 ret = -EINVAL;
4433 } else {
4434 struct ipv6_pinfo *np = inet6_sk(sk);
4435
4436 if (val == -1)
4437 val = 0;
4438 np->tclass = val;
4439 }
4440 break;
4441 default:
4442 ret = -EINVAL;
4443 }
4444#endif
8c4b4c7e
LB
4445 } else if (level == SOL_TCP &&
4446 sk->sk_prot->setsockopt == tcp_setsockopt) {
91b5b21c
LB
4447 if (optname == TCP_CONGESTION) {
4448 char name[TCP_CA_NAME_MAX];
4449
4450 strncpy(name, optval, min_t(long, optlen,
4451 TCP_CA_NAME_MAX-1));
4452 name[TCP_CA_NAME_MAX-1] = 0;
29a94932 4453 ret = tcp_set_congestion_control(sk, name, false, true);
91b5b21c 4454 } else {
f9bcf968 4455 struct inet_connection_sock *icsk = inet_csk(sk);
fc747810 4456 struct tcp_sock *tp = tcp_sk(sk);
2b8ee4f0 4457 unsigned long timeout;
fc747810
LB
4458
4459 if (optlen != sizeof(int))
4460 return -EINVAL;
4461
4462 val = *((int *)optval);
4463 /* Only some options are supported */
4464 switch (optname) {
4465 case TCP_BPF_IW:
31aa6503 4466 if (val <= 0 || tp->data_segs_out > tp->syn_data)
fc747810
LB
4467 ret = -EINVAL;
4468 else
4469 tp->snd_cwnd = val;
4470 break;
13bf9641
LB
4471 case TCP_BPF_SNDCWND_CLAMP:
4472 if (val <= 0) {
4473 ret = -EINVAL;
4474 } else {
4475 tp->snd_cwnd_clamp = val;
4476 tp->snd_ssthresh = val;
4477 }
6d3f06a0 4478 break;
2b8ee4f0
MKL
4479 case TCP_BPF_DELACK_MAX:
4480 timeout = usecs_to_jiffies(val);
4481 if (timeout > TCP_DELACK_MAX ||
4482 timeout < TCP_TIMEOUT_MIN)
4483 return -EINVAL;
4484 inet_csk(sk)->icsk_delack_max = timeout;
4485 break;
ca584ba0
MKL
4486 case TCP_BPF_RTO_MIN:
4487 timeout = usecs_to_jiffies(val);
4488 if (timeout > TCP_RTO_MIN ||
4489 timeout < TCP_TIMEOUT_MIN)
4490 return -EINVAL;
4491 inet_csk(sk)->icsk_rto_min = timeout;
4492 break;
1e215300
NS
4493 case TCP_SAVE_SYN:
4494 if (val < 0 || val > 1)
4495 ret = -EINVAL;
4496 else
4497 tp->save_syn = val;
4498 break;
f9bcf968
DY
4499 case TCP_KEEPIDLE:
4500 ret = tcp_sock_set_keepidle_locked(sk, val);
4501 break;
4502 case TCP_KEEPINTVL:
4503 if (val < 1 || val > MAX_TCP_KEEPINTVL)
4504 ret = -EINVAL;
4505 else
4506 tp->keepalive_intvl = val * HZ;
4507 break;
4508 case TCP_KEEPCNT:
4509 if (val < 1 || val > MAX_TCP_KEEPCNT)
4510 ret = -EINVAL;
4511 else
4512 tp->keepalive_probes = val;
4513 break;
4514 case TCP_SYNCNT:
4515 if (val < 1 || val > MAX_TCP_SYNCNT)
4516 ret = -EINVAL;
4517 else
4518 icsk->icsk_syn_retries = val;
4519 break;
4520 case TCP_USER_TIMEOUT:
4521 if (val < 0)
4522 ret = -EINVAL;
4523 else
4524 icsk->icsk_user_timeout = val;
4525 break;
fc747810
LB
4526 default:
4527 ret = -EINVAL;
4528 }
91b5b21c 4529 }
91b5b21c 4530#endif
8c4b4c7e
LB
4531 } else {
4532 ret = -EINVAL;
4533 }
4534 return ret;
4535}
4536
beecf11b
SF
4537static int _bpf_getsockopt(struct sock *sk, int level, int optname,
4538 char *optval, int optlen)
cd86d1fd 4539{
cd86d1fd
LB
4540 if (!sk_fullsock(sk))
4541 goto err_clear;
beecf11b
SF
4542
4543 sock_owned_by_me(sk);
4544
cd86d1fd
LB
4545#ifdef CONFIG_INET
4546 if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
1edb6e03
AR
4547 struct inet_connection_sock *icsk;
4548 struct tcp_sock *tp;
4549
1e215300
NS
4550 switch (optname) {
4551 case TCP_CONGESTION:
4552 icsk = inet_csk(sk);
cd86d1fd
LB
4553
4554 if (!icsk->icsk_ca_ops || optlen <= 1)
4555 goto err_clear;
4556 strncpy(optval, icsk->icsk_ca_ops->name, optlen);
4557 optval[optlen - 1] = 0;
1e215300
NS
4558 break;
4559 case TCP_SAVED_SYN:
4560 tp = tcp_sk(sk);
4561
4562 if (optlen <= 0 || !tp->saved_syn ||
70a217f1 4563 optlen > tcp_saved_syn_len(tp->saved_syn))
1e215300 4564 goto err_clear;
70a217f1 4565 memcpy(optval, tp->saved_syn->data, optlen);
1e215300
NS
4566 break;
4567 default:
cd86d1fd
LB
4568 goto err_clear;
4569 }
6f5c39fa
NS
4570 } else if (level == SOL_IP) {
4571 struct inet_sock *inet = inet_sk(sk);
4572
4573 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
4574 goto err_clear;
4575
4576 /* Only some options are supported */
4577 switch (optname) {
4578 case IP_TOS:
4579 *((int *)optval) = (int)inet->tos;
4580 break;
4581 default:
4582 goto err_clear;
4583 }
6f9bd3d7
LB
4584#if IS_ENABLED(CONFIG_IPV6)
4585 } else if (level == SOL_IPV6) {
4586 struct ipv6_pinfo *np = inet6_sk(sk);
4587
4588 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
4589 goto err_clear;
4590
4591 /* Only some options are supported */
4592 switch (optname) {
4593 case IPV6_TCLASS:
4594 *((int *)optval) = (int)np->tclass;
4595 break;
4596 default:
4597 goto err_clear;
4598 }
4599#endif
cd86d1fd
LB
4600 } else {
4601 goto err_clear;
4602 }
aa2bc739 4603 return 0;
cd86d1fd
LB
4604#endif
4605err_clear:
4606 memset(optval, 0, optlen);
4607 return -EINVAL;
4608}
4609
beecf11b
SF
4610BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
4611 int, level, int, optname, char *, optval, int, optlen)
4612{
5cdc744c 4613 return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen);
beecf11b
SF
4614}
4615
4616static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
4617 .func = bpf_sock_addr_setsockopt,
4618 .gpl_only = false,
4619 .ret_type = RET_INTEGER,
4620 .arg1_type = ARG_PTR_TO_CTX,
4621 .arg2_type = ARG_ANYTHING,
4622 .arg3_type = ARG_ANYTHING,
4623 .arg4_type = ARG_PTR_TO_MEM,
4624 .arg5_type = ARG_CONST_SIZE,
4625};
4626
4627BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx,
4628 int, level, int, optname, char *, optval, int, optlen)
4629{
4630 return _bpf_getsockopt(ctx->sk, level, optname, optval, optlen);
4631}
4632
4633static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
4634 .func = bpf_sock_addr_getsockopt,
4635 .gpl_only = false,
4636 .ret_type = RET_INTEGER,
4637 .arg1_type = ARG_PTR_TO_CTX,
4638 .arg2_type = ARG_ANYTHING,
4639 .arg3_type = ARG_ANYTHING,
4640 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
4641 .arg5_type = ARG_CONST_SIZE,
4642};
4643
4644BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
4645 int, level, int, optname, char *, optval, int, optlen)
4646{
5cdc744c 4647 return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen);
beecf11b
SF
4648}
4649
4650static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
4651 .func = bpf_sock_ops_setsockopt,
4652 .gpl_only = false,
4653 .ret_type = RET_INTEGER,
4654 .arg1_type = ARG_PTR_TO_CTX,
4655 .arg2_type = ARG_ANYTHING,
4656 .arg3_type = ARG_ANYTHING,
4657 .arg4_type = ARG_PTR_TO_MEM,
4658 .arg5_type = ARG_CONST_SIZE,
4659};
4660
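/* Editor's example (sketch, not part of this file): switching the
 * congestion control algorithm of newly established connections from a
 * sockops program via bpf_setsockopt(), which lands in _bpf_setsockopt()
 * above.  The program is attached to a cgroup with BPF_CGROUP_SOCK_OPS;
 * assumes SOL_TCP/TCP_CONGESTION from the usual TCP headers.
 *
 *	SEC("sockops")
 *	int set_cc(struct bpf_sock_ops *skops)
 *	{
 *		char cc[] = "cubic";
 *
 *		if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
 *		    skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
 *			bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION,
 *				       cc, sizeof(cc));
 *		return 1;
 *	}
 */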
0813a841
MKL
4661static int bpf_sock_ops_get_syn(struct bpf_sock_ops_kern *bpf_sock,
4662 int optname, const u8 **start)
4663{
4664 struct sk_buff *syn_skb = bpf_sock->syn_skb;
4665 const u8 *hdr_start;
4666 int ret;
4667
4668 if (syn_skb) {
4669 /* sk is a request_sock here */
4670
4671 if (optname == TCP_BPF_SYN) {
4672 hdr_start = syn_skb->data;
4673 ret = tcp_hdrlen(syn_skb);
267cf9fa 4674 } else if (optname == TCP_BPF_SYN_IP) {
0813a841
MKL
4675 hdr_start = skb_network_header(syn_skb);
4676 ret = skb_network_header_len(syn_skb) +
4677 tcp_hdrlen(syn_skb);
267cf9fa
MKL
4678 } else {
4679 /* optname == TCP_BPF_SYN_MAC */
4680 hdr_start = skb_mac_header(syn_skb);
4681 ret = skb_mac_header_len(syn_skb) +
4682 skb_network_header_len(syn_skb) +
4683 tcp_hdrlen(syn_skb);
0813a841
MKL
4684 }
4685 } else {
4686 struct sock *sk = bpf_sock->sk;
4687 struct saved_syn *saved_syn;
4688
4689 if (sk->sk_state == TCP_NEW_SYN_RECV)
4690 /* synack retransmit. bpf_sock->syn_skb will
4691 * not be available. It has to resort to
4692 * saved_syn (if it is saved).
4693 */
4694 saved_syn = inet_reqsk(sk)->saved_syn;
4695 else
4696 saved_syn = tcp_sk(sk)->saved_syn;
4697
4698 if (!saved_syn)
4699 return -ENOENT;
4700
4701 if (optname == TCP_BPF_SYN) {
4702 hdr_start = saved_syn->data +
267cf9fa 4703 saved_syn->mac_hdrlen +
0813a841
MKL
4704 saved_syn->network_hdrlen;
4705 ret = saved_syn->tcp_hdrlen;
267cf9fa
MKL
4706 } else if (optname == TCP_BPF_SYN_IP) {
4707 hdr_start = saved_syn->data +
4708 saved_syn->mac_hdrlen;
4709 ret = saved_syn->network_hdrlen +
4710 saved_syn->tcp_hdrlen;
0813a841 4711 } else {
267cf9fa
MKL
4712 /* optname == TCP_BPF_SYN_MAC */
4713
4714 /* TCP_SAVE_SYN may not have saved the mac hdr */
4715 if (!saved_syn->mac_hdrlen)
4716 return -ENOENT;
4717
0813a841 4718 hdr_start = saved_syn->data;
267cf9fa
MKL
4719 ret = saved_syn->mac_hdrlen +
4720 saved_syn->network_hdrlen +
0813a841
MKL
4721 saved_syn->tcp_hdrlen;
4722 }
4723 }
4724
4725 *start = hdr_start;
4726 return ret;
4727}
4728
beecf11b
SF
4729BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
4730 int, level, int, optname, char *, optval, int, optlen)
4731{
0813a841 4732 if (IS_ENABLED(CONFIG_INET) && level == SOL_TCP &&
267cf9fa 4733 optname >= TCP_BPF_SYN && optname <= TCP_BPF_SYN_MAC) {
0813a841
MKL
4734 int ret, copy_len = 0;
4735 const u8 *start;
4736
4737 ret = bpf_sock_ops_get_syn(bpf_sock, optname, &start);
4738 if (ret > 0) {
4739 copy_len = ret;
4740 if (optlen < copy_len) {
4741 copy_len = optlen;
4742 ret = -ENOSPC;
4743 }
4744
4745 memcpy(optval, start, copy_len);
4746 }
4747
4748 /* Zero out unused buffer at the end */
4749 memset(optval + copy_len, 0, optlen - copy_len);
4750
4751 return ret;
4752 }
4753
beecf11b
SF
4754 return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen);
4755}
4756
4757static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = {
4758 .func = bpf_sock_ops_getsockopt,
cd86d1fd
LB
4759 .gpl_only = false,
4760 .ret_type = RET_INTEGER,
4761 .arg1_type = ARG_PTR_TO_CTX,
4762 .arg2_type = ARG_ANYTHING,
4763 .arg3_type = ARG_ANYTHING,
4764 .arg4_type = ARG_PTR_TO_UNINIT_MEM,
4765 .arg5_type = ARG_CONST_SIZE,
4766};
4767
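/* Editor's example (sketch, not part of this file): retrieving the
 * headers of the SYN that created a connection via the TCP_BPF_SYN*
 * pseudo-options handled above.  For established sockets this relies on
 * the SYN having been saved, e.g. by a bpf_setsockopt(TCP_SAVE_SYN) call
 * made on the listener.  Buffer size is illustrative; -ENOSPC is
 * returned when the headers do not fit.
 *
 *	SEC("sockops")
 *	int read_syn(struct bpf_sock_ops *skops)
 *	{
 *		char buf[128];
 *
 *		if (skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
 *			return 1;
 *		// Network + TCP headers of the original SYN.
 *		bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP,
 *			       buf, sizeof(buf));
 *		return 1;
 *	}
 */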
b13d8807
LB
4768BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
4769 int, argval)
4770{
4771 struct sock *sk = bpf_sock->sk;
4772 int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
4773
a7dcdf6e 4774 if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
b13d8807
LB
4775 return -EINVAL;
4776
725721a6 4777 tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
b13d8807
LB
4778
4779 return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
b13d8807
LB
4780}
4781
4782static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
4783 .func = bpf_sock_ops_cb_flags_set,
4784 .gpl_only = false,
4785 .ret_type = RET_INTEGER,
4786 .arg1_type = ARG_PTR_TO_CTX,
4787 .arg2_type = ARG_ANYTHING,
4788};
4789
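/* Editor's example (sketch, not part of this file): opting freshly
 * established connections into RTO and retransmit callbacks with
 * bpf_sock_ops_cb_flags_set().  The helper returns the requested bits
 * that could not be enabled (anything outside BPF_SOCK_OPS_ALL_CB_FLAGS),
 * so 0 means every flag was accepted.
 *
 *	SEC("sockops")
 *	int enable_cbs(struct bpf_sock_ops *skops)
 *	{
 *		if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB ||
 *		    skops->op == BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
 *			bpf_sock_ops_cb_flags_set(skops,
 *						  BPF_SOCK_OPS_RTO_CB_FLAG |
 *						  BPF_SOCK_OPS_RETRANS_CB_FLAG);
 *		return 1;
 *	}
 */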
d74bad4e
AI
4790const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
4791EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
4792
4793BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
4794 int, addr_len)
4795{
4796#ifdef CONFIG_INET
4797 struct sock *sk = ctx->sk;
8086fbaf 4798 u32 flags = BIND_FROM_BPF;
d74bad4e
AI
4799 int err;
4800
d74bad4e 4801 err = -EINVAL;
ba024f25
TH
4802 if (addr_len < offsetofend(struct sockaddr, sa_family))
4803 return err;
d74bad4e
AI
4804 if (addr->sa_family == AF_INET) {
4805 if (addr_len < sizeof(struct sockaddr_in))
4806 return err;
8086fbaf
SF
4807 if (((struct sockaddr_in *)addr)->sin_port == htons(0))
4808 flags |= BIND_FORCE_ADDRESS_NO_PORT;
4809 return __inet_bind(sk, addr, addr_len, flags);
d74bad4e
AI
4810#if IS_ENABLED(CONFIG_IPV6)
4811 } else if (addr->sa_family == AF_INET6) {
4812 if (addr_len < SIN6_LEN_RFC2133)
4813 return err;
8086fbaf
SF
4814 if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0))
4815 flags |= BIND_FORCE_ADDRESS_NO_PORT;
d74bad4e
AI
4816 /* ipv6_bpf_stub cannot be NULL, since it's called from
4817 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
4818 */
8086fbaf 4819 return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags);
d74bad4e
AI
4820#endif /* CONFIG_IPV6 */
4821 }
4822#endif /* CONFIG_INET */
4823
4824 return -EAFNOSUPPORT;
4825}
4826
4827static const struct bpf_func_proto bpf_bind_proto = {
4828 .func = bpf_bind,
4829 .gpl_only = false,
4830 .ret_type = RET_INTEGER,
4831 .arg1_type = ARG_PTR_TO_CTX,
4832 .arg2_type = ARG_PTR_TO_MEM,
4833 .arg3_type = ARG_CONST_SIZE,
4834};
4835
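/* Editor's example (sketch, not part of this file): pinning the source
 * address of outgoing IPv4 connections from a cgroup by calling
 * bpf_bind() in a BPF_CGROUP_INET4_CONNECT program.  Leaving sin_port at
 * zero lets the source port be chosen at connect time
 * (BIND_FORCE_ADDRESS_NO_PORT above).  Assumes <linux/in.h> and
 * <bpf/bpf_endian.h>; the address is illustrative.
 *
 *	SEC("cgroup/connect4")
 *	int bind_src(struct bpf_sock_addr *ctx)
 *	{
 *		struct sockaddr_in sa = {
 *			.sin_family = AF_INET,
 *			.sin_addr.s_addr = bpf_htonl(0x0a000002),  // 10.0.0.2
 *		};
 *
 *		if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)))
 *			return 0;	// reject the connect() on failure
 *		return 1;
 *	}
 */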
12bed760
EB
4836#ifdef CONFIG_XFRM
4837BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
4838 struct bpf_xfrm_state *, to, u32, size, u64, flags)
4839{
4840 const struct sec_path *sp = skb_sec_path(skb);
4841 const struct xfrm_state *x;
4842
4843 if (!sp || unlikely(index >= sp->len || flags))
4844 goto err_clear;
4845
4846 x = sp->xvec[index];
4847
4848 if (unlikely(size != sizeof(struct bpf_xfrm_state)))
4849 goto err_clear;
4850
4851 to->reqid = x->props.reqid;
4852 to->spi = x->id.spi;
4853 to->family = x->props.family;
1fbc2e0c
DB
4854 to->ext = 0;
4855
12bed760
EB
4856 if (to->family == AF_INET6) {
4857 memcpy(to->remote_ipv6, x->props.saddr.a6,
4858 sizeof(to->remote_ipv6));
4859 } else {
4860 to->remote_ipv4 = x->props.saddr.a4;
1fbc2e0c 4861 memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
12bed760
EB
4862 }
4863
4864 return 0;
4865err_clear:
4866 memset(to, 0, size);
4867 return -EINVAL;
4868}
4869
4870static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
4871 .func = bpf_skb_get_xfrm_state,
4872 .gpl_only = false,
4873 .ret_type = RET_INTEGER,
4874 .arg1_type = ARG_PTR_TO_CTX,
4875 .arg2_type = ARG_ANYTHING,
4876 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
4877 .arg4_type = ARG_CONST_SIZE,
4878 .arg5_type = ARG_ANYTHING,
4879};
4880#endif
4881
87f5fc7e
DA
4882#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
4883static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
4884 const struct neighbour *neigh,
4885 const struct net_device *dev)
4886{
4887 memcpy(params->dmac, neigh->ha, ETH_ALEN);
4888 memcpy(params->smac, dev->dev_addr, ETH_ALEN);
4889 params->h_vlan_TCI = 0;
4890 params->h_vlan_proto = 0;
4c79579b 4891 params->ifindex = dev->ifindex;
87f5fc7e 4892
4c79579b 4893 return 0;
87f5fc7e
DA
4894}
4895#endif
4896
4897#if IS_ENABLED(CONFIG_INET)
4898static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4f74fede 4899 u32 flags, bool check_mtu)
87f5fc7e 4900{
eba618ab 4901 struct fib_nh_common *nhc;
87f5fc7e
DA
4902 struct in_device *in_dev;
4903 struct neighbour *neigh;
4904 struct net_device *dev;
4905 struct fib_result res;
87f5fc7e
DA
4906 struct flowi4 fl4;
4907 int err;
4f74fede 4908 u32 mtu;
87f5fc7e
DA
4909
4910 dev = dev_get_by_index_rcu(net, params->ifindex);
4911 if (unlikely(!dev))
4912 return -ENODEV;
4913
4914 /* verify forwarding is enabled on this interface */
4915 in_dev = __in_dev_get_rcu(dev);
4916 if (unlikely(!in_dev || !IN_DEV_FORWARD(in_dev)))
4c79579b 4917 return BPF_FIB_LKUP_RET_FWD_DISABLED;
87f5fc7e
DA
4918
4919 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
4920 fl4.flowi4_iif = 1;
4921 fl4.flowi4_oif = params->ifindex;
4922 } else {
4923 fl4.flowi4_iif = params->ifindex;
4924 fl4.flowi4_oif = 0;
4925 }
4926 fl4.flowi4_tos = params->tos & IPTOS_RT_MASK;
4927 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
4928 fl4.flowi4_flags = 0;
4929
4930 fl4.flowi4_proto = params->l4_protocol;
4931 fl4.daddr = params->ipv4_dst;
4932 fl4.saddr = params->ipv4_src;
4933 fl4.fl4_sport = params->sport;
4934 fl4.fl4_dport = params->dport;
1869e226 4935 fl4.flowi4_multipath_hash = 0;
87f5fc7e
DA
4936
4937 if (flags & BPF_FIB_LOOKUP_DIRECT) {
4938 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
4939 struct fib_table *tb;
4940
4941 tb = fib_get_table(net, tbid);
4942 if (unlikely(!tb))
4c79579b 4943 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e
DA
4944
4945 err = fib_table_lookup(tb, &fl4, &res, FIB_LOOKUP_NOREF);
4946 } else {
4947 fl4.flowi4_mark = 0;
4948 fl4.flowi4_secid = 0;
4949 fl4.flowi4_tun_key.tun_id = 0;
4950 fl4.flowi4_uid = sock_net_uid(net, NULL);
4951
4952 err = fib_lookup(net, &fl4, &res, FIB_LOOKUP_NOREF);
4953 }
4954
4c79579b
DA
4955 if (err) {
4956 /* map fib lookup errors to RTN_ type */
4957 if (err == -EINVAL)
4958 return BPF_FIB_LKUP_RET_BLACKHOLE;
4959 if (err == -EHOSTUNREACH)
4960 return BPF_FIB_LKUP_RET_UNREACHABLE;
4961 if (err == -EACCES)
4962 return BPF_FIB_LKUP_RET_PROHIBIT;
4963
4964 return BPF_FIB_LKUP_RET_NOT_FWDED;
4965 }
4966
4967 if (res.type != RTN_UNICAST)
4968 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e 4969
5481d73f 4970 if (fib_info_num_path(res.fi) > 1)
87f5fc7e
DA
4971 fib_select_path(net, &res, &fl4, NULL);
4972
4f74fede
DA
4973 if (check_mtu) {
4974 mtu = ip_mtu_from_fib_result(&res, params->ipv4_dst);
4975 if (params->tot_len > mtu)
4c79579b 4976 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
4f74fede
DA
4977 }
4978
eba618ab 4979 nhc = res.nhc;
87f5fc7e
DA
4980
4981 /* do not handle lwt encaps right now */
eba618ab 4982 if (nhc->nhc_lwtstate)
4c79579b 4983 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
87f5fc7e 4984
eba618ab 4985 dev = nhc->nhc_dev;
87f5fc7e
DA
4986
4987 params->rt_metric = res.fi->fib_priority;
4988
4989 /* xdp and cls_bpf programs are run in RCU-bh so
4990 * rcu_read_lock_bh is not needed here
4991 */
6f5f68d0
DA
4992 if (likely(nhc->nhc_gw_family != AF_INET6)) {
4993 if (nhc->nhc_gw_family)
4994 params->ipv4_dst = nhc->nhc_gw.ipv4;
4995
4996 neigh = __ipv4_neigh_lookup_noref(dev,
4997 (__force u32)params->ipv4_dst);
4998 } else {
4999 struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
5000
5001 params->family = AF_INET6;
5002 *dst = nhc->nhc_gw.ipv6;
5003 neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
5004 }
5005
4c79579b
DA
5006 if (!neigh)
5007 return BPF_FIB_LKUP_RET_NO_NEIGH;
87f5fc7e 5008
4c79579b 5009 return bpf_fib_set_fwd_params(params, neigh, dev);
87f5fc7e
DA
5010}
5011#endif
5012
5013#if IS_ENABLED(CONFIG_IPV6)
5014static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
4f74fede 5015 u32 flags, bool check_mtu)
87f5fc7e
DA
5016{
5017 struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
5018 struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
e55449e7 5019 struct fib6_result res = {};
87f5fc7e
DA
5020 struct neighbour *neigh;
5021 struct net_device *dev;
5022 struct inet6_dev *idev;
87f5fc7e
DA
5023 struct flowi6 fl6;
5024 int strict = 0;
effda4dd 5025 int oif, err;
4f74fede 5026 u32 mtu;
87f5fc7e
DA
5027
5028 /* link local addresses are never forwarded */
5029 if (rt6_need_strict(dst) || rt6_need_strict(src))
4c79579b 5030 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e
DA
5031
5032 dev = dev_get_by_index_rcu(net, params->ifindex);
5033 if (unlikely(!dev))
5034 return -ENODEV;
5035
5036 idev = __in6_dev_get_safely(dev);
56f0f84e 5037 if (unlikely(!idev || !idev->cnf.forwarding))
4c79579b 5038 return BPF_FIB_LKUP_RET_FWD_DISABLED;
87f5fc7e
DA
5039
5040 if (flags & BPF_FIB_LOOKUP_OUTPUT) {
5041 fl6.flowi6_iif = 1;
5042 oif = fl6.flowi6_oif = params->ifindex;
5043 } else {
5044 oif = fl6.flowi6_iif = params->ifindex;
5045 fl6.flowi6_oif = 0;
5046 strict = RT6_LOOKUP_F_HAS_SADDR;
5047 }
bd3a08aa 5048 fl6.flowlabel = params->flowinfo;
87f5fc7e
DA
5049 fl6.flowi6_scope = 0;
5050 fl6.flowi6_flags = 0;
5051 fl6.mp_hash = 0;
5052
5053 fl6.flowi6_proto = params->l4_protocol;
5054 fl6.daddr = *dst;
5055 fl6.saddr = *src;
5056 fl6.fl6_sport = params->sport;
5057 fl6.fl6_dport = params->dport;
5058
5059 if (flags & BPF_FIB_LOOKUP_DIRECT) {
5060 u32 tbid = l3mdev_fib_table_rcu(dev) ? : RT_TABLE_MAIN;
5061 struct fib6_table *tb;
5062
5063 tb = ipv6_stub->fib6_get_table(net, tbid);
5064 if (unlikely(!tb))
4c79579b 5065 return BPF_FIB_LKUP_RET_NOT_FWDED;
87f5fc7e 5066
effda4dd
DA
5067 err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res,
5068 strict);
87f5fc7e
DA
5069 } else {
5070 fl6.flowi6_mark = 0;
5071 fl6.flowi6_secid = 0;
5072 fl6.flowi6_tun_key.tun_id = 0;
5073 fl6.flowi6_uid = sock_net_uid(net, NULL);
5074
effda4dd 5075 err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict);
87f5fc7e
DA
5076 }
5077
effda4dd 5078 if (unlikely(err || IS_ERR_OR_NULL(res.f6i) ||
b1d40991 5079 res.f6i == net->ipv6.fib6_null_entry))
4c79579b
DA
5080 return BPF_FIB_LKUP_RET_NOT_FWDED;
5081
7d21fec9
DA
5082 switch (res.fib6_type) {
5083 /* only unicast is forwarded */
5084 case RTN_UNICAST:
5085 break;
5086 case RTN_BLACKHOLE:
5087 return BPF_FIB_LKUP_RET_BLACKHOLE;
5088 case RTN_UNREACHABLE:
5089 return BPF_FIB_LKUP_RET_UNREACHABLE;
5090 case RTN_PROHIBIT:
5091 return BPF_FIB_LKUP_RET_PROHIBIT;
5092 default:
4c79579b 5093 return BPF_FIB_LKUP_RET_NOT_FWDED;
7d21fec9 5094 }
87f5fc7e 5095
b1d40991
DA
5096 ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif,
5097 fl6.flowi6_oif != 0, NULL, strict);
87f5fc7e 5098
4f74fede 5099 if (check_mtu) {
b748f260 5100 mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src);
4f74fede 5101 if (params->tot_len > mtu)
4c79579b 5102 return BPF_FIB_LKUP_RET_FRAG_NEEDED;
4f74fede
DA
5103 }
5104
b1d40991 5105 if (res.nh->fib_nh_lws)
4c79579b 5106 return BPF_FIB_LKUP_RET_UNSUPP_LWT;
87f5fc7e 5107
b1d40991
DA
5108 if (res.nh->fib_nh_gw_family)
5109 *dst = res.nh->fib_nh_gw6;
87f5fc7e 5110
b1d40991
DA
5111 dev = res.nh->fib_nh_dev;
5112 params->rt_metric = res.f6i->fib6_metric;
87f5fc7e
DA
5113
5114 /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
71df5777 5115 * not needed here.
87f5fc7e 5116 */
71df5777 5117 neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
4c79579b
DA
5118 if (!neigh)
5119 return BPF_FIB_LKUP_RET_NO_NEIGH;
87f5fc7e 5120
4c79579b 5121 return bpf_fib_set_fwd_params(params, neigh, dev);
87f5fc7e
DA
5122}
5123#endif
5124
5125BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
5126 struct bpf_fib_lookup *, params, int, plen, u32, flags)
5127{
5128 if (plen < sizeof(*params))
5129 return -EINVAL;
5130
9ce64f19
DA
5131 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
5132 return -EINVAL;
5133
87f5fc7e
DA
5134 switch (params->family) {
5135#if IS_ENABLED(CONFIG_INET)
5136 case AF_INET:
5137 return bpf_ipv4_fib_lookup(dev_net(ctx->rxq->dev), params,
4f74fede 5138 flags, true);
87f5fc7e
DA
5139#endif
5140#if IS_ENABLED(CONFIG_IPV6)
5141 case AF_INET6:
5142 return bpf_ipv6_fib_lookup(dev_net(ctx->rxq->dev), params,
4f74fede 5143 flags, true);
87f5fc7e
DA
5144#endif
5145 }
bcece5dc 5146 return -EAFNOSUPPORT;
87f5fc7e
DA
5147}
5148
5149static const struct bpf_func_proto bpf_xdp_fib_lookup_proto = {
5150 .func = bpf_xdp_fib_lookup,
5151 .gpl_only = true,
5152 .ret_type = RET_INTEGER,
5153 .arg1_type = ARG_PTR_TO_CTX,
5154 .arg2_type = ARG_PTR_TO_MEM,
5155 .arg3_type = ARG_CONST_SIZE,
5156 .arg4_type = ARG_ANYTHING,
5157};
5158
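/* Editor's example (sketch, not part of this file): the core of an XDP
 * forwarder built on bpf_fib_lookup().  On BPF_FIB_LKUP_RET_SUCCESS the
 * params struct carries the rewritten MAC addresses and the egress
 * ifindex, which can be handed to bpf_redirect().  Header parsing,
 * TTL/checksum updates and the IPv6 case are elided for brevity.
 *
 *	SEC("xdp")
 *	int xdp_fwd(struct xdp_md *ctx)
 *	{
 *		struct bpf_fib_lookup fib = {};
 *		int rc;
 *
 *		// ... parse the packet and fill fib.family, fib.tot_len,
 *		// fib.ipv4_src/fib.ipv4_dst etc. from its headers ...
 *		fib.ifindex = ctx->ingress_ifindex;
 *
 *		rc = bpf_fib_lookup(ctx, &fib, sizeof(fib), 0);
 *		if (rc != BPF_FIB_LKUP_RET_SUCCESS)
 *			return XDP_PASS;
 *
 *		// ... write fib.dmac/fib.smac into the Ethernet header
 *		// and decrement the IP TTL ...
 *		return bpf_redirect(fib.ifindex, 0);
 *	}
 */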
5159BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
5160 struct bpf_fib_lookup *, params, int, plen, u32, flags)
5161{
4f74fede 5162 struct net *net = dev_net(skb->dev);
4c79579b 5163 int rc = -EAFNOSUPPORT;
4f74fede 5164
87f5fc7e
DA
5165 if (plen < sizeof(*params))
5166 return -EINVAL;
5167
9ce64f19
DA
5168 if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
5169 return -EINVAL;
5170
87f5fc7e
DA
5171 switch (params->family) {
5172#if IS_ENABLED(CONFIG_INET)
5173 case AF_INET:
4c79579b 5174 rc = bpf_ipv4_fib_lookup(net, params, flags, false);
4f74fede 5175 break;
87f5fc7e
DA
5176#endif
5177#if IS_ENABLED(CONFIG_IPV6)
5178 case AF_INET6:
4c79579b 5179 rc = bpf_ipv6_fib_lookup(net, params, flags, false);
4f74fede 5180 break;
87f5fc7e
DA
5181#endif
5182 }
4f74fede 5183
4c79579b 5184 if (!rc) {
4f74fede
DA
5185 struct net_device *dev;
5186
4c79579b 5187 dev = dev_get_by_index_rcu(net, params->ifindex);
4f74fede 5188 if (!is_skb_forwardable(dev, skb))
4c79579b 5189 rc = BPF_FIB_LKUP_RET_FRAG_NEEDED;
4f74fede
DA
5190 }
5191
4c79579b 5192 return rc;
87f5fc7e
DA
5193}
5194
5195static const struct bpf_func_proto bpf_skb_fib_lookup_proto = {
5196 .func = bpf_skb_fib_lookup,
5197 .gpl_only = true,
5198 .ret_type = RET_INTEGER,
5199 .arg1_type = ARG_PTR_TO_CTX,
5200 .arg2_type = ARG_PTR_TO_MEM,
5201 .arg3_type = ARG_CONST_SIZE,
5202 .arg4_type = ARG_ANYTHING,
5203};
5204
fe94cc29
MX
5205#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
5206static int bpf_push_seg6_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
5207{
5208 int err;
5209 struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)hdr;
5210
bb986a50 5211 if (!seg6_validate_srh(srh, len, false))
fe94cc29
MX
5212 return -EINVAL;
5213
5214 switch (type) {
5215 case BPF_LWT_ENCAP_SEG6_INLINE:
5216 if (skb->protocol != htons(ETH_P_IPV6))
5217 return -EBADMSG;
5218
5219 err = seg6_do_srh_inline(skb, srh);
5220 break;
5221 case BPF_LWT_ENCAP_SEG6:
5222 skb_reset_inner_headers(skb);
5223 skb->encapsulation = 1;
5224 err = seg6_do_srh_encap(skb, srh, IPPROTO_IPV6);
5225 break;
5226 default:
5227 return -EINVAL;
5228 }
5229
5230 bpf_compute_data_pointers(skb);
5231 if (err)
5232 return err;
5233
5234 ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
5235 skb_set_transport_header(skb, sizeof(struct ipv6hdr));
5236
5237 return seg6_lookup_nexthop(skb, NULL, 0);
5238}
5239#endif /* CONFIG_IPV6_SEG6_BPF */
5240
3e0bd37c
PO
5241#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5242static int bpf_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len,
5243 bool ingress)
5244{
52f27877 5245 return bpf_lwt_push_ip_encap(skb, hdr, len, ingress);
3e0bd37c
PO
5246}
5247#endif
5248
5249BPF_CALL_4(bpf_lwt_in_push_encap, struct sk_buff *, skb, u32, type, void *, hdr,
fe94cc29
MX
5250 u32, len)
5251{
5252 switch (type) {
5253#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
5254 case BPF_LWT_ENCAP_SEG6:
5255 case BPF_LWT_ENCAP_SEG6_INLINE:
5256 return bpf_push_seg6_encap(skb, type, hdr, len);
3e0bd37c
PO
5257#endif
5258#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5259 case BPF_LWT_ENCAP_IP:
5260 return bpf_push_ip_encap(skb, hdr, len, true /* ingress */);
fe94cc29
MX
5261#endif
5262 default:
5263 return -EINVAL;
5264 }
5265}
5266
3e0bd37c
PO
5267BPF_CALL_4(bpf_lwt_xmit_push_encap, struct sk_buff *, skb, u32, type,
5268 void *, hdr, u32, len)
5269{
5270 switch (type) {
5271#if IS_ENABLED(CONFIG_LWTUNNEL_BPF)
5272 case BPF_LWT_ENCAP_IP:
5273 return bpf_push_ip_encap(skb, hdr, len, false /* egress */);
fe94cc29
MX
5274#endif
5275 default:
5276 return -EINVAL;
5277 }
5278}
5279
3e0bd37c
PO
5280static const struct bpf_func_proto bpf_lwt_in_push_encap_proto = {
5281 .func = bpf_lwt_in_push_encap,
5282 .gpl_only = false,
5283 .ret_type = RET_INTEGER,
5284 .arg1_type = ARG_PTR_TO_CTX,
5285 .arg2_type = ARG_ANYTHING,
5286 .arg3_type = ARG_PTR_TO_MEM,
5287 .arg4_type = ARG_CONST_SIZE
5288};
5289
5290static const struct bpf_func_proto bpf_lwt_xmit_push_encap_proto = {
5291 .func = bpf_lwt_xmit_push_encap,
fe94cc29
MX
5292 .gpl_only = false,
5293 .ret_type = RET_INTEGER,
5294 .arg1_type = ARG_PTR_TO_CTX,
5295 .arg2_type = ARG_ANYTHING,
5296 .arg3_type = ARG_PTR_TO_MEM,
5297 .arg4_type = ARG_CONST_SIZE
5298};
5299
61d76980 5300#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
fe94cc29
MX
5301BPF_CALL_4(bpf_lwt_seg6_store_bytes, struct sk_buff *, skb, u32, offset,
5302 const void *, from, u32, len)
5303{
fe94cc29
MX
5304 struct seg6_bpf_srh_state *srh_state =
5305 this_cpu_ptr(&seg6_bpf_srh_states);
486cdf21 5306 struct ipv6_sr_hdr *srh = srh_state->srh;
fe94cc29 5307 void *srh_tlvs, *srh_end, *ptr;
fe94cc29
MX
5308 int srhoff = 0;
5309
486cdf21 5310 if (srh == NULL)
fe94cc29
MX
5311 return -EINVAL;
5312
fe94cc29
MX
5313 srh_tlvs = (void *)((char *)srh + ((srh->first_segment + 1) << 4));
5314 srh_end = (void *)((char *)srh + sizeof(*srh) + srh_state->hdrlen);
5315
5316 ptr = skb->data + offset;
5317 if (ptr >= srh_tlvs && ptr + len <= srh_end)
486cdf21 5318 srh_state->valid = false;
fe94cc29
MX
5319 else if (ptr < (void *)&srh->flags ||
5320 ptr + len > (void *)&srh->segments)
5321 return -EFAULT;
5322
5323 if (unlikely(bpf_try_make_writable(skb, offset + len)))
5324 return -EFAULT;
486cdf21
MX
5325 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
5326 return -EINVAL;
5327 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
fe94cc29
MX
5328
5329 memcpy(skb->data + offset, from, len);
5330 return 0;
fe94cc29
MX
5331}
5332
5333static const struct bpf_func_proto bpf_lwt_seg6_store_bytes_proto = {
5334 .func = bpf_lwt_seg6_store_bytes,
5335 .gpl_only = false,
5336 .ret_type = RET_INTEGER,
5337 .arg1_type = ARG_PTR_TO_CTX,
5338 .arg2_type = ARG_ANYTHING,
5339 .arg3_type = ARG_PTR_TO_MEM,
5340 .arg4_type = ARG_CONST_SIZE
5341};
5342
486cdf21 5343static void bpf_update_srh_state(struct sk_buff *skb)
fe94cc29 5344{
fe94cc29
MX
5345 struct seg6_bpf_srh_state *srh_state =
5346 this_cpu_ptr(&seg6_bpf_srh_states);
fe94cc29 5347 int srhoff = 0;
fe94cc29 5348
486cdf21
MX
5349 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) {
5350 srh_state->srh = NULL;
5351 } else {
5352 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
5353 srh_state->hdrlen = srh_state->srh->hdrlen << 3;
5354 srh_state->valid = true;
fe94cc29 5355 }
486cdf21
MX
5356}
5357
5358BPF_CALL_4(bpf_lwt_seg6_action, struct sk_buff *, skb,
5359 u32, action, void *, param, u32, param_len)
5360{
5361 struct seg6_bpf_srh_state *srh_state =
5362 this_cpu_ptr(&seg6_bpf_srh_states);
5363 int hdroff = 0;
5364 int err;
fe94cc29
MX
5365
5366 switch (action) {
5367 case SEG6_LOCAL_ACTION_END_X:
486cdf21
MX
5368 if (!seg6_bpf_has_valid_srh(skb))
5369 return -EBADMSG;
fe94cc29
MX
5370 if (param_len != sizeof(struct in6_addr))
5371 return -EINVAL;
5372 return seg6_lookup_nexthop(skb, (struct in6_addr *)param, 0);
5373 case SEG6_LOCAL_ACTION_END_T:
486cdf21
MX
5374 if (!seg6_bpf_has_valid_srh(skb))
5375 return -EBADMSG;
fe94cc29
MX
5376 if (param_len != sizeof(int))
5377 return -EINVAL;
5378 return seg6_lookup_nexthop(skb, NULL, *(int *)param);
486cdf21
MX
5379 case SEG6_LOCAL_ACTION_END_DT6:
5380 if (!seg6_bpf_has_valid_srh(skb))
5381 return -EBADMSG;
fe94cc29
MX
5382 if (param_len != sizeof(int))
5383 return -EINVAL;
486cdf21
MX
5384
5385 if (ipv6_find_hdr(skb, &hdroff, IPPROTO_IPV6, NULL, NULL) < 0)
5386 return -EBADMSG;
5387 if (!pskb_pull(skb, hdroff))
5388 return -EBADMSG;
5389
5390 skb_postpull_rcsum(skb, skb_network_header(skb), hdroff);
5391 skb_reset_network_header(skb);
5392 skb_reset_transport_header(skb);
5393 skb->encapsulation = 0;
5394
5395 bpf_compute_data_pointers(skb);
5396 bpf_update_srh_state(skb);
fe94cc29
MX
5397 return seg6_lookup_nexthop(skb, NULL, *(int *)param);
5398 case SEG6_LOCAL_ACTION_END_B6:
486cdf21
MX
5399 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
5400 return -EBADMSG;
fe94cc29
MX
5401 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6_INLINE,
5402 param, param_len);
5403 if (!err)
486cdf21
MX
5404 bpf_update_srh_state(skb);
5405
fe94cc29
MX
5406 return err;
5407 case SEG6_LOCAL_ACTION_END_B6_ENCAP:
486cdf21
MX
5408 if (srh_state->srh && !seg6_bpf_has_valid_srh(skb))
5409 return -EBADMSG;
fe94cc29
MX
5410 err = bpf_push_seg6_encap(skb, BPF_LWT_ENCAP_SEG6,
5411 param, param_len);
5412 if (!err)
486cdf21
MX
5413 bpf_update_srh_state(skb);
5414
fe94cc29
MX
5415 return err;
5416 default:
5417 return -EINVAL;
5418 }
fe94cc29
MX
5419}
5420
5421static const struct bpf_func_proto bpf_lwt_seg6_action_proto = {
5422 .func = bpf_lwt_seg6_action,
5423 .gpl_only = false,
5424 .ret_type = RET_INTEGER,
5425 .arg1_type = ARG_PTR_TO_CTX,
5426 .arg2_type = ARG_ANYTHING,
5427 .arg3_type = ARG_PTR_TO_MEM,
5428 .arg4_type = ARG_CONST_SIZE
5429};
5430
5431BPF_CALL_3(bpf_lwt_seg6_adjust_srh, struct sk_buff *, skb, u32, offset,
5432 s32, len)
5433{
fe94cc29
MX
5434 struct seg6_bpf_srh_state *srh_state =
5435 this_cpu_ptr(&seg6_bpf_srh_states);
486cdf21 5436 struct ipv6_sr_hdr *srh = srh_state->srh;
fe94cc29 5437 void *srh_end, *srh_tlvs, *ptr;
fe94cc29
MX
5438 struct ipv6hdr *hdr;
5439 int srhoff = 0;
5440 int ret;
5441
486cdf21 5442 if (unlikely(srh == NULL))
fe94cc29 5443 return -EINVAL;
fe94cc29
MX
5444
5445 srh_tlvs = (void *)((unsigned char *)srh + sizeof(*srh) +
5446 ((srh->first_segment + 1) << 4));
5447 srh_end = (void *)((unsigned char *)srh + sizeof(*srh) +
5448 srh_state->hdrlen);
5449 ptr = skb->data + offset;
5450
5451 if (unlikely(ptr < srh_tlvs || ptr > srh_end))
5452 return -EFAULT;
5453 if (unlikely(len < 0 && (void *)((char *)ptr - len) > srh_end))
5454 return -EFAULT;
5455
5456 if (len > 0) {
5457 ret = skb_cow_head(skb, len);
5458 if (unlikely(ret < 0))
5459 return ret;
5460
5461 ret = bpf_skb_net_hdr_push(skb, offset, len);
5462 } else {
5463 ret = bpf_skb_net_hdr_pop(skb, offset, -1 * len);
5464 }
5465
5466 bpf_compute_data_pointers(skb);
5467 if (unlikely(ret < 0))
5468 return ret;
5469
5470 hdr = (struct ipv6hdr *)skb->data;
5471 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
5472
486cdf21
MX
5473 if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
5474 return -EINVAL;
5475 srh_state->srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
fe94cc29 5476 srh_state->hdrlen += len;
486cdf21 5477 srh_state->valid = false;
fe94cc29 5478 return 0;
fe94cc29
MX
5479}
5480
5481static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = {
5482 .func = bpf_lwt_seg6_adjust_srh,
5483 .gpl_only = false,
5484 .ret_type = RET_INTEGER,
5485 .arg1_type = ARG_PTR_TO_CTX,
5486 .arg2_type = ARG_ANYTHING,
5487 .arg3_type = ARG_ANYTHING,
5488};
61d76980 5489#endif /* CONFIG_IPV6_SEG6_BPF */
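/* An illustrative BPF-program-side sketch (not part of this file) for the
 * seg6 helpers above: a lwt_seg6local program implementing an End.X-like
 * behaviour via bpf_lwt_seg6_action(). Assumes libbpf's
 * <bpf/bpf_helpers.h>; the next-hop address is a placeholder.
 */
#include <linux/bpf.h>
#include <linux/in6.h>
#include <linux/seg6_local.h>
#include <bpf/bpf_helpers.h>

SEC("lwt_seg6local")
int seg6_end_x(struct __sk_buff *skb)
{
	struct in6_addr nh = { };	/* placeholder next hop, e.g. fc00::2 */

	/* SEG6_LOCAL_ACTION_END_X takes a struct in6_addr parameter */
	if (bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X, &nh, sizeof(nh)))
		return BPF_DROP;
	return BPF_REDIRECT;
}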
fe94cc29 5490
df3f94a0
AB
5491#ifdef CONFIG_INET
5492static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple,
c8123ead 5493 int dif, int sdif, u8 family, u8 proto)
6acc9b43 5494{
6acc9b43
JS
5495 bool refcounted = false;
5496 struct sock *sk = NULL;
5497
5498 if (family == AF_INET) {
5499 __be32 src4 = tuple->ipv4.saddr;
5500 __be32 dst4 = tuple->ipv4.daddr;
6acc9b43
JS
5501
5502 if (proto == IPPROTO_TCP)
c8123ead 5503 sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0,
6acc9b43
JS
5504 src4, tuple->ipv4.sport,
5505 dst4, tuple->ipv4.dport,
5506 dif, sdif, &refcounted);
5507 else
5508 sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport,
5509 dst4, tuple->ipv4.dport,
c8123ead 5510 dif, sdif, &udp_table, NULL);
8a615c6b 5511#if IS_ENABLED(CONFIG_IPV6)
6acc9b43
JS
5512 } else {
5513 struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr;
5514 struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr;
6acc9b43
JS
5515
5516 if (proto == IPPROTO_TCP)
c8123ead 5517 sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0,
6acc9b43 5518 src6, tuple->ipv6.sport,
cac6cc2f 5519 dst6, ntohs(tuple->ipv6.dport),
6acc9b43 5520 dif, sdif, &refcounted);
8a615c6b
JS
5521 else if (likely(ipv6_bpf_stub))
5522 sk = ipv6_bpf_stub->udp6_lib_lookup(net,
5523 src6, tuple->ipv6.sport,
cac6cc2f 5524 dst6, tuple->ipv6.dport,
8a615c6b 5525 dif, sdif,
c8123ead 5526 &udp_table, NULL);
6acc9b43
JS
5527#endif
5528 }
5529
5530 if (unlikely(sk && !refcounted && !sock_flag(sk, SOCK_RCU_FREE))) {
5531 WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
5532 sk = NULL;
5533 }
5534 return sk;
5535}
5536
edbf8c01 5537/* bpf_skc_lookup performs the core lookup for different types of sockets,
6acc9b43
JS
5538 * taking a reference on the socket if it doesn't have the flag SOCK_RCU_FREE.
5539 * Returns the socket as an 'unsigned long' to simplify the casting in the
5540 * callers to satisfy BPF_CALL declarations.
5541 */
edbf8c01
LB
5542static struct sock *
5543__bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5544 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
5545 u64 flags)
6acc9b43 5546{
6acc9b43
JS
5547 struct sock *sk = NULL;
5548 u8 family = AF_UNSPEC;
5549 struct net *net;
c8123ead 5550 int sdif;
6acc9b43 5551
9b28ae24
LB
5552 if (len == sizeof(tuple->ipv4))
5553 family = AF_INET;
5554 else if (len == sizeof(tuple->ipv6))
5555 family = AF_INET6;
5556 else
5557 return NULL;
5558
f71c6143
JS
5559 if (unlikely(family == AF_UNSPEC || flags ||
5560 !((s32)netns_id < 0 || netns_id <= S32_MAX)))
6acc9b43
JS
5561 goto out;
5562
c8123ead
NH
5563 if (family == AF_INET)
5564 sdif = inet_sdif(skb);
6acc9b43 5565 else
c8123ead
NH
5566 sdif = inet6_sdif(skb);
5567
f71c6143
JS
5568 if ((s32)netns_id < 0) {
5569 net = caller_net;
4cc1feeb 5570 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
f71c6143 5571 } else {
6acc9b43
JS
5572 net = get_net_ns_by_id(caller_net, netns_id);
5573 if (unlikely(!net))
5574 goto out;
c8123ead 5575 sk = sk_lookup(net, tuple, ifindex, sdif, family, proto);
6acc9b43 5576 put_net(net);
6acc9b43
JS
5577 }
5578
edbf8c01
LB
5579out:
5580 return sk;
5581}
5582
5583static struct sock *
5584__bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5585 struct net *caller_net, u32 ifindex, u8 proto, u64 netns_id,
5586 u64 flags)
5587{
5588 struct sock *sk = __bpf_skc_lookup(skb, tuple, len, caller_net,
5589 ifindex, proto, netns_id, flags);
5590
f7355a6c 5591 if (sk) {
6acc9b43 5592 sk = sk_to_full_sk(sk);
f7355a6c 5593 if (!sk_fullsock(sk)) {
2e012c74 5594 sock_gen_put(sk);
f7355a6c
MKL
5595 return NULL;
5596 }
5597 }
edbf8c01
LB
5598
5599 return sk;
6acc9b43
JS
5600}
5601
edbf8c01
LB
5602static struct sock *
5603bpf_skc_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5604 u8 proto, u64 netns_id, u64 flags)
c8123ead
NH
5605{
5606 struct net *caller_net;
5607 int ifindex;
5608
5609 if (skb->dev) {
5610 caller_net = dev_net(skb->dev);
5611 ifindex = skb->dev->ifindex;
5612 } else {
5613 caller_net = sock_net(skb->sk);
5614 ifindex = 0;
5615 }
5616
edbf8c01
LB
5617 return __bpf_skc_lookup(skb, tuple, len, caller_net, ifindex, proto,
5618 netns_id, flags);
c8123ead
NH
5619}
5620
edbf8c01
LB
5621static struct sock *
5622bpf_sk_lookup(struct sk_buff *skb, struct bpf_sock_tuple *tuple, u32 len,
5623 u8 proto, u64 netns_id, u64 flags)
5624{
5625 struct sock *sk = bpf_skc_lookup(skb, tuple, len, proto, netns_id,
5626 flags);
5627
f7355a6c 5628 if (sk) {
edbf8c01 5629 sk = sk_to_full_sk(sk);
f7355a6c 5630 if (!sk_fullsock(sk)) {
2e012c74 5631 sock_gen_put(sk);
f7355a6c
MKL
5632 return NULL;
5633 }
5634 }
edbf8c01
LB
5635
5636 return sk;
5637}
5638
5639BPF_CALL_5(bpf_skc_lookup_tcp, struct sk_buff *, skb,
5640 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5641{
5642 return (unsigned long)bpf_skc_lookup(skb, tuple, len, IPPROTO_TCP,
5643 netns_id, flags);
5644}
5645
5646static const struct bpf_func_proto bpf_skc_lookup_tcp_proto = {
5647 .func = bpf_skc_lookup_tcp,
5648 .gpl_only = false,
5649 .pkt_access = true,
5650 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
5651 .arg1_type = ARG_PTR_TO_CTX,
5652 .arg2_type = ARG_PTR_TO_MEM,
5653 .arg3_type = ARG_CONST_SIZE,
5654 .arg4_type = ARG_ANYTHING,
5655 .arg5_type = ARG_ANYTHING,
5656};
5657
6acc9b43
JS
5658BPF_CALL_5(bpf_sk_lookup_tcp, struct sk_buff *, skb,
5659 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5660{
edbf8c01
LB
5661 return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_TCP,
5662 netns_id, flags);
6acc9b43
JS
5663}
5664
5665static const struct bpf_func_proto bpf_sk_lookup_tcp_proto = {
5666 .func = bpf_sk_lookup_tcp,
5667 .gpl_only = false,
5668 .pkt_access = true,
5669 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5670 .arg1_type = ARG_PTR_TO_CTX,
5671 .arg2_type = ARG_PTR_TO_MEM,
5672 .arg3_type = ARG_CONST_SIZE,
5673 .arg4_type = ARG_ANYTHING,
5674 .arg5_type = ARG_ANYTHING,
5675};
5676
5677BPF_CALL_5(bpf_sk_lookup_udp, struct sk_buff *, skb,
5678 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5679{
edbf8c01
LB
5680 return (unsigned long)bpf_sk_lookup(skb, tuple, len, IPPROTO_UDP,
5681 netns_id, flags);
6acc9b43
JS
5682}
5683
5684static const struct bpf_func_proto bpf_sk_lookup_udp_proto = {
5685 .func = bpf_sk_lookup_udp,
5686 .gpl_only = false,
5687 .pkt_access = true,
5688 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5689 .arg1_type = ARG_PTR_TO_CTX,
5690 .arg2_type = ARG_PTR_TO_MEM,
5691 .arg3_type = ARG_CONST_SIZE,
5692 .arg4_type = ARG_ANYTHING,
5693 .arg5_type = ARG_ANYTHING,
5694};
5695
5696BPF_CALL_1(bpf_sk_release, struct sock *, sk)
5697{
a5fa25ad 5698 if (sk && sk_is_refcounted(sk))
6acc9b43
JS
5699 sock_gen_put(sk);
5700 return 0;
5701}
5702
5703static const struct bpf_func_proto bpf_sk_release_proto = {
5704 .func = bpf_sk_release,
5705 .gpl_only = false,
5706 .ret_type = RET_INTEGER,
a5fa25ad 5707 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
6acc9b43 5708};
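/* An illustrative tc (cls_act) sketch (not part of this file) of the
 * acquire/release pairing that the verifier enforces around
 * bpf_sk_lookup_tcp() and bpf_sk_release(). The tuple would normally be
 * filled from the packet headers; that parsing is elided here. Assumes
 * libbpf's <bpf/bpf_helpers.h> and a recent libbpf for SEC("tc").
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int lookup_and_release(struct __sk_buff *skb)
{
	struct bpf_sock_tuple tuple = { };	/* placeholder, parse from headers */
	struct bpf_sock *sk;

	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
			       BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return TC_ACT_OK;
	/* ... inspect sk->state, sk->src_port, etc. ... */
	bpf_sk_release(sk);	/* mandatory before returning */
	return TC_ACT_OK;
}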
c8123ead
NH
5709
5710BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
5711 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
5712{
5713 struct net *caller_net = dev_net(ctx->rxq->dev);
5714 int ifindex = ctx->rxq->dev->ifindex;
5715
edbf8c01
LB
5716 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
5717 ifindex, IPPROTO_UDP, netns_id,
5718 flags);
c8123ead
NH
5719}
5720
5721static const struct bpf_func_proto bpf_xdp_sk_lookup_udp_proto = {
5722 .func = bpf_xdp_sk_lookup_udp,
5723 .gpl_only = false,
5724 .pkt_access = true,
5725 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5726 .arg1_type = ARG_PTR_TO_CTX,
5727 .arg2_type = ARG_PTR_TO_MEM,
5728 .arg3_type = ARG_CONST_SIZE,
5729 .arg4_type = ARG_ANYTHING,
5730 .arg5_type = ARG_ANYTHING,
5731};
5732
edbf8c01
LB
5733BPF_CALL_5(bpf_xdp_skc_lookup_tcp, struct xdp_buff *, ctx,
5734 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
5735{
5736 struct net *caller_net = dev_net(ctx->rxq->dev);
5737 int ifindex = ctx->rxq->dev->ifindex;
5738
5739 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len, caller_net,
5740 ifindex, IPPROTO_TCP, netns_id,
5741 flags);
5742}
5743
5744static const struct bpf_func_proto bpf_xdp_skc_lookup_tcp_proto = {
5745 .func = bpf_xdp_skc_lookup_tcp,
5746 .gpl_only = false,
5747 .pkt_access = true,
5748 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
5749 .arg1_type = ARG_PTR_TO_CTX,
5750 .arg2_type = ARG_PTR_TO_MEM,
5751 .arg3_type = ARG_CONST_SIZE,
5752 .arg4_type = ARG_ANYTHING,
5753 .arg5_type = ARG_ANYTHING,
5754};
5755
c8123ead
NH
5756BPF_CALL_5(bpf_xdp_sk_lookup_tcp, struct xdp_buff *, ctx,
5757 struct bpf_sock_tuple *, tuple, u32, len, u32, netns_id, u64, flags)
5758{
5759 struct net *caller_net = dev_net(ctx->rxq->dev);
5760 int ifindex = ctx->rxq->dev->ifindex;
5761
edbf8c01
LB
5762 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len, caller_net,
5763 ifindex, IPPROTO_TCP, netns_id,
5764 flags);
c8123ead
NH
5765}
5766
5767static const struct bpf_func_proto bpf_xdp_sk_lookup_tcp_proto = {
5768 .func = bpf_xdp_sk_lookup_tcp,
5769 .gpl_only = false,
5770 .pkt_access = true,
5771 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5772 .arg1_type = ARG_PTR_TO_CTX,
5773 .arg2_type = ARG_PTR_TO_MEM,
5774 .arg3_type = ARG_CONST_SIZE,
5775 .arg4_type = ARG_ANYTHING,
5776 .arg5_type = ARG_ANYTHING,
5777};
6c49e65e 5778
edbf8c01
LB
5779BPF_CALL_5(bpf_sock_addr_skc_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
5780 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5781{
5782 return (unsigned long)__bpf_skc_lookup(NULL, tuple, len,
5783 sock_net(ctx->sk), 0,
5784 IPPROTO_TCP, netns_id, flags);
5785}
5786
5787static const struct bpf_func_proto bpf_sock_addr_skc_lookup_tcp_proto = {
5788 .func = bpf_sock_addr_skc_lookup_tcp,
5789 .gpl_only = false,
5790 .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL,
5791 .arg1_type = ARG_PTR_TO_CTX,
5792 .arg2_type = ARG_PTR_TO_MEM,
5793 .arg3_type = ARG_CONST_SIZE,
5794 .arg4_type = ARG_ANYTHING,
5795 .arg5_type = ARG_ANYTHING,
5796};
5797
6c49e65e
AI
5798BPF_CALL_5(bpf_sock_addr_sk_lookup_tcp, struct bpf_sock_addr_kern *, ctx,
5799 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5800{
edbf8c01
LB
5801 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
5802 sock_net(ctx->sk), 0, IPPROTO_TCP,
5803 netns_id, flags);
6c49e65e
AI
5804}
5805
5806static const struct bpf_func_proto bpf_sock_addr_sk_lookup_tcp_proto = {
5807 .func = bpf_sock_addr_sk_lookup_tcp,
5808 .gpl_only = false,
5809 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5810 .arg1_type = ARG_PTR_TO_CTX,
5811 .arg2_type = ARG_PTR_TO_MEM,
5812 .arg3_type = ARG_CONST_SIZE,
5813 .arg4_type = ARG_ANYTHING,
5814 .arg5_type = ARG_ANYTHING,
5815};
5816
5817BPF_CALL_5(bpf_sock_addr_sk_lookup_udp, struct bpf_sock_addr_kern *, ctx,
5818 struct bpf_sock_tuple *, tuple, u32, len, u64, netns_id, u64, flags)
5819{
edbf8c01
LB
5820 return (unsigned long)__bpf_sk_lookup(NULL, tuple, len,
5821 sock_net(ctx->sk), 0, IPPROTO_UDP,
5822 netns_id, flags);
6c49e65e
AI
5823}
5824
5825static const struct bpf_func_proto bpf_sock_addr_sk_lookup_udp_proto = {
5826 .func = bpf_sock_addr_sk_lookup_udp,
5827 .gpl_only = false,
5828 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
5829 .arg1_type = ARG_PTR_TO_CTX,
5830 .arg2_type = ARG_PTR_TO_MEM,
5831 .arg3_type = ARG_CONST_SIZE,
5832 .arg4_type = ARG_ANYTHING,
5833 .arg5_type = ARG_ANYTHING,
5834};
5835
655a51e5
MKL
5836bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
5837 struct bpf_insn_access_aux *info)
5838{
c2cb5e82
SF
5839 if (off < 0 || off >= offsetofend(struct bpf_tcp_sock,
5840 icsk_retransmits))
655a51e5
MKL
5841 return false;
5842
5843 if (off % size != 0)
5844 return false;
5845
5846 switch (off) {
5847 case offsetof(struct bpf_tcp_sock, bytes_received):
5848 case offsetof(struct bpf_tcp_sock, bytes_acked):
5849 return size == sizeof(__u64);
5850 default:
5851 return size == sizeof(__u32);
5852 }
5853}
5854
5855u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
5856 const struct bpf_insn *si,
5857 struct bpf_insn *insn_buf,
5858 struct bpf_prog *prog, u32 *target_size)
5859{
5860 struct bpf_insn *insn = insn_buf;
5861
5862#define BPF_TCP_SOCK_GET_COMMON(FIELD) \
5863 do { \
c593642c
PB
5864 BUILD_BUG_ON(sizeof_field(struct tcp_sock, FIELD) > \
5865 sizeof_field(struct bpf_tcp_sock, FIELD)); \
655a51e5
MKL
5866 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_sock, FIELD),\
5867 si->dst_reg, si->src_reg, \
5868 offsetof(struct tcp_sock, FIELD)); \
5869 } while (0)
5870
c2cb5e82
SF
5871#define BPF_INET_SOCK_GET_COMMON(FIELD) \
5872 do { \
c593642c 5873 BUILD_BUG_ON(sizeof_field(struct inet_connection_sock, \
c2cb5e82 5874 FIELD) > \
c593642c 5875 sizeof_field(struct bpf_tcp_sock, FIELD)); \
c2cb5e82
SF
5876 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
5877 struct inet_connection_sock, \
5878 FIELD), \
5879 si->dst_reg, si->src_reg, \
5880 offsetof( \
5881 struct inet_connection_sock, \
5882 FIELD)); \
5883 } while (0)
5884
655a51e5
MKL
5885 if (insn > insn_buf)
5886 return insn - insn_buf;
5887
5888 switch (si->off) {
5889 case offsetof(struct bpf_tcp_sock, rtt_min):
c593642c 5890 BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
655a51e5
MKL
5891 sizeof(struct minmax));
5892 BUILD_BUG_ON(sizeof(struct minmax) <
5893 sizeof(struct minmax_sample));
5894
5895 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5896 offsetof(struct tcp_sock, rtt_min) +
5897 offsetof(struct minmax_sample, v));
5898 break;
2377b81d
SF
5899 case offsetof(struct bpf_tcp_sock, snd_cwnd):
5900 BPF_TCP_SOCK_GET_COMMON(snd_cwnd);
5901 break;
5902 case offsetof(struct bpf_tcp_sock, srtt_us):
5903 BPF_TCP_SOCK_GET_COMMON(srtt_us);
5904 break;
5905 case offsetof(struct bpf_tcp_sock, snd_ssthresh):
5906 BPF_TCP_SOCK_GET_COMMON(snd_ssthresh);
5907 break;
5908 case offsetof(struct bpf_tcp_sock, rcv_nxt):
5909 BPF_TCP_SOCK_GET_COMMON(rcv_nxt);
5910 break;
5911 case offsetof(struct bpf_tcp_sock, snd_nxt):
5912 BPF_TCP_SOCK_GET_COMMON(snd_nxt);
5913 break;
5914 case offsetof(struct bpf_tcp_sock, snd_una):
5915 BPF_TCP_SOCK_GET_COMMON(snd_una);
5916 break;
5917 case offsetof(struct bpf_tcp_sock, mss_cache):
5918 BPF_TCP_SOCK_GET_COMMON(mss_cache);
5919 break;
5920 case offsetof(struct bpf_tcp_sock, ecn_flags):
5921 BPF_TCP_SOCK_GET_COMMON(ecn_flags);
5922 break;
5923 case offsetof(struct bpf_tcp_sock, rate_delivered):
5924 BPF_TCP_SOCK_GET_COMMON(rate_delivered);
5925 break;
5926 case offsetof(struct bpf_tcp_sock, rate_interval_us):
5927 BPF_TCP_SOCK_GET_COMMON(rate_interval_us);
5928 break;
5929 case offsetof(struct bpf_tcp_sock, packets_out):
5930 BPF_TCP_SOCK_GET_COMMON(packets_out);
5931 break;
5932 case offsetof(struct bpf_tcp_sock, retrans_out):
5933 BPF_TCP_SOCK_GET_COMMON(retrans_out);
5934 break;
5935 case offsetof(struct bpf_tcp_sock, total_retrans):
5936 BPF_TCP_SOCK_GET_COMMON(total_retrans);
5937 break;
5938 case offsetof(struct bpf_tcp_sock, segs_in):
5939 BPF_TCP_SOCK_GET_COMMON(segs_in);
5940 break;
5941 case offsetof(struct bpf_tcp_sock, data_segs_in):
5942 BPF_TCP_SOCK_GET_COMMON(data_segs_in);
5943 break;
5944 case offsetof(struct bpf_tcp_sock, segs_out):
5945 BPF_TCP_SOCK_GET_COMMON(segs_out);
5946 break;
5947 case offsetof(struct bpf_tcp_sock, data_segs_out):
5948 BPF_TCP_SOCK_GET_COMMON(data_segs_out);
5949 break;
5950 case offsetof(struct bpf_tcp_sock, lost_out):
5951 BPF_TCP_SOCK_GET_COMMON(lost_out);
5952 break;
5953 case offsetof(struct bpf_tcp_sock, sacked_out):
5954 BPF_TCP_SOCK_GET_COMMON(sacked_out);
5955 break;
5956 case offsetof(struct bpf_tcp_sock, bytes_received):
5957 BPF_TCP_SOCK_GET_COMMON(bytes_received);
5958 break;
5959 case offsetof(struct bpf_tcp_sock, bytes_acked):
5960 BPF_TCP_SOCK_GET_COMMON(bytes_acked);
5961 break;
0357746d
SF
5962 case offsetof(struct bpf_tcp_sock, dsack_dups):
5963 BPF_TCP_SOCK_GET_COMMON(dsack_dups);
5964 break;
5965 case offsetof(struct bpf_tcp_sock, delivered):
5966 BPF_TCP_SOCK_GET_COMMON(delivered);
5967 break;
5968 case offsetof(struct bpf_tcp_sock, delivered_ce):
5969 BPF_TCP_SOCK_GET_COMMON(delivered_ce);
5970 break;
c2cb5e82
SF
5971 case offsetof(struct bpf_tcp_sock, icsk_retransmits):
5972 BPF_INET_SOCK_GET_COMMON(icsk_retransmits);
5973 break;
655a51e5
MKL
5974 }
5975
5976 return insn - insn_buf;
5977}
5978
5979BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
5980{
655a51e5
MKL
5981 if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
5982 return (unsigned long)sk;
5983
5984 return (unsigned long)NULL;
5985}
5986
0d01da6a 5987const struct bpf_func_proto bpf_tcp_sock_proto = {
655a51e5
MKL
5988 .func = bpf_tcp_sock,
5989 .gpl_only = false,
5990 .ret_type = RET_PTR_TO_TCP_SOCK_OR_NULL,
5991 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
5992};
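/* An illustrative cgroup_skb sketch (not part of this file) of
 * bpf_tcp_sock(): take skb->sk (a sock_common pointer, may be NULL),
 * convert it to a bpf_tcp_sock and read fields mapped by
 * bpf_tcp_sock_convert_ctx_access() above. Assumes libbpf's
 * <bpf/bpf_helpers.h>.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup_skb/egress")
int dump_cwnd(struct __sk_buff *skb)
{
	struct bpf_sock *sk = skb->sk;
	struct bpf_tcp_sock *tp;

	if (!sk)
		return 1;
	tp = bpf_tcp_sock(sk);	/* NULL unless sk is a full TCP socket */
	if (!tp)
		return 1;
	bpf_printk("snd_cwnd=%u srtt_us=%u", tp->snd_cwnd, tp->srtt_us);
	return 1;	/* 1 == allow the packet */
}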
5993
dbafd7dd
MKL
5994BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
5995{
5996 sk = sk_to_full_sk(sk);
5997
5998 if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
5999 return (unsigned long)sk;
6000
6001 return (unsigned long)NULL;
6002}
6003
6004static const struct bpf_func_proto bpf_get_listener_sock_proto = {
6005 .func = bpf_get_listener_sock,
6006 .gpl_only = false,
6007 .ret_type = RET_PTR_TO_SOCKET_OR_NULL,
6008 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
6009};
6010
f7c917ba 6011BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
6012{
6013 unsigned int iphdr_len;
6014
d7bf2ebe
THJ
6015 switch (skb_protocol(skb, true)) {
6016 case cpu_to_be16(ETH_P_IP):
f7c917ba 6017 iphdr_len = sizeof(struct iphdr);
d7bf2ebe
THJ
6018 break;
6019 case cpu_to_be16(ETH_P_IPV6):
f7c917ba 6020 iphdr_len = sizeof(struct ipv6hdr);
d7bf2ebe
THJ
6021 break;
6022 default:
f7c917ba 6023 return 0;
d7bf2ebe 6024 }
f7c917ba 6025
6026 if (skb_headlen(skb) < iphdr_len)
6027 return 0;
6028
6029 if (skb_cloned(skb) && !skb_clone_writable(skb, iphdr_len))
6030 return 0;
6031
6032 return INET_ECN_set_ce(skb);
6033}
6034
fada7fdc
JL
6035bool bpf_xdp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
6036 struct bpf_insn_access_aux *info)
6037{
6038 if (off < 0 || off >= offsetofend(struct bpf_xdp_sock, queue_id))
6039 return false;
6040
6041 if (off % size != 0)
6042 return false;
6043
6044 switch (off) {
6045 default:
6046 return size == sizeof(__u32);
6047 }
6048}
6049
6050u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
6051 const struct bpf_insn *si,
6052 struct bpf_insn *insn_buf,
6053 struct bpf_prog *prog, u32 *target_size)
6054{
6055 struct bpf_insn *insn = insn_buf;
6056
6057#define BPF_XDP_SOCK_GET(FIELD) \
6058 do { \
c593642c
PB
6059 BUILD_BUG_ON(sizeof_field(struct xdp_sock, FIELD) > \
6060 sizeof_field(struct bpf_xdp_sock, FIELD)); \
fada7fdc
JL
6061 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_sock, FIELD),\
6062 si->dst_reg, si->src_reg, \
6063 offsetof(struct xdp_sock, FIELD)); \
6064 } while (0)
6065
6066 switch (si->off) {
6067 case offsetof(struct bpf_xdp_sock, queue_id):
6068 BPF_XDP_SOCK_GET(queue_id);
6069 break;
6070 }
6071
6072 return insn - insn_buf;
6073}
6074
f7c917ba 6075static const struct bpf_func_proto bpf_skb_ecn_set_ce_proto = {
6076 .func = bpf_skb_ecn_set_ce,
6077 .gpl_only = false,
6078 .ret_type = RET_INTEGER,
6079 .arg1_type = ARG_PTR_TO_CTX,
6080};
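/* An illustrative cgroup_skb/egress sketch (not part of this file) for
 * bpf_skb_ecn_set_ce(): the helper is a no-op for non-ECT packets, so a
 * program can simply call it when its own congestion signal (elided
 * here) fires. Assumes libbpf's <bpf/bpf_helpers.h>.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("cgroup_skb/egress")
int ecn_mark(struct __sk_buff *skb)
{
	/* in a real program this would be gated on a rate/queue signal */
	bpf_skb_ecn_set_ce(skb);
	return 1;	/* allow */
}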
39904084
LB
6081
6082BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
6083 struct tcphdr *, th, u32, th_len)
6084{
6085#ifdef CONFIG_SYN_COOKIES
6086 u32 cookie;
6087 int ret;
6088
6089 if (unlikely(th_len < sizeof(*th)))
6090 return -EINVAL;
6091
6092 /* sk_listener() allows TCP_NEW_SYN_RECV, which makes no sense here. */
6093 if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
6094 return -EINVAL;
6095
6096 if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
6097 return -EINVAL;
6098
6099 if (!th->ack || th->rst || th->syn)
6100 return -ENOENT;
6101
6102 if (tcp_synq_no_recent_overflow(sk))
6103 return -ENOENT;
6104
6105 cookie = ntohl(th->ack_seq) - 1;
6106
6107 switch (sk->sk_family) {
6108 case AF_INET:
6109 if (unlikely(iph_len < sizeof(struct iphdr)))
6110 return -EINVAL;
6111
6112 ret = __cookie_v4_check((struct iphdr *)iph, th, cookie);
6113 break;
6114
6115#if IS_BUILTIN(CONFIG_IPV6)
6116 case AF_INET6:
6117 if (unlikely(iph_len < sizeof(struct ipv6hdr)))
6118 return -EINVAL;
6119
6120 ret = __cookie_v6_check((struct ipv6hdr *)iph, th, cookie);
6121 break;
6122#endif /* CONFIG_IPV6 */
6123
6124 default:
6125 return -EPROTONOSUPPORT;
6126 }
6127
6128 if (ret > 0)
6129 return 0;
6130
6131 return -ENOENT;
6132#else
6133 return -ENOTSUPP;
6134#endif
6135}
6136
6137static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = {
6138 .func = bpf_tcp_check_syncookie,
6139 .gpl_only = true,
6140 .pkt_access = true,
6141 .ret_type = RET_INTEGER,
6142 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
6143 .arg2_type = ARG_PTR_TO_MEM,
6144 .arg3_type = ARG_CONST_SIZE,
6145 .arg4_type = ARG_PTR_TO_MEM,
6146 .arg5_type = ARG_CONST_SIZE,
6147};
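/* An illustrative fragment (not part of this file) of the
 * bpf_tcp_check_syncookie() flow from a tc program: find the listener
 * with bpf_skc_lookup_tcp() and ask whether the ACK's ack_seq carries a
 * valid syncookie. Packet parsing and bounds checks are elided; assumes
 * libbpf's <bpf/bpf_helpers.h>.
 */
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>

static __always_inline int ack_has_valid_cookie(struct __sk_buff *skb,
						struct bpf_sock_tuple *tuple,
						struct iphdr *iph,
						struct tcphdr *th)
{
	struct bpf_sock *sk;
	int ok = 0;

	sk = bpf_skc_lookup_tcp(skb, tuple, sizeof(tuple->ipv4),
				BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return 0;
	if (sk->state == BPF_TCP_LISTEN)
		ok = bpf_tcp_check_syncookie(sk, iph, sizeof(*iph),
					     th, sizeof(*th)) == 0;
	bpf_sk_release(sk);
	return ok;	/* 1: valid cookie, 0: not valid / not applicable */
}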
6148
70d66244
PP
6149BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len,
6150 struct tcphdr *, th, u32, th_len)
6151{
6152#ifdef CONFIG_SYN_COOKIES
6153 u32 cookie;
6154 u16 mss;
6155
6156 if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4))
6157 return -EINVAL;
6158
6159 if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
6160 return -EINVAL;
6161
6162 if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
6163 return -ENOENT;
6164
6165 if (!th->syn || th->ack || th->fin || th->rst)
6166 return -EINVAL;
6167
6168 if (unlikely(iph_len < sizeof(struct iphdr)))
6169 return -EINVAL;
6170
6171 /* Both struct iphdr and struct ipv6hdr have the version field at the
6172 * same offset so we can cast to the shorter header (struct iphdr).
6173 */
6174 switch (((struct iphdr *)iph)->version) {
6175 case 4:
6176 if (sk->sk_family == AF_INET6 && sk->sk_ipv6only)
6177 return -EINVAL;
6178
6179 mss = tcp_v4_get_syncookie(sk, iph, th, &cookie);
6180 break;
6181
6182#if IS_BUILTIN(CONFIG_IPV6)
6183 case 6:
6184 if (unlikely(iph_len < sizeof(struct ipv6hdr)))
6185 return -EINVAL;
6186
6187 if (sk->sk_family != AF_INET6)
6188 return -EINVAL;
6189
6190 mss = tcp_v6_get_syncookie(sk, iph, th, &cookie);
6191 break;
6192#endif /* CONFIG_IPV6 */
6193
6194 default:
6195 return -EPROTONOSUPPORT;
6196 }
0741be35 6197 if (mss == 0)
70d66244
PP
6198 return -ENOENT;
6199
6200 return cookie | ((u64)mss << 32);
6201#else
6202 return -EOPNOTSUPP;
6203#endif /* CONFIG_SYN_COOKIES */
6204}
6205
6206static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = {
6207 .func = bpf_tcp_gen_syncookie,
6208 .gpl_only = true, /* __cookie_v*_init_sequence() is GPL */
6209 .pkt_access = true,
6210 .ret_type = RET_INTEGER,
6211 .arg1_type = ARG_PTR_TO_SOCK_COMMON,
6212 .arg2_type = ARG_PTR_TO_MEM,
6213 .arg3_type = ARG_CONST_SIZE,
6214 .arg4_type = ARG_PTR_TO_MEM,
6215 .arg5_type = ARG_CONST_SIZE,
6216};
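/* An illustrative XDP-side fragment (not part of this file) for
 * bpf_tcp_gen_syncookie(): given a SYN destined to a known listener, let
 * the kernel compute the cookie and MSS that the program's own SYN-ACK
 * reply (construction elided) should carry. Header pointers are assumed
 * already bounds-checked; assumes libbpf's <bpf/bpf_helpers.h>.
 */
#include <linux/bpf.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>

static __always_inline __s64 syn_cookie_for(struct xdp_md *ctx,
					    struct bpf_sock_tuple *tuple,
					    struct iphdr *iph,
					    struct tcphdr *th)
{
	struct bpf_sock *sk;
	__s64 val = -1;

	sk = bpf_skc_lookup_tcp(ctx, tuple, sizeof(tuple->ipv4),
				BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return -1;
	if (sk->state == BPF_TCP_LISTEN)
		/* low 32 bits: cookie, high 32 bits: MSS */
		val = bpf_tcp_gen_syncookie(sk, iph, sizeof(*iph),
					    th, sizeof(*th));
	bpf_sk_release(sk);
	return val;
}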
6217
cf7fbe66
JS
6218BPF_CALL_3(bpf_sk_assign, struct sk_buff *, skb, struct sock *, sk, u64, flags)
6219{
6220 if (flags != 0)
6221 return -EINVAL;
6222 if (!skb_at_tc_ingress(skb))
6223 return -EOPNOTSUPP;
6224 if (unlikely(dev_net(skb->dev) != sock_net(sk)))
6225 return -ENETUNREACH;
8e368dc7 6226 if (unlikely(sk_fullsock(sk) && sk->sk_reuseport))
cf7fbe66 6227 return -ESOCKTNOSUPPORT;
7ae215d2
JS
6228 if (sk_is_refcounted(sk) &&
6229 unlikely(!refcount_inc_not_zero(&sk->sk_refcnt)))
cf7fbe66
JS
6230 return -ENOENT;
6231
6232 skb_orphan(skb);
6233 skb->sk = sk;
6234 skb->destructor = sock_pfree;
6235
6236 return 0;
6237}
6238
6239static const struct bpf_func_proto bpf_sk_assign_proto = {
6240 .func = bpf_sk_assign,
6241 .gpl_only = false,
6242 .ret_type = RET_INTEGER,
6243 .arg1_type = ARG_PTR_TO_CTX,
6244 .arg2_type = ARG_PTR_TO_SOCK_COMMON,
6245 .arg3_type = ARG_ANYTHING,
6246};
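/* An illustrative tc ingress sketch (not part of this file) of socket
 * steering with bpf_sk_assign(): look up a local socket for the packet
 * (tuple parsing elided) and pre-assign it so the later demux skips the
 * normal lookup. Assumes libbpf's <bpf/bpf_helpers.h>.
 */
#include <linux/bpf.h>
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>

SEC("tc")
int steer_to_socket(struct __sk_buff *skb)
{
	struct bpf_sock_tuple tuple = { };	/* placeholder, parse from headers */
	struct bpf_sock *sk;
	long err;

	sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4),
			       BPF_F_CURRENT_NETNS, 0);
	if (!sk)
		return TC_ACT_OK;
	err = bpf_sk_assign(skb, sk, 0);
	bpf_sk_release(sk);
	return err ? TC_ACT_SHOT : TC_ACT_OK;
}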
6247
0813a841
MKL
6248static const u8 *bpf_search_tcp_opt(const u8 *op, const u8 *opend,
6249 u8 search_kind, const u8 *magic,
6250 u8 magic_len, bool *eol)
6251{
6252 u8 kind, kind_len;
6253
6254 *eol = false;
6255
6256 while (op < opend) {
6257 kind = op[0];
6258
6259 if (kind == TCPOPT_EOL) {
6260 *eol = true;
6261 return ERR_PTR(-ENOMSG);
6262 } else if (kind == TCPOPT_NOP) {
6263 op++;
6264 continue;
6265 }
6266
6267 if (opend - op < 2 || opend - op < op[1] || op[1] < 2)
6268 /* Something is wrong in the received header.
6269 * Follow the TCP stack's tcp_parse_options()
6270 * and just bail here.
6271 */
6272 return ERR_PTR(-EFAULT);
6273
6274 kind_len = op[1];
6275 if (search_kind == kind) {
6276 if (!magic_len)
6277 return op;
6278
6279 if (magic_len > kind_len - 2)
6280 return ERR_PTR(-ENOMSG);
6281
6282 if (!memcmp(&op[2], magic, magic_len))
6283 return op;
6284 }
6285
6286 op += kind_len;
6287 }
6288
6289 return ERR_PTR(-ENOMSG);
6290}
6291
6292BPF_CALL_4(bpf_sock_ops_load_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
6293 void *, search_res, u32, len, u64, flags)
6294{
6295 bool eol, load_syn = flags & BPF_LOAD_HDR_OPT_TCP_SYN;
6296 const u8 *op, *opend, *magic, *search = search_res;
6297 u8 search_kind, search_len, copy_len, magic_len;
6298 int ret;
6299
6300 /* 2 bytes is the minimal option length, except for TCPOPT_NOP and
6301 * TCPOPT_EOL, which are useless for the bpf prog to learn
6302 * and which this helper therefore also disallows loading.
6303 */
6304 if (len < 2 || flags & ~BPF_LOAD_HDR_OPT_TCP_SYN)
6305 return -EINVAL;
6306
6307 search_kind = search[0];
6308 search_len = search[1];
6309
6310 if (search_len > len || search_kind == TCPOPT_NOP ||
6311 search_kind == TCPOPT_EOL)
6312 return -EINVAL;
6313
6314 if (search_kind == TCPOPT_EXP || search_kind == 253) {
6315 /* 16 or 32 bit magic. +2 for kind and kind length */
6316 if (search_len != 4 && search_len != 6)
6317 return -EINVAL;
6318 magic = &search[2];
6319 magic_len = search_len - 2;
6320 } else {
6321 if (search_len)
6322 return -EINVAL;
6323 magic = NULL;
6324 magic_len = 0;
6325 }
6326
6327 if (load_syn) {
6328 ret = bpf_sock_ops_get_syn(bpf_sock, TCP_BPF_SYN, &op);
6329 if (ret < 0)
6330 return ret;
6331
6332 opend = op + ret;
6333 op += sizeof(struct tcphdr);
6334 } else {
6335 if (!bpf_sock->skb ||
6336 bpf_sock->op == BPF_SOCK_OPS_HDR_OPT_LEN_CB)
6337 /* This bpf_sock->op cannot call this helper */
6338 return -EPERM;
6339
6340 opend = bpf_sock->skb_data_end;
6341 op = bpf_sock->skb->data + sizeof(struct tcphdr);
6342 }
6343
6344 op = bpf_search_tcp_opt(op, opend, search_kind, magic, magic_len,
6345 &eol);
6346 if (IS_ERR(op))
6347 return PTR_ERR(op);
6348
6349 copy_len = op[1];
6350 ret = copy_len;
6351 if (copy_len > len) {
6352 ret = -ENOSPC;
6353 copy_len = len;
6354 }
6355
6356 memcpy(search_res, op, copy_len);
6357 return ret;
6358}
6359
6360static const struct bpf_func_proto bpf_sock_ops_load_hdr_opt_proto = {
6361 .func = bpf_sock_ops_load_hdr_opt,
6362 .gpl_only = false,
6363 .ret_type = RET_INTEGER,
6364 .arg1_type = ARG_PTR_TO_CTX,
6365 .arg2_type = ARG_PTR_TO_MEM,
6366 .arg3_type = ARG_CONST_SIZE,
6367 .arg4_type = ARG_ANYTHING,
6368};
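/* An illustrative sock_ops sketch (not part of this file) of
 * bpf_load_hdr_opt(): in the PARSE_HDR_OPT callback, search the received
 * segment for an experimental option (kind 254 with an RFC 6994 style
 * 2-byte magic). The magic value is a placeholder; assumes libbpf's
 * <bpf/bpf_helpers.h> and that the parse callback has been enabled via
 * bpf_sock_ops_cb_flags_set().
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int parse_private_opt(struct bpf_sock_ops *skops)
{
	__u8 opt[8] = { 254, 4, 0xeb, 0x9f };	/* kind, search len, 2-byte magic */
	int ret;

	if (skops->op != BPF_SOCK_OPS_PARSE_HDR_OPT_CB)
		return 1;
	ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), 0);
	if (ret > 0)
		bpf_printk("option found, %d bytes", ret);
	return 1;
}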
6369
6370BPF_CALL_4(bpf_sock_ops_store_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
6371 const void *, from, u32, len, u64, flags)
6372{
6373 u8 new_kind, new_kind_len, magic_len = 0, *opend;
6374 const u8 *op, *new_op, *magic = NULL;
6375 struct sk_buff *skb;
6376 bool eol;
6377
6378 if (bpf_sock->op != BPF_SOCK_OPS_WRITE_HDR_OPT_CB)
6379 return -EPERM;
6380
6381 if (len < 2 || flags)
6382 return -EINVAL;
6383
6384 new_op = from;
6385 new_kind = new_op[0];
6386 new_kind_len = new_op[1];
6387
6388 if (new_kind_len > len || new_kind == TCPOPT_NOP ||
6389 new_kind == TCPOPT_EOL)
6390 return -EINVAL;
6391
6392 if (new_kind_len > bpf_sock->remaining_opt_len)
6393 return -ENOSPC;
6394
6395 /* 253 is another experimental kind */
6396 if (new_kind == TCPOPT_EXP || new_kind == 253) {
6397 if (new_kind_len < 4)
6398 return -EINVAL;
6399 /* Match on the 2-byte magic as well.
6400 * RFC 6994: the magic can be 2 or 4 bytes.
6401 * Hence, matching on only 2 bytes is on the
6402 * conservative side, but it is the right
6403 * thing to do for the 'search-for-duplication'
6404 * purpose.
6405 */
6406 magic = &new_op[2];
6407 magic_len = 2;
6408 }
6409
6410 /* Check for duplication */
6411 skb = bpf_sock->skb;
6412 op = skb->data + sizeof(struct tcphdr);
6413 opend = bpf_sock->skb_data_end;
6414
6415 op = bpf_search_tcp_opt(op, opend, new_kind, magic, magic_len,
6416 &eol);
6417 if (!IS_ERR(op))
6418 return -EEXIST;
6419
6420 if (PTR_ERR(op) != -ENOMSG)
6421 return PTR_ERR(op);
6422
6423 if (eol)
6424 /* The option list has been terminated (EOL). Treat it
6425 * as if no more header options can be written.
6426 */
6427 return -ENOSPC;
6428
6429 /* No duplication found. Store the header option. */
6430 memcpy(opend, from, new_kind_len);
6431
6432 bpf_sock->remaining_opt_len -= new_kind_len;
6433 bpf_sock->skb_data_end += new_kind_len;
6434
6435 return 0;
6436}
6437
6438static const struct bpf_func_proto bpf_sock_ops_store_hdr_opt_proto = {
6439 .func = bpf_sock_ops_store_hdr_opt,
6440 .gpl_only = false,
6441 .ret_type = RET_INTEGER,
6442 .arg1_type = ARG_PTR_TO_CTX,
6443 .arg2_type = ARG_PTR_TO_MEM,
6444 .arg3_type = ARG_CONST_SIZE,
6445 .arg4_type = ARG_ANYTHING,
6446};
6447
6448BPF_CALL_3(bpf_sock_ops_reserve_hdr_opt, struct bpf_sock_ops_kern *, bpf_sock,
6449 u32, len, u64, flags)
6450{
6451 if (bpf_sock->op != BPF_SOCK_OPS_HDR_OPT_LEN_CB)
6452 return -EPERM;
6453
6454 if (flags || len < 2)
6455 return -EINVAL;
6456
6457 if (len > bpf_sock->remaining_opt_len)
6458 return -ENOSPC;
6459
6460 bpf_sock->remaining_opt_len -= len;
6461
6462 return 0;
6463}
6464
6465static const struct bpf_func_proto bpf_sock_ops_reserve_hdr_opt_proto = {
6466 .func = bpf_sock_ops_reserve_hdr_opt,
6467 .gpl_only = false,
6468 .ret_type = RET_INTEGER,
6469 .arg1_type = ARG_PTR_TO_CTX,
6470 .arg2_type = ARG_ANYTHING,
6471 .arg3_type = ARG_ANYTHING,
6472};
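/* An illustrative sock_ops sketch (not part of this file) of the
 * reserve/store pairing enforced above: reserve room for a private
 * option in the HDR_OPT_LEN callback, then write it in the
 * WRITE_HDR_OPT callback. Kind 254 and the 2-byte magic are
 * placeholders; assumes libbpf's <bpf/bpf_helpers.h> and that the
 * write-header-option callbacks have been enabled via
 * bpf_sock_ops_cb_flags_set().
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int write_private_opt(struct bpf_sock_ops *skops)
{
	__u8 opt[4] = { 254, 4, 0xeb, 0x9f };	/* kind, len, 2-byte magic */

	switch (skops->op) {
	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
		bpf_reserve_hdr_opt(skops, sizeof(opt), 0);
		break;
	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
		bpf_store_hdr_opt(skops, opt, sizeof(opt), 0);
		break;
	}
	return 1;
}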
6473
df3f94a0 6474#endif /* CONFIG_INET */
6acc9b43 6475
fe94cc29
MX
6476bool bpf_helper_changes_pkt_data(void *func)
6477{
6478 if (func == bpf_skb_vlan_push ||
6479 func == bpf_skb_vlan_pop ||
6480 func == bpf_skb_store_bytes ||
6481 func == bpf_skb_change_proto ||
6482 func == bpf_skb_change_head ||
0ea488ff 6483 func == sk_skb_change_head ||
fe94cc29 6484 func == bpf_skb_change_tail ||
0ea488ff 6485 func == sk_skb_change_tail ||
fe94cc29
MX
6486 func == bpf_skb_adjust_room ||
6487 func == bpf_skb_pull_data ||
0ea488ff 6488 func == sk_skb_pull_data ||
fe94cc29
MX
6489 func == bpf_clone_redirect ||
6490 func == bpf_l3_csum_replace ||
6491 func == bpf_l4_csum_replace ||
6492 func == bpf_xdp_adjust_head ||
6493 func == bpf_xdp_adjust_meta ||
6494 func == bpf_msg_pull_data ||
6fff607e 6495 func == bpf_msg_push_data ||
7246d8ed 6496 func == bpf_msg_pop_data ||
fe94cc29 6497 func == bpf_xdp_adjust_tail ||
61d76980 6498#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
fe94cc29
MX
6499 func == bpf_lwt_seg6_store_bytes ||
6500 func == bpf_lwt_seg6_adjust_srh ||
61d76980 6501 func == bpf_lwt_seg6_action ||
0813a841
MKL
6502#endif
6503#ifdef CONFIG_INET
6504 func == bpf_sock_ops_store_hdr_opt ||
61d76980 6505#endif
3e0bd37c
PO
6506 func == bpf_lwt_in_push_encap ||
6507 func == bpf_lwt_xmit_push_encap)
fe94cc29
MX
6508 return true;
6509
6510 return false;
6511}
6512
6890896b 6513const struct bpf_func_proto bpf_event_output_data_proto __weak;
f7c6cb1d 6514const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto __weak;
89aa0758 6515
ae2cf1c4 6516static const struct bpf_func_proto *
5e43f899 6517sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
ae2cf1c4
DA
6518{
6519 switch (func_id) {
6520 /* inet and inet6 sockets are created in a process
6521 * context so there is always a valid uid/gid
6522 */
6523 case BPF_FUNC_get_current_uid_gid:
6524 return &bpf_get_current_uid_gid_proto;
cd339431
RG
6525 case BPF_FUNC_get_local_storage:
6526 return &bpf_get_local_storage_proto;
0e53d9e5
DB
6527 case BPF_FUNC_get_socket_cookie:
6528 return &bpf_get_socket_cookie_sock_proto;
f318903c
DB
6529 case BPF_FUNC_get_netns_cookie:
6530 return &bpf_get_netns_cookie_sock_proto;
fcf752ea
DB
6531 case BPF_FUNC_perf_event_output:
6532 return &bpf_event_output_data_proto;
834ebca8
DB
6533 case BPF_FUNC_get_current_pid_tgid:
6534 return &bpf_get_current_pid_tgid_proto;
6535 case BPF_FUNC_get_current_comm:
6536 return &bpf_get_current_comm_proto;
0f09abd1
DB
6537#ifdef CONFIG_CGROUPS
6538 case BPF_FUNC_get_current_cgroup_id:
6539 return &bpf_get_current_cgroup_id_proto;
6540 case BPF_FUNC_get_current_ancestor_cgroup_id:
6541 return &bpf_get_current_ancestor_cgroup_id_proto;
6542#endif
5a52ae4e
DB
6543#ifdef CONFIG_CGROUP_NET_CLASSID
6544 case BPF_FUNC_get_cgroup_classid:
6545 return &bpf_get_cgroup_classid_curr_proto;
6546#endif
f7c6cb1d
SF
6547 case BPF_FUNC_sk_storage_get:
6548 return &bpf_sk_storage_get_cg_sock_proto;
ae2cf1c4
DA
6549 default:
6550 return bpf_base_func_proto(func_id);
6551 }
6552}
6553
4fbac77d
AI
6554static const struct bpf_func_proto *
6555sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6556{
6557 switch (func_id) {
6558 /* inet and inet6 sockets are created in a process
6559 * context so there is always a valid uid/gid
6560 */
6561 case BPF_FUNC_get_current_uid_gid:
6562 return &bpf_get_current_uid_gid_proto;
d74bad4e
AI
6563 case BPF_FUNC_bind:
6564 switch (prog->expected_attach_type) {
6565 case BPF_CGROUP_INET4_CONNECT:
6566 case BPF_CGROUP_INET6_CONNECT:
6567 return &bpf_bind_proto;
6568 default:
6569 return NULL;
6570 }
d692f113
AI
6571 case BPF_FUNC_get_socket_cookie:
6572 return &bpf_get_socket_cookie_sock_addr_proto;
f318903c
DB
6573 case BPF_FUNC_get_netns_cookie:
6574 return &bpf_get_netns_cookie_sock_addr_proto;
cd339431
RG
6575 case BPF_FUNC_get_local_storage:
6576 return &bpf_get_local_storage_proto;
fcf752ea
DB
6577 case BPF_FUNC_perf_event_output:
6578 return &bpf_event_output_data_proto;
834ebca8
DB
6579 case BPF_FUNC_get_current_pid_tgid:
6580 return &bpf_get_current_pid_tgid_proto;
6581 case BPF_FUNC_get_current_comm:
6582 return &bpf_get_current_comm_proto;
0f09abd1
DB
6583#ifdef CONFIG_CGROUPS
6584 case BPF_FUNC_get_current_cgroup_id:
6585 return &bpf_get_current_cgroup_id_proto;
6586 case BPF_FUNC_get_current_ancestor_cgroup_id:
6587 return &bpf_get_current_ancestor_cgroup_id_proto;
6588#endif
5a52ae4e
DB
6589#ifdef CONFIG_CGROUP_NET_CLASSID
6590 case BPF_FUNC_get_cgroup_classid:
6591 return &bpf_get_cgroup_classid_curr_proto;
6592#endif
6c49e65e
AI
6593#ifdef CONFIG_INET
6594 case BPF_FUNC_sk_lookup_tcp:
6595 return &bpf_sock_addr_sk_lookup_tcp_proto;
6596 case BPF_FUNC_sk_lookup_udp:
6597 return &bpf_sock_addr_sk_lookup_udp_proto;
6598 case BPF_FUNC_sk_release:
6599 return &bpf_sk_release_proto;
edbf8c01
LB
6600 case BPF_FUNC_skc_lookup_tcp:
6601 return &bpf_sock_addr_skc_lookup_tcp_proto;
6c49e65e 6602#endif /* CONFIG_INET */
fb85c4a7
SF
6603 case BPF_FUNC_sk_storage_get:
6604 return &bpf_sk_storage_get_proto;
6605 case BPF_FUNC_sk_storage_delete:
6606 return &bpf_sk_storage_delete_proto;
beecf11b
SF
6607 case BPF_FUNC_setsockopt:
6608 switch (prog->expected_attach_type) {
6609 case BPF_CGROUP_INET4_CONNECT:
6610 case BPF_CGROUP_INET6_CONNECT:
6611 return &bpf_sock_addr_setsockopt_proto;
6612 default:
6613 return NULL;
6614 }
6615 case BPF_FUNC_getsockopt:
6616 switch (prog->expected_attach_type) {
6617 case BPF_CGROUP_INET4_CONNECT:
6618 case BPF_CGROUP_INET6_CONNECT:
6619 return &bpf_sock_addr_getsockopt_proto;
6620 default:
6621 return NULL;
6622 }
4fbac77d 6623 default:
1df8f55a 6624 return bpf_sk_base_func_proto(func_id);
4fbac77d
AI
6625 }
6626}
6627
2492d3b8 6628static const struct bpf_func_proto *
5e43f899 6629sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
2492d3b8
DB
6630{
6631 switch (func_id) {
6632 case BPF_FUNC_skb_load_bytes:
6633 return &bpf_skb_load_bytes_proto;
4e1ec56c
DB
6634 case BPF_FUNC_skb_load_bytes_relative:
6635 return &bpf_skb_load_bytes_relative_proto;
91b8270f
CF
6636 case BPF_FUNC_get_socket_cookie:
6637 return &bpf_get_socket_cookie_proto;
6acc5c29
CF
6638 case BPF_FUNC_get_socket_uid:
6639 return &bpf_get_socket_uid_proto;
7c4b90d7
AZ
6640 case BPF_FUNC_perf_event_output:
6641 return &bpf_skb_event_output_proto;
2492d3b8 6642 default:
1df8f55a 6643 return bpf_sk_base_func_proto(func_id);
2492d3b8
DB
6644 }
6645}
6646
6ac99e8f
MKL
6647const struct bpf_func_proto bpf_sk_storage_get_proto __weak;
6648const struct bpf_func_proto bpf_sk_storage_delete_proto __weak;
6649
cd339431
RG
6650static const struct bpf_func_proto *
6651cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6652{
6653 switch (func_id) {
6654 case BPF_FUNC_get_local_storage:
6655 return &bpf_get_local_storage_proto;
46f8bc92
MKL
6656 case BPF_FUNC_sk_fullsock:
6657 return &bpf_sk_fullsock_proto;
6ac99e8f
MKL
6658 case BPF_FUNC_sk_storage_get:
6659 return &bpf_sk_storage_get_proto;
6660 case BPF_FUNC_sk_storage_delete:
6661 return &bpf_sk_storage_delete_proto;
7c4b90d7
AZ
6662 case BPF_FUNC_perf_event_output:
6663 return &bpf_skb_event_output_proto;
4ecabd55
RG
6664#ifdef CONFIG_SOCK_CGROUP_DATA
6665 case BPF_FUNC_skb_cgroup_id:
6666 return &bpf_skb_cgroup_id_proto;
06d3e4c9
AI
6667 case BPF_FUNC_skb_ancestor_cgroup_id:
6668 return &bpf_skb_ancestor_cgroup_id_proto;
f307fa2c
AI
6669 case BPF_FUNC_sk_cgroup_id:
6670 return &bpf_sk_cgroup_id_proto;
6671 case BPF_FUNC_sk_ancestor_cgroup_id:
6672 return &bpf_sk_ancestor_cgroup_id_proto;
4ecabd55 6673#endif
655a51e5 6674#ifdef CONFIG_INET
d56c2f95
AI
6675 case BPF_FUNC_sk_lookup_tcp:
6676 return &bpf_sk_lookup_tcp_proto;
6677 case BPF_FUNC_sk_lookup_udp:
6678 return &bpf_sk_lookup_udp_proto;
6679 case BPF_FUNC_sk_release:
6680 return &bpf_sk_release_proto;
6681 case BPF_FUNC_skc_lookup_tcp:
6682 return &bpf_skc_lookup_tcp_proto;
655a51e5
MKL
6683 case BPF_FUNC_tcp_sock:
6684 return &bpf_tcp_sock_proto;
dbafd7dd
MKL
6685 case BPF_FUNC_get_listener_sock:
6686 return &bpf_get_listener_sock_proto;
f7c917ba 6687 case BPF_FUNC_skb_ecn_set_ce:
6688 return &bpf_skb_ecn_set_ce_proto;
655a51e5 6689#endif
cd339431
RG
6690 default:
6691 return sk_filter_func_proto(func_id, prog);
6692 }
6693}
6694
608cd71a 6695static const struct bpf_func_proto *
5e43f899 6696tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
608cd71a
AS
6697{
6698 switch (func_id) {
6699 case BPF_FUNC_skb_store_bytes:
6700 return &bpf_skb_store_bytes_proto;
05c74e5e
DB
6701 case BPF_FUNC_skb_load_bytes:
6702 return &bpf_skb_load_bytes_proto;
4e1ec56c
DB
6703 case BPF_FUNC_skb_load_bytes_relative:
6704 return &bpf_skb_load_bytes_relative_proto;
36bbef52
DB
6705 case BPF_FUNC_skb_pull_data:
6706 return &bpf_skb_pull_data_proto;
7d672345
DB
6707 case BPF_FUNC_csum_diff:
6708 return &bpf_csum_diff_proto;
36bbef52
DB
6709 case BPF_FUNC_csum_update:
6710 return &bpf_csum_update_proto;
7cdec54f
DB
6711 case BPF_FUNC_csum_level:
6712 return &bpf_csum_level_proto;
91bc4822
AS
6713 case BPF_FUNC_l3_csum_replace:
6714 return &bpf_l3_csum_replace_proto;
6715 case BPF_FUNC_l4_csum_replace:
6716 return &bpf_l4_csum_replace_proto;
3896d655
AS
6717 case BPF_FUNC_clone_redirect:
6718 return &bpf_clone_redirect_proto;
8d20aabe
DB
6719 case BPF_FUNC_get_cgroup_classid:
6720 return &bpf_get_cgroup_classid_proto;
4e10df9a
AS
6721 case BPF_FUNC_skb_vlan_push:
6722 return &bpf_skb_vlan_push_proto;
6723 case BPF_FUNC_skb_vlan_pop:
6724 return &bpf_skb_vlan_pop_proto;
6578171a
DB
6725 case BPF_FUNC_skb_change_proto:
6726 return &bpf_skb_change_proto_proto;
d2485c42
DB
6727 case BPF_FUNC_skb_change_type:
6728 return &bpf_skb_change_type_proto;
2be7e212
DB
6729 case BPF_FUNC_skb_adjust_room:
6730 return &bpf_skb_adjust_room_proto;
5293efe6
DB
6731 case BPF_FUNC_skb_change_tail:
6732 return &bpf_skb_change_tail_proto;
6f3f65d8
LC
6733 case BPF_FUNC_skb_change_head:
6734 return &bpf_skb_change_head_proto;
d3aa45ce
AS
6735 case BPF_FUNC_skb_get_tunnel_key:
6736 return &bpf_skb_get_tunnel_key_proto;
6737 case BPF_FUNC_skb_set_tunnel_key:
14ca0751
DB
6738 return bpf_get_skb_set_tunnel_proto(func_id);
6739 case BPF_FUNC_skb_get_tunnel_opt:
6740 return &bpf_skb_get_tunnel_opt_proto;
6741 case BPF_FUNC_skb_set_tunnel_opt:
6742 return bpf_get_skb_set_tunnel_proto(func_id);
27b29f63
AS
6743 case BPF_FUNC_redirect:
6744 return &bpf_redirect_proto;
c46646d0
DB
6745 case BPF_FUNC_get_route_realm:
6746 return &bpf_get_route_realm_proto;
13c5c240
DB
6747 case BPF_FUNC_get_hash_recalc:
6748 return &bpf_get_hash_recalc_proto;
7a4b28c6
DB
6749 case BPF_FUNC_set_hash_invalid:
6750 return &bpf_set_hash_invalid_proto;
ded092cd
DB
6751 case BPF_FUNC_set_hash:
6752 return &bpf_set_hash_proto;
bd570ff9 6753 case BPF_FUNC_perf_event_output:
555c8a86 6754 return &bpf_skb_event_output_proto;
80b48c44
DB
6755 case BPF_FUNC_get_smp_processor_id:
6756 return &bpf_get_smp_processor_id_proto;
747ea55e
DB
6757 case BPF_FUNC_skb_under_cgroup:
6758 return &bpf_skb_under_cgroup_proto;
91b8270f
CF
6759 case BPF_FUNC_get_socket_cookie:
6760 return &bpf_get_socket_cookie_proto;
6acc5c29
CF
6761 case BPF_FUNC_get_socket_uid:
6762 return &bpf_get_socket_uid_proto;
cb20b08e
DB
6763 case BPF_FUNC_fib_lookup:
6764 return &bpf_skb_fib_lookup_proto;
46f8bc92
MKL
6765 case BPF_FUNC_sk_fullsock:
6766 return &bpf_sk_fullsock_proto;
6ac99e8f
MKL
6767 case BPF_FUNC_sk_storage_get:
6768 return &bpf_sk_storage_get_proto;
6769 case BPF_FUNC_sk_storage_delete:
6770 return &bpf_sk_storage_delete_proto;
12bed760
EB
6771#ifdef CONFIG_XFRM
6772 case BPF_FUNC_skb_get_xfrm_state:
6773 return &bpf_skb_get_xfrm_state_proto;
6774#endif
cb20b08e
DB
6775#ifdef CONFIG_SOCK_CGROUP_DATA
6776 case BPF_FUNC_skb_cgroup_id:
6777 return &bpf_skb_cgroup_id_proto;
77236281
AI
6778 case BPF_FUNC_skb_ancestor_cgroup_id:
6779 return &bpf_skb_ancestor_cgroup_id_proto;
cb20b08e 6780#endif
df3f94a0 6781#ifdef CONFIG_INET
6acc9b43
JS
6782 case BPF_FUNC_sk_lookup_tcp:
6783 return &bpf_sk_lookup_tcp_proto;
6784 case BPF_FUNC_sk_lookup_udp:
6785 return &bpf_sk_lookup_udp_proto;
6786 case BPF_FUNC_sk_release:
6787 return &bpf_sk_release_proto;
655a51e5
MKL
6788 case BPF_FUNC_tcp_sock:
6789 return &bpf_tcp_sock_proto;
dbafd7dd
MKL
6790 case BPF_FUNC_get_listener_sock:
6791 return &bpf_get_listener_sock_proto;
edbf8c01
LB
6792 case BPF_FUNC_skc_lookup_tcp:
6793 return &bpf_skc_lookup_tcp_proto;
39904084
LB
6794 case BPF_FUNC_tcp_check_syncookie:
6795 return &bpf_tcp_check_syncookie_proto;
315a2029
PO
6796 case BPF_FUNC_skb_ecn_set_ce:
6797 return &bpf_skb_ecn_set_ce_proto;
70d66244
PP
6798 case BPF_FUNC_tcp_gen_syncookie:
6799 return &bpf_tcp_gen_syncookie_proto;
cf7fbe66
JS
6800 case BPF_FUNC_sk_assign:
6801 return &bpf_sk_assign_proto;
df3f94a0 6802#endif
608cd71a 6803 default:
1df8f55a 6804 return bpf_sk_base_func_proto(func_id);
608cd71a
AS
6805 }
6806}
6807
6a773a15 6808static const struct bpf_func_proto *
5e43f899 6809xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6a773a15 6810{
4de16969
DB
6811 switch (func_id) {
6812 case BPF_FUNC_perf_event_output:
6813 return &bpf_xdp_event_output_proto;
669dc4d7
DB
6814 case BPF_FUNC_get_smp_processor_id:
6815 return &bpf_get_smp_processor_id_proto;
205c3807
DB
6816 case BPF_FUNC_csum_diff:
6817 return &bpf_csum_diff_proto;
17bedab2
MKL
6818 case BPF_FUNC_xdp_adjust_head:
6819 return &bpf_xdp_adjust_head_proto;
de8f3a83
DB
6820 case BPF_FUNC_xdp_adjust_meta:
6821 return &bpf_xdp_adjust_meta_proto;
814abfab
JF
6822 case BPF_FUNC_redirect:
6823 return &bpf_xdp_redirect_proto;
97f91a7c 6824 case BPF_FUNC_redirect_map:
e4a8e817 6825 return &bpf_xdp_redirect_map_proto;
b32cc5b9
NS
6826 case BPF_FUNC_xdp_adjust_tail:
6827 return &bpf_xdp_adjust_tail_proto;
87f5fc7e
DA
6828 case BPF_FUNC_fib_lookup:
6829 return &bpf_xdp_fib_lookup_proto;
c8123ead
NH
6830#ifdef CONFIG_INET
6831 case BPF_FUNC_sk_lookup_udp:
6832 return &bpf_xdp_sk_lookup_udp_proto;
6833 case BPF_FUNC_sk_lookup_tcp:
6834 return &bpf_xdp_sk_lookup_tcp_proto;
6835 case BPF_FUNC_sk_release:
6836 return &bpf_sk_release_proto;
edbf8c01
LB
6837 case BPF_FUNC_skc_lookup_tcp:
6838 return &bpf_xdp_skc_lookup_tcp_proto;
39904084
LB
6839 case BPF_FUNC_tcp_check_syncookie:
6840 return &bpf_tcp_check_syncookie_proto;
70d66244
PP
6841 case BPF_FUNC_tcp_gen_syncookie:
6842 return &bpf_tcp_gen_syncookie_proto;
c8123ead 6843#endif
4de16969 6844 default:
1df8f55a 6845 return bpf_sk_base_func_proto(func_id);
4de16969 6846 }
6a773a15
BB
6847}
6848
604326b4
DB
6849const struct bpf_func_proto bpf_sock_map_update_proto __weak;
6850const struct bpf_func_proto bpf_sock_hash_update_proto __weak;
6851
8c4b4c7e 6852static const struct bpf_func_proto *
5e43f899 6853sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
8c4b4c7e
LB
6854{
6855 switch (func_id) {
6856 case BPF_FUNC_setsockopt:
beecf11b 6857 return &bpf_sock_ops_setsockopt_proto;
cd86d1fd 6858 case BPF_FUNC_getsockopt:
beecf11b 6859 return &bpf_sock_ops_getsockopt_proto;
b13d8807
LB
6860 case BPF_FUNC_sock_ops_cb_flags_set:
6861 return &bpf_sock_ops_cb_flags_set_proto;
174a79ff
JF
6862 case BPF_FUNC_sock_map_update:
6863 return &bpf_sock_map_update_proto;
81110384
JF
6864 case BPF_FUNC_sock_hash_update:
6865 return &bpf_sock_hash_update_proto;
d692f113
AI
6866 case BPF_FUNC_get_socket_cookie:
6867 return &bpf_get_socket_cookie_sock_ops_proto;
cd339431
RG
6868 case BPF_FUNC_get_local_storage:
6869 return &bpf_get_local_storage_proto;
a5a3a828 6870 case BPF_FUNC_perf_event_output:
fcf752ea 6871 return &bpf_event_output_data_proto;
1314ef56
SF
6872 case BPF_FUNC_sk_storage_get:
6873 return &bpf_sk_storage_get_proto;
6874 case BPF_FUNC_sk_storage_delete:
6875 return &bpf_sk_storage_delete_proto;
6876#ifdef CONFIG_INET
0813a841
MKL
6877 case BPF_FUNC_load_hdr_opt:
6878 return &bpf_sock_ops_load_hdr_opt_proto;
6879 case BPF_FUNC_store_hdr_opt:
6880 return &bpf_sock_ops_store_hdr_opt_proto;
6881 case BPF_FUNC_reserve_hdr_opt:
6882 return &bpf_sock_ops_reserve_hdr_opt_proto;
1314ef56
SF
6883 case BPF_FUNC_tcp_sock:
6884 return &bpf_tcp_sock_proto;
6885#endif /* CONFIG_INET */
8c4b4c7e 6886 default:
1df8f55a 6887 return bpf_sk_base_func_proto(func_id);
8c4b4c7e
LB
6888 }
6889}
6890
604326b4
DB
6891const struct bpf_func_proto bpf_msg_redirect_map_proto __weak;
6892const struct bpf_func_proto bpf_msg_redirect_hash_proto __weak;
6893
5e43f899
AI
6894static const struct bpf_func_proto *
6895sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
4f738adb
JF
6896{
6897 switch (func_id) {
6898 case BPF_FUNC_msg_redirect_map:
6899 return &bpf_msg_redirect_map_proto;
81110384
JF
6900 case BPF_FUNC_msg_redirect_hash:
6901 return &bpf_msg_redirect_hash_proto;
2a100317
JF
6902 case BPF_FUNC_msg_apply_bytes:
6903 return &bpf_msg_apply_bytes_proto;
91843d54
JF
6904 case BPF_FUNC_msg_cork_bytes:
6905 return &bpf_msg_cork_bytes_proto;
015632bb
JF
6906 case BPF_FUNC_msg_pull_data:
6907 return &bpf_msg_pull_data_proto;
6fff607e
JF
6908 case BPF_FUNC_msg_push_data:
6909 return &bpf_msg_push_data_proto;
7246d8ed
JF
6910 case BPF_FUNC_msg_pop_data:
6911 return &bpf_msg_pop_data_proto;
abe3cac8
JF
6912 case BPF_FUNC_perf_event_output:
6913 return &bpf_event_output_data_proto;
6914 case BPF_FUNC_get_current_uid_gid:
6915 return &bpf_get_current_uid_gid_proto;
6916 case BPF_FUNC_get_current_pid_tgid:
6917 return &bpf_get_current_pid_tgid_proto;
13d70f5a
JF
6918 case BPF_FUNC_sk_storage_get:
6919 return &bpf_sk_storage_get_proto;
6920 case BPF_FUNC_sk_storage_delete:
6921 return &bpf_sk_storage_delete_proto;
abe3cac8
JF
6922#ifdef CONFIG_CGROUPS
6923 case BPF_FUNC_get_current_cgroup_id:
6924 return &bpf_get_current_cgroup_id_proto;
6925 case BPF_FUNC_get_current_ancestor_cgroup_id:
6926 return &bpf_get_current_ancestor_cgroup_id_proto;
6927#endif
6928#ifdef CONFIG_CGROUP_NET_CLASSID
6929 case BPF_FUNC_get_cgroup_classid:
6930 return &bpf_get_cgroup_classid_curr_proto;
6931#endif
4f738adb 6932 default:
1df8f55a 6933 return bpf_sk_base_func_proto(func_id);
4f738adb
JF
6934 }
6935}
6936
604326b4
DB
6937const struct bpf_func_proto bpf_sk_redirect_map_proto __weak;
6938const struct bpf_func_proto bpf_sk_redirect_hash_proto __weak;
6939
5e43f899
AI
6940static const struct bpf_func_proto *
6941sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
b005fd18
JF
6942{
6943 switch (func_id) {
8a31db56
JF
6944 case BPF_FUNC_skb_store_bytes:
6945 return &bpf_skb_store_bytes_proto;
b005fd18
JF
6946 case BPF_FUNC_skb_load_bytes:
6947 return &bpf_skb_load_bytes_proto;
8a31db56 6948 case BPF_FUNC_skb_pull_data:
0ea488ff 6949 return &sk_skb_pull_data_proto;
8a31db56 6950 case BPF_FUNC_skb_change_tail:
0ea488ff 6951 return &sk_skb_change_tail_proto;
8a31db56 6952 case BPF_FUNC_skb_change_head:
0ea488ff 6953 return &sk_skb_change_head_proto;
b005fd18
JF
6954 case BPF_FUNC_get_socket_cookie:
6955 return &bpf_get_socket_cookie_proto;
6956 case BPF_FUNC_get_socket_uid:
6957 return &bpf_get_socket_uid_proto;
174a79ff
JF
6958 case BPF_FUNC_sk_redirect_map:
6959 return &bpf_sk_redirect_map_proto;
81110384
JF
6960 case BPF_FUNC_sk_redirect_hash:
6961 return &bpf_sk_redirect_hash_proto;
7c4b90d7
AZ
6962 case BPF_FUNC_perf_event_output:
6963 return &bpf_skb_event_output_proto;
df3f94a0 6964#ifdef CONFIG_INET
6acc9b43
JS
6965 case BPF_FUNC_sk_lookup_tcp:
6966 return &bpf_sk_lookup_tcp_proto;
6967 case BPF_FUNC_sk_lookup_udp:
6968 return &bpf_sk_lookup_udp_proto;
6969 case BPF_FUNC_sk_release:
6970 return &bpf_sk_release_proto;
edbf8c01
LB
6971 case BPF_FUNC_skc_lookup_tcp:
6972 return &bpf_skc_lookup_tcp_proto;
df3f94a0 6973#endif
b005fd18 6974 default:
1df8f55a 6975 return bpf_sk_base_func_proto(func_id);
b005fd18
JF
6976 }
6977}
6978
d58e468b
PP
6979static const struct bpf_func_proto *
6980flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6981{
6982 switch (func_id) {
6983 case BPF_FUNC_skb_load_bytes:
089b19a9 6984 return &bpf_flow_dissector_load_bytes_proto;
d58e468b 6985 default:
1df8f55a 6986 return bpf_sk_base_func_proto(func_id);
d58e468b
PP
6987 }
6988}
6989
cd3092c7
MX
6990static const struct bpf_func_proto *
6991lwt_out_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
6992{
6993 switch (func_id) {
6994 case BPF_FUNC_skb_load_bytes:
6995 return &bpf_skb_load_bytes_proto;
6996 case BPF_FUNC_skb_pull_data:
6997 return &bpf_skb_pull_data_proto;
6998 case BPF_FUNC_csum_diff:
6999 return &bpf_csum_diff_proto;
7000 case BPF_FUNC_get_cgroup_classid:
7001 return &bpf_get_cgroup_classid_proto;
7002 case BPF_FUNC_get_route_realm:
7003 return &bpf_get_route_realm_proto;
7004 case BPF_FUNC_get_hash_recalc:
7005 return &bpf_get_hash_recalc_proto;
7006 case BPF_FUNC_perf_event_output:
7007 return &bpf_skb_event_output_proto;
7008 case BPF_FUNC_get_smp_processor_id:
7009 return &bpf_get_smp_processor_id_proto;
7010 case BPF_FUNC_skb_under_cgroup:
7011 return &bpf_skb_under_cgroup_proto;
7012 default:
1df8f55a 7013 return bpf_sk_base_func_proto(func_id);
cd3092c7
MX
7014 }
7015}
7016
7017static const struct bpf_func_proto *
7018lwt_in_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7019{
7020 switch (func_id) {
7021 case BPF_FUNC_lwt_push_encap:
3e0bd37c 7022 return &bpf_lwt_in_push_encap_proto;
cd3092c7
MX
7023 default:
7024 return lwt_out_func_proto(func_id, prog);
7025 }
7026}
7027
3a0af8fd 7028static const struct bpf_func_proto *
5e43f899 7029lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3a0af8fd
TG
7030{
7031 switch (func_id) {
7032 case BPF_FUNC_skb_get_tunnel_key:
7033 return &bpf_skb_get_tunnel_key_proto;
7034 case BPF_FUNC_skb_set_tunnel_key:
7035 return bpf_get_skb_set_tunnel_proto(func_id);
7036 case BPF_FUNC_skb_get_tunnel_opt:
7037 return &bpf_skb_get_tunnel_opt_proto;
7038 case BPF_FUNC_skb_set_tunnel_opt:
7039 return bpf_get_skb_set_tunnel_proto(func_id);
7040 case BPF_FUNC_redirect:
7041 return &bpf_redirect_proto;
7042 case BPF_FUNC_clone_redirect:
7043 return &bpf_clone_redirect_proto;
7044 case BPF_FUNC_skb_change_tail:
7045 return &bpf_skb_change_tail_proto;
7046 case BPF_FUNC_skb_change_head:
7047 return &bpf_skb_change_head_proto;
7048 case BPF_FUNC_skb_store_bytes:
7049 return &bpf_skb_store_bytes_proto;
7050 case BPF_FUNC_csum_update:
7051 return &bpf_csum_update_proto;
7cdec54f
DB
7052 case BPF_FUNC_csum_level:
7053 return &bpf_csum_level_proto;
3a0af8fd
TG
7054 case BPF_FUNC_l3_csum_replace:
7055 return &bpf_l3_csum_replace_proto;
7056 case BPF_FUNC_l4_csum_replace:
7057 return &bpf_l4_csum_replace_proto;
7058 case BPF_FUNC_set_hash_invalid:
7059 return &bpf_set_hash_invalid_proto;
3e0bd37c
PO
7060 case BPF_FUNC_lwt_push_encap:
7061 return &bpf_lwt_xmit_push_encap_proto;
3a0af8fd 7062 default:
cd3092c7 7063 return lwt_out_func_proto(func_id, prog);
3a0af8fd
TG
7064 }
7065}
7066
004d4b27
MX
7067static const struct bpf_func_proto *
7068lwt_seg6local_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
7069{
7070 switch (func_id) {
61d76980 7071#if IS_ENABLED(CONFIG_IPV6_SEG6_BPF)
004d4b27
MX
7072 case BPF_FUNC_lwt_seg6_store_bytes:
7073 return &bpf_lwt_seg6_store_bytes_proto;
7074 case BPF_FUNC_lwt_seg6_action:
7075 return &bpf_lwt_seg6_action_proto;
7076 case BPF_FUNC_lwt_seg6_adjust_srh:
7077 return &bpf_lwt_seg6_adjust_srh_proto;
61d76980 7078#endif
004d4b27
MX
7079 default:
7080 return lwt_out_func_proto(func_id, prog);
3a0af8fd
TG
7081 }
7082}
7083
f96da094 7084static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
5e43f899 7085 const struct bpf_prog *prog,
f96da094 7086 struct bpf_insn_access_aux *info)
23994631 7087{
f96da094 7088 const int size_default = sizeof(__u32);
23994631 7089
9bac3d6d
AS
7090 if (off < 0 || off >= sizeof(struct __sk_buff))
7091 return false;
62c7989b 7092
4936e352 7093 /* The verifier guarantees that size > 0. */
9bac3d6d
AS
7094 if (off % size != 0)
7095 return false;
62c7989b
DB
7096
7097 switch (off) {
f96da094
DB
7098 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
7099 if (off + size > offsetofend(struct __sk_buff, cb[4]))
62c7989b
DB
7100 return false;
7101 break;
8a31db56
JF
7102 case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
7103 case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
7104 case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
7105 case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
f96da094 7106 case bpf_ctx_range(struct __sk_buff, data):
de8f3a83 7107 case bpf_ctx_range(struct __sk_buff, data_meta):
f96da094
DB
7108 case bpf_ctx_range(struct __sk_buff, data_end):
7109 if (size != size_default)
23994631 7110 return false;
31fd8581 7111 break;
b7df9ada 7112 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
089b19a9 7113 return false;
f11216b2
VD
7114 case bpf_ctx_range(struct __sk_buff, tstamp):
7115 if (size != sizeof(__u64))
7116 return false;
7117 break;
46f8bc92
MKL
7118 case offsetof(struct __sk_buff, sk):
7119 if (type == BPF_WRITE || size != sizeof(__u64))
7120 return false;
7121 info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
7122 break;
31fd8581 7123 default:
f96da094 7124 /* Only narrow read access allowed for now. */
31fd8581 7125 if (type == BPF_WRITE) {
f96da094 7126 if (size != size_default)
31fd8581
YS
7127 return false;
7128 } else {
f96da094
DB
7129 bpf_ctx_record_field_size(info, size_default);
7130 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
23994631 7131 return false;
31fd8581 7132 }
62c7989b 7133 }
9bac3d6d
AS
7134
7135 return true;
7136}
7137
d691f9e8 7138static bool sk_filter_is_valid_access(int off, int size,
19de99f7 7139 enum bpf_access_type type,
5e43f899 7140 const struct bpf_prog *prog,
23994631 7141 struct bpf_insn_access_aux *info)
d691f9e8 7142{
db58ba45 7143 switch (off) {
f96da094
DB
7144 case bpf_ctx_range(struct __sk_buff, tc_classid):
7145 case bpf_ctx_range(struct __sk_buff, data):
de8f3a83 7146 case bpf_ctx_range(struct __sk_buff, data_meta):
f96da094 7147 case bpf_ctx_range(struct __sk_buff, data_end):
8a31db56 7148 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
f11216b2 7149 case bpf_ctx_range(struct __sk_buff, tstamp):
e3da08d0 7150 case bpf_ctx_range(struct __sk_buff, wire_len):
045efa82 7151 return false;
db58ba45 7152 }
045efa82 7153
d691f9e8
AS
7154 if (type == BPF_WRITE) {
7155 switch (off) {
f96da094 7156 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
d691f9e8
AS
7157 break;
7158 default:
7159 return false;
7160 }
7161 }
7162
5e43f899 7163 return bpf_skb_is_valid_access(off, size, type, prog, info);
d691f9e8
AS
7164}
7165
b39b5f41
SL
7166static bool cg_skb_is_valid_access(int off, int size,
7167 enum bpf_access_type type,
7168 const struct bpf_prog *prog,
7169 struct bpf_insn_access_aux *info)
7170{
7171 switch (off) {
7172 case bpf_ctx_range(struct __sk_buff, tc_classid):
7173 case bpf_ctx_range(struct __sk_buff, data_meta):
e3da08d0 7174 case bpf_ctx_range(struct __sk_buff, wire_len):
b39b5f41 7175 return false;
ab21c1b5
DB
7176 case bpf_ctx_range(struct __sk_buff, data):
7177 case bpf_ctx_range(struct __sk_buff, data_end):
2c78ee89 7178 if (!bpf_capable())
ab21c1b5
DB
7179 return false;
7180 break;
b39b5f41 7181 }
ab21c1b5 7182
b39b5f41
SL
7183 if (type == BPF_WRITE) {
7184 switch (off) {
7185 case bpf_ctx_range(struct __sk_buff, mark):
7186 case bpf_ctx_range(struct __sk_buff, priority):
7187 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
7188 break;
f11216b2 7189 case bpf_ctx_range(struct __sk_buff, tstamp):
2c78ee89 7190 if (!bpf_capable())
f11216b2
VD
7191 return false;
7192 break;
b39b5f41
SL
7193 default:
7194 return false;
7195 }
7196 }
7197
7198 switch (off) {
7199 case bpf_ctx_range(struct __sk_buff, data):
7200 info->reg_type = PTR_TO_PACKET;
7201 break;
7202 case bpf_ctx_range(struct __sk_buff, data_end):
7203 info->reg_type = PTR_TO_PACKET_END;
7204 break;
7205 }
7206
7207 return bpf_skb_is_valid_access(off, size, type, prog, info);
7208}
7209
3a0af8fd
TG
7210static bool lwt_is_valid_access(int off, int size,
7211 enum bpf_access_type type,
5e43f899 7212 const struct bpf_prog *prog,
23994631 7213 struct bpf_insn_access_aux *info)
3a0af8fd
TG
7214{
7215 switch (off) {
f96da094 7216 case bpf_ctx_range(struct __sk_buff, tc_classid):
8a31db56 7217 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
de8f3a83 7218 case bpf_ctx_range(struct __sk_buff, data_meta):
f11216b2 7219 case bpf_ctx_range(struct __sk_buff, tstamp):
e3da08d0 7220 case bpf_ctx_range(struct __sk_buff, wire_len):
3a0af8fd
TG
7221 return false;
7222 }
7223
7224 if (type == BPF_WRITE) {
7225 switch (off) {
f96da094
DB
7226 case bpf_ctx_range(struct __sk_buff, mark):
7227 case bpf_ctx_range(struct __sk_buff, priority):
7228 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
3a0af8fd
TG
7229 break;
7230 default:
7231 return false;
7232 }
7233 }
7234
f96da094
DB
7235 switch (off) {
7236 case bpf_ctx_range(struct __sk_buff, data):
7237 info->reg_type = PTR_TO_PACKET;
7238 break;
7239 case bpf_ctx_range(struct __sk_buff, data_end):
7240 info->reg_type = PTR_TO_PACKET_END;
7241 break;
7242 }
7243
5e43f899 7244 return bpf_skb_is_valid_access(off, size, type, prog, info);
3a0af8fd
TG
7245}
7246
aac3fc32
AI
7247/* Attach type specific accesses */
7248static bool __sock_filter_check_attach_type(int off,
7249 enum bpf_access_type access_type,
7250 enum bpf_attach_type attach_type)
61023658 7251{
aac3fc32
AI
7252 switch (off) {
7253 case offsetof(struct bpf_sock, bound_dev_if):
7254 case offsetof(struct bpf_sock, mark):
7255 case offsetof(struct bpf_sock, priority):
7256 switch (attach_type) {
7257 case BPF_CGROUP_INET_SOCK_CREATE:
f5836749 7258 case BPF_CGROUP_INET_SOCK_RELEASE:
aac3fc32
AI
7259 goto full_access;
7260 default:
7261 return false;
7262 }
7263 case bpf_ctx_range(struct bpf_sock, src_ip4):
7264 switch (attach_type) {
7265 case BPF_CGROUP_INET4_POST_BIND:
7266 goto read_only;
7267 default:
7268 return false;
7269 }
7270 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
7271 switch (attach_type) {
7272 case BPF_CGROUP_INET6_POST_BIND:
7273 goto read_only;
7274 default:
7275 return false;
7276 }
7277 case bpf_ctx_range(struct bpf_sock, src_port):
7278 switch (attach_type) {
7279 case BPF_CGROUP_INET4_POST_BIND:
7280 case BPF_CGROUP_INET6_POST_BIND:
7281 goto read_only;
61023658
DA
7282 default:
7283 return false;
7284 }
7285 }
aac3fc32
AI
7286read_only:
7287 return access_type == BPF_READ;
7288full_access:
7289 return true;
7290}
7291
46f8bc92
MKL
7292bool bpf_sock_common_is_valid_access(int off, int size,
7293 enum bpf_access_type type,
aac3fc32
AI
7294 struct bpf_insn_access_aux *info)
7295{
aac3fc32 7296 switch (off) {
46f8bc92
MKL
7297 case bpf_ctx_range_till(struct bpf_sock, type, priority):
7298 return false;
7299 default:
7300 return bpf_sock_is_valid_access(off, size, type, info);
aac3fc32 7301 }
aac3fc32
AI
7302}
7303
c64b7983
JS
7304bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
7305 struct bpf_insn_access_aux *info)
aac3fc32 7306{
aa65d696
MKL
7307 const int size_default = sizeof(__u32);
7308
aac3fc32 7309 if (off < 0 || off >= sizeof(struct bpf_sock))
61023658 7310 return false;
61023658
DA
7311 if (off % size != 0)
7312 return false;
aa65d696
MKL
7313
7314 switch (off) {
7315 case offsetof(struct bpf_sock, state):
7316 case offsetof(struct bpf_sock, family):
7317 case offsetof(struct bpf_sock, type):
7318 case offsetof(struct bpf_sock, protocol):
7319 case offsetof(struct bpf_sock, dst_port):
7320 case offsetof(struct bpf_sock, src_port):
c3c16f2e 7321 case offsetof(struct bpf_sock, rx_queue_mapping):
aa65d696
MKL
7322 case bpf_ctx_range(struct bpf_sock, src_ip4):
7323 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
7324 case bpf_ctx_range(struct bpf_sock, dst_ip4):
7325 case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
7326 bpf_ctx_record_field_size(info, size_default);
7327 return bpf_ctx_narrow_access_ok(off, size, size_default);
7328 }
7329
7330 return size == size_default;
61023658
DA
7331}
7332
c64b7983
JS
7333static bool sock_filter_is_valid_access(int off, int size,
7334 enum bpf_access_type type,
7335 const struct bpf_prog *prog,
7336 struct bpf_insn_access_aux *info)
7337{
7338 if (!bpf_sock_is_valid_access(off, size, type, info))
7339 return false;
7340 return __sock_filter_check_attach_type(off, type,
7341 prog->expected_attach_type);
7342}
7343
b09928b9
DB
7344static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
7345 const struct bpf_prog *prog)
7346{
7347 /* Neither direct read nor direct write requires any preliminary
7348 * action.
7349 */
7350 return 0;
7351}
7352
047b0ecd
DB
7353static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
7354 const struct bpf_prog *prog, int drop_verdict)
36bbef52
DB
7355{
7356 struct bpf_insn *insn = insn_buf;
7357
7358 if (!direct_write)
7359 return 0;
7360
7361 /* if (!skb->cloned)
7362 * goto start;
7363 *
7364	 * (Fast path; otherwise we conservatively assume that we might be
7365	 * a clone and do the rest in the helper.)
7366 */
7367 *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
7368 *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
7369 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
7370
7371 /* ret = bpf_skb_pull_data(skb, 0); */
7372 *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
7373 *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
7374 *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
7375 BPF_FUNC_skb_pull_data);
7376 /* if (!ret)
7377 * goto restore;
7378 * return TC_ACT_SHOT;
7379 */
7380 *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
047b0ecd 7381 *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
36bbef52
DB
7382 *insn++ = BPF_EXIT_INSN();
7383
7384 /* restore: */
7385 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
7386 /* start: */
7387 *insn++ = prog->insnsi[0];
7388
7389 return insn - insn_buf;
7390}
7391
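/* Illustrative sketch (not part of the original source, not emitted code):
 * taken together, the prologue built above behaves roughly as if the
 * following C were prepended to the program, with drop_verdict being
 * TC_ACT_SHOT for tc_cls_act programs and SK_DROP for sk_skb programs:
 *
 *	if (skb->cloned) {
 *		if (bpf_skb_pull_data(skb, 0))
 *			return drop_verdict;
 *	}
 *	... fall through to the first original instruction ...
 */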
e0cea7ce
DB
7392static int bpf_gen_ld_abs(const struct bpf_insn *orig,
7393 struct bpf_insn *insn_buf)
7394{
7395 bool indirect = BPF_MODE(orig->code) == BPF_IND;
7396 struct bpf_insn *insn = insn_buf;
7397
e0cea7ce
DB
7398 if (!indirect) {
7399 *insn++ = BPF_MOV64_IMM(BPF_REG_2, orig->imm);
7400 } else {
7401 *insn++ = BPF_MOV64_REG(BPF_REG_2, orig->src_reg);
7402 if (orig->imm)
7403 *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, orig->imm);
7404 }
e6a18d36
DB
7405 /* We're guaranteed here that CTX is in R6. */
7406 *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_CTX);
e0cea7ce
DB
7407
7408 switch (BPF_SIZE(orig->code)) {
7409 case BPF_B:
7410 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_8_no_cache);
7411 break;
7412 case BPF_H:
7413 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_16_no_cache);
7414 break;
7415 case BPF_W:
7416 *insn++ = BPF_EMIT_CALL(bpf_skb_load_helper_32_no_cache);
7417 break;
7418 }
7419
7420 *insn++ = BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 2);
7421 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_0);
7422 *insn++ = BPF_EXIT_INSN();
7423
7424 return insn - insn_buf;
7425}
7426
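/* Illustrative sketch (not part of the original source, not emitted code):
 * the instructions generated above implement the classic LD_ABS/LD_IND
 * semantics roughly as
 *
 *	off = imm;                        for LD_ABS
 *	off = src_reg + imm;              for LD_IND
 *	ret = bpf_skb_load_helper_{8,16,32}_no_cache(skb, off);
 *	if (ret < 0)
 *		return 0;                 program exits with R0 == 0
 *	R0 = ret;                         otherwise continue with the result
 */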
047b0ecd
DB
7427static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
7428 const struct bpf_prog *prog)
7429{
7430 return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
7431}
7432
d691f9e8 7433static bool tc_cls_act_is_valid_access(int off, int size,
19de99f7 7434 enum bpf_access_type type,
5e43f899 7435 const struct bpf_prog *prog,
23994631 7436 struct bpf_insn_access_aux *info)
d691f9e8
AS
7437{
7438 if (type == BPF_WRITE) {
7439 switch (off) {
f96da094
DB
7440 case bpf_ctx_range(struct __sk_buff, mark):
7441 case bpf_ctx_range(struct __sk_buff, tc_index):
7442 case bpf_ctx_range(struct __sk_buff, priority):
7443 case bpf_ctx_range(struct __sk_buff, tc_classid):
7444 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
f11216b2 7445 case bpf_ctx_range(struct __sk_buff, tstamp):
74e31ca8 7446 case bpf_ctx_range(struct __sk_buff, queue_mapping):
d691f9e8
AS
7447 break;
7448 default:
7449 return false;
7450 }
7451 }
19de99f7 7452
f96da094
DB
7453 switch (off) {
7454 case bpf_ctx_range(struct __sk_buff, data):
7455 info->reg_type = PTR_TO_PACKET;
7456 break;
de8f3a83
DB
7457 case bpf_ctx_range(struct __sk_buff, data_meta):
7458 info->reg_type = PTR_TO_PACKET_META;
7459 break;
f96da094
DB
7460 case bpf_ctx_range(struct __sk_buff, data_end):
7461 info->reg_type = PTR_TO_PACKET_END;
7462 break;
8a31db56
JF
7463 case bpf_ctx_range_till(struct __sk_buff, family, local_port):
7464 return false;
f96da094
DB
7465 }
7466
5e43f899 7467 return bpf_skb_is_valid_access(off, size, type, prog, info);
d691f9e8
AS
7468}
7469
1afaf661 7470static bool __is_valid_xdp_access(int off, int size)
6a773a15
BB
7471{
7472 if (off < 0 || off >= sizeof(struct xdp_md))
7473 return false;
7474 if (off % size != 0)
7475 return false;
6088b582 7476 if (size != sizeof(__u32))
6a773a15
BB
7477 return false;
7478
7479 return true;
7480}
7481
7482static bool xdp_is_valid_access(int off, int size,
7483 enum bpf_access_type type,
5e43f899 7484 const struct bpf_prog *prog,
23994631 7485 struct bpf_insn_access_aux *info)
6a773a15 7486{
64b59025
DA
7487 if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
7488 switch (off) {
7489 case offsetof(struct xdp_md, egress_ifindex):
7490 return false;
7491 }
7492 }
7493
0d830032
JK
7494 if (type == BPF_WRITE) {
7495 if (bpf_prog_is_dev_bound(prog->aux)) {
7496 switch (off) {
7497 case offsetof(struct xdp_md, rx_queue_index):
7498 return __is_valid_xdp_access(off, size);
7499 }
7500 }
6a773a15 7501 return false;
0d830032 7502 }
6a773a15
BB
7503
7504 switch (off) {
7505 case offsetof(struct xdp_md, data):
23994631 7506 info->reg_type = PTR_TO_PACKET;
6a773a15 7507 break;
de8f3a83
DB
7508 case offsetof(struct xdp_md, data_meta):
7509 info->reg_type = PTR_TO_PACKET_META;
7510 break;
6a773a15 7511 case offsetof(struct xdp_md, data_end):
23994631 7512 info->reg_type = PTR_TO_PACKET_END;
6a773a15
BB
7513 break;
7514 }
7515
1afaf661 7516 return __is_valid_xdp_access(off, size);
6a773a15
BB
7517}
7518
7519void bpf_warn_invalid_xdp_action(u32 act)
7520{
9beb8bed
DB
7521 const u32 act_max = XDP_REDIRECT;
7522
7523 WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
7524 act > act_max ? "Illegal" : "Driver unsupported",
7525 act);
6a773a15
BB
7526}
7527EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
7528
4fbac77d
AI
7529static bool sock_addr_is_valid_access(int off, int size,
7530 enum bpf_access_type type,
7531 const struct bpf_prog *prog,
7532 struct bpf_insn_access_aux *info)
7533{
7534 const int size_default = sizeof(__u32);
7535
7536 if (off < 0 || off >= sizeof(struct bpf_sock_addr))
7537 return false;
7538 if (off % size != 0)
7539 return false;
7540
7541	/* Disallow access to IPv6 fields from IPv4 context and vice
7542	 * versa.
7543 */
7544 switch (off) {
7545 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
7546 switch (prog->expected_attach_type) {
7547 case BPF_CGROUP_INET4_BIND:
d74bad4e 7548 case BPF_CGROUP_INET4_CONNECT:
1b66d253
DB
7549 case BPF_CGROUP_INET4_GETPEERNAME:
7550 case BPF_CGROUP_INET4_GETSOCKNAME:
1cedee13 7551 case BPF_CGROUP_UDP4_SENDMSG:
983695fa 7552 case BPF_CGROUP_UDP4_RECVMSG:
4fbac77d
AI
7553 break;
7554 default:
7555 return false;
7556 }
7557 break;
7558 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
7559 switch (prog->expected_attach_type) {
7560 case BPF_CGROUP_INET6_BIND:
d74bad4e 7561 case BPF_CGROUP_INET6_CONNECT:
1b66d253
DB
7562 case BPF_CGROUP_INET6_GETPEERNAME:
7563 case BPF_CGROUP_INET6_GETSOCKNAME:
1cedee13 7564 case BPF_CGROUP_UDP6_SENDMSG:
983695fa 7565 case BPF_CGROUP_UDP6_RECVMSG:
1cedee13
AI
7566 break;
7567 default:
7568 return false;
7569 }
7570 break;
7571 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
7572 switch (prog->expected_attach_type) {
7573 case BPF_CGROUP_UDP4_SENDMSG:
7574 break;
7575 default:
7576 return false;
7577 }
7578 break;
7579 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
7580 msg_src_ip6[3]):
7581 switch (prog->expected_attach_type) {
7582 case BPF_CGROUP_UDP6_SENDMSG:
4fbac77d
AI
7583 break;
7584 default:
7585 return false;
7586 }
7587 break;
7588 }
7589
7590 switch (off) {
7591 case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
7592 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
1cedee13
AI
7593 case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
7594 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
7595 msg_src_ip6[3]):
7aebfa1b 7596 case bpf_ctx_range(struct bpf_sock_addr, user_port):
4fbac77d
AI
7597 if (type == BPF_READ) {
7598 bpf_ctx_record_field_size(info, size_default);
d4ecfeb1
SF
7599
7600 if (bpf_ctx_wide_access_ok(off, size,
7601 struct bpf_sock_addr,
7602 user_ip6))
7603 return true;
7604
7605 if (bpf_ctx_wide_access_ok(off, size,
7606 struct bpf_sock_addr,
7607 msg_src_ip6))
7608 return true;
7609
4fbac77d
AI
7610 if (!bpf_ctx_narrow_access_ok(off, size, size_default))
7611 return false;
7612 } else {
b4399546
SF
7613 if (bpf_ctx_wide_access_ok(off, size,
7614 struct bpf_sock_addr,
7615 user_ip6))
600c70ba
SF
7616 return true;
7617
b4399546
SF
7618 if (bpf_ctx_wide_access_ok(off, size,
7619 struct bpf_sock_addr,
7620 msg_src_ip6))
600c70ba
SF
7621 return true;
7622
4fbac77d
AI
7623 if (size != size_default)
7624 return false;
7625 }
7626 break;
fb85c4a7
SF
7627 case offsetof(struct bpf_sock_addr, sk):
7628 if (type != BPF_READ)
7629 return false;
7630 if (size != sizeof(__u64))
7631 return false;
7632 info->reg_type = PTR_TO_SOCKET;
7633 break;
4fbac77d
AI
7634 default:
7635 if (type == BPF_READ) {
7636 if (size != size_default)
7637 return false;
7638 } else {
7639 return false;
7640 }
7641 }
7642
7643 return true;
7644}
7645
44f0e430
LB
7646static bool sock_ops_is_valid_access(int off, int size,
7647 enum bpf_access_type type,
5e43f899 7648 const struct bpf_prog *prog,
44f0e430 7649 struct bpf_insn_access_aux *info)
40304b2a 7650{
44f0e430
LB
7651 const int size_default = sizeof(__u32);
7652
40304b2a
LB
7653 if (off < 0 || off >= sizeof(struct bpf_sock_ops))
7654 return false;
44f0e430 7655
40304b2a
LB
7656 /* The verifier guarantees that size > 0. */
7657 if (off % size != 0)
7658 return false;
40304b2a 7659
40304b2a
LB
7660 if (type == BPF_WRITE) {
7661 switch (off) {
2585cd62 7662 case offsetof(struct bpf_sock_ops, reply):
6f9bd3d7 7663 case offsetof(struct bpf_sock_ops, sk_txhash):
44f0e430
LB
7664 if (size != size_default)
7665 return false;
40304b2a
LB
7666 break;
7667 default:
7668 return false;
7669 }
44f0e430
LB
7670 } else {
7671 switch (off) {
7672 case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
7673 bytes_acked):
7674 if (size != sizeof(__u64))
7675 return false;
7676 break;
1314ef56
SF
7677 case offsetof(struct bpf_sock_ops, sk):
7678 if (size != sizeof(__u64))
7679 return false;
7680 info->reg_type = PTR_TO_SOCKET_OR_NULL;
7681 break;
0813a841
MKL
7682 case offsetof(struct bpf_sock_ops, skb_data):
7683 if (size != sizeof(__u64))
7684 return false;
7685 info->reg_type = PTR_TO_PACKET;
7686 break;
7687 case offsetof(struct bpf_sock_ops, skb_data_end):
7688 if (size != sizeof(__u64))
7689 return false;
7690 info->reg_type = PTR_TO_PACKET_END;
7691 break;
7692 case offsetof(struct bpf_sock_ops, skb_tcp_flags):
7693 bpf_ctx_record_field_size(info, size_default);
7694 return bpf_ctx_narrow_access_ok(off, size,
7695 size_default);
44f0e430
LB
7696 default:
7697 if (size != size_default)
7698 return false;
7699 break;
7700 }
40304b2a
LB
7701 }
7702
44f0e430 7703 return true;
40304b2a
LB
7704}
7705
8a31db56
JF
7706static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
7707 const struct bpf_prog *prog)
7708{
047b0ecd 7709 return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
8a31db56
JF
7710}
7711
b005fd18
JF
7712static bool sk_skb_is_valid_access(int off, int size,
7713 enum bpf_access_type type,
5e43f899 7714 const struct bpf_prog *prog,
b005fd18
JF
7715 struct bpf_insn_access_aux *info)
7716{
de8f3a83
DB
7717 switch (off) {
7718 case bpf_ctx_range(struct __sk_buff, tc_classid):
7719 case bpf_ctx_range(struct __sk_buff, data_meta):
f11216b2 7720 case bpf_ctx_range(struct __sk_buff, tstamp):
e3da08d0 7721 case bpf_ctx_range(struct __sk_buff, wire_len):
de8f3a83
DB
7722 return false;
7723 }
7724
8a31db56
JF
7725 if (type == BPF_WRITE) {
7726 switch (off) {
8a31db56
JF
7727 case bpf_ctx_range(struct __sk_buff, tc_index):
7728 case bpf_ctx_range(struct __sk_buff, priority):
7729 break;
7730 default:
7731 return false;
7732 }
7733 }
7734
b005fd18 7735 switch (off) {
f7e9cb1e 7736 case bpf_ctx_range(struct __sk_buff, mark):
8a31db56 7737 return false;
b005fd18
JF
7738 case bpf_ctx_range(struct __sk_buff, data):
7739 info->reg_type = PTR_TO_PACKET;
7740 break;
7741 case bpf_ctx_range(struct __sk_buff, data_end):
7742 info->reg_type = PTR_TO_PACKET_END;
7743 break;
7744 }
7745
5e43f899 7746 return bpf_skb_is_valid_access(off, size, type, prog, info);
b005fd18
JF
7747}
7748
4f738adb
JF
7749static bool sk_msg_is_valid_access(int off, int size,
7750 enum bpf_access_type type,
5e43f899 7751 const struct bpf_prog *prog,
4f738adb
JF
7752 struct bpf_insn_access_aux *info)
7753{
7754 if (type == BPF_WRITE)
7755 return false;
7756
bc1b4f01
JF
7757 if (off % size != 0)
7758 return false;
7759
4f738adb
JF
7760 switch (off) {
7761 case offsetof(struct sk_msg_md, data):
7762 info->reg_type = PTR_TO_PACKET;
303def35
JF
7763 if (size != sizeof(__u64))
7764 return false;
4f738adb
JF
7765 break;
7766 case offsetof(struct sk_msg_md, data_end):
7767 info->reg_type = PTR_TO_PACKET_END;
303def35
JF
7768 if (size != sizeof(__u64))
7769 return false;
4f738adb 7770 break;
13d70f5a
JF
7771 case offsetof(struct sk_msg_md, sk):
7772 if (size != sizeof(__u64))
7773 return false;
7774 info->reg_type = PTR_TO_SOCKET;
7775 break;
bc1b4f01
JF
7776 case bpf_ctx_range(struct sk_msg_md, family):
7777 case bpf_ctx_range(struct sk_msg_md, remote_ip4):
7778 case bpf_ctx_range(struct sk_msg_md, local_ip4):
7779 case bpf_ctx_range_till(struct sk_msg_md, remote_ip6[0], remote_ip6[3]):
7780 case bpf_ctx_range_till(struct sk_msg_md, local_ip6[0], local_ip6[3]):
7781 case bpf_ctx_range(struct sk_msg_md, remote_port):
7782 case bpf_ctx_range(struct sk_msg_md, local_port):
7783 case bpf_ctx_range(struct sk_msg_md, size):
303def35
JF
7784 if (size != sizeof(__u32))
7785 return false;
bc1b4f01
JF
7786 break;
7787 default:
4f738adb 7788 return false;
bc1b4f01 7789 }
4f738adb
JF
7790 return true;
7791}
7792
d58e468b
PP
7793static bool flow_dissector_is_valid_access(int off, int size,
7794 enum bpf_access_type type,
7795 const struct bpf_prog *prog,
7796 struct bpf_insn_access_aux *info)
7797{
089b19a9
SF
7798 const int size_default = sizeof(__u32);
7799
7800 if (off < 0 || off >= sizeof(struct __sk_buff))
7801 return false;
7802
2ee7fba0
SF
7803 if (type == BPF_WRITE)
7804 return false;
d58e468b
PP
7805
7806 switch (off) {
7807 case bpf_ctx_range(struct __sk_buff, data):
089b19a9
SF
7808 if (size != size_default)
7809 return false;
d58e468b 7810 info->reg_type = PTR_TO_PACKET;
089b19a9 7811 return true;
d58e468b 7812 case bpf_ctx_range(struct __sk_buff, data_end):
089b19a9
SF
7813 if (size != size_default)
7814 return false;
d58e468b 7815 info->reg_type = PTR_TO_PACKET_END;
089b19a9 7816 return true;
b7df9ada 7817 case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
089b19a9
SF
7818 if (size != sizeof(__u64))
7819 return false;
d58e468b 7820 info->reg_type = PTR_TO_FLOW_KEYS;
089b19a9 7821 return true;
2ee7fba0 7822 default:
d58e468b
PP
7823 return false;
7824 }
089b19a9 7825}
d58e468b 7826
089b19a9
SF
7827static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type,
7828 const struct bpf_insn *si,
7829 struct bpf_insn *insn_buf,
7830 struct bpf_prog *prog,
7831 u32 *target_size)
7832
7833{
7834 struct bpf_insn *insn = insn_buf;
7835
7836 switch (si->off) {
7837 case offsetof(struct __sk_buff, data):
7838 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data),
7839 si->dst_reg, si->src_reg,
7840 offsetof(struct bpf_flow_dissector, data));
7841 break;
7842
7843 case offsetof(struct __sk_buff, data_end):
7844 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end),
7845 si->dst_reg, si->src_reg,
7846 offsetof(struct bpf_flow_dissector, data_end));
7847 break;
7848
7849 case offsetof(struct __sk_buff, flow_keys):
7850 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys),
7851 si->dst_reg, si->src_reg,
7852 offsetof(struct bpf_flow_dissector, flow_keys));
7853 break;
7854 }
7855
7856 return insn - insn_buf;
d58e468b
PP
7857}
7858
cf62089b
WB
7859static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si,
7860 struct bpf_insn *insn)
7861{
7862 /* si->dst_reg = skb_shinfo(SKB); */
7863#ifdef NET_SKBUFF_DATA_USES_OFFSET
7864 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
7865 BPF_REG_AX, si->src_reg,
7866 offsetof(struct sk_buff, end));
7867 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head),
7868 si->dst_reg, si->src_reg,
7869 offsetof(struct sk_buff, head));
7870 *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX);
7871#else
7872 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end),
7873 si->dst_reg, si->src_reg,
7874 offsetof(struct sk_buff, end));
7875#endif
7876
7877 return insn;
7878}
7879
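/* Illustrative note (not part of the original source): the two branches
 * above compute what the skb_shinfo() macro computes in C, i.e. roughly
 *
 *	shinfo = skb->head + skb->end;    with NET_SKBUFF_DATA_USES_OFFSET
 *	shinfo = skb->end;                otherwise
 */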
2492d3b8
DB
7880static u32 bpf_convert_ctx_access(enum bpf_access_type type,
7881 const struct bpf_insn *si,
7882 struct bpf_insn *insn_buf,
f96da094 7883 struct bpf_prog *prog, u32 *target_size)
9bac3d6d
AS
7884{
7885 struct bpf_insn *insn = insn_buf;
6b8cc1d1 7886 int off;
9bac3d6d 7887
6b8cc1d1 7888 switch (si->off) {
9bac3d6d 7889 case offsetof(struct __sk_buff, len):
6b8cc1d1 7890 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
7891 bpf_target_off(struct sk_buff, len, 4,
7892 target_size));
9bac3d6d
AS
7893 break;
7894
0b8c707d 7895 case offsetof(struct __sk_buff, protocol):
6b8cc1d1 7896 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
7897 bpf_target_off(struct sk_buff, protocol, 2,
7898 target_size));
0b8c707d
DB
7899 break;
7900
27cd5452 7901 case offsetof(struct __sk_buff, vlan_proto):
6b8cc1d1 7902 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
7903 bpf_target_off(struct sk_buff, vlan_proto, 2,
7904 target_size));
27cd5452
MS
7905 break;
7906
bcad5718 7907 case offsetof(struct __sk_buff, priority):
754f1e6a 7908 if (type == BPF_WRITE)
6b8cc1d1 7909 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
7910 bpf_target_off(struct sk_buff, priority, 4,
7911 target_size));
754f1e6a 7912 else
6b8cc1d1 7913 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
7914 bpf_target_off(struct sk_buff, priority, 4,
7915 target_size));
bcad5718
DB
7916 break;
7917
37e82c2f 7918 case offsetof(struct __sk_buff, ingress_ifindex):
6b8cc1d1 7919 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
7920 bpf_target_off(struct sk_buff, skb_iif, 4,
7921 target_size));
37e82c2f
AS
7922 break;
7923
7924 case offsetof(struct __sk_buff, ifindex):
f035a515 7925 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
6b8cc1d1 7926 si->dst_reg, si->src_reg,
37e82c2f 7927 offsetof(struct sk_buff, dev));
6b8cc1d1
DB
7928 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
7929 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
f96da094
DB
7930 bpf_target_off(struct net_device, ifindex, 4,
7931 target_size));
37e82c2f
AS
7932 break;
7933
ba7591d8 7934 case offsetof(struct __sk_buff, hash):
6b8cc1d1 7935 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
7936 bpf_target_off(struct sk_buff, hash, 4,
7937 target_size));
ba7591d8
DB
7938 break;
7939
9bac3d6d 7940 case offsetof(struct __sk_buff, mark):
d691f9e8 7941 if (type == BPF_WRITE)
6b8cc1d1 7942 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
7943 bpf_target_off(struct sk_buff, mark, 4,
7944 target_size));
d691f9e8 7945 else
6b8cc1d1 7946 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
7947 bpf_target_off(struct sk_buff, mark, 4,
7948 target_size));
d691f9e8 7949 break;
9bac3d6d
AS
7950
7951 case offsetof(struct __sk_buff, pkt_type):
f96da094
DB
7952 *target_size = 1;
7953 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
7954 PKT_TYPE_OFFSET());
7955 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
7956#ifdef __BIG_ENDIAN_BITFIELD
7957 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
7958#endif
7959 break;
9bac3d6d
AS
7960
7961 case offsetof(struct __sk_buff, queue_mapping):
74e31ca8
JDB
7962 if (type == BPF_WRITE) {
7963 *insn++ = BPF_JMP_IMM(BPF_JGE, si->src_reg, NO_QUEUE_MAPPING, 1);
7964 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
7965 bpf_target_off(struct sk_buff,
7966 queue_mapping,
7967 2, target_size));
7968 } else {
7969 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
7970 bpf_target_off(struct sk_buff,
7971 queue_mapping,
7972 2, target_size));
7973 }
f96da094 7974 break;
c2497395 7975
c2497395 7976 case offsetof(struct __sk_buff, vlan_present):
9c212255
MM
7977 *target_size = 1;
7978 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
7979 PKT_VLAN_PRESENT_OFFSET());
7980 if (PKT_VLAN_PRESENT_BIT)
7981 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, PKT_VLAN_PRESENT_BIT);
7982 if (PKT_VLAN_PRESENT_BIT < 7)
7983 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
7984 break;
f96da094 7985
9c212255 7986 case offsetof(struct __sk_buff, vlan_tci):
f96da094
DB
7987 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
7988 bpf_target_off(struct sk_buff, vlan_tci, 2,
7989 target_size));
f96da094 7990 break;
d691f9e8
AS
7991
7992 case offsetof(struct __sk_buff, cb[0]) ...
f96da094 7993 offsetofend(struct __sk_buff, cb[4]) - 1:
c593642c 7994 BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, data) < 20);
62c7989b
DB
7995 BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
7996 offsetof(struct qdisc_skb_cb, data)) %
7997 sizeof(__u64));
d691f9e8 7998
ff936a04 7999 prog->cb_access = 1;
6b8cc1d1
DB
8000 off = si->off;
8001 off -= offsetof(struct __sk_buff, cb[0]);
8002 off += offsetof(struct sk_buff, cb);
8003 off += offsetof(struct qdisc_skb_cb, data);
d691f9e8 8004 if (type == BPF_WRITE)
62c7989b 8005 *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
6b8cc1d1 8006 si->src_reg, off);
d691f9e8 8007 else
62c7989b 8008 *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
6b8cc1d1 8009 si->src_reg, off);
d691f9e8
AS
8010 break;
8011
045efa82 8012 case offsetof(struct __sk_buff, tc_classid):
c593642c 8013 BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, tc_classid) != 2);
6b8cc1d1
DB
8014
8015 off = si->off;
8016 off -= offsetof(struct __sk_buff, tc_classid);
8017 off += offsetof(struct sk_buff, cb);
8018 off += offsetof(struct qdisc_skb_cb, tc_classid);
f96da094 8019 *target_size = 2;
09c37a2c 8020 if (type == BPF_WRITE)
6b8cc1d1
DB
8021 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
8022 si->src_reg, off);
09c37a2c 8023 else
6b8cc1d1
DB
8024 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
8025 si->src_reg, off);
045efa82
DB
8026 break;
8027
db58ba45 8028 case offsetof(struct __sk_buff, data):
f035a515 8029 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
6b8cc1d1 8030 si->dst_reg, si->src_reg,
db58ba45
AS
8031 offsetof(struct sk_buff, data));
8032 break;
8033
de8f3a83
DB
8034 case offsetof(struct __sk_buff, data_meta):
8035 off = si->off;
8036 off -= offsetof(struct __sk_buff, data_meta);
8037 off += offsetof(struct sk_buff, cb);
8038 off += offsetof(struct bpf_skb_data_end, data_meta);
8039 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
8040 si->src_reg, off);
8041 break;
8042
db58ba45 8043 case offsetof(struct __sk_buff, data_end):
6b8cc1d1
DB
8044 off = si->off;
8045 off -= offsetof(struct __sk_buff, data_end);
8046 off += offsetof(struct sk_buff, cb);
8047 off += offsetof(struct bpf_skb_data_end, data_end);
8048 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
8049 si->src_reg, off);
db58ba45
AS
8050 break;
8051
d691f9e8
AS
8052 case offsetof(struct __sk_buff, tc_index):
8053#ifdef CONFIG_NET_SCHED
d691f9e8 8054 if (type == BPF_WRITE)
6b8cc1d1 8055 *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
8056 bpf_target_off(struct sk_buff, tc_index, 2,
8057 target_size));
d691f9e8 8058 else
6b8cc1d1 8059 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
f96da094
DB
8060 bpf_target_off(struct sk_buff, tc_index, 2,
8061 target_size));
d691f9e8 8062#else
2ed46ce4 8063 *target_size = 2;
d691f9e8 8064 if (type == BPF_WRITE)
6b8cc1d1 8065 *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
d691f9e8 8066 else
6b8cc1d1 8067 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
b1d9fc41
DB
8068#endif
8069 break;
8070
8071 case offsetof(struct __sk_buff, napi_id):
8072#if defined(CONFIG_NET_RX_BUSY_POLL)
b1d9fc41 8073 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
f96da094
DB
8074 bpf_target_off(struct sk_buff, napi_id, 4,
8075 target_size));
b1d9fc41
DB
8076 *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
8077 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
8078#else
2ed46ce4 8079 *target_size = 4;
b1d9fc41 8080 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
d691f9e8 8081#endif
6b8cc1d1 8082 break;
8a31db56 8083 case offsetof(struct __sk_buff, family):
c593642c 8084 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
8a31db56
JF
8085
8086 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8087 si->dst_reg, si->src_reg,
8088 offsetof(struct sk_buff, sk));
8089 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8090 bpf_target_off(struct sock_common,
8091 skc_family,
8092 2, target_size));
8093 break;
8094 case offsetof(struct __sk_buff, remote_ip4):
c593642c 8095 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
8a31db56
JF
8096
8097 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8098 si->dst_reg, si->src_reg,
8099 offsetof(struct sk_buff, sk));
8100 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8101 bpf_target_off(struct sock_common,
8102 skc_daddr,
8103 4, target_size));
8104 break;
8105 case offsetof(struct __sk_buff, local_ip4):
c593642c 8106 BUILD_BUG_ON(sizeof_field(struct sock_common,
8a31db56
JF
8107 skc_rcv_saddr) != 4);
8108
8109 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8110 si->dst_reg, si->src_reg,
8111 offsetof(struct sk_buff, sk));
8112 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8113 bpf_target_off(struct sock_common,
8114 skc_rcv_saddr,
8115 4, target_size));
8116 break;
8117 case offsetof(struct __sk_buff, remote_ip6[0]) ...
8118 offsetof(struct __sk_buff, remote_ip6[3]):
8119#if IS_ENABLED(CONFIG_IPV6)
c593642c 8120 BUILD_BUG_ON(sizeof_field(struct sock_common,
8a31db56
JF
8121 skc_v6_daddr.s6_addr32[0]) != 4);
8122
8123 off = si->off;
8124 off -= offsetof(struct __sk_buff, remote_ip6[0]);
8125
8126 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8127 si->dst_reg, si->src_reg,
8128 offsetof(struct sk_buff, sk));
8129 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8130 offsetof(struct sock_common,
8131 skc_v6_daddr.s6_addr32[0]) +
8132 off);
8133#else
8134 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
8135#endif
8136 break;
8137 case offsetof(struct __sk_buff, local_ip6[0]) ...
8138 offsetof(struct __sk_buff, local_ip6[3]):
8139#if IS_ENABLED(CONFIG_IPV6)
c593642c 8140 BUILD_BUG_ON(sizeof_field(struct sock_common,
8a31db56
JF
8141 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
8142
8143 off = si->off;
8144 off -= offsetof(struct __sk_buff, local_ip6[0]);
8145
8146 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8147 si->dst_reg, si->src_reg,
8148 offsetof(struct sk_buff, sk));
8149 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8150 offsetof(struct sock_common,
8151 skc_v6_rcv_saddr.s6_addr32[0]) +
8152 off);
8153#else
8154 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
8155#endif
8156 break;
8157
8158 case offsetof(struct __sk_buff, remote_port):
c593642c 8159 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
8a31db56
JF
8160
8161 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8162 si->dst_reg, si->src_reg,
8163 offsetof(struct sk_buff, sk));
8164 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8165 bpf_target_off(struct sock_common,
8166 skc_dport,
8167 2, target_size));
8168#ifndef __BIG_ENDIAN_BITFIELD
8169 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
8170#endif
8171 break;
8172
8173 case offsetof(struct __sk_buff, local_port):
c593642c 8174 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
8a31db56
JF
8175
8176 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8177 si->dst_reg, si->src_reg,
8178 offsetof(struct sk_buff, sk));
8179 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8180 bpf_target_off(struct sock_common,
8181 skc_num, 2, target_size));
8182 break;
d58e468b 8183
f11216b2 8184 case offsetof(struct __sk_buff, tstamp):
c593642c 8185 BUILD_BUG_ON(sizeof_field(struct sk_buff, tstamp) != 8);
f11216b2
VD
8186
8187 if (type == BPF_WRITE)
8188 *insn++ = BPF_STX_MEM(BPF_DW,
8189 si->dst_reg, si->src_reg,
8190 bpf_target_off(struct sk_buff,
8191 tstamp, 8,
8192 target_size));
8193 else
8194 *insn++ = BPF_LDX_MEM(BPF_DW,
8195 si->dst_reg, si->src_reg,
8196 bpf_target_off(struct sk_buff,
8197 tstamp, 8,
8198 target_size));
e3da08d0
PP
8199 break;
8200
d9ff286a 8201 case offsetof(struct __sk_buff, gso_segs):
cf62089b 8202 insn = bpf_convert_shinfo_access(si, insn);
d9ff286a
ED
8203 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs),
8204 si->dst_reg, si->dst_reg,
8205 bpf_target_off(struct skb_shared_info,
8206 gso_segs, 2,
8207 target_size));
8208 break;
cf62089b
WB
8209 case offsetof(struct __sk_buff, gso_size):
8210 insn = bpf_convert_shinfo_access(si, insn);
8211 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size),
8212 si->dst_reg, si->dst_reg,
8213 bpf_target_off(struct skb_shared_info,
8214 gso_size, 2,
8215 target_size));
8216 break;
e3da08d0 8217 case offsetof(struct __sk_buff, wire_len):
c593642c 8218 BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4);
e3da08d0
PP
8219
8220 off = si->off;
8221 off -= offsetof(struct __sk_buff, wire_len);
8222 off += offsetof(struct sk_buff, cb);
8223 off += offsetof(struct qdisc_skb_cb, pkt_len);
8224 *target_size = 4;
8225 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, off);
46f8bc92
MKL
8226 break;
8227
8228 case offsetof(struct __sk_buff, sk):
8229 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
8230 si->dst_reg, si->src_reg,
8231 offsetof(struct sk_buff, sk));
8232 break;
9bac3d6d
AS
8233 }
8234
8235 return insn - insn_buf;
89aa0758
AS
8236}
8237
c64b7983
JS
8238u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
8239 const struct bpf_insn *si,
8240 struct bpf_insn *insn_buf,
8241 struct bpf_prog *prog, u32 *target_size)
61023658
DA
8242{
8243 struct bpf_insn *insn = insn_buf;
aac3fc32 8244 int off;
61023658 8245
6b8cc1d1 8246 switch (si->off) {
61023658 8247 case offsetof(struct bpf_sock, bound_dev_if):
c593642c 8248 BUILD_BUG_ON(sizeof_field(struct sock, sk_bound_dev_if) != 4);
61023658
DA
8249
8250 if (type == BPF_WRITE)
6b8cc1d1 8251 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
61023658
DA
8252 offsetof(struct sock, sk_bound_dev_if));
8253 else
6b8cc1d1 8254 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
61023658
DA
8255 offsetof(struct sock, sk_bound_dev_if));
8256 break;
aa4c1037 8257
482dca93 8258 case offsetof(struct bpf_sock, mark):
c593642c 8259 BUILD_BUG_ON(sizeof_field(struct sock, sk_mark) != 4);
482dca93
DA
8260
8261 if (type == BPF_WRITE)
8262 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
8263 offsetof(struct sock, sk_mark));
8264 else
8265 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8266 offsetof(struct sock, sk_mark));
8267 break;
8268
8269 case offsetof(struct bpf_sock, priority):
c593642c 8270 BUILD_BUG_ON(sizeof_field(struct sock, sk_priority) != 4);
482dca93
DA
8271
8272 if (type == BPF_WRITE)
8273 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
8274 offsetof(struct sock, sk_priority));
8275 else
8276 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8277 offsetof(struct sock, sk_priority));
8278 break;
8279
aa4c1037 8280 case offsetof(struct bpf_sock, family):
aa65d696
MKL
8281 *insn++ = BPF_LDX_MEM(
8282 BPF_FIELD_SIZEOF(struct sock_common, skc_family),
8283 si->dst_reg, si->src_reg,
8284 bpf_target_off(struct sock_common,
8285 skc_family,
c593642c 8286 sizeof_field(struct sock_common,
aa65d696
MKL
8287 skc_family),
8288 target_size));
aa4c1037
DA
8289 break;
8290
8291 case offsetof(struct bpf_sock, type):
bf976514
MM
8292 *insn++ = BPF_LDX_MEM(
8293 BPF_FIELD_SIZEOF(struct sock, sk_type),
8294 si->dst_reg, si->src_reg,
8295 bpf_target_off(struct sock, sk_type,
8296 sizeof_field(struct sock, sk_type),
8297 target_size));
aa4c1037
DA
8298 break;
8299
8300 case offsetof(struct bpf_sock, protocol):
bf976514
MM
8301 *insn++ = BPF_LDX_MEM(
8302 BPF_FIELD_SIZEOF(struct sock, sk_protocol),
8303 si->dst_reg, si->src_reg,
8304 bpf_target_off(struct sock, sk_protocol,
8305 sizeof_field(struct sock, sk_protocol),
8306 target_size));
aa4c1037 8307 break;
aac3fc32
AI
8308
8309 case offsetof(struct bpf_sock, src_ip4):
8310 *insn++ = BPF_LDX_MEM(
8311 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
8312 bpf_target_off(struct sock_common, skc_rcv_saddr,
c593642c 8313 sizeof_field(struct sock_common,
aac3fc32
AI
8314 skc_rcv_saddr),
8315 target_size));
8316 break;
8317
aa65d696
MKL
8318 case offsetof(struct bpf_sock, dst_ip4):
8319 *insn++ = BPF_LDX_MEM(
8320 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
8321 bpf_target_off(struct sock_common, skc_daddr,
c593642c 8322 sizeof_field(struct sock_common,
aa65d696
MKL
8323 skc_daddr),
8324 target_size));
8325 break;
8326
aac3fc32
AI
8327 case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
8328#if IS_ENABLED(CONFIG_IPV6)
8329 off = si->off;
8330 off -= offsetof(struct bpf_sock, src_ip6[0]);
8331 *insn++ = BPF_LDX_MEM(
8332 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
8333 bpf_target_off(
8334 struct sock_common,
8335 skc_v6_rcv_saddr.s6_addr32[0],
c593642c 8336 sizeof_field(struct sock_common,
aac3fc32
AI
8337 skc_v6_rcv_saddr.s6_addr32[0]),
8338 target_size) + off);
8339#else
8340 (void)off;
8341 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
8342#endif
8343 break;
8344
aa65d696
MKL
8345 case bpf_ctx_range_till(struct bpf_sock, dst_ip6[0], dst_ip6[3]):
8346#if IS_ENABLED(CONFIG_IPV6)
8347 off = si->off;
8348 off -= offsetof(struct bpf_sock, dst_ip6[0]);
8349 *insn++ = BPF_LDX_MEM(
8350 BPF_SIZE(si->code), si->dst_reg, si->src_reg,
8351 bpf_target_off(struct sock_common,
8352 skc_v6_daddr.s6_addr32[0],
c593642c 8353 sizeof_field(struct sock_common,
aa65d696
MKL
8354 skc_v6_daddr.s6_addr32[0]),
8355 target_size) + off);
8356#else
8357 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
8358 *target_size = 4;
8359#endif
8360 break;
8361
aac3fc32
AI
8362 case offsetof(struct bpf_sock, src_port):
8363 *insn++ = BPF_LDX_MEM(
8364 BPF_FIELD_SIZEOF(struct sock_common, skc_num),
8365 si->dst_reg, si->src_reg,
8366 bpf_target_off(struct sock_common, skc_num,
c593642c 8367 sizeof_field(struct sock_common,
aac3fc32
AI
8368 skc_num),
8369 target_size));
8370 break;
aa65d696
MKL
8371
8372 case offsetof(struct bpf_sock, dst_port):
8373 *insn++ = BPF_LDX_MEM(
8374 BPF_FIELD_SIZEOF(struct sock_common, skc_dport),
8375 si->dst_reg, si->src_reg,
8376 bpf_target_off(struct sock_common, skc_dport,
c593642c 8377 sizeof_field(struct sock_common,
aa65d696
MKL
8378 skc_dport),
8379 target_size));
8380 break;
8381
8382 case offsetof(struct bpf_sock, state):
8383 *insn++ = BPF_LDX_MEM(
8384 BPF_FIELD_SIZEOF(struct sock_common, skc_state),
8385 si->dst_reg, si->src_reg,
8386 bpf_target_off(struct sock_common, skc_state,
c593642c 8387 sizeof_field(struct sock_common,
aa65d696
MKL
8388 skc_state),
8389 target_size));
8390 break;
c3c16f2e
AN
8391 case offsetof(struct bpf_sock, rx_queue_mapping):
8392#ifdef CONFIG_XPS
8393 *insn++ = BPF_LDX_MEM(
8394 BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
8395 si->dst_reg, si->src_reg,
8396 bpf_target_off(struct sock, sk_rx_queue_mapping,
8397 sizeof_field(struct sock,
8398 sk_rx_queue_mapping),
8399 target_size));
8400 *insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING,
8401 1);
8402 *insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
8403#else
8404 *insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
8405 *target_size = 2;
8406#endif
8407 break;
61023658
DA
8408 }
8409
8410 return insn - insn_buf;
8411}
8412
6b8cc1d1
DB
8413static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
8414 const struct bpf_insn *si,
374fb54e 8415 struct bpf_insn *insn_buf,
f96da094 8416 struct bpf_prog *prog, u32 *target_size)
374fb54e
DB
8417{
8418 struct bpf_insn *insn = insn_buf;
8419
6b8cc1d1 8420 switch (si->off) {
374fb54e 8421 case offsetof(struct __sk_buff, ifindex):
374fb54e 8422 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
6b8cc1d1 8423 si->dst_reg, si->src_reg,
374fb54e 8424 offsetof(struct sk_buff, dev));
6b8cc1d1 8425 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
f96da094
DB
8426 bpf_target_off(struct net_device, ifindex, 4,
8427 target_size));
374fb54e
DB
8428 break;
8429 default:
f96da094
DB
8430 return bpf_convert_ctx_access(type, si, insn_buf, prog,
8431 target_size);
374fb54e
DB
8432 }
8433
8434 return insn - insn_buf;
8435}
8436
6b8cc1d1
DB
8437static u32 xdp_convert_ctx_access(enum bpf_access_type type,
8438 const struct bpf_insn *si,
6a773a15 8439 struct bpf_insn *insn_buf,
f96da094 8440 struct bpf_prog *prog, u32 *target_size)
6a773a15
BB
8441{
8442 struct bpf_insn *insn = insn_buf;
8443
6b8cc1d1 8444 switch (si->off) {
6a773a15 8445 case offsetof(struct xdp_md, data):
f035a515 8446 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
6b8cc1d1 8447 si->dst_reg, si->src_reg,
6a773a15
BB
8448 offsetof(struct xdp_buff, data));
8449 break;
de8f3a83
DB
8450 case offsetof(struct xdp_md, data_meta):
8451 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
8452 si->dst_reg, si->src_reg,
8453 offsetof(struct xdp_buff, data_meta));
8454 break;
6a773a15 8455 case offsetof(struct xdp_md, data_end):
f035a515 8456 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
6b8cc1d1 8457 si->dst_reg, si->src_reg,
6a773a15
BB
8458 offsetof(struct xdp_buff, data_end));
8459 break;
02dd3291
JDB
8460 case offsetof(struct xdp_md, ingress_ifindex):
8461 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
8462 si->dst_reg, si->src_reg,
8463 offsetof(struct xdp_buff, rxq));
8464 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
8465 si->dst_reg, si->dst_reg,
8466 offsetof(struct xdp_rxq_info, dev));
8467 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
daaf24c6 8468 offsetof(struct net_device, ifindex));
02dd3291
JDB
8469 break;
8470 case offsetof(struct xdp_md, rx_queue_index):
8471 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
8472 si->dst_reg, si->src_reg,
8473 offsetof(struct xdp_buff, rxq));
8474 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
daaf24c6
JDB
8475 offsetof(struct xdp_rxq_info,
8476 queue_index));
02dd3291 8477 break;
64b59025
DA
8478 case offsetof(struct xdp_md, egress_ifindex):
8479 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
8480 si->dst_reg, si->src_reg,
8481 offsetof(struct xdp_buff, txq));
8482 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
8483 si->dst_reg, si->dst_reg,
8484 offsetof(struct xdp_txq_info, dev));
8485 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8486 offsetof(struct net_device, ifindex));
8487 break;
6a773a15
BB
8488 }
8489
8490 return insn - insn_buf;
8491}
8492
4fbac77d
AI
8493/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is the type
8494 * of the context Structure, F is a Field in the context structure that
8495 * contains a pointer to a Nested Structure of type NS that has the field NF.
8496 *
8497 * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to the caller to
8498 * make sure that SIZE is not greater than the actual size of S.F.NF.
8499 *
8500 * If offset OFF is provided, the load happens from that offset relative to
8501 * the offset of NF.
8502 */
8503#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \
8504 do { \
8505 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \
8506 si->src_reg, offsetof(S, F)); \
8507 *insn++ = BPF_LDX_MEM( \
8508 SIZE, si->dst_reg, si->dst_reg, \
c593642c 8509 bpf_target_off(NS, NF, sizeof_field(NS, NF), \
4fbac77d
AI
8510 target_size) \
8511 + OFF); \
8512 } while (0)
8513
8514#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \
8515 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \
8516 BPF_FIELD_SIZEOF(NS, NF), 0)
8517
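/* Illustrative sketch (not part of the original source): for the user_family
 * case further below, SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
 * struct sockaddr, uaddr, sa_family) emits loads that behave roughly like
 *
 *	dst_reg = ((struct bpf_sock_addr_kern *)src_reg)->uaddr;
 *	dst_reg = ((struct sockaddr *)dst_reg)->sa_family;
 *
 * i.e. one load to follow the context field pointer and a second load of the
 * nested field itself.
 */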
8518/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantics similar to
8519 * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for the store operation.
8520 *
4fbac77d
AI
8521 * In addition it uses Temporary Field TF (member of struct S) as the 3rd
8522 * "register" since two registers available in convert_ctx_access are not
8523 * enough: we can't overwrite either SRC, since it contains the value to
8524 * store, or DST, since it contains the pointer to the context that may be
8525 * used by later instructions. But we need a temporary place to save the
8526 * pointer to the nested structure whose field we want to store to.
8527 */
600c70ba 8528#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, OFF, TF) \
4fbac77d
AI
8529 do { \
8530 int tmp_reg = BPF_REG_9; \
8531 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
8532 --tmp_reg; \
8533 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \
8534 --tmp_reg; \
8535 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \
8536 offsetof(S, TF)); \
8537 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \
8538 si->dst_reg, offsetof(S, F)); \
600c70ba 8539 *insn++ = BPF_STX_MEM(SIZE, tmp_reg, si->src_reg, \
c593642c 8540 bpf_target_off(NS, NF, sizeof_field(NS, NF), \
4fbac77d
AI
8541 target_size) \
8542 + OFF); \
8543 *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \
8544 offsetof(S, TF)); \
8545 } while (0)
8546
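/* Illustrative sketch (not part of the original source): the store variant
 * above behaves roughly like
 *
 *	ctx->TF = tmp_reg;                 spill the scratch register
 *	tmp_reg = ctx->F;                  load the nested-structure pointer
 *	((NS *)tmp_reg)->NF = src_reg;     perform the actual store
 *	tmp_reg = ctx->TF;                 restore the scratch register
 *
 * with tmp_reg being whichever of R9/R8/R7 does not clash with src_reg or
 * dst_reg.
 */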
8547#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
8548 TF) \
8549 do { \
8550 if (type == BPF_WRITE) { \
600c70ba
SF
8551 SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, SIZE, \
8552 OFF, TF); \
4fbac77d
AI
8553 } else { \
8554 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \
8555 S, NS, F, NF, SIZE, OFF); \
8556 } \
8557 } while (0)
8558
8559#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \
8560 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \
8561 S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
8562
8563static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
8564 const struct bpf_insn *si,
8565 struct bpf_insn *insn_buf,
8566 struct bpf_prog *prog, u32 *target_size)
8567{
7aebfa1b 8568 int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port);
4fbac77d 8569 struct bpf_insn *insn = insn_buf;
4fbac77d
AI
8570
8571 switch (si->off) {
8572 case offsetof(struct bpf_sock_addr, user_family):
8573 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
8574 struct sockaddr, uaddr, sa_family);
8575 break;
8576
8577 case offsetof(struct bpf_sock_addr, user_ip4):
8578 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
8579 struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
8580 sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
8581 break;
8582
8583 case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
8584 off = si->off;
8585 off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
8586 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
8587 struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
8588 sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
8589 tmp_reg);
8590 break;
8591
8592 case offsetof(struct bpf_sock_addr, user_port):
8593		/* To get the port we need to know sa_family first and then treat
8594		 * sockaddr as either sockaddr_in or sockaddr_in6.
8595		 * We can simplify, though, since the port field has the same
8596		 * offset and size in both structures.
8597		 * Here we check this invariant and, if it holds, use just one of
8598		 * the structures.
8599 */
8600 BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
8601 offsetof(struct sockaddr_in6, sin6_port));
c593642c
PB
8602 BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
8603 sizeof_field(struct sockaddr_in6, sin6_port));
7aebfa1b
AI
8604 /* Account for sin6_port being smaller than user_port. */
8605 port_size = min(port_size, BPF_LDST_BYTES(si));
8606 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
8607 struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
8608 sin6_port, bytes_to_bpf_size(port_size), 0, tmp_reg);
4fbac77d
AI
8609 break;
8610
8611 case offsetof(struct bpf_sock_addr, family):
8612 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
8613 struct sock, sk, sk_family);
8614 break;
8615
8616 case offsetof(struct bpf_sock_addr, type):
bf976514
MM
8617 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
8618 struct sock, sk, sk_type);
4fbac77d
AI
8619 break;
8620
8621 case offsetof(struct bpf_sock_addr, protocol):
bf976514
MM
8622 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
8623 struct sock, sk, sk_protocol);
4fbac77d 8624 break;
1cedee13
AI
8625
8626 case offsetof(struct bpf_sock_addr, msg_src_ip4):
8627 /* Treat t_ctx as struct in_addr for msg_src_ip4. */
8628 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
8629 struct bpf_sock_addr_kern, struct in_addr, t_ctx,
8630 s_addr, BPF_SIZE(si->code), 0, tmp_reg);
8631 break;
8632
8633 case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
8634 msg_src_ip6[3]):
8635 off = si->off;
8636 off -= offsetof(struct bpf_sock_addr, msg_src_ip6[0]);
8637 /* Treat t_ctx as struct in6_addr for msg_src_ip6. */
8638 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
8639 struct bpf_sock_addr_kern, struct in6_addr, t_ctx,
8640 s6_addr32[0], BPF_SIZE(si->code), off, tmp_reg);
8641 break;
fb85c4a7
SF
8642 case offsetof(struct bpf_sock_addr, sk):
8643 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, sk),
8644 si->dst_reg, si->src_reg,
8645 offsetof(struct bpf_sock_addr_kern, sk));
8646 break;
4fbac77d
AI
8647 }
8648
8649 return insn - insn_buf;
8650}
8651
40304b2a
LB
8652static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
8653 const struct bpf_insn *si,
8654 struct bpf_insn *insn_buf,
f96da094
DB
8655 struct bpf_prog *prog,
8656 u32 *target_size)
40304b2a
LB
8657{
8658 struct bpf_insn *insn = insn_buf;
8659 int off;
8660
9b1f3d6e
MKL
8661/* Helper macro for adding read access to tcp_sock or sock fields. */
8662#define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
8663 do { \
fd09af01 8664 int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 2; \
c593642c
PB
8665 BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
8666 sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
fd09af01
JF
8667 if (si->dst_reg == reg || si->src_reg == reg) \
8668 reg--; \
8669 if (si->dst_reg == reg || si->src_reg == reg) \
8670 reg--; \
8671 if (si->dst_reg == si->src_reg) { \
8672 *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \
8673 offsetof(struct bpf_sock_ops_kern, \
8674 temp)); \
8675 fullsock_reg = reg; \
8676 jmp += 2; \
8677 } \
9b1f3d6e
MKL
8678 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
8679 struct bpf_sock_ops_kern, \
8680 is_fullsock), \
fd09af01 8681 fullsock_reg, si->src_reg, \
9b1f3d6e
MKL
8682 offsetof(struct bpf_sock_ops_kern, \
8683 is_fullsock)); \
fd09af01
JF
8684 *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
8685 if (si->dst_reg == si->src_reg) \
8686 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
8687 offsetof(struct bpf_sock_ops_kern, \
8688 temp)); \
9b1f3d6e
MKL
8689 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
8690 struct bpf_sock_ops_kern, sk),\
8691 si->dst_reg, si->src_reg, \
8692 offsetof(struct bpf_sock_ops_kern, sk));\
8693 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ, \
8694 OBJ_FIELD), \
8695 si->dst_reg, si->dst_reg, \
8696 offsetof(OBJ, OBJ_FIELD)); \
fd09af01
JF
8697 if (si->dst_reg == si->src_reg) { \
8698 *insn++ = BPF_JMP_A(1); \
8699 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
8700 offsetof(struct bpf_sock_ops_kern, \
8701 temp)); \
8702 } \
9b1f3d6e
MKL
8703 } while (0)
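/* Note on the read path emitted above: when is_fullsock is zero, the
 * BPF_JEQ jumps past the sk and OBJ_FIELD loads, so no tcp_sock/sock
 * field is dereferenced for request or timewait sockets; any register
 * borrowed into the temp slot is restored on both paths.
 */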
8704
84f44df6
JF
8705#define SOCK_OPS_GET_SK() \
8706 do { \
8707 int fullsock_reg = si->dst_reg, reg = BPF_REG_9, jmp = 1; \
8708 if (si->dst_reg == reg || si->src_reg == reg) \
8709 reg--; \
8710 if (si->dst_reg == reg || si->src_reg == reg) \
8711 reg--; \
8712 if (si->dst_reg == si->src_reg) { \
8713 *insn++ = BPF_STX_MEM(BPF_DW, si->src_reg, reg, \
8714 offsetof(struct bpf_sock_ops_kern, \
8715 temp)); \
8716 fullsock_reg = reg; \
8717 jmp += 2; \
8718 } \
8719 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
8720 struct bpf_sock_ops_kern, \
8721 is_fullsock), \
8722 fullsock_reg, si->src_reg, \
8723 offsetof(struct bpf_sock_ops_kern, \
8724 is_fullsock)); \
8725 *insn++ = BPF_JMP_IMM(BPF_JEQ, fullsock_reg, 0, jmp); \
8726 if (si->dst_reg == si->src_reg) \
8727 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
8728 offsetof(struct bpf_sock_ops_kern, \
8729 temp)); \
8730 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
8731 struct bpf_sock_ops_kern, sk),\
8732 si->dst_reg, si->src_reg, \
8733 offsetof(struct bpf_sock_ops_kern, sk));\
8734 if (si->dst_reg == si->src_reg) { \
8735 *insn++ = BPF_JMP_A(1); \
8736 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \
8737 offsetof(struct bpf_sock_ops_kern, \
8738 temp)); \
8739 } \
8740 } while (0)
8741
9b1f3d6e
MKL
8742#define SOCK_OPS_GET_TCP_SOCK_FIELD(FIELD) \
8743 SOCK_OPS_GET_FIELD(FIELD, FIELD, struct tcp_sock)
8744
8745/* Helper macro for adding write access to tcp_sock or sock fields.
8746 * The macro is called with two registers, dst_reg which contains a pointer
8747 * to ctx (context) and src_reg which contains the value that should be
8748 * stored. However, we need an additional register since we cannot overwrite
8749 * dst_reg because it may be used later in the program.
8750 * Instead we "borrow" one of the other registers. We first save its value
8751 * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
8752 * it at the end of the macro.
8753 */
8754#define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \
8755 do { \
8756 int reg = BPF_REG_9; \
c593642c
PB
8757 BUILD_BUG_ON(sizeof_field(OBJ, OBJ_FIELD) > \
8758 sizeof_field(struct bpf_sock_ops, BPF_FIELD)); \
9b1f3d6e
MKL
8759 if (si->dst_reg == reg || si->src_reg == reg) \
8760 reg--; \
8761 if (si->dst_reg == reg || si->src_reg == reg) \
8762 reg--; \
8763 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg, \
8764 offsetof(struct bpf_sock_ops_kern, \
8765 temp)); \
8766 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
8767 struct bpf_sock_ops_kern, \
8768 is_fullsock), \
8769 reg, si->dst_reg, \
8770 offsetof(struct bpf_sock_ops_kern, \
8771 is_fullsock)); \
8772 *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2); \
8773 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( \
8774 struct bpf_sock_ops_kern, sk),\
8775 reg, si->dst_reg, \
8776 offsetof(struct bpf_sock_ops_kern, sk));\
8777 *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD), \
8778 reg, si->src_reg, \
8779 offsetof(OBJ, OBJ_FIELD)); \
8780 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg, \
8781 offsetof(struct bpf_sock_ops_kern, \
8782 temp)); \
8783 } while (0)
8784
8785#define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE) \
8786 do { \
8787 if (TYPE == BPF_WRITE) \
8788 SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
8789 else \
8790 SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
8791 } while (0)
8792
9b1f3d6e
MKL
8793 if (insn > insn_buf)
8794 return insn - insn_buf;
8795
40304b2a 8796 switch (si->off) {
c9985d09
MKL
8797 case offsetof(struct bpf_sock_ops, op):
8798 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
8799 op),
8800 si->dst_reg, si->src_reg,
8801 offsetof(struct bpf_sock_ops_kern, op));
8802 break;
8803
8804 case offsetof(struct bpf_sock_ops, replylong[0]) ...
40304b2a 8805 offsetof(struct bpf_sock_ops, replylong[3]):
c593642c
PB
8806 BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, reply) !=
8807 sizeof_field(struct bpf_sock_ops_kern, reply));
8808 BUILD_BUG_ON(sizeof_field(struct bpf_sock_ops, replylong) !=
8809 sizeof_field(struct bpf_sock_ops_kern, replylong));
40304b2a 8810 off = si->off;
c9985d09
MKL
8811 off -= offsetof(struct bpf_sock_ops, replylong[0]);
8812 off += offsetof(struct bpf_sock_ops_kern, replylong[0]);
40304b2a
LB
8813 if (type == BPF_WRITE)
8814 *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
8815 off);
8816 else
8817 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
8818 off);
8819 break;
8820
8821 case offsetof(struct bpf_sock_ops, family):
c593642c 8822 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
40304b2a
LB
8823
8824 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8825 struct bpf_sock_ops_kern, sk),
8826 si->dst_reg, si->src_reg,
8827 offsetof(struct bpf_sock_ops_kern, sk));
8828 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8829 offsetof(struct sock_common, skc_family));
8830 break;
8831
8832 case offsetof(struct bpf_sock_ops, remote_ip4):
c593642c 8833 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
40304b2a
LB
8834
8835 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8836 struct bpf_sock_ops_kern, sk),
8837 si->dst_reg, si->src_reg,
8838 offsetof(struct bpf_sock_ops_kern, sk));
8839 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8840 offsetof(struct sock_common, skc_daddr));
8841 break;
8842
8843 case offsetof(struct bpf_sock_ops, local_ip4):
c593642c 8844 BUILD_BUG_ON(sizeof_field(struct sock_common,
303def35 8845 skc_rcv_saddr) != 4);
40304b2a
LB
8846
8847 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8848 struct bpf_sock_ops_kern, sk),
8849 si->dst_reg, si->src_reg,
8850 offsetof(struct bpf_sock_ops_kern, sk));
8851 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8852 offsetof(struct sock_common,
8853 skc_rcv_saddr));
8854 break;
8855
8856 case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
8857 offsetof(struct bpf_sock_ops, remote_ip6[3]):
8858#if IS_ENABLED(CONFIG_IPV6)
c593642c 8859 BUILD_BUG_ON(sizeof_field(struct sock_common,
40304b2a
LB
8860 skc_v6_daddr.s6_addr32[0]) != 4);
8861
8862 off = si->off;
8863 off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
8864 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8865 struct bpf_sock_ops_kern, sk),
8866 si->dst_reg, si->src_reg,
8867 offsetof(struct bpf_sock_ops_kern, sk));
8868 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8869 offsetof(struct sock_common,
8870 skc_v6_daddr.s6_addr32[0]) +
8871 off);
8872#else
8873 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
8874#endif
8875 break;
8876
8877 case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
8878 offsetof(struct bpf_sock_ops, local_ip6[3]):
8879#if IS_ENABLED(CONFIG_IPV6)
c593642c 8880 BUILD_BUG_ON(sizeof_field(struct sock_common,
40304b2a
LB
8881 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
8882
8883 off = si->off;
8884 off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
8885 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8886 struct bpf_sock_ops_kern, sk),
8887 si->dst_reg, si->src_reg,
8888 offsetof(struct bpf_sock_ops_kern, sk));
8889 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8890 offsetof(struct sock_common,
8891 skc_v6_rcv_saddr.s6_addr32[0]) +
8892 off);
8893#else
8894 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
8895#endif
8896 break;
8897
8898 case offsetof(struct bpf_sock_ops, remote_port):
c593642c 8899 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
40304b2a
LB
8900
8901 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8902 struct bpf_sock_ops_kern, sk),
8903 si->dst_reg, si->src_reg,
8904 offsetof(struct bpf_sock_ops_kern, sk));
8905 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8906 offsetof(struct sock_common, skc_dport));
8907#ifndef __BIG_ENDIAN_BITFIELD
8908 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
8909#endif
8910 break;
8911
8912 case offsetof(struct bpf_sock_ops, local_port):
c593642c 8913 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
40304b2a
LB
8914
8915 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8916 struct bpf_sock_ops_kern, sk),
8917 si->dst_reg, si->src_reg,
8918 offsetof(struct bpf_sock_ops_kern, sk));
8919 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
8920 offsetof(struct sock_common, skc_num));
8921 break;
f19397a5
LB
8922
8923 case offsetof(struct bpf_sock_ops, is_fullsock):
8924 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8925 struct bpf_sock_ops_kern,
8926 is_fullsock),
8927 si->dst_reg, si->src_reg,
8928 offsetof(struct bpf_sock_ops_kern,
8929 is_fullsock));
8930 break;
8931
44f0e430 8932 case offsetof(struct bpf_sock_ops, state):
c593642c 8933 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_state) != 1);
44f0e430
LB
8934
8935 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8936 struct bpf_sock_ops_kern, sk),
8937 si->dst_reg, si->src_reg,
8938 offsetof(struct bpf_sock_ops_kern, sk));
8939 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
8940 offsetof(struct sock_common, skc_state));
8941 break;
8942
8943 case offsetof(struct bpf_sock_ops, rtt_min):
c593642c 8944 BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
44f0e430
LB
8945 sizeof(struct minmax));
8946 BUILD_BUG_ON(sizeof(struct minmax) <
8947 sizeof(struct minmax_sample));
8948
8949 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
8950 struct bpf_sock_ops_kern, sk),
8951 si->dst_reg, si->src_reg,
8952 offsetof(struct bpf_sock_ops_kern, sk));
8953 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
8954 offsetof(struct tcp_sock, rtt_min) +
c593642c 8955 sizeof_field(struct minmax_sample, t));
44f0e430
LB
8956 break;
8957
b13d8807
LB
8958 case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
8959 SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
8960 struct tcp_sock);
8961 break;
44f0e430 8962
44f0e430 8963 case offsetof(struct bpf_sock_ops, sk_txhash):
6f9bd3d7
LB
8964 SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash,
8965 struct sock, type);
44f0e430 8966 break;
2377b81d
SF
8967 case offsetof(struct bpf_sock_ops, snd_cwnd):
8968 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_cwnd);
8969 break;
8970 case offsetof(struct bpf_sock_ops, srtt_us):
8971 SOCK_OPS_GET_TCP_SOCK_FIELD(srtt_us);
8972 break;
8973 case offsetof(struct bpf_sock_ops, snd_ssthresh):
8974 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_ssthresh);
8975 break;
8976 case offsetof(struct bpf_sock_ops, rcv_nxt):
8977 SOCK_OPS_GET_TCP_SOCK_FIELD(rcv_nxt);
8978 break;
8979 case offsetof(struct bpf_sock_ops, snd_nxt):
8980 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_nxt);
8981 break;
8982 case offsetof(struct bpf_sock_ops, snd_una):
8983 SOCK_OPS_GET_TCP_SOCK_FIELD(snd_una);
8984 break;
8985 case offsetof(struct bpf_sock_ops, mss_cache):
8986 SOCK_OPS_GET_TCP_SOCK_FIELD(mss_cache);
8987 break;
8988 case offsetof(struct bpf_sock_ops, ecn_flags):
8989 SOCK_OPS_GET_TCP_SOCK_FIELD(ecn_flags);
8990 break;
8991 case offsetof(struct bpf_sock_ops, rate_delivered):
8992 SOCK_OPS_GET_TCP_SOCK_FIELD(rate_delivered);
8993 break;
8994 case offsetof(struct bpf_sock_ops, rate_interval_us):
8995 SOCK_OPS_GET_TCP_SOCK_FIELD(rate_interval_us);
8996 break;
8997 case offsetof(struct bpf_sock_ops, packets_out):
8998 SOCK_OPS_GET_TCP_SOCK_FIELD(packets_out);
8999 break;
9000 case offsetof(struct bpf_sock_ops, retrans_out):
9001 SOCK_OPS_GET_TCP_SOCK_FIELD(retrans_out);
9002 break;
9003 case offsetof(struct bpf_sock_ops, total_retrans):
9004 SOCK_OPS_GET_TCP_SOCK_FIELD(total_retrans);
9005 break;
9006 case offsetof(struct bpf_sock_ops, segs_in):
9007 SOCK_OPS_GET_TCP_SOCK_FIELD(segs_in);
9008 break;
9009 case offsetof(struct bpf_sock_ops, data_segs_in):
9010 SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_in);
9011 break;
9012 case offsetof(struct bpf_sock_ops, segs_out):
9013 SOCK_OPS_GET_TCP_SOCK_FIELD(segs_out);
9014 break;
9015 case offsetof(struct bpf_sock_ops, data_segs_out):
9016 SOCK_OPS_GET_TCP_SOCK_FIELD(data_segs_out);
9017 break;
9018 case offsetof(struct bpf_sock_ops, lost_out):
9019 SOCK_OPS_GET_TCP_SOCK_FIELD(lost_out);
9020 break;
9021 case offsetof(struct bpf_sock_ops, sacked_out):
9022 SOCK_OPS_GET_TCP_SOCK_FIELD(sacked_out);
9023 break;
9024 case offsetof(struct bpf_sock_ops, bytes_received):
9025 SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_received);
9026 break;
9027 case offsetof(struct bpf_sock_ops, bytes_acked):
9028 SOCK_OPS_GET_TCP_SOCK_FIELD(bytes_acked);
9029 break;
1314ef56 9030 case offsetof(struct bpf_sock_ops, sk):
84f44df6 9031 SOCK_OPS_GET_SK();
1314ef56 9032 break;
0813a841
MKL
9033 case offsetof(struct bpf_sock_ops, skb_data_end):
9034 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9035 skb_data_end),
9036 si->dst_reg, si->src_reg,
9037 offsetof(struct bpf_sock_ops_kern,
9038 skb_data_end));
9039 break;
9040 case offsetof(struct bpf_sock_ops, skb_data):
9041 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9042 skb),
9043 si->dst_reg, si->src_reg,
9044 offsetof(struct bpf_sock_ops_kern,
9045 skb));
9046 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9047 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
9048 si->dst_reg, si->dst_reg,
9049 offsetof(struct sk_buff, data));
9050 break;
9051 case offsetof(struct bpf_sock_ops, skb_len):
9052 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9053 skb),
9054 si->dst_reg, si->src_reg,
9055 offsetof(struct bpf_sock_ops_kern,
9056 skb));
9057 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9058 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, len),
9059 si->dst_reg, si->dst_reg,
9060 offsetof(struct sk_buff, len));
9061 break;
9062 case offsetof(struct bpf_sock_ops, skb_tcp_flags):
9063 off = offsetof(struct sk_buff, cb);
9064 off += offsetof(struct tcp_skb_cb, tcp_flags);
9065 *target_size = sizeof_field(struct tcp_skb_cb, tcp_flags);
9066 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
9067 skb),
9068 si->dst_reg, si->src_reg,
9069 offsetof(struct bpf_sock_ops_kern,
9070 skb));
9071 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9072 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct tcp_skb_cb,
9073 tcp_flags),
9074 si->dst_reg, si->dst_reg, off);
9075 break;
40304b2a
LB
9076 }
9077 return insn - insn_buf;
9078}
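/*
 * Illustrative sketch (not part of filter.c): a minimal SEC("sockops")
 * program touching fields remapped above.  srtt_us/snd_cwnd go through
 * SOCK_OPS_GET_TCP_SOCK_FIELD() and are only meaningful on full sockets;
 * bpf_sock_ops_cb_flags_set() is the existing helper for enabling the RTT
 * callback.  The program name and its logic are examples only.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sockops")
int sockops_sample(struct bpf_sock_ops *skops)
{
	switch (skops->op) {
	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
		/* ask for per-RTT callbacks on this connection */
		bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_RTT_CB_FLAG);
		break;
	case BPF_SOCK_OPS_RTT_CB:
		if (skops->is_fullsock)
			bpf_printk("srtt_us=%u snd_cwnd=%u",
				   skops->srtt_us, skops->snd_cwnd);
		break;
	}
	return 1;
}

char _license[] SEC("license") = "GPL";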
9079
8108a775
JF
9080static u32 sk_skb_convert_ctx_access(enum bpf_access_type type,
9081 const struct bpf_insn *si,
9082 struct bpf_insn *insn_buf,
9083 struct bpf_prog *prog, u32 *target_size)
9084{
9085 struct bpf_insn *insn = insn_buf;
9086 int off;
9087
9088 switch (si->off) {
9089 case offsetof(struct __sk_buff, data_end):
9090 off = si->off;
9091 off -= offsetof(struct __sk_buff, data_end);
9092 off += offsetof(struct sk_buff, cb);
9093 off += offsetof(struct tcp_skb_cb, bpf.data_end);
9094 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
9095 si->src_reg, off);
9096 break;
9097 default:
9098 return bpf_convert_ctx_access(type, si, insn_buf, prog,
9099 target_size);
9100 }
9101
9102 return insn - insn_buf;
9103}
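/*
 * Illustrative sketch (not part of filter.c): an SEC("sk_skb/stream_verdict")
 * program.  For this program type data_end comes from tcp_skb_cb (the special
 * case above); the remaining __sk_buff fields use the generic
 * bpf_convert_ctx_access() fallback.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sk_skb/stream_verdict")
int skb_verdict(struct __sk_buff *skb)
{
	void *data = (void *)(long)skb->data;
	void *data_end = (void *)(long)skb->data_end;

	/* drop zero-length records, pass everything else up the socket */
	if (data + 1 > data_end)
		return SK_DROP;

	return SK_PASS;
}

char _license[] SEC("license") = "GPL";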
9104
4f738adb
JF
9105static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
9106 const struct bpf_insn *si,
9107 struct bpf_insn *insn_buf,
9108 struct bpf_prog *prog, u32 *target_size)
9109{
9110 struct bpf_insn *insn = insn_buf;
720e7f38 9111#if IS_ENABLED(CONFIG_IPV6)
303def35 9112 int off;
720e7f38 9113#endif
4f738adb 9114
7a69c0f2
JF
9115 /* ctx conversion relies on the sg element being first in struct sk_msg */
9116 BUILD_BUG_ON(offsetof(struct sk_msg, sg) != 0);
9117
4f738adb
JF
9118 switch (si->off) {
9119 case offsetof(struct sk_msg_md, data):
604326b4 9120 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data),
4f738adb 9121 si->dst_reg, si->src_reg,
604326b4 9122 offsetof(struct sk_msg, data));
4f738adb
JF
9123 break;
9124 case offsetof(struct sk_msg_md, data_end):
604326b4 9125 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, data_end),
4f738adb 9126 si->dst_reg, si->src_reg,
604326b4 9127 offsetof(struct sk_msg, data_end));
4f738adb 9128 break;
303def35 9129 case offsetof(struct sk_msg_md, family):
c593642c 9130 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_family) != 2);
303def35
JF
9131
9132 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 9133 struct sk_msg, sk),
303def35 9134 si->dst_reg, si->src_reg,
604326b4 9135 offsetof(struct sk_msg, sk));
303def35
JF
9136 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9137 offsetof(struct sock_common, skc_family));
9138 break;
9139
9140 case offsetof(struct sk_msg_md, remote_ip4):
c593642c 9141 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_daddr) != 4);
303def35
JF
9142
9143 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 9144 struct sk_msg, sk),
303def35 9145 si->dst_reg, si->src_reg,
604326b4 9146 offsetof(struct sk_msg, sk));
303def35
JF
9147 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9148 offsetof(struct sock_common, skc_daddr));
9149 break;
9150
9151 case offsetof(struct sk_msg_md, local_ip4):
c593642c 9152 BUILD_BUG_ON(sizeof_field(struct sock_common,
303def35
JF
9153 skc_rcv_saddr) != 4);
9154
9155 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 9156 struct sk_msg, sk),
303def35 9157 si->dst_reg, si->src_reg,
604326b4 9158 offsetof(struct sk_msg, sk));
303def35
JF
9159 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9160 offsetof(struct sock_common,
9161 skc_rcv_saddr));
9162 break;
9163
9164 case offsetof(struct sk_msg_md, remote_ip6[0]) ...
9165 offsetof(struct sk_msg_md, remote_ip6[3]):
9166#if IS_ENABLED(CONFIG_IPV6)
c593642c 9167 BUILD_BUG_ON(sizeof_field(struct sock_common,
303def35
JF
9168 skc_v6_daddr.s6_addr32[0]) != 4);
9169
9170 off = si->off;
9171 off -= offsetof(struct sk_msg_md, remote_ip6[0]);
9172 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 9173 struct sk_msg, sk),
303def35 9174 si->dst_reg, si->src_reg,
604326b4 9175 offsetof(struct sk_msg, sk));
303def35
JF
9176 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9177 offsetof(struct sock_common,
9178 skc_v6_daddr.s6_addr32[0]) +
9179 off);
9180#else
9181 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9182#endif
9183 break;
9184
9185 case offsetof(struct sk_msg_md, local_ip6[0]) ...
9186 offsetof(struct sk_msg_md, local_ip6[3]):
9187#if IS_ENABLED(CONFIG_IPV6)
c593642c 9188 BUILD_BUG_ON(sizeof_field(struct sock_common,
303def35
JF
9189 skc_v6_rcv_saddr.s6_addr32[0]) != 4);
9190
9191 off = si->off;
9192 off -= offsetof(struct sk_msg_md, local_ip6[0]);
9193 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 9194 struct sk_msg, sk),
303def35 9195 si->dst_reg, si->src_reg,
604326b4 9196 offsetof(struct sk_msg, sk));
303def35
JF
9197 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
9198 offsetof(struct sock_common,
9199 skc_v6_rcv_saddr.s6_addr32[0]) +
9200 off);
9201#else
9202 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9203#endif
9204 break;
9205
9206 case offsetof(struct sk_msg_md, remote_port):
c593642c 9207 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_dport) != 2);
303def35
JF
9208
9209 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 9210 struct sk_msg, sk),
303def35 9211 si->dst_reg, si->src_reg,
604326b4 9212 offsetof(struct sk_msg, sk));
303def35
JF
9213 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9214 offsetof(struct sock_common, skc_dport));
9215#ifndef __BIG_ENDIAN_BITFIELD
9216 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
9217#endif
9218 break;
9219
9220 case offsetof(struct sk_msg_md, local_port):
c593642c 9221 BUILD_BUG_ON(sizeof_field(struct sock_common, skc_num) != 2);
303def35
JF
9222
9223 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
604326b4 9224 struct sk_msg, sk),
303def35 9225 si->dst_reg, si->src_reg,
604326b4 9226 offsetof(struct sk_msg, sk));
303def35
JF
9227 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
9228 offsetof(struct sock_common, skc_num));
9229 break;
3bdbd022
JF
9230
9231 case offsetof(struct sk_msg_md, size):
9232 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg_sg, size),
9233 si->dst_reg, si->src_reg,
9234 offsetof(struct sk_msg_sg, size));
9235 break;
13d70f5a
JF
9236
9237 case offsetof(struct sk_msg_md, sk):
9238 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, sk),
9239 si->dst_reg, si->src_reg,
9240 offsetof(struct sk_msg, sk));
9241 break;
4f738adb
JF
9242 }
9243
9244 return insn - insn_buf;
9245}
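/*
 * Illustrative sketch (not part of filter.c): an SEC("sk_msg") program
 * reading fields remapped above.  size comes from sk_msg_sg.size and
 * local_port from skc_num (host byte order); data/data_end initially cover
 * only the first scatterlist element.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("sk_msg")
int msg_sample(struct sk_msg_md *msg)
{
	void *data = (void *)(long)msg->data;
	void *data_end = (void *)(long)msg->data_end;

	if (data < data_end)
		bpf_printk("sk_msg: %u bytes, local port %u",
			   msg->size, msg->local_port);

	return SK_PASS;
}

char _license[] SEC("license") = "GPL";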
9246
7de16e3a 9247const struct bpf_verifier_ops sk_filter_verifier_ops = {
4936e352
DB
9248 .get_func_proto = sk_filter_func_proto,
9249 .is_valid_access = sk_filter_is_valid_access,
2492d3b8 9250 .convert_ctx_access = bpf_convert_ctx_access,
e0cea7ce 9251 .gen_ld_abs = bpf_gen_ld_abs,
89aa0758
AS
9252};
9253
7de16e3a 9254const struct bpf_prog_ops sk_filter_prog_ops = {
61f3c964 9255 .test_run = bpf_prog_test_run_skb,
7de16e3a
JK
9256};
9257
9258const struct bpf_verifier_ops tc_cls_act_verifier_ops = {
4936e352
DB
9259 .get_func_proto = tc_cls_act_func_proto,
9260 .is_valid_access = tc_cls_act_is_valid_access,
374fb54e 9261 .convert_ctx_access = tc_cls_act_convert_ctx_access,
36bbef52 9262 .gen_prologue = tc_cls_act_prologue,
e0cea7ce 9263 .gen_ld_abs = bpf_gen_ld_abs,
7de16e3a
JK
9264};
9265
9266const struct bpf_prog_ops tc_cls_act_prog_ops = {
1cf1cae9 9267 .test_run = bpf_prog_test_run_skb,
608cd71a
AS
9268};
9269
7de16e3a 9270const struct bpf_verifier_ops xdp_verifier_ops = {
6a773a15
BB
9271 .get_func_proto = xdp_func_proto,
9272 .is_valid_access = xdp_is_valid_access,
9273 .convert_ctx_access = xdp_convert_ctx_access,
b09928b9 9274 .gen_prologue = bpf_noop_prologue,
7de16e3a
JK
9275};
9276
9277const struct bpf_prog_ops xdp_prog_ops = {
1cf1cae9 9278 .test_run = bpf_prog_test_run_xdp,
6a773a15
BB
9279};
9280
7de16e3a 9281const struct bpf_verifier_ops cg_skb_verifier_ops = {
cd339431 9282 .get_func_proto = cg_skb_func_proto,
b39b5f41 9283 .is_valid_access = cg_skb_is_valid_access,
2492d3b8 9284 .convert_ctx_access = bpf_convert_ctx_access,
7de16e3a
JK
9285};
9286
9287const struct bpf_prog_ops cg_skb_prog_ops = {
1cf1cae9 9288 .test_run = bpf_prog_test_run_skb,
0e33661d
DM
9289};
9290
cd3092c7
MX
9291const struct bpf_verifier_ops lwt_in_verifier_ops = {
9292 .get_func_proto = lwt_in_func_proto,
3a0af8fd 9293 .is_valid_access = lwt_is_valid_access,
2492d3b8 9294 .convert_ctx_access = bpf_convert_ctx_access,
7de16e3a
JK
9295};
9296
cd3092c7
MX
9297const struct bpf_prog_ops lwt_in_prog_ops = {
9298 .test_run = bpf_prog_test_run_skb,
9299};
9300
9301const struct bpf_verifier_ops lwt_out_verifier_ops = {
9302 .get_func_proto = lwt_out_func_proto,
3a0af8fd 9303 .is_valid_access = lwt_is_valid_access,
2492d3b8 9304 .convert_ctx_access = bpf_convert_ctx_access,
7de16e3a
JK
9305};
9306
cd3092c7 9307const struct bpf_prog_ops lwt_out_prog_ops = {
1cf1cae9 9308 .test_run = bpf_prog_test_run_skb,
3a0af8fd
TG
9309};
9310
7de16e3a 9311const struct bpf_verifier_ops lwt_xmit_verifier_ops = {
3a0af8fd
TG
9312 .get_func_proto = lwt_xmit_func_proto,
9313 .is_valid_access = lwt_is_valid_access,
2492d3b8 9314 .convert_ctx_access = bpf_convert_ctx_access,
3a0af8fd 9315 .gen_prologue = tc_cls_act_prologue,
7de16e3a
JK
9316};
9317
9318const struct bpf_prog_ops lwt_xmit_prog_ops = {
1cf1cae9 9319 .test_run = bpf_prog_test_run_skb,
3a0af8fd
TG
9320};
9321
004d4b27
MX
9322const struct bpf_verifier_ops lwt_seg6local_verifier_ops = {
9323 .get_func_proto = lwt_seg6local_func_proto,
9324 .is_valid_access = lwt_is_valid_access,
9325 .convert_ctx_access = bpf_convert_ctx_access,
9326};
9327
9328const struct bpf_prog_ops lwt_seg6local_prog_ops = {
9329 .test_run = bpf_prog_test_run_skb,
9330};
9331
7de16e3a 9332const struct bpf_verifier_ops cg_sock_verifier_ops = {
ae2cf1c4 9333 .get_func_proto = sock_filter_func_proto,
61023658 9334 .is_valid_access = sock_filter_is_valid_access,
c64b7983 9335 .convert_ctx_access = bpf_sock_convert_ctx_access,
61023658
DA
9336};
9337
7de16e3a
JK
9338const struct bpf_prog_ops cg_sock_prog_ops = {
9339};
9340
4fbac77d
AI
9341const struct bpf_verifier_ops cg_sock_addr_verifier_ops = {
9342 .get_func_proto = sock_addr_func_proto,
9343 .is_valid_access = sock_addr_is_valid_access,
9344 .convert_ctx_access = sock_addr_convert_ctx_access,
9345};
9346
9347const struct bpf_prog_ops cg_sock_addr_prog_ops = {
9348};
9349
7de16e3a 9350const struct bpf_verifier_ops sock_ops_verifier_ops = {
8c4b4c7e 9351 .get_func_proto = sock_ops_func_proto,
40304b2a
LB
9352 .is_valid_access = sock_ops_is_valid_access,
9353 .convert_ctx_access = sock_ops_convert_ctx_access,
9354};
9355
7de16e3a
JK
9356const struct bpf_prog_ops sock_ops_prog_ops = {
9357};
9358
9359const struct bpf_verifier_ops sk_skb_verifier_ops = {
b005fd18
JF
9360 .get_func_proto = sk_skb_func_proto,
9361 .is_valid_access = sk_skb_is_valid_access,
8108a775 9362 .convert_ctx_access = sk_skb_convert_ctx_access,
8a31db56 9363 .gen_prologue = sk_skb_prologue,
b005fd18
JF
9364};
9365
7de16e3a
JK
9366const struct bpf_prog_ops sk_skb_prog_ops = {
9367};
9368
4f738adb
JF
9369const struct bpf_verifier_ops sk_msg_verifier_ops = {
9370 .get_func_proto = sk_msg_func_proto,
9371 .is_valid_access = sk_msg_is_valid_access,
9372 .convert_ctx_access = sk_msg_convert_ctx_access,
b09928b9 9373 .gen_prologue = bpf_noop_prologue,
4f738adb
JF
9374};
9375
9376const struct bpf_prog_ops sk_msg_prog_ops = {
9377};
9378
d58e468b
PP
9379const struct bpf_verifier_ops flow_dissector_verifier_ops = {
9380 .get_func_proto = flow_dissector_func_proto,
9381 .is_valid_access = flow_dissector_is_valid_access,
089b19a9 9382 .convert_ctx_access = flow_dissector_convert_ctx_access,
d58e468b
PP
9383};
9384
9385const struct bpf_prog_ops flow_dissector_prog_ops = {
b7a1848e 9386 .test_run = bpf_prog_test_run_flow_dissector,
d58e468b
PP
9387};
9388
8ced425e 9389int sk_detach_filter(struct sock *sk)
55b33325
PE
9390{
9391 int ret = -ENOENT;
9392 struct sk_filter *filter;
9393
d59577b6
VB
9394 if (sock_flag(sk, SOCK_FILTER_LOCKED))
9395 return -EPERM;
9396
8ced425e
HFS
9397 filter = rcu_dereference_protected(sk->sk_filter,
9398 lockdep_sock_is_held(sk));
55b33325 9399 if (filter) {
a9b3cd7f 9400 RCU_INIT_POINTER(sk->sk_filter, NULL);
46bcf14f 9401 sk_filter_uncharge(sk, filter);
55b33325
PE
9402 ret = 0;
9403 }
a3ea269b 9404
55b33325
PE
9405 return ret;
9406}
8ced425e 9407EXPORT_SYMBOL_GPL(sk_detach_filter);
a8fc9277 9408
a3ea269b
DB
9409int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf,
9410 unsigned int len)
a8fc9277 9411{
a3ea269b 9412 struct sock_fprog_kern *fprog;
a8fc9277 9413 struct sk_filter *filter;
a3ea269b 9414 int ret = 0;
a8fc9277
PE
9415
9416 lock_sock(sk);
9417 filter = rcu_dereference_protected(sk->sk_filter,
8ced425e 9418 lockdep_sock_is_held(sk));
a8fc9277
PE
9419 if (!filter)
9420 goto out;
a3ea269b
DB
9421
9422 /* We're copying the filter that was originally attached,
93d08b69
DB
9423 * so no conversion/decode needed anymore. eBPF programs that
9424 * have no original program cannot be dumped through this.
a3ea269b 9425 */
93d08b69 9426 ret = -EACCES;
7ae457c1 9427 fprog = filter->prog->orig_prog;
93d08b69
DB
9428 if (!fprog)
9429 goto out;
a3ea269b
DB
9430
9431 ret = fprog->len;
a8fc9277 9432 if (!len)
a3ea269b 9433 /* User space only enquires number of filter blocks. */
a8fc9277 9434 goto out;
a3ea269b 9435
a8fc9277 9436 ret = -EINVAL;
a3ea269b 9437 if (len < fprog->len)
a8fc9277
PE
9438 goto out;
9439
9440 ret = -EFAULT;
009937e7 9441 if (copy_to_user(ubuf, fprog->filter, bpf_classic_proglen(fprog)))
a3ea269b 9442 goto out;
a8fc9277 9443
a3ea269b
DB
9444 /* Instead of bytes, the API requests to return the number
9445 * of filter blocks.
9446 */
9447 ret = fprog->len;
a8fc9277
PE
9448out:
9449 release_sock(sk);
9450 return ret;
9451}
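/*
 * Illustrative sketch (userspace, not part of filter.c) of the socket-level
 * entry points above: SO_ATTACH_FILTER installs a classic BPF program,
 * SO_DETACH_FILTER lands in sk_detach_filter(), and SO_GET_FILTER is served
 * by sk_get_filter().  Assumes the SO_* constants are visible through
 * <sys/socket.h> (otherwise include <asm/socket.h>).
 */
#include <linux/filter.h>
#include <sys/socket.h>

static int attach_accept_all(int fd)
{
	/* single "ret #0xffffffff" instruction: accept the whole packet */
	struct sock_filter code[] = {
		{ BPF_RET | BPF_K, 0, 0, 0xffffffff },
	};
	struct sock_fprog prog = {
		.len = sizeof(code) / sizeof(code[0]),
		.filter = code,
	};

	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog));
}

static int detach_filter(int fd)
{
	int dummy = 0;

	/* optval is ignored; this ends up in sk_detach_filter() */
	return setsockopt(fd, SOL_SOCKET, SO_DETACH_FILTER, &dummy, sizeof(dummy));
}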
2dbb9b9e
MKL
9452
9453#ifdef CONFIG_INET
2dbb9b9e
MKL
9454static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
9455 struct sock_reuseport *reuse,
9456 struct sock *sk, struct sk_buff *skb,
9457 u32 hash)
9458{
9459 reuse_kern->skb = skb;
9460 reuse_kern->sk = sk;
9461 reuse_kern->selected_sk = NULL;
9462 reuse_kern->data_end = skb->data + skb_headlen(skb);
9463 reuse_kern->hash = hash;
9464 reuse_kern->reuseport_id = reuse->reuseport_id;
9465 reuse_kern->bind_inany = reuse->bind_inany;
9466}
9467
9468struct sock *bpf_run_sk_reuseport(struct sock_reuseport *reuse, struct sock *sk,
9469 struct bpf_prog *prog, struct sk_buff *skb,
9470 u32 hash)
9471{
9472 struct sk_reuseport_kern reuse_kern;
9473 enum sk_action action;
9474
9475 bpf_init_reuseport_kern(&reuse_kern, reuse, sk, skb, hash);
9476 action = BPF_PROG_RUN(prog, &reuse_kern);
9477
9478 if (action == SK_PASS)
9479 return reuse_kern.selected_sk;
9480 else
9481 return ERR_PTR(-ECONNREFUSED);
9482}
9483
9484BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
9485 struct bpf_map *, map, void *, key, u32, flags)
9486{
9fed9000 9487 bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
2dbb9b9e
MKL
9488 struct sock_reuseport *reuse;
9489 struct sock *selected_sk;
9490
9491 selected_sk = map->ops->map_lookup_elem(map, key);
9492 if (!selected_sk)
9493 return -ENOENT;
9494
9495 reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
9fed9000 9496 if (!reuse) {
64d85290
JS
9497 /* Lookup in sock_map can return TCP ESTABLISHED sockets. */
9498 if (sk_is_refcounted(selected_sk))
9499 sock_put(selected_sk);
9500
9fed9000
JS
9501 /* reuseport_array only holds sockets with a non-NULL sk_reuseport_cb.
9502 * The only (!reuse) case here is that the sk has already been
9503 * unhashed (e.g. by close()), so treat it as -ENOENT.
9504 *
9505 * Other maps (e.g. sock_map) do not provide this guarantee and
9506 * the sk may never be in the reuseport group to begin with.
2dbb9b9e 9507 */
9fed9000
JS
9508 return is_sockarray ? -ENOENT : -EINVAL;
9509 }
2dbb9b9e
MKL
9510
9511 if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
035ff358 9512 struct sock *sk = reuse_kern->sk;
2dbb9b9e 9513
2dbb9b9e
MKL
9514 if (sk->sk_protocol != selected_sk->sk_protocol)
9515 return -EPROTOTYPE;
9516 else if (sk->sk_family != selected_sk->sk_family)
9517 return -EAFNOSUPPORT;
9518
9519 /* Catch all. Likely bound to a different sockaddr. */
9520 return -EBADFD;
9521 }
9522
9523 reuse_kern->selected_sk = selected_sk;
9524
9525 return 0;
9526}
9527
9528static const struct bpf_func_proto sk_select_reuseport_proto = {
9529 .func = sk_select_reuseport,
9530 .gpl_only = false,
9531 .ret_type = RET_INTEGER,
9532 .arg1_type = ARG_PTR_TO_CTX,
9533 .arg2_type = ARG_CONST_MAP_PTR,
9534 .arg3_type = ARG_PTR_TO_MAP_KEY,
9535 .arg4_type = ARG_ANYTHING,
9536};
9537
9538BPF_CALL_4(sk_reuseport_load_bytes,
9539 const struct sk_reuseport_kern *, reuse_kern, u32, offset,
9540 void *, to, u32, len)
9541{
9542 return ____bpf_skb_load_bytes(reuse_kern->skb, offset, to, len);
9543}
9544
9545static const struct bpf_func_proto sk_reuseport_load_bytes_proto = {
9546 .func = sk_reuseport_load_bytes,
9547 .gpl_only = false,
9548 .ret_type = RET_INTEGER,
9549 .arg1_type = ARG_PTR_TO_CTX,
9550 .arg2_type = ARG_ANYTHING,
9551 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
9552 .arg4_type = ARG_CONST_SIZE,
9553};
9554
9555BPF_CALL_5(sk_reuseport_load_bytes_relative,
9556 const struct sk_reuseport_kern *, reuse_kern, u32, offset,
9557 void *, to, u32, len, u32, start_header)
9558{
9559 return ____bpf_skb_load_bytes_relative(reuse_kern->skb, offset, to,
9560 len, start_header);
9561}
9562
9563static const struct bpf_func_proto sk_reuseport_load_bytes_relative_proto = {
9564 .func = sk_reuseport_load_bytes_relative,
9565 .gpl_only = false,
9566 .ret_type = RET_INTEGER,
9567 .arg1_type = ARG_PTR_TO_CTX,
9568 .arg2_type = ARG_ANYTHING,
9569 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
9570 .arg4_type = ARG_CONST_SIZE,
9571 .arg5_type = ARG_ANYTHING,
9572};
9573
9574static const struct bpf_func_proto *
9575sk_reuseport_func_proto(enum bpf_func_id func_id,
9576 const struct bpf_prog *prog)
9577{
9578 switch (func_id) {
9579 case BPF_FUNC_sk_select_reuseport:
9580 return &sk_select_reuseport_proto;
9581 case BPF_FUNC_skb_load_bytes:
9582 return &sk_reuseport_load_bytes_proto;
9583 case BPF_FUNC_skb_load_bytes_relative:
9584 return &sk_reuseport_load_bytes_relative_proto;
9585 default:
9586 return bpf_base_func_proto(func_id);
9587 }
9588}
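/*
 * Illustrative sketch (not part of filter.c): an SEC("sk_reuseport") program
 * using the bpf_sk_select_reuseport() helper wired up above.  The map is
 * assumed to be a BPF_MAP_TYPE_REUSEPORT_SOCKARRAY populated from userspace
 * with the group's listening sockets; its name and sizing are examples only.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
	__uint(max_entries, 16);
	__type(key, __u32);
	__type(value, __u64);
} reuseport_map SEC(".maps");

SEC("sk_reuseport")
int select_by_hash(struct sk_reuseport_md *reuse_md)
{
	__u32 index = reuse_md->hash % 16;

	/* On -ENOENT/-EINVAL no socket was selected; SK_PASS then lets the
	 * kernel fall back to its own reuseport selection.
	 */
	bpf_sk_select_reuseport(reuse_md, &reuseport_map, &index, 0);

	return SK_PASS;
}

char _license[] SEC("license") = "GPL";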
9589
9590static bool
9591sk_reuseport_is_valid_access(int off, int size,
9592 enum bpf_access_type type,
9593 const struct bpf_prog *prog,
9594 struct bpf_insn_access_aux *info)
9595{
9596 const u32 size_default = sizeof(__u32);
9597
9598 if (off < 0 || off >= sizeof(struct sk_reuseport_md) ||
9599 off % size || type != BPF_READ)
9600 return false;
9601
9602 switch (off) {
9603 case offsetof(struct sk_reuseport_md, data):
9604 info->reg_type = PTR_TO_PACKET;
9605 return size == sizeof(__u64);
9606
9607 case offsetof(struct sk_reuseport_md, data_end):
9608 info->reg_type = PTR_TO_PACKET_END;
9609 return size == sizeof(__u64);
9610
9611 case offsetof(struct sk_reuseport_md, hash):
9612 return size == size_default;
9613
9614 /* Fields that allow narrowing */
2c238177 9615 case bpf_ctx_range(struct sk_reuseport_md, eth_protocol):
c593642c 9616 if (size < sizeof_field(struct sk_buff, protocol))
2dbb9b9e 9617 return false;
df561f66 9618 fallthrough;
2c238177
IL
9619 case bpf_ctx_range(struct sk_reuseport_md, ip_protocol):
9620 case bpf_ctx_range(struct sk_reuseport_md, bind_inany):
9621 case bpf_ctx_range(struct sk_reuseport_md, len):
2dbb9b9e
MKL
9622 bpf_ctx_record_field_size(info, size_default);
9623 return bpf_ctx_narrow_access_ok(off, size, size_default);
9624
9625 default:
9626 return false;
9627 }
9628}
9629
9630#define SK_REUSEPORT_LOAD_FIELD(F) ({ \
9631 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_reuseport_kern, F), \
9632 si->dst_reg, si->src_reg, \
9633 bpf_target_off(struct sk_reuseport_kern, F, \
c593642c 9634 sizeof_field(struct sk_reuseport_kern, F), \
2dbb9b9e
MKL
9635 target_size)); \
9636 })
9637
9638#define SK_REUSEPORT_LOAD_SKB_FIELD(SKB_FIELD) \
9639 SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
9640 struct sk_buff, \
9641 skb, \
9642 SKB_FIELD)
9643
bf976514
MM
9644#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \
9645 SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
9646 struct sock, \
9647 sk, \
9648 SK_FIELD)
2dbb9b9e
MKL
9649
9650static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
9651 const struct bpf_insn *si,
9652 struct bpf_insn *insn_buf,
9653 struct bpf_prog *prog,
9654 u32 *target_size)
9655{
9656 struct bpf_insn *insn = insn_buf;
9657
9658 switch (si->off) {
9659 case offsetof(struct sk_reuseport_md, data):
9660 SK_REUSEPORT_LOAD_SKB_FIELD(data);
9661 break;
9662
9663 case offsetof(struct sk_reuseport_md, len):
9664 SK_REUSEPORT_LOAD_SKB_FIELD(len);
9665 break;
9666
9667 case offsetof(struct sk_reuseport_md, eth_protocol):
9668 SK_REUSEPORT_LOAD_SKB_FIELD(protocol);
9669 break;
9670
9671 case offsetof(struct sk_reuseport_md, ip_protocol):
bf976514 9672 SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
2dbb9b9e
MKL
9673 break;
9674
9675 case offsetof(struct sk_reuseport_md, data_end):
9676 SK_REUSEPORT_LOAD_FIELD(data_end);
9677 break;
9678
9679 case offsetof(struct sk_reuseport_md, hash):
9680 SK_REUSEPORT_LOAD_FIELD(hash);
9681 break;
9682
9683 case offsetof(struct sk_reuseport_md, bind_inany):
9684 SK_REUSEPORT_LOAD_FIELD(bind_inany);
9685 break;
9686 }
9687
9688 return insn - insn_buf;
9689}
9690
9691const struct bpf_verifier_ops sk_reuseport_verifier_ops = {
9692 .get_func_proto = sk_reuseport_func_proto,
9693 .is_valid_access = sk_reuseport_is_valid_access,
9694 .convert_ctx_access = sk_reuseport_convert_ctx_access,
9695};
9696
9697const struct bpf_prog_ops sk_reuseport_prog_ops = {
9698};
7e6897f9 9699
1559b4aa
JS
9700DEFINE_STATIC_KEY_FALSE(bpf_sk_lookup_enabled);
9701EXPORT_SYMBOL(bpf_sk_lookup_enabled);
7e6897f9 9702
e9ddbb77
JS
9703BPF_CALL_3(bpf_sk_lookup_assign, struct bpf_sk_lookup_kern *, ctx,
9704 struct sock *, sk, u64, flags)
7e6897f9 9705{
e9ddbb77
JS
9706 if (unlikely(flags & ~(BPF_SK_LOOKUP_F_REPLACE |
9707 BPF_SK_LOOKUP_F_NO_REUSEPORT)))
9708 return -EINVAL;
9709 if (unlikely(sk && sk_is_refcounted(sk)))
9710 return -ESOCKTNOSUPPORT; /* reject non-RCU freed sockets */
9711 if (unlikely(sk && sk->sk_state == TCP_ESTABLISHED))
9712 return -ESOCKTNOSUPPORT; /* reject connected sockets */
9713
9714 /* Check if socket is suitable for packet L3/L4 protocol */
9715 if (sk && sk->sk_protocol != ctx->protocol)
9716 return -EPROTOTYPE;
9717 if (sk && sk->sk_family != ctx->family &&
9718 (sk->sk_family == AF_INET || ipv6_only_sock(sk)))
9719 return -EAFNOSUPPORT;
9720
9721 if (ctx->selected_sk && !(flags & BPF_SK_LOOKUP_F_REPLACE))
9722 return -EEXIST;
9723
9724 /* Select socket as lookup result */
9725 ctx->selected_sk = sk;
9726 ctx->no_reuseport = flags & BPF_SK_LOOKUP_F_NO_REUSEPORT;
9727 return 0;
7e6897f9 9728}
af7ec138 9729
e9ddbb77
JS
9730static const struct bpf_func_proto bpf_sk_lookup_assign_proto = {
9731 .func = bpf_sk_lookup_assign,
9732 .gpl_only = false,
9733 .ret_type = RET_INTEGER,
9734 .arg1_type = ARG_PTR_TO_CTX,
9735 .arg2_type = ARG_PTR_TO_SOCKET_OR_NULL,
9736 .arg3_type = ARG_ANYTHING,
af7ec138
YS
9737};
9738
e9ddbb77
JS
9739static const struct bpf_func_proto *
9740sk_lookup_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
9741{
9742 switch (func_id) {
9743 case BPF_FUNC_perf_event_output:
9744 return &bpf_event_output_data_proto;
9745 case BPF_FUNC_sk_assign:
9746 return &bpf_sk_lookup_assign_proto;
9747 case BPF_FUNC_sk_release:
9748 return &bpf_sk_release_proto;
9749 default:
1df8f55a 9750 return bpf_sk_base_func_proto(func_id);
e9ddbb77
JS
9751 }
9752}
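/*
 * Illustrative sketch (not part of filter.c): an SEC("sk_lookup") program
 * steering a TCP port range to one listening socket with the bpf_sk_assign()
 * helper defined above.  The sockmap and port range are examples only;
 * local_port is host byte order and protocol is IPPROTO_*.
 */
#include <linux/bpf.h>
#include <linux/in.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_SOCKMAP);
	__uint(max_entries, 1);
	__type(key, __u32);
	__type(value, __u64);
} dest_sock SEC(".maps");

SEC("sk_lookup")
int steer_ports(struct bpf_sk_lookup *ctx)
{
	__u32 zero = 0;
	struct bpf_sock *sk;
	long err;

	if (ctx->protocol != IPPROTO_TCP ||
	    ctx->local_port < 7000 || ctx->local_port > 7999)
		return SK_PASS;

	sk = bpf_map_lookup_elem(&dest_sock, &zero);
	if (!sk)
		return SK_PASS;

	err = bpf_sk_assign(ctx, sk, 0);
	bpf_sk_release(sk);

	return err ? SK_DROP : SK_PASS;
}

char _license[] SEC("license") = "GPL";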
af7ec138 9753
e9ddbb77
JS
9754static bool sk_lookup_is_valid_access(int off, int size,
9755 enum bpf_access_type type,
9756 const struct bpf_prog *prog,
9757 struct bpf_insn_access_aux *info)
9758{
9759 if (off < 0 || off >= sizeof(struct bpf_sk_lookup))
9760 return false;
9761 if (off % size != 0)
9762 return false;
9763 if (type != BPF_READ)
9764 return false;
9765
9766 switch (off) {
9767 case offsetof(struct bpf_sk_lookup, sk):
9768 info->reg_type = PTR_TO_SOCKET_OR_NULL;
9769 return size == sizeof(__u64);
af7ec138 9770
e9ddbb77
JS
9771 case bpf_ctx_range(struct bpf_sk_lookup, family):
9772 case bpf_ctx_range(struct bpf_sk_lookup, protocol):
9773 case bpf_ctx_range(struct bpf_sk_lookup, remote_ip4):
9774 case bpf_ctx_range(struct bpf_sk_lookup, local_ip4):
9775 case bpf_ctx_range_till(struct bpf_sk_lookup, remote_ip6[0], remote_ip6[3]):
9776 case bpf_ctx_range_till(struct bpf_sk_lookup, local_ip6[0], local_ip6[3]):
9777 case bpf_ctx_range(struct bpf_sk_lookup, remote_port):
9778 case bpf_ctx_range(struct bpf_sk_lookup, local_port):
9779 bpf_ctx_record_field_size(info, sizeof(__u32));
9780 return bpf_ctx_narrow_access_ok(off, size, sizeof(__u32));
9781
9782 default:
9783 return false;
9784 }
9785}
9786
9787static u32 sk_lookup_convert_ctx_access(enum bpf_access_type type,
9788 const struct bpf_insn *si,
9789 struct bpf_insn *insn_buf,
9790 struct bpf_prog *prog,
9791 u32 *target_size)
af7ec138 9792{
e9ddbb77
JS
9793 struct bpf_insn *insn = insn_buf;
9794
9795 switch (si->off) {
9796 case offsetof(struct bpf_sk_lookup, sk):
9797 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
9798 offsetof(struct bpf_sk_lookup_kern, selected_sk));
9799 break;
af7ec138 9800
e9ddbb77
JS
9801 case offsetof(struct bpf_sk_lookup, family):
9802 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9803 bpf_target_off(struct bpf_sk_lookup_kern,
9804 family, 2, target_size));
9805 break;
9806
9807 case offsetof(struct bpf_sk_lookup, protocol):
9808 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9809 bpf_target_off(struct bpf_sk_lookup_kern,
9810 protocol, 2, target_size));
9811 break;
9812
9813 case offsetof(struct bpf_sk_lookup, remote_ip4):
9814 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9815 bpf_target_off(struct bpf_sk_lookup_kern,
9816 v4.saddr, 4, target_size));
9817 break;
9818
9819 case offsetof(struct bpf_sk_lookup, local_ip4):
9820 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
9821 bpf_target_off(struct bpf_sk_lookup_kern,
9822 v4.daddr, 4, target_size));
9823 break;
9824
9825 case bpf_ctx_range_till(struct bpf_sk_lookup,
9826 remote_ip6[0], remote_ip6[3]): {
9827#if IS_ENABLED(CONFIG_IPV6)
9828 int off = si->off;
9829
9830 off -= offsetof(struct bpf_sk_lookup, remote_ip6[0]);
9831 off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
9832 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
9833 offsetof(struct bpf_sk_lookup_kern, v6.saddr));
9834 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9835 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
9836#else
9837 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9838#endif
9839 break;
9840 }
9841 case bpf_ctx_range_till(struct bpf_sk_lookup,
9842 local_ip6[0], local_ip6[3]): {
9843#if IS_ENABLED(CONFIG_IPV6)
9844 int off = si->off;
9845
9846 off -= offsetof(struct bpf_sk_lookup, local_ip6[0]);
9847 off += bpf_target_off(struct in6_addr, s6_addr32[0], 4, target_size);
9848 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, si->src_reg,
9849 offsetof(struct bpf_sk_lookup_kern, v6.daddr));
9850 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
9851 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, off);
9852#else
9853 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
9854#endif
9855 break;
af7ec138 9856 }
e9ddbb77
JS
9857 case offsetof(struct bpf_sk_lookup, remote_port):
9858 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9859 bpf_target_off(struct bpf_sk_lookup_kern,
9860 sport, 2, target_size));
9861 break;
9862
9863 case offsetof(struct bpf_sk_lookup, local_port):
9864 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
9865 bpf_target_off(struct bpf_sk_lookup_kern,
9866 dport, 2, target_size));
9867 break;
9868 }
9869
9870 return insn - insn_buf;
af7ec138 9871}
e9ddbb77
JS
9872
9873const struct bpf_prog_ops sk_lookup_prog_ops = {
9874};
9875
9876const struct bpf_verifier_ops sk_lookup_verifier_ops = {
9877 .get_func_proto = sk_lookup_func_proto,
9878 .is_valid_access = sk_lookup_is_valid_access,
9879 .convert_ctx_access = sk_lookup_convert_ctx_access,
9880};
9881
2dbb9b9e 9882#endif /* CONFIG_INET */
7e6897f9 9883
6a64037d 9884DEFINE_BPF_DISPATCHER(xdp)
7e6897f9
BT
9885
9886void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
9887{
6a64037d 9888 bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
7e6897f9 9889}
af7ec138 9890
bc4f0548 9891#ifdef CONFIG_DEBUG_INFO_BTF
fce557bc 9892BTF_ID_LIST_GLOBAL(btf_sock_ids)
bc4f0548 9893#define BTF_SOCK_TYPE(name, type) BTF_ID(struct, type)
af7ec138
YS
9894BTF_SOCK_TYPE_xxx
9895#undef BTF_SOCK_TYPE
bc4f0548 9896#else
fce557bc 9897u32 btf_sock_ids[MAX_BTF_SOCK_TYPE];
af7ec138
YS
9898#endif
9899
af7ec138
YS
9900BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
9901{
9902 /* tcp6_sock type is not generated in DWARF and hence BTF;
9903 * trigger an explicit type generation here.
9904 */
9905 BTF_TYPE_EMIT(struct tcp6_sock);
8c33dadc 9906 if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
af7ec138
YS
9907 sk->sk_family == AF_INET6)
9908 return (unsigned long)sk;
9909
9910 return (unsigned long)NULL;
9911}
9912
9913const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
9914 .func = bpf_skc_to_tcp6_sock,
9915 .gpl_only = false,
9916 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1df8f55a 9917 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
af7ec138
YS
9918 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP6],
9919};
478cfbdf
YS
9920
9921BPF_CALL_1(bpf_skc_to_tcp_sock, struct sock *, sk)
9922{
8c33dadc 9923 if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
478cfbdf
YS
9924 return (unsigned long)sk;
9925
9926 return (unsigned long)NULL;
9927}
9928
9929const struct bpf_func_proto bpf_skc_to_tcp_sock_proto = {
9930 .func = bpf_skc_to_tcp_sock,
9931 .gpl_only = false,
9932 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1df8f55a 9933 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
478cfbdf
YS
9934 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP],
9935};
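/*
 * Illustrative sketch (not part of filter.c): using bpf_skc_to_tcp_sock()
 * from a sockops program.  This assumes the program type reaches the helper
 * through bpf_sk_base_func_proto() (so the loader needs CAP_PERFMON per the
 * perfmon_capable() check there), and that vmlinux.h supplies the BTF
 * definition of struct tcp_sock for the direct field read.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

SEC("sockops")
int dump_cwnd(struct bpf_sock_ops *skops)
{
	struct bpf_sock *sk = skops->sk;
	struct tcp_sock *tp;

	if (!sk)
		return 1;

	/* returns NULL unless sk is a full TCP socket (see the check above) */
	tp = bpf_skc_to_tcp_sock(sk);
	if (tp)
		bpf_printk("snd_cwnd=%u", tp->snd_cwnd);

	return 1;
}

char _license[] SEC("license") = "GPL";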
9936
9937BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk)
9938{
6b207d66 9939#ifdef CONFIG_INET
8c33dadc 9940 if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_TIME_WAIT)
478cfbdf 9941 return (unsigned long)sk;
6b207d66 9942#endif
478cfbdf
YS
9943
9944#if IS_BUILTIN(CONFIG_IPV6)
8c33dadc 9945 if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT)
478cfbdf
YS
9946 return (unsigned long)sk;
9947#endif
9948
9949 return (unsigned long)NULL;
9950}
9951
9952const struct bpf_func_proto bpf_skc_to_tcp_timewait_sock_proto = {
9953 .func = bpf_skc_to_tcp_timewait_sock,
9954 .gpl_only = false,
9955 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1df8f55a 9956 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
478cfbdf
YS
9957 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_TW],
9958};
9959
9960BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk)
9961{
6b207d66 9962#ifdef CONFIG_INET
8c33dadc 9963 if (sk && sk->sk_prot == &tcp_prot && sk->sk_state == TCP_NEW_SYN_RECV)
478cfbdf 9964 return (unsigned long)sk;
6b207d66 9965#endif
478cfbdf
YS
9966
9967#if IS_BUILTIN(CONFIG_IPV6)
8c33dadc 9968 if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV)
478cfbdf
YS
9969 return (unsigned long)sk;
9970#endif
9971
9972 return (unsigned long)NULL;
9973}
9974
9975const struct bpf_func_proto bpf_skc_to_tcp_request_sock_proto = {
9976 .func = bpf_skc_to_tcp_request_sock,
9977 .gpl_only = false,
9978 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1df8f55a 9979 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
478cfbdf
YS
9980 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_TCP_REQ],
9981};
0d4fad3e
YS
9982
9983BPF_CALL_1(bpf_skc_to_udp6_sock, struct sock *, sk)
9984{
9985 /* udp6_sock type is not generated in DWARF and hence BTF;
9986 * trigger an explicit type generation here.
9987 */
9988 BTF_TYPE_EMIT(struct udp6_sock);
8c33dadc 9989 if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_UDP &&
0d4fad3e
YS
9990 sk->sk_type == SOCK_DGRAM && sk->sk_family == AF_INET6)
9991 return (unsigned long)sk;
9992
9993 return (unsigned long)NULL;
9994}
9995
9996const struct bpf_func_proto bpf_skc_to_udp6_sock_proto = {
9997 .func = bpf_skc_to_udp6_sock,
9998 .gpl_only = false,
9999 .ret_type = RET_PTR_TO_BTF_ID_OR_NULL,
1df8f55a 10000 .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
0d4fad3e
YS
10001 .ret_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_UDP6],
10002};
1df8f55a
MKL
10003
10004static const struct bpf_func_proto *
10005bpf_sk_base_func_proto(enum bpf_func_id func_id)
10006{
10007 const struct bpf_func_proto *func;
10008
10009 switch (func_id) {
10010 case BPF_FUNC_skc_to_tcp6_sock:
10011 func = &bpf_skc_to_tcp6_sock_proto;
10012 break;
10013 case BPF_FUNC_skc_to_tcp_sock:
10014 func = &bpf_skc_to_tcp_sock_proto;
10015 break;
10016 case BPF_FUNC_skc_to_tcp_timewait_sock:
10017 func = &bpf_skc_to_tcp_timewait_sock_proto;
10018 break;
10019 case BPF_FUNC_skc_to_tcp_request_sock:
10020 func = &bpf_skc_to_tcp_request_sock_proto;
10021 break;
10022 case BPF_FUNC_skc_to_udp6_sock:
10023 func = &bpf_skc_to_udp6_sock_proto;
10024 break;
10025 default:
10026 return bpf_base_func_proto(func_id);
10027 }
10028
10029 if (!perfmon_capable())
10030 return NULL;
10031
10032 return func;
10033}