// SPDX-License-Identifier: GPL-2.0-only
/* Unstable Conntrack Helpers for XDP and TC-BPF hook
 *
 * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
 * allowed to break compatibility for these functions since the interface they
 * are exposed through to BPF programs is explicitly unstable.
 */

#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/btf_ids.h>
#include <linux/net_namespace.h>
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netfilter/nf_conntrack_core.h>

/* bpf_ct_opts - Options for CT lookup helpers
 *
 * Members:
 * @netns_id   - Specify the network namespace for lookup
 *               Values:
 *                 BPF_F_CURRENT_NETNS (-1)
 *                   Use namespace associated with ctx (xdp_md, __sk_buff)
 *                 [0, S32_MAX]
 *                   Network Namespace ID
 * @error      - Out parameter, set for any errors encountered
 *               Values:
 *                 -EINVAL - Passed NULL for bpf_tuple pointer
 *                 -EINVAL - opts->reserved is not 0
 *                 -EINVAL - netns_id is less than -1
 *                 -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
 *                 -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
 *                 -ENONET - No network namespace found for netns_id
 *                 -ENOENT - Conntrack lookup could not find entry for tuple
 *                 -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
 *                                 or sizeof(tuple->ipv6)
 * @l4proto    - Layer 4 protocol
 *               Values:
 *                 IPPROTO_TCP, IPPROTO_UDP
 * @dir        - Connection tracking tuple direction, set by the lookup helpers
 * @reserved   - Reserved member, will be reused for more options in future
 *               Values:
 *                 0
 */
struct bpf_ct_opts {
        s32 netns_id;
        s32 error;
        u8 l4proto;
        u8 dir;
        u8 reserved[2];
};

enum {
        NF_BPF_CT_OPTS_SZ = 12,
};
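
/* Example (illustrative only, BPF program side): struct bpf_ct_opts is defined
 * in this .c file, so a BPF program usually mirrors the 12-byte layout itself
 * (the BPF selftests use a local flavor for this) and passes the size of that
 * copy as opts__sz, which must equal NF_BPF_CT_OPTS_SZ. The name
 * bpf_ct_opts___local below is such a program-side assumption:
 *
 *      struct bpf_ct_opts___local {
 *              __s32 netns_id;
 *              __s32 error;
 *              __u8 l4proto;
 *              __u8 dir;
 *              __u8 reserved[2];
 *      };
 *
 *      struct bpf_ct_opts___local opts = {
 *              .netns_id = -1,                 // BPF_F_CURRENT_NETNS: netns of the ctx
 *              .l4proto  = IPPROTO_TCP,
 *      };
 */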

static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
                                 u32 tuple_len, u8 protonum, u8 dir,
                                 struct nf_conntrack_tuple *tuple)
{
        union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
        union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
        union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
                                                  : &tuple->src.u;
        union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
                                                  : (void *)&tuple->dst.u;

        if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
                return -EPROTO;

        memset(tuple, 0, sizeof(*tuple));

        switch (tuple_len) {
        case sizeof(bpf_tuple->ipv4):
                tuple->src.l3num = AF_INET;
                src->ip = bpf_tuple->ipv4.saddr;
                sport->tcp.port = bpf_tuple->ipv4.sport;
                dst->ip = bpf_tuple->ipv4.daddr;
                dport->tcp.port = bpf_tuple->ipv4.dport;
                break;
        case sizeof(bpf_tuple->ipv6):
                tuple->src.l3num = AF_INET6;
                memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
                sport->tcp.port = bpf_tuple->ipv6.sport;
                memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
                dport->tcp.port = bpf_tuple->ipv6.dport;
                break;
        default:
                return -EAFNOSUPPORT;
        }
        tuple->dst.protonum = protonum;
        tuple->dst.dir = dir;

        return 0;
}

static struct nf_conn *
__bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
                        u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
                        u32 timeout)
{
        struct nf_conntrack_tuple otuple, rtuple;
        struct nf_conn *ct;
        int err;

        if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
            opts_len != NF_BPF_CT_OPTS_SZ)
                return ERR_PTR(-EINVAL);

        if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
                return ERR_PTR(-EINVAL);

        err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
                                    IP_CT_DIR_ORIGINAL, &otuple);
        if (err < 0)
                return ERR_PTR(err);

        err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
                                    IP_CT_DIR_REPLY, &rtuple);
        if (err < 0)
                return ERR_PTR(err);

        if (opts->netns_id >= 0) {
                net = get_net_ns_by_id(net, opts->netns_id);
                if (unlikely(!net))
                        return ERR_PTR(-ENONET);
        }

        ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
                                GFP_ATOMIC);
        if (IS_ERR(ct))
                goto out;

        memset(&ct->proto, 0, sizeof(ct->proto));
        __nf_ct_set_timeout(ct, timeout * HZ);

out:
        if (opts->netns_id >= 0)
                put_net(net);

        return ct;
}

static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
                                          struct bpf_sock_tuple *bpf_tuple,
                                          u32 tuple_len, struct bpf_ct_opts *opts,
                                          u32 opts_len)
{
        struct nf_conntrack_tuple_hash *hash;
        struct nf_conntrack_tuple tuple;
        struct nf_conn *ct;
        int err;

        if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
            opts_len != NF_BPF_CT_OPTS_SZ)
                return ERR_PTR(-EINVAL);
        if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
                return ERR_PTR(-EPROTO);
        if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
                return ERR_PTR(-EINVAL);

        err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
                                    IP_CT_DIR_ORIGINAL, &tuple);
        if (err < 0)
                return ERR_PTR(err);

        if (opts->netns_id >= 0) {
                net = get_net_ns_by_id(net, opts->netns_id);
                if (unlikely(!net))
                        return ERR_PTR(-ENONET);
        }

        hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
        if (opts->netns_id >= 0)
                put_net(net);
        if (!hash)
                return ERR_PTR(-ENOENT);

        ct = nf_ct_tuplehash_to_ctrack(hash);
        opts->dir = NF_CT_DIRECTION(hash);

        return ct;
}

BTF_ID_LIST(btf_nf_conn_ids)
BTF_ID(struct, nf_conn)
BTF_ID(struct, nf_conn___init)

/* Check writes into `struct nf_conn` */
static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
                                           const struct btf *btf,
                                           const struct btf_type *t, int off,
                                           int size, enum bpf_access_type atype,
                                           u32 *next_btf_id,
                                           enum bpf_type_flag *flag)
{
        const struct btf_type *ncit;
        const struct btf_type *nct;
        size_t end;

        ncit = btf_type_by_id(btf, btf_nf_conn_ids[1]);
        nct = btf_type_by_id(btf, btf_nf_conn_ids[0]);

        if (t != nct && t != ncit) {
                bpf_log(log, "only read is supported\n");
                return -EACCES;
        }

        /* `struct nf_conn` and `struct nf_conn___init` have the same layout
         * so we are safe to simply merge offset checks here
         */
        switch (off) {
#if defined(CONFIG_NF_CONNTRACK_MARK)
        case offsetof(struct nf_conn, mark):
                end = offsetofend(struct nf_conn, mark);
                break;
#endif
        default:
                bpf_log(log, "no write support to nf_conn at off %d\n", off);
                return -EACCES;
        }

        if (off + size > end) {
                bpf_log(log,
                        "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
                        off, size, end);
                return -EACCES;
        }

        return 0;
}
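
/* Example (illustrative only, BPF program side): when CONFIG_NF_CONNTRACK_MARK
 * is enabled, the access check above lets a program store to ct->mark of a
 * trusted nf_conn pointer directly; writes to any other offset are rejected
 * with -EACCES at load time. Assuming __ksym declarations for the kfuncs as
 * sketched in the comments further below:
 *
 *      ct = bpf_skb_ct_lookup(skb, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 *      if (ct) {
 *              ct->mark = 0x2a;        // plain store, no helper required
 *              bpf_ct_release(ct);
 *      }
 */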

__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
                  "Global functions as their definitions will be in nf_conntrack BTF");

/* bpf_xdp_ct_alloc - Allocate a new CT entry
 *
 * Parameters:
 * @xdp_ctx    - Pointer to ctx (xdp_md) in XDP program
 *               Cannot be NULL
 * @bpf_tuple  - Pointer to memory representing the tuple to look up
 *               Cannot be NULL
 * @tuple__sz  - Length of the tuple structure
 *               Must be one of sizeof(bpf_tuple->ipv4) or
 *               sizeof(bpf_tuple->ipv6)
 * @opts       - Additional options for allocation (documented above)
 *               Cannot be NULL
 * @opts__sz   - Length of the bpf_ct_opts structure
 *               Must be NF_BPF_CT_OPTS_SZ (12)
 */
struct nf_conn___init *
bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
        struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
        struct nf_conn *nfct;

        nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
                                       opts, opts__sz, 10);
        if (IS_ERR(nfct)) {
                if (opts)
                        opts->error = PTR_ERR(nfct);
                return NULL;
        }

        return (struct nf_conn___init *)nfct;
}
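
/* Example (illustrative only, BPF program side): a minimal XDP flow that
 * allocates an entry, sets its timeout and inserts it. The extern __ksym
 * declarations and the bpf_ct_opts___local mirror of bpf_ct_opts are
 * assumptions about how a program imports these kfuncs, not something
 * provided by this file:
 *
 *      extern struct nf_conn___init *
 *      bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, __u32,
 *                       struct bpf_ct_opts___local *, __u32) __ksym;
 *      extern struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *) __ksym;
 *      extern void bpf_ct_set_timeout(struct nf_conn___init *, __u32) __ksym;
 *      extern void bpf_ct_release(struct nf_conn *) __ksym;
 *
 *      SEC("xdp")
 *      int ct_alloc_example(struct xdp_md *ctx)
 *      {
 *              struct bpf_ct_opts___local opts = { .netns_id = -1, .l4proto = IPPROTO_TCP };
 *              struct bpf_sock_tuple tup = {};
 *              struct nf_conn___init *ct_i;
 *              struct nf_conn *ct;
 *
 *              tup.ipv4.saddr = bpf_htonl(0xc0a80001);         // made-up addresses/ports
 *              tup.ipv4.daddr = bpf_htonl(0xc0a80002);
 *              tup.ipv4.sport = bpf_htons(12345);
 *              tup.ipv4.dport = bpf_htons(80);
 *
 *              ct_i = bpf_xdp_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 *              if (!ct_i)
 *                      return XDP_PASS;                // opts.error holds the reason
 *              bpf_ct_set_timeout(ct_i, 30000);        // 30s, set before insertion
 *              ct = bpf_ct_insert_entry(ct_i);         // consumes the ct_i reference
 *              if (ct)
 *                      bpf_ct_release(ct);             // drop the returned reference
 *              return XDP_PASS;
 *      }
 */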

/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
 * reference to it
 *
 * Parameters:
 * @xdp_ctx    - Pointer to ctx (xdp_md) in XDP program
 *               Cannot be NULL
 * @bpf_tuple  - Pointer to memory representing the tuple to look up
 *               Cannot be NULL
 * @tuple__sz  - Length of the tuple structure
 *               Must be one of sizeof(bpf_tuple->ipv4) or
 *               sizeof(bpf_tuple->ipv6)
 * @opts       - Additional options for lookup (documented above)
 *               Cannot be NULL
 * @opts__sz   - Length of the bpf_ct_opts structure
 *               Must be NF_BPF_CT_OPTS_SZ (12)
 */
struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
                  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
        struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
        struct net *caller_net;
        struct nf_conn *nfct;

        caller_net = dev_net(ctx->rxq->dev);
        nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
        if (IS_ERR(nfct)) {
                if (opts)
                        opts->error = PTR_ERR(nfct);
                return NULL;
        }
        return nfct;
}
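
/* Example (illustrative only, BPF program side): looking up an existing entry
 * from XDP, reusing the tuple/opts setup of the allocation sketch above plus
 * an assumed declaration of the lookup kfunc:
 *
 *      extern struct nf_conn *
 *      bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, __u32,
 *                        struct bpf_ct_opts___local *, __u32) __ksym;
 *
 *      ct = bpf_xdp_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 *      if (!ct)
 *              return XDP_PASS;        // opts.error: -ENOENT, -ENONET, ... (see bpf_ct_opts)
 *      // opts.dir now holds IP_CT_DIR_ORIGINAL or IP_CT_DIR_REPLY
 *      if (ct->status & IPS_ASSURED)
 *              ;                       // read-only access to nf_conn fields is allowed
 *      bpf_ct_release(ct);             // mandatory: the returned pointer is refcounted
 */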

/* bpf_skb_ct_alloc - Allocate a new CT entry
 *
 * Parameters:
 * @skb_ctx    - Pointer to ctx (__sk_buff) in TC program
 *               Cannot be NULL
 * @bpf_tuple  - Pointer to memory representing the tuple to look up
 *               Cannot be NULL
 * @tuple__sz  - Length of the tuple structure
 *               Must be one of sizeof(bpf_tuple->ipv4) or
 *               sizeof(bpf_tuple->ipv6)
 * @opts       - Additional options for allocation (documented above)
 *               Cannot be NULL
 * @opts__sz   - Length of the bpf_ct_opts structure
 *               Must be NF_BPF_CT_OPTS_SZ (12)
 */
struct nf_conn___init *
bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
                 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
        struct sk_buff *skb = (struct sk_buff *)skb_ctx;
        struct nf_conn *nfct;
        struct net *net;

        net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
        nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
        if (IS_ERR(nfct)) {
                if (opts)
                        opts->error = PTR_ERR(nfct);
                return NULL;
        }

        return (struct nf_conn___init *)nfct;
}

/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
 * reference to it
 *
 * Parameters:
 * @skb_ctx    - Pointer to ctx (__sk_buff) in TC program
 *               Cannot be NULL
 * @bpf_tuple  - Pointer to memory representing the tuple to look up
 *               Cannot be NULL
 * @tuple__sz  - Length of the tuple structure
 *               Must be one of sizeof(bpf_tuple->ipv4) or
 *               sizeof(bpf_tuple->ipv6)
 * @opts       - Additional options for lookup (documented above)
 *               Cannot be NULL
 * @opts__sz   - Length of the bpf_ct_opts structure
 *               Must be NF_BPF_CT_OPTS_SZ (12)
 */
struct nf_conn *
bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
                  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
        struct sk_buff *skb = (struct sk_buff *)skb_ctx;
        struct net *caller_net;
        struct nf_conn *nfct;

        caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
        nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
        if (IS_ERR(nfct)) {
                if (opts)
                        opts->error = PTR_ERR(nfct);
                return NULL;
        }
        return nfct;
}
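
/* Example (illustrative only, BPF program side): the skb variants mirror the
 * XDP ones; only the context type and program section differ. A SCHED_CLS
 * sketch, with the kfunc declaration assumed as above but taking
 * struct __sk_buff *:
 *
 *      SEC("tc")
 *      int ct_skb_lookup_example(struct __sk_buff *skb)
 *      {
 *              // build tup and opts exactly as in the XDP sketches
 *              ct = bpf_skb_ct_lookup(skb, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 *              if (ct)
 *                      bpf_ct_release(ct);
 *              return TC_ACT_OK;
 *      }
 */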

/* bpf_ct_insert_entry - Add the provided entry into a CT map
 *
 * This must be invoked for referenced PTR_TO_BTF_ID.
 *
 * @nfct_i     - Pointer to referenced nf_conn___init object, obtained
 *               using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
 */
struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
{
        struct nf_conn *nfct = (struct nf_conn *)nfct_i;
        int err;

        nfct->status |= IPS_CONFIRMED;
        err = nf_conntrack_hash_check_insert(nfct);
        if (err < 0) {
                nf_conntrack_free(nfct);
                return NULL;
        }
        return nfct;
}

/* bpf_ct_release - Release acquired nf_conn object
 *
 * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
 * the program if any references remain in the program in all of the explored
 * states.
 *
 * Parameters:
 * @nfct       - Pointer to referenced nf_conn object, obtained using
 *               bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
 */
void bpf_ct_release(struct nf_conn *nfct)
{
        if (!nfct)
                return;
        nf_ct_put(nfct);
}

/* bpf_ct_set_timeout - Set timeout of allocated nf_conn
 *
 * Sets the default timeout of newly allocated nf_conn before insertion.
 * This helper must be invoked for refcounted pointer to nf_conn___init.
 *
 * Parameters:
 * @nfct       - Pointer to referenced nf_conn___init object, obtained using
 *               bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
 * @timeout    - Timeout in msecs.
 */
void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
{
        __nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
}

/* bpf_ct_change_timeout - Change timeout of inserted nf_conn
 *
 * Change the timeout associated with the inserted or looked up nf_conn.
 * This helper must be invoked for refcounted pointer to nf_conn.
 *
 * Parameters:
 * @nfct       - Pointer to referenced nf_conn object, obtained using
 *               bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
 * @timeout    - New timeout in msecs.
 */
int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
{
        return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
}
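
/* Example (illustrative only, BPF program side): extending the timeout of an
 * entry that is already in the table, e.g. one returned by bpf_skb_ct_lookup()
 * (assuming extern int bpf_ct_change_timeout(struct nf_conn *, __u32) __ksym;):
 *
 *      ct = bpf_skb_ct_lookup(skb, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 *      if (ct) {
 *              bpf_ct_change_timeout(ct, 60000);       // expire 60s from now
 *              bpf_ct_release(ct);
 *      }
 */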

/* bpf_ct_set_status - Set status field of allocated nf_conn
 *
 * Set the status field of the newly allocated nf_conn before insertion.
 * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init.
 *
 * Parameters:
 * @nfct       - Pointer to referenced nf_conn object, obtained using
 *               bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
 * @status     - New status value.
 */
int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
{
        return nf_ct_change_status_common((struct nf_conn *)nfct, status);
}

/* bpf_ct_change_status - Change status of inserted nf_conn
 *
 * Change the status field of the provided connection tracking entry.
 * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn.
 *
 * Parameters:
 * @nfct       - Pointer to referenced nf_conn object, obtained using
 *               bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
 * @status     - New status value.
 */
int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
{
        return nf_ct_change_status_common(nfct, status);
}
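
/* Example (illustrative only, BPF program side): requesting a status update on
 * a looked-up entry. nf_ct_change_status_common() can refuse the change, so
 * the return value should be checked (assuming
 * extern int bpf_ct_change_status(struct nf_conn *, __u32) __ksym;):
 *
 *      ct = bpf_skb_ct_lookup(skb, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
 *      if (ct) {
 *              if (bpf_ct_change_status(ct, ct->status | IPS_ASSURED) < 0)
 *                      ;               // change rejected, negative errno returned
 *              bpf_ct_release(ct);
 *      }
 */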

__diag_pop()

BTF_SET8_START(nf_ct_kfunc_set)
BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
BTF_SET8_END(nf_ct_kfunc_set)

static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
        .owner = THIS_MODULE,
        .set = &nf_ct_kfunc_set,
};

int register_nf_conntrack_bpf(void)
{
        int ret;

        ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
        ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
        if (!ret) {
                mutex_lock(&nf_conn_btf_access_lock);
                nfct_btf_struct_access = _nf_conntrack_btf_struct_access;
                mutex_unlock(&nf_conn_btf_access_lock);
        }

        return ret;
}

void cleanup_nf_conntrack_bpf(void)
{
        mutex_lock(&nf_conn_btf_access_lock);
        nfct_btf_struct_access = NULL;
        mutex_unlock(&nf_conn_btf_access_lock);
}