Merge tag 'perf-tools-fixes-for-v6.4-1-2023-05-20' of git://git.kernel.org/pub/scm...
[linux-block.git] / net / netfilter / nf_conntrack_bpf.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Unstable Conntrack Helpers for XDP and TC-BPF hook
3  *
4  * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
5  * allowed to break compatibility for these functions since the interface they
6  * are exposed through to BPF programs is explicitly unstable.
7  */
8
9 #include <linux/bpf_verifier.h>
10 #include <linux/bpf.h>
11 #include <linux/btf.h>
12 #include <linux/filter.h>
13 #include <linux/mutex.h>
14 #include <linux/types.h>
15 #include <linux/btf_ids.h>
16 #include <linux/net_namespace.h>
17 #include <net/netfilter/nf_conntrack_bpf.h>
18 #include <net/netfilter/nf_conntrack_core.h>
19
20 /* bpf_ct_opts - Options for CT lookup helpers
21  *
22  * Members:
23  * @netns_id   - Specify the network namespace for lookup
24  *               Values:
25  *                 BPF_F_CURRENT_NETNS (-1)
26  *                   Use namespace associated with ctx (xdp_md, __sk_buff)
27  *                 [0, S32_MAX]
28  *                   Network Namespace ID
29  * @error      - Out parameter, set for any errors encountered
30  *               Values:
31  *                 -EINVAL - Passed NULL for bpf_tuple pointer
32  *                 -EINVAL - opts->reserved is not 0
33  *                 -EINVAL - netns_id is less than -1
34  *                 -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
35  *                 -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
36  *                 -ENONET - No network namespace found for netns_id
37  *                 -ENOENT - Conntrack lookup could not find entry for tuple
38  *                 -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
39  *                                 or sizeof(tuple->ipv6)
40  * @l4proto    - Layer 4 protocol
41  *               Values:
42  *                 IPPROTO_TCP, IPPROTO_UDP
43  * @dir:       - connection tracking tuple direction.
44  * @reserved   - Reserved member, will be reused for more options in future
45  *               Values:
46  *                 0
47  */
48 struct bpf_ct_opts {
49         s32 netns_id;
50         s32 error;
51         u8 l4proto;
52         u8 dir;
53         u8 reserved[2];
54 };
55
56 enum {
57         NF_BPF_CT_OPTS_SZ = 12,
58 };
59
60 static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
61                                  u32 tuple_len, u8 protonum, u8 dir,
62                                  struct nf_conntrack_tuple *tuple)
63 {
64         union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
65         union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
66         union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
67                                                   : &tuple->src.u;
68         union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
69                                                   : (void *)&tuple->dst.u;
70
71         if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
72                 return -EPROTO;
73
74         memset(tuple, 0, sizeof(*tuple));
75
76         switch (tuple_len) {
77         case sizeof(bpf_tuple->ipv4):
78                 tuple->src.l3num = AF_INET;
79                 src->ip = bpf_tuple->ipv4.saddr;
80                 sport->tcp.port = bpf_tuple->ipv4.sport;
81                 dst->ip = bpf_tuple->ipv4.daddr;
82                 dport->tcp.port = bpf_tuple->ipv4.dport;
83                 break;
84         case sizeof(bpf_tuple->ipv6):
85                 tuple->src.l3num = AF_INET6;
86                 memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
87                 sport->tcp.port = bpf_tuple->ipv6.sport;
88                 memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
89                 dport->tcp.port = bpf_tuple->ipv6.dport;
90                 break;
91         default:
92                 return -EAFNOSUPPORT;
93         }
94         tuple->dst.protonum = protonum;
95         tuple->dst.dir = dir;
96
97         return 0;
98 }
99
100 static struct nf_conn *
101 __bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
102                         u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
103                         u32 timeout)
104 {
105         struct nf_conntrack_tuple otuple, rtuple;
106         struct nf_conn *ct;
107         int err;
108
109         if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
110             opts_len != NF_BPF_CT_OPTS_SZ)
111                 return ERR_PTR(-EINVAL);
112
113         if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
114                 return ERR_PTR(-EINVAL);
115
116         err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
117                                     IP_CT_DIR_ORIGINAL, &otuple);
118         if (err < 0)
119                 return ERR_PTR(err);
120
121         err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
122                                     IP_CT_DIR_REPLY, &rtuple);
123         if (err < 0)
124                 return ERR_PTR(err);
125
126         if (opts->netns_id >= 0) {
127                 net = get_net_ns_by_id(net, opts->netns_id);
128                 if (unlikely(!net))
129                         return ERR_PTR(-ENONET);
130         }
131
132         ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
133                                 GFP_ATOMIC);
134         if (IS_ERR(ct))
135                 goto out;
136
137         memset(&ct->proto, 0, sizeof(ct->proto));
138         __nf_ct_set_timeout(ct, timeout * HZ);
139
140 out:
141         if (opts->netns_id >= 0)
142                 put_net(net);
143
144         return ct;
145 }
146
147 static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
148                                           struct bpf_sock_tuple *bpf_tuple,
149                                           u32 tuple_len, struct bpf_ct_opts *opts,
150                                           u32 opts_len)
151 {
152         struct nf_conntrack_tuple_hash *hash;
153         struct nf_conntrack_tuple tuple;
154         struct nf_conn *ct;
155         int err;
156
157         if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
158             opts_len != NF_BPF_CT_OPTS_SZ)
159                 return ERR_PTR(-EINVAL);
160         if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
161                 return ERR_PTR(-EPROTO);
162         if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
163                 return ERR_PTR(-EINVAL);
164
165         err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
166                                     IP_CT_DIR_ORIGINAL, &tuple);
167         if (err < 0)
168                 return ERR_PTR(err);
169
170         if (opts->netns_id >= 0) {
171                 net = get_net_ns_by_id(net, opts->netns_id);
172                 if (unlikely(!net))
173                         return ERR_PTR(-ENONET);
174         }
175
176         hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
177         if (opts->netns_id >= 0)
178                 put_net(net);
179         if (!hash)
180                 return ERR_PTR(-ENOENT);
181
182         ct = nf_ct_tuplehash_to_ctrack(hash);
183         opts->dir = NF_CT_DIRECTION(hash);
184
185         return ct;
186 }
187
188 BTF_ID_LIST(btf_nf_conn_ids)
189 BTF_ID(struct, nf_conn)
190 BTF_ID(struct, nf_conn___init)
191
192 /* Check writes into `struct nf_conn` */
193 static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
194                                            const struct bpf_reg_state *reg,
195                                            int off, int size)
196 {
197         const struct btf_type *ncit, *nct, *t;
198         size_t end;
199
200         ncit = btf_type_by_id(reg->btf, btf_nf_conn_ids[1]);
201         nct = btf_type_by_id(reg->btf, btf_nf_conn_ids[0]);
202         t = btf_type_by_id(reg->btf, reg->btf_id);
203         if (t != nct && t != ncit) {
204                 bpf_log(log, "only read is supported\n");
205                 return -EACCES;
206         }
207
208         /* `struct nf_conn` and `struct nf_conn___init` have the same layout
209          * so we are safe to simply merge offset checks here
210          */
211         switch (off) {
212 #if defined(CONFIG_NF_CONNTRACK_MARK)
213         case offsetof(struct nf_conn, mark):
214                 end = offsetofend(struct nf_conn, mark);
215                 break;
216 #endif
217         default:
218                 bpf_log(log, "no write support to nf_conn at off %d\n", off);
219                 return -EACCES;
220         }
221
222         if (off + size > end) {
223                 bpf_log(log,
224                         "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
225                         off, size, end);
226                 return -EACCES;
227         }
228
229         return 0;
230 }
231
232 __diag_push();
233 __diag_ignore_all("-Wmissing-prototypes",
234                   "Global functions as their definitions will be in nf_conntrack BTF");
235
236 /* bpf_xdp_ct_alloc - Allocate a new CT entry
237  *
238  * Parameters:
239  * @xdp_ctx     - Pointer to ctx (xdp_md) in XDP program
240  *                  Cannot be NULL
241  * @bpf_tuple   - Pointer to memory representing the tuple to look up
242  *                  Cannot be NULL
243  * @tuple__sz   - Length of the tuple structure
244  *                  Must be one of sizeof(bpf_tuple->ipv4) or
245  *                  sizeof(bpf_tuple->ipv6)
246  * @opts        - Additional options for allocation (documented above)
247  *                  Cannot be NULL
248  * @opts__sz    - Length of the bpf_ct_opts structure
249  *                  Must be NF_BPF_CT_OPTS_SZ (12)
250  */
251 __bpf_kfunc struct nf_conn___init *
252 bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
253                  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
254 {
255         struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
256         struct nf_conn *nfct;
257
258         nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
259                                        opts, opts__sz, 10);
260         if (IS_ERR(nfct)) {
261                 if (opts)
262                         opts->error = PTR_ERR(nfct);
263                 return NULL;
264         }
265
266         return (struct nf_conn___init *)nfct;
267 }
268
269 /* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
270  *                     reference to it
271  *
272  * Parameters:
273  * @xdp_ctx     - Pointer to ctx (xdp_md) in XDP program
274  *                  Cannot be NULL
275  * @bpf_tuple   - Pointer to memory representing the tuple to look up
276  *                  Cannot be NULL
277  * @tuple__sz   - Length of the tuple structure
278  *                  Must be one of sizeof(bpf_tuple->ipv4) or
279  *                  sizeof(bpf_tuple->ipv6)
280  * @opts        - Additional options for lookup (documented above)
281  *                  Cannot be NULL
282  * @opts__sz    - Length of the bpf_ct_opts structure
283  *                  Must be NF_BPF_CT_OPTS_SZ (12)
284  */
285 __bpf_kfunc struct nf_conn *
286 bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
287                   u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
288 {
289         struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
290         struct net *caller_net;
291         struct nf_conn *nfct;
292
293         caller_net = dev_net(ctx->rxq->dev);
294         nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
295         if (IS_ERR(nfct)) {
296                 if (opts)
297                         opts->error = PTR_ERR(nfct);
298                 return NULL;
299         }
300         return nfct;
301 }
302
303 /* bpf_skb_ct_alloc - Allocate a new CT entry
304  *
305  * Parameters:
306  * @skb_ctx     - Pointer to ctx (__sk_buff) in TC program
307  *                  Cannot be NULL
308  * @bpf_tuple   - Pointer to memory representing the tuple to look up
309  *                  Cannot be NULL
310  * @tuple__sz   - Length of the tuple structure
311  *                  Must be one of sizeof(bpf_tuple->ipv4) or
312  *                  sizeof(bpf_tuple->ipv6)
313  * @opts        - Additional options for allocation (documented above)
314  *                  Cannot be NULL
315  * @opts__sz    - Length of the bpf_ct_opts structure
316  *                  Must be NF_BPF_CT_OPTS_SZ (12)
317  */
318 __bpf_kfunc struct nf_conn___init *
319 bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
320                  u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
321 {
322         struct sk_buff *skb = (struct sk_buff *)skb_ctx;
323         struct nf_conn *nfct;
324         struct net *net;
325
326         net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
327         nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
328         if (IS_ERR(nfct)) {
329                 if (opts)
330                         opts->error = PTR_ERR(nfct);
331                 return NULL;
332         }
333
334         return (struct nf_conn___init *)nfct;
335 }
336
337 /* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
338  *                     reference to it
339  *
340  * Parameters:
341  * @skb_ctx     - Pointer to ctx (__sk_buff) in TC program
342  *                  Cannot be NULL
343  * @bpf_tuple   - Pointer to memory representing the tuple to look up
344  *                  Cannot be NULL
345  * @tuple__sz   - Length of the tuple structure
346  *                  Must be one of sizeof(bpf_tuple->ipv4) or
347  *                  sizeof(bpf_tuple->ipv6)
348  * @opts        - Additional options for lookup (documented above)
349  *                  Cannot be NULL
350  * @opts__sz    - Length of the bpf_ct_opts structure
351  *                  Must be NF_BPF_CT_OPTS_SZ (12)
352  */
353 __bpf_kfunc struct nf_conn *
354 bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
355                   u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
356 {
357         struct sk_buff *skb = (struct sk_buff *)skb_ctx;
358         struct net *caller_net;
359         struct nf_conn *nfct;
360
361         caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
362         nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
363         if (IS_ERR(nfct)) {
364                 if (opts)
365                         opts->error = PTR_ERR(nfct);
366                 return NULL;
367         }
368         return nfct;
369 }
370
371 /* bpf_ct_insert_entry - Add the provided entry into a CT map
372  *
373  * This must be invoked for referenced PTR_TO_BTF_ID.
374  *
375  * @nfct         - Pointer to referenced nf_conn___init object, obtained
376  *                 using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
377  */
378 __bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
379 {
380         struct nf_conn *nfct = (struct nf_conn *)nfct_i;
381         int err;
382
383         nfct->status |= IPS_CONFIRMED;
384         err = nf_conntrack_hash_check_insert(nfct);
385         if (err < 0) {
386                 nf_conntrack_free(nfct);
387                 return NULL;
388         }
389         return nfct;
390 }
391
392 /* bpf_ct_release - Release acquired nf_conn object
393  *
394  * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
395  * the program if any references remain in the program in all of the explored
396  * states.
397  *
398  * Parameters:
399  * @nf_conn      - Pointer to referenced nf_conn object, obtained using
400  *                 bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
401  */
402 __bpf_kfunc void bpf_ct_release(struct nf_conn *nfct)
403 {
404         nf_ct_put(nfct);
405 }
406
407 /* bpf_ct_set_timeout - Set timeout of allocated nf_conn
408  *
409  * Sets the default timeout of newly allocated nf_conn before insertion.
410  * This helper must be invoked for refcounted pointer to nf_conn___init.
411  *
412  * Parameters:
413  * @nfct         - Pointer to referenced nf_conn object, obtained using
414  *                 bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
415  * @timeout      - Timeout in msecs.
416  */
417 __bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
418 {
419         __nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
420 }
421
422 /* bpf_ct_change_timeout - Change timeout of inserted nf_conn
423  *
424  * Change timeout associated of the inserted or looked up nf_conn.
425  * This helper must be invoked for refcounted pointer to nf_conn.
426  *
427  * Parameters:
428  * @nfct         - Pointer to referenced nf_conn object, obtained using
429  *                 bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
430  * @timeout      - New timeout in msecs.
431  */
432 __bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
433 {
434         return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
435 }
436
437 /* bpf_ct_set_status - Set status field of allocated nf_conn
438  *
439  * Set the status field of the newly allocated nf_conn before insertion.
440  * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init.
441  *
442  * Parameters:
443  * @nfct         - Pointer to referenced nf_conn object, obtained using
444  *                 bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
445  * @status       - New status value.
446  */
447 __bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
448 {
449         return nf_ct_change_status_common((struct nf_conn *)nfct, status);
450 }
451
452 /* bpf_ct_change_status - Change status of inserted nf_conn
453  *
454  * Change the status field of the provided connection tracking entry.
455  * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn.
456  *
457  * Parameters:
458  * @nfct         - Pointer to referenced nf_conn object, obtained using
459  *                 bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
460  * @status       - New status value.
461  */
462 __bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
463 {
464         return nf_ct_change_status_common(nfct, status);
465 }
466
467 __diag_pop()
468
469 BTF_SET8_START(nf_ct_kfunc_set)
470 BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
471 BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
472 BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
473 BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
474 BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
475 BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
476 BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
477 BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
478 BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
479 BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
480 BTF_SET8_END(nf_ct_kfunc_set)
481
482 static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
483         .owner = THIS_MODULE,
484         .set   = &nf_ct_kfunc_set,
485 };
486
487 int register_nf_conntrack_bpf(void)
488 {
489         int ret;
490
491         ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
492         ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
493         if (!ret) {
494                 mutex_lock(&nf_conn_btf_access_lock);
495                 nfct_btf_struct_access = _nf_conntrack_btf_struct_access;
496                 mutex_unlock(&nf_conn_btf_access_lock);
497         }
498
499         return ret;
500 }
501
502 void cleanup_nf_conntrack_bpf(void)
503 {
504         mutex_lock(&nf_conn_btf_access_lock);
505         nfct_btf_struct_access = NULL;
506         mutex_unlock(&nf_conn_btf_access_lock);
507 }