dm-crypt: use __bio_add_page to add single page to clone bio
[linux-block.git] / net / netfilter / nf_conntrack_bpf.c
CommitLineData
b4c2b959
KKD
1// SPDX-License-Identifier: GPL-2.0-only
2/* Unstable Conntrack Helpers for XDP and TC-BPF hook
3 *
4 * These are called from the XDP and SCHED_CLS BPF programs. Note that it is
5 * allowed to break compatibility for these functions since the interface they
6 * are exposed through to BPF programs is explicitly unstable.
7 */
8
864b656f 9#include <linux/bpf_verifier.h>
b4c2b959
KKD
10#include <linux/bpf.h>
11#include <linux/btf.h>
fdf21497 12#include <linux/filter.h>
864b656f 13#include <linux/mutex.h>
b4c2b959
KKD
14#include <linux/types.h>
15#include <linux/btf_ids.h>
16#include <linux/net_namespace.h>
0b206c6d 17#include <net/netfilter/nf_conntrack_bpf.h>
b4c2b959
KKD
18#include <net/netfilter/nf_conntrack_core.h>
19
20/* bpf_ct_opts - Options for CT lookup helpers
21 *
22 * Members:
23 * @netns_id - Specify the network namespace for lookup
24 * Values:
25 * BPF_F_CURRENT_NETNS (-1)
26 * Use namespace associated with ctx (xdp_md, __sk_buff)
27 * [0, S32_MAX]
28 * Network Namespace ID
29 * @error - Out parameter, set for any errors encountered
30 * Values:
31 * -EINVAL - Passed NULL for bpf_tuple pointer
32 * -EINVAL - opts->reserved is not 0
33 * -EINVAL - netns_id is less than -1
34 * -EINVAL - opts__sz isn't NF_BPF_CT_OPTS_SZ (12)
35 * -EPROTO - l4proto isn't one of IPPROTO_TCP or IPPROTO_UDP
36 * -ENONET - No network namespace found for netns_id
37 * -ENOENT - Conntrack lookup could not find entry for tuple
38 * -EAFNOSUPPORT - tuple__sz isn't one of sizeof(tuple->ipv4)
39 * or sizeof(tuple->ipv6)
40 * @l4proto - Layer 4 protocol
41 * Values:
42 * IPPROTO_TCP, IPPROTO_UDP
1963c740 43 * @dir: - connection tracking tuple direction.
b4c2b959
KKD
44 * @reserved - Reserved member, will be reused for more options in future
45 * Values:
46 * 0
47 */
48struct bpf_ct_opts {
49 s32 netns_id;
50 s32 error;
51 u8 l4proto;
1963c740
LB
52 u8 dir;
53 u8 reserved[2];
b4c2b959
KKD
54};
55
/* The only opts__sz value the CT helpers in this file accept. */
enum { NF_BPF_CT_OPTS_SZ = 12 };
59
d7e79c97
LB
60static int bpf_nf_ct_tuple_parse(struct bpf_sock_tuple *bpf_tuple,
61 u32 tuple_len, u8 protonum, u8 dir,
62 struct nf_conntrack_tuple *tuple)
63{
64 union nf_inet_addr *src = dir ? &tuple->dst.u3 : &tuple->src.u3;
65 union nf_inet_addr *dst = dir ? &tuple->src.u3 : &tuple->dst.u3;
66 union nf_conntrack_man_proto *sport = dir ? (void *)&tuple->dst.u
67 : &tuple->src.u;
68 union nf_conntrack_man_proto *dport = dir ? &tuple->src.u
69 : (void *)&tuple->dst.u;
70
71 if (unlikely(protonum != IPPROTO_TCP && protonum != IPPROTO_UDP))
72 return -EPROTO;
73
74 memset(tuple, 0, sizeof(*tuple));
75
76 switch (tuple_len) {
77 case sizeof(bpf_tuple->ipv4):
78 tuple->src.l3num = AF_INET;
79 src->ip = bpf_tuple->ipv4.saddr;
80 sport->tcp.port = bpf_tuple->ipv4.sport;
81 dst->ip = bpf_tuple->ipv4.daddr;
82 dport->tcp.port = bpf_tuple->ipv4.dport;
83 break;
84 case sizeof(bpf_tuple->ipv6):
85 tuple->src.l3num = AF_INET6;
86 memcpy(src->ip6, bpf_tuple->ipv6.saddr, sizeof(bpf_tuple->ipv6.saddr));
87 sport->tcp.port = bpf_tuple->ipv6.sport;
88 memcpy(dst->ip6, bpf_tuple->ipv6.daddr, sizeof(bpf_tuple->ipv6.daddr));
89 dport->tcp.port = bpf_tuple->ipv6.dport;
90 break;
91 default:
92 return -EAFNOSUPPORT;
93 }
94 tuple->dst.protonum = protonum;
95 tuple->dst.dir = dir;
96
97 return 0;
98}
99
100static struct nf_conn *
101__bpf_nf_ct_alloc_entry(struct net *net, struct bpf_sock_tuple *bpf_tuple,
102 u32 tuple_len, struct bpf_ct_opts *opts, u32 opts_len,
103 u32 timeout)
104{
105 struct nf_conntrack_tuple otuple, rtuple;
106 struct nf_conn *ct;
107 int err;
108
109 if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
110 opts_len != NF_BPF_CT_OPTS_SZ)
111 return ERR_PTR(-EINVAL);
112
113 if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
114 return ERR_PTR(-EINVAL);
115
116 err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
117 IP_CT_DIR_ORIGINAL, &otuple);
118 if (err < 0)
119 return ERR_PTR(err);
120
121 err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
122 IP_CT_DIR_REPLY, &rtuple);
123 if (err < 0)
124 return ERR_PTR(err);
125
126 if (opts->netns_id >= 0) {
127 net = get_net_ns_by_id(net, opts->netns_id);
128 if (unlikely(!net))
129 return ERR_PTR(-ENONET);
130 }
131
132 ct = nf_conntrack_alloc(net, &nf_ct_zone_dflt, &otuple, &rtuple,
133 GFP_ATOMIC);
134 if (IS_ERR(ct))
135 goto out;
136
137 memset(&ct->proto, 0, sizeof(ct->proto));
138 __nf_ct_set_timeout(ct, timeout * HZ);
d7e79c97
LB
139
140out:
141 if (opts->netns_id >= 0)
142 put_net(net);
143
144 return ct;
145}
146
b4c2b959
KKD
147static struct nf_conn *__bpf_nf_ct_lookup(struct net *net,
148 struct bpf_sock_tuple *bpf_tuple,
aed8ee7f
KKD
149 u32 tuple_len, struct bpf_ct_opts *opts,
150 u32 opts_len)
b4c2b959
KKD
151{
152 struct nf_conntrack_tuple_hash *hash;
153 struct nf_conntrack_tuple tuple;
1963c740 154 struct nf_conn *ct;
d7e79c97 155 int err;
b4c2b959 156
aed8ee7f
KKD
157 if (!opts || !bpf_tuple || opts->reserved[0] || opts->reserved[1] ||
158 opts_len != NF_BPF_CT_OPTS_SZ)
159 return ERR_PTR(-EINVAL);
160 if (unlikely(opts->l4proto != IPPROTO_TCP && opts->l4proto != IPPROTO_UDP))
b4c2b959 161 return ERR_PTR(-EPROTO);
aed8ee7f 162 if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS))
b4c2b959
KKD
163 return ERR_PTR(-EINVAL);
164
d7e79c97
LB
165 err = bpf_nf_ct_tuple_parse(bpf_tuple, tuple_len, opts->l4proto,
166 IP_CT_DIR_ORIGINAL, &tuple);
167 if (err < 0)
168 return ERR_PTR(err);
b4c2b959 169
aed8ee7f
KKD
170 if (opts->netns_id >= 0) {
171 net = get_net_ns_by_id(net, opts->netns_id);
b4c2b959
KKD
172 if (unlikely(!net))
173 return ERR_PTR(-ENONET);
174 }
175
176 hash = nf_conntrack_find_get(net, &nf_ct_zone_dflt, &tuple);
aed8ee7f 177 if (opts->netns_id >= 0)
b4c2b959
KKD
178 put_net(net);
179 if (!hash)
180 return ERR_PTR(-ENOENT);
1963c740
LB
181
182 ct = nf_ct_tuplehash_to_ctrack(hash);
aed8ee7f 183 opts->dir = NF_CT_DIRECTION(hash);
1963c740
LB
184
185 return ct;
b4c2b959
KKD
186}
187
864b656f
DX
188BTF_ID_LIST(btf_nf_conn_ids)
189BTF_ID(struct, nf_conn)
190BTF_ID(struct, nf_conn___init)
191
192/* Check writes into `struct nf_conn` */
193static int _nf_conntrack_btf_struct_access(struct bpf_verifier_log *log,
6728aea7 194 const struct bpf_reg_state *reg,
b7e852a9 195 int off, int size)
864b656f 196{
6728aea7 197 const struct btf_type *ncit, *nct, *t;
864b656f
DX
198 size_t end;
199
6728aea7
KKD
200 ncit = btf_type_by_id(reg->btf, btf_nf_conn_ids[1]);
201 nct = btf_type_by_id(reg->btf, btf_nf_conn_ids[0]);
202 t = btf_type_by_id(reg->btf, reg->btf_id);
864b656f
DX
203 if (t != nct && t != ncit) {
204 bpf_log(log, "only read is supported\n");
205 return -EACCES;
206 }
207
208 /* `struct nf_conn` and `struct nf_conn___init` have the same layout
209 * so we are safe to simply merge offset checks here
210 */
211 switch (off) {
212#if defined(CONFIG_NF_CONNTRACK_MARK)
213 case offsetof(struct nf_conn, mark):
214 end = offsetofend(struct nf_conn, mark);
215 break;
216#endif
217 default:
218 bpf_log(log, "no write support to nf_conn at off %d\n", off);
219 return -EACCES;
220 }
221
222 if (off + size > end) {
223 bpf_log(log,
224 "write access at off %d with size %d beyond the member of nf_conn ended at %zu\n",
225 off, size, end);
226 return -EACCES;
227 }
228
229 return 0;
230}
231
b4c2b959 232__diag_push();
0b206c6d
KKD
233__diag_ignore_all("-Wmissing-prototypes",
234 "Global functions as their definitions will be in nf_conntrack BTF");
b4c2b959 235
d7e79c97
LB
236/* bpf_xdp_ct_alloc - Allocate a new CT entry
237 *
238 * Parameters:
239 * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
240 * Cannot be NULL
241 * @bpf_tuple - Pointer to memory representing the tuple to look up
242 * Cannot be NULL
243 * @tuple__sz - Length of the tuple structure
244 * Must be one of sizeof(bpf_tuple->ipv4) or
245 * sizeof(bpf_tuple->ipv6)
246 * @opts - Additional options for allocation (documented above)
247 * Cannot be NULL
248 * @opts__sz - Length of the bpf_ct_opts structure
249 * Must be NF_BPF_CT_OPTS_SZ (12)
250 */
400031e0 251__bpf_kfunc struct nf_conn___init *
d7e79c97
LB
252bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
253 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
254{
255 struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
256 struct nf_conn *nfct;
257
258 nfct = __bpf_nf_ct_alloc_entry(dev_net(ctx->rxq->dev), bpf_tuple, tuple__sz,
259 opts, opts__sz, 10);
260 if (IS_ERR(nfct)) {
261 if (opts)
262 opts->error = PTR_ERR(nfct);
263 return NULL;
264 }
265
266 return (struct nf_conn___init *)nfct;
267}
268
b4c2b959
KKD
269/* bpf_xdp_ct_lookup - Lookup CT entry for the given tuple, and acquire a
270 * reference to it
271 *
272 * Parameters:
273 * @xdp_ctx - Pointer to ctx (xdp_md) in XDP program
274 * Cannot be NULL
275 * @bpf_tuple - Pointer to memory representing the tuple to look up
276 * Cannot be NULL
277 * @tuple__sz - Length of the tuple structure
278 * Must be one of sizeof(bpf_tuple->ipv4) or
279 * sizeof(bpf_tuple->ipv6)
280 * @opts - Additional options for lookup (documented above)
281 * Cannot be NULL
282 * @opts__sz - Length of the bpf_ct_opts structure
283 * Must be NF_BPF_CT_OPTS_SZ (12)
284 */
400031e0 285__bpf_kfunc struct nf_conn *
b4c2b959
KKD
286bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
287 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
288{
289 struct xdp_buff *ctx = (struct xdp_buff *)xdp_ctx;
290 struct net *caller_net;
291 struct nf_conn *nfct;
292
b4c2b959 293 caller_net = dev_net(ctx->rxq->dev);
aed8ee7f 294 nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
b4c2b959 295 if (IS_ERR(nfct)) {
aed8ee7f
KKD
296 if (opts)
297 opts->error = PTR_ERR(nfct);
b4c2b959
KKD
298 return NULL;
299 }
300 return nfct;
301}
302
d7e79c97
LB
303/* bpf_skb_ct_alloc - Allocate a new CT entry
304 *
305 * Parameters:
306 * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
307 * Cannot be NULL
308 * @bpf_tuple - Pointer to memory representing the tuple to look up
309 * Cannot be NULL
310 * @tuple__sz - Length of the tuple structure
311 * Must be one of sizeof(bpf_tuple->ipv4) or
312 * sizeof(bpf_tuple->ipv6)
313 * @opts - Additional options for allocation (documented above)
314 * Cannot be NULL
315 * @opts__sz - Length of the bpf_ct_opts structure
316 * Must be NF_BPF_CT_OPTS_SZ (12)
317 */
400031e0 318__bpf_kfunc struct nf_conn___init *
d7e79c97
LB
319bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
320 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
321{
322 struct sk_buff *skb = (struct sk_buff *)skb_ctx;
323 struct nf_conn *nfct;
324 struct net *net;
325
326 net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
327 nfct = __bpf_nf_ct_alloc_entry(net, bpf_tuple, tuple__sz, opts, opts__sz, 10);
328 if (IS_ERR(nfct)) {
329 if (opts)
330 opts->error = PTR_ERR(nfct);
331 return NULL;
332 }
333
334 return (struct nf_conn___init *)nfct;
335}
336
b4c2b959
KKD
337/* bpf_skb_ct_lookup - Lookup CT entry for the given tuple, and acquire a
338 * reference to it
339 *
340 * Parameters:
341 * @skb_ctx - Pointer to ctx (__sk_buff) in TC program
342 * Cannot be NULL
343 * @bpf_tuple - Pointer to memory representing the tuple to look up
344 * Cannot be NULL
345 * @tuple__sz - Length of the tuple structure
346 * Must be one of sizeof(bpf_tuple->ipv4) or
347 * sizeof(bpf_tuple->ipv6)
348 * @opts - Additional options for lookup (documented above)
349 * Cannot be NULL
350 * @opts__sz - Length of the bpf_ct_opts structure
351 * Must be NF_BPF_CT_OPTS_SZ (12)
352 */
400031e0 353__bpf_kfunc struct nf_conn *
b4c2b959
KKD
354bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
355 u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
356{
357 struct sk_buff *skb = (struct sk_buff *)skb_ctx;
358 struct net *caller_net;
359 struct nf_conn *nfct;
360
b4c2b959 361 caller_net = skb->dev ? dev_net(skb->dev) : sock_net(skb->sk);
aed8ee7f 362 nfct = __bpf_nf_ct_lookup(caller_net, bpf_tuple, tuple__sz, opts, opts__sz);
b4c2b959 363 if (IS_ERR(nfct)) {
aed8ee7f
KKD
364 if (opts)
365 opts->error = PTR_ERR(nfct);
b4c2b959
KKD
366 return NULL;
367 }
368 return nfct;
369}
370
d7e79c97
LB
371/* bpf_ct_insert_entry - Add the provided entry into a CT map
372 *
373 * This must be invoked for referenced PTR_TO_BTF_ID.
374 *
0b389236 375 * @nfct - Pointer to referenced nf_conn___init object, obtained
d7e79c97
LB
376 * using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
377 */
400031e0 378__bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
d7e79c97 379{
0b389236 380 struct nf_conn *nfct = (struct nf_conn *)nfct_i;
d7e79c97
LB
381 int err;
382
2cdaa3ee 383 nfct->status |= IPS_CONFIRMED;
d7e79c97
LB
384 err = nf_conntrack_hash_check_insert(nfct);
385 if (err < 0) {
386 nf_conntrack_free(nfct);
387 return NULL;
388 }
389 return nfct;
390}
391
b4c2b959
KKD
392/* bpf_ct_release - Release acquired nf_conn object
393 *
394 * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
395 * the program if any references remain in the program in all of the explored
396 * states.
397 *
398 * Parameters:
399 * @nf_conn - Pointer to referenced nf_conn object, obtained using
400 * bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
401 */
400031e0 402__bpf_kfunc void bpf_ct_release(struct nf_conn *nfct)
b4c2b959 403{
b4c2b959
KKD
404 nf_ct_put(nfct);
405}
406
0b389236
KKD
407/* bpf_ct_set_timeout - Set timeout of allocated nf_conn
408 *
409 * Sets the default timeout of newly allocated nf_conn before insertion.
410 * This helper must be invoked for refcounted pointer to nf_conn___init.
411 *
412 * Parameters:
413 * @nfct - Pointer to referenced nf_conn object, obtained using
414 * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
415 * @timeout - Timeout in msecs.
416 */
400031e0 417__bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
0b389236
KKD
418{
419 __nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
420}
421
422/* bpf_ct_change_timeout - Change timeout of inserted nf_conn
423 *
424 * Change timeout associated of the inserted or looked up nf_conn.
425 * This helper must be invoked for refcounted pointer to nf_conn.
426 *
427 * Parameters:
428 * @nfct - Pointer to referenced nf_conn object, obtained using
429 * bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
430 * @timeout - New timeout in msecs.
431 */
400031e0 432__bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
0b389236
KKD
433{
434 return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
435}
436
ef69aa3a
LB
437/* bpf_ct_set_status - Set status field of allocated nf_conn
438 *
439 * Set the status field of the newly allocated nf_conn before insertion.
440 * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn___init.
441 *
442 * Parameters:
443 * @nfct - Pointer to referenced nf_conn object, obtained using
444 * bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
445 * @status - New status value.
446 */
400031e0 447__bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
ef69aa3a
LB
448{
449 return nf_ct_change_status_common((struct nf_conn *)nfct, status);
450}
451
452/* bpf_ct_change_status - Change status of inserted nf_conn
453 *
454 * Change the status field of the provided connection tracking entry.
455 * This must be invoked for referenced PTR_TO_BTF_ID to nf_conn.
456 *
457 * Parameters:
458 * @nfct - Pointer to referenced nf_conn object, obtained using
459 * bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
460 * @status - New status value.
461 */
400031e0 462__bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
ef69aa3a
LB
463{
464 return nf_ct_change_status_common(nfct, status);
465}
466
b4c2b959
KKD
467__diag_pop()
468
a4703e31 469BTF_SET8_START(nf_ct_kfunc_set)
d7e79c97 470BTF_ID_FLAGS(func, bpf_xdp_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
a4703e31 471BTF_ID_FLAGS(func, bpf_xdp_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
d7e79c97 472BTF_ID_FLAGS(func, bpf_skb_ct_alloc, KF_ACQUIRE | KF_RET_NULL)
a4703e31 473BTF_ID_FLAGS(func, bpf_skb_ct_lookup, KF_ACQUIRE | KF_RET_NULL)
d7e79c97 474BTF_ID_FLAGS(func, bpf_ct_insert_entry, KF_ACQUIRE | KF_RET_NULL | KF_RELEASE)
a4703e31 475BTF_ID_FLAGS(func, bpf_ct_release, KF_RELEASE)
0b389236
KKD
476BTF_ID_FLAGS(func, bpf_ct_set_timeout, KF_TRUSTED_ARGS)
477BTF_ID_FLAGS(func, bpf_ct_change_timeout, KF_TRUSTED_ARGS)
ef69aa3a
LB
478BTF_ID_FLAGS(func, bpf_ct_set_status, KF_TRUSTED_ARGS)
479BTF_ID_FLAGS(func, bpf_ct_change_status, KF_TRUSTED_ARGS)
a4703e31
KKD
480BTF_SET8_END(nf_ct_kfunc_set)
481
482static const struct btf_kfunc_id_set nf_conntrack_kfunc_set = {
483 .owner = THIS_MODULE,
484 .set = &nf_ct_kfunc_set,
b4c2b959
KKD
485};
486
487int register_nf_conntrack_bpf(void)
488{
489 int ret;
490
a4703e31 491 ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &nf_conntrack_kfunc_set);
864b656f
DX
492 ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &nf_conntrack_kfunc_set);
493 if (!ret) {
494 mutex_lock(&nf_conn_btf_access_lock);
5a090aa3 495 nfct_btf_struct_access = _nf_conntrack_btf_struct_access;
864b656f
DX
496 mutex_unlock(&nf_conn_btf_access_lock);
497 }
498
499 return ret;
500}
501
502void cleanup_nf_conntrack_bpf(void)
503{
504 mutex_lock(&nf_conn_btf_access_lock);
5a090aa3 505 nfct_btf_struct_access = NULL;
864b656f 506 mutex_unlock(&nf_conn_btf_access_lock);
b4c2b959 507}