// SPDX-License-Identifier: GPL-2.0-only
/*
 * Functions to manage eBPF programs attached to cgroups
 *
 * Copyright (c) 2016 Daniel Mack
 */

#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/cgroup.h>
#include <linux/filter.h>
#include <linux/slab.h>
#include <linux/sysctl.h>
#include <linux/string.h>
#include <linux/bpf.h>
#include <linux/bpf-cgroup.h>
#include <net/sock.h>
#include <net/bpf_sk_storage.h>

#include "../cgroup/cgroup-internal.h"
DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
void cgroup_bpf_offline(struct cgroup *cgrp)
{
	cgroup_get(cgrp);
	percpu_ref_kill(&cgrp->bpf.refcnt);
}

static void bpf_cgroup_storages_free(struct bpf_cgroup_storage *storages[])
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_free(storages[stype]);
}

static int bpf_cgroup_storages_alloc(struct bpf_cgroup_storage *storages[],
				     struct bpf_prog *prog)
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype) {
		storages[stype] = bpf_cgroup_storage_alloc(prog, stype);
		if (IS_ERR(storages[stype])) {
			storages[stype] = NULL;
			bpf_cgroup_storages_free(storages);
			return -ENOMEM;
		}
	}

	return 0;
}

static void bpf_cgroup_storages_assign(struct bpf_cgroup_storage *dst[],
				       struct bpf_cgroup_storage *src[])
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		dst[stype] = src[stype];
}

static void bpf_cgroup_storages_link(struct bpf_cgroup_storage *storages[],
				     struct cgroup *cgrp,
				     enum bpf_attach_type attach_type)
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_link(storages[stype], cgrp, attach_type);
}

static void bpf_cgroup_storages_unlink(struct bpf_cgroup_storage *storages[])
{
	enum bpf_cgroup_storage_type stype;

	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_unlink(storages[stype]);
}
/* Called when bpf_cgroup_link is auto-detached from dying cgroup.
 * It drops cgroup and bpf_prog refcounts, and marks bpf_link as defunct. It
 * doesn't free link memory, which will eventually be done by bpf_link's
 * release() callback, when its last FD is closed.
 */
static void bpf_cgroup_link_auto_detach(struct bpf_cgroup_link *link)
{
	cgroup_put(link->cgroup);
	link->cgroup = NULL;
}
/**
 * cgroup_bpf_release() - put references of all bpf programs and
 *                        release all cgroup bpf data
 * @work: work structure embedded into the cgroup to modify
 */
static void cgroup_bpf_release(struct work_struct *work)
{
	struct cgroup *p, *cgrp = container_of(work, struct cgroup,
					       bpf.release_work);
	struct bpf_prog_array *old_array;
	unsigned int type;

	mutex_lock(&cgroup_mutex);

	for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) {
		struct list_head *progs = &cgrp->bpf.progs[type];
		struct bpf_prog_list *pl, *tmp;

		list_for_each_entry_safe(pl, tmp, progs, node) {
			list_del(&pl->node);
			if (pl->prog)
				bpf_prog_put(pl->prog);
			if (pl->link)
				bpf_cgroup_link_auto_detach(pl->link);
			bpf_cgroup_storages_unlink(pl->storage);
			bpf_cgroup_storages_free(pl->storage);
			kfree(pl);
			static_branch_dec(&cgroup_bpf_enabled_key);
		}
		old_array = rcu_dereference_protected(
				cgrp->bpf.effective[type],
				lockdep_is_held(&cgroup_mutex));
		bpf_prog_array_free(old_array);
	}

	mutex_unlock(&cgroup_mutex);

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_put(p);

	percpu_ref_exit(&cgrp->bpf.refcnt);
	cgroup_put(cgrp);
}
/**
 * cgroup_bpf_release_fn() - callback used to schedule releasing
 *                           of bpf cgroup data
 * @ref: percpu ref counter structure
 */
static void cgroup_bpf_release_fn(struct percpu_ref *ref)
{
	struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);

	INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
	queue_work(system_wq, &cgrp->bpf.release_work);
}
/* Get underlying bpf_prog of bpf_prog_list entry, regardless of whether it's
 * through link or direct prog.
 */
static struct bpf_prog *prog_list_prog(struct bpf_prog_list *pl)
{
	if (pl->prog)
		return pl->prog;
	if (pl->link)
		return pl->link->link.prog;
	return NULL;
}

/* count number of elements in the list.
 * it's slow but the list cannot be long
 */
static u32 prog_list_length(struct list_head *head)
{
	struct bpf_prog_list *pl;
	u32 cnt = 0;

	list_for_each_entry(pl, head, node) {
		if (!prog_list_prog(pl))
			continue;
		cnt++;
	}
	return cnt;
}
/* if parent has non-overridable prog attached,
 * disallow attaching new programs to the descendant cgroup.
 * if parent has overridable or multi-prog, allow attaching
 */
static bool hierarchy_allows_attach(struct cgroup *cgrp,
				    enum bpf_attach_type type)
{
	struct cgroup *p;

	p = cgroup_parent(cgrp);
	if (!p)
		return true;
	do {
		u32 flags = p->bpf.flags[type];
		u32 cnt;

		if (flags & BPF_F_ALLOW_MULTI)
			return true;
		cnt = prog_list_length(&p->bpf.progs[type]);
		WARN_ON_ONCE(cnt > 1);
		if (cnt == 1)
			return !!(flags & BPF_F_ALLOW_OVERRIDE);
		p = cgroup_parent(p);
	} while (p);
	return true;
}
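/* Illustrative sketch (not part of the kernel sources): how the attach flags
 * checked above play out in a small hierarchy. Paths and program names are
 * made up for the example.
 *
 *	/sys/fs/cgroup/		<- prog A attached with BPF_F_ALLOW_OVERRIDE
 *	/sys/fs/cgroup/child/	<- attaching prog B here succeeds, and B
 *				   overrides A for this subtree
 *
 * Had A been attached with no flags (non-overridable), attaching B to the
 * child would fail with -EPERM; with BPF_F_ALLOW_MULTI, both A and B would
 * remain in the effective set.
 */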
/* compute a chain of effective programs for a given cgroup:
 * start from the list of programs in this cgroup and add
 * all parent programs.
 * Note that parent's F_ALLOW_OVERRIDE-type program is yielding
 * to programs in this cgroup
 */
static int compute_effective_progs(struct cgroup *cgrp,
				   enum bpf_attach_type type,
				   struct bpf_prog_array **array)
{
	struct bpf_prog_array_item *item;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct cgroup *p = cgrp;
	int cnt = 0;

	/* count number of effective programs by walking parents */
	do {
		if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
			cnt += prog_list_length(&p->bpf.progs[type]);
		p = cgroup_parent(p);
	} while (p);

	progs = bpf_prog_array_alloc(cnt, GFP_KERNEL);
	if (!progs)
		return -ENOMEM;

	/* populate the array with effective progs */
	cnt = 0;
	p = cgrp;
	do {
		if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI))
			continue;

		list_for_each_entry(pl, &p->bpf.progs[type], node) {
			if (!prog_list_prog(pl))
				continue;

			item = &progs->items[cnt];
			item->prog = prog_list_prog(pl);
			bpf_cgroup_storages_assign(item->cgroup_storage,
						   pl->storage);
			cnt++;
		}
	} while ((p = cgroup_parent(p)));

	*array = progs;
	return 0;
}
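/* Illustrative sketch (not from the sources): for the hierarchy
 * root (progs R1, R2, BPF_F_ALLOW_MULTI) -> child (prog C1, BPF_F_ALLOW_MULTI),
 * the loop above fills the child's effective array as [C1, R1, R2]: the
 * cgroup's own programs occupy the lowest indices, followed by its
 * ancestors' programs in order of increasing distance.
 */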
static void activate_effective_progs(struct cgroup *cgrp,
				     enum bpf_attach_type type,
				     struct bpf_prog_array *old_array)
{
	old_array = rcu_replace_pointer(cgrp->bpf.effective[type], old_array,
					lockdep_is_held(&cgroup_mutex));
	/* free prog array after grace period, since __cgroup_bpf_run_*()
	 * might be still walking the array
	 */
	bpf_prog_array_free(old_array);
}
/**
 * cgroup_bpf_inherit() - inherit effective programs from parent
 * @cgrp: the cgroup to modify
 */
int cgroup_bpf_inherit(struct cgroup *cgrp)
{
/* has to use macro instead of const int, since compiler thinks
 * that array below is variable length
 */
#define NR ARRAY_SIZE(cgrp->bpf.effective)
	struct bpf_prog_array *arrays[NR] = {};
	struct cgroup *p;
	int ret, i;

	ret = percpu_ref_init(&cgrp->bpf.refcnt, cgroup_bpf_release_fn, 0,
			      GFP_KERNEL);
	if (ret)
		return ret;

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_get(p);

	for (i = 0; i < NR; i++)
		INIT_LIST_HEAD(&cgrp->bpf.progs[i]);

	for (i = 0; i < NR; i++)
		if (compute_effective_progs(cgrp, i, &arrays[i]))
			goto cleanup;

	for (i = 0; i < NR; i++)
		activate_effective_progs(cgrp, i, arrays[i]);

	return 0;
cleanup:
	for (i = 0; i < NR; i++)
		bpf_prog_array_free(arrays[i]);

	for (p = cgroup_parent(cgrp); p; p = cgroup_parent(p))
		cgroup_bpf_put(p);

	percpu_ref_exit(&cgrp->bpf.refcnt);

	return -ENOMEM;
}
static int update_effective_progs(struct cgroup *cgrp,
				  enum bpf_attach_type type)
{
	struct cgroup_subsys_state *css;
	int err;

	/* allocate and recompute effective prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		err = compute_effective_progs(desc, type, &desc->bpf.inactive);
		if (err)
			goto cleanup;
	}

	/* all allocations were successful. Activate all prog arrays */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt)) {
			if (unlikely(desc->bpf.inactive)) {
				bpf_prog_array_free(desc->bpf.inactive);
				desc->bpf.inactive = NULL;
			}
			continue;
		}

		activate_effective_progs(desc, type, desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return 0;

cleanup:
	/* oom while computing effective. Free all computed effective arrays
	 * since they were not activated
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		bpf_prog_array_free(desc->bpf.inactive);
		desc->bpf.inactive = NULL;
	}

	return err;
}
#define BPF_CGROUP_MAX_PROGS 64
static struct bpf_prog_list *find_attach_entry(struct list_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       struct bpf_prog *replace_prog,
					       bool allow_multi)
{
	struct bpf_prog_list *pl;

	/* single-attach case */
	if (!allow_multi) {
		if (list_empty(progs))
			return NULL;
		return list_first_entry(progs, typeof(*pl), node);
	}

	list_for_each_entry(pl, progs, node) {
		if (prog && pl->prog == prog)
			/* disallow attaching the same prog twice */
			return ERR_PTR(-EINVAL);
		if (link && pl->link == link)
			/* disallow attaching the same link twice */
			return ERR_PTR(-EINVAL);
	}

	/* direct prog multi-attach w/ replacement case */
	if (replace_prog) {
		list_for_each_entry(pl, progs, node) {
			if (pl->prog == replace_prog)
				return pl;
		}
		/* prog to replace not found for cgroup */
		return ERR_PTR(-ENOENT);
	}

	return NULL;
}
/**
 * __cgroup_bpf_attach() - Attach the program or the link to a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup whose descendants to traverse
 * @prog: A program to attach
 * @link: A link to attach
 * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
 * @type: Type of attach operation
 * @flags: Option flags
 *
 * Exactly one of @prog or @link can be non-null.
 * Must be called with cgroup_mutex held.
 */
int __cgroup_bpf_attach(struct cgroup *cgrp,
			struct bpf_prog *prog, struct bpf_prog *replace_prog,
			struct bpf_cgroup_link *link,
			enum bpf_attach_type type, u32 flags)
{
	u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
	struct list_head *progs = &cgrp->bpf.progs[type];
	struct bpf_prog *old_prog = NULL;
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
	struct bpf_cgroup_storage *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {};
	struct bpf_prog_list *pl;
	int err;

	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
	    ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
		/* invalid combination */
		return -EINVAL;
	if (link && (prog || replace_prog))
		/* only either link or prog/replace_prog can be specified */
		return -EINVAL;
	if (!!replace_prog != !!(flags & BPF_F_REPLACE))
		/* replace_prog implies BPF_F_REPLACE, and vice versa */
		return -EINVAL;

	if (!hierarchy_allows_attach(cgrp, type))
		return -EPERM;

	if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
		/* Disallow attaching non-overridable on top
		 * of existing overridable in this cgroup.
		 * Disallow attaching multi-prog if overridable or none
		 */
		return -EPERM;

	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
		return -E2BIG;

	pl = find_attach_entry(progs, prog, link, replace_prog,
			       flags & BPF_F_ALLOW_MULTI);
	if (IS_ERR(pl))
		return PTR_ERR(pl);

	if (bpf_cgroup_storages_alloc(storage, prog ? : link->link.prog))
		return -ENOMEM;

	if (pl) {
		old_prog = pl->prog;
		bpf_cgroup_storages_unlink(pl->storage);
		bpf_cgroup_storages_assign(old_storage, pl->storage);
	} else {
		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
		if (!pl) {
			bpf_cgroup_storages_free(storage);
			return -ENOMEM;
		}
		list_add_tail(&pl->node, progs);
	}

	pl->prog = prog;
	pl->link = link;
	bpf_cgroup_storages_assign(pl->storage, storage);
	cgrp->bpf.flags[type] = saved_flags;

	err = update_effective_progs(cgrp, type);
	if (err)
		goto cleanup;

	bpf_cgroup_storages_free(old_storage);
	if (old_prog)
		bpf_prog_put(old_prog);
	else
		static_branch_inc(&cgroup_bpf_enabled_key);
	bpf_cgroup_storages_link(pl->storage, cgrp, type);
	return 0;

cleanup:
	if (old_prog) {
		pl->prog = old_prog;
		pl->link = NULL;
	}
	bpf_cgroup_storages_free(pl->storage);
	bpf_cgroup_storages_assign(pl->storage, old_storage);
	bpf_cgroup_storages_link(pl->storage, cgrp, type);
	if (!old_prog) {
		list_del(&pl->node);
		kfree(pl);
	}
	return err;
}
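/* Illustrative sketch (not part of this file): attaching a program with
 * multi-attach semantics from userspace via the bpf(2) syscall. Field and
 * flag names follow include/uapi/linux/bpf.h; error handling is omitted and
 * cgroup_fd/prog_fd are assumed to be valid descriptors.
 *
 *	union bpf_attr attr = {};
 *
 *	attr.target_fd	   = cgroup_fd;	// fd of a /sys/fs/cgroup/... directory
 *	attr.attach_bpf_fd = prog_fd;	// loaded BPF_PROG_TYPE_CGROUP_SKB prog
 *	attr.attach_type   = BPF_CGROUP_INET_EGRESS;
 *	attr.attach_flags  = BPF_F_ALLOW_MULTI;
 *	syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
 */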
/* Swap updated BPF program for given link in effective program arrays across
 * all descendant cgroups. This function is guaranteed to succeed.
 */
static void replace_effective_prog(struct cgroup *cgrp,
				   enum bpf_attach_type type,
				   struct bpf_cgroup_link *link)
{
	struct bpf_prog_array_item *item;
	struct cgroup_subsys_state *css;
	struct bpf_prog_array *progs;
	struct bpf_prog_list *pl;
	struct list_head *head;
	struct cgroup *cg;
	int pos;

	css_for_each_descendant_pre(css, &cgrp->self) {
		struct cgroup *desc = container_of(css, struct cgroup, self);

		if (percpu_ref_is_zero(&desc->bpf.refcnt))
			continue;

		/* find position of link in effective progs array */
		for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
			if (pos && !(cg->bpf.flags[type] & BPF_F_ALLOW_MULTI))
				continue;

			head = &cg->bpf.progs[type];
			list_for_each_entry(pl, head, node) {
				if (!prog_list_prog(pl))
					continue;
				if (pl->link == link)
					goto found;
				pos++;
			}
		}
found:
		BUG_ON(!cg);
		progs = rcu_dereference_protected(
				desc->bpf.effective[type],
				lockdep_is_held(&cgroup_mutex));
		item = &progs->items[pos];
		WRITE_ONCE(item->prog, link->link.prog);
	}
}
/**
 * __cgroup_bpf_replace() - Replace link's program and propagate the change
 *                          to descendants
 * @cgrp: The cgroup whose descendants to traverse
 * @link: A link for which to replace BPF program
 * @new_prog: Updated BPF program to use for the link
 *
 * Must be called with cgroup_mutex held.
 */
int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
			 struct bpf_prog *new_prog)
{
	struct list_head *progs = &cgrp->bpf.progs[link->type];
	struct bpf_prog *old_prog;
	struct bpf_prog_list *pl;
	bool found = false;

	if (link->link.prog->type != new_prog->type)
		return -EINVAL;

	list_for_each_entry(pl, progs, node) {
		if (pl->link == link) {
			found = true;
			break;
		}
	}
	if (!found)
		return -ENOENT;

	old_prog = xchg(&link->link.prog, new_prog);
	replace_effective_prog(cgrp, link->type, link);
	bpf_prog_put(old_prog);
	return 0;
}
static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
					       struct bpf_prog *prog,
					       struct bpf_cgroup_link *link,
					       bool allow_multi)
{
	struct bpf_prog_list *pl;

	if (!allow_multi) {
		if (list_empty(progs))
			/* report error when trying to detach and nothing is attached */
			return ERR_PTR(-ENOENT);

		/* to maintain backward compatibility NONE and OVERRIDE cgroups
		 * allow detaching with invalid FD (prog==NULL) in legacy mode
		 */
		return list_first_entry(progs, typeof(*pl), node);
	}

	if (!prog && !link)
		/* to detach MULTI prog the user has to specify valid FD
		 * of the program or link to be detached
		 */
		return ERR_PTR(-EINVAL);

	/* find the prog or link and detach it */
	list_for_each_entry(pl, progs, node) {
		if (pl->prog == prog && pl->link == link)
			return pl;
	}
	return ERR_PTR(-ENOENT);
}
/**
 * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
 *                         propagate the change to descendants
 * @cgrp: The cgroup whose descendants to traverse
 * @prog: A program to detach or NULL
 * @link: A link to detach or NULL
 * @type: Type of detach operation
 *
 * At most one of @prog or @link can be non-NULL.
 * Must be called with cgroup_mutex held.
 */
int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
			struct bpf_cgroup_link *link, enum bpf_attach_type type)
{
	struct list_head *progs = &cgrp->bpf.progs[type];
	u32 flags = cgrp->bpf.flags[type];
	struct bpf_prog_list *pl;
	struct bpf_prog *old_prog;
	int err;

	if (prog && link)
		/* only one of prog or link can be specified */
		return -EINVAL;

	pl = find_detach_entry(progs, prog, link, flags & BPF_F_ALLOW_MULTI);
	if (IS_ERR(pl))
		return PTR_ERR(pl);

	/* mark it deleted, so it's ignored while recomputing effective */
	old_prog = pl->prog;
	pl->prog = NULL;
	pl->link = NULL;

	err = update_effective_progs(cgrp, type);
	if (err)
		goto cleanup;

	/* now can actually delete it from this cgroup list */
	list_del(&pl->node);
	bpf_cgroup_storages_unlink(pl->storage);
	bpf_cgroup_storages_free(pl->storage);
	kfree(pl);
	if (list_empty(progs))
		/* last program was detached, reset flags to zero */
		cgrp->bpf.flags[type] = 0;
	if (old_prog)
		bpf_prog_put(old_prog);
	static_branch_dec(&cgroup_bpf_enabled_key);
	return 0;

cleanup:
	/* restore back prog or link */
	pl->prog = old_prog;
	pl->link = link;
	return err;
}
/* Must be called with cgroup_mutex held to avoid races. */
int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
		       union bpf_attr __user *uattr)
{
	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
	enum bpf_attach_type type = attr->query.attach_type;
	struct list_head *progs = &cgrp->bpf.progs[type];
	u32 flags = cgrp->bpf.flags[type];
	struct bpf_prog_array *effective;
	struct bpf_prog *prog;
	int cnt, ret = 0, i;

	effective = rcu_dereference_protected(cgrp->bpf.effective[type],
					      lockdep_is_held(&cgroup_mutex));

	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE)
		cnt = bpf_prog_array_length(effective);
	else
		cnt = prog_list_length(progs);

	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
		return -EFAULT;
	if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt)))
		return -EFAULT;
	if (attr->query.prog_cnt == 0 || !prog_ids || !cnt)
		/* return early if user requested only program count + flags */
		return 0;
	if (attr->query.prog_cnt < cnt) {
		cnt = attr->query.prog_cnt;
		ret = -ENOSPC;
	}

	if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
		return bpf_prog_array_copy_to_user(effective, prog_ids, cnt);
	} else {
		struct bpf_prog_list *pl;
		u32 id;

		i = 0;
		list_for_each_entry(pl, progs, node) {
			prog = prog_list_prog(pl);
			id = prog->aux->id;
			if (copy_to_user(prog_ids + i, &id, sizeof(id)))
				return -EFAULT;
			if (++i == cnt)
				break;
		}
	}
	return ret;
}
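/* Illustrative sketch (not part of this file): querying attached program IDs
 * from userspace. Names follow include/uapi/linux/bpf.h; error handling is
 * omitted and cgroup_fd is an assumed valid descriptor.
 *
 *	__u32 ids[64];
 *	union bpf_attr attr = {};
 *
 *	attr.query.target_fd	= cgroup_fd;
 *	attr.query.attach_type	= BPF_CGROUP_INET_EGRESS;
 *	attr.query.prog_cnt	= 64;
 *	attr.query.prog_ids	= (__u64)(unsigned long)ids;
 *	syscall(__NR_bpf, BPF_PROG_QUERY, &attr, sizeof(attr));
 *	// attr.query.prog_cnt now holds the number of IDs written
 */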
int cgroup_bpf_prog_attach(const union bpf_attr *attr,
			   enum bpf_prog_type ptype, struct bpf_prog *prog)
{
	struct bpf_prog *replace_prog = NULL;
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
	    (attr->attach_flags & BPF_F_REPLACE)) {
		replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
		if (IS_ERR(replace_prog)) {
			cgroup_put(cgrp);
			return PTR_ERR(replace_prog);
		}
	}

	ret = cgroup_bpf_attach(cgrp, prog, replace_prog, NULL,
				attr->attach_type, attr->attach_flags);

	if (replace_prog)
		bpf_prog_put(replace_prog);
	cgroup_put(cgrp);
	return ret;
}

int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype)
{
	struct bpf_prog *prog;
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype);
	if (IS_ERR(prog))
		prog = NULL;

	ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type);
	if (prog)
		bpf_prog_put(prog);

	cgroup_put(cgrp);
	return ret;
}
static void bpf_cgroup_link_release(struct bpf_link *link)
{
	struct bpf_cgroup_link *cg_link =
		container_of(link, struct bpf_cgroup_link, link);

	/* link might have been auto-detached by dying cgroup already,
	 * in that case our work is done here
	 */
	if (!cg_link->cgroup)
		return;

	mutex_lock(&cgroup_mutex);

	/* re-check cgroup under lock again */
	if (!cg_link->cgroup) {
		mutex_unlock(&cgroup_mutex);
		return;
	}

	WARN_ON(__cgroup_bpf_detach(cg_link->cgroup, NULL, cg_link,
				    cg_link->type));

	mutex_unlock(&cgroup_mutex);
	cgroup_put(cg_link->cgroup);
}

static void bpf_cgroup_link_dealloc(struct bpf_link *link)
{
	struct bpf_cgroup_link *cg_link =
		container_of(link, struct bpf_cgroup_link, link);

	kfree(cg_link);
}

const struct bpf_link_ops bpf_cgroup_link_lops = {
	.release = bpf_cgroup_link_release,
	.dealloc = bpf_cgroup_link_dealloc,
};
int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
	struct bpf_cgroup_link *link;
	struct file *link_file;
	struct cgroup *cgrp;
	int err, link_fd;

	if (attr->link_create.flags)
		return -EINVAL;

	cgrp = cgroup_get_from_fd(attr->link_create.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	link = kzalloc(sizeof(*link), GFP_USER);
	if (!link) {
		err = -ENOMEM;
		goto out_put_cgroup;
	}
	bpf_link_init(&link->link, &bpf_cgroup_link_lops, prog);
	link->cgroup = cgrp;
	link->type = attr->link_create.attach_type;

	link_file = bpf_link_new_file(&link->link, &link_fd);
	if (IS_ERR(link_file)) {
		kfree(link);
		err = PTR_ERR(link_file);
		goto out_put_cgroup;
	}

	err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,
				BPF_F_ALLOW_MULTI);
	if (err) {
		bpf_link_cleanup(&link->link, link_file, link_fd);
		goto out_put_cgroup;
	}

	fd_install(link_fd, link_file);
	return link_fd;

out_put_cgroup:
	cgroup_put(cgrp);
	return err;
}
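/* Illustrative sketch (not part of this file): creating a cgroup bpf_link
 * from userspace. The attachment then lives only as long as the link fd (or
 * the cgroup); field names follow include/uapi/linux/bpf.h, error handling
 * omitted.
 *
 *	union bpf_attr attr = {};
 *
 *	attr.link_create.target_fd	= cgroup_fd;
 *	attr.link_create.prog_fd	= prog_fd;
 *	attr.link_create.attach_type	= BPF_CGROUP_INET_INGRESS;
 *	int link_fd = syscall(__NR_bpf, BPF_LINK_CREATE, &attr, sizeof(attr));
 */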
int cgroup_bpf_prog_query(const union bpf_attr *attr,
			  union bpf_attr __user *uattr)
{
	struct cgroup *cgrp;
	int ret;

	cgrp = cgroup_get_from_fd(attr->query.target_fd);
	if (IS_ERR(cgrp))
		return PTR_ERR(cgrp);

	ret = cgroup_bpf_query(cgrp, attr, uattr);

	cgroup_put(cgrp);
	return ret;
}
/**
 * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering
 * @sk: The socket sending or receiving traffic
 * @skb: The skb that is being sent or received
 * @type: The type of program to be executed
 *
 * If no socket is passed, or the socket is not of type INET or INET6,
 * this function does nothing and returns 0.
 *
 * The program type passed in via @type must be suitable for network
 * filtering. No further check is performed to assert that.
 *
 * For egress packets, this function can return:
 *   NET_XMIT_SUCCESS (0)	- continue with packet output
 *   NET_XMIT_DROP    (1)	- drop packet and notify TCP to call cwr
 *   NET_XMIT_CN      (2)	- continue with packet output and notify TCP
 *				  to call cwr
 *   -EPERM			- drop packet
 *
 * For ingress packets, this function will return -EPERM if any
 * attached program was found and if it returned != 1 during execution.
 * Otherwise 0 is returned.
 */
int __cgroup_bpf_run_filter_skb(struct sock *sk,
				struct sk_buff *skb,
				enum bpf_attach_type type)
{
	unsigned int offset = skb->data - skb_network_header(skb);
	struct sock *save_sk;
	void *saved_data_end;
	struct cgroup *cgrp;
	int ret;

	if (!sk || !sk_fullsock(sk))
		return 0;

	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
		return 0;

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	save_sk = skb->sk;
	skb->sk = sk;
	__skb_push(skb, offset);

	/* compute pointers for the bpf prog */
	bpf_compute_and_save_data_end(skb, &saved_data_end);

	if (type == BPF_CGROUP_INET_EGRESS) {
		ret = BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(
			cgrp->bpf.effective[type], skb, __bpf_prog_run_save_cb);
	} else {
		ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb,
					 __bpf_prog_run_save_cb);
		ret = (ret == 1 ? 0 : -EPERM);
	}
	bpf_restore_data_end(skb, saved_data_end);
	__skb_pull(skb, offset);
	skb->sk = save_sk;

	return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb);
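/* Illustrative sketch (not part of this file): a minimal cgroup skb filter
 * as it would be written in restricted C and compiled with clang -target bpf.
 * The SEC() macro is assumed to come from libbpf's bpf_helpers.h. Returning
 * 1 allows the packet, 0 drops it, per the return convention above.
 *
 *	SEC("cgroup_skb/egress")
 *	int allow_all(struct __sk_buff *skb)
 *	{
 *		return 1;
 *	}
 */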
/**
 * __cgroup_bpf_run_filter_sk() - Run a program on a sock
 * @sk: sock structure to manipulate
 * @type: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found
 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sk(struct sock *sk,
			       enum bpf_attach_type type)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	int ret;

	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN);
	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
/**
 * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and
 *                                       sockaddr provided by user
 * @sk: sock struct that will use sockaddr
 * @uaddr: sockaddr struct provided by user
 * @type: The type of program to be executed
 * @t_ctx: Pointer to attach type specific context
 *
 * The socket is expected to be of type INET or INET6.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned value != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
				      struct sockaddr *uaddr,
				      enum bpf_attach_type type,
				      void *t_ctx)
{
	struct bpf_sock_addr_kern ctx = {
		.sk = sk,
		.uaddr = uaddr,
		.t_ctx = t_ctx,
	};
	struct sockaddr_storage unspec;
	struct cgroup *cgrp;
	int ret;

	/* Check socket family since not all sockets represent network
	 * endpoint (e.g. AF_UNIX).
	 */
	if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6)
		return 0;

	if (!ctx.uaddr) {
		memset(&unspec, 0, sizeof(unspec));
		ctx.uaddr = (struct sockaddr *)&unspec;
	}

	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);

	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
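/* Illustrative sketch (not part of this file): a cgroup sock_addr program
 * that rejects connect(2) to anything but IPv4 loopback, in restricted C.
 * SEC() and bpf_htonl() are assumed to come from libbpf's bpf_helpers.h and
 * bpf_endian.h respectively.
 *
 *	SEC("cgroup/connect4")
 *	int connect4_only_loopback(struct bpf_sock_addr *ctx)
 *	{
 *		return ctx->user_ip4 == bpf_htonl(0x7f000001) ? 1 : 0;
 *	}
 */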
/**
 * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock
 * @sk: socket to get cgroup from
 * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains
 * sk with connection information (IP addresses, etc.) May not contain
 * cgroup info if it is a req sock.
 * @type: The type of program to be executed
 *
 * The socket passed is expected to be of type INET or INET6.
 *
 * The program type passed in via @type must be suitable for sock_ops
 * filtering. No further check is performed to assert that.
 *
 * This function will return %-EPERM if an attached program was found
 * and if it returned != 1 during execution. In all other cases, 0 is returned.
 */
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
				     struct bpf_sock_ops_kern *sock_ops,
				     enum bpf_attach_type type)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	int ret;

	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops,
				 BPF_PROG_RUN);
	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
				      short access, enum bpf_attach_type type)
{
	struct cgroup *cgrp;
	struct bpf_cgroup_dev_ctx ctx = {
		.access_type = (access << 16) | dev_type,
		.major = major,
		.minor = minor,
	};
	int allow = 1;

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx,
				   BPF_PROG_RUN);
	rcu_read_unlock();

	return !allow;
}
EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
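/* Illustrative sketch (not part of this file): a cgroup device program that
 * only permits read access to /dev/null (char 1:3), in restricted C. Note
 * the access_type packing matches the ctx initializer above: access in the
 * upper 16 bits, device type in the lower. Constants follow
 * include/uapi/linux/bpf.h.
 *
 *	SEC("cgroup/dev")
 *	int dev_null_ro(struct bpf_cgroup_dev_ctx *ctx)
 *	{
 *		if ((ctx->access_type >> 16) & ~BPF_DEVCG_ACC_READ)
 *			return 0;
 *		return (ctx->access_type & 0xffff) == BPF_DEVCG_DEV_CHAR &&
 *		       ctx->major == 1 && ctx->minor == 3;
 *	}
 */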
static const struct bpf_func_proto *
cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		return &bpf_map_lookup_elem_proto;
	case BPF_FUNC_map_update_elem:
		return &bpf_map_update_elem_proto;
	case BPF_FUNC_map_delete_elem:
		return &bpf_map_delete_elem_proto;
	case BPF_FUNC_map_push_elem:
		return &bpf_map_push_elem_proto;
	case BPF_FUNC_map_pop_elem:
		return &bpf_map_pop_elem_proto;
	case BPF_FUNC_map_peek_elem:
		return &bpf_map_peek_elem_proto;
	case BPF_FUNC_get_current_uid_gid:
		return &bpf_get_current_uid_gid_proto;
	case BPF_FUNC_get_local_storage:
		return &bpf_get_local_storage_proto;
	case BPF_FUNC_get_current_cgroup_id:
		return &bpf_get_current_cgroup_id_proto;
	case BPF_FUNC_trace_printk:
		if (capable(CAP_SYS_ADMIN))
			return bpf_get_trace_printk_proto();
		/* fall through */
	default:
		return NULL;
	}
}
static const struct bpf_func_proto *
cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return cgroup_base_func_proto(func_id, prog);
}

static bool cgroup_dev_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       const struct bpf_prog *prog,
				       struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (type == BPF_WRITE)
		return false;

	if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx))
		return false;
	/* The verifier guarantees that size > 0. */
	if (off % size != 0)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type):
		bpf_ctx_record_field_size(info, size_default);
		if (!bpf_ctx_narrow_access_ok(off, size, size_default))
			return false;
		break;
	default:
		if (size != size_default)
			return false;
	}

	return true;
}

const struct bpf_prog_ops cg_dev_prog_ops = {
};

const struct bpf_verifier_ops cg_dev_verifier_ops = {
	.get_func_proto		= cgroup_dev_func_proto,
	.is_valid_access	= cgroup_dev_is_valid_access,
};
/**
 * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl
 *
 * @head: sysctl table header
 * @table: sysctl table
 * @write: sysctl is being read (= 0) or written (= 1)
 * @buf: pointer to buffer passed by user space
 * @pcount: value-result argument: value is size of buffer pointed to by @buf,
 *	result is size of @new_buf if program set new value, initial value
 *	otherwise
 * @ppos: value-result argument: value is position at which read from or write
 *	to sysctl is happening, result is new position if program overrode it,
 *	initial value otherwise
 * @new_buf: pointer to pointer to new buffer that will be allocated if program
 *	overrides new value provided by user space on sysctl write
 *	NOTE: it's the caller's responsibility to free *new_buf if it was set
 * @type: type of program to be executed
 *
 * Program is run when sysctl is being accessed, either read or written, and
 * can allow or deny such access.
 *
 * This function will return %-EPERM if an attached program is found and
 * returned value != 1 during execution. In all other cases 0 is returned.
 */
int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
				   struct ctl_table *table, int write,
				   void __user *buf, size_t *pcount,
				   loff_t *ppos, void **new_buf,
				   enum bpf_attach_type type)
{
	struct bpf_sysctl_kern ctx = {
		.head = head,
		.table = table,
		.write = write,
		.ppos = ppos,
		.cur_val = NULL,
		.cur_len = PAGE_SIZE,
		.new_val = NULL,
		.new_len = 0,
		.new_updated = 0,
	};
	struct cgroup *cgrp;
	int ret;

	ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
	if (ctx.cur_val) {
		mm_segment_t old_fs;
		loff_t pos = 0;

		old_fs = get_fs();
		set_fs(KERNEL_DS);
		if (table->proc_handler(table, 0, (void __user *)ctx.cur_val,
					&ctx.cur_len, &pos)) {
			/* Let BPF program decide how to proceed. */
			ctx.cur_len = 0;
		}
		set_fs(old_fs);
	} else {
		/* Let BPF program decide how to proceed. */
		ctx.cur_len = 0;
	}

	if (write && buf && *pcount) {
		/* BPF program should be able to override new value with a
		 * buffer bigger than provided by user.
		 */
		ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
		ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
		if (!ctx.new_val ||
		    copy_from_user(ctx.new_val, buf, ctx.new_len))
			/* Let BPF program decide how to proceed. */
			ctx.new_len = 0;
	}

	rcu_read_lock();
	cgrp = task_dfl_cgroup(current);
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN);
	rcu_read_unlock();

	kfree(ctx.cur_val);

	if (ret == 1 && ctx.new_updated) {
		*new_buf = ctx.new_val;
		*pcount = ctx.new_len;
	} else {
		kfree(ctx.new_val);
	}

	return ret == 1 ? 0 : -EPERM;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
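/* Illustrative sketch (not part of this file): a cgroup sysctl program that
 * makes every sysctl read-only for tasks in the cgroup, in restricted C.
 * ctx->write is 1 for writes and 0 for reads; returning 0 denies the access,
 * which surfaces as -EPERM via the return path above.
 *
 *	SEC("cgroup/sysctl")
 *	int sysctl_read_only(struct bpf_sysctl *ctx)
 *	{
 *		return ctx->write ? 0 : 1;
 *	}
 */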
#ifdef CONFIG_NET
static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
					     enum bpf_attach_type attach_type)
{
	struct bpf_prog_array *prog_array;
	bool empty;

	rcu_read_lock();
	prog_array = rcu_dereference(cgrp->bpf.effective[attach_type]);
	empty = bpf_prog_array_is_empty(prog_array);
	rcu_read_unlock();

	return empty;
}

static int sockopt_alloc_buf(struct bpf_sockopt_kern *ctx, int max_optlen)
{
	if (unlikely(max_optlen > PAGE_SIZE) || max_optlen < 0)
		return -EINVAL;

	ctx->optval = kzalloc(max_optlen, GFP_USER);
	if (!ctx->optval)
		return -ENOMEM;

	ctx->optval_end = ctx->optval + max_optlen;

	return 0;
}

static void sockopt_free_buf(struct bpf_sockopt_kern *ctx)
{
	kfree(ctx->optval);
}
int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
				       int *optname, char __user *optval,
				       int *optlen, char **kernel_optval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = *level,
		.optname = *optname,
	};
	int ret, max_optlen;

	/* Opportunistic check to see whether we have any BPF program
	 * attached to the hook so we don't waste time allocating
	 * memory and locking the socket.
	 */
	if (!cgroup_bpf_enabled ||
	    __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_SETSOCKOPT))
		return 0;

	/* Allocate a bit more than the initial user buffer for
	 * BPF program. The canonical use case is overriding
	 * TCP_CONGESTION(nv) to TCP_CONGESTION(cubic).
	 */
	max_optlen = max_t(int, 16, *optlen);

	ret = sockopt_alloc_buf(&ctx, max_optlen);
	if (ret)
		return ret;

	ctx.optlen = *optlen;

	if (copy_from_user(ctx.optval, optval, *optlen) != 0) {
		ret = -EFAULT;
		goto out;
	}

	lock_sock(sk);
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_SETSOCKOPT],
				 &ctx, BPF_PROG_RUN);
	release_sock(sk);

	if (!ret) {
		ret = -EPERM;
		goto out;
	}

	if (ctx.optlen == -1) {
		/* optlen set to -1, bypass kernel */
		ret = 1;
	} else if (ctx.optlen > max_optlen || ctx.optlen < -1) {
		/* optlen is out of bounds */
		ret = -EFAULT;
	} else {
		/* optlen within bounds, run kernel handler */
		ret = 0;

		/* export any potential modifications */
		*level = ctx.level;
		*optname = ctx.optname;
		*optlen = ctx.optlen;
		*kernel_optval = ctx.optval;
	}

out:
	if (ret)
		sockopt_free_buf(&ctx);
	return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt);
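/* Illustrative sketch (not part of this file): a setsockopt program that
 * passes every option through to the kernel untouched, in restricted C.
 * Setting ctx->optlen = -1 instead would consume the option and bypass the
 * kernel handler entirely, per the -1 case above.
 *
 *	SEC("cgroup/setsockopt")
 *	int setsockopt_passthrough(struct bpf_sockopt *ctx)
 *	{
 *		return 1;
 *	}
 */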
int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
				       int optname, char __user *optval,
				       int __user *optlen, int max_optlen,
				       int retval)
{
	struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
	struct bpf_sockopt_kern ctx = {
		.sk = sk,
		.level = level,
		.optname = optname,
		.retval = retval,
	};
	int ret;

	/* Opportunistic check to see whether we have any BPF program
	 * attached to the hook so we don't waste time allocating
	 * memory and locking the socket.
	 */
	if (!cgroup_bpf_enabled ||
	    __cgroup_bpf_prog_array_is_empty(cgrp, BPF_CGROUP_GETSOCKOPT))
		return retval;

	ret = sockopt_alloc_buf(&ctx, max_optlen);
	if (ret)
		return ret;

	ctx.optlen = max_optlen;

	if (!retval) {
		/* If kernel getsockopt finished successfully,
		 * copy whatever was returned to the user back
		 * into our temporary buffer. Set optlen to the
		 * one that kernel returned as well to let
		 * BPF programs inspect the value.
		 */
		if (get_user(ctx.optlen, optlen)) {
			ret = -EFAULT;
			goto out;
		}

		if (ctx.optlen > max_optlen)
			ctx.optlen = max_optlen;

		if (copy_from_user(ctx.optval, optval, ctx.optlen) != 0) {
			ret = -EFAULT;
			goto out;
		}
	}

	lock_sock(sk);
	ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[BPF_CGROUP_GETSOCKOPT],
				 &ctx, BPF_PROG_RUN);
	release_sock(sk);

	if (!ret) {
		ret = -EPERM;
		goto out;
	}

	if (ctx.optlen > max_optlen) {
		ret = -EFAULT;
		goto out;
	}

	/* BPF programs only allowed to set retval to 0, not some
	 * arbitrary value.
	 */
	if (ctx.retval != 0 && ctx.retval != retval) {
		ret = -EFAULT;
		goto out;
	}

	if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
	    put_user(ctx.optlen, optlen)) {
		ret = -EFAULT;
		goto out;
	}

	ret = ctx.retval;

out:
	sockopt_free_buf(&ctx);
	return ret;
}
EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt);
#endif
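/* Illustrative sketch (not part of this file): a getsockopt program that
 * hides one option from the application by shrinking the returned buffer to
 * zero, in restricted C. SOL_CUSTOM is a made-up level for the example; the
 * retval check above is why programs may only clear retval, not invent a
 * new error code.
 *
 *	SEC("cgroup/getsockopt")
 *	int getsockopt_hide(struct bpf_sockopt *ctx)
 *	{
 *		if (ctx->level == SOL_CUSTOM)
 *			ctx->optlen = 0;
 *		return 1;
 *	}
 */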
static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
			      size_t *lenp)
{
	ssize_t tmp_ret = 0, ret;

	if (dir->header.parent) {
		tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp);
		if (tmp_ret < 0)
			return tmp_ret;
	}

	ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp);
	if (ret < 0)
		return ret;
	*bufp += ret;
	*lenp -= ret;

	/* Avoid leading slash. */
	if (!ret)
		return tmp_ret;

	tmp_ret = strscpy(*bufp, "/", *lenp);
	if (tmp_ret < 0)
		return tmp_ret;
	*bufp += tmp_ret;
	*lenp -= tmp_ret;

	return ret + tmp_ret;
}

BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len, u64, flags)
{
	ssize_t tmp_ret = 0, ret;

	if (!buf)
		return -EINVAL;

	if (!(flags & BPF_F_SYSCTL_BASE_NAME)) {
		if (!ctx->head)
			return -EINVAL;
		tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len);
		if (tmp_ret < 0)
			return tmp_ret;
	}

	ret = strscpy(buf, ctx->table->procname, buf_len);

	return ret < 0 ? ret : tmp_ret + ret;
}

static const struct bpf_func_proto bpf_sysctl_get_name_proto = {
	.func		= bpf_sysctl_get_name,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
	.arg4_type	= ARG_ANYTHING,
};
static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
			     size_t src_len)
{
	if (!dst)
		return -EINVAL;

	if (!dst_len)
		return -E2BIG;

	if (!src || !src_len) {
		memset(dst, 0, dst_len);
		return -EINVAL;
	}

	memcpy(dst, src, min(dst_len, src_len));

	if (dst_len > src_len) {
		memset(dst + src_len, '\0', dst_len - src_len);
		return src_len;
	}

	dst[dst_len - 1] = '\0';

	return -E2BIG;
}

BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx,
	   char *, buf, size_t, buf_len)
{
	return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len);
}

static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = {
	.func		= bpf_sysctl_get_current_value,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf,
	   size_t, buf_len)
{
	if (!ctx->write) {
		if (buf && buf_len)
			memset(buf, '\0', buf_len);
		return -EINVAL;
	}
	return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len);
}

static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = {
	.func		= bpf_sysctl_get_new_value,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};
BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx,
	   const char *, buf, size_t, buf_len)
{
	if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len)
		return -EINVAL;

	if (buf_len > PAGE_SIZE - 1)
		return -E2BIG;

	memcpy(ctx->new_val, buf, buf_len);
	ctx->new_len = buf_len;
	ctx->new_updated = 1;

	return 0;
}

static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = {
	.func		= bpf_sysctl_set_new_value,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_MEM,
	.arg3_type	= ARG_CONST_SIZE,
};

static const struct bpf_func_proto *
sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
	case BPF_FUNC_strtol:
		return &bpf_strtol_proto;
	case BPF_FUNC_strtoul:
		return &bpf_strtoul_proto;
	case BPF_FUNC_sysctl_get_name:
		return &bpf_sysctl_get_name_proto;
	case BPF_FUNC_sysctl_get_current_value:
		return &bpf_sysctl_get_current_value_proto;
	case BPF_FUNC_sysctl_get_new_value:
		return &bpf_sysctl_get_new_value_proto;
	case BPF_FUNC_sysctl_set_new_value:
		return &bpf_sysctl_set_new_value_proto;
	default:
		return cgroup_base_func_proto(func_id, prog);
	}
}
static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type,
				   const struct bpf_prog *prog,
				   struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size)
		return false;

	switch (off) {
	case bpf_ctx_range(struct bpf_sysctl, write):
		if (type != BPF_READ)
			return false;
		bpf_ctx_record_field_size(info, size_default);
		return bpf_ctx_narrow_access_ok(off, size, size_default);
	case bpf_ctx_range(struct bpf_sysctl, file_pos):
		if (type == BPF_READ) {
			bpf_ctx_record_field_size(info, size_default);
			return bpf_ctx_narrow_access_ok(off, size, size_default);
		} else {
			return size == size_default;
		}
	default:
		return false;
	}
}
static u32 sysctl_convert_ctx_access(enum bpf_access_type type,
				     const struct bpf_insn *si,
				     struct bpf_insn *insn_buf,
				     struct bpf_prog *prog, u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;
	u32 read_size;

	switch (si->off) {
	case offsetof(struct bpf_sysctl, write):
		*insn++ = BPF_LDX_MEM(
			BPF_SIZE(si->code), si->dst_reg, si->src_reg,
			bpf_target_off(struct bpf_sysctl_kern, write,
				       sizeof_field(struct bpf_sysctl_kern,
						    write),
				       target_size));
		break;
	case offsetof(struct bpf_sysctl, file_pos):
		/* ppos is a pointer so it should be accessed via indirect
		 * loads and stores. Also for stores additional temporary
		 * register is used since neither src_reg nor dst_reg can be
		 * overridden.
		 */
		if (type == BPF_WRITE) {
			int treg = BPF_REG_9;

			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			if (si->src_reg == treg || si->dst_reg == treg)
				--treg;
			*insn++ = BPF_STX_MEM(
				BPF_DW, si->dst_reg, treg,
				offsetof(struct bpf_sysctl_kern, tmp_reg));
			*insn++ = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
				treg, si->dst_reg,
				offsetof(struct bpf_sysctl_kern, ppos));
			*insn++ = BPF_STX_MEM(
				BPF_SIZEOF(u32), treg, si->src_reg,
				bpf_ctx_narrow_access_offset(
					0, sizeof(u32), sizeof(loff_t)));
			*insn++ = BPF_LDX_MEM(
				BPF_DW, treg, si->dst_reg,
				offsetof(struct bpf_sysctl_kern, tmp_reg));
		} else {
			*insn++ = BPF_LDX_MEM(
				BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos),
				si->dst_reg, si->src_reg,
				offsetof(struct bpf_sysctl_kern, ppos));
			read_size = bpf_size_to_bytes(BPF_SIZE(si->code));
			*insn++ = BPF_LDX_MEM(
				BPF_SIZE(si->code), si->dst_reg, si->dst_reg,
				bpf_ctx_narrow_access_offset(
					0, read_size, sizeof(loff_t)));
		}
		*target_size = sizeof(u32);
		break;
	}

	return insn - insn_buf;
}

const struct bpf_verifier_ops cg_sysctl_verifier_ops = {
	.get_func_proto		= sysctl_func_proto,
	.is_valid_access	= sysctl_is_valid_access,
	.convert_ctx_access	= sysctl_convert_ctx_access,
};

const struct bpf_prog_ops cg_sysctl_prog_ops = {
};
static const struct bpf_func_proto *
cg_sockopt_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	switch (func_id) {
#ifdef CONFIG_NET
	case BPF_FUNC_sk_storage_get:
		return &bpf_sk_storage_get_proto;
	case BPF_FUNC_sk_storage_delete:
		return &bpf_sk_storage_delete_proto;
#endif
#ifdef CONFIG_INET
	case BPF_FUNC_tcp_sock:
		return &bpf_tcp_sock_proto;
#endif
	default:
		return cgroup_base_func_proto(func_id, prog);
	}
}

static bool cg_sockopt_is_valid_access(int off, int size,
				       enum bpf_access_type type,
				       const struct bpf_prog *prog,
				       struct bpf_insn_access_aux *info)
{
	const int size_default = sizeof(__u32);

	if (off < 0 || off >= sizeof(struct bpf_sockopt))
		return false;

	if (off % size != 0)
		return false;

	if (type == BPF_WRITE) {
		switch (off) {
		case offsetof(struct bpf_sockopt, retval):
			if (size != size_default)
				return false;
			return prog->expected_attach_type ==
				BPF_CGROUP_GETSOCKOPT;
		case offsetof(struct bpf_sockopt, optname):
			/* fallthrough */
		case offsetof(struct bpf_sockopt, level):
			if (size != size_default)
				return false;
			return prog->expected_attach_type ==
				BPF_CGROUP_SETSOCKOPT;
		case offsetof(struct bpf_sockopt, optlen):
			return size == size_default;
		default:
			return false;
		}
	}

	switch (off) {
	case offsetof(struct bpf_sockopt, sk):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_SOCKET;
		break;
	case offsetof(struct bpf_sockopt, optval):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_PACKET;
		break;
	case offsetof(struct bpf_sockopt, optval_end):
		if (size != sizeof(__u64))
			return false;
		info->reg_type = PTR_TO_PACKET_END;
		break;
	case offsetof(struct bpf_sockopt, retval):
		if (size != size_default)
			return false;
		return prog->expected_attach_type == BPF_CGROUP_GETSOCKOPT;
	default:
		if (size != size_default)
			return false;
		break;
	}
	return true;
}

#define CG_SOCKOPT_ACCESS_FIELD(T, F)					\
	T(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, F),			\
	  si->dst_reg, si->src_reg,					\
	  offsetof(struct bpf_sockopt_kern, F))
static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
					 const struct bpf_insn *si,
					 struct bpf_insn *insn_buf,
					 struct bpf_prog *prog,
					 u32 *target_size)
{
	struct bpf_insn *insn = insn_buf;

	switch (si->off) {
	case offsetof(struct bpf_sockopt, sk):
		*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, sk);
		break;
	case offsetof(struct bpf_sockopt, level):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, level);
		else
			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, level);
		break;
	case offsetof(struct bpf_sockopt, optname):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optname);
		else
			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optname);
		break;
	case offsetof(struct bpf_sockopt, optlen):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, optlen);
		else
			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
		break;
	case offsetof(struct bpf_sockopt, retval):
		if (type == BPF_WRITE)
			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
		else
			*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
		break;
	case offsetof(struct bpf_sockopt, optval):
		*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
		break;
	case offsetof(struct bpf_sockopt, optval_end):
		*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval_end);
		break;
	}

	return insn - insn_buf;
}

static int cg_sockopt_get_prologue(struct bpf_insn *insn_buf,
				   bool direct_write,
				   const struct bpf_prog *prog)
{
	/* Nothing to do for sockopt argument. The data is kzalloc'ated.
	 */
	return 0;
}

const struct bpf_verifier_ops cg_sockopt_verifier_ops = {
	.get_func_proto		= cg_sockopt_func_proto,
	.is_valid_access	= cg_sockopt_is_valid_access,
	.convert_ctx_access	= cg_sockopt_convert_ctx_access,
	.gen_prologue		= cg_sockopt_get_prologue,
};

const struct bpf_prog_ops cg_sockopt_prog_ops = {
};