// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * common eBPF ELF operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation;
 * version 2.1 of the License (not later!)
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses>
 */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <asm/unistd.h>
#include <errno.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <limits.h>
#include <sys/resource.h>
#include "libbpf_internal.h"
/*
 * When building perf, unistd.h is overridden. __NR_bpf is
 * required to be defined explicitly.
 *
 * The numbers below are the per-architecture bpf() syscall numbers and are
 * only used when the system headers did not already provide __NR_bpf.
 */
#ifndef __NR_bpf
# if defined(__i386__)
#  define __NR_bpf 357
# elif defined(__x86_64__)
#  define __NR_bpf 321
# elif defined(__aarch64__)
#  define __NR_bpf 280
# elif defined(__sparc__)
#  define __NR_bpf 349
# elif defined(__s390__)
#  define __NR_bpf 351
# elif defined(__arc__)
#  define __NR_bpf 280
# elif defined(__mips__) && defined(_ABIO32)
#  define __NR_bpf 4355
# elif defined(__mips__) && defined(_ABIN32)
#  define __NR_bpf 6319
# elif defined(__mips__) && defined(_ABI64)
#  define __NR_bpf 5315
# else
#  error __NR_bpf not defined. libbpf does not support your arch.
# endif
#endif
/* Widen a user-space pointer to the __u64 form stored in bpf_attr fields. */
static inline __u64 ptr_to_u64(const void *ptr)
{
	/* go through unsigned long so 32-bit pointers are zero-extended */
	return (__u64) (unsigned long) ptr;
}
72 static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
75 return syscall(__NR_bpf, cmd, attr, size);
78 static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr,
83 fd = sys_bpf(cmd, attr, size);
84 return ensure_good_fd(fd);
87 int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
92 fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size);
93 } while (fd < 0 && errno == EAGAIN && --attempts > 0);
98 /* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to
99 * memcg-based memory accounting for BPF maps and progs. This was done in [0].
100 * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in
101 * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF.
103 * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/
104 * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper")
106 int probe_memcg_account(int token_fd)
108 const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
109 struct bpf_insn insns[] = {
110 BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns),
113 size_t insn_cnt = ARRAY_SIZE(insns);
117 /* attempt loading freplace trying to use custom BTF */
118 memset(&attr, 0, attr_sz);
119 attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
120 attr.insns = ptr_to_u64(insns);
121 attr.insn_cnt = insn_cnt;
122 attr.license = ptr_to_u64("GPL");
123 attr.prog_token_fd = token_fd;
125 prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz);
/* Set once bump_rlimit_memlock() has gone past its memcg-accounting check. */
static bool memlock_bumped;
/* Limit applied by bump_rlimit_memlock(); zero disables the bump. */
static rlim_t memlock_rlim = RLIM_INFINITY;

/* Override the RLIMIT_MEMLOCK value used for auto-bumping.
 * Fails with -EBUSY (via libbpf_err()) once the bump has already happened;
 * returns 0 otherwise.
 */
int libbpf_set_memlock_rlim(size_t memlock_bytes)
{
	if (memlock_bumped)
		return libbpf_err(-EBUSY);

	memlock_rlim = memlock_bytes;
	return 0;
}
145 int bump_rlimit_memlock(void)
149 /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */
150 if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT))
153 memlock_bumped = true;
155 /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */
156 if (memlock_rlim == 0)
159 rlim.rlim_cur = rlim.rlim_max = memlock_rlim;
160 if (setrlimit(RLIMIT_MEMLOCK, &rlim))
166 int bpf_map_create(enum bpf_map_type map_type,
167 const char *map_name,
171 const struct bpf_map_create_opts *opts)
173 const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd);
177 bump_rlimit_memlock();
179 memset(&attr, 0, attr_sz);
181 if (!OPTS_VALID(opts, bpf_map_create_opts))
182 return libbpf_err(-EINVAL);
184 attr.map_type = map_type;
185 if (map_name && feat_supported(NULL, FEAT_PROG_NAME))
186 libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
187 attr.key_size = key_size;
188 attr.value_size = value_size;
189 attr.max_entries = max_entries;
191 attr.btf_fd = OPTS_GET(opts, btf_fd, 0);
192 attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0);
193 attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0);
194 attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0);
196 attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0);
197 attr.map_flags = OPTS_GET(opts, map_flags, 0);
198 attr.map_extra = OPTS_GET(opts, map_extra, 0);
199 attr.numa_node = OPTS_GET(opts, numa_node, 0);
200 attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
202 attr.map_token_fd = OPTS_GET(opts, token_fd, 0);
204 fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
205 return libbpf_err_errno(fd);
/* Build a copy of @cnt records of @actual_rec_size bytes each in which only
 * the first @expected_rec_size bytes of every record are kept and the
 * remaining tail bytes are zeroed.
 *
 * Callers are expected to pass expected_rec_size <= actual_rec_size.
 * Returns a malloc()'ed buffer the caller must free(), or NULL if the
 * allocation failed.
 */
static void *
alloc_zero_tailing_info(const void *orecord, __u32 cnt,
			__u32 actual_rec_size, __u32 expected_rec_size)
{
	__u64 info_len = (__u64)actual_rec_size * cnt;
	const char *src = orecord;
	char *info, *dst;
	__u32 i;

	info = malloc(info_len);
	if (!info)
		return NULL;

	/* zero out bytes kernel does not understand */
	dst = info;
	for (i = 0; i < cnt; i++) {
		memcpy(dst, src, expected_rec_size);
		memset(dst + expected_rec_size, 0,
		       actual_rec_size - expected_rec_size);
		src += actual_rec_size;
		dst += actual_rec_size;
	}

	return info;
}
233 int bpf_prog_load(enum bpf_prog_type prog_type,
234 const char *prog_name, const char *license,
235 const struct bpf_insn *insns, size_t insn_cnt,
236 struct bpf_prog_load_opts *opts)
238 const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd);
239 void *finfo = NULL, *linfo = NULL;
240 const char *func_info, *line_info;
241 __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
242 __u32 func_info_rec_size, line_info_rec_size;
247 bump_rlimit_memlock();
249 if (!OPTS_VALID(opts, bpf_prog_load_opts))
250 return libbpf_err(-EINVAL);
252 attempts = OPTS_GET(opts, attempts, 0);
254 return libbpf_err(-EINVAL);
256 attempts = PROG_LOAD_ATTEMPTS;
258 memset(&attr, 0, attr_sz);
260 attr.prog_type = prog_type;
261 attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0);
263 attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0);
264 attr.prog_flags = OPTS_GET(opts, prog_flags, 0);
265 attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0);
266 attr.kern_version = OPTS_GET(opts, kern_version, 0);
267 attr.prog_token_fd = OPTS_GET(opts, token_fd, 0);
269 if (prog_name && feat_supported(NULL, FEAT_PROG_NAME))
270 libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
271 attr.license = ptr_to_u64(license);
273 if (insn_cnt > UINT_MAX)
274 return libbpf_err(-E2BIG);
276 attr.insns = ptr_to_u64(insns);
277 attr.insn_cnt = (__u32)insn_cnt;
279 attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
280 attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0);
282 if (attach_prog_fd && attach_btf_obj_fd)
283 return libbpf_err(-EINVAL);
285 attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0);
287 attr.attach_prog_fd = attach_prog_fd;
289 attr.attach_btf_obj_fd = attach_btf_obj_fd;
291 log_buf = OPTS_GET(opts, log_buf, NULL);
292 log_size = OPTS_GET(opts, log_size, 0);
293 log_level = OPTS_GET(opts, log_level, 0);
295 if (!!log_buf != !!log_size)
296 return libbpf_err(-EINVAL);
298 func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0);
299 func_info = OPTS_GET(opts, func_info, NULL);
300 attr.func_info_rec_size = func_info_rec_size;
301 attr.func_info = ptr_to_u64(func_info);
302 attr.func_info_cnt = OPTS_GET(opts, func_info_cnt, 0);
304 line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0);
305 line_info = OPTS_GET(opts, line_info, NULL);
306 attr.line_info_rec_size = line_info_rec_size;
307 attr.line_info = ptr_to_u64(line_info);
308 attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0);
310 attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL));
313 attr.log_buf = ptr_to_u64(log_buf);
314 attr.log_size = log_size;
315 attr.log_level = log_level;
318 fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
319 OPTS_SET(opts, log_true_size, attr.log_true_size);
323 /* After bpf_prog_load, the kernel may modify certain attributes
324 * to give user space a hint how to deal with loading failure.
325 * Check to see whether we can make some changes and load again.
327 while (errno == E2BIG && (!finfo || !linfo)) {
328 if (!finfo && attr.func_info_cnt &&
329 attr.func_info_rec_size < func_info_rec_size) {
330 /* try with corrected func info records */
331 finfo = alloc_zero_tailing_info(func_info,
334 attr.func_info_rec_size);
340 attr.func_info = ptr_to_u64(finfo);
341 attr.func_info_rec_size = func_info_rec_size;
342 } else if (!linfo && attr.line_info_cnt &&
343 attr.line_info_rec_size < line_info_rec_size) {
344 linfo = alloc_zero_tailing_info(line_info,
347 attr.line_info_rec_size);
353 attr.line_info = ptr_to_u64(linfo);
354 attr.line_info_rec_size = line_info_rec_size;
359 fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
360 OPTS_SET(opts, log_true_size, attr.log_true_size);
365 if (log_level == 0 && log_buf) {
366 /* log_level == 0 with non-NULL log_buf requires retrying on error
367 * with log_level == 1 and log_buf/log_buf_size set, to get details of
370 attr.log_buf = ptr_to_u64(log_buf);
371 attr.log_size = log_size;
374 fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
375 OPTS_SET(opts, log_true_size, attr.log_true_size);
378 /* free() doesn't affect errno, so we don't need to restore it */
381 return libbpf_err_errno(fd);
384 int bpf_map_update_elem(int fd, const void *key, const void *value,
387 const size_t attr_sz = offsetofend(union bpf_attr, flags);
391 memset(&attr, 0, attr_sz);
393 attr.key = ptr_to_u64(key);
394 attr.value = ptr_to_u64(value);
397 ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
398 return libbpf_err_errno(ret);
401 int bpf_map_lookup_elem(int fd, const void *key, void *value)
403 const size_t attr_sz = offsetofend(union bpf_attr, flags);
407 memset(&attr, 0, attr_sz);
409 attr.key = ptr_to_u64(key);
410 attr.value = ptr_to_u64(value);
412 ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
413 return libbpf_err_errno(ret);
416 int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
418 const size_t attr_sz = offsetofend(union bpf_attr, flags);
422 memset(&attr, 0, attr_sz);
424 attr.key = ptr_to_u64(key);
425 attr.value = ptr_to_u64(value);
428 ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
429 return libbpf_err_errno(ret);
432 int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
434 const size_t attr_sz = offsetofend(union bpf_attr, flags);
438 memset(&attr, 0, attr_sz);
440 attr.key = ptr_to_u64(key);
441 attr.value = ptr_to_u64(value);
443 ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz);
444 return libbpf_err_errno(ret);
447 int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
449 const size_t attr_sz = offsetofend(union bpf_attr, flags);
453 memset(&attr, 0, attr_sz);
455 attr.key = ptr_to_u64(key);
456 attr.value = ptr_to_u64(value);
459 ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz);
460 return libbpf_err_errno(ret);
463 int bpf_map_delete_elem(int fd, const void *key)
465 const size_t attr_sz = offsetofend(union bpf_attr, flags);
469 memset(&attr, 0, attr_sz);
471 attr.key = ptr_to_u64(key);
473 ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
474 return libbpf_err_errno(ret);
477 int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags)
479 const size_t attr_sz = offsetofend(union bpf_attr, flags);
483 memset(&attr, 0, attr_sz);
485 attr.key = ptr_to_u64(key);
488 ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
489 return libbpf_err_errno(ret);
492 int bpf_map_get_next_key(int fd, const void *key, void *next_key)
494 const size_t attr_sz = offsetofend(union bpf_attr, next_key);
498 memset(&attr, 0, attr_sz);
500 attr.key = ptr_to_u64(key);
501 attr.next_key = ptr_to_u64(next_key);
503 ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, attr_sz);
504 return libbpf_err_errno(ret);
507 int bpf_map_freeze(int fd)
509 const size_t attr_sz = offsetofend(union bpf_attr, map_fd);
513 memset(&attr, 0, attr_sz);
516 ret = sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz);
517 return libbpf_err_errno(ret);
520 static int bpf_map_batch_common(int cmd, int fd, void *in_batch,
521 void *out_batch, void *keys, void *values,
523 const struct bpf_map_batch_opts *opts)
525 const size_t attr_sz = offsetofend(union bpf_attr, batch);
529 if (!OPTS_VALID(opts, bpf_map_batch_opts))
530 return libbpf_err(-EINVAL);
532 memset(&attr, 0, attr_sz);
533 attr.batch.map_fd = fd;
534 attr.batch.in_batch = ptr_to_u64(in_batch);
535 attr.batch.out_batch = ptr_to_u64(out_batch);
536 attr.batch.keys = ptr_to_u64(keys);
537 attr.batch.values = ptr_to_u64(values);
538 attr.batch.count = *count;
539 attr.batch.elem_flags = OPTS_GET(opts, elem_flags, 0);
540 attr.batch.flags = OPTS_GET(opts, flags, 0);
542 ret = sys_bpf(cmd, &attr, attr_sz);
543 *count = attr.batch.count;
545 return libbpf_err_errno(ret);
548 int bpf_map_delete_batch(int fd, const void *keys, __u32 *count,
549 const struct bpf_map_batch_opts *opts)
551 return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL,
552 NULL, (void *)keys, NULL, count, opts);
555 int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys,
556 void *values, __u32 *count,
557 const struct bpf_map_batch_opts *opts)
559 return bpf_map_batch_common(BPF_MAP_LOOKUP_BATCH, fd, in_batch,
560 out_batch, keys, values, count, opts);
563 int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,
564 void *keys, void *values, __u32 *count,
565 const struct bpf_map_batch_opts *opts)
567 return bpf_map_batch_common(BPF_MAP_LOOKUP_AND_DELETE_BATCH,
568 fd, in_batch, out_batch, keys, values,
572 int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count,
573 const struct bpf_map_batch_opts *opts)
575 return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL,
576 (void *)keys, (void *)values, count, opts);
579 int bpf_obj_pin_opts(int fd, const char *pathname, const struct bpf_obj_pin_opts *opts)
581 const size_t attr_sz = offsetofend(union bpf_attr, path_fd);
585 if (!OPTS_VALID(opts, bpf_obj_pin_opts))
586 return libbpf_err(-EINVAL);
588 memset(&attr, 0, attr_sz);
589 attr.path_fd = OPTS_GET(opts, path_fd, 0);
590 attr.pathname = ptr_to_u64((void *)pathname);
591 attr.file_flags = OPTS_GET(opts, file_flags, 0);
594 ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz);
595 return libbpf_err_errno(ret);
/* Convenience form of bpf_obj_pin_opts() with no options. */
int bpf_obj_pin(int fd, const char *pathname)
{
	return bpf_obj_pin_opts(fd, pathname, NULL);
}
/* Convenience form of bpf_obj_get_opts() with no options. */
int bpf_obj_get(const char *pathname)
{
	return bpf_obj_get_opts(pathname, NULL);
}
608 int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts)
610 const size_t attr_sz = offsetofend(union bpf_attr, path_fd);
614 if (!OPTS_VALID(opts, bpf_obj_get_opts))
615 return libbpf_err(-EINVAL);
617 memset(&attr, 0, attr_sz);
618 attr.path_fd = OPTS_GET(opts, path_fd, 0);
619 attr.pathname = ptr_to_u64((void *)pathname);
620 attr.file_flags = OPTS_GET(opts, file_flags, 0);
622 fd = sys_bpf_fd(BPF_OBJ_GET, &attr, attr_sz);
623 return libbpf_err_errno(fd);
626 int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
629 DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts,
633 return bpf_prog_attach_opts(prog_fd, target_fd, type, &opts);
636 int bpf_prog_attach_opts(int prog_fd, int target, enum bpf_attach_type type,
637 const struct bpf_prog_attach_opts *opts)
639 const size_t attr_sz = offsetofend(union bpf_attr, expected_revision);
640 __u32 relative_id, flags;
641 int ret, relative_fd;
644 if (!OPTS_VALID(opts, bpf_prog_attach_opts))
645 return libbpf_err(-EINVAL);
647 relative_id = OPTS_GET(opts, relative_id, 0);
648 relative_fd = OPTS_GET(opts, relative_fd, 0);
649 flags = OPTS_GET(opts, flags, 0);
651 /* validate we don't have unexpected combinations of non-zero fields */
652 if (relative_fd && relative_id)
653 return libbpf_err(-EINVAL);
655 memset(&attr, 0, attr_sz);
656 attr.target_fd = target;
657 attr.attach_bpf_fd = prog_fd;
658 attr.attach_type = type;
659 attr.replace_bpf_fd = OPTS_GET(opts, replace_fd, 0);
660 attr.expected_revision = OPTS_GET(opts, expected_revision, 0);
663 attr.attach_flags = flags | BPF_F_ID;
664 attr.relative_id = relative_id;
666 attr.attach_flags = flags;
667 attr.relative_fd = relative_fd;
670 ret = sys_bpf(BPF_PROG_ATTACH, &attr, attr_sz);
671 return libbpf_err_errno(ret);
674 int bpf_prog_detach_opts(int prog_fd, int target, enum bpf_attach_type type,
675 const struct bpf_prog_detach_opts *opts)
677 const size_t attr_sz = offsetofend(union bpf_attr, expected_revision);
678 __u32 relative_id, flags;
679 int ret, relative_fd;
682 if (!OPTS_VALID(opts, bpf_prog_detach_opts))
683 return libbpf_err(-EINVAL);
685 relative_id = OPTS_GET(opts, relative_id, 0);
686 relative_fd = OPTS_GET(opts, relative_fd, 0);
687 flags = OPTS_GET(opts, flags, 0);
689 /* validate we don't have unexpected combinations of non-zero fields */
690 if (relative_fd && relative_id)
691 return libbpf_err(-EINVAL);
693 memset(&attr, 0, attr_sz);
694 attr.target_fd = target;
695 attr.attach_bpf_fd = prog_fd;
696 attr.attach_type = type;
697 attr.expected_revision = OPTS_GET(opts, expected_revision, 0);
700 attr.attach_flags = flags | BPF_F_ID;
701 attr.relative_id = relative_id;
703 attr.attach_flags = flags;
704 attr.relative_fd = relative_fd;
707 ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz);
708 return libbpf_err_errno(ret);
711 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
713 return bpf_prog_detach_opts(0, target_fd, type, NULL);
716 int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
718 return bpf_prog_detach_opts(prog_fd, target_fd, type, NULL);
721 int bpf_link_create(int prog_fd, int target_fd,
722 enum bpf_attach_type attach_type,
723 const struct bpf_link_create_opts *opts)
725 const size_t attr_sz = offsetofend(union bpf_attr, link_create);
726 __u32 target_btf_id, iter_info_len, relative_id;
727 int fd, err, relative_fd;
730 if (!OPTS_VALID(opts, bpf_link_create_opts))
731 return libbpf_err(-EINVAL);
733 iter_info_len = OPTS_GET(opts, iter_info_len, 0);
734 target_btf_id = OPTS_GET(opts, target_btf_id, 0);
736 /* validate we don't have unexpected combinations of non-zero fields */
737 if (iter_info_len || target_btf_id) {
738 if (iter_info_len && target_btf_id)
739 return libbpf_err(-EINVAL);
740 if (!OPTS_ZEROED(opts, target_btf_id))
741 return libbpf_err(-EINVAL);
744 memset(&attr, 0, attr_sz);
745 attr.link_create.prog_fd = prog_fd;
746 attr.link_create.target_fd = target_fd;
747 attr.link_create.attach_type = attach_type;
748 attr.link_create.flags = OPTS_GET(opts, flags, 0);
751 attr.link_create.target_btf_id = target_btf_id;
755 switch (attach_type) {
757 attr.link_create.iter_info = ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
758 attr.link_create.iter_info_len = iter_info_len;
761 attr.link_create.perf_event.bpf_cookie = OPTS_GET(opts, perf_event.bpf_cookie, 0);
762 if (!OPTS_ZEROED(opts, perf_event))
763 return libbpf_err(-EINVAL);
765 case BPF_TRACE_KPROBE_MULTI:
766 attr.link_create.kprobe_multi.flags = OPTS_GET(opts, kprobe_multi.flags, 0);
767 attr.link_create.kprobe_multi.cnt = OPTS_GET(opts, kprobe_multi.cnt, 0);
768 attr.link_create.kprobe_multi.syms = ptr_to_u64(OPTS_GET(opts, kprobe_multi.syms, 0));
769 attr.link_create.kprobe_multi.addrs = ptr_to_u64(OPTS_GET(opts, kprobe_multi.addrs, 0));
770 attr.link_create.kprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, kprobe_multi.cookies, 0));
771 if (!OPTS_ZEROED(opts, kprobe_multi))
772 return libbpf_err(-EINVAL);
774 case BPF_TRACE_UPROBE_MULTI:
775 attr.link_create.uprobe_multi.flags = OPTS_GET(opts, uprobe_multi.flags, 0);
776 attr.link_create.uprobe_multi.cnt = OPTS_GET(opts, uprobe_multi.cnt, 0);
777 attr.link_create.uprobe_multi.path = ptr_to_u64(OPTS_GET(opts, uprobe_multi.path, 0));
778 attr.link_create.uprobe_multi.offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.offsets, 0));
779 attr.link_create.uprobe_multi.ref_ctr_offsets = ptr_to_u64(OPTS_GET(opts, uprobe_multi.ref_ctr_offsets, 0));
780 attr.link_create.uprobe_multi.cookies = ptr_to_u64(OPTS_GET(opts, uprobe_multi.cookies, 0));
781 attr.link_create.uprobe_multi.pid = OPTS_GET(opts, uprobe_multi.pid, 0);
782 if (!OPTS_ZEROED(opts, uprobe_multi))
783 return libbpf_err(-EINVAL);
785 case BPF_TRACE_FENTRY:
786 case BPF_TRACE_FEXIT:
787 case BPF_MODIFY_RETURN:
789 attr.link_create.tracing.cookie = OPTS_GET(opts, tracing.cookie, 0);
790 if (!OPTS_ZEROED(opts, tracing))
791 return libbpf_err(-EINVAL);
794 attr.link_create.netfilter.pf = OPTS_GET(opts, netfilter.pf, 0);
795 attr.link_create.netfilter.hooknum = OPTS_GET(opts, netfilter.hooknum, 0);
796 attr.link_create.netfilter.priority = OPTS_GET(opts, netfilter.priority, 0);
797 attr.link_create.netfilter.flags = OPTS_GET(opts, netfilter.flags, 0);
798 if (!OPTS_ZEROED(opts, netfilter))
799 return libbpf_err(-EINVAL);
801 case BPF_TCX_INGRESS:
803 relative_fd = OPTS_GET(opts, tcx.relative_fd, 0);
804 relative_id = OPTS_GET(opts, tcx.relative_id, 0);
805 if (relative_fd && relative_id)
806 return libbpf_err(-EINVAL);
808 attr.link_create.tcx.relative_id = relative_id;
809 attr.link_create.flags |= BPF_F_ID;
811 attr.link_create.tcx.relative_fd = relative_fd;
813 attr.link_create.tcx.expected_revision = OPTS_GET(opts, tcx.expected_revision, 0);
814 if (!OPTS_ZEROED(opts, tcx))
815 return libbpf_err(-EINVAL);
817 case BPF_NETKIT_PRIMARY:
818 case BPF_NETKIT_PEER:
819 relative_fd = OPTS_GET(opts, netkit.relative_fd, 0);
820 relative_id = OPTS_GET(opts, netkit.relative_id, 0);
821 if (relative_fd && relative_id)
822 return libbpf_err(-EINVAL);
824 attr.link_create.netkit.relative_id = relative_id;
825 attr.link_create.flags |= BPF_F_ID;
827 attr.link_create.netkit.relative_fd = relative_fd;
829 attr.link_create.netkit.expected_revision = OPTS_GET(opts, netkit.expected_revision, 0);
830 if (!OPTS_ZEROED(opts, netkit))
831 return libbpf_err(-EINVAL);
834 if (!OPTS_ZEROED(opts, flags))
835 return libbpf_err(-EINVAL);
839 fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, attr_sz);
842 /* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry
843 * and other similar programs
847 return libbpf_err(err);
849 /* if user used features not supported by
850 * BPF_RAW_TRACEPOINT_OPEN command, then just give up immediately
852 if (attr.link_create.target_fd || attr.link_create.target_btf_id)
853 return libbpf_err(err);
854 if (!OPTS_ZEROED(opts, sz))
855 return libbpf_err(err);
857 /* otherwise, for few select kinds of programs that can be
858 * attached using BPF_RAW_TRACEPOINT_OPEN command, try that as
859 * a fallback for older kernels
861 switch (attach_type) {
862 case BPF_TRACE_RAW_TP:
864 case BPF_TRACE_FENTRY:
865 case BPF_TRACE_FEXIT:
866 case BPF_MODIFY_RETURN:
867 return bpf_raw_tracepoint_open(NULL, prog_fd);
869 return libbpf_err(err);
873 int bpf_link_detach(int link_fd)
875 const size_t attr_sz = offsetofend(union bpf_attr, link_detach);
879 memset(&attr, 0, attr_sz);
880 attr.link_detach.link_fd = link_fd;
882 ret = sys_bpf(BPF_LINK_DETACH, &attr, attr_sz);
883 return libbpf_err_errno(ret);
886 int bpf_link_update(int link_fd, int new_prog_fd,
887 const struct bpf_link_update_opts *opts)
889 const size_t attr_sz = offsetofend(union bpf_attr, link_update);
893 if (!OPTS_VALID(opts, bpf_link_update_opts))
894 return libbpf_err(-EINVAL);
896 if (OPTS_GET(opts, old_prog_fd, 0) && OPTS_GET(opts, old_map_fd, 0))
897 return libbpf_err(-EINVAL);
899 memset(&attr, 0, attr_sz);
900 attr.link_update.link_fd = link_fd;
901 attr.link_update.new_prog_fd = new_prog_fd;
902 attr.link_update.flags = OPTS_GET(opts, flags, 0);
903 if (OPTS_GET(opts, old_prog_fd, 0))
904 attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
905 else if (OPTS_GET(opts, old_map_fd, 0))
906 attr.link_update.old_map_fd = OPTS_GET(opts, old_map_fd, 0);
908 ret = sys_bpf(BPF_LINK_UPDATE, &attr, attr_sz);
909 return libbpf_err_errno(ret);
912 int bpf_iter_create(int link_fd)
914 const size_t attr_sz = offsetofend(union bpf_attr, iter_create);
918 memset(&attr, 0, attr_sz);
919 attr.iter_create.link_fd = link_fd;
921 fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, attr_sz);
922 return libbpf_err_errno(fd);
925 int bpf_prog_query_opts(int target, enum bpf_attach_type type,
926 struct bpf_prog_query_opts *opts)
928 const size_t attr_sz = offsetofend(union bpf_attr, query);
932 if (!OPTS_VALID(opts, bpf_prog_query_opts))
933 return libbpf_err(-EINVAL);
935 memset(&attr, 0, attr_sz);
936 attr.query.target_fd = target;
937 attr.query.attach_type = type;
938 attr.query.query_flags = OPTS_GET(opts, query_flags, 0);
939 attr.query.count = OPTS_GET(opts, count, 0);
940 attr.query.prog_ids = ptr_to_u64(OPTS_GET(opts, prog_ids, NULL));
941 attr.query.link_ids = ptr_to_u64(OPTS_GET(opts, link_ids, NULL));
942 attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL));
943 attr.query.link_attach_flags = ptr_to_u64(OPTS_GET(opts, link_attach_flags, NULL));
945 ret = sys_bpf(BPF_PROG_QUERY, &attr, attr_sz);
947 OPTS_SET(opts, attach_flags, attr.query.attach_flags);
948 OPTS_SET(opts, revision, attr.query.revision);
949 OPTS_SET(opts, count, attr.query.count);
951 return libbpf_err_errno(ret);
954 int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
955 __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
957 LIBBPF_OPTS(bpf_prog_query_opts, opts);
960 opts.query_flags = query_flags;
961 opts.prog_ids = prog_ids;
962 opts.prog_cnt = *prog_cnt;
964 ret = bpf_prog_query_opts(target_fd, type, &opts);
967 *attach_flags = opts.attach_flags;
968 *prog_cnt = opts.prog_cnt;
970 return libbpf_err_errno(ret);
973 int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
975 const size_t attr_sz = offsetofend(union bpf_attr, test);
979 if (!OPTS_VALID(opts, bpf_test_run_opts))
980 return libbpf_err(-EINVAL);
982 memset(&attr, 0, attr_sz);
983 attr.test.prog_fd = prog_fd;
984 attr.test.batch_size = OPTS_GET(opts, batch_size, 0);
985 attr.test.cpu = OPTS_GET(opts, cpu, 0);
986 attr.test.flags = OPTS_GET(opts, flags, 0);
987 attr.test.repeat = OPTS_GET(opts, repeat, 0);
988 attr.test.duration = OPTS_GET(opts, duration, 0);
989 attr.test.ctx_size_in = OPTS_GET(opts, ctx_size_in, 0);
990 attr.test.ctx_size_out = OPTS_GET(opts, ctx_size_out, 0);
991 attr.test.data_size_in = OPTS_GET(opts, data_size_in, 0);
992 attr.test.data_size_out = OPTS_GET(opts, data_size_out, 0);
993 attr.test.ctx_in = ptr_to_u64(OPTS_GET(opts, ctx_in, NULL));
994 attr.test.ctx_out = ptr_to_u64(OPTS_GET(opts, ctx_out, NULL));
995 attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL));
996 attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
998 ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, attr_sz);
1000 OPTS_SET(opts, data_size_out, attr.test.data_size_out);
1001 OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
1002 OPTS_SET(opts, duration, attr.test.duration);
1003 OPTS_SET(opts, retval, attr.test.retval);
1005 return libbpf_err_errno(ret);
1008 static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
1010 const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
1011 union bpf_attr attr;
1014 memset(&attr, 0, attr_sz);
1015 attr.start_id = start_id;
1017 err = sys_bpf(cmd, &attr, attr_sz);
1019 *next_id = attr.next_id;
1021 return libbpf_err_errno(err);
1024 int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
1026 return bpf_obj_get_next_id(start_id, next_id, BPF_PROG_GET_NEXT_ID);
1029 int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
1031 return bpf_obj_get_next_id(start_id, next_id, BPF_MAP_GET_NEXT_ID);
1034 int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
1036 return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
1039 int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
1041 return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID);
1044 int bpf_prog_get_fd_by_id_opts(__u32 id,
1045 const struct bpf_get_fd_by_id_opts *opts)
1047 const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
1048 union bpf_attr attr;
1051 if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
1052 return libbpf_err(-EINVAL);
1054 memset(&attr, 0, attr_sz);
1056 attr.open_flags = OPTS_GET(opts, open_flags, 0);
1058 fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, attr_sz);
1059 return libbpf_err_errno(fd);
1062 int bpf_prog_get_fd_by_id(__u32 id)
1064 return bpf_prog_get_fd_by_id_opts(id, NULL);
1067 int bpf_map_get_fd_by_id_opts(__u32 id,
1068 const struct bpf_get_fd_by_id_opts *opts)
1070 const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
1071 union bpf_attr attr;
1074 if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
1075 return libbpf_err(-EINVAL);
1077 memset(&attr, 0, attr_sz);
1079 attr.open_flags = OPTS_GET(opts, open_flags, 0);
1081 fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz);
1082 return libbpf_err_errno(fd);
1085 int bpf_map_get_fd_by_id(__u32 id)
1087 return bpf_map_get_fd_by_id_opts(id, NULL);
1090 int bpf_btf_get_fd_by_id_opts(__u32 id,
1091 const struct bpf_get_fd_by_id_opts *opts)
1093 const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
1094 union bpf_attr attr;
1097 if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
1098 return libbpf_err(-EINVAL);
1100 memset(&attr, 0, attr_sz);
1102 attr.open_flags = OPTS_GET(opts, open_flags, 0);
1104 fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz);
1105 return libbpf_err_errno(fd);
1108 int bpf_btf_get_fd_by_id(__u32 id)
1110 return bpf_btf_get_fd_by_id_opts(id, NULL);
1113 int bpf_link_get_fd_by_id_opts(__u32 id,
1114 const struct bpf_get_fd_by_id_opts *opts)
1116 const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
1117 union bpf_attr attr;
1120 if (!OPTS_VALID(opts, bpf_get_fd_by_id_opts))
1121 return libbpf_err(-EINVAL);
1123 memset(&attr, 0, attr_sz);
1125 attr.open_flags = OPTS_GET(opts, open_flags, 0);
1127 fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, attr_sz);
1128 return libbpf_err_errno(fd);
1131 int bpf_link_get_fd_by_id(__u32 id)
1133 return bpf_link_get_fd_by_id_opts(id, NULL);
1136 int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
1138 const size_t attr_sz = offsetofend(union bpf_attr, info);
1139 union bpf_attr attr;
1142 memset(&attr, 0, attr_sz);
1143 attr.info.bpf_fd = bpf_fd;
1144 attr.info.info_len = *info_len;
1145 attr.info.info = ptr_to_u64(info);
1147 err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
1149 *info_len = attr.info.info_len;
1150 return libbpf_err_errno(err);
1153 int bpf_prog_get_info_by_fd(int prog_fd, struct bpf_prog_info *info, __u32 *info_len)
1155 return bpf_obj_get_info_by_fd(prog_fd, info, info_len);
1158 int bpf_map_get_info_by_fd(int map_fd, struct bpf_map_info *info, __u32 *info_len)
1160 return bpf_obj_get_info_by_fd(map_fd, info, info_len);
1163 int bpf_btf_get_info_by_fd(int btf_fd, struct bpf_btf_info *info, __u32 *info_len)
1165 return bpf_obj_get_info_by_fd(btf_fd, info, info_len);
1168 int bpf_link_get_info_by_fd(int link_fd, struct bpf_link_info *info, __u32 *info_len)
1170 return bpf_obj_get_info_by_fd(link_fd, info, info_len);
1173 int bpf_raw_tracepoint_open(const char *name, int prog_fd)
1175 const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint);
1176 union bpf_attr attr;
1179 memset(&attr, 0, attr_sz);
1180 attr.raw_tracepoint.name = ptr_to_u64(name);
1181 attr.raw_tracepoint.prog_fd = prog_fd;
1183 fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz);
1184 return libbpf_err_errno(fd);
1187 int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts)
1189 const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd);
1190 union bpf_attr attr;
1196 bump_rlimit_memlock();
1198 memset(&attr, 0, attr_sz);
1200 if (!OPTS_VALID(opts, bpf_btf_load_opts))
1201 return libbpf_err(-EINVAL);
1203 log_buf = OPTS_GET(opts, log_buf, NULL);
1204 log_size = OPTS_GET(opts, log_size, 0);
1205 log_level = OPTS_GET(opts, log_level, 0);
1207 if (log_size > UINT_MAX)
1208 return libbpf_err(-EINVAL);
1209 if (log_size && !log_buf)
1210 return libbpf_err(-EINVAL);
1212 attr.btf = ptr_to_u64(btf_data);
1213 attr.btf_size = btf_size;
1214 attr.btf_token_fd = OPTS_GET(opts, token_fd, 0);
1216 /* log_level == 0 and log_buf != NULL means "try loading without
1217 * log_buf, but retry with log_buf and log_level=1 on error", which is
1218 * consistent across low-level and high-level BTF and program loading
1219 * APIs within libbpf and provides a sensible behavior in practice
1222 attr.btf_log_buf = ptr_to_u64(log_buf);
1223 attr.btf_log_size = (__u32)log_size;
1224 attr.btf_log_level = log_level;
1227 fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
1228 if (fd < 0 && log_buf && log_level == 0) {
1229 attr.btf_log_buf = ptr_to_u64(log_buf);
1230 attr.btf_log_size = (__u32)log_size;
1231 attr.btf_log_level = 1;
1232 fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);
1235 OPTS_SET(opts, log_true_size, attr.btf_log_true_size);
1236 return libbpf_err_errno(fd);
1239 int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
1240 __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
1243 const size_t attr_sz = offsetofend(union bpf_attr, task_fd_query);
1244 union bpf_attr attr;
1247 memset(&attr, 0, attr_sz);
1248 attr.task_fd_query.pid = pid;
1249 attr.task_fd_query.fd = fd;
1250 attr.task_fd_query.flags = flags;
1251 attr.task_fd_query.buf = ptr_to_u64(buf);
1252 attr.task_fd_query.buf_len = *buf_len;
1254 err = sys_bpf(BPF_TASK_FD_QUERY, &attr, attr_sz);
1256 *buf_len = attr.task_fd_query.buf_len;
1257 *prog_id = attr.task_fd_query.prog_id;
1258 *fd_type = attr.task_fd_query.fd_type;
1259 *probe_offset = attr.task_fd_query.probe_offset;
1260 *probe_addr = attr.task_fd_query.probe_addr;
1262 return libbpf_err_errno(err);
1265 int bpf_enable_stats(enum bpf_stats_type type)
1267 const size_t attr_sz = offsetofend(union bpf_attr, enable_stats);
1268 union bpf_attr attr;
1271 memset(&attr, 0, attr_sz);
1272 attr.enable_stats.type = type;
1274 fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, attr_sz);
1275 return libbpf_err_errno(fd);
1278 int bpf_prog_bind_map(int prog_fd, int map_fd,
1279 const struct bpf_prog_bind_opts *opts)
1281 const size_t attr_sz = offsetofend(union bpf_attr, prog_bind_map);
1282 union bpf_attr attr;
1285 if (!OPTS_VALID(opts, bpf_prog_bind_opts))
1286 return libbpf_err(-EINVAL);
1288 memset(&attr, 0, attr_sz);
1289 attr.prog_bind_map.prog_fd = prog_fd;
1290 attr.prog_bind_map.map_fd = map_fd;
1291 attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
1293 ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz);
1294 return libbpf_err_errno(ret);
1297 int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts)
1299 const size_t attr_sz = offsetofend(union bpf_attr, token_create);
1300 union bpf_attr attr;
1303 if (!OPTS_VALID(opts, bpf_token_create_opts))
1304 return libbpf_err(-EINVAL);
1306 memset(&attr, 0, attr_sz);
1307 attr.token_create.bpffs_fd = bpffs_fd;
1308 attr.token_create.flags = OPTS_GET(opts, flags, 0);
1310 fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz);
1311 return libbpf_err_errno(fd);