// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)

/*
 * Common eBPF ELF object loading operations.
 *
 * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
 * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
 * Copyright (C) 2015 Huawei Inc.
 * Copyright (C) 2017 Nicira, Inc.
 * Copyright (C) 2019 Isovalent, Inc.
 */

#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <endian.h>
#include <fcntl.h>
#include <errno.h>
#include <ctype.h>
#include <asm/unistd.h>
#include <linux/err.h>
#include <linux/kernel.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/filter.h>
#include <linux/limits.h>
#include <linux/perf_event.h>
#include <linux/bpf_perf_event.h>
#include <linux/ring_buffer.h>
#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/vfs.h>
#include <sys/utsname.h>
#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <zlib.h>

#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
#include "zip.h"

#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC            0xcafe4a11
#endif

#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf"

#define BPF_INSN_SZ (sizeof(struct bpf_insn))

/* vfprintf() in __base_pr() uses a nonliteral format string. It may break
 * compilation if the user enables the corresponding warning. Disable it
 * explicitly.
 */
#pragma GCC diagnostic ignored "-Wformat-nonliteral"

#define __printf(a, b)  __attribute__((format(printf, a, b)))

static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
static int map_set_def_max_entries(struct bpf_map *map);

static const char * const attach_type_name[] = {
        [BPF_CGROUP_INET_INGRESS]       = "cgroup_inet_ingress",
        [BPF_CGROUP_INET_EGRESS]        = "cgroup_inet_egress",
        [BPF_CGROUP_INET_SOCK_CREATE]   = "cgroup_inet_sock_create",
        [BPF_CGROUP_INET_SOCK_RELEASE]  = "cgroup_inet_sock_release",
        [BPF_CGROUP_SOCK_OPS]           = "cgroup_sock_ops",
        [BPF_CGROUP_DEVICE]             = "cgroup_device",
        [BPF_CGROUP_INET4_BIND]         = "cgroup_inet4_bind",
        [BPF_CGROUP_INET6_BIND]         = "cgroup_inet6_bind",
        [BPF_CGROUP_INET4_CONNECT]      = "cgroup_inet4_connect",
        [BPF_CGROUP_INET6_CONNECT]      = "cgroup_inet6_connect",
        [BPF_CGROUP_UNIX_CONNECT]       = "cgroup_unix_connect",
        [BPF_CGROUP_INET4_POST_BIND]    = "cgroup_inet4_post_bind",
        [BPF_CGROUP_INET6_POST_BIND]    = "cgroup_inet6_post_bind",
        [BPF_CGROUP_INET4_GETPEERNAME]  = "cgroup_inet4_getpeername",
        [BPF_CGROUP_INET6_GETPEERNAME]  = "cgroup_inet6_getpeername",
        [BPF_CGROUP_UNIX_GETPEERNAME]   = "cgroup_unix_getpeername",
        [BPF_CGROUP_INET4_GETSOCKNAME]  = "cgroup_inet4_getsockname",
        [BPF_CGROUP_INET6_GETSOCKNAME]  = "cgroup_inet6_getsockname",
        [BPF_CGROUP_UNIX_GETSOCKNAME]   = "cgroup_unix_getsockname",
        [BPF_CGROUP_UDP4_SENDMSG]       = "cgroup_udp4_sendmsg",
        [BPF_CGROUP_UDP6_SENDMSG]       = "cgroup_udp6_sendmsg",
        [BPF_CGROUP_UNIX_SENDMSG]       = "cgroup_unix_sendmsg",
        [BPF_CGROUP_SYSCTL]             = "cgroup_sysctl",
        [BPF_CGROUP_UDP4_RECVMSG]       = "cgroup_udp4_recvmsg",
        [BPF_CGROUP_UDP6_RECVMSG]       = "cgroup_udp6_recvmsg",
        [BPF_CGROUP_UNIX_RECVMSG]       = "cgroup_unix_recvmsg",
        [BPF_CGROUP_GETSOCKOPT]         = "cgroup_getsockopt",
        [BPF_CGROUP_SETSOCKOPT]         = "cgroup_setsockopt",
        [BPF_SK_SKB_STREAM_PARSER]      = "sk_skb_stream_parser",
        [BPF_SK_SKB_STREAM_VERDICT]     = "sk_skb_stream_verdict",
        [BPF_SK_SKB_VERDICT]            = "sk_skb_verdict",
        [BPF_SK_MSG_VERDICT]            = "sk_msg_verdict",
        [BPF_LIRC_MODE2]                = "lirc_mode2",
        [BPF_FLOW_DISSECTOR]            = "flow_dissector",
        [BPF_TRACE_RAW_TP]              = "trace_raw_tp",
        [BPF_TRACE_FENTRY]              = "trace_fentry",
        [BPF_TRACE_FEXIT]               = "trace_fexit",
        [BPF_MODIFY_RETURN]             = "modify_return",
        [BPF_LSM_MAC]                   = "lsm_mac",
        [BPF_LSM_CGROUP]                = "lsm_cgroup",
        [BPF_SK_LOOKUP]                 = "sk_lookup",
        [BPF_TRACE_ITER]                = "trace_iter",
        [BPF_XDP_DEVMAP]                = "xdp_devmap",
        [BPF_XDP_CPUMAP]                = "xdp_cpumap",
        [BPF_XDP]                       = "xdp",
        [BPF_SK_REUSEPORT_SELECT]       = "sk_reuseport_select",
        [BPF_SK_REUSEPORT_SELECT_OR_MIGRATE]    = "sk_reuseport_select_or_migrate",
        [BPF_PERF_EVENT]                = "perf_event",
        [BPF_TRACE_KPROBE_MULTI]        = "trace_kprobe_multi",
        [BPF_STRUCT_OPS]                = "struct_ops",
        [BPF_NETFILTER]                 = "netfilter",
        [BPF_TCX_INGRESS]               = "tcx_ingress",
        [BPF_TCX_EGRESS]                = "tcx_egress",
        [BPF_TRACE_UPROBE_MULTI]        = "trace_uprobe_multi",
        [BPF_NETKIT_PRIMARY]            = "netkit_primary",
        [BPF_NETKIT_PEER]               = "netkit_peer",
};

static const char * const link_type_name[] = {
        [BPF_LINK_TYPE_UNSPEC]                  = "unspec",
        [BPF_LINK_TYPE_RAW_TRACEPOINT]          = "raw_tracepoint",
        [BPF_LINK_TYPE_TRACING]                 = "tracing",
        [BPF_LINK_TYPE_CGROUP]                  = "cgroup",
        [BPF_LINK_TYPE_ITER]                    = "iter",
        [BPF_LINK_TYPE_NETNS]                   = "netns",
        [BPF_LINK_TYPE_XDP]                     = "xdp",
        [BPF_LINK_TYPE_PERF_EVENT]              = "perf_event",
        [BPF_LINK_TYPE_KPROBE_MULTI]            = "kprobe_multi",
        [BPF_LINK_TYPE_STRUCT_OPS]              = "struct_ops",
        [BPF_LINK_TYPE_NETFILTER]               = "netfilter",
        [BPF_LINK_TYPE_TCX]                     = "tcx",
        [BPF_LINK_TYPE_UPROBE_MULTI]            = "uprobe_multi",
        [BPF_LINK_TYPE_NETKIT]                  = "netkit",
};

static const char * const map_type_name[] = {
        [BPF_MAP_TYPE_UNSPEC]                   = "unspec",
        [BPF_MAP_TYPE_HASH]                     = "hash",
        [BPF_MAP_TYPE_ARRAY]                    = "array",
        [BPF_MAP_TYPE_PROG_ARRAY]               = "prog_array",
        [BPF_MAP_TYPE_PERF_EVENT_ARRAY]         = "perf_event_array",
        [BPF_MAP_TYPE_PERCPU_HASH]              = "percpu_hash",
        [BPF_MAP_TYPE_PERCPU_ARRAY]             = "percpu_array",
        [BPF_MAP_TYPE_STACK_TRACE]              = "stack_trace",
        [BPF_MAP_TYPE_CGROUP_ARRAY]             = "cgroup_array",
        [BPF_MAP_TYPE_LRU_HASH]                 = "lru_hash",
        [BPF_MAP_TYPE_LRU_PERCPU_HASH]          = "lru_percpu_hash",
        [BPF_MAP_TYPE_LPM_TRIE]                 = "lpm_trie",
        [BPF_MAP_TYPE_ARRAY_OF_MAPS]            = "array_of_maps",
        [BPF_MAP_TYPE_HASH_OF_MAPS]             = "hash_of_maps",
        [BPF_MAP_TYPE_DEVMAP]                   = "devmap",
        [BPF_MAP_TYPE_DEVMAP_HASH]              = "devmap_hash",
        [BPF_MAP_TYPE_SOCKMAP]                  = "sockmap",
        [BPF_MAP_TYPE_CPUMAP]                   = "cpumap",
        [BPF_MAP_TYPE_XSKMAP]                   = "xskmap",
        [BPF_MAP_TYPE_SOCKHASH]                 = "sockhash",
        [BPF_MAP_TYPE_CGROUP_STORAGE]           = "cgroup_storage",
        [BPF_MAP_TYPE_REUSEPORT_SOCKARRAY]      = "reuseport_sockarray",
        [BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE]    = "percpu_cgroup_storage",
        [BPF_MAP_TYPE_QUEUE]                    = "queue",
        [BPF_MAP_TYPE_STACK]                    = "stack",
        [BPF_MAP_TYPE_SK_STORAGE]               = "sk_storage",
        [BPF_MAP_TYPE_STRUCT_OPS]               = "struct_ops",
        [BPF_MAP_TYPE_RINGBUF]                  = "ringbuf",
        [BPF_MAP_TYPE_INODE_STORAGE]            = "inode_storage",
        [BPF_MAP_TYPE_TASK_STORAGE]             = "task_storage",
        [BPF_MAP_TYPE_BLOOM_FILTER]             = "bloom_filter",
        [BPF_MAP_TYPE_USER_RINGBUF]             = "user_ringbuf",
        [BPF_MAP_TYPE_CGRP_STORAGE]             = "cgrp_storage",
};

static const char * const prog_type_name[] = {
        [BPF_PROG_TYPE_UNSPEC]                  = "unspec",
        [BPF_PROG_TYPE_SOCKET_FILTER]           = "socket_filter",
        [BPF_PROG_TYPE_KPROBE]                  = "kprobe",
        [BPF_PROG_TYPE_SCHED_CLS]               = "sched_cls",
        [BPF_PROG_TYPE_SCHED_ACT]               = "sched_act",
        [BPF_PROG_TYPE_TRACEPOINT]              = "tracepoint",
        [BPF_PROG_TYPE_XDP]                     = "xdp",
        [BPF_PROG_TYPE_PERF_EVENT]              = "perf_event",
        [BPF_PROG_TYPE_CGROUP_SKB]              = "cgroup_skb",
        [BPF_PROG_TYPE_CGROUP_SOCK]             = "cgroup_sock",
        [BPF_PROG_TYPE_LWT_IN]                  = "lwt_in",
        [BPF_PROG_TYPE_LWT_OUT]                 = "lwt_out",
        [BPF_PROG_TYPE_LWT_XMIT]                = "lwt_xmit",
        [BPF_PROG_TYPE_SOCK_OPS]                = "sock_ops",
        [BPF_PROG_TYPE_SK_SKB]                  = "sk_skb",
        [BPF_PROG_TYPE_CGROUP_DEVICE]           = "cgroup_device",
        [BPF_PROG_TYPE_SK_MSG]                  = "sk_msg",
        [BPF_PROG_TYPE_RAW_TRACEPOINT]          = "raw_tracepoint",
        [BPF_PROG_TYPE_CGROUP_SOCK_ADDR]        = "cgroup_sock_addr",
        [BPF_PROG_TYPE_LWT_SEG6LOCAL]           = "lwt_seg6local",
        [BPF_PROG_TYPE_LIRC_MODE2]              = "lirc_mode2",
        [BPF_PROG_TYPE_SK_REUSEPORT]            = "sk_reuseport",
        [BPF_PROG_TYPE_FLOW_DISSECTOR]          = "flow_dissector",
        [BPF_PROG_TYPE_CGROUP_SYSCTL]           = "cgroup_sysctl",
        [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
        [BPF_PROG_TYPE_CGROUP_SOCKOPT]          = "cgroup_sockopt",
        [BPF_PROG_TYPE_TRACING]                 = "tracing",
        [BPF_PROG_TYPE_STRUCT_OPS]              = "struct_ops",
        [BPF_PROG_TYPE_EXT]                     = "ext",
        [BPF_PROG_TYPE_LSM]                     = "lsm",
        [BPF_PROG_TYPE_SK_LOOKUP]               = "sk_lookup",
        [BPF_PROG_TYPE_SYSCALL]                 = "syscall",
        [BPF_PROG_TYPE_NETFILTER]               = "netfilter",
};
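
/* These name tables back libbpf's public stringification helpers, e.g.
 * libbpf_bpf_prog_type_str() and libbpf_bpf_attach_type_str(). Caller-side
 * usage sketch (hypothetical code, not part of this file):
 *
 *      const char *s = libbpf_bpf_prog_type_str(BPF_PROG_TYPE_XDP);
 *
 *      if (s)
 *              printf("prog type: %s\n", s);   // "prog type: xdp"
 */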

static int __base_pr(enum libbpf_print_level level, const char *format,
                     va_list args)
{
        if (level == LIBBPF_DEBUG)
                return 0;

        return vfprintf(stderr, format, args);
}

static libbpf_print_fn_t __libbpf_pr = __base_pr;

libbpf_print_fn_t libbpf_set_print(libbpf_print_fn_t fn)
{
        libbpf_print_fn_t old_print_fn;

        old_print_fn = __atomic_exchange_n(&__libbpf_pr, fn, __ATOMIC_RELAXED);

        return old_print_fn;
}
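
/* Caller-side usage sketch (hypothetical code, not part of libbpf itself):
 * install a custom print callback and later restore the previous one.
 *
 *      static int my_print(enum libbpf_print_level level,
 *                          const char *fmt, va_list args)
 *      {
 *              if (level == LIBBPF_DEBUG)
 *                      return 0;
 *              return vfprintf(stderr, fmt, args);
 *      }
 *
 *      libbpf_print_fn_t old_fn = libbpf_set_print(my_print);
 *      ...
 *      libbpf_set_print(old_fn);
 */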

__printf(2, 3)
void libbpf_print(enum libbpf_print_level level, const char *format, ...)
{
        va_list args;
        int old_errno;
        libbpf_print_fn_t print_fn;

        print_fn = __atomic_load_n(&__libbpf_pr, __ATOMIC_RELAXED);
        if (!print_fn)
                return;

        old_errno = errno;

        va_start(args, format);
        __libbpf_pr(level, format, args);
        va_end(args);

        errno = old_errno;
}

static void pr_perm_msg(int err)
{
        struct rlimit limit;
        char buf[100];

        if (err != -EPERM || geteuid() != 0)
                return;

        err = getrlimit(RLIMIT_MEMLOCK, &limit);
        if (err)
                return;

        if (limit.rlim_cur == RLIM_INFINITY)
                return;

        if (limit.rlim_cur < 1024)
                snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur);
        else if (limit.rlim_cur < 1024*1024)
                snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024);
        else
                snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024));

        pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n",
                buf);
}
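
/* The hint above matters mostly on pre-5.11 kernels, where BPF memory was
 * charged against RLIMIT_MEMLOCK. A caller-side sketch for raising the
 * limit (an assumption about the caller's environment; libbpf does not do
 * this automatically):
 *
 *      struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY };
 *
 *      if (setrlimit(RLIMIT_MEMLOCK, &r))
 *              perror("setrlimit(RLIMIT_MEMLOCK)");
 */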

#define STRERR_BUFSIZE  128

/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
#endif

#ifndef zclose
# define zclose(fd) ({                  \
        int ___err = 0;                 \
        if ((fd) >= 0)                  \
                ___err = close((fd));   \
        fd = -1;                        \
        ___err; })
#endif

static inline __u64 ptr_to_u64(const void *ptr)
{
        return (__u64) (unsigned long) ptr;
}

int libbpf_set_strict_mode(enum libbpf_strict_mode mode)
{
        /* as of v1.0 libbpf_set_strict_mode() is a no-op */
        return 0;
}

__u32 libbpf_major_version(void)
{
        return LIBBPF_MAJOR_VERSION;
}

__u32 libbpf_minor_version(void)
{
        return LIBBPF_MINOR_VERSION;
}

const char *libbpf_version_string(void)
{
#define __S(X) #X
#define _S(X) __S(X)
        return  "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION);
#undef _S
#undef __S
}
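
/* Caller-side usage sketch (hypothetical code): detect a mismatch between
 * the libbpf headers an application was compiled against and the library
 * it actually runs with.
 *
 *      if (libbpf_major_version() != LIBBPF_MAJOR_VERSION)
 *              fprintf(stderr, "libbpf version mismatch, runtime is %s\n",
 *                      libbpf_version_string());
 */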

enum reloc_type {
        RELO_LD64,
        RELO_CALL,
        RELO_DATA,
        RELO_EXTERN_LD64,
        RELO_EXTERN_CALL,
        RELO_SUBPROG_ADDR,
        RELO_CORE,
};

struct reloc_desc {
        enum reloc_type type;
        int insn_idx;
        union {
                const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */
                struct {
                        int map_idx;
                        int sym_off;
                        int ext_idx;
                };
        };
};

/* stored as sec_def->cookie for all libbpf-supported SEC()s */
enum sec_def_flags {
        SEC_NONE = 0,
        /* expected_attach_type is optional, if kernel doesn't support it */
        SEC_EXP_ATTACH_OPT = 1,
        /* legacy, only used by libbpf_get_type_names() and
         * libbpf_attach_type_by_name(), not used by libbpf itself at all.
         * This used to be associated with cgroup (and a few other) BPF
         * programs that were attachable through the BPF_PROG_ATTACH command.
         * Pretty meaningless nowadays, though.
         */
        SEC_ATTACHABLE = 2,
        SEC_ATTACHABLE_OPT = SEC_ATTACHABLE | SEC_EXP_ATTACH_OPT,
        /* attachment target is specified through BTF ID in either kernel or
         * other BPF program's BTF object
         */
        SEC_ATTACH_BTF = 4,
        /* BPF program type allows sleeping/blocking in kernel */
        SEC_SLEEPABLE = 8,
        /* BPF program supports non-linear XDP buffers */
        SEC_XDP_FRAGS = 16,
        /* Set up the proper attach type for USDT probes. */
        SEC_USDT = 32,
};
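
/* For illustration (derived from the flag descriptions above; see the
 * section_defs[] table later in this file for the authoritative mapping):
 * SEC("cgroup_skb/ingress") uses SEC_ATTACHABLE_OPT, SEC("xdp.frags")
 * carries SEC_XDP_FRAGS, SEC("lsm.s/...") combines
 * SEC_ATTACH_BTF | SEC_SLEEPABLE, and SEC("usdt/...") uses SEC_USDT.
 */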

struct bpf_sec_def {
        char *sec;
        enum bpf_prog_type prog_type;
        enum bpf_attach_type expected_attach_type;
        long cookie;
        int handler_id;

        libbpf_prog_setup_fn_t prog_setup_fn;
        libbpf_prog_prepare_load_fn_t prog_prepare_load_fn;
        libbpf_prog_attach_fn_t prog_attach_fn;
};

/*
 * bpf_prog would be a better name, but it is already used in
 * linux/filter.h.
 */
struct bpf_program {
        char *name;
        char *sec_name;
        size_t sec_idx;
        const struct bpf_sec_def *sec_def;
        /* this program's instruction offset (in number of instructions)
         * within its containing ELF section
         */
        size_t sec_insn_off;
        /* number of original instructions in ELF section belonging to this
         * program, not taking into account subprogram instructions possibly
         * appended later during relocation
         */
        size_t sec_insn_cnt;
        /* Offset (in number of instructions) of the start of instructions
         * belonging to this BPF program within its containing main BPF
         * program. For the entry-point (main) BPF program, this is always
         * zero. For a sub-program, this gets reset before each main BPF
         * program is processed and relocated, and is used to determine
         * whether the sub-program was already appended to the main program,
         * and if so, at which instruction offset.
         */
        size_t sub_insn_off;

        /* instructions that belong to BPF program; insns[0] is located at
         * sec_insn_off instruction within its ELF section in ELF file, so
         * when mapping ELF file instruction index to the local instruction,
         * one needs to subtract sec_insn_off; and vice versa.
         */
        struct bpf_insn *insns;
        /* actual number of instructions in this BPF program's image; for
         * entry-point BPF programs this includes the size of main program
         * itself plus all the used sub-programs, appended at the end
         */
        size_t insns_cnt;

        struct reloc_desc *reloc_desc;
        int nr_reloc;

        /* BPF verifier log settings */
        char *log_buf;
        size_t log_size;
        __u32 log_level;

        struct bpf_object *obj;

        int fd;
        bool autoload;
        bool autoattach;
        bool sym_global;
        bool mark_btf_static;
        enum bpf_prog_type type;
        enum bpf_attach_type expected_attach_type;
        int exception_cb_idx;

        int prog_ifindex;
        __u32 attach_btf_obj_fd;
        __u32 attach_btf_id;
        __u32 attach_prog_fd;

        void *func_info;
        __u32 func_info_rec_size;
        __u32 func_info_cnt;

        void *line_info;
        __u32 line_info_rec_size;
        __u32 line_info_cnt;
        __u32 prog_flags;
};
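
/* Illustration of the instruction offsets above (a sketch assuming one main
 * program "entry" followed by one subprog "helper" in the same ELF section):
 *
 *      ELF section insns:      [ entry: 0..N-1 ][ helper: N..M-1 ]
 *
 * entry.sec_insn_off == 0 and helper.sec_insn_off == N; once relocation
 * appends helper to entry's image, helper.sub_insn_off records the
 * instruction offset at which it was appended within entry's insns array.
 */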

struct bpf_struct_ops {
        const char *tname;
        const struct btf_type *type;
        struct bpf_program **progs;
        __u32 *kern_func_off;
        /* e.g. struct tcp_congestion_ops in bpf_prog's btf format */
        void *data;
        /* e.g. struct bpf_struct_ops_tcp_congestion_ops in
         *      btf_vmlinux's format.
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [... some other kernel fields ...]
         *      struct tcp_congestion_ops data;
         * }
         * kern_vdata's size == sizeof(struct bpf_struct_ops_tcp_congestion_ops).
         * bpf_map__init_kern_struct_ops() will populate the "kern_vdata"
         * from "data".
         */
        void *kern_vdata;
        __u32 type_id;
};

#define DATA_SEC ".data"
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"

enum libbpf_map_type {
        LIBBPF_MAP_UNSPEC,
        LIBBPF_MAP_DATA,
        LIBBPF_MAP_BSS,
        LIBBPF_MAP_RODATA,
        LIBBPF_MAP_KCONFIG,
};

struct bpf_map_def {
        unsigned int type;
        unsigned int key_size;
        unsigned int value_size;
        unsigned int max_entries;
        unsigned int map_flags;
};

struct bpf_map {
        struct bpf_object *obj;
        char *name;
        /* real_name is defined for special internal maps (.rodata*,
         * .data*, .bss, .kconfig) and preserves their original ELF section
         * name. This is important to be able to find corresponding BTF
         * DATASEC information.
         */
        char *real_name;
        int fd;
        int sec_idx;
        size_t sec_offset;
        int map_ifindex;
        int inner_map_fd;
        struct bpf_map_def def;
        __u32 numa_node;
        __u32 btf_var_idx;
        int mod_btf_fd;
        __u32 btf_key_type_id;
        __u32 btf_value_type_id;
        __u32 btf_vmlinux_value_type_id;
        enum libbpf_map_type libbpf_type;
        void *mmaped;
        struct bpf_struct_ops *st_ops;
        struct bpf_map *inner_map;
        void **init_slots;
        int init_slots_sz;
        char *pin_path;
        bool pinned;
        bool reused;
        bool autocreate;
        __u64 map_extra;
};

enum extern_type {
        EXT_UNKNOWN,
        EXT_KCFG,
        EXT_KSYM,
};

enum kcfg_type {
        KCFG_UNKNOWN,
        KCFG_CHAR,
        KCFG_BOOL,
        KCFG_INT,
        KCFG_TRISTATE,
        KCFG_CHAR_ARR,
};

struct extern_desc {
        enum extern_type type;
        int sym_idx;
        int btf_id;
        int sec_btf_id;
        const char *name;
        char *essent_name;
        bool is_set;
        bool is_weak;
        union {
                struct {
                        enum kcfg_type type;
                        int sz;
                        int align;
                        int data_off;
                        bool is_signed;
                } kcfg;
                struct {
                        unsigned long long addr;

                        /* target btf_id of the corresponding kernel var. */
                        int kernel_btf_obj_fd;
                        int kernel_btf_id;

                        /* local btf_id of the ksym extern's type. */
                        __u32 type_id;
                        /* BTF fd index to be patched in for insn->off, this is
                         * 0 for vmlinux BTF, index in obj->fd_array for module
                         * BTF
                         */
                        __s16 btf_fd_idx;
                } ksym;
        };
};

struct module_btf {
        struct btf *btf;
        char *name;
        __u32 id;
        int fd;
        int fd_array_idx;
};

enum sec_type {
        SEC_UNUSED = 0,
        SEC_RELO,
        SEC_BSS,
        SEC_DATA,
        SEC_RODATA,
};

struct elf_sec_desc {
        enum sec_type sec_type;
        Elf64_Shdr *shdr;
        Elf_Data *data;
};

struct elf_state {
        int fd;
        const void *obj_buf;
        size_t obj_buf_sz;
        Elf *elf;
        Elf64_Ehdr *ehdr;
        Elf_Data *symbols;
        Elf_Data *st_ops_data;
        Elf_Data *st_ops_link_data;
        size_t shstrndx; /* section index for section name strings */
        size_t strtabidx;
        struct elf_sec_desc *secs;
        size_t sec_cnt;
        int btf_maps_shndx;
        __u32 btf_maps_sec_btf_id;
        int text_shndx;
        int symbols_shndx;
        int st_ops_shndx;
        int st_ops_link_shndx;
};

struct usdt_manager;

struct bpf_object {
        char name[BPF_OBJ_NAME_LEN];
        char license[64];
        __u32 kern_version;

        struct bpf_program *programs;
        size_t nr_programs;
        struct bpf_map *maps;
        size_t nr_maps;
        size_t maps_cap;

        char *kconfig;
        struct extern_desc *externs;
        int nr_extern;
        int kconfig_map_idx;

        bool loaded;
        bool has_subcalls;
        bool has_rodata;

        struct bpf_gen *gen_loader;

        /* Information when doing ELF related work. Only valid if efile.elf is not NULL */
        struct elf_state efile;

        struct btf *btf;
        struct btf_ext *btf_ext;

        /* Parse and load BTF vmlinux if any of the programs in the object need
         * it at load time.
         */
        struct btf *btf_vmlinux;
        /* Path to the custom BTF to be used for BPF CO-RE relocations as an
         * override for vmlinux BTF.
         */
        char *btf_custom_path;
        /* vmlinux BTF override for CO-RE relocations */
        struct btf *btf_vmlinux_override;
        /* Lazily initialized kernel module BTFs */
        struct module_btf *btf_modules;
        bool btf_modules_loaded;
        size_t btf_module_cnt;
        size_t btf_module_cap;

        /* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */
        char *log_buf;
        size_t log_size;
        __u32 log_level;

        int *fd_array;
        size_t fd_array_cap;
        size_t fd_array_cnt;

        struct usdt_manager *usdt_man;

        struct kern_feature_cache *feat_cache;
        char *token_path;
        int token_fd;

        char path[];
};

static const char *elf_sym_str(const struct bpf_object *obj, size_t off);
static const char *elf_sec_str(const struct bpf_object *obj, size_t off);
static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx);
static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx);
static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx);

void bpf_program__unload(struct bpf_program *prog)
{
        if (!prog)
                return;

        zclose(prog->fd);

        zfree(&prog->func_info);
        zfree(&prog->line_info);
}

static void bpf_program__exit(struct bpf_program *prog)
{
        if (!prog)
                return;

        bpf_program__unload(prog);
        zfree(&prog->name);
        zfree(&prog->sec_name);
        zfree(&prog->insns);
        zfree(&prog->reloc_desc);

        prog->nr_reloc = 0;
        prog->insns_cnt = 0;
        prog->sec_idx = -1;
}

static bool insn_is_subprog_call(const struct bpf_insn *insn)
{
        return BPF_CLASS(insn->code) == BPF_JMP &&
               BPF_OP(insn->code) == BPF_CALL &&
               BPF_SRC(insn->code) == BPF_K &&
               insn->src_reg == BPF_PSEUDO_CALL &&
               insn->dst_reg == 0 &&
               insn->off == 0;
}
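
/* For reference, a BPF-to-BPF (subprog) call instruction has this shape
 * (a sketch; the imm field is a relative instruction offset that libbpf
 * fills in during relocation):
 *
 *      struct bpf_insn call = {
 *              .code    = BPF_JMP | BPF_CALL,
 *              .src_reg = BPF_PSEUDO_CALL,
 *              .imm     = <insn delta to callee>,
 *      };
 */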

static bool is_call_insn(const struct bpf_insn *insn)
{
        return insn->code == (BPF_JMP | BPF_CALL);
}

static bool insn_is_pseudo_func(struct bpf_insn *insn)
{
        return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
}

static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
                      const char *name, size_t sec_idx, const char *sec_name,
                      size_t sec_off, void *insn_data, size_t insn_data_sz)
{
        if (insn_data_sz == 0 || insn_data_sz % BPF_INSN_SZ || sec_off % BPF_INSN_SZ) {
                pr_warn("sec '%s': corrupted program '%s', offset %zu, size %zu\n",
                        sec_name, name, sec_off, insn_data_sz);
                return -EINVAL;
        }

        memset(prog, 0, sizeof(*prog));
        prog->obj = obj;

        prog->sec_idx = sec_idx;
        prog->sec_insn_off = sec_off / BPF_INSN_SZ;
        prog->sec_insn_cnt = insn_data_sz / BPF_INSN_SZ;
        /* insns_cnt can later be increased by appending used subprograms */
        prog->insns_cnt = prog->sec_insn_cnt;

        prog->type = BPF_PROG_TYPE_UNSPEC;
        prog->fd = -1;
        prog->exception_cb_idx = -1;

        /* libbpf's convention for SEC("?abc...") is that it's just like
         * SEC("abc...") but the corresponding bpf_program starts out with
         * autoload set to false.
         */
        if (sec_name[0] == '?') {
                prog->autoload = false;
                /* from now on forget there was ? in section name */
                sec_name++;
        } else {
                prog->autoload = true;
        }

        prog->autoattach = true;

        /* inherit object's log_level */
        prog->log_level = obj->log_level;

        prog->sec_name = strdup(sec_name);
        if (!prog->sec_name)
                goto errout;

        prog->name = strdup(name);
        if (!prog->name)
                goto errout;

        prog->insns = malloc(insn_data_sz);
        if (!prog->insns)
                goto errout;
        memcpy(prog->insns, insn_data, insn_data_sz);

        return 0;
errout:
        pr_warn("sec '%s': failed to allocate memory for prog '%s'\n", sec_name, name);
        bpf_program__exit(prog);
        return -ENOMEM;
}
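
/* The SEC("?...") convention above in practice (hypothetical BPF-side and
 * user-side snippets, not code from this file):
 *
 *      SEC("?xdp")                     // autoload starts out false
 *      int xdp_prog(struct xdp_md *ctx) { return XDP_PASS; }
 *
 * with userspace opting in before load:
 *
 *      bpf_program__set_autoload(prog, true);
 */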

static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
                         const char *sec_name, int sec_idx)
{
        Elf_Data *symbols = obj->efile.symbols;
        struct bpf_program *prog, *progs;
        void *data = sec_data->d_buf;
        size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
        int nr_progs, err, i;
        const char *name;
        Elf64_Sym *sym;

        progs = obj->programs;
        nr_progs = obj->nr_programs;
        nr_syms = symbols->d_size / sizeof(Elf64_Sym);

        for (i = 0; i < nr_syms; i++) {
                sym = elf_sym_by_idx(obj, i);

                if (sym->st_shndx != sec_idx)
                        continue;
                if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
                        continue;

                prog_sz = sym->st_size;
                sec_off = sym->st_value;

                name = elf_sym_str(obj, sym->st_name);
                if (!name) {
                        pr_warn("sec '%s': failed to get symbol name for offset %zu\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_off + prog_sz > sec_sz) {
                        pr_warn("sec '%s': program at offset %zu crosses section boundary\n",
                                sec_name, sec_off);
                        return -LIBBPF_ERRNO__FORMAT;
                }

                if (sec_idx != obj->efile.text_shndx && ELF64_ST_BIND(sym->st_info) == STB_LOCAL) {
                        pr_warn("sec '%s': program '%s' is static and not supported\n", sec_name, name);
                        return -ENOTSUP;
                }

                pr_debug("sec '%s': found program '%s' at insn offset %zu (%zu bytes), code size %zu insns (%zu bytes)\n",
                         sec_name, name, sec_off / BPF_INSN_SZ, sec_off, prog_sz / BPF_INSN_SZ, prog_sz);

                progs = libbpf_reallocarray(progs, nr_progs + 1, sizeof(*progs));
                if (!progs) {
                        /*
                         * In this case the original obj->programs is still
                         * valid, so no special cleanup is needed in
                         * bpf_object__close().
                         */
                        pr_warn("sec '%s': failed to alloc memory for new program '%s'\n",
                                sec_name, name);
                        return -ENOMEM;
                }
                obj->programs = progs;

                prog = &progs[nr_progs];

                err = bpf_object__init_prog(obj, prog, name, sec_idx, sec_name,
                                            sec_off, data + sec_off, prog_sz);
                if (err)
                        return err;

                if (ELF64_ST_BIND(sym->st_info) != STB_LOCAL)
                        prog->sym_global = true;

                /* if a function is a global/weak symbol, but has restricted
                 * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF FUNC
                 * as static to enable more permissive BPF verification mode
                 * with more outside context available to the BPF verifier
                 */
                if (prog->sym_global && (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
                    || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL))
                        prog->mark_btf_static = true;

                nr_progs++;
                obj->nr_programs = nr_progs;
        }

        return 0;
}

static const struct btf_member *
find_member_by_offset(const struct btf_type *t, __u32 bit_offset)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (btf_member_bit_offset(t, i) == bit_offset)
                        return m;
        }

        return NULL;
}

static const struct btf_member *
find_member_by_name(const struct btf *btf, const struct btf_type *t,
                    const char *name)
{
        struct btf_member *m;
        int i;

        for (i = 0, m = btf_members(t); i < btf_vlen(t); i++, m++) {
                if (!strcmp(btf__name_by_offset(btf, m->name_off), name))
                        return m;
        }

        return NULL;
}

static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
                            __u16 kind, struct btf **res_btf,
                            struct module_btf **res_mod_btf);

#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
                                   const char *name, __u32 kind);

static int
find_struct_ops_kern_types(struct bpf_object *obj, const char *tname,
                           struct module_btf **mod_btf,
                           const struct btf_type **type, __u32 *type_id,
                           const struct btf_type **vtype, __u32 *vtype_id,
                           const struct btf_member **data_member)
{
        const struct btf_type *kern_type, *kern_vtype;
        const struct btf_member *kern_data_member;
        struct btf *btf;
        __s32 kern_vtype_id, kern_type_id;
        __u32 i;

        kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
                                        &btf, mod_btf);
        if (kern_type_id < 0) {
                pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
                        tname);
                return kern_type_id;
        }
        kern_type = btf__type_by_id(btf, kern_type_id);

        /* Find the corresponding "map_value" type that will be used
         * in map_update(BPF_MAP_TYPE_STRUCT_OPS).  For example,
         * find "struct bpf_struct_ops_tcp_congestion_ops" from the
         * btf_vmlinux.
         */
        kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
                                                tname, BTF_KIND_STRUCT);
        if (kern_vtype_id < 0) {
                pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
                        STRUCT_OPS_VALUE_PREFIX, tname);
                return kern_vtype_id;
        }
        kern_vtype = btf__type_by_id(btf, kern_vtype_id);

        /* Find "struct tcp_congestion_ops" from
         * struct bpf_struct_ops_tcp_congestion_ops {
         *      [ ... ]
         *      struct tcp_congestion_ops data;
         * }
         */
        kern_data_member = btf_members(kern_vtype);
        for (i = 0; i < btf_vlen(kern_vtype); i++, kern_data_member++) {
                if (kern_data_member->type == kern_type_id)
                        break;
        }
        if (i == btf_vlen(kern_vtype)) {
                pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
                        tname, STRUCT_OPS_VALUE_PREFIX, tname);
                return -EINVAL;
        }

        *type = kern_type;
        *type_id = kern_type_id;
        *vtype = kern_vtype;
        *vtype_id = kern_vtype_id;
        *data_member = kern_data_member;

        return 0;
}

static bool bpf_map__is_struct_ops(const struct bpf_map *map)
{
        return map->def.type == BPF_MAP_TYPE_STRUCT_OPS;
}

/* Init the map's fields that depend on kern_btf */
static int bpf_map__init_kern_struct_ops(struct bpf_map *map)
{
        const struct btf_member *member, *kern_member, *kern_data_member;
        const struct btf_type *type, *kern_type, *kern_vtype;
        __u32 i, kern_type_id, kern_vtype_id, kern_data_off;
        struct bpf_object *obj = map->obj;
        const struct btf *btf = obj->btf;
        struct bpf_struct_ops *st_ops;
        const struct btf *kern_btf;
        struct module_btf *mod_btf;
        void *data, *kern_data;
        const char *tname;
        int err;

        st_ops = map->st_ops;
        type = st_ops->type;
        tname = st_ops->tname;
        err = find_struct_ops_kern_types(obj, tname, &mod_btf,
                                         &kern_type, &kern_type_id,
                                         &kern_vtype, &kern_vtype_id,
                                         &kern_data_member);
        if (err)
                return err;

        kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux;

        pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n",
                 map->name, st_ops->type_id, kern_type_id, kern_vtype_id);

        map->mod_btf_fd = mod_btf ? mod_btf->fd : -1;
        map->def.value_size = kern_vtype->size;
        map->btf_vmlinux_value_type_id = kern_vtype_id;

        st_ops->kern_vdata = calloc(1, kern_vtype->size);
        if (!st_ops->kern_vdata)
                return -ENOMEM;

        data = st_ops->data;
        kern_data_off = kern_data_member->offset / 8;
        kern_data = st_ops->kern_vdata + kern_data_off;

        member = btf_members(type);
        for (i = 0; i < btf_vlen(type); i++, member++) {
                const struct btf_type *mtype, *kern_mtype;
                __u32 mtype_id, kern_mtype_id;
                void *mdata, *kern_mdata;
                __s64 msize, kern_msize;
                __u32 moff, kern_moff;
                __u32 kern_member_idx;
                const char *mname;

                mname = btf__name_by_offset(btf, member->name_off);
                kern_member = find_member_by_name(kern_btf, kern_type, mname);
                if (!kern_member) {
                        pr_warn("struct_ops init_kern %s: Cannot find member %s in kernel BTF\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                kern_member_idx = kern_member - btf_members(kern_type);
                if (btf_member_bitfield_size(type, i) ||
                    btf_member_bitfield_size(kern_type, kern_member_idx)) {
                        pr_warn("struct_ops init_kern %s: bitfield %s is not supported\n",
                                map->name, mname);
                        return -ENOTSUP;
                }

                moff = member->offset / 8;
                kern_moff = kern_member->offset / 8;

                mdata = data + moff;
                kern_mdata = kern_data + kern_moff;

                mtype = skip_mods_and_typedefs(btf, member->type, &mtype_id);
                kern_mtype = skip_mods_and_typedefs(kern_btf, kern_member->type,
                                                    &kern_mtype_id);
                if (BTF_INFO_KIND(mtype->info) !=
                    BTF_INFO_KIND(kern_mtype->info)) {
                        pr_warn("struct_ops init_kern %s: Unmatched member type %s %u != %u(kernel)\n",
                                map->name, mname, BTF_INFO_KIND(mtype->info),
                                BTF_INFO_KIND(kern_mtype->info));
                        return -ENOTSUP;
                }

                if (btf_is_ptr(mtype)) {
                        struct bpf_program *prog;

                        prog = st_ops->progs[i];
                        if (!prog)
                                continue;

                        kern_mtype = skip_mods_and_typedefs(kern_btf,
                                                            kern_mtype->type,
                                                            &kern_mtype_id);

                        /* mtype->type must be a func_proto which was
                         * guaranteed in bpf_object__collect_st_ops_relos(),
                         * so only check kern_mtype for func_proto here.
                         */
                        if (!btf_is_func_proto(kern_mtype)) {
                                pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n",
                                        map->name, mname);
                                return -ENOTSUP;
                        }

                        if (mod_btf)
                                prog->attach_btf_obj_fd = mod_btf->fd;
                        prog->attach_btf_id = kern_type_id;
                        prog->expected_attach_type = kern_member_idx;

                        st_ops->kern_func_off[i] = kern_data_off + kern_moff;

                        pr_debug("struct_ops init_kern %s: func ptr %s is set to prog %s from data(+%u) to kern_data(+%u)\n",
                                 map->name, mname, prog->name, moff,
                                 kern_moff);

                        continue;
                }

                msize = btf__resolve_size(btf, mtype_id);
                kern_msize = btf__resolve_size(kern_btf, kern_mtype_id);
                if (msize < 0 || kern_msize < 0 || msize != kern_msize) {
                        pr_warn("struct_ops init_kern %s: Error in size of member %s: %zd != %zd(kernel)\n",
                                map->name, mname, (ssize_t)msize,
                                (ssize_t)kern_msize);
                        return -ENOTSUP;
                }

                pr_debug("struct_ops init_kern %s: copy %s %u bytes from data(+%u) to kern_data(+%u)\n",
                         map->name, mname, (unsigned int)msize,
                         moff, kern_moff);
                memcpy(kern_mdata, mdata, msize);
        }

        return 0;
}

static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj)
{
        struct bpf_map *map;
        size_t i;
        int err;

        for (i = 0; i < obj->nr_maps; i++) {
                map = &obj->maps[i];

                if (!bpf_map__is_struct_ops(map))
                        continue;

                err = bpf_map__init_kern_struct_ops(map);
                if (err)
                        return err;
        }

        return 0;
}

static int init_struct_ops_maps(struct bpf_object *obj, const char *sec_name,
                                int shndx, Elf_Data *data, __u32 map_flags)
{
        const struct btf_type *type, *datasec;
        const struct btf_var_secinfo *vsi;
        struct bpf_struct_ops *st_ops;
        const char *tname, *var_name;
        __s32 type_id, datasec_id;
        const struct btf *btf;
        struct bpf_map *map;
        __u32 i;

        if (shndx == -1)
                return 0;

        btf = obj->btf;
        datasec_id = btf__find_by_name_kind(btf, sec_name,
                                            BTF_KIND_DATASEC);
        if (datasec_id < 0) {
                pr_warn("struct_ops init: DATASEC %s not found\n",
                        sec_name);
                return -EINVAL;
        }

        datasec = btf__type_by_id(btf, datasec_id);
        vsi = btf_var_secinfos(datasec);
        for (i = 0; i < btf_vlen(datasec); i++, vsi++) {
                type = btf__type_by_id(obj->btf, vsi->type);
                var_name = btf__name_by_offset(obj->btf, type->name_off);

                type_id = btf__resolve_type(obj->btf, vsi->type);
                if (type_id < 0) {
                        pr_warn("struct_ops init: Cannot resolve var type_id %u in DATASEC %s\n",
                                vsi->type, sec_name);
                        return -EINVAL;
                }

                type = btf__type_by_id(obj->btf, type_id);
                tname = btf__name_by_offset(obj->btf, type->name_off);
                if (!tname[0]) {
                        pr_warn("struct_ops init: anonymous type is not supported\n");
                        return -ENOTSUP;
                }
                if (!btf_is_struct(type)) {
                        pr_warn("struct_ops init: %s is not a struct\n", tname);
                        return -EINVAL;
                }

                map = bpf_object__add_map(obj);
                if (IS_ERR(map))
                        return PTR_ERR(map);

                map->sec_idx = shndx;
                map->sec_offset = vsi->offset;
                map->name = strdup(var_name);
                if (!map->name)
                        return -ENOMEM;

                map->def.type = BPF_MAP_TYPE_STRUCT_OPS;
                map->def.key_size = sizeof(int);
                map->def.value_size = type->size;
                map->def.max_entries = 1;
                map->def.map_flags = map_flags;

                map->st_ops = calloc(1, sizeof(*map->st_ops));
                if (!map->st_ops)
                        return -ENOMEM;
                st_ops = map->st_ops;
                st_ops->data = malloc(type->size);
                st_ops->progs = calloc(btf_vlen(type), sizeof(*st_ops->progs));
                st_ops->kern_func_off = malloc(btf_vlen(type) *
                                               sizeof(*st_ops->kern_func_off));
                if (!st_ops->data || !st_ops->progs || !st_ops->kern_func_off)
                        return -ENOMEM;

                if (vsi->offset + type->size > data->d_size) {
                        pr_warn("struct_ops init: var %s is beyond the end of DATASEC %s\n",
                                var_name, sec_name);
                        return -EINVAL;
                }

                memcpy(st_ops->data,
                       data->d_buf + vsi->offset,
                       type->size);
                st_ops->tname = tname;
                st_ops->type = type;
                st_ops->type_id = type_id;

                pr_debug("struct_ops init: struct %s(type_id=%u) %s found at offset %u\n",
                         tname, type_id, var_name, vsi->offset);
        }

        return 0;
}
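
/* The DATASEC walked above corresponds to BPF-side definitions like this
 * (a sketch of the user-facing convention, not code from this file):
 *
 *      SEC(".struct_ops")
 *      struct tcp_congestion_ops dctcp = {
 *              .init   = (void *)dctcp_init,
 *              .name   = "bpf_dctcp",
 *      };
 *
 * Each such variable becomes one BPF_MAP_TYPE_STRUCT_OPS map; the
 * ".struct_ops.link" variant additionally sets BPF_F_LINK, as done by
 * bpf_object_init_struct_ops() below.
 */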

static int bpf_object_init_struct_ops(struct bpf_object *obj)
{
        int err;

        err = init_struct_ops_maps(obj, STRUCT_OPS_SEC, obj->efile.st_ops_shndx,
                                   obj->efile.st_ops_data, 0);
        err = err ?: init_struct_ops_maps(obj, STRUCT_OPS_LINK_SEC,
                                          obj->efile.st_ops_link_shndx,
                                          obj->efile.st_ops_link_data,
                                          BPF_F_LINK);
        return err;
}

static struct bpf_object *bpf_object__new(const char *path,
                                          const void *obj_buf,
                                          size_t obj_buf_sz,
                                          const char *obj_name)
{
        struct bpf_object *obj;
        char *end;

        obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
        if (!obj) {
                pr_warn("alloc memory failed for %s\n", path);
                return ERR_PTR(-ENOMEM);
        }

        strcpy(obj->path, path);
        if (obj_name) {
                libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));
        } else {
                /* Use the GNU version of basename(), which doesn't modify its argument. */
                libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));
                end = strchr(obj->name, '.');
                if (end)
                        *end = 0;
        }

        obj->efile.fd = -1;
        /*
         * The caller of this function should also call
         * bpf_object__elf_finish() after data collection to return
         * obj_buf to the user. Otherwise the buffer would have to be
         * duplicated to avoid the user freeing it before ELF processing
         * is finished.
         */
        obj->efile.obj_buf = obj_buf;
        obj->efile.obj_buf_sz = obj_buf_sz;
        obj->efile.btf_maps_shndx = -1;
        obj->efile.st_ops_shndx = -1;
        obj->efile.st_ops_link_shndx = -1;
        obj->kconfig_map_idx = -1;

        obj->kern_version = get_kernel_version();
        obj->loaded = false;

        return obj;
}
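
/* Typical public entry points that funnel into bpf_object__new()
 * (caller-side sketch; "prog.bpf.o" is a hypothetical object file):
 *
 *      struct bpf_object *obj = bpf_object__open("prog.bpf.o");
 *      // or, for an in-memory buffer owned by the caller:
 *      // struct bpf_object *obj = bpf_object__open_mem(buf, buf_sz, NULL);
 *
 *      if (!obj)
 *              fprintf(stderr, "failed to open object: %d\n", -errno);
 */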
1327
1328 static void bpf_object__elf_finish(struct bpf_object *obj)
1329 {
1330         if (!obj->efile.elf)
1331                 return;
1332
1333         elf_end(obj->efile.elf);
1334         obj->efile.elf = NULL;
1335         obj->efile.symbols = NULL;
1336         obj->efile.st_ops_data = NULL;
1337         obj->efile.st_ops_link_data = NULL;
1338
1339         zfree(&obj->efile.secs);
1340         obj->efile.sec_cnt = 0;
1341         zclose(obj->efile.fd);
1342         obj->efile.obj_buf = NULL;
1343         obj->efile.obj_buf_sz = 0;
1344 }
1345
1346 static int bpf_object__elf_init(struct bpf_object *obj)
1347 {
1348         Elf64_Ehdr *ehdr;
1349         int err = 0;
1350         Elf *elf;
1351
1352         if (obj->efile.elf) {
1353                 pr_warn("elf: init internal error\n");
1354                 return -LIBBPF_ERRNO__LIBELF;
1355         }
1356
1357         if (obj->efile.obj_buf_sz > 0) {
1358                 /* obj_buf should have been validated by bpf_object__open_mem(). */
1359                 elf = elf_memory((char *)obj->efile.obj_buf, obj->efile.obj_buf_sz);
1360         } else {
1361                 obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC);
1362                 if (obj->efile.fd < 0) {
1363                         char errmsg[STRERR_BUFSIZE], *cp;
1364
1365                         err = -errno;
1366                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
1367                         pr_warn("elf: failed to open %s: %s\n", obj->path, cp);
1368                         return err;
1369                 }
1370
1371                 elf = elf_begin(obj->efile.fd, ELF_C_READ_MMAP, NULL);
1372         }
1373
1374         if (!elf) {
1375                 pr_warn("elf: failed to open %s as ELF file: %s\n", obj->path, elf_errmsg(-1));
1376                 err = -LIBBPF_ERRNO__LIBELF;
1377                 goto errout;
1378         }
1379
1380         obj->efile.elf = elf;
1381
1382         if (elf_kind(elf) != ELF_K_ELF) {
1383                 err = -LIBBPF_ERRNO__FORMAT;
1384                 pr_warn("elf: '%s' is not a proper ELF object\n", obj->path);
1385                 goto errout;
1386         }
1387
1388         if (gelf_getclass(elf) != ELFCLASS64) {
1389                 err = -LIBBPF_ERRNO__FORMAT;
1390                 pr_warn("elf: '%s' is not a 64-bit ELF object\n", obj->path);
1391                 goto errout;
1392         }
1393
1394         obj->efile.ehdr = ehdr = elf64_getehdr(elf);
1395         if (!obj->efile.ehdr) {
1396                 pr_warn("elf: failed to get ELF header from %s: %s\n", obj->path, elf_errmsg(-1));
1397                 err = -LIBBPF_ERRNO__FORMAT;
1398                 goto errout;
1399         }
1400
1401         if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) {
1402                 pr_warn("elf: failed to get section names section index for %s: %s\n",
1403                         obj->path, elf_errmsg(-1));
1404                 err = -LIBBPF_ERRNO__FORMAT;
1405                 goto errout;
1406         }
1407
1408         /* ELF is corrupted/truncated, avoid calling elf_strptr. */
1409         if (!elf_rawdata(elf_getscn(elf, obj->efile.shstrndx), NULL)) {
1410                 pr_warn("elf: failed to get section names strings from %s: %s\n",
1411                         obj->path, elf_errmsg(-1));
1412                 err = -LIBBPF_ERRNO__FORMAT;
1413                 goto errout;
1414         }
1415
1416         /* Old LLVM set e_machine to EM_NONE */
1417         if (ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF)) {
1418                 pr_warn("elf: %s is not a valid eBPF object file\n", obj->path);
1419                 err = -LIBBPF_ERRNO__FORMAT;
1420                 goto errout;
1421         }
1422
1423         return 0;
1424 errout:
1425         bpf_object__elf_finish(obj);
1426         return err;
1427 }
1428
1429 static int bpf_object__check_endianness(struct bpf_object *obj)
1430 {
1431 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
1432         if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
1433                 return 0;
1434 #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
1435         if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
1436                 return 0;
1437 #else
1438 # error "Unrecognized __BYTE_ORDER__"
1439 #endif
1440         pr_warn("elf: endianness mismatch in %s.\n", obj->path);
1441         return -LIBBPF_ERRNO__ENDIAN;
1442 }
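
/* Note that the endianness of a BPF object is fixed at compile time: an
 * object built with "clang --target=bpfel" carries ELFDATA2LSB and, per the
 * check above, loads only on little-endian hosts ("bpfeb" is the big-endian
 * counterpart, while plain "bpf" follows the host's byte order).
 */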
1443
1444 static int
1445 bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)
1446 {
1447         if (!data) {
1448                 pr_warn("invalid license section in %s\n", obj->path);
1449                 return -LIBBPF_ERRNO__FORMAT;
1450         }
1451         /* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't
1452          * go over allowed ELF data section buffer
1453          */
1454         libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));
1455         pr_debug("license of %s is %s\n", obj->path, obj->license);
1456         return 0;
1457 }
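
/* For reference, the "license" section parsed above is typically declared
 * in BPF program source as:
 *
 *	char _license[] SEC("license") = "GPL";
 *
 * The kernel uses this string to gate access to GPL-only helpers.
 */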
1458
1459 static int
1460 bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)
1461 {
1462         __u32 kver;
1463
1464         if (!data || size != sizeof(kver)) {
1465                 pr_warn("invalid kver section in %s\n", obj->path);
1466                 return -LIBBPF_ERRNO__FORMAT;
1467         }
1468         memcpy(&kver, data, sizeof(kver));
1469         obj->kern_version = kver;
1470         pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version);
1471         return 0;
1472 }
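
/* Similarly, the "version" section is a legacy construct, typically
 * declared as:
 *
 *	__u32 _version SEC("version") = LINUX_VERSION_CODE;
 *
 * Historically old kernels required it to match the running kernel for
 * kprobe programs; modern kernels ignore it.
 */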
1473
1474 static bool bpf_map_type__is_map_in_map(enum bpf_map_type type)
1475 {
1476         if (type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
1477             type == BPF_MAP_TYPE_HASH_OF_MAPS)
1478                 return true;
1479         return false;
1480 }
1481
1482 static int find_elf_sec_sz(const struct bpf_object *obj, const char *name, __u32 *size)
1483 {
1484         Elf_Data *data;
1485         Elf_Scn *scn;
1486
1487         if (!name)
1488                 return -EINVAL;
1489
1490         scn = elf_sec_by_name(obj, name);
1491         data = elf_sec_data(obj, scn);
1492         if (data) {
1493                 *size = data->d_size;
1494                 return 0; /* found it */
1495         }
1496
1497         return -ENOENT;
1498 }
1499
1500 static Elf64_Sym *find_elf_var_sym(const struct bpf_object *obj, const char *name)
1501 {
1502         Elf_Data *symbols = obj->efile.symbols;
1503         const char *sname;
1504         size_t si;
1505
1506         for (si = 0; si < symbols->d_size / sizeof(Elf64_Sym); si++) {
1507                 Elf64_Sym *sym = elf_sym_by_idx(obj, si);
1508
1509                 if (ELF64_ST_TYPE(sym->st_info) != STT_OBJECT)
1510                         continue;
1511
1512                 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
1513                     ELF64_ST_BIND(sym->st_info) != STB_WEAK)
1514                         continue;
1515
1516                 sname = elf_sym_str(obj, sym->st_name);
1517                 if (!sname) {
1518                         pr_warn("failed to get sym name string for var %s\n", name);
1519                         return ERR_PTR(-EIO);
1520                 }
1521                 if (strcmp(name, sname) == 0)
1522                         return sym;
1523         }
1524
1525         return ERR_PTR(-ENOENT);
1526 }
1527
1528 static int create_placeholder_fd(void)
1529 {
1530         int fd;
1531
1532         fd = ensure_good_fd(memfd_create("libbpf-placeholder-fd", MFD_CLOEXEC));
1533         if (fd < 0)
1534                 return -errno;
1535         return fd;
1536 }
1537
1538 static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
1539 {
1540         struct bpf_map *map;
1541         int err;
1542
1543         err = libbpf_ensure_mem((void **)&obj->maps, &obj->maps_cap,
1544                                 sizeof(*obj->maps), obj->nr_maps + 1);
1545         if (err)
1546                 return ERR_PTR(err);
1547
1548         map = &obj->maps[obj->nr_maps++];
1549         map->obj = obj;
1550         /* Preallocate map FD without actually creating BPF map just yet.
1551          * These map FD "placeholders" will be reused later without changing
1552          * FD value when map is actually created in the kernel.
1553          *
1554          * This is useful to be able to perform BPF program relocations
1555          * without having to create BPF maps before that step. This allows us
1556          * to finalize and load BTF very late in BPF object's loading phase,
1557          * right before BPF maps have to be created and BPF programs have to
1558          * be loaded. By having these map FD placeholders we can perform all
1559          * the sanitizations, relocations, and any other adjustments before we
1560          * start creating actual BPF kernel objects (BTF, maps, progs).
1561          */
1562         map->fd = create_placeholder_fd();
1563         if (map->fd < 0)
1564                 return ERR_PTR(map->fd);
1565         map->inner_map_fd = -1;
1566         map->autocreate = true;
1567
1568         return map;
1569 }
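
/* Illustrative sketch (not the actual libbpf code path) of the FD
 * placeholder idea described above: a memfd reserves a stable FD number
 * that is later atomically repointed at the real BPF map, so FD values
 * already baked into relocated instructions stay valid:
 *
 *	int fd = memfd_create("placeholder", MFD_CLOEXEC);
 *	// ... use fd during relocations, before any map exists ...
 *	int map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "m",
 *				    sizeof(int), sizeof(long), 1, NULL);
 *	dup3(map_fd, fd, O_CLOEXEC); // fd now refers to the actual map
 *	close(map_fd);
 */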
1570
1571 static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
1572 {
1573         const long page_sz = sysconf(_SC_PAGE_SIZE);
1574         size_t map_sz;
1575
1576         map_sz = (size_t)roundup(value_sz, 8) * max_entries;
1577         map_sz = roundup(map_sz, page_sz);
1578         return map_sz;
1579 }
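
/* Worked example: value_sz = 4 and max_entries = 1000 on a system with
 * 4096-byte pages gives roundup(4, 8) * 1000 = 8000 bytes, rounded up to
 * 8192 (two pages).
 */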
1580
1581 static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
1582 {
1583         void *mmaped;
1584
1585         if (!map->mmaped)
1586                 return -EINVAL;
1587
1588         if (old_sz == new_sz)
1589                 return 0;
1590
1591         mmaped = mmap(NULL, new_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1592         if (mmaped == MAP_FAILED)
1593                 return -errno;
1594
1595         memcpy(mmaped, map->mmaped, min(old_sz, new_sz));
1596         munmap(map->mmaped, old_sz);
1597         map->mmaped = mmaped;
1598         return 0;
1599 }
1600
1601 static char *internal_map_name(struct bpf_object *obj, const char *real_name)
1602 {
1603         char map_name[BPF_OBJ_NAME_LEN], *p;
1604         int pfx_len, sfx_len = max((size_t)7, strlen(real_name));
1605
1606         /* This is one of the more confusing parts of libbpf for various
1607          * reasons, some of which are historical. The original idea for naming
1608          * internal names was to include as much of BPF object name prefix as
1609          * possible, so that it can be distinguished from similar internal
1610          * maps of a different BPF object.
1611          * As an example, let's say we have bpf_object named 'my_object_name'
1612          * and internal map corresponding to '.rodata' ELF section. The final
1613          * map name advertised to user and to the kernel will be
1614          * 'my_objec.rodata', taking first 8 characters of object name and
1615          * entire 7 characters of '.rodata'.
1616          * Somewhat confusingly, if internal map ELF section name is shorter
1617          * than 7 characters, e.g., '.bss', we still reserve 7 characters
1618          * for the suffix, even though we only have 4 actual characters, and
1619          * resulting map will be called 'my_objec.bss', not even using all 15
1620          * characters allowed by the kernel. Oh well, at least the truncated
1621          * object name is somewhat consistent in this case. But if the map
1622          * name is '.kconfig', we'll still have entirety of '.kconfig' added
1623          * (8 chars) and thus will be left with only first 7 characters of the
1624          * object name ('my_obje'). Happy guessing, user, that the final map
1625          * name will be "my_obje.kconfig".
1626          * Now, with libbpf starting to support arbitrarily named .rodata.*
1627          * and .data.* data sections, it's possible that ELF section name is
1628          * longer than allowed 15 chars, so we now need to be careful to take
1629          * only up to 15 first characters of ELF name, taking no BPF object
1630          * name characters at all. So '.rodata.abracadabra' will result in
1631          * '.rodata.abracad' kernel and user-visible name.
1632          * We need to keep this convoluted logic intact for .data, .bss and
1633          * .rodata maps, but for new custom .data.custom and .rodata.custom
1634          * maps we use their ELF names as is, not prepending bpf_object name
1635          * in front. We still need to truncate them to 15 characters for the
1636          * kernel. Full name can be recovered for such maps by using DATASEC
1637          * BTF type associated with such map's value type, though.
1638          */
1639         if (sfx_len >= BPF_OBJ_NAME_LEN)
1640                 sfx_len = BPF_OBJ_NAME_LEN - 1;
1641
1642         /* if there are two or more dots in map name, it's a custom dot map */
1643         if (strchr(real_name + 1, '.') != NULL)
1644                 pfx_len = 0;
1645         else
1646                 pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, strlen(obj->name));
1647
1648         snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name,
1649                  sfx_len, real_name);
1650
1651         /* sanitise map name to characters allowed by kernel */
1652         for (p = map_name; *p && p < map_name + sizeof(map_name); p++)
1653                 if (!isalnum(*p) && *p != '_' && *p != '.')
1654                         *p = '_';
1655
1656         return strdup(map_name);
1657 }
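
/* Summary of the naming rules spelled out above:
 *
 *	object name       ELF section     final map name
 *	my_object_name    .rodata         my_objec.rodata
 *	my_object_name    .bss            my_objec.bss
 *	my_object_name    .kconfig        my_obje.kconfig
 *	my_object_name    .data.custom    .data.custom
 */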
1658
1659 static int
1660 map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map);
1661
1662 /* Internal BPF map is mmap()'able only if at least one of the corresponding
1663  * DATASEC's VARs is to be exposed through BPF skeleton. I.e., it's a GLOBAL
1664  * variable and it's not marked as __hidden (which turns it into, effectively,
1665  * a STATIC variable).
1666  */
1667 static bool map_is_mmapable(struct bpf_object *obj, struct bpf_map *map)
1668 {
1669         const struct btf_type *t, *vt;
1670         struct btf_var_secinfo *vsi;
1671         int i, n;
1672
1673         if (!map->btf_value_type_id)
1674                 return false;
1675
1676         t = btf__type_by_id(obj->btf, map->btf_value_type_id);
1677         if (!btf_is_datasec(t))
1678                 return false;
1679
1680         vsi = btf_var_secinfos(t);
1681         for (i = 0, n = btf_vlen(t); i < n; i++, vsi++) {
1682                 vt = btf__type_by_id(obj->btf, vsi->type);
1683                 if (!btf_is_var(vt))
1684                         continue;
1685
1686                 if (btf_var(vt)->linkage != BTF_VAR_STATIC)
1687                         return true;
1688         }
1689
1690         return false;
1691 }
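
/* In BPF program terms (using the __hidden macro from bpf_helpers.h):
 *
 *	int exposed;           // GLOBAL linkage -> map stays mmap()'able
 *	__hidden int internal; // effectively STATIC -> doesn't count
 *	static int private;    // STATIC -> doesn't count
 */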
1692
1693 static int
1694 bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
1695                               const char *real_name, int sec_idx, void *data, size_t data_sz)
1696 {
1697         struct bpf_map_def *def;
1698         struct bpf_map *map;
1699         size_t mmap_sz;
1700         int err;
1701
1702         map = bpf_object__add_map(obj);
1703         if (IS_ERR(map))
1704                 return PTR_ERR(map);
1705
1706         map->libbpf_type = type;
1707         map->sec_idx = sec_idx;
1708         map->sec_offset = 0;
1709         map->real_name = strdup(real_name);
1710         map->name = internal_map_name(obj, real_name);
1711         if (!map->real_name || !map->name) {
1712                 zfree(&map->real_name);
1713                 zfree(&map->name);
1714                 return -ENOMEM;
1715         }
1716
1717         def = &map->def;
1718         def->type = BPF_MAP_TYPE_ARRAY;
1719         def->key_size = sizeof(int);
1720         def->value_size = data_sz;
1721         def->max_entries = 1;
1722         def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
1723                          ? BPF_F_RDONLY_PROG : 0;
1724
1725         /* failures are fine because of maps like .rodata.str1.1 */
1726         (void) map_fill_btf_type_info(obj, map);
1727
1728         if (map_is_mmapable(obj, map))
1729                 def->map_flags |= BPF_F_MMAPABLE;
1730
1731         pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
1732                  map->name, map->sec_idx, map->sec_offset, def->map_flags);
1733
1734         mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
1735         map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
1736                            MAP_SHARED | MAP_ANONYMOUS, -1, 0);
1737         if (map->mmaped == MAP_FAILED) {
1738                 err = -errno;
1739                 map->mmaped = NULL;
1740                 pr_warn("failed to alloc map '%s' content buffer: %d\n",
1741                         map->name, err);
1742                 zfree(&map->real_name);
1743                 zfree(&map->name);
1744                 return err;
1745         }
1746
1747         if (data)
1748                 memcpy(map->mmaped, data, data_sz);
1749
1750         pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name);
1751         return 0;
1752 }
1753
1754 static int bpf_object__init_global_data_maps(struct bpf_object *obj)
1755 {
1756         struct elf_sec_desc *sec_desc;
1757         const char *sec_name;
1758         int err = 0, sec_idx;
1759
1760         /*
1761          * Populate obj->maps with libbpf internal maps.
1762          */
1763         for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
1764                 sec_desc = &obj->efile.secs[sec_idx];
1765
1766                 /* Skip recognized sections with size 0. */
1767                 if (!sec_desc->data || sec_desc->data->d_size == 0)
1768                         continue;
1769
1770                 switch (sec_desc->sec_type) {
1771                 case SEC_DATA:
1772                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1773                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA,
1774                                                             sec_name, sec_idx,
1775                                                             sec_desc->data->d_buf,
1776                                                             sec_desc->data->d_size);
1777                         break;
1778                 case SEC_RODATA:
1779                         obj->has_rodata = true;
1780                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1781                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA,
1782                                                             sec_name, sec_idx,
1783                                                             sec_desc->data->d_buf,
1784                                                             sec_desc->data->d_size);
1785                         break;
1786                 case SEC_BSS:
1787                         sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
1788                         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS,
1789                                                             sec_name, sec_idx,
1790                                                             NULL,
1791                                                             sec_desc->data->d_size);
1792                         break;
1793                 default:
1794                         /* skip */
1795                         break;
1796                 }
1797                 if (err)
1798                         return err;
1799         }
1800         return 0;
1801 }
1802
1804 static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
1805                                                const void *name)
1806 {
1807         int i;
1808
1809         for (i = 0; i < obj->nr_extern; i++) {
1810                 if (strcmp(obj->externs[i].name, name) == 0)
1811                         return &obj->externs[i];
1812         }
1813         return NULL;
1814 }
1815
1816 static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
1817                               char value)
1818 {
1819         switch (ext->kcfg.type) {
1820         case KCFG_BOOL:
1821                 if (value == 'm') {
1822                         pr_warn("extern (kcfg) '%s': value '%c' implies tristate or char type\n",
1823                                 ext->name, value);
1824                         return -EINVAL;
1825                 }
1826                 *(bool *)ext_val = value == 'y' ? true : false;
1827                 break;
1828         case KCFG_TRISTATE:
1829                 if (value == 'y')
1830                         *(enum libbpf_tristate *)ext_val = TRI_YES;
1831                 else if (value == 'm')
1832                         *(enum libbpf_tristate *)ext_val = TRI_MODULE;
1833                 else /* value == 'n' */
1834                         *(enum libbpf_tristate *)ext_val = TRI_NO;
1835                 break;
1836         case KCFG_CHAR:
1837                 *(char *)ext_val = value;
1838                 break;
1839         case KCFG_UNKNOWN:
1840         case KCFG_INT:
1841         case KCFG_CHAR_ARR:
1842         default:
1843                 pr_warn("extern (kcfg) '%s': value '%c' implies bool, tristate, or char type\n",
1844                         ext->name, value);
1845                 return -EINVAL;
1846         }
1847         ext->is_set = true;
1848         return 0;
1849 }
1850
1851 static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
1852                               const char *value)
1853 {
1854         size_t len;
1855
1856         if (ext->kcfg.type != KCFG_CHAR_ARR) {
1857                 pr_warn("extern (kcfg) '%s': value '%s' implies char array type\n",
1858                         ext->name, value);
1859                 return -EINVAL;
1860         }
1861
1862         len = strlen(value);
1863         if (value[len - 1] != '"') {
1864                 pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
1865                         ext->name, value);
1866                 return -EINVAL;
1867         }
1868
1869         /* strip quotes */
1870         len -= 2;
1871         if (len >= ext->kcfg.sz) {
1872                 pr_warn("extern (kcfg) '%s': long string '%s' (%zu bytes) truncated to %d bytes\n",
1873                         ext->name, value, len, ext->kcfg.sz - 1);
1874                 len = ext->kcfg.sz - 1;
1875         }
1876         memcpy(ext_val, value + 1, len);
1877         ext_val[len] = '\0';
1878         ext->is_set = true;
1879         return 0;
1880 }
1881
1882 static int parse_u64(const char *value, __u64 *res)
1883 {
1884         char *value_end;
1885         int err;
1886
1887         errno = 0;
1888         *res = strtoull(value, &value_end, 0);
1889         if (errno) {
1890                 err = -errno;
1891                 pr_warn("failed to parse '%s' as integer: %d\n", value, err);
1892                 return err;
1893         }
1894         if (*value_end) {
1895                 pr_warn("failed to parse '%s' as integer completely\n", value);
1896                 return -EINVAL;
1897         }
1898         return 0;
1899 }
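
/* Passing base 0 to strtoull() honors the usual C prefixes: "42", "0x2a"
 * and "052" all parse successfully (the last one as octal, i.e. 42).
 */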
1900
1901 static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
1902 {
1903         int bit_sz = ext->kcfg.sz * 8;
1904
1905         if (ext->kcfg.sz == 8)
1906                 return true;
1907
1908         /* Validate that value stored in u64 fits in integer of `ext->sz`
1909          * bytes size without any loss of information. If the target integer
1910          * is signed, we rely on the following limits of integer type of
1911          * Y bits and subsequent transformation:
1912          *
1913          *     -2^(Y-1) <= X           <= 2^(Y-1) - 1
1914          *            0 <= X + 2^(Y-1) <= 2^Y - 1
1915          *            0 <= X + 2^(Y-1) <  2^Y
1916          *
1917          *  For unsigned target integer, check that all the (64 - Y) bits are
1918          *  zero.
1919          */
1920         if (ext->kcfg.is_signed)
1921                 return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
1922         else
1923                 return (v >> bit_sz) == 0;
1924 }
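
/* Worked example for a signed 1-byte extern (bit_sz = 8): v must land in
 * [-128, 127]. For v = (__u64)-128, v + 128 wraps around to 0, which is
 * < 256, so the value is accepted; for v = (__u64)-129 the sum wraps to
 * 2^64 - 1 and the value is rejected.
 */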
1925
1926 static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
1927                               __u64 value)
1928 {
1929         if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR &&
1930             ext->kcfg.type != KCFG_BOOL) {
1931                 pr_warn("extern (kcfg) '%s': value '%llu' implies integer, char, or boolean type\n",
1932                         ext->name, (unsigned long long)value);
1933                 return -EINVAL;
1934         }
1935         if (ext->kcfg.type == KCFG_BOOL && value > 1) {
1936                 pr_warn("extern (kcfg) '%s': value '%llu' isn't boolean compatible\n",
1937                         ext->name, (unsigned long long)value);
1938                 return -EINVAL;
1940         }
1941         if (!is_kcfg_value_in_range(ext, value)) {
1942                 pr_warn("extern (kcfg) '%s': value '%llu' doesn't fit in %d bytes\n",
1943                         ext->name, (unsigned long long)value, ext->kcfg.sz);
1944                 return -ERANGE;
1945         }
1946         switch (ext->kcfg.sz) {
1947         case 1:
1948                 *(__u8 *)ext_val = value;
1949                 break;
1950         case 2:
1951                 *(__u16 *)ext_val = value;
1952                 break;
1953         case 4:
1954                 *(__u32 *)ext_val = value;
1955                 break;
1956         case 8:
1957                 *(__u64 *)ext_val = value;
1958                 break;
1959         default:
1960                 return -EINVAL;
1961         }
1962         ext->is_set = true;
1963         return 0;
1964 }
1965
1966 static int bpf_object__process_kconfig_line(struct bpf_object *obj,
1967                                             char *buf, void *data)
1968 {
1969         struct extern_desc *ext;
1970         char *sep, *value;
1971         int len, err = 0;
1972         void *ext_val;
1973         __u64 num;
1974
1975         if (!str_has_pfx(buf, "CONFIG_"))
1976                 return 0;
1977
1978         sep = strchr(buf, '=');
1979         if (!sep) {
1980                 pr_warn("failed to parse '%s': no separator\n", buf);
1981                 return -EINVAL;
1982         }
1983
1984         /* Trim ending '\n' */
1985         len = strlen(buf);
1986         if (buf[len - 1] == '\n')
1987                 buf[len - 1] = '\0';
1988         /* Split on '=' and ensure that a value is present. */
1989         *sep = '\0';
1990         if (!sep[1]) {
1991                 *sep = '=';
1992                 pr_warn("failed to parse '%s': no value\n", buf);
1993                 return -EINVAL;
1994         }
1995
1996         ext = find_extern_by_name(obj, buf);
1997         if (!ext || ext->is_set)
1998                 return 0;
1999
2000         ext_val = data + ext->kcfg.data_off;
2001         value = sep + 1;
2002
2003         switch (*value) {
2004         case 'y': case 'n': case 'm':
2005                 err = set_kcfg_value_tri(ext, ext_val, *value);
2006                 break;
2007         case '"':
2008                 err = set_kcfg_value_str(ext, ext_val, value);
2009                 break;
2010         default:
2011                 /* assume integer */
2012                 err = parse_u64(value, &num);
2013                 if (err) {
2014                         pr_warn("extern (kcfg) '%s': value '%s' isn't a valid integer\n", ext->name, value);
2015                         return err;
2016                 }
2017                 if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
2018                         pr_warn("extern (kcfg) '%s': value '%s' implies integer type\n", ext->name, value);
2019                         return -EINVAL;
2020                 }
2021                 err = set_kcfg_value_num(ext, ext_val, num);
2022                 break;
2023         }
2024         if (err)
2025                 return err;
2026         pr_debug("extern (kcfg) '%s': set to %s\n", ext->name, value);
2027         return 0;
2028 }
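
/* Example Kconfig lines and the branch that handles each of them:
 *
 *	CONFIG_BPF=y                      -> set_kcfg_value_tri()
 *	CONFIG_DEFAULT_HOSTNAME="(none)"  -> set_kcfg_value_str()
 *	CONFIG_HZ=250                     -> parse_u64() + set_kcfg_value_num()
 */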
2029
2030 static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data)
2031 {
2032         char buf[PATH_MAX];
2033         struct utsname uts;
2034         int len, err = 0;
2035         gzFile file;
2036
2037         uname(&uts);
2038         len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release);
2039         if (len < 0)
2040                 return -EINVAL;
2041         else if (len >= PATH_MAX)
2042                 return -ENAMETOOLONG;
2043
2044         /* gzopen also accepts uncompressed files. */
2045         file = gzopen(buf, "re");
2046         if (!file)
2047                 file = gzopen("/proc/config.gz", "re");
2048
2049         if (!file) {
2050                 pr_warn("failed to open system Kconfig\n");
2051                 return -ENOENT;
2052         }
2053
2054         while (gzgets(file, buf, sizeof(buf))) {
2055                 err = bpf_object__process_kconfig_line(obj, buf, data);
2056                 if (err) {
2057                         pr_warn("error parsing system Kconfig line '%s': %d\n",
2058                                 buf, err);
2059                         goto out;
2060                 }
2061         }
2062
2063 out:
2064         gzclose(file);
2065         return err;
2066 }
2067
2068 static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
2069                                         const char *config, void *data)
2070 {
2071         char buf[PATH_MAX];
2072         int err = 0;
2073         FILE *file;
2074
2075         file = fmemopen((void *)config, strlen(config), "r");
2076         if (!file) {
2077                 err = -errno;
2078                 pr_warn("failed to open in-memory Kconfig: %d\n", err);
2079                 return err;
2080         }
2081
2082         while (fgets(buf, sizeof(buf), file)) {
2083                 err = bpf_object__process_kconfig_line(obj, buf, data);
2084                 if (err) {
2085                         pr_warn("error parsing in-memory Kconfig line '%s': %d\n",
2086                                 buf, err);
2087                         break;
2088                 }
2089         }
2090
2091         fclose(file);
2092         return err;
2093 }
2094
2095 static int bpf_object__init_kconfig_map(struct bpf_object *obj)
2096 {
2097         struct extern_desc *last_ext = NULL, *ext;
2098         size_t map_sz;
2099         int i, err;
2100
2101         for (i = 0; i < obj->nr_extern; i++) {
2102                 ext = &obj->externs[i];
2103                 if (ext->type == EXT_KCFG)
2104                         last_ext = ext;
2105         }
2106
2107         if (!last_ext)
2108                 return 0;
2109
2110         map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
2111         err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
2112                                             ".kconfig", obj->efile.symbols_shndx,
2113                                             NULL, map_sz);
2114         if (err)
2115                 return err;
2116
2117         obj->kconfig_map_idx = obj->nr_maps - 1;
2118
2119         return 0;
2120 }
2121
2122 const struct btf_type *
2123 skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
2124 {
2125         const struct btf_type *t = btf__type_by_id(btf, id);
2126
2127         if (res_id)
2128                 *res_id = id;
2129
2130         while (btf_is_mod(t) || btf_is_typedef(t)) {
2131                 if (res_id)
2132                         *res_id = t->type;
2133                 t = btf__type_by_id(btf, t->type);
2134         }
2135
2136         return t;
2137 }
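
/* E.g., given:
 *
 *	typedef const int cint_t;
 *	cint_t x;
 *
 * resolving x's type walks TYPEDEF -> CONST -> INT and returns the INT
 * type, with *res_id (if provided) set to the INT's type ID.
 */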
2138
2139 static const struct btf_type *
2140 resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
2141 {
2142         const struct btf_type *t;
2143
2144         t = skip_mods_and_typedefs(btf, id, NULL);
2145         if (!btf_is_ptr(t))
2146                 return NULL;
2147
2148         t = skip_mods_and_typedefs(btf, t->type, res_id);
2149
2150         return btf_is_func_proto(t) ? t : NULL;
2151 }
2152
2153 static const char *__btf_kind_str(__u16 kind)
2154 {
2155         switch (kind) {
2156         case BTF_KIND_UNKN: return "void";
2157         case BTF_KIND_INT: return "int";
2158         case BTF_KIND_PTR: return "ptr";
2159         case BTF_KIND_ARRAY: return "array";
2160         case BTF_KIND_STRUCT: return "struct";
2161         case BTF_KIND_UNION: return "union";
2162         case BTF_KIND_ENUM: return "enum";
2163         case BTF_KIND_FWD: return "fwd";
2164         case BTF_KIND_TYPEDEF: return "typedef";
2165         case BTF_KIND_VOLATILE: return "volatile";
2166         case BTF_KIND_CONST: return "const";
2167         case BTF_KIND_RESTRICT: return "restrict";
2168         case BTF_KIND_FUNC: return "func";
2169         case BTF_KIND_FUNC_PROTO: return "func_proto";
2170         case BTF_KIND_VAR: return "var";
2171         case BTF_KIND_DATASEC: return "datasec";
2172         case BTF_KIND_FLOAT: return "float";
2173         case BTF_KIND_DECL_TAG: return "decl_tag";
2174         case BTF_KIND_TYPE_TAG: return "type_tag";
2175         case BTF_KIND_ENUM64: return "enum64";
2176         default: return "unknown";
2177         }
2178 }
2179
2180 const char *btf_kind_str(const struct btf_type *t)
2181 {
2182         return __btf_kind_str(btf_kind(t));
2183 }
2184
2185 /*
2186  * Fetch integer attribute of BTF map definition. Such attributes are
2187  * represented using a pointer to an array whose number of elements
2188  * encodes the specified integer value. E.g., int (*type)[BPF_MAP_TYPE_ARRAY];
2189  * encodes `type => BPF_MAP_TYPE_ARRAY` key/value pair completely using BTF
2190  * type definition, while using only sizeof(void *) space in ELF data section.
2191  */
2192 static bool get_map_field_int(const char *map_name, const struct btf *btf,
2193                               const struct btf_member *m, __u32 *res)
2194 {
2195         const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
2196         const char *name = btf__name_by_offset(btf, m->name_off);
2197         const struct btf_array *arr_info;
2198         const struct btf_type *arr_t;
2199
2200         if (!btf_is_ptr(t)) {
2201                 pr_warn("map '%s': attr '%s': expected PTR, got %s.\n",
2202                         map_name, name, btf_kind_str(t));
2203                 return false;
2204         }
2205
2206         arr_t = btf__type_by_id(btf, t->type);
2207         if (!arr_t) {
2208                 pr_warn("map '%s': attr '%s': type [%u] not found.\n",
2209                         map_name, name, t->type);
2210                 return false;
2211         }
2212         if (!btf_is_array(arr_t)) {
2213                 pr_warn("map '%s': attr '%s': expected ARRAY, got %s.\n",
2214                         map_name, name, btf_kind_str(arr_t));
2215                 return false;
2216         }
2217         arr_info = btf_array(arr_t);
2218         *res = arr_info->nelems;
2219         return true;
2220 }
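
/* This encoding is normally produced by the __uint()/__type() macros from
 * bpf_helpers.h, e.g.:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY); // int (*type)[BPF_MAP_TYPE_ARRAY];
 *		__uint(max_entries, 16);          // int (*max_entries)[16];
 *		__type(key, __u32);               // __u32 *key;
 *		__type(value, __u64);             // __u64 *value;
 *	} my_array SEC(".maps");
 */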
2221
2222 static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
2223 {
2224         int len;
2225
2226         len = snprintf(buf, buf_sz, "%s/%s", path, name);
2227         if (len < 0)
2228                 return -EINVAL;
2229         if (len >= buf_sz)
2230                 return -ENAMETOOLONG;
2231
2232         return 0;
2233 }
2234
2235 static int build_map_pin_path(struct bpf_map *map, const char *path)
2236 {
2237         char buf[PATH_MAX];
2238         int err;
2239
2240         if (!path)
2241                 path = BPF_FS_DEFAULT_PATH;
2242
2243         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
2244         if (err)
2245                 return err;
2246
2247         return bpf_map__set_pin_path(map, buf);
2248 }
2249
2250 /* should match definition in bpf_helpers.h */
2251 enum libbpf_pin_type {
2252         LIBBPF_PIN_NONE,
2253         /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
2254         LIBBPF_PIN_BY_NAME,
2255 };
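
/* A map opts into pinning through the same integer-attribute encoding,
 * e.g. __uint(pinning, LIBBPF_PIN_BY_NAME), in which case
 * build_map_pin_path() above derives the pin path as
 * <pin_root_path>/<map name>, defaulting to /sys/fs/bpf/<map name>.
 */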
2256
2257 int parse_btf_map_def(const char *map_name, struct btf *btf,
2258                       const struct btf_type *def_t, bool strict,
2259                       struct btf_map_def *map_def, struct btf_map_def *inner_def)
2260 {
2261         const struct btf_type *t;
2262         const struct btf_member *m;
2263         bool is_inner = inner_def == NULL;
2264         int vlen, i;
2265
2266         vlen = btf_vlen(def_t);
2267         m = btf_members(def_t);
2268         for (i = 0; i < vlen; i++, m++) {
2269                 const char *name = btf__name_by_offset(btf, m->name_off);
2270
2271                 if (!name) {
2272                         pr_warn("map '%s': invalid field #%d.\n", map_name, i);
2273                         return -EINVAL;
2274                 }
2275                 if (strcmp(name, "type") == 0) {
2276                         if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
2277                                 return -EINVAL;
2278                         map_def->parts |= MAP_DEF_MAP_TYPE;
2279                 } else if (strcmp(name, "max_entries") == 0) {
2280                         if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
2281                                 return -EINVAL;
2282                         map_def->parts |= MAP_DEF_MAX_ENTRIES;
2283                 } else if (strcmp(name, "map_flags") == 0) {
2284                         if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
2285                                 return -EINVAL;
2286                         map_def->parts |= MAP_DEF_MAP_FLAGS;
2287                 } else if (strcmp(name, "numa_node") == 0) {
2288                         if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
2289                                 return -EINVAL;
2290                         map_def->parts |= MAP_DEF_NUMA_NODE;
2291                 } else if (strcmp(name, "key_size") == 0) {
2292                         __u32 sz;
2293
2294                         if (!get_map_field_int(map_name, btf, m, &sz))
2295                                 return -EINVAL;
2296                         if (map_def->key_size && map_def->key_size != sz) {
2297                                 pr_warn("map '%s': conflicting key size %u != %u.\n",
2298                                         map_name, map_def->key_size, sz);
2299                                 return -EINVAL;
2300                         }
2301                         map_def->key_size = sz;
2302                         map_def->parts |= MAP_DEF_KEY_SIZE;
2303                 } else if (strcmp(name, "key") == 0) {
2304                         __s64 sz;
2305
2306                         t = btf__type_by_id(btf, m->type);
2307                         if (!t) {
2308                                 pr_warn("map '%s': key type [%d] not found.\n",
2309                                         map_name, m->type);
2310                                 return -EINVAL;
2311                         }
2312                         if (!btf_is_ptr(t)) {
2313                                 pr_warn("map '%s': key spec is not PTR: %s.\n",
2314                                         map_name, btf_kind_str(t));
2315                                 return -EINVAL;
2316                         }
2317                         sz = btf__resolve_size(btf, t->type);
2318                         if (sz < 0) {
2319                                 pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
2320                                         map_name, t->type, (ssize_t)sz);
2321                                 return sz;
2322                         }
2323                         if (map_def->key_size && map_def->key_size != sz) {
2324                                 pr_warn("map '%s': conflicting key size %u != %zd.\n",
2325                                         map_name, map_def->key_size, (ssize_t)sz);
2326                                 return -EINVAL;
2327                         }
2328                         map_def->key_size = sz;
2329                         map_def->key_type_id = t->type;
2330                         map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
2331                 } else if (strcmp(name, "value_size") == 0) {
2332                         __u32 sz;
2333
2334                         if (!get_map_field_int(map_name, btf, m, &sz))
2335                                 return -EINVAL;
2336                         if (map_def->value_size && map_def->value_size != sz) {
2337                                 pr_warn("map '%s': conflicting value size %u != %u.\n",
2338                                         map_name, map_def->value_size, sz);
2339                                 return -EINVAL;
2340                         }
2341                         map_def->value_size = sz;
2342                         map_def->parts |= MAP_DEF_VALUE_SIZE;
2343                 } else if (strcmp(name, "value") == 0) {
2344                         __s64 sz;
2345
2346                         t = btf__type_by_id(btf, m->type);
2347                         if (!t) {
2348                                 pr_warn("map '%s': value type [%d] not found.\n",
2349                                         map_name, m->type);
2350                                 return -EINVAL;
2351                         }
2352                         if (!btf_is_ptr(t)) {
2353                                 pr_warn("map '%s': value spec is not PTR: %s.\n",
2354                                         map_name, btf_kind_str(t));
2355                                 return -EINVAL;
2356                         }
2357                         sz = btf__resolve_size(btf, t->type);
2358                         if (sz < 0) {
2359                                 pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
2360                                         map_name, t->type, (ssize_t)sz);
2361                                 return sz;
2362                         }
2363                         if (map_def->value_size && map_def->value_size != sz) {
2364                                 pr_warn("map '%s': conflicting value size %u != %zd.\n",
2365                                         map_name, map_def->value_size, (ssize_t)sz);
2366                                 return -EINVAL;
2367                         }
2368                         map_def->value_size = sz;
2369                         map_def->value_type_id = t->type;
2370                         map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
2371                 } else if (strcmp(name, "values") == 0) {
2373                         bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type);
2374                         bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY;
2375                         const char *desc = is_map_in_map ? "map-in-map inner" : "prog-array value";
2376                         char inner_map_name[128];
2377                         int err;
2378
2379                         if (is_inner) {
2380                                 pr_warn("map '%s': multi-level inner maps not supported.\n",
2381                                         map_name);
2382                                 return -ENOTSUP;
2383                         }
2384                         if (i != vlen - 1) {
2385                                 pr_warn("map '%s': '%s' member should be last.\n",
2386                                         map_name, name);
2387                                 return -EINVAL;
2388                         }
2389                         if (!is_map_in_map && !is_prog_array) {
2390                                 pr_warn("map '%s': should be map-in-map or prog-array.\n",
2391                                         map_name);
2392                                 return -ENOTSUP;
2393                         }
2394                         if (map_def->value_size && map_def->value_size != 4) {
2395                                 pr_warn("map '%s': conflicting value size %u != 4.\n",
2396                                         map_name, map_def->value_size);
2397                                 return -EINVAL;
2398                         }
2399                         map_def->value_size = 4;
2400                         t = btf__type_by_id(btf, m->type);
2401                         if (!t) {
2402                                 pr_warn("map '%s': %s type [%d] not found.\n",
2403                                         map_name, desc, m->type);
2404                                 return -EINVAL;
2405                         }
2406                         if (!btf_is_array(t) || btf_array(t)->nelems) {
2407                                 pr_warn("map '%s': %s spec is not a zero-sized array.\n",
2408                                         map_name, desc);
2409                                 return -EINVAL;
2410                         }
2411                         t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
2412                         if (!btf_is_ptr(t)) {
2413                                 pr_warn("map '%s': %s def is of unexpected kind %s.\n",
2414                                         map_name, desc, btf_kind_str(t));
2415                                 return -EINVAL;
2416                         }
2417                         t = skip_mods_and_typedefs(btf, t->type, NULL);
2418                         if (is_prog_array) {
2419                                 if (!btf_is_func_proto(t)) {
2420                                         pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n",
2421                                                 map_name, btf_kind_str(t));
2422                                         return -EINVAL;
2423                                 }
2424                                 continue;
2425                         }
2426                         if (!btf_is_struct(t)) {
2427                                 pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
2428                                         map_name, btf_kind_str(t));
2429                                 return -EINVAL;
2430                         }
2431
2432                         snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
2433                         err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
2434                         if (err)
2435                                 return err;
2436
2437                         map_def->parts |= MAP_DEF_INNER_MAP;
2438                 } else if (strcmp(name, "pinning") == 0) {
2439                         __u32 val;
2440
2441                         if (is_inner) {
2442                                 pr_warn("map '%s': inner def can't be pinned.\n", map_name);
2443                                 return -EINVAL;
2444                         }
2445                         if (!get_map_field_int(map_name, btf, m, &val))
2446                                 return -EINVAL;
2447                         if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
2448                                 pr_warn("map '%s': invalid pinning value %u.\n",
2449                                         map_name, val);
2450                                 return -EINVAL;
2451                         }
2452                         map_def->pinning = val;
2453                         map_def->parts |= MAP_DEF_PINNING;
2454                 } else if (strcmp(name, "map_extra") == 0) {
2455                         __u32 map_extra;
2456
2457                         if (!get_map_field_int(map_name, btf, m, &map_extra))
2458                                 return -EINVAL;
2459                         map_def->map_extra = map_extra;
2460                         map_def->parts |= MAP_DEF_MAP_EXTRA;
2461                 } else {
2462                         if (strict) {
2463                                 pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
2464                                 return -ENOTSUP;
2465                         }
2466                         pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
2467                 }
2468         }
2469
2470         if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
2471                 pr_warn("map '%s': map type isn't specified.\n", map_name);
2472                 return -EINVAL;
2473         }
2474
2475         return 0;
2476 }
2477
2478 static size_t adjust_ringbuf_sz(size_t sz)
2479 {
2480         __u32 page_sz = sysconf(_SC_PAGE_SIZE);
2481         __u32 mul;
2482
2483         /* if user forgot to set any size, make sure they see an error */
2484         if (sz == 0)
2485                 return 0;
2486         /* Kernel expects BPF_MAP_TYPE_RINGBUF's max_entries to be
2487          * a power-of-2 multiple of kernel's page size. If user diligently
2488          * satisified these conditions, pass the size through.
2489          */
2490         if ((sz % page_sz) == 0 && is_pow_of_2(sz / page_sz))
2491                 return sz;
2492
2493         /* Otherwise find closest (page_sz * power_of_2) product bigger than
2494          * user-set size to satisfy both user size request and kernel
2495          * requirements and substitute correct max_entries for map creation.
2496          */
2497         for (mul = 1; mul <= UINT_MAX / page_sz; mul <<= 1) {
2498                 if (mul * page_sz > sz)
2499                         return mul * page_sz;
2500         }
2501
2502         /* if it's impossible to satisfy the conditions (i.e., user size is
2503          * very close to UINT_MAX but is not a power-of-2 multiple of
2504          * page_size) then just return original size and let kernel reject it
2505          */
2506         return sz;
2507 }
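
/* Worked example: with 4096-byte pages, a requested size of 1,000,000 is
 * neither page-aligned nor a power-of-2 multiple of the page size, so the
 * loop above settles on 256 * 4096 = 1,048,576 bytes, the closest
 * acceptable size greater than the request.
 */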
2508
2509 static bool map_is_ringbuf(const struct bpf_map *map)
2510 {
2511         return map->def.type == BPF_MAP_TYPE_RINGBUF ||
2512                map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
2513 }
2514
2515 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
2516 {
2517         map->def.type = def->map_type;
2518         map->def.key_size = def->key_size;
2519         map->def.value_size = def->value_size;
2520         map->def.max_entries = def->max_entries;
2521         map->def.map_flags = def->map_flags;
2522         map->map_extra = def->map_extra;
2523
2524         map->numa_node = def->numa_node;
2525         map->btf_key_type_id = def->key_type_id;
2526         map->btf_value_type_id = def->value_type_id;
2527
2528         /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
2529         if (map_is_ringbuf(map))
2530                 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
2531
2532         if (def->parts & MAP_DEF_MAP_TYPE)
2533                 pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
2534
2535         if (def->parts & MAP_DEF_KEY_TYPE)
2536                 pr_debug("map '%s': found key [%u], sz = %u.\n",
2537                          map->name, def->key_type_id, def->key_size);
2538         else if (def->parts & MAP_DEF_KEY_SIZE)
2539                 pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
2540
2541         if (def->parts & MAP_DEF_VALUE_TYPE)
2542                 pr_debug("map '%s': found value [%u], sz = %u.\n",
2543                          map->name, def->value_type_id, def->value_size);
2544         else if (def->parts & MAP_DEF_VALUE_SIZE)
2545                 pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
2546
2547         if (def->parts & MAP_DEF_MAX_ENTRIES)
2548                 pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
2549         if (def->parts & MAP_DEF_MAP_FLAGS)
2550                 pr_debug("map '%s': found map_flags = 0x%x.\n", map->name, def->map_flags);
2551         if (def->parts & MAP_DEF_MAP_EXTRA)
2552                 pr_debug("map '%s': found map_extra = 0x%llx.\n", map->name,
2553                          (unsigned long long)def->map_extra);
2554         if (def->parts & MAP_DEF_PINNING)
2555                 pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
2556         if (def->parts & MAP_DEF_NUMA_NODE)
2557                 pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
2558
2559         if (def->parts & MAP_DEF_INNER_MAP)
2560                 pr_debug("map '%s': found inner map definition.\n", map->name);
2561 }
2562
2563 static const char *btf_var_linkage_str(__u32 linkage)
2564 {
2565         switch (linkage) {
2566         case BTF_VAR_STATIC: return "static";
2567         case BTF_VAR_GLOBAL_ALLOCATED: return "global";
2568         case BTF_VAR_GLOBAL_EXTERN: return "extern";
2569         default: return "unknown";
2570         }
2571 }
2572
2573 static int bpf_object__init_user_btf_map(struct bpf_object *obj,
2574                                          const struct btf_type *sec,
2575                                          int var_idx, int sec_idx,
2576                                          const Elf_Data *data, bool strict,
2577                                          const char *pin_root_path)
2578 {
2579         struct btf_map_def map_def = {}, inner_def = {};
2580         const struct btf_type *var, *def;
2581         const struct btf_var_secinfo *vi;
2582         const struct btf_var *var_extra;
2583         const char *map_name;
2584         struct bpf_map *map;
2585         int err;
2586
2587         vi = btf_var_secinfos(sec) + var_idx;
2588         var = btf__type_by_id(obj->btf, vi->type);
2589         var_extra = btf_var(var);
2590         map_name = btf__name_by_offset(obj->btf, var->name_off);
2591
2592         if (map_name == NULL || map_name[0] == '\0') {
2593                 pr_warn("map #%d: empty name.\n", var_idx);
2594                 return -EINVAL;
2595         }
2596         if ((__u64)vi->offset + vi->size > data->d_size) {
2597                 pr_warn("map '%s' BTF data is corrupted.\n", map_name);
2598                 return -EINVAL;
2599         }
2600         if (!btf_is_var(var)) {
2601                 pr_warn("map '%s': unexpected var kind %s.\n",
2602                         map_name, btf_kind_str(var));
2603                 return -EINVAL;
2604         }
2605         if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED) {
2606                 pr_warn("map '%s': unsupported map linkage %s.\n",
2607                         map_name, btf_var_linkage_str(var_extra->linkage));
2608                 return -EOPNOTSUPP;
2609         }
2610
2611         def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
2612         if (!btf_is_struct(def)) {
2613                 pr_warn("map '%s': unexpected def kind %s.\n",
2614                         map_name, btf_kind_str(def));
2615                 return -EINVAL;
2616         }
2617         if (def->size > vi->size) {
2618                 pr_warn("map '%s': invalid def size.\n", map_name);
2619                 return -EINVAL;
2620         }
2621
2622         map = bpf_object__add_map(obj);
2623         if (IS_ERR(map))
2624                 return PTR_ERR(map);
2625         map->name = strdup(map_name);
2626         if (!map->name) {
2627                 pr_warn("map '%s': failed to alloc map name.\n", map_name);
2628                 return -ENOMEM;
2629         }
2630         map->libbpf_type = LIBBPF_MAP_UNSPEC;
2631         map->def.type = BPF_MAP_TYPE_UNSPEC;
2632         map->sec_idx = sec_idx;
2633         map->sec_offset = vi->offset;
2634         map->btf_var_idx = var_idx;
2635         pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
2636                  map_name, map->sec_idx, map->sec_offset);
2637
2638         err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
2639         if (err)
2640                 return err;
2641
2642         fill_map_from_def(map, &map_def);
2643
2644         if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
2645                 err = build_map_pin_path(map, pin_root_path);
2646                 if (err) {
2647                         pr_warn("map '%s': couldn't build pin path.\n", map->name);
2648                         return err;
2649                 }
2650         }
2651
2652         if (map_def.parts & MAP_DEF_INNER_MAP) {
2653                 map->inner_map = calloc(1, sizeof(*map->inner_map));
2654                 if (!map->inner_map)
2655                         return -ENOMEM;
2656                 map->inner_map->fd = create_placeholder_fd();
2657                 if (map->inner_map->fd < 0)
2658                         return map->inner_map->fd;
2659                 map->inner_map->sec_idx = sec_idx;
2660                 map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
2661                 if (!map->inner_map->name)
2662                         return -ENOMEM;
2663                 sprintf(map->inner_map->name, "%s.inner", map_name);
2664
2665                 fill_map_from_def(map->inner_map, &inner_def);
2666         }
2667
2668         err = map_fill_btf_type_info(obj, map);
2669         if (err)
2670                 return err;
2671
2672         return 0;
2673 }
2674
2675 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
2676                                           const char *pin_root_path)
2677 {
2678         const struct btf_type *sec = NULL;
2679         int nr_types, i, vlen, err;
2680         const struct btf_type *t;
2681         const char *name;
2682         Elf_Data *data;
2683         Elf_Scn *scn;
2684
2685         if (obj->efile.btf_maps_shndx < 0)
2686                 return 0;
2687
2688         scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
2689         data = elf_sec_data(obj, scn);
2690         if (!scn || !data) {
2691                 pr_warn("elf: failed to get %s map definitions for %s\n",
2692                         MAPS_ELF_SEC, obj->path);
2693                 return -EINVAL;
2694         }
2695
2696         nr_types = btf__type_cnt(obj->btf);
2697         for (i = 1; i < nr_types; i++) {
2698                 t = btf__type_by_id(obj->btf, i);
2699                 if (!btf_is_datasec(t))
2700                         continue;
2701                 name = btf__name_by_offset(obj->btf, t->name_off);
2702                 if (strcmp(name, MAPS_ELF_SEC) == 0) {
2703                         sec = t;
2704                         obj->efile.btf_maps_sec_btf_id = i;
2705                         break;
2706                 }
2707         }
2708
2709         if (!sec) {
2710                 pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);
2711                 return -ENOENT;
2712         }
2713
2714         vlen = btf_vlen(sec);
2715         for (i = 0; i < vlen; i++) {
2716                 err = bpf_object__init_user_btf_map(obj, sec, i,
2717                                                     obj->efile.btf_maps_shndx,
2718                                                     data, strict,
2719                                                     pin_root_path);
2720                 if (err)
2721                         return err;
2722         }
2723
2724         return 0;
2725 }
2726
2727 static int bpf_object__init_maps(struct bpf_object *obj,
2728                                  const struct bpf_object_open_opts *opts)
2729 {
2730         const char *pin_root_path;
2731         bool strict;
2732         int err = 0;
2733
2734         strict = !OPTS_GET(opts, relaxed_maps, false);
2735         pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
2736
2737         err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);
2738         err = err ?: bpf_object__init_global_data_maps(obj);
2739         err = err ?: bpf_object__init_kconfig_map(obj);
2740         err = err ?: bpf_object_init_struct_ops(obj);
2741
2742         return err;
2743 }
2744
2745 static bool section_have_execinstr(struct bpf_object *obj, int idx)
2746 {
2747         Elf64_Shdr *sh;
2748
2749         sh = elf_sec_hdr(obj, elf_sec_by_idx(obj, idx));
2750         if (!sh)
2751                 return false;
2752
2753         return sh->sh_flags & SHF_EXECINSTR;
2754 }
2755
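/* Check whether the running kernel lacks support for any BTF kind this
 * object's BTF may contain. If so, bpf_object__sanitize_btf() below is used
 * on a throwaway copy to replace unsupported kinds with older, universally
 * supported ones (e.g., DATASEC -> STRUCT, FLOAT -> sized empty STRUCT).
 */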
2756 static bool btf_needs_sanitization(struct bpf_object *obj)
2757 {
2758         bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2759         bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2760         bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2761         bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2762         bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2763         bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2764         bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2765
2766         return !has_func || !has_datasec || !has_func_global || !has_float ||
2767                !has_decl_tag || !has_type_tag || !has_enum64;
2768 }
2769
2770 static int bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
2771 {
2772         bool has_func_global = kernel_supports(obj, FEAT_BTF_GLOBAL_FUNC);
2773         bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
2774         bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
2775         bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
2776         bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG);
2777         bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);
2778         bool has_enum64 = kernel_supports(obj, FEAT_BTF_ENUM64);
2779         int enum64_placeholder_id = 0;
2780         struct btf_type *t;
2781         int i, j, vlen;
2782
2783         for (i = 1; i < btf__type_cnt(btf); i++) {
2784                 t = (struct btf_type *)btf__type_by_id(btf, i);
2785
2786                 if ((!has_datasec && btf_is_var(t)) || (!has_decl_tag && btf_is_decl_tag(t))) {
2787                         /* replace VAR/DECL_TAG with INT */
2788                         t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
2789                         /*
2790                          * using size = 1 is the safest choice; 4 would be too
2791                          * big and cause a kernel BTF validation failure if the
2792                          * original variable took fewer than 4 bytes
2793                          */
2794                         t->size = 1;
2795                         *(int *)(t + 1) = BTF_INT_ENC(0, 0, 8);
2796                 } else if (!has_datasec && btf_is_datasec(t)) {
2797                         /* replace DATASEC with STRUCT */
2798                         const struct btf_var_secinfo *v = btf_var_secinfos(t);
2799                         struct btf_member *m = btf_members(t);
2800                         struct btf_type *vt;
2801                         char *name;
2802
2803                         name = (char *)btf__name_by_offset(btf, t->name_off);
2804                         while (*name) {
2805                                 if (*name == '.')
2806                                         *name = '_';
2807                                 name++;
2808                         }
2809
2810                         vlen = btf_vlen(t);
2811                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, vlen);
2812                         for (j = 0; j < vlen; j++, v++, m++) {
2813                                 /* order of field assignments is important */
2814                                 m->offset = v->offset * 8;
2815                                 m->type = v->type;
2816                                 /* preserve variable name as member name */
2817                                 vt = (void *)btf__type_by_id(btf, v->type);
2818                                 m->name_off = vt->name_off;
2819                         }
2820                 } else if (!has_func && btf_is_func_proto(t)) {
2821                         /* replace FUNC_PROTO with ENUM */
2822                         vlen = btf_vlen(t);
2823                         t->info = BTF_INFO_ENC(BTF_KIND_ENUM, 0, vlen);
2824                         t->size = sizeof(__u32); /* kernel enforced */
2825                 } else if (!has_func && btf_is_func(t)) {
2826                         /* replace FUNC with TYPEDEF */
2827                         t->info = BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0);
2828                 } else if (!has_func_global && btf_is_func(t)) {
2829                         /* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
2830                         t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
2831                 } else if (!has_float && btf_is_float(t)) {
2832                         /* replace FLOAT with an equally-sized empty STRUCT;
2833                          * since C compilers do not accept e.g. "float" as a
2834                          * valid struct name, make it anonymous
2835                          */
2836                         t->name_off = 0;
2837                         t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
2838                 } else if (!has_type_tag && btf_is_type_tag(t)) {
2839                         /* replace TYPE_TAG with a CONST */
2840                         t->name_off = 0;
2841                         t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);
2842                 } else if (!has_enum64 && btf_is_enum(t)) {
2843                         /* clear the kflag */
2844                         t->info = btf_type_info(btf_kind(t), btf_vlen(t), false);
2845                 } else if (!has_enum64 && btf_is_enum64(t)) {
2846                         /* replace ENUM64 with a union */
2847                         struct btf_member *m;
2848
2849                         if (enum64_placeholder_id == 0) {
2850                                 enum64_placeholder_id = btf__add_int(btf, "enum64_placeholder", 1, 0);
2851                                 if (enum64_placeholder_id < 0)
2852                                         return enum64_placeholder_id;
2853
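                                /* btf__add_int() may have reallocated the
                                 * underlying type data, invalidating t, so
                                 * re-fetch the type by its ID
                                 */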
2854                                 t = (struct btf_type *)btf__type_by_id(btf, i);
2855                         }
2856
2857                         m = btf_members(t);
2858                         vlen = btf_vlen(t);
2859                         t->info = BTF_INFO_ENC(BTF_KIND_UNION, 0, vlen);
2860                         for (j = 0; j < vlen; j++, m++) {
2861                                 m->type = enum64_placeholder_id;
2862                                 m->offset = 0;
2863                         }
2864                 }
2865         }
2866
2867         return 0;
2868 }
2869
2870 static bool libbpf_needs_btf(const struct bpf_object *obj)
2871 {
2872         return obj->efile.btf_maps_shndx >= 0 ||
2873                obj->efile.st_ops_shndx >= 0 ||
2874                obj->efile.st_ops_link_shndx >= 0 ||
2875                obj->nr_extern > 0;
2876 }
2877
2878 static bool kernel_needs_btf(const struct bpf_object *obj)
2879 {
2880         return obj->efile.st_ops_shndx >= 0 || obj->efile.st_ops_link_shndx >= 0;
2881 }
2882
2883 static int bpf_object__init_btf(struct bpf_object *obj,
2884                                 Elf_Data *btf_data,
2885                                 Elf_Data *btf_ext_data)
2886 {
2887         int err = -ENOENT;
2888
2889         if (btf_data) {
2890                 obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);
2891                 err = libbpf_get_error(obj->btf);
2892                 if (err) {
2893                         obj->btf = NULL;
2894                         pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err);
2895                         goto out;
2896                 }
2897                 /* enforce 8-byte pointers for BPF-targeted BTFs */
2898                 btf__set_pointer_size(obj->btf, 8);
2899         }
2900         if (btf_ext_data) {
2901                 struct btf_ext_info *ext_segs[3];
2902                 int seg_num, sec_num;
2903
2904                 if (!obj->btf) {
2905                         pr_debug("Ignoring ELF section %s: the ELF section %s it depends on is missing.\n",
2906                                  BTF_EXT_ELF_SEC, BTF_ELF_SEC);
2907                         goto out;
2908                 }
2909                 obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size);
2910                 err = libbpf_get_error(obj->btf_ext);
2911                 if (err) {
2912                         pr_warn("Error loading ELF section %s: %d. Ignoring it and continuing.\n",
2913                                 BTF_EXT_ELF_SEC, err);
2914                         obj->btf_ext = NULL;
2915                         goto out;
2916                 }
2917
2918                 /* setup .BTF.ext to ELF section mapping */
2919                 ext_segs[0] = &obj->btf_ext->func_info;
2920                 ext_segs[1] = &obj->btf_ext->line_info;
2921                 ext_segs[2] = &obj->btf_ext->core_relo_info;
2922                 for (seg_num = 0; seg_num < ARRAY_SIZE(ext_segs); seg_num++) {
2923                         struct btf_ext_info *seg = ext_segs[seg_num];
2924                         const struct btf_ext_info_sec *sec;
2925                         const char *sec_name;
2926                         Elf_Scn *scn;
2927
2928                         if (seg->sec_cnt == 0)
2929                                 continue;
2930
2931                         seg->sec_idxs = calloc(seg->sec_cnt, sizeof(*seg->sec_idxs));
2932                         if (!seg->sec_idxs) {
2933                                 err = -ENOMEM;
2934                                 goto out;
2935                         }
2936
2937                         sec_num = 0;
2938                         for_each_btf_ext_sec(seg, sec) {
2939                                 /* preventively increment index to avoid doing
2940                                  * this before every continue below
2941                                  */
2942                                 sec_num++;
2943
2944                                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
2945                                 if (str_is_empty(sec_name))
2946                                         continue;
2947                                 scn = elf_sec_by_name(obj, sec_name);
2948                                 if (!scn)
2949                                         continue;
2950
2951                                 seg->sec_idxs[sec_num - 1] = elf_ndxscn(scn);
2952                         }
2953                 }
2954         }
2955 out:
2956         if (err && libbpf_needs_btf(obj)) {
2957                 pr_warn("BTF is required, but is missing or corrupted.\n");
2958                 return err;
2959         }
2960         return 0;
2961 }
2962
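/* qsort() comparator: order a DATASEC's variables by in-section offset */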
2963 static int compare_vsi_off(const void *_a, const void *_b)
2964 {
2965         const struct btf_var_secinfo *a = _a;
2966         const struct btf_var_secinfo *b = _b;
2967
2968         return a->offset - b->offset;
2969 }
2970
2971 static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,
2972                              struct btf_type *t)
2973 {
2974         __u32 size = 0, i, vars = btf_vlen(t);
2975         const char *sec_name = btf__name_by_offset(btf, t->name_off);
2976         struct btf_var_secinfo *vsi;
2977         bool fixup_offsets = false;
2978         int err;
2979
2980         if (!sec_name) {
2981                 pr_debug("No name found in string section for DATASEC kind.\n");
2982                 return -ENOENT;
2983         }
2984
2985         /* Extern-backing datasecs (.ksyms, .kconfig) have their size and
2986          * variable offsets set in the previous step. Further, not every
2987          * extern BTF VAR has a corresponding ELF symbol preserved, so we skip
2988          * all fixups altogether for such sections and go straight to sorting
2989          * VARs within their DATASEC.
2990          */
2991         if (strcmp(sec_name, KCONFIG_SEC) == 0 || strcmp(sec_name, KSYMS_SEC) == 0)
2992                 goto sort_vars;
2993
2994         /* Clang leaves DATASEC size and VAR offsets as zeroes, so we need to
2995          * fix this up. But the BPF static linker already fixes this up and
2996          * fills in all the sizes and offsets during static linking, so this
2997          * step has to be optional. The STV_HIDDEN handling, however, is
2998          * non-optional for any non-extern DATASEC, so the variable fixup
2999          * loop below handles both tasks at the same time, paying the cost
3000          * of BTF VAR <-> ELF symbol matching just once.
3001          */
3002         if (t->size == 0) {
3003                 err = find_elf_sec_sz(obj, sec_name, &size);
3004                 if (err || !size) {
3005                         pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n",
3006                                  sec_name, size, err);
3007                         return -ENOENT;
3008                 }
3009
3010                 t->size = size;
3011                 fixup_offsets = true;
3012         }
3013
3014         for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {
3015                 const struct btf_type *t_var;
3016                 struct btf_var *var;
3017                 const char *var_name;
3018                 Elf64_Sym *sym;
3019
3020                 t_var = btf__type_by_id(btf, vsi->type);
3021                 if (!t_var || !btf_is_var(t_var)) {
3022                         pr_debug("sec '%s': unexpected non-VAR type found\n", sec_name);
3023                         return -EINVAL;
3024                 }
3025
3026                 var = btf_var(t_var);
3027                 if (var->linkage == BTF_VAR_STATIC || var->linkage == BTF_VAR_GLOBAL_EXTERN)
3028                         continue;
3029
3030                 var_name = btf__name_by_offset(btf, t_var->name_off);
3031                 if (!var_name) {
3032                         pr_debug("sec '%s': failed to find name of DATASEC's member #%d\n",
3033                                  sec_name, i);
3034                         return -ENOENT;
3035                 }
3036
3037                 sym = find_elf_var_sym(obj, var_name);
3038                 if (IS_ERR(sym)) {
3039                         pr_debug("sec '%s': failed to find ELF symbol for VAR '%s'\n",
3040                                  sec_name, var_name);
3041                         return -ENOENT;
3042                 }
3043
3044                 if (fixup_offsets)
3045                         vsi->offset = sym->st_value;
3046
3047                 /* if a variable is a global/weak symbol, but has restricted
3048                  * (STV_HIDDEN or STV_INTERNAL) visibility, mark its BTF VAR
3049                  * as static. This follows similar logic for functions (BPF
3050                  * subprogs) and influences libbpf's further decisions about
3051                  * whether to mark global data BPF array maps as
3052                  * BPF_F_MMAPABLE.
3053                  */
3054                 if (ELF64_ST_VISIBILITY(sym->st_other) == STV_HIDDEN
3055                     || ELF64_ST_VISIBILITY(sym->st_other) == STV_INTERNAL)
3056                         var->linkage = BTF_VAR_STATIC;
3057         }
3058
3059 sort_vars:
3060         qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off);
3061         return 0;
3062 }
3063
3064 static int bpf_object_fixup_btf(struct bpf_object *obj)
3065 {
3066         int i, n, err = 0;
3067
3068         if (!obj->btf)
3069                 return 0;
3070
3071         n = btf__type_cnt(obj->btf);
3072         for (i = 1; i < n; i++) {
3073                 struct btf_type *t = btf_type_by_id(obj->btf, i);
3074
3075                 /* Loader needs to fix up some of the things compiler
3076                  * couldn't get its hands on while emitting BTF. This
3077                  * is section size and global variable offset. We use
3078                  * the info from the ELF itself for this purpose.
3079                  */
3080                 if (btf_is_datasec(t)) {
3081                         err = btf_fixup_datasec(obj, obj->btf, t);
3082                         if (err)
3083                                 return err;
3084                 }
3085         }
3086
3087         return 0;
3088 }
3089
3090 static bool prog_needs_vmlinux_btf(struct bpf_program *prog)
3091 {
3092         if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
3093             prog->type == BPF_PROG_TYPE_LSM)
3094                 return true;
3095
3096         /* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
3097          * also need vmlinux BTF
3098          */
3099         if (prog->type == BPF_PROG_TYPE_TRACING && !prog->attach_prog_fd)
3100                 return true;
3101
3102         return false;
3103 }
3104
3105 static bool map_needs_vmlinux_btf(struct bpf_map *map)
3106 {
3107         return bpf_map__is_struct_ops(map);
3108 }
3109
3110 static bool obj_needs_vmlinux_btf(const struct bpf_object *obj)
3111 {
3112         struct bpf_program *prog;
3113         struct bpf_map *map;
3114         int i;
3115
3116         /* CO-RE relocations need kernel BTF, only when btf_custom_path
3117          * is not specified
3118          */
3119         if (obj->btf_ext && obj->btf_ext->core_relo_info.len && !obj->btf_custom_path)
3120                 return true;
3121
3122         /* Support for typed ksyms needs kernel BTF */
3123         for (i = 0; i < obj->nr_extern; i++) {
3124                 const struct extern_desc *ext;
3125
3126                 ext = &obj->externs[i];
3127                 if (ext->type == EXT_KSYM && ext->ksym.type_id)
3128                         return true;
3129         }
3130
3131         bpf_object__for_each_program(prog, obj) {
3132                 if (!prog->autoload)
3133                         continue;
3134                 if (prog_needs_vmlinux_btf(prog))
3135                         return true;
3136         }
3137
3138         bpf_object__for_each_map(map, obj) {
3139                 if (map_needs_vmlinux_btf(map))
3140                         return true;
3141         }
3142
3143         return false;
3144 }
3145
3146 static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force)
3147 {
3148         int err;
3149
3150         /* btf_vmlinux could be loaded earlier */
3151         if (obj->btf_vmlinux || obj->gen_loader)
3152                 return 0;
3153
3154         if (!force && !obj_needs_vmlinux_btf(obj))
3155                 return 0;
3156
3157         obj->btf_vmlinux = btf__load_vmlinux_btf();
3158         err = libbpf_get_error(obj->btf_vmlinux);
3159         if (err) {
3160                 pr_warn("Error loading vmlinux BTF: %d\n", err);
3161                 obj->btf_vmlinux = NULL;
3162                 return err;
3163         }
3164         return 0;
3165 }
3166
3167 static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
3168 {
3169         struct btf *kern_btf = obj->btf;
3170         bool btf_mandatory, sanitize;
3171         int i, err = 0;
3172
3173         if (!obj->btf)
3174                 return 0;
3175
3176         if (!kernel_supports(obj, FEAT_BTF)) {
3177                 if (kernel_needs_btf(obj)) {
3178                         err = -EOPNOTSUPP;
3179                         goto report;
3180                 }
3181                 pr_debug("Kernel doesn't support BTF, skipping its upload.\n");
3182                 return 0;
3183         }
3184
3185         /* Even though some subprogs are global/weak, user might prefer more
3186          * permissive BPF verification process that BPF verifier performs for
3187          * static functions, taking into account more context from the caller
3188          * functions. In such case, they need to mark such subprogs with
3189          * __attribute__((visibility("hidden"))) and libbpf will adjust
3190          * corresponding FUNC BTF type to be marked as static and trigger more
3191          * involved BPF verification process.
3192          */
3193         for (i = 0; i < obj->nr_programs; i++) {
3194                 struct bpf_program *prog = &obj->programs[i];
3195                 struct btf_type *t;
3196                 const char *name;
3197                 int j, n;
3198
3199                 if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
3200                         continue;
3201
3202                 n = btf__type_cnt(obj->btf);
3203                 for (j = 1; j < n; j++) {
3204                         t = btf_type_by_id(obj->btf, j);
3205                         if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
3206                                 continue;
3207
3208                         name = btf__str_by_offset(obj->btf, t->name_off);
3209                         if (strcmp(name, prog->name) != 0)
3210                                 continue;
3211
3212                         t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
3213                         break;
3214                 }
3215         }
3216
3217         sanitize = btf_needs_sanitization(obj);
3218         if (sanitize) {
3219                 const void *raw_data;
3220                 __u32 sz;
3221
3222                 /* clone BTF to sanitize a copy and leave the original intact */
3223                 raw_data = btf__raw_data(obj->btf, &sz);
3224                 kern_btf = btf__new(raw_data, sz);
3225                 err = libbpf_get_error(kern_btf);
3226                 if (err)
3227                         return err;
3228
3229                 /* enforce 8-byte pointers for BPF-targeted BTFs */
3230                 btf__set_pointer_size(obj->btf, 8);
3231                 err = bpf_object__sanitize_btf(obj, kern_btf);
3232                 if (err)
3233                         return err;
3234         }
3235
3236         if (obj->gen_loader) {
3237                 __u32 raw_size = 0;
3238                 const void *raw_data = btf__raw_data(kern_btf, &raw_size);
3239
3240                 if (!raw_data)
3241                         return -ENOMEM;
3242                 bpf_gen__load_btf(obj->gen_loader, raw_data, raw_size);
3243                 /* Pretend to have valid FD to pass various fd >= 0 checks.
3244                  * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.
3245                  */
3246                 btf__set_fd(kern_btf, 0);
3247         } else {
3248                 /* currently BPF_BTF_LOAD only supports log_level 1 */
3249                 err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size,
3250                                            obj->log_level ? 1 : 0, obj->token_fd);
3251         }
3252         if (sanitize) {
3253                 if (!err) {
3254                         /* move fd to libbpf's BTF */
3255                         btf__set_fd(obj->btf, btf__fd(kern_btf));
3256                         btf__set_fd(kern_btf, -1);
3257                 }
3258                 btf__free(kern_btf);
3259         }
3260 report:
3261         if (err) {
3262                 btf_mandatory = kernel_needs_btf(obj);
3263                 pr_warn("Error loading .BTF into kernel: %d. %s\n", err,
3264                         btf_mandatory ? "BTF is mandatory, can't proceed."
3265                                       : "BTF is optional, ignoring.");
3266                 if (!btf_mandatory)
3267                         err = 0;
3268         }
3269         return err;
3270 }
3271
3272 static const char *elf_sym_str(const struct bpf_object *obj, size_t off)
3273 {
3274         const char *name;
3275
3276         name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, off);
3277         if (!name) {
3278                 pr_warn("elf: failed to get symbol name string at offset %zu from %s: %s\n",
3279                         off, obj->path, elf_errmsg(-1));
3280                 return NULL;
3281         }
3282
3283         return name;
3284 }
3285
3286 static const char *elf_sec_str(const struct bpf_object *obj, size_t off)
3287 {
3288         const char *name;
3289
3290         name = elf_strptr(obj->efile.elf, obj->efile.shstrndx, off);
3291         if (!name) {
3292                 pr_warn("elf: failed to get section name string at offset %zu from %s: %s\n",
3293                         off, obj->path, elf_errmsg(-1));
3294                 return NULL;
3295         }
3296
3297         return name;
3298 }
3299
3300 static Elf_Scn *elf_sec_by_idx(const struct bpf_object *obj, size_t idx)
3301 {
3302         Elf_Scn *scn;
3303
3304         scn = elf_getscn(obj->efile.elf, idx);
3305         if (!scn) {
3306                 pr_warn("elf: failed to get section(%zu) from %s: %s\n",
3307                         idx, obj->path, elf_errmsg(-1));
3308                 return NULL;
3309         }
3310         return scn;
3311 }
3312
3313 static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name)
3314 {
3315         Elf_Scn *scn = NULL;
3316         Elf *elf = obj->efile.elf;
3317         const char *sec_name;
3318
3319         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3320                 sec_name = elf_sec_name(obj, scn);
3321                 if (!sec_name)
3322                         return NULL;
3323
3324                 if (strcmp(sec_name, name) != 0)
3325                         continue;
3326
3327                 return scn;
3328         }
3329         return NULL;
3330 }
3331
3332 static Elf64_Shdr *elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn)
3333 {
3334         Elf64_Shdr *shdr;
3335
3336         if (!scn)
3337                 return NULL;
3338
3339         shdr = elf64_getshdr(scn);
3340         if (!shdr) {
3341                 pr_warn("elf: failed to get section(%zu) header from %s: %s\n",
3342                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3343                 return NULL;
3344         }
3345
3346         return shdr;
3347 }
3348
3349 static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn)
3350 {
3351         const char *name;
3352         Elf64_Shdr *sh;
3353
3354         if (!scn)
3355                 return NULL;
3356
3357         sh = elf_sec_hdr(obj, scn);
3358         if (!sh)
3359                 return NULL;
3360
3361         name = elf_sec_str(obj, sh->sh_name);
3362         if (!name) {
3363                 pr_warn("elf: failed to get section(%zu) name from %s: %s\n",
3364                         elf_ndxscn(scn), obj->path, elf_errmsg(-1));
3365                 return NULL;
3366         }
3367
3368         return name;
3369 }
3370
3371 static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
3372 {
3373         Elf_Data *data;
3374
3375         if (!scn)
3376                 return NULL;
3377
3378         data = elf_getdata(scn, 0);
3379         if (!data) {
3380                 pr_warn("elf: failed to get section(%zu) %s data from %s: %s\n",
3381                         elf_ndxscn(scn), elf_sec_name(obj, scn) ?: "<?>",
3382                         obj->path, elf_errmsg(-1));
3383                 return NULL;
3384         }
3385
3386         return data;
3387 }
3388
3389 static Elf64_Sym *elf_sym_by_idx(const struct bpf_object *obj, size_t idx)
3390 {
3391         if (idx >= obj->efile.symbols->d_size / sizeof(Elf64_Sym))
3392                 return NULL;
3393
3394         return (Elf64_Sym *)obj->efile.symbols->d_buf + idx;
3395 }
3396
3397 static Elf64_Rel *elf_rel_by_idx(Elf_Data *data, size_t idx)
3398 {
3399         if (idx >= data->d_size / sizeof(Elf64_Rel))
3400                 return NULL;
3401
3402         return (Elf64_Rel *)data->d_buf + idx;
3403 }
3404
3405 static bool is_sec_name_dwarf(const char *name)
3406 {
3407         /* approximation, but the actual list is too long */
3408         return str_has_pfx(name, ".debug_");
3409 }
3410
3411 static bool ignore_elf_section(Elf64_Shdr *hdr, const char *name)
3412 {
3413         /* no special handling of .strtab */
3414         if (hdr->sh_type == SHT_STRTAB)
3415                 return true;
3416
3417         /* ignore .llvm_addrsig section as well */
3418         if (hdr->sh_type == SHT_LLVM_ADDRSIG)
3419                 return true;
3420
3421         /* no subprograms will lead to an empty .text section, ignore it */
3422         if (hdr->sh_type == SHT_PROGBITS && hdr->sh_size == 0 &&
3423             strcmp(name, ".text") == 0)
3424                 return true;
3425
3426         /* DWARF sections */
3427         if (is_sec_name_dwarf(name))
3428                 return true;
3429
3430         if (str_has_pfx(name, ".rel")) {
3431                 name += sizeof(".rel") - 1;
3432                 /* DWARF section relocations */
3433                 if (is_sec_name_dwarf(name))
3434                         return true;
3435
3436                 /* .BTF and .BTF.ext don't need relocations */
3437                 if (strcmp(name, BTF_ELF_SEC) == 0 ||
3438                     strcmp(name, BTF_EXT_ELF_SEC) == 0)
3439                         return true;
3440         }
3441
3442         return false;
3443 }
3444
3445 static int cmp_progs(const void *_a, const void *_b)
3446 {
3447         const struct bpf_program *a = _a;
3448         const struct bpf_program *b = _b;
3449
3450         if (a->sec_idx != b->sec_idx)
3451                 return a->sec_idx < b->sec_idx ? -1 : 1;
3452
3453         /* sec_insn_off can't be the same within the section */
3454         return a->sec_insn_off < b->sec_insn_off ? -1 : 1;
3455 }
3456
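/* Scan all ELF sections of the object file. This is done in two passes: the
 * first pass only locates the symbol table (SHT_SYMTAB), since subsequent
 * parsing depends on it; the second pass classifies and records all other
 * sections (license, version, .maps, .BTF/.BTF.ext, code, data, relocations).
 */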
3457 static int bpf_object__elf_collect(struct bpf_object *obj)
3458 {
3459         struct elf_sec_desc *sec_desc;
3460         Elf *elf = obj->efile.elf;
3461         Elf_Data *btf_ext_data = NULL;
3462         Elf_Data *btf_data = NULL;
3463         int idx = 0, err = 0;
3464         const char *name;
3465         Elf_Data *data;
3466         Elf_Scn *scn;
3467         Elf64_Shdr *sh;
3468
3469         /* ELF section indices are 0-based, but sec #0 is the special
3470          * "invalid" section. Since the section count retrieved by
3471          * elf_getshdrnum() includes sec #0, it is already the size needed
3472          * for an array holding all the sections.
3473          */
3474         if (elf_getshdrnum(obj->efile.elf, &obj->efile.sec_cnt)) {
3475                 pr_warn("elf: failed to get the number of sections for %s: %s\n",
3476                         obj->path, elf_errmsg(-1));
3477                 return -LIBBPF_ERRNO__FORMAT;
3478         }
3479         obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));
3480         if (!obj->efile.secs)
3481                 return -ENOMEM;
3482
3483         /* a bunch of ELF parsing functionality depends on processing symbols,
3484          * so do the first pass and find the symbol table
3485          */
3486         scn = NULL;
3487         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3488                 sh = elf_sec_hdr(obj, scn);
3489                 if (!sh)
3490                         return -LIBBPF_ERRNO__FORMAT;
3491
3492                 if (sh->sh_type == SHT_SYMTAB) {
3493                         if (obj->efile.symbols) {
3494                                 pr_warn("elf: multiple symbol tables in %s\n", obj->path);
3495                                 return -LIBBPF_ERRNO__FORMAT;
3496                         }
3497
3498                         data = elf_sec_data(obj, scn);
3499                         if (!data)
3500                                 return -LIBBPF_ERRNO__FORMAT;
3501
3502                         idx = elf_ndxscn(scn);
3503
3504                         obj->efile.symbols = data;
3505                         obj->efile.symbols_shndx = idx;
3506                         obj->efile.strtabidx = sh->sh_link;
3507                 }
3508         }
3509
3510         if (!obj->efile.symbols) {
3511                 pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
3512                         obj->path);
3513                 return -ENOENT;
3514         }
3515
3516         scn = NULL;
3517         while ((scn = elf_nextscn(elf, scn)) != NULL) {
3518                 idx = elf_ndxscn(scn);
3519                 sec_desc = &obj->efile.secs[idx];
3520
3521                 sh = elf_sec_hdr(obj, scn);
3522                 if (!sh)
3523                         return -LIBBPF_ERRNO__FORMAT;
3524
3525                 name = elf_sec_str(obj, sh->sh_name);
3526                 if (!name)
3527                         return -LIBBPF_ERRNO__FORMAT;
3528
3529                 if (ignore_elf_section(sh, name))
3530                         continue;
3531
3532                 data = elf_sec_data(obj, scn);
3533                 if (!data)
3534                         return -LIBBPF_ERRNO__FORMAT;
3535
3536                 pr_debug("elf: section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
3537                          idx, name, (unsigned long)data->d_size,
3538                          (int)sh->sh_link, (unsigned long)sh->sh_flags,
3539                          (int)sh->sh_type);
3540
3541                 if (strcmp(name, "license") == 0) {
3542                         err = bpf_object__init_license(obj, data->d_buf, data->d_size);
3543                         if (err)
3544                                 return err;
3545                 } else if (strcmp(name, "version") == 0) {
3546                         err = bpf_object__init_kversion(obj, data->d_buf, data->d_size);
3547                         if (err)
3548                                 return err;
3549                 } else if (strcmp(name, "maps") == 0) {
3550                         pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
3551                         return -ENOTSUP;
3552                 } else if (strcmp(name, MAPS_ELF_SEC) == 0) {
3553                         obj->efile.btf_maps_shndx = idx;
3554                 } else if (strcmp(name, BTF_ELF_SEC) == 0) {
3555                         if (sh->sh_type != SHT_PROGBITS)
3556                                 return -LIBBPF_ERRNO__FORMAT;
3557                         btf_data = data;
3558                 } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) {
3559                         if (sh->sh_type != SHT_PROGBITS)
3560                                 return -LIBBPF_ERRNO__FORMAT;
3561                         btf_ext_data = data;
3562                 } else if (sh->sh_type == SHT_SYMTAB) {
3563                         /* already processed during the first pass above */
3564                 } else if (sh->sh_type == SHT_PROGBITS && data->d_size > 0) {
3565                         if (sh->sh_flags & SHF_EXECINSTR) {
3566                                 if (strcmp(name, ".text") == 0)
3567                                         obj->efile.text_shndx = idx;
3568                                 err = bpf_object__add_programs(obj, data, name, idx);
3569                                 if (err)
3570                                         return err;
3571                         } else if (strcmp(name, DATA_SEC) == 0 ||
3572                                    str_has_pfx(name, DATA_SEC ".")) {
3573                                 sec_desc->sec_type = SEC_DATA;
3574                                 sec_desc->shdr = sh;
3575                                 sec_desc->data = data;
3576                         } else if (strcmp(name, RODATA_SEC) == 0 ||
3577                                    str_has_pfx(name, RODATA_SEC ".")) {
3578                                 sec_desc->sec_type = SEC_RODATA;
3579                                 sec_desc->shdr = sh;
3580                                 sec_desc->data = data;
3581                         } else if (strcmp(name, STRUCT_OPS_SEC) == 0) {
3582                                 obj->efile.st_ops_data = data;
3583                                 obj->efile.st_ops_shndx = idx;
3584                         } else if (strcmp(name, STRUCT_OPS_LINK_SEC) == 0) {
3585                                 obj->efile.st_ops_link_data = data;
3586                                 obj->efile.st_ops_link_shndx = idx;
3587                         } else {
3588                                 pr_info("elf: skipping unrecognized data section(%d) %s\n",
3589                                         idx, name);
3590                         }
3591                 } else if (sh->sh_type == SHT_REL) {
3592                         int targ_sec_idx = sh->sh_info; /* points to other section */
3593
3594                         if (sh->sh_entsize != sizeof(Elf64_Rel) ||
3595                             targ_sec_idx >= obj->efile.sec_cnt)
3596                                 return -LIBBPF_ERRNO__FORMAT;
3597
3598                         /* Only do relo for section with exec instructions */
3599                         if (!section_have_execinstr(obj, targ_sec_idx) &&
3600                             strcmp(name, ".rel" STRUCT_OPS_SEC) &&
3601                             strcmp(name, ".rel" STRUCT_OPS_LINK_SEC) &&
3602                             strcmp(name, ".rel" MAPS_ELF_SEC)) {
3603                                 pr_info("elf: skipping relo section(%d) %s for section(%d) %s\n",
3604                                         idx, name, targ_sec_idx,
3605                                         elf_sec_name(obj, elf_sec_by_idx(obj, targ_sec_idx)) ?: "<?>");
3606                                 continue;
3607                         }
3608
3609                         sec_desc->sec_type = SEC_RELO;
3610                         sec_desc->shdr = sh;
3611                         sec_desc->data = data;
3612                 } else if (sh->sh_type == SHT_NOBITS && (strcmp(name, BSS_SEC) == 0 ||
3613                                                          str_has_pfx(name, BSS_SEC "."))) {
3614                         sec_desc->sec_type = SEC_BSS;
3615                         sec_desc->shdr = sh;
3616                         sec_desc->data = data;
3617                 } else {
3618                         pr_info("elf: skipping section(%d) %s (size %zu)\n", idx, name,
3619                                 (size_t)sh->sh_size);
3620                 }
3621         }
3622
3623         if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) {
3624                 pr_warn("elf: symbol strings section missing or invalid in %s\n", obj->path);
3625                 return -LIBBPF_ERRNO__FORMAT;
3626         }
3627
3628         /* sort BPF programs by section index and in-section instruction
3629          * offset for faster search
3630          */
3631         if (obj->nr_programs)
3632                 qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);
3633
3634         return bpf_object__init_btf(obj, btf_data, btf_ext_data);
3635 }
3636
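/* For reference, externs on the BPF program side are declared roughly like
 * this (__kconfig and __ksym are section-placing macros from bpf_helpers.h):
 *
 *	extern unsigned int CONFIG_HZ __kconfig;
 *	extern void bpf_rcu_read_lock(void) __ksym;
 *
 * These compile down to undefined (SHN_UNDEF) symbols of type NOTYPE with
 * GLOBAL or WEAK binding, which is exactly what the check below matches.
 */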
3637 static bool sym_is_extern(const Elf64_Sym *sym)
3638 {
3639         int bind = ELF64_ST_BIND(sym->st_info);
3640         /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */
3641         return sym->st_shndx == SHN_UNDEF &&
3642                (bind == STB_GLOBAL || bind == STB_WEAK) &&
3643                ELF64_ST_TYPE(sym->st_info) == STT_NOTYPE;
3644 }
3645
3646 static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx)
3647 {
3648         int bind = ELF64_ST_BIND(sym->st_info);
3649         int type = ELF64_ST_TYPE(sym->st_info);
3650
3651         /* in .text section */
3652         if (sym->st_shndx != text_shndx)
3653                 return false;
3654
3655         /* local function */
3656         if (bind == STB_LOCAL && type == STT_SECTION)
3657                 return true;
3658
3659         /* global function */
3660         return bind == STB_GLOBAL && type == STT_FUNC;
3661 }
3662
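/* Find the BTF type ID of an extern VAR or FUNC with the given name. Returns
 * -EINVAL if a type with a matching name exists but doesn't have extern
 * linkage, -ENOENT if no matching type is found at all.
 */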
3663 static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
3664 {
3665         const struct btf_type *t;
3666         const char *tname;
3667         int i, n;
3668
3669         if (!btf)
3670                 return -ESRCH;
3671
3672         n = btf__type_cnt(btf);
3673         for (i = 1; i < n; i++) {
3674                 t = btf__type_by_id(btf, i);
3675
3676                 if (!btf_is_var(t) && !btf_is_func(t))
3677                         continue;
3678
3679                 tname = btf__name_by_offset(btf, t->name_off);
3680                 if (strcmp(tname, ext_name))
3681                         continue;
3682
3683                 if (btf_is_var(t) &&
3684                     btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
3685                         return -EINVAL;
3686
3687                 if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
3688                         return -EINVAL;
3689
3690                 return i;
3691         }
3692
3693         return -ENOENT;
3694 }
3695
3696 static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id)
{
3697         const struct btf_var_secinfo *vs;
3698         const struct btf_type *t;
3699         int i, j, n;
3700
3701         if (!btf)
3702                 return -ESRCH;
3703
3704         n = btf__type_cnt(btf);
3705         for (i = 1; i < n; i++) {
3706                 t = btf__type_by_id(btf, i);
3707
3708                 if (!btf_is_datasec(t))
3709                         continue;
3710
3711                 vs = btf_var_secinfos(t);
3712                 for (j = 0; j < btf_vlen(t); j++, vs++) {
3713                         if (vs->type == ext_btf_id)
3714                                 return i;
3715                 }
3716         }
3717
3718         return -ENOENT;
3719 }
3720
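/* Classify the BTF type of a .kconfig extern into one of the KCFG_* classes:
 * bool, char, integer (power-of-2 size of up to 8 bytes), the libbpf_tristate
 * enum, or char array. Everything else is KCFG_UNKNOWN, which the caller
 * rejects with -ENOTSUP.
 */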
3721 static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
3722                                      bool *is_signed)
3723 {
3724         const struct btf_type *t;
3725         const char *name;
3726
3727         t = skip_mods_and_typedefs(btf, id, NULL);
3728         name = btf__name_by_offset(btf, t->name_off);
3729
3730         if (is_signed)
3731                 *is_signed = false;
3732         switch (btf_kind(t)) {
3733         case BTF_KIND_INT: {
3734                 int enc = btf_int_encoding(t);
3735
3736                 if (enc & BTF_INT_BOOL)
3737                         return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
3738                 if (is_signed)
3739                         *is_signed = enc & BTF_INT_SIGNED;
3740                 if (t->size == 1)
3741                         return KCFG_CHAR;
3742                 if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
3743                         return KCFG_UNKNOWN;
3744                 return KCFG_INT;
3745         }
3746         case BTF_KIND_ENUM:
3747                 if (t->size != 4)
3748                         return KCFG_UNKNOWN;
3749                 if (strcmp(name, "libbpf_tristate"))
3750                         return KCFG_UNKNOWN;
3751                 return KCFG_TRISTATE;
3752         case BTF_KIND_ENUM64:
3753                 if (strcmp(name, "libbpf_tristate"))
3754                         return KCFG_UNKNOWN;
3755                 return KCFG_TRISTATE;
3756         case BTF_KIND_ARRAY:
3757                 if (btf_array(t)->nelems == 0)
3758                         return KCFG_UNKNOWN;
3759                 if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
3760                         return KCFG_UNKNOWN;
3761                 return KCFG_CHAR_ARR;
3762         default:
3763                 return KCFG_UNKNOWN;
3764         }
3765 }
3766
3767 static int cmp_externs(const void *_a, const void *_b)
3768 {
3769         const struct extern_desc *a = _a;
3770         const struct extern_desc *b = _b;
3771
3772         if (a->type != b->type)
3773                 return a->type < b->type ? -1 : 1;
3774
3775         if (a->type == EXT_KCFG) {
3776                 /* descending order by alignment requirements */
3777                 if (a->kcfg.align != b->kcfg.align)
3778                         return a->kcfg.align > b->kcfg.align ? -1 : 1;
3779                 /* ascending order by size, within same alignment class */
3780                 if (a->kcfg.sz != b->kcfg.sz)
3781                         return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
3782         }
3783
3784         /* resolve ties by name */
3785         return strcmp(a->name, b->name);
3786 }
3787
3788 static int find_int_btf_id(const struct btf *btf)
3789 {
3790         const struct btf_type *t;
3791         int i, n;
3792
3793         n = btf__type_cnt(btf);
3794         for (i = 1; i < n; i++) {
3795                 t = btf__type_by_id(btf, i);
3796
3797                 if (btf_is_int(t) && btf_int_bits(t) == 32)
3798                         return i;
3799         }
3800
3801         return 0;
3802 }
3803
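/* If the .ksyms DATASEC references any extern function, add a dummy int VAR
 * ("dummy_ksym") to BTF. The kernel expects DATASEC entries to reference
 * variables, so bpf_object__collect_externs() later substitutes this dummy
 * VAR's ID for each FUNC entry in that datasec.
 */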
3804 static int add_dummy_ksym_var(struct btf *btf)
3805 {
3806         int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
3807         const struct btf_var_secinfo *vs;
3808         const struct btf_type *sec;
3809
3810         if (!btf)
3811                 return 0;
3812
3813         sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
3814                                             BTF_KIND_DATASEC);
3815         if (sec_btf_id < 0)
3816                 return 0;
3817
3818         sec = btf__type_by_id(btf, sec_btf_id);
3819         vs = btf_var_secinfos(sec);
3820         for (i = 0; i < btf_vlen(sec); i++, vs++) {
3821                 const struct btf_type *vt;
3822
3823                 vt = btf__type_by_id(btf, vs->type);
3824                 if (btf_is_func(vt))
3825                         break;
3826         }
3827
3828         /* No func in ksyms sec.  No need to add dummy var. */
3829         if (i == btf_vlen(sec))
3830                 return 0;
3831
3832         int_btf_id = find_int_btf_id(btf);
3833         dummy_var_btf_id = btf__add_var(btf,
3834                                         "dummy_ksym",
3835                                         BTF_VAR_GLOBAL_ALLOCATED,
3836                                         int_btf_id);
3837         if (dummy_var_btf_id < 0)
3838                 pr_warn("cannot create a dummy_ksym var\n");
3839
3840         return dummy_var_btf_id;
3841 }
3842
3843 static int bpf_object__collect_externs(struct bpf_object *obj)
3844 {
3845         struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
3846         const struct btf_type *t;
3847         struct extern_desc *ext;
3848         int i, n, off, dummy_var_btf_id;
3849         const char *ext_name, *sec_name;
3850         size_t ext_essent_len;
3851         Elf_Scn *scn;
3852         Elf64_Shdr *sh;
3853
3854         if (!obj->efile.symbols)
3855                 return 0;
3856
3857         scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);
3858         sh = elf_sec_hdr(obj, scn);
3859         if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))
3860                 return -LIBBPF_ERRNO__FORMAT;
3861
3862         dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
3863         if (dummy_var_btf_id < 0)
3864                 return dummy_var_btf_id;
3865
3866         n = sh->sh_size / sh->sh_entsize;
3867         pr_debug("looking for externs among %d symbols...\n", n);
3868
3869         for (i = 0; i < n; i++) {
3870                 Elf64_Sym *sym = elf_sym_by_idx(obj, i);
3871
3872                 if (!sym)
3873                         return -LIBBPF_ERRNO__FORMAT;
3874                 if (!sym_is_extern(sym))
3875                         continue;
3876                 ext_name = elf_sym_str(obj, sym->st_name);
3877                 if (!ext_name || !ext_name[0])
3878                         continue;
3879
3880                 ext = obj->externs;
3881                 ext = libbpf_reallocarray(ext, obj->nr_extern + 1, sizeof(*ext));
3882                 if (!ext)
3883                         return -ENOMEM;
3884                 obj->externs = ext;
3885                 ext = &ext[obj->nr_extern];
3886                 memset(ext, 0, sizeof(*ext));
3887                 obj->nr_extern++;
3888
3889                 ext->btf_id = find_extern_btf_id(obj->btf, ext_name);
3890                 if (ext->btf_id <= 0) {
3891                         pr_warn("failed to find BTF for extern '%s': %d\n",
3892                                 ext_name, ext->btf_id);
3893                         return ext->btf_id;
3894                 }
3895                 t = btf__type_by_id(obj->btf, ext->btf_id);
3896                 ext->name = btf__name_by_offset(obj->btf, t->name_off);
3897                 ext->sym_idx = i;
3898                 ext->is_weak = ELF64_ST_BIND(sym->st_info) == STB_WEAK;
3899
3900                 ext_essent_len = bpf_core_essential_name_len(ext->name);
3901                 ext->essent_name = NULL;
3902                 if (ext_essent_len != strlen(ext->name)) {
3903                         ext->essent_name = strndup(ext->name, ext_essent_len);
3904                         if (!ext->essent_name)
3905                                 return -ENOMEM;
3906                 }
3907
3908                 ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
3909                 if (ext->sec_btf_id <= 0) {
3910                         pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
3911                                 ext_name, ext->btf_id, ext->sec_btf_id);
3912                         return ext->sec_btf_id;
3913                 }
3914                 sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
3915                 sec_name = btf__name_by_offset(obj->btf, sec->name_off);
3916
3917                 if (strcmp(sec_name, KCONFIG_SEC) == 0) {
3918                         if (btf_is_func(t)) {
3919                                 pr_warn("extern function %s is unsupported under %s section\n",
3920                                         ext->name, KCONFIG_SEC);
3921                                 return -ENOTSUP;
3922                         }
3923                         kcfg_sec = sec;
3924                         ext->type = EXT_KCFG;
3925                         ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
3926                         if (ext->kcfg.sz <= 0) {
3927                                 pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
3928                                         ext_name, ext->kcfg.sz);
3929                                 return ext->kcfg.sz;
3930                         }
3931                         ext->kcfg.align = btf__align_of(obj->btf, t->type);
3932                         if (ext->kcfg.align <= 0) {
3933                                 pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
3934                                         ext_name, ext->kcfg.align);
3935                                 return -EINVAL;
3936                         }
3937                         ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
3938                                                         &ext->kcfg.is_signed);
3939                         if (ext->kcfg.type == KCFG_UNKNOWN) {
3940                                 pr_warn("extern (kcfg) '%s': type is unsupported\n", ext_name);
3941                                 return -ENOTSUP;
3942                         }
3943                 } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
3944                         ksym_sec = sec;
3945                         ext->type = EXT_KSYM;
3946                         skip_mods_and_typedefs(obj->btf, t->type,
3947                                                &ext->ksym.type_id);
3948                 } else {
3949                         pr_warn("unrecognized extern section '%s'\n", sec_name);
3950                         return -ENOTSUP;
3951                 }
3952         }
3953         pr_debug("collected %d externs total\n", obj->nr_extern);
3954
3955         if (!obj->nr_extern)
3956                 return 0;
3957
3958         /* sort externs by type, for kcfg ones also by (align, size, name) */
3959         qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
3960
3961         /* for .ksyms section, we need to turn all externs into allocated
3962          * variables in BTF to pass kernel verification; we do this by
3963          * pretending that each extern is an int-sized variable
3964          */
3965         if (ksym_sec) {
3966                 /* find existing 4-byte integer type in BTF to use for fake
3967                  * extern variables in DATASEC
3968                  */
3969                 int int_btf_id = find_int_btf_id(obj->btf);
3970                 /* For an extern function, the dummy_var added earlier
3971                  * will be used to replace its vs->type, and the dummy_var's
3972                  * name string will be reused to fill in any missing
3973                  * parameter names.
3974                  */
3975                 const struct btf_type *dummy_var;
3976
3977                 dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
3978                 for (i = 0; i < obj->nr_extern; i++) {
3979                         ext = &obj->externs[i];
3980                         if (ext->type != EXT_KSYM)
3981                                 continue;
3982                         pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
3983                                  i, ext->sym_idx, ext->name);
3984                 }
3985
3986                 sec = ksym_sec;
3987                 n = btf_vlen(sec);
3988                 for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
3989                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
3990                         struct btf_type *vt;
3991
3992                         vt = (void *)btf__type_by_id(obj->btf, vs->type);
3993                         ext_name = btf__name_by_offset(obj->btf, vt->name_off);
3994                         ext = find_extern_by_name(obj, ext_name);
3995                         if (!ext) {
3996                                 pr_warn("failed to find extern definition for BTF %s '%s'\n",
3997                                         btf_kind_str(vt), ext_name);
3998                                 return -ESRCH;
3999                         }
4000                         if (btf_is_func(vt)) {
4001                                 const struct btf_type *func_proto;
4002                                 struct btf_param *param;
4003                                 int j;
4004
4005                                 func_proto = btf__type_by_id(obj->btf,
4006                                                              vt->type);
4007                                 param = btf_params(func_proto);
4008                                 /* Reuse the dummy_var string if the
4009                                  * func proto does not have param name.
4010                                  */
4011                                 for (j = 0; j < btf_vlen(func_proto); j++)
4012                                         if (param[j].type && !param[j].name_off)
4013                                                 param[j].name_off =
4014                                                         dummy_var->name_off;
4015                                 vs->type = dummy_var_btf_id;
4016                                 vt->info &= ~0xffff; /* vlen bits encode FUNC linkage */
4017                                 vt->info |= BTF_FUNC_GLOBAL;
4018                         } else {
4019                                 btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4020                                 vt->type = int_btf_id;
4021                         }
4022                         vs->offset = off;
4023                         vs->size = sizeof(int);
4024                 }
4025                 sec->size = off;
4026         }
4027
4028         if (kcfg_sec) {
4029                 sec = kcfg_sec;
4030                 /* for kcfg externs calculate their offsets within a .kconfig map */
4031                 off = 0;
4032                 for (i = 0; i < obj->nr_extern; i++) {
4033                         ext = &obj->externs[i];
4034                         if (ext->type != EXT_KCFG)
4035                                 continue;
4036
4037                         ext->kcfg.data_off = roundup(off, ext->kcfg.align);
4038                         off = ext->kcfg.data_off + ext->kcfg.sz;
4039                         pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
4040                                  i, ext->sym_idx, ext->kcfg.data_off, ext->name);
4041                 }
4042                 sec->size = off;
4043                 n = btf_vlen(sec);
4044                 for (i = 0; i < n; i++) {
4045                         struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
4046
4047                         t = btf__type_by_id(obj->btf, vs->type);
4048                         ext_name = btf__name_by_offset(obj->btf, t->name_off);
4049                         ext = find_extern_by_name(obj, ext_name);
4050                         if (!ext) {
4051                                 pr_warn("failed to find extern definition for BTF var '%s'\n",
4052                                         ext_name);
4053                                 return -ESRCH;
4054                         }
4055                         btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
4056                         vs->offset = ext->kcfg.data_off;
4057                 }
4058         }
4059         return 0;
4060 }
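/* Editor's sketch (illustrative, not part of libbpf): the BPF-side extern
 * declarations that the resolution above serves; __kconfig and __ksym are
 * the usual bpf_helpers.h section attributes, the names are just examples:
 *
 *	extern unsigned long long LINUX_KERNEL_VERSION __kconfig;
 *	extern int CONFIG_HZ __kconfig __weak;
 *	extern void bpf_rcu_read_lock(void) __ksym;
 *
 * Kcfg externs get kcfg.data_off slots inside the internal .kconfig map as
 * laid out above; ksym function externs are rewritten to global funcs
 * against the dummy var, as done in the first loop.
 */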
4061
4062 static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog)
4063 {
4064         return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1;
4065 }
4066
4067 struct bpf_program *
4068 bpf_object__find_program_by_name(const struct bpf_object *obj,
4069                                  const char *name)
4070 {
4071         struct bpf_program *prog;
4072
4073         bpf_object__for_each_program(prog, obj) {
4074                 if (prog_is_subprog(obj, prog))
4075                         continue;
4076                 if (!strcmp(prog->name, name))
4077                         return prog;
4078         }
4079         return errno = ENOENT, NULL;
4080 }
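/* Editor's sketch: typical lookup by C function name; subprograms in .text
 * are intentionally skipped. Object and program names are hypothetical:
 *
 *	struct bpf_program *prog;
 *
 *	prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *	if (!prog)
 *		fprintf(stderr, "no such program: %d\n", -errno);
 */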
4081
4082 static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
4083                                       int shndx)
4084 {
4085         switch (obj->efile.secs[shndx].sec_type) {
4086         case SEC_BSS:
4087         case SEC_DATA:
4088         case SEC_RODATA:
4089                 return true;
4090         default:
4091                 return false;
4092         }
4093 }
4094
4095 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
4096                                       int shndx)
4097 {
4098         return shndx == obj->efile.btf_maps_shndx;
4099 }
4100
4101 static enum libbpf_map_type
4102 bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
4103 {
4104         if (shndx == obj->efile.symbols_shndx)
4105                 return LIBBPF_MAP_KCONFIG;
4106
4107         switch (obj->efile.secs[shndx].sec_type) {
4108         case SEC_BSS:
4109                 return LIBBPF_MAP_BSS;
4110         case SEC_DATA:
4111                 return LIBBPF_MAP_DATA;
4112         case SEC_RODATA:
4113                 return LIBBPF_MAP_RODATA;
4114         default:
4115                 return LIBBPF_MAP_UNSPEC;
4116         }
4117 }
4118
4119 static int bpf_program__record_reloc(struct bpf_program *prog,
4120                                      struct reloc_desc *reloc_desc,
4121                                      __u32 insn_idx, const char *sym_name,
4122                                      const Elf64_Sym *sym, const Elf64_Rel *rel)
4123 {
4124         struct bpf_insn *insn = &prog->insns[insn_idx];
4125         size_t map_idx, nr_maps = prog->obj->nr_maps;
4126         struct bpf_object *obj = prog->obj;
4127         __u32 shdr_idx = sym->st_shndx;
4128         enum libbpf_map_type type;
4129         const char *sym_sec_name;
4130         struct bpf_map *map;
4131
4132         if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
4133                 pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
4134                         prog->name, sym_name, insn_idx, insn->code);
4135                 return -LIBBPF_ERRNO__RELOC;
4136         }
4137
4138         if (sym_is_extern(sym)) {
4139                 int sym_idx = ELF64_R_SYM(rel->r_info);
4140                 int i, n = obj->nr_extern;
4141                 struct extern_desc *ext;
4142
4143                 for (i = 0; i < n; i++) {
4144                         ext = &obj->externs[i];
4145                         if (ext->sym_idx == sym_idx)
4146                                 break;
4147                 }
4148                 if (i >= n) {
4149                         pr_warn("prog '%s': extern relo failed to find extern for '%s' (%d)\n",
4150                                 prog->name, sym_name, sym_idx);
4151                         return -LIBBPF_ERRNO__RELOC;
4152                 }
4153                 pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
4154                          prog->name, i, ext->name, ext->sym_idx, insn_idx);
4155                 if (insn->code == (BPF_JMP | BPF_CALL))
4156                         reloc_desc->type = RELO_EXTERN_CALL;
4157                 else
4158                         reloc_desc->type = RELO_EXTERN_LD64;
4159                 reloc_desc->insn_idx = insn_idx;
4160                 reloc_desc->ext_idx = i;
4161                 return 0;
4162         }
4163
4164         /* sub-program call relocation */
4165         if (is_call_insn(insn)) {
4166                 if (insn->src_reg != BPF_PSEUDO_CALL) {
4167                         pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
4168                         return -LIBBPF_ERRNO__RELOC;
4169                 }
4170                 /* text_shndx can be 0 if no default "main" program exists */
4171                 if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
4172                         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4173                         pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
4174                                 prog->name, sym_name, sym_sec_name);
4175                         return -LIBBPF_ERRNO__RELOC;
4176                 }
4177                 if (sym->st_value % BPF_INSN_SZ) {
4178                         pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
4179                                 prog->name, sym_name, (size_t)sym->st_value);
4180                         return -LIBBPF_ERRNO__RELOC;
4181                 }
4182                 reloc_desc->type = RELO_CALL;
4183                 reloc_desc->insn_idx = insn_idx;
4184                 reloc_desc->sym_off = sym->st_value;
4185                 return 0;
4186         }
4187
4188         if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
4189                 pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
4190                         prog->name, sym_name, shdr_idx);
4191                 return -LIBBPF_ERRNO__RELOC;
4192         }
4193
4194         /* loading subprog addresses */
4195         if (sym_is_subprog(sym, obj->efile.text_shndx)) {
4196                 /* global_func: sym->st_value = offset in the section, insn->imm = 0.
4197                  * local_func: sym->st_value = 0, insn->imm = offset in the section.
4198                  */
4199                 if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
4200                         pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
4201                                 prog->name, sym_name, (size_t)sym->st_value, insn->imm);
4202                         return -LIBBPF_ERRNO__RELOC;
4203                 }
4204
4205                 reloc_desc->type = RELO_SUBPROG_ADDR;
4206                 reloc_desc->insn_idx = insn_idx;
4207                 reloc_desc->sym_off = sym->st_value;
4208                 return 0;
4209         }
4210
4211         type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
4212         sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
4213
4214         /* generic map reference relocation */
4215         if (type == LIBBPF_MAP_UNSPEC) {
4216                 if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
4217                         pr_warn("prog '%s': bad map relo against '%s' in section '%s'\n",
4218                                 prog->name, sym_name, sym_sec_name);
4219                         return -LIBBPF_ERRNO__RELOC;
4220                 }
4221                 for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4222                         map = &obj->maps[map_idx];
4223                         if (map->libbpf_type != type ||
4224                             map->sec_idx != sym->st_shndx ||
4225                             map->sec_offset != sym->st_value)
4226                                 continue;
4227                         pr_debug("prog '%s': found map %zd (%s, sec %d, off %zu) for insn #%u\n",
4228                                  prog->name, map_idx, map->name, map->sec_idx,
4229                                  map->sec_offset, insn_idx);
4230                         break;
4231                 }
4232                 if (map_idx >= nr_maps) {
4233                         pr_warn("prog '%s': map relo failed to find map for section '%s', off %zu\n",
4234                                 prog->name, sym_sec_name, (size_t)sym->st_value);
4235                         return -LIBBPF_ERRNO__RELOC;
4236                 }
4237                 reloc_desc->type = RELO_LD64;
4238                 reloc_desc->insn_idx = insn_idx;
4239                 reloc_desc->map_idx = map_idx;
4240                 reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */
4241                 return 0;
4242         }
4243
4244         /* global data map relocation */
4245         if (!bpf_object__shndx_is_data(obj, shdr_idx)) {
4246                 pr_warn("prog '%s': bad data relo against section '%s'\n",
4247                         prog->name, sym_sec_name);
4248                 return -LIBBPF_ERRNO__RELOC;
4249         }
4250         for (map_idx = 0; map_idx < nr_maps; map_idx++) {
4251                 map = &obj->maps[map_idx];
4252                 if (map->libbpf_type != type || map->sec_idx != sym->st_shndx)
4253                         continue;
4254                 pr_debug("prog '%s': found data map %zd (%s, sec %d, off %zu) for insn %u\n",
4255                          prog->name, map_idx, map->name, map->sec_idx,
4256                          map->sec_offset, insn_idx);
4257                 break;
4258         }
4259         if (map_idx >= nr_maps) {
4260                 pr_warn("prog '%s': data relo failed to find map for section '%s'\n",
4261                         prog->name, sym_sec_name);
4262                 return -LIBBPF_ERRNO__RELOC;
4263         }
4264
4265         reloc_desc->type = RELO_DATA;
4266         reloc_desc->insn_idx = insn_idx;
4267         reloc_desc->map_idx = map_idx;
4268         reloc_desc->sym_off = sym->st_value;
4269         return 0;
4270 }
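/* Editor's sketch of the BPF C constructs behind the relocation kinds
 * classified above (all names are illustrative):
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_ARRAY);
 *		__uint(max_entries, 1);
 *		__type(key, __u32);
 *		__type(value, __u64);
 *	} counters SEC(".maps");
 *
 *	int debug_level;	// global, placed in .bss
 *
 * bpf_map_lookup_elem(&counters, &key) emits a ldimm64 against the maps
 * section (RELO_LD64); reading debug_level emits a ldimm64 against .bss
 * (RELO_DATA); calling another BPF function emits a BPF_PSEUDO_CALL
 * (RELO_CALL); references to externs become RELO_EXTERN_CALL/_LD64.
 */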
4271
4272 static bool prog_contains_insn(const struct bpf_program *prog, size_t insn_idx)
4273 {
4274         return insn_idx >= prog->sec_insn_off &&
4275                insn_idx < prog->sec_insn_off + prog->sec_insn_cnt;
4276 }
4277
4278 static struct bpf_program *find_prog_by_sec_insn(const struct bpf_object *obj,
4279                                                  size_t sec_idx, size_t insn_idx)
4280 {
4281         int l = 0, r = obj->nr_programs - 1, m;
4282         struct bpf_program *prog;
4283
4284         if (!obj->nr_programs)
4285                 return NULL;
4286
4287         while (l < r) {
4288                 m = l + (r - l + 1) / 2;
4289                 prog = &obj->programs[m];
4290
4291                 if (prog->sec_idx < sec_idx ||
4292                     (prog->sec_idx == sec_idx && prog->sec_insn_off <= insn_idx))
4293                         l = m;
4294                 else
4295                         r = m - 1;
4296         }
4297         /* matching program could be at index l, but it still might be the
4298          * wrong one, so we need to double check conditions for the last time
4299          */
4300         prog = &obj->programs[l];
4301         if (prog->sec_idx == sec_idx && prog_contains_insn(prog, insn_idx))
4302                 return prog;
4303         return NULL;
4304 }
4305
4306 static int
4307 bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)
4308 {
4309         const char *relo_sec_name, *sec_name;
4310         size_t sec_idx = shdr->sh_info, sym_idx;
4311         struct bpf_program *prog;
4312         struct reloc_desc *relos;
4313         int err, i, nrels;
4314         const char *sym_name;
4315         __u32 insn_idx;
4316         Elf_Scn *scn;
4317         Elf_Data *scn_data;
4318         Elf64_Sym *sym;
4319         Elf64_Rel *rel;
4320
4321         if (sec_idx >= obj->efile.sec_cnt)
4322                 return -EINVAL;
4323
4324         scn = elf_sec_by_idx(obj, sec_idx);
4325         scn_data = elf_sec_data(obj, scn);
4326         if (!scn_data)
4327                 return -LIBBPF_ERRNO__FORMAT;
4328
4329         relo_sec_name = elf_sec_str(obj, shdr->sh_name);
4330         sec_name = elf_sec_name(obj, scn);
4331         if (!relo_sec_name || !sec_name)
4332                 return -EINVAL;
4333
4334         pr_debug("sec '%s': collecting relocation for section(%zu) '%s'\n",
4335                  relo_sec_name, sec_idx, sec_name);
4336         nrels = shdr->sh_size / shdr->sh_entsize;
4337
4338         for (i = 0; i < nrels; i++) {
4339                 rel = elf_rel_by_idx(data, i);
4340                 if (!rel) {
4341                         pr_warn("sec '%s': failed to get relo #%d\n", relo_sec_name, i);
4342                         return -LIBBPF_ERRNO__FORMAT;
4343                 }
4344
4345                 sym_idx = ELF64_R_SYM(rel->r_info);
4346                 sym = elf_sym_by_idx(obj, sym_idx);
4347                 if (!sym) {
4348                         pr_warn("sec '%s': symbol #%zu not found for relo #%d\n",
4349                                 relo_sec_name, sym_idx, i);
4350                         return -LIBBPF_ERRNO__FORMAT;
4351                 }
4352
4353                 if (sym->st_shndx >= obj->efile.sec_cnt) {
4354                         pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n",
4355                                 relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);
4356                         return -LIBBPF_ERRNO__FORMAT;
4357                 }
4358
4359                 if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {
4360                         pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
4361                                 relo_sec_name, (size_t)rel->r_offset, i);
4362                         return -LIBBPF_ERRNO__FORMAT;
4363                 }
4364
4365                 insn_idx = rel->r_offset / BPF_INSN_SZ;
4366                 /* relocations against static functions are recorded as
4367                  * relocations against the section that contains a function;
4368                  * in that case, the symbol will be STT_SECTION and sym.st_name
4369                  * will point to an empty string (0), so fetch the section name
4370                  * instead
4371                  */
4372                 if (ELF64_ST_TYPE(sym->st_info) == STT_SECTION && sym->st_name == 0)
4373                         sym_name = elf_sec_name(obj, elf_sec_by_idx(obj, sym->st_shndx));
4374                 else
4375                         sym_name = elf_sym_str(obj, sym->st_name);
4376                 sym_name = sym_name ?: "<?";
4377
4378                 pr_debug("sec '%s': relo #%d: insn #%u against '%s'\n",
4379                          relo_sec_name, i, insn_idx, sym_name);
4380
4381                 prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
4382                 if (!prog) {
4383                         pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
4384                                 relo_sec_name, i, sec_name, insn_idx);
4385                         continue;
4386                 }
4387
4388                 relos = libbpf_reallocarray(prog->reloc_desc,
4389                                             prog->nr_reloc + 1, sizeof(*relos));
4390                 if (!relos)
4391                         return -ENOMEM;
4392                 prog->reloc_desc = relos;
4393
4394                 /* adjust insn_idx to local BPF program frame of reference */
4395                 insn_idx -= prog->sec_insn_off;
4396                 err = bpf_program__record_reloc(prog, &relos[prog->nr_reloc],
4397                                                 insn_idx, sym_name, sym, rel);
4398                 if (err)
4399                         return err;
4400
4401                 prog->nr_reloc++;
4402         }
4403         return 0;
4404 }
4405
4406 static int map_fill_btf_type_info(struct bpf_object *obj, struct bpf_map *map)
4407 {
4408         int id;
4409
4410         if (!obj->btf)
4411                 return -ENOENT;
4412
4413         /* if it's a BTF-defined map, we don't need to search for type IDs.
4414          * A struct_ops map needs neither btf_key_type_id nor
4415          * btf_value_type_id.
4416          */
4417         if (map->sec_idx == obj->efile.btf_maps_shndx || bpf_map__is_struct_ops(map))
4418                 return 0;
4419
4420         /*
4421          * LLVM annotates global data differently in BTF, that is,
4422          * only as '.data', '.bss' or '.rodata'.
4423          */
4424         if (!bpf_map__is_internal(map))
4425                 return -ENOENT;
4426
4427         id = btf__find_by_name(obj->btf, map->real_name);
4428         if (id < 0)
4429                 return id;
4430
4431         map->btf_key_type_id = 0;
4432         map->btf_value_type_id = id;
4433         return 0;
4434 }
4435
4436 static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info)
4437 {
4438         char file[PATH_MAX], buff[4096];
4439         FILE *fp;
4440         __u32 val;
4441         int err;
4442
4443         snprintf(file, sizeof(file), "/proc/%d/fdinfo/%d", getpid(), fd);
4444         memset(info, 0, sizeof(*info));
4445
4446         fp = fopen(file, "re");
4447         if (!fp) {
4448                 err = -errno;
4449                 pr_warn("failed to open %s: %d. No procfs support?\n", file,
4450                         err);
4451                 return err;
4452         }
4453
4454         while (fgets(buff, sizeof(buff), fp)) {
4455                 if (sscanf(buff, "map_type:\t%u", &val) == 1)
4456                         info->type = val;
4457                 else if (sscanf(buff, "key_size:\t%u", &val) == 1)
4458                         info->key_size = val;
4459                 else if (sscanf(buff, "value_size:\t%u", &val) == 1)
4460                         info->value_size = val;
4461                 else if (sscanf(buff, "max_entries:\t%u", &val) == 1)
4462                         info->max_entries = val;
4463                 else if (sscanf(buff, "map_flags:\t%i", &val) == 1)
4464                         info->map_flags = val;
4465         }
4466
4467         fclose(fp);
4468
4469         return 0;
4470 }
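/* Editor's note: the fallback above scrapes /proc/<pid>/fdinfo/<fd>, which
 * for a BPF map looks roughly like this (tab-separated, kernel-version
 * dependent; only fields matched by the sscanf() calls are consumed):
 *
 *	map_type:	1
 *	key_size:	4
 *	value_size:	8
 *	max_entries:	64
 *	map_flags:	0x0
 */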
4471
4472 bool bpf_map__autocreate(const struct bpf_map *map)
4473 {
4474         return map->autocreate;
4475 }
4476
4477 int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
4478 {
4479         if (map->obj->loaded)
4480                 return libbpf_err(-EBUSY);
4481
4482         map->autocreate = autocreate;
4483         return 0;
4484 }
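/* Editor's sketch: opting a map out of automatic creation; must happen
 * after open but before bpf_object__load(), or -EBUSY is returned
 * ("optional_map" is hypothetical):
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, "optional_map");
 *
 *	if (map)
 *		bpf_map__set_autocreate(map, false);
 */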
4485
4486 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
4487 {
4488         struct bpf_map_info info;
4489         __u32 len = sizeof(info), name_len;
4490         int new_fd, err;
4491         char *new_name;
4492
4493         memset(&info, 0, len);
4494         err = bpf_map_get_info_by_fd(fd, &info, &len);
4495         if (err && errno == EINVAL)
4496                 err = bpf_get_map_info_from_fdinfo(fd, &info);
4497         if (err)
4498                 return libbpf_err(err);
4499
4500         name_len = strlen(info.name);
4501         if (name_len == BPF_OBJ_NAME_LEN - 1 && strncmp(map->name, info.name, name_len) == 0)
4502                 new_name = strdup(map->name);
4503         else
4504                 new_name = strdup(info.name);
4505
4506         if (!new_name)
4507                 return libbpf_err(-errno);
4508
4509         /*
4510          * Like dup(), but make sure new FD is >= 3 and has O_CLOEXEC set.
4511          * This is similar to what we do in ensure_good_fd(), but without
4512          * closing original FD.
4513          */
4514         new_fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
4515         if (new_fd < 0) {
4516                 err = -errno;
4517                 goto err_free_new_name;
4518         }
4519
4520         err = reuse_fd(map->fd, new_fd);
4521         if (err)
4522                 goto err_free_new_name;
4523
4524         free(map->name);
4525
4526         map->name = new_name;
4527         map->def.type = info.type;
4528         map->def.key_size = info.key_size;
4529         map->def.value_size = info.value_size;
4530         map->def.max_entries = info.max_entries;
4531         map->def.map_flags = info.map_flags;
4532         map->btf_key_type_id = info.btf_key_type_id;
4533         map->btf_value_type_id = info.btf_value_type_id;
4534         map->reused = true;
4535         map->map_extra = info.map_extra;
4536
4537         return 0;
4538
4539 err_free_new_name:
4540         free(new_name);
4541         return libbpf_err(err);
4542 }
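/* Editor's sketch: adopting an already pinned map before load;
 * bpf_map__reuse_fd() dups the FD, so the caller keeps ownership of
 * pin_fd (the pin path is hypothetical):
 *
 *	int pin_fd = bpf_obj_get("/sys/fs/bpf/my_map");
 *
 *	if (pin_fd >= 0) {
 *		err = bpf_map__reuse_fd(map, pin_fd);
 *		close(pin_fd);
 *	}
 */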
4543
4544 __u32 bpf_map__max_entries(const struct bpf_map *map)
4545 {
4546         return map->def.max_entries;
4547 }
4548
4549 struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
4550 {
4551         if (!bpf_map_type__is_map_in_map(map->def.type))
4552                 return errno = EINVAL, NULL;
4553
4554         return map->inner_map;
4555 }
4556
4557 int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
4558 {
4559         if (map->obj->loaded)
4560                 return libbpf_err(-EBUSY);
4561
4562         map->def.max_entries = max_entries;
4563
4564         /* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
4565         if (map_is_ringbuf(map))
4566                 map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
4567
4568         return 0;
4569 }
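/* Editor's sketch: resizing a BPF ring buffer before load; whatever value
 * is passed gets rounded up by the adjustment above to a valid ringbuf
 * size, i.e. a power-of-2 multiple of the page size ("events" is
 * hypothetical):
 *
 *	struct bpf_map *rb = bpf_object__find_map_by_name(obj, "events");
 *
 *	if (rb)
 *		bpf_map__set_max_entries(rb, 256 * 1024);
 */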
4570
4571 static int bpf_object_prepare_token(struct bpf_object *obj)
4572 {
4573         const char *bpffs_path;
4574         int bpffs_fd = -1, token_fd, err;
4575         bool mandatory;
4576         enum libbpf_print_level level;
4577
4578         /* token is explicitly prevented */
4579         if (obj->token_path && obj->token_path[0] == '\0') {
4580                 pr_debug("object '%s': token is prevented, skipping...\n", obj->name);
4581                 return 0;
4582         }
4583
4584         mandatory = obj->token_path != NULL;
4585         level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG;
4586
4587         bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH;
4588         bpffs_fd = open(bpffs_path, O_DIRECTORY | O_RDONLY);
4589         if (bpffs_fd < 0) {
4590                 err = -errno;
4591                 __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n",
4592                      obj->name, err, bpffs_path,
4593                      mandatory ? "" : ", skipping optional step...");
4594                 return mandatory ? err : 0;
4595         }
4596
4597         token_fd = bpf_token_create(bpffs_fd, 0);
4598         close(bpffs_fd);
4599         if (token_fd < 0) {
4600                 if (!mandatory && token_fd == -ENOENT) {
4601                         pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n",
4602                                  obj->name, bpffs_path);
4603                         return 0;
4604                 }
4605                 __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n",
4606                      obj->name, token_fd, bpffs_path,
4607                      mandatory ? "" : ", skipping optional step...");
4608                 return mandatory ? token_fd : 0;
4609         }
4610
4611         obj->feat_cache = calloc(1, sizeof(*obj->feat_cache));
4612         if (!obj->feat_cache) {
4613                 close(token_fd);
4614                 return -ENOMEM;
4615         }
4616
4617         obj->token_fd = token_fd;
4618         obj->feat_cache->token_fd = token_fd;
4619
4620         return 0;
4621 }
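/* Editor's sketch: requesting a BPF token explicitly at open time,
 * assuming the bpf_token_path open option present in this libbpf version;
 * an empty string disables even the implicit /sys/fs/bpf probing above:
 *
 *	LIBBPF_OPTS(bpf_object_open_opts, opts,
 *		.bpf_token_path = "/sys/fs/bpf",
 *	);
 *	struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 */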
4622
4623 static int
4624 bpf_object__probe_loading(struct bpf_object *obj)
4625 {
4626         char *cp, errmsg[STRERR_BUFSIZE];
4627         struct bpf_insn insns[] = {
4628                 BPF_MOV64_IMM(BPF_REG_0, 0),
4629                 BPF_EXIT_INSN(),
4630         };
4631         int ret, insn_cnt = ARRAY_SIZE(insns);
4632         LIBBPF_OPTS(bpf_prog_load_opts, opts,
4633                 .token_fd = obj->token_fd,
4634                 .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0,
4635         );
4636
4637         if (obj->gen_loader)
4638                 return 0;
4639
4640         ret = bump_rlimit_memlock();
4641         if (ret)
4642                 pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret);
4643
4644         /* make sure basic loading works */
4645         ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts);
4646         if (ret < 0)
4647                 ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
4648         if (ret < 0) {
4649                 ret = errno;
4650                 cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
4651                 pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
4652                         "program. Make sure your kernel supports BPF "
4653                         "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
4654                         "set to a big enough value.\n", __func__, cp, ret);
4655                 return -ret;
4656         }
4657         close(ret);
4658
4659         return 0;
4660 }
4661
4662 bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
4663 {
4664         if (obj && obj->gen_loader)
4665                 /* To generate loader program assume the latest kernel
4666                  * to avoid doing extra prog_load, map_create syscalls.
4667                  */
4668                 return true;
4669
4670         if (obj->token_fd)
4671                 return feat_supported(obj->feat_cache, feat_id);
4672
4673         return feat_supported(NULL, feat_id);
4674 }
4675
4676 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
4677 {
4678         struct bpf_map_info map_info;
4679         char msg[STRERR_BUFSIZE];
4680         __u32 map_info_len = sizeof(map_info);
4681         int err;
4682
4683         memset(&map_info, 0, map_info_len);
4684         err = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
4685         if (err && errno == EINVAL)
4686                 err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
4687         if (err) {
4688                 pr_warn("failed to get map info for map FD %d: %s\n", map_fd,
4689                         libbpf_strerror_r(errno, msg, sizeof(msg)));
4690                 return false;
4691         }
4692
4693         return (map_info.type == map->def.type &&
4694                 map_info.key_size == map->def.key_size &&
4695                 map_info.value_size == map->def.value_size &&
4696                 map_info.max_entries == map->def.max_entries &&
4697                 map_info.map_flags == map->def.map_flags &&
4698                 map_info.map_extra == map->map_extra);
4699 }
4700
4701 static int
4702 bpf_object__reuse_map(struct bpf_map *map)
4703 {
4704         char *cp, errmsg[STRERR_BUFSIZE];
4705         int err, pin_fd;
4706
4707         pin_fd = bpf_obj_get(map->pin_path);
4708         if (pin_fd < 0) {
4709                 err = -errno;
4710                 if (err == -ENOENT) {
4711                         pr_debug("found no pinned map to reuse at '%s'\n",
4712                                  map->pin_path);
4713                         return 0;
4714                 }
4715
4716                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
4717                 pr_warn("couldn't retrieve pinned map '%s': %s\n",
4718                         map->pin_path, cp);
4719                 return err;
4720         }
4721
4722         if (!map_is_reuse_compat(map, pin_fd)) {
4723                 pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n",
4724                         map->pin_path);
4725                 close(pin_fd);
4726                 return -EINVAL;
4727         }
4728
4729         err = bpf_map__reuse_fd(map, pin_fd);
4730         close(pin_fd);
4731         if (err)
4732                 return err;
4733
4734         map->pinned = true;
4735         pr_debug("reused pinned map at '%s'\n", map->pin_path);
4736
4737         return 0;
4738 }
4739
4740 static int
4741 bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
4742 {
4743         enum libbpf_map_type map_type = map->libbpf_type;
4744         char *cp, errmsg[STRERR_BUFSIZE];
4745         int err, zero = 0;
4746
4747         if (obj->gen_loader) {
4748                 bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps,
4749                                          map->mmaped, map->def.value_size);
4750                 if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG)
4751                         bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
4752                 return 0;
4753         }
4754         err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
4755         if (err) {
4756                 err = -errno;
4757                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4758                 pr_warn("Error setting initial map(%s) contents: %s\n",
4759                         map->name, cp);
4760                 return err;
4761         }
4762
4763         /* Freeze .rodata and .kconfig map as read-only from syscall side. */
4764         if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) {
4765                 err = bpf_map_freeze(map->fd);
4766                 if (err) {
4767                         err = -errno;
4768                         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4769                         pr_warn("Error freezing map(%s) as read-only: %s\n",
4770                                 map->name, cp);
4771                         return err;
4772                 }
4773         }
4774         return 0;
4775 }
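/* Editor's sketch: a read-only global that ends up in the .rodata map
 * populated and frozen above; user space may only adjust it between open
 * and load (e.g. through a skeleton's rodata view), after which
 * bpf_map_freeze() makes it immutable from the syscall side:
 *
 *	const volatile int debug_level = 0;	// BPF-side declaration
 */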
4776
4777 static void bpf_map__destroy(struct bpf_map *map);
4778
4779 static bool map_is_created(const struct bpf_map *map)
4780 {
4781         return map->obj->loaded || map->reused;
4782 }
4783
4784 static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)
4785 {
4786         LIBBPF_OPTS(bpf_map_create_opts, create_attr);
4787         struct bpf_map_def *def = &map->def;
4788         const char *map_name = NULL;
4789         int err = 0, map_fd;
4790
4791         if (kernel_supports(obj, FEAT_PROG_NAME))
4792                 map_name = map->name;
4793         create_attr.map_ifindex = map->map_ifindex;
4794         create_attr.map_flags = def->map_flags;
4795         create_attr.numa_node = map->numa_node;
4796         create_attr.map_extra = map->map_extra;
4797         create_attr.token_fd = obj->token_fd;
4798         if (obj->token_fd)
4799                 create_attr.map_flags |= BPF_F_TOKEN_FD;
4800
4801         if (bpf_map__is_struct_ops(map)) {
4802                 create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
4803                 if (map->mod_btf_fd >= 0) {
4804                         create_attr.value_type_btf_obj_fd = map->mod_btf_fd;
4805                         create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD;
4806                 }
4807         }
4808
4809         if (obj->btf && btf__fd(obj->btf) >= 0) {
4810                 create_attr.btf_fd = btf__fd(obj->btf);
4811                 create_attr.btf_key_type_id = map->btf_key_type_id;
4812                 create_attr.btf_value_type_id = map->btf_value_type_id;
4813         }
4814
4815         if (bpf_map_type__is_map_in_map(def->type)) {
4816                 if (map->inner_map) {
4817                         err = map_set_def_max_entries(map->inner_map);
4818                         if (err)
4819                                 return err;
4820                         err = bpf_object__create_map(obj, map->inner_map, true);
4821                         if (err) {
4822                                 pr_warn("map '%s': failed to create inner map: %d\n",
4823                                         map->name, err);
4824                                 return err;
4825                         }
4826                         map->inner_map_fd = map->inner_map->fd;
4827                 }
4828                 if (map->inner_map_fd >= 0)
4829                         create_attr.inner_map_fd = map->inner_map_fd;
4830         }
4831
4832         switch (def->type) {
4833         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
4834         case BPF_MAP_TYPE_CGROUP_ARRAY:
4835         case BPF_MAP_TYPE_STACK_TRACE:
4836         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
4837         case BPF_MAP_TYPE_HASH_OF_MAPS:
4838         case BPF_MAP_TYPE_DEVMAP:
4839         case BPF_MAP_TYPE_DEVMAP_HASH:
4840         case BPF_MAP_TYPE_CPUMAP:
4841         case BPF_MAP_TYPE_XSKMAP:
4842         case BPF_MAP_TYPE_SOCKMAP:
4843         case BPF_MAP_TYPE_SOCKHASH:
4844         case BPF_MAP_TYPE_QUEUE:
4845         case BPF_MAP_TYPE_STACK:
4846                 create_attr.btf_fd = 0;
4847                 create_attr.btf_key_type_id = 0;
4848                 create_attr.btf_value_type_id = 0;
4849                 map->btf_key_type_id = 0;
4850                 map->btf_value_type_id = 0;
4851         default:
4852                 break;
4853         }
4854
4855         if (obj->gen_loader) {
4856                 bpf_gen__map_create(obj->gen_loader, def->type, map_name,
4857                                     def->key_size, def->value_size, def->max_entries,
4858                                     &create_attr, is_inner ? -1 : map - obj->maps);
4859                 /* We keep pretending we have a valid FD to pass various fd >= 0
4860                  * checks by just keeping original placeholder FDs in place.
4861                  * See bpf_object__add_map() comment.
4862                  * This placeholder fd will not be used with any syscall and
4863                  * will be reset to -1 eventually.
4864                  */
4865                 map_fd = map->fd;
4866         } else {
4867                 map_fd = bpf_map_create(def->type, map_name,
4868                                         def->key_size, def->value_size,
4869                                         def->max_entries, &create_attr);
4870         }
4871         if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) {
4872                 char *cp, errmsg[STRERR_BUFSIZE];
4873
4874                 err = -errno;
4875                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
4876                 pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
4877                         map->name, cp, err);
4878                 create_attr.btf_fd = 0;
4879                 create_attr.btf_key_type_id = 0;
4880                 create_attr.btf_value_type_id = 0;
4881                 map->btf_key_type_id = 0;
4882                 map->btf_value_type_id = 0;
4883                 map_fd = bpf_map_create(def->type, map_name,
4884                                         def->key_size, def->value_size,
4885                                         def->max_entries, &create_attr);
4886         }
4887
4888         if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
4889                 if (obj->gen_loader)
4890                         map->inner_map->fd = -1;
4891                 bpf_map__destroy(map->inner_map);
4892                 zfree(&map->inner_map);
4893         }
4894
4895         if (map_fd < 0)
4896                 return map_fd;
4897
4898         /* obj->gen_loader case, prevent reuse_fd() from closing map_fd */
4899         if (map->fd == map_fd)
4900                 return 0;
4901
4902         /* Keep placeholder FD value but now point it to the BPF map object.
4903          * This way everything that relied on this map's FD (e.g., relocated
4904          * ldimm64 instructions) will stay valid and won't need adjustments.
4905          * map->fd stays valid but now points to what map_fd points to.
4906          */
4907         return reuse_fd(map->fd, map_fd);
4908 }
4909
4910 static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)
4911 {
4912         const struct bpf_map *targ_map;
4913         unsigned int i;
4914         int fd, err = 0;
4915
4916         for (i = 0; i < map->init_slots_sz; i++) {
4917                 if (!map->init_slots[i])
4918                         continue;
4919
4920                 targ_map = map->init_slots[i];
4921                 fd = targ_map->fd;
4922
4923                 if (obj->gen_loader) {
4924                         bpf_gen__populate_outer_map(obj->gen_loader,
4925                                                     map - obj->maps, i,
4926                                                     targ_map - obj->maps);
4927                 } else {
4928                         err = bpf_map_update_elem(map->fd, &i, &fd, 0);
4929                 }
4930                 if (err) {
4931                         err = -errno;
4932                         pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
4933                                 map->name, i, targ_map->name, fd, err);
4934                         return err;
4935                 }
4936                 pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
4937                          map->name, i, targ_map->name, fd);
4938         }
4939
4940         zfree(&map->init_slots);
4941         map->init_slots_sz = 0;
4942
4943         return 0;
4944 }
4945
4946 static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map)
4947 {
4948         const struct bpf_program *targ_prog;
4949         unsigned int i;
4950         int fd, err;
4951
4952         if (obj->gen_loader)
4953                 return -ENOTSUP;
4954
4955         for (i = 0; i < map->init_slots_sz; i++) {
4956                 if (!map->init_slots[i])
4957                         continue;
4958
4959                 targ_prog = map->init_slots[i];
4960                 fd = bpf_program__fd(targ_prog);
4961
4962                 err = bpf_map_update_elem(map->fd, &i, &fd, 0);
4963                 if (err) {
4964                         err = -errno;
4965                         pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n",
4966                                 map->name, i, targ_prog->name, fd, err);
4967                         return err;
4968                 }
4969                 pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n",
4970                          map->name, i, targ_prog->name, fd);
4971         }
4972
4973         zfree(&map->init_slots);
4974         map->init_slots_sz = 0;
4975
4976         return 0;
4977 }
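/* Editor's sketch: a declaratively initialized PROG_ARRAY whose slots the
 * loop above fills in; mirrors the usual selftests pattern, names are
 * illustrative:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
 *		__uint(max_entries, 2);
 *		__uint(key_size, sizeof(__u32));
 *		__uint(value_size, sizeof(__u32));
 *		__array(values, int (struct xdp_md *));
 *	} jmp_table SEC(".maps") = {
 *		.values = { [0] = (void *)&xdp_pass },
 *	};
 */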
4978
4979 static int bpf_object_init_prog_arrays(struct bpf_object *obj)
4980 {
4981         struct bpf_map *map;
4982         int i, err;
4983
4984         for (i = 0; i < obj->nr_maps; i++) {
4985                 map = &obj->maps[i];
4986
4987                 if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY)
4988                         continue;
4989
4990                 err = init_prog_array_slots(obj, map);
4991                 if (err < 0)
4992                         return err;
4993         }
4994         return 0;
4995 }
4996
4997 static int map_set_def_max_entries(struct bpf_map *map)
4998 {
4999         if (map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !map->def.max_entries) {
5000                 int nr_cpus;
5001
5002                 nr_cpus = libbpf_num_possible_cpus();
5003                 if (nr_cpus < 0) {
5004                         pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
5005                                 map->name, nr_cpus);
5006                         return nr_cpus;
5007                 }
5008                 pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
5009                 map->def.max_entries = nr_cpus;
5010         }
5011
5012         return 0;
5013 }
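/* Editor's sketch: the common declaration this default serves; with
 * max_entries left out, the map gets sized to the number of possible CPUs:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 *		__uint(key_size, sizeof(int));
 *		__uint(value_size, sizeof(int));
 *	} events SEC(".maps");
 */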
5014
5015 static int
5016 bpf_object__create_maps(struct bpf_object *obj)
5017 {
5018         struct bpf_map *map;
5019         char *cp, errmsg[STRERR_BUFSIZE];
5020         unsigned int i, j;
5021         int err;
5022         bool retried;
5023
5024         for (i = 0; i < obj->nr_maps; i++) {
5025                 map = &obj->maps[i];
5026
5027                 /* To support old kernels, we skip creating global data maps
5028                  * (.rodata, .data, .kconfig, etc); later on, during program
5029                  * loading, if we detect that at least one of the to-be-loaded
5030                  * programs is referencing any global data map, we'll error
5031                  * out with program name and relocation index logged.
5032                  * This approach allows us to accommodate Clang emitting
5033                  * unnecessary .rodata.str1.1 sections for string literals,
5034                  * and also allows CO-RE applications to use global
5035                  * variables in some BPF programs, but not others.
5036                  * If those global variable-using programs are not loaded at
5037                  * runtime due to bpf_program__set_autoload(prog, false),
5038                  * bpf_object loading will succeed just fine even on old
5039                  * kernels.
5040                  */
5041                 if (bpf_map__is_internal(map) && !kernel_supports(obj, FEAT_GLOBAL_DATA))
5042                         map->autocreate = false;
5043
5044                 if (!map->autocreate) {
5045                         pr_debug("map '%s': skipped auto-creating...\n", map->name);
5046                         continue;
5047                 }
5048
5049                 err = map_set_def_max_entries(map);
5050                 if (err)
5051                         goto err_out;
5052
5053                 retried = false;
5054 retry:
5055                 if (map->pin_path) {
5056                         err = bpf_object__reuse_map(map);
5057                         if (err) {
5058                                 pr_warn("map '%s': error reusing pinned map\n",
5059                                         map->name);
5060                                 goto err_out;
5061                         }
5062                         if (retried && map->fd < 0) {
5063                                 pr_warn("map '%s': cannot find pinned map\n",
5064                                         map->name);
5065                                 err = -ENOENT;
5066                                 goto err_out;
5067                         }
5068                 }
5069
5070                 if (map->reused) {
5071                         pr_debug("map '%s': skipping creation (preset fd=%d)\n",
5072                                  map->name, map->fd);
5073                 } else {
5074                         err = bpf_object__create_map(obj, map, false);
5075                         if (err)
5076                                 goto err_out;
5077
5078                         pr_debug("map '%s': created successfully, fd=%d\n",
5079                                  map->name, map->fd);
5080
5081                         if (bpf_map__is_internal(map)) {
5082                                 err = bpf_object__populate_internal_map(obj, map);
5083                                 if (err < 0)
5084                                         goto err_out;
5085                         }
5086
5087                         if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
5088                                 err = init_map_in_map_slots(obj, map);
5089                                 if (err < 0)
5090                                         goto err_out;
5091                         }
5092                 }
5093
5094                 if (map->pin_path && !map->pinned) {
5095                         err = bpf_map__pin(map, NULL);
5096                         if (err) {
5097                                 if (!retried && err == -EEXIST) {
5098                                         retried = true;
5099                                         goto retry;
5100                                 }
5101                                 pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
5102                                         map->name, map->pin_path, err);
5103                                 goto err_out;
5104                         }
5105                 }
5106         }
5107
5108         return 0;
5109
5110 err_out:
5111         cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
5112         pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
5113         pr_perm_msg(err);
5114         for (j = 0; j < i; j++)
5115                 zclose(obj->maps[j].fd);
5116         return err;
5117 }
5118
5119 static bool bpf_core_is_flavor_sep(const char *s)
5120 {
5121         /* check X___Y name pattern, where X and Y are not underscores */
5122         return s[0] != '_' &&                                 /* X */
5123                s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
5124                s[4] != '_';                                   /* Y */
5125 }
5126
5127 /* Given 'some_struct_name___with_flavor' return the length of a name prefix
5128  * before the last triple underscore. The struct name part after the last
5129  * triple underscore is ignored during BPF CO-RE relocation matching.
5130  */
5131 size_t bpf_core_essential_name_len(const char *name)
5132 {
5133         size_t n = strlen(name);
5134         int i;
5135
5136         for (i = n - 5; i >= 0; i--) {
5137                 if (bpf_core_is_flavor_sep(name + i))
5138                         return i + 1;
5139         }
5140         return n;
5141 }
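/* Editor's example: for a "flavored" local type such as
 *
 *	struct task_struct___pre514 { ... };
 *
 * the essential name is "task_struct" (length 11), so candidate matching
 * targets the kernel's struct task_struct; the ___pre514 suffix merely
 * disambiguates alternative local definitions.
 */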
5142
5143 void bpf_core_free_cands(struct bpf_core_cand_list *cands)
5144 {
5145         if (!cands)
5146                 return;
5147
5148         free(cands->cands);
5149         free(cands);
5150 }
5151
5152 int bpf_core_add_cands(struct bpf_core_cand *local_cand,
5153                        size_t local_essent_len,
5154                        const struct btf *targ_btf,
5155                        const char *targ_btf_name,
5156                        int targ_start_id,
5157                        struct bpf_core_cand_list *cands)
5158 {
5159         struct bpf_core_cand *new_cands, *cand;
5160         const struct btf_type *t, *local_t;
5161         const char *targ_name, *local_name;
5162         size_t targ_essent_len;
5163         int n, i;
5164
5165         local_t = btf__type_by_id(local_cand->btf, local_cand->id);
5166         local_name = btf__str_by_offset(local_cand->btf, local_t->name_off);
5167
5168         n = btf__type_cnt(targ_btf);
5169         for (i = targ_start_id; i < n; i++) {
5170                 t = btf__type_by_id(targ_btf, i);
5171                 if (!btf_kind_core_compat(t, local_t))
5172                         continue;
5173
5174                 targ_name = btf__name_by_offset(targ_btf, t->name_off);
5175                 if (str_is_empty(targ_name))
5176                         continue;
5177
5178                 targ_essent_len = bpf_core_essential_name_len(targ_name);
5179                 if (targ_essent_len != local_essent_len)
5180                         continue;
5181
5182                 if (strncmp(local_name, targ_name, local_essent_len) != 0)
5183                         continue;
5184
5185                 pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n",
5186                          local_cand->id, btf_kind_str(local_t),
5187                          local_name, i, btf_kind_str(t), targ_name,
5188                          targ_btf_name);
5189                 new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,
5190                                               sizeof(*cands->cands));
5191                 if (!new_cands)
5192                         return -ENOMEM;
5193
5194                 cand = &new_cands[cands->len];
5195                 cand->btf = targ_btf;
5196                 cand->id = i;
5197
5198                 cands->cands = new_cands;
5199                 cands->len++;
5200         }
5201         return 0;
5202 }
5203
5204 static int load_module_btfs(struct bpf_object *obj)
5205 {
5206         struct bpf_btf_info info;
5207         struct module_btf *mod_btf;
5208         struct btf *btf;
5209         char name[64];
5210         __u32 id = 0, len;
5211         int err, fd;
5212
5213         if (obj->btf_modules_loaded)
5214                 return 0;
5215
5216         if (obj->gen_loader)
5217                 return 0;
5218
5219         /* don't do this again, even if we find no module BTFs */
5220         obj->btf_modules_loaded = true;
5221
5222         /* kernel too old to support module BTFs */
5223         if (!kernel_supports(obj, FEAT_MODULE_BTF))
5224                 return 0;
5225
5226         while (true) {
5227                 err = bpf_btf_get_next_id(id, &id);
5228                 if (err && errno == ENOENT)
5229                         return 0;
5230                 if (err && errno == EPERM) {
5231                         pr_debug("skipping module BTFs loading, missing privileges\n");
5232                         return 0;
5233                 }
5234                 if (err) {
5235                         err = -errno;
5236                         pr_warn("failed to iterate BTF objects: %d\n", err);
5237                         return err;
5238                 }
5239
5240                 fd = bpf_btf_get_fd_by_id(id);
5241                 if (fd < 0) {
5242                         if (errno == ENOENT)
5243                                 continue; /* expected race: BTF was unloaded */
5244                         err = -errno;
5245                         pr_warn("failed to get BTF object #%d FD: %d\n", id, err);
5246                         return err;
5247                 }
5248
5249                 len = sizeof(info);
5250                 memset(&info, 0, sizeof(info));
5251                 info.name = ptr_to_u64(name);
5252                 info.name_len = sizeof(name);
5253
5254                 err = bpf_btf_get_info_by_fd(fd, &info, &len);
5255                 if (err) {
5256                         err = -errno;
5257                         pr_warn("failed to get BTF object #%d info: %d\n", id, err);
5258                         goto err_out;
5259                 }
5260
5261                 /* ignore non-module BTFs */
5262                 if (!info.kernel_btf || strcmp(name, "vmlinux") == 0) {
5263                         close(fd);
5264                         continue;
5265                 }
5266
5267                 btf = btf_get_from_fd(fd, obj->btf_vmlinux);
5268                 err = libbpf_get_error(btf);
5269                 if (err) {
5270                         pr_warn("failed to load module [%s]'s BTF object #%d: %d\n",
5271                                 name, id, err);
5272                         goto err_out;
5273                 }
5274
5275                 err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
5276                                         sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
5277                 if (err)
5278                         goto err_out;
5279
5280                 mod_btf = &obj->btf_modules[obj->btf_module_cnt++];
5281
5282                 mod_btf->btf = btf;
5283                 mod_btf->id = id;
5284                 mod_btf->fd = fd;
5285                 mod_btf->name = strdup(name);
5286                 if (!mod_btf->name) {
5287                         err = -ENOMEM;
5288                         goto err_out;
5289                 }
5290                 continue;
5291
5292 err_out:
5293                 close(fd);
5294                 return err;
5295         }
5296
5297         return 0;
5298 }
5299
5300 static struct bpf_core_cand_list *
5301 bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 local_type_id)
5302 {
5303         struct bpf_core_cand local_cand = {};
5304         struct bpf_core_cand_list *cands;
5305         const struct btf *main_btf;
5306         const struct btf_type *local_t;
5307         const char *local_name;
5308         size_t local_essent_len;
5309         int err, i;
5310
5311         local_cand.btf = local_btf;
5312         local_cand.id = local_type_id;
5313         local_t = btf__type_by_id(local_btf, local_type_id);
5314         if (!local_t)
5315                 return ERR_PTR(-EINVAL);
5316
5317         local_name = btf__name_by_offset(local_btf, local_t->name_off);
5318         if (str_is_empty(local_name))
5319                 return ERR_PTR(-EINVAL);
5320         local_essent_len = bpf_core_essential_name_len(local_name);
5321
5322         cands = calloc(1, sizeof(*cands));
5323         if (!cands)
5324                 return ERR_PTR(-ENOMEM);
5325
5326         /* Attempt to find target candidates in vmlinux BTF first */
5327         main_btf = obj->btf_vmlinux_override ?: obj->btf_vmlinux;
5328         err = bpf_core_add_cands(&local_cand, local_essent_len, main_btf, "vmlinux", 1, cands);
5329         if (err)
5330                 goto err_out;
5331
5332         /* if vmlinux BTF has any candidate, don't go for module BTFs */
5333         if (cands->len)
5334                 return cands;
5335
5336         /* if vmlinux BTF was overridden, don't attempt to load module BTFs */
5337         if (obj->btf_vmlinux_override)
5338                 return cands;
5339
5340         /* now look through module BTFs, still trying to find candidates */
5341         err = load_module_btfs(obj);
5342         if (err)
5343                 goto err_out;
5344
5345         for (i = 0; i < obj->btf_module_cnt; i++) {
5346                 err = bpf_core_add_cands(&local_cand, local_essent_len,
5347                                          obj->btf_modules[i].btf,
5348                                          obj->btf_modules[i].name,
5349                                          btf__type_cnt(obj->btf_vmlinux),
5350                                          cands);
5351                 if (err)
5352                         goto err_out;
5353         }
5354
5355         return cands;
5356 err_out:
5357         bpf_core_free_cands(cands);
5358         return ERR_PTR(err);
5359 }
5360
5361 /* Check local and target types for compatibility. This check is used for
5362  * type-based CO-RE relocations and follows slightly different rules from
5363  * field-based relocations. This function assumes that root types were already
5364  * checked for name match. Beyond that initial root-level name check, names
5365  * are completely ignored. Compatibility rules are as follows:
5366  *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but
5367  *     kind should match for local and target types (i.e., STRUCT is not
5368  *     compatible with UNION);
5369  *   - for ENUMs, the size is ignored;
5370  *   - for INT, size and signedness are ignored;
5371  *   - for ARRAY, dimensionality is ignored, element types are checked for
5372  *     compatibility recursively;
5373  *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
5374  *   - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
5375  *   - FUNC_PROTOs are compatible if they have compatible signature: same
5376  *     number of input args and compatible return and argument types.
5377  * These rules are not set in stone and probably will be adjusted as we get
5378  * more experience with using BPF CO-RE relocations.
5379  */
5380 int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
5381                               const struct btf *targ_btf, __u32 targ_id)
5382 {
5383         return __bpf_core_types_are_compat(local_btf, local_id, targ_btf, targ_id, 32);
5384 }
5385
5386 int bpf_core_types_match(const struct btf *local_btf, __u32 local_id,
5387                          const struct btf *targ_btf, __u32 targ_id)
5388 {
5389         return __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id, false, 32);
5390 }
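/* Editor's examples of the rules above: 'int' is compatible with
 * 'unsigned long' (INT size/signedness ignored), 'int x[4]' with
 * 'int x[16]' (ARRAY dimensionality ignored), while a struct never
 * matches a union of the same name (kind must match).
 */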
5391
5392 static size_t bpf_core_hash_fn(const long key, void *ctx)
5393 {
5394         return key;
5395 }
5396
5397 static bool bpf_core_equal_fn(const long k1, const long k2, void *ctx)
5398 {
5399         return k1 == k2;
5400 }
5401
5402 static int record_relo_core(struct bpf_program *prog,
5403                             const struct bpf_core_relo *core_relo, int insn_idx)
5404 {
5405         struct reloc_desc *relos, *relo;
5406
5407         relos = libbpf_reallocarray(prog->reloc_desc,
5408                                     prog->nr_reloc + 1, sizeof(*relos));
5409         if (!relos)
5410                 return -ENOMEM;
5411         relo = &relos[prog->nr_reloc];
5412         relo->type = RELO_CORE;
5413         relo->insn_idx = insn_idx;
5414         relo->core_relo = core_relo;
5415         prog->reloc_desc = relos;
5416         prog->nr_reloc++;
5417         return 0;
5418 }
5419
5420 static const struct bpf_core_relo *find_relo_core(struct bpf_program *prog, int insn_idx)
5421 {
5422         struct reloc_desc *relo;
5423         int i;
5424
5425         for (i = 0; i < prog->nr_reloc; i++) {
5426                 relo = &prog->reloc_desc[i];
5427                 if (relo->type != RELO_CORE || relo->insn_idx != insn_idx)
5428                         continue;
5429
5430                 return relo->core_relo;
5431         }
5432
5433         return NULL;
5434 }
5435
5436 static int bpf_core_resolve_relo(struct bpf_program *prog,
5437                                  const struct bpf_core_relo *relo,
5438                                  int relo_idx,
5439                                  const struct btf *local_btf,
5440                                  struct hashmap *cand_cache,
5441                                  struct bpf_core_relo_res *targ_res)
5442 {
5443         struct bpf_core_spec specs_scratch[3] = {};
5444         struct bpf_core_cand_list *cands = NULL;
5445         const char *prog_name = prog->name;
5446         const struct btf_type *local_type;
5447         const char *local_name;
5448         __u32 local_id = relo->type_id;
5449         int err;
5450
5451         local_type = btf__type_by_id(local_btf, local_id);
5452         if (!local_type)
5453                 return -EINVAL;
5454
5455         local_name = btf__name_by_offset(local_btf, local_type->name_off);
5456         if (!local_name)
5457                 return -EINVAL;
5458
5459         if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&
5460             !hashmap__find(cand_cache, local_id, &cands)) {
5461                 cands = bpf_core_find_cands(prog->obj, local_btf, local_id);
5462                 if (IS_ERR(cands)) {
5463                         pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s %s: %ld\n",
5464                                 prog_name, relo_idx, local_id, btf_kind_str(local_type),
5465                                 local_name, PTR_ERR(cands));
5466                         return PTR_ERR(cands);
5467                 }
5468                 err = hashmap__set(cand_cache, local_id, cands, NULL, NULL);
5469                 if (err) {
5470                         bpf_core_free_cands(cands);
5471                         return err;
5472                 }
5473         }
5474
5475         return bpf_core_calc_relo_insn(prog_name, relo, relo_idx, local_btf, cands, specs_scratch,
5476                                        targ_res);
5477 }
5478
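/* Resolve and apply all CO-RE relocations recorded in .BTF.ext, matching
 * local types against vmlinux BTF (or the explicitly provided targ_btf_path
 * override) and, failing that, against kernel module BTFs, patching the
 * affected instructions in place
 */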
5479 static int
5480 bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)
5481 {
5482         const struct btf_ext_info_sec *sec;
5483         struct bpf_core_relo_res targ_res;
5484         const struct bpf_core_relo *rec;
5485         const struct btf_ext_info *seg;
5486         struct hashmap_entry *entry;
5487         struct hashmap *cand_cache = NULL;
5488         struct bpf_program *prog;
5489         struct bpf_insn *insn;
5490         const char *sec_name;
5491         int i, err = 0, insn_idx, sec_idx, sec_num;
5492
5493         if (obj->btf_ext->core_relo_info.len == 0)
5494                 return 0;
5495
5496         if (targ_btf_path) {
5497                 obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL);
5498                 err = libbpf_get_error(obj->btf_vmlinux_override);
5499                 if (err) {
5500                         pr_warn("failed to parse target BTF: %d\n", err);
5501                         return err;
5502                 }
5503         }
5504
5505         cand_cache = hashmap__new(bpf_core_hash_fn, bpf_core_equal_fn, NULL);
5506         if (IS_ERR(cand_cache)) {
5507                 err = PTR_ERR(cand_cache);
5508                 goto out;
5509         }
5510
5511         seg = &obj->btf_ext->core_relo_info;
5512         sec_num = 0;
5513         for_each_btf_ext_sec(seg, sec) {
5514                 sec_idx = seg->sec_idxs[sec_num];
5515                 sec_num++;
5516
5517                 sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);
5518                 if (str_is_empty(sec_name)) {
5519                         err = -EINVAL;
5520                         goto out;
5521                 }
5522
5523                 pr_debug("sec '%s': found %d CO-RE relocations\n", sec_name, sec->num_info);
5524
5525                 for_each_btf_ext_rec(seg, sec, i, rec) {
5526                         if (rec->insn_off % BPF_INSN_SZ) {
5527                                 err = -EINVAL;
                                 goto out;
                         }
5528                         insn_idx = rec->insn_off / BPF_INSN_SZ;
5529                         prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
5530                         if (!prog) {
5531                                 /* When __weak subprog is "overridden" by another instance
5532                                  * of the subprog from a different object file, linker still
5533                                  * appends all the .BTF.ext info that used to belong to that
5534                                  * eliminated subprogram.
5535                                  * This is similar to what x86-64 linker does for relocations.
5536                                  * So ignore such relocations, just like we ignore
5537                                  * subprog instructions when discovering subprograms.
5538                                  */
5539                                 pr_debug("sec '%s': skipping CO-RE relocation #%d for insn #%d belonging to eliminated weak subprogram\n",
5540                                          sec_name, i, insn_idx);
5541                                 continue;
5542                         }
5543                         /* no need to apply CO-RE relocation if the program is
5544                          * not going to be loaded
5545                          */
5546                         if (!prog->autoload)
5547                                 continue;
5548
5549                         /* adjust insn_idx from section frame of reference to the local
5550                          * program's frame of reference; (sub-)program code is not yet
5551                          * relocated, so it's enough to just subtract in-section offset
5552                          */
5553                         insn_idx = insn_idx - prog->sec_insn_off;
5554                         if (insn_idx >= prog->insns_cnt) {
5555                                 err = -EINVAL;
                                 goto out;
                         }
5556                         insn = &prog->insns[insn_idx];
5557
5558                         err = record_relo_core(prog, rec, insn_idx);
5559                         if (err) {
5560                                 pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n",
5561                                         prog->name, i, err);
5562                                 goto out;
5563                         }
5564
5565                         if (prog->obj->gen_loader)
5566                                 continue;
5567
5568                         err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res);
5569                         if (err) {
5570                                 pr_warn("prog '%s': relo #%d: failed to relocate: %d\n",
5571                                         prog->name, i, err);
5572                                 goto out;
5573                         }
5574
5575                         err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res);
5576                         if (err) {
5577                                 pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n",
5578                                         prog->name, i, insn_idx, err);
5579                                 goto out;
5580                         }
5581                 }
5582         }
5583
5584 out:
5585         /* obj->btf_vmlinux and module BTFs are freed after object load */
5586         btf__free(obj->btf_vmlinux_override);
5587         obj->btf_vmlinux_override = NULL;
5588
5589         if (!IS_ERR_OR_NULL(cand_cache)) {
5590                 hashmap__for_each_entry(cand_cache, entry, i) {
5591                         bpf_core_free_cands(entry->pvalue);
5592                 }
5593                 hashmap__free(cand_cache);
5594         }
5595         return err;
5596 }
5597
5598 /* base map load ldimm64 special constant, used also for log fixup logic */
5599 #define POISON_LDIMM64_MAP_BASE 2001000000
5600 #define POISON_LDIMM64_MAP_PFX "200100"
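/* Note: the _PFX string is a string prefix of the _BASE constant above; log
 * fixup logic searches the verifier log for "invalid func unknown#<PFX>..."
 * to recover which map the poisoned instruction referred to
 */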
5601
5602 static void poison_map_ldimm64(struct bpf_program *prog, int relo_idx,
5603                                int insn_idx, struct bpf_insn *insn,
5604                                int map_idx, const struct bpf_map *map)
5605 {
5606         int i;
5607
5608         pr_debug("prog '%s': relo #%d: poisoning insn #%d that loads map #%d '%s'\n",
5609                  prog->name, relo_idx, insn_idx, map_idx, map->name);
5610
5611         /* we turn single ldimm64 into two identical invalid calls */
5612         for (i = 0; i < 2; i++) {
5613                 insn->code = BPF_JMP | BPF_CALL;
5614                 insn->dst_reg = 0;
5615                 insn->src_reg = 0;
5616                 insn->off = 0;
5617                 /* if this instruction is reachable (not dead code),
5618                  * verifier will complain with something like:
5619                  * invalid func unknown#2001000123
5620                  * where lower 123 is map index into obj->maps[] array
5621                  */
5622                 insn->imm = POISON_LDIMM64_MAP_BASE + map_idx;
5623
5624                 insn++;
5625         }
5626 }
5627
5628 /* unresolved kfunc call special constant, used also for log fixup logic */
5629 #define POISON_CALL_KFUNC_BASE 2002000000
5630 #define POISON_CALL_KFUNC_PFX "2002"
5631
5632 static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
5633                               int insn_idx, struct bpf_insn *insn,
5634                               int ext_idx, const struct extern_desc *ext)
5635 {
5636         pr_debug("prog '%s': relo #%d: poisoning insn #%d that calls kfunc '%s'\n",
5637                  prog->name, relo_idx, insn_idx, ext->name);
5638
5639         /* we turn kfunc call into invalid helper call with identifiable constant */
5640         insn->code = BPF_JMP | BPF_CALL;
5641         insn->dst_reg = 0;
5642         insn->src_reg = 0;
5643         insn->off = 0;
5644         /* if this instruction is reachable (not dead code),
5645          * verifier will complain with something like:
5646          * invalid func unknown#2002000123
5647          * where lower 123 is extern index into obj->externs[] array
5648          */
5649         insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
5650 }
5651
5652 /* Relocate data references within program code:
5653  *  - map references;
5654  *  - global variable references;
5655  *  - extern references.
5656  */
5657 static int
5658 bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
5659 {
5660         int i;
5661
5662         for (i = 0; i < prog->nr_reloc; i++) {
5663                 struct reloc_desc *relo = &prog->reloc_desc[i];
5664                 struct bpf_insn *insn = &prog->insns[relo->insn_idx];
5665                 const struct bpf_map *map;
5666                 struct extern_desc *ext;
5667
5668                 switch (relo->type) {
5669                 case RELO_LD64:
5670                         map = &obj->maps[relo->map_idx];
5671                         if (obj->gen_loader) {
5672                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX;
5673                                 insn[0].imm = relo->map_idx;
5674                         } else if (map->autocreate) {
5675                                 insn[0].src_reg = BPF_PSEUDO_MAP_FD;
5676                                 insn[0].imm = map->fd;
5677                         } else {
5678                                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5679                                                    relo->map_idx, map);
5680                         }
5681                         break;
5682                 case RELO_DATA:
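                        /* ldimm64 occupies two insn slots; the second slot's
                         * imm becomes the offset within the map's value,
                         * combining the compiler-emitted offset (insn[0].imm)
                         * with the symbol's section offset (relo->sym_off)
                         */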
5683                         map = &obj->maps[relo->map_idx];
5684                         insn[1].imm = insn[0].imm + relo->sym_off;
5685                         if (obj->gen_loader) {
5686                                 insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5687                                 insn[0].imm = relo->map_idx;
5688                         } else if (map->autocreate) {
5689                                 insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5690                                 insn[0].imm = map->fd;
5691                         } else {
5692                                 poison_map_ldimm64(prog, i, relo->insn_idx, insn,
5693                                                    relo->map_idx, map);
5694                         }
5695                         break;
5696                 case RELO_EXTERN_LD64:
5697                         ext = &obj->externs[relo->ext_idx];
5698                         if (ext->type == EXT_KCFG) {
5699                                 if (obj->gen_loader) {
5700                                         insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;
5701                                         insn[0].imm = obj->kconfig_map_idx;
5702                                 } else {
5703                                         insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
5704                                         insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
5705                                 }
5706                                 insn[1].imm = ext->kcfg.data_off;
5707                         } else /* EXT_KSYM */ {
5708                                 if (ext->ksym.type_id && ext->is_set) { /* typed ksyms */
5709                                         insn[0].src_reg = BPF_PSEUDO_BTF_ID;
5710                                         insn[0].imm = ext->ksym.kernel_btf_id;
5711                                         insn[1].imm = ext->ksym.kernel_btf_obj_fd;
5712                                 } else { /* typeless ksyms or unresolved typed ksyms */
5713                                         insn[0].imm = (__u32)ext->ksym.addr;
5714                                         insn[1].imm = ext->ksym.addr >> 32;
5715                                 }
5716                         }
5717                         break;
5718                 case RELO_EXTERN_CALL:
5719                         ext = &obj->externs[relo->ext_idx];
5720                         insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
5721                         if (ext->is_set) {
5722                                 insn[0].imm = ext->ksym.kernel_btf_id;
5723                                 insn[0].off = ext->ksym.btf_fd_idx;
5724                         } else { /* unresolved weak kfunc call */
5725                                 poison_kfunc_call(prog, i, relo->insn_idx, insn,
5726                                                   relo->ext_idx, ext);
5727                         }
5728                         break;
5729                 case RELO_SUBPROG_ADDR:
5730                         if (insn[0].src_reg != BPF_PSEUDO_FUNC) {
5731                                 pr_warn("prog '%s': relo #%d: bad insn\n",
5732                                         prog->name, i);
5733                                 return -EINVAL;
5734                         }
5735                         /* handled already */
5736                         break;
5737                 case RELO_CALL:
5738                         /* handled already */
5739                         break;
5740                 case RELO_CORE:
5741                         /* will be handled by bpf_program_record_relos() */
5742                         break;
5743                 default:
5744                         pr_warn("prog '%s': relo #%d: bad relo type %d\n",
5745                                 prog->name, i, relo->type);
5746                         return -EINVAL;
5747                 }
5748         }
5749
5750         return 0;
5751 }
5752
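/* Find the slice of .BTF.ext func/line info records that belongs to a given
 * (sub-)program and append it to the main program's accumulated info,
 * converting Clang's byte-based instruction offsets into the 8-byte insn
 * units the kernel expects
 */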
5753 static int adjust_prog_btf_ext_info(const struct bpf_object *obj,
5754                                     const struct bpf_program *prog,
5755                                     const struct btf_ext_info *ext_info,
5756                                     void **prog_info, __u32 *prog_rec_cnt,
5757                                     __u32 *prog_rec_sz)
5758 {
5759         void *copy_start = NULL, *copy_end = NULL;
5760         void *rec, *rec_end, *new_prog_info;
5761         const struct btf_ext_info_sec *sec;
5762         size_t old_sz, new_sz;
5763         int i, sec_num, sec_idx, off_adj;
5764
5765         sec_num = 0;
5766         for_each_btf_ext_sec(ext_info, sec) {
5767                 sec_idx = ext_info->sec_idxs[sec_num];
5768                 sec_num++;
5769                 if (prog->sec_idx != sec_idx)
5770                         continue;
5771
5772                 for_each_btf_ext_rec(ext_info, sec, i, rec) {
5773                         __u32 insn_off = *(__u32 *)rec / BPF_INSN_SZ;
5774
5775                         if (insn_off < prog->sec_insn_off)
5776                                 continue;
5777                         if (insn_off >= prog->sec_insn_off + prog->sec_insn_cnt)
5778                                 break;
5779
5780                         if (!copy_start)
5781                                 copy_start = rec;
5782                         copy_end = rec + ext_info->rec_size;
5783                 }
5784
5785                 if (!copy_start)
5786                         return -ENOENT;
5787
5788                 /* append func/line info of a given (sub-)program to the main
5789                  * program func/line info
5790                  */
5791                 old_sz = (size_t)(*prog_rec_cnt) * ext_info->rec_size;
5792                 new_sz = old_sz + (copy_end - copy_start);
5793                 new_prog_info = realloc(*prog_info, new_sz);
5794                 if (!new_prog_info)
5795                         return -ENOMEM;
5796                 *prog_info = new_prog_info;
5797                 *prog_rec_cnt = new_sz / ext_info->rec_size;
5798                 memcpy(new_prog_info + old_sz, copy_start, copy_end - copy_start);
5799
5800                 /* Kernel instruction offsets are in units of 8-byte
5801                  * instructions, while .BTF.ext instruction offsets generated
5802                  * by Clang are in units of bytes. So convert Clang offsets
5803                  * into kernel offsets and adjust offset according to program
5804                  * relocated position.
5805                  */
5806                 off_adj = prog->sub_insn_off - prog->sec_insn_off;
5807                 rec = new_prog_info + old_sz;
5808                 rec_end = new_prog_info + new_sz;
5809                 for (; rec < rec_end; rec += ext_info->rec_size) {
5810                         __u32 *insn_off = rec;
5811
5812                         *insn_off = *insn_off / BPF_INSN_SZ + off_adj;
5813                 }
5814                 *prog_rec_sz = ext_info->rec_size;
5815                 return 0;
5816         }
5817
5818         return -ENOENT;
5819 }
5820
5821 static int
5822 reloc_prog_func_and_line_info(const struct bpf_object *obj,
5823                               struct bpf_program *main_prog,
5824                               const struct bpf_program *prog)
5825 {
5826         int err;
5827
5828         /* no .BTF.ext relocation if .BTF.ext is missing or kernel doesn't
5829          * support func/line info
5830          */
5831         if (!obj->btf_ext || !kernel_supports(obj, FEAT_BTF_FUNC))
5832                 return 0;
5833
5834         /* only attempt func info relocation if main program's func_info
5835          * relocation was successful
5836          */
5837         if (main_prog != prog && !main_prog->func_info)
5838                 goto line_info;
5839
5840         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->func_info,
5841                                        &main_prog->func_info,
5842                                        &main_prog->func_info_cnt,
5843                                        &main_prog->func_info_rec_size);
5844         if (err) {
5845                 if (err != -ENOENT) {
5846                         pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n",
5847                                 prog->name, err);
5848                         return err;
5849                 }
5850                 if (main_prog->func_info) {
5851                         /*
5852                          * Some info has already been found, but this
5853                          * btf_ext reloc failed, so we have to error out.
5854                          */
5855                         pr_warn("prog '%s': missing .BTF.ext function info.\n", prog->name);
5856                         return err;
5857                 }
5858                 /* Couldn't load the very first info, so ignore the rest. */
5859                 pr_warn("prog '%s': missing .BTF.ext function info for the main program, skipping all of .BTF.ext func info.\n",
5860                         prog->name);
5861         }
5862
5863 line_info:
5864         /* don't relocate line info if main program's relocation failed */
5865         if (main_prog != prog && !main_prog->line_info)
5866                 return 0;
5867
5868         err = adjust_prog_btf_ext_info(obj, prog, &obj->btf_ext->line_info,
5869                                        &main_prog->line_info,
5870                                        &main_prog->line_info_cnt,
5871                                        &main_prog->line_info_rec_size);
5872         if (err) {
5873                 if (err != -ENOENT) {
5874                         pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n",
5875                                 prog->name, err);
5876                         return err;
5877                 }
5878                 if (main_prog->line_info) {
5879                         /*
5880                          * Some info has already been found, but this
5881                          * btf_ext reloc failed, so we have to error out.
5882                          */
5883                         pr_warn("prog '%s': missing .BTF.ext line info.\n", prog->name);
5884                         return err;
5885                 }
5886                 /* Couldn't load the very first info, so ignore the rest. */
5887                 pr_warn("prog '%s': missing .BTF.ext line info for the main program, skipping all of .BTF.ext line info.\n",
5888                         prog->name);
5889         }
5890         return 0;
5891 }
5892
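/* relocation descriptors are kept sorted by insn_idx (see
 * bpf_object__sort_relos() and the note in append_subprog_relos()), which is
 * what makes the bsearch() in find_prog_insn_relo() valid
 */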
5893 static int cmp_relo_by_insn_idx(const void *key, const void *elem)
5894 {
5895         size_t insn_idx = *(const size_t *)key;
5896         const struct reloc_desc *relo = elem;
5897
5898         if (insn_idx == relo->insn_idx)
5899                 return 0;
5900         return insn_idx < relo->insn_idx ? -1 : 1;
5901 }
5902
5903 static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)
5904 {
5905         if (!prog->nr_reloc)
5906                 return NULL;
5907         return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,
5908                        sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);
5909 }
5910
5911 static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_program *subprog)
5912 {
5913         int new_cnt = main_prog->nr_reloc + subprog->nr_reloc;
5914         struct reloc_desc *relos;
5915         int i;
5916
5917         if (main_prog == subprog)
5918                 return 0;
5919         relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));
5920         /* if new count is zero, reallocarray can return a valid NULL result;
5921          * in this case the previous pointer will be freed, so we *have to*
5922          * reassign the old pointer to the new value (even if it's NULL)
5923          */
5924         if (!relos && new_cnt)
5925                 return -ENOMEM;
5926         if (subprog->nr_reloc)
5927                 memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc,
5928                        sizeof(*relos) * subprog->nr_reloc);
5929
5930         for (i = main_prog->nr_reloc; i < new_cnt; i++)
5931                 relos[i].insn_idx += subprog->sub_insn_off;
5932         /* After insn_idx adjustment the 'relos' array is still sorted
5933          * by insn_idx and doesn't break bsearch.
5934          */
5935         main_prog->reloc_desc = relos;
5936         main_prog->nr_reloc = new_cnt;
5937         return 0;
5938 }
5939
5940 static int
5941 bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
5942                                 struct bpf_program *subprog)
5943 {
5944         struct bpf_insn *insns;
5945         size_t new_cnt;
5946         int err;
5947
5948         subprog->sub_insn_off = main_prog->insns_cnt;
5949
5950         new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
5951         insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
5952         if (!insns) {
5953                 pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
5954                 return -ENOMEM;
5955         }
5956         main_prog->insns = insns;
5957         main_prog->insns_cnt = new_cnt;
5958
5959         memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
5960                subprog->insns_cnt * sizeof(*insns));
5961
5962         pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
5963                  main_prog->name, subprog->insns_cnt, subprog->name);
5964
5965         /* The subprog insns are now appended. Append its relos too. */
5966         err = append_subprog_relos(main_prog, subprog);
5967         if (err)
5968                 return err;
5969         return 0;
5970 }
5971
5972 static int
5973 bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
5974                        struct bpf_program *prog)
5975 {
5976         size_t sub_insn_idx, insn_idx;
5977         struct bpf_program *subprog;
5978         struct reloc_desc *relo;
5979         struct bpf_insn *insn;
5980         int err;
5981
5982         err = reloc_prog_func_and_line_info(obj, main_prog, prog);
5983         if (err)
5984                 return err;
5985
5986         for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
5987                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
5988                 if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
5989                         continue;
5990
5991                 relo = find_prog_insn_relo(prog, insn_idx);
5992                 if (relo && relo->type == RELO_EXTERN_CALL)
5993                         /* kfunc relocations will be handled later
5994                          * in bpf_object__relocate_data()
5995                          */
5996                         continue;
5997                 if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
5998                         pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
5999                                 prog->name, insn_idx, relo->type);
6000                         return -LIBBPF_ERRNO__RELOC;
6001                 }
6002                 if (relo) {
6003                         /* sub-program instruction index is a combination of
6004                          * an offset of a symbol pointed to by relocation and
6005                          * call instruction's imm field; for global functions,
6006                          * call always has imm = -1, but for static functions
6007                          * relocation is against STT_SECTION and insn->imm
6008                          * points to a start of a static function
6009                          *
6010                          * for subprog addr relocation, the relo->sym_off + insn->imm is
6011                          * the byte offset in the corresponding section.
6012                          */
6013                         if (relo->type == RELO_CALL)
6014                                 sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
6015                         else
6016                                 sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
6017                 } else if (insn_is_pseudo_func(insn)) {
6018                         /*
6019                          * RELO_SUBPROG_ADDR relo is always emitted even if both
6020                          * functions are in the same section, so it shouldn't reach here.
6021                          */
6022                         pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
6023                                 prog->name, insn_idx);
6024                         return -LIBBPF_ERRNO__RELOC;
6025                 } else {
6026                         /* if subprogram call is to a static function within
6027                          * the same ELF section, there won't be any relocation
6028                          * emitted, but it also means there is no additional
6029                          * offset necessary, insns->imm is relative to
6030                          * instruction's original position within the section
6031                          */
6032                         sub_insn_idx = prog->sec_insn_off + insn_idx + insn->imm + 1;
6033                 }
6034
6035                 /* we enforce that sub-programs should be in .text section */
6036                 subprog = find_prog_by_sec_insn(obj, obj->efile.text_shndx, sub_insn_idx);
6037                 if (!subprog) {
6038                         pr_warn("prog '%s': no .text section found yet sub-program call exists\n",
6039                                 prog->name);
6040                         return -LIBBPF_ERRNO__RELOC;
6041                 }
6042
6043                 /* if it's the first call instruction calling into this
6044                  * subprogram (meaning this subprog hasn't been processed
6045                  * yet) within the context of current main program:
6046                  *   - append it at the end of main program's instructions block;
6047                  *   - process it recursively, while current program is put on hold;
6048                  *   - if that subprogram calls some other not yet processed
6049                  *   subprogram, same thing will happen recursively until
6050                  *   there are no more unprocessed subprograms left to append
6051                  *   and relocate.
6052                  */
6053                 if (subprog->sub_insn_off == 0) {
6054                         err = bpf_object__append_subprog_code(obj, main_prog, subprog);
6055                         if (err)
6056                                 return err;
6057                         err = bpf_object__reloc_code(obj, main_prog, subprog);
6058                         if (err)
6059                                 return err;
6060                 }
6061
6062                 /* main_prog->insns memory could have been re-allocated, so
6063                  * calculate pointer again
6064                  */
6065                 insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
6066                 /* calculate correct instruction position within current main
6067                  * prog; each main prog can have a different set of
6068                  * subprograms appended (potentially in different order as
6069                  * well), so position of any subprog can be different for
6070                  * different main programs
6071                  */
6072                 insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
6073
6074                 pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
6075                          prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
6076         }
6077
6078         return 0;
6079 }
6080
6081 /*
6082  * Relocate sub-program calls.
6083  *
6084  * The algorithm operates as follows. Each entry-point BPF program (referred
6085  * to as a main prog) is processed separately. Each subprog (a non-entry
6086  * function callable from entry progs or other subprogs) gets its
6087  * sub_insn_off reset to zero, indicating that this subprogram hasn't yet
6088  * been appended and relocated within the current main prog. Once it's
6089  * relocated, sub_insn_off will point at the position within the current
6090  * main prog where the given subprog was appended. This will further be used
6091  * to relocate all the call instructions jumping into this subprog.
6092  *
6093  * We start with main program and process all call instructions. If the call
6094  * is into a subprog that hasn't been processed (i.e., subprog->sub_insn_off
6095  * is zero), subprog instructions are appended at the end of main program's
6096  * instruction array. Then main program is "put on hold" while we recursively
6097  * process newly appended subprogram. If that subprogram calls into another
6098  * subprogram that hasn't been appended, new subprogram is appended again to
6099  * the *main* prog's instructions (subprog's instructions are always left
6100  * untouched, as they need to be in unmodified state for subsequent main progs
6101  * and subprog instructions are always sent only as part of a main prog) and
6102  * the process continues recursively. Once all the subprogs called from a main
6103  * prog or any of its subprogs are appended (and relocated), all their
6104  * positions within finalized instructions array are known, so it's easy to
6105  * rewrite call instructions with correct relative offsets, corresponding to
6106  * desired target subprog.
6107  *
6108  * It's important to realize that some subprogs might not be called from a
6109  * given main prog or any of its called/used subprogs. Those will keep their
6110  * subprog->sub_insn_off as zero at all times and won't be appended to current
6111  * main prog and won't be relocated within the context of current main prog.
6112  * They might still be used from other main progs later.
6113  *
6114  * Visually this process can be shown as below. Suppose we have two main
6115  * programs mainA and mainB and BPF object contains three subprogs: subA,
6116  * subB, and subC. mainA calls only subA, mainB calls only subC, but subA and
6117  * subC both call subB:
6118  *
6119  *        +--------+ +-------+
6120  *        |        v v       |
6121  *     +--+---+ +--+-+-+ +---+--+
6122  *     | subA | | subB | | subC |
6123  *     +--+---+ +------+ +---+--+
6124  *        ^                  ^
6125  *        |                  |
6126  *    +---+-------+   +------+----+
6127  *    |   mainA   |   |   mainB   |
6128  *    +-----------+   +-----------+
6129  *
6130  * We'll start relocating mainA, will find subA, append it and start
6131  * processing subA recursively:
6132  *
6133  *    +-----------+------+
6134  *    |   mainA   | subA |
6135  *    +-----------+------+
6136  *
6137  * At this point we notice that subB is used from subA, so we append it and
6138  * relocate (there are no further subcalls from subB):
6139  *
6140  *    +-----------+------+------+
6141  *    |   mainA   | subA | subB |
6142  *    +-----------+------+------+
6143  *
6144  * At this point, we relocate subA calls, then go one level up and finish with
6145  * relocating mainA calls. mainA is done.
6146  *
6147  * For mainB the process is similar but results in a different order. We start with
6148  * mainB and skip subA and subB, as mainB never calls them (at least
6149  * directly), but we see subC is needed, so we append and start processing it:
6150  *
6151  *    +-----------+------+
6152  *    |   mainB   | subC |
6153  *    +-----------+------+
6154  * Now we see subC needs subB, so we go back to it, append and relocate it:
6155  *
6156  *    +-----------+------+------+
6157  *    |   mainB   | subC | subB |
6158  *    +-----------+------+------+
6159  *
6160  * At this point we unwind recursion, relocate calls in subC, then in mainB.
6161  */
6162 static int
6163 bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
6164 {
6165         struct bpf_program *subprog;
6166         int i, err;
6167
6168         /* mark all subprogs as not relocated (yet) within the context of
6169          * current main program
6170          */
6171         for (i = 0; i < obj->nr_programs; i++) {
6172                 subprog = &obj->programs[i];
6173                 if (!prog_is_subprog(obj, subprog))
6174                         continue;
6175
6176                 subprog->sub_insn_off = 0;
6177         }
6178
6179         err = bpf_object__reloc_code(obj, prog, prog);
6180         if (err)
6181                 return err;
6182
6183         return 0;
6184 }
6185
6186 static void
6187 bpf_object__free_relocs(struct bpf_object *obj)
6188 {
6189         struct bpf_program *prog;
6190         int i;
6191
6192         /* free up relocation descriptors */
6193         for (i = 0; i < obj->nr_programs; i++) {
6194                 prog = &obj->programs[i];
6195                 zfree(&prog->reloc_desc);
6196                 prog->nr_reloc = 0;
6197         }
6198 }
6199
6200 static int cmp_relocs(const void *_a, const void *_b)
6201 {
6202         const struct reloc_desc *a = _a;
6203         const struct reloc_desc *b = _b;
6204
6205         if (a->insn_idx != b->insn_idx)
6206                 return a->insn_idx < b->insn_idx ? -1 : 1;
6207
6208         /* no two relocations should have the same insn_idx, but ... */
6209         if (a->type != b->type)
6210                 return a->type < b->type ? -1 : 1;
6211
6212         return 0;
6213 }
6214
6215 static void bpf_object__sort_relos(struct bpf_object *obj)
6216 {
6217         int i;
6218
6219         for (i = 0; i < obj->nr_programs; i++) {
6220                 struct bpf_program *p = &obj->programs[i];
6221
6222                 if (!p->nr_reloc)
6223                         continue;
6224
6225                 qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs);
6226         }
6227 }
6228
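/* Look for "exception_callback:<name>" DECL_TAGs attached to the main
 * program's BTF FUNC and record the index of the matching global subprog in
 * prog->exception_cb_idx, so it can be treated specially during relocation
 * and load
 */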
6229 static int bpf_prog_assign_exc_cb(struct bpf_object *obj, struct bpf_program *prog)
6230 {
6231         const char *str = "exception_callback:";
6232         size_t pfx_len = strlen(str);
6233         int i, j, n;
6234
6235         if (!obj->btf || !kernel_supports(obj, FEAT_BTF_DECL_TAG))
6236                 return 0;
6237
6238         n = btf__type_cnt(obj->btf);
6239         for (i = 1; i < n; i++) {
6240                 const char *name;
6241                 struct btf_type *t;
6242
6243                 t = btf_type_by_id(obj->btf, i);
6244                 if (!btf_is_decl_tag(t) || btf_decl_tag(t)->component_idx != -1)
6245                         continue;
6246
6247                 name = btf__str_by_offset(obj->btf, t->name_off);
6248                 if (strncmp(name, str, pfx_len) != 0)
6249                         continue;
6250
6251                 t = btf_type_by_id(obj->btf, t->type);
6252                 if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL) {
6253                         pr_warn("prog '%s': exception_callback:<value> decl tag not applied to the main program\n",
6254                                 prog->name);
6255                         return -EINVAL;
6256                 }
6257                 if (strcmp(prog->name, btf__str_by_offset(obj->btf, t->name_off)) != 0)
6258                         continue;
6259                 /* If multiple callbacks are specified for the same prog,
6260                  * the verifier will eventually return an error for this
6261                  * case, hence simply skip appending a subprog.
6262                  */
6263                 if (prog->exception_cb_idx >= 0) {
6264                         prog->exception_cb_idx = -1;
6265                         break;
6266                 }
6267
6268                 name += pfx_len;
6269                 if (str_is_empty(name)) {
6270                         pr_warn("prog '%s': exception_callback:<value> decl tag contains empty value\n",
6271                                 prog->name);
6272                         return -EINVAL;
6273                 }
6274
6275                 for (j = 0; j < obj->nr_programs; j++) {
6276                         struct bpf_program *subprog = &obj->programs[j];
6277
6278                         if (!prog_is_subprog(obj, subprog))
6279                                 continue;
6280                         if (strcmp(name, subprog->name) != 0)
6281                                 continue;
6282                         /* Enforce non-hidden, as from the verifier's point
6283                          * of view it expects global functions, whereas the
6284                          * mark_btf_static fixes up linkage as static.
6285                          */
6286                         if (!subprog->sym_global || subprog->mark_btf_static) {
6287                                 pr_warn("prog '%s': exception callback %s must be a global non-hidden function\n",
6288                                         prog->name, subprog->name);
6289                                 return -EINVAL;
6290                         }
6291                         /* Let's see if we already saw a static exception callback with the same name */
6292                         if (prog->exception_cb_idx >= 0) {
6293                                 pr_warn("prog '%s': multiple subprogs with same name as exception callback '%s'\n",
6294                                         prog->name, subprog->name);
6295                                 return -EINVAL;
6296                         }
6297                         prog->exception_cb_idx = j;
6298                         break;
6299                 }
6300
6301                 if (prog->exception_cb_idx >= 0)
6302                         continue;
6303
6304                 pr_warn("prog '%s': cannot find exception callback '%s'\n", prog->name, name);
6305                 return -ENOENT;
6306         }
6307
6308         return 0;
6309 }
6310
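/* Mapping from BPF program type to the name of the canonical kernel-side
 * context struct; used by the __arg_ctx fallback logic below to synthesize
 * the `struct <ctx_name> *` parameter type the verifier expects
 */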
6311 static struct {
6312         enum bpf_prog_type prog_type;
6313         const char *ctx_name;
6314 } global_ctx_map[] = {
6315         { BPF_PROG_TYPE_CGROUP_DEVICE,           "bpf_cgroup_dev_ctx" },
6316         { BPF_PROG_TYPE_CGROUP_SKB,              "__sk_buff" },
6317         { BPF_PROG_TYPE_CGROUP_SOCK,             "bpf_sock" },
6318         { BPF_PROG_TYPE_CGROUP_SOCK_ADDR,        "bpf_sock_addr" },
6319         { BPF_PROG_TYPE_CGROUP_SOCKOPT,          "bpf_sockopt" },
6320         { BPF_PROG_TYPE_CGROUP_SYSCTL,           "bpf_sysctl" },
6321         { BPF_PROG_TYPE_FLOW_DISSECTOR,          "__sk_buff" },
6322         { BPF_PROG_TYPE_KPROBE,                  "bpf_user_pt_regs_t" },
6323         { BPF_PROG_TYPE_LWT_IN,                  "__sk_buff" },
6324         { BPF_PROG_TYPE_LWT_OUT,                 "__sk_buff" },
6325         { BPF_PROG_TYPE_LWT_SEG6LOCAL,           "__sk_buff" },
6326         { BPF_PROG_TYPE_LWT_XMIT,                "__sk_buff" },
6327         { BPF_PROG_TYPE_NETFILTER,               "bpf_nf_ctx" },
6328         { BPF_PROG_TYPE_PERF_EVENT,              "bpf_perf_event_data" },
6329         { BPF_PROG_TYPE_RAW_TRACEPOINT,          "bpf_raw_tracepoint_args" },
6330         { BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, "bpf_raw_tracepoint_args" },
6331         { BPF_PROG_TYPE_SCHED_ACT,               "__sk_buff" },
6332         { BPF_PROG_TYPE_SCHED_CLS,               "__sk_buff" },
6333         { BPF_PROG_TYPE_SK_LOOKUP,               "bpf_sk_lookup" },
6334         { BPF_PROG_TYPE_SK_MSG,                  "sk_msg_md" },
6335         { BPF_PROG_TYPE_SK_REUSEPORT,            "sk_reuseport_md" },
6336         { BPF_PROG_TYPE_SK_SKB,                  "__sk_buff" },
6337         { BPF_PROG_TYPE_SOCK_OPS,                "bpf_sock_ops" },
6338         { BPF_PROG_TYPE_SOCKET_FILTER,           "__sk_buff" },
6339         { BPF_PROG_TYPE_XDP,                     "xdp_md" },
6340         /* all other program types don't have "named" context structs */
6341 };
6342
6343 /* forward declarations for arch-specific underlying types of bpf_user_pt_regs_t typedef,
6344  * for below __builtin_types_compatible_p() checks;
6345  * with this approach we don't need any extra arch-specific #ifdef guards
6346  */
6347 struct pt_regs;
6348 struct user_pt_regs;
6349 struct user_regs_struct;
6350
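/* Decide whether a global subprog's __arg_ctx-tagged parameter needs its
 * BTF type rewritten to the canonical context type; returns false if the
 * existing type already matches what the kernel verifier would accept
 */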
6351 static bool need_func_arg_type_fixup(const struct btf *btf, const struct bpf_program *prog,
6352                                      const char *subprog_name, int arg_idx,
6353                                      int arg_type_id, const char *ctx_name)
6354 {
6355         const struct btf_type *t;
6356         const char *tname;
6357
6358         /* check if existing parameter already matches verifier expectations */
6359         t = skip_mods_and_typedefs(btf, arg_type_id, NULL);
6360         if (!btf_is_ptr(t))
6361                 goto out_warn;
6362
6363         /* typedef bpf_user_pt_regs_t is a special PITA case, valid for kprobe
6364          * and perf_event programs, so check this case early on and forget
6365          * about it for subsequent checks
6366          */
6367         while (btf_is_mod(t))
6368                 t = btf__type_by_id(btf, t->type);
6369         if (btf_is_typedef(t) &&
6370             (prog->type == BPF_PROG_TYPE_KPROBE || prog->type == BPF_PROG_TYPE_PERF_EVENT)) {
6371                 tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6372                 if (strcmp(tname, "bpf_user_pt_regs_t") == 0)
6373                         return false; /* canonical type for kprobe/perf_event */
6374         }
6375
6376         /* now we can ignore typedefs moving forward */
6377         t = skip_mods_and_typedefs(btf, t->type, NULL);
6378
6379         /* if it's `void *`, definitely fix up BTF info */
6380         if (btf_is_void(t))
6381                 return true;
6382
6383         /* if it's already proper canonical type, no need to fix up */
6384         tname = btf__str_by_offset(btf, t->name_off) ?: "<anon>";
6385         if (btf_is_struct(t) && strcmp(tname, ctx_name) == 0)
6386                 return false;
6387
6388         /* special cases */
6389         switch (prog->type) {
6390         case BPF_PROG_TYPE_KPROBE:
6391                 /* `struct pt_regs *` is expected, but we need to fix up */
6392                 if (btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6393                         return true;
6394                 break;
6395         case BPF_PROG_TYPE_PERF_EVENT:
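                /* bpf_user_pt_regs_t is an arch-specific typedef resolving
                 * to struct pt_regs, struct user_pt_regs, or struct
                 * user_regs_struct, depending on the architecture; accept
                 * whichever underlying struct name matches the architecture
                 * libbpf itself was compiled for
                 */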
6396                 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
6397                     btf_is_struct(t) && strcmp(tname, "pt_regs") == 0)
6398                         return false;
6399                 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
6400                     btf_is_struct(t) && strcmp(tname, "user_pt_regs") == 0)
6401                         return false;
6402                 if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
6403                     btf_is_struct(t) && strcmp(tname, "user_regs_struct") == 0)
6404                         return false;
6405                 break;
6406         case BPF_PROG_TYPE_RAW_TRACEPOINT:
6407         case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
6408                 /* allow u64* as ctx */
6409                 if (btf_is_int(t) && t->size == 8)
6410                         return true;
6411                 break;
6412         default:
6413                 break;
6414         }
6415
6416 out_warn:
6417         pr_warn("prog '%s': subprog '%s' arg#%d is expected to be of `struct %s *` type\n",
6418                 prog->name, subprog_name, arg_idx, ctx_name);
6419         return false;
6420 }
6421
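/* Make an independent copy of a FUNC + FUNC_PROTO type pair, so that one
 * program's func_info can point at an adjusted signature without affecting
 * other programs sharing the same BTF; returns the new FUNC type ID
 */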
6422 static int clone_func_btf_info(struct btf *btf, int orig_fn_id, struct bpf_program *prog)
6423 {
6424         int fn_id, fn_proto_id, ret_type_id, orig_proto_id;
6425         int i, err, arg_cnt, fn_name_off, linkage;
6426         struct btf_type *fn_t, *fn_proto_t, *t;
6427         struct btf_param *p;
6428
6429         /* caller already validated FUNC -> FUNC_PROTO validity */
6430         fn_t = btf_type_by_id(btf, orig_fn_id);
6431         fn_proto_t = btf_type_by_id(btf, fn_t->type);
6432
6433         /* Note that each btf__add_xxx() operation invalidates
6434          * all btf_type and string pointers, so we need to be
6435          * very careful when cloning BTF types. BTF type
6436          * pointers have to be always refetched. And to avoid
6437          * problems with invalidated string pointers, we
6438          * add empty strings initially, then just fix up
6439          * name_off offsets in place. Offsets are stable for
6440          * existing strings, so that works out.
6441          */
6442         fn_name_off = fn_t->name_off; /* we are about to invalidate fn_t */
6443         linkage = btf_func_linkage(fn_t);
6444         orig_proto_id = fn_t->type; /* original FUNC_PROTO ID */
6445         ret_type_id = fn_proto_t->type; /* fn_proto_t will be invalidated */
6446         arg_cnt = btf_vlen(fn_proto_t);
6447
6448         /* clone FUNC_PROTO and its params */
6449         fn_proto_id = btf__add_func_proto(btf, ret_type_id);
6450         if (fn_proto_id < 0)
6451                 return -EINVAL;
6452
6453         for (i = 0; i < arg_cnt; i++) {
6454                 int name_off;
6455
6456                 /* copy original parameter data */
6457                 t = btf_type_by_id(btf, orig_proto_id);
6458                 p = &btf_params(t)[i];
6459                 name_off = p->name_off;
6460
6461                 err = btf__add_func_param(btf, "", p->type);
6462                 if (err)
6463                         return err;
6464
6465                 fn_proto_t = btf_type_by_id(btf, fn_proto_id);
6466                 p = &btf_params(fn_proto_t)[i];
6467                 p->name_off = name_off; /* use remembered str offset */
6468         }
6469
6470         /* clone FUNC now, btf__add_func() enforces non-empty name, so use
6471          * entry program's name as a placeholder, which we replace immediately
6472          * with original name_off
6473          */
6474         fn_id = btf__add_func(btf, prog->name, linkage, fn_proto_id);
6475         if (fn_id < 0)
6476                 return -EINVAL;
6477
6478         fn_t = btf_type_by_id(btf, fn_id);
6479         fn_t->name_off = fn_name_off; /* reuse original string */
6480
6481         return fn_id;
6482 }
6483
6484 /* Check if main program or global subprog's function prototype has `arg:ctx`
6485  * argument tags, and, if necessary, substitute correct type to match what BPF
6486  * verifier would expect, taking into account specific program type. This
6487  * allows supporting the __arg_ctx tag transparently on old kernels that don't
6488  * yet have native support for it in the verifier, making users' lives much
6489  * easier.
6490  */
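/* For reference, a global subprog opting into this looks roughly like the
 * sketch below (__arg_ctx expands to btf_decl_tag("arg:ctx") and is provided
 * by bpf_helpers.h):
 *
 *   __noinline int handle_event(void *ctx __arg_ctx)
 *   {
 *           return 0;
 *   }
 *
 * On kernels without native __arg_ctx support, the code below rewrites the
 * parameter's BTF type into the `struct <ctx_name> *` the verifier expects
 * for the given program type.
 */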
6491 static int bpf_program_fixup_func_info(struct bpf_object *obj, struct bpf_program *prog)
6492 {
6493         const char *ctx_name = NULL, *ctx_tag = "arg:ctx", *fn_name;
6494         struct bpf_func_info_min *func_rec;
6495         struct btf_type *fn_t, *fn_proto_t;
6496         struct btf *btf = obj->btf;
6497         const struct btf_type *t;
6498         struct btf_param *p;
6499         int ptr_id = 0, struct_id, tag_id, orig_fn_id;
6500         int i, n, arg_idx, arg_cnt, err, rec_idx;
6501         int *orig_ids;
6502
6503         /* no .BTF.ext, no problem */
6504         if (!obj->btf_ext || !prog->func_info)
6505                 return 0;
6506
6507         /* don't do any fix ups if kernel natively supports __arg_ctx */
6508         if (kernel_supports(obj, FEAT_ARG_CTX_TAG))
6509                 return 0;
6510
6511         /* some BPF program types just don't have named context structs, so
6512          * this fallback mechanism doesn't work for them
6513          */
6514         for (i = 0; i < ARRAY_SIZE(global_ctx_map); i++) {
6515                 if (global_ctx_map[i].prog_type != prog->type)
6516                         continue;
6517                 ctx_name = global_ctx_map[i].ctx_name;
6518                 break;
6519         }
6520         if (!ctx_name)
6521                 return 0;
6522
6523         /* remember original func BTF IDs to detect if we already cloned them */
6524         orig_ids = calloc(prog->func_info_cnt, sizeof(*orig_ids));
6525         if (!orig_ids)
6526                 return -ENOMEM;
6527         for (i = 0; i < prog->func_info_cnt; i++) {
6528                 func_rec = prog->func_info + prog->func_info_rec_size * i;
6529                 orig_ids[i] = func_rec->type_id;
6530         }
6531
6532         /* go through each DECL_TAG with "arg:ctx" and see if it points to one
6533          * of our subprogs; if yes and subprog is global and needs adjustment,
6534          * clone and adjust FUNC -> FUNC_PROTO combo
6535          */
6536         for (i = 1, n = btf__type_cnt(btf); i < n; i++) {
6537                 /* only DECL_TAG with "arg:ctx" value are interesting */
6538                 t = btf__type_by_id(btf, i);
6539                 if (!btf_is_decl_tag(t))
6540                         continue;
6541                 if (strcmp(btf__str_by_offset(btf, t->name_off), ctx_tag) != 0)
6542                         continue;
6543
6544                 /* only global funcs need adjustment, if at all */
6545                 orig_fn_id = t->type;
6546                 fn_t = btf_type_by_id(btf, orig_fn_id);
6547                 if (!btf_is_func(fn_t) || btf_func_linkage(fn_t) != BTF_FUNC_GLOBAL)
6548                         continue;
6549
6550                 /* sanity check FUNC -> FUNC_PROTO chain, just in case */
6551                 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6552                 if (!fn_proto_t || !btf_is_func_proto(fn_proto_t))
6553                         continue;
6554
6555                 /* find corresponding func_info record */
6556                 func_rec = NULL;
6557                 for (rec_idx = 0; rec_idx < prog->func_info_cnt; rec_idx++) {
6558                         if (orig_ids[rec_idx] == t->type) {
6559                                 func_rec = prog->func_info + prog->func_info_rec_size * rec_idx;
6560                                 break;
6561                         }
6562                 }
6563                 /* current main program doesn't call into this subprog */
6564                 if (!func_rec)
6565                         continue;
6566
6567                 /* some more sanity checking of DECL_TAG */
6568                 arg_cnt = btf_vlen(fn_proto_t);
6569                 arg_idx = btf_decl_tag(t)->component_idx;
6570                 if (arg_idx < 0 || arg_idx >= arg_cnt)
6571                         continue;
6572
6573                 /* check if we should fix up argument type */
6574                 p = &btf_params(fn_proto_t)[arg_idx];
6575                 fn_name = btf__str_by_offset(btf, fn_t->name_off) ?: "<anon>";
6576                 if (!need_func_arg_type_fixup(btf, prog, fn_name, arg_idx, p->type, ctx_name))
6577                         continue;
6578
6579                 /* clone fn/fn_proto, unless we already did it for another arg */
6580                 if (func_rec->type_id == orig_fn_id) {
6581                         int fn_id;
6582
6583                         fn_id = clone_func_btf_info(btf, orig_fn_id, prog);
6584                         if (fn_id < 0) {
6585                                 err = fn_id;
6586                                 goto err_out;
6587                         }
6588
6589                         /* point func_info record to a cloned FUNC type */
6590                         func_rec->type_id = fn_id;
6591                 }
6592
6593                 /* create PTR -> STRUCT type chain to mark PTR_TO_CTX argument;
6594                  * we do it just once per main BPF program, as all global
6595                  * funcs share the same program type, so we need only one
6596                  * PTR -> STRUCT type chain
6597                  */
6598                 if (ptr_id == 0) {
6599                         struct_id = btf__add_struct(btf, ctx_name, 0);
6600                         ptr_id = btf__add_ptr(btf, struct_id);
6601                         if (ptr_id < 0 || struct_id < 0) {
6602                                 err = -EINVAL;
6603                                 goto err_out;
6604                         }
6605                 }
6606
6607                 /* for completeness, clone DECL_TAG and point it to cloned param */
6608                 tag_id = btf__add_decl_tag(btf, ctx_tag, func_rec->type_id, arg_idx);
6609                 if (tag_id < 0) {
6610                         err = -EINVAL;
6611                         goto err_out;
6612                 }
6613
6614                 /* all the BTF manipulations invalidated pointers, refetch them */
6615                 fn_t = btf_type_by_id(btf, func_rec->type_id);
6616                 fn_proto_t = btf_type_by_id(btf, fn_t->type);
6617
6618                 /* fix up type ID pointed to by param */
6619                 p = &btf_params(fn_proto_t)[arg_idx];
6620                 p->type = ptr_id;
6621         }
6622
6623         free(orig_ids);
6624         return 0;
6625 err_out:
6626         free(orig_ids);
6627         return err;
6628 }
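/* Illustrative sketch (not part of libbpf): the fixup above is what lets
 * a global subprog with a __arg_ctx-annotated argument (bpf_helpers.h)
 * pass verification on kernels that enforce an exact PTR -> STRUCT
 * context type, e.g. for a perf_event program:
 *
 *        __noinline int handle_sample(void *ctx __arg_ctx)
 *        {
 *                return 0;
 *        }
 *
 *        SEC("perf_event")
 *        int prog(void *ctx)
 *        {
 *                return handle_sample(ctx);
 *        }
 *
 * libbpf clones the subprog's FUNC/FUNC_PROTO BTF and points the argument
 * at the expected context struct (bpf_perf_event_data, in this case).
 */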
6629
6630 static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
6631 {
6632         struct bpf_program *prog;
6633         size_t i, j;
6634         int err;
6635
6636         if (obj->btf_ext) {
6637                 err = bpf_object__relocate_core(obj, targ_btf_path);
6638                 if (err) {
6639                         pr_warn("failed to perform CO-RE relocations: %d\n",
6640                                 err);
6641                         return err;
6642                 }
6643                 bpf_object__sort_relos(obj);
6644         }
6645
6646         /* Before relocating calls, pre-process relocations and mark
6647          * the few ld_imm64 instructions that point to subprogs.
6648          * Otherwise bpf_object__reloc_code() would later have to consider
6649          * all ld_imm64 insns as relocation candidates. That would slow
6650          * down relocation, since the number of find_prog_insn_relo() calls
6651          * would increase and most of them would fail to find a relo.
6652          */
6653         for (i = 0; i < obj->nr_programs; i++) {
6654                 prog = &obj->programs[i];
6655                 for (j = 0; j < prog->nr_reloc; j++) {
6656                         struct reloc_desc *relo = &prog->reloc_desc[j];
6657                         struct bpf_insn *insn = &prog->insns[relo->insn_idx];
6658
6659                         /* mark the insn, so it's recognized by insn_is_pseudo_func() */
6660                         if (relo->type == RELO_SUBPROG_ADDR)
6661                                 insn[0].src_reg = BPF_PSEUDO_FUNC;
6662                 }
6663         }
6664
6665         /* relocate subprogram calls and append used subprograms to main
6666          * programs; each copy of subprogram code needs to be relocated
6667          * differently for each main program, because its code location might
6668          * have changed.
6669          * Append subprog relos to main programs to allow data relos to be
6670          * processed after text is completely relocated.
6671          */
6672         for (i = 0; i < obj->nr_programs; i++) {
6673                 prog = &obj->programs[i];
6674                 /* sub-program's sub-calls are relocated within the context of
6675                  * its main program only
6676                  */
6677                 if (prog_is_subprog(obj, prog))
6678                         continue;
6679                 if (!prog->autoload)
6680                         continue;
6681
6682                 err = bpf_object__relocate_calls(obj, prog);
6683                 if (err) {
6684                         pr_warn("prog '%s': failed to relocate calls: %d\n",
6685                                 prog->name, err);
6686                         return err;
6687                 }
6688
6689                 err = bpf_prog_assign_exc_cb(obj, prog);
6690                 if (err)
6691                         return err;
6692                 /* Now, also append exception callback if it has not been done already. */
6693                 if (prog->exception_cb_idx >= 0) {
6694                         struct bpf_program *subprog = &obj->programs[prog->exception_cb_idx];
6695
6696                         /* Calling the exception callback directly is disallowed and
6697                          * will be rejected by the verifier later. If it was already
6698                          * processed, we can skip this step; otherwise, for all other
6699                          * valid cases, we have to append the exception callback now.
6700                          */
6701                         if (subprog->sub_insn_off == 0) {
6702                                 err = bpf_object__append_subprog_code(obj, prog, subprog);
6703                                 if (err)
6704                                         return err;
6705                                 err = bpf_object__reloc_code(obj, prog, subprog);
6706                                 if (err)
6707                                         return err;
6708                         }
6709                 }
6710         }
6711         for (i = 0; i < obj->nr_programs; i++) {
6712                 prog = &obj->programs[i];
6713                 if (prog_is_subprog(obj, prog))
6714                         continue;
6715                 if (!prog->autoload)
6716                         continue;
6717
6718                 /* Process data relos for main programs */
6719                 err = bpf_object__relocate_data(obj, prog);
6720                 if (err) {
6721                         pr_warn("prog '%s': failed to relocate data references: %d\n",
6722                                 prog->name, err);
6723                         return err;
6724                 }
6725
6726                 /* Fix up .BTF.ext information, if necessary */
6727                 err = bpf_program_fixup_func_info(obj, prog);
6728                 if (err) {
6729                         pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n",
6730                                 prog->name, err);
6731                         return err;
6732                 }
6733         }
6734
6735         return 0;
6736 }
6737
6738 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
6739                                             Elf64_Shdr *shdr, Elf_Data *data);
6740
6741 static int bpf_object__collect_map_relos(struct bpf_object *obj,
6742                                          Elf64_Shdr *shdr, Elf_Data *data)
6743 {
6744         const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
6745         int i, j, nrels, new_sz;
6746         const struct btf_var_secinfo *vi = NULL;
6747         const struct btf_type *sec, *var, *def;
6748         struct bpf_map *map = NULL, *targ_map = NULL;
6749         struct bpf_program *targ_prog = NULL;
6750         bool is_prog_array, is_map_in_map;
6751         const struct btf_member *member;
6752         const char *name, *mname, *type;
6753         unsigned int moff;
6754         Elf64_Sym *sym;
6755         Elf64_Rel *rel;
6756         void *tmp;
6757
6758         if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
6759                 return -EINVAL;
6760         sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
6761         if (!sec)
6762                 return -EINVAL;
6763
6764         nrels = shdr->sh_size / shdr->sh_entsize;
6765         for (i = 0; i < nrels; i++) {
6766                 rel = elf_rel_by_idx(data, i);
6767                 if (!rel) {
6768                         pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
6769                         return -LIBBPF_ERRNO__FORMAT;
6770                 }
6771
6772                 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
6773                 if (!sym) {
6774                         pr_warn(".maps relo #%d: symbol %zx not found\n",
6775                                 i, (size_t)ELF64_R_SYM(rel->r_info));
6776                         return -LIBBPF_ERRNO__FORMAT;
6777                 }
6778                 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
6779
6780                 pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",
6781                          i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value,
6782                          (size_t)rel->r_offset, sym->st_name, name);
6783
6784                 for (j = 0; j < obj->nr_maps; j++) {
6785                         map = &obj->maps[j];
6786                         if (map->sec_idx != obj->efile.btf_maps_shndx)
6787                                 continue;
6788
6789                         vi = btf_var_secinfos(sec) + map->btf_var_idx;
6790                         if (vi->offset <= rel->r_offset &&
6791                             rel->r_offset + bpf_ptr_sz <= vi->offset + vi->size)
6792                                 break;
6793                 }
6794                 if (j == obj->nr_maps) {
6795                         pr_warn(".maps relo #%d: cannot find map '%s' at rel->r_offset %zu\n",
6796                                 i, name, (size_t)rel->r_offset);
6797                         return -EINVAL;
6798                 }
6799
6800                 is_map_in_map = bpf_map_type__is_map_in_map(map->def.type);
6801                 is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY;
6802                 type = is_map_in_map ? "map" : "prog";
6803                 if (is_map_in_map) {
6804                         if (sym->st_shndx != obj->efile.btf_maps_shndx) {
6805                                 pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
6806                                         i, name);
6807                                 return -LIBBPF_ERRNO__RELOC;
6808                         }
6809                         if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
6810                             map->def.key_size != sizeof(int)) {
6811                                 pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
6812                                         i, map->name, sizeof(int));
6813                                 return -EINVAL;
6814                         }
6815                         targ_map = bpf_object__find_map_by_name(obj, name);
6816                         if (!targ_map) {
6817                                 pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n",
6818                                         i, name);
6819                                 return -ESRCH;
6820                         }
6821                 } else if (is_prog_array) {
6822                         targ_prog = bpf_object__find_program_by_name(obj, name);
6823                         if (!targ_prog) {
6824                                 pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n",
6825                                         i, name);
6826                                 return -ESRCH;
6827                         }
6828                         if (targ_prog->sec_idx != sym->st_shndx ||
6829                             targ_prog->sec_insn_off * 8 != sym->st_value ||
6830                             prog_is_subprog(obj, targ_prog)) {
6831                                 pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n",
6832                                         i, name);
6833                                 return -LIBBPF_ERRNO__RELOC;
6834                         }
6835                 } else {
6836                         return -EINVAL;
6837                 }
6838
6839                 var = btf__type_by_id(obj->btf, vi->type);
6840                 def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
6841                 if (btf_vlen(def) == 0)
6842                         return -EINVAL;
6843                 member = btf_members(def) + btf_vlen(def) - 1;
6844                 mname = btf__name_by_offset(obj->btf, member->name_off);
6845                 if (strcmp(mname, "values"))
6846                         return -EINVAL;
6847
6848                 moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
6849                 if (rel->r_offset - vi->offset < moff)
6850                         return -EINVAL;
6851
6852                 moff = rel->r_offset - vi->offset - moff;
6853                 /* here we use BPF pointer size, which is always 64 bit, as we
6854                  * are parsing an ELF that was built for the BPF target
6855                  */
6856                 if (moff % bpf_ptr_sz)
6857                         return -EINVAL;
6858                 moff /= bpf_ptr_sz;
6859                 if (moff >= map->init_slots_sz) {
6860                         new_sz = moff + 1;
6861                         tmp = libbpf_reallocarray(map->init_slots, new_sz, host_ptr_sz);
6862                         if (!tmp)
6863                                 return -ENOMEM;
6864                         map->init_slots = tmp;
6865                         memset(map->init_slots + map->init_slots_sz, 0,
6866                                (new_sz - map->init_slots_sz) * host_ptr_sz);
6867                         map->init_slots_sz = new_sz;
6868                 }
6869                 map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog;
6870
6871                 pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n",
6872                          i, map->name, moff, type, name);
6873         }
6874
6875         return 0;
6876 }
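/* Illustrative sketch (user-side BPF C, not from this file): a BTF-defined
 * map-in-map whose "values" initializer produces exactly the .maps
 * relocations resolved above:
 *
 *        struct inner_map {
 *                __uint(type, BPF_MAP_TYPE_ARRAY);
 *                __uint(max_entries, 1);
 *                __type(key, int);
 *                __type(value, int);
 *        } inner SEC(".maps");
 *
 *        struct {
 *                __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 *                __uint(max_entries, 4);
 *                __type(key, int);
 *                __array(values, struct inner_map);
 *        } outer SEC(".maps") = {
 *                .values = { [0] = &inner },
 *        };
 */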
6877
6878 static int bpf_object__collect_relos(struct bpf_object *obj)
6879 {
6880         int i, err;
6881
6882         for (i = 0; i < obj->efile.sec_cnt; i++) {
6883                 struct elf_sec_desc *sec_desc = &obj->efile.secs[i];
6884                 Elf64_Shdr *shdr;
6885                 Elf_Data *data;
6886                 int idx;
6887
6888                 if (sec_desc->sec_type != SEC_RELO)
6889                         continue;
6890
6891                 shdr = sec_desc->shdr;
6892                 data = sec_desc->data;
6893                 idx = shdr->sh_info;
6894
6895                 if (shdr->sh_type != SHT_REL) {
6896                         pr_warn("internal error at %d\n", __LINE__);
6897                         return -LIBBPF_ERRNO__INTERNAL;
6898                 }
6899
6900                 if (idx == obj->efile.st_ops_shndx || idx == obj->efile.st_ops_link_shndx)
6901                         err = bpf_object__collect_st_ops_relos(obj, shdr, data);
6902                 else if (idx == obj->efile.btf_maps_shndx)
6903                         err = bpf_object__collect_map_relos(obj, shdr, data);
6904                 else
6905                         err = bpf_object__collect_prog_relos(obj, shdr, data);
6906                 if (err)
6907                         return err;
6908         }
6909
6910         bpf_object__sort_relos(obj);
6911         return 0;
6912 }
6913
6914 static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id)
6915 {
6916         if (BPF_CLASS(insn->code) == BPF_JMP &&
6917             BPF_OP(insn->code) == BPF_CALL &&
6918             BPF_SRC(insn->code) == BPF_K &&
6919             insn->src_reg == 0 &&
6920             insn->dst_reg == 0) {
6921                 *func_id = insn->imm;
6922                 return true;
6923         }
6924         return false;
6925 }
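/* For reference, a sketch of the insn shape matched above: a helper call
 * is BPF_JMP | BPF_CALL with BPF_K source, zero src/dst registers, and
 * the helper ID in the immediate:
 *
 *        struct bpf_insn call = {
 *                .code = BPF_JMP | BPF_CALL,
 *                .imm  = BPF_FUNC_probe_read_kernel,
 *        };
 */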
6926
6927 static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
6928 {
6929         struct bpf_insn *insn = prog->insns;
6930         enum bpf_func_id func_id;
6931         int i;
6932
6933         if (obj->gen_loader)
6934                 return 0;
6935
6936         for (i = 0; i < prog->insns_cnt; i++, insn++) {
6937                 if (!insn_is_helper_call(insn, &func_id))
6938                         continue;
6939
6940                 /* on kernels that don't yet support
6941                  * bpf_probe_read_{kernel,user}[_str] helpers, fall back
6942                  * to bpf_probe_read() which works well for old kernels
6943                  */
6944                 switch (func_id) {
6945                 case BPF_FUNC_probe_read_kernel:
6946                 case BPF_FUNC_probe_read_user:
6947                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6948                                 insn->imm = BPF_FUNC_probe_read;
6949                         break;
6950                 case BPF_FUNC_probe_read_kernel_str:
6951                 case BPF_FUNC_probe_read_user_str:
6952                         if (!kernel_supports(obj, FEAT_PROBE_READ_KERN))
6953                                 insn->imm = BPF_FUNC_probe_read_str;
6954                         break;
6955                 default:
6956                         break;
6957                 }
6958         }
6959         return 0;
6960 }
6961
6962 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
6963                                      int *btf_obj_fd, int *btf_type_id);
6964
6965 /* this is called as prog->sec_def->prog_prepare_load_fn for libbpf-supported sec_defs */
6966 static int libbpf_prepare_prog_load(struct bpf_program *prog,
6967                                     struct bpf_prog_load_opts *opts, long cookie)
6968 {
6969         enum sec_def_flags def = cookie;
6970
6971         /* old kernels might not support specifying expected_attach_type */
6972         if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE))
6973                 opts->expected_attach_type = 0;
6974
6975         if (def & SEC_SLEEPABLE)
6976                 opts->prog_flags |= BPF_F_SLEEPABLE;
6977
6978         if (prog->type == BPF_PROG_TYPE_XDP && (def & SEC_XDP_FRAGS))
6979                 opts->prog_flags |= BPF_F_XDP_HAS_FRAGS;
6980
6981         /* special check for usdt to use uprobe_multi link */
6982         if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK))
6983                 prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI;
6984
6985         if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) {
6986                 int btf_obj_fd = 0, btf_type_id = 0, err;
6987                 const char *attach_name;
6988
6989                 attach_name = strchr(prog->sec_name, '/');
6990                 if (!attach_name) {
6991                         /* if BPF program is annotated with just SEC("fentry")
6992                          * (or similar) without declaratively specifying
6993                          * target, then it is expected that target will be
6994                          * specified with bpf_program__set_attach_target() at
6995                          * runtime before BPF object load step. If not, then
6996                          * there is nothing to load into the kernel, as the BPF
6997                          * verifier won't be able to validate BPF program
6998                          * correctness anyway.
6999                          */
7000                         pr_warn("prog '%s': no BTF-based attach target is specified, use bpf_program__set_attach_target()\n",
7001                                 prog->name);
7002                         return -EINVAL;
7003                 }
7004                 attach_name++; /* skip over / */
7005
7006                 err = libbpf_find_attach_btf_id(prog, attach_name, &btf_obj_fd, &btf_type_id);
7007                 if (err)
7008                         return err;
7009
7010                 /* cache resolved BTF FD and BTF type ID in the prog */
7011                 prog->attach_btf_obj_fd = btf_obj_fd;
7012                 prog->attach_btf_id = btf_type_id;
7013
7014                 /* by now, though, libbpf's common logic no longer uses
7015                  * prog->attach_btf_obj_fd/prog->attach_btf_id, because this
7016                  * callback is called after opts were populated by libbpf,
7017                  * so this callback has to update opts explicitly here
7018                  */
7019                 opts->attach_btf_obj_fd = btf_obj_fd;
7020                 opts->attach_btf_id = btf_type_id;
7021         }
7022         return 0;
7023 }
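/* Usage sketch (caller-side; target name is just an example): with a bare
 * SEC("fentry") annotation, the BTF attach target must be supplied
 * programmatically before load, passing 0 as attach_prog_fd to target
 * vmlinux or module BTF:
 *
 *        bpf_program__set_attach_target(prog, 0, "do_unlinkat");
 *        bpf_object__load(obj);
 */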
7024
7025 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz);
7026
7027 static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog,
7028                                 struct bpf_insn *insns, int insns_cnt,
7029                                 const char *license, __u32 kern_version, int *prog_fd)
7030 {
7031         LIBBPF_OPTS(bpf_prog_load_opts, load_attr);
7032         const char *prog_name = NULL;
7033         char *cp, errmsg[STRERR_BUFSIZE];
7034         size_t log_buf_size = 0;
7035         char *log_buf = NULL, *tmp;
7036         int btf_fd, ret, err;
7037         bool own_log_buf = true;
7038         __u32 log_level = prog->log_level;
7039
7040         if (prog->type == BPF_PROG_TYPE_UNSPEC) {
7041                 /*
7042                  * The program type must be set.  Most likely we couldn't find a proper
7043                  * section definition at load time, and thus we didn't infer the type.
7044                  */
7045                 pr_warn("prog '%s': missing BPF prog type, check ELF section name '%s'\n",
7046                         prog->name, prog->sec_name);
7047                 return -EINVAL;
7048         }
7049
7050         if (!insns || !insns_cnt)
7051                 return -EINVAL;
7052
7053         if (kernel_supports(obj, FEAT_PROG_NAME))
7054                 prog_name = prog->name;
7055         load_attr.attach_prog_fd = prog->attach_prog_fd;
7056         load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;
7057         load_attr.attach_btf_id = prog->attach_btf_id;
7058         load_attr.kern_version = kern_version;
7059         load_attr.prog_ifindex = prog->prog_ifindex;
7060
7061         /* specify func_info/line_info only if kernel supports them */
7062         btf_fd = btf__fd(obj->btf);
7063         if (btf_fd >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) {
7064                 load_attr.prog_btf_fd = btf_fd;
7065                 load_attr.func_info = prog->func_info;
7066                 load_attr.func_info_rec_size = prog->func_info_rec_size;
7067                 load_attr.func_info_cnt = prog->func_info_cnt;
7068                 load_attr.line_info = prog->line_info;
7069                 load_attr.line_info_rec_size = prog->line_info_rec_size;
7070                 load_attr.line_info_cnt = prog->line_info_cnt;
7071         }
7072         load_attr.log_level = log_level;
7073         load_attr.prog_flags = prog->prog_flags;
7074         load_attr.fd_array = obj->fd_array;
7075
7076         load_attr.token_fd = obj->token_fd;
7077         if (obj->token_fd)
7078                 load_attr.prog_flags |= BPF_F_TOKEN_FD;
7079
7080         /* adjust load_attr if sec_def provides custom preload callback */
7081         if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) {
7082                 err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie);
7083                 if (err < 0) {
7084                         pr_warn("prog '%s': failed to prepare load attributes: %d\n",
7085                                 prog->name, err);
7086                         return err;
7087                 }
7088                 insns = prog->insns;
7089                 insns_cnt = prog->insns_cnt;
7090         }
7091
7092         /* allow prog_prepare_load_fn to change expected_attach_type */
7093         load_attr.expected_attach_type = prog->expected_attach_type;
7094
7095         if (obj->gen_loader) {
7096                 bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name,
7097                                    license, insns, insns_cnt, &load_attr,
7098                                    prog - obj->programs);
7099                 *prog_fd = -1;
7100                 return 0;
7101         }
7102
7103 retry_load:
7104         /* if log_level is zero, we don't request logs initially even if
7105          * custom log_buf is specified; if the program load fails, then we'll
7106          * bump log_level to 1 and use either custom log_buf or we'll allocate
7107          * our own and retry the load to get details on what failed
7108          */
7109         if (log_level) {
7110                 if (prog->log_buf) {
7111                         log_buf = prog->log_buf;
7112                         log_buf_size = prog->log_size;
7113                         own_log_buf = false;
7114                 } else if (obj->log_buf) {
7115                         log_buf = obj->log_buf;
7116                         log_buf_size = obj->log_size;
7117                         own_log_buf = false;
7118                 } else {
7119                         log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2);
7120                         tmp = realloc(log_buf, log_buf_size);
7121                         if (!tmp) {
7122                                 ret = -ENOMEM;
7123                                 goto out;
7124                         }
7125                         log_buf = tmp;
7126                         log_buf[0] = '\0';
7127                         own_log_buf = true;
7128                 }
7129         }
7130
7131         load_attr.log_buf = log_buf;
7132         load_attr.log_size = log_buf_size;
7133         load_attr.log_level = log_level;
7134
7135         ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);
7136         if (ret >= 0) {
7137                 if (log_level && own_log_buf) {
7138                         pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7139                                  prog->name, log_buf);
7140                 }
7141
7142                 if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {
7143                         struct bpf_map *map;
7144                         int i;
7145
7146                         for (i = 0; i < obj->nr_maps; i++) {
7147                                 map = &prog->obj->maps[i];
7148                                 if (map->libbpf_type != LIBBPF_MAP_RODATA)
7149                                         continue;
7150
7151                                 if (bpf_prog_bind_map(ret, map->fd, NULL)) {
7152                                         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7153                                         pr_warn("prog '%s': failed to bind map '%s': %s\n",
7154                                                 prog->name, map->real_name, cp);
7155                                         /* Don't fail hard if can't bind rodata. */
7156                                 }
7157                         }
7158                 }
7159
7160                 *prog_fd = ret;
7161                 ret = 0;
7162                 goto out;
7163         }
7164
7165         if (log_level == 0) {
7166                 log_level = 1;
7167                 goto retry_load;
7168         }
7169         /* On ENOSPC, increase log buffer size and retry, unless custom
7170          * log_buf is specified.
7171          * Be careful to not overflow u32, though. Kernel's log buf size limit
7172          * isn't part of UAPI so it can always be bumped to full 4GB. So don't
7173          * multiply by 2 unless we are sure we'll fit within 32 bits.
7174          * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2).
7175          */
7176         if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2)
7177                 goto retry_load;
7178
7179         ret = -errno;
7180
7181         /* post-process verifier log to improve error descriptions */
7182         fixup_verifier_log(prog, log_buf, log_buf_size);
7183
7184         cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
7185         pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);
7186         pr_perm_msg(ret);
7187
7188         if (own_log_buf && log_buf && log_buf[0] != '\0') {
7189                 pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n",
7190                         prog->name, log_buf);
7191         }
7192
7193 out:
7194         if (own_log_buf)
7195                 free(log_buf);
7196         return ret;
7197 }
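/* Usage sketch: callers that want the verifier log on failure can supply
 * their own buffer before load (buffer size is just an example):
 *
 *        static char vlog[1024 * 1024];
 *
 *        bpf_program__set_log_buf(prog, vlog, sizeof(vlog));
 *        bpf_program__set_log_level(prog, 1);
 *        bpf_object__load(obj);
 */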
7198
7199 static char *find_prev_line(char *buf, char *cur)
7200 {
7201         char *p;
7202
7203         if (cur == buf) /* walked back to the start of the log buf */
7204                 return NULL;
7205
7206         p = cur - 1;
7207         while (p - 1 >= buf && *(p - 1) != '\n')
7208                 p--;
7209
7210         return p;
7211 }
7212
7213 static void patch_log(char *buf, size_t buf_sz, size_t log_sz,
7214                       char *orig, size_t orig_sz, const char *patch)
7215 {
7216         /* size of the remaining log content to the right of the to-be-replaced part */
7217         size_t rem_sz = (buf + log_sz) - (orig + orig_sz);
7218         size_t patch_sz = strlen(patch);
7219
7220         if (patch_sz != orig_sz) {
7221                 /* If patch line(s) are longer than original piece of verifier log,
7222                  * shift log contents by (patch_sz - orig_sz) bytes to the right
7223                  * starting from after to-be-replaced part of the log.
7224                  *
7225                  * If patch line(s) are shorter than original piece of verifier log,
7226                  * shift log contents by (orig_sz - patch_sz) bytes to the left
7227                  * starting from after to-be-replaced part of the log
7228                  *
7229                  * We need to be careful about not overflowing available
7230                  * buf_sz capacity. If that's the case, we'll truncate the end
7231                  * of the original log, as necessary.
7232                  */
7233                 if (patch_sz > orig_sz) {
7234                         if (orig + patch_sz >= buf + buf_sz) {
7235                                 /* patch is big enough to cover remaining space completely */
7236                                 patch_sz -= (orig + patch_sz) - (buf + buf_sz) + 1;
7237                                 rem_sz = 0;
7238                         } else if (patch_sz - orig_sz > buf_sz - log_sz) {
7239                                 /* patch causes part of remaining log to be truncated */
7240                                 rem_sz -= (patch_sz - orig_sz) - (buf_sz - log_sz);
7241                         }
7242                 }
7243                 /* shift remaining log to the right by calculated amount */
7244                 memmove(orig + patch_sz, orig + orig_sz, rem_sz);
7245         }
7246
7247         memcpy(orig, patch, patch_sz);
7248 }
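/* Worked example with hypothetical values: for buf_sz = 16 and
 * log = "a\nBAD\nc\n" (log_sz = 9, incl. NUL), patching orig = "BAD"
 * (orig_sz = 3) with patch = "LONGER" (patch_sz = 6) shifts "\nc\n" right
 * by 3 bytes, yielding "a\nLONGER\nc\n"; a shorter patch shifts left.
 */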
7249
7250 static void fixup_log_failed_core_relo(struct bpf_program *prog,
7251                                        char *buf, size_t buf_sz, size_t log_sz,
7252                                        char *line1, char *line2, char *line3)
7253 {
7254         /* Expected log for failed and not properly guarded CO-RE relocation:
7255          * line1 -> 123: (85) call unknown#195896080
7256          * line2 -> invalid func unknown#195896080
7257          * line3 -> <anything else or end of buffer>
7258          *
7259          * "123" is the index of the instruction that was poisoned. We extract
7260          * instruction index to find corresponding CO-RE relocation and
7261          * replace this part of the log with more relevant information about
7262          * failed CO-RE relocation.
7263          */
7264         const struct bpf_core_relo *relo;
7265         struct bpf_core_spec spec;
7266         char patch[512], spec_buf[256];
7267         int insn_idx, err, spec_len;
7268
7269         if (sscanf(line1, "%d: (%*d) call unknown#195896080\n", &insn_idx) != 1)
7270                 return;
7271
7272         relo = find_relo_core(prog, insn_idx);
7273         if (!relo)
7274                 return;
7275
7276         err = bpf_core_parse_spec(prog->name, prog->obj->btf, relo, &spec);
7277         if (err)
7278                 return;
7279
7280         spec_len = bpf_core_format_spec(spec_buf, sizeof(spec_buf), &spec);
7281         snprintf(patch, sizeof(patch),
7282                  "%d: <invalid CO-RE relocation>\n"
7283                  "failed to resolve CO-RE relocation %s%s\n",
7284                  insn_idx, spec_buf, spec_len >= sizeof(spec_buf) ? "..." : "");
7285
7286         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7287 }
7288
7289 static void fixup_log_missing_map_load(struct bpf_program *prog,
7290                                        char *buf, size_t buf_sz, size_t log_sz,
7291                                        char *line1, char *line2, char *line3)
7292 {
7293         /* Expected log for failed and not properly guarded map reference:
7294          * line1 -> 123: (85) call unknown#2001000345
7295          * line2 -> invalid func unknown#2001000345
7296          * line3 -> <anything else or end of buffer>
7297          *
7298          * "123" is the index of the instruction that was poisoned.
7299          * "345" in "2001000345" is a map index in obj->maps to fetch map name.
7300          */
7301         struct bpf_object *obj = prog->obj;
7302         const struct bpf_map *map;
7303         int insn_idx, map_idx;
7304         char patch[128];
7305
7306         if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &map_idx) != 2)
7307                 return;
7308
7309         map_idx -= POISON_LDIMM64_MAP_BASE;
7310         if (map_idx < 0 || map_idx >= obj->nr_maps)
7311                 return;
7312         map = &obj->maps[map_idx];
7313
7314         snprintf(patch, sizeof(patch),
7315                  "%d: <invalid BPF map reference>\n"
7316                  "BPF map '%s' is referenced but wasn't created\n",
7317                  insn_idx, map->name);
7318
7319         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7320 }
7321
7322 static void fixup_log_missing_kfunc_call(struct bpf_program *prog,
7323                                          char *buf, size_t buf_sz, size_t log_sz,
7324                                          char *line1, char *line2, char *line3)
7325 {
7326         /* Expected log for failed and not properly guarded kfunc call:
7327          * line1 -> 123: (85) call unknown#2002000345
7328          * line2 -> invalid func unknown#2002000345
7329          * line3 -> <anything else or end of buffer>
7330          *
7331          * "123" is the index of the instruction that was poisoned.
7332          * "345" in "2002000345" is an extern index in obj->externs to fetch kfunc name.
7333          */
7334         struct bpf_object *obj = prog->obj;
7335         const struct extern_desc *ext;
7336         int insn_idx, ext_idx;
7337         char patch[128];
7338
7339         if (sscanf(line1, "%d: (%*d) call unknown#%d\n", &insn_idx, &ext_idx) != 2)
7340                 return;
7341
7342         ext_idx -= POISON_CALL_KFUNC_BASE;
7343         if (ext_idx < 0 || ext_idx >= obj->nr_extern)
7344                 return;
7345         ext = &obj->externs[ext_idx];
7346
7347         snprintf(patch, sizeof(patch),
7348                  "%d: <invalid kfunc call>\n"
7349                  "kfunc '%s' is referenced but wasn't resolved\n",
7350                  insn_idx, ext->name);
7351
7352         patch_log(buf, buf_sz, log_sz, line1, line3 - line1, patch);
7353 }
7354
7355 static void fixup_verifier_log(struct bpf_program *prog, char *buf, size_t buf_sz)
7356 {
7357         /* look for familiar error patterns in last N lines of the log */
7358         const size_t max_last_line_cnt = 10;
7359         char *prev_line, *cur_line, *next_line;
7360         size_t log_sz;
7361         int i;
7362
7363         if (!buf)
7364                 return;
7365
7366         log_sz = strlen(buf) + 1;
7367         next_line = buf + log_sz - 1;
7368
7369         for (i = 0; i < max_last_line_cnt; i++, next_line = cur_line) {
7370                 cur_line = find_prev_line(buf, next_line);
7371                 if (!cur_line)
7372                         return;
7373
7374                 if (str_has_pfx(cur_line, "invalid func unknown#195896080\n")) {
7375                         prev_line = find_prev_line(buf, cur_line);
7376                         if (!prev_line)
7377                                 continue;
7378
7379                         /* failed CO-RE relocation case */
7380                         fixup_log_failed_core_relo(prog, buf, buf_sz, log_sz,
7381                                                    prev_line, cur_line, next_line);
7382                         return;
7383                 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_LDIMM64_MAP_PFX)) {
7384                         prev_line = find_prev_line(buf, cur_line);
7385                         if (!prev_line)
7386                                 continue;
7387
7388                         /* reference to uncreated BPF map */
7389                         fixup_log_missing_map_load(prog, buf, buf_sz, log_sz,
7390                                                    prev_line, cur_line, next_line);
7391                         return;
7392                 } else if (str_has_pfx(cur_line, "invalid func unknown#"POISON_CALL_KFUNC_PFX)) {
7393                         prev_line = find_prev_line(buf, cur_line);
7394                         if (!prev_line)
7395                                 continue;
7396
7397                         /* reference to unresolved kfunc */
7398                         fixup_log_missing_kfunc_call(prog, buf, buf_sz, log_sz,
7399                                                      prev_line, cur_line, next_line);
7400                         return;
7401                 }
7402         }
7403 }
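/* Example of the overall rewrite (log contents are illustrative):
 *
 *        before:  123: (85) call unknown#195896080
 *                 invalid func unknown#195896080
 *
 *        after:   123: <invalid CO-RE relocation>
 *                 failed to resolve CO-RE relocation <byte_off> [...]
 */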
7404
7405 static int bpf_program_record_relos(struct bpf_program *prog)
7406 {
7407         struct bpf_object *obj = prog->obj;
7408         int i;
7409
7410         for (i = 0; i < prog->nr_reloc; i++) {
7411                 struct reloc_desc *relo = &prog->reloc_desc[i];
7412                 struct extern_desc *ext = &obj->externs[relo->ext_idx];
7413                 int kind;
7414
7415                 switch (relo->type) {
7416                 case RELO_EXTERN_LD64:
7417                         if (ext->type != EXT_KSYM)
7418                                 continue;
7419                         kind = btf_is_var(btf__type_by_id(obj->btf, ext->btf_id)) ?
7420                                 BTF_KIND_VAR : BTF_KIND_FUNC;
7421                         bpf_gen__record_extern(obj->gen_loader, ext->name,
7422                                                ext->is_weak, !ext->ksym.type_id,
7423                                                true, kind, relo->insn_idx);
7424                         break;
7425                 case RELO_EXTERN_CALL:
7426                         bpf_gen__record_extern(obj->gen_loader, ext->name,
7427                                                ext->is_weak, false, false, BTF_KIND_FUNC,
7428                                                relo->insn_idx);
7429                         break;
7430                 case RELO_CORE: {
7431                         struct bpf_core_relo cr = {
7432                                 .insn_off = relo->insn_idx * 8,
7433                                 .type_id = relo->core_relo->type_id,
7434                                 .access_str_off = relo->core_relo->access_str_off,
7435                                 .kind = relo->core_relo->kind,
7436                         };
7437
7438                         bpf_gen__record_relo_core(obj->gen_loader, &cr);
7439                         break;
7440                 }
7441                 default:
7442                         continue;
7443                 }
7444         }
7445         return 0;
7446 }
7447
7448 static int
7449 bpf_object__load_progs(struct bpf_object *obj, int log_level)
7450 {
7451         struct bpf_program *prog;
7452         size_t i;
7453         int err;
7454
7455         for (i = 0; i < obj->nr_programs; i++) {
7456                 prog = &obj->programs[i];
7457                 err = bpf_object__sanitize_prog(obj, prog);
7458                 if (err)
7459                         return err;
7460         }
7461
7462         for (i = 0; i < obj->nr_programs; i++) {
7463                 prog = &obj->programs[i];
7464                 if (prog_is_subprog(obj, prog))
7465                         continue;
7466                 if (!prog->autoload) {
7467                         pr_debug("prog '%s': skipped loading\n", prog->name);
7468                         continue;
7469                 }
7470                 prog->log_level |= log_level;
7471
7472                 if (obj->gen_loader)
7473                         bpf_program_record_relos(prog);
7474
7475                 err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt,
7476                                            obj->license, obj->kern_version, &prog->fd);
7477                 if (err) {
7478                         pr_warn("prog '%s': failed to load: %d\n", prog->name, err);
7479                         return err;
7480                 }
7481         }
7482
7483         bpf_object__free_relocs(obj);
7484         return 0;
7485 }
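/* Usage sketch: a program can be excluded from loading ahead of time with
 * bpf_program__set_autoload(); such progs are skipped above:
 *
 *        bpf_program__set_autoload(prog, false);
 *        bpf_object__load(obj);  // prog won't be loaded into the kernel
 */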
7486
7487 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
7488
7489 static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
7490 {
7491         struct bpf_program *prog;
7492         int err;
7493
7494         bpf_object__for_each_program(prog, obj) {
7495                 prog->sec_def = find_sec_def(prog->sec_name);
7496                 if (!prog->sec_def) {
7497                         /* couldn't guess, but user might manually specify */
7498                         pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
7499                                 prog->name, prog->sec_name);
7500                         continue;
7501                 }
7502
7503                 prog->type = prog->sec_def->prog_type;
7504                 prog->expected_attach_type = prog->sec_def->expected_attach_type;
7505
7506                 /* sec_def can have a custom callback which should be called
7507                  * after bpf_program is initialized to adjust its properties
7508                  */
7509                 if (prog->sec_def->prog_setup_fn) {
7510                         err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie);
7511                         if (err < 0) {
7512                                 pr_warn("prog '%s': failed to initialize: %d\n",
7513                                         prog->name, err);
7514                                 return err;
7515                         }
7516                 }
7517         }
7518
7519         return 0;
7520 }
7521
7522 static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz,
7523                                           const struct bpf_object_open_opts *opts)
7524 {
7525         const char *obj_name, *kconfig, *btf_tmp_path, *token_path;
7526         struct bpf_object *obj;
7527         char tmp_name[64];
7528         int err;
7529         char *log_buf;
7530         size_t log_size;
7531         __u32 log_level;
7532
7533         if (elf_version(EV_CURRENT) == EV_NONE) {
7534                 pr_warn("failed to init libelf for %s\n",
7535                         path ? : "(mem buf)");
7536                 return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
7537         }
7538
7539         if (!OPTS_VALID(opts, bpf_object_open_opts))
7540                 return ERR_PTR(-EINVAL);
7541
7542         obj_name = OPTS_GET(opts, object_name, NULL);
7543         if (obj_buf) {
7544                 if (!obj_name) {
7545                         snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
7546                                  (unsigned long)obj_buf,
7547                                  (unsigned long)obj_buf_sz);
7548                         obj_name = tmp_name;
7549                 }
7550                 path = obj_name;
7551                 pr_debug("loading object '%s' from buffer\n", obj_name);
7552         }
7553
7554         log_buf = OPTS_GET(opts, kernel_log_buf, NULL);
7555         log_size = OPTS_GET(opts, kernel_log_size, 0);
7556         log_level = OPTS_GET(opts, kernel_log_level, 0);
7557         if (log_size > UINT_MAX)
7558                 return ERR_PTR(-EINVAL);
7559         if (log_size && !log_buf)
7560                 return ERR_PTR(-EINVAL);
7561
7562         token_path = OPTS_GET(opts, bpf_token_path, NULL);
7563         /* if user didn't specify bpf_token_path explicitly, check if
7564          * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path
7565          * option
7566          */
7567         if (!token_path)
7568                 token_path = getenv("LIBBPF_BPF_TOKEN_PATH");
7569         if (token_path && strlen(token_path) >= PATH_MAX)
7570                 return ERR_PTR(-ENAMETOOLONG);
7571
7572         obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);
7573         if (IS_ERR(obj))
7574                 return obj;
7575
7576         obj->log_buf = log_buf;
7577         obj->log_size = log_size;
7578         obj->log_level = log_level;
7579
7580         if (token_path) {
7581                 obj->token_path = strdup(token_path);
7582                 if (!obj->token_path) {
7583                         err = -ENOMEM;
7584                         goto out;
7585                 }
7586         }
7587
7588         btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);
7589         if (btf_tmp_path) {
7590                 if (strlen(btf_tmp_path) >= PATH_MAX) {
7591                         err = -ENAMETOOLONG;
7592                         goto out;
7593                 }
7594                 obj->btf_custom_path = strdup(btf_tmp_path);
7595                 if (!obj->btf_custom_path) {
7596                         err = -ENOMEM;
7597                         goto out;
7598                 }
7599         }
7600
7601         kconfig = OPTS_GET(opts, kconfig, NULL);
7602         if (kconfig) {
7603                 obj->kconfig = strdup(kconfig);
7604                 if (!obj->kconfig) {
7605                         err = -ENOMEM;
7606                         goto out;
7607                 }
7608         }
7609
7610         err = bpf_object__elf_init(obj);
7611         err = err ? : bpf_object__check_endianness(obj);
7612         err = err ? : bpf_object__elf_collect(obj);
7613         err = err ? : bpf_object__collect_externs(obj);
7614         err = err ? : bpf_object_fixup_btf(obj);
7615         err = err ? : bpf_object__init_maps(obj, opts);
7616         err = err ? : bpf_object_init_progs(obj, opts);
7617         err = err ? : bpf_object__collect_relos(obj);
7618         if (err)
7619                 goto out;
7620
7621         bpf_object__elf_finish(obj);
7622
7623         return obj;
7624 out:
7625         bpf_object__close(obj);
7626         return ERR_PTR(err);
7627 }
7628
7629 struct bpf_object *
7630 bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)
7631 {
7632         if (!path)
7633                 return libbpf_err_ptr(-EINVAL);
7634
7635         pr_debug("loading %s\n", path);
7636
7637         return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));
7638 }
7639
7640 struct bpf_object *bpf_object__open(const char *path)
7641 {
7642         return bpf_object__open_file(path, NULL);
7643 }
7644
7645 struct bpf_object *
7646 bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
7647                      const struct bpf_object_open_opts *opts)
7648 {
7649         if (!obj_buf || obj_buf_sz == 0)
7650                 return libbpf_err_ptr(-EINVAL);
7651
7652         return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));
7653 }
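/* Usage sketch tying the open APIs together (option values are examples):
 *
 *        LIBBPF_OPTS(bpf_object_open_opts, opts,
 *                .kernel_log_level = 1,
 *        );
 *        struct bpf_object *obj = bpf_object__open_file("prog.bpf.o", &opts);
 *
 *        if (!obj)  // libbpf_ptr() returns NULL and sets errno on error
 *                return -errno;
 */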
7654
7655 static int bpf_object_unload(struct bpf_object *obj)
7656 {
7657         size_t i;
7658
7659         if (!obj)
7660                 return libbpf_err(-EINVAL);
7661
7662         for (i = 0; i < obj->nr_maps; i++) {
7663                 zclose(obj->maps[i].fd);
7664                 if (obj->maps[i].st_ops)
7665                         zfree(&obj->maps[i].st_ops->kern_vdata);
7666         }
7667
7668         for (i = 0; i < obj->nr_programs; i++)
7669                 bpf_program__unload(&obj->programs[i]);
7670
7671         return 0;
7672 }
7673
7674 static int bpf_object__sanitize_maps(struct bpf_object *obj)
7675 {
7676         struct bpf_map *m;
7677
7678         bpf_object__for_each_map(m, obj) {
7679                 if (!bpf_map__is_internal(m))
7680                         continue;
7681                 if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
7682                         m->def.map_flags &= ~BPF_F_MMAPABLE;
7683         }
7684
7685         return 0;
7686 }
7687
7688 int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx)
7689 {
7690         char sym_type, sym_name[500];
7691         unsigned long long sym_addr;
7692         int ret, err = 0;
7693         FILE *f;
7694
7695         f = fopen("/proc/kallsyms", "re");
7696         if (!f) {
7697                 err = -errno;
7698                 pr_warn("failed to open /proc/kallsyms: %d\n", err);
7699                 return err;
7700         }
7701
7702         while (true) {
7703                 ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
7704                              &sym_addr, &sym_type, sym_name);
7705                 if (ret == EOF && feof(f))
7706                         break;
7707                 if (ret != 3) {
7708                         pr_warn("failed to read kallsyms entry: %d\n", ret);
7709                         err = -EINVAL;
7710                         break;
7711                 }
7712
7713                 err = cb(sym_addr, sym_type, sym_name, ctx);
7714                 if (err)
7715                         break;
7716         }
7717
7718         fclose(f);
7719         return err;
7720 }
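/* Usage sketch (libbpf-internal API): a kallsyms_cb_t callback gets each
 * symbol's address, type character, and name; e.g. counting text symbols:
 *
 *        static int count_text(unsigned long long addr, char type,
 *                              const char *name, void *ctx)
 *        {
 *                if (type == 't' || type == 'T')
 *                        (*(int *)ctx)++;
 *                return 0;
 *        }
 *
 *        int cnt = 0;
 *        libbpf_kallsyms_parse(count_text, &cnt);
 */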
7721
7722 static int kallsyms_cb(unsigned long long sym_addr, char sym_type,
7723                        const char *sym_name, void *ctx)
7724 {
7725         struct bpf_object *obj = ctx;
7726         const struct btf_type *t;
7727         struct extern_desc *ext;
7728
7729         ext = find_extern_by_name(obj, sym_name);
7730         if (!ext || ext->type != EXT_KSYM)
7731                 return 0;
7732
7733         t = btf__type_by_id(obj->btf, ext->btf_id);
7734         if (!btf_is_var(t))
7735                 return 0;
7736
7737         if (ext->is_set && ext->ksym.addr != sym_addr) {
7738                 pr_warn("extern (ksym) '%s': resolution is ambiguous: 0x%llx or 0x%llx\n",
7739                         sym_name, ext->ksym.addr, sym_addr);
7740                 return -EINVAL;
7741         }
7742         if (!ext->is_set) {
7743                 ext->is_set = true;
7744                 ext->ksym.addr = sym_addr;
7745                 pr_debug("extern (ksym) '%s': set to 0x%llx\n", sym_name, sym_addr);
7746         }
7747         return 0;
7748 }
7749
7750 static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
7751 {
7752         return libbpf_kallsyms_parse(kallsyms_cb, obj);
7753 }
7754
7755 static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
7756                             __u16 kind, struct btf **res_btf,
7757                             struct module_btf **res_mod_btf)
7758 {
7759         struct module_btf *mod_btf;
7760         struct btf *btf;
7761         int i, id, err;
7762
7763         btf = obj->btf_vmlinux;
7764         mod_btf = NULL;
7765         id = btf__find_by_name_kind(btf, ksym_name, kind);
7766
7767         if (id == -ENOENT) {
7768                 err = load_module_btfs(obj);
7769                 if (err)
7770                         return err;
7771
7772                 for (i = 0; i < obj->btf_module_cnt; i++) {
7773                         /* we assume module_btf's BTF FD is always >0 */
7774                         mod_btf = &obj->btf_modules[i];
7775                         btf = mod_btf->btf;
7776                         id = btf__find_by_name_kind_own(btf, ksym_name, kind);
7777                         if (id != -ENOENT)
7778                                 break;
7779                 }
7780         }
7781         if (id <= 0)
7782                 return -ESRCH;
7783
7784         *res_btf = btf;
7785         *res_mod_btf = mod_btf;
7786         return id;
7787 }
7788
7789 static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
7790                                                struct extern_desc *ext)
7791 {
7792         const struct btf_type *targ_var, *targ_type;
7793         __u32 targ_type_id, local_type_id;
7794         struct module_btf *mod_btf = NULL;
7795         const char *targ_var_name;
7796         struct btf *btf = NULL;
7797         int id, err;
7798
7799         id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &mod_btf);
7800         if (id < 0) {
7801                 if (id == -ESRCH && ext->is_weak)
7802                         return 0;
7803                 pr_warn("extern (var ksym) '%s': not found in kernel BTF\n",
7804                         ext->name);
7805                 return id;
7806         }
7807
7808         /* find local type_id */
7809         local_type_id = ext->ksym.type_id;
7810
7811         /* find target type_id */
7812         targ_var = btf__type_by_id(btf, id);
7813         targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
7814         targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
7815
7816         err = bpf_core_types_are_compat(obj->btf, local_type_id,
7817                                         btf, targ_type_id);
7818         if (err <= 0) {
7819                 const struct btf_type *local_type;
7820                 const char *targ_name, *local_name;
7821
7822                 local_type = btf__type_by_id(obj->btf, local_type_id);
7823                 local_name = btf__name_by_offset(obj->btf, local_type->name_off);
7824                 targ_name = btf__name_by_offset(btf, targ_type->name_off);
7825
7826                 pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
7827                         ext->name, local_type_id,
7828                         btf_kind_str(local_type), local_name, targ_type_id,
7829                         btf_kind_str(targ_type), targ_name);
7830                 return -EINVAL;
7831         }
7832
7833         ext->is_set = true;
7834         ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
7835         ext->ksym.kernel_btf_id = id;
7836         pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
7837                  ext->name, id, btf_kind_str(targ_var), targ_var_name);
7838
7839         return 0;
7840 }
7841
7842 static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
7843                                                 struct extern_desc *ext)
7844 {
7845         int local_func_proto_id, kfunc_proto_id, kfunc_id;
7846         struct module_btf *mod_btf = NULL;
7847         const struct btf_type *kern_func;
7848         struct btf *kern_btf = NULL;
7849         int ret;
7850
7851         local_func_proto_id = ext->ksym.type_id;
7852
7853         kfunc_id = find_ksym_btf_id(obj, ext->essent_name ?: ext->name, BTF_KIND_FUNC, &kern_btf,
7854                                     &mod_btf);
7855         if (kfunc_id < 0) {
7856                 if (kfunc_id == -ESRCH && ext->is_weak)
7857                         return 0;
7858                 pr_warn("extern (func ksym) '%s': not found in kernel or module BTFs\n",
7859                         ext->name);
7860                 return kfunc_id;
7861         }
7862
7863         kern_func = btf__type_by_id(kern_btf, kfunc_id);
7864         kfunc_proto_id = kern_func->type;
7865
7866         ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
7867                                         kern_btf, kfunc_proto_id);
7868         if (ret <= 0) {
7869                 if (ext->is_weak)
7870                         return 0;
7871
7872                 pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with %s [%d]\n",
7873                         ext->name, local_func_proto_id,
7874                         mod_btf ? mod_btf->name : "vmlinux", kfunc_proto_id);
7875                 return -EINVAL;
7876         }
7877
7878         /* set index for module BTF fd in fd_array, if unset */
7879         if (mod_btf && !mod_btf->fd_array_idx) {
7880                 /* insn->off is s16 */
7881                 if (obj->fd_array_cnt == INT16_MAX) {
7882                         pr_warn("extern (func ksym) '%s': module BTF fd index %d too big to fit in bpf_insn offset\n",
7883                                 ext->name, mod_btf->fd_array_idx);
7884                         return -E2BIG;
7885                 }
7886                 /* Cannot use index 0 for module BTF fd */
7887                 if (!obj->fd_array_cnt)
7888                         obj->fd_array_cnt = 1;
7889
7890                 ret = libbpf_ensure_mem((void **)&obj->fd_array, &obj->fd_array_cap, sizeof(int),
7891                                         obj->fd_array_cnt + 1);
7892                 if (ret)
7893                         return ret;
7894                 mod_btf->fd_array_idx = obj->fd_array_cnt;
7895                 /* we assume module BTF FD is always >0 */
7896                 obj->fd_array[obj->fd_array_cnt++] = mod_btf->fd;
7897         }
7898
7899         ext->is_set = true;
7900         ext->ksym.kernel_btf_id = kfunc_id;
7901         ext->ksym.btf_fd_idx = mod_btf ? mod_btf->fd_array_idx : 0;
7902         /* Also set kernel_btf_obj_fd to make sure that bpf_object__relocate_data()
7903          * populates FD into ld_imm64 insn when it's used to point to kfunc.
7904          * {kernel_btf_id, btf_fd_idx} -> fixup bpf_call.
7905          * {kernel_btf_id, kernel_btf_obj_fd} -> fixup ld_imm64.
7906          */
7907         ext->ksym.kernel_btf_obj_fd = mod_btf ? mod_btf->fd : 0;
7908         pr_debug("extern (func ksym) '%s': resolved to %s [%d]\n",
7909                  ext->name, mod_btf ? mod_btf->name : "vmlinux", kfunc_id);
7910
7911         return 0;
7912 }
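
/* Illustrative BPF-side kfunc externs resolved by the function above
 * (a sketch, not part of libbpf):
 *
 *	extern void bpf_rcu_read_lock(void) __ksym;
 *	extern void bpf_rcu_read_unlock(void) __ksym;
 *
 * Marking such an extern __weak makes a missing or incompatible kfunc
 * non-fatal, matching the is_weak handling above.
 */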
7913
7914 static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
7915 {
7916         const struct btf_type *t;
7917         struct extern_desc *ext;
7918         int i, err;
7919
7920         for (i = 0; i < obj->nr_extern; i++) {
7921                 ext = &obj->externs[i];
7922                 if (ext->type != EXT_KSYM || !ext->ksym.type_id)
7923                         continue;
7924
7925                 if (obj->gen_loader) {
7926                         ext->is_set = true;
7927                         ext->ksym.kernel_btf_obj_fd = 0;
7928                         ext->ksym.kernel_btf_id = 0;
7929                         continue;
7930                 }
7931                 t = btf__type_by_id(obj->btf, ext->btf_id);
7932                 if (btf_is_var(t))
7933                         err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
7934                 else
7935                         err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
7936                 if (err)
7937                         return err;
7938         }
7939         return 0;
7940 }
7941
7942 static int bpf_object__resolve_externs(struct bpf_object *obj,
7943                                        const char *extra_kconfig)
7944 {
7945         bool need_config = false, need_kallsyms = false;
7946         bool need_vmlinux_btf = false;
7947         struct extern_desc *ext;
7948         void *kcfg_data = NULL;
7949         int err, i;
7950
7951         if (obj->nr_extern == 0)
7952                 return 0;
7953
7954         if (obj->kconfig_map_idx >= 0)
7955                 kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
7956
7957         for (i = 0; i < obj->nr_extern; i++) {
7958                 ext = &obj->externs[i];
7959
7960                 if (ext->type == EXT_KSYM) {
7961                         if (ext->ksym.type_id)
7962                                 need_vmlinux_btf = true;
7963                         else
7964                                 need_kallsyms = true;
7965                         continue;
7966                 } else if (ext->type == EXT_KCFG) {
7967                         void *ext_ptr = kcfg_data + ext->kcfg.data_off;
7968                         __u64 value = 0;
7969
7970                         /* Kconfig externs need actual /proc/config.gz */
7971                         if (str_has_pfx(ext->name, "CONFIG_")) {
7972                                 need_config = true;
7973                                 continue;
7974                         }
7975
7976                         /* Virtual kcfg externs are handled specially by libbpf */
7977                         if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
7978                                 value = get_kernel_version();
7979                                 if (!value) {
7980                                         pr_warn("extern (kcfg) '%s': failed to get kernel version\n", ext->name);
7981                                         return -EINVAL;
7982                                 }
7983                         } else if (strcmp(ext->name, "LINUX_HAS_BPF_COOKIE") == 0) {
7984                                 value = kernel_supports(obj, FEAT_BPF_COOKIE);
7985                         } else if (strcmp(ext->name, "LINUX_HAS_SYSCALL_WRAPPER") == 0) {
7986                                 value = kernel_supports(obj, FEAT_SYSCALL_WRAPPER);
7987                         } else if (!str_has_pfx(ext->name, "LINUX_") || !ext->is_weak) {
7988                                 /* Currently libbpf supports only CONFIG_ and LINUX_ prefixed
7989                                  * __kconfig externs, where LINUX_ ones are virtual and filled in
7990                                  * by libbpf itself (their values don't come from Kconfig).
7991                                  * If a LINUX_xxx variable is not recognized by libbpf but is
7992                                  * marked __weak, it defaults to zero, just like CONFIG_xxx
7993                                  * externs do.
7994                                  */
7995                                 pr_warn("extern (kcfg) '%s': unrecognized virtual extern\n", ext->name);
7996                                 return -EINVAL;
7997                         }
7998
7999                         err = set_kcfg_value_num(ext, ext_ptr, value);
8000                         if (err)
8001                                 return err;
8002                         pr_debug("extern (kcfg) '%s': set to 0x%llx\n",
8003                                  ext->name, (long long)value);
8004                 } else {
8005                         pr_warn("extern '%s': unrecognized extern kind\n", ext->name);
8006                         return -EINVAL;
8007                 }
8008         }
8009         if (need_config && extra_kconfig) {
8010                 err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
8011                 if (err)
8012                         return -EINVAL;
8013                 need_config = false;
8014                 for (i = 0; i < obj->nr_extern; i++) {
8015                         ext = &obj->externs[i];
8016                         if (ext->type == EXT_KCFG && !ext->is_set) {
8017                                 need_config = true;
8018                                 break;
8019                         }
8020                 }
8021         }
8022         if (need_config) {
8023                 err = bpf_object__read_kconfig_file(obj, kcfg_data);
8024                 if (err)
8025                         return -EINVAL;
8026         }
8027         if (need_kallsyms) {
8028                 err = bpf_object__read_kallsyms_file(obj);
8029                 if (err)
8030                         return -EINVAL;
8031         }
8032         if (need_vmlinux_btf) {
8033                 err = bpf_object__resolve_ksyms_btf_id(obj);
8034                 if (err)
8035                         return -EINVAL;
8036         }
8037         for (i = 0; i < obj->nr_extern; i++) {
8038                 ext = &obj->externs[i];
8039
8040                 if (!ext->is_set && !ext->is_weak) {
8041                         pr_warn("extern '%s' (strong): not resolved\n", ext->name);
8042                         return -ESRCH;
8043                 } else if (!ext->is_set) {
8044                         pr_debug("extern '%s' (weak): not resolved, defaulting to zero\n",
8045                                  ext->name);
8046                 }
8047         }
8048
8049         return 0;
8050 }
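
/* Illustrative __kconfig externs resolved by bpf_object__resolve_externs()
 * (BPF-side sketch, not part of libbpf):
 *
 *	extern unsigned int LINUX_KERNEL_VERSION __kconfig;
 *	extern _Bool CONFIG_BPF_SYSCALL __kconfig __weak;
 *
 * CONFIG_* values come from /proc/config.gz (or an explicitly provided
 * kconfig string/file), while LINUX_* ones are virtual and filled in by
 * libbpf itself.
 */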
8051
8052 static void bpf_map_prepare_vdata(const struct bpf_map *map)
8053 {
8054         struct bpf_struct_ops *st_ops;
8055         __u32 i;
8056
8057         st_ops = map->st_ops;
8058         for (i = 0; i < btf_vlen(st_ops->type); i++) {
8059                 struct bpf_program *prog = st_ops->progs[i];
8060                 void *kern_data;
8061                 int prog_fd;
8062
8063                 if (!prog)
8064                         continue;
8065
8066                 prog_fd = bpf_program__fd(prog);
8067                 kern_data = st_ops->kern_vdata + st_ops->kern_func_off[i];
8068                 *(unsigned long *)kern_data = prog_fd;
8069         }
8070 }
8071
8072 static int bpf_object_prepare_struct_ops(struct bpf_object *obj)
8073 {
8074         int i;
8075
8076         for (i = 0; i < obj->nr_maps; i++)
8077                 if (bpf_map__is_struct_ops(&obj->maps[i]))
8078                         bpf_map_prepare_vdata(&obj->maps[i]);
8079
8080         return 0;
8081 }
8082
8083 static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)
8084 {
8085         int err, i;
8086
8087         if (!obj)
8088                 return libbpf_err(-EINVAL);
8089
8090         if (obj->loaded) {
8091                 pr_warn("object '%s': load can't be attempted twice\n", obj->name);
8092                 return libbpf_err(-EINVAL);
8093         }
8094
8095         if (obj->gen_loader)
8096                 bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);
8097
8098         err = bpf_object_prepare_token(obj);
8099         err = err ? : bpf_object__probe_loading(obj);
8100         err = err ? : bpf_object__load_vmlinux_btf(obj, false);
8101         err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
8102         err = err ? : bpf_object__sanitize_maps(obj);
8103         err = err ? : bpf_object__init_kern_struct_ops_maps(obj);
8104         err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path);
8105         err = err ? : bpf_object__sanitize_and_load_btf(obj);
8106         err = err ? : bpf_object__create_maps(obj);
8107         err = err ? : bpf_object__load_progs(obj, extra_log_level);
8108         err = err ? : bpf_object_init_prog_arrays(obj);
8109         err = err ? : bpf_object_prepare_struct_ops(obj);
8110
8111         if (obj->gen_loader) {
8112                 /* reset FDs */
8113                 if (obj->btf)
8114                         btf__set_fd(obj->btf, -1);
8115                 if (!err)
8116                         err = bpf_gen__finish(obj->gen_loader, obj->nr_programs, obj->nr_maps);
8117         }
8118
8119         /* clean up fd_array */
8120         zfree(&obj->fd_array);
8121
8122         /* clean up module BTFs */
8123         for (i = 0; i < obj->btf_module_cnt; i++) {
8124                 close(obj->btf_modules[i].fd);
8125                 btf__free(obj->btf_modules[i].btf);
8126                 free(obj->btf_modules[i].name);
8127         }
8128         free(obj->btf_modules);
8129
8130         /* clean up vmlinux BTF */
8131         btf__free(obj->btf_vmlinux);
8132         obj->btf_vmlinux = NULL;
8133
8134         obj->loaded = true; /* whether successfully or not */
8135
8136         if (err)
8137                 goto out;
8138
8139         return 0;
8140 out:
8141         /* unpin any maps that were auto-pinned during load */
8142         for (i = 0; i < obj->nr_maps; i++)
8143                 if (obj->maps[i].pinned && !obj->maps[i].reused)
8144                         bpf_map__unpin(&obj->maps[i], NULL);
8145
8146         bpf_object_unload(obj);
8147         pr_warn("failed to load object '%s'\n", obj->path);
8148         return libbpf_err(err);
8149 }
8150
8151 int bpf_object__load(struct bpf_object *obj)
8152 {
8153         return bpf_object_load(obj, 0, NULL);
8154 }
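
/* Minimal userspace usage sketch for the load path above (object file
 * and program names are hypothetical; error handling trimmed):
 *
 *	struct bpf_object *obj;
 *	struct bpf_program *prog;
 *
 *	obj = bpf_object__open_file("myprog.bpf.o", NULL);
 *	if (!obj)
 *		return -errno;
 *	if (bpf_object__load(obj)) {
 *		bpf_object__close(obj);
 *		return -1;
 *	}
 *	prog = bpf_object__find_program_by_name(obj, "handle_exec");
 *	... attach via bpf_program__attach(prog), etc. ...
 */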
8155
8156 static int make_parent_dir(const char *path)
8157 {
8158         char *cp, errmsg[STRERR_BUFSIZE];
8159         char *dname, *dir;
8160         int err = 0;
8161
8162         dname = strdup(path);
8163         if (dname == NULL)
8164                 return -ENOMEM;
8165
8166         dir = dirname(dname);
8167         if (mkdir(dir, 0700) && errno != EEXIST)
8168                 err = -errno;
8169
8170         free(dname);
8171         if (err) {
8172                 cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8173                 pr_warn("failed to mkdir %s: %s\n", path, cp);
8174         }
8175         return err;
8176 }
8177
8178 static int check_path(const char *path)
8179 {
8180         char *cp, errmsg[STRERR_BUFSIZE];
8181         struct statfs st_fs;
8182         char *dname, *dir;
8183         int err = 0;
8184
8185         if (path == NULL)
8186                 return -EINVAL;
8187
8188         dname = strdup(path);
8189         if (dname == NULL)
8190                 return -ENOMEM;
8191
8192         dir = dirname(dname);
8193         if (statfs(dir, &st_fs)) {
8194                 cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
8195                 pr_warn("failed to statfs %s: %s\n", dir, cp);
8196                 err = -errno;
8197         }
8198         free(dname);
8199
8200         if (!err && st_fs.f_type != BPF_FS_MAGIC) {
8201                 pr_warn("specified path %s is not on BPF FS\n", path);
8202                 err = -EINVAL;
8203         }
8204
8205         return err;
8206 }
8207
8208 int bpf_program__pin(struct bpf_program *prog, const char *path)
8209 {
8210         char *cp, errmsg[STRERR_BUFSIZE];
8211         int err;
8212
8213         if (prog->fd < 0) {
8214                 pr_warn("prog '%s': can't pin program that wasn't loaded\n", prog->name);
8215                 return libbpf_err(-EINVAL);
8216         }
8217
8218         err = make_parent_dir(path);
8219         if (err)
8220                 return libbpf_err(err);
8221
8222         err = check_path(path);
8223         if (err)
8224                 return libbpf_err(err);
8225
8226         if (bpf_obj_pin(prog->fd, path)) {
8227                 err = -errno;
8228                 cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
8229                 pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp);
8230                 return libbpf_err(err);
8231         }
8232
8233         pr_debug("prog '%s': pinned at '%s'\n", prog->name, path);
8234         return 0;
8235 }
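
/* Usage sketch (hypothetical pin location under the default bpffs mount):
 *
 *	err = bpf_program__pin(prog, "/sys/fs/bpf/my_prog");
 *
 * The parent directory is created on demand and the path must live on a
 * BPF FS mount, per make_parent_dir() and check_path() above.
 */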
8236
8237 int bpf_program__unpin(struct bpf_program *prog, const char *path)
8238 {
8239         int err;
8240
8241         if (prog->fd < 0) {
8242                 pr_warn("prog '%s': can't unpin program that wasn't loaded\n", prog->name);
8243                 return libbpf_err(-EINVAL);
8244         }
8245
8246         err = check_path(path);
8247         if (err)
8248                 return libbpf_err(err);
8249
8250         err = unlink(path);
8251         if (err)
8252                 return libbpf_err(-errno);
8253
8254         pr_debug("prog '%s': unpinned from '%s'\n", prog->name, path);
8255         return 0;
8256 }
8257
8258 int bpf_map__pin(struct bpf_map *map, const char *path)
8259 {
8260         char *cp, errmsg[STRERR_BUFSIZE];
8261         int err;
8262
8263         if (map == NULL) {
8264                 pr_warn("invalid map pointer\n");
8265                 return libbpf_err(-EINVAL);
8266         }
8267
8268         if (map->pin_path) {
8269                 if (path && strcmp(path, map->pin_path)) {
8270                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8271                                 bpf_map__name(map), map->pin_path, path);
8272                         return libbpf_err(-EINVAL);
8273                 } else if (map->pinned) {
8274                         pr_debug("map '%s' already pinned at '%s'; not re-pinning\n",
8275                                  bpf_map__name(map), map->pin_path);
8276                         return 0;
8277                 }
8278         } else {
8279                 if (!path) {
8280                         pr_warn("missing a path to pin map '%s' at\n",
8281                                 bpf_map__name(map));
8282                         return libbpf_err(-EINVAL);
8283                 } else if (map->pinned) {
8284                         pr_warn("map '%s' already pinned\n", bpf_map__name(map));
8285                         return libbpf_err(-EEXIST);
8286                 }
8287
8288                 map->pin_path = strdup(path);
8289                 if (!map->pin_path) {
8290                         err = -errno;
8291                         goto out_err;
8292                 }
8293         }
8294
8295         err = make_parent_dir(map->pin_path);
8296         if (err)
8297                 return libbpf_err(err);
8298
8299         err = check_path(map->pin_path);
8300         if (err)
8301                 return libbpf_err(err);
8302
8303         if (bpf_obj_pin(map->fd, map->pin_path)) {
8304                 err = -errno;
8305                 goto out_err;
8306         }
8307
8308         map->pinned = true;
8309         pr_debug("pinned map '%s'\n", map->pin_path);
8310
8311         return 0;
8312
8313 out_err:
8314         cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg));
8315         pr_warn("failed to pin map: %s\n", cp);
8316         return libbpf_err(err);
8317 }
8318
8319 int bpf_map__unpin(struct bpf_map *map, const char *path)
8320 {
8321         int err;
8322
8323         if (map == NULL) {
8324                 pr_warn("invalid map pointer\n");
8325                 return libbpf_err(-EINVAL);
8326         }
8327
8328         if (map->pin_path) {
8329                 if (path && strcmp(path, map->pin_path)) {
8330                         pr_warn("map '%s' already has pin path '%s' different from '%s'\n",
8331                                 bpf_map__name(map), map->pin_path, path);
8332                         return libbpf_err(-EINVAL);
8333                 }
8334                 path = map->pin_path;
8335         } else if (!path) {
8336                 pr_warn("no path to unpin map '%s' from\n",
8337                         bpf_map__name(map));
8338                 return libbpf_err(-EINVAL);
8339         }
8340
8341         err = check_path(path);
8342         if (err)
8343                 return libbpf_err(err);
8344
8345         err = unlink(path);
8346         if (err != 0)
8347                 return libbpf_err(-errno);
8348
8349         map->pinned = false;
8350         pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path);
8351
8352         return 0;
8353 }
8354
8355 int bpf_map__set_pin_path(struct bpf_map *map, const char *path)
8356 {
8357         char *new = NULL;
8358
8359         if (path) {
8360                 new = strdup(path);
8361                 if (!new)
8362                         return libbpf_err(-errno);
8363         }
8364
8365         free(map->pin_path);
8366         map->pin_path = new;
8367         return 0;
8368 }
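
/* Usage sketch: setting a pin path before load opts the map into
 * libbpf's pin/reuse handling at map creation time (path is
 * hypothetical):
 *
 *	bpf_map__set_pin_path(map, "/sys/fs/bpf/my_map");
 *
 * Passing NULL clears a previously set pin path.
 */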
8369
8370 __alias(bpf_map__pin_path)
8371 const char *bpf_map__get_pin_path(const struct bpf_map *map);
8372
8373 const char *bpf_map__pin_path(const struct bpf_map *map)
8374 {
8375         return map->pin_path;
8376 }
8377
8378 bool bpf_map__is_pinned(const struct bpf_map *map)
8379 {
8380         return map->pinned;
8381 }
8382
8383 static void sanitize_pin_path(char *s)
8384 {
8385         /* bpffs disallows periods in path names */
8386         while (*s) {
8387                 if (*s == '.')
8388                         *s = '_';
8389                 s++;
8390         }
8391 }
8392
8393 int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
8394 {
8395         struct bpf_map *map;
8396         int err;
8397
8398         if (!obj)
8399                 return libbpf_err(-ENOENT);
8400
8401         if (!obj->loaded) {
8402                 pr_warn("object not yet loaded; load it first\n");
8403                 return libbpf_err(-ENOENT);
8404         }
8405
8406         bpf_object__for_each_map(map, obj) {
8407                 char *pin_path = NULL;
8408                 char buf[PATH_MAX];
8409
8410                 if (!map->autocreate)
8411                         continue;
8412
8413                 if (path) {
8414                         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8415                         if (err)
8416                                 goto err_unpin_maps;
8417                         sanitize_pin_path(buf);
8418                         pin_path = buf;
8419                 } else if (!map->pin_path) {
8420                         continue;
8421                 }
8422
8423                 err = bpf_map__pin(map, pin_path);
8424                 if (err)
8425                         goto err_unpin_maps;
8426         }
8427
8428         return 0;
8429
8430 err_unpin_maps:
8431         while ((map = bpf_object__prev_map(obj, map))) {
8432                 if (!map->pin_path)
8433                         continue;
8434
8435                 bpf_map__unpin(map, NULL);
8436         }
8437
8438         return libbpf_err(err);
8439 }
8440
8441 int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
8442 {
8443         struct bpf_map *map;
8444         int err;
8445
8446         if (!obj)
8447                 return libbpf_err(-ENOENT);
8448
8449         bpf_object__for_each_map(map, obj) {
8450                 char *pin_path = NULL;
8451                 char buf[PATH_MAX];
8452
8453                 if (path) {
8454                         err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
8455                         if (err)
8456                                 return libbpf_err(err);
8457                         sanitize_pin_path(buf);
8458                         pin_path = buf;
8459                 } else if (!map->pin_path) {
8460                         continue;
8461                 }
8462
8463                 err = bpf_map__unpin(map, pin_path);
8464                 if (err)
8465                         return libbpf_err(err);
8466         }
8467
8468         return 0;
8469 }
8470
8471 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
8472 {
8473         struct bpf_program *prog;
8474         char buf[PATH_MAX];
8475         int err;
8476
8477         if (!obj)
8478                 return libbpf_err(-ENOENT);
8479
8480         if (!obj->loaded) {
8481                 pr_warn("object not yet loaded; load it first\n");
8482                 return libbpf_err(-ENOENT);
8483         }
8484
8485         bpf_object__for_each_program(prog, obj) {
8486                 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8487                 if (err)
8488                         goto err_unpin_programs;
8489
8490                 err = bpf_program__pin(prog, buf);
8491                 if (err)
8492                         goto err_unpin_programs;
8493         }
8494
8495         return 0;
8496
8497 err_unpin_programs:
8498         while ((prog = bpf_object__prev_program(obj, prog))) {
8499                 if (pathname_concat(buf, sizeof(buf), path, prog->name))
8500                         continue;
8501
8502                 bpf_program__unpin(prog, buf);
8503         }
8504
8505         return libbpf_err(err);
8506 }
8507
8508 int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
8509 {
8510         struct bpf_program *prog;
8511         int err;
8512
8513         if (!obj)
8514                 return libbpf_err(-ENOENT);
8515
8516         bpf_object__for_each_program(prog, obj) {
8517                 char buf[PATH_MAX];
8518
8519                 err = pathname_concat(buf, sizeof(buf), path, prog->name);
8520                 if (err)
8521                         return libbpf_err(err);
8522
8523                 err = bpf_program__unpin(prog, buf);
8524                 if (err)
8525                         return libbpf_err(err);
8526         }
8527
8528         return 0;
8529 }
8530
8531 int bpf_object__pin(struct bpf_object *obj, const char *path)
8532 {
8533         int err;
8534
8535         err = bpf_object__pin_maps(obj, path);
8536         if (err)
8537                 return libbpf_err(err);
8538
8539         err = bpf_object__pin_programs(obj, path);
8540         if (err) {
8541                 bpf_object__unpin_maps(obj, path);
8542                 return libbpf_err(err);
8543         }
8544
8545         return 0;
8546 }
8547
8548 int bpf_object__unpin(struct bpf_object *obj, const char *path)
8549 {
8550         int err;
8551
8552         err = bpf_object__unpin_programs(obj, path);
8553         if (err)
8554                 return libbpf_err(err);
8555
8556         err = bpf_object__unpin_maps(obj, path);
8557         if (err)
8558                 return libbpf_err(err);
8559
8560         return 0;
8561 }
8562
8563 static void bpf_map__destroy(struct bpf_map *map)
8564 {
8565         if (map->inner_map) {
8566                 bpf_map__destroy(map->inner_map);
8567                 zfree(&map->inner_map);
8568         }
8569
8570         zfree(&map->init_slots);
8571         map->init_slots_sz = 0;
8572
8573         if (map->mmaped) {
8574                 size_t mmap_sz;
8575
8576                 mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
8577                 munmap(map->mmaped, mmap_sz);
8578                 map->mmaped = NULL;
8579         }
8580
8581         if (map->st_ops) {
8582                 zfree(&map->st_ops->data);
8583                 zfree(&map->st_ops->progs);
8584                 zfree(&map->st_ops->kern_func_off);
8585                 zfree(&map->st_ops);
8586         }
8587
8588         zfree(&map->name);
8589         zfree(&map->real_name);
8590         zfree(&map->pin_path);
8591
8592         if (map->fd >= 0)
8593                 zclose(map->fd);
8594 }
8595
8596 void bpf_object__close(struct bpf_object *obj)
8597 {
8598         size_t i;
8599
8600         if (IS_ERR_OR_NULL(obj))
8601                 return;
8602
8603         usdt_manager_free(obj->usdt_man);
8604         obj->usdt_man = NULL;
8605
8606         bpf_gen__free(obj->gen_loader);
8607         bpf_object__elf_finish(obj);
8608         bpf_object_unload(obj);
8609         btf__free(obj->btf);
8610         btf__free(obj->btf_vmlinux);
8611         btf_ext__free(obj->btf_ext);
8612
8613         for (i = 0; i < obj->nr_maps; i++)
8614                 bpf_map__destroy(&obj->maps[i]);
8615
8616         zfree(&obj->btf_custom_path);
8617         zfree(&obj->kconfig);
8618
8619         for (i = 0; i < obj->nr_extern; i++)
8620                 zfree(&obj->externs[i].essent_name);
8621
8622         zfree(&obj->externs);
8623         obj->nr_extern = 0;
8624
8625         zfree(&obj->maps);
8626         obj->nr_maps = 0;
8627
8628         if (obj->programs && obj->nr_programs) {
8629                 for (i = 0; i < obj->nr_programs; i++)
8630                         bpf_program__exit(&obj->programs[i]);
8631         }
8632         zfree(&obj->programs);
8633
8634         zfree(&obj->feat_cache);
8635         zfree(&obj->token_path);
8636         if (obj->token_fd > 0)
8637                 close(obj->token_fd);
8638
8639         free(obj);
8640 }
8641
8642 const char *bpf_object__name(const struct bpf_object *obj)
8643 {
8644         return obj ? obj->name : libbpf_err_ptr(-EINVAL);
8645 }
8646
8647 unsigned int bpf_object__kversion(const struct bpf_object *obj)
8648 {
8649         return obj ? obj->kern_version : 0;
8650 }
8651
8652 struct btf *bpf_object__btf(const struct bpf_object *obj)
8653 {
8654         return obj ? obj->btf : NULL;
8655 }
8656
8657 int bpf_object__btf_fd(const struct bpf_object *obj)
8658 {
8659         return obj->btf ? btf__fd(obj->btf) : -1;
8660 }
8661
8662 int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
8663 {
8664         if (obj->loaded)
8665                 return libbpf_err(-EINVAL);
8666
8667         obj->kern_version = kern_version;
8668
8669         return 0;
8670 }
8671
8672 int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts)
8673 {
8674         struct bpf_gen *gen;
8675
8676         if (!opts)
8677                 return -EFAULT;
8678         if (!OPTS_VALID(opts, gen_loader_opts))
8679                 return -EINVAL;
8680         gen = calloc(1, sizeof(*gen));
8681         if (!gen)
8682                 return -ENOMEM;
8683         gen->opts = opts;
8684         obj->gen_loader = gen;
8685         return 0;
8686 }
8687
8688 static struct bpf_program *
8689 __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,
8690                     bool forward)
8691 {
8692         size_t nr_programs = obj->nr_programs;
8693         ssize_t idx;
8694
8695         if (!nr_programs)
8696                 return NULL;
8697
8698         if (!p)
8699                 /* Iterate from the beginning */
8700                 return forward ? &obj->programs[0] :
8701                         &obj->programs[nr_programs - 1];
8702
8703         if (p->obj != obj) {
8704                 pr_warn("error: program handle doesn't match object\n");
8705                 return errno = EINVAL, NULL;
8706         }
8707
8708         idx = (p - obj->programs) + (forward ? 1 : -1);
8709         if (idx >= obj->nr_programs || idx < 0)
8710                 return NULL;
8711         return &obj->programs[idx];
8712 }
8713
8714 struct bpf_program *
8715 bpf_object__next_program(const struct bpf_object *obj, struct bpf_program *prev)
8716 {
8717         struct bpf_program *prog = prev;
8718
8719         do {
8720                 prog = __bpf_program__iter(prog, obj, true);
8721         } while (prog && prog_is_subprog(obj, prog));
8722
8723         return prog;
8724 }
8725
8726 struct bpf_program *
8727 bpf_object__prev_program(const struct bpf_object *obj, struct bpf_program *next)
8728 {
8729         struct bpf_program *prog = next;
8730
8731         do {
8732                 prog = __bpf_program__iter(prog, obj, false);
8733         } while (prog && prog_is_subprog(obj, prog));
8734
8735         return prog;
8736 }
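
/* Usage sketch: these iterators back the bpf_object__for_each_program()
 * macro from libbpf.h, which skips subprograms:
 *
 *	struct bpf_program *prog;
 *
 *	bpf_object__for_each_program(prog, obj)
 *		printf("%s\n", bpf_program__name(prog));
 */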
8737
8738 void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
8739 {
8740         prog->prog_ifindex = ifindex;
8741 }
8742
8743 const char *bpf_program__name(const struct bpf_program *prog)
8744 {
8745         return prog->name;
8746 }
8747
8748 const char *bpf_program__section_name(const struct bpf_program *prog)
8749 {
8750         return prog->sec_name;
8751 }
8752
8753 bool bpf_program__autoload(const struct bpf_program *prog)
8754 {
8755         return prog->autoload;
8756 }
8757
8758 int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
8759 {
8760         if (prog->obj->loaded)
8761                 return libbpf_err(-EINVAL);
8762
8763         prog->autoload = autoload;
8764         return 0;
8765 }
8766
8767 bool bpf_program__autoattach(const struct bpf_program *prog)
8768 {
8769         return prog->autoattach;
8770 }
8771
8772 void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
8773 {
8774         prog->autoattach = autoattach;
8775 }
8776
8777 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
8778 {
8779         return prog->insns;
8780 }
8781
8782 size_t bpf_program__insn_cnt(const struct bpf_program *prog)
8783 {
8784         return prog->insns_cnt;
8785 }
8786
8787 int bpf_program__set_insns(struct bpf_program *prog,
8788                            struct bpf_insn *new_insns, size_t new_insn_cnt)
8789 {
8790         struct bpf_insn *insns;
8791
8792         if (prog->obj->loaded)
8793                 return -EBUSY;
8794
8795         insns = libbpf_reallocarray(prog->insns, new_insn_cnt, sizeof(*insns));
8796         /* NULL is a valid return from reallocarray if the new count is zero */
8797         if (!insns && new_insn_cnt) {
8798                 pr_warn("prog '%s': failed to realloc prog code\n", prog->name);
8799                 return -ENOMEM;
8800         }
8801         memcpy(insns, new_insns, new_insn_cnt * sizeof(*insns));
8802
8803         prog->insns = insns;
8804         prog->insns_cnt = new_insn_cnt;
8805         return 0;
8806 }
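
/* Usage sketch for this advanced/low-level API (a trivial "return 0"
 * program body, shown purely for illustration):
 *
 *	struct bpf_insn insns[] = {
 *		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0 },
 *		{ .code = BPF_JMP | BPF_EXIT },
 *	};
 *
 *	err = bpf_program__set_insns(prog, insns, ARRAY_SIZE(insns));
 */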
8807
8808 int bpf_program__fd(const struct bpf_program *prog)
8809 {
8810         if (!prog)
8811                 return libbpf_err(-EINVAL);
8812
8813         if (prog->fd < 0)
8814                 return libbpf_err(-ENOENT);
8815
8816         return prog->fd;
8817 }
8818
8819 __alias(bpf_program__type)
8820 enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
8821
8822 enum bpf_prog_type bpf_program__type(const struct bpf_program *prog)
8823 {
8824         return prog->type;
8825 }
8826
8827 static size_t custom_sec_def_cnt;
8828 static struct bpf_sec_def *custom_sec_defs;
8829 static struct bpf_sec_def custom_fallback_def;
8830 static bool has_custom_fallback_def;
8831 static int last_custom_sec_def_handler_id;
8832
8833 int bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
8834 {
8835         if (prog->obj->loaded)
8836                 return libbpf_err(-EBUSY);
8837
8838         /* if type is not changed, do nothing */
8839         if (prog->type == type)
8840                 return 0;
8841
8842         prog->type = type;
8843
8844         /* If a program type was changed, we need to reset associated SEC()
8845          * handler, as it will be invalid now. The only exception is a generic
8846          * fallback handler, which by definition is program type-agnostic and
8847          * is a catch-all custom handler, optionally set by the application,
8848          * so should be able to handle any type of BPF program.
8849          */
8850         if (prog->sec_def != &custom_fallback_def)
8851                 prog->sec_def = NULL;
8852         return 0;
8853 }
8854
8855 __alias(bpf_program__expected_attach_type)
8856 enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
8857
8858 enum bpf_attach_type bpf_program__expected_attach_type(const struct bpf_program *prog)
8859 {
8860         return prog->expected_attach_type;
8861 }
8862
8863 int bpf_program__set_expected_attach_type(struct bpf_program *prog,
8864                                            enum bpf_attach_type type)
8865 {
8866         if (prog->obj->loaded)
8867                 return libbpf_err(-EBUSY);
8868
8869         prog->expected_attach_type = type;
8870         return 0;
8871 }
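
/* Usage sketch: overriding what was guessed from SEC() before load
 * (the values are just an example pairing):
 *
 *	bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS);
 *	bpf_program__set_expected_attach_type(prog, BPF_TCX_INGRESS);
 */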
8872
8873 __u32 bpf_program__flags(const struct bpf_program *prog)
8874 {
8875         return prog->prog_flags;
8876 }
8877
8878 int bpf_program__set_flags(struct bpf_program *prog, __u32 flags)
8879 {
8880         if (prog->obj->loaded)
8881                 return libbpf_err(-EBUSY);
8882
8883         prog->prog_flags = flags;
8884         return 0;
8885 }
8886
8887 __u32 bpf_program__log_level(const struct bpf_program *prog)
8888 {
8889         return prog->log_level;
8890 }
8891
8892 int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level)
8893 {
8894         if (prog->obj->loaded)
8895                 return libbpf_err(-EBUSY);
8896
8897         prog->log_level = log_level;
8898         return 0;
8899 }
8900
8901 const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size)
8902 {
8903         *log_size = prog->log_size;
8904         return prog->log_buf;
8905 }
8906
8907 int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size)
8908 {
8909         if (log_size && !log_buf)
8910                 return -EINVAL;
8911         if (log_size > UINT_MAX)
8912                 return -EINVAL;
8913         if (prog->obj->loaded)
8914                 return -EBUSY;
8915
8916         prog->log_buf = log_buf;
8917         prog->log_size = log_size;
8918         return 0;
8919 }
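
/* Usage sketch: capturing the verifier log for one program (buffer
 * size is arbitrary here):
 *
 *	static char log_buf[64 * 1024];
 *
 *	bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf));
 *	bpf_program__set_log_level(prog, 1);
 *
 * On load failure, log_buf then holds the verifier output.
 */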
8920
8921 #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {                        \
8922         .sec = (char *)sec_pfx,                                             \
8923         .prog_type = BPF_PROG_TYPE_##ptype,                                 \
8924         .expected_attach_type = atype,                                      \
8925         .cookie = (long)(flags),                                            \
8926         .prog_prepare_load_fn = libbpf_prepare_prog_load,                   \
8927         __VA_ARGS__                                                         \
8928 }
8929
8930 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8931 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8932 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8933 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8934 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8935 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8936 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8937 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8938 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8939 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8940 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link);
8941
8942 static const struct bpf_sec_def section_defs[] = {
8943         SEC_DEF("socket",               SOCKET_FILTER, 0, SEC_NONE),
8944         SEC_DEF("sk_reuseport/migrate", SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE, SEC_ATTACHABLE),
8945         SEC_DEF("sk_reuseport",         SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT, SEC_ATTACHABLE),
8946         SEC_DEF("kprobe+",              KPROBE, 0, SEC_NONE, attach_kprobe),
8947         SEC_DEF("uprobe+",              KPROBE, 0, SEC_NONE, attach_uprobe),
8948         SEC_DEF("uprobe.s+",            KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
8949         SEC_DEF("kretprobe+",           KPROBE, 0, SEC_NONE, attach_kprobe),
8950         SEC_DEF("uretprobe+",           KPROBE, 0, SEC_NONE, attach_uprobe),
8951         SEC_DEF("uretprobe.s+",         KPROBE, 0, SEC_SLEEPABLE, attach_uprobe),
8952         SEC_DEF("kprobe.multi+",        KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
8953         SEC_DEF("kretprobe.multi+",     KPROBE, BPF_TRACE_KPROBE_MULTI, SEC_NONE, attach_kprobe_multi),
8954         SEC_DEF("uprobe.multi+",        KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
8955         SEC_DEF("uretprobe.multi+",     KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi),
8956         SEC_DEF("uprobe.multi.s+",      KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
8957         SEC_DEF("uretprobe.multi.s+",   KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi),
8958         SEC_DEF("ksyscall+",            KPROBE, 0, SEC_NONE, attach_ksyscall),
8959         SEC_DEF("kretsyscall+",         KPROBE, 0, SEC_NONE, attach_ksyscall),
8960         SEC_DEF("usdt+",                KPROBE, 0, SEC_USDT, attach_usdt),
8961         SEC_DEF("usdt.s+",              KPROBE, 0, SEC_USDT | SEC_SLEEPABLE, attach_usdt),
8962         SEC_DEF("tc/ingress",           SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE), /* alias for tcx */
8963         SEC_DEF("tc/egress",            SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),  /* alias for tcx */
8964         SEC_DEF("tcx/ingress",          SCHED_CLS, BPF_TCX_INGRESS, SEC_NONE),
8965         SEC_DEF("tcx/egress",           SCHED_CLS, BPF_TCX_EGRESS, SEC_NONE),
8966         SEC_DEF("tc",                   SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
8967         SEC_DEF("classifier",           SCHED_CLS, 0, SEC_NONE), /* deprecated / legacy, use tcx */
8968         SEC_DEF("action",               SCHED_ACT, 0, SEC_NONE), /* deprecated / legacy, use tcx */
8969         SEC_DEF("netkit/primary",       SCHED_CLS, BPF_NETKIT_PRIMARY, SEC_NONE),
8970         SEC_DEF("netkit/peer",          SCHED_CLS, BPF_NETKIT_PEER, SEC_NONE),
8971         SEC_DEF("tracepoint+",          TRACEPOINT, 0, SEC_NONE, attach_tp),
8972         SEC_DEF("tp+",                  TRACEPOINT, 0, SEC_NONE, attach_tp),
8973         SEC_DEF("raw_tracepoint+",      RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
8974         SEC_DEF("raw_tp+",              RAW_TRACEPOINT, 0, SEC_NONE, attach_raw_tp),
8975         SEC_DEF("raw_tracepoint.w+",    RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
8976         SEC_DEF("raw_tp.w+",            RAW_TRACEPOINT_WRITABLE, 0, SEC_NONE, attach_raw_tp),
8977         SEC_DEF("tp_btf+",              TRACING, BPF_TRACE_RAW_TP, SEC_ATTACH_BTF, attach_trace),
8978         SEC_DEF("fentry+",              TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF, attach_trace),
8979         SEC_DEF("fmod_ret+",            TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF, attach_trace),
8980         SEC_DEF("fexit+",               TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF, attach_trace),
8981         SEC_DEF("fentry.s+",            TRACING, BPF_TRACE_FENTRY, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8982         SEC_DEF("fmod_ret.s+",          TRACING, BPF_MODIFY_RETURN, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8983         SEC_DEF("fexit.s+",             TRACING, BPF_TRACE_FEXIT, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_trace),
8984         SEC_DEF("freplace+",            EXT, 0, SEC_ATTACH_BTF, attach_trace),
8985         SEC_DEF("lsm+",                 LSM, BPF_LSM_MAC, SEC_ATTACH_BTF, attach_lsm),
8986         SEC_DEF("lsm.s+",               LSM, BPF_LSM_MAC, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_lsm),
8987         SEC_DEF("lsm_cgroup+",          LSM, BPF_LSM_CGROUP, SEC_ATTACH_BTF),
8988         SEC_DEF("iter+",                TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF, attach_iter),
8989         SEC_DEF("iter.s+",              TRACING, BPF_TRACE_ITER, SEC_ATTACH_BTF | SEC_SLEEPABLE, attach_iter),
8990         SEC_DEF("syscall",              SYSCALL, 0, SEC_SLEEPABLE),
8991         SEC_DEF("xdp.frags/devmap",     XDP, BPF_XDP_DEVMAP, SEC_XDP_FRAGS),
8992         SEC_DEF("xdp/devmap",           XDP, BPF_XDP_DEVMAP, SEC_ATTACHABLE),
8993         SEC_DEF("xdp.frags/cpumap",     XDP, BPF_XDP_CPUMAP, SEC_XDP_FRAGS),
8994         SEC_DEF("xdp/cpumap",           XDP, BPF_XDP_CPUMAP, SEC_ATTACHABLE),
8995         SEC_DEF("xdp.frags",            XDP, BPF_XDP, SEC_XDP_FRAGS),
8996         SEC_DEF("xdp",                  XDP, BPF_XDP, SEC_ATTACHABLE_OPT),
8997         SEC_DEF("perf_event",           PERF_EVENT, 0, SEC_NONE),
8998         SEC_DEF("lwt_in",               LWT_IN, 0, SEC_NONE),
8999         SEC_DEF("lwt_out",              LWT_OUT, 0, SEC_NONE),
9000         SEC_DEF("lwt_xmit",             LWT_XMIT, 0, SEC_NONE),
9001         SEC_DEF("lwt_seg6local",        LWT_SEG6LOCAL, 0, SEC_NONE),
9002         SEC_DEF("sockops",              SOCK_OPS, BPF_CGROUP_SOCK_OPS, SEC_ATTACHABLE_OPT),
9003         SEC_DEF("sk_skb/stream_parser", SK_SKB, BPF_SK_SKB_STREAM_PARSER, SEC_ATTACHABLE_OPT),
9004         SEC_DEF("sk_skb/stream_verdict",SK_SKB, BPF_SK_SKB_STREAM_VERDICT, SEC_ATTACHABLE_OPT),
9005         SEC_DEF("sk_skb",               SK_SKB, 0, SEC_NONE),
9006         SEC_DEF("sk_msg",               SK_MSG, BPF_SK_MSG_VERDICT, SEC_ATTACHABLE_OPT),
9007         SEC_DEF("lirc_mode2",           LIRC_MODE2, BPF_LIRC_MODE2, SEC_ATTACHABLE_OPT),
9008         SEC_DEF("flow_dissector",       FLOW_DISSECTOR, BPF_FLOW_DISSECTOR, SEC_ATTACHABLE_OPT),
9009         SEC_DEF("cgroup_skb/ingress",   CGROUP_SKB, BPF_CGROUP_INET_INGRESS, SEC_ATTACHABLE_OPT),
9010         SEC_DEF("cgroup_skb/egress",    CGROUP_SKB, BPF_CGROUP_INET_EGRESS, SEC_ATTACHABLE_OPT),
9011         SEC_DEF("cgroup/skb",           CGROUP_SKB, 0, SEC_NONE),
9012         SEC_DEF("cgroup/sock_create",   CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE),
9013         SEC_DEF("cgroup/sock_release",  CGROUP_SOCK, BPF_CGROUP_INET_SOCK_RELEASE, SEC_ATTACHABLE),
9014         SEC_DEF("cgroup/sock",          CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE, SEC_ATTACHABLE_OPT),
9015         SEC_DEF("cgroup/post_bind4",    CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND, SEC_ATTACHABLE),
9016         SEC_DEF("cgroup/post_bind6",    CGROUP_SOCK, BPF_CGROUP_INET6_POST_BIND, SEC_ATTACHABLE),
9017         SEC_DEF("cgroup/bind4",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND, SEC_ATTACHABLE),
9018         SEC_DEF("cgroup/bind6",         CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_BIND, SEC_ATTACHABLE),
9019         SEC_DEF("cgroup/connect4",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_CONNECT, SEC_ATTACHABLE),
9020         SEC_DEF("cgroup/connect6",      CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_CONNECT, SEC_ATTACHABLE),
9021         SEC_DEF("cgroup/connect_unix",  CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT, SEC_ATTACHABLE),
9022         SEC_DEF("cgroup/sendmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG, SEC_ATTACHABLE),
9023         SEC_DEF("cgroup/sendmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_SENDMSG, SEC_ATTACHABLE),
9024         SEC_DEF("cgroup/sendmsg_unix",  CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG, SEC_ATTACHABLE),
9025         SEC_DEF("cgroup/recvmsg4",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG, SEC_ATTACHABLE),
9026         SEC_DEF("cgroup/recvmsg6",      CGROUP_SOCK_ADDR, BPF_CGROUP_UDP6_RECVMSG, SEC_ATTACHABLE),
9027         SEC_DEF("cgroup/recvmsg_unix",  CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG, SEC_ATTACHABLE),
9028         SEC_DEF("cgroup/getpeername4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME, SEC_ATTACHABLE),
9029         SEC_DEF("cgroup/getpeername6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME, SEC_ATTACHABLE),
9030         SEC_DEF("cgroup/getpeername_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME, SEC_ATTACHABLE),
9031         SEC_DEF("cgroup/getsockname4",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME, SEC_ATTACHABLE),
9032         SEC_DEF("cgroup/getsockname6",  CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME, SEC_ATTACHABLE),
9033         SEC_DEF("cgroup/getsockname_unix", CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME, SEC_ATTACHABLE),
9034         SEC_DEF("cgroup/sysctl",        CGROUP_SYSCTL, BPF_CGROUP_SYSCTL, SEC_ATTACHABLE),
9035         SEC_DEF("cgroup/getsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_GETSOCKOPT, SEC_ATTACHABLE),
9036         SEC_DEF("cgroup/setsockopt",    CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
9037         SEC_DEF("cgroup/dev",           CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
9038         SEC_DEF("struct_ops+",          STRUCT_OPS, 0, SEC_NONE),
9039         SEC_DEF("struct_ops.s+",        STRUCT_OPS, 0, SEC_SLEEPABLE),
9040         SEC_DEF("sk_lookup",            SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
9041         SEC_DEF("netfilter",            NETFILTER, BPF_NETFILTER, SEC_NONE),
9042 };
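
/* A few SEC() names and how the table above resolves them (illustrative):
 *
 *	SEC("kprobe/do_unlinkat")          -> BPF_PROG_TYPE_KPROBE
 *	SEC("tp/syscalls/sys_enter_write") -> BPF_PROG_TYPE_TRACEPOINT
 *	SEC("xdp")                         -> BPF_PROG_TYPE_XDP
 *	SEC("cgroup/connect4")             -> BPF_PROG_TYPE_CGROUP_SOCK_ADDR
 */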
9043
9044 int libbpf_register_prog_handler(const char *sec,
9045                                  enum bpf_prog_type prog_type,
9046                                  enum bpf_attach_type exp_attach_type,
9047                                  const struct libbpf_prog_handler_opts *opts)
9048 {
9049         struct bpf_sec_def *sec_def;
9050
9051         if (!OPTS_VALID(opts, libbpf_prog_handler_opts))
9052                 return libbpf_err(-EINVAL);
9053
9054         if (last_custom_sec_def_handler_id == INT_MAX) /* prevent overflow */
9055                 return libbpf_err(-E2BIG);
9056
9057         if (sec) {
9058                 sec_def = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt + 1,
9059                                               sizeof(*sec_def));
9060                 if (!sec_def)
9061                         return libbpf_err(-ENOMEM);
9062
9063                 custom_sec_defs = sec_def;
9064                 sec_def = &custom_sec_defs[custom_sec_def_cnt];
9065         } else {
9066                 if (has_custom_fallback_def)
9067                         return libbpf_err(-EBUSY);
9068
9069                 sec_def = &custom_fallback_def;
9070         }
9071
9072         sec_def->sec = sec ? strdup(sec) : NULL;
9073         if (sec && !sec_def->sec)
9074                 return libbpf_err(-ENOMEM);
9075
9076         sec_def->prog_type = prog_type;
9077         sec_def->expected_attach_type = exp_attach_type;
9078         sec_def->cookie = OPTS_GET(opts, cookie, 0);
9079
9080         sec_def->prog_setup_fn = OPTS_GET(opts, prog_setup_fn, NULL);
9081         sec_def->prog_prepare_load_fn = OPTS_GET(opts, prog_prepare_load_fn, NULL);
9082         sec_def->prog_attach_fn = OPTS_GET(opts, prog_attach_fn, NULL);
9083
9084         sec_def->handler_id = ++last_custom_sec_def_handler_id;
9085
9086         if (sec)
9087                 custom_sec_def_cnt++;
9088         else
9089                 has_custom_fallback_def = true;
9090
9091         return sec_def->handler_id;
9092 }
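
/* Usage sketch: registering a custom SEC("mysec") handler ('mysec' and
 * my_attach_fn are hypothetical):
 *
 *	LIBBPF_OPTS(libbpf_prog_handler_opts, opts,
 *		.prog_attach_fn = my_attach_fn,
 *	);
 *	int id = libbpf_register_prog_handler("mysec", BPF_PROG_TYPE_KPROBE,
 *					      0, &opts);
 *	...
 *	libbpf_unregister_prog_handler(id);
 */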
9093
9094 int libbpf_unregister_prog_handler(int handler_id)
9095 {
9096         struct bpf_sec_def *sec_defs;
9097         int i;
9098
9099         if (handler_id <= 0)
9100                 return libbpf_err(-EINVAL);
9101
9102         if (has_custom_fallback_def && custom_fallback_def.handler_id == handler_id) {
9103                 memset(&custom_fallback_def, 0, sizeof(custom_fallback_def));
9104                 has_custom_fallback_def = false;
9105                 return 0;
9106         }
9107
9108         for (i = 0; i < custom_sec_def_cnt; i++) {
9109                 if (custom_sec_defs[i].handler_id == handler_id)
9110                         break;
9111         }
9112
9113         if (i == custom_sec_def_cnt)
9114                 return libbpf_err(-ENOENT);
9115
9116         free(custom_sec_defs[i].sec);
9117         for (i = i + 1; i < custom_sec_def_cnt; i++)
9118                 custom_sec_defs[i - 1] = custom_sec_defs[i];
9119         custom_sec_def_cnt--;
9120
9121         /* try to shrink the array, but it's ok if we couldn't */
9122         sec_defs = libbpf_reallocarray(custom_sec_defs, custom_sec_def_cnt, sizeof(*sec_defs));
9123         /* if new count is zero, reallocarray can return a valid NULL result;
9124          * in this case the previous pointer will be freed, so we *have to*
9125          * reassign old pointer to the new value (even if it's NULL)
9126          */
9127         if (sec_defs || custom_sec_def_cnt == 0)
9128                 custom_sec_defs = sec_defs;
9129
9130         return 0;
9131 }
9132
9133 static bool sec_def_matches(const struct bpf_sec_def *sec_def, const char *sec_name)
9134 {
9135         size_t len = strlen(sec_def->sec);
9136
9137         /* "type/" always has to have proper SEC("type/extras") form */
9138         if (sec_def->sec[len - 1] == '/') {
9139                 if (str_has_pfx(sec_name, sec_def->sec))
9140                         return true;
9141                 return false;
9142         }
9143
9144         /* "type+" means it can be either exact SEC("type") or
9145          * well-formed SEC("type/extras") with proper '/' separator
9146          */
9147         if (sec_def->sec[len - 1] == '+') {
9148                 len--;
9149                 /* not even a prefix */
9150                 if (strncmp(sec_name, sec_def->sec, len) != 0)
9151                         return false;
9152                 /* exact match or has '/' separator */
9153                 if (sec_name[len] == '\0' || sec_name[len] == '/')
9154                         return true;
9155                 return false;
9156         }
9157
9158         return strcmp(sec_name, sec_def->sec) == 0;
9159 }
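
/* Matching examples for the rules above (illustrative):
 *
 *	"kprobe+" matches both "kprobe" and "kprobe/do_unlinkat";
 *	"tcx/ingress" (no trailing '+' or '/') requires an exact match;
 *	a definition ending in '/' prefix-matches any "type/extras" name.
 */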
9160
9161 static const struct bpf_sec_def *find_sec_def(const char *sec_name)
9162 {
9163         const struct bpf_sec_def *sec_def;
9164         int i, n;
9165
9166         n = custom_sec_def_cnt;
9167         for (i = 0; i < n; i++) {
9168                 sec_def = &custom_sec_defs[i];
9169                 if (sec_def_matches(sec_def, sec_name))
9170                         return sec_def;
9171         }
9172
9173         n = ARRAY_SIZE(section_defs);
9174         for (i = 0; i < n; i++) {
9175                 sec_def = &section_defs[i];
9176                 if (sec_def_matches(sec_def, sec_name))
9177                         return sec_def;
9178         }
9179
9180         if (has_custom_fallback_def)
9181                 return &custom_fallback_def;
9182
9183         return NULL;
9184 }
9185
9186 #define MAX_TYPE_NAME_SIZE 32
9187
9188 static char *libbpf_get_type_names(bool attach_type)
9189 {
9190         int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE;
9191         char *buf;
9192
9193         buf = malloc(len);
9194         if (!buf)
9195                 return NULL;
9196
9197         buf[0] = '\0';
9198         /* Build a string buffer with all available names */
9199         for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
9200                 const struct bpf_sec_def *sec_def = &section_defs[i];
9201
9202                 if (attach_type) {
9203                         if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9204                                 continue;
9205
9206                         if (!(sec_def->cookie & SEC_ATTACHABLE))
9207                                 continue;
9208                 }
9209
9210                 if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) {
9211                         free(buf);
9212                         return NULL;
9213                 }
9214                 strcat(buf, " ");
9215                 strcat(buf, section_defs[i].sec);
9216         }
9217
9218         return buf;
9219 }
9220
9221 int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
9222                              enum bpf_attach_type *expected_attach_type)
9223 {
9224         const struct bpf_sec_def *sec_def;
9225         char *type_names;
9226
9227         if (!name)
9228                 return libbpf_err(-EINVAL);
9229
9230         sec_def = find_sec_def(name);
9231         if (sec_def) {
9232                 *prog_type = sec_def->prog_type;
9233                 *expected_attach_type = sec_def->expected_attach_type;
9234                 return 0;
9235         }
9236
9237         pr_debug("failed to guess program type from ELF section '%s'\n", name);
9238         type_names = libbpf_get_type_names(false);
9239         if (type_names != NULL) {
9240                 pr_debug("supported section(type) names are:%s\n", type_names);
9241                 free(type_names);
9242         }
9243
9244         return libbpf_err(-ESRCH);
9245 }
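
/* Usage sketch:
 *
 *	enum bpf_prog_type prog_type;
 *	enum bpf_attach_type attach_type;
 *
 *	err = libbpf_prog_type_by_name("xdp", &prog_type, &attach_type);
 *
 * On success, prog_type is BPF_PROG_TYPE_XDP and attach_type is BPF_XDP,
 * per the section table above.
 */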
9246
9247 const char *libbpf_bpf_attach_type_str(enum bpf_attach_type t)
9248 {
9249         if (t < 0 || t >= ARRAY_SIZE(attach_type_name))
9250                 return NULL;
9251
9252         return attach_type_name[t];
9253 }
9254
9255 const char *libbpf_bpf_link_type_str(enum bpf_link_type t)
9256 {
9257         if (t < 0 || t >= ARRAY_SIZE(link_type_name))
9258                 return NULL;
9259
9260         return link_type_name[t];
9261 }
9262
9263 const char *libbpf_bpf_map_type_str(enum bpf_map_type t)
9264 {
9265         if (t < 0 || t >= ARRAY_SIZE(map_type_name))
9266                 return NULL;
9267
9268         return map_type_name[t];
9269 }
9270
9271 const char *libbpf_bpf_prog_type_str(enum bpf_prog_type t)
9272 {
9273         if (t < 0 || t >= ARRAY_SIZE(prog_type_name))
9274                 return NULL;
9275
9276         return prog_type_name[t];
9277 }
9278
9279 static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
9280                                                      int sec_idx,
9281                                                      size_t offset)
9282 {
9283         struct bpf_map *map;
9284         size_t i;
9285
9286         for (i = 0; i < obj->nr_maps; i++) {
9287                 map = &obj->maps[i];
9288                 if (!bpf_map__is_struct_ops(map))
9289                         continue;
9290                 if (map->sec_idx == sec_idx &&
9291                     map->sec_offset <= offset &&
9292                     offset - map->sec_offset < map->def.value_size)
9293                         return map;
9294         }
9295
9296         return NULL;
9297 }
9298
9299 /* Collect the relocations from ELF and populate st_ops->progs[] */
9300 static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
9301                                             Elf64_Shdr *shdr, Elf_Data *data)
9302 {
9303         const struct btf_member *member;
9304         struct bpf_struct_ops *st_ops;
9305         struct bpf_program *prog;
9306         unsigned int shdr_idx;
9307         const struct btf *btf;
9308         struct bpf_map *map;
9309         unsigned int moff, insn_idx;
9310         const char *name;
9311         __u32 member_idx;
9312         Elf64_Sym *sym;
9313         Elf64_Rel *rel;
9314         int i, nrels;
9315
9316         btf = obj->btf;
9317         nrels = shdr->sh_size / shdr->sh_entsize;
9318         for (i = 0; i < nrels; i++) {
9319                 rel = elf_rel_by_idx(data, i);
9320                 if (!rel) {
9321                         pr_warn("struct_ops reloc: failed to get relocation #%d\n", i);
9322                         return -LIBBPF_ERRNO__FORMAT;
9323                 }
9324
9325                 sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info));
9326                 if (!sym) {
9327                         pr_warn("struct_ops reloc: symbol %zx not found\n",
9328                                 (size_t)ELF64_R_SYM(rel->r_info));
9329                         return -LIBBPF_ERRNO__FORMAT;
9330                 }
9331
9332                 name = elf_sym_str(obj, sym->st_name) ?: "<?>";
9333                 map = find_struct_ops_map_by_offset(obj, shdr->sh_info, rel->r_offset);
9334                 if (!map) {
9335                         pr_warn("struct_ops reloc: cannot find map at rel->r_offset %zu\n",
9336                                 (size_t)rel->r_offset);
9337                         return -EINVAL;
9338                 }
9339
9340                 moff = rel->r_offset - map->sec_offset;
9341                 shdr_idx = sym->st_shndx;
9342                 st_ops = map->st_ops;
9343                 pr_debug("struct_ops reloc %s: for %lld value %lld shdr_idx %u rel->r_offset %zu map->sec_offset %zu name %d ('%s')\n",
9344                          map->name,
9345                          (long long)(rel->r_info >> 32),
9346                          (long long)sym->st_value,
9347                          shdr_idx, (size_t)rel->r_offset,
9348                          map->sec_offset, sym->st_name, name);
9349
9350                 if (shdr_idx >= SHN_LORESERVE) {
9351                         pr_warn("struct_ops reloc %s: rel->r_offset %zu shdr_idx %u unsupported non-static function\n",
9352                                 map->name, (size_t)rel->r_offset, shdr_idx);
9353                         return -LIBBPF_ERRNO__RELOC;
9354                 }
9355                 if (sym->st_value % BPF_INSN_SZ) {
9356                         pr_warn("struct_ops reloc %s: invalid target program offset %llu\n",
9357                                 map->name, (unsigned long long)sym->st_value);
9358                         return -LIBBPF_ERRNO__FORMAT;
9359                 }
9360                 insn_idx = sym->st_value / BPF_INSN_SZ;
9361
9362                 member = find_member_by_offset(st_ops->type, moff * 8);
9363                 if (!member) {
9364                         pr_warn("struct_ops reloc %s: cannot find member at moff %u\n",
9365                                 map->name, moff);
9366                         return -EINVAL;
9367                 }
9368                 member_idx = member - btf_members(st_ops->type);
9369                 name = btf__name_by_offset(btf, member->name_off);
9370
9371                 if (!resolve_func_ptr(btf, member->type, NULL)) {
9372                         pr_warn("struct_ops reloc %s: cannot relocate non func ptr %s\n",
9373                                 map->name, name);
9374                         return -EINVAL;
9375                 }
9376
9377                 prog = find_prog_by_sec_insn(obj, shdr_idx, insn_idx);
9378                 if (!prog) {
9379                         pr_warn("struct_ops reloc %s: cannot find prog at shdr_idx %u to relocate func ptr %s\n",
9380                                 map->name, shdr_idx, name);
9381                         return -EINVAL;
9382                 }
9383
9384                 /* prevent the use of BPF prog with invalid type */
9385                 if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
9386                         pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
9387                                 map->name, prog->name);
9388                         return -EINVAL;
9389                 }
9390
9391                 /* if we haven't yet processed this BPF program, record proper
9392                  * attach_btf_id and member_idx
9393                  */
9394                 if (!prog->attach_btf_id) {
9395                         prog->attach_btf_id = st_ops->type_id;
9396                         prog->expected_attach_type = member_idx;
9397                 }
9398
9399                 /* struct_ops BPF prog can be re-used between multiple
9400                  * .struct_ops & .struct_ops.link as long as it's the
9401                  * same struct_ops struct definition and the same
9402                  * function pointer field
9403                  */
9404                 if (prog->attach_btf_id != st_ops->type_id ||
9405                     prog->expected_attach_type != member_idx) {
9406                         pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
9407                                 map->name, prog->name, prog->sec_name, prog->type,
9408                                 prog->attach_btf_id, prog->expected_attach_type, name);
9409                         return -EINVAL;
9410                 }
9411
9412                 st_ops->progs[member_idx] = prog;
9413         }
9414
9415         return 0;
9416 }
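/* Editorial sketch (not part of libbpf) of the BPF-side layout that produces
 * the relocations processed above; struct/func names are illustrative:
 *
 *	SEC("struct_ops/test_1")
 *	int BPF_PROG(test_1, struct bpf_dummy_ops_state *state)
 *	{
 *		return 0;
 *	}
 *
 *	SEC(".struct_ops")
 *	struct bpf_dummy_ops dummy_ops = {
 *		.test_1 = (void *)test_1,  // this func ptr init emits the reloc
 *	};
 */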
9417
9418 #define BTF_TRACE_PREFIX "btf_trace_"
9419 #define BTF_LSM_PREFIX "bpf_lsm_"
9420 #define BTF_ITER_PREFIX "bpf_iter_"
9421 #define BTF_MAX_NAME_SIZE 128
9422
9423 void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type,
9424                                 const char **prefix, int *kind)
9425 {
9426         switch (attach_type) {
9427         case BPF_TRACE_RAW_TP:
9428                 *prefix = BTF_TRACE_PREFIX;
9429                 *kind = BTF_KIND_TYPEDEF;
9430                 break;
9431         case BPF_LSM_MAC:
9432         case BPF_LSM_CGROUP:
9433                 *prefix = BTF_LSM_PREFIX;
9434                 *kind = BTF_KIND_FUNC;
9435                 break;
9436         case BPF_TRACE_ITER:
9437                 *prefix = BTF_ITER_PREFIX;
9438                 *kind = BTF_KIND_FUNC;
9439                 break;
9440         default:
9441                 *prefix = "";
9442                 *kind = BTF_KIND_FUNC;
9443         }
9444 }
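/* Editorial illustration of the mapping above: for a raw tracepoint program
 * attaching to 'sched_switch' (BPF_TRACE_RAW_TP), the lookup ends up
 * searching BTF for the 'btf_trace_sched_switch' typedef; for an LSM program
 * on 'file_open' (BPF_LSM_MAC), for the 'bpf_lsm_file_open' func. E.g.:
 *
 *	id = btf__find_by_name_kind(btf, "btf_trace_sched_switch",
 *				    BTF_KIND_TYPEDEF);
 */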
9445
9446 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
9447                                    const char *name, __u32 kind)
9448 {
9449         char btf_type_name[BTF_MAX_NAME_SIZE];
9450         int ret;
9451
9452         ret = snprintf(btf_type_name, sizeof(btf_type_name),
9453                        "%s%s", prefix, name);
9454         /* snprintf returns the number of characters that would have been
9455          * written, excluding the terminating null byte. So a return value
9456          * >= BTF_MAX_NAME_SIZE indicates truncation.
9457          */
9458         if (ret < 0 || ret >= sizeof(btf_type_name))
9459                 return -ENAMETOOLONG;
9460         return btf__find_by_name_kind(btf, btf_type_name, kind);
9461 }
9462
9463 static inline int find_attach_btf_id(struct btf *btf, const char *name,
9464                                      enum bpf_attach_type attach_type)
9465 {
9466         const char *prefix;
9467         int kind;
9468
9469         btf_get_kernel_prefix_kind(attach_type, &prefix, &kind);
9470         return find_btf_by_prefix_kind(btf, prefix, name, kind);
9471 }
9472
9473 int libbpf_find_vmlinux_btf_id(const char *name,
9474                                enum bpf_attach_type attach_type)
9475 {
9476         struct btf *btf;
9477         int err;
9478
9479         btf = btf__load_vmlinux_btf();
9480         err = libbpf_get_error(btf);
9481         if (err) {
9482                 pr_warn("vmlinux BTF is not found\n");
9483                 return libbpf_err(err);
9484         }
9485
9486         err = find_attach_btf_id(btf, name, attach_type);
9487         if (err <= 0)
9488                 pr_warn("%s is not found in vmlinux BTF\n", name);
9489
9490         btf__free(btf);
9491         return libbpf_err(err);
9492 }
9493
9494 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
9495 {
9496         struct bpf_prog_info info;
9497         __u32 info_len = sizeof(info);
9498         struct btf *btf;
9499         int err;
9500
9501         memset(&info, 0, info_len);
9502         err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
9503         if (err) {
9504                 pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n",
9505                         attach_prog_fd, err);
9506                 return err;
9507         }
9508
9509         err = -EINVAL;
9510         if (!info.btf_id) {
9511                 pr_warn("The target program doesn't have BTF\n");
9512                 goto out;
9513         }
9514         btf = btf__load_from_kernel_by_id(info.btf_id);
9515         err = libbpf_get_error(btf);
9516         if (err) {
9517                 pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err);
9518                 goto out;
9519         }
9520         err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
9521         btf__free(btf);
9522         if (err <= 0) {
9523                 pr_warn("%s is not found in prog's BTF\n", name);
9524                 goto out;
9525         }
9526 out:
9527         return err;
9528 }
9529
9530 static int find_kernel_btf_id(struct bpf_object *obj, const char *attach_name,
9531                               enum bpf_attach_type attach_type,
9532                               int *btf_obj_fd, int *btf_type_id)
9533 {
9534         int ret, i;
9535
9536         ret = find_attach_btf_id(obj->btf_vmlinux, attach_name, attach_type);
9537         if (ret > 0) {
9538                 *btf_obj_fd = 0; /* vmlinux BTF */
9539                 *btf_type_id = ret;
9540                 return 0;
9541         }
9542         if (ret != -ENOENT)
9543                 return ret;
9544
9545         ret = load_module_btfs(obj);
9546         if (ret)
9547                 return ret;
9548
9549         for (i = 0; i < obj->btf_module_cnt; i++) {
9550                 const struct module_btf *mod = &obj->btf_modules[i];
9551
9552                 ret = find_attach_btf_id(mod->btf, attach_name, attach_type);
9553                 if (ret > 0) {
9554                         *btf_obj_fd = mod->fd;
9555                         *btf_type_id = ret;
9556                         return 0;
9557                 }
9558                 if (ret == -ENOENT)
9559                         continue;
9560
9561                 return ret;
9562         }
9563
9564         return -ESRCH;
9565 }
9566
9567 static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attach_name,
9568                                      int *btf_obj_fd, int *btf_type_id)
9569 {
9570         enum bpf_attach_type attach_type = prog->expected_attach_type;
9571         __u32 attach_prog_fd = prog->attach_prog_fd;
9572         int err = 0;
9573
9574         /* BPF program's BTF ID */
9575         if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
9576                 if (!attach_prog_fd) {
9577                         pr_warn("prog '%s': attach program FD is not set\n", prog->name);
9578                         return -EINVAL;
9579                 }
9580                 err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
9581                 if (err < 0) {
9582                         pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
9583                                  prog->name, attach_prog_fd, attach_name, err);
9584                         return err;
9585                 }
9586                 *btf_obj_fd = 0;
9587                 *btf_type_id = err;
9588                 return 0;
9589         }
9590
9591         /* kernel/module BTF ID */
9592         if (prog->obj->gen_loader) {
9593                 bpf_gen__record_attach_target(prog->obj->gen_loader, attach_name, attach_type);
9594                 *btf_obj_fd = 0;
9595                 *btf_type_id = 1;
9596         } else {
9597                 err = find_kernel_btf_id(prog->obj, attach_name,
9598                                          attach_type, btf_obj_fd,
9599                                          btf_type_id);
9600         }
9601         if (err) {
9602                 pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
9603                         prog->name, attach_name, err);
9604                 return err;
9605         }
9606         return 0;
9607 }
9608
9609 int libbpf_attach_type_by_name(const char *name,
9610                                enum bpf_attach_type *attach_type)
9611 {
9612         char *type_names;
9613         const struct bpf_sec_def *sec_def;
9614
9615         if (!name)
9616                 return libbpf_err(-EINVAL);
9617
9618         sec_def = find_sec_def(name);
9619         if (!sec_def) {
9620                 pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
9621                 type_names = libbpf_get_type_names(true);
9622                 if (type_names != NULL) {
9623                         pr_debug("attachable section(type) names are:%s\n", type_names);
9624                         free(type_names);
9625                 }
9626
9627                 return libbpf_err(-EINVAL);
9628         }
9629
9630         if (sec_def->prog_prepare_load_fn != libbpf_prepare_prog_load)
9631                 return libbpf_err(-EINVAL);
9632         if (!(sec_def->cookie & SEC_ATTACHABLE))
9633                 return libbpf_err(-EINVAL);
9634
9635         *attach_type = sec_def->expected_attach_type;
9636         return 0;
9637 }
9638
9639 int bpf_map__fd(const struct bpf_map *map)
9640 {
9641         if (!map)
9642                 return libbpf_err(-EINVAL);
9643         if (!map_is_created(map))
9644                 return -1;
9645         return map->fd;
9646 }
9647
9648 static bool map_uses_real_name(const struct bpf_map *map)
9649 {
9650         /* Since libbpf started to support custom .data.* and .rodata.* maps,
9651          * their user-visible names differ from their kernel-visible names.
9652          * Users see such a map's corresponding ELF section name as the map name.
9653          * This check distinguishes .data/.rodata from .data.* and .rodata.*
9654          * maps to know which name has to be returned to the user.
9655          */
9656         if (map->libbpf_type == LIBBPF_MAP_DATA && strcmp(map->real_name, DATA_SEC) != 0)
9657                 return true;
9658         if (map->libbpf_type == LIBBPF_MAP_RODATA && strcmp(map->real_name, RODATA_SEC) != 0)
9659                 return true;
9660         return false;
9661 }
9662
9663 const char *bpf_map__name(const struct bpf_map *map)
9664 {
9665         if (!map)
9666                 return NULL;
9667
9668         if (map_uses_real_name(map))
9669                 return map->real_name;
9670
9671         return map->name;
9672 }
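/* Editorial example (not part of libbpf): for a custom global-data variable
 *
 *	int my_counter SEC(".data.counters");
 *
 * bpf_map__name() returns the ELF section name ".data.counters" (the map's
 * real_name), while the kernel sees a truncated, object-prefixed internal
 * name. Variable and section names here are illustrative.
 */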
9673
9674 enum bpf_map_type bpf_map__type(const struct bpf_map *map)
9675 {
9676         return map->def.type;
9677 }
9678
9679 int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
9680 {
9681         if (map_is_created(map))
9682                 return libbpf_err(-EBUSY);
9683         map->def.type = type;
9684         return 0;
9685 }
9686
9687 __u32 bpf_map__map_flags(const struct bpf_map *map)
9688 {
9689         return map->def.map_flags;
9690 }
9691
9692 int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
9693 {
9694         if (map_is_created(map))
9695                 return libbpf_err(-EBUSY);
9696         map->def.map_flags = flags;
9697         return 0;
9698 }
9699
9700 __u64 bpf_map__map_extra(const struct bpf_map *map)
9701 {
9702         return map->map_extra;
9703 }
9704
9705 int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra)
9706 {
9707         if (map_is_created(map))
9708                 return libbpf_err(-EBUSY);
9709         map->map_extra = map_extra;
9710         return 0;
9711 }
9712
9713 __u32 bpf_map__numa_node(const struct bpf_map *map)
9714 {
9715         return map->numa_node;
9716 }
9717
9718 int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
9719 {
9720         if (map_is_created(map))
9721                 return libbpf_err(-EBUSY);
9722         map->numa_node = numa_node;
9723         return 0;
9724 }
9725
9726 __u32 bpf_map__key_size(const struct bpf_map *map)
9727 {
9728         return map->def.key_size;
9729 }
9730
9731 int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
9732 {
9733         if (map_is_created(map))
9734                 return libbpf_err(-EBUSY);
9735         map->def.key_size = size;
9736         return 0;
9737 }
9738
9739 __u32 bpf_map__value_size(const struct bpf_map *map)
9740 {
9741         return map->def.value_size;
9742 }
9743
9744 static int map_btf_datasec_resize(struct bpf_map *map, __u32 size)
9745 {
9746         struct btf *btf;
9747         struct btf_type *datasec_type, *var_type;
9748         struct btf_var_secinfo *var;
9749         const struct btf_type *array_type;
9750         const struct btf_array *array;
9751         int vlen, element_sz, new_array_id;
9752         __u32 nr_elements;
9753
9754         /* check btf existence */
9755         btf = bpf_object__btf(map->obj);
9756         if (!btf)
9757                 return -ENOENT;
9758
9759         /* verify map is datasec */
9760         datasec_type = btf_type_by_id(btf, bpf_map__btf_value_type_id(map));
9761         if (!btf_is_datasec(datasec_type)) {
9762                 pr_warn("map '%s': cannot be resized, map value type is not a datasec\n",
9763                         bpf_map__name(map));
9764                 return -EINVAL;
9765         }
9766
9767         /* verify datasec has at least one var */
9768         vlen = btf_vlen(datasec_type);
9769         if (vlen == 0) {
9770                 pr_warn("map '%s': cannot be resized, map value datasec is empty\n",
9771                         bpf_map__name(map));
9772                 return -EINVAL;
9773         }
9774
9775         /* verify last var in the datasec is an array */
9776         var = &btf_var_secinfos(datasec_type)[vlen - 1];
9777         var_type = btf_type_by_id(btf, var->type);
9778         array_type = skip_mods_and_typedefs(btf, var_type->type, NULL);
9779         if (!btf_is_array(array_type)) {
9780                 pr_warn("map '%s': cannot be resized, last var must be an array\n",
9781                         bpf_map__name(map));
9782                 return -EINVAL;
9783         }
9784
9785         /* verify request size aligns with array */
9786         array = btf_array(array_type);
9787         element_sz = btf__resolve_size(btf, array->type);
9788         if (element_sz <= 0 || (size - var->offset) % element_sz != 0) {
9789                 pr_warn("map '%s': cannot be resized, element size (%d) doesn't align with new total size (%u)\n",
9790                         bpf_map__name(map), element_sz, size);
9791                 return -EINVAL;
9792         }
9793
9794         /* create a new array based on the existing array, but with new length */
9795         nr_elements = (size - var->offset) / element_sz;
9796         new_array_id = btf__add_array(btf, array->index_type, array->type, nr_elements);
9797         if (new_array_id < 0)
9798                 return new_array_id;
9799
9800         /* adding a new btf type invalidates existing pointers to btf objects,
9801          * so refresh pointers before proceeding
9802          */
9803         datasec_type = btf_type_by_id(btf, map->btf_value_type_id);
9804         var = &btf_var_secinfos(datasec_type)[vlen - 1];
9805         var_type = btf_type_by_id(btf, var->type);
9806
9807         /* finally update btf info */
9808         datasec_type->size = size;
9809         var->size = size - var->offset;
9810         var_type->type = new_array_id;
9811
9812         return 0;
9813 }
9814
9815 int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
9816 {
9817         if (map->obj->loaded || map->reused)
9818                 return libbpf_err(-EBUSY);
9819
9820         if (map->mmaped) {
9821                 int err;
9822                 size_t mmap_old_sz, mmap_new_sz;
9823
9824                 mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
9825                 mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
9826                 err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
9827                 if (err) {
9828                         pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
9829                                 bpf_map__name(map), err);
9830                         return err;
9831                 }
9832                 err = map_btf_datasec_resize(map, size);
9833                 if (err && err != -ENOENT) {
9834                         pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n",
9835                                 bpf_map__name(map), err);
9836                         map->btf_value_type_id = 0;
9837                         map->btf_key_type_id = 0;
9838                 }
9839         }
9840
9841         map->def.value_size = size;
9842         return 0;
9843 }
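/* Editorial usage sketch (not part of libbpf): doubling a global-data map
 * whose last variable is an array, before bpf_object__load(); the map name
 * is illustrative.
 *
 *	struct bpf_map *map = bpf_object__find_map_by_name(obj, ".data.buf");
 *
 *	if (!map || bpf_map__set_value_size(map, 2 * bpf_map__value_size(map)))
 *		;  // handle error; returns -EBUSY once the object is loaded
 */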
9844
9845 __u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
9846 {
9847         return map ? map->btf_key_type_id : 0;
9848 }
9849
9850 __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
9851 {
9852         return map ? map->btf_value_type_id : 0;
9853 }
9854
9855 int bpf_map__set_initial_value(struct bpf_map *map,
9856                                const void *data, size_t size)
9857 {
9858         if (map->obj->loaded || map->reused)
9859                 return libbpf_err(-EBUSY);
9860
9861         if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
9862             size != map->def.value_size)
9863                 return libbpf_err(-EINVAL);
9864
9865         memcpy(map->mmaped, data, size);
9866         return 0;
9867 }
9868
9869 void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
9870 {
9871         if (!map->mmaped)
9872                 return NULL;
9873         *psize = map->def.value_size;
9874         return map->mmaped;
9875 }
9876
9877 bool bpf_map__is_internal(const struct bpf_map *map)
9878 {
9879         return map->libbpf_type != LIBBPF_MAP_UNSPEC;
9880 }
9881
9882 __u32 bpf_map__ifindex(const struct bpf_map *map)
9883 {
9884         return map->map_ifindex;
9885 }
9886
9887 int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
9888 {
9889         if (map_is_created(map))
9890                 return libbpf_err(-EBUSY);
9891         map->map_ifindex = ifindex;
9892         return 0;
9893 }
9894
9895 int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
9896 {
9897         if (!bpf_map_type__is_map_in_map(map->def.type)) {
9898                 pr_warn("error: unsupported map type\n");
9899                 return libbpf_err(-EINVAL);
9900         }
9901         if (map->inner_map_fd != -1) {
9902                 pr_warn("error: inner_map_fd already specified\n");
9903                 return libbpf_err(-EINVAL);
9904         }
9905         if (map->inner_map) {
9906                 bpf_map__destroy(map->inner_map);
9907                 zfree(&map->inner_map);
9908         }
9909         map->inner_map_fd = fd;
9910         return 0;
9911 }
9912
9913 static struct bpf_map *
9914 __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
9915 {
9916         ssize_t idx;
9917         struct bpf_map *s, *e;
9918
9919         if (!obj || !obj->maps)
9920                 return errno = EINVAL, NULL;
9921
9922         s = obj->maps;
9923         e = obj->maps + obj->nr_maps;
9924
9925         if ((m < s) || (m >= e)) {
9926                 pr_warn("error in %s: map handle doesn't belong to object\n",
9927                          __func__);
9928                 return errno = EINVAL, NULL;
9929         }
9930
9931         idx = (m - obj->maps) + i;
9932         if (idx >= obj->nr_maps || idx < 0)
9933                 return NULL;
9934         return &obj->maps[idx];
9935 }
9936
9937 struct bpf_map *
9938 bpf_object__next_map(const struct bpf_object *obj, const struct bpf_map *prev)
9939 {
9940         if (prev == NULL)
9941                 return obj->maps;
9942
9943         return __bpf_map__iter(prev, obj, 1);
9944 }
9945
9946 struct bpf_map *
9947 bpf_object__prev_map(const struct bpf_object *obj, const struct bpf_map *next)
9948 {
9949         if (next == NULL) {
9950                 if (!obj->nr_maps)
9951                         return NULL;
9952                 return obj->maps + obj->nr_maps - 1;
9953         }
9954
9955         return __bpf_map__iter(next, obj, -1);
9956 }
9957
9958 struct bpf_map *
9959 bpf_object__find_map_by_name(const struct bpf_object *obj, const char *name)
9960 {
9961         struct bpf_map *pos;
9962
9963         bpf_object__for_each_map(pos, obj) {
9964                 /* if it's a special internal map name (which always starts
9965                  * with a dot) then check if that special name matches the
9966                  * real map name (ELF section name)
9967                  */
9968                 if (name[0] == '.') {
9969                         if (pos->real_name && strcmp(pos->real_name, name) == 0)
9970                                 return pos;
9971                         continue;
9972                 }
9973                 /* otherwise map name has to be an exact match */
9974                 if (map_uses_real_name(pos)) {
9975                         if (strcmp(pos->real_name, name) == 0)
9976                                 return pos;
9977                         continue;
9978                 }
9979                 if (strcmp(pos->name, name) == 0)
9980                         return pos;
9981         }
9982         return errno = ENOENT, NULL;
9983 }
9984
9985 int
9986 bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name)
9987 {
9988         return bpf_map__fd(bpf_object__find_map_by_name(obj, name));
9989 }
9990
9991 static int validate_map_op(const struct bpf_map *map, size_t key_sz,
9992                            size_t value_sz, bool check_value_sz)
9993 {
9994         if (!map_is_created(map)) /* map is not yet created */
9995                 return -ENOENT;
9996
9997         if (map->def.key_size != key_sz) {
9998                 pr_warn("map '%s': unexpected key size %zu provided, expected %u\n",
9999                         map->name, key_sz, map->def.key_size);
10000                 return -EINVAL;
10001         }
10002
10003         if (!check_value_sz)
10004                 return 0;
10005
10006         switch (map->def.type) {
10007         case BPF_MAP_TYPE_PERCPU_ARRAY:
10008         case BPF_MAP_TYPE_PERCPU_HASH:
10009         case BPF_MAP_TYPE_LRU_PERCPU_HASH:
10010         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: {
10011                 int num_cpu = libbpf_num_possible_cpus();
10012                 size_t elem_sz = roundup(map->def.value_size, 8);
10013
10014                 if (value_sz != num_cpu * elem_sz) {
10015                         pr_warn("map '%s': unexpected value size %zu provided for per-CPU map, expected %d * %zu = %zd\n",
10016                                 map->name, value_sz, num_cpu, elem_sz, num_cpu * elem_sz);
10017                         return -EINVAL;
10018                 }
10019                 break;
10020         }
10021         default:
10022                 if (map->def.value_size != value_sz) {
10023                         pr_warn("map '%s': unexpected value size %zu provided, expected %u\n",
10024                                 map->name, value_sz, map->def.value_size);
10025                         return -EINVAL;
10026                 }
10027                 break;
10028         }
10029         return 0;
10030 }
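/* Editorial example (not part of libbpf): for per-CPU maps the caller must
 * pass one roundup(value_size, 8) slot per possible CPU. For a 4-byte value:
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	size_t sz = ncpus * 8;  // roundup(4, 8) == 8 bytes per CPU
 *	__u64 *vals = calloc(ncpus, 8);
 *
 *	bpf_map__lookup_elem(map, &key, sizeof(key), vals, sz, 0);
 */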
10031
10032 int bpf_map__lookup_elem(const struct bpf_map *map,
10033                          const void *key, size_t key_sz,
10034                          void *value, size_t value_sz, __u64 flags)
10035 {
10036         int err;
10037
10038         err = validate_map_op(map, key_sz, value_sz, true);
10039         if (err)
10040                 return libbpf_err(err);
10041
10042         return bpf_map_lookup_elem_flags(map->fd, key, value, flags);
10043 }
10044
10045 int bpf_map__update_elem(const struct bpf_map *map,
10046                          const void *key, size_t key_sz,
10047                          const void *value, size_t value_sz, __u64 flags)
10048 {
10049         int err;
10050
10051         err = validate_map_op(map, key_sz, value_sz, true);
10052         if (err)
10053                 return libbpf_err(err);
10054
10055         return bpf_map_update_elem(map->fd, key, value, flags);
10056 }
10057
10058 int bpf_map__delete_elem(const struct bpf_map *map,
10059                          const void *key, size_t key_sz, __u64 flags)
10060 {
10061         int err;
10062
10063         err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10064         if (err)
10065                 return libbpf_err(err);
10066
10067         return bpf_map_delete_elem_flags(map->fd, key, flags);
10068 }
10069
10070 int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
10071                                     const void *key, size_t key_sz,
10072                                     void *value, size_t value_sz, __u64 flags)
10073 {
10074         int err;
10075
10076         err = validate_map_op(map, key_sz, value_sz, true);
10077         if (err)
10078                 return libbpf_err(err);
10079
10080         return bpf_map_lookup_and_delete_elem_flags(map->fd, key, value, flags);
10081 }
10082
10083 int bpf_map__get_next_key(const struct bpf_map *map,
10084                           const void *cur_key, void *next_key, size_t key_sz)
10085 {
10086         int err;
10087
10088         err = validate_map_op(map, key_sz, 0, false /* check_value_sz */);
10089         if (err)
10090                 return libbpf_err(err);
10091
10092         return bpf_map_get_next_key(map->fd, cur_key, next_key);
10093 }
10094
10095 long libbpf_get_error(const void *ptr)
10096 {
10097         if (!IS_ERR_OR_NULL(ptr))
10098                 return 0;
10099
10100         if (IS_ERR(ptr))
10101                 errno = -PTR_ERR(ptr);
10102
10103         /* If ptr == NULL, then errno should be already set by the failing
10104          * API, because libbpf never returns NULL on success and it now always
10105          * sets errno on error. So no extra errno handling for ptr == NULL
10106          * case.
10107          */
10108         return -errno;
10109 }
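/* Editorial usage sketch (not part of libbpf): pointer-returning APIs set
 * errno on failure, so the classic pattern is
 *
 *	struct bpf_link *link = bpf_program__attach(prog);
 *	long err = libbpf_get_error(link);
 *
 *	if (err)  // link is unusable; err is a negative errno code
 *		fprintf(stderr, "attach failed: %ld\n", err);
 */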
10110
10111 /* Replace link's underlying BPF program with the new one */
10112 int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
10113 {
10114         int ret;
10115
10116         ret = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
10117         return libbpf_err_errno(ret);
10118 }
10119
10120 /* Release "ownership" of the underlying BPF resource (typically, a BPF
10121  * program attached to some BPF hook, e.g., tracepoint, kprobe, etc). A
10122  * disconnected link, when destructed through a bpf_link__destroy() call,
10123  * won't attempt to detach/unregister that BPF resource. This is useful in
10124  * situations where, say, an attached BPF program has to outlive the
10125  * userspace program that attached it. Depending on the type of BPF program,
10126  * though, there might be additional steps (like pinning the BPF program in
10127  * BPF FS) necessary to ensure that the exit of the userspace program doesn't
10128  * trigger automatic detachment and clean-up inside the kernel.
10129  */
10130 void bpf_link__disconnect(struct bpf_link *link)
10131 {
10132         link->disconnected = true;
10133 }
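/* Editorial usage sketch (not part of libbpf): keeping an attachment alive
 * past process exit by pinning the link and disconnecting before destroy;
 * the pin path is illustrative.
 *
 *	bpf_link__pin(link, "/sys/fs/bpf/my_link");
 *	bpf_link__disconnect(link);
 *	bpf_link__destroy(link);  // frees memory, but no longer detaches
 */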
10134
10135 int bpf_link__destroy(struct bpf_link *link)
10136 {
10137         int err = 0;
10138
10139         if (IS_ERR_OR_NULL(link))
10140                 return 0;
10141
10142         if (!link->disconnected && link->detach)
10143                 err = link->detach(link);
10144         if (link->pin_path)
10145                 free(link->pin_path);
10146         if (link->dealloc)
10147                 link->dealloc(link);
10148         else
10149                 free(link);
10150
10151         return libbpf_err(err);
10152 }
10153
10154 int bpf_link__fd(const struct bpf_link *link)
10155 {
10156         return link->fd;
10157 }
10158
10159 const char *bpf_link__pin_path(const struct bpf_link *link)
10160 {
10161         return link->pin_path;
10162 }
10163
10164 static int bpf_link__detach_fd(struct bpf_link *link)
10165 {
10166         return libbpf_err_errno(close(link->fd));
10167 }
10168
10169 struct bpf_link *bpf_link__open(const char *path)
10170 {
10171         struct bpf_link *link;
10172         int fd;
10173
10174         fd = bpf_obj_get(path);
10175         if (fd < 0) {
10176                 fd = -errno;
10177                 pr_warn("failed to open link at %s: %d\n", path, fd);
10178                 return libbpf_err_ptr(fd);
10179         }
10180
10181         link = calloc(1, sizeof(*link));
10182         if (!link) {
10183                 close(fd);
10184                 return libbpf_err_ptr(-ENOMEM);
10185         }
10186         link->detach = &bpf_link__detach_fd;
10187         link->fd = fd;
10188
10189         link->pin_path = strdup(path);
10190         if (!link->pin_path) {
10191                 bpf_link__destroy(link);
10192                 return libbpf_err_ptr(-ENOMEM);
10193         }
10194
10195         return link;
10196 }
10197
10198 int bpf_link__detach(struct bpf_link *link)
10199 {
10200         return bpf_link_detach(link->fd) ? -errno : 0;
10201 }
10202
10203 int bpf_link__pin(struct bpf_link *link, const char *path)
10204 {
10205         int err;
10206
10207         if (link->pin_path)
10208                 return libbpf_err(-EBUSY);
10209         err = make_parent_dir(path);
10210         if (err)
10211                 return libbpf_err(err);
10212         err = check_path(path);
10213         if (err)
10214                 return libbpf_err(err);
10215
10216         link->pin_path = strdup(path);
10217         if (!link->pin_path)
10218                 return libbpf_err(-ENOMEM);
10219
10220         if (bpf_obj_pin(link->fd, link->pin_path)) {
10221                 err = -errno;
10222                 zfree(&link->pin_path);
10223                 return libbpf_err(err);
10224         }
10225
10226         pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
10227         return 0;
10228 }
10229
10230 int bpf_link__unpin(struct bpf_link *link)
10231 {
10232         int err;
10233
10234         if (!link->pin_path)
10235                 return libbpf_err(-EINVAL);
10236
10237         err = unlink(link->pin_path);
10238         if (err != 0)
10239                 return -errno;
10240
10241         pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
10242         zfree(&link->pin_path);
10243         return 0;
10244 }
10245
10246 struct bpf_link_perf {
10247         struct bpf_link link;
10248         int perf_event_fd;
10249         /* legacy kprobe support: keep track of probe identifier and type */
10250         char *legacy_probe_name;
10251         bool legacy_is_kprobe;
10252         bool legacy_is_retprobe;
10253 };
10254
10255 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe);
10256 static int remove_uprobe_event_legacy(const char *probe_name, bool retprobe);
10257
10258 static int bpf_link_perf_detach(struct bpf_link *link)
10259 {
10260         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10261         int err = 0;
10262
10263         if (ioctl(perf_link->perf_event_fd, PERF_EVENT_IOC_DISABLE, 0) < 0)
10264                 err = -errno;
10265
10266         if (perf_link->perf_event_fd != link->fd)
10267                 close(perf_link->perf_event_fd);
10268         close(link->fd);
10269
10270         /* legacy uprobe/kprobe needs to be removed after perf event fd closure */
10271         if (perf_link->legacy_probe_name) {
10272                 if (perf_link->legacy_is_kprobe) {
10273                         err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
10274                                                          perf_link->legacy_is_retprobe);
10275                 } else {
10276                         err = remove_uprobe_event_legacy(perf_link->legacy_probe_name,
10277                                                          perf_link->legacy_is_retprobe);
10278                 }
10279         }
10280
10281         return err;
10282 }
10283
10284 static void bpf_link_perf_dealloc(struct bpf_link *link)
10285 {
10286         struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10287
10288         free(perf_link->legacy_probe_name);
10289         free(perf_link);
10290 }
10291
10292 struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
10293                                                      const struct bpf_perf_event_opts *opts)
10294 {
10295         char errmsg[STRERR_BUFSIZE];
10296         struct bpf_link_perf *link;
10297         int prog_fd, link_fd = -1, err;
10298         bool force_ioctl_attach;
10299
10300         if (!OPTS_VALID(opts, bpf_perf_event_opts))
10301                 return libbpf_err_ptr(-EINVAL);
10302
10303         if (pfd < 0) {
10304                 pr_warn("prog '%s': invalid perf event FD %d\n",
10305                         prog->name, pfd);
10306                 return libbpf_err_ptr(-EINVAL);
10307         }
10308         prog_fd = bpf_program__fd(prog);
10309         if (prog_fd < 0) {
10310                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
10311                         prog->name);
10312                 return libbpf_err_ptr(-EINVAL);
10313         }
10314
10315         link = calloc(1, sizeof(*link));
10316         if (!link)
10317                 return libbpf_err_ptr(-ENOMEM);
10318         link->link.detach = &bpf_link_perf_detach;
10319         link->link.dealloc = &bpf_link_perf_dealloc;
10320         link->perf_event_fd = pfd;
10321
10322         force_ioctl_attach = OPTS_GET(opts, force_ioctl_attach, false);
10323         if (kernel_supports(prog->obj, FEAT_PERF_LINK) && !force_ioctl_attach) {
10324                 DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts,
10325                         .perf_event.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0));
10326
10327                 link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts);
10328                 if (link_fd < 0) {
10329                         err = -errno;
10330                         pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n",
10331                                 prog->name, pfd,
10332                                 err, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10333                         goto err_out;
10334                 }
10335                 link->link.fd = link_fd;
10336         } else {
10337                 if (OPTS_GET(opts, bpf_cookie, 0)) {
10338                         pr_warn("prog '%s': user context value is not supported\n", prog->name);
10339                         err = -EOPNOTSUPP;
10340                         goto err_out;
10341                 }
10342
10343                 if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
10344                         err = -errno;
10345                         pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n",
10346                                 prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10347                         if (err == -EPROTO)
10348                                 pr_warn("prog '%s': try adding PERF_SAMPLE_CALLCHAIN to or removing exclude_callchain_[kernel|user] from pfd %d\n",
10349                                         prog->name, pfd);
10350                         goto err_out;
10351                 }
10352                 link->link.fd = pfd;
10353         }
10354         if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
10355                 err = -errno;
10356                 pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n",
10357                         prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10358                 goto err_out;
10359         }
10360
10361         return &link->link;
10362 err_out:
10363         if (link_fd >= 0)
10364                 close(link_fd);
10365         free(link);
10366         return libbpf_err_ptr(err);
10367 }
10368
10369 struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
10370 {
10371         return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
10372 }
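/* Editorial usage sketch (not part of libbpf): opening a software CPU-clock
 * perf event and attaching a perf_event BPF program to it.
 *
 *	struct perf_event_attr attr = {
 *		.size = sizeof(attr),
 *		.type = PERF_TYPE_SOFTWARE,
 *		.config = PERF_COUNT_SW_CPU_CLOCK,
 *		.freq = 1,
 *		.sample_freq = 99,
 *	};
 *	int pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1,
 *			  PERF_FLAG_FD_CLOEXEC);  // pid=-1, cpu=0, group_fd=-1
 *	struct bpf_link *link = bpf_program__attach_perf_event(prog, pfd);
 */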
10373
10374 /*
10375  * This function is expected to parse an integer in the range of [0, 2^31-1]
10376  * from the given file using scanf format string fmt. If the actual parsed
10377  * value is negative, the result is indistinguishable from an error.
10378  */
10379 static int parse_uint_from_file(const char *file, const char *fmt)
10380 {
10381         char buf[STRERR_BUFSIZE];
10382         int err, ret;
10383         FILE *f;
10384
10385         f = fopen(file, "re");
10386         if (!f) {
10387                 err = -errno;
10388                 pr_debug("failed to open '%s': %s\n", file,
10389                          libbpf_strerror_r(err, buf, sizeof(buf)));
10390                 return err;
10391         }
10392         err = fscanf(f, fmt, &ret);
10393         if (err != 1) {
10394                 err = err == EOF ? -EIO : -errno;
10395                 pr_debug("failed to parse '%s': %s\n", file,
10396                         libbpf_strerror_r(err, buf, sizeof(buf)));
10397                 fclose(f);
10398                 return err;
10399         }
10400         fclose(f);
10401         return ret;
10402 }
10403
10404 static int determine_kprobe_perf_type(void)
10405 {
10406         const char *file = "/sys/bus/event_source/devices/kprobe/type";
10407
10408         return parse_uint_from_file(file, "%d\n");
10409 }
10410
10411 static int determine_uprobe_perf_type(void)
10412 {
10413         const char *file = "/sys/bus/event_source/devices/uprobe/type";
10414
10415         return parse_uint_from_file(file, "%d\n");
10416 }
10417
10418 static int determine_kprobe_retprobe_bit(void)
10419 {
10420         const char *file = "/sys/bus/event_source/devices/kprobe/format/retprobe";
10421
10422         return parse_uint_from_file(file, "config:%d\n");
10423 }
10424
10425 static int determine_uprobe_retprobe_bit(void)
10426 {
10427         const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
10428
10429         return parse_uint_from_file(file, "config:%d\n");
10430 }
10431
10432 #define PERF_UPROBE_REF_CTR_OFFSET_BITS 32
10433 #define PERF_UPROBE_REF_CTR_OFFSET_SHIFT 32
10434
10435 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
10436                                  uint64_t offset, int pid, size_t ref_ctr_off)
10437 {
10438         const size_t attr_sz = sizeof(struct perf_event_attr);
10439         struct perf_event_attr attr;
10440         char errmsg[STRERR_BUFSIZE];
10441         int type, pfd;
10442
10443         if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
10444                 return -EINVAL;
10445
10446         memset(&attr, 0, attr_sz);
10447
10448         type = uprobe ? determine_uprobe_perf_type()
10449                       : determine_kprobe_perf_type();
10450         if (type < 0) {
10451                 pr_warn("failed to determine %s perf type: %s\n",
10452                         uprobe ? "uprobe" : "kprobe",
10453                         libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
10454                 return type;
10455         }
10456         if (retprobe) {
10457                 int bit = uprobe ? determine_uprobe_retprobe_bit()
10458                                  : determine_kprobe_retprobe_bit();
10459
10460                 if (bit < 0) {
10461                         pr_warn("failed to determine %s retprobe bit: %s\n",
10462                                 uprobe ? "uprobe" : "kprobe",
10463                                 libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));
10464                         return bit;
10465                 }
10466                 attr.config |= 1 << bit;
10467         }
10468         attr.size = attr_sz;
10469         attr.type = type;
10470         attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
10471         attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
10472         attr.config2 = offset;           /* kprobe_addr or probe_offset */
10473
10474         /* pid filter is meaningful only for uprobes */
10475         pfd = syscall(__NR_perf_event_open, &attr,
10476                       pid < 0 ? -1 : pid /* pid */,
10477                       pid == -1 ? 0 : -1 /* cpu */,
10478                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
10479         return pfd >= 0 ? pfd : -errno;
10480 }
10481
10482 static int append_to_file(const char *file, const char *fmt, ...)
10483 {
10484         int fd, n, err = 0;
10485         va_list ap;
10486         char buf[1024];
10487
10488         va_start(ap, fmt);
10489         n = vsnprintf(buf, sizeof(buf), fmt, ap);
10490         va_end(ap);
10491
10492         if (n < 0 || n >= sizeof(buf))
10493                 return -EINVAL;
10494
10495         fd = open(file, O_WRONLY | O_APPEND | O_CLOEXEC, 0);
10496         if (fd < 0)
10497                 return -errno;
10498
10499         if (write(fd, buf, n) < 0)
10500                 err = -errno;
10501
10502         close(fd);
10503         return err;
10504 }
10505
10506 #define DEBUGFS "/sys/kernel/debug/tracing"
10507 #define TRACEFS "/sys/kernel/tracing"
10508
10509 static bool use_debugfs(void)
10510 {
10511         static int has_debugfs = -1;
10512
10513         if (has_debugfs < 0)
10514                 has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
10515
10516         return has_debugfs == 1;
10517 }
10518
10519 static const char *tracefs_path(void)
10520 {
10521         return use_debugfs() ? DEBUGFS : TRACEFS;
10522 }
10523
10524 static const char *tracefs_kprobe_events(void)
10525 {
10526         return use_debugfs() ? DEBUGFS"/kprobe_events" : TRACEFS"/kprobe_events";
10527 }
10528
10529 static const char *tracefs_uprobe_events(void)
10530 {
10531         return use_debugfs() ? DEBUGFS"/uprobe_events" : TRACEFS"/uprobe_events";
10532 }
10533
10534 static const char *tracefs_available_filter_functions(void)
10535 {
10536         return use_debugfs() ? DEBUGFS"/available_filter_functions"
10537                              : TRACEFS"/available_filter_functions";
10538 }
10539
10540 static const char *tracefs_available_filter_functions_addrs(void)
10541 {
10542         return use_debugfs() ? DEBUGFS"/available_filter_functions_addrs"
10543                              : TRACEFS"/available_filter_functions_addrs";
10544 }
10545
10546 static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
10547                                          const char *kfunc_name, size_t offset)
10548 {
10549         static int index = 0;
10550         int i;
10551
10552         snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
10553                  __sync_fetch_and_add(&index, 1));
10554
10555         /* sanitize kfunc_name in the probe name: only alphanumerics and '_' are allowed */
10556         for (i = 0; buf[i]; i++) {
10557                 if (!isalnum(buf[i]))
10558                         buf[i] = '_';
10559         }
10560 }
10561
10562 static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
10563                                    const char *kfunc_name, size_t offset)
10564 {
10565         return append_to_file(tracefs_kprobe_events(), "%c:%s/%s %s+0x%zx",
10566                               retprobe ? 'r' : 'p',
10567                               retprobe ? "kretprobes" : "kprobes",
10568                               probe_name, kfunc_name, offset);
10569 }
10570
10571 static int remove_kprobe_event_legacy(const char *probe_name, bool retprobe)
10572 {
10573         return append_to_file(tracefs_kprobe_events(), "-:%s/%s",
10574                               retprobe ? "kretprobes" : "kprobes", probe_name);
10575 }
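/* Editorial note (not part of libbpf): the two helpers above write the
 * tracefs kprobe_events text format; e.g. a retprobe on do_sys_open becomes
 *
 *	r:kretprobes/libbpf_1234_do_sys_open_0x0_0 do_sys_open+0x0
 *
 * and removal writes "-:kretprobes/<probe_name>". The probe name shown is
 * illustrative gen_kprobe_legacy_event_name() output.
 */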
10576
10577 static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retprobe)
10578 {
10579         char file[256];
10580
10581         snprintf(file, sizeof(file), "%s/events/%s/%s/id",
10582                  tracefs_path(), retprobe ? "kretprobes" : "kprobes", probe_name);
10583
10584         return parse_uint_from_file(file, "%d\n");
10585 }
10586
10587 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
10588                                          const char *kfunc_name, size_t offset, int pid)
10589 {
10590         const size_t attr_sz = sizeof(struct perf_event_attr);
10591         struct perf_event_attr attr;
10592         char errmsg[STRERR_BUFSIZE];
10593         int type, pfd, err;
10594
10595         err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset);
10596         if (err < 0) {
10597                 pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n",
10598                         kfunc_name, offset,
10599                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10600                 return err;
10601         }
10602         type = determine_kprobe_perf_type_legacy(probe_name, retprobe);
10603         if (type < 0) {
10604                 err = type;
10605                 pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n",
10606                         kfunc_name, offset,
10607                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10608                 goto err_clean_legacy;
10609         }
10610
10611         memset(&attr, 0, attr_sz);
10612         attr.size = attr_sz;
10613         attr.config = type;
10614         attr.type = PERF_TYPE_TRACEPOINT;
10615
10616         pfd = syscall(__NR_perf_event_open, &attr,
10617                       pid < 0 ? -1 : pid, /* pid */
10618                       pid == -1 ? 0 : -1, /* cpu */
10619                       -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
10620         if (pfd < 0) {
10621                 err = -errno;
10622                 pr_warn("legacy kprobe perf_event_open() failed: %s\n",
10623                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10624                 goto err_clean_legacy;
10625         }
10626         return pfd;
10627
10628 err_clean_legacy:
10629         /* Clear the newly added legacy kprobe_event */
10630         remove_kprobe_event_legacy(probe_name, retprobe);
10631         return err;
10632 }
10633
10634 static const char *arch_specific_syscall_pfx(void)
10635 {
10636 #if defined(__x86_64__)
10637         return "x64";
10638 #elif defined(__i386__)
10639         return "ia32";
10640 #elif defined(__s390x__)
10641         return "s390x";
10642 #elif defined(__s390__)
10643         return "s390";
10644 #elif defined(__arm__)
10645         return "arm";
10646 #elif defined(__aarch64__)
10647         return "arm64";
10648 #elif defined(__mips__)
10649         return "mips";
10650 #elif defined(__riscv)
10651         return "riscv";
10652 #elif defined(__powerpc64__) /* must precede __powerpc__, also defined on 64-bit */
10653         return "powerpc64";
10654 #elif defined(__powerpc__)
10655         return "powerpc";
10656 #else
10657         return NULL;
10658 #endif
10659 }
10660
10661 int probe_kern_syscall_wrapper(int token_fd)
10662 {
10663         char syscall_name[64];
10664         const char *ksys_pfx;
10665
10666         ksys_pfx = arch_specific_syscall_pfx();
10667         if (!ksys_pfx)
10668                 return 0;
10669
10670         snprintf(syscall_name, sizeof(syscall_name), "__%s_sys_bpf", ksys_pfx);
10671
10672         if (determine_kprobe_perf_type() >= 0) {
10673                 int pfd;
10674
10675                 pfd = perf_event_open_probe(false, false, syscall_name, 0, getpid(), 0);
10676                 if (pfd >= 0)
10677                         close(pfd);
10678
10679                 return pfd >= 0 ? 1 : 0;
10680         } else { /* legacy mode */
10681                 char probe_name[128];
10682
10683                 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name), syscall_name, 0);
10684                 if (add_kprobe_event_legacy(probe_name, false, syscall_name, 0) < 0)
10685                         return 0;
10686
10687                 (void)remove_kprobe_event_legacy(probe_name, false);
10688                 return 1;
10689         }
10690 }
10691
10692 struct bpf_link *
10693 bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
10694                                 const char *func_name,
10695                                 const struct bpf_kprobe_opts *opts)
10696 {
10697         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
10698         enum probe_attach_mode attach_mode;
10699         char errmsg[STRERR_BUFSIZE];
10700         char *legacy_probe = NULL;
10701         struct bpf_link *link;
10702         size_t offset;
10703         bool retprobe, legacy;
10704         int pfd, err;
10705
10706         if (!OPTS_VALID(opts, bpf_kprobe_opts))
10707                 return libbpf_err_ptr(-EINVAL);
10708
10709         attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
10710         retprobe = OPTS_GET(opts, retprobe, false);
10711         offset = OPTS_GET(opts, offset, 0);
10712         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10713
10714         legacy = determine_kprobe_perf_type() < 0;
10715         switch (attach_mode) {
10716         case PROBE_ATTACH_MODE_LEGACY:
10717                 legacy = true;
10718                 pe_opts.force_ioctl_attach = true;
10719                 break;
10720         case PROBE_ATTACH_MODE_PERF:
10721                 if (legacy)
10722                         return libbpf_err_ptr(-ENOTSUP);
10723                 pe_opts.force_ioctl_attach = true;
10724                 break;
10725         case PROBE_ATTACH_MODE_LINK:
10726                 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
10727                         return libbpf_err_ptr(-ENOTSUP);
10728                 break;
10729         case PROBE_ATTACH_MODE_DEFAULT:
10730                 break;
10731         default:
10732                 return libbpf_err_ptr(-EINVAL);
10733         }
10734
10735         if (!legacy) {
10736                 pfd = perf_event_open_probe(false /* uprobe */, retprobe,
10737                                             func_name, offset,
10738                                             -1 /* pid */, 0 /* ref_ctr_off */);
10739         } else {
10740                 char probe_name[256];
10741
10742                 gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),
10743                                              func_name, offset);
10744
10745                 legacy_probe = strdup(probe_name);
10746                 if (!legacy_probe)
10747                         return libbpf_err_ptr(-ENOMEM);
10748
10749                 pfd = perf_event_kprobe_open_legacy(legacy_probe, retprobe, func_name,
10750                                                     offset, -1 /* pid */);
10751         }
10752         if (pfd < 0) {
10753                 err = pfd; /* both open helpers return a negative error code */
10754                 pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n",
10755                         prog->name, retprobe ? "kretprobe" : "kprobe",
10756                         func_name, offset,
10757                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10758                 goto err_out;
10759         }
10760         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
10761         err = libbpf_get_error(link);
10762         if (err) {
10763                 close(pfd);
10764                 pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n",
10765                         prog->name, retprobe ? "kretprobe" : "kprobe",
10766                         func_name, offset,
10767                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
10768                 goto err_clean_legacy;
10769         }
10770         if (legacy) {
10771                 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
10772
10773                 perf_link->legacy_probe_name = legacy_probe;
10774                 perf_link->legacy_is_kprobe = true;
10775                 perf_link->legacy_is_retprobe = retprobe;
10776         }
10777
10778         return link;
10779
10780 err_clean_legacy:
10781         if (legacy)
10782                 remove_kprobe_event_legacy(legacy_probe, retprobe);
10783 err_out:
10784         free(legacy_probe);
10785         return libbpf_err_ptr(err);
10786 }
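
/* Illustrative usage sketch (not part of libbpf); prog and the chosen
 * function/offset are hypothetical:
 *
 *	LIBBPF_OPTS(bpf_kprobe_opts, opts,
 *		.offset = 0x10,
 *		.attach_mode = PROBE_ATTACH_MODE_DEFAULT,
 *	);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_kprobe_opts(prog, "tcp_v4_connect", &opts);
 *	if (!link)
 *		... attach failed, errno is set (libbpf 1.0 error mode) ...
 */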
10787
10788 struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
10789                                             bool retprobe,
10790                                             const char *func_name)
10791 {
10792         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
10793                 .retprobe = retprobe,
10794         );
10795
10796         return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
10797 }
10798
10799 struct bpf_link *bpf_program__attach_ksyscall(const struct bpf_program *prog,
10800                                               const char *syscall_name,
10801                                               const struct bpf_ksyscall_opts *opts)
10802 {
10803         LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
10804         char func_name[128];
10805
10806         if (!OPTS_VALID(opts, bpf_ksyscall_opts))
10807                 return libbpf_err_ptr(-EINVAL);
10808
10809         if (kernel_supports(prog->obj, FEAT_SYSCALL_WRAPPER)) {
10810                 /* arch_specific_syscall_pfx() should never return NULL here
10811                  * because it is guarded by kernel_supports(). However, the
10812                  * compiler does not know that, so we keep an explicit
10813                  * empty-string fallback (?:) as well.
10814                  */
10815                 snprintf(func_name, sizeof(func_name), "__%s_sys_%s",
10816                          arch_specific_syscall_pfx() ? : "", syscall_name);
10817         } else {
10818                 snprintf(func_name, sizeof(func_name), "__se_sys_%s", syscall_name);
10819         }
10820
10821         kprobe_opts.retprobe = OPTS_GET(opts, retprobe, false);
10822         kprobe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
10823
10824         return bpf_program__attach_kprobe_opts(prog, func_name, &kprobe_opts);
10825 }
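
/* Illustrative sketch: attaching to the write() syscall entry; prog is
 * assumed to be a loaded SEC("ksyscall") program:
 *
 *	LIBBPF_OPTS(bpf_ksyscall_opts, opts, .retprobe = false);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_ksyscall(prog, "write", &opts);
 *
 * Equivalently, SEC("ksyscall/write") allows auto-attach through
 * bpf_program__attach() without an explicit call here.
 */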
10826
10827 /* Adapted from perf/util/string.c */
10828 bool glob_match(const char *str, const char *pat)
10829 {
10830         while (*str && *pat && *pat != '*') {
10831                 if (*pat == '?') {      /* Matches any single character */
10832                         str++;
10833                         pat++;
10834                         continue;
10835                 }
10836                 if (*str != *pat)
10837                         return false;
10838                 str++;
10839                 pat++;
10840         }
10841         /* Check wild card */
10842         if (*pat == '*') {
10843                 while (*pat == '*')
10844                         pat++;
10845                 if (!*pat) /* Tail wild card matches all */
10846                         return true;
10847                 while (*str)
10848                         if (glob_match(str++, pat))
10849                                 return true;
10850         }
10851         return !*str && !*pat;
10852 }
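
/* A sketch of the matching semantics implemented above ('*' matches any
 * substring, '?' any single character); the symbol names are examples only:
 *
 *	glob_match("tcp_v4_rcv", "tcp_*");       // true
 *	glob_match("tcp_v6_rcv", "tcp_v?_rcv");  // true
 *	glob_match("udp_rcv", "tcp_*");          // false
 */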
10853
10854 struct kprobe_multi_resolve {
10855         const char *pattern;
10856         unsigned long *addrs;
10857         size_t cap;
10858         size_t cnt;
10859 };
10860
10861 struct avail_kallsyms_data {
10862         char **syms;
10863         size_t cnt;
10864         struct kprobe_multi_resolve *res;
10865 };
10866
10867 static int avail_func_cmp(const void *a, const void *b)
10868 {
10869         return strcmp(*(const char **)a, *(const char **)b);
10870 }
10871
10872 static int avail_kallsyms_cb(unsigned long long sym_addr, char sym_type,
10873                              const char *sym_name, void *ctx)
10874 {
10875         struct avail_kallsyms_data *data = ctx;
10876         struct kprobe_multi_resolve *res = data->res;
10877         int err;
10878
10879         if (!bsearch(&sym_name, data->syms, data->cnt, sizeof(*data->syms), avail_func_cmp))
10880                 return 0;
10881
10882         err = libbpf_ensure_mem((void **)&res->addrs, &res->cap, sizeof(*res->addrs), res->cnt + 1);
10883         if (err)
10884                 return err;
10885
10886         res->addrs[res->cnt++] = (unsigned long)sym_addr;
10887         return 0;
10888 }
10889
10890 static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res)
10891 {
10892         const char *available_functions_file = tracefs_available_filter_functions();
10893         struct avail_kallsyms_data data;
10894         char sym_name[500];
10895         FILE *f;
10896         int err = 0, ret, i;
10897         char **syms = NULL;
10898         size_t cap = 0, cnt = 0;
10899
10900         f = fopen(available_functions_file, "re");
10901         if (!f) {
10902                 err = -errno;
10903                 pr_warn("failed to open %s: %d\n", available_functions_file, err);
10904                 return err;
10905         }
10906
10907         while (true) {
10908                 char *name;
10909
10910                 ret = fscanf(f, "%499s%*[^\n]\n", sym_name);
10911                 if (ret == EOF && feof(f))
10912                         break;
10913
10914                 if (ret != 1) {
10915                         pr_warn("failed to parse available_filter_functions entry: %d\n", ret);
10916                         err = -EINVAL;
10917                         goto cleanup;
10918                 }
10919
10920                 if (!glob_match(sym_name, res->pattern))
10921                         continue;
10922
10923                 err = libbpf_ensure_mem((void **)&syms, &cap, sizeof(*syms), cnt + 1);
10924                 if (err)
10925                         goto cleanup;
10926
10927                 name = strdup(sym_name);
10928                 if (!name) {
10929                         err = -errno;
10930                         goto cleanup;
10931                 }
10932
10933                 syms[cnt++] = name;
10934         }
10935
10936         /* no entries found, bail out */
10937         if (cnt == 0) {
10938                 err = -ENOENT;
10939                 goto cleanup;
10940         }
10941
10942         /* sort available functions */
10943         qsort(syms, cnt, sizeof(*syms), avail_func_cmp);
10944
10945         data.syms = syms;
10946         data.res = res;
10947         data.cnt = cnt;
10948         libbpf_kallsyms_parse(avail_kallsyms_cb, &data);
10949
10950         if (res->cnt == 0)
10951                 err = -ENOENT;
10952
10953 cleanup:
10954         for (i = 0; i < cnt; i++)
10955                 free((char *)syms[i]);
10956         free(syms);
10957
10958         fclose(f);
10959         return err;
10960 }
10961
10962 static bool has_available_filter_functions_addrs(void)
10963 {
10964         return access(tracefs_available_filter_functions_addrs(), R_OK) != -1;
10965 }
10966
10967 static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res)
10968 {
10969         const char *available_path = tracefs_available_filter_functions_addrs();
10970         char sym_name[500];
10971         FILE *f;
10972         int ret, err = 0;
10973         unsigned long long sym_addr;
10974
10975         f = fopen(available_path, "re");
10976         if (!f) {
10977                 err = -errno;
10978                 pr_warn("failed to open %s: %d\n", available_path, err);
10979                 return err;
10980         }
10981
10982         while (true) {
10983                 ret = fscanf(f, "%llx %499s%*[^\n]\n", &sym_addr, sym_name);
10984                 if (ret == EOF && feof(f))
10985                         break;
10986
10987                 if (ret != 2) {
10988                         pr_warn("failed to parse available_filter_functions_addrs entry: %d\n",
10989                                 ret);
10990                         err = -EINVAL;
10991                         goto cleanup;
10992                 }
10993
10994                 if (!glob_match(sym_name, res->pattern))
10995                         continue;
10996
10997                 err = libbpf_ensure_mem((void **)&res->addrs, &res->cap,
10998                                         sizeof(*res->addrs), res->cnt + 1);
10999                 if (err)
11000                         goto cleanup;
11001
11002                 res->addrs[res->cnt++] = (unsigned long)sym_addr;
11003         }
11004
11005         if (res->cnt == 0)
11006                 err = -ENOENT;
11007
11008 cleanup:
11009         fclose(f);
11010         return err;
11011 }
11012
11013 struct bpf_link *
11014 bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog,
11015                                       const char *pattern,
11016                                       const struct bpf_kprobe_multi_opts *opts)
11017 {
11018         LIBBPF_OPTS(bpf_link_create_opts, lopts);
11019         struct kprobe_multi_resolve res = {
11020                 .pattern = pattern,
11021         };
11022         struct bpf_link *link = NULL;
11023         char errmsg[STRERR_BUFSIZE];
11024         const unsigned long *addrs;
11025         int err, link_fd, prog_fd;
11026         const __u64 *cookies;
11027         const char **syms;
11028         bool retprobe;
11029         size_t cnt;
11030
11031         if (!OPTS_VALID(opts, bpf_kprobe_multi_opts))
11032                 return libbpf_err_ptr(-EINVAL);
11033
11034         syms    = OPTS_GET(opts, syms, false);
11035         addrs   = OPTS_GET(opts, addrs, false);
11036         cnt     = OPTS_GET(opts, cnt, false);
11037         cookies = OPTS_GET(opts, cookies, false);
11038
11039         if (!pattern && !addrs && !syms)
11040                 return libbpf_err_ptr(-EINVAL);
11041         if (pattern && (addrs || syms || cookies || cnt))
11042                 return libbpf_err_ptr(-EINVAL);
11043         if (!pattern && !cnt)
11044                 return libbpf_err_ptr(-EINVAL);
11045         if (addrs && syms)
11046                 return libbpf_err_ptr(-EINVAL);
11047
11048         if (pattern) {
11049                 if (has_available_filter_functions_addrs())
11050                         err = libbpf_available_kprobes_parse(&res);
11051                 else
11052                         err = libbpf_available_kallsyms_parse(&res);
11053                 if (err)
11054                         goto error;
11055                 addrs = res.addrs;
11056                 cnt = res.cnt;
11057         }
11058
11059         retprobe = OPTS_GET(opts, retprobe, false);
11060
11061         lopts.kprobe_multi.syms = syms;
11062         lopts.kprobe_multi.addrs = addrs;
11063         lopts.kprobe_multi.cookies = cookies;
11064         lopts.kprobe_multi.cnt = cnt;
11065         lopts.kprobe_multi.flags = retprobe ? BPF_F_KPROBE_MULTI_RETURN : 0;
11066
11067         link = calloc(1, sizeof(*link));
11068         if (!link) {
11069                 err = -ENOMEM;
11070                 goto error;
11071         }
11072         link->detach = &bpf_link__detach_fd;
11073
11074         prog_fd = bpf_program__fd(prog);
11075         link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &lopts);
11076         if (link_fd < 0) {
11077                 err = -errno;
11078                 pr_warn("prog '%s': failed to attach: %s\n",
11079                         prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11080                 goto error;
11081         }
11082         link->fd = link_fd;
11083         free(res.addrs);
11084         return link;
11085
11086 error:
11087         free(link);
11088         free(res.addrs);
11089         return libbpf_err_ptr(err);
11090 }
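
/* Illustrative sketch of the two supported input modes; prog and the
 * symbol names are hypothetical:
 *
 *	struct bpf_link *link;
 *
 *	// 1) glob pattern, resolved against tracefs/kallsyms above
 *	link = bpf_program__attach_kprobe_multi_opts(prog, "tcp_*", NULL);
 *
 *	// 2) explicit symbol list (mutually exclusive with a pattern)
 *	const char *syms[] = { "tcp_v4_rcv", "tcp_v6_rcv" };
 *	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
 *		.syms = syms,
 *		.cnt = ARRAY_SIZE(syms),
 *	);
 *	link = bpf_program__attach_kprobe_multi_opts(prog, NULL, &opts);
 */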
11091
11092 static int attach_kprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11093 {
11094         DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
11095         unsigned long offset = 0;
11096         const char *func_name;
11097         char *func;
11098         int n;
11099
11100         *link = NULL;
11101
11102         /* no auto-attach for SEC("kprobe") and SEC("kretprobe") */
11103         if (strcmp(prog->sec_name, "kprobe") == 0 || strcmp(prog->sec_name, "kretprobe") == 0)
11104                 return 0;
11105
11106         opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe/");
11107         if (opts.retprobe)
11108                 func_name = prog->sec_name + sizeof("kretprobe/") - 1;
11109         else
11110                 func_name = prog->sec_name + sizeof("kprobe/") - 1;
11111
11112         n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
11113         if (n < 1) {
11114                 pr_warn("kprobe name is invalid: %s\n", func_name);
11115                 return -EINVAL;
11116         }
11117         if (opts.retprobe && offset != 0) {
11118                 free(func);
11119                 pr_warn("kretprobes do not support offset specification\n");
11120                 return -EINVAL;
11121         }
11122
11123         opts.offset = offset;
11124         *link = bpf_program__attach_kprobe_opts(prog, func, &opts);
11125         free(func);
11126         return libbpf_get_error(*link);
11127 }
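
/* The section names parsed above follow this shape (examples only):
 *
 *	SEC("kprobe/tcp_v4_connect")       // function entry
 *	SEC("kprobe/tcp_v4_connect+0x10")  // entry + 0x10 offset
 *	SEC("kretprobe/tcp_v4_connect")    // function return, no offset allowed
 */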
11128
11129 static int attach_ksyscall(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11130 {
11131         LIBBPF_OPTS(bpf_ksyscall_opts, opts);
11132         const char *syscall_name;
11133
11134         *link = NULL;
11135
11136         /* no auto-attach for SEC("ksyscall") and SEC("kretsyscall") */
11137         if (strcmp(prog->sec_name, "ksyscall") == 0 || strcmp(prog->sec_name, "kretsyscall") == 0)
11138                 return 0;
11139
11140         opts.retprobe = str_has_pfx(prog->sec_name, "kretsyscall/");
11141         if (opts.retprobe)
11142                 syscall_name = prog->sec_name + sizeof("kretsyscall/") - 1;
11143         else
11144                 syscall_name = prog->sec_name + sizeof("ksyscall/") - 1;
11145
11146         *link = bpf_program__attach_ksyscall(prog, syscall_name, &opts);
11147         return *link ? 0 : -errno;
11148 }
11149
11150 static int attach_kprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11151 {
11152         LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
11153         const char *spec;
11154         char *pattern;
11155         int n;
11156
11157         *link = NULL;
11158
11159         /* no auto-attach for SEC("kprobe.multi") and SEC("kretprobe.multi") */
11160         if (strcmp(prog->sec_name, "kprobe.multi") == 0 ||
11161             strcmp(prog->sec_name, "kretprobe.multi") == 0)
11162                 return 0;
11163
11164         opts.retprobe = str_has_pfx(prog->sec_name, "kretprobe.multi/");
11165         if (opts.retprobe)
11166                 spec = prog->sec_name + sizeof("kretprobe.multi/") - 1;
11167         else
11168                 spec = prog->sec_name + sizeof("kprobe.multi/") - 1;
11169
11170         n = sscanf(spec, "%m[a-zA-Z0-9_.*?]", &pattern);
11171         if (n < 1) {
11172                 pr_warn("kprobe multi pattern is invalid: %s\n", spec);
11173                 return -EINVAL;
11174         }
11175
11176         *link = bpf_program__attach_kprobe_multi_opts(prog, pattern, &opts);
11177         free(pattern);
11178         return libbpf_get_error(*link);
11179 }
11180
11181 static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11182 {
11183         char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
11184         LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
11185         int n, ret = -EINVAL;
11186
11187         *link = NULL;
11188
11189         n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
11190                    &probe_type, &binary_path, &func_name);
11191         switch (n) {
11192         case 1:
11193                 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
11194                 ret = 0;
11195                 break;
11196         case 3:
11197                 opts.retprobe = strcmp(probe_type, "uretprobe.multi") == 0;
11198                 *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts);
11199                 ret = libbpf_get_error(*link);
11200                 break;
11201         default:
11202                 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11203                         prog->sec_name);
11204                 break;
11205         }
11206         free(probe_type);
11207         free(binary_path);
11208         free(func_name);
11209         return ret;
11210 }
11211
11212 static void gen_uprobe_legacy_event_name(char *buf, size_t buf_sz,
11213                                          const char *binary_path, uint64_t offset)
11214 {
11215         int i;
11216
11217         snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), binary_path, (size_t)offset);
11218
11219         /* sanitize binary_path in the probe name */
11220         for (i = 0; buf[i]; i++) {
11221                 if (!isalnum(buf[i]))
11222                         buf[i] = '_';
11223         }
11224 }
11225
11226 static inline int add_uprobe_event_legacy(const char *probe_name, bool retprobe,
11227                                           const char *binary_path, size_t offset)
11228 {
11229         return append_to_file(tracefs_uprobe_events(), "%c:%s/%s %s:0x%zx",
11230                               retprobe ? 'r' : 'p',
11231                               retprobe ? "uretprobes" : "uprobes",
11232                               probe_name, binary_path, offset);
11233 }
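
/* The append above produces a tracefs uprobe_events line such as (PID,
 * path, and offset are illustrative):
 *
 *	p:uprobes/libbpf_1234__usr_lib_libc_so_6_0x1234 /usr/lib/libc.so.6:0x1234
 *
 * with 'r:' and "uretprobes" used for the return-probe flavor.
 */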
11234
11235 static inline int remove_uprobe_event_legacy(const char *probe_name, bool retprobe)
11236 {
11237         return append_to_file(tracefs_uprobe_events(), "-:%s/%s",
11238                               retprobe ? "uretprobes" : "uprobes", probe_name);
11239 }
11240
11241 static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retprobe)
11242 {
11243         char file[512];
11244
11245         snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11246                  tracefs_path(), retprobe ? "uretprobes" : "uprobes", probe_name);
11247
11248         return parse_uint_from_file(file, "%d\n");
11249 }
11250
11251 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
11252                                          const char *binary_path, size_t offset, int pid)
11253 {
11254         const size_t attr_sz = sizeof(struct perf_event_attr);
11255         struct perf_event_attr attr;
11256         int type, pfd, err;
11257
11258         err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset);
11259         if (err < 0) {
11260                 pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n",
11261                         binary_path, (size_t)offset, err);
11262                 return err;
11263         }
11264         type = determine_uprobe_perf_type_legacy(probe_name, retprobe);
11265         if (type < 0) {
11266                 err = type;
11267                 pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n",
11268                         binary_path, offset, err);
11269                 goto err_clean_legacy;
11270         }
11271
11272         memset(&attr, 0, attr_sz);
11273         attr.size = attr_sz;
11274         attr.config = type;
11275         attr.type = PERF_TYPE_TRACEPOINT;
11276
11277         pfd = syscall(__NR_perf_event_open, &attr,
11278                       pid < 0 ? -1 : pid, /* pid */
11279                       pid == -1 ? 0 : -1, /* cpu */
11280                       -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
11281         if (pfd < 0) {
11282                 err = -errno;
11283                 pr_warn("legacy uprobe perf_event_open() failed: %d\n", err);
11284                 goto err_clean_legacy;
11285         }
11286         return pfd;
11287
11288 err_clean_legacy:
11289         /* Clear the newly added legacy uprobe_event */
11290         remove_uprobe_event_legacy(probe_name, retprobe);
11291         return err;
11292 }
11293
11294 /* Find offset of function name in archive specified by path. Currently
11295  * supported are .zip files that do not compress their contents, as used on
11296  * Android in the form of APKs, for example. "file_name" is the name of the ELF
11297  * file inside the archive. "func_name" matches symbol name or name@@LIB for
11298  * library functions.
11299  *
11300  * An overview of the APK format is provided here:
11301  * https://en.wikipedia.org/w/index.php?title=Apk_(file_format)&oldid=1139099120#Package_contents
11302  */
11303 static long elf_find_func_offset_from_archive(const char *archive_path, const char *file_name,
11304                                               const char *func_name)
11305 {
11306         struct zip_archive *archive;
11307         struct zip_entry entry;
11308         long ret;
11309         Elf *elf;
11310
11311         archive = zip_archive_open(archive_path);
11312         if (IS_ERR(archive)) {
11313                 ret = PTR_ERR(archive);
11314                 pr_warn("zip: failed to open %s: %ld\n", archive_path, ret);
11315                 return ret;
11316         }
11317
11318         ret = zip_archive_find_entry(archive, file_name, &entry);
11319         if (ret) {
11320                 pr_warn("zip: could not find archive member %s in %s: %ld\n", file_name,
11321                         archive_path, ret);
11322                 goto out;
11323         }
11324         pr_debug("zip: found entry for %s in %s at 0x%lx\n", file_name, archive_path,
11325                  (unsigned long)entry.data_offset);
11326
11327         if (entry.compression) {
11328                 pr_warn("zip: entry %s of %s is compressed and cannot be handled\n", file_name,
11329                         archive_path);
11330                 ret = -LIBBPF_ERRNO__FORMAT;
11331                 goto out;
11332         }
11333
11334         elf = elf_memory((void *)entry.data, entry.data_length);
11335         if (!elf) {
11336                 pr_warn("elf: could not read elf file %s from %s: %s\n", file_name, archive_path,
11337                         elf_errmsg(-1));
11338                 ret = -LIBBPF_ERRNO__LIBELF;
11339                 goto out;
11340         }
11341
11342         ret = elf_find_func_offset(elf, file_name, func_name);
11343         if (ret > 0) {
11344                 pr_debug("elf: symbol address match for %s of %s in %s: 0x%x + 0x%lx = 0x%lx\n",
11345                          func_name, file_name, archive_path, entry.data_offset, ret,
11346                          ret + entry.data_offset);
11347                 ret += entry.data_offset;
11348         }
11349         elf_end(elf);
11350
11351 out:
11352         zip_archive_close(archive);
11353         return ret;
11354 }
11355
11356 static const char *arch_specific_lib_paths(void)
11357 {
11358         /*
11359          * Based on https://packages.debian.org/sid/libc6.
11360          *
11361          * Assume that the traced program is built for the same architecture
11362          * as libbpf, which should cover the vast majority of cases.
11363          */
11364 #if defined(__x86_64__)
11365         return "/lib/x86_64-linux-gnu";
11366 #elif defined(__i386__)
11367         return "/lib/i386-linux-gnu";
11368 #elif defined(__s390x__)
11369         return "/lib/s390x-linux-gnu";
11370 #elif defined(__s390__)
11371         return "/lib/s390-linux-gnu";
11372 #elif defined(__arm__) && defined(__SOFTFP__)
11373         return "/lib/arm-linux-gnueabi";
11374 #elif defined(__arm__) && !defined(__SOFTFP__)
11375         return "/lib/arm-linux-gnueabihf";
11376 #elif defined(__aarch64__)
11377         return "/lib/aarch64-linux-gnu";
11378 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 64
11379         return "/lib/mips64el-linux-gnuabi64";
11380 #elif defined(__mips__) && defined(__MIPSEL__) && _MIPS_SZLONG == 32
11381         return "/lib/mipsel-linux-gnu";
11382 #elif defined(__powerpc64__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
11383         return "/lib/powerpc64le-linux-gnu";
11384 #elif defined(__sparc__) && defined(__arch64__)
11385         return "/lib/sparc64-linux-gnu";
11386 #elif defined(__riscv) && __riscv_xlen == 64
11387         return "/lib/riscv64-linux-gnu";
11388 #else
11389         return NULL;
11390 #endif
11391 }
11392
11393 /* Get full path to program/shared library. */
11394 static int resolve_full_path(const char *file, char *result, size_t result_sz)
11395 {
11396         const char *search_paths[3] = {};
11397         int i, perm;
11398
11399         if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
11400                 search_paths[0] = getenv("LD_LIBRARY_PATH");
11401                 search_paths[1] = "/usr/lib64:/usr/lib";
11402                 search_paths[2] = arch_specific_lib_paths();
11403                 perm = R_OK;
11404         } else {
11405                 search_paths[0] = getenv("PATH");
11406                 search_paths[1] = "/usr/bin:/usr/sbin";
11407                 perm = R_OK | X_OK;
11408         }
11409
11410         for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
11411                 const char *s;
11412
11413                 if (!search_paths[i])
11414                         continue;
11415                 for (s = search_paths[i]; s != NULL; s = strchr(s, ':')) {
11416                         char *next_path;
11417                         int seg_len;
11418
11419                         if (s[0] == ':')
11420                                 s++;
11421                         next_path = strchr(s, ':');
11422                         seg_len = next_path ? next_path - s : strlen(s);
11423                         if (!seg_len)
11424                                 continue;
11425                         snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
11426                         /* ensure it has required permissions */
11427                         if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
11428                                 continue;
11429                         pr_debug("resolved '%s' to '%s'\n", file, result);
11430                         return 0;
11431                 }
11432         }
11433         return -ENOENT;
11434 }
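
/* Illustrative resolutions under the rules above (actual results depend
 * on the host filesystem and environment):
 *
 *	resolve_full_path("libc.so.6", buf, sizeof(buf));
 *	// -> e.g. "/lib/x86_64-linux-gnu/libc.so.6" (library search paths)
 *
 *	resolve_full_path("bash", buf, sizeof(buf));
 *	// -> e.g. "/usr/bin/bash" ($PATH, then /usr/bin:/usr/sbin)
 */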
11435
11436 struct bpf_link *
11437 bpf_program__attach_uprobe_multi(const struct bpf_program *prog,
11438                                  pid_t pid,
11439                                  const char *path,
11440                                  const char *func_pattern,
11441                                  const struct bpf_uprobe_multi_opts *opts)
11442 {
11443         const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL;
11444         LIBBPF_OPTS(bpf_link_create_opts, lopts);
11445         unsigned long *resolved_offsets = NULL;
11446         int err = 0, link_fd, prog_fd;
11447         struct bpf_link *link = NULL;
11448         char errmsg[STRERR_BUFSIZE];
11449         char full_path[PATH_MAX];
11450         const __u64 *cookies;
11451         const char **syms;
11452         size_t cnt;
11453
11454         if (!OPTS_VALID(opts, bpf_uprobe_multi_opts))
11455                 return libbpf_err_ptr(-EINVAL);
11456
11457         syms = OPTS_GET(opts, syms, NULL);
11458         offsets = OPTS_GET(opts, offsets, NULL);
11459         ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL);
11460         cookies = OPTS_GET(opts, cookies, NULL);
11461         cnt = OPTS_GET(opts, cnt, 0);
11462
11463         /*
11464          * User can specify two mutually exclusive sets of inputs:
11465          *
11466          * 1) use only path/func_pattern/pid arguments
11467          *
11468          * 2) use path/pid with allowed combinations of:
11469          *    syms/offsets/ref_ctr_offsets/cookies/cnt
11470          *
11471          *    - syms and offsets are mutually exclusive
11472          *    - ref_ctr_offsets and cookies are optional
11473          *
11474          * Any other usage results in error.
11475          */
11476
11477         if (!path)
11478                 return libbpf_err_ptr(-EINVAL);
11479         if (!func_pattern && cnt == 0)
11480                 return libbpf_err_ptr(-EINVAL);
11481
11482         if (func_pattern) {
11483                 if (syms || offsets || ref_ctr_offsets || cookies || cnt)
11484                         return libbpf_err_ptr(-EINVAL);
11485         } else {
11486                 if (!!syms == !!offsets)
11487                         return libbpf_err_ptr(-EINVAL);
11488         }
11489
11490         if (func_pattern) {
11491                 if (!strchr(path, '/')) {
11492                         err = resolve_full_path(path, full_path, sizeof(full_path));
11493                         if (err) {
11494                                 pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11495                                         prog->name, path, err);
11496                                 return libbpf_err_ptr(err);
11497                         }
11498                         path = full_path;
11499                 }
11500
11501                 err = elf_resolve_pattern_offsets(path, func_pattern,
11502                                                   &resolved_offsets, &cnt);
11503                 if (err < 0)
11504                         return libbpf_err_ptr(err);
11505                 offsets = resolved_offsets;
11506         } else if (syms) {
11507                 err = elf_resolve_syms_offsets(path, cnt, syms, &resolved_offsets, STT_FUNC);
11508                 if (err < 0)
11509                         return libbpf_err_ptr(err);
11510                 offsets = resolved_offsets;
11511         }
11512
11513         lopts.uprobe_multi.path = path;
11514         lopts.uprobe_multi.offsets = offsets;
11515         lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets;
11516         lopts.uprobe_multi.cookies = cookies;
11517         lopts.uprobe_multi.cnt = cnt;
11518         lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0;
11519
11520         if (pid == 0)
11521                 pid = getpid();
11522         if (pid > 0)
11523                 lopts.uprobe_multi.pid = pid;
11524
11525         link = calloc(1, sizeof(*link));
11526         if (!link) {
11527                 err = -ENOMEM;
11528                 goto error;
11529         }
11530         link->detach = &bpf_link__detach_fd;
11531
11532         prog_fd = bpf_program__fd(prog);
11533         link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts);
11534         if (link_fd < 0) {
11535                 err = -errno;
11536                 pr_warn("prog '%s': failed to attach multi-uprobe: %s\n",
11537                         prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11538                 goto error;
11539         }
11540         link->fd = link_fd;
11541         free(resolved_offsets);
11542         return link;
11543
11544 error:
11545         free(resolved_offsets);
11546         free(link);
11547         return libbpf_err_ptr(err);
11548 }
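
/* Illustrative sketch; the library name and pattern are hypothetical:
 *
 *	struct bpf_link *link;
 *
 *	// attach to every pthread_* function in libc, in all processes
 *	link = bpf_program__attach_uprobe_multi(prog, -1, "libc.so.6",
 *						"pthread_*", NULL);
 *
 * Passing pid == 0 filters to the current process and pid > 0 to that
 * specific process, as handled above.
 */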
11549
11550 LIBBPF_API struct bpf_link *
11551 bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
11552                                 const char *binary_path, size_t func_offset,
11553                                 const struct bpf_uprobe_opts *opts)
11554 {
11555         const char *archive_path = NULL, *archive_sep = NULL;
11556         char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL;
11557         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11558         enum probe_attach_mode attach_mode;
11559         char full_path[PATH_MAX];
11560         struct bpf_link *link;
11561         size_t ref_ctr_off;
11562         int pfd, err;
11563         bool retprobe, legacy;
11564         const char *func_name;
11565
11566         if (!OPTS_VALID(opts, bpf_uprobe_opts))
11567                 return libbpf_err_ptr(-EINVAL);
11568
11569         attach_mode = OPTS_GET(opts, attach_mode, PROBE_ATTACH_MODE_DEFAULT);
11570         retprobe = OPTS_GET(opts, retprobe, false);
11571         ref_ctr_off = OPTS_GET(opts, ref_ctr_offset, 0);
11572         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11573
11574         if (!binary_path)
11575                 return libbpf_err_ptr(-EINVAL);
11576
11577         /* Check if "binary_path" refers to an archive. */
11578         archive_sep = strstr(binary_path, "!/");
11579         if (archive_sep) {
11580                 full_path[0] = '\0';
11581                 libbpf_strlcpy(full_path, binary_path,
11582                                min(sizeof(full_path), (size_t)(archive_sep - binary_path + 1)));
11583                 archive_path = full_path;
11584                 binary_path = archive_sep + 2;
11585         } else if (!strchr(binary_path, '/')) {
11586                 err = resolve_full_path(binary_path, full_path, sizeof(full_path));
11587                 if (err) {
11588                         pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11589                                 prog->name, binary_path, err);
11590                         return libbpf_err_ptr(err);
11591                 }
11592                 binary_path = full_path;
11593         }
11594         func_name = OPTS_GET(opts, func_name, NULL);
11595         if (func_name) {
11596                 long sym_off;
11597
11598                 if (archive_path) {
11599                         sym_off = elf_find_func_offset_from_archive(archive_path, binary_path,
11600                                                                     func_name);
11601                         binary_path = archive_path;
11602                 } else {
11603                         sym_off = elf_find_func_offset_from_file(binary_path, func_name);
11604                 }
11605                 if (sym_off < 0)
11606                         return libbpf_err_ptr(sym_off);
11607                 func_offset += sym_off;
11608         }
11609
11610         legacy = determine_uprobe_perf_type() < 0;
11611         switch (attach_mode) {
11612         case PROBE_ATTACH_MODE_LEGACY:
11613                 legacy = true;
11614                 pe_opts.force_ioctl_attach = true;
11615                 break;
11616         case PROBE_ATTACH_MODE_PERF:
11617                 if (legacy)
11618                         return libbpf_err_ptr(-ENOTSUP);
11619                 pe_opts.force_ioctl_attach = true;
11620                 break;
11621         case PROBE_ATTACH_MODE_LINK:
11622                 if (legacy || !kernel_supports(prog->obj, FEAT_PERF_LINK))
11623                         return libbpf_err_ptr(-ENOTSUP);
11624                 break;
11625         case PROBE_ATTACH_MODE_DEFAULT:
11626                 break;
11627         default:
11628                 return libbpf_err_ptr(-EINVAL);
11629         }
11630
11631         if (!legacy) {
11632                 pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path,
11633                                             func_offset, pid, ref_ctr_off);
11634         } else {
11635                 char probe_name[PATH_MAX + 64];
11636
11637                 if (ref_ctr_off)
11638                         return libbpf_err_ptr(-EINVAL);
11639
11640                 gen_uprobe_legacy_event_name(probe_name, sizeof(probe_name),
11641                                              binary_path, func_offset);
11642
11643                 legacy_probe = strdup(probe_name);
11644                 if (!legacy_probe)
11645                         return libbpf_err_ptr(-ENOMEM);
11646
11647                 pfd = perf_event_uprobe_open_legacy(legacy_probe, retprobe,
11648                                                     binary_path, func_offset, pid);
11649         }
11650         if (pfd < 0) {
11651                 err = -errno;
11652                 pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n",
11653                         prog->name, retprobe ? "uretprobe" : "uprobe",
11654                         binary_path, func_offset,
11655                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11656                 goto err_out;
11657         }
11658
11659         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11660         err = libbpf_get_error(link);
11661         if (err) {
11662                 close(pfd);
11663                 pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n",
11664                         prog->name, retprobe ? "uretprobe" : "uprobe",
11665                         binary_path, func_offset,
11666                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11667                 goto err_clean_legacy;
11668         }
11669         if (legacy) {
11670                 struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
11671
11672                 perf_link->legacy_probe_name = legacy_probe;
11673                 perf_link->legacy_is_kprobe = false;
11674                 perf_link->legacy_is_retprobe = retprobe;
11675         }
11676         return link;
11677
11678 err_clean_legacy:
11679         if (legacy)
11680                 remove_uprobe_event_legacy(legacy_probe, retprobe);
11681 err_out:
11682         free(legacy_probe);
11683         return libbpf_err_ptr(err);
11684 }
11685
11686 /* Format of u[ret]probe section definition supporting auto-attach:
11687  * u[ret]probe/binary:function[+offset]
11688  *
11689  * binary can be an absolute/relative path or a filename; the latter is resolved to a
11690  * full binary path via bpf_program__attach_uprobe_opts.
11691  *
11692  * Specifying uprobe+ ensures we carry out strict matching; either "uprobe" must be
11693  * specified (and auto-attach is not possible) or the above format is specified for
11694  * auto-attach.
11695  */
11696 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11697 {
11698         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
11699         char *probe_type = NULL, *binary_path = NULL, *func_name = NULL, *func_off;
11700         int n, c, ret = -EINVAL;
11701         long offset = 0;
11702
11703         *link = NULL;
11704
11705         n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[^\n]",
11706                    &probe_type, &binary_path, &func_name);
11707         switch (n) {
11708         case 1:
11709                 /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
11710                 ret = 0;
11711                 break;
11712         case 2:
11713                 pr_warn("prog '%s': section '%s' missing ':function[+offset]' specification\n",
11714                         prog->name, prog->sec_name);
11715                 break;
11716         case 3:
11717                 /* check if user specified `+offset`; if so, it should be the
11718                  * last part of the string, so make sure sscanf read to EOL
11719                  */
11720                 func_off = strrchr(func_name, '+');
11721                 if (func_off) {
11722                         n = sscanf(func_off, "+%li%n", &offset, &c);
11723                         if (n == 1 && *(func_off + c) == '\0')
11724                                 func_off[0] = '\0';
11725                         else
11726                                 offset = 0;
11727                 }
11728                 opts.retprobe = strcmp(probe_type, "uretprobe") == 0 ||
11729                                 strcmp(probe_type, "uretprobe.s") == 0;
11730                 if (opts.retprobe && offset != 0) {
11731                         pr_warn("prog '%s': uretprobes do not support offset specification\n",
11732                                 prog->name);
11733                         break;
11734                 }
11735                 opts.func_name = func_name;
11736                 *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
11737                 ret = libbpf_get_error(*link);
11738                 break;
11739         default:
11740                 pr_warn("prog '%s': invalid format of section definition '%s'\n", prog->name,
11741                         prog->sec_name);
11742                 break;
11743         }
11744         free(probe_type);
11745         free(binary_path);
11746         free(func_name);
11747
11748         return ret;
11749 }
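
/* Example section definitions accepted by the parser above (binary and
 * function names are illustrative):
 *
 *	SEC("uprobe//usr/lib/libc.so.6:malloc")     // absolute path
 *	SEC("uprobe/libc.so.6:malloc+0x10")         // resolved via search paths
 *	SEC("uretprobe//usr/lib/libc.so.6:malloc")  // return probe, no offset
 */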
11750
11751 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
11752                                             bool retprobe, pid_t pid,
11753                                             const char *binary_path,
11754                                             size_t func_offset)
11755 {
11756         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, .retprobe = retprobe);
11757
11758         return bpf_program__attach_uprobe_opts(prog, pid, binary_path, func_offset, &opts);
11759 }
11760
11761 struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog,
11762                                           pid_t pid, const char *binary_path,
11763                                           const char *usdt_provider, const char *usdt_name,
11764                                           const struct bpf_usdt_opts *opts)
11765 {
11766         char resolved_path[512];
11767         struct bpf_object *obj = prog->obj;
11768         struct bpf_link *link;
11769         __u64 usdt_cookie;
11770         int err;
11771
11772         if (!OPTS_VALID(opts, bpf_usdt_opts))
11773                 return libbpf_err_ptr(-EINVAL);
11774
11775         if (bpf_program__fd(prog) < 0) {
11776                 pr_warn("prog '%s': can't attach BPF program w/o FD (did you load it?)\n",
11777                         prog->name);
11778                 return libbpf_err_ptr(-EINVAL);
11779         }
11780
11781         if (!binary_path)
11782                 return libbpf_err_ptr(-EINVAL);
11783
11784         if (!strchr(binary_path, '/')) {
11785                 err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path));
11786                 if (err) {
11787                         pr_warn("prog '%s': failed to resolve full path for '%s': %d\n",
11788                                 prog->name, binary_path, err);
11789                         return libbpf_err_ptr(err);
11790                 }
11791                 binary_path = resolved_path;
11792         }
11793
11794         /* USDT manager is instantiated lazily on first USDT attach. It will
11795          * be destroyed together with BPF object in bpf_object__close().
11796          */
11797         if (IS_ERR(obj->usdt_man))
11798                 return libbpf_ptr(obj->usdt_man);
11799         if (!obj->usdt_man) {
11800                 obj->usdt_man = usdt_manager_new(obj);
11801                 if (IS_ERR(obj->usdt_man))
11802                         return libbpf_ptr(obj->usdt_man);
11803         }
11804
11805         usdt_cookie = OPTS_GET(opts, usdt_cookie, 0);
11806         link = usdt_manager_attach_usdt(obj->usdt_man, prog, pid, binary_path,
11807                                         usdt_provider, usdt_name, usdt_cookie);
11808         err = libbpf_get_error(link);
11809         if (err)
11810                 return libbpf_err_ptr(err);
11811         return link;
11812 }
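
/* Illustrative sketch; binary, provider, and probe names are hypothetical:
 *
 *	LIBBPF_OPTS(bpf_usdt_opts, opts, .usdt_cookie = 42);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_usdt(prog, -1 /* any process */,
 *					"/usr/sbin/myd", "myprovider",
 *					"myprobe", &opts);
 */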
11813
11814 static int attach_usdt(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11815 {
11816         char *path = NULL, *provider = NULL, *name = NULL;
11817         const char *sec_name;
11818         int n, err;
11819
11820         sec_name = bpf_program__section_name(prog);
11821         if (strcmp(sec_name, "usdt") == 0) {
11822                 /* no auto-attach for just SEC("usdt") */
11823                 *link = NULL;
11824                 return 0;
11825         }
11826
11827         n = sscanf(sec_name, "usdt/%m[^:]:%m[^:]:%m[^:]", &path, &provider, &name);
11828         if (n != 3) {
11829                 pr_warn("invalid section '%s', expected SEC(\"usdt/<path>:<provider>:<name>\")\n",
11830                         sec_name);
11831                 err = -EINVAL;
11832         } else {
11833                 *link = bpf_program__attach_usdt(prog, -1 /* any process */, path,
11834                                                  provider, name, NULL);
11835                 err = libbpf_get_error(*link);
11836         }
11837         free(path);
11838         free(provider);
11839         free(name);
11840         return err;
11841 }
11842
11843 static int determine_tracepoint_id(const char *tp_category,
11844                                    const char *tp_name)
11845 {
11846         char file[PATH_MAX];
11847         int ret;
11848
11849         ret = snprintf(file, sizeof(file), "%s/events/%s/%s/id",
11850                        tracefs_path(), tp_category, tp_name);
11851         if (ret < 0)
11852                 return -errno;
11853         if (ret >= sizeof(file)) {
11854                 pr_debug("tracepoint %s/%s path is too long\n",
11855                          tp_category, tp_name);
11856                 return -E2BIG;
11857         }
11858         return parse_uint_from_file(file, "%d\n");
11859 }
11860
11861 static int perf_event_open_tracepoint(const char *tp_category,
11862                                       const char *tp_name)
11863 {
11864         const size_t attr_sz = sizeof(struct perf_event_attr);
11865         struct perf_event_attr attr;
11866         char errmsg[STRERR_BUFSIZE];
11867         int tp_id, pfd, err;
11868
11869         tp_id = determine_tracepoint_id(tp_category, tp_name);
11870         if (tp_id < 0) {
11871                 pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n",
11872                         tp_category, tp_name,
11873                         libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));
11874                 return tp_id;
11875         }
11876
11877         memset(&attr, 0, attr_sz);
11878         attr.type = PERF_TYPE_TRACEPOINT;
11879         attr.size = attr_sz;
11880         attr.config = tp_id;
11881
11882         pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
11883                       -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
11884         if (pfd < 0) {
11885                 err = -errno;
11886                 pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n",
11887                         tp_category, tp_name,
11888                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11889                 return err;
11890         }
11891         return pfd;
11892 }
11893
11894 struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
11895                                                      const char *tp_category,
11896                                                      const char *tp_name,
11897                                                      const struct bpf_tracepoint_opts *opts)
11898 {
11899         DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
11900         char errmsg[STRERR_BUFSIZE];
11901         struct bpf_link *link;
11902         int pfd, err;
11903
11904         if (!OPTS_VALID(opts, bpf_tracepoint_opts))
11905                 return libbpf_err_ptr(-EINVAL);
11906
11907         pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
11908
11909         pfd = perf_event_open_tracepoint(tp_category, tp_name);
11910         if (pfd < 0) {
11911                 pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n",
11912                         prog->name, tp_category, tp_name,
11913                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11914                 return libbpf_err_ptr(pfd);
11915         }
11916         link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts);
11917         err = libbpf_get_error(link);
11918         if (err) {
11919                 close(pfd);
11920                 pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n",
11921                         prog->name, tp_category, tp_name,
11922                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
11923                 return libbpf_err_ptr(err);
11924         }
11925         return link;
11926 }
11927
11928 struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
11929                                                 const char *tp_category,
11930                                                 const char *tp_name)
11931 {
11932         return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
11933 }
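
/* Illustrative usage against a real tracepoint category/name pair:
 *
 *	link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
 *
 * which resolves .../events/sched/sched_switch/id via tracefs, as done in
 * perf_event_open_tracepoint() above.
 */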
11934
11935 static int attach_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11936 {
11937         char *sec_name, *tp_cat, *tp_name;
11938
11939         *link = NULL;
11940
11941         /* no auto-attach for SEC("tp") or SEC("tracepoint") */
11942         if (strcmp(prog->sec_name, "tp") == 0 || strcmp(prog->sec_name, "tracepoint") == 0)
11943                 return 0;
11944
11945         sec_name = strdup(prog->sec_name);
11946         if (!sec_name)
11947                 return -ENOMEM;
11948
11949         /* extract "tp/<category>/<name>" or "tracepoint/<category>/<name>" */
11950         if (str_has_pfx(prog->sec_name, "tp/"))
11951                 tp_cat = sec_name + sizeof("tp/") - 1;
11952         else
11953                 tp_cat = sec_name + sizeof("tracepoint/") - 1;
11954         tp_name = strchr(tp_cat, '/');
11955         if (!tp_name) {
11956                 free(sec_name);
11957                 return -EINVAL;
11958         }
11959         *tp_name = '\0';
11960         tp_name++;
11961
11962         *link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name);
11963         free(sec_name);
11964         return libbpf_get_error(*link);
11965 }
11966
11967 struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
11968                                                     const char *tp_name)
11969 {
11970         char errmsg[STRERR_BUFSIZE];
11971         struct bpf_link *link;
11972         int prog_fd, pfd;
11973
11974         prog_fd = bpf_program__fd(prog);
11975         if (prog_fd < 0) {
11976                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
11977                 return libbpf_err_ptr(-EINVAL);
11978         }
11979
11980         link = calloc(1, sizeof(*link));
11981         if (!link)
11982                 return libbpf_err_ptr(-ENOMEM);
11983         link->detach = &bpf_link__detach_fd;
11984
11985         pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
11986         if (pfd < 0) {
11987                 pfd = -errno;
11988                 free(link);
11989                 pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n",
11990                         prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
11991                 return libbpf_err_ptr(pfd);
11992         }
11993         link->fd = pfd;
11994         return link;
11995 }
11996
11997 static int attach_raw_tp(const struct bpf_program *prog, long cookie, struct bpf_link **link)
11998 {
11999         static const char *const prefixes[] = {
12000                 "raw_tp",
12001                 "raw_tracepoint",
12002                 "raw_tp.w",
12003                 "raw_tracepoint.w",
12004         };
12005         size_t i;
12006         const char *tp_name = NULL;
12007
12008         *link = NULL;
12009
12010         for (i = 0; i < ARRAY_SIZE(prefixes); i++) {
12011                 size_t pfx_len;
12012
12013                 if (!str_has_pfx(prog->sec_name, prefixes[i]))
12014                         continue;
12015
12016                 pfx_len = strlen(prefixes[i]);
12017                 /* no auto-attach case of, e.g., SEC("raw_tp") */
12018                 if (prog->sec_name[pfx_len] == '\0')
12019                         return 0;
12020
12021                 if (prog->sec_name[pfx_len] != '/')
12022                         continue;
12023
12024                 tp_name = prog->sec_name + pfx_len + 1;
12025                 break;
12026         }
12027
12028         if (!tp_name) {
12029                 pr_warn("prog '%s': invalid section name '%s'\n",
12030                         prog->name, prog->sec_name);
12031                 return -EINVAL;
12032         }
12033
12034         *link = bpf_program__attach_raw_tracepoint(prog, tp_name);
12035         return libbpf_get_error(*link);
12036 }
12037
12038 /* Common logic for all BPF program types that attach to a btf_id */
12039 static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog,
12040                                                    const struct bpf_trace_opts *opts)
12041 {
12042         LIBBPF_OPTS(bpf_link_create_opts, link_opts);
12043         char errmsg[STRERR_BUFSIZE];
12044         struct bpf_link *link;
12045         int prog_fd, pfd;
12046
12047         if (!OPTS_VALID(opts, bpf_trace_opts))
12048                 return libbpf_err_ptr(-EINVAL);
12049
12050         prog_fd = bpf_program__fd(prog);
12051         if (prog_fd < 0) {
12052                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12053                 return libbpf_err_ptr(-EINVAL);
12054         }
12055
12056         link = calloc(1, sizeof(*link));
12057         if (!link)
12058                 return libbpf_err_ptr(-ENOMEM);
12059         link->detach = &bpf_link__detach_fd;
12060
12061         /* libbpf is smart enough to redirect to BPF_RAW_TRACEPOINT_OPEN on old kernels */
12062         link_opts.tracing.cookie = OPTS_GET(opts, cookie, 0);
12063         pfd = bpf_link_create(prog_fd, 0, bpf_program__expected_attach_type(prog), &link_opts);
12064         if (pfd < 0) {
12065                 pfd = -errno;
12066                 free(link);
12067                 pr_warn("prog '%s': failed to attach: %s\n",
12068                         prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
12069                 return libbpf_err_ptr(pfd);
12070         }
12071         link->fd = pfd;
12072         return link;
12073 }
12074
12075 struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
12076 {
12077         return bpf_program__attach_btf_id(prog, NULL);
12078 }
12079
12080 struct bpf_link *bpf_program__attach_trace_opts(const struct bpf_program *prog,
12081                                                 const struct bpf_trace_opts *opts)
12082 {
12083         return bpf_program__attach_btf_id(prog, opts);
12084 }
12085
12086 struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
12087 {
12088         return bpf_program__attach_btf_id(prog, NULL);
12089 }
12090
12091 static int attach_trace(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12092 {
12093         *link = bpf_program__attach_trace(prog);
12094         return libbpf_get_error(*link);
12095 }
12096
12097 static int attach_lsm(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12098 {
12099         *link = bpf_program__attach_lsm(prog);
12100         return libbpf_get_error(*link);
12101 }
12102
12103 static struct bpf_link *
12104 bpf_program_attach_fd(const struct bpf_program *prog,
12105                       int target_fd, const char *target_name,
12106                       const struct bpf_link_create_opts *opts)
12107 {
12108         enum bpf_attach_type attach_type;
12109         char errmsg[STRERR_BUFSIZE];
12110         struct bpf_link *link;
12111         int prog_fd, link_fd;
12112
12113         prog_fd = bpf_program__fd(prog);
12114         if (prog_fd < 0) {
12115                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12116                 return libbpf_err_ptr(-EINVAL);
12117         }
12118
12119         link = calloc(1, sizeof(*link));
12120         if (!link)
12121                 return libbpf_err_ptr(-ENOMEM);
12122         link->detach = &bpf_link__detach_fd;
12123
12124         attach_type = bpf_program__expected_attach_type(prog);
12125         link_fd = bpf_link_create(prog_fd, target_fd, attach_type, opts);
12126         if (link_fd < 0) {
12127                 link_fd = -errno;
12128                 free(link);
12129                 pr_warn("prog '%s': failed to attach to %s: %s\n",
12130                         prog->name, target_name,
12131                         libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12132                 return libbpf_err_ptr(link_fd);
12133         }
12134         link->fd = link_fd;
12135         return link;
12136 }
12137
12138 struct bpf_link *
12139 bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
12140 {
12141         return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", NULL);
12142 }
12143
12144 struct bpf_link *
12145 bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
12146 {
12147         return bpf_program_attach_fd(prog, netns_fd, "netns", NULL);
12148 }
12149
12150 struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
12151 {
12152         /* target_fd/target_ifindex use the same field in LINK_CREATE */
12153         return bpf_program_attach_fd(prog, ifindex, "xdp", NULL);
12154 }
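
/* Usage sketch (illustrative): attaching an XDP program to a network device
 * by ifindex. The interface and program names are hypothetical;
 * if_nametoindex() comes from <net/if.h>.
 *
 *	int ifindex = if_nametoindex("eth0");
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_xdp(skel->progs.xdp_prog, ifindex);
 *	if (!link)
 *		return -errno;
 */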
12155
12156 struct bpf_link *
12157 bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex,
12158                         const struct bpf_tcx_opts *opts)
12159 {
12160         LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12161         __u32 relative_id;
12162         int relative_fd;
12163
12164         if (!OPTS_VALID(opts, bpf_tcx_opts))
12165                 return libbpf_err_ptr(-EINVAL);
12166
12167         relative_id = OPTS_GET(opts, relative_id, 0);
12168         relative_fd = OPTS_GET(opts, relative_fd, 0);
12169
12170         /* validate we don't have unexpected combinations of non-zero fields */
12171         if (!ifindex) {
12172                 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12173                         prog->name);
12174                 return libbpf_err_ptr(-EINVAL);
12175         }
12176         if (relative_fd && relative_id) {
12177                 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12178                         prog->name);
12179                 return libbpf_err_ptr(-EINVAL);
12180         }
12181
12182         link_create_opts.tcx.expected_revision = OPTS_GET(opts, expected_revision, 0);
12183         link_create_opts.tcx.relative_fd = relative_fd;
12184         link_create_opts.tcx.relative_id = relative_id;
12185         link_create_opts.flags = OPTS_GET(opts, flags, 0);
12186
12187         /* target_fd/target_ifindex use the same field in LINK_CREATE */
12188         return bpf_program_attach_fd(prog, ifindex, "tcx", &link_create_opts);
12189 }
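
/* Usage sketch (illustrative): attaching a SEC("tcx/ingress") program,
 * anchored before another program already in the device's mprog chain.
 * other_prog_fd and the names used are hypothetical; with no anchor options
 * the program is simply appended.
 *
 *	LIBBPF_OPTS(bpf_tcx_opts, tcx_opts,
 *		.flags = BPF_F_BEFORE,
 *		.relative_fd = other_prog_fd,
 *	);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_tcx(skel->progs.tc_ingress, ifindex, &tcx_opts);
 *	if (!link)
 *		return -errno;
 */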
12190
12191 struct bpf_link *
12192 bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex,
12193                            const struct bpf_netkit_opts *opts)
12194 {
12195         LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12196         __u32 relative_id;
12197         int relative_fd;
12198
12199         if (!OPTS_VALID(opts, bpf_netkit_opts))
12200                 return libbpf_err_ptr(-EINVAL);
12201
12202         relative_id = OPTS_GET(opts, relative_id, 0);
12203         relative_fd = OPTS_GET(opts, relative_fd, 0);
12204
12205         /* validate we don't have unexpected combinations of non-zero fields */
12206         if (!ifindex) {
12207                 pr_warn("prog '%s': target netdevice ifindex cannot be zero\n",
12208                         prog->name);
12209                 return libbpf_err_ptr(-EINVAL);
12210         }
12211         if (relative_fd && relative_id) {
12212                 pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n",
12213                         prog->name);
12214                 return libbpf_err_ptr(-EINVAL);
12215         }
12216
12217         link_create_opts.netkit.expected_revision = OPTS_GET(opts, expected_revision, 0);
12218         link_create_opts.netkit.relative_fd = relative_fd;
12219         link_create_opts.netkit.relative_id = relative_id;
12220         link_create_opts.flags = OPTS_GET(opts, flags, 0);
12221
12222         return bpf_program_attach_fd(prog, ifindex, "netkit", &link_create_opts);
12223 }
12224
12225 struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
12226                                               int target_fd,
12227                                               const char *attach_func_name)
12228 {
12229         int btf_id;
12230
12231         if (!!target_fd != !!attach_func_name) {
12232                 pr_warn("prog '%s': supply none or both of target_fd and attach_func_name\n",
12233                         prog->name);
12234                 return libbpf_err_ptr(-EINVAL);
12235         }
12236
12237         if (prog->type != BPF_PROG_TYPE_EXT) {
12238                 pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n",
12239                         prog->name);
12240                 return libbpf_err_ptr(-EINVAL);
12241         }
12242
12243         if (target_fd) {
12244                 LIBBPF_OPTS(bpf_link_create_opts, target_opts);
12245
12246                 btf_id = libbpf_find_prog_btf_id(attach_func_name, target_fd);
12247                 if (btf_id < 0)
12248                         return libbpf_err_ptr(btf_id);
12249
12250                 target_opts.target_btf_id = btf_id;
12251
12252                 return bpf_program_attach_fd(prog, target_fd, "freplace",
12253                                              &target_opts);
12254         } else {
12255                 /* no target, so use raw_tracepoint_open for compatibility
12256                  * with old kernels
12257                  */
12258                 return bpf_program__attach_trace(prog);
12259         }
12260 }
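
/* Usage sketch (illustrative): replacing a function in an already-loaded
 * target program with a SEC("freplace/...") program. target_prog_fd and the
 * function name are hypothetical; passing target_fd == 0 and
 * attach_func_name == NULL instead reuses the target recorded at load time.
 *
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_freplace(skel->progs.new_handler,
 *					    target_prog_fd, "subprog_to_replace");
 *	if (!link)
 *		return -errno;
 */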
12261
12262 struct bpf_link *
12263 bpf_program__attach_iter(const struct bpf_program *prog,
12264                          const struct bpf_iter_attach_opts *opts)
12265 {
12266         DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
12267         char errmsg[STRERR_BUFSIZE];
12268         struct bpf_link *link;
12269         int prog_fd, link_fd;
12270         __u32 target_fd = 0;
12271
12272         if (!OPTS_VALID(opts, bpf_iter_attach_opts))
12273                 return libbpf_err_ptr(-EINVAL);
12274
12275         link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
12276         link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
12277
12278         prog_fd = bpf_program__fd(prog);
12279         if (prog_fd < 0) {
12280                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12281                 return libbpf_err_ptr(-EINVAL);
12282         }
12283
12284         link = calloc(1, sizeof(*link));
12285         if (!link)
12286                 return libbpf_err_ptr(-ENOMEM);
12287         link->detach = &bpf_link__detach_fd;
12288
12289         link_fd = bpf_link_create(prog_fd, target_fd, BPF_TRACE_ITER,
12290                                   &link_create_opts);
12291         if (link_fd < 0) {
12292                 link_fd = -errno;
12293                 free(link);
12294                 pr_warn("prog '%s': failed to attach to iterator: %s\n",
12295                         prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12296                 return libbpf_err_ptr(link_fd);
12297         }
12298         link->fd = link_fd;
12299         return link;
12300 }
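
/* Usage sketch (illustrative): parameterizing a SEC("iter/bpf_map_elem")
 * program with the map to iterate over, following the pattern used in BPF
 * selftests. map_fd and the names used are hypothetical.
 *
 *	union bpf_iter_link_info linfo;
 *	LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts);
 *	struct bpf_link *link;
 *
 *	memset(&linfo, 0, sizeof(linfo));
 *	linfo.map.map_fd = map_fd;
 *	iter_opts.link_info = &linfo;
 *	iter_opts.link_info_len = sizeof(linfo);
 *
 *	link = bpf_program__attach_iter(skel->progs.dump_map, &iter_opts);
 *	if (!link)
 *		return -errno;
 */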
12301
12302 static int attach_iter(const struct bpf_program *prog, long cookie, struct bpf_link **link)
12303 {
12304         *link = bpf_program__attach_iter(prog, NULL);
12305         return libbpf_get_error(*link);
12306 }
12307
12308 struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog,
12309                                                const struct bpf_netfilter_opts *opts)
12310 {
12311         LIBBPF_OPTS(bpf_link_create_opts, lopts);
12312         struct bpf_link *link;
12313         int prog_fd, link_fd;
12314
12315         if (!OPTS_VALID(opts, bpf_netfilter_opts))
12316                 return libbpf_err_ptr(-EINVAL);
12317
12318         prog_fd = bpf_program__fd(prog);
12319         if (prog_fd < 0) {
12320                 pr_warn("prog '%s': can't attach before loaded\n", prog->name);
12321                 return libbpf_err_ptr(-EINVAL);
12322         }
12323
12324         link = calloc(1, sizeof(*link));
12325         if (!link)
12326                 return libbpf_err_ptr(-ENOMEM);
12327
12328         link->detach = &bpf_link__detach_fd;
12329
12330         lopts.netfilter.pf = OPTS_GET(opts, pf, 0);
12331         lopts.netfilter.hooknum = OPTS_GET(opts, hooknum, 0);
12332         lopts.netfilter.priority = OPTS_GET(opts, priority, 0);
12333         lopts.netfilter.flags = OPTS_GET(opts, flags, 0);
12334
12335         link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts);
12336         if (link_fd < 0) {
12337                 char errmsg[STRERR_BUFSIZE];
12338
12339                 link_fd = -errno;
12340                 free(link);
12341                 pr_warn("prog '%s': failed to attach to netfilter: %s\n",
12342                         prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
12343                 return libbpf_err_ptr(link_fd);
12344         }
12345         link->fd = link_fd;
12346
12347         return link;
12348 }
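
/* Usage sketch (illustrative): hooking a SEC("netfilter") program into the
 * IPv4 LOCAL_IN chain. NFPROTO_IPV4 and NF_INET_LOCAL_IN come from
 * <linux/netfilter.h>; the priority value and names are hypothetical.
 *
 *	LIBBPF_OPTS(bpf_netfilter_opts, nf_opts,
 *		.pf = NFPROTO_IPV4,
 *		.hooknum = NF_INET_LOCAL_IN,
 *		.priority = -128,
 *	);
 *	struct bpf_link *link;
 *
 *	link = bpf_program__attach_netfilter(skel->progs.nf_filter, &nf_opts);
 *	if (!link)
 *		return -errno;
 */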
12349
12350 struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
12351 {
12352         struct bpf_link *link = NULL;
12353         int err;
12354
12355         if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
12356                 return libbpf_err_ptr(-EOPNOTSUPP);
12357
12358         err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, &link);
12359         if (err)
12360                 return libbpf_err_ptr(err);
12361
12362         /* When calling bpf_program__attach() explicitly, auto-attach support
12363          * is expected to work, so NULL returned link is considered an error.
12364          * This is different for skeleton's attach, see comment in
12365          * bpf_object__attach_skeleton().
12366          */
12367         if (!link)
12368                 return libbpf_err_ptr(-EOPNOTSUPP);
12369
12370         return link;
12371 }
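
/* Usage sketch (illustrative): the generic auto-attach entry point, which
 * dispatches based on the program's SEC() definition and works for program
 * types whose section definition carries enough target information. The
 * program name is hypothetical.
 *
 *	struct bpf_link *link = bpf_program__attach(skel->progs.my_prog);
 *
 *	if (!link)
 *		return -errno;
 *	...
 *	bpf_link__destroy(link);
 */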
12372
12373 struct bpf_link_struct_ops {
12374         struct bpf_link link;
12375         int map_fd;
12376 };
12377
12378 static int bpf_link__detach_struct_ops(struct bpf_link *link)
12379 {
12380         struct bpf_link_struct_ops *st_link;
12381         __u32 zero = 0;
12382
12383         st_link = container_of(link, struct bpf_link_struct_ops, link);
12384
12385         if (st_link->map_fd < 0)
12386                 /* w/o a real link */
12387                 return bpf_map_delete_elem(link->fd, &zero);
12388
12389         return close(link->fd);
12390 }
12391
12392 struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
12393 {
12394         struct bpf_link_struct_ops *link;
12395         __u32 zero = 0;
12396         int err, fd;
12397
12398         if (!bpf_map__is_struct_ops(map) || map->fd == -1)
12399                 return libbpf_err_ptr(-EINVAL);
12400
12401         link = calloc(1, sizeof(*link));
12402         if (!link)
12403                 return libbpf_err_ptr(-ENOMEM);
12404
12405         /* kern_vdata should be prepared during the loading phase. */
12406         err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12407         /* It can be -EBUSY if the map has already been used to create
12408          * or update a link.  We don't allow updating the value of a
12409          * struct_ops once it is set, which guarantees that the value
12410          * never changes.  So it is safe to skip -EBUSY.
12411          */
12412         if (err && (!(map->def.map_flags & BPF_F_LINK) || err != -EBUSY)) {
12413                 free(link);
12414                 return libbpf_err_ptr(err);
12415         }
12416
12417         link->link.detach = bpf_link__detach_struct_ops;
12418
12419         if (!(map->def.map_flags & BPF_F_LINK)) {
12420                 /* w/o a real link */
12421                 link->link.fd = map->fd;
12422                 link->map_fd = -1;
12423                 return &link->link;
12424         }
12425
12426         fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL);
12427         if (fd < 0) {
12428                 free(link);
12429                 return libbpf_err_ptr(fd);
12430         }
12431
12432         link->link.fd = fd;
12433         link->map_fd = map->fd;
12434
12435         return &link->link;
12436 }
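
/* Usage sketch (illustrative): attaching a struct_ops map (e.g., a
 * SEC(".struct_ops.link") TCP congestion control implementation) after the
 * object is loaded. The map name is hypothetical.
 *
 *	struct bpf_link *link = bpf_map__attach_struct_ops(skel->maps.my_ca_ops);
 *
 *	if (!link)
 *		return -errno;
 */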
12437
12438 /*
12439  * Swap the backing struct_ops map of a link with a new struct_ops map.
12440  */
12441 int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map)
12442 {
12443         struct bpf_link_struct_ops *st_ops_link;
12444         __u32 zero = 0;
12445         int err;
12446
12447         if (!bpf_map__is_struct_ops(map) || !map_is_created(map))
12448                 return -EINVAL;
12449
12450         st_ops_link = container_of(link, struct bpf_link_struct_ops, link);
12451         /* Ensure the type of a link is correct */
12452         if (st_ops_link->map_fd < 0)
12453                 return -EINVAL;
12454
12455         err = bpf_map_update_elem(map->fd, &zero, map->st_ops->kern_vdata, 0);
12456         /* It can be -EBUSY if the map has already been used to create
12457          * or update a link.  We don't allow updating the value of a
12458          * struct_ops once it is set, which guarantees that the value
12459          * never changes.  So it is safe to skip -EBUSY.
12460          */
12461         if (err && err != -EBUSY)
12462                 return err;
12463
12464         err = bpf_link_update(link->fd, map->fd, NULL);
12465         if (err < 0)
12466                 return err;
12467
12468         st_ops_link->map_fd = map->fd;
12469
12470         return 0;
12471 }
12472
12473 typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(struct perf_event_header *hdr,
12474                                                           void *private_data);
12475
12476 static enum bpf_perf_event_ret
12477 perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,
12478                        void **copy_mem, size_t *copy_size,
12479                        bpf_perf_event_print_t fn, void *private_data)
12480 {
12481         struct perf_event_mmap_page *header = mmap_mem;
12482         __u64 data_head = ring_buffer_read_head(header);
12483         __u64 data_tail = header->data_tail;
12484         void *base = ((__u8 *)header) + page_size;
12485         int ret = LIBBPF_PERF_EVENT_CONT;
12486         struct perf_event_header *ehdr;
12487         size_t ehdr_size;
12488
12489         while (data_head != data_tail) {
12490                 ehdr = base + (data_tail & (mmap_size - 1));
12491                 ehdr_size = ehdr->size;
12492
12493                 if (((void *)ehdr) + ehdr_size > base + mmap_size) {
12494                         void *copy_start = ehdr;
12495                         size_t len_first = base + mmap_size - copy_start;
12496                         size_t len_secnd = ehdr_size - len_first;
12497
12498                         if (*copy_size < ehdr_size) {
12499                                 free(*copy_mem);
12500                                 *copy_mem = malloc(ehdr_size);
12501                                 if (!*copy_mem) {
12502                                         *copy_size = 0;
12503                                         ret = LIBBPF_PERF_EVENT_ERROR;
12504                                         break;
12505                                 }
12506                                 *copy_size = ehdr_size;
12507                         }
12508
12509                         memcpy(*copy_mem, copy_start, len_first);
12510                         memcpy(*copy_mem + len_first, base, len_secnd);
12511                         ehdr = *copy_mem;
12512                 }
12513
12514                 ret = fn(ehdr, private_data);
12515                 data_tail += ehdr_size;
12516                 if (ret != LIBBPF_PERF_EVENT_CONT)
12517                         break;
12518         }
12519
12520         ring_buffer_write_tail(header, data_tail);
12521         return libbpf_err(ret);
12522 }
12523
12524 struct perf_buffer;
12525
12526 struct perf_buffer_params {
12527         struct perf_event_attr *attr;
12528         /* if event_cb is specified, it takes precedence */
12529         perf_buffer_event_fn event_cb;
12530         /* sample_cb and lost_cb are higher-level common-case callbacks */
12531         perf_buffer_sample_fn sample_cb;
12532         perf_buffer_lost_fn lost_cb;
12533         void *ctx;
12534         int cpu_cnt;
12535         int *cpus;
12536         int *map_keys;
12537 };
12538
12539 struct perf_cpu_buf {
12540         struct perf_buffer *pb;
12541         void *base; /* mmap()'ed memory */
12542         void *buf; /* for reconstructing segmented data */
12543         size_t buf_size;
12544         int fd;
12545         int cpu;
12546         int map_key;
12547 };
12548
12549 struct perf_buffer {
12550         perf_buffer_event_fn event_cb;
12551         perf_buffer_sample_fn sample_cb;
12552         perf_buffer_lost_fn lost_cb;
12553         void *ctx; /* passed into callbacks */
12554
12555         size_t page_size;
12556         size_t mmap_size;
12557         struct perf_cpu_buf **cpu_bufs;
12558         struct epoll_event *events;
12559         int cpu_cnt; /* number of allocated CPU buffers */
12560         int epoll_fd; /* epoll instance FD */
12561         int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
12562 };
12563
12564 static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,
12565                                       struct perf_cpu_buf *cpu_buf)
12566 {
12567         if (!cpu_buf)
12568                 return;
12569         if (cpu_buf->base &&
12570             munmap(cpu_buf->base, pb->mmap_size + pb->page_size))
12571                 pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);
12572         if (cpu_buf->fd >= 0) {
12573                 ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);
12574                 close(cpu_buf->fd);
12575         }
12576         free(cpu_buf->buf);
12577         free(cpu_buf);
12578 }
12579
12580 void perf_buffer__free(struct perf_buffer *pb)
12581 {
12582         int i;
12583
12584         if (IS_ERR_OR_NULL(pb))
12585                 return;
12586         if (pb->cpu_bufs) {
12587                 for (i = 0; i < pb->cpu_cnt; i++) {
12588                         struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
12589
12590                         if (!cpu_buf)
12591                                 continue;
12592
12593                         bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
12594                         perf_buffer__free_cpu_buf(pb, cpu_buf);
12595                 }
12596                 free(pb->cpu_bufs);
12597         }
12598         if (pb->epoll_fd >= 0)
12599                 close(pb->epoll_fd);
12600         free(pb->events);
12601         free(pb);
12602 }
12603
12604 static struct perf_cpu_buf *
12605 perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,
12606                           int cpu, int map_key)
12607 {
12608         struct perf_cpu_buf *cpu_buf;
12609         char msg[STRERR_BUFSIZE];
12610         int err;
12611
12612         cpu_buf = calloc(1, sizeof(*cpu_buf));
12613         if (!cpu_buf)
12614                 return ERR_PTR(-ENOMEM);
12615
12616         cpu_buf->pb = pb;
12617         cpu_buf->cpu = cpu;
12618         cpu_buf->map_key = map_key;
12619
12620         cpu_buf->fd = syscall(__NR_perf_event_open, attr, -1 /* pid */, cpu,
12621                               -1, PERF_FLAG_FD_CLOEXEC);
12622         if (cpu_buf->fd < 0) {
12623                 err = -errno;
12624                 pr_warn("failed to open perf buffer event on cpu #%d: %s\n",
12625                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12626                 goto error;
12627         }
12628
12629         cpu_buf->base = mmap(NULL, pb->mmap_size + pb->page_size,
12630                              PROT_READ | PROT_WRITE, MAP_SHARED,
12631                              cpu_buf->fd, 0);
12632         if (cpu_buf->base == MAP_FAILED) {
12633                 cpu_buf->base = NULL;
12634                 err = -errno;
12635                 pr_warn("failed to mmap perf buffer on cpu #%d: %s\n",
12636                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12637                 goto error;
12638         }
12639
12640         if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {
12641                 err = -errno;
12642                 pr_warn("failed to enable perf buffer event on cpu #%d: %s\n",
12643                         cpu, libbpf_strerror_r(err, msg, sizeof(msg)));
12644                 goto error;
12645         }
12646
12647         return cpu_buf;
12648
12649 error:
12650         perf_buffer__free_cpu_buf(pb, cpu_buf);
12651         return (struct perf_cpu_buf *)ERR_PTR(err);
12652 }
12653
12654 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
12655                                               struct perf_buffer_params *p);
12656
12657 struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
12658                                      perf_buffer_sample_fn sample_cb,
12659                                      perf_buffer_lost_fn lost_cb,
12660                                      void *ctx,
12661                                      const struct perf_buffer_opts *opts)
12662 {
12663         const size_t attr_sz = sizeof(struct perf_event_attr);
12664         struct perf_buffer_params p = {};
12665         struct perf_event_attr attr;
12666         __u32 sample_period;
12667
12668         if (!OPTS_VALID(opts, perf_buffer_opts))
12669                 return libbpf_err_ptr(-EINVAL);
12670
12671         sample_period = OPTS_GET(opts, sample_period, 1);
12672         if (!sample_period)
12673                 sample_period = 1;
12674
12675         memset(&attr, 0, attr_sz);
12676         attr.size = attr_sz;
12677         attr.config = PERF_COUNT_SW_BPF_OUTPUT;
12678         attr.type = PERF_TYPE_SOFTWARE;
12679         attr.sample_type = PERF_SAMPLE_RAW;
12680         attr.sample_period = sample_period;
12681         attr.wakeup_events = sample_period;
12682
12683         p.attr = &attr;
12684         p.sample_cb = sample_cb;
12685         p.lost_cb = lost_cb;
12686         p.ctx = ctx;
12687
12688         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
12689 }
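
/* Usage sketch (illustrative): the common consume loop for a perf buffer.
 * The callback signatures match perf_buffer_sample_fn/perf_buffer_lost_fn;
 * the map and handler names are hypothetical. page_cnt must be a power of
 * two pages per CPU.
 *
 *	static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 *	{
 *		...
 *	}
 *
 *	static void on_lost(void *ctx, int cpu, __u64 cnt)
 *	{
 *		...
 *	}
 *
 *	pb = perf_buffer__new(bpf_map__fd(skel->maps.events), 64,
 *			      on_sample, on_lost, NULL, NULL);
 *	if (!pb)
 *		return -errno;
 *	while (!stop) {
 *		err = perf_buffer__poll(pb, 100);
 *		if (err < 0 && err != -EINTR)
 *			break;
 *	}
 *	perf_buffer__free(pb);
 */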
12690
12691 struct perf_buffer *perf_buffer__new_raw(int map_fd, size_t page_cnt,
12692                                          struct perf_event_attr *attr,
12693                                          perf_buffer_event_fn event_cb, void *ctx,
12694                                          const struct perf_buffer_raw_opts *opts)
12695 {
12696         struct perf_buffer_params p = {};
12697
12698         if (!attr)
12699                 return libbpf_err_ptr(-EINVAL);
12700
12701         if (!OPTS_VALID(opts, perf_buffer_raw_opts))
12702                 return libbpf_err_ptr(-EINVAL);
12703
12704         p.attr = attr;
12705         p.event_cb = event_cb;
12706         p.ctx = ctx;
12707         p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0);
12708         p.cpus = OPTS_GET(opts, cpus, NULL);
12709         p.map_keys = OPTS_GET(opts, map_keys, NULL);
12710
12711         return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));
12712 }
12713
12714 static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,
12715                                               struct perf_buffer_params *p)
12716 {
12717         const char *online_cpus_file = "/sys/devices/system/cpu/online";
12718         struct bpf_map_info map;
12719         char msg[STRERR_BUFSIZE];
12720         struct perf_buffer *pb;
12721         bool *online = NULL;
12722         __u32 map_info_len;
12723         int err, i, j, n;
12724
12725         if (page_cnt == 0 || (page_cnt & (page_cnt - 1))) {
12726                 pr_warn("page count should be a power of two, but is %zu\n",
12727                         page_cnt);
12728                 return ERR_PTR(-EINVAL);
12729         }
12730
12731         /* best-effort sanity checks */
12732         memset(&map, 0, sizeof(map));
12733         map_info_len = sizeof(map);
12734         err = bpf_map_get_info_by_fd(map_fd, &map, &map_info_len);
12735         if (err) {
12736                 err = -errno;
12737                 /* if BPF_OBJ_GET_INFO_BY_FD is supported, it will return
12738                  * -EBADFD, -EFAULT, or -E2BIG on a real error
12739                  */
12740                 if (err != -EINVAL) {
12741                         pr_warn("failed to get map info for map FD %d: %s\n",
12742                                 map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));
12743                         return ERR_PTR(err);
12744                 }
12745                 pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n",
12746                          map_fd);
12747         } else {
12748                 if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
12749                         pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
12750                                 map.name);
12751                         return ERR_PTR(-EINVAL);
12752                 }
12753         }
12754
12755         pb = calloc(1, sizeof(*pb));
12756         if (!pb)
12757                 return ERR_PTR(-ENOMEM);
12758
12759         pb->event_cb = p->event_cb;
12760         pb->sample_cb = p->sample_cb;
12761         pb->lost_cb = p->lost_cb;
12762         pb->ctx = p->ctx;
12763
12764         pb->page_size = getpagesize();
12765         pb->mmap_size = pb->page_size * page_cnt;
12766         pb->map_fd = map_fd;
12767
12768         pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
12769         if (pb->epoll_fd < 0) {
12770                 err = -errno;
12771                 pr_warn("failed to create epoll instance: %s\n",
12772                         libbpf_strerror_r(err, msg, sizeof(msg)));
12773                 goto error;
12774         }
12775
12776         if (p->cpu_cnt > 0) {
12777                 pb->cpu_cnt = p->cpu_cnt;
12778         } else {
12779                 pb->cpu_cnt = libbpf_num_possible_cpus();
12780                 if (pb->cpu_cnt < 0) {
12781                         err = pb->cpu_cnt;
12782                         goto error;
12783                 }
12784                 if (map.max_entries && map.max_entries < pb->cpu_cnt)
12785                         pb->cpu_cnt = map.max_entries;
12786         }
12787
12788         pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));
12789         if (!pb->events) {
12790                 err = -ENOMEM;
12791                 pr_warn("failed to allocate events: out of memory\n");
12792                 goto error;
12793         }
12794         pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));
12795         if (!pb->cpu_bufs) {
12796                 err = -ENOMEM;
12797                 pr_warn("failed to allocate buffers: out of memory\n");
12798                 goto error;
12799         }
12800
12801         err = parse_cpu_mask_file(online_cpus_file, &online, &n);
12802         if (err) {
12803                 pr_warn("failed to get online CPU mask: %d\n", err);
12804                 goto error;
12805         }
12806
12807         for (i = 0, j = 0; i < pb->cpu_cnt; i++) {
12808                 struct perf_cpu_buf *cpu_buf;
12809                 int cpu, map_key;
12810
12811                 cpu = p->cpu_cnt > 0 ? p->cpus[i] : i;
12812                 map_key = p->cpu_cnt > 0 ? p->map_keys[i] : i;
12813
12814                 /* if the user didn't explicitly request particular CPUs
12815                  * to attach to, skip offline/not-present CPUs
12816                  */
12817                 if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu]))
12818                         continue;
12819
12820                 cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key);
12821                 if (IS_ERR(cpu_buf)) {
12822                         err = PTR_ERR(cpu_buf);
12823                         goto error;
12824                 }
12825
12826                 pb->cpu_bufs[j] = cpu_buf;
12827
12828                 err = bpf_map_update_elem(pb->map_fd, &map_key,
12829                                           &cpu_buf->fd, 0);
12830                 if (err) {
12831                         err = -errno;
12832                         pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n",
12833                                 cpu, map_key, cpu_buf->fd,
12834                                 libbpf_strerror_r(err, msg, sizeof(msg)));
12835                         goto error;
12836                 }
12837
12838                 pb->events[j].events = EPOLLIN;
12839                 pb->events[j].data.ptr = cpu_buf;
12840                 if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,
12841                               &pb->events[j]) < 0) {
12842                         err = -errno;
12843                         pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n",
12844                                 cpu, cpu_buf->fd,
12845                                 libbpf_strerror_r(err, msg, sizeof(msg)));
12846                         goto error;
12847                 }
12848                 j++;
12849         }
12850         pb->cpu_cnt = j;
12851         free(online);
12852
12853         return pb;
12854
12855 error:
12856         free(online);
12857         if (pb)
12858                 perf_buffer__free(pb);
12859         return ERR_PTR(err);
12860 }
12861
12862 struct perf_sample_raw {
12863         struct perf_event_header header;
12864         uint32_t size;
12865         char data[];
12866 };
12867
12868 struct perf_sample_lost {
12869         struct perf_event_header header;
12870         uint64_t id;
12871         uint64_t lost;
12872         uint64_t sample_id;
12873 };
12874
12875 static enum bpf_perf_event_ret
12876 perf_buffer__process_record(struct perf_event_header *e, void *ctx)
12877 {
12878         struct perf_cpu_buf *cpu_buf = ctx;
12879         struct perf_buffer *pb = cpu_buf->pb;
12880         void *data = e;
12881
12882         /* user wants full control over parsing perf event */
12883         if (pb->event_cb)
12884                 return pb->event_cb(pb->ctx, cpu_buf->cpu, e);
12885
12886         switch (e->type) {
12887         case PERF_RECORD_SAMPLE: {
12888                 struct perf_sample_raw *s = data;
12889
12890                 if (pb->sample_cb)
12891                         pb->sample_cb(pb->ctx, cpu_buf->cpu, s->data, s->size);
12892                 break;
12893         }
12894         case PERF_RECORD_LOST: {
12895                 struct perf_sample_lost *s = data;
12896
12897                 if (pb->lost_cb)
12898                         pb->lost_cb(pb->ctx, cpu_buf->cpu, s->lost);
12899                 break;
12900         }
12901         default:
12902                 pr_warn("unknown perf sample type %d\n", e->type);
12903                 return LIBBPF_PERF_EVENT_ERROR;
12904         }
12905         return LIBBPF_PERF_EVENT_CONT;
12906 }
12907
12908 static int perf_buffer__process_records(struct perf_buffer *pb,
12909                                         struct perf_cpu_buf *cpu_buf)
12910 {
12911         enum bpf_perf_event_ret ret;
12912
12913         ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size,
12914                                      pb->page_size, &cpu_buf->buf,
12915                                      &cpu_buf->buf_size,
12916                                      perf_buffer__process_record, cpu_buf);
12917         if (ret != LIBBPF_PERF_EVENT_CONT)
12918                 return ret;
12919         return 0;
12920 }
12921
12922 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
12923 {
12924         return pb->epoll_fd;
12925 }
12926
12927 int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
12928 {
12929         int i, cnt, err;
12930
12931         cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, timeout_ms);
12932         if (cnt < 0)
12933                 return -errno;
12934
12935         for (i = 0; i < cnt; i++) {
12936                 struct perf_cpu_buf *cpu_buf = pb->events[i].data.ptr;
12937
12938                 err = perf_buffer__process_records(pb, cpu_buf);
12939                 if (err) {
12940                         pr_warn("error while processing records: %d\n", err);
12941                         return libbpf_err(err);
12942                 }
12943         }
12944         return cnt;
12945 }
12946
12947 /* Return number of PERF_EVENT_ARRAY map slots set up by this perf_buffer
12948  * manager.
12949  */
12950 size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb)
12951 {
12952         return pb->cpu_cnt;
12953 }
12954
12955 /*
12956  * Return perf_event FD of a ring buffer in *buf_idx* slot of
12957  * PERF_EVENT_ARRAY BPF map. This FD can be polled for new data using
12958  * select()/poll()/epoll() Linux syscalls.
12959  */
12960 int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx)
12961 {
12962         struct perf_cpu_buf *cpu_buf;
12963
12964         if (buf_idx >= pb->cpu_cnt)
12965                 return libbpf_err(-EINVAL);
12966
12967         cpu_buf = pb->cpu_bufs[buf_idx];
12968         if (!cpu_buf)
12969                 return libbpf_err(-ENOENT);
12970
12971         return cpu_buf->fd;
12972 }
12973
12974 int perf_buffer__buffer(struct perf_buffer *pb, int buf_idx, void **buf, size_t *buf_size)
12975 {
12976         struct perf_cpu_buf *cpu_buf;
12977
12978         if (buf_idx >= pb->cpu_cnt)
12979                 return libbpf_err(-EINVAL);
12980
12981         cpu_buf = pb->cpu_bufs[buf_idx];
12982         if (!cpu_buf)
12983                 return libbpf_err(-ENOENT);
12984
12985         *buf = cpu_buf->base;
12986         *buf_size = pb->mmap_size;
12987         return 0;
12988 }
12989
12990 /*
12991  * Consume data from perf ring buffer corresponding to slot *buf_idx* in
12992  * PERF_EVENT_ARRAY BPF map without waiting/polling. If there is no data to
12993  * consume, do nothing and return success.
12994  * Returns:
12995  *   - 0 on success;
12996  *   - <0 on failure.
12997  */
12998 int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx)
12999 {
13000         struct perf_cpu_buf *cpu_buf;
13001
13002         if (buf_idx >= pb->cpu_cnt)
13003                 return libbpf_err(-EINVAL);
13004
13005         cpu_buf = pb->cpu_bufs[buf_idx];
13006         if (!cpu_buf)
13007                 return libbpf_err(-ENOENT);
13008
13009         return perf_buffer__process_records(pb, cpu_buf);
13010 }
13011
13012 int perf_buffer__consume(struct perf_buffer *pb)
13013 {
13014         int i, err;
13015
13016         for (i = 0; i < pb->cpu_cnt; i++) {
13017                 struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
13018
13019                 if (!cpu_buf)
13020                         continue;
13021
13022                 err = perf_buffer__process_records(pb, cpu_buf);
13023                 if (err) {
13024                         pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err);
13025                         return libbpf_err(err);
13026                 }
13027         }
13028         return 0;
13029 }
13030
13031 int bpf_program__set_attach_target(struct bpf_program *prog,
13032                                    int attach_prog_fd,
13033                                    const char *attach_func_name)
13034 {
13035         int btf_obj_fd = 0, btf_id = 0, err;
13036
13037         if (!prog || attach_prog_fd < 0)
13038                 return libbpf_err(-EINVAL);
13039
13040         if (prog->obj->loaded)
13041                 return libbpf_err(-EINVAL);
13042
13043         if (attach_prog_fd && !attach_func_name) {
13044                 /* remember attach_prog_fd and let bpf_program__load() find
13045                  * BTF ID during the program load
13046                  */
13047                 prog->attach_prog_fd = attach_prog_fd;
13048                 return 0;
13049         }
13050
13051         if (attach_prog_fd) {
13052                 btf_id = libbpf_find_prog_btf_id(attach_func_name,
13053                                                  attach_prog_fd);
13054                 if (btf_id < 0)
13055                         return libbpf_err(btf_id);
13056         } else {
13057                 if (!attach_func_name)
13058                         return libbpf_err(-EINVAL);
13059
13060                 /* load btf_vmlinux, if not yet */
13061                 err = bpf_object__load_vmlinux_btf(prog->obj, true);
13062                 if (err)
13063                         return libbpf_err(err);
13064                 err = find_kernel_btf_id(prog->obj, attach_func_name,
13065                                          prog->expected_attach_type,
13066                                          &btf_obj_fd, &btf_id);
13067                 if (err)
13068                         return libbpf_err(err);
13069         }
13070
13071         prog->attach_btf_id = btf_id;
13072         prog->attach_btf_obj_fd = btf_obj_fd;
13073         prog->attach_prog_fd = attach_prog_fd;
13074         return 0;
13075 }
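
/* Usage sketch (illustrative): pointing a fentry program at a kernel
 * function before load, instead of hard-coding the target in SEC(). The
 * function and skeleton names are hypothetical; attach_prog_fd == 0 resolves
 * the name against vmlinux BTF.
 *
 *	struct bpf_program *prog = skel->progs.generic_fentry;
 *	int err;
 *
 *	err = bpf_program__set_attach_target(prog, 0, "tcp_v4_connect");
 *	if (err)
 *		return err;
 *	err = bpf_object__load(skel->obj);
 *	...
 *	link = bpf_program__attach_trace(prog);
 */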
13076
13077 int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
13078 {
13079         int err = 0, n, len, start, end = -1;
13080         bool *tmp;
13081
13082         *mask = NULL;
13083         *mask_sz = 0;
13084
13085         /* Each substring separated by ',' has the format \d+-\d+ or \d+ */
13086         while (*s) {
13087                 if (*s == ',' || *s == '\n') {
13088                         s++;
13089                         continue;
13090                 }
13091                 n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len);
13092                 if (n <= 0 || n > 2) {
13093                         pr_warn("Failed to get CPU range %s: %d\n", s, n);
13094                         err = -EINVAL;
13095                         goto cleanup;
13096                 } else if (n == 1) {
13097                         end = start;
13098                 }
13099                 if (start < 0 || start > end) {
13100                         pr_warn("Invalid CPU range [%d,%d] in %s\n",
13101                                 start, end, s);
13102                         err = -EINVAL;
13103                         goto cleanup;
13104                 }
13105                 tmp = realloc(*mask, end + 1);
13106                 if (!tmp) {
13107                         err = -ENOMEM;
13108                         goto cleanup;
13109                 }
13110                 *mask = tmp;
13111                 memset(tmp + *mask_sz, 0, start - *mask_sz);
13112                 memset(tmp + start, 1, end - start + 1);
13113                 *mask_sz = end + 1;
13114                 s += len;
13115         }
13116         if (!*mask_sz) {
13117                 pr_warn("Empty CPU range\n");
13118                 return -EINVAL;
13119         }
13120         return 0;
13121 cleanup:
13122         free(*mask);
13123         *mask = NULL;
13124         return err;
13125 }
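
/* Worked example (illustrative): parsing the kernel's CPU list format, as
 * produced by files like /sys/devices/system/cpu/online.
 *
 *	bool *mask;
 *	int n, err;
 *
 *	err = parse_cpu_mask_str("0-2,7\n", &mask, &n);
 *
 * On success n == 8, mask[0..2] and mask[7] are set, the remaining entries
 * are false, and the caller must free(mask).
 */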
13126
13127 int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz)
13128 {
13129         int fd, err = 0, len;
13130         char buf[128];
13131
13132         fd = open(fcpu, O_RDONLY | O_CLOEXEC);
13133         if (fd < 0) {
13134                 err = -errno;
13135                 pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err);
13136                 return err;
13137         }
13138         len = read(fd, buf, sizeof(buf));
13139         close(fd);
13140         if (len <= 0) {
13141                 err = len ? -errno : -EINVAL;
13142                 pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err);
13143                 return err;
13144         }
13145         if (len >= sizeof(buf)) {
13146                 pr_warn("CPU mask is too big in file %s\n", fcpu);
13147                 return -E2BIG;
13148         }
13149         buf[len] = '\0';
13150
13151         return parse_cpu_mask_str(buf, mask, mask_sz);
13152 }
13153
13154 int libbpf_num_possible_cpus(void)
13155 {
13156         static const char *fcpu = "/sys/devices/system/cpu/possible";
13157         static int cpus;
13158         int err, n, i, tmp_cpus;
13159         bool *mask;
13160
13161         tmp_cpus = READ_ONCE(cpus);
13162         if (tmp_cpus > 0)
13163                 return tmp_cpus;
13164
13165         err = parse_cpu_mask_file(fcpu, &mask, &n);
13166         if (err)
13167                 return libbpf_err(err);
13168
13169         tmp_cpus = 0;
13170         for (i = 0; i < n; i++) {
13171                 if (mask[i])
13172                         tmp_cpus++;
13173         }
13174         free(mask);
13175
13176         WRITE_ONCE(cpus, tmp_cpus);
13177         return tmp_cpus;
13178 }
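
/* Usage sketch (illustrative, mirroring the libbpf.h documentation): sizing
 * a buffer for per-CPU map lookups. per_cpu_map_fd and key are hypothetical.
 *
 *	int ncpus = libbpf_num_possible_cpus();
 *	if (ncpus < 0)
 *		return ncpus;
 *	long values[ncpus];
 *	bpf_map_lookup_elem(per_cpu_map_fd, &key, values);
 */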
13179
13180 static int populate_skeleton_maps(const struct bpf_object *obj,
13181                                   struct bpf_map_skeleton *maps,
13182                                   size_t map_cnt)
13183 {
13184         int i;
13185
13186         for (i = 0; i < map_cnt; i++) {
13187                 struct bpf_map **map = maps[i].map;
13188                 const char *name = maps[i].name;
13189                 void **mmaped = maps[i].mmaped;
13190
13191                 *map = bpf_object__find_map_by_name(obj, name);
13192                 if (!*map) {
13193                         pr_warn("failed to find skeleton map '%s'\n", name);
13194                         return -ESRCH;
13195                 }
13196
13197                 /* externs shouldn't be pre-setup from user code */
13198                 if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG)
13199                         *mmaped = (*map)->mmaped;
13200         }
13201         return 0;
13202 }
13203
13204 static int populate_skeleton_progs(const struct bpf_object *obj,
13205                                    struct bpf_prog_skeleton *progs,
13206                                    size_t prog_cnt)
13207 {
13208         int i;
13209
13210         for (i = 0; i < prog_cnt; i++) {
13211                 struct bpf_program **prog = progs[i].prog;
13212                 const char *name = progs[i].name;
13213
13214                 *prog = bpf_object__find_program_by_name(obj, name);
13215                 if (!*prog) {
13216                         pr_warn("failed to find skeleton program '%s'\n", name);
13217                         return -ESRCH;
13218                 }
13219         }
13220         return 0;
13221 }
13222
13223 int bpf_object__open_skeleton(struct bpf_object_skeleton *s,
13224                               const struct bpf_object_open_opts *opts)
13225 {
13226         DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts,
13227                 .object_name = s->name,
13228         );
13229         struct bpf_object *obj;
13230         int err;
13231
13232         /* Attempt to preserve opts->object_name, unless overridden by
13233          * the user explicitly. Overwriting the object name for skeletons
13234          * is discouraged, as it breaks global data maps: they use the
13235          * object name as their own map name prefix. When a skeleton is
13236          * generated, bpftool assumes that this name will stay the same.
13237          */
13238         if (opts) {
13239                 memcpy(&skel_opts, opts, sizeof(*opts));
13240                 if (!opts->object_name)
13241                         skel_opts.object_name = s->name;
13242         }
13243
13244         obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts);
13245         err = libbpf_get_error(obj);
13246         if (err) {
13247                 pr_warn("failed to initialize skeleton BPF object '%s': %d\n",
13248                         s->name, err);
13249                 return libbpf_err(err);
13250         }
13251
13252         *s->obj = obj;
13253         err = populate_skeleton_maps(obj, s->maps, s->map_cnt);
13254         if (err) {
13255                 pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err);
13256                 return libbpf_err(err);
13257         }
13258
13259         err = populate_skeleton_progs(obj, s->progs, s->prog_cnt);
13260         if (err) {
13261                 pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err);
13262                 return libbpf_err(err);
13263         }
13264
13265         return 0;
13266 }
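
/* Usage sketch (illustrative): the typical life cycle driven through a
 * bpftool-generated skeleton, which calls into the helpers above. The
 * "my_obj" names are hypothetical output of `bpftool gen skeleton`.
 *
 *	struct my_obj_bpf *skel;
 *	int err;
 *
 *	skel = my_obj_bpf__open();
 *	if (!skel)
 *		return -errno;
 *	err = my_obj_bpf__load(skel);
 *	if (!err)
 *		err = my_obj_bpf__attach(skel);
 *	...
 *	my_obj_bpf__destroy(skel);
 */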
13267
13268 int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s)
13269 {
13270         int err, len, var_idx, i;
13271         const char *var_name;
13272         const struct bpf_map *map;
13273         struct btf *btf;
13274         __u32 map_type_id;
13275         const struct btf_type *map_type, *var_type;
13276         const struct bpf_var_skeleton *var_skel;
13277         struct btf_var_secinfo *var;
13278
13279         if (!s->obj)
13280                 return libbpf_err(-EINVAL);
13281
13282         btf = bpf_object__btf(s->obj);
13283         if (!btf) {
13284                 pr_warn("subskeletons require BTF at runtime (object %s)\n",
13285                         bpf_object__name(s->obj));
13286                 return libbpf_err(-errno);
13287         }
13288
13289         err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt);
13290         if (err) {
13291                 pr_warn("failed to populate subskeleton maps: %d\n", err);
13292                 return libbpf_err(err);
13293         }
13294
13295         err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt);
13296         if (err) {
13297                 pr_warn("failed to populate subskeleton progs: %d\n", err);
13298                 return libbpf_err(err);
13299         }
13300
13301         for (var_idx = 0; var_idx < s->var_cnt; var_idx++) {
13302                 var_skel = &s->vars[var_idx];
13303                 map = *var_skel->map;
13304                 map_type_id = bpf_map__btf_value_type_id(map);
13305                 map_type = btf__type_by_id(btf, map_type_id);
13306
13307                 if (!btf_is_datasec(map_type)) {
13308                         pr_warn("type for map '%1$s' is not a datasec: %2$s\n",
13309                                 bpf_map__name(map),
13310                                 __btf_kind_str(btf_kind(map_type)));
13311                         return libbpf_err(-EINVAL);
13312                 }
13313
13314                 len = btf_vlen(map_type);
13315                 var = btf_var_secinfos(map_type);
13316                 for (i = 0; i < len; i++, var++) {
13317                         var_type = btf__type_by_id(btf, var->type);
13318                         var_name = btf__name_by_offset(btf, var_type->name_off);
13319                         if (strcmp(var_name, var_skel->name) == 0) {
13320                                 *var_skel->addr = map->mmaped + var->offset;
13321                                 break;
13322                         }
13323                 }
13324         }
13325         return 0;
13326 }
13327
13328 void bpf_object__destroy_subskeleton(struct bpf_object_subskeleton *s)
13329 {
13330         if (!s)
13331                 return;
13332         free(s->maps);
13333         free(s->progs);
13334         free(s->vars);
13335         free(s);
13336 }
13337
13338 int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
13339 {
13340         int i, err;
13341
13342         err = bpf_object__load(*s->obj);
13343         if (err) {
13344                 pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err);
13345                 return libbpf_err(err);
13346         }
13347
13348         for (i = 0; i < s->map_cnt; i++) {
13349                 struct bpf_map *map = *s->maps[i].map;
13350                 size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
13351                 int prot, map_fd = map->fd;
13352                 void **mmaped = s->maps[i].mmaped;
13353
13354                 if (!mmaped)
13355                         continue;
13356
13357                 if (!(map->def.map_flags & BPF_F_MMAPABLE)) {
13358                         *mmaped = NULL;
13359                         continue;
13360                 }
13361
13362                 if (map->def.map_flags & BPF_F_RDONLY_PROG)
13363                         prot = PROT_READ;
13364                 else
13365                         prot = PROT_READ | PROT_WRITE;
13366
13367                 /* Remap the anonymous mmap()-ed "map initialization image" as
13368                  * BPF map-backed mmap()-ed memory, preserving the same memory
13369                  * address. This causes the kernel to change the process's page
13370                  * table to point to a different piece of kernel memory, but
13371                  * from the userspace point of view the memory address (and its
13372                  * contents, identical at this point) stays the same. This
13373                  * mapping will be released by bpf_object__close() as part of
13374                  * the normal cleanup procedure, so we don't need to worry
13375                  * about it from the skeleton's cleanup perspective.
13376                  */
13377                 *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0);
13378                 if (*mmaped == MAP_FAILED) {
13379                         err = -errno;
13380                         *mmaped = NULL;
13381                         pr_warn("failed to re-mmap() map '%s': %d\n",
13382                                  bpf_map__name(map), err);
13383                         return libbpf_err(err);
13384                 }
13385         }
13386
13387         return 0;
13388 }
13389
13390 int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
13391 {
13392         int i, err;
13393
13394         for (i = 0; i < s->prog_cnt; i++) {
13395                 struct bpf_program *prog = *s->progs[i].prog;
13396                 struct bpf_link **link = s->progs[i].link;
13397
13398                 if (!prog->autoload || !prog->autoattach)
13399                         continue;
13400
13401                 /* auto-attaching not supported for this program */
13402                 if (!prog->sec_def || !prog->sec_def->prog_attach_fn)
13403                         continue;
13404
13405                 /* if user already set the link manually, don't attempt auto-attach */
13406                 if (*link)
13407                         continue;
13408
13409                 err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link);
13410                 if (err) {
13411                         pr_warn("prog '%s': failed to auto-attach: %d\n",
13412                                 bpf_program__name(prog), err);
13413                         return libbpf_err(err);
13414                 }
13415
13416                 /* It's possible that for some SEC() definitions auto-attach
13417                  * is supported in some cases (e.g., if the definition completely
13418                  * specifies target information), but not in others.
13419                  * SEC("uprobe") is one such case. If the user specified a target
13420                  * binary and function name, such a BPF program can be
13421                  * auto-attached. But if not, it shouldn't cause the skeleton's
13422                  * attach to fail; it should just be skipped.
13423                  * attach_fn signals such a case by returning 0 (no error) and
13424                  * setting link to NULL.
13425                  */
13426         }
13427
13428         return 0;
13429 }
13430
13431 void bpf_object__detach_skeleton(struct bpf_object_skeleton *s)
13432 {
13433         int i;
13434
13435         for (i = 0; i < s->prog_cnt; i++) {
13436                 struct bpf_link **link = s->progs[i].link;
13437
13438                 bpf_link__destroy(*link);
13439                 *link = NULL;
13440         }
13441 }
13442
13443 void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s)
13444 {
13445         if (!s)
13446                 return;
13447
13448         if (s->progs)
13449                 bpf_object__detach_skeleton(s);
13450         if (s->obj)
13451                 bpf_object__close(*s->obj);
13452         free(s->maps);
13453         free(s->progs);
13454         free(s);
13455 }