tools/testing/selftests/bpf/progs/profiler.inc.h

   1 // SPDX-License-Identifier: GPL-2.0
   2 /* Copyright (c) 2020 Facebook */
   3 #include <vmlinux.h>
   4 #include <bpf/bpf_core_read.h>
   5 #include <bpf/bpf_helpers.h>
   6 #include <bpf/bpf_tracing.h>
   7
   8 #include "profiler.h"
   9
  10 #ifndef NULL
  11 #define NULL 0
  12 #endif
  13
  14 #define O_WRONLY 00000001
  15 #define O_RDWR 00000002
  16 #define O_DIRECTORY 00200000
  17 #define __O_TMPFILE 020000000
  18 #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
  19 #define MAX_ERRNO 4095
  20 #define S_IFMT 00170000
  21 #define S_IFSOCK 0140000
  22 #define S_IFLNK 0120000
  23 #define S_IFREG 0100000
  24 #define S_IFBLK 0060000
  25 #define S_IFDIR 0040000
  26 #define S_IFCHR 0020000
  27 #define S_IFIFO 0010000
  28 #define S_ISUID 0004000
  29 #define S_ISGID 0002000
  30 #define S_ISVTX 0001000
  31 #define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
  32 #define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
  33 #define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
  34 #define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
  35 #define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
  36 #define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
  37 #define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO
  38
  39 #define KILL_DATA_ARRAY_SIZE 8
  40
  41 struct var_kill_data_arr_t {
  42         struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
  43 };
  44
  45 union any_profiler_data_t {
  46         struct var_exec_data_t var_exec;
  47         struct var_kill_data_t var_kill;
  48         struct var_sysctl_data_t var_sysctl;
  49         struct var_filemod_data_t var_filemod;
  50         struct var_fork_data_t var_fork;
  51         struct var_kill_data_arr_t var_kill_data_arr;
  52 };
  53
  54 volatile struct profiler_config_struct bpf_config = {};
  55
  56 #define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
  57 #define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
  58 #define CGROUP_LOGIN_SESSION_INODE \
  59         (bpf_config.cgroup_login_session_inode)
  60 #define KILL_SIGNALS (bpf_config.kill_signals_mask)
  61 #define STALE_INFO (bpf_config.stale_info_secs)
  62 #define INODE_FILTER (bpf_config.inode_filter)
  63 #define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
  64 #define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
  65
  66 struct kernfs_iattrs___52 {
  67         struct iattr ia_iattr;
  68 };
  69
  70 struct kernfs_node___52 {
  71         union /* kernfs_node_id */ {
  72                 struct {
  73                         u32 ino;
  74                         u32 generation;
  75                 };
  76                 u64 id;
  77         } id;
  78 };
  79
  80 struct {
  81         __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
  82         __uint(max_entries, 1);
  83         __type(key, u32);
  84         __type(value, union any_profiler_data_t);
  85 } data_heap SEC(".maps");
  86
  87 struct {
  88         __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
  89         __uint(key_size, sizeof(int));
  90         __uint(value_size, sizeof(int));
  91 } events SEC(".maps");
  92
  93 struct {
  94         __uint(type, BPF_MAP_TYPE_HASH);
  95         __uint(max_entries, KILL_DATA_ARRAY_SIZE);
  96         __type(key, u32);
  97         __type(value, struct var_kill_data_arr_t);
  98 } var_tpid_to_data SEC(".maps");
  99
 100 struct {
 101         __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 102         __uint(max_entries, profiler_bpf_max_function_id);
 103         __type(key, u32);
 104         __type(value, struct bpf_func_stats_data);
 105 } bpf_func_stats SEC(".maps");
 106
 107 struct {
 108         __uint(type, BPF_MAP_TYPE_HASH);
 109         __type(key, u32);
 110         __type(value, bool);
 111         __uint(max_entries, 16);
 112 } allowed_devices SEC(".maps");
 113
 114 struct {
 115         __uint(type, BPF_MAP_TYPE_HASH);
 116         __type(key, u64);
 117         __type(value, bool);
 118         __uint(max_entries, 1024);
 119 } allowed_file_inodes SEC(".maps");
 120
 121 struct {
 122         __uint(type, BPF_MAP_TYPE_HASH);
 123         __type(key, u64);
 124         __type(value, bool);
 125         __uint(max_entries, 1024);
 126 } allowed_directory_inodes SEC(".maps");
 127
 128 struct {
 129         __uint(type, BPF_MAP_TYPE_HASH);
 130         __type(key, u32);
 131         __type(value, bool);
 132         __uint(max_entries, 16);
 133 } disallowed_exec_inodes SEC(".maps");
 134
 135 #ifndef ARRAY_SIZE
 136 #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
 137 #endif
 138
 139 static INLINE bool IS_ERR(const void* ptr)
 140 {
 141         return IS_ERR_VALUE((unsigned long)ptr);
 142 }
 143
 144 static INLINE u32 get_userspace_pid()
 145 {
 146         return bpf_get_current_pid_tgid() >> 32;
 147 }
 148
 149 static INLINE bool is_init_process(u32 tgid)
 150 {
 151         return tgid == 1 || tgid == 0;
 152 }
 153
 154 static INLINE unsigned long
 155 probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
 156 {
 157         len = len < max ? len : max;
 158         if (len > 1) {
 159                 if (bpf_probe_read(dst, len, src))
 160                         return 0;
 161         } else if (len == 1) {
 162                 if (bpf_probe_read(dst, 1, src))
 163                         return 0;
 164         }
 165         return len;
 166 }
 167
 168 static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
 169                                      int spid)
 170 {
 171 #ifdef UNROLL
 172 #pragma unroll
 173 #endif
 174         for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
 175                 if (arr_struct->array[i].meta.pid == spid)
 176                         return i;
 177         return -1;
 178 }
 179
 180 static INLINE void populate_ancestors(struct task_struct* task,
 181                                       struct ancestors_data_t* ancestors_data)
 182 {
 183         struct task_struct* parent = task;
 184         u32 num_ancestors, ppid;
 185
 186         ancestors_data->num_ancestors = 0;
 187 #ifdef UNROLL
 188 #pragma unroll
 189 #endif
 190         for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
 191                 parent = BPF_CORE_READ(parent, real_parent);
 192                 if (parent == NULL)
 193                         break;
 194                 ppid = BPF_CORE_READ(parent, tgid);
 195                 if (is_init_process(ppid))
 196                         break;
 197                 ancestors_data->ancestor_pids[num_ancestors] = ppid;
 198                 ancestors_data->ancestor_exec_ids[num_ancestors] =
 199                         BPF_CORE_READ(parent, self_exec_id);
 200                 ancestors_data->ancestor_start_times[num_ancestors] =
 201                         BPF_CORE_READ(parent, start_time);
 202                 ancestors_data->num_ancestors = num_ancestors;
 203         }
 204 }
 205
 206 static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
 207                                           struct kernfs_node* cgroup_root_node,
 208                                           void* payload,
 209                                           int* root_pos)
 210 {
 211         void* payload_start = payload;
 212         size_t filepart_length;
 213
 214 #ifdef UNROLL
 215 #pragma unroll
 216 #endif
 217         for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
 218                 filepart_length =
 219                         bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
 220                 if (!cgroup_node)
 221                         return payload;
 222                 if (cgroup_node == cgroup_root_node)
 223                         *root_pos = payload - payload_start;
 224                 if (filepart_length <= MAX_PATH) {
 225                         barrier_var(filepart_length);
 226                         payload += filepart_length;
 227                 }
 228                 cgroup_node = BPF_CORE_READ(cgroup_node, parent);
 229         }
 230         return payload;
 231 }
 232
 233 static ino_t get_inode_from_kernfs(struct kernfs_node* node)
 234 {
 235         struct kernfs_node___52* node52 = (void*)node;
 236
 237         if (bpf_core_field_exists(node52->id.ino)) {
 238                 barrier_var(node52);
 239                 return BPF_CORE_READ(node52, id.ino);
 240         } else {
 241                 barrier_var(node);
 242                 return (u64)BPF_CORE_READ(node, id);
 243         }
 244 }
 245
 246 extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
 247 enum cgroup_subsys_id___local {
 248         pids_cgrp_id___local = 123, /* value doesn't matter */
 249 };
 250
 251 static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
 252                                          struct task_struct* task,
 253                                          void* payload)
 254 {
 255         struct kernfs_node* root_kernfs =
 256                 BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
 257         struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
 258
 259 #if __has_builtin(__builtin_preserve_enum_value)
 260         if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
 261                 int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
 262                                                   pids_cgrp_id___local);
 263 #ifdef UNROLL
 264 #pragma unroll
 265 #endif
 266                 for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 267                         struct cgroup_subsys_state* subsys =
 268                                 BPF_CORE_READ(task, cgroups, subsys[i]);
 269                         if (subsys != NULL) {
 270                                 int subsys_id = BPF_CORE_READ(subsys, ss, id);
 271                                 if (subsys_id == cgrp_id) {
 272                                         proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
 273                                         root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
 274                                         break;
 275                                 }
 276                         }
 277                 }
 278         }
 279 #endif
 280
 281         cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
 282         cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
 283
 284         if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
 285                 cgroup_data->cgroup_root_mtime =
 286                         BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
 287                 cgroup_data->cgroup_proc_mtime =
 288                         BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
 289         } else {
 290                 struct kernfs_iattrs___52* root_iattr =
 291                         (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
 292                 cgroup_data->cgroup_root_mtime =
 293                         BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
 294
 295                 struct kernfs_iattrs___52* proc_iattr =
 296                         (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
 297                 cgroup_data->cgroup_proc_mtime =
 298                         BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
 299         }
 300
 301         cgroup_data->cgroup_root_length = 0;
 302         cgroup_data->cgroup_proc_length = 0;
 303         cgroup_data->cgroup_full_length = 0;
 304
 305         size_t cgroup_root_length =
 306                 bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
 307         barrier_var(cgroup_root_length);
 308         if (cgroup_root_length <= MAX_PATH) {
 309                 barrier_var(cgroup_root_length);
 310                 cgroup_data->cgroup_root_length = cgroup_root_length;
 311                 payload += cgroup_root_length;
 312         }
 313
 314         size_t cgroup_proc_length =
 315                 bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
 316         barrier_var(cgroup_proc_length);
 317         if (cgroup_proc_length <= MAX_PATH) {
 318                 barrier_var(cgroup_proc_length);
 319                 cgroup_data->cgroup_proc_length = cgroup_proc_length;
 320                 payload += cgroup_proc_length;
 321         }
 322
 323         if (FETCH_CGROUPS_FROM_BPF) {
 324                 cgroup_data->cgroup_full_path_root_pos = -1;
 325                 void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
 326                                                               &cgroup_data->cgroup_full_path_root_pos);
 327                 cgroup_data->cgroup_full_length = payload_end_pos - payload;
 328                 payload = payload_end_pos;
 329         }
 330
 331         return (void*)payload;
 332 }
 333
 334 static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
 335                                           struct task_struct* task,
 336                                           u32 pid, void* payload)
 337 {
 338         u64 uid_gid = bpf_get_current_uid_gid();
 339
 340         metadata->uid = (u32)uid_gid;
 341         metadata->gid = uid_gid >> 32;
 342         metadata->pid = pid;
 343         metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
 344         metadata->start_time = BPF_CORE_READ(task, start_time);
 345         metadata->comm_length = 0;
 346
 347         size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
 348         barrier_var(comm_length);
 349         if (comm_length <= TASK_COMM_LEN) {
 350                 barrier_var(comm_length);
 351                 metadata->comm_length = comm_length;
 352                 payload += comm_length;
 353         }
 354
 355         return (void*)payload;
 356 }
 357
 358 static INLINE struct var_kill_data_t*
 359 get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
 360 {
 361         int zero = 0;
 362         struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
 363
 364         if (kill_data == NULL)
 365                 return NULL;
 366         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 367
 368         void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
 369         payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
 370         size_t payload_length = payload - (void*)kill_data->payload;
 371         kill_data->payload_length = payload_length;
 372         populate_ancestors(task, &kill_data->ancestors_info);
 373         kill_data->meta.type = KILL_EVENT;
 374         kill_data->kill_target_pid = tpid;
 375         kill_data->kill_sig = sig;
 376         kill_data->kill_count = 1;
 377         kill_data->last_kill_time = bpf_ktime_get_ns();
 378         return kill_data;
 379 }
 380
 381 static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
 382 {
 383         if ((KILL_SIGNALS & (1ULL << sig)) == 0)
 384                 return 0;
 385
 386         u32 spid = get_userspace_pid();
 387         struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
 388
 389         if (arr_struct == NULL) {
 390                 struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
 391                 int zero = 0;
 392
 393                 if (kill_data == NULL)
 394                         return 0;
 395                 arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
 396                 if (arr_struct == NULL)
 397                         return 0;
 398                 bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
 399         } else {
 400                 int index = get_var_spid_index(arr_struct, spid);
 401
 402                 if (index == -1) {
 403                         struct var_kill_data_t* kill_data =
 404                                 get_var_kill_data(ctx, spid, tpid, sig);
 405                         if (kill_data == NULL)
 406                                 return 0;
 407 #ifdef UNROLL
 408 #pragma unroll
 409 #endif
 410                         for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
 411                                 if (arr_struct->array[i].meta.pid == 0) {
 412                                         bpf_probe_read(&arr_struct->array[i],
 413                                                        sizeof(arr_struct->array[i]), kill_data);
 414                                         bpf_map_update_elem(&var_tpid_to_data, &tpid,
 415                                                             arr_struct, 0);
 416
 417                                         return 0;
 418                                 }
 419                         return 0;
 420                 }
 421
 422                 struct var_kill_data_t* kill_data = &arr_struct->array[index];
 423
 424                 u64 delta_sec =
 425                         (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
 426
 427                 if (delta_sec < STALE_INFO) {
 428                         kill_data->kill_count++;
 429                         kill_data->last_kill_time = bpf_ktime_get_ns();
 430                         bpf_probe_read(&arr_struct->array[index],
 431                                        sizeof(arr_struct->array[index]),
 432                                        kill_data);
 433                 } else {
 434                         struct var_kill_data_t* kill_data =
 435                                 get_var_kill_data(ctx, spid, tpid, sig);
 436                         if (kill_data == NULL)
 437                                 return 0;
 438                         bpf_probe_read(&arr_struct->array[index],
 439                                        sizeof(arr_struct->array[index]),
 440                                        kill_data);
 441                 }
 442         }
 443         bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
 444         return 0;
 445 }
 446
 447 static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
 448                                    enum bpf_function_id func_id)
 449 {
 450         int func_id_key = func_id;
 451
 452         bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
 453         bpf_stat_ctx->bpf_func_stats_data_val =
 454                 bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
 455         if (bpf_stat_ctx->bpf_func_stats_data_val)
 456                 bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
 457 }
 458
 459 static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
 460 {
 461         if (bpf_stat_ctx->bpf_func_stats_data_val)
 462                 bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
 463                         bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
 464 }
 465
 466 static INLINE void
 467 bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
 468                                     struct var_metadata_t* meta)
 469 {
 470         if (bpf_stat_ctx->bpf_func_stats_data_val) {
 471                 bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
 472                 meta->bpf_stats_num_perf_events =
 473                         bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
 474         }
 475         meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
 476         meta->cpu_id = bpf_get_smp_processor_id();
 477 }
 478
 479 static INLINE size_t
 480 read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
 481 {
 482         size_t length = 0;
 483         size_t filepart_length;
 484         struct dentry* parent_dentry;
 485
 486 #ifdef UNROLL
 487 #pragma unroll
 488 #endif
 489         for (int i = 0; i < MAX_PATH_DEPTH; i++) {
 490                 filepart_length = bpf_probe_read_str(payload, MAX_PATH,
 491                                                      BPF_CORE_READ(filp_dentry, d_name.name));
 492                 barrier_var(filepart_length);
 493                 if (filepart_length > MAX_PATH)
 494                         break;
 495                 barrier_var(filepart_length);
 496                 payload += filepart_length;
 497                 length += filepart_length;
 498
 499                 parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
 500                 if (filp_dentry == parent_dentry)
 501                         break;
 502                 filp_dentry = parent_dentry;
 503         }
 504
 505         return length;
 506 }
 507
 508 static INLINE bool
 509 is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
 510 {
 511         struct dentry* parent_dentry;
 512 #ifdef UNROLL
 513 #pragma unroll
 514 #endif
 515         for (int i = 0; i < MAX_PATH_DEPTH; i++) {
 516                 u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
 517                 bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
 518
 519                 if (allowed_dir != NULL)
 520                         return true;
 521                 parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
 522                 if (filp_dentry == parent_dentry)
 523                         break;
 524                 filp_dentry = parent_dentry;
 525         }
 526         return false;
 527 }
 528
 529 static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
 530                                                  u32* device_id,
 531                                                  u64* file_ino)
 532 {
 533         u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
 534         *device_id = dev_id;
 535         bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
 536
 537         if (allowed_device == NULL)
 538                 return false;
 539
 540         u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
 541         *file_ino = ino;
 542         bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
 543
 544         if (allowed_file == NULL)
 545                 if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
 546                         return false;
 547         return true;
 548 }
 549
 550 SEC("kprobe/proc_sys_write")
 551 ssize_t BPF_KPROBE(kprobe__proc_sys_write,
 552                    struct file* filp, const char* buf,
 553                    size_t count, loff_t* ppos)
 554 {
 555         struct bpf_func_stats_ctx stats_ctx;
 556         bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
 557
 558         u32 pid = get_userspace_pid();
 559         int zero = 0;
 560         struct var_sysctl_data_t* sysctl_data =
 561                 bpf_map_lookup_elem(&data_heap, &zero);
 562         if (!sysctl_data)
 563                 goto out;
 564
 565         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 566         sysctl_data->meta.type = SYSCTL_EVENT;
 567         void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
 568         payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
 569
 570         populate_ancestors(task, &sysctl_data->ancestors_info);
 571
 572         sysctl_data->sysctl_val_length = 0;
 573         sysctl_data->sysctl_path_length = 0;
 574
 575         size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
 576         barrier_var(sysctl_val_length);
 577         if (sysctl_val_length <= CTL_MAXNAME) {
 578                 barrier_var(sysctl_val_length);
 579                 sysctl_data->sysctl_val_length = sysctl_val_length;
 580                 payload += sysctl_val_length;
 581         }
 582
 583         size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
 584                                                        BPF_CORE_READ(filp, f_path.dentry, d_name.name));
 585         barrier_var(sysctl_path_length);
 586         if (sysctl_path_length <= MAX_PATH) {
 587                 barrier_var(sysctl_path_length);
 588                 sysctl_data->sysctl_path_length = sysctl_path_length;
 589                 payload += sysctl_path_length;
 590         }
 591
 592         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
 593         unsigned long data_len = payload - (void*)sysctl_data;
 594         data_len = data_len > sizeof(struct var_sysctl_data_t)
 595                 ? sizeof(struct var_sysctl_data_t)
 596                 : data_len;
 597         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
 598 out:
 599         bpf_stats_exit(&stats_ctx);
 600         return 0;
 601 }
 602
 603 SEC("tracepoint/syscalls/sys_enter_kill")
 604 int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
 605 {
 606         struct bpf_func_stats_ctx stats_ctx;
 607
 608         bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
 609         int pid = ctx->args[0];
 610         int sig = ctx->args[1];
 611         int ret = trace_var_sys_kill(ctx, pid, sig);
 612         bpf_stats_exit(&stats_ctx);
 613         return ret;
 614 };
 615
 616 SEC("raw_tracepoint/sched_process_exit")
 617 int raw_tracepoint__sched_process_exit(void* ctx)
 618 {
 619         int zero = 0;
 620         struct bpf_func_stats_ctx stats_ctx;
 621         bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
 622
 623         u32 tpid = get_userspace_pid();
 624
 625         struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
 626         struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
 627
 628         if (arr_struct == NULL || kill_data == NULL)
 629                 goto out;
 630
 631         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 632         struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
 633
 634 #ifdef UNROLL
 635 #pragma unroll
 636 #endif
 637         for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
 638                 struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
 639
 640                 if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
 641                         bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
 642                         void* payload = kill_data->payload;
 643                         size_t offset = kill_data->payload_length;
 644                         if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
 645                                 return 0;
 646                         payload += offset;
 647
 648                         kill_data->kill_target_name_length = 0;
 649                         kill_data->kill_target_cgroup_proc_length = 0;
 650
 651                         size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
 652                         barrier_var(comm_length);
 653                         if (comm_length <= TASK_COMM_LEN) {
 654                                 barrier_var(comm_length);
 655                                 kill_data->kill_target_name_length = comm_length;
 656                                 payload += comm_length;
 657                         }
 658
 659                         size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
 660                                                                        BPF_CORE_READ(proc_kernfs, name));
 661                         barrier_var(cgroup_proc_length);
 662                         if (cgroup_proc_length <= KILL_TARGET_LEN) {
 663                                 barrier_var(cgroup_proc_length);
 664                                 kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
 665                                 payload += cgroup_proc_length;
 666                         }
 667
 668                         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
 669                         unsigned long data_len = (void*)payload - (void*)kill_data;
 670                         data_len = data_len > sizeof(struct var_kill_data_t)
 671                                 ? sizeof(struct var_kill_data_t)
 672                                 : data_len;
 673                         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
 674                 }
 675         }
 676         bpf_map_delete_elem(&var_tpid_to_data, &tpid);
 677 out:
 678         bpf_stats_exit(&stats_ctx);
 679         return 0;
 680 }
 681
 682 SEC("raw_tracepoint/sched_process_exec")
 683 int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
 684 {
 685         struct bpf_func_stats_ctx stats_ctx;
 686         bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
 687
 688         struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
 689         u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
 690
 691         bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
 692         if (should_filter_binprm != NULL)
 693                 goto out;
 694
 695         int zero = 0;
 696         struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
 697         if (!proc_exec_data)
 698                 goto out;
 699
 700         if (INODE_FILTER && inode != INODE_FILTER)
 701                 return 0;
 702
 703         u32 pid = get_userspace_pid();
 704         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 705
 706         proc_exec_data->meta.type = EXEC_EVENT;
 707         proc_exec_data->bin_path_length = 0;
 708         proc_exec_data->cmdline_length = 0;
 709         proc_exec_data->environment_length = 0;
 710         void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
 711                                               proc_exec_data->payload);
 712         payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
 713
 714         struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
 715         proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
 716         proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
 717         proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
 718         proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
 719
 720         const char* filename = BPF_CORE_READ(bprm, filename);
 721         size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
 722         barrier_var(bin_path_length);
 723         if (bin_path_length <= MAX_FILENAME_LEN) {
 724                 barrier_var(bin_path_length);
 725                 proc_exec_data->bin_path_length = bin_path_length;
 726                 payload += bin_path_length;
 727         }
 728
 729         void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
 730         void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
 731         unsigned int cmdline_length = probe_read_lim(payload, arg_start,
 732                                                      arg_end - arg_start, MAX_ARGS_LEN);
 733
 734         if (cmdline_length <= MAX_ARGS_LEN) {
 735                 barrier_var(cmdline_length);
 736                 proc_exec_data->cmdline_length = cmdline_length;
 737                 payload += cmdline_length;
 738         }
 739
 740         if (READ_ENVIRON_FROM_EXEC) {
 741                 void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
 742                 void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
 743                 unsigned long env_len = probe_read_lim(payload, env_start,
 744                                                        env_end - env_start, MAX_ENVIRON_LEN);
 745                 if (cmdline_length <= MAX_ENVIRON_LEN) {
 746                         proc_exec_data->environment_length = env_len;
 747                         payload += env_len;
 748                 }
 749         }
 750
 751         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
 752         unsigned long data_len = payload - (void*)proc_exec_data;
 753         data_len = data_len > sizeof(struct var_exec_data_t)
 754                 ? sizeof(struct var_exec_data_t)
 755                 : data_len;
 756         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
 757 out:
 758         bpf_stats_exit(&stats_ctx);
 759         return 0;
 760 }
 761
 762 SEC("kretprobe/do_filp_open")
 763 int kprobe_ret__do_filp_open(struct pt_regs* ctx)
 764 {
 765         struct bpf_func_stats_ctx stats_ctx;
 766         bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
 767
 768         struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
 769
 770         if (filp == NULL || IS_ERR(filp))
 771                 goto out;
 772         unsigned int flags = BPF_CORE_READ(filp, f_flags);
 773         if ((flags & (O_RDWR | O_WRONLY)) == 0)
 774                 goto out;
 775         if ((flags & O_TMPFILE) > 0)
 776                 goto out;
 777         struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
 778         umode_t mode = BPF_CORE_READ(file_inode, i_mode);
 779         if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
 780             S_ISSOCK(mode))
 781                 goto out;
 782
 783         struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
 784         u32 device_id = 0;
 785         u64 file_ino = 0;
 786         if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
 787                 goto out;
 788
 789         int zero = 0;
 790         struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
 791         if (!filemod_data)
 792                 goto out;
 793
 794         u32 pid = get_userspace_pid();
 795         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 796
 797         filemod_data->meta.type = FILEMOD_EVENT;
 798         filemod_data->fmod_type = FMOD_OPEN;
 799         filemod_data->dst_flags = flags;
 800         filemod_data->src_inode = 0;
 801         filemod_data->dst_inode = file_ino;
 802         filemod_data->src_device_id = 0;
 803         filemod_data->dst_device_id = device_id;
 804         filemod_data->src_filepath_length = 0;
 805         filemod_data->dst_filepath_length = 0;
 806
 807         void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
 808                                               filemod_data->payload);
 809         payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
 810
 811         size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
 812         barrier_var(len);
 813         if (len <= MAX_FILEPATH_LENGTH) {
 814                 barrier_var(len);
 815                 payload += len;
 816                 filemod_data->dst_filepath_length = len;
 817         }
 818         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
 819         unsigned long data_len = payload - (void*)filemod_data;
 820         data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
 821         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
 822 out:
 823         bpf_stats_exit(&stats_ctx);
 824         return 0;
 825 }
 826
 827 SEC("kprobe/vfs_link")
 828 int BPF_KPROBE(kprobe__vfs_link,
 829                struct dentry* old_dentry, struct mnt_idmap *idmap,
 830                struct inode* dir, struct dentry* new_dentry,
 831                struct inode** delegated_inode)
 832 {
 833         struct bpf_func_stats_ctx stats_ctx;
 834         bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
 835
 836         u32 src_device_id = 0;
 837         u64 src_file_ino = 0;
 838         u32 dst_device_id = 0;
 839         u64 dst_file_ino = 0;
 840         if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
 841             !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
 842                 goto out;
 843
 844         int zero = 0;
 845         struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
 846         if (!filemod_data)
 847                 goto out;
 848
 849         u32 pid = get_userspace_pid();
 850         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 851
 852         filemod_data->meta.type = FILEMOD_EVENT;
 853         filemod_data->fmod_type = FMOD_LINK;
 854         filemod_data->dst_flags = 0;
 855         filemod_data->src_inode = src_file_ino;
 856         filemod_data->dst_inode = dst_file_ino;
 857         filemod_data->src_device_id = src_device_id;
 858         filemod_data->dst_device_id = dst_device_id;
 859         filemod_data->src_filepath_length = 0;
 860         filemod_data->dst_filepath_length = 0;
 861
 862         void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
 863                                               filemod_data->payload);
 864         payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
 865
 866         size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
 867         barrier_var(len);
 868         if (len <= MAX_FILEPATH_LENGTH) {
 869                 barrier_var(len);
 870                 payload += len;
 871                 filemod_data->src_filepath_length = len;
 872         }
 873
 874         len = read_absolute_file_path_from_dentry(new_dentry, payload);
 875         barrier_var(len);
 876         if (len <= MAX_FILEPATH_LENGTH) {
 877                 barrier_var(len);
 878                 payload += len;
 879                 filemod_data->dst_filepath_length = len;
 880         }
 881
 882         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
 883         unsigned long data_len = payload - (void*)filemod_data;
 884         data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
 885         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
 886 out:
 887         bpf_stats_exit(&stats_ctx);
 888         return 0;
 889 }
 890
 891 SEC("kprobe/vfs_symlink")
 892 int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
 893                const char* oldname)
 894 {
 895         struct bpf_func_stats_ctx stats_ctx;
 896         bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
 897
 898         u32 dst_device_id = 0;
 899         u64 dst_file_ino = 0;
 900         if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
 901                 goto out;
 902
 903         int zero = 0;
 904         struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
 905         if (!filemod_data)
 906                 goto out;
 907
 908         u32 pid = get_userspace_pid();
 909         struct task_struct* task = (struct task_struct*)bpf_get_current_task();
 910
 911         filemod_data->meta.type = FILEMOD_EVENT;
 912         filemod_data->fmod_type = FMOD_SYMLINK;
 913         filemod_data->dst_flags = 0;
 914         filemod_data->src_inode = 0;
 915         filemod_data->dst_inode = dst_file_ino;
 916         filemod_data->src_device_id = 0;
 917         filemod_data->dst_device_id = dst_device_id;
 918         filemod_data->src_filepath_length = 0;
 919         filemod_data->dst_filepath_length = 0;
 920
 921         void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
 922                                               filemod_data->payload);
 923         payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
 924
 925         size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
 926         barrier_var(len);
 927         if (len <= MAX_FILEPATH_LENGTH) {
 928                 barrier_var(len);
 929                 payload += len;
 930                 filemod_data->src_filepath_length = len;
 931         }
 932         len = read_absolute_file_path_from_dentry(dentry, payload);
 933         barrier_var(len);
 934         if (len <= MAX_FILEPATH_LENGTH) {
 935                 barrier_var(len);
 936                 payload += len;
 937                 filemod_data->dst_filepath_length = len;
 938         }
 939         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
 940         unsigned long data_len = payload - (void*)filemod_data;
 941         data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
 942         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
 943 out:
 944         bpf_stats_exit(&stats_ctx);
 945         return 0;
 946 }
 947
 948 SEC("raw_tracepoint/sched_process_fork")
 949 int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
 950 {
 951         struct bpf_func_stats_ctx stats_ctx;
 952         bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
 953
 954         int zero = 0;
 955         struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
 956         if (!fork_data)
 957                 goto out;
 958
 959         struct task_struct* parent = (struct task_struct*)ctx->args[0];
 960         struct task_struct* child = (struct task_struct*)ctx->args[1];
 961         fork_data->meta.type = FORK_EVENT;
 962
 963         void* payload = populate_var_metadata(&fork_data->meta, child,
 964                                               BPF_CORE_READ(child, pid), fork_data->payload);
 965         fork_data->parent_pid = BPF_CORE_READ(parent, pid);
 966         fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
 967         fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
 968         bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
 969
 970         unsigned long data_len = payload - (void*)fork_data;
 971         data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
 972         bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
 973 out:
 974         bpf_stats_exit(&stats_ctx);
 975         return 0;
 976 }
/* License string for the programs in this file, placed in the "license" section via SEC(). */
char _license[] SEC("license") = "GPL";