1 /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
2 * Copyright (c) 2016 Facebook
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
8 #include <linux/kernel.h>
9 #include <linux/types.h>
10 #include <linux/slab.h>
11 #include <linux/bpf.h>
12 #include <linux/bpf_perf_event.h>
13 #include <linux/filter.h>
14 #include <linux/uaccess.h>
15 #include <linux/ctype.h>
16 #include <linux/kprobes.h>
17 #include <linux/syscalls.h>
18 #include <linux/error-injection.h>
20 #include "trace_probe.h"
23 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
24 u64 bpf_get_stack(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
27 * trace_call_bpf - invoke BPF program
28 * @call: tracepoint event
29 * @ctx: opaque context pointer
31 * kprobe handlers execute BPF programs via this helper.
32 * It can also be used from static tracepoints in the future.
34 * Return: BPF programs always return an integer which is interpreted by the kprobe handler:
36 * 0 - return from kprobe (event is filtered out)
37 * 1 - store kprobe event into ring buffer
38 * Other values are reserved and are currently treated the same as 1
40 unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
44 if (in_nmi()) /* not supported yet */
49 if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
51 * since some BPF program is already running on this CPU,
52 * don't call into another BPF program (the same or a different one)
53 * and don't send this kprobe event into the ring buffer,
61 * Instead of moving rcu_read_lock/rcu_dereference/rcu_read_unlock
62 * to all call sites, we do a bpf_prog_array_valid() check there to
63 * see whether call->prog_array is empty or not, which is
64 * a heuristic to speed up execution.
66 * If the prog_array fetched by bpf_prog_array_valid() was
67 * non-NULL, we go into trace_call_bpf() and do the actual,
68 * proper rcu_dereference() under the RCU lock.
69 * If that prog_array turns out to be NULL, we bail out.
70 * Conversely, if the pointer fetched by bpf_prog_array_valid()
71 * was NULL, we skip the prog_array at the risk of missing
72 * out on events that were added between that check and the
73 * rcu_dereference(), which is an accepted risk.
75 ret = BPF_PROG_RUN_ARRAY_CHECK(call->prog_array, ctx, BPF_PROG_RUN);
78 __this_cpu_dec(bpf_prog_active);
83 EXPORT_SYMBOL_GPL(trace_call_bpf);
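/*
 * Illustrative sketch (not part of the original file): a minimal kprobe BPF
 * program whose return value trace_call_bpf() interprets as described above.
 * It assumes libbpf-style SEC() and the bpf_get_current_comm() helper on the
 * program side; the probed symbol and the comm check are only examples.
 *
 *      SEC("kprobe/do_sys_open")
 *      int filter_open(struct pt_regs *ctx)
 *      {
 *              char comm[16];
 *
 *              bpf_get_current_comm(comm, sizeof(comm));
 *              if (comm[0] != 'b')
 *                      return 0;       // event is filtered out
 *              return 1;               // store the kprobe event in the ring buffer
 *      }
 */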
85 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
86 BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
88 regs_set_return_value(regs, rc);
89 override_function_with_return(regs);
93 static const struct bpf_func_proto bpf_override_return_proto = {
94 .func = bpf_override_return,
96 .ret_type = RET_INTEGER,
97 .arg1_type = ARG_PTR_TO_CTX,
98 .arg2_type = ARG_ANYTHING,
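/*
 * Illustrative sketch: how a kprobe BPF program might use bpf_override_return()
 * for error injection. This assumes CONFIG_BPF_KPROBE_OVERRIDE, a target
 * function marked with ALLOW_ERROR_INJECTION(), and libbpf-style program
 * conventions; the probed symbol is only an example.
 *
 *      SEC("kprobe/open_ctree")
 *      int inject_enomem(struct pt_regs *ctx)
 *      {
 *              // force the probed function to return -ENOMEM to its caller
 *              bpf_override_return(ctx, -ENOMEM);
 *              return 0;
 *      }
 */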
102 BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
106 ret = probe_kernel_read(dst, unsafe_ptr, size);
107 if (unlikely(ret < 0))
108 memset(dst, 0, size);
113 static const struct bpf_func_proto bpf_probe_read_proto = {
114 .func = bpf_probe_read,
116 .ret_type = RET_INTEGER,
117 .arg1_type = ARG_PTR_TO_UNINIT_MEM,
118 .arg2_type = ARG_CONST_SIZE_OR_ZERO,
119 .arg3_type = ARG_ANYTHING,
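/*
 * Illustrative sketch: typical bpf_probe_read() usage from a kprobe program,
 * pulling a field out of a kernel struct reached through a probed function
 * argument. PT_REGS_PARM1() is the libbpf/samples convention for the first
 * argument register; the probed symbol and field access are only examples.
 *
 *      SEC("kprobe/vfs_read")
 *      int probe_vfs_read(struct pt_regs *ctx)
 *      {
 *              struct file *file = (struct file *)PT_REGS_PARM1(ctx);
 *              loff_t pos = 0;
 *
 *              // on failure the helper zeroes the destination (see above)
 *              bpf_probe_read(&pos, sizeof(pos), &file->f_pos);
 *              return 0;
 *      }
 */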
122 BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src,
126 * Ensure we're in a user context in which it is safe for the helper
127 * to run. This helper has no business in a kthread.
129 * access_ok() should prevent writing to non-user memory, but in
130 * some situations (nommu, a temporary switch to KERNEL_DS, etc.)
131 * it does not provide enough validation, hence the explicit check on KERNEL_DS.
134 if (unlikely(in_interrupt() ||
135 current->flags & (PF_KTHREAD | PF_EXITING)))
137 if (unlikely(uaccess_kernel()))
139 if (!access_ok(VERIFY_WRITE, unsafe_ptr, size))
142 return probe_kernel_write(unsafe_ptr, src, size);
145 static const struct bpf_func_proto bpf_probe_write_user_proto = {
146 .func = bpf_probe_write_user,
148 .ret_type = RET_INTEGER,
149 .arg1_type = ARG_ANYTHING,
150 .arg2_type = ARG_PTR_TO_MEM,
151 .arg3_type = ARG_CONST_SIZE,
154 static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
156 pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
157 current->comm, task_pid_nr(current));
159 return &bpf_probe_write_user_proto;
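/*
 * Illustrative sketch: bpf_probe_write_user() may only target user memory,
 * and loading a program that uses it triggers the ratelimited warning above.
 * A hypothetical use is patching a buffer whose user-space address was
 * captured at a syscall kprobe; the symbol and argument register below are
 * illustrative assumptions, not a recommended pattern.
 *
 *      SEC("kprobe/sys_write")
 *      int scribble(struct pt_regs *ctx)
 *      {
 *              char *ubuf = (char *)PT_REGS_PARM2(ctx);
 *              char c = '#';
 *
 *              // fails with -EPERM in irq/kthread context or under KERNEL_DS
 *              bpf_probe_write_user(ubuf, &c, sizeof(c));
 *              return 0;
 *      }
 */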
163 * Only a limited set of trace_printk() conversion specifiers is allowed:
164 * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %s
166 BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
167 u64, arg2, u64, arg3)
169 bool str_seen = false;
177 * bpf_check()->check_func_arg()->check_stack_boundary()
178 * guarantees that fmt points to the BPF program stack, that
179 * fmt_size bytes of it were initialized, and that fmt_size > 0
181 if (fmt[--fmt_size] != 0)
184 /* check format string for allowed specifiers */
185 for (i = 0; i < fmt_size; i++) {
186 if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
195 /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
200 } else if (fmt[i] == 'p' || fmt[i] == 's') {
203 if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
206 if (fmt[i - 1] == 's') {
208 /* allow only one '%s' per fmt string */
227 strncpy_from_unsafe(buf,
228 (void *) (long) unsafe_addr,
239 if (fmt[i] != 'i' && fmt[i] != 'd' &&
240 fmt[i] != 'u' && fmt[i] != 'x')
245 /* Horrid workaround to get va_list handling working generically with
246 * different argument type combinations on 32-bit and 64-bit archs.
248 #define __BPF_TP_EMIT() __BPF_ARG3_TP()
249 #define __BPF_TP(...) \
250 __trace_printk(0 /* Fake ip */, \
253 #define __BPF_ARG1_TP(...) \
254 ((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64)) \
255 ? __BPF_TP(arg1, ##__VA_ARGS__) \
256 : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32)) \
257 ? __BPF_TP((long)arg1, ##__VA_ARGS__) \
258 : __BPF_TP((u32)arg1, ##__VA_ARGS__)))
260 #define __BPF_ARG2_TP(...) \
261 ((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64)) \
262 ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__) \
263 : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32)) \
264 ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__) \
265 : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
267 #define __BPF_ARG3_TP(...) \
268 ((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64)) \
269 ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__) \
270 : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32)) \
271 ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__) \
272 : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
274 return __BPF_TP_EMIT();
277 static const struct bpf_func_proto bpf_trace_printk_proto = {
278 .func = bpf_trace_printk,
280 .ret_type = RET_INTEGER,
281 .arg1_type = ARG_PTR_TO_MEM,
282 .arg2_type = ARG_CONST_SIZE,
285 const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
288 * this program might be calling bpf_trace_printk,
289 * so allocate per-cpu printk buffers
291 trace_printk_init_buffers();
293 return &bpf_trace_printk_proto;
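/*
 * Illustrative sketch: bpf_trace_printk() as seen from a BPF program. The
 * checks above restrict the format string to the listed specifiers and to a
 * single '%s', and the helper takes at most three u64 arguments after
 * fmt/fmt_size. Output lands in the trace_pipe buffer initialized above.
 * The probed symbol is only an example.
 *
 *      SEC("kprobe/do_sys_open")
 *      int trace_open(struct pt_regs *ctx)
 *      {
 *              char fmt[] = "open: pid=%d cpu=%u\n";
 *
 *              bpf_trace_printk(fmt, sizeof(fmt),
 *                               bpf_get_current_pid_tgid() >> 32,
 *                               bpf_get_smp_processor_id());
 *              return 0;
 *      }
 */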
296 static __always_inline int
297 get_map_perf_counter(struct bpf_map *map, u64 flags,
298 u64 *value, u64 *enabled, u64 *running)
300 struct bpf_array *array = container_of(map, struct bpf_array, map);
301 unsigned int cpu = smp_processor_id();
302 u64 index = flags & BPF_F_INDEX_MASK;
303 struct bpf_event_entry *ee;
305 if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
307 if (index == BPF_F_CURRENT_CPU)
309 if (unlikely(index >= array->map.max_entries))
312 ee = READ_ONCE(array->ptrs[index]);
316 return perf_event_read_local(ee->event, value, enabled, running);
319 BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags)
324 err = get_map_perf_counter(map, flags, &value, NULL, NULL);
326 * this API is ugly since we lose the [-22..-2] range of valid
327 * counter values to error codes, but that's UAPI
334 static const struct bpf_func_proto bpf_perf_event_read_proto = {
335 .func = bpf_perf_event_read,
337 .ret_type = RET_INTEGER,
338 .arg1_type = ARG_CONST_MAP_PTR,
339 .arg2_type = ARG_ANYTHING,
342 BPF_CALL_4(bpf_perf_event_read_value, struct bpf_map *, map, u64, flags,
343 struct bpf_perf_event_value *, buf, u32, size)
347 if (unlikely(size != sizeof(struct bpf_perf_event_value)))
349 err = get_map_perf_counter(map, flags, &buf->counter, &buf->enabled,
355 memset(buf, 0, size);
359 static const struct bpf_func_proto bpf_perf_event_read_value_proto = {
360 .func = bpf_perf_event_read_value,
362 .ret_type = RET_INTEGER,
363 .arg1_type = ARG_CONST_MAP_PTR,
364 .arg2_type = ARG_ANYTHING,
365 .arg3_type = ARG_PTR_TO_UNINIT_MEM,
366 .arg4_type = ARG_CONST_SIZE,
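/*
 * Illustrative sketch: reading a counter from a BPF_MAP_TYPE_PERF_EVENT_ARRAY
 * slot for the current CPU. Unlike bpf_perf_event_read(), the _value variant
 * keeps the full counter range and reports errors separately (see the UAPI
 * note above). The map definition uses the samples-era struct bpf_map_def
 * convention and is illustrative only.
 *
 *      struct bpf_map_def SEC("maps") counters = {
 *              .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 *              .key_size = sizeof(int),
 *              .value_size = sizeof(u32),
 *              .max_entries = 64,
 *      };
 *
 *      SEC("kprobe/finish_task_switch")
 *      int on_switch(struct pt_regs *ctx)
 *      {
 *              struct bpf_perf_event_value v = {};
 *
 *              if (bpf_perf_event_read_value(&counters, BPF_F_CURRENT_CPU,
 *                                            &v, sizeof(v)) < 0)
 *                      return 0;
 *              // v.counter, v.enabled and v.running are now usable
 *              return 0;
 *      }
 */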
369 static DEFINE_PER_CPU(struct perf_sample_data, bpf_trace_sd);
371 static __always_inline u64
372 __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map,
373 u64 flags, struct perf_sample_data *sd)
375 struct bpf_array *array = container_of(map, struct bpf_array, map);
376 unsigned int cpu = smp_processor_id();
377 u64 index = flags & BPF_F_INDEX_MASK;
378 struct bpf_event_entry *ee;
379 struct perf_event *event;
381 if (index == BPF_F_CURRENT_CPU)
383 if (unlikely(index >= array->map.max_entries))
386 ee = READ_ONCE(array->ptrs[index]);
391 if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
392 event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
395 if (unlikely(event->oncpu != cpu))
398 perf_event_output(event, sd, regs);
402 BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
403 u64, flags, void *, data, u64, size)
405 struct perf_sample_data *sd = this_cpu_ptr(&bpf_trace_sd);
406 struct perf_raw_record raw = {
413 if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
416 perf_sample_data_init(sd, 0, 0);
419 return __bpf_perf_event_output(regs, map, flags, sd);
422 static const struct bpf_func_proto bpf_perf_event_output_proto = {
423 .func = bpf_perf_event_output,
425 .ret_type = RET_INTEGER,
426 .arg1_type = ARG_PTR_TO_CTX,
427 .arg2_type = ARG_CONST_MAP_PTR,
428 .arg3_type = ARG_ANYTHING,
429 .arg4_type = ARG_PTR_TO_MEM,
430 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
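/*
 * Illustrative sketch: streaming a sample from a kprobe program to user space
 * via bpf_perf_event_output(). BPF_F_CURRENT_CPU selects the perf event for
 * the CPU the program runs on, matching the event->oncpu check above. The map
 * definition and event layout follow the samples-era conventions and are
 * illustrative only.
 *
 *      struct bpf_map_def SEC("maps") events = {
 *              .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
 *              .key_size = sizeof(int),
 *              .value_size = sizeof(u32),
 *              .max_entries = 64,
 *      };
 *
 *      SEC("kprobe/do_sys_open")
 *      int emit_open(struct pt_regs *ctx)
 *      {
 *              struct { u32 pid; u32 cpu; } ev = {
 *                      .pid = bpf_get_current_pid_tgid() >> 32,
 *                      .cpu = bpf_get_smp_processor_id(),
 *              };
 *
 *              bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 *                                    &ev, sizeof(ev));
 *              return 0;
 *      }
 */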
433 static DEFINE_PER_CPU(struct pt_regs, bpf_pt_regs);
434 static DEFINE_PER_CPU(struct perf_sample_data, bpf_misc_sd);
436 u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
437 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
439 struct perf_sample_data *sd = this_cpu_ptr(&bpf_misc_sd);
440 struct pt_regs *regs = this_cpu_ptr(&bpf_pt_regs);
441 struct perf_raw_frag frag = {
446 struct perf_raw_record raw = {
449 .next = ctx_size ? &frag : NULL,
456 perf_fetch_caller_regs(regs);
457 perf_sample_data_init(sd, 0, 0);
460 return __bpf_perf_event_output(regs, map, flags, sd);
463 BPF_CALL_0(bpf_get_current_task)
465 return (long) current;
468 static const struct bpf_func_proto bpf_get_current_task_proto = {
469 .func = bpf_get_current_task,
471 .ret_type = RET_INTEGER,
474 BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx)
476 struct bpf_array *array = container_of(map, struct bpf_array, map);
479 if (unlikely(idx >= array->map.max_entries))
482 cgrp = READ_ONCE(array->ptrs[idx]);
486 return task_under_cgroup_hierarchy(current, cgrp);
489 static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
490 .func = bpf_current_task_under_cgroup,
492 .ret_type = RET_INTEGER,
493 .arg1_type = ARG_CONST_MAP_PTR,
494 .arg2_type = ARG_ANYTHING,
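/*
 * Illustrative sketch: bpf_current_task_under_cgroup() checks whether the
 * current task belongs to the cgroup stored at the given index of a
 * BPF_MAP_TYPE_CGROUP_ARRAY map (populated from user space with a cgroup
 * directory fd). The map definition and probed symbol are illustrative.
 *
 *      struct bpf_map_def SEC("maps") cgrp_map = {
 *              .type = BPF_MAP_TYPE_CGROUP_ARRAY,
 *              .key_size = sizeof(u32),
 *              .value_size = sizeof(u32),
 *              .max_entries = 1,
 *      };
 *
 *      SEC("kprobe/do_sys_open")
 *      int only_my_cgroup(struct pt_regs *ctx)
 *      {
 *              if (bpf_current_task_under_cgroup(&cgrp_map, 0) != 1)
 *                      return 0;       // not in the cgroup (or error), filter out
 *              return 1;
 *      }
 */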
497 BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
498 const void *, unsafe_ptr)
503 * The strncpy_from_unsafe() call will likely not fill the entire
504 * buffer, but that's okay in this circumstance as we're probing
505 * arbitrary memory anyway, similar to bpf_probe_read(), and might
506 * as well probe the stack. Thus, memory is explicitly cleared
507 * only in the error case, so that users who ignore the return
508 * code altogether don't copy garbage; otherwise the length of the
509 * string is returned, which can be used for bpf_perf_event_output() et al.
511 ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
512 if (unlikely(ret < 0))
513 memset(dst, 0, size);
518 static const struct bpf_func_proto bpf_probe_read_str_proto = {
519 .func = bpf_probe_read_str,
521 .ret_type = RET_INTEGER,
522 .arg1_type = ARG_PTR_TO_UNINIT_MEM,
523 .arg2_type = ARG_CONST_SIZE_OR_ZERO,
524 .arg3_type = ARG_ANYTHING,
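/*
 * Illustrative sketch: bpf_probe_read_str() returns the string length
 * including the trailing NUL on success, which can then bound the amount of
 * data handed to bpf_perf_event_output() as noted above. The argument
 * register, the explicit clamp for the verifier, and an "events" perf event
 * array map like the one sketched earlier are illustrative assumptions.
 *
 *      SEC("kprobe/do_sys_open")
 *      int emit_filename(struct pt_regs *ctx)
 *      {
 *              const char *uptr = (const char *)PT_REGS_PARM2(ctx);
 *              char name[64];
 *              int len;
 *
 *              len = bpf_probe_read_str(name, sizeof(name), uptr);
 *              if (len <= 0)
 *                      return 0;
 *              if (len > sizeof(name))         // keep the verifier's bounds happy
 *                      len = sizeof(name);
 *              bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
 *                                    name, len);
 *              return 0;
 *      }
 */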
527 static const struct bpf_func_proto *
528 tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
531 case BPF_FUNC_map_lookup_elem:
532 return &bpf_map_lookup_elem_proto;
533 case BPF_FUNC_map_update_elem:
534 return &bpf_map_update_elem_proto;
535 case BPF_FUNC_map_delete_elem:
536 return &bpf_map_delete_elem_proto;
537 case BPF_FUNC_probe_read:
538 return &bpf_probe_read_proto;
539 case BPF_FUNC_ktime_get_ns:
540 return &bpf_ktime_get_ns_proto;
541 case BPF_FUNC_tail_call:
542 return &bpf_tail_call_proto;
543 case BPF_FUNC_get_current_pid_tgid:
544 return &bpf_get_current_pid_tgid_proto;
545 case BPF_FUNC_get_current_task:
546 return &bpf_get_current_task_proto;
547 case BPF_FUNC_get_current_uid_gid:
548 return &bpf_get_current_uid_gid_proto;
549 case BPF_FUNC_get_current_comm:
550 return &bpf_get_current_comm_proto;
551 case BPF_FUNC_trace_printk:
552 return bpf_get_trace_printk_proto();
553 case BPF_FUNC_get_smp_processor_id:
554 return &bpf_get_smp_processor_id_proto;
555 case BPF_FUNC_get_numa_node_id:
556 return &bpf_get_numa_node_id_proto;
557 case BPF_FUNC_perf_event_read:
558 return &bpf_perf_event_read_proto;
559 case BPF_FUNC_probe_write_user:
560 return bpf_get_probe_write_proto();
561 case BPF_FUNC_current_task_under_cgroup:
562 return &bpf_current_task_under_cgroup_proto;
563 case BPF_FUNC_get_prandom_u32:
564 return &bpf_get_prandom_u32_proto;
565 case BPF_FUNC_probe_read_str:
566 return &bpf_probe_read_str_proto;
572 static const struct bpf_func_proto *
573 kprobe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
576 case BPF_FUNC_perf_event_output:
577 return &bpf_perf_event_output_proto;
578 case BPF_FUNC_get_stackid:
579 return &bpf_get_stackid_proto;
580 case BPF_FUNC_get_stack:
581 return &bpf_get_stack_proto;
582 case BPF_FUNC_perf_event_read_value:
583 return &bpf_perf_event_read_value_proto;
584 #ifdef CONFIG_BPF_KPROBE_OVERRIDE
585 case BPF_FUNC_override_return:
586 return &bpf_override_return_proto;
589 return tracing_func_proto(func_id, prog);
593 /* bpf+kprobe programs can access fields of 'struct pt_regs' */
594 static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
595 const struct bpf_prog *prog,
596 struct bpf_insn_access_aux *info)
598 if (off < 0 || off >= sizeof(struct pt_regs))
600 if (type != BPF_READ)
605 * Assertion for 32-bit archs to make sure that the last 8-byte access
606 * (BPF_DW) to the last 4-byte member is disallowed.
608 if (off + size > sizeof(struct pt_regs))
614 const struct bpf_verifier_ops kprobe_verifier_ops = {
615 .get_func_proto = kprobe_prog_func_proto,
616 .is_valid_access = kprobe_prog_is_valid_access,
619 const struct bpf_prog_ops kprobe_prog_ops = {
622 BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map,
623 u64, flags, void *, data, u64, size)
625 struct pt_regs *regs = *(struct pt_regs **)tp_buff;
628 * r1 points to the perf tracepoint buffer whose first 8 bytes are hidden
629 * from the BPF program and contain a pointer to 'struct pt_regs'. Fetch it
630 * from there and call the same bpf_perf_event_output() helper inline.
632 return ____bpf_perf_event_output(regs, map, flags, data, size);
635 static const struct bpf_func_proto bpf_perf_event_output_proto_tp = {
636 .func = bpf_perf_event_output_tp,
638 .ret_type = RET_INTEGER,
639 .arg1_type = ARG_PTR_TO_CTX,
640 .arg2_type = ARG_CONST_MAP_PTR,
641 .arg3_type = ARG_ANYTHING,
642 .arg4_type = ARG_PTR_TO_MEM,
643 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
646 BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map,
649 struct pt_regs *regs = *(struct pt_regs **)tp_buff;
652 * Same comment as in bpf_perf_event_output_tp(), except that this time
653 * the other helper's function body cannot be inlined since it is
654 * external, so we need to call the raw helper function.
656 return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
660 static const struct bpf_func_proto bpf_get_stackid_proto_tp = {
661 .func = bpf_get_stackid_tp,
663 .ret_type = RET_INTEGER,
664 .arg1_type = ARG_PTR_TO_CTX,
665 .arg2_type = ARG_CONST_MAP_PTR,
666 .arg3_type = ARG_ANYTHING,
669 BPF_CALL_4(bpf_get_stack_tp, void *, tp_buff, void *, buf, u32, size,
672 struct pt_regs *regs = *(struct pt_regs **)tp_buff;
674 return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
675 (unsigned long) size, flags, 0);
678 static const struct bpf_func_proto bpf_get_stack_proto_tp = {
679 .func = bpf_get_stack_tp,
681 .ret_type = RET_INTEGER,
682 .arg1_type = ARG_PTR_TO_CTX,
683 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
684 .arg3_type = ARG_CONST_SIZE_OR_ZERO,
685 .arg4_type = ARG_ANYTHING,
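/*
 * Illustrative sketch: capturing a user-space stack from a tracepoint program
 * with bpf_get_stack(). The helper fills the buffer with up to size bytes of
 * u64 instruction pointers and returns the number of bytes copied. The
 * tracepoint, buffer size and flags are illustrative.
 *
 *      SEC("tracepoint/sched/sched_switch")
 *      int dump_stack(void *ctx)
 *      {
 *              u64 ips[16];
 *              long n;
 *
 *              n = bpf_get_stack(ctx, ips, sizeof(ips), BPF_F_USER_STACK);
 *              if (n <= 0)
 *                      return 0;
 *              // the first n bytes of ips[] now hold the user stack trace
 *              return 0;
 *      }
 */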
688 static const struct bpf_func_proto *
689 tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
692 case BPF_FUNC_perf_event_output:
693 return &bpf_perf_event_output_proto_tp;
694 case BPF_FUNC_get_stackid:
695 return &bpf_get_stackid_proto_tp;
696 case BPF_FUNC_get_stack:
697 return &bpf_get_stack_proto_tp;
699 return tracing_func_proto(func_id, prog);
703 static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type,
704 const struct bpf_prog *prog,
705 struct bpf_insn_access_aux *info)
707 if (off < sizeof(void *) || off >= PERF_MAX_TRACE_SIZE)
709 if (type != BPF_READ)
714 BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(__u64));
718 const struct bpf_verifier_ops tracepoint_verifier_ops = {
719 .get_func_proto = tp_prog_func_proto,
720 .is_valid_access = tp_prog_is_valid_access,
723 const struct bpf_prog_ops tracepoint_prog_ops = {
726 BPF_CALL_3(bpf_perf_prog_read_value, struct bpf_perf_event_data_kern *, ctx,
727 struct bpf_perf_event_value *, buf, u32, size)
731 if (unlikely(size != sizeof(struct bpf_perf_event_value)))
733 err = perf_event_read_local(ctx->event, &buf->counter, &buf->enabled,
739 memset(buf, 0, size);
743 static const struct bpf_func_proto bpf_perf_prog_read_value_proto = {
744 .func = bpf_perf_prog_read_value,
746 .ret_type = RET_INTEGER,
747 .arg1_type = ARG_PTR_TO_CTX,
748 .arg2_type = ARG_PTR_TO_UNINIT_MEM,
749 .arg3_type = ARG_CONST_SIZE,
752 static const struct bpf_func_proto *
753 pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
756 case BPF_FUNC_perf_event_output:
757 return &bpf_perf_event_output_proto_tp;
758 case BPF_FUNC_get_stackid:
759 return &bpf_get_stackid_proto_tp;
760 case BPF_FUNC_get_stack:
761 return &bpf_get_stack_proto_tp;
762 case BPF_FUNC_perf_prog_read_value:
763 return &bpf_perf_prog_read_value_proto;
765 return tracing_func_proto(func_id, prog);
770 * bpf_raw_tp_regs is kept separate from the bpf_pt_regs used from skb/xdp
771 * to avoid a potential recursive-reuse issue when/if tracepoints are added
772 * inside bpf_*_event_output, bpf_get_stackid and/or bpf_get_stack
774 static DEFINE_PER_CPU(struct pt_regs, bpf_raw_tp_regs);
775 BPF_CALL_5(bpf_perf_event_output_raw_tp, struct bpf_raw_tracepoint_args *, args,
776 struct bpf_map *, map, u64, flags, void *, data, u64, size)
778 struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
780 perf_fetch_caller_regs(regs);
781 return ____bpf_perf_event_output(regs, map, flags, data, size);
784 static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = {
785 .func = bpf_perf_event_output_raw_tp,
787 .ret_type = RET_INTEGER,
788 .arg1_type = ARG_PTR_TO_CTX,
789 .arg2_type = ARG_CONST_MAP_PTR,
790 .arg3_type = ARG_ANYTHING,
791 .arg4_type = ARG_PTR_TO_MEM,
792 .arg5_type = ARG_CONST_SIZE_OR_ZERO,
795 BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args,
796 struct bpf_map *, map, u64, flags)
798 struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
800 perf_fetch_caller_regs(regs);
801 /* similar to bpf_perf_event_output_tp, but pt_regs are fetched differently */
802 return bpf_get_stackid((unsigned long) regs, (unsigned long) map,
806 static const struct bpf_func_proto bpf_get_stackid_proto_raw_tp = {
807 .func = bpf_get_stackid_raw_tp,
809 .ret_type = RET_INTEGER,
810 .arg1_type = ARG_PTR_TO_CTX,
811 .arg2_type = ARG_CONST_MAP_PTR,
812 .arg3_type = ARG_ANYTHING,
815 BPF_CALL_4(bpf_get_stack_raw_tp, struct bpf_raw_tracepoint_args *, args,
816 void *, buf, u32, size, u64, flags)
818 struct pt_regs *regs = this_cpu_ptr(&bpf_raw_tp_regs);
820 perf_fetch_caller_regs(regs);
821 return bpf_get_stack((unsigned long) regs, (unsigned long) buf,
822 (unsigned long) size, flags, 0);
825 static const struct bpf_func_proto bpf_get_stack_proto_raw_tp = {
826 .func = bpf_get_stack_raw_tp,
828 .ret_type = RET_INTEGER,
829 .arg1_type = ARG_PTR_TO_CTX,
830 .arg2_type = ARG_PTR_TO_MEM,
831 .arg3_type = ARG_CONST_SIZE_OR_ZERO,
832 .arg4_type = ARG_ANYTHING,
835 static const struct bpf_func_proto *
836 raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
839 case BPF_FUNC_perf_event_output:
840 return &bpf_perf_event_output_proto_raw_tp;
841 case BPF_FUNC_get_stackid:
842 return &bpf_get_stackid_proto_raw_tp;
843 case BPF_FUNC_get_stack:
844 return &bpf_get_stack_proto_raw_tp;
846 return tracing_func_proto(func_id, prog);
850 static bool raw_tp_prog_is_valid_access(int off, int size,
851 enum bpf_access_type type,
852 const struct bpf_prog *prog,
853 struct bpf_insn_access_aux *info)
855 /* the largest tracepoint in the kernel has 12 args */
856 if (off < 0 || off >= sizeof(__u64) * 12)
858 if (type != BPF_READ)
865 const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
866 .get_func_proto = raw_tp_prog_func_proto,
867 .is_valid_access = raw_tp_prog_is_valid_access,
870 const struct bpf_prog_ops raw_tracepoint_prog_ops = {
873 static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
874 const struct bpf_prog *prog,
875 struct bpf_insn_access_aux *info)
877 const int size_u64 = sizeof(u64);
879 if (off < 0 || off >= sizeof(struct bpf_perf_event_data))
881 if (type != BPF_READ)
887 case bpf_ctx_range(struct bpf_perf_event_data, sample_period):
888 bpf_ctx_record_field_size(info, size_u64);
889 if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
892 case bpf_ctx_range(struct bpf_perf_event_data, addr):
893 bpf_ctx_record_field_size(info, size_u64);
894 if (!bpf_ctx_narrow_access_ok(off, size, size_u64))
898 if (size != sizeof(long))
905 static u32 pe_prog_convert_ctx_access(enum bpf_access_type type,
906 const struct bpf_insn *si,
907 struct bpf_insn *insn_buf,
908 struct bpf_prog *prog, u32 *target_size)
910 struct bpf_insn *insn = insn_buf;
913 case offsetof(struct bpf_perf_event_data, sample_period):
914 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
915 data), si->dst_reg, si->src_reg,
916 offsetof(struct bpf_perf_event_data_kern, data));
917 *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
918 bpf_target_off(struct perf_sample_data, period, 8,
921 case offsetof(struct bpf_perf_event_data, addr):
922 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
923 data), si->dst_reg, si->src_reg,
924 offsetof(struct bpf_perf_event_data_kern, data));
925 *insn++ = BPF_LDX_MEM(BPF_DW, si->dst_reg, si->dst_reg,
926 bpf_target_off(struct perf_sample_data, addr, 8,
930 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern,
931 regs), si->dst_reg, si->src_reg,
932 offsetof(struct bpf_perf_event_data_kern, regs));
933 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), si->dst_reg, si->dst_reg,
938 return insn - insn_buf;
941 const struct bpf_verifier_ops perf_event_verifier_ops = {
942 .get_func_proto = pe_prog_func_proto,
943 .is_valid_access = pe_prog_is_valid_access,
944 .convert_ctx_access = pe_prog_convert_ctx_access,
947 const struct bpf_prog_ops perf_event_prog_ops = {
950 static DEFINE_MUTEX(bpf_event_mutex);
952 #define BPF_TRACE_MAX_PROGS 64
954 int perf_event_attach_bpf_prog(struct perf_event *event,
955 struct bpf_prog *prog)
957 struct bpf_prog_array __rcu *old_array;
958 struct bpf_prog_array *new_array;
962 * Kprobe override only works if the kprobe is placed on the function
963 * entry, and only if the target function is on the error-injection opt-in list.
965 if (prog->kprobe_override &&
966 (!trace_kprobe_on_func_entry(event->tp_event) ||
967 !trace_kprobe_error_injectable(event->tp_event)))
970 mutex_lock(&bpf_event_mutex);
975 old_array = event->tp_event->prog_array;
977 bpf_prog_array_length(old_array) >= BPF_TRACE_MAX_PROGS) {
982 ret = bpf_prog_array_copy(old_array, NULL, prog, &new_array);
986 /* set the new array to event->tp_event and set event->prog */
988 rcu_assign_pointer(event->tp_event->prog_array, new_array);
989 bpf_prog_array_free(old_array);
992 mutex_unlock(&bpf_event_mutex);
996 void perf_event_detach_bpf_prog(struct perf_event *event)
998 struct bpf_prog_array __rcu *old_array;
999 struct bpf_prog_array *new_array;
1002 mutex_lock(&bpf_event_mutex);
1007 old_array = event->tp_event->prog_array;
1008 ret = bpf_prog_array_copy(old_array, event->prog, NULL, &new_array);
1012 bpf_prog_array_delete_safe(old_array, event->prog);
1014 rcu_assign_pointer(event->tp_event->prog_array, new_array);
1015 bpf_prog_array_free(old_array);
1018 bpf_prog_put(event->prog);
1022 mutex_unlock(&bpf_event_mutex);
1025 int perf_event_query_prog_array(struct perf_event *event, void __user *info)
1027 struct perf_event_query_bpf __user *uquery = info;
1028 struct perf_event_query_bpf query = {};
1029 u32 *ids, prog_cnt, ids_len;
1032 if (!capable(CAP_SYS_ADMIN))
1034 if (event->attr.type != PERF_TYPE_TRACEPOINT)
1036 if (copy_from_user(&query, uquery, sizeof(query)))
1039 ids_len = query.ids_len;
1040 if (ids_len > BPF_TRACE_MAX_PROGS)
1042 ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN);
1046 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which
1047 * is required when the user only wants to check uquery->prog_cnt.
1048 * There is no need to check for it explicitly since the case is
1049 * handled gracefully in bpf_prog_array_copy_info.
1052 mutex_lock(&bpf_event_mutex);
1053 ret = bpf_prog_array_copy_info(event->tp_event->prog_array,
1057 mutex_unlock(&bpf_event_mutex);
1059 if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) ||
1060 copy_to_user(uquery->ids, ids, ids_len * sizeof(u32)))
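/*
 * Illustrative sketch: the user-space side of the query above. A hypothetical
 * caller allocates struct perf_event_query_bpf with room for ids_len ids and
 * issues PERF_EVENT_IOC_QUERY_BPF on the perf event fd; prog_cnt is filled in
 * even when the ids array is too small. Error handling, includes and perf_fd
 * setup are omitted.
 *
 *      struct perf_event_query_bpf *q;
 *      u32 want = 8;
 *
 *      q = calloc(1, sizeof(*q) + want * sizeof(u32));
 *      q->ids_len = want;
 *      if (ioctl(perf_fd, PERF_EVENT_IOC_QUERY_BPF, q) == 0)
 *              printf("%u programs attached\n", q->prog_cnt);
 */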
1067 extern struct bpf_raw_event_map __start__bpf_raw_tp[];
1068 extern struct bpf_raw_event_map __stop__bpf_raw_tp[];
1070 struct bpf_raw_event_map *bpf_find_raw_tracepoint(const char *name)
1072 struct bpf_raw_event_map *btp = __start__bpf_raw_tp;
1074 for (; btp < __stop__bpf_raw_tp; btp++) {
1075 if (!strcmp(btp->tp->name, name))
1081 static __always_inline
1082 void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
1086 (void) BPF_PROG_RUN(prog, args);
1091 #define UNPACK(...) __VA_ARGS__
1092 #define REPEAT_1(FN, DL, X, ...) FN(X)
1093 #define REPEAT_2(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_1(FN, DL, __VA_ARGS__)
1094 #define REPEAT_3(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_2(FN, DL, __VA_ARGS__)
1095 #define REPEAT_4(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_3(FN, DL, __VA_ARGS__)
1096 #define REPEAT_5(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_4(FN, DL, __VA_ARGS__)
1097 #define REPEAT_6(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_5(FN, DL, __VA_ARGS__)
1098 #define REPEAT_7(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_6(FN, DL, __VA_ARGS__)
1099 #define REPEAT_8(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_7(FN, DL, __VA_ARGS__)
1100 #define REPEAT_9(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_8(FN, DL, __VA_ARGS__)
1101 #define REPEAT_10(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_9(FN, DL, __VA_ARGS__)
1102 #define REPEAT_11(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_10(FN, DL, __VA_ARGS__)
1103 #define REPEAT_12(FN, DL, X, ...) FN(X) UNPACK DL REPEAT_11(FN, DL, __VA_ARGS__)
1104 #define REPEAT(X, FN, DL, ...) REPEAT_##X(FN, DL, __VA_ARGS__)
1106 #define SARG(X) u64 arg##X
1107 #define COPY(X) args[X] = arg##X
1109 #define __DL_COM (,)
1110 #define __DL_SEM (;)
1112 #define __SEQ_0_11 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
1114 #define BPF_TRACE_DEFN_x(x) \
1115 void bpf_trace_run##x(struct bpf_prog *prog, \
1116 REPEAT(x, SARG, __DL_COM, __SEQ_0_11)) \
1119 REPEAT(x, COPY, __DL_SEM, __SEQ_0_11); \
1120 __bpf_trace_run(prog, args); \
1122 EXPORT_SYMBOL_GPL(bpf_trace_run##x)
1123 BPF_TRACE_DEFN_x(1);
1124 BPF_TRACE_DEFN_x(2);
1125 BPF_TRACE_DEFN_x(3);
1126 BPF_TRACE_DEFN_x(4);
1127 BPF_TRACE_DEFN_x(5);
1128 BPF_TRACE_DEFN_x(6);
1129 BPF_TRACE_DEFN_x(7);
1130 BPF_TRACE_DEFN_x(8);
1131 BPF_TRACE_DEFN_x(9);
1132 BPF_TRACE_DEFN_x(10);
1133 BPF_TRACE_DEFN_x(11);
1134 BPF_TRACE_DEFN_x(12);
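/*
 * Editorial note: for reference, BPF_TRACE_DEFN_x(2) expands to roughly the
 * following, with the argument packing produced by the REPEAT()/SARG()/COPY()
 * machinery above:
 *
 *      void bpf_trace_run2(struct bpf_prog *prog, u64 arg0, u64 arg1)
 *      {
 *              u64 args[2];
 *
 *              args[0] = arg0;
 *              args[1] = arg1;
 *              __bpf_trace_run(prog, args);
 *      }
 *      EXPORT_SYMBOL_GPL(bpf_trace_run2);
 */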
1136 static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
1138 struct tracepoint *tp = btp->tp;
1141 * check that the program doesn't access arguments beyond what's
1142 * available in this tracepoint
1144 if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
1147 return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
1150 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
1154 mutex_lock(&bpf_event_mutex);
1155 err = __bpf_probe_register(btp, prog);
1156 mutex_unlock(&bpf_event_mutex);
1160 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
1164 mutex_lock(&bpf_event_mutex);
1165 err = tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
1166 mutex_unlock(&bpf_event_mutex);
1170 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
1171 u32 *fd_type, const char **buf,
1172 u64 *probe_offset, u64 *probe_addr)
1174 bool is_tracepoint, is_syscall_tp;
1175 struct bpf_prog *prog;
1182 /* not supporting BPF_PROG_TYPE_PERF_EVENT yet */
1183 if (prog->type == BPF_PROG_TYPE_PERF_EVENT)
1186 *prog_id = prog->aux->id;
1187 flags = event->tp_event->flags;
1188 is_tracepoint = flags & TRACE_EVENT_FL_TRACEPOINT;
1189 is_syscall_tp = is_syscall_trace_event(event->tp_event);
1191 if (is_tracepoint || is_syscall_tp) {
1192 *buf = is_tracepoint ? event->tp_event->tp->name
1193 : event->tp_event->name;
1194 *fd_type = BPF_FD_TYPE_TRACEPOINT;
1195 *probe_offset = 0x0;
1200 #ifdef CONFIG_KPROBE_EVENTS
1201 if (flags & TRACE_EVENT_FL_KPROBE)
1202 err = bpf_get_kprobe_info(event, fd_type, buf,
1203 probe_offset, probe_addr,
1204 event->attr.type == PERF_TYPE_TRACEPOINT);
1206 #ifdef CONFIG_UPROBE_EVENTS
1207 if (flags & TRACE_EVENT_FL_UPROBE)
1208 err = bpf_get_uprobe_info(event, fd_type, buf,
1210 event->attr.type == PERF_TYPE_TRACEPOINT);