[linux-2.6-block.git] / kernel / trace / bpf_trace.c

/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/uaccess.h>
#include <linux/ctype.h>
#include "trace.h"

/*
 * Per-cpu nesting counter for BPF program execution.  trace_call_bpf()
 * increments it before running a program and decrements it afterwards;
 * a program is only run when the incremented value is exactly 1.
 */
static DEFINE_PER_CPU(int, bpf_prog_active);

/**
 * trace_call_bpf - run a BPF program on behalf of a kprobe handler
 * @prog: BPF program to execute
 * @ctx: opaque context pointer handed to the program
 *
 * This is the entry point kprobe handlers use to execute BPF programs.
 * It may be used from static tracepoints in the future.
 *
 * Return: the integer produced by the BPF program, which the kprobe
 * handler interprets as:
 * 0 - return from kprobe (event is filtered out)
 * 1 - store kprobe event into ring buffer
 * Other values are reserved and currently alias to 1.
 * In NMI context the program is not run and 1 is returned; when another
 * BPF program is already active on this cpu the program is not run and
 * 0 is returned.
 */
unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
{
	unsigned int verdict;

	/* NMI context is not supported yet */
	if (in_nmi())
		return 1;

	preempt_disable();

	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
		/*
		 * Some bpf program (same or different) is already running
		 * on this cpu: do not nest into another one and do not send
		 * the kprobe event into the ring-buffer either.
		 */
		verdict = 0;
	} else {
		rcu_read_lock();
		verdict = BPF_PROG_RUN(prog, ctx);
		rcu_read_unlock();
	}

	/* undo the increment on both paths */
	__this_cpu_dec(bpf_prog_active);
	preempt_enable();

	return verdict;
}
EXPORT_SYMBOL_GPL(trace_call_bpf);

/*
 * BPF helper: copy r2 bytes from the address in r3 to the buffer in r1
 * using probe_kernel_read() and pass its return value back to the program.
 * r4 and r5 are unused.
 */
static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
	void *to = (void *) (long) r1;
	void *from = (void *) (long) r3;
	int len = (int) r2;

	return probe_kernel_read(to, from, len);
}

/*
 * Description of the bpf_probe_read() helper: its implementation, the
 * gpl_only flag, the return type and the type of each of the three
 * arguments the helper actually uses.
 */
static const struct bpf_func_proto bpf_probe_read_proto = {
	.func		= bpf_probe_read,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_STACK,	/* r1: destination buffer */
	.arg2_type	= ARG_CONST_STACK_SIZE,	/* r2: number of bytes to copy */
	.arg3_type	= ARG_ANYTHING,	/* r3: source address, passed through unchecked */
};

/*
 * limited trace_printk()
 * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
 *
 * r1 is the format string, fmt_size its size in bytes including the
 * terminating NUL, and r3..r5 are the (at most three) values to print.
 * Returns -EINVAL when the format string is rejected, otherwise the
 * return value of __trace_printk().
 */
static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
{
	char *fmt = (char *) (long) r1;
	/* per argument width: 0 = int, 1 = long or pointer, 2 = long long */
	int mod[3] = {};
	int fmt_cnt = 0;
	int i;

	/*
	 * bpf_check()->check_func_arg()->check_stack_boundary()
	 * guarantees that fmt points to bpf program stack,
	 * fmt_size bytes of it were initialized and fmt_size > 0
	 */
	if (fmt[--fmt_size] != 0)
		return -EINVAL;

	/* check format string for allowed specifiers */
	for (i = 0; i < fmt_size; i++) {
		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
			return -EINVAL;

		if (fmt[i] != '%')
			continue;

		/* only three values (r3, r4, r5) are available */
		if (fmt_cnt >= 3)
			return -EINVAL;

		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
		i++;
		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		} else if (fmt[i] == 'p') {
			mod[fmt_cnt]++;
			/*
			 * Disallow any further format extension after %p:
			 * the next character must be NUL, whitespace or
			 * punctuation.  fmt[i] != 0 here, so fmt[i + 1] is
			 * still inside the buffer.
			 *
			 * Only peek at that character, do not consume it:
			 * the loop has to validate it like any other one.
			 * Consuming it would let a '%' that directly follows
			 * %p slip through unchecked, so that e.g. "%p%s" or
			 * a fourth conversion would be accepted.
			 */
			if (fmt[i + 1] != 0 &&
			    !isspace(fmt[i + 1]) &&
			    !ispunct(fmt[i + 1]))
				return -EINVAL;
			fmt_cnt++;
			continue;
		}

		/* optional second 'l' for %ll[dux] */
		if (fmt[i] == 'l') {
			mod[fmt_cnt]++;
			i++;
		}

		if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
			return -EINVAL;
		fmt_cnt++;
	}

	/*
	 * NOTE(review): all arms of each ?: below are converted to one
	 * common type, so the casts select the value that is passed, not
	 * the type seen by the varargs callee - confirm this is what
	 * __trace_printk() needs on 32-bit builds.
	 */
	return __trace_printk(1/* fake ip will not be printed */, fmt,
			      mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
			      mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
			      mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
}

/*
 * Description of the bpf_trace_printk() helper.  Only the first two
 * arguments (format string and its size) are typed here; the values to
 * print in r3..r5 carry no argument type in this description.
 */
static const struct bpf_func_proto bpf_trace_printk_proto = {
	.func		= bpf_trace_printk,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_STACK,	/* r1: format string */
	.arg2_type	= ARG_CONST_STACK_SIZE,	/* r2: size of the format string */
};

/*
 * Translate a helper id used by a kprobe program into the description of
 * that helper.  Ids that are not listed below yield NULL.
 */
static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
{
	const struct bpf_func_proto *proto = NULL;

	switch (func_id) {
	case BPF_FUNC_map_lookup_elem:
		proto = &bpf_map_lookup_elem_proto;
		break;
	case BPF_FUNC_map_update_elem:
		proto = &bpf_map_update_elem_proto;
		break;
	case BPF_FUNC_map_delete_elem:
		proto = &bpf_map_delete_elem_proto;
		break;
	case BPF_FUNC_probe_read:
		proto = &bpf_probe_read_proto;
		break;
	case BPF_FUNC_ktime_get_ns:
		proto = &bpf_ktime_get_ns_proto;
		break;
	case BPF_FUNC_tail_call:
		proto = &bpf_tail_call_proto;
		break;
	case BPF_FUNC_trace_printk:
		/*
		 * the program asking for this helper might end up calling
		 * bpf_trace_printk, so get the per-cpu printk buffers
		 * allocated before handing the helper out
		 */
		trace_printk_init_buffers();
		proto = &bpf_trace_printk_proto;
		break;
	default:
		break;
	}

	return proto;
}

/*
 * bpf+kprobe programs can access fields of 'struct pt_regs'
 *
 * @off:  byte offset of the access into struct pt_regs
 * @size: width of the access in bytes
 * @type: kind of access; only BPF_READ is accepted
 *
 * Returns true when the access is a naturally aligned read that lies
 * completely inside struct pt_regs, false otherwise.
 */
static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type)
{
	/* only read is allowed */
	if (type != BPF_READ)
		return false;

	/*
	 * a non-positive width is never a valid access; rejecting it here
	 * also keeps the modulo below well defined
	 */
	if (size <= 0)
		return false;

	/* check bounds of the first byte */
	if (off < 0 || off >= sizeof(struct pt_regs))
		return false;

	/* disallow misaligned access */
	if (off % size != 0)
		return false;

	/*
	 * check bounds of the last byte: an aligned access can still run
	 * past the end when sizeof(struct pt_regs) is not a multiple of the
	 * access width (e.g. an 8 byte read of a trailing 4 byte member).
	 * off < sizeof(struct pt_regs) was established above, so the
	 * subtraction cannot wrap.
	 */
	if ((size_t) size > sizeof(struct pt_regs) - off)
		return false;

	return true;
}

/*
 * Verifier callbacks for kprobe programs: which helpers they may call and
 * which context accesses they may perform.
 */
static struct bpf_verifier_ops kprobe_prog_ops = {
	.get_func_proto  = kprobe_prog_func_proto,
	.is_valid_access = kprobe_prog_is_valid_access,
};

/*
 * Program type entry tying BPF_PROG_TYPE_KPROBE to kprobe_prog_ops; passed
 * to bpf_register_prog_type() by register_kprobe_prog_ops().
 */
static struct bpf_prog_type_list kprobe_tl = {
	.ops	= &kprobe_prog_ops,
	.type	= BPF_PROG_TYPE_KPROBE,
};

/*
 * Register the kprobe program type entry (kprobe_tl).  Hooked up through
 * late_initcall() below; always returns 0.
 */
static int __init register_kprobe_prog_ops(void)
{
	bpf_register_prog_type(&kprobe_tl);
	return 0;
}
late_initcall(register_kprobe_prog_ops);
Commit	Line	Data
2541517c AS	1	/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
	2	*
	3	* This program is free software; you can redistribute it and/or
	4	* modify it under the terms of version 2 of the GNU General Public
	5	* License as published by the Free Software Foundation.
	6	*/
	7	#include <linux/kernel.h>
	8	#include <linux/types.h>
	9	#include <linux/slab.h>
	10	#include <linux/bpf.h>
	11	#include <linux/filter.h>
	12	#include <linux/uaccess.h>
9c959c86	13	#include <linux/ctype.h>
2541517c AS	14	#include "trace.h"
	15
	16	static DEFINE_PER_CPU(int, bpf_prog_active);
	17
	18	/**
	19	* trace_call_bpf - invoke BPF program
	20	* @prog: BPF program
	21	* @ctx: opaque context pointer
	22	*
	23	* kprobe handlers execute BPF programs via this helper.
	24	* Can be used from static tracepoints in the future.
	25	*
	26	* Return: BPF programs always return an integer which is interpreted by
	27	* kprobe handler as:
	28	* 0 - return from kprobe (event is filtered out)
	29	* 1 - store kprobe event into ring buffer
	30	* Other values are reserved and currently alias to 1
	31	*/
	32	unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
	33	{
	34	unsigned int ret;
	35
	36	if (in_nmi()) /* not supported yet */
	37	return 1;
	38
	39	preempt_disable();
	40
	41	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
	42	/*
	43	* since some bpf program is already running on this cpu,
	44	* don't call into another bpf program (same or different)
	45	* and don't send kprobe event into ring-buffer,
	46	* so return zero here
	47	*/
	48	ret = 0;
	49	goto out;
	50	}
	51
	52	rcu_read_lock();
	53	ret = BPF_PROG_RUN(prog, ctx);
	54	rcu_read_unlock();
	55
	56	out:
	57	__this_cpu_dec(bpf_prog_active);
	58	preempt_enable();
	59
	60	return ret;
	61	}
	62	EXPORT_SYMBOL_GPL(trace_call_bpf);
	63
	64	static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
	65	{
	66	void *dst = (void *) (long) r1;
	67	int size = (int) r2;
	68	void *unsafe_ptr = (void *) (long) r3;
	69
	70	return probe_kernel_read(dst, unsafe_ptr, size);
	71	}
	72
	73	static const struct bpf_func_proto bpf_probe_read_proto = {
	74	.func = bpf_probe_read,
	75	.gpl_only = true,
	76	.ret_type = RET_INTEGER,
	77	.arg1_type = ARG_PTR_TO_STACK,
78	.arg2_type = ARG_CONST_STACK_SIZE,
79	.arg3_type = ARG_ANYTHING,
80	};
81
9c959c86 AS	82	/*
	83	* limited trace_printk()
	84	* only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
	85	*/
	86	static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
	87	{
	88	char *fmt = (char *) (long) r1;
	89	int mod[3] = {};
	90	int fmt_cnt = 0;
	91	int i;
	92
	93	/*
	94	* bpf_check()->check_func_arg()->check_stack_boundary()
	95	* guarantees that fmt points to bpf program stack,
	96	* fmt_size bytes of it were initialized and fmt_size > 0
	97	*/
	98	if (fmt[--fmt_size] != 0)
	99	return -EINVAL;
	100
	101	/* check format string for allowed specifiers */
	102	for (i = 0; i < fmt_size; i++) {
	103	if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
	104	return -EINVAL;
	105
	106	if (fmt[i] != '%')
	107	continue;
	108
	109	if (fmt_cnt >= 3)
	110	return -EINVAL;
	111
	112	/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
	113	i++;
	114	if (fmt[i] == 'l') {
	115	mod[fmt_cnt]++;
	116	i++;
	117	} else if (fmt[i] == 'p') {
	118	mod[fmt_cnt]++;
	119	i++;
	120	if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
	121	return -EINVAL;
	122	fmt_cnt++;
	123	continue;
	124	}
	125
	126	if (fmt[i] == 'l') {
	127	mod[fmt_cnt]++;
	128	i++;
	129	}
	130
	131	if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
	132	return -EINVAL;
	133	fmt_cnt++;
	134	}
	135
	136	return __trace_printk(1/* fake ip will not be printed */, fmt,
	137	mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
	138	mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
	139	mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
	140	}
	141
	142	static const struct bpf_func_proto bpf_trace_printk_proto = {
	143	.func = bpf_trace_printk,
	144	.gpl_only = true,
	145	.ret_type = RET_INTEGER,
146	.arg1_type = ARG_PTR_TO_STACK,
147	.arg2_type = ARG_CONST_STACK_SIZE,
148	};
149
2541517c AS	150	static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
	151	{
	152	switch (func_id) {
	153	case BPF_FUNC_map_lookup_elem:
	154	return &bpf_map_lookup_elem_proto;
	155	case BPF_FUNC_map_update_elem:
	156	return &bpf_map_update_elem_proto;
	157	case BPF_FUNC_map_delete_elem:
	158	return &bpf_map_delete_elem_proto;
	159	case BPF_FUNC_probe_read:
	160	return &bpf_probe_read_proto;
d9847d31 AS	161	case BPF_FUNC_ktime_get_ns:
d9847d31 AS	162	return &bpf_ktime_get_ns_proto;
04fd61ab AS	163	case BPF_FUNC_tail_call:
04fd61ab AS	164	return &bpf_tail_call_proto;
9c959c86 AS	165
	166	case BPF_FUNC_trace_printk:
	167	/*
	168	* this program might be calling bpf_trace_printk,
	169	* so allocate per-cpu printk buffers
	170	*/
	171	trace_printk_init_buffers();
	172
	173	return &bpf_trace_printk_proto;
2541517c AS	174	default:
	175	return NULL;
	176	}
	177	}
	178
	179	/* bpf+kprobe programs can access fields of 'struct pt_regs' */
	180	static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type)
	181	{
	182	/* check bounds */
	183	if (off < 0 || off >= sizeof(struct pt_regs))
	184	return false;
	185
	186	/* only read is allowed */
	187	if (type != BPF_READ)
	188	return false;
	189
	190	/* disallow misaligned access */
	191	if (off % size != 0)
	192	return false;
	193
	194	return true;
	195	}
	196
	197	static struct bpf_verifier_ops kprobe_prog_ops = {
	198	.get_func_proto = kprobe_prog_func_proto,
	199	.is_valid_access = kprobe_prog_is_valid_access,
	200	};
	201
	202	static struct bpf_prog_type_list kprobe_tl = {
	203	.ops = &kprobe_prog_ops,
	204	.type = BPF_PROG_TYPE_KPROBE,
	205	};
	206
	207	static int __init register_kprobe_prog_ops(void)
	208	{
	209	bpf_register_prog_type(&kprobe_tl);
	210	return 0;
	211	}
	212	late_initcall(register_kprobe_prog_ops);