ftrace, perf: Add open/close tracepoint perf registration actions
kernel/trace/trace_event_perf.c (linux-2.6-block.git)
/*
 * trace event based perf event profiling/tracing
 *
 * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra <pzijlstr@redhat.com>
 * Copyright (C) 2009-2010 Frederic Weisbecker <fweisbec@gmail.com>
 */

#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"

static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];

/*
 * Force it to be aligned to unsigned long to avoid misaligned access
 * surprises
 */
typedef typeof(unsigned long [PERF_MAX_TRACE_SIZE / sizeof(unsigned long)])
	perf_trace_t;

/* Count the events in use (per event id, not per instance) */
static int	total_ref_count;

static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	/* No tracing, just counting, so no obvious leak */
	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
		return 0;

	/* Some events are ok to be traced by non-root users... */
	if (p_event->attach_state == PERF_ATTACH_TASK) {
		if (tp_event->flags & TRACE_EVENT_FL_CAP_ANY)
			return 0;
	}

	/*
	 * ...otherwise raw tracepoint data can be a severe data leak,
	 * only allow root to have these.
	 */
	if (perf_paranoid_tracepoint_raw() && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	return 0;
}
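
/*
 * Illustrative sketch (editor's addition, not in the original file): the
 * checks above are what a non-root perf_event_open() of a tracepoint runs
 * into. A hypothetical userspace caller asking for raw samples on a
 * per-task event would look roughly like:
 *
 *	struct perf_event_attr attr = {
 *		.type		= PERF_TYPE_TRACEPOINT,
 *		.config		= event_id,	   // tracepoint id
 *		.sample_type	= PERF_SAMPLE_RAW, // the leak-prone case
 *	};
 *	// pid >= 0, cpu == -1: task-attached, so TRACE_EVENT_FL_CAP_ANY
 *	// classes are allowed even without CAP_SYS_ADMIN
 *	fd = syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
 */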

static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
				struct perf_event *p_event)
{
	struct hlist_head __percpu *list;
	int ret = -ENOMEM;
	int cpu;

	p_event->tp_event = tp_event;
	if (tp_event->perf_refcount++ > 0)
		return 0;

	list = alloc_percpu(struct hlist_head);
	if (!list)
		goto fail;

	for_each_possible_cpu(cpu)
		INIT_HLIST_HEAD(per_cpu_ptr(list, cpu));

	tp_event->perf_events = list;

	if (!total_ref_count) {
		char __percpu *buf;
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			buf = (char __percpu *)alloc_percpu(perf_trace_t);
			if (!buf)
				goto fail;

			perf_trace_buf[i] = buf;
		}
	}

	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
	if (ret)
		goto fail;

	total_ref_count++;
	return 0;

fail:
	if (!total_ref_count) {
		int i;

		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}

	if (!--tp_event->perf_refcount) {
		free_percpu(tp_event->perf_events);
		tp_event->perf_events = NULL;
	}

	return ret;
}
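
/*
 * Sizing sketch (editor's note, derived from the code above): the first
 * registration allocates one per-cpu scratch buffer per recursion context
 * (task, softirq, hardirq, NMI), shared by all tracepoints, so the total
 * footprint is roughly:
 *
 *	PERF_NR_CONTEXTS * nr_cpu_ids * PERF_MAX_TRACE_SIZE bytes
 *
 * paid once, not per event: total_ref_count gates both this allocation
 * and the teardown in perf_trace_event_unreg().
 */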

static void perf_trace_event_unreg(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	int i;

	if (--tp_event->perf_refcount > 0)
		return;

	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);

	/*
	 * Ensure our callback won't be called anymore. The buffers
	 * will be freed after that.
	 */
	tracepoint_synchronize_unregister();

	free_percpu(tp_event->perf_events);
	tp_event->perf_events = NULL;

	if (!--total_ref_count) {
		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
			free_percpu(perf_trace_buf[i]);
			perf_trace_buf[i] = NULL;
		}
	}
}

static int perf_trace_event_open(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
}

static void perf_trace_event_close(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
}
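
/*
 * Background sketch (editor's addition): unlike TRACE_REG_PERF_REGISTER /
 * UNREGISTER above, which run once per event id, OPEN and CLOSE fire for
 * every individual perf_event and receive it as the reg() callback's third
 * argument, giving the event class a per-instance hook. A hypothetical
 * class that needs nothing per event can simply ignore the new commands:
 *
 *	static int my_class_reg(struct ftrace_event_call *call,
 *				enum trace_reg type, void *data)
 *	{
 *		switch (type) {
 *		case TRACE_REG_PERF_OPEN:
 *		case TRACE_REG_PERF_CLOSE:
 *			return 0;	// nothing to do per event
 *		default:
 *			return -EINVAL;	// real classes handle the rest
 *		}
 *	}
 */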

static int perf_trace_event_init(struct ftrace_event_call *tp_event,
				 struct perf_event *p_event)
{
	int ret;

	ret = perf_trace_event_perm(tp_event, p_event);
	if (ret)
		return ret;

	ret = perf_trace_event_reg(tp_event, p_event);
	if (ret)
		return ret;

	ret = perf_trace_event_open(p_event);
	if (ret) {
		perf_trace_event_unreg(p_event);
		return ret;
	}

	return 0;
}

int perf_trace_init(struct perf_event *p_event)
{
	struct ftrace_event_call *tp_event;
	int event_id = p_event->attr.config;
	int ret = -EINVAL;

	mutex_lock(&event_mutex);
	list_for_each_entry(tp_event, &ftrace_events, list) {
		if (tp_event->event.type == event_id &&
		    tp_event->class && tp_event->class->reg &&
		    try_module_get(tp_event->mod)) {
			ret = perf_trace_event_init(tp_event, p_event);
			if (ret)
				module_put(tp_event->mod);
			break;
		}
	}
	mutex_unlock(&event_mutex);

	return ret;
}
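
/*
 * Userspace path (sketch): this is reached from perf_event_open() with
 * attr.type == PERF_TYPE_TRACEPOINT and attr.config holding the event id,
 * which tools usually read from debugfs, e.g.
 * /sys/kernel/debug/tracing/events/sched/sched_switch/id:
 *
 *	attr.type   = PERF_TYPE_TRACEPOINT;
 *	attr.config = id;	// value from the debugfs "id" file
 *	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
 *
 * Closing that fd eventually ends up in perf_trace_destroy() below.
 */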

void perf_trace_destroy(struct perf_event *p_event)
{
	mutex_lock(&event_mutex);
	perf_trace_event_close(p_event);
	perf_trace_event_unreg(p_event);
	/*
	 * Drop the module reference taken in perf_trace_init() here rather
	 * than in perf_trace_event_unreg(): the open-failure path of
	 * perf_trace_event_init() also calls unreg, and its caller already
	 * does a module_put(), which used to double-drop the reference.
	 */
	module_put(p_event->tp_event->mod);
	mutex_unlock(&event_mutex);
}

int perf_trace_add(struct perf_event *p_event, int flags)
{
	struct ftrace_event_call *tp_event = p_event->tp_event;
	struct hlist_head __percpu *pcpu_list;
	struct hlist_head *list;

	pcpu_list = tp_event->perf_events;
	if (WARN_ON_ONCE(!pcpu_list))
		return -EINVAL;

	if (!(flags & PERF_EF_START))
		p_event->hw.state = PERF_HES_STOPPED;

	list = this_cpu_ptr(pcpu_list);
	hlist_add_head_rcu(&p_event->hlist_entry, list);

	return 0;
}

void perf_trace_del(struct perf_event *p_event, int flags)
{
	hlist_del_rcu(&p_event->hlist_entry);
}
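
/*
 * Scheduling sketch (editor's note): these are the tracepoint pmu's
 * ->add()/->del() callbacks and run on the cpu where the event is being
 * scheduled in or out, so a firing tracepoint only walks events active on
 * the local cpu. The handler side passes the local list along, roughly:
 *
 *	head = this_cpu_ptr(tp_event->perf_events);
 *	perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
 */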

__kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
				       struct pt_regs *regs, int *rctxp)
{
	struct trace_entry *entry;
	unsigned long flags;
	char *raw_data;
	int pc;

	BUILD_BUG_ON(PERF_MAX_TRACE_SIZE % sizeof(unsigned long));

	pc = preempt_count();

	*rctxp = perf_swevent_get_recursion_context();
	if (*rctxp < 0)
		return NULL;

	raw_data = this_cpu_ptr(perf_trace_buf[*rctxp]);

	/* zero the dead bytes from align to not leak stack to user */
	memset(&raw_data[size - sizeof(u64)], 0, sizeof(u64));

	entry = (struct trace_entry *)raw_data;
	local_save_flags(flags);
	tracing_generic_entry_update(entry, flags, pc);
	entry->type = type;

	return raw_data;
}
EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
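
/*
 * Typical caller pattern (sketch, mirroring the generated perf_trace_##call
 * handlers of this era; "event_type" stands for event_call->event.type).
 * Callers pick a size such that size plus perf's u32 raw-sample header is
 * u64 aligned; the tail memset above then zeroes the alignment padding so
 * stack bytes never reach userspace:
 *
 *	__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),
 *			     sizeof(u64));
 *	__entry_size -= sizeof(u32);
 *
 *	entry = perf_trace_buf_prepare(__entry_size, event_type,
 *				       &__regs, &rctx);
 *	if (!entry)
 *		return;
 *	// ... fill in the event-specific fields of *entry ...
 *	perf_trace_buf_submit(entry, __entry_size, rctx, 0, 1,
 *			      &__regs, head);
 */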