Commit | Line | Data |
---|---|---|
eee3af4a MM |
1 | /* |
2 | * Debug Store support | |
3 | * | |
4 | * This provides a low-level interface to the hardware's Debug Store | |
93fa7636 | 5 | * feature that is used for branch trace store (BTS) and |
eee3af4a MM |
6 | * precise-event based sampling (PEBS). |
7 | * | |
93fa7636 | 8 | * It manages: |
c2724775 | 9 | * - DS and BTS hardware configuration |
6abb11ae | 10 | * - buffer overflow handling (to be done) |
93fa7636 | 11 | * - buffer access |
eee3af4a | 12 | * |
c2724775 MM |
13 | * It does not do: |
14 | * - security checking (is the caller allowed to trace the task) | |
15 | * - buffer allocation (memory accounting) | |
eee3af4a | 16 | * |
eee3af4a | 17 | * |
ba2607fe MM |
18 | * Copyright (C) 2007-2009 Intel Corporation. |
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | |
eee3af4a MM |
20 | */ |
21 | ||
e9a22d1f | 22 | #include <linux/kernel.h> |
eee3af4a | 23 | #include <linux/string.h> |
e9a22d1f | 24 | #include <linux/errno.h> |
93fa7636 | 25 | #include <linux/sched.h> |
e9a22d1f | 26 | #include <linux/slab.h> |
3c933904 | 27 | #include <linux/mm.h> |
15879d04 | 28 | #include <linux/trace_clock.h> |
e9a22d1f IM |
29 | |
30 | #include <asm/ds.h> | |
93fa7636 | 31 | |
8a327f6d | 32 | #include "ds_selftest.h" |
93fa7636 MM |
33 | |
34 | /* | |
e9a22d1f | 35 | * The configuration for a particular DS hardware implementation: |
93fa7636 MM |
36 | */ |
37 | struct ds_configuration { | |
e9a22d1f IM |
38 | /* The name of the configuration: */ |
39 | const char *name; | |
40 | ||
41 | /* The size of pointer-typed fields in DS, BTS, and PEBS: */ | |
42 | unsigned char sizeof_ptr_field; | |
43 | ||
44 | /* The size of a BTS/PEBS record in bytes: */ | |
45 | unsigned char sizeof_rec[2]; | |
46 | ||
017bc617 MM |
47 | /* The number of pebs counter reset values in the DS structure. */ |
48 | unsigned char nr_counter_reset; | |
49 | ||
e9a22d1f IM |
50 | /* Control bit-masks indexed by enum ds_feature: */ |
51 | unsigned long ctl[dsf_ctl_max]; | |
93fa7636 | 52 | }; |
ee811517 | 53 | static struct ds_configuration ds_cfg __read_mostly; |
c2724775 | 54 | |
c2724775 | 55 | |
/* Upper bound on the size of a DS configuration, in bytes: */
#define MAX_SIZEOF_DS		0x80

/* Upper bound on the size of a single BTS record, in bytes: */
#define MAX_SIZEOF_BTS		(3 * 8)

/* Alignment of the BTS and PEBS buffers: */
#define DS_ALIGNMENT		(1 << 3)

/* Number of buffer pointer fields in DS: */
#define NUM_DS_PTR_FIELDS	8

/* Size of one pebs reset value in DS: */
#define PEBS_RESET_FIELD_SIZE	8

/* All BTS control bits of the DS MSR register, combined into one mask: */
#define BTS_CONTROL					\
	(ds_cfg.ctl[dsf_bts]			|	\
	 ds_cfg.ctl[dsf_bts_kernel]		|	\
	 ds_cfg.ctl[dsf_bts_user]		|	\
	 ds_cfg.ctl[dsf_bts_overflow])
eee3af4a | 77 | |
ca0002a1 MM |
78 | /* |
79 | * A BTS or PEBS tracer. | |
80 | * | |
81 | * This holds the configuration of the tracer and serves as a handle | |
82 | * to identify tracers. | |
83 | */ | |
struct ds_tracer {
	/* DS context that this tracer (partially) owns. */
	struct ds_context	*context;

	/* Buffer handed in on ds_request(), and its size in bytes. */
	void			*buffer;
	size_t			size;
};
91 | ||
92 | struct bts_tracer { | |
e9a22d1f IM |
93 | /* The common DS part: */ |
94 | struct ds_tracer ds; | |
95 | ||
96 | /* The trace including the DS configuration: */ | |
97 | struct bts_trace trace; | |
98 | ||
99 | /* Buffer overflow notification function: */ | |
100 | bts_ovfl_callback_t ovfl; | |
cac94f97 MM |
101 | |
102 | /* Active flags affecting trace collection. */ | |
103 | unsigned int flags; | |
ca0002a1 MM |
104 | }; |
105 | ||
106 | struct pebs_tracer { | |
e9a22d1f IM |
107 | /* The common DS part: */ |
108 | struct ds_tracer ds; | |
109 | ||
110 | /* The trace including the DS configuration: */ | |
111 | struct pebs_trace trace; | |
112 | ||
113 | /* Buffer overflow notification function: */ | |
114 | pebs_ovfl_callback_t ovfl; | |
ca0002a1 | 115 | }; |
eee3af4a MM |
116 | |
117 | /* | |
118 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | |
119 | * Architectures Software Developer's Manual, section 18.5) | |
120 | * | |
121 | * The DS configuration consists of the following fields; different | |
122 | * architectures vary in the size of those fields. | |
e9a22d1f | 123 | * |
eee3af4a MM |
124 | * - double-word aligned base linear address of the BTS buffer |
125 | * - write pointer into the BTS buffer | |
126 | * - end linear address of the BTS buffer (one byte beyond the end of | |
127 | * the buffer) | |
128 | * - interrupt pointer into BTS buffer | |
129 | * (interrupt occurs when write pointer passes interrupt pointer) | |
130 | * - double-word aligned base linear address of the PEBS buffer | |
131 | * - write pointer into the PEBS buffer | |
132 | * - end linear address of the PEBS buffer (one byte beyond the end of | |
133 | * the buffer) | |
134 | * - interrupt pointer into PEBS buffer | |
135 | * (interrupt occurs when write pointer passes interrupt pointer) | |
136 | * - value to which counter is reset following counter overflow | |
137 | * | |
93fa7636 MM |
138 | * Later architectures use 64bit pointers throughout, whereas earlier |
139 | * architectures use 32bit pointers in 32bit mode. | |
eee3af4a | 140 | * |
eee3af4a | 141 | * |
93fa7636 MM |
142 | * We compute the base address for the first 8 fields based on: |
143 | * - the field size stored in the DS configuration | |
144 | * - the relative field position | |
145 | * - an offset giving the start of the respective region | |
eee3af4a | 146 | * |
93fa7636 MM |
147 | * This offset is further used to index various arrays holding |
148 | * information for BTS and PEBS at the respective index. | |
eee3af4a | 149 | * |
93fa7636 MM |
150 | * On later 32bit processors, we only access the lower 32bit of the |
151 | * 64bit pointer fields. The upper halves will be zeroed out. | |
eee3af4a MM |
152 | */ |
153 | ||
93fa7636 MM |
/* Position of a pointer field within one qualifier's group of four. */
enum ds_field {
	ds_buffer_base		= 0,
	ds_index		= 1,
	ds_absolute_maximum	= 2,
	ds_interrupt_threshold	= 3,
};
eee3af4a | 160 | |
/* Selects the BTS or the PEBS half of the DS configuration. */
enum ds_qualifier {
	ds_bts	= 0,
	ds_pebs	= 1
};
165 | ||
e9a22d1f IM |
166 | static inline unsigned long |
167 | ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) | |
93fa7636 | 168 | { |
bc44fb5f | 169 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); |
93fa7636 MM |
170 | return *(unsigned long *)base; |
171 | } | |
172 | ||
e9a22d1f IM |
173 | static inline void |
174 | ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, | |
175 | unsigned long value) | |
93fa7636 | 176 | { |
bc44fb5f | 177 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); |
93fa7636 MM |
178 | (*(unsigned long *)base) = value; |
179 | } | |
180 | ||
181 | ||
eee3af4a | 182 | /* |
6abb11ae | 183 | * Locking is done only for allocating BTS or PEBS resources. |
eee3af4a | 184 | */ |
c2724775 | 185 | static DEFINE_SPINLOCK(ds_lock); |
eee3af4a | 186 | |
eee3af4a | 187 | /* |
93fa7636 MM |
188 | * We either support (system-wide) per-cpu or per-thread allocation. |
189 | * We distinguish the two based on the task_struct pointer, where a | |
190 | * NULL pointer indicates per-cpu allocation for the current cpu. | |
191 | * | |
192 | * Allocations are use-counted. As soon as resources are allocated, | |
193 | * further allocations must be of the same type (per-cpu or | |
194 | * per-thread). We model this by counting allocations (i.e. the number | |
195 | * of tracers of a certain type) for one type negatively: | |
196 | * =0 no tracers | |
197 | * >0 number of per-thread tracers | |
198 | * <0 number of per-cpu tracers | |
199 | * | |
93fa7636 MM |
200 | * Tracers essentially gives the number of ds contexts for a certain |
201 | * type of allocation. | |
eee3af4a | 202 | */ |
c2724775 | 203 | static atomic_t tracers = ATOMIC_INIT(0); |
93fa7636 | 204 | |
38f80112 | 205 | static inline int get_tracer(struct task_struct *task) |
eee3af4a | 206 | { |
38f80112 MM |
207 | int error; |
208 | ||
209 | spin_lock_irq(&ds_lock); | |
210 | ||
211 | if (task) { | |
212 | error = -EPERM; | |
213 | if (atomic_read(&tracers) < 0) | |
214 | goto out; | |
c2724775 | 215 | atomic_inc(&tracers); |
38f80112 MM |
216 | } else { |
217 | error = -EPERM; | |
218 | if (atomic_read(&tracers) > 0) | |
219 | goto out; | |
c2724775 | 220 | atomic_dec(&tracers); |
38f80112 MM |
221 | } |
222 | ||
223 | error = 0; | |
224 | out: | |
225 | spin_unlock_irq(&ds_lock); | |
226 | return error; | |
eee3af4a | 227 | } |
93fa7636 MM |
228 | |
229 | static inline void put_tracer(struct task_struct *task) | |
eee3af4a | 230 | { |
c2724775 MM |
231 | if (task) |
232 | atomic_dec(&tracers); | |
233 | else | |
234 | atomic_inc(&tracers); | |
eee3af4a | 235 | } |
93fa7636 | 236 | |
93fa7636 MM |
237 | /* |
238 | * The DS context is either attached to a thread or to a cpu: | |
239 | * - in the former case, the thread_struct contains a pointer to the | |
240 | * attached context. | |
241 | * - in the latter case, we use a static array of per-cpu context | |
242 | * pointers. | |
243 | * | |
244 | * Contexts are use-counted. They are allocated on first access and | |
245 | * deallocated when the last user puts the context. | |
93fa7636 | 246 | */ |
c2724775 | 247 | struct ds_context { |
e9a22d1f IM |
248 | /* The DS configuration; goes into MSR_IA32_DS_AREA: */ |
249 | unsigned char ds[MAX_SIZEOF_DS]; | |
250 | ||
251 | /* The owner of the BTS and PEBS configuration, respectively: */ | |
252 | struct bts_tracer *bts_master; | |
253 | struct pebs_tracer *pebs_master; | |
254 | ||
255 | /* Use count: */ | |
de79f54f | 256 | unsigned long count; |
e9a22d1f IM |
257 | |
258 | /* Pointer to the context pointer field: */ | |
259 | struct ds_context **this; | |
260 | ||
de79f54f | 261 | /* The traced task; NULL for cpu tracing: */ |
e9a22d1f | 262 | struct task_struct *task; |
c2724775 | 263 | |
de79f54f MM |
264 | /* The traced cpu; only valid if task is NULL: */ |
265 | int cpu; | |
266 | }; | |
93fa7636 | 267 | |
0fe1e009 | 268 | static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context); |
93fa7636 | 269 | |
cc1dc6d0 | 270 | |
de79f54f | 271 | static struct ds_context *ds_get_context(struct task_struct *task, int cpu) |
eee3af4a | 272 | { |
93fa7636 | 273 | struct ds_context **p_context = |
0fe1e009 | 274 | (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu)); |
cc1dc6d0 MM |
275 | struct ds_context *context = NULL; |
276 | struct ds_context *new_context = NULL; | |
93fa7636 | 277 | |
de79f54f MM |
278 | /* Chances are small that we already have a context. */ |
279 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); | |
cc1dc6d0 MM |
280 | if (!new_context) |
281 | return NULL; | |
de90add3 | 282 | |
de79f54f | 283 | spin_lock_irq(&ds_lock); |
93fa7636 | 284 | |
cc1dc6d0 | 285 | context = *p_context; |
de79f54f | 286 | if (likely(!context)) { |
cc1dc6d0 | 287 | context = new_context; |
93fa7636 | 288 | |
cc1dc6d0 MM |
289 | context->this = p_context; |
290 | context->task = task; | |
de79f54f | 291 | context->cpu = cpu; |
cc1dc6d0 | 292 | context->count = 0; |
93fa7636 | 293 | |
cc1dc6d0 MM |
294 | *p_context = context; |
295 | } | |
c2724775 | 296 | |
cc1dc6d0 | 297 | context->count++; |
c2724775 | 298 | |
de79f54f | 299 | spin_unlock_irq(&ds_lock); |
93fa7636 | 300 | |
cc1dc6d0 MM |
301 | if (context != new_context) |
302 | kfree(new_context); | |
93fa7636 MM |
303 | |
304 | return context; | |
eee3af4a | 305 | } |
93fa7636 | 306 | |
de79f54f | 307 | static void ds_put_context(struct ds_context *context) |
eee3af4a | 308 | { |
8d99b3ac | 309 | struct task_struct *task; |
de90add3 MM |
310 | unsigned long irq; |
311 | ||
93fa7636 MM |
312 | if (!context) |
313 | return; | |
314 | ||
de90add3 | 315 | spin_lock_irqsave(&ds_lock, irq); |
93fa7636 | 316 | |
c2724775 MM |
317 | if (--context->count) { |
318 | spin_unlock_irqrestore(&ds_lock, irq); | |
319 | return; | |
320 | } | |
93fa7636 | 321 | |
573da422 | 322 | *(context->this) = NULL; |
93fa7636 | 323 | |
8d99b3ac MM |
324 | task = context->task; |
325 | ||
326 | if (task) | |
327 | clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); | |
93fa7636 | 328 | |
de79f54f MM |
329 | /* |
330 | * We leave the (now dangling) pointer to the DS configuration in | |
331 | * the DS_AREA msr. This is as good or as bad as replacing it with | |
332 | * NULL - the hardware would crash if we enabled tracing. | |
333 | * | |
334 | * This saves us some problems with having to write an msr on a | |
335 | * different cpu while preventing others from doing the same for the | |
336 | * next context for that same cpu. | |
337 | */ | |
93fa7636 | 338 | |
de90add3 | 339 | spin_unlock_irqrestore(&ds_lock, irq); |
c2724775 | 340 | |
8d99b3ac MM |
341 | /* The context might still be in use for context switching. */ |
342 | if (task && (task != current)) | |
343 | wait_task_context_switch(task); | |
344 | ||
c2724775 | 345 | kfree(context); |
eee3af4a | 346 | } |
93fa7636 | 347 | |
de79f54f MM |
348 | static void ds_install_ds_area(struct ds_context *context) |
349 | { | |
350 | unsigned long ds; | |
351 | ||
352 | ds = (unsigned long)context->ds; | |
353 | ||
354 | /* | |
355 | * There is a race between the bts master and the pebs master. | |
356 | * | |
357 | * The thread/cpu access is synchronized via get/put_cpu() for | |
358 | * task tracing and via wrmsr_on_cpu for cpu tracing. | |
359 | * | |
360 | * If bts and pebs are collected for the same task or same cpu, | |
361 | * the same confiuration is written twice. | |
362 | */ | |
363 | if (context->task) { | |
364 | get_cpu(); | |
365 | if (context->task == current) | |
366 | wrmsrl(MSR_IA32_DS_AREA, ds); | |
367 | set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | |
368 | put_cpu(); | |
369 | } else | |
370 | wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, | |
371 | (u32)((u64)ds), (u32)((u64)ds >> 32)); | |
372 | } | |
93fa7636 MM |
373 | |
374 | /* | |
c2724775 | 375 | * Call the tracer's callback on a buffer overflow. |
93fa7636 | 376 | * |
93fa7636 MM |
377 | * context: the ds context |
378 | * qual: the buffer type | |
379 | */ | |
ca0002a1 MM |
380 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) |
381 | { | |
382 | switch (qual) { | |
c2724775 MM |
383 | case ds_bts: |
384 | if (context->bts_master && | |
385 | context->bts_master->ovfl) | |
386 | context->bts_master->ovfl(context->bts_master); | |
387 | break; | |
388 | case ds_pebs: | |
389 | if (context->pebs_master && | |
390 | context->pebs_master->ovfl) | |
391 | context->pebs_master->ovfl(context->pebs_master); | |
ca0002a1 | 392 | break; |
ca0002a1 | 393 | } |
c2724775 MM |
394 | } |
395 | ||
396 | ||
397 | /* | |
398 | * Write raw data into the BTS or PEBS buffer. | |
399 | * | |
400 | * The remainder of any partially written record is zeroed out. | |
401 | * | |
402 | * context: the DS context | |
e9a22d1f IM |
403 | * qual: the buffer type |
404 | * record: the data to write | |
405 | * size: the size of the data | |
c2724775 MM |
406 | */ |
407 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |
408 | const void *record, size_t size) | |
409 | { | |
410 | int bytes_written = 0; | |
411 | ||
412 | if (!record) | |
413 | return -EINVAL; | |
414 | ||
415 | while (size) { | |
416 | unsigned long base, index, end, write_end, int_th; | |
417 | unsigned long write_size, adj_write_size; | |
418 | ||
419 | /* | |
b8e47195 | 420 | * Write as much as possible without producing an |
c2724775 MM |
421 | * overflow interrupt. |
422 | * | |
b8e47195 | 423 | * Interrupt_threshold must either be |
c2724775 MM |
424 | * - bigger than absolute_maximum or |
425 | * - point to a record between buffer_base and absolute_maximum | |
426 | * | |
b8e47195 | 427 | * Index points to a valid record. |
c2724775 MM |
428 | */ |
429 | base = ds_get(context->ds, qual, ds_buffer_base); | |
430 | index = ds_get(context->ds, qual, ds_index); | |
431 | end = ds_get(context->ds, qual, ds_absolute_maximum); | |
432 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | |
433 | ||
434 | write_end = min(end, int_th); | |
435 | ||
b8e47195 MM |
436 | /* |
437 | * If we are already beyond the interrupt threshold, | |
438 | * we fill the entire buffer. | |
439 | */ | |
c2724775 MM |
440 | if (write_end <= index) |
441 | write_end = end; | |
442 | ||
443 | if (write_end <= index) | |
444 | break; | |
445 | ||
446 | write_size = min((unsigned long) size, write_end - index); | |
447 | memcpy((void *)index, record, write_size); | |
448 | ||
449 | record = (const char *)record + write_size; | |
450 | size -= write_size; | |
451 | bytes_written += write_size; | |
452 | ||
453 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | |
454 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | |
455 | ||
b8e47195 | 456 | /* Zero out trailing bytes. */ |
c2724775 MM |
457 | memset((char *)index + write_size, 0, |
458 | adj_write_size - write_size); | |
459 | index += adj_write_size; | |
460 | ||
461 | if (index >= end) | |
462 | index = base; | |
463 | ds_set(context->ds, qual, ds_index, index); | |
464 | ||
465 | if (index >= int_th) | |
466 | ds_overflow(context, qual); | |
467 | } | |
468 | ||
469 | return bytes_written; | |
470 | } | |
471 | ||
472 | ||
473 | /* | |
474 | * Branch Trace Store (BTS) uses the following format. Different | |
475 | * architectures vary in the size of those fields. | |
476 | * - source linear address | |
477 | * - destination linear address | |
478 | * - flags | |
479 | * | |
480 | * Later architectures use 64bit pointers throughout, whereas earlier | |
481 | * architectures use 32bit pointers in 32bit mode. | |
482 | * | |
bc44fb5f | 483 | * We compute the base address for the fields based on: |
c2724775 MM |
484 | * - the field size stored in the DS configuration |
485 | * - the relative field position | |
486 | * | |
487 | * In order to store additional information in the BTS buffer, we use | |
488 | * a special source address to indicate that the record requires | |
489 | * special interpretation. | |
490 | * | |
491 | * Netburst indicated via a bit in the flags field whether the branch | |
492 | * was predicted; this is ignored. | |
493 | * | |
494 | * We use two levels of abstraction: | |
495 | * - the raw data level defined here | |
496 | * - an arch-independent level defined in ds.h | |
497 | */ | |
498 | ||
499 | enum bts_field { | |
500 | bts_from, | |
501 | bts_to, | |
502 | bts_flags, | |
503 | ||
e9a22d1f | 504 | bts_qual = bts_from, |
15879d04 | 505 | bts_clock = bts_to, |
e9a22d1f | 506 | bts_pid = bts_flags, |
c2724775 | 507 | |
e9a22d1f IM |
508 | bts_qual_mask = (bts_qual_max - 1), |
509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | |
c2724775 MM |
510 | }; |
511 | ||
feaa0457 | 512 | static inline unsigned long bts_get(const char *base, unsigned long field) |
c2724775 | 513 | { |
bc44fb5f | 514 | base += (ds_cfg.sizeof_ptr_field * field); |
c2724775 MM |
515 | return *(unsigned long *)base; |
516 | } | |
517 | ||
feaa0457 | 518 | static inline void bts_set(char *base, unsigned long field, unsigned long val) |
c2724775 | 519 | { |
feaa0457 | 520 | base += (ds_cfg.sizeof_ptr_field * field); |
c2724775 MM |
521 | (*(unsigned long *)base) = val; |
522 | } | |
523 | ||
524 | ||
525 | /* | |
526 | * The raw BTS data is architecture dependent. | |
527 | * | |
528 | * For higher-level users, we give an arch-independent view. | |
529 | * - ds.h defines struct bts_struct | |
530 | * - bts_read translates one raw bts record into a bts_struct | |
531 | * - bts_write translates one bts_struct into the raw format and | |
532 | * writes it into the top of the parameter tracer's buffer. | |
533 | * | |
534 | * return: bytes read/written on success; -Eerrno, otherwise | |
535 | */ | |
e9a22d1f IM |
536 | static int |
537 | bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) | |
c2724775 MM |
538 | { |
539 | if (!tracer) | |
540 | return -EINVAL; | |
541 | ||
542 | if (at < tracer->trace.ds.begin) | |
543 | return -EINVAL; | |
544 | ||
545 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | |
546 | return -EINVAL; | |
547 | ||
548 | memset(out, 0, sizeof(*out)); | |
549 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { | |
550 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); | |
15879d04 MM |
551 | out->variant.event.clock = bts_get(at, bts_clock); |
552 | out->variant.event.pid = bts_get(at, bts_pid); | |
c2724775 MM |
553 | } else { |
554 | out->qualifier = bts_branch; | |
555 | out->variant.lbr.from = bts_get(at, bts_from); | |
556 | out->variant.lbr.to = bts_get(at, bts_to); | |
d072c25f MM |
557 | |
558 | if (!out->variant.lbr.from && !out->variant.lbr.to) | |
559 | out->qualifier = bts_invalid; | |
c2724775 MM |
560 | } |
561 | ||
562 | return ds_cfg.sizeof_rec[ds_bts]; | |
563 | } | |
564 | ||
565 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) | |
566 | { | |
567 | unsigned char raw[MAX_SIZEOF_BTS]; | |
568 | ||
569 | if (!tracer) | |
570 | return -EINVAL; | |
571 | ||
572 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) | |
573 | return -EOVERFLOW; | |
574 | ||
575 | switch (in->qualifier) { | |
576 | case bts_invalid: | |
577 | bts_set(raw, bts_from, 0); | |
578 | bts_set(raw, bts_to, 0); | |
579 | bts_set(raw, bts_flags, 0); | |
580 | break; | |
581 | case bts_branch: | |
582 | bts_set(raw, bts_from, in->variant.lbr.from); | |
583 | bts_set(raw, bts_to, in->variant.lbr.to); | |
584 | bts_set(raw, bts_flags, 0); | |
585 | break; | |
586 | case bts_task_arrives: | |
587 | case bts_task_departs: | |
588 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | |
15879d04 MM |
589 | bts_set(raw, bts_clock, in->variant.event.clock); |
590 | bts_set(raw, bts_pid, in->variant.event.pid); | |
ca0002a1 | 591 | break; |
c2724775 MM |
592 | default: |
593 | return -EINVAL; | |
ca0002a1 | 594 | } |
c2724775 MM |
595 | |
596 | return ds_write(tracer->ds.context, ds_bts, raw, | |
597 | ds_cfg.sizeof_rec[ds_bts]); | |
eee3af4a | 598 | } |
93fa7636 MM |
599 | |
600 | ||
c2724775 MM |
601 | static void ds_write_config(struct ds_context *context, |
602 | struct ds_trace *cfg, enum ds_qualifier qual) | |
603 | { | |
604 | unsigned char *ds = context->ds; | |
605 | ||
606 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | |
607 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | |
608 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | |
609 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | |
610 | } | |
611 | ||
612 | static void ds_read_config(struct ds_context *context, | |
613 | struct ds_trace *cfg, enum ds_qualifier qual) | |
eee3af4a | 614 | { |
c2724775 MM |
615 | unsigned char *ds = context->ds; |
616 | ||
617 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); | |
618 | cfg->top = (void *)ds_get(ds, qual, ds_index); | |
619 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | |
620 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | |
621 | } | |
622 | ||
623 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |
624 | void *base, size_t size, size_t ith, | |
625 | unsigned int flags) { | |
93fa7636 | 626 | unsigned long buffer, adj; |
ca0002a1 | 627 | |
b8e47195 MM |
628 | /* |
629 | * Adjust the buffer address and size to meet alignment | |
ca0002a1 MM |
630 | * constraints: |
631 | * - buffer is double-word aligned | |
632 | * - size is multiple of record size | |
633 | * | |
634 | * We checked the size at the very beginning; we have enough | |
635 | * space to do the adjustment. | |
636 | */ | |
637 | buffer = (unsigned long)base; | |
638 | ||
639 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; | |
640 | buffer += adj; | |
641 | size -= adj; | |
642 | ||
c2724775 MM |
643 | trace->n = size / ds_cfg.sizeof_rec[qual]; |
644 | trace->size = ds_cfg.sizeof_rec[qual]; | |
ca0002a1 | 645 | |
c2724775 | 646 | size = (trace->n * trace->size); |
ca0002a1 | 647 | |
c2724775 MM |
648 | trace->begin = (void *)buffer; |
649 | trace->top = trace->begin; | |
650 | trace->end = (void *)(buffer + size); | |
b8e47195 MM |
651 | /* |
652 | * The value for 'no threshold' is -1, which will set the | |
ca0002a1 MM |
653 | * threshold outside of the buffer, just like we want it. |
654 | */ | |
de79f54f | 655 | ith *= ds_cfg.sizeof_rec[qual]; |
c2724775 MM |
656 | trace->ith = (void *)(buffer + size - ith); |
657 | ||
658 | trace->flags = flags; | |
ca0002a1 MM |
659 | } |
660 | ||
c2724775 MM |
661 | |
662 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | |
663 | enum ds_qualifier qual, struct task_struct *task, | |
de79f54f | 664 | int cpu, void *base, size_t size, size_t th) |
ca0002a1 MM |
665 | { |
666 | struct ds_context *context; | |
ca0002a1 | 667 | int error; |
150f5164 | 668 | size_t req_size; |
93fa7636 | 669 | |
bc44fb5f MM |
670 | error = -EOPNOTSUPP; |
671 | if (!ds_cfg.sizeof_rec[qual]) | |
672 | goto out; | |
673 | ||
6abb11ae MM |
674 | error = -EINVAL; |
675 | if (!base) | |
676 | goto out; | |
677 | ||
150f5164 MM |
678 | req_size = ds_cfg.sizeof_rec[qual]; |
679 | /* We might need space for alignment adjustments. */ | |
680 | if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT)) | |
681 | req_size += DS_ALIGNMENT; | |
682 | ||
ca0002a1 | 683 | error = -EINVAL; |
150f5164 | 684 | if (size < req_size) |
ca0002a1 | 685 | goto out; |
93fa7636 | 686 | |
ca0002a1 MM |
687 | if (th != (size_t)-1) { |
688 | th *= ds_cfg.sizeof_rec[qual]; | |
689 | ||
690 | error = -EINVAL; | |
691 | if (size <= th) | |
692 | goto out; | |
693 | } | |
694 | ||
ca0002a1 MM |
695 | tracer->buffer = base; |
696 | tracer->size = size; | |
93fa7636 | 697 | |
ca0002a1 | 698 | error = -ENOMEM; |
de79f54f | 699 | context = ds_get_context(task, cpu); |
93fa7636 | 700 | if (!context) |
ca0002a1 MM |
701 | goto out; |
702 | tracer->context = context; | |
703 | ||
de79f54f MM |
704 | /* |
705 | * Defer any tracer-specific initialization work for the context until | |
706 | * context ownership has been clarified. | |
707 | */ | |
de90add3 | 708 | |
c2724775 | 709 | error = 0; |
ca0002a1 | 710 | out: |
93fa7636 | 711 | return error; |
eee3af4a | 712 | } |
93fa7636 | 713 | |
de79f54f MM |
714 | static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, |
715 | void *base, size_t size, | |
716 | bts_ovfl_callback_t ovfl, size_t th, | |
717 | unsigned int flags) | |
eee3af4a | 718 | { |
ca0002a1 MM |
719 | struct bts_tracer *tracer; |
720 | int error; | |
93fa7636 | 721 | |
b8e47195 | 722 | /* Buffer overflow notification is not yet implemented. */ |
ca0002a1 MM |
723 | error = -EOPNOTSUPP; |
724 | if (ovfl) | |
725 | goto out; | |
726 | ||
38f80112 MM |
727 | error = get_tracer(task); |
728 | if (error < 0) | |
729 | goto out; | |
730 | ||
ca0002a1 | 731 | error = -ENOMEM; |
de79f54f | 732 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
ca0002a1 | 733 | if (!tracer) |
38f80112 | 734 | goto out_put_tracer; |
ca0002a1 MM |
735 | tracer->ovfl = ovfl; |
736 | ||
de79f54f | 737 | /* Do some more error checking and acquire a tracing context. */ |
c2724775 | 738 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
de79f54f | 739 | ds_bts, task, cpu, base, size, th); |
ca0002a1 MM |
740 | if (error < 0) |
741 | goto out_tracer; | |
742 | ||
de79f54f MM |
743 | /* Claim the bts part of the tracing context we acquired above. */ |
744 | spin_lock_irq(&ds_lock); | |
c2724775 | 745 | |
c2724775 MM |
746 | error = -EPERM; |
747 | if (tracer->ds.context->bts_master) | |
38f80112 | 748 | goto out_unlock; |
c2724775 MM |
749 | tracer->ds.context->bts_master = tracer; |
750 | ||
de79f54f | 751 | spin_unlock_irq(&ds_lock); |
c2724775 | 752 | |
de79f54f MM |
753 | /* |
754 | * Now that we own the bts part of the context, let's complete the | |
755 | * initialization for that part. | |
756 | */ | |
757 | ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); | |
758 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | |
759 | ds_install_ds_area(tracer->ds.context); | |
c2724775 MM |
760 | |
761 | tracer->trace.read = bts_read; | |
762 | tracer->trace.write = bts_write; | |
763 | ||
de79f54f | 764 | /* Start tracing. */ |
c2724775 MM |
765 | ds_resume_bts(tracer); |
766 | ||
ca0002a1 MM |
767 | return tracer; |
768 | ||
c2724775 | 769 | out_unlock: |
de79f54f | 770 | spin_unlock_irq(&ds_lock); |
c2724775 | 771 | ds_put_context(tracer->ds.context); |
ca0002a1 | 772 | out_tracer: |
6abb11ae | 773 | kfree(tracer); |
38f80112 MM |
774 | out_put_tracer: |
775 | put_tracer(task); | |
ca0002a1 MM |
776 | out: |
777 | return ERR_PTR(error); | |
eee3af4a | 778 | } |
93fa7636 | 779 | |
de79f54f MM |
780 | struct bts_tracer *ds_request_bts_task(struct task_struct *task, |
781 | void *base, size_t size, | |
782 | bts_ovfl_callback_t ovfl, | |
783 | size_t th, unsigned int flags) | |
784 | { | |
785 | return ds_request_bts(task, 0, base, size, ovfl, th, flags); | |
786 | } | |
787 | ||
788 | struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | |
789 | bts_ovfl_callback_t ovfl, | |
790 | size_t th, unsigned int flags) | |
791 | { | |
792 | return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); | |
793 | } | |
794 | ||
795 | static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, | |
796 | void *base, size_t size, | |
797 | pebs_ovfl_callback_t ovfl, size_t th, | |
798 | unsigned int flags) | |
eee3af4a | 799 | { |
ca0002a1 | 800 | struct pebs_tracer *tracer; |
93fa7636 MM |
801 | int error; |
802 | ||
b8e47195 | 803 | /* Buffer overflow notification is not yet implemented. */ |
ca0002a1 MM |
804 | error = -EOPNOTSUPP; |
805 | if (ovfl) | |
93fa7636 MM |
806 | goto out; |
807 | ||
38f80112 MM |
808 | error = get_tracer(task); |
809 | if (error < 0) | |
810 | goto out; | |
811 | ||
ca0002a1 | 812 | error = -ENOMEM; |
de79f54f | 813 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
ca0002a1 | 814 | if (!tracer) |
38f80112 | 815 | goto out_put_tracer; |
ca0002a1 | 816 | tracer->ovfl = ovfl; |
93fa7636 | 817 | |
de79f54f | 818 | /* Do some more error checking and acquire a tracing context. */ |
c2724775 | 819 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
de79f54f | 820 | ds_pebs, task, cpu, base, size, th); |
ca0002a1 MM |
821 | if (error < 0) |
822 | goto out_tracer; | |
93fa7636 | 823 | |
de79f54f MM |
824 | /* Claim the pebs part of the tracing context we acquired above. */ |
825 | spin_lock_irq(&ds_lock); | |
c2724775 | 826 | |
c2724775 MM |
827 | error = -EPERM; |
828 | if (tracer->ds.context->pebs_master) | |
38f80112 | 829 | goto out_unlock; |
c2724775 MM |
830 | tracer->ds.context->pebs_master = tracer; |
831 | ||
de79f54f | 832 | spin_unlock_irq(&ds_lock); |
c2724775 | 833 | |
de79f54f MM |
834 | /* |
835 | * Now that we own the pebs part of the context, let's complete the | |
836 | * initialization for that part. | |
837 | */ | |
838 | ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); | |
73bf1b62 | 839 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
de79f54f MM |
840 | ds_install_ds_area(tracer->ds.context); |
841 | ||
842 | /* Start tracing. */ | |
c2724775 MM |
843 | ds_resume_pebs(tracer); |
844 | ||
ca0002a1 MM |
845 | return tracer; |
846 | ||
c2724775 | 847 | out_unlock: |
de79f54f | 848 | spin_unlock_irq(&ds_lock); |
c2724775 | 849 | ds_put_context(tracer->ds.context); |
ca0002a1 | 850 | out_tracer: |
6abb11ae | 851 | kfree(tracer); |
38f80112 MM |
852 | out_put_tracer: |
853 | put_tracer(task); | |
93fa7636 | 854 | out: |
ca0002a1 MM |
855 | return ERR_PTR(error); |
856 | } | |
857 | ||
de79f54f MM |
858 | struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, |
859 | void *base, size_t size, | |
860 | pebs_ovfl_callback_t ovfl, | |
861 | size_t th, unsigned int flags) | |
eee3af4a | 862 | { |
de79f54f MM |
863 | return ds_request_pebs(task, 0, base, size, ovfl, th, flags); |
864 | } | |
8d99b3ac | 865 | |
de79f54f MM |
866 | struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, |
867 | pebs_ovfl_callback_t ovfl, | |
868 | size_t th, unsigned int flags) | |
869 | { | |
870 | return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); | |
871 | } | |
ca0002a1 | 872 | |
de79f54f MM |
873 | static void ds_free_bts(struct bts_tracer *tracer) |
874 | { | |
875 | struct task_struct *task; | |
8d99b3ac | 876 | |
de79f54f | 877 | task = tracer->ds.context->task; |
ca0002a1 | 878 | |
c2724775 MM |
879 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); |
880 | tracer->ds.context->bts_master = NULL; | |
93fa7636 | 881 | |
8d99b3ac MM |
882 | /* Make sure tracing stopped and the tracer is not in use. */ |
883 | if (task && (task != current)) | |
884 | wait_task_context_switch(task); | |
885 | ||
c2724775 | 886 | ds_put_context(tracer->ds.context); |
38f80112 | 887 | put_tracer(task); |
ca0002a1 | 888 | |
ca0002a1 | 889 | kfree(tracer); |
eee3af4a | 890 | } |
93fa7636 | 891 | |
/*
 * Suspend BTS tracing for @tracer and free it.
 *
 * Annotated with might_sleep(), which is evaluated even for a NULL tracer.
 * A NULL tracer is otherwise ignored.
 */
void ds_release_bts(struct bts_tracer *tracer)
{
	might_sleep();

	if (tracer) {
		ds_suspend_bts(tracer);
		ds_free_bts(tracer);
	}
}
902 | ||
903 | int ds_release_bts_noirq(struct bts_tracer *tracer) | |
904 | { | |
905 | struct task_struct *task; | |
906 | unsigned long irq; | |
907 | int error; | |
908 | ||
909 | if (!tracer) | |
910 | return 0; | |
911 | ||
912 | task = tracer->ds.context->task; | |
913 | ||
914 | local_irq_save(irq); | |
915 | ||
916 | error = -EPERM; | |
917 | if (!task && | |
918 | (tracer->ds.context->cpu != smp_processor_id())) | |
919 | goto out; | |
920 | ||
921 | error = -EPERM; | |
922 | if (task && (task != current)) | |
923 | goto out; | |
924 | ||
925 | ds_suspend_bts_noirq(tracer); | |
926 | ds_free_bts(tracer); | |
927 | ||
928 | error = 0; | |
929 | out: | |
930 | local_irq_restore(irq); | |
931 | return error; | |
932 | } | |
933 | ||
934 | static void update_task_debugctlmsr(struct task_struct *task, | |
935 | unsigned long debugctlmsr) | |
936 | { | |
937 | task->thread.debugctlmsr = debugctlmsr; | |
938 | ||
939 | get_cpu(); | |
940 | if (task == current) | |
941 | update_debugctlmsr(debugctlmsr); | |
de79f54f MM |
942 | put_cpu(); |
943 | } | |
944 | ||
c2724775 | 945 | void ds_suspend_bts(struct bts_tracer *tracer) |
eee3af4a | 946 | { |
c2724775 | 947 | struct task_struct *task; |
de79f54f MM |
948 | unsigned long debugctlmsr; |
949 | int cpu; | |
ca0002a1 | 950 | |
ca0002a1 | 951 | if (!tracer) |
c2724775 | 952 | return; |
ca0002a1 | 953 | |
cac94f97 MM |
954 | tracer->flags = 0; |
955 | ||
c2724775 | 956 | task = tracer->ds.context->task; |
de79f54f | 957 | cpu = tracer->ds.context->cpu; |
ca0002a1 | 958 | |
de79f54f | 959 | WARN_ON(!task && irqs_disabled()); |
ca0002a1 | 960 | |
de79f54f MM |
961 | debugctlmsr = (task ? |
962 | task->thread.debugctlmsr : | |
963 | get_debugctlmsr_on_cpu(cpu)); | |
964 | debugctlmsr &= ~BTS_CONTROL; | |
eee3af4a | 965 | |
de79f54f MM |
966 | if (task) |
967 | update_task_debugctlmsr(task, debugctlmsr); | |
968 | else | |
969 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | |
93fa7636 | 970 | } |
eee3af4a | 971 | |
de79f54f | 972 | int ds_suspend_bts_noirq(struct bts_tracer *tracer) |
93fa7636 | 973 | { |
c2724775 | 974 | struct task_struct *task; |
de79f54f MM |
975 | unsigned long debugctlmsr, irq; |
976 | int cpu, error = 0; | |
eee3af4a | 977 | |
ca0002a1 | 978 | if (!tracer) |
de79f54f | 979 | return 0; |
eee3af4a | 980 | |
de79f54f | 981 | tracer->flags = 0; |
cac94f97 | 982 | |
c2724775 | 983 | task = tracer->ds.context->task; |
de79f54f MM |
984 | cpu = tracer->ds.context->cpu; |
985 | ||
986 | local_irq_save(irq); | |
987 | ||
988 | error = -EPERM; | |
989 | if (!task && (cpu != smp_processor_id())) | |
990 | goto out; | |
991 | ||
992 | debugctlmsr = (task ? | |
993 | task->thread.debugctlmsr : | |
994 | get_debugctlmsr()); | |
995 | debugctlmsr &= ~BTS_CONTROL; | |
996 | ||
997 | if (task) | |
998 | update_task_debugctlmsr(task, debugctlmsr); | |
999 | else | |
1000 | update_debugctlmsr(debugctlmsr); | |
1001 | ||
1002 | error = 0; | |
1003 | out: | |
1004 | local_irq_restore(irq); | |
1005 | return error; | |
1006 | } | |
1007 | ||
1008 | static unsigned long ds_bts_control(struct bts_tracer *tracer) | |
1009 | { | |
1010 | unsigned long control; | |
ca0002a1 | 1011 | |
c2724775 MM |
1012 | control = ds_cfg.ctl[dsf_bts]; |
1013 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) | |
1014 | control |= ds_cfg.ctl[dsf_bts_kernel]; | |
1015 | if (!(tracer->trace.ds.flags & BTS_USER)) | |
1016 | control |= ds_cfg.ctl[dsf_bts_user]; | |
eee3af4a | 1017 | |
de79f54f | 1018 | return control; |
eee3af4a MM |
1019 | } |
1020 | ||
de79f54f | 1021 | void ds_resume_bts(struct bts_tracer *tracer) |
eee3af4a | 1022 | { |
38f80112 | 1023 | struct task_struct *task; |
de79f54f MM |
1024 | unsigned long debugctlmsr; |
1025 | int cpu; | |
38f80112 | 1026 | |
ca0002a1 | 1027 | if (!tracer) |
c2724775 | 1028 | return; |
93fa7636 | 1029 | |
de79f54f MM |
1030 | tracer->flags = tracer->trace.ds.flags; |
1031 | ||
38f80112 | 1032 | task = tracer->ds.context->task; |
de79f54f | 1033 | cpu = tracer->ds.context->cpu; |
38f80112 | 1034 | |
de79f54f MM |
1035 | WARN_ON(!task && irqs_disabled()); |
1036 | ||
1037 | debugctlmsr = (task ? | |
1038 | task->thread.debugctlmsr : | |
1039 | get_debugctlmsr_on_cpu(cpu)); | |
1040 | debugctlmsr |= ds_bts_control(tracer); | |
1041 | ||
1042 | if (task) | |
1043 | update_task_debugctlmsr(task, debugctlmsr); | |
1044 | else | |
1045 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | |
1046 | } | |
1047 | ||
1048 | int ds_resume_bts_noirq(struct bts_tracer *tracer) | |
1049 | { | |
1050 | struct task_struct *task; | |
1051 | unsigned long debugctlmsr, irq; | |
1052 | int cpu, error = 0; | |
1053 | ||
1054 | if (!tracer) | |
1055 | return 0; | |
1056 | ||
1057 | tracer->flags = tracer->trace.ds.flags; | |
1058 | ||
1059 | task = tracer->ds.context->task; | |
1060 | cpu = tracer->ds.context->cpu; | |
1061 | ||
1062 | local_irq_save(irq); | |
1063 | ||
1064 | error = -EPERM; | |
1065 | if (!task && (cpu != smp_processor_id())) | |
1066 | goto out; | |
1067 | ||
1068 | debugctlmsr = (task ? | |
1069 | task->thread.debugctlmsr : | |
1070 | get_debugctlmsr()); | |
1071 | debugctlmsr |= ds_bts_control(tracer); | |
1072 | ||
1073 | if (task) | |
1074 | update_task_debugctlmsr(task, debugctlmsr); | |
1075 | else | |
1076 | update_debugctlmsr(debugctlmsr); | |
1077 | ||
1078 | error = 0; | |
1079 | out: | |
1080 | local_irq_restore(irq); | |
1081 | return error; | |
1082 | } | |
1083 | ||
1084 | static void ds_free_pebs(struct pebs_tracer *tracer) | |
1085 | { | |
1086 | struct task_struct *task; | |
1087 | ||
1088 | task = tracer->ds.context->task; | |
93fa7636 | 1089 | |
c2724775 MM |
1090 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); |
1091 | tracer->ds.context->pebs_master = NULL; | |
eee3af4a | 1092 | |
c2724775 | 1093 | ds_put_context(tracer->ds.context); |
38f80112 | 1094 | put_tracer(task); |
eee3af4a | 1095 | |
c2724775 | 1096 | kfree(tracer); |
a95d67f8 MM |
1097 | } |
1098 | ||
/*
 * Suspend PEBS tracing for @tracer and free it.
 *
 * Annotated with might_sleep(), which is evaluated even for a NULL tracer.
 * A NULL tracer is otherwise ignored.
 */
void ds_release_pebs(struct pebs_tracer *tracer)
{
	might_sleep();

	if (tracer) {
		ds_suspend_pebs(tracer);
		ds_free_pebs(tracer);
	}
}
1109 | ||
1110 | int ds_release_pebs_noirq(struct pebs_tracer *tracer) | |
1111 | { | |
1112 | struct task_struct *task; | |
1113 | unsigned long irq; | |
1114 | int error; | |
1115 | ||
1116 | if (!tracer) | |
1117 | return 0; | |
1118 | ||
1119 | task = tracer->ds.context->task; | |
1120 | ||
1121 | local_irq_save(irq); | |
1122 | ||
1123 | error = -EPERM; | |
1124 | if (!task && | |
1125 | (tracer->ds.context->cpu != smp_processor_id())) | |
1126 | goto out; | |
1127 | ||
1128 | error = -EPERM; | |
1129 | if (task && (task != current)) | |
1130 | goto out; | |
1131 | ||
1132 | ds_suspend_pebs_noirq(tracer); | |
1133 | ds_free_pebs(tracer); | |
1134 | ||
1135 | error = 0; | |
1136 | out: | |
1137 | local_irq_restore(irq); | |
1138 | return error; | |
1139 | } | |
1140 | ||
/*
 * Suspend PEBS tracing for @tracer.
 *
 * Currently a no-op: the function has no body.
 */
void ds_suspend_pebs(struct pebs_tracer *tracer)
{
	/* Nothing to do. */
}
eee3af4a | 1145 | |
/*
 * Suspend PEBS tracing for @tracer with interrupts disabled.
 *
 * Currently does nothing and always reports success (0).
 */
int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
{
	/* Nothing to do. */
	return 0;
}
1150 | ||
/*
 * Resume PEBS tracing for @tracer.
 *
 * Currently a no-op: the function has no body.
 */
void ds_resume_pebs(struct pebs_tracer *tracer)
{
	/* Nothing to do. */
}
1155 | ||
/*
 * Resume PEBS tracing for @tracer with interrupts disabled.
 *
 * Currently does nothing and always reports success (0).
 */
int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
{
	/* Nothing to do. */
	return 0;
}
1160 | ||
c2724775 | 1161 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) |
eee3af4a | 1162 | { |
ca0002a1 | 1163 | if (!tracer) |
c2724775 | 1164 | return NULL; |
ca0002a1 | 1165 | |
c2724775 MM |
1166 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); |
1167 | return &tracer->trace; | |
93fa7636 | 1168 | } |
eee3af4a | 1169 | |
c2724775 | 1170 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) |
93fa7636 | 1171 | { |
ca0002a1 | 1172 | if (!tracer) |
c2724775 | 1173 | return NULL; |
ca0002a1 | 1174 | |
c2724775 | 1175 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
017bc617 MM |
1176 | |
1177 | tracer->trace.counters = ds_cfg.nr_counter_reset; | |
1178 | memcpy(tracer->trace.counter_reset, | |
1179 | tracer->ds.context->ds + | |
1180 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field), | |
1181 | ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE); | |
ca0002a1 | 1182 | |
c2724775 | 1183 | return &tracer->trace; |
93fa7636 | 1184 | } |
eee3af4a | 1185 | |
c2724775 | 1186 | int ds_reset_bts(struct bts_tracer *tracer) |
93fa7636 | 1187 | { |
ca0002a1 MM |
1188 | if (!tracer) |
1189 | return -EINVAL; | |
1190 | ||
c2724775 | 1191 | tracer->trace.ds.top = tracer->trace.ds.begin; |
ca0002a1 | 1192 | |
c2724775 MM |
1193 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
1194 | (unsigned long)tracer->trace.ds.top); | |
ca0002a1 MM |
1195 | |
1196 | return 0; | |
93fa7636 | 1197 | } |
eee3af4a | 1198 | |
c2724775 | 1199 | int ds_reset_pebs(struct pebs_tracer *tracer) |
93fa7636 | 1200 | { |
ca0002a1 MM |
1201 | if (!tracer) |
1202 | return -EINVAL; | |
eee3af4a | 1203 | |
c2724775 | 1204 | tracer->trace.ds.top = tracer->trace.ds.begin; |
eee3af4a | 1205 | |
608780a9 | 1206 | ds_set(tracer->ds.context->ds, ds_pebs, ds_index, |
c2724775 | 1207 | (unsigned long)tracer->trace.ds.top); |
93fa7636 | 1208 | |
ca0002a1 | 1209 | return 0; |
eee3af4a MM |
1210 | } |
1211 | ||
017bc617 MM |
1212 | int ds_set_pebs_reset(struct pebs_tracer *tracer, |
1213 | unsigned int counter, u64 value) | |
eee3af4a | 1214 | { |
ca0002a1 MM |
1215 | if (!tracer) |
1216 | return -EINVAL; | |
eee3af4a | 1217 | |
017bc617 MM |
1218 | if (ds_cfg.nr_counter_reset < counter) |
1219 | return -EINVAL; | |
1220 | ||
bc44fb5f | 1221 | *(u64 *)(tracer->ds.context->ds + |
017bc617 MM |
1222 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) + |
1223 | (counter * PEBS_RESET_FIELD_SIZE)) = value; | |
93fa7636 | 1224 | |
ca0002a1 | 1225 | return 0; |
93fa7636 MM |
1226 | } |
1227 | ||
c2724775 | 1228 | static const struct ds_configuration ds_cfg_netburst = { |
ba2607fe | 1229 | .name = "Netburst", |
c2724775 MM |
1230 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), |
1231 | .ctl[dsf_bts_kernel] = (1 << 5), | |
1232 | .ctl[dsf_bts_user] = (1 << 6), | |
017bc617 | 1233 | .nr_counter_reset = 1, |
eee3af4a | 1234 | }; |
c2724775 | 1235 | static const struct ds_configuration ds_cfg_pentium_m = { |
ba2607fe | 1236 | .name = "Pentium M", |
c2724775 | 1237 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
017bc617 | 1238 | .nr_counter_reset = 1, |
eee3af4a | 1239 | }; |
ba2607fe MM |
1240 | static const struct ds_configuration ds_cfg_core2_atom = { |
1241 | .name = "Core 2/Atom", | |
c2724775 MM |
1242 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
1243 | .ctl[dsf_bts_kernel] = (1 << 9), | |
1244 | .ctl[dsf_bts_user] = (1 << 10), | |
017bc617 MM |
1245 | .nr_counter_reset = 1, |
1246 | }; | |
1247 | static const struct ds_configuration ds_cfg_core_i7 = { | |
1248 | .name = "Core i7", | |
1249 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | |
1250 | .ctl[dsf_bts_kernel] = (1 << 9), | |
1251 | .ctl[dsf_bts_user] = (1 << 10), | |
1252 | .nr_counter_reset = 4, | |
c2724775 | 1253 | }; |
eee3af4a | 1254 | |
c2724775 | 1255 | static void |
bc44fb5f MM |
1256 | ds_configure(const struct ds_configuration *cfg, |
1257 | struct cpuinfo_x86 *cpu) | |
eee3af4a | 1258 | { |
bc44fb5f MM |
1259 | unsigned long nr_pebs_fields = 0; |
1260 | ||
1261 | printk(KERN_INFO "[ds] using %s configuration\n", cfg->name); | |
1262 | ||
1263 | #ifdef __i386__ | |
1264 | nr_pebs_fields = 10; | |
1265 | #else | |
1266 | nr_pebs_fields = 18; | |
1267 | #endif | |
1268 | ||
017bc617 MM |
1269 | /* |
1270 | * Starting with version 2, architectural performance | |
1271 | * monitoring supports a format specifier. | |
1272 | */ | |
1273 | if ((cpuid_eax(0xa) & 0xff) > 1) { | |
1274 | unsigned long perf_capabilities, format; | |
1275 | ||
1276 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities); | |
1277 | ||
1278 | format = (perf_capabilities >> 8) & 0xf; | |
1279 | ||
1280 | switch (format) { | |
1281 | case 0: | |
1282 | nr_pebs_fields = 18; | |
1283 | break; | |
1284 | case 1: | |
1285 | nr_pebs_fields = 22; | |
1286 | break; | |
1287 | default: | |
1288 | printk(KERN_INFO | |
1289 | "[ds] unknown PEBS format: %lu\n", format); | |
1290 | nr_pebs_fields = 0; | |
1291 | break; | |
1292 | } | |
1293 | } | |
1294 | ||
c2724775 | 1295 | memset(&ds_cfg, 0, sizeof(ds_cfg)); |
eee3af4a | 1296 | ds_cfg = *cfg; |
ca0002a1 | 1297 | |
bc44fb5f MM |
1298 | ds_cfg.sizeof_ptr_field = |
1299 | (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4); | |
c2724775 | 1300 | |
bc44fb5f MM |
1301 | ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3; |
1302 | ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields; | |
1303 | ||
1304 | if (!cpu_has(cpu, X86_FEATURE_BTS)) { | |
1305 | ds_cfg.sizeof_rec[ds_bts] = 0; | |
c2724775 MM |
1306 | printk(KERN_INFO "[ds] bts not available\n"); |
1307 | } | |
bc44fb5f MM |
1308 | if (!cpu_has(cpu, X86_FEATURE_PEBS)) { |
1309 | ds_cfg.sizeof_rec[ds_pebs] = 0; | |
c2724775 | 1310 | printk(KERN_INFO "[ds] pebs not available\n"); |
bc44fb5f MM |
1311 | } |
1312 | ||
1313 | printk(KERN_INFO "[ds] sizes: address: %u bit, ", | |
1314 | 8 * ds_cfg.sizeof_ptr_field); | |
1315 | printk("bts/pebs record: %u/%u bytes\n", | |
1316 | ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); | |
ca0002a1 | 1317 | |
017bc617 | 1318 | WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset); |
eee3af4a MM |
1319 | } |
1320 | ||
1321 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |
1322 | { | |
ee811517 MM |
1323 | /* Only configure the first cpu. Others are identical. */ |
1324 | if (ds_cfg.name) | |
1325 | return; | |
1326 | ||
eee3af4a MM |
1327 | switch (c->x86) { |
1328 | case 0x6: | |
1329 | switch (c->x86_model) { | |
ba2607fe MM |
1330 | case 0x9: |
1331 | case 0xd: /* Pentium M */ | |
bc44fb5f | 1332 | ds_configure(&ds_cfg_pentium_m, c); |
eee3af4a | 1333 | break; |
ba2607fe MM |
1334 | case 0xf: |
1335 | case 0x17: /* Core2 */ | |
1336 | case 0x1c: /* Atom */ | |
bc44fb5f | 1337 | ds_configure(&ds_cfg_core2_atom, c); |
ba2607fe | 1338 | break; |
b8e47195 | 1339 | case 0x1a: /* Core i7 */ |
017bc617 MM |
1340 | ds_configure(&ds_cfg_core_i7, c); |
1341 | break; | |
ba2607fe | 1342 | default: |
b8e47195 | 1343 | /* Sorry, don't know about them. */ |
eee3af4a | 1344 | break; |
eee3af4a MM |
1345 | } |
1346 | break; | |
ba2607fe | 1347 | case 0xf: |
eee3af4a | 1348 | switch (c->x86_model) { |
eee3af4a MM |
1349 | case 0x0: |
1350 | case 0x1: | |
1351 | case 0x2: /* Netburst */ | |
bc44fb5f | 1352 | ds_configure(&ds_cfg_netburst, c); |
eee3af4a | 1353 | break; |
eee3af4a | 1354 | default: |
b8e47195 | 1355 | /* Sorry, don't know about them. */ |
eee3af4a MM |
1356 | break; |
1357 | } | |
1358 | break; | |
1359 | default: | |
b8e47195 | 1360 | /* Sorry, don't know about them. */ |
eee3af4a MM |
1361 | break; |
1362 | } | |
1363 | } | |
93fa7636 | 1364 | |
cac94f97 MM |
1365 | static inline void ds_take_timestamp(struct ds_context *context, |
1366 | enum bts_qualifier qualifier, | |
1367 | struct task_struct *task) | |
1368 | { | |
1369 | struct bts_tracer *tracer = context->bts_master; | |
1370 | struct bts_struct ts; | |
1371 | ||
1372 | /* Prevent compilers from reading the tracer pointer twice. */ | |
1373 | barrier(); | |
1374 | ||
1375 | if (!tracer || !(tracer->flags & BTS_TIMESTAMPS)) | |
1376 | return; | |
1377 | ||
1378 | memset(&ts, 0, sizeof(ts)); | |
15879d04 MM |
1379 | ts.qualifier = qualifier; |
1380 | ts.variant.event.clock = trace_clock_global(); | |
1381 | ts.variant.event.pid = task->pid; | |
cac94f97 MM |
1382 | |
1383 | bts_write(tracer, &ts); | |
1384 | } | |
1385 | ||
c2724775 MM |
1386 | /* |
1387 | * Change the DS configuration from tracing prev to tracing next. | |
1388 | */ | |
1389 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | |
93fa7636 | 1390 | { |
cac94f97 MM |
1391 | struct ds_context *prev_ctx = prev->thread.ds_ctx; |
1392 | struct ds_context *next_ctx = next->thread.ds_ctx; | |
1393 | unsigned long debugctlmsr = next->thread.debugctlmsr; | |
1394 | ||
1395 | /* Make sure all data is read before we start. */ | |
1396 | barrier(); | |
c2724775 MM |
1397 | |
1398 | if (prev_ctx) { | |
1399 | update_debugctlmsr(0); | |
1400 | ||
cac94f97 | 1401 | ds_take_timestamp(prev_ctx, bts_task_departs, prev); |
c2724775 MM |
1402 | } |
1403 | ||
1404 | if (next_ctx) { | |
cac94f97 | 1405 | ds_take_timestamp(next_ctx, bts_task_arrives, next); |
c2724775 MM |
1406 | |
1407 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | |
ca0002a1 | 1408 | } |
c2724775 | 1409 | |
cac94f97 | 1410 | update_debugctlmsr(debugctlmsr); |
93fa7636 | 1411 | } |
bf53de90 | 1412 | |
de79f54f MM |
1413 | static __init int ds_selftest(void) |
1414 | { | |
1415 | if (ds_cfg.sizeof_rec[ds_bts]) { | |
1416 | int error; | |
1417 | ||
1418 | error = ds_selftest_bts(); | |
1419 | if (error) { | |
1420 | WARN(1, "[ds] selftest failed. disabling bts.\n"); | |
1421 | ds_cfg.sizeof_rec[ds_bts] = 0; | |
1422 | } | |
1423 | } | |
1424 | ||
1425 | if (ds_cfg.sizeof_rec[ds_pebs]) { | |
1426 | int error; | |
1427 | ||
1428 | error = ds_selftest_pebs(); | |
1429 | if (error) { | |
1430 | WARN(1, "[ds] selftest failed. disabling pebs.\n"); | |
1431 | ds_cfg.sizeof_rec[ds_pebs] = 0; | |
1432 | } | |
1433 | } | |
1434 | ||
1435 | return 0; | |
1436 | } | |
1437 | device_initcall(ds_selftest); |