3 * Copyright IBM Corp. 2002, 2011
4 * Author(s): Thomas Spatzier (tspat@de.ibm.com)
5 * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
6 * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
7 * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
9 * @remark Copyright 2002-2011 OProfile authors
12 #include <linux/oprofile.h>
13 #include <linux/perf_event.h>
14 #include <linux/init.h>
15 #include <linux/errno.h>
17 #include <linux/module.h>
18 #include <asm/processor.h>
19 #include <asm/perf_event.h>
21 #include "../../../drivers/oprofile/oprof.h"
23 #include "hwsampler.h"
24 #include "op_counter.h"
26 #define DEFAULT_INTERVAL 4127518 /* initial value of oprofile_hw_interval */
28 #define DEFAULT_SDBT_BLOCKS 1 /* initial value of oprofile_sdbt_blocks */
29 #define DEFAULT_SDB_BLOCKS 511 /* initial value of oprofile_sdb_blocks */
31 static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL; /* interval handed to hwsampler_start_all(); kept within [oprofile_min_interval, oprofile_max_interval] at init and on user writes */
32 static unsigned long oprofile_min_interval; /* lower bound, from hwsampler_query_min_interval() */
33 static unsigned long oprofile_max_interval; /* upper bound, from hwsampler_query_max_interval() */
35 static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS; /* first argument to hwsampler_allocate(); exported via oprofilefs as "hw_sdbt_blocks" */
36 static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS; /* second argument to hwsampler_allocate() */
38 static int hwsampler_enabled; /* user-requested mode, written through the oprofilefs "enabled"/"hwsampler" files; reset to 1 when the hwsampling files are created */
39 static int hwsampler_running; /* snapshot of hwsampler_enabled taken at start; start_mutex must be held to change */
40 static int hwsampler_available; /* set in oprofile_arch_init(): non-zero iff oprofile_hwsampler_init() returned 0 */
42 static struct oprofile_operations timer_ops; /* copy of the ops taken right after oprofile_timer_init(); its start() is used when hardware sampling is not running */
44 struct op_counter_config counter_config; /* .kernel and .user are exposed via the oprofilefs "kernel"/"user" files and reset to 1 when the hwsampling files are created */
46 enum __force_cpu_type {
47 reserved = 0, /* do not force */
50 static int force_cpu_type;
52 static int set_cpu_type(const char *str, struct kernel_param *kp)
54 if (!strcmp(str, "timer")) {
55 force_cpu_type = timer;
56 printk(KERN_INFO "oprofile: forcing timer to be returned "
64 module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0); /* writes to the cpu_type parameter are parsed by set_cpu_type() */
65 MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling "
66 "(report cpu_type \"timer\")"); /* adjacent literals are concatenated: the separating space and the closing ')' must be part of the text */
68 static int __oprofile_hwsampler_start(void)
72 retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
76 retval = hwsampler_start_all(oprofile_hw_interval);
78 hwsampler_deallocate();
83 static int oprofile_hwsampler_start(void)
87 hwsampler_running = hwsampler_enabled;
89 if (!hwsampler_running)
90 return timer_ops.start();
92 retval = perf_reserve_sampling();
96 retval = __oprofile_hwsampler_start();
98 perf_release_sampling();
103 static void oprofile_hwsampler_stop(void)
105 if (!hwsampler_running) {
110 hwsampler_stop_all();
111 hwsampler_deallocate();
112 perf_release_sampling();
118 * /dev/oprofile/0/enabled
119 * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer)
122 static ssize_t hwsampler_read(struct file *file, char __user *buf,
123 size_t count, loff_t *offset)
125 return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
128 static ssize_t hwsampler_write(struct file *file, char const __user *buf,
129 size_t count, loff_t *offset)
137 retval = oprofilefs_ulong_from_user(&val, buf, count);
141 if (val != 0 && val != 1)
144 if (oprofile_started)
146 * safe to do without locking as we set
147 * hwsampler_running in start() when start_mutex is
152 hwsampler_enabled = val;
157 static const struct file_operations hwsampler_fops = {
158 .read = hwsampler_read,
159 .write = hwsampler_write,
164 * /dev/oprofile/0/count
165 * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer)
167 * Make sure that the value is within the hardware range.
170 static ssize_t hw_interval_read(struct file *file, char __user *buf,
171 size_t count, loff_t *offset)
173 return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
177 static ssize_t hw_interval_write(struct file *file, char const __user *buf,
178 size_t count, loff_t *offset)
185 retval = oprofilefs_ulong_from_user(&val, buf, count);
188 if (val < oprofile_min_interval)
189 oprofile_hw_interval = oprofile_min_interval;
190 else if (val > oprofile_max_interval)
191 oprofile_hw_interval = oprofile_max_interval;
193 oprofile_hw_interval = val;
198 static const struct file_operations hw_interval_fops = {
199 .read = hw_interval_read,
200 .write = hw_interval_write,
205 * /dev/oprofile/0/event
206 * Only a single event with number 0 is supported with this counter.
208 * /dev/oprofile/0/unit_mask
209 * This is a dummy file needed by the user space tools.
210 * No value other than 0 is accepted or returned.
213 static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
214 size_t count, loff_t *offset)
216 return oprofilefs_ulong_to_user(0, buf, count, offset);
219 static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
220 size_t count, loff_t *offset)
228 retval = oprofilefs_ulong_from_user(&val, buf, count);
236 static const struct file_operations zero_fops = {
237 .read = hwsampler_zero_read,
238 .write = hwsampler_zero_write,
241 /* /dev/oprofile/0/kernel file ops. */
243 static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
244 size_t count, loff_t *offset)
246 return oprofilefs_ulong_to_user(counter_config.kernel,
250 static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
251 size_t count, loff_t *offset)
259 retval = oprofilefs_ulong_from_user(&val, buf, count);
263 if (val != 0 && val != 1)
266 counter_config.kernel = val;
271 static const struct file_operations kernel_fops = {
272 .read = hwsampler_kernel_read,
273 .write = hwsampler_kernel_write,
276 /* /dev/oprofile/0/user file ops. */
278 static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
279 size_t count, loff_t *offset)
281 return oprofilefs_ulong_to_user(counter_config.user,
285 static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
286 size_t count, loff_t *offset)
294 retval = oprofilefs_ulong_from_user(&val, buf, count);
298 if (val != 0 && val != 1)
301 counter_config.user = val;
306 static const struct file_operations user_fops = {
307 .read = hwsampler_user_read,
308 .write = hwsampler_user_write,
313 * File ops used for: /dev/oprofile/timer/enabled
314 * The value always has to be the inverted value of hwsampler_enabled. So
315 * no separate variable is created. That way we do not need locking.
318 static ssize_t timer_enabled_read(struct file *file, char __user *buf,
319 size_t count, loff_t *offset)
321 return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
324 static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
325 size_t count, loff_t *offset)
333 retval = oprofilefs_ulong_from_user(&val, buf, count);
337 if (val != 0 && val != 1)
340 /* Timer cannot be disabled without having hardware sampling. */
341 if (val == 0 && !hwsampler_available)
344 if (oprofile_started)
346 * safe to do without locking as we set
347 * hwsampler_running in start() when start_mutex is
352 hwsampler_enabled = !val;
357 static const struct file_operations timer_enabled_fops = {
358 .read = timer_enabled_read,
359 .write = timer_enabled_write,
363 static int oprofile_create_hwsampling_files(struct dentry *root)
367 dir = oprofilefs_mkdir(root, "timer");
371 oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
373 if (!hwsampler_available)
376 /* reinitialize default values */
377 hwsampler_enabled = 1;
378 counter_config.kernel = 1;
379 counter_config.user = 1;
381 if (!force_cpu_type) {
383 * Create the counter file system. A single virtual
384 * counter is created which can be used to
385 * enable/disable hardware sampling dynamically from
386 * user space. The user space will configure a single
387 * counter with a single event. The value of 'event'
388 * and 'unit_mask' are not evaluated by the kernel code
389 * and can only be set to 0.
392 dir = oprofilefs_mkdir(root, "0");
396 oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
397 oprofilefs_create_file(dir, "event", &zero_fops);
398 oprofilefs_create_file(dir, "count", &hw_interval_fops);
399 oprofilefs_create_file(dir, "unit_mask", &zero_fops);
400 oprofilefs_create_file(dir, "kernel", &kernel_fops);
401 oprofilefs_create_file(dir, "user", &user_fops);
402 oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
403 &oprofile_sdbt_blocks);
407 * Hardware sampling can be used but the cpu_type is
408 * forced to timer in order to deal with legacy user
409 * space tools. The /dev/oprofile/hwsampling fs is
410 * provided in that case.
412 dir = oprofilefs_mkdir(root, "hwsampling");
416 oprofilefs_create_file(dir, "hwsampler",
418 oprofilefs_create_file(dir, "hw_interval",
420 oprofilefs_create_ro_ulong(dir, "hw_min_interval",
421 &oprofile_min_interval);
422 oprofilefs_create_ro_ulong(dir, "hw_max_interval",
423 &oprofile_max_interval);
424 oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
425 &oprofile_sdbt_blocks);
430 static int oprofile_hwsampler_init(struct oprofile_operations *ops)
433 * Initialize the timer mode infrastructure as well in order
434 * to be able to switch back dynamically. oprofile_timer_init
435 * is not supposed to fail.
437 if (oprofile_timer_init(ops))
440 memcpy(&timer_ops, ops, sizeof(timer_ops));
441 ops->create_files = oprofile_create_hwsampling_files;
444 * If the user space tools do not support newer cpu types,
445 * the force_cpu_type module parameter
446 * can be used to always return "timer" as cpu type.
448 if (force_cpu_type != timer) {
453 switch (id.machine) {
454 case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
455 case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
456 case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
457 case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break;
458 default: return -ENODEV;
462 if (hwsampler_setup())
466 * Query the range for the sampling interval from the
469 oprofile_min_interval = hwsampler_query_min_interval();
470 if (oprofile_min_interval == 0)
472 oprofile_max_interval = hwsampler_query_max_interval();
473 if (oprofile_max_interval == 0)
476 /* The initial value should be sane */
477 if (oprofile_hw_interval < oprofile_min_interval)
478 oprofile_hw_interval = oprofile_min_interval;
479 if (oprofile_hw_interval > oprofile_max_interval)
480 oprofile_hw_interval = oprofile_max_interval;
482 printk(KERN_INFO "oprofile: System z hardware sampling "
483 "facility found.\n");
485 ops->start = oprofile_hwsampler_start;
486 ops->stop = oprofile_hwsampler_stop;
491 static void oprofile_hwsampler_exit(void)
493 hwsampler_shutdown();
496 static int __s390_backtrace(void *data, unsigned long address)
498 unsigned int *depth = data;
503 oprofile_add_trace(address);
507 static void s390_backtrace(struct pt_regs *regs, unsigned int depth)
511 dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]);
514 int __init oprofile_arch_init(struct oprofile_operations *ops)
516 ops->backtrace = s390_backtrace;
519 * -ENODEV is not reported to the caller. The module itself
520 * will use the timer mode sampling as fallback and this is
523 hwsampler_available = oprofile_hwsampler_init(ops) == 0;
528 void oprofile_arch_exit(void)
530 oprofile_hwsampler_exit();