Merge branch 'mm-pkeys-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6-block.git] / arch / s390 / oprofile / init.c
1 /*
2  * S390 Version
3  *   Copyright IBM Corp. 2002, 2011
4  *   Author(s): Thomas Spatzier (tspat@de.ibm.com)
5  *   Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
6  *   Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
7  *   Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
8  *
9  * @remark Copyright 2002-2011 OProfile authors
10  */
11
12 #include <linux/oprofile.h>
13 #include <linux/perf_event.h>
14 #include <linux/init.h>
15 #include <linux/errno.h>
16 #include <linux/fs.h>
17 #include <linux/module.h>
18 #include <asm/processor.h>
19 #include <asm/perf_event.h>
20
21 #include "../../../drivers/oprofile/oprof.h"
22
23 #include "hwsampler.h"
24 #include "op_counter.h"
25
26 #define DEFAULT_INTERVAL        4127518
27
28 #define DEFAULT_SDBT_BLOCKS     1
29 #define DEFAULT_SDB_BLOCKS      511
30
31 static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
32 static unsigned long oprofile_min_interval;
33 static unsigned long oprofile_max_interval;
34
35 static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
36 static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
37
38 static int hwsampler_enabled;
39 static int hwsampler_running;   /* start_mutex must be held to change */
40 static int hwsampler_available;
41
42 static struct oprofile_operations timer_ops;
43
44 struct op_counter_config counter_config;
45
46 enum __force_cpu_type {
47         reserved = 0,           /* do not force */
48         timer,
49 };
50 static int force_cpu_type;
51
52 static int set_cpu_type(const char *str, struct kernel_param *kp)
53 {
54         if (!strcmp(str, "timer")) {
55                 force_cpu_type = timer;
56                 printk(KERN_INFO "oprofile: forcing timer to be returned "
57                                  "as cpu type\n");
58         } else {
59                 force_cpu_type = 0;
60         }
61
62         return 0;
63 }
64 module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
65 MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
66                            "(report cpu_type \"timer\"");
67
68 static int __oprofile_hwsampler_start(void)
69 {
70         int retval;
71
72         retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
73         if (retval)
74                 return retval;
75
76         retval = hwsampler_start_all(oprofile_hw_interval);
77         if (retval)
78                 hwsampler_deallocate();
79
80         return retval;
81 }
82
83 static int oprofile_hwsampler_start(void)
84 {
85         int retval;
86
87         hwsampler_running = hwsampler_enabled;
88
89         if (!hwsampler_running)
90                 return timer_ops.start();
91
92         retval = perf_reserve_sampling();
93         if (retval)
94                 return retval;
95
96         retval = __oprofile_hwsampler_start();
97         if (retval)
98                 perf_release_sampling();
99
100         return retval;
101 }
102
103 static void oprofile_hwsampler_stop(void)
104 {
105         if (!hwsampler_running) {
106                 timer_ops.stop();
107                 return;
108         }
109
110         hwsampler_stop_all();
111         hwsampler_deallocate();
112         perf_release_sampling();
113         return;
114 }
115
116 /*
117  * File ops used for:
118  * /dev/oprofile/0/enabled
119  * /dev/oprofile/hwsampling/hwsampler  (cpu_type = timer)
120  */
121
122 static ssize_t hwsampler_read(struct file *file, char __user *buf,
123                 size_t count, loff_t *offset)
124 {
125         return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
126 }
127
128 static ssize_t hwsampler_write(struct file *file, char const __user *buf,
129                 size_t count, loff_t *offset)
130 {
131         unsigned long val;
132         int retval;
133
134         if (*offset)
135                 return -EINVAL;
136
137         retval = oprofilefs_ulong_from_user(&val, buf, count);
138         if (retval <= 0)
139                 return retval;
140
141         if (val != 0 && val != 1)
142                 return -EINVAL;
143
144         if (oprofile_started)
145                 /*
146                  * save to do without locking as we set
147                  * hwsampler_running in start() when start_mutex is
148                  * held
149                  */
150                 return -EBUSY;
151
152         hwsampler_enabled = val;
153
154         return count;
155 }
156
157 static const struct file_operations hwsampler_fops = {
158         .read           = hwsampler_read,
159         .write          = hwsampler_write,
160 };
161
162 /*
163  * File ops used for:
164  * /dev/oprofile/0/count
165  * /dev/oprofile/hwsampling/hw_interval  (cpu_type = timer)
166  *
167  * Make sure that the value is within the hardware range.
168  */
169
170 static ssize_t hw_interval_read(struct file *file, char __user *buf,
171                                 size_t count, loff_t *offset)
172 {
173         return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
174                                         count, offset);
175 }
176
177 static ssize_t hw_interval_write(struct file *file, char const __user *buf,
178                                  size_t count, loff_t *offset)
179 {
180         unsigned long val;
181         int retval;
182
183         if (*offset)
184                 return -EINVAL;
185         retval = oprofilefs_ulong_from_user(&val, buf, count);
186         if (retval <= 0)
187                 return retval;
188         if (val < oprofile_min_interval)
189                 oprofile_hw_interval = oprofile_min_interval;
190         else if (val > oprofile_max_interval)
191                 oprofile_hw_interval = oprofile_max_interval;
192         else
193                 oprofile_hw_interval = val;
194
195         return count;
196 }
197
198 static const struct file_operations hw_interval_fops = {
199         .read           = hw_interval_read,
200         .write          = hw_interval_write,
201 };
202
203 /*
204  * File ops used for:
205  * /dev/oprofile/0/event
206  * Only a single event with number 0 is supported with this counter.
207  *
208  * /dev/oprofile/0/unit_mask
209  * This is a dummy file needed by the user space tools.
210  * No value other than 0 is accepted or returned.
211  */
212
213 static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
214                                     size_t count, loff_t *offset)
215 {
216         return oprofilefs_ulong_to_user(0, buf, count, offset);
217 }
218
219 static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
220                                      size_t count, loff_t *offset)
221 {
222         unsigned long val;
223         int retval;
224
225         if (*offset)
226                 return -EINVAL;
227
228         retval = oprofilefs_ulong_from_user(&val, buf, count);
229         if (retval <= 0)
230                 return retval;
231         if (val != 0)
232                 return -EINVAL;
233         return count;
234 }
235
236 static const struct file_operations zero_fops = {
237         .read           = hwsampler_zero_read,
238         .write          = hwsampler_zero_write,
239 };
240
241 /* /dev/oprofile/0/kernel file ops.  */
242
243 static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
244                                      size_t count, loff_t *offset)
245 {
246         return oprofilefs_ulong_to_user(counter_config.kernel,
247                                         buf, count, offset);
248 }
249
250 static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
251                                       size_t count, loff_t *offset)
252 {
253         unsigned long val;
254         int retval;
255
256         if (*offset)
257                 return -EINVAL;
258
259         retval = oprofilefs_ulong_from_user(&val, buf, count);
260         if (retval <= 0)
261                 return retval;
262
263         if (val != 0 && val != 1)
264                 return -EINVAL;
265
266         counter_config.kernel = val;
267
268         return count;
269 }
270
271 static const struct file_operations kernel_fops = {
272         .read           = hwsampler_kernel_read,
273         .write          = hwsampler_kernel_write,
274 };
275
276 /* /dev/oprofile/0/user file ops. */
277
278 static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
279                                    size_t count, loff_t *offset)
280 {
281         return oprofilefs_ulong_to_user(counter_config.user,
282                                         buf, count, offset);
283 }
284
285 static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
286                                     size_t count, loff_t *offset)
287 {
288         unsigned long val;
289         int retval;
290
291         if (*offset)
292                 return -EINVAL;
293
294         retval = oprofilefs_ulong_from_user(&val, buf, count);
295         if (retval <= 0)
296                 return retval;
297
298         if (val != 0 && val != 1)
299                 return -EINVAL;
300
301         counter_config.user = val;
302
303         return count;
304 }
305
306 static const struct file_operations user_fops = {
307         .read           = hwsampler_user_read,
308         .write          = hwsampler_user_write,
309 };
310
311
312 /*
313  * File ops used for: /dev/oprofile/timer/enabled
314  * The value always has to be the inverted value of hwsampler_enabled. So
315  * no separate variable is created. That way we do not need locking.
316  */
317
318 static ssize_t timer_enabled_read(struct file *file, char __user *buf,
319                                   size_t count, loff_t *offset)
320 {
321         return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
322 }
323
324 static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
325                                    size_t count, loff_t *offset)
326 {
327         unsigned long val;
328         int retval;
329
330         if (*offset)
331                 return -EINVAL;
332
333         retval = oprofilefs_ulong_from_user(&val, buf, count);
334         if (retval <= 0)
335                 return retval;
336
337         if (val != 0 && val != 1)
338                 return -EINVAL;
339
340         /* Timer cannot be disabled without having hardware sampling.  */
341         if (val == 0 && !hwsampler_available)
342                 return -EINVAL;
343
344         if (oprofile_started)
345                 /*
346                  * save to do without locking as we set
347                  * hwsampler_running in start() when start_mutex is
348                  * held
349                  */
350                 return -EBUSY;
351
352         hwsampler_enabled = !val;
353
354         return count;
355 }
356
357 static const struct file_operations timer_enabled_fops = {
358         .read           = timer_enabled_read,
359         .write          = timer_enabled_write,
360 };
361
362
363 static int oprofile_create_hwsampling_files(struct dentry *root)
364 {
365         struct dentry *dir;
366
367         dir = oprofilefs_mkdir(root, "timer");
368         if (!dir)
369                 return -EINVAL;
370
371         oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
372
373         if (!hwsampler_available)
374                 return 0;
375
376         /* reinitialize default values */
377         hwsampler_enabled = 1;
378         counter_config.kernel = 1;
379         counter_config.user = 1;
380
381         if (!force_cpu_type) {
382                 /*
383                  * Create the counter file system.  A single virtual
384                  * counter is created which can be used to
385                  * enable/disable hardware sampling dynamically from
386                  * user space.  The user space will configure a single
387                  * counter with a single event.  The value of 'event'
388                  * and 'unit_mask' are not evaluated by the kernel code
389                  * and can only be set to 0.
390                  */
391
392                 dir = oprofilefs_mkdir(root, "0");
393                 if (!dir)
394                         return -EINVAL;
395
396                 oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
397                 oprofilefs_create_file(dir, "event", &zero_fops);
398                 oprofilefs_create_file(dir, "count", &hw_interval_fops);
399                 oprofilefs_create_file(dir, "unit_mask", &zero_fops);
400                 oprofilefs_create_file(dir, "kernel", &kernel_fops);
401                 oprofilefs_create_file(dir, "user", &user_fops);
402                 oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
403                                         &oprofile_sdbt_blocks);
404
405         } else {
406                 /*
407                  * Hardware sampling can be used but the cpu_type is
408                  * forced to timer in order to deal with legacy user
409                  * space tools.  The /dev/oprofile/hwsampling fs is
410                  * provided in that case.
411                  */
412                 dir = oprofilefs_mkdir(root, "hwsampling");
413                 if (!dir)
414                         return -EINVAL;
415
416                 oprofilefs_create_file(dir, "hwsampler",
417                                        &hwsampler_fops);
418                 oprofilefs_create_file(dir, "hw_interval",
419                                        &hw_interval_fops);
420                 oprofilefs_create_ro_ulong(dir, "hw_min_interval",
421                                            &oprofile_min_interval);
422                 oprofilefs_create_ro_ulong(dir, "hw_max_interval",
423                                            &oprofile_max_interval);
424                 oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
425                                         &oprofile_sdbt_blocks);
426         }
427         return 0;
428 }
429
430 static int oprofile_hwsampler_init(struct oprofile_operations *ops)
431 {
432         /*
433          * Initialize the timer mode infrastructure as well in order
434          * to be able to switch back dynamically.  oprofile_timer_init
435          * is not supposed to fail.
436          */
437         if (oprofile_timer_init(ops))
438                 BUG();
439
440         memcpy(&timer_ops, ops, sizeof(timer_ops));
441         ops->create_files = oprofile_create_hwsampling_files;
442
443         /*
444          * If the user space tools do not support newer cpu types,
445          * the force_cpu_type module parameter
446          * can be used to always return \"timer\" as cpu type.
447          */
448         if (force_cpu_type != timer) {
449                 struct cpuid id;
450
451                 get_cpu_id (&id);
452
453                 switch (id.machine) {
454                 case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
455                 case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
456                 case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
457                 case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break;
458                 default: return -ENODEV;
459                 }
460         }
461
462         if (hwsampler_setup())
463                 return -ENODEV;
464
465         /*
466          * Query the range for the sampling interval from the
467          * hardware.
468          */
469         oprofile_min_interval = hwsampler_query_min_interval();
470         if (oprofile_min_interval == 0)
471                 return -ENODEV;
472         oprofile_max_interval = hwsampler_query_max_interval();
473         if (oprofile_max_interval == 0)
474                 return -ENODEV;
475
476         /* The initial value should be sane */
477         if (oprofile_hw_interval < oprofile_min_interval)
478                 oprofile_hw_interval = oprofile_min_interval;
479         if (oprofile_hw_interval > oprofile_max_interval)
480                 oprofile_hw_interval = oprofile_max_interval;
481
482         printk(KERN_INFO "oprofile: System z hardware sampling "
483                "facility found.\n");
484
485         ops->start = oprofile_hwsampler_start;
486         ops->stop = oprofile_hwsampler_stop;
487
488         return 0;
489 }
490
/* Shut the hardware sampler down; called from oprofile_arch_exit(). */
static void oprofile_hwsampler_exit(void)
{
	hwsampler_shutdown();
}
495
/*
 * Callback handed to dump_trace() by s390_backtrace().
 *
 * @data:    pointer to the number of entries that may still be recorded
 * @address: address to record
 *
 * Returns 1 without recording once the budget is used up; otherwise the
 * budget is decremented, @address is added to the trace and 0 is
 * returned.
 */
static int __s390_backtrace(void *data, unsigned long address)
{
	unsigned int *remaining = data;

	if (!*remaining)
		return 1;

	--*remaining;
	oprofile_add_trace(address);

	return 0;
}
506
507 static void s390_backtrace(struct pt_regs *regs, unsigned int depth)
508 {
509         if (user_mode(regs))
510                 return;
511         dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]);
512 }
513
514 int __init oprofile_arch_init(struct oprofile_operations *ops)
515 {
516         ops->backtrace = s390_backtrace;
517
518         /*
519          * -ENODEV is not reported to the caller.  The module itself
520          * will use the timer mode sampling as fallback and this is
521          * always available.
522          */
523         hwsampler_available = oprofile_hwsampler_init(ops) == 0;
524
525         return 0;
526 }
527
/* Architecture hook: tear down what oprofile_arch_init() set up. */
void oprofile_arch_exit(void)
{
	oprofile_hwsampler_exit();
}