ftrace: Decrement count for dyn_ftrace_total_info file
kernel/trace/trace.c (linux-2.6-block.git)
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
45
46 #include "trace.h"
47 #include "trace_output.h"
48
49 /*
50  * On boot up, the ring buffer is set to the minimum size, so that
51  * we do not waste memory on systems that are not using tracing.
52  */
53 bool ring_buffer_expanded;
54
55 /*
56  * We need to change this state when a selftest is running.
57  * A selftest will look into the ring-buffer to count the
58  * entries inserted during the selftest, although some concurrent
59  * insertions into the ring-buffer, such as trace_printk, could occur
60  * at the same time, giving false positive or negative results.
61  */
62 static bool __read_mostly tracing_selftest_running;
63
64 /*
65  * If a tracer is running, we do not want to run SELFTEST.
66  */
67 bool __read_mostly tracing_selftest_disabled;
68
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
73
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
76         { }
77 };
78
79 static int
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
81 {
82         return 0;
83 }
84
85 /*
86  * To prevent the comm cache from being overwritten when no
87  * tracing is active, only save the comm when a trace event
88  * occurs.
89  */
90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
91
92 /*
93  * Kill all tracing for good (never come back).
94  * It is initialized to 1 but will turn to zero if the initialization
95  * of the tracer is successful. But that is the only place that sets
96  * this back to zero.
97  */
98 static int tracing_disabled = 1;
99
100 cpumask_var_t __read_mostly     tracing_buffer_mask;
101
102 /*
103  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
104  *
105  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106  * is set, then ftrace_dump is called. This will output the contents
107  * of the ftrace buffers to the console.  This is very useful for
108  * capturing traces that lead to crashes and outputting them to a
109  * serial console.
110  *
111  * It is off by default, but you can enable it either by specifying
112  * "ftrace_dump_on_oops" on the kernel command line, or by setting
113  * /proc/sys/kernel/ftrace_dump_on_oops.
114  * Set it to 1 to dump the buffers of all CPUs.
115  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
116  */
117
118 enum ftrace_dump_mode ftrace_dump_on_oops;
119
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
122
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
126         struct module                   *mod;
127         unsigned long                   length;
128 };
129
130 union trace_eval_map_item;
131
132 struct trace_eval_map_tail {
133         /*
134          * "end" is first and points to NULL as it must be different
135          * from "mod" or "eval_string"
136          */
137         union trace_eval_map_item       *next;
138         const char                      *end;   /* points to NULL */
139 };
140
141 static DEFINE_MUTEX(trace_eval_mutex);
142
143 /*
144  * The trace_eval_maps are saved in an array with two extra elements,
145  * one at the beginning, and one at the end. The beginning item contains
146  * the count of the saved maps (head.length), and the module they
147  * belong to if not built in (head.mod). The ending item contains a
148  * pointer to the next array of saved eval_map items.
149  */
150 union trace_eval_map_item {
151         struct trace_eval_map           map;
152         struct trace_eval_map_head      head;
153         struct trace_eval_map_tail      tail;
154 };
155
156 static union trace_eval_map_item *trace_eval_maps;
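/*
 * Editor's sketch (illustrative only, not part of the original file):
 * given the layout described in the comment above (head, then the saved
 * maps, then a tail pointing at the next array), one block of eval maps
 * could be walked roughly like this.  The "example_" helper name is
 * hypothetical.
 */
static inline void example_walk_eval_maps(union trace_eval_map_item *block)
{
        while (block) {
                unsigned long len = block[0].head.length;

                /* block[1] .. block[len] hold the saved trace_eval_map entries */

                /* the element after the maps is the tail of this block */
                block = block[len + 1].tail.next;
        }
}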
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
158
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
160
161 #define MAX_TRACER_SIZE         100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
164
165 static bool allocate_snapshot;
166
167 static int __init set_cmdline_ftrace(char *str)
168 {
169         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170         default_bootup_tracer = bootup_tracer_buf;
171         /* We are using ftrace early, expand it */
172         ring_buffer_expanded = true;
173         return 1;
174 }
175 __setup("ftrace=", set_cmdline_ftrace);
176
177 static int __init set_ftrace_dump_on_oops(char *str)
178 {
179         if (*str++ != '=' || !*str) {
180                 ftrace_dump_on_oops = DUMP_ALL;
181                 return 1;
182         }
183
184         if (!strcmp("orig_cpu", str)) {
185                 ftrace_dump_on_oops = DUMP_ORIG;
186                 return 1;
187         }
188
189         return 0;
190 }
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
192
193 static int __init stop_trace_on_warning(char *str)
194 {
195         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196                 __disable_trace_on_warning = 1;
197         return 1;
198 }
199 __setup("traceoff_on_warning", stop_trace_on_warning);
200
201 static int __init boot_alloc_snapshot(char *str)
202 {
203         allocate_snapshot = true;
204         /* We also need the main ring buffer expanded */
205         ring_buffer_expanded = true;
206         return 1;
207 }
208 __setup("alloc_snapshot", boot_alloc_snapshot);
209
210
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
212
213 static int __init set_trace_boot_options(char *str)
214 {
215         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
216         return 0;
217 }
218 __setup("trace_options=", set_trace_boot_options);
219
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
222
223 static int __init set_trace_boot_clock(char *str)
224 {
225         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226         trace_boot_clock = trace_boot_clock_buf;
227         return 0;
228 }
229 __setup("trace_clock=", set_trace_boot_clock);
230
231 static int __init set_tracepoint_printk(char *str)
232 {
233         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234                 tracepoint_printk = 1;
235         return 1;
236 }
237 __setup("tp_printk", set_tracepoint_printk);
238
239 unsigned long long ns2usecs(u64 nsec)
240 {
241         nsec += 500;
242         do_div(nsec, 1000);
243         return nsec;
244 }
245
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS                                             \
248         (FUNCTION_DEFAULT_FLAGS |                                       \
249          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
250          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
251          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
252          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
253
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
256                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
257
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
261
262 /*
263  * The global_trace is the descriptor that holds the top-level tracing
264  * buffers for the live tracing.
265  */
266 static struct trace_array global_trace = {
267         .trace_flags = TRACE_DEFAULT_FLAGS,
268 };
269
270 LIST_HEAD(ftrace_trace_arrays);
271
272 int trace_array_get(struct trace_array *this_tr)
273 {
274         struct trace_array *tr;
275         int ret = -ENODEV;
276
277         mutex_lock(&trace_types_lock);
278         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
279                 if (tr == this_tr) {
280                         tr->ref++;
281                         ret = 0;
282                         break;
283                 }
284         }
285         mutex_unlock(&trace_types_lock);
286
287         return ret;
288 }
289
290 static void __trace_array_put(struct trace_array *this_tr)
291 {
292         WARN_ON(!this_tr->ref);
293         this_tr->ref--;
294 }
295
296 void trace_array_put(struct trace_array *this_tr)
297 {
298         mutex_lock(&trace_types_lock);
299         __trace_array_put(this_tr);
300         mutex_unlock(&trace_types_lock);
301 }
302
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304                               struct ring_buffer *buffer,
305                               struct ring_buffer_event *event)
306 {
307         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308             !filter_match_preds(call->filter, rec)) {
309                 __trace_event_discard_commit(buffer, event);
310                 return 1;
311         }
312
313         return 0;
314 }
315
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
317 {
318         vfree(pid_list->pids);
319         kfree(pid_list);
320 }
321
322 /**
323  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324  * @filtered_pids: The list of pids to check
325  * @search_pid: The PID to find in @filtered_pids
326  *
327  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
328  */
329 bool
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
331 {
332         /*
333          * If pid_max changed after filtered_pids was created, we
334          * by default ignore all pids greater than the previous pid_max.
335          */
336         if (search_pid >= filtered_pids->pid_max)
337                 return false;
338
339         return test_bit(search_pid, filtered_pids->pids);
340 }
341
342 /**
343  * trace_ignore_this_task - should a task be ignored for tracing
344  * @filtered_pids: The list of pids to check
345  * @task: The task that should be ignored if not filtered
346  *
347  * Checks if @task should be traced or not from @filtered_pids.
348  * Returns true if @task should *NOT* be traced.
349  * Returns false if @task should be traced.
350  */
351 bool
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
353 {
354         /*
355          * Return false, because if filtered_pids does not exist,
356          * all pids are good to trace.
357          */
358         if (!filtered_pids)
359                 return false;
360
361         return !trace_find_filtered_pid(filtered_pids, task->pid);
362 }
363
364 /**
365  * trace_pid_filter_add_remove - Add or remove a task from a pid_list
366  * @pid_list: The list to modify
367  * @self: The current task for fork or NULL for exit
368  * @task: The task to add or remove
369  *
370  * When adding a task, if @self is defined, the task is only added if @self
371  * is also included in @pid_list. This happens on fork, where tasks should
372  * only be added when the parent is listed. If @self is NULL, then the
373  * @task pid will be removed from the list, which is what happens on exit
374  * of a task.
375  */
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377                                   struct task_struct *self,
378                                   struct task_struct *task)
379 {
380         if (!pid_list)
381                 return;
382
383         /* For forks, we only add if the forking task is listed */
384         if (self) {
385                 if (!trace_find_filtered_pid(pid_list, self->pid))
386                         return;
387         }
388
389         /* Sorry, but we don't support pid_max changing after setting */
390         if (task->pid >= pid_list->pid_max)
391                 return;
392
393         /* "self" is set for forks, and NULL for exits */
394         if (self)
395                 set_bit(task->pid, pid_list->pids);
396         else
397                 clear_bit(task->pid, pid_list->pids);
398 }
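/*
 * Editor's sketch (illustrative only, not part of the original file):
 * hypothetical fork/exit handlers would feed the helper above exactly as
 * its comment describes -- pass the parent as @self on fork, and NULL as
 * @self on exit.
 */
static inline void example_on_fork(struct trace_pid_list *pid_list,
                                   struct task_struct *parent,
                                   struct task_struct *child)
{
        /* the child is added only if the parent is already being traced */
        trace_filter_add_remove_task(pid_list, parent, child);
}

static inline void example_on_exit(struct trace_pid_list *pid_list,
                                   struct task_struct *task)
{
        /* a NULL @self means "remove task->pid from the list" */
        trace_filter_add_remove_task(pid_list, NULL, task);
}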
399
400 /**
401  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402  * @pid_list: The pid list to show
403  * @v: The last pid that was shown (+1 of the actual pid, so zero can be displayed)
404  * @pos: The position of the file
405  *
406  * This is used by the seq_file "next" operation to iterate the pids
407  * listed in a trace_pid_list structure.
408  *
409  * Returns the pid+1 as we want to display pid of zero, but NULL would
410  * stop the iteration.
411  */
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
413 {
414         unsigned long pid = (unsigned long)v;
415
416         (*pos)++;
417
418         /* pid is already +1 of the actual previous bit */
419         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
420
421         /* Return pid + 1 to allow zero to be represented */
422         if (pid < pid_list->pid_max)
423                 return (void *)(pid + 1);
424
425         return NULL;
426 }
427
428 /**
429  * trace_pid_start - Used for seq_file to start reading pid lists
430  * @pid_list: The pid list to show
431  * @pos: The position of the file
432  *
433  * This is used by seq_file "start" operation to start the iteration
434  * of listing pids.
435  *
436  * Returns the pid+1 as we want to display pid of zero, but NULL would
437  * stop the iteration.
438  */
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
440 {
441         unsigned long pid;
442         loff_t l = 0;
443
444         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445         if (pid >= pid_list->pid_max)
446                 return NULL;
447
448         /* Return pid + 1 so that zero can be the exit value */
449         for (pid++; pid && l < *pos;
450              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
451                 ;
452         return (void *)pid;
453 }
454
455 /**
456  * trace_pid_show - show the current pid in seq_file processing
457  * @m: The seq_file structure to write into
458  * @v: A void pointer of the pid (+1) value to display
459  *
460  * Can be directly used by seq_file operations to display the current
461  * pid value.
462  */
463 int trace_pid_show(struct seq_file *m, void *v)
464 {
465         unsigned long pid = (unsigned long)v - 1;
466
467         seq_printf(m, "%lu\n", pid);
468         return 0;
469 }
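/*
 * Editor's sketch (illustrative only, not part of the original file):
 * trace_pid_start(), trace_pid_next() and trace_pid_show() are meant to
 * back a seq_file iterator.  A hypothetical user would wrap them roughly
 * as below, fetching the pid list from its own private data; any locking
 * or RCU protection around the list is the caller's responsibility and
 * is omitted here.
 */
static void *example_pids_seq_start(struct seq_file *m, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;   /* hypothetical */

        return trace_pid_start(pid_list, pos);
}

static void *example_pids_seq_next(struct seq_file *m, void *v, loff_t *pos)
{
        struct trace_pid_list *pid_list = m->private;   /* hypothetical */

        return trace_pid_next(pid_list, v, pos);
}

static void example_pids_seq_stop(struct seq_file *m, void *v)
{
        /* a real implementation would drop the lock/RCU taken in ->start */
}

static const struct seq_operations example_pids_seq_ops __maybe_unused = {
        .start  = example_pids_seq_start,
        .next   = example_pids_seq_next,
        .stop   = example_pids_seq_stop,
        .show   = trace_pid_show,
};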
470
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE            127
473
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475                     struct trace_pid_list **new_pid_list,
476                     const char __user *ubuf, size_t cnt)
477 {
478         struct trace_pid_list *pid_list;
479         struct trace_parser parser;
480         unsigned long val;
481         int nr_pids = 0;
482         ssize_t read = 0;
483         ssize_t ret = 0;
484         loff_t pos;
485         pid_t pid;
486
487         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
488                 return -ENOMEM;
489
490         /*
491          * Always recreate a new array. The write is an all-or-nothing
492          * operation: a new array is always created when the user adds
493          * new pids. If the operation fails, then the current list is
494          * not modified.
495          */
496         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
497         if (!pid_list)
498                 return -ENOMEM;
499
500         pid_list->pid_max = READ_ONCE(pid_max);
501
502         /* Only truncating will shrink pid_max */
503         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504                 pid_list->pid_max = filtered_pids->pid_max;
505
506         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507         if (!pid_list->pids) {
508                 kfree(pid_list);
509                 return -ENOMEM;
510         }
511
512         if (filtered_pids) {
513                 /* copy the current bits to the new max */
514                 for_each_set_bit(pid, filtered_pids->pids,
515                                  filtered_pids->pid_max) {
516                         set_bit(pid, pid_list->pids);
517                         nr_pids++;
518                 }
519         }
520
521         while (cnt > 0) {
522
523                 pos = 0;
524
525                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526                 if (ret < 0 || !trace_parser_loaded(&parser))
527                         break;
528
529                 read += ret;
530                 ubuf += ret;
531                 cnt -= ret;
532
533                 parser.buffer[parser.idx] = 0;
534
535                 ret = -EINVAL;
536                 if (kstrtoul(parser.buffer, 0, &val))
537                         break;
538                 if (val >= pid_list->pid_max)
539                         break;
540
541                 pid = (pid_t)val;
542
543                 set_bit(pid, pid_list->pids);
544                 nr_pids++;
545
546                 trace_parser_clear(&parser);
547                 ret = 0;
548         }
549         trace_parser_put(&parser);
550
551         if (ret < 0) {
552                 trace_free_pid_list(pid_list);
553                 return ret;
554         }
555
556         if (!nr_pids) {
557                 /* Cleared the list of pids */
558                 trace_free_pid_list(pid_list);
559                 read = ret;
560                 pid_list = NULL;
561         }
562
563         *new_pid_list = pid_list;
564
565         return read;
566 }
567
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
569 {
570         u64 ts;
571
572         /* Early boot up does not have a buffer yet */
573         if (!buf->buffer)
574                 return trace_clock_local();
575
576         ts = ring_buffer_time_stamp(buf->buffer, cpu);
577         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
578
579         return ts;
580 }
581
582 u64 ftrace_now(int cpu)
583 {
584         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
585 }
586
587 /**
588  * tracing_is_enabled - Show if global_trace has been disabled
589  *
590  * Shows if the global trace has been enabled or not. It uses the
591  * mirror flag "buffer_disabled" to be used in fast paths such as for
592  * the irqsoff tracer. But it may be inaccurate due to races. If you
593  * need to know the accurate state, use tracing_is_on() which is a little
594  * slower, but accurate.
595  */
596 int tracing_is_enabled(void)
597 {
598         /*
599          * For quick access (irqsoff uses this in fast path), just
600          * return the mirror variable of the state of the ring buffer.
601          * It's a little racy, but we don't really care.
602          */
603         smp_rmb();
604         return !global_trace.buffer_disabled;
605 }
606
607 /*
608  * trace_buf_size is the size in bytes that is allocated
609  * for a buffer. Note, the number of bytes is always rounded
610  * to page size.
611  *
612  * This number is purposely set to a low number of 16384.
613  * If a dump on oops happens, it is much nicer not to have to
614  * wait for all that output. Anyway, this is configurable at both
615  * boot time and run time.
616  */
617 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
618
619 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
620
621 /* trace_types holds a link list of available tracers. */
622 static struct tracer            *trace_types __read_mostly;
623
624 /*
625  * trace_types_lock is used to protect the trace_types list.
626  */
627 DEFINE_MUTEX(trace_types_lock);
628
629 /*
630  * serialize the access of the ring buffer
631  *
632  * The ring buffer serializes readers, but that is only low-level protection.
633  * The validity of the events (returned by ring_buffer_peek() etc.)
634  * is not protected by the ring buffer.
635  *
636  * The content of events may become garbage if we allow another process to
637  * consume these events concurrently:
638  *   A) the page of the consumed events may become a normal page
639  *      (not a reader page) in the ring buffer, and this page will be
640  *      rewritten by the event producer.
641  *   B) the page of the consumed events may become a page for splice_read,
642  *      and this page will be returned to the system.
643  *
644  * These primitives allow multiple processes to access different per-cpu
645  * ring buffers concurrently.
646  *
647  * These primitives don't distinguish read-only from read-consume access.
648  * Multiple read-only accesses are also serialized.
649  */
650
651 #ifdef CONFIG_SMP
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
654
655 static inline void trace_access_lock(int cpu)
656 {
657         if (cpu == RING_BUFFER_ALL_CPUS) {
658                 /* gain it for accessing the whole ring buffer. */
659                 down_write(&all_cpu_access_lock);
660         } else {
661                 /* gain it for accessing a cpu ring buffer. */
662
663                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664                 down_read(&all_cpu_access_lock);
665
666                 /* Secondly block other access to this @cpu ring buffer. */
667                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
668         }
669 }
670
671 static inline void trace_access_unlock(int cpu)
672 {
673         if (cpu == RING_BUFFER_ALL_CPUS) {
674                 up_write(&all_cpu_access_lock);
675         } else {
676                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677                 up_read(&all_cpu_access_lock);
678         }
679 }
680
681 static inline void trace_access_lock_init(void)
682 {
683         int cpu;
684
685         for_each_possible_cpu(cpu)
686                 mutex_init(&per_cpu(cpu_access_lock, cpu));
687 }
688
689 #else
690
691 static DEFINE_MUTEX(access_lock);
692
693 static inline void trace_access_lock(int cpu)
694 {
695         (void)cpu;
696         mutex_lock(&access_lock);
697 }
698
699 static inline void trace_access_unlock(int cpu)
700 {
701         (void)cpu;
702         mutex_unlock(&access_lock);
703 }
704
705 static inline void trace_access_lock_init(void)
706 {
707 }
708
709 #endif
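/*
 * Editor's sketch (illustrative only, not part of the original file):
 * a reader path would typically bracket its consumption of one cpu's
 * buffer (or of all buffers, with RING_BUFFER_ALL_CPUS) like this:
 */
static inline void example_consume_buffer(int cpu)
{
        trace_access_lock(cpu);
        /* ... peek at or consume events for @cpu here ... */
        trace_access_unlock(cpu);
}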
710
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
713                                  unsigned long flags,
714                                  int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716                                       struct ring_buffer *buffer,
717                                       unsigned long flags,
718                                       int skip, int pc, struct pt_regs *regs);
719
720 #else
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
722                                         unsigned long flags,
723                                         int skip, int pc, struct pt_regs *regs)
724 {
725 }
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727                                       struct ring_buffer *buffer,
728                                       unsigned long flags,
729                                       int skip, int pc, struct pt_regs *regs)
730 {
731 }
732
733 #endif
734
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737                   int type, unsigned long flags, int pc)
738 {
739         struct trace_entry *ent = ring_buffer_event_data(event);
740
741         tracing_generic_entry_update(ent, flags, pc);
742         ent->type = type;
743 }
744
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
747                           int type,
748                           unsigned long len,
749                           unsigned long flags, int pc)
750 {
751         struct ring_buffer_event *event;
752
753         event = ring_buffer_lock_reserve(buffer, len);
754         if (event != NULL)
755                 trace_event_setup(event, type, flags, pc);
756
757         return event;
758 }
759
760 void tracer_tracing_on(struct trace_array *tr)
761 {
762         if (tr->trace_buffer.buffer)
763                 ring_buffer_record_on(tr->trace_buffer.buffer);
764         /*
765          * This flag is looked at when buffers haven't been allocated
766          * yet, or by some tracers (like irqsoff), that just want to
767          * know if the ring buffer has been disabled, but it can handle
768          * races where it gets disabled while we still do a record.
769          * As the check is in the fast path of the tracers, it is more
770          * important to be fast than accurate.
771          */
772         tr->buffer_disabled = 0;
773         /* Make the flag seen by readers */
774         smp_wmb();
775 }
776
777 /**
778  * tracing_on - enable tracing buffers
779  *
780  * This function enables tracing buffers that may have been
781  * disabled with tracing_off.
782  */
783 void tracing_on(void)
784 {
785         tracer_tracing_on(&global_trace);
786 }
787 EXPORT_SYMBOL_GPL(tracing_on);
788
789
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
792 {
793         __this_cpu_write(trace_cmdline_save, true);
794
795         /* If this is the temp buffer, we need to commit fully */
796         if (this_cpu_read(trace_buffered_event) == event) {
797                 /* Length is in event->array[0] */
798                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799                 /* Release the temp buffer */
800                 this_cpu_dec(trace_buffered_event_cnt);
801         } else
802                 ring_buffer_unlock_commit(buffer, event);
803 }
804
805 /**
806  * __trace_puts - write a constant string into the trace buffer.
807  * @ip:    The address of the caller
808  * @str:   The constant string to write
809  * @size:  The size of the string.
810  */
811 int __trace_puts(unsigned long ip, const char *str, int size)
812 {
813         struct ring_buffer_event *event;
814         struct ring_buffer *buffer;
815         struct print_entry *entry;
816         unsigned long irq_flags;
817         int alloc;
818         int pc;
819
820         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
821                 return 0;
822
823         pc = preempt_count();
824
825         if (unlikely(tracing_selftest_running || tracing_disabled))
826                 return 0;
827
828         alloc = sizeof(*entry) + size + 2; /* possible \n added */
829
830         local_save_flags(irq_flags);
831         buffer = global_trace.trace_buffer.buffer;
832         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
833                                             irq_flags, pc);
834         if (!event)
835                 return 0;
836
837         entry = ring_buffer_event_data(event);
838         entry->ip = ip;
839
840         memcpy(&entry->buf, str, size);
841
842         /* Add a newline if necessary */
843         if (entry->buf[size - 1] != '\n') {
844                 entry->buf[size] = '\n';
845                 entry->buf[size + 1] = '\0';
846         } else
847                 entry->buf[size] = '\0';
848
849         __buffer_unlock_commit(buffer, event);
850         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
851
852         return size;
853 }
854 EXPORT_SYMBOL_GPL(__trace_puts);
855
856 /**
857  * __trace_bputs - write the pointer to a constant string into trace buffer
858  * @ip:    The address of the caller
859  * @str:   The constant string whose pointer is written into the buffer
860  */
861 int __trace_bputs(unsigned long ip, const char *str)
862 {
863         struct ring_buffer_event *event;
864         struct ring_buffer *buffer;
865         struct bputs_entry *entry;
866         unsigned long irq_flags;
867         int size = sizeof(struct bputs_entry);
868         int pc;
869
870         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
871                 return 0;
872
873         pc = preempt_count();
874
875         if (unlikely(tracing_selftest_running || tracing_disabled))
876                 return 0;
877
878         local_save_flags(irq_flags);
879         buffer = global_trace.trace_buffer.buffer;
880         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
881                                             irq_flags, pc);
882         if (!event)
883                 return 0;
884
885         entry = ring_buffer_event_data(event);
886         entry->ip                       = ip;
887         entry->str                      = str;
888
889         __buffer_unlock_commit(buffer, event);
890         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
891
892         return 1;
893 }
894 EXPORT_SYMBOL_GPL(__trace_bputs);
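/*
 * Editor's note (not part of the original file): callers normally reach
 * __trace_puts()/__trace_bputs() through the trace_puts() macro (which,
 * at the time of this code, lives in include/linux/kernel.h).  The macro
 * picks one of the two depending on whether the string is a built-in
 * constant, e.g.:
 *
 *      trace_puts("reached the suspicious branch\n");
 */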
895
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
898 {
899         struct tracer *tracer = tr->current_trace;
900         unsigned long flags;
901
902         if (in_nmi()) {
903                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904                 internal_trace_puts("*** snapshot is being ignored        ***\n");
905                 return;
906         }
907
908         if (!tr->allocated_snapshot) {
909                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910                 internal_trace_puts("*** stopping trace here!   ***\n");
911                 tracing_off();
912                 return;
913         }
914
915         /* Note, the snapshot can not be used while the current tracer is using it */
916         if (tracer->use_max_tr) {
917                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
919                 return;
920         }
921
922         local_irq_save(flags);
923         update_max_tr(tr, current, smp_processor_id());
924         local_irq_restore(flags);
925 }
926
927 /**
928  * trace_snapshot - take a snapshot of the current buffer.
929  *
930  * This causes a swap between the snapshot buffer and the current live
931  * tracing buffer. You can use this to take snapshots of the live
932  * trace when some condition is triggered, but continue to trace.
933  *
934  * Note, make sure to allocate the snapshot with either
935  * a tracing_snapshot_alloc(), or by doing it manually
936  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
937  *
938  * If the snapshot buffer is not allocated, it will stop tracing.
939  * Basically making a permanent snapshot.
940  */
941 void tracing_snapshot(void)
942 {
943         struct trace_array *tr = &global_trace;
944
945         tracing_snapshot_instance(tr);
946 }
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
948
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950                                         struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
952
953 static int alloc_snapshot(struct trace_array *tr)
954 {
955         int ret;
956
957         if (!tr->allocated_snapshot) {
958
959                 /* allocate spare buffer */
960                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
962                 if (ret < 0)
963                         return ret;
964
965                 tr->allocated_snapshot = true;
966         }
967
968         return 0;
969 }
970
971 static void free_snapshot(struct trace_array *tr)
972 {
973         /*
974          * We don't free the ring buffer, instead we resize it, because
975          * the max_tr ring buffer has some state (e.g. ring->clock) and
976          * we want to preserve it.
977          */
978         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979         set_buffer_entries(&tr->max_buffer, 1);
980         tracing_reset_online_cpus(&tr->max_buffer);
981         tr->allocated_snapshot = false;
982 }
983
984 /**
985  * tracing_alloc_snapshot - allocate snapshot buffer.
986  *
987  * This only allocates the snapshot buffer if it isn't already
988  * allocated - it doesn't also take a snapshot.
989  *
990  * This is meant to be used in cases where the snapshot buffer needs
991  * to be set up for events that can't sleep but need to be able to
992  * trigger a snapshot.
993  */
994 int tracing_alloc_snapshot(void)
995 {
996         struct trace_array *tr = &global_trace;
997         int ret;
998
999         ret = alloc_snapshot(tr);
1000         WARN_ON(ret < 0);
1001
1002         return ret;
1003 }
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1005
1006 /**
1007  * trace_snapshot_alloc - allocate and take a snapshot of the current buffer.
1008  *
1009  * This is similar to trace_snapshot(), but it will allocate the
1010  * snapshot buffer if it isn't already allocated. Use this only
1011  * where it is safe to sleep, as the allocation may sleep.
1012  *
1013  * This causes a swap between the snapshot buffer and the current live
1014  * tracing buffer. You can use this to take snapshots of the live
1015  * trace when some condition is triggered, but continue to trace.
1016  */
1017 void tracing_snapshot_alloc(void)
1018 {
1019         int ret;
1020
1021         ret = tracing_alloc_snapshot();
1022         if (ret < 0)
1023                 return;
1024
1025         tracing_snapshot();
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1028 #else
1029 void tracing_snapshot(void)
1030 {
1031         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1032 }
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1035 {
1036         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1037         return -ENODEV;
1038 }
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1041 {
1042         /* Give warning */
1043         tracing_snapshot();
1044 }
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
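/*
 * Editor's sketch (illustrative only, not part of the original file):
 * the intended in-kernel use of the calls above, as their comments
 * describe it -- allocate the spare buffer once from a context that may
 * sleep, then take snapshots wherever the interesting condition fires.
 */
static inline int example_snapshot_setup(void)
{
        /* may sleep; returns -ENODEV if snapshots are not configured in */
        return tracing_alloc_snapshot();
}

static inline void example_snapshot_on_condition(bool hit)
{
        if (hit)
                tracing_snapshot();     /* swap the live buffer with the spare */
}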
1047
1048 void tracer_tracing_off(struct trace_array *tr)
1049 {
1050         if (tr->trace_buffer.buffer)
1051                 ring_buffer_record_off(tr->trace_buffer.buffer);
1052         /*
1053          * This flag is looked at when buffers haven't been allocated
1054          * yet, or by some tracers (like irqsoff), that just want to
1055          * know if the ring buffer has been disabled, but it can handle
1056          * races where it gets disabled while we still do a record.
1057          * As the check is in the fast path of the tracers, it is more
1058          * important to be fast than accurate.
1059          */
1060         tr->buffer_disabled = 1;
1061         /* Make the flag seen by readers */
1062         smp_wmb();
1063 }
1064
1065 /**
1066  * tracing_off - turn off tracing buffers
1067  *
1068  * This function stops the tracing buffers from recording data.
1069  * It does not disable any overhead the tracers themselves may
1070  * be causing. This function simply causes all recording to
1071  * the ring buffers to fail.
1072  */
1073 void tracing_off(void)
1074 {
1075         tracer_tracing_off(&global_trace);
1076 }
1077 EXPORT_SYMBOL_GPL(tracing_off);
1078
1079 void disable_trace_on_warning(void)
1080 {
1081         if (__disable_trace_on_warning)
1082                 tracing_off();
1083 }
1084
1085 /**
1086  * tracer_tracing_is_on - show real state of ring buffer enabled
1087  * @tr: the trace array to check whether the ring buffer is enabled
1088  *
1089  * Shows real state of the ring buffer if it is enabled or not.
1090  */
1091 int tracer_tracing_is_on(struct trace_array *tr)
1092 {
1093         if (tr->trace_buffer.buffer)
1094                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095         return !tr->buffer_disabled;
1096 }
1097
1098 /**
1099  * tracing_is_on - show state of ring buffers enabled
1100  */
1101 int tracing_is_on(void)
1102 {
1103         return tracer_tracing_is_on(&global_trace);
1104 }
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
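/*
 * Editor's sketch (illustrative only, not part of the original file):
 * tracing_on()/tracing_off() are typically used to bracket a suspect
 * code path so the ring buffer stops recording right after the
 * interesting events, leaving them in place for later inspection.
 */
static inline void example_trace_window(void)
{
        tracing_on();
        /* ... the code being debugged runs here ... */
        tracing_off();
}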
1106
1107 static int __init set_buf_size(char *str)
1108 {
1109         unsigned long buf_size;
1110
1111         if (!str)
1112                 return 0;
1113         buf_size = memparse(str, &str);
1114         /* nr_entries can not be zero */
1115         if (buf_size == 0)
1116                 return 0;
1117         trace_buf_size = buf_size;
1118         return 1;
1119 }
1120 __setup("trace_buf_size=", set_buf_size);
1121
1122 static int __init set_tracing_thresh(char *str)
1123 {
1124         unsigned long threshold;
1125         int ret;
1126
1127         if (!str)
1128                 return 0;
1129         ret = kstrtoul(str, 0, &threshold);
1130         if (ret < 0)
1131                 return 0;
1132         tracing_thresh = threshold * 1000;
1133         return 1;
1134 }
1135 __setup("tracing_thresh=", set_tracing_thresh);
1136
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1138 {
1139         return nsecs / 1000;
1140 }
1141
1142 /*
1143  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146  * of strings in the order that the evals (enum) were defined.
1147  */
1148 #undef C
1149 #define C(a, b) b
1150
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1153         TRACE_FLAGS
1154         NULL
1155 };
1156
1157 static struct {
1158         u64 (*func)(void);
1159         const char *name;
1160         int in_ns;              /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162         { trace_clock_local,            "local",        1 },
1163         { trace_clock_global,           "global",       1 },
1164         { trace_clock_counter,          "counter",      0 },
1165         { trace_clock_jiffies,          "uptime",       0 },
1166         { trace_clock,                  "perf",         1 },
1167         { ktime_get_mono_fast_ns,       "mono",         1 },
1168         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1169         { ktime_get_boot_fast_ns,       "boot",         1 },
1170         ARCH_TRACE_CLOCKS
1171 };
1172
1173 /*
1174  * trace_parser_get_init - gets the buffer for trace parser
1175  */
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1177 {
1178         memset(parser, 0, sizeof(*parser));
1179
1180         parser->buffer = kmalloc(size, GFP_KERNEL);
1181         if (!parser->buffer)
1182                 return 1;
1183
1184         parser->size = size;
1185         return 0;
1186 }
1187
1188 /*
1189  * trace_parser_put - frees the buffer for trace parser
1190  */
1191 void trace_parser_put(struct trace_parser *parser)
1192 {
1193         kfree(parser->buffer);
1194         parser->buffer = NULL;
1195 }
1196
1197 /*
1198  * trace_get_user - reads the user input string separated by space
1199  * (matched by isspace(ch))
1200  *
1201  * For each string found the 'struct trace_parser' is updated,
1202  * and the function returns.
1203  *
1204  * Returns number of bytes read.
1205  *
1206  * See kernel/trace/trace.h for 'struct trace_parser' details.
1207  */
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209         size_t cnt, loff_t *ppos)
1210 {
1211         char ch;
1212         size_t read = 0;
1213         ssize_t ret;
1214
1215         if (!*ppos)
1216                 trace_parser_clear(parser);
1217
1218         ret = get_user(ch, ubuf++);
1219         if (ret)
1220                 goto out;
1221
1222         read++;
1223         cnt--;
1224
1225         /*
1226          * The parser is not finished with the last write,
1227          * continue reading the user input without skipping spaces.
1228          */
1229         if (!parser->cont) {
1230                 /* skip white space */
1231                 while (cnt && isspace(ch)) {
1232                         ret = get_user(ch, ubuf++);
1233                         if (ret)
1234                                 goto out;
1235                         read++;
1236                         cnt--;
1237                 }
1238
1239                 /* only spaces were written */
1240                 if (isspace(ch)) {
1241                         *ppos += read;
1242                         ret = read;
1243                         goto out;
1244                 }
1245
1246                 parser->idx = 0;
1247         }
1248
1249         /* read the non-space input */
1250         while (cnt && !isspace(ch)) {
1251                 if (parser->idx < parser->size - 1)
1252                         parser->buffer[parser->idx++] = ch;
1253                 else {
1254                         ret = -EINVAL;
1255                         goto out;
1256                 }
1257                 ret = get_user(ch, ubuf++);
1258                 if (ret)
1259                         goto out;
1260                 read++;
1261                 cnt--;
1262         }
1263
1264         /* We either got finished input or we have to wait for another call. */
1265         if (isspace(ch)) {
1266                 parser->buffer[parser->idx] = 0;
1267                 parser->cont = false;
1268         } else if (parser->idx < parser->size - 1) {
1269                 parser->cont = true;
1270                 parser->buffer[parser->idx++] = ch;
1271         } else {
1272                 ret = -EINVAL;
1273                 goto out;
1274         }
1275
1276         *ppos += read;
1277         ret = read;
1278
1279 out:
1280         return ret;
1281 }
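/*
 * Editor's sketch (illustrative only, not part of the original file):
 * the usual shape of a write handler built on the parser above;
 * trace_pid_write() earlier in this file follows the same pattern.
 */
static inline ssize_t example_parse_words(const char __user *ubuf, size_t cnt)
{
        struct trace_parser parser;
        ssize_t read = 0;
        ssize_t ret = 0;
        loff_t pos;

        /* 64 is an arbitrary word-size limit for this sketch */
        if (trace_parser_get_init(&parser, 64))
                return -ENOMEM;

        while (cnt > 0) {
                pos = 0;

                ret = trace_get_user(&parser, ubuf, cnt, &pos);
                if (ret < 0 || !trace_parser_loaded(&parser))
                        break;

                read += ret;
                ubuf += ret;
                cnt -= ret;

                /* terminate and consume the word that was just parsed */
                parser.buffer[parser.idx] = 0;

                trace_parser_clear(&parser);
        }
        trace_parser_put(&parser);

        return ret < 0 ? ret : read;
}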
1282
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1285 {
1286         int len;
1287
1288         if (trace_seq_used(s) <= s->seq.readpos)
1289                 return -EBUSY;
1290
1291         len = trace_seq_used(s) - s->seq.readpos;
1292         if (cnt > len)
1293                 cnt = len;
1294         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1295
1296         s->seq.readpos += cnt;
1297         return cnt;
1298 }
1299
1300 unsigned long __read_mostly     tracing_thresh;
1301
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1303 /*
1304  * Copy the new maximum trace into the separate maximum-trace
1305  * structure. (this way the maximum trace is permanently saved,
1306  * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1307  */
1308 static void
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1310 {
1311         struct trace_buffer *trace_buf = &tr->trace_buffer;
1312         struct trace_buffer *max_buf = &tr->max_buffer;
1313         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1315
1316         max_buf->cpu = cpu;
1317         max_buf->time_start = data->preempt_timestamp;
1318
1319         max_data->saved_latency = tr->max_latency;
1320         max_data->critical_start = data->critical_start;
1321         max_data->critical_end = data->critical_end;
1322
1323         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324         max_data->pid = tsk->pid;
1325         /*
1326          * If tsk == current, then use current_uid(), as that does not use
1327          * RCU. The irq tracer can be called out of RCU scope.
1328          */
1329         if (tsk == current)
1330                 max_data->uid = current_uid();
1331         else
1332                 max_data->uid = task_uid(tsk);
1333
1334         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335         max_data->policy = tsk->policy;
1336         max_data->rt_priority = tsk->rt_priority;
1337
1338         /* record this tasks comm */
1339         tracing_record_cmdline(tsk);
1340 }
1341
1342 /**
1343  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1344  * @tr: tracer
1345  * @tsk: the task with the latency
1346  * @cpu: The cpu that initiated the trace.
1347  *
1348  * Flip the buffers between the @tr and the max_tr and record information
1349  * about which task was the cause of this latency.
1350  */
1351 void
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1353 {
1354         struct ring_buffer *buf;
1355
1356         if (tr->stop_count)
1357                 return;
1358
1359         WARN_ON_ONCE(!irqs_disabled());
1360
1361         if (!tr->allocated_snapshot) {
1362                 /* Only the nop tracer should hit this when disabling */
1363                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1364                 return;
1365         }
1366
1367         arch_spin_lock(&tr->max_lock);
1368
1369         buf = tr->trace_buffer.buffer;
1370         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371         tr->max_buffer.buffer = buf;
1372
1373         __update_max_tr(tr, tsk, cpu);
1374         arch_spin_unlock(&tr->max_lock);
1375 }
1376
1377 /**
1378  * update_max_tr_single - only copy one trace over, and reset the rest
1379  * @tr: tracer
1380  * @tsk: task with the latency
1381  * @cpu: the cpu of the buffer to copy.
1382  *
1383  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1384  */
1385 void
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1387 {
1388         int ret;
1389
1390         if (tr->stop_count)
1391                 return;
1392
1393         WARN_ON_ONCE(!irqs_disabled());
1394         if (!tr->allocated_snapshot) {
1395                 /* Only the nop tracer should hit this when disabling */
1396                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1397                 return;
1398         }
1399
1400         arch_spin_lock(&tr->max_lock);
1401
1402         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1403
1404         if (ret == -EBUSY) {
1405                 /*
1406                  * We failed to swap the buffer due to a commit taking
1407                  * place on this CPU. We fail to record, but we reset
1408                  * the max trace buffer (no one writes directly to it)
1409                  * and flag that it failed.
1410                  */
1411                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412                         "Failed to swap buffers due to commit in progress\n");
1413         }
1414
1415         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1416
1417         __update_max_tr(tr, tsk, cpu);
1418         arch_spin_unlock(&tr->max_lock);
1419 }
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1421
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1423 {
1424         /* Iterators are static, they should be filled or empty */
1425         if (trace_buffer_iter(iter, iter->cpu_file))
1426                 return 0;
1427
1428         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1429                                 full);
1430 }
1431
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1434
1435 struct trace_selftests {
1436         struct list_head                list;
1437         struct tracer                   *type;
1438 };
1439
1440 static LIST_HEAD(postponed_selftests);
1441
1442 static int save_selftest(struct tracer *type)
1443 {
1444         struct trace_selftests *selftest;
1445
1446         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1447         if (!selftest)
1448                 return -ENOMEM;
1449
1450         selftest->type = type;
1451         list_add(&selftest->list, &postponed_selftests);
1452         return 0;
1453 }
1454
1455 static int run_tracer_selftest(struct tracer *type)
1456 {
1457         struct trace_array *tr = &global_trace;
1458         struct tracer *saved_tracer = tr->current_trace;
1459         int ret;
1460
1461         if (!type->selftest || tracing_selftest_disabled)
1462                 return 0;
1463
1464         /*
1465          * If a tracer registers early in boot up (before scheduling is
1466          * initialized and such), then do not run its selftests yet.
1467          * Instead, run it a little later in the boot process.
1468          */
1469         if (!selftests_can_run)
1470                 return save_selftest(type);
1471
1472         /*
1473          * Run a selftest on this tracer.
1474          * Here we reset the trace buffer, and set the current
1475          * tracer to be this tracer. The tracer can then run some
1476          * internal tracing to verify that everything is in order.
1477          * If we fail, we do not register this tracer.
1478          */
1479         tracing_reset_online_cpus(&tr->trace_buffer);
1480
1481         tr->current_trace = type;
1482
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484         if (type->use_max_tr) {
1485                 /* If we expanded the buffers, make sure the max is expanded too */
1486                 if (ring_buffer_expanded)
1487                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488                                            RING_BUFFER_ALL_CPUS);
1489                 tr->allocated_snapshot = true;
1490         }
1491 #endif
1492
1493         /* the test is responsible for initializing and enabling */
1494         pr_info("Testing tracer %s: ", type->name);
1495         ret = type->selftest(type, tr);
1496         /* the test is responsible for resetting too */
1497         tr->current_trace = saved_tracer;
1498         if (ret) {
1499                 printk(KERN_CONT "FAILED!\n");
1500                 /* Add the warning after printing 'FAILED' */
1501                 WARN_ON(1);
1502                 return -1;
1503         }
1504         /* Only reset on passing, to avoid touching corrupted buffers */
1505         tracing_reset_online_cpus(&tr->trace_buffer);
1506
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508         if (type->use_max_tr) {
1509                 tr->allocated_snapshot = false;
1510
1511                 /* Shrink the max buffer again */
1512                 if (ring_buffer_expanded)
1513                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1514                                            RING_BUFFER_ALL_CPUS);
1515         }
1516 #endif
1517
1518         printk(KERN_CONT "PASSED\n");
1519         return 0;
1520 }
1521
1522 static __init int init_trace_selftests(void)
1523 {
1524         struct trace_selftests *p, *n;
1525         struct tracer *t, **last;
1526         int ret;
1527
1528         selftests_can_run = true;
1529
1530         mutex_lock(&trace_types_lock);
1531
1532         if (list_empty(&postponed_selftests))
1533                 goto out;
1534
1535         pr_info("Running postponed tracer tests:\n");
1536
1537         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538                 ret = run_tracer_selftest(p->type);
1539                 /* If the test fails, then warn and remove from available_tracers */
1540                 if (ret < 0) {
1541                         WARN(1, "tracer: %s failed selftest, disabling\n",
1542                              p->type->name);
1543                         last = &trace_types;
1544                         for (t = trace_types; t; t = t->next) {
1545                                 if (t == p->type) {
1546                                         *last = t->next;
1547                                         break;
1548                                 }
1549                                 last = &t->next;
1550                         }
1551                 }
1552                 list_del(&p->list);
1553                 kfree(p);
1554         }
1555
1556  out:
1557         mutex_unlock(&trace_types_lock);
1558
1559         return 0;
1560 }
1561 core_initcall(init_trace_selftests);
1562 #else
1563 static inline int run_tracer_selftest(struct tracer *type)
1564 {
1565         return 0;
1566 }
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1568
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1570
1571 static void __init apply_trace_boot_options(void);
1572
1573 /**
1574  * register_tracer - register a tracer with the ftrace system.
1575  * @type: the plugin for the tracer
1576  *
1577  * Register a new plugin tracer.
1578  */
1579 int __init register_tracer(struct tracer *type)
1580 {
1581         struct tracer *t;
1582         int ret = 0;
1583
1584         if (!type->name) {
1585                 pr_info("Tracer must have a name\n");
1586                 return -1;
1587         }
1588
1589         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1591                 return -1;
1592         }
1593
1594         mutex_lock(&trace_types_lock);
1595
1596         tracing_selftest_running = true;
1597
1598         for (t = trace_types; t; t = t->next) {
1599                 if (strcmp(type->name, t->name) == 0) {
1600                         /* already found */
1601                         pr_info("Tracer %s already registered\n",
1602                                 type->name);
1603                         ret = -1;
1604                         goto out;
1605                 }
1606         }
1607
1608         if (!type->set_flag)
1609                 type->set_flag = &dummy_set_flag;
1610         if (!type->flags) {
1611                 /* allocate a dummy tracer_flags */
1612                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1613                 if (!type->flags) {
1614                         ret = -ENOMEM;
1615                         goto out;
1616                 }
1617                 type->flags->val = 0;
1618                 type->flags->opts = dummy_tracer_opt;
1619         } else
1620                 if (!type->flags->opts)
1621                         type->flags->opts = dummy_tracer_opt;
1622
1623         /* store the tracer for __set_tracer_option */
1624         type->flags->trace = type;
1625
1626         ret = run_tracer_selftest(type);
1627         if (ret < 0)
1628                 goto out;
1629
1630         type->next = trace_types;
1631         trace_types = type;
1632         add_tracer_options(&global_trace, type);
1633
1634  out:
1635         tracing_selftest_running = false;
1636         mutex_unlock(&trace_types_lock);
1637
1638         if (ret || !default_bootup_tracer)
1639                 goto out_unlock;
1640
1641         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1642                 goto out_unlock;
1643
1644         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645         /* Do we want this tracer to start on bootup? */
1646         tracing_set_tracer(&global_trace, type->name);
1647         default_bootup_tracer = NULL;
1648
1649         apply_trace_boot_options();
1650
1651         /* disable other selftests, since this will break them. */
1652         tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1655                type->name);
1656 #endif
1657
1658  out_unlock:
1659         return ret;
1660 }
1661
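/*
 * A minimal registration sketch (not taken from an in-tree tracer): the
 * "example" name and callbacks below are hypothetical, but the fields and
 * the core_initcall() pattern mirror how built-in tracers hook in, since
 * register_tracer() is __init and must run during boot.
 *
 *	static int example_trace_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_trace_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_trace_init,
 *		.reset	= example_trace_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 */
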
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1663 {
1664         struct ring_buffer *buffer = buf->buffer;
1665
1666         if (!buffer)
1667                 return;
1668
1669         ring_buffer_record_disable(buffer);
1670
1671         /* Make sure all commits have finished */
1672         synchronize_sched();
1673         ring_buffer_reset_cpu(buffer, cpu);
1674
1675         ring_buffer_record_enable(buffer);
1676 }
1677
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1679 {
1680         struct ring_buffer *buffer = buf->buffer;
1681         int cpu;
1682
1683         if (!buffer)
1684                 return;
1685
1686         ring_buffer_record_disable(buffer);
1687
1688         /* Make sure all commits have finished */
1689         synchronize_sched();
1690
1691         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1692
1693         for_each_online_cpu(cpu)
1694                 ring_buffer_reset_cpu(buffer, cpu);
1695
1696         ring_buffer_record_enable(buffer);
1697 }
1698
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1701 {
1702         struct trace_array *tr;
1703
1704         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705                 tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707                 tracing_reset_online_cpus(&tr->max_buffer);
1708 #endif
1709         }
1710 }
1711
1712 #define SAVED_CMDLINES_DEFAULT 128
1713 #define NO_CMDLINE_MAP UINT_MAX
1714 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1715 struct saved_cmdlines_buffer {
1716         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1717         unsigned *map_cmdline_to_pid;
1718         unsigned cmdline_num;
1719         int cmdline_idx;
1720         char *saved_cmdlines;
1721 };
1722 static struct saved_cmdlines_buffer *savedcmd;
1723
1724 /* temporary disable recording */
1725 static atomic_t trace_record_cmdline_disabled __read_mostly;
1726
1727 static inline char *get_saved_cmdlines(int idx)
1728 {
1729         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1730 }
1731
1732 static inline void set_cmdline(int idx, const char *cmdline)
1733 {
1734         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1735 }
1736
1737 static int allocate_cmdlines_buffer(unsigned int val,
1738                                     struct saved_cmdlines_buffer *s)
1739 {
1740         s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1741                                         GFP_KERNEL);
1742         if (!s->map_cmdline_to_pid)
1743                 return -ENOMEM;
1744
1745         s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1746         if (!s->saved_cmdlines) {
1747                 kfree(s->map_cmdline_to_pid);
1748                 return -ENOMEM;
1749         }
1750
1751         s->cmdline_idx = 0;
1752         s->cmdline_num = val;
1753         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1754                sizeof(s->map_pid_to_cmdline));
1755         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1756                val * sizeof(*s->map_cmdline_to_pid));
1757
1758         return 0;
1759 }
1760
1761 static int trace_create_savedcmd(void)
1762 {
1763         int ret;
1764
1765         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1766         if (!savedcmd)
1767                 return -ENOMEM;
1768
1769         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1770         if (ret < 0) {
1771                 kfree(savedcmd);
1772                 savedcmd = NULL;
1773                 return -ENOMEM;
1774         }
1775
1776         return 0;
1777 }
1778
1779 int is_tracing_stopped(void)
1780 {
1781         return global_trace.stop_count;
1782 }
1783
1784 /**
1785  * tracing_start - quick start of the tracer
1786  *
1787  * If tracing is enabled but was stopped by tracing_stop,
1788  * this will start the tracer back up.
1789  */
1790 void tracing_start(void)
1791 {
1792         struct ring_buffer *buffer;
1793         unsigned long flags;
1794
1795         if (tracing_disabled)
1796                 return;
1797
1798         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1799         if (--global_trace.stop_count) {
1800                 if (global_trace.stop_count < 0) {
1801                         /* Someone screwed up their debugging */
1802                         WARN_ON_ONCE(1);
1803                         global_trace.stop_count = 0;
1804                 }
1805                 goto out;
1806         }
1807
1808         /* Prevent the buffers from switching */
1809         arch_spin_lock(&global_trace.max_lock);
1810
1811         buffer = global_trace.trace_buffer.buffer;
1812         if (buffer)
1813                 ring_buffer_record_enable(buffer);
1814
1815 #ifdef CONFIG_TRACER_MAX_TRACE
1816         buffer = global_trace.max_buffer.buffer;
1817         if (buffer)
1818                 ring_buffer_record_enable(buffer);
1819 #endif
1820
1821         arch_spin_unlock(&global_trace.max_lock);
1822
1823  out:
1824         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1825 }
1826
1827 static void tracing_start_tr(struct trace_array *tr)
1828 {
1829         struct ring_buffer *buffer;
1830         unsigned long flags;
1831
1832         if (tracing_disabled)
1833                 return;
1834
1835         /* If global, we need to also start the max tracer */
1836         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1837                 return tracing_start();
1838
1839         raw_spin_lock_irqsave(&tr->start_lock, flags);
1840
1841         if (--tr->stop_count) {
1842                 if (tr->stop_count < 0) {
1843                         /* Someone screwed up their debugging */
1844                         WARN_ON_ONCE(1);
1845                         tr->stop_count = 0;
1846                 }
1847                 goto out;
1848         }
1849
1850         buffer = tr->trace_buffer.buffer;
1851         if (buffer)
1852                 ring_buffer_record_enable(buffer);
1853
1854  out:
1855         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1856 }
1857
1858 /**
1859  * tracing_stop - quick stop of the tracer
1860  *
1861  * Light weight way to stop tracing. Use in conjunction with
1862  * tracing_start.
1863  */
1864 void tracing_stop(void)
1865 {
1866         struct ring_buffer *buffer;
1867         unsigned long flags;
1868
1869         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1870         if (global_trace.stop_count++)
1871                 goto out;
1872
1873         /* Prevent the buffers from switching */
1874         arch_spin_lock(&global_trace.max_lock);
1875
1876         buffer = global_trace.trace_buffer.buffer;
1877         if (buffer)
1878                 ring_buffer_record_disable(buffer);
1879
1880 #ifdef CONFIG_TRACER_MAX_TRACE
1881         buffer = global_trace.max_buffer.buffer;
1882         if (buffer)
1883                 ring_buffer_record_disable(buffer);
1884 #endif
1885
1886         arch_spin_unlock(&global_trace.max_lock);
1887
1888  out:
1889         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1890 }
1891
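/*
 * Usage sketch: tracing_stop() and tracing_start() nest via stop_count,
 * so a caller can bracket a noisy region without caring whether someone
 * else has already stopped tracing (do_noisy_work() is hypothetical):
 *
 *	tracing_stop();
 *	do_noisy_work();
 *	tracing_start();
 */
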
1892 static void tracing_stop_tr(struct trace_array *tr)
1893 {
1894         struct ring_buffer *buffer;
1895         unsigned long flags;
1896
1897         /* If global, we need to also stop the max tracer */
1898         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1899                 return tracing_stop();
1900
1901         raw_spin_lock_irqsave(&tr->start_lock, flags);
1902         if (tr->stop_count++)
1903                 goto out;
1904
1905         buffer = tr->trace_buffer.buffer;
1906         if (buffer)
1907                 ring_buffer_record_disable(buffer);
1908
1909  out:
1910         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1911 }
1912
1913 static int trace_save_cmdline(struct task_struct *tsk)
1914 {
1915         unsigned pid, idx;
1916
1917         if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1918                 return 0;
1919
1920         /*
1921          * It's not the end of the world if we don't get
1922          * the lock, but we also don't want to spin
1923          * nor do we want to disable interrupts,
1924          * so if we miss here, then better luck next time.
1925          */
1926         if (!arch_spin_trylock(&trace_cmdline_lock))
1927                 return 0;
1928
1929         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1930         if (idx == NO_CMDLINE_MAP) {
1931                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1932
1933                 /*
1934                  * Check whether the cmdline buffer at idx has a pid
1935                  * mapped. We are going to overwrite that entry so we
1936                  * need to clear the map_pid_to_cmdline. Otherwise we
1937                  * would read the new comm for the old pid.
1938                  */
1939                 pid = savedcmd->map_cmdline_to_pid[idx];
1940                 if (pid != NO_CMDLINE_MAP)
1941                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1942
1943                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1944                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1945
1946                 savedcmd->cmdline_idx = idx;
1947         }
1948
1949         set_cmdline(idx, tsk->comm);
1950
1951         arch_spin_unlock(&trace_cmdline_lock);
1952
1953         return 1;
1954 }
1955
1956 static void __trace_find_cmdline(int pid, char comm[])
1957 {
1958         unsigned map;
1959
1960         if (!pid) {
1961                 strcpy(comm, "<idle>");
1962                 return;
1963         }
1964
1965         if (WARN_ON_ONCE(pid < 0)) {
1966                 strcpy(comm, "<XXX>");
1967                 return;
1968         }
1969
1970         if (pid > PID_MAX_DEFAULT) {
1971                 strcpy(comm, "<...>");
1972                 return;
1973         }
1974
1975         map = savedcmd->map_pid_to_cmdline[pid];
1976         if (map != NO_CMDLINE_MAP)
1977                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1978         else
1979                 strcpy(comm, "<...>");
1980 }
1981
1982 void trace_find_cmdline(int pid, char comm[])
1983 {
1984         preempt_disable();
1985         arch_spin_lock(&trace_cmdline_lock);
1986
1987         __trace_find_cmdline(pid, comm);
1988
1989         arch_spin_unlock(&trace_cmdline_lock);
1990         preempt_enable();
1991 }
1992
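/*
 * Usage sketch: output code resolves a recorded pid back to a comm with
 * trace_find_cmdline(); the buffer must hold at least TASK_COMM_LEN bytes
 * (the trace_seq "s" and "entry" are assumed to come from a print handler):
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	trace_find_cmdline(entry->pid, comm);
 *	trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid);
 */
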
1993 void tracing_record_cmdline(struct task_struct *tsk)
1994 {
1995         if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1996                 return;
1997
1998         if (!__this_cpu_read(trace_cmdline_save))
1999                 return;
2000
2001         if (trace_save_cmdline(tsk))
2002                 __this_cpu_write(trace_cmdline_save, false);
2003 }
2004
2005 /*
2006  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2007  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2008  * simplifies those functions and keeps them in sync.
2009  */
2010 enum print_line_t trace_handle_return(struct trace_seq *s)
2011 {
2012         return trace_seq_has_overflowed(s) ?
2013                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2014 }
2015 EXPORT_SYMBOL_GPL(trace_handle_return);
2016
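/*
 * Usage sketch: an event's output callback can emit everything with
 * trace_seq_*() and let trace_handle_return() report any overflow
 * ("example_print" is a hypothetical struct trace_event callback):
 *
 *	static enum print_line_t example_print(struct trace_iterator *iter,
 *					       int flags,
 *					       struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example on cpu %d\n", iter->cpu);
 *		return trace_handle_return(&iter->seq);
 *	}
 */
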
2017 void
2018 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2019                              int pc)
2020 {
2021         struct task_struct *tsk = current;
2022
2023         entry->preempt_count            = pc & 0xff;
2024         entry->pid                      = (tsk) ? tsk->pid : 0;
2025         entry->flags =
2026 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2027                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2028 #else
2029                 TRACE_FLAG_IRQS_NOSUPPORT |
2030 #endif
2031                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2032                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2033                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2034                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2035                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2036 }
2037 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2038
2039 struct ring_buffer_event *
2040 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2041                           int type,
2042                           unsigned long len,
2043                           unsigned long flags, int pc)
2044 {
2045         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2046 }
2047
2048 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2049 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2050 static int trace_buffered_event_ref;
2051
2052 /**
2053  * trace_buffered_event_enable - enable buffering events
2054  *
2055  * When events are being filtered, it is quicker to use a temporary
2056  * buffer to write the event data into if there's a likely chance
2057  * that it will not be committed. Discarding a reserved event from
2058  * the ring buffer is not as fast as committing it, and is much
2059  * slower than copying the data into the ring buffer at commit time.
2060  *
2061  * When an event is to be filtered, allocate per cpu buffers to
2062  * write the event data into; if the event is filtered and discarded
2063  * it is simply dropped, otherwise the entire data is committed
2064  * in one shot.
2065  */
2066 void trace_buffered_event_enable(void)
2067 {
2068         struct ring_buffer_event *event;
2069         struct page *page;
2070         int cpu;
2071
2072         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2073
2074         if (trace_buffered_event_ref++)
2075                 return;
2076
2077         for_each_tracing_cpu(cpu) {
2078                 page = alloc_pages_node(cpu_to_node(cpu),
2079                                         GFP_KERNEL | __GFP_NORETRY, 0);
2080                 if (!page)
2081                         goto failed;
2082
2083                 event = page_address(page);
2084                 memset(event, 0, sizeof(*event));
2085
2086                 per_cpu(trace_buffered_event, cpu) = event;
2087
2088                 preempt_disable();
2089                 if (cpu == smp_processor_id() &&
2090                     this_cpu_read(trace_buffered_event) !=
2091                     per_cpu(trace_buffered_event, cpu))
2092                         WARN_ON_ONCE(1);
2093                 preempt_enable();
2094         }
2095
2096         return;
2097  failed:
2098         trace_buffered_event_disable();
2099 }
2100
2101 static void enable_trace_buffered_event(void *data)
2102 {
2103         /* Probably not needed, but do it anyway */
2104         smp_rmb();
2105         this_cpu_dec(trace_buffered_event_cnt);
2106 }
2107
2108 static void disable_trace_buffered_event(void *data)
2109 {
2110         this_cpu_inc(trace_buffered_event_cnt);
2111 }
2112
2113 /**
2114  * trace_buffered_event_disable - disable buffering events
2115  *
2116  * When a filter is removed, it is faster to not use the buffered
2117  * events, and to commit directly into the ring buffer. Free up
2118  * the temp buffers when there are no more users. This requires
2119  * special synchronization with current events.
2120  */
2121 void trace_buffered_event_disable(void)
2122 {
2123         int cpu;
2124
2125         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2126
2127         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2128                 return;
2129
2130         if (--trace_buffered_event_ref)
2131                 return;
2132
2133         preempt_disable();
2134         /* For each CPU, set the buffer as used. */
2135         smp_call_function_many(tracing_buffer_mask,
2136                                disable_trace_buffered_event, NULL, 1);
2137         preempt_enable();
2138
2139         /* Wait for all current users to finish */
2140         synchronize_sched();
2141
2142         for_each_tracing_cpu(cpu) {
2143                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2144                 per_cpu(trace_buffered_event, cpu) = NULL;
2145         }
2146         /*
2147          * Make sure trace_buffered_event is NULL before clearing
2148          * trace_buffered_event_cnt.
2149          */
2150         smp_wmb();
2151
2152         preempt_disable();
2153         /* Do the work on each cpu */
2154         smp_call_function_many(tracing_buffer_mask,
2155                                enable_trace_buffered_event, NULL, 1);
2156         preempt_enable();
2157 }
2158
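/*
 * Usage sketch: both functions are reference counted and expect
 * event_mutex to be held, so a filter update path simply brackets
 * its work (the middle step is elided here):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */
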
2159 static struct ring_buffer *temp_buffer;
2160
2161 struct ring_buffer_event *
2162 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2163                           struct trace_event_file *trace_file,
2164                           int type, unsigned long len,
2165                           unsigned long flags, int pc)
2166 {
2167         struct ring_buffer_event *entry;
2168         int val;
2169
2170         *current_rb = trace_file->tr->trace_buffer.buffer;
2171
2172         if ((trace_file->flags &
2173              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2174             (entry = this_cpu_read(trace_buffered_event))) {
2175                 /* Try to use the per cpu buffer first */
2176                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2177                 if (val == 1) {
2178                         trace_event_setup(entry, type, flags, pc);
2179                         entry->array[0] = len;
2180                         return entry;
2181                 }
2182                 this_cpu_dec(trace_buffered_event_cnt);
2183         }
2184
2185         entry = __trace_buffer_lock_reserve(*current_rb,
2186                                             type, len, flags, pc);
2187         /*
2188          * If tracing is off, but we have triggers enabled,
2189          * we still need to look at the event data. Use the temp_buffer
2190          * to store the trace event for the trigger to use. It's recursion
2191          * safe and will not be recorded anywhere.
2192          */
2193         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2194                 *current_rb = temp_buffer;
2195                 entry = __trace_buffer_lock_reserve(*current_rb,
2196                                                     type, len, flags, pc);
2197         }
2198         return entry;
2199 }
2200 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2201
2202 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2203 static DEFINE_MUTEX(tracepoint_printk_mutex);
2204
2205 static void output_printk(struct trace_event_buffer *fbuffer)
2206 {
2207         struct trace_event_call *event_call;
2208         struct trace_event *event;
2209         unsigned long flags;
2210         struct trace_iterator *iter = tracepoint_print_iter;
2211
2212         /* We should never get here if iter is NULL */
2213         if (WARN_ON_ONCE(!iter))
2214                 return;
2215
2216         event_call = fbuffer->trace_file->event_call;
2217         if (!event_call || !event_call->event.funcs ||
2218             !event_call->event.funcs->trace)
2219                 return;
2220
2221         event = &fbuffer->trace_file->event_call->event;
2222
2223         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2224         trace_seq_init(&iter->seq);
2225         iter->ent = fbuffer->entry;
2226         event_call->event.funcs->trace(iter, 0, event);
2227         trace_seq_putc(&iter->seq, 0);
2228         printk("%s", iter->seq.buffer);
2229
2230         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2231 }
2232
2233 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2234                              void __user *buffer, size_t *lenp,
2235                              loff_t *ppos)
2236 {
2237         int save_tracepoint_printk;
2238         int ret;
2239
2240         mutex_lock(&tracepoint_printk_mutex);
2241         save_tracepoint_printk = tracepoint_printk;
2242
2243         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2244
2245         /*
2246          * This will force exiting early, as tracepoint_printk
2247          * is always zero when tracepoint_print_iter is not allocated.
2248          */
2249         if (!tracepoint_print_iter)
2250                 tracepoint_printk = 0;
2251
2252         if (save_tracepoint_printk == tracepoint_printk)
2253                 goto out;
2254
2255         if (tracepoint_printk)
2256                 static_key_enable(&tracepoint_printk_key.key);
2257         else
2258                 static_key_disable(&tracepoint_printk_key.key);
2259
2260  out:
2261         mutex_unlock(&tracepoint_printk_mutex);
2262
2263         return ret;
2264 }
2265
2266 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2267 {
2268         if (static_key_false(&tracepoint_printk_key.key))
2269                 output_printk(fbuffer);
2270
2271         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2272                                     fbuffer->event, fbuffer->entry,
2273                                     fbuffer->flags, fbuffer->pc);
2274 }
2275 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2276
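/*
 * Usage sketch: the TRACE_EVENT() generated probes pair the reserve and
 * commit roughly like this, going through trace_event_buffer_reserve()
 * (in trace_events.c), which wraps the lock_reserve above:
 *
 *	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 *					   sizeof(*entry));
 *	if (!entry)
 *		return;
 *	...fill in the event fields...
 *	trace_event_buffer_commit(&fbuffer);
 */
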
2277 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2278                                      struct ring_buffer *buffer,
2279                                      struct ring_buffer_event *event,
2280                                      unsigned long flags, int pc,
2281                                      struct pt_regs *regs)
2282 {
2283         __buffer_unlock_commit(buffer, event);
2284
2285         /*
2286          * If regs is not set, then skip the following callers:
2287          *   trace_buffer_unlock_commit_regs
2288          *   event_trigger_unlock_commit
2289          *   trace_event_buffer_commit
2290          *   trace_event_raw_event_sched_switch
2291          * Note, we can still get here via blktrace, wakeup tracer
2292          * and mmiotrace, but that's ok if they lose a function or
2293          * two. They are not that meaningful.
2294          */
2295         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2296         ftrace_trace_userstack(buffer, flags, pc);
2297 }
2298
2299 /*
2300  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2301  */
2302 void
2303 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2304                                    struct ring_buffer_event *event)
2305 {
2306         __buffer_unlock_commit(buffer, event);
2307 }
2308
2309 static void
2310 trace_process_export(struct trace_export *export,
2311                struct ring_buffer_event *event)
2312 {
2313         struct trace_entry *entry;
2314         unsigned int size = 0;
2315
2316         entry = ring_buffer_event_data(event);
2317         size = ring_buffer_event_length(event);
2318         export->write(entry, size);
2319 }
2320
2321 static DEFINE_MUTEX(ftrace_export_lock);
2322
2323 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2324
2325 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2326
2327 static inline void ftrace_exports_enable(void)
2328 {
2329         static_branch_enable(&ftrace_exports_enabled);
2330 }
2331
2332 static inline void ftrace_exports_disable(void)
2333 {
2334         static_branch_disable(&ftrace_exports_enabled);
2335 }
2336
2337 void ftrace_exports(struct ring_buffer_event *event)
2338 {
2339         struct trace_export *export;
2340
2341         preempt_disable_notrace();
2342
2343         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2344         while (export) {
2345                 trace_process_export(export, event);
2346                 export = rcu_dereference_raw_notrace(export->next);
2347         }
2348
2349         preempt_enable_notrace();
2350 }
2351
2352 static inline void
2353 add_trace_export(struct trace_export **list, struct trace_export *export)
2354 {
2355         rcu_assign_pointer(export->next, *list);
2356         /*
2357          * We are adding the export to the list, but another
2358          * CPU might be walking that list. We need to make sure
2359          * the export->next pointer is valid before another CPU sees
2360          * the export pointer itself in the list.
2361          */
2362         rcu_assign_pointer(*list, export);
2363 }
2364
2365 static inline int
2366 rm_trace_export(struct trace_export **list, struct trace_export *export)
2367 {
2368         struct trace_export **p;
2369
2370         for (p = list; *p != NULL; p = &(*p)->next)
2371                 if (*p == export)
2372                         break;
2373
2374         if (*p != export)
2375                 return -1;
2376
2377         rcu_assign_pointer(*p, (*p)->next);
2378
2379         return 0;
2380 }
2381
2382 static inline void
2383 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2384 {
2385         if (*list == NULL)
2386                 ftrace_exports_enable();
2387
2388         add_trace_export(list, export);
2389 }
2390
2391 static inline int
2392 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2393 {
2394         int ret;
2395
2396         ret = rm_trace_export(list, export);
2397         if (*list == NULL)
2398                 ftrace_exports_disable();
2399
2400         return ret;
2401 }
2402
2403 int register_ftrace_export(struct trace_export *export)
2404 {
2405         if (WARN_ON_ONCE(!export->write))
2406                 return -1;
2407
2408         mutex_lock(&ftrace_export_lock);
2409
2410         add_ftrace_export(&ftrace_exports_list, export);
2411
2412         mutex_unlock(&ftrace_export_lock);
2413
2414         return 0;
2415 }
2416 EXPORT_SYMBOL_GPL(register_ftrace_export);
2417
2418 int unregister_ftrace_export(struct trace_export *export)
2419 {
2420         int ret;
2421
2422         mutex_lock(&ftrace_export_lock);
2423
2424         ret = rm_ftrace_export(&ftrace_exports_list, export);
2425
2426         mutex_unlock(&ftrace_export_lock);
2427
2428         return ret;
2429 }
2430 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
2431
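/*
 * Usage sketch: an export consumer only needs a write() callback matching
 * the two-argument call made in trace_process_export() above; the names
 * below are hypothetical and the exact prototype lives in <linux/trace.h>:
 *
 *	static void example_export_write(const void *buf, unsigned int len)
 *	{
 *		...forward the raw trace entry, e.g. to an STM channel...
 *	}
 *
 *	static struct trace_export example_export = {
 *		.write	= example_export_write,
 *	};
 *
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */
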
2432 void
2433 trace_function(struct trace_array *tr,
2434                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2435                int pc)
2436 {
2437         struct trace_event_call *call = &event_function;
2438         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2439         struct ring_buffer_event *event;
2440         struct ftrace_entry *entry;
2441
2442         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2443                                             flags, pc);
2444         if (!event)
2445                 return;
2446         entry   = ring_buffer_event_data(event);
2447         entry->ip                       = ip;
2448         entry->parent_ip                = parent_ip;
2449
2450         if (!call_filter_check_discard(call, entry, buffer, event)) {
2451                 if (static_branch_unlikely(&ftrace_exports_enabled))
2452                         ftrace_exports(event);
2453                 __buffer_unlock_commit(buffer, event);
2454         }
2455 }
2456
2457 #ifdef CONFIG_STACKTRACE
2458
2459 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2460 struct ftrace_stack {
2461         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2462 };
2463
2464 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2465 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2466
2467 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2468                                  unsigned long flags,
2469                                  int skip, int pc, struct pt_regs *regs)
2470 {
2471         struct trace_event_call *call = &event_kernel_stack;
2472         struct ring_buffer_event *event;
2473         struct stack_entry *entry;
2474         struct stack_trace trace;
2475         int use_stack;
2476         int size = FTRACE_STACK_ENTRIES;
2477
2478         trace.nr_entries        = 0;
2479         trace.skip              = skip;
2480
2481         /*
2482          * Add two, for this function and the call to save_stack_trace().
2483          * If regs is set, then these functions will not be in the way.
2484          */
2485         if (!regs)
2486                 trace.skip += 2;
2487
2488         /*
2489          * Since events can happen in NMIs, there's no safe way to
2490          * use the per cpu ftrace_stacks. We reserve it, and if an interrupt
2491          * or NMI comes in, it will just have to use the default
2492          * FTRACE_STACK_ENTRIES.
2493          */
2494         preempt_disable_notrace();
2495
2496         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2497         /*
2498          * We don't need any atomic variables, just a barrier.
2499          * If an interrupt comes in, we don't care, because it would
2500          * have exited and put the counter back to what we want.
2501          * We just need a barrier to keep gcc from moving things
2502          * around.
2503          */
2504         barrier();
2505         if (use_stack == 1) {
2506                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2507                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2508
2509                 if (regs)
2510                         save_stack_trace_regs(regs, &trace);
2511                 else
2512                         save_stack_trace(&trace);
2513
2514                 if (trace.nr_entries > size)
2515                         size = trace.nr_entries;
2516         } else
2517                 /* From now on, use_stack is a boolean */
2518                 use_stack = 0;
2519
2520         size *= sizeof(unsigned long);
2521
2522         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2523                                             sizeof(*entry) + size, flags, pc);
2524         if (!event)
2525                 goto out;
2526         entry = ring_buffer_event_data(event);
2527
2528         memset(&entry->caller, 0, size);
2529
2530         if (use_stack)
2531                 memcpy(&entry->caller, trace.entries,
2532                        trace.nr_entries * sizeof(unsigned long));
2533         else {
2534                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2535                 trace.entries           = entry->caller;
2536                 if (regs)
2537                         save_stack_trace_regs(regs, &trace);
2538                 else
2539                         save_stack_trace(&trace);
2540         }
2541
2542         entry->size = trace.nr_entries;
2543
2544         if (!call_filter_check_discard(call, entry, buffer, event))
2545                 __buffer_unlock_commit(buffer, event);
2546
2547  out:
2548         /* Again, don't let gcc optimize things here */
2549         barrier();
2550         __this_cpu_dec(ftrace_stack_reserve);
2551         preempt_enable_notrace();
2552
2553 }
2554
2555 static inline void ftrace_trace_stack(struct trace_array *tr,
2556                                       struct ring_buffer *buffer,
2557                                       unsigned long flags,
2558                                       int skip, int pc, struct pt_regs *regs)
2559 {
2560         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2561                 return;
2562
2563         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2564 }
2565
2566 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2567                    int pc)
2568 {
2569         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2570
2571         if (rcu_is_watching()) {
2572                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2573                 return;
2574         }
2575
2576         /*
2577          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2578          * but if the above rcu_is_watching() failed, then the NMI
2579          * triggered someplace critical, and rcu_irq_enter() should
2580          * not be called from NMI.
2581          */
2582         if (unlikely(in_nmi()))
2583                 return;
2584
2585         /*
2586          * It is possible that a function is being traced in a
2587          * location that RCU is not watching. A call to
2588          * rcu_irq_enter() will make sure that it is, but there are
2589          * a few internal rcu functions that could be traced
2590          * where that won't work either. In those cases, we just
2591          * do nothing.
2592          */
2593         if (unlikely(rcu_irq_enter_disabled()))
2594                 return;
2595
2596         rcu_irq_enter_irqson();
2597         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2598         rcu_irq_exit_irqson();
2599 }
2600
2601 /**
2602  * trace_dump_stack - record a stack back trace in the trace buffer
2603  * @skip: Number of functions to skip (helper handlers)
2604  */
2605 void trace_dump_stack(int skip)
2606 {
2607         unsigned long flags;
2608
2609         if (tracing_disabled || tracing_selftest_running)
2610                 return;
2611
2612         local_save_flags(flags);
2613
2614         /*
2615          * Skip 3 more, which seems to get us to the caller of
2616          * this function.
2617          */
2618         skip += 3;
2619         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2620                              flags, skip, preempt_count(), NULL);
2621 }
2622
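/*
 * Usage sketch: dump the current kernel stack into the ring buffer from a
 * suspect code path and read it back later via the "trace" file (the
 * condition below is hypothetical):
 *
 *	if (unlikely(saw_bad_state))
 *		trace_dump_stack(0);
 */
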
2623 static DEFINE_PER_CPU(int, user_stack_count);
2624
2625 void
2626 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2627 {
2628         struct trace_event_call *call = &event_user_stack;
2629         struct ring_buffer_event *event;
2630         struct userstack_entry *entry;
2631         struct stack_trace trace;
2632
2633         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2634                 return;
2635
2636         /*
2637          * NMIs cannot handle page faults, even with fixups.
2638          * Saving the user stack can (and often does) fault.
2639          */
2640         if (unlikely(in_nmi()))
2641                 return;
2642
2643         /*
2644          * prevent recursion, since the user stack tracing may
2645          * trigger other kernel events.
2646          */
2647         preempt_disable();
2648         if (__this_cpu_read(user_stack_count))
2649                 goto out;
2650
2651         __this_cpu_inc(user_stack_count);
2652
2653         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2654                                             sizeof(*entry), flags, pc);
2655         if (!event)
2656                 goto out_drop_count;
2657         entry   = ring_buffer_event_data(event);
2658
2659         entry->tgid             = current->tgid;
2660         memset(&entry->caller, 0, sizeof(entry->caller));
2661
2662         trace.nr_entries        = 0;
2663         trace.max_entries       = FTRACE_STACK_ENTRIES;
2664         trace.skip              = 0;
2665         trace.entries           = entry->caller;
2666
2667         save_stack_trace_user(&trace);
2668         if (!call_filter_check_discard(call, entry, buffer, event))
2669                 __buffer_unlock_commit(buffer, event);
2670
2671  out_drop_count:
2672         __this_cpu_dec(user_stack_count);
2673  out:
2674         preempt_enable();
2675 }
2676
2677 #ifdef UNUSED
2678 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2679 {
2680         ftrace_trace_userstack(tr, flags, preempt_count());
2681 }
2682 #endif /* UNUSED */
2683
2684 #endif /* CONFIG_STACKTRACE */
2685
2686 /* created for use with alloc_percpu */
2687 struct trace_buffer_struct {
2688         int nesting;
2689         char buffer[4][TRACE_BUF_SIZE];
2690 };
2691
2692 static struct trace_buffer_struct *trace_percpu_buffer;
2693
2694 /*
2695  * This allows for lockless recording.  If we're nested too deeply, then
2696  * this returns NULL.
2697  */
2698 static char *get_trace_buf(void)
2699 {
2700         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2701
2702         if (!buffer || buffer->nesting >= 4)
2703                 return NULL;
2704
2705         return &buffer->buffer[buffer->nesting++][0];
2706 }
2707
2708 static void put_trace_buf(void)
2709 {
2710         this_cpu_dec(trace_percpu_buffer->nesting);
2711 }
2712
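/*
 * Usage sketch: callers pair get_trace_buf() with put_trace_buf() and keep
 * preemption disabled across the pair, exactly as trace_vbprintk() below
 * does; nesting deeper than four levels just yields NULL:
 *
 *	preempt_disable_notrace();
 *	buf = get_trace_buf();
 *	if (buf) {
 *		...format the message into buf...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */
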
2713 static int alloc_percpu_trace_buffer(void)
2714 {
2715         struct trace_buffer_struct *buffers;
2716
2717         buffers = alloc_percpu(struct trace_buffer_struct);
2718         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2719                 return -ENOMEM;
2720
2721         trace_percpu_buffer = buffers;
2722         return 0;
2723 }
2724
2725 static int buffers_allocated;
2726
2727 void trace_printk_init_buffers(void)
2728 {
2729         if (buffers_allocated)
2730                 return;
2731
2732         if (alloc_percpu_trace_buffer())
2733                 return;
2734
2735         /* trace_printk() is for debug use only. Don't use it in production. */
2736
2737         pr_warn("\n");
2738         pr_warn("**********************************************************\n");
2739         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2740         pr_warn("**                                                      **\n");
2741         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2742         pr_warn("**                                                      **\n");
2743         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2744         pr_warn("** unsafe for production use.                           **\n");
2745         pr_warn("**                                                      **\n");
2746         pr_warn("** If you see this message and you are not debugging    **\n");
2747         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2748         pr_warn("**                                                      **\n");
2749         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2750         pr_warn("**********************************************************\n");
2751
2752         /* Expand the buffers to set size */
2753         tracing_update_buffers();
2754
2755         buffers_allocated = 1;
2756
2757         /*
2758          * trace_printk_init_buffers() can be called by modules.
2759          * If that happens, then we need to start cmdline recording
2760          * directly here. If the global_trace.buffer is already
2761          * allocated here, then this was called by module code.
2762          */
2763         if (global_trace.trace_buffer.buffer)
2764                 tracing_start_cmdline_record();
2765 }
2766
2767 void trace_printk_start_comm(void)
2768 {
2769         /* Start tracing comms if trace printk is set */
2770         if (!buffers_allocated)
2771                 return;
2772         tracing_start_cmdline_record();
2773 }
2774
2775 static void trace_printk_start_stop_comm(int enabled)
2776 {
2777         if (!buffers_allocated)
2778                 return;
2779
2780         if (enabled)
2781                 tracing_start_cmdline_record();
2782         else
2783                 tracing_stop_cmdline_record();
2784 }
2785
2786 /**
2787  * trace_vbprintk - write binary msg to tracing buffer
2788  * @ip:    The address of the caller
 * @fmt:   The format string to write to the buffer
 * @args:  Arguments for @fmt
2789  */
2790 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2791 {
2792         struct trace_event_call *call = &event_bprint;
2793         struct ring_buffer_event *event;
2794         struct ring_buffer *buffer;
2795         struct trace_array *tr = &global_trace;
2796         struct bprint_entry *entry;
2797         unsigned long flags;
2798         char *tbuffer;
2799         int len = 0, size, pc;
2800
2801         if (unlikely(tracing_selftest_running || tracing_disabled))
2802                 return 0;
2803
2804         /* Don't pollute graph traces with trace_vprintk internals */
2805         pause_graph_tracing();
2806
2807         pc = preempt_count();
2808         preempt_disable_notrace();
2809
2810         tbuffer = get_trace_buf();
2811         if (!tbuffer) {
2812                 len = 0;
2813                 goto out_nobuffer;
2814         }
2815
2816         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2817
2818         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2819                 goto out;
2820
2821         local_save_flags(flags);
2822         size = sizeof(*entry) + sizeof(u32) * len;
2823         buffer = tr->trace_buffer.buffer;
2824         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2825                                             flags, pc);
2826         if (!event)
2827                 goto out;
2828         entry = ring_buffer_event_data(event);
2829         entry->ip                       = ip;
2830         entry->fmt                      = fmt;
2831
2832         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2833         if (!call_filter_check_discard(call, entry, buffer, event)) {
2834                 __buffer_unlock_commit(buffer, event);
2835                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2836         }
2837
2838 out:
2839         put_trace_buf();
2840
2841 out_nobuffer:
2842         preempt_enable_notrace();
2843         unpause_graph_tracing();
2844
2845         return len;
2846 }
2847 EXPORT_SYMBOL_GPL(trace_vbprintk);
2848
2849 static int
2850 __trace_array_vprintk(struct ring_buffer *buffer,
2851                       unsigned long ip, const char *fmt, va_list args)
2852 {
2853         struct trace_event_call *call = &event_print;
2854         struct ring_buffer_event *event;
2855         int len = 0, size, pc;
2856         struct print_entry *entry;
2857         unsigned long flags;
2858         char *tbuffer;
2859
2860         if (tracing_disabled || tracing_selftest_running)
2861                 return 0;
2862
2863         /* Don't pollute graph traces with trace_vprintk internals */
2864         pause_graph_tracing();
2865
2866         pc = preempt_count();
2867         preempt_disable_notrace();
2868 
2870         tbuffer = get_trace_buf();
2871         if (!tbuffer) {
2872                 len = 0;
2873                 goto out_nobuffer;
2874         }
2875
2876         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2877
2878         local_save_flags(flags);
2879         size = sizeof(*entry) + len + 1;
2880         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2881                                             flags, pc);
2882         if (!event)
2883                 goto out;
2884         entry = ring_buffer_event_data(event);
2885         entry->ip = ip;
2886
2887         memcpy(&entry->buf, tbuffer, len + 1);
2888         if (!call_filter_check_discard(call, entry, buffer, event)) {
2889                 __buffer_unlock_commit(buffer, event);
2890                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2891         }
2892
2893 out:
2894         put_trace_buf();
2895
2896 out_nobuffer:
2897         preempt_enable_notrace();
2898         unpause_graph_tracing();
2899
2900         return len;
2901 }
2902
2903 int trace_array_vprintk(struct trace_array *tr,
2904                         unsigned long ip, const char *fmt, va_list args)
2905 {
2906         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2907 }
2908
2909 int trace_array_printk(struct trace_array *tr,
2910                        unsigned long ip, const char *fmt, ...)
2911 {
2912         int ret;
2913         va_list ap;
2914
2915         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2916                 return 0;
2917
2918         va_start(ap, fmt);
2919         ret = trace_array_vprintk(tr, ip, fmt, ap);
2920         va_end(ap);
2921         return ret;
2922 }
2923
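/*
 * Usage sketch: write into a specific trace instance instead of the global
 * buffer; "my_tr" is assumed to be a pointer to an existing trace_array,
 * and _THIS_IP_ records the caller's address:
 *
 *	trace_array_printk(my_tr, _THIS_IP_, "reset count=%d\n", count);
 */
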
2924 int trace_array_printk_buf(struct ring_buffer *buffer,
2925                            unsigned long ip, const char *fmt, ...)
2926 {
2927         int ret;
2928         va_list ap;
2929
2930         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2931                 return 0;
2932
2933         va_start(ap, fmt);
2934         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2935         va_end(ap);
2936         return ret;
2937 }
2938
2939 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2940 {
2941         return trace_array_vprintk(&global_trace, ip, fmt, args);
2942 }
2943 EXPORT_SYMBOL_GPL(trace_vprintk);
2944
2945 static void trace_iterator_increment(struct trace_iterator *iter)
2946 {
2947         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2948
2949         iter->idx++;
2950         if (buf_iter)
2951                 ring_buffer_read(buf_iter, NULL);
2952 }
2953
2954 static struct trace_entry *
2955 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2956                 unsigned long *lost_events)
2957 {
2958         struct ring_buffer_event *event;
2959         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2960
2961         if (buf_iter)
2962                 event = ring_buffer_iter_peek(buf_iter, ts);
2963         else
2964                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2965                                          lost_events);
2966
2967         if (event) {
2968                 iter->ent_size = ring_buffer_event_length(event);
2969                 return ring_buffer_event_data(event);
2970         }
2971         iter->ent_size = 0;
2972         return NULL;
2973 }
2974
2975 static struct trace_entry *
2976 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2977                   unsigned long *missing_events, u64 *ent_ts)
2978 {
2979         struct ring_buffer *buffer = iter->trace_buffer->buffer;
2980         struct trace_entry *ent, *next = NULL;
2981         unsigned long lost_events = 0, next_lost = 0;
2982         int cpu_file = iter->cpu_file;
2983         u64 next_ts = 0, ts;
2984         int next_cpu = -1;
2985         int next_size = 0;
2986         int cpu;
2987
2988         /*
2989          * If we are in a per_cpu trace file, don't bother iterating over
2990          * all CPUs; just peek at that one cpu directly.
2991          */
2992         if (cpu_file > RING_BUFFER_ALL_CPUS) {
2993                 if (ring_buffer_empty_cpu(buffer, cpu_file))
2994                         return NULL;
2995                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2996                 if (ent_cpu)
2997                         *ent_cpu = cpu_file;
2998
2999                 return ent;
3000         }
3001
3002         for_each_tracing_cpu(cpu) {
3003
3004                 if (ring_buffer_empty_cpu(buffer, cpu))
3005                         continue;
3006
3007                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3008
3009                 /*
3010                  * Pick the entry with the smallest timestamp:
3011                  */
3012                 if (ent && (!next || ts < next_ts)) {
3013                         next = ent;
3014                         next_cpu = cpu;
3015                         next_ts = ts;
3016                         next_lost = lost_events;
3017                         next_size = iter->ent_size;
3018                 }
3019         }
3020
3021         iter->ent_size = next_size;
3022
3023         if (ent_cpu)
3024                 *ent_cpu = next_cpu;
3025
3026         if (ent_ts)
3027                 *ent_ts = next_ts;
3028
3029         if (missing_events)
3030                 *missing_events = next_lost;
3031
3032         return next;
3033 }
3034
3035 /* Find the next real entry, without updating the iterator itself */
3036 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3037                                           int *ent_cpu, u64 *ent_ts)
3038 {
3039         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3040 }
3041
3042 /* Find the next real entry, and increment the iterator to the next entry */
3043 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3044 {
3045         iter->ent = __find_next_entry(iter, &iter->cpu,
3046                                       &iter->lost_events, &iter->ts);
3047
3048         if (iter->ent)
3049                 trace_iterator_increment(iter);
3050
3051         return iter->ent ? iter : NULL;
3052 }
3053
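/*
 * Usage sketch: a consumer can walk every buffered entry in timestamp
 * order the same way ftrace_dump() does later in this file:
 *
 *	while (trace_find_next_entry_inc(iter))
 *		print_trace_line(iter);
 */
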
3054 static void trace_consume(struct trace_iterator *iter)
3055 {
3056         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3057                             &iter->lost_events);
3058 }
3059
3060 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3061 {
3062         struct trace_iterator *iter = m->private;
3063         int i = (int)*pos;
3064         void *ent;
3065
3066         WARN_ON_ONCE(iter->leftover);
3067
3068         (*pos)++;
3069
3070         /* can't go backwards */
3071         if (iter->idx > i)
3072                 return NULL;
3073
3074         if (iter->idx < 0)
3075                 ent = trace_find_next_entry_inc(iter);
3076         else
3077                 ent = iter;
3078
3079         while (ent && iter->idx < i)
3080                 ent = trace_find_next_entry_inc(iter);
3081
3082         iter->pos = *pos;
3083
3084         return ent;
3085 }
3086
3087 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3088 {
3089         struct ring_buffer_event *event;
3090         struct ring_buffer_iter *buf_iter;
3091         unsigned long entries = 0;
3092         u64 ts;
3093
3094         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3095
3096         buf_iter = trace_buffer_iter(iter, cpu);
3097         if (!buf_iter)
3098                 return;
3099
3100         ring_buffer_iter_reset(buf_iter);
3101
3102         /*
3103          * We could have the case with the max latency tracers
3104          * that a reset never took place on a cpu. This is evident
3105          * by the timestamp being before the start of the buffer.
3106          */
3107         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3108                 if (ts >= iter->trace_buffer->time_start)
3109                         break;
3110                 entries++;
3111                 ring_buffer_read(buf_iter, NULL);
3112         }
3113
3114         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3115 }
3116
3117 /*
3118  * The current tracer is copied to avoid holding a global lock
3119  * across the whole read.
3120  */
3121 static void *s_start(struct seq_file *m, loff_t *pos)
3122 {
3123         struct trace_iterator *iter = m->private;
3124         struct trace_array *tr = iter->tr;
3125         int cpu_file = iter->cpu_file;
3126         void *p = NULL;
3127         loff_t l = 0;
3128         int cpu;
3129
3130         /*
3131          * copy the tracer to avoid using a global lock all around.
3132          * iter->trace is a copy of current_trace; the pointer to the
3133          * name may be used instead of a strcmp(), as iter->trace->name
3134          * will point to the same string as current_trace->name.
3135          */
3136         mutex_lock(&trace_types_lock);
3137         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3138                 *iter->trace = *tr->current_trace;
3139         mutex_unlock(&trace_types_lock);
3140
3141 #ifdef CONFIG_TRACER_MAX_TRACE
3142         if (iter->snapshot && iter->trace->use_max_tr)
3143                 return ERR_PTR(-EBUSY);
3144 #endif
3145
3146         if (!iter->snapshot)
3147                 atomic_inc(&trace_record_cmdline_disabled);
3148
3149         if (*pos != iter->pos) {
3150                 iter->ent = NULL;
3151                 iter->cpu = 0;
3152                 iter->idx = -1;
3153
3154                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3155                         for_each_tracing_cpu(cpu)
3156                                 tracing_iter_reset(iter, cpu);
3157                 } else
3158                         tracing_iter_reset(iter, cpu_file);
3159
3160                 iter->leftover = 0;
3161                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3162                         ;
3163
3164         } else {
3165                 /*
3166                  * If we overflowed the seq_file before, then we want
3167                  * to just reuse the trace_seq buffer again.
3168                  */
3169                 if (iter->leftover)
3170                         p = iter;
3171                 else {
3172                         l = *pos - 1;
3173                         p = s_next(m, p, &l);
3174                 }
3175         }
3176
3177         trace_event_read_lock();
3178         trace_access_lock(cpu_file);
3179         return p;
3180 }
3181
3182 static void s_stop(struct seq_file *m, void *p)
3183 {
3184         struct trace_iterator *iter = m->private;
3185
3186 #ifdef CONFIG_TRACER_MAX_TRACE
3187         if (iter->snapshot && iter->trace->use_max_tr)
3188                 return;
3189 #endif
3190
3191         if (!iter->snapshot)
3192                 atomic_dec(&trace_record_cmdline_disabled);
3193
3194         trace_access_unlock(iter->cpu_file);
3195         trace_event_read_unlock();
3196 }
3197
3198 static void
3199 get_total_entries(struct trace_buffer *buf,
3200                   unsigned long *total, unsigned long *entries)
3201 {
3202         unsigned long count;
3203         int cpu;
3204
3205         *total = 0;
3206         *entries = 0;
3207
3208         for_each_tracing_cpu(cpu) {
3209                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3210                 /*
3211                  * If this buffer has skipped entries, then we hold all
3212                  * entries for the trace and we need to ignore the
3213                  * ones before the time stamp.
3214                  */
3215                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3216                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3217                         /* total is the same as the entries */
3218                         *total += count;
3219                 } else
3220                         *total += count +
3221                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3222                 *entries += count;
3223         }
3224 }
3225
3226 static void print_lat_help_header(struct seq_file *m)
3227 {
3228         seq_puts(m, "#                  _------=> CPU#            \n"
3229                     "#                 / _-----=> irqs-off        \n"
3230                     "#                | / _----=> need-resched    \n"
3231                     "#                || / _---=> hardirq/softirq \n"
3232                     "#                ||| / _--=> preempt-depth   \n"
3233                     "#                |||| /     delay            \n"
3234                     "#  cmd     pid   ||||| time  |   caller      \n"
3235                     "#     \\   /      |||||  \\    |   /         \n");
3236 }
3237
3238 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3239 {
3240         unsigned long total;
3241         unsigned long entries;
3242
3243         get_total_entries(buf, &total, &entries);
3244         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3245                    entries, total, num_online_cpus());
3246         seq_puts(m, "#\n");
3247 }
3248
3249 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3250 {
3251         print_event_info(buf, m);
3252         seq_puts(m, "#           TASK-PID   CPU#      TIMESTAMP  FUNCTION\n"
3253                     "#              | |       |          |         |\n");
3254 }
3255
3256 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3257 {
3258         print_event_info(buf, m);
3259         seq_puts(m, "#                              _-----=> irqs-off\n"
3260                     "#                             / _----=> need-resched\n"
3261                     "#                            | / _---=> hardirq/softirq\n"
3262                     "#                            || / _--=> preempt-depth\n"
3263                     "#                            ||| /     delay\n"
3264                     "#           TASK-PID   CPU#  ||||    TIMESTAMP  FUNCTION\n"
3265                     "#              | |       |   ||||       |         |\n");
3266 }
3267
3268 void
3269 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3270 {
3271         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3272         struct trace_buffer *buf = iter->trace_buffer;
3273         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3274         struct tracer *type = iter->trace;
3275         unsigned long entries;
3276         unsigned long total;
3277         const char *name = "preemption";
3278
3279         name = type->name;
3280
3281         get_total_entries(buf, &total, &entries);
3282
3283         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3284                    name, UTS_RELEASE);
3285         seq_puts(m, "# -----------------------------------"
3286                  "---------------------------------\n");
3287         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3288                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3289                    nsecs_to_usecs(data->saved_latency),
3290                    entries,
3291                    total,
3292                    buf->cpu,
3293 #if defined(CONFIG_PREEMPT_NONE)
3294                    "server",
3295 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3296                    "desktop",
3297 #elif defined(CONFIG_PREEMPT)
3298                    "preempt",
3299 #else
3300                    "unknown",
3301 #endif
3302                    /* These are reserved for later use */
3303                    0, 0, 0, 0);
3304 #ifdef CONFIG_SMP
3305         seq_printf(m, " #P:%d)\n", num_online_cpus());
3306 #else
3307         seq_puts(m, ")\n");
3308 #endif
3309         seq_puts(m, "#    -----------------\n");
3310         seq_printf(m, "#    | task: %.16s-%d "
3311                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3312                    data->comm, data->pid,
3313                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3314                    data->policy, data->rt_priority);
3315         seq_puts(m, "#    -----------------\n");
3316
3317         if (data->critical_start) {
3318                 seq_puts(m, "#  => started at: ");
3319                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3320                 trace_print_seq(m, &iter->seq);
3321                 seq_puts(m, "\n#  => ended at:   ");
3322                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3323                 trace_print_seq(m, &iter->seq);
3324                 seq_puts(m, "\n#\n");
3325         }
3326
3327         seq_puts(m, "#\n");
3328 }
3329
3330 static void test_cpu_buff_start(struct trace_iterator *iter)
3331 {
3332         struct trace_seq *s = &iter->seq;
3333         struct trace_array *tr = iter->tr;
3334
3335         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3336                 return;
3337
3338         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3339                 return;
3340
3341         if (cpumask_available(iter->started) &&
3342             cpumask_test_cpu(iter->cpu, iter->started))
3343                 return;
3344
3345         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3346                 return;
3347
3348         if (cpumask_available(iter->started))
3349                 cpumask_set_cpu(iter->cpu, iter->started);
3350
3351         /* Don't print started cpu buffer for the first entry of the trace */
3352         if (iter->idx > 1)
3353                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3354                                 iter->cpu);
3355 }
3356
3357 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3358 {
3359         struct trace_array *tr = iter->tr;
3360         struct trace_seq *s = &iter->seq;
3361         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3362         struct trace_entry *entry;
3363         struct trace_event *event;
3364
3365         entry = iter->ent;
3366
3367         test_cpu_buff_start(iter);
3368
3369         event = ftrace_find_event(entry->type);
3370
3371         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3372                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3373                         trace_print_lat_context(iter);
3374                 else
3375                         trace_print_context(iter);
3376         }
3377
3378         if (trace_seq_has_overflowed(s))
3379                 return TRACE_TYPE_PARTIAL_LINE;
3380
3381         if (event)
3382                 return event->funcs->trace(iter, sym_flags, event);
3383
3384         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3385
3386         return trace_handle_return(s);
3387 }
3388
3389 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3390 {
3391         struct trace_array *tr = iter->tr;
3392         struct trace_seq *s = &iter->seq;
3393         struct trace_entry *entry;
3394         struct trace_event *event;
3395
3396         entry = iter->ent;
3397
3398         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3399                 trace_seq_printf(s, "%d %d %llu ",
3400                                  entry->pid, iter->cpu, iter->ts);
3401
3402         if (trace_seq_has_overflowed(s))
3403                 return TRACE_TYPE_PARTIAL_LINE;
3404
3405         event = ftrace_find_event(entry->type);
3406         if (event)
3407                 return event->funcs->raw(iter, 0, event);
3408
3409         trace_seq_printf(s, "%d ?\n", entry->type);
3410
3411         return trace_handle_return(s);
3412 }
3413
3414 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3415 {
3416         struct trace_array *tr = iter->tr;
3417         struct trace_seq *s = &iter->seq;
3418         unsigned char newline = '\n';
3419         struct trace_entry *entry;
3420         struct trace_event *event;
3421
3422         entry = iter->ent;
3423
3424         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3425                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3426                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3427                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3428                 if (trace_seq_has_overflowed(s))
3429                         return TRACE_TYPE_PARTIAL_LINE;
3430         }
3431
3432         event = ftrace_find_event(entry->type);
3433         if (event) {
3434                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3435                 if (ret != TRACE_TYPE_HANDLED)
3436                         return ret;
3437         }
3438
3439         SEQ_PUT_FIELD(s, newline);
3440
3441         return trace_handle_return(s);
3442 }
3443
3444 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3445 {
3446         struct trace_array *tr = iter->tr;
3447         struct trace_seq *s = &iter->seq;
3448         struct trace_entry *entry;
3449         struct trace_event *event;
3450
3451         entry = iter->ent;
3452
3453         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3454                 SEQ_PUT_FIELD(s, entry->pid);
3455                 SEQ_PUT_FIELD(s, iter->cpu);
3456                 SEQ_PUT_FIELD(s, iter->ts);
3457                 if (trace_seq_has_overflowed(s))
3458                         return TRACE_TYPE_PARTIAL_LINE;
3459         }
3460
3461         event = ftrace_find_event(entry->type);
3462         return event ? event->funcs->binary(iter, 0, event) :
3463                 TRACE_TYPE_HANDLED;
3464 }
3465
3466 int trace_empty(struct trace_iterator *iter)
3467 {
3468         struct ring_buffer_iter *buf_iter;
3469         int cpu;
3470
3471         /* If we are looking at one CPU buffer, only check that one */
3472         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3473                 cpu = iter->cpu_file;
3474                 buf_iter = trace_buffer_iter(iter, cpu);
3475                 if (buf_iter) {
3476                         if (!ring_buffer_iter_empty(buf_iter))
3477                                 return 0;
3478                 } else {
3479                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3480                                 return 0;
3481                 }
3482                 return 1;
3483         }
3484
3485         for_each_tracing_cpu(cpu) {
3486                 buf_iter = trace_buffer_iter(iter, cpu);
3487                 if (buf_iter) {
3488                         if (!ring_buffer_iter_empty(buf_iter))
3489                                 return 0;
3490                 } else {
3491                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3492                                 return 0;
3493                 }
3494         }
3495
3496         return 1;
3497 }
3498
3499 /*  Called with trace_event_read_lock() held. */
3500 enum print_line_t print_trace_line(struct trace_iterator *iter)
3501 {
3502         struct trace_array *tr = iter->tr;
3503         unsigned long trace_flags = tr->trace_flags;
3504         enum print_line_t ret;
3505
3506         if (iter->lost_events) {
3507                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3508                                  iter->cpu, iter->lost_events);
3509                 if (trace_seq_has_overflowed(&iter->seq))
3510                         return TRACE_TYPE_PARTIAL_LINE;
3511         }
3512
3513         if (iter->trace && iter->trace->print_line) {
3514                 ret = iter->trace->print_line(iter);
3515                 if (ret != TRACE_TYPE_UNHANDLED)
3516                         return ret;
3517         }
3518
3519         if (iter->ent->type == TRACE_BPUTS &&
3520                         trace_flags & TRACE_ITER_PRINTK &&
3521                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3522                 return trace_print_bputs_msg_only(iter);
3523
3524         if (iter->ent->type == TRACE_BPRINT &&
3525                         trace_flags & TRACE_ITER_PRINTK &&
3526                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3527                 return trace_print_bprintk_msg_only(iter);
3528
3529         if (iter->ent->type == TRACE_PRINT &&
3530                         trace_flags & TRACE_ITER_PRINTK &&
3531                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3532                 return trace_print_printk_msg_only(iter);
3533
3534         if (trace_flags & TRACE_ITER_BIN)
3535                 return print_bin_fmt(iter);
3536
3537         if (trace_flags & TRACE_ITER_HEX)
3538                 return print_hex_fmt(iter);
3539
3540         if (trace_flags & TRACE_ITER_RAW)
3541                 return print_raw_fmt(iter);
3542
3543         return print_trace_fmt(iter);
3544 }
3545
3546 void trace_latency_header(struct seq_file *m)
3547 {
3548         struct trace_iterator *iter = m->private;
3549         struct trace_array *tr = iter->tr;
3550
3551         /* print nothing if the buffers are empty */
3552         if (trace_empty(iter))
3553                 return;
3554
3555         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3556                 print_trace_header(m, iter);
3557
3558         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3559                 print_lat_help_header(m);
3560 }
3561
3562 void trace_default_header(struct seq_file *m)
3563 {
3564         struct trace_iterator *iter = m->private;
3565         struct trace_array *tr = iter->tr;
3566         unsigned long trace_flags = tr->trace_flags;
3567
3568         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3569                 return;
3570
3571         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3572                 /* print nothing if the buffers are empty */
3573                 if (trace_empty(iter))
3574                         return;
3575                 print_trace_header(m, iter);
3576                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3577                         print_lat_help_header(m);
3578         } else {
3579                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3580                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3581                                 print_func_help_header_irq(iter->trace_buffer, m);
3582                         else
3583                                 print_func_help_header(iter->trace_buffer, m);
3584                 }
3585         }
3586 }
3587
3588 static void test_ftrace_alive(struct seq_file *m)
3589 {
3590         if (!ftrace_is_dead())
3591                 return;
3592         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3593                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3594 }
3595
3596 #ifdef CONFIG_TRACER_MAX_TRACE
3597 static void show_snapshot_main_help(struct seq_file *m)
3598 {
3599         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3600                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3601                     "#                      Takes a snapshot of the main buffer.\n"
3602                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3603                     "#                      (Doesn't have to be '2' works with any number that\n"
3604                     "#                       is not a '0' or '1')\n");
3605 }
3606
3607 static void show_snapshot_percpu_help(struct seq_file *m)
3608 {
3609         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3610 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3611         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3612                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3613 #else
3614         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3615                     "#                     Must use main snapshot file to allocate.\n");
3616 #endif
3617         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3618                     "#                      (Doesn't have to be '2' works with any number that\n"
3619                     "#                       is not a '0' or '1')\n");
3620 }
3621
3622 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3623 {
3624         if (iter->tr->allocated_snapshot)
3625                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3626         else
3627                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3628
3629         seq_puts(m, "# Snapshot commands:\n");
3630         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3631                 show_snapshot_main_help(m);
3632         else
3633                 show_snapshot_percpu_help(m);
3634 }
3635 #else
3636 /* Should never be called */
3637 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3638 #endif
3639
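/*
 * seq_file ->show() for the "trace" file: print the header block when there
 * is no current entry yet, flush output left over from an earlier seq_file
 * overflow, or format the entry at the iterator's current position.
 */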
3640 static int s_show(struct seq_file *m, void *v)
3641 {
3642         struct trace_iterator *iter = v;
3643         int ret;
3644
3645         if (iter->ent == NULL) {
3646                 if (iter->tr) {
3647                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3648                         seq_puts(m, "#\n");
3649                         test_ftrace_alive(m);
3650                 }
3651                 if (iter->snapshot && trace_empty(iter))
3652                         print_snapshot_help(m, iter);
3653                 else if (iter->trace && iter->trace->print_header)
3654                         iter->trace->print_header(m);
3655                 else
3656                         trace_default_header(m);
3657
3658         } else if (iter->leftover) {
3659                 /*
3660                  * If we filled the seq_file buffer earlier, we
3661                  * want to just show it now.
3662                  */
3663                 ret = trace_print_seq(m, &iter->seq);
3664
3665                 /* ret should this time be zero, but you never know */
3666                 iter->leftover = ret;
3667
3668         } else {
3669                 print_trace_line(iter);
3670                 ret = trace_print_seq(m, &iter->seq);
3671                 /*
3672                  * If we overflow the seq_file buffer, then it will
3673                  * ask us for this data again at start up.
3674                  * Use that instead.
3675                  *  ret is 0 if seq_file write succeeded.
3676                  *        -1 otherwise.
3677                  */
3678                 iter->leftover = ret;
3679         }
3680
3681         return 0;
3682 }
3683
3684 /*
3685  * Should be used after trace_array_get(), trace_types_lock
3686  * ensures that i_cdev was already initialized.
3687  */
3688 static inline int tracing_get_cpu(struct inode *inode)
3689 {
3690         if (inode->i_cdev) /* See trace_create_cpu_file() */
3691                 return (long)inode->i_cdev - 1;
3692         return RING_BUFFER_ALL_CPUS;
3693 }
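/*
 * A minimal sketch of the presumed counterpart in trace_create_cpu_file():
 * the per-CPU files appear to stash "cpu + 1" in i_cdev, so that a NULL
 * i_cdev can keep meaning "all CPUs", e.g.:
 *
 *	if (ret)
 *		ret->d_inode->i_cdev = (void *)(cpu + 1);
 */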
3694
3695 static const struct seq_operations tracer_seq_ops = {
3696         .start          = s_start,
3697         .next           = s_next,
3698         .stop           = s_stop,
3699         .show           = s_show,
3700 };
3701
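/*
 * Allocate and initialize a trace_iterator for reading the "trace" file:
 * copy the current tracer so concurrent tracer switches do not disturb the
 * reader, select the main or max/snapshot buffer, stop tracing unless the
 * snapshot file is being opened, and prepare ring-buffer iterators for the
 * requested CPU(s).
 */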
3702 static struct trace_iterator *
3703 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3704 {
3705         struct trace_array *tr = inode->i_private;
3706         struct trace_iterator *iter;
3707         int cpu;
3708
3709         if (tracing_disabled)
3710                 return ERR_PTR(-ENODEV);
3711
3712         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3713         if (!iter)
3714                 return ERR_PTR(-ENOMEM);
3715
3716         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3717                                     GFP_KERNEL);
3718         if (!iter->buffer_iter)
3719                 goto release;
3720
3721         /*
3722          * We make a copy of the current tracer to avoid concurrent
3723          * changes on it while we are reading.
3724          */
3725         mutex_lock(&trace_types_lock);
3726         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3727         if (!iter->trace)
3728                 goto fail;
3729
3730         *iter->trace = *tr->current_trace;
3731
3732         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3733                 goto fail;
3734
3735         iter->tr = tr;
3736
3737 #ifdef CONFIG_TRACER_MAX_TRACE
3738         /* Currently only the top directory has a snapshot */
3739         if (tr->current_trace->print_max || snapshot)
3740                 iter->trace_buffer = &tr->max_buffer;
3741         else
3742 #endif
3743                 iter->trace_buffer = &tr->trace_buffer;
3744         iter->snapshot = snapshot;
3745         iter->pos = -1;
3746         iter->cpu_file = tracing_get_cpu(inode);
3747         mutex_init(&iter->mutex);
3748
3749         /* Notify the tracer early; before we stop tracing. */
3750         if (iter->trace && iter->trace->open)
3751                 iter->trace->open(iter);
3752
3753         /* Annotate start of buffers if we had overruns */
3754         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3755                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3756
3757         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3758         if (trace_clocks[tr->clock_id].in_ns)
3759                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3760
3761         /* stop the trace while dumping if we are not opening "snapshot" */
3762         if (!iter->snapshot)
3763                 tracing_stop_tr(tr);
3764
3765         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3766                 for_each_tracing_cpu(cpu) {
3767                         iter->buffer_iter[cpu] =
3768                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3769                 }
3770                 ring_buffer_read_prepare_sync();
3771                 for_each_tracing_cpu(cpu) {
3772                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3773                         tracing_iter_reset(iter, cpu);
3774                 }
3775         } else {
3776                 cpu = iter->cpu_file;
3777                 iter->buffer_iter[cpu] =
3778                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3779                 ring_buffer_read_prepare_sync();
3780                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3781                 tracing_iter_reset(iter, cpu);
3782         }
3783
3784         mutex_unlock(&trace_types_lock);
3785
3786         return iter;
3787
3788  fail:
3789         mutex_unlock(&trace_types_lock);
3790         kfree(iter->trace);
3791         kfree(iter->buffer_iter);
3792 release:
3793         seq_release_private(inode, file);
3794         return ERR_PTR(-ENOMEM);
3795 }
3796
3797 int tracing_open_generic(struct inode *inode, struct file *filp)
3798 {
3799         if (tracing_disabled)
3800                 return -ENODEV;
3801
3802         filp->private_data = inode->i_private;
3803         return 0;
3804 }
3805
3806 bool tracing_is_disabled(void)
3807 {
3808         return (tracing_disabled) ? true : false;
3809 }
3810
3811 /*
3812  * Open and update trace_array ref count.
3813  * Must have the current trace_array passed to it.
3814  */
3815 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3816 {
3817         struct trace_array *tr = inode->i_private;
3818
3819         if (tracing_disabled)
3820                 return -ENODEV;
3821
3822         if (trace_array_get(tr) < 0)
3823                 return -ENODEV;
3824
3825         filp->private_data = inode->i_private;
3826
3827         return 0;
3828 }
3829
3830 static int tracing_release(struct inode *inode, struct file *file)
3831 {
3832         struct trace_array *tr = inode->i_private;
3833         struct seq_file *m = file->private_data;
3834         struct trace_iterator *iter;
3835         int cpu;
3836
3837         if (!(file->f_mode & FMODE_READ)) {
3838                 trace_array_put(tr);
3839                 return 0;
3840         }
3841
3842         /* Writes do not use seq_file */
3843         iter = m->private;
3844         mutex_lock(&trace_types_lock);
3845
3846         for_each_tracing_cpu(cpu) {
3847                 if (iter->buffer_iter[cpu])
3848                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3849         }
3850
3851         if (iter->trace && iter->trace->close)
3852                 iter->trace->close(iter);
3853
3854         if (!iter->snapshot)
3855                 /* reenable tracing if it was previously enabled */
3856                 tracing_start_tr(tr);
3857
3858         __trace_array_put(tr);
3859
3860         mutex_unlock(&trace_types_lock);
3861
3862         mutex_destroy(&iter->mutex);
3863         free_cpumask_var(iter->started);
3864         kfree(iter->trace);
3865         kfree(iter->buffer_iter);
3866         seq_release_private(inode, file);
3867
3868         return 0;
3869 }
3870
3871 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3872 {
3873         struct trace_array *tr = inode->i_private;
3874
3875         trace_array_put(tr);
3876         return 0;
3877 }
3878
3879 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3880 {
3881         struct trace_array *tr = inode->i_private;
3882
3883         trace_array_put(tr);
3884
3885         return single_release(inode, file);
3886 }
3887
3888 static int tracing_open(struct inode *inode, struct file *file)
3889 {
3890         struct trace_array *tr = inode->i_private;
3891         struct trace_iterator *iter;
3892         int ret = 0;
3893
3894         if (trace_array_get(tr) < 0)
3895                 return -ENODEV;
3896
3897         /* If this file was open for write, then erase contents */
3898         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3899                 int cpu = tracing_get_cpu(inode);
3900
3901                 if (cpu == RING_BUFFER_ALL_CPUS)
3902                         tracing_reset_online_cpus(&tr->trace_buffer);
3903                 else
3904                         tracing_reset(&tr->trace_buffer, cpu);
3905         }
3906
3907         if (file->f_mode & FMODE_READ) {
3908                 iter = __tracing_open(inode, file, false);
3909                 if (IS_ERR(iter))
3910                         ret = PTR_ERR(iter);
3911                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3912                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
3913         }
3914
3915         if (ret < 0)
3916                 trace_array_put(tr);
3917
3918         return ret;
3919 }
3920
3921 /*
3922  * Some tracers are not suitable for instance buffers.
3923  * A tracer is always available for the global array (toplevel)
3924  * or if it explicitly states that it is.
3925  */
3926 static bool
3927 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3928 {
3929         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3930 }
3931
3932 /* Find the next tracer that this trace array may use */
3933 static struct tracer *
3934 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3935 {
3936         while (t && !trace_ok_for_array(t, tr))
3937                 t = t->next;
3938
3939         return t;
3940 }
3941
3942 static void *
3943 t_next(struct seq_file *m, void *v, loff_t *pos)
3944 {
3945         struct trace_array *tr = m->private;
3946         struct tracer *t = v;
3947
3948         (*pos)++;
3949
3950         if (t)
3951                 t = get_tracer_for_array(tr, t->next);
3952
3953         return t;
3954 }
3955
3956 static void *t_start(struct seq_file *m, loff_t *pos)
3957 {
3958         struct trace_array *tr = m->private;
3959         struct tracer *t;
3960         loff_t l = 0;
3961
3962         mutex_lock(&trace_types_lock);
3963
3964         t = get_tracer_for_array(tr, trace_types);
3965         for (; t && l < *pos; t = t_next(m, t, &l))
3966                         ;
3967
3968         return t;
3969 }
3970
3971 static void t_stop(struct seq_file *m, void *p)
3972 {
3973         mutex_unlock(&trace_types_lock);
3974 }
3975
3976 static int t_show(struct seq_file *m, void *v)
3977 {
3978         struct tracer *t = v;
3979
3980         if (!t)
3981                 return 0;
3982
3983         seq_puts(m, t->name);
3984         if (t->next)
3985                 seq_putc(m, ' ');
3986         else
3987                 seq_putc(m, '\n');
3988
3989         return 0;
3990 }
3991
3992 static const struct seq_operations show_traces_seq_ops = {
3993         .start          = t_start,
3994         .next           = t_next,
3995         .stop           = t_stop,
3996         .show           = t_show,
3997 };
3998
3999 static int show_traces_open(struct inode *inode, struct file *file)
4000 {
4001         struct trace_array *tr = inode->i_private;
4002         struct seq_file *m;
4003         int ret;
4004
4005         if (tracing_disabled)
4006                 return -ENODEV;
4007
4008         ret = seq_open(file, &show_traces_seq_ops);
4009         if (ret)
4010                 return ret;
4011
4012         m = file->private_data;
4013         m->private = tr;
4014
4015         return 0;
4016 }
4017
4018 static ssize_t
4019 tracing_write_stub(struct file *filp, const char __user *ubuf,
4020                    size_t count, loff_t *ppos)
4021 {
4022         return count;
4023 }
4024
4025 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4026 {
4027         int ret;
4028
4029         if (file->f_mode & FMODE_READ)
4030                 ret = seq_lseek(file, offset, whence);
4031         else
4032                 file->f_pos = ret = 0;
4033
4034         return ret;
4035 }
4036
4037 static const struct file_operations tracing_fops = {
4038         .open           = tracing_open,
4039         .read           = seq_read,
4040         .write          = tracing_write_stub,
4041         .llseek         = tracing_lseek,
4042         .release        = tracing_release,
4043 };
4044
4045 static const struct file_operations show_traces_fops = {
4046         .open           = show_traces_open,
4047         .read           = seq_read,
4048         .release        = seq_release,
4049         .llseek         = seq_lseek,
4050 };
4051
4052 /*
4053  * The tracer itself will not take this lock, but still we want
4054  * to provide a consistent cpumask to user-space:
4055  */
4056 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4057
4058 /*
4059  * Temporary storage for the character representation of the
4060  * CPU bitmask (and one more byte for the newline):
4061  */
4062 static char mask_str[NR_CPUS + 1];
4063
4064 static ssize_t
4065 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4066                      size_t count, loff_t *ppos)
4067 {
4068         struct trace_array *tr = file_inode(filp)->i_private;
4069         int len;
4070
4071         mutex_lock(&tracing_cpumask_update_lock);
4072
4073         len = snprintf(mask_str, count, "%*pb\n",
4074                        cpumask_pr_args(tr->tracing_cpumask));
4075         if (len >= count) {
4076                 count = -EINVAL;
4077                 goto out_err;
4078         }
4079         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
4080
4081 out_err:
4082         mutex_unlock(&tracing_cpumask_update_lock);
4083
4084         return count;
4085 }
4086
4087 static ssize_t
4088 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4089                       size_t count, loff_t *ppos)
4090 {
4091         struct trace_array *tr = file_inode(filp)->i_private;
4092         cpumask_var_t tracing_cpumask_new;
4093         int err, cpu;
4094
4095         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4096                 return -ENOMEM;
4097
4098         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4099         if (err)
4100                 goto err_unlock;
4101
4102         mutex_lock(&tracing_cpumask_update_lock);
4103
4104         local_irq_disable();
4105         arch_spin_lock(&tr->max_lock);
4106         for_each_tracing_cpu(cpu) {
4107                 /*
4108                  * Increase/decrease the disabled counter if we are
4109                  * about to flip a bit in the cpumask:
4110                  */
4111                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4112                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4113                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4114                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4115                 }
4116                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4117                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4118                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4119                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4120                 }
4121         }
4122         arch_spin_unlock(&tr->max_lock);
4123         local_irq_enable();
4124
4125         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4126
4127         mutex_unlock(&tracing_cpumask_update_lock);
4128         free_cpumask_var(tracing_cpumask_new);
4129
4130         return count;
4131
4132 err_unlock:
4133         free_cpumask_var(tracing_cpumask_new);
4134
4135         return err;
4136 }
4137
4138 static const struct file_operations tracing_cpumask_fops = {
4139         .open           = tracing_open_generic_tr,
4140         .read           = tracing_cpumask_read,
4141         .write          = tracing_cpumask_write,
4142         .release        = tracing_release_generic_tr,
4143         .llseek         = generic_file_llseek,
4144 };
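/*
 * Illustrative use of the tracing_cpumask file from user space (paths
 * assume tracefs mounted at /sys/kernel/tracing):
 *
 *	# cat tracing_cpumask
 *	f
 *	# echo 3 > tracing_cpumask		# trace only CPUs 0 and 1
 *
 * The value written is the usual hexadecimal cpumask parsed by
 * cpumask_parse_user(); clearing a CPU's bit bumps that CPU's disabled
 * counter and stops ring-buffer recording there, as done in
 * tracing_cpumask_write() above.
 */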
4145
4146 static int tracing_trace_options_show(struct seq_file *m, void *v)
4147 {
4148         struct tracer_opt *trace_opts;
4149         struct trace_array *tr = m->private;
4150         u32 tracer_flags;
4151         int i;
4152
4153         mutex_lock(&trace_types_lock);
4154         tracer_flags = tr->current_trace->flags->val;
4155         trace_opts = tr->current_trace->flags->opts;
4156
4157         for (i = 0; trace_options[i]; i++) {
4158                 if (tr->trace_flags & (1 << i))
4159                         seq_printf(m, "%s\n", trace_options[i]);
4160                 else
4161                         seq_printf(m, "no%s\n", trace_options[i]);
4162         }
4163
4164         for (i = 0; trace_opts[i].name; i++) {
4165                 if (tracer_flags & trace_opts[i].bit)
4166                         seq_printf(m, "%s\n", trace_opts[i].name);
4167                 else
4168                         seq_printf(m, "no%s\n", trace_opts[i].name);
4169         }
4170         mutex_unlock(&trace_types_lock);
4171
4172         return 0;
4173 }
4174
4175 static int __set_tracer_option(struct trace_array *tr,
4176                                struct tracer_flags *tracer_flags,
4177                                struct tracer_opt *opts, int neg)
4178 {
4179         struct tracer *trace = tracer_flags->trace;
4180         int ret;
4181
4182         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4183         if (ret)
4184                 return ret;
4185
4186         if (neg)
4187                 tracer_flags->val &= ~opts->bit;
4188         else
4189                 tracer_flags->val |= opts->bit;
4190         return 0;
4191 }
4192
4193 /* Try to assign a tracer specific option */
4194 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4195 {
4196         struct tracer *trace = tr->current_trace;
4197         struct tracer_flags *tracer_flags = trace->flags;
4198         struct tracer_opt *opts = NULL;
4199         int i;
4200
4201         for (i = 0; tracer_flags->opts[i].name; i++) {
4202                 opts = &tracer_flags->opts[i];
4203
4204                 if (strcmp(cmp, opts->name) == 0)
4205                         return __set_tracer_option(tr, trace->flags, opts, neg);
4206         }
4207
4208         return -EINVAL;
4209 }
4210
4211 /* Some tracers require overwrite to stay enabled */
4212 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4213 {
4214         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4215                 return -1;
4216
4217         return 0;
4218 }
4219
4220 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4221 {
4222         /* do nothing if flag is already set */
4223         if (!!(tr->trace_flags & mask) == !!enabled)
4224                 return 0;
4225
4226         /* Give the tracer a chance to approve the change */
4227         if (tr->current_trace->flag_changed)
4228                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4229                         return -EINVAL;
4230
4231         if (enabled)
4232                 tr->trace_flags |= mask;
4233         else
4234                 tr->trace_flags &= ~mask;
4235
4236         if (mask == TRACE_ITER_RECORD_CMD)
4237                 trace_event_enable_cmd_record(enabled);
4238
4239         if (mask == TRACE_ITER_EVENT_FORK)
4240                 trace_event_follow_fork(tr, enabled);
4241
4242         if (mask == TRACE_ITER_FUNC_FORK)
4243                 ftrace_pid_follow_fork(tr, enabled);
4244
4245         if (mask == TRACE_ITER_OVERWRITE) {
4246                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4247 #ifdef CONFIG_TRACER_MAX_TRACE
4248                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4249 #endif
4250         }
4251
4252         if (mask == TRACE_ITER_PRINTK) {
4253                 trace_printk_start_stop_comm(enabled);
4254                 trace_printk_control(enabled);
4255         }
4256
4257         return 0;
4258 }
4259
4260 static int trace_set_options(struct trace_array *tr, char *option)
4261 {
4262         char *cmp;
4263         int neg = 0;
4264         int ret = -ENODEV;
4265         int i;
4266         size_t orig_len = strlen(option);
4267
4268         cmp = strstrip(option);
4269
4270         if (strncmp(cmp, "no", 2) == 0) {
4271                 neg = 1;
4272                 cmp += 2;
4273         }
4274
4275         mutex_lock(&trace_types_lock);
4276
4277         for (i = 0; trace_options[i]; i++) {
4278                 if (strcmp(cmp, trace_options[i]) == 0) {
4279                         ret = set_tracer_flag(tr, 1 << i, !neg);
4280                         break;
4281                 }
4282         }
4283
4284         /* If no option could be set, test the specific tracer options */
4285         if (!trace_options[i])
4286                 ret = set_tracer_option(tr, cmp, neg);
4287
4288         mutex_unlock(&trace_types_lock);
4289
4290         /*
4291          * If the first trailing whitespace is replaced with '\0' by strstrip,
4292          * turn it back into a space.
4293          */
4294         if (orig_len > strlen(option))
4295                 option[strlen(option)] = ' ';
4296
4297         return ret;
4298 }
4299
4300 static void __init apply_trace_boot_options(void)
4301 {
4302         char *buf = trace_boot_options_buf;
4303         char *option;
4304
4305         while (true) {
4306                 option = strsep(&buf, ",");
4307
4308                 if (!option)
4309                         break;
4310
4311                 if (*option)
4312                         trace_set_options(&global_trace, option);
4313
4314                 /* Put back the comma to allow this to be called again */
4315                 if (buf)
4316                         *(buf - 1) = ',';
4317         }
4318 }
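/*
 * Worked example of the parsing above (illustrative; the boot-time buffer
 * is typically filled from a "trace_options=..." kernel command-line
 * parameter handled elsewhere in this file):
 *
 *	trace_options=sym-offset,noirq-info
 *
 * strsep() yields "sym-offset" and then "noirq-info"; trace_set_options()
 * strips a leading "no" to clear a flag and otherwise sets it, and the
 * comma is restored so the buffer can be parsed again later.
 */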
4319
4320 static ssize_t
4321 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4322                         size_t cnt, loff_t *ppos)
4323 {
4324         struct seq_file *m = filp->private_data;
4325         struct trace_array *tr = m->private;
4326         char buf[64];
4327         int ret;
4328
4329         if (cnt >= sizeof(buf))
4330                 return -EINVAL;
4331
4332         if (copy_from_user(buf, ubuf, cnt))
4333                 return -EFAULT;
4334
4335         buf[cnt] = 0;
4336
4337         ret = trace_set_options(tr, buf);
4338         if (ret < 0)
4339                 return ret;
4340
4341         *ppos += cnt;
4342
4343         return cnt;
4344 }
4345
4346 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4347 {
4348         struct trace_array *tr = inode->i_private;
4349         int ret;
4350
4351         if (tracing_disabled)
4352                 return -ENODEV;
4353
4354         if (trace_array_get(tr) < 0)
4355                 return -ENODEV;
4356
4357         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4358         if (ret < 0)
4359                 trace_array_put(tr);
4360
4361         return ret;
4362 }
4363
4364 static const struct file_operations tracing_iter_fops = {
4365         .open           = tracing_trace_options_open,
4366         .read           = seq_read,
4367         .llseek         = seq_lseek,
4368         .release        = tracing_single_release_tr,
4369         .write          = tracing_trace_options_write,
4370 };
4371
4372 static const char readme_msg[] =
4373         "tracing mini-HOWTO:\n\n"
4374         "# echo 0 > tracing_on : quick way to disable tracing\n"
4375         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4376         " Important files:\n"
4377         "  trace\t\t\t- The static contents of the buffer\n"
4378         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4379         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4380         "  current_tracer\t- function and latency tracers\n"
4381         "  available_tracers\t- list of configured tracers for current_tracer\n"
4382         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4383         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4384         "  trace_clock\t\t-change the clock used to order events\n"
4385         "       local:   Per cpu clock but may not be synced across CPUs\n"
4386         "      global:   Synced across CPUs but slows tracing down.\n"
4387         "     counter:   Not a clock, but just an increment\n"
4388         "      uptime:   Jiffy counter from time of boot\n"
4389         "        perf:   Same clock that perf events use\n"
4390 #ifdef CONFIG_X86_64
4391         "     x86-tsc:   TSC cycle counter\n"
4392 #endif
4393         "\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
4394         "\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
4395         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4396         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4397         "\t\t\t  Remove sub-buffer with rmdir\n"
4398         "  trace_options\t\t- Set format or modify how tracing happens\n"
4399         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4400         "\t\t\t  option name\n"
4401         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4402 #ifdef CONFIG_DYNAMIC_FTRACE
4403         "\n  available_filter_functions - list of functions that can be filtered on\n"
4404         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4405         "\t\t\t  functions\n"
4406         "\t     accepts: func_full_name or glob-matching-pattern\n"
4407         "\t     modules: Can select a group via module\n"
4408         "\t      Format: :mod:<module-name>\n"
4409         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4410         "\t    triggers: a command to perform when function is hit\n"
4411         "\t      Format: <function>:<trigger>[:count]\n"
4412         "\t     trigger: traceon, traceoff\n"
4413         "\t\t      enable_event:<system>:<event>\n"
4414         "\t\t      disable_event:<system>:<event>\n"
4415 #ifdef CONFIG_STACKTRACE
4416         "\t\t      stacktrace\n"
4417 #endif
4418 #ifdef CONFIG_TRACER_SNAPSHOT
4419         "\t\t      snapshot\n"
4420 #endif
4421         "\t\t      dump\n"
4422         "\t\t      cpudump\n"
4423         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4424         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4425         "\t     The first one will disable tracing every time do_fault is hit\n"
4426         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4427         "\t       The first time do trap is hit and it disables tracing, the\n"
4428         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4429         "\t       the counter will not decrement. It only decrements when the\n"
4430         "\t       trigger did work\n"
4431         "\t     To remove trigger without count:\n"
4432         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
4433         "\t     To remove trigger with a count:\n"
4434         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4435         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4436         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4437         "\t    modules: Can select a group via module command :mod:\n"
4438         "\t    Does not accept triggers\n"
4439 #endif /* CONFIG_DYNAMIC_FTRACE */
4440 #ifdef CONFIG_FUNCTION_TRACER
4441         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4442         "\t\t    (function)\n"
4443 #endif
4444 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4445         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4446         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4447         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4448 #endif
4449 #ifdef CONFIG_TRACER_SNAPSHOT
4450         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4451         "\t\t\t  snapshot buffer. Read the contents for more\n"
4452         "\t\t\t  information\n"
4453 #endif
4454 #ifdef CONFIG_STACK_TRACER
4455         "  stack_trace\t\t- Shows the max stack trace when active\n"
4456         "  stack_max_size\t- Shows current max stack size that was traced\n"
4457         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4458         "\t\t\t  new trace)\n"
4459 #ifdef CONFIG_DYNAMIC_FTRACE
4460         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4461         "\t\t\t  traces\n"
4462 #endif
4463 #endif /* CONFIG_STACK_TRACER */
4464 #ifdef CONFIG_KPROBE_EVENTS
4465         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4466         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4467 #endif
4468 #ifdef CONFIG_UPROBE_EVENTS
4469         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4470         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4471 #endif
4472 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4473         "\t  accepts: event-definitions (one definition per line)\n"
4474         "\t   Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4475         "\t           -:[<group>/]<event>\n"
4476 #ifdef CONFIG_KPROBE_EVENTS
4477         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4478   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4479 #endif
4480 #ifdef CONFIG_UPROBE_EVENTS
4481         "\t    place: <path>:<offset>\n"
4482 #endif
4483         "\t     args: <name>=fetcharg[:type]\n"
4484         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4485         "\t           $stack<index>, $stack, $retval, $comm\n"
4486         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4487         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4488 #endif
4489         "  events/\t\t- Directory containing all trace event subsystems:\n"
4490         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4491         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4492         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4493         "\t\t\t  events\n"
4494         "      filter\t\t- If set, only events passing filter are traced\n"
4495         "  events/<system>/<event>/\t- Directory containing control files for\n"
4496         "\t\t\t  <event>:\n"
4497         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4498         "      filter\t\t- If set, only events passing filter are traced\n"
4499         "      trigger\t\t- If set, a command to perform when event is hit\n"
4500         "\t    Format: <trigger>[:count][if <filter>]\n"
4501         "\t   trigger: traceon, traceoff\n"
4502         "\t            enable_event:<system>:<event>\n"
4503         "\t            disable_event:<system>:<event>\n"
4504 #ifdef CONFIG_HIST_TRIGGERS
4505         "\t            enable_hist:<system>:<event>\n"
4506         "\t            disable_hist:<system>:<event>\n"
4507 #endif
4508 #ifdef CONFIG_STACKTRACE
4509         "\t\t    stacktrace\n"
4510 #endif
4511 #ifdef CONFIG_TRACER_SNAPSHOT
4512         "\t\t    snapshot\n"
4513 #endif
4514 #ifdef CONFIG_HIST_TRIGGERS
4515         "\t\t    hist (see below)\n"
4516 #endif
4517         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4518         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4519         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4520         "\t                  events/block/block_unplug/trigger\n"
4521         "\t   The first disables tracing every time block_unplug is hit.\n"
4522         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4523         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4524         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4525         "\t   Like function triggers, the counter is only decremented if it\n"
4526         "\t    enabled or disabled tracing.\n"
4527         "\t   To remove a trigger without a count:\n"
4528         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
4529         "\t   To remove a trigger with a count:\n"
4530         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
4531         "\t   Filters can be ignored when removing a trigger.\n"
4532 #ifdef CONFIG_HIST_TRIGGERS
4533         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4534         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4535         "\t            [:values=<field1[,field2,...]>]\n"
4536         "\t            [:sort=<field1[,field2,...]>]\n"
4537         "\t            [:size=#entries]\n"
4538         "\t            [:pause][:continue][:clear]\n"
4539         "\t            [:name=histname1]\n"
4540         "\t            [if <filter>]\n\n"
4541         "\t    When a matching event is hit, an entry is added to a hash\n"
4542         "\t    table using the key(s) and value(s) named, and the value of a\n"
4543         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4544         "\t    correspond to fields in the event's format description.  Keys\n"
4545         "\t    can be any field, or the special string 'stacktrace'.\n"
4546         "\t    Compound keys consisting of up to two fields can be specified\n"
4547         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4548         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4549         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4550         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4551         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4552         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4553         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4554         "\t    its histogram data will be shared with other triggers of the\n"
4555         "\t    same name, and trigger hits will update this common data.\n\n"
4556         "\t    Reading the 'hist' file for the event will dump the hash\n"
4557         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4558         "\t    triggers attached to an event, there will be a table for each\n"
4559         "\t    trigger in the output.  The table displayed for a named\n"
4560         "\t    trigger will be the same as any other instance having the\n"
4561         "\t    same name.  The default format used to display a given field\n"
4562         "\t    can be modified by appending any of the following modifiers\n"
4563         "\t    to the field name, as applicable:\n\n"
4564         "\t            .hex        display a number as a hex value\n"
4565         "\t            .sym        display an address as a symbol\n"
4566         "\t            .sym-offset display an address as a symbol and offset\n"
4567         "\t            .execname   display a common_pid as a program name\n"
4568         "\t            .syscall    display a syscall id as a syscall name\n\n"
4569         "\t            .log2       display log2 value rather than raw number\n\n"
4570         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4571         "\t    trigger or to start a hist trigger but not log any events\n"
4572         "\t    until told to do so.  'continue' can be used to start or\n"
4573         "\t    restart a paused hist trigger.\n\n"
4574         "\t    The 'clear' parameter will clear the contents of a running\n"
4575         "\t    hist trigger and leave its current paused/active state\n"
4576         "\t    unchanged.\n\n"
4577         "\t    The enable_hist and disable_hist triggers can be used to\n"
4578         "\t    have one event conditionally start and stop another event's\n"
4579         "\t    already-attached hist trigger.  The syntax is analagous to\n"
4580         "\t    the enable_event and disable_event triggers.\n"
4581 #endif
4582 ;
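/*
 * A minimal user-space sketch of the echo/cat workflow described in the
 * mini-HOWTO above (illustrative only; assumes tracefs is mounted at
 * /sys/kernel/tracing and the caller may write there):
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/tracing_on", O_WRONLY);
 *
 *		if (fd < 0)
 *			return 1;
 *		write(fd, "1", 1);	// same effect as: echo 1 > tracing_on
 *		close(fd);
 *		return 0;
 *	}
 */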
4583
4584 static ssize_t
4585 tracing_readme_read(struct file *filp, char __user *ubuf,
4586                        size_t cnt, loff_t *ppos)
4587 {
4588         return simple_read_from_buffer(ubuf, cnt, ppos,
4589                                         readme_msg, strlen(readme_msg));
4590 }
4591
4592 static const struct file_operations tracing_readme_fops = {
4593         .open           = tracing_open_generic,
4594         .read           = tracing_readme_read,
4595         .llseek         = generic_file_llseek,
4596 };
4597
4598 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4599 {
4600         unsigned int *ptr = v;
4601
4602         if (*pos || m->count)
4603                 ptr++;
4604
4605         (*pos)++;
4606
4607         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4608              ptr++) {
4609                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4610                         continue;
4611
4612                 return ptr;
4613         }
4614
4615         return NULL;
4616 }
4617
4618 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4619 {
4620         void *v;
4621         loff_t l = 0;
4622
4623         preempt_disable();
4624         arch_spin_lock(&trace_cmdline_lock);
4625
4626         v = &savedcmd->map_cmdline_to_pid[0];
4627         while (l <= *pos) {
4628                 v = saved_cmdlines_next(m, v, &l);
4629                 if (!v)
4630                         return NULL;
4631         }
4632
4633         return v;
4634 }
4635
4636 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4637 {
4638         arch_spin_unlock(&trace_cmdline_lock);
4639         preempt_enable();
4640 }
4641
4642 static int saved_cmdlines_show(struct seq_file *m, void *v)
4643 {
4644         char buf[TASK_COMM_LEN];
4645         unsigned int *pid = v;
4646
4647         __trace_find_cmdline(*pid, buf);
4648         seq_printf(m, "%d %s\n", *pid, buf);
4649         return 0;
4650 }
4651
4652 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4653         .start          = saved_cmdlines_start,
4654         .next           = saved_cmdlines_next,
4655         .stop           = saved_cmdlines_stop,
4656         .show           = saved_cmdlines_show,
4657 };
4658
4659 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4660 {
4661         if (tracing_disabled)
4662                 return -ENODEV;
4663
4664         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4665 }
4666
4667 static const struct file_operations tracing_saved_cmdlines_fops = {
4668         .open           = tracing_saved_cmdlines_open,
4669         .read           = seq_read,
4670         .llseek         = seq_lseek,
4671         .release        = seq_release,
4672 };
4673
4674 static ssize_t
4675 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4676                                  size_t cnt, loff_t *ppos)
4677 {
4678         char buf[64];
4679         int r;
4680
4681         arch_spin_lock(&trace_cmdline_lock);
4682         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4683         arch_spin_unlock(&trace_cmdline_lock);
4684
4685         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4686 }
4687
4688 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4689 {
4690         kfree(s->saved_cmdlines);
4691         kfree(s->map_cmdline_to_pid);
4692         kfree(s);
4693 }
4694
4695 static int tracing_resize_saved_cmdlines(unsigned int val)
4696 {
4697         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4698
4699         s = kmalloc(sizeof(*s), GFP_KERNEL);
4700         if (!s)
4701                 return -ENOMEM;
4702
4703         if (allocate_cmdlines_buffer(val, s) < 0) {
4704                 kfree(s);
4705                 return -ENOMEM;
4706         }
4707
4708         arch_spin_lock(&trace_cmdline_lock);
4709         savedcmd_temp = savedcmd;
4710         savedcmd = s;
4711         arch_spin_unlock(&trace_cmdline_lock);
4712         free_saved_cmdlines_buffer(savedcmd_temp);
4713
4714         return 0;
4715 }
4716
4717 static ssize_t
4718 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4719                                   size_t cnt, loff_t *ppos)
4720 {
4721         unsigned long val;
4722         int ret;
4723
4724         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4725         if (ret)
4726                 return ret;
4727
4728         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
4729         if (!val || val > PID_MAX_DEFAULT)
4730                 return -EINVAL;
4731
4732         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4733         if (ret < 0)
4734                 return ret;
4735
4736         *ppos += cnt;
4737
4738         return cnt;
4739 }
4740
4741 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4742         .open           = tracing_open_generic,
4743         .read           = tracing_saved_cmdlines_size_read,
4744         .write          = tracing_saved_cmdlines_size_write,
4745 };
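/*
 * Usage sketch for the "saved_cmdlines_size" file backed by the fops above
 * (the path assumes tracefs is mounted at /sys/kernel/tracing, which may
 * differ; the value read back is illustrative):
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   128
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *
 * Reading reports the current capacity of the pid->comm cache; writing
 * resizes it, accepting values from 1 up to PID_MAX_DEFAULT.
 */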
4746
4747 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4748 static union trace_eval_map_item *
4749 update_eval_map(union trace_eval_map_item *ptr)
4750 {
4751         if (!ptr->map.eval_string) {
4752                 if (ptr->tail.next) {
4753                         ptr = ptr->tail.next;
4754                         /* Set ptr to the next real item (skip head) */
4755                         ptr++;
4756                 } else
4757                         return NULL;
4758         }
4759         return ptr;
4760 }
4761
4762 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4763 {
4764         union trace_eval_map_item *ptr = v;
4765
4766         /*
4767          * Paranoid! If ptr points to end, we don't want to increment past it.
4768          * This really should never happen.
4769          */
4770         ptr = update_eval_map(ptr);
4771         if (WARN_ON_ONCE(!ptr))
4772                 return NULL;
4773
4774         ptr++;
4775
4776         (*pos)++;
4777
4778         ptr = update_eval_map(ptr);
4779
4780         return ptr;
4781 }
4782
4783 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4784 {
4785         union trace_eval_map_item *v;
4786         loff_t l = 0;
4787
4788         mutex_lock(&trace_eval_mutex);
4789
4790         v = trace_eval_maps;
4791         if (v)
4792                 v++;
4793
4794         while (v && l < *pos) {
4795                 v = eval_map_next(m, v, &l);
4796         }
4797
4798         return v;
4799 }
4800
4801 static void eval_map_stop(struct seq_file *m, void *v)
4802 {
4803         mutex_unlock(&trace_eval_mutex);
4804 }
4805
4806 static int eval_map_show(struct seq_file *m, void *v)
4807 {
4808         union trace_eval_map_item *ptr = v;
4809
4810         seq_printf(m, "%s %ld (%s)\n",
4811                    ptr->map.eval_string, ptr->map.eval_value,
4812                    ptr->map.system);
4813
4814         return 0;
4815 }
4816
4817 static const struct seq_operations tracing_eval_map_seq_ops = {
4818         .start          = eval_map_start,
4819         .next           = eval_map_next,
4820         .stop           = eval_map_stop,
4821         .show           = eval_map_show,
4822 };
4823
4824 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
4825 {
4826         if (tracing_disabled)
4827                 return -ENODEV;
4828
4829         return seq_open(filp, &tracing_eval_map_seq_ops);
4830 }
4831
4832 static const struct file_operations tracing_eval_map_fops = {
4833         .open           = tracing_eval_map_open,
4834         .read           = seq_read,
4835         .llseek         = seq_lseek,
4836         .release        = seq_release,
4837 };
4838
4839 static inline union trace_eval_map_item *
4840 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
4841 {
4842         /* Return tail of array given the head */
4843         return ptr + ptr->head.length + 1;
4844 }
4845
4846 static void
4847 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
4848                            int len)
4849 {
4850         struct trace_eval_map **stop;
4851         struct trace_eval_map **map;
4852         union trace_eval_map_item *map_array;
4853         union trace_eval_map_item *ptr;
4854
4855         stop = start + len;
4856
4857         /*
4858          * The trace_eval_maps contains the map plus a head and tail item,
4859          * where the head holds the module and length of array, and the
4860          * tail holds a pointer to the next list.
4861          */
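        /*
         * Layout of the array allocated below (len + 2 items):
         *
         *   [ head: mod, length ][ map[0] ] ... [ map[len-1] ][ tail: next ]
         *
         * The trailing memset() clears the tail item, so tail.next starts
         * out NULL until another map array is chained onto it.
         */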
4862         map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4863         if (!map_array) {
4864                 pr_warn("Unable to allocate trace eval mapping\n");
4865                 return;
4866         }
4867
4868         mutex_lock(&trace_eval_mutex);
4869
4870         if (!trace_eval_maps)
4871                 trace_eval_maps = map_array;
4872         else {
4873                 ptr = trace_eval_maps;
4874                 for (;;) {
4875                         ptr = trace_eval_jmp_to_tail(ptr);
4876                         if (!ptr->tail.next)
4877                                 break;
4878                         ptr = ptr->tail.next;
4880                 }
4881                 ptr->tail.next = map_array;
4882         }
4883         map_array->head.mod = mod;
4884         map_array->head.length = len;
4885         map_array++;
4886
4887         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4888                 map_array->map = **map;
4889                 map_array++;
4890         }
4891         memset(map_array, 0, sizeof(*map_array));
4892
4893         mutex_unlock(&trace_eval_mutex);
4894 }
4895
4896 static void trace_create_eval_file(struct dentry *d_tracer)
4897 {
4898         trace_create_file("eval_map", 0444, d_tracer,
4899                           NULL, &tracing_eval_map_fops);
4900 }
4901
4902 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
4903 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
4904 static inline void trace_insert_eval_map_file(struct module *mod,
4905                               struct trace_eval_map **start, int len) { }
4906 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
4907
4908 static void trace_insert_eval_map(struct module *mod,
4909                                   struct trace_eval_map **start, int len)
4910 {
4911         struct trace_eval_map **map;
4912
4913         if (len <= 0)
4914                 return;
4915
4916         map = start;
4917
4918         trace_event_eval_update(map, len);
4919
4920         trace_insert_eval_map_file(mod, start, len);
4921 }
4922
4923 static ssize_t
4924 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4925                        size_t cnt, loff_t *ppos)
4926 {
4927         struct trace_array *tr = filp->private_data;
4928         char buf[MAX_TRACER_SIZE+2];
4929         int r;
4930
4931         mutex_lock(&trace_types_lock);
4932         r = sprintf(buf, "%s\n", tr->current_trace->name);
4933         mutex_unlock(&trace_types_lock);
4934
4935         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4936 }
4937
4938 int tracer_init(struct tracer *t, struct trace_array *tr)
4939 {
4940         tracing_reset_online_cpus(&tr->trace_buffer);
4941         return t->init(tr);
4942 }
4943
4944 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4945 {
4946         int cpu;
4947
4948         for_each_tracing_cpu(cpu)
4949                 per_cpu_ptr(buf->data, cpu)->entries = val;
4950 }
4951
4952 #ifdef CONFIG_TRACER_MAX_TRACE
4953 /* resize @trace_buf's per-cpu entries to match those of @size_buf */
4954 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4955                                         struct trace_buffer *size_buf, int cpu_id)
4956 {
4957         int cpu, ret = 0;
4958
4959         if (cpu_id == RING_BUFFER_ALL_CPUS) {
4960                 for_each_tracing_cpu(cpu) {
4961                         ret = ring_buffer_resize(trace_buf->buffer,
4962                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4963                         if (ret < 0)
4964                                 break;
4965                         per_cpu_ptr(trace_buf->data, cpu)->entries =
4966                                 per_cpu_ptr(size_buf->data, cpu)->entries;
4967                 }
4968         } else {
4969                 ret = ring_buffer_resize(trace_buf->buffer,
4970                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4971                 if (ret == 0)
4972                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4973                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4974         }
4975
4976         return ret;
4977 }
4978 #endif /* CONFIG_TRACER_MAX_TRACE */
4979
4980 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4981                                         unsigned long size, int cpu)
4982 {
4983         int ret;
4984
4985         /*
4986          * If kernel or user changes the size of the ring buffer
4987          * we use the size that was given, and we can forget about
4988          * expanding it later.
4989          */
4990         ring_buffer_expanded = true;
4991
4992         /* May be called before buffers are initialized */
4993         if (!tr->trace_buffer.buffer)
4994                 return 0;
4995
4996         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
4997         if (ret < 0)
4998                 return ret;
4999
5000 #ifdef CONFIG_TRACER_MAX_TRACE
5001         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5002             !tr->current_trace->use_max_tr)
5003                 goto out;
5004
5005         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5006         if (ret < 0) {
5007                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5008                                                      &tr->trace_buffer, cpu);
5009                 if (r < 0) {
5010                         /*
5011                          * AARGH! We are left with a max buffer of a
5012                          * different size!
5013                          * The max buffer is our "snapshot" buffer.
5014                          * When a tracer needs a snapshot (one of the
5015                          * latency tracers), it swaps the max buffer
5016                          * with the saved snapshot. We succeeded in
5017                          * updating the size of the main buffer, but
5018                          * failed to update the size of the max buffer.
5019                          * Then, when we tried to reset the main buffer
5020                          * to its original size, we failed there too.
5021                          * This is very unlikely to happen, but if it
5022                          * does, warn and kill all tracing.
5023                          */
5024                         WARN_ON(1);
5025                         tracing_disabled = 1;
5026                 }
5027                 return ret;
5028         }
5029
5030         if (cpu == RING_BUFFER_ALL_CPUS)
5031                 set_buffer_entries(&tr->max_buffer, size);
5032         else
5033                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5034
5035  out:
5036 #endif /* CONFIG_TRACER_MAX_TRACE */
5037
5038         if (cpu == RING_BUFFER_ALL_CPUS)
5039                 set_buffer_entries(&tr->trace_buffer, size);
5040         else
5041                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5042
5043         return ret;
5044 }
5045
5046 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5047                                           unsigned long size, int cpu_id)
5048 {
5049         int ret = size;
5050
5051         mutex_lock(&trace_types_lock);
5052
5053         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5054                 /* make sure this cpu is enabled in the mask */
5055                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5056                         ret = -EINVAL;
5057                         goto out;
5058                 }
5059         }
5060
5061         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5062         if (ret < 0)
5063                 ret = -ENOMEM;
5064
5065 out:
5066         mutex_unlock(&trace_types_lock);
5067
5068         return ret;
5069 }
5070
5071
5072 /**
5073  * tracing_update_buffers - used by tracing facility to expand ring buffers
5074  *
5075  * To save memory when tracing is configured in but never used, the
5076  * ring buffers are initially set to a minimum size. Once a user starts
5077  * to use the tracing facility, the buffers need to grow to their
5078  * default size.
5079  *
5080  * This function is to be called when a tracer is about to be used.
5081  */
5082 int tracing_update_buffers(void)
5083 {
5084         int ret = 0;
5085
5086         mutex_lock(&trace_types_lock);
5087         if (!ring_buffer_expanded)
5088                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5089                                                 RING_BUFFER_ALL_CPUS);
5090         mutex_unlock(&trace_types_lock);
5091
5092         return ret;
5093 }
5094
5095 struct trace_option_dentry;
5096
5097 static void
5098 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5099
5100 /*
5101  * Used to clear out the tracer before deletion of an instance.
5102  * Must have trace_types_lock held.
5103  */
5104 static void tracing_set_nop(struct trace_array *tr)
5105 {
5106         if (tr->current_trace == &nop_trace)
5107                 return;
5108
5109         tr->current_trace->enabled--;
5110
5111         if (tr->current_trace->reset)
5112                 tr->current_trace->reset(tr);
5113
5114         tr->current_trace = &nop_trace;
5115 }
5116
5117 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5118 {
5119         /* Only enable if the directory has been created already. */
5120         if (!tr->dir)
5121                 return;
5122
5123         create_trace_option_files(tr, t);
5124 }
5125
5126 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5127 {
5128         struct tracer *t;
5129 #ifdef CONFIG_TRACER_MAX_TRACE
5130         bool had_max_tr;
5131 #endif
5132         int ret = 0;
5133
5134         mutex_lock(&trace_types_lock);
5135
5136         if (!ring_buffer_expanded) {
5137                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5138                                                 RING_BUFFER_ALL_CPUS);
5139                 if (ret < 0)
5140                         goto out;
5141                 ret = 0;
5142         }
5143
5144         for (t = trace_types; t; t = t->next) {
5145                 if (strcmp(t->name, buf) == 0)
5146                         break;
5147         }
5148         if (!t) {
5149                 ret = -EINVAL;
5150                 goto out;
5151         }
5152         if (t == tr->current_trace)
5153                 goto out;
5154
5155         /* Some tracers are only allowed for the top level buffer */
5156         if (!trace_ok_for_array(t, tr)) {
5157                 ret = -EINVAL;
5158                 goto out;
5159         }
5160
5161         /* If trace pipe files are being read, we can't change the tracer */
5162         if (tr->current_trace->ref) {
5163                 ret = -EBUSY;
5164                 goto out;
5165         }
5166
5167         trace_branch_disable();
5168
5169         tr->current_trace->enabled--;
5170
5171         if (tr->current_trace->reset)
5172                 tr->current_trace->reset(tr);
5173
5174         /* Current trace needs to be nop_trace before synchronize_sched */
5175         tr->current_trace = &nop_trace;
5176
5177 #ifdef CONFIG_TRACER_MAX_TRACE
5178         had_max_tr = tr->allocated_snapshot;
5179
5180         if (had_max_tr && !t->use_max_tr) {
5181                 /*
5182                  * We need to make sure that the update_max_tr sees that
5183                  * current_trace changed to nop_trace to keep it from
5184                  * swapping the buffers after we resize it.
5185                  * update_max_tr() is called with interrupts disabled,
5186                  * so a synchronize_sched() is sufficient.
5187                  */
5188                 synchronize_sched();
5189                 free_snapshot(tr);
5190         }
5191 #endif
5192
5193 #ifdef CONFIG_TRACER_MAX_TRACE
5194         if (t->use_max_tr && !had_max_tr) {
5195                 ret = alloc_snapshot(tr);
5196                 if (ret < 0)
5197                         goto out;
5198         }
5199 #endif
5200
5201         if (t->init) {
5202                 ret = tracer_init(t, tr);
5203                 if (ret)
5204                         goto out;
5205         }
5206
5207         tr->current_trace = t;
5208         tr->current_trace->enabled++;
5209         trace_branch_enable(tr);
5210  out:
5211         mutex_unlock(&trace_types_lock);
5212
5213         return ret;
5214 }
5215
5216 static ssize_t
5217 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5218                         size_t cnt, loff_t *ppos)
5219 {
5220         struct trace_array *tr = filp->private_data;
5221         char buf[MAX_TRACER_SIZE+1];
5222         int i;
5223         size_t ret;
5224         int err;
5225
5226         ret = cnt;
5227
5228         if (cnt > MAX_TRACER_SIZE)
5229                 cnt = MAX_TRACER_SIZE;
5230
5231         if (copy_from_user(buf, ubuf, cnt))
5232                 return -EFAULT;
5233
5234         buf[cnt] = 0;
5235
5236         /* strip trailing whitespace */
5237         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5238                 buf[i] = 0;
5239
5240         err = tracing_set_tracer(tr, buf);
5241         if (err)
5242                 return err;
5243
5244         *ppos += ret;
5245
5246         return ret;
5247 }
5248
5249 static ssize_t
5250 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5251                    size_t cnt, loff_t *ppos)
5252 {
5253         char buf[64];
5254         int r;
5255
5256         r = snprintf(buf, sizeof(buf), "%ld\n",
5257                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5258         if (r > sizeof(buf))
5259                 r = sizeof(buf);
5260         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5261 }
5262
5263 static ssize_t
5264 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5265                     size_t cnt, loff_t *ppos)
5266 {
5267         unsigned long val;
5268         int ret;
5269
5270         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5271         if (ret)
5272                 return ret;
5273
5274         *ptr = val * 1000;
5275
5276         return cnt;
5277 }
5278
5279 static ssize_t
5280 tracing_thresh_read(struct file *filp, char __user *ubuf,
5281                     size_t cnt, loff_t *ppos)
5282 {
5283         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5284 }
5285
5286 static ssize_t
5287 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5288                      size_t cnt, loff_t *ppos)
5289 {
5290         struct trace_array *tr = filp->private_data;
5291         int ret;
5292
5293         mutex_lock(&trace_types_lock);
5294         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5295         if (ret < 0)
5296                 goto out;
5297
5298         if (tr->current_trace->update_thresh) {
5299                 ret = tr->current_trace->update_thresh(tr);
5300                 if (ret < 0)
5301                         goto out;
5302         }
5303
5304         ret = cnt;
5305 out:
5306         mutex_unlock(&trace_types_lock);
5307
5308         return ret;
5309 }
5310
5311 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5312
5313 static ssize_t
5314 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5315                      size_t cnt, loff_t *ppos)
5316 {
5317         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5318 }
5319
5320 static ssize_t
5321 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5322                       size_t cnt, loff_t *ppos)
5323 {
5324         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5325 }
5326
5327 #endif
5328
5329 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5330 {
5331         struct trace_array *tr = inode->i_private;
5332         struct trace_iterator *iter;
5333         int ret = 0;
5334
5335         if (tracing_disabled)
5336                 return -ENODEV;
5337
5338         if (trace_array_get(tr) < 0)
5339                 return -ENODEV;
5340
5341         mutex_lock(&trace_types_lock);
5342
5343         /* create a buffer to store the information to pass to userspace */
5344         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5345         if (!iter) {
5346                 ret = -ENOMEM;
5347                 __trace_array_put(tr);
5348                 goto out;
5349         }
5350
5351         trace_seq_init(&iter->seq);
5352         iter->trace = tr->current_trace;
5353
5354         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5355                 ret = -ENOMEM;
5356                 goto fail;
5357         }
5358
5359         /* trace pipe does not show start of buffer */
5360         cpumask_setall(iter->started);
5361
5362         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5363                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5364
5365         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5366         if (trace_clocks[tr->clock_id].in_ns)
5367                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5368
5369         iter->tr = tr;
5370         iter->trace_buffer = &tr->trace_buffer;
5371         iter->cpu_file = tracing_get_cpu(inode);
5372         mutex_init(&iter->mutex);
5373         filp->private_data = iter;
5374
5375         if (iter->trace->pipe_open)
5376                 iter->trace->pipe_open(iter);
5377
5378         nonseekable_open(inode, filp);
5379
5380         tr->current_trace->ref++;
5381 out:
5382         mutex_unlock(&trace_types_lock);
5383         return ret;
5384
5385 fail:
5386         kfree(iter->trace);
5387         kfree(iter);
5388         __trace_array_put(tr);
5389         mutex_unlock(&trace_types_lock);
5390         return ret;
5391 }
5392
5393 static int tracing_release_pipe(struct inode *inode, struct file *file)
5394 {
5395         struct trace_iterator *iter = file->private_data;
5396         struct trace_array *tr = inode->i_private;
5397
5398         mutex_lock(&trace_types_lock);
5399
5400         tr->current_trace->ref--;
5401
5402         if (iter->trace->pipe_close)
5403                 iter->trace->pipe_close(iter);
5404
5405         mutex_unlock(&trace_types_lock);
5406
5407         free_cpumask_var(iter->started);
5408         mutex_destroy(&iter->mutex);
5409         kfree(iter);
5410
5411         trace_array_put(tr);
5412
5413         return 0;
5414 }
5415
5416 static unsigned int
5417 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5418 {
5419         struct trace_array *tr = iter->tr;
5420
5421         /* Iterators are static, they should be filled or empty */
5422         if (trace_buffer_iter(iter, iter->cpu_file))
5423                 return POLLIN | POLLRDNORM;
5424
5425         if (tr->trace_flags & TRACE_ITER_BLOCK)
5426                 /*
5427                  * Always select as readable when in blocking mode
5428                  */
5429                 return POLLIN | POLLRDNORM;
5430         else
5431                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5432                                              filp, poll_table);
5433 }
5434
5435 static unsigned int
5436 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5437 {
5438         struct trace_iterator *iter = filp->private_data;
5439
5440         return trace_poll(iter, filp, poll_table);
5441 }
5442
5443 /* Must be called with iter->mutex held. */
5444 static int tracing_wait_pipe(struct file *filp)
5445 {
5446         struct trace_iterator *iter = filp->private_data;
5447         int ret;
5448
5449         while (trace_empty(iter)) {
5450
5451                 if ((filp->f_flags & O_NONBLOCK)) {
5452                         return -EAGAIN;
5453                 }
5454
5455                 /*
5456                  * We block while the trace is empty. If tracing is disabled
5457                  * but we have never read anything, keep blocking; this lets
5458                  * a user cat this file and then enable tracing. Once we have
5459                  * read something, we instead return EOF when tracing is
5460                  * disabled again.
5461                  *
5462                  * iter->pos will be 0 if we haven't read anything.
5463                  */
5464                 if (!tracing_is_on() && iter->pos)
5465                         break;
5466
5467                 mutex_unlock(&iter->mutex);
5468
5469                 ret = wait_on_pipe(iter, false);
5470
5471                 mutex_lock(&iter->mutex);
5472
5473                 if (ret)
5474                         return ret;
5475         }
5476
5477         return 1;
5478 }
5479
5480 /*
5481  * Consumer reader.
5482  */
5483 static ssize_t
5484 tracing_read_pipe(struct file *filp, char __user *ubuf,
5485                   size_t cnt, loff_t *ppos)
5486 {
5487         struct trace_iterator *iter = filp->private_data;
5488         ssize_t sret;
5489
5490         /*
5491          * Avoid more than one consumer on a single file descriptor.
5492          * This is just a matter of trace coherency; the ring buffer itself
5493          * is protected.
5494          */
5495         mutex_lock(&iter->mutex);
5496
5497         /* return any leftover data */
5498         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5499         if (sret != -EBUSY)
5500                 goto out;
5501
5502         trace_seq_init(&iter->seq);
5503
5504         if (iter->trace->read) {
5505                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5506                 if (sret)
5507                         goto out;
5508         }
5509
5510 waitagain:
5511         sret = tracing_wait_pipe(filp);
5512         if (sret <= 0)
5513                 goto out;
5514
5515         /* stop when tracing is finished */
5516         if (trace_empty(iter)) {
5517                 sret = 0;
5518                 goto out;
5519         }
5520
5521         if (cnt >= PAGE_SIZE)
5522                 cnt = PAGE_SIZE - 1;
5523
5524         /* reset all but tr, trace, and overruns */
5525         memset(&iter->seq, 0,
5526                sizeof(struct trace_iterator) -
5527                offsetof(struct trace_iterator, seq));
5528         cpumask_clear(iter->started);
5529         iter->pos = -1;
5530
5531         trace_event_read_lock();
5532         trace_access_lock(iter->cpu_file);
5533         while (trace_find_next_entry_inc(iter) != NULL) {
5534                 enum print_line_t ret;
5535                 int save_len = iter->seq.seq.len;
5536
5537                 ret = print_trace_line(iter);
5538                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5539                         /* don't print partial lines */
5540                         iter->seq.seq.len = save_len;
5541                         break;
5542                 }
5543                 if (ret != TRACE_TYPE_NO_CONSUME)
5544                         trace_consume(iter);
5545
5546                 if (trace_seq_used(&iter->seq) >= cnt)
5547                         break;
5548
5549                 /*
5550                  * The full flag being set means we reached the trace_seq buffer
5551                  * size and should have left via the partial-line check above.
5552                  * If we get here, one of the trace_seq_*() functions was misused.
5553                  */
5554                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5555                           iter->ent->type);
5556         }
5557         trace_access_unlock(iter->cpu_file);
5558         trace_event_read_unlock();
5559
5560         /* Now copy what we have to the user */
5561         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5562         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5563                 trace_seq_init(&iter->seq);
5564
5565         /*
5566          * If there was nothing to send to user, in spite of consuming trace
5567          * entries, go back to wait for more entries.
5568          */
5569         if (sret == -EBUSY)
5570                 goto waitagain;
5571
5572 out:
5573         mutex_unlock(&iter->mutex);
5574
5575         return sret;
5576 }
5577
5578 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5579                                      unsigned int idx)
5580 {
5581         __free_page(spd->pages[idx]);
5582 }
5583
5584 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5585         .can_merge              = 0,
5586         .confirm                = generic_pipe_buf_confirm,
5587         .release                = generic_pipe_buf_release,
5588         .steal                  = generic_pipe_buf_steal,
5589         .get                    = generic_pipe_buf_get,
5590 };
5591
5592 static size_t
5593 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5594 {
5595         size_t count;
5596         int save_len;
5597         int ret;
5598
5599         /* Seq buffer is page-sized, exactly what we need. */
5600         for (;;) {
5601                 save_len = iter->seq.seq.len;
5602                 ret = print_trace_line(iter);
5603
5604                 if (trace_seq_has_overflowed(&iter->seq)) {
5605                         iter->seq.seq.len = save_len;
5606                         break;
5607                 }
5608
5609                 /*
5610                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
5611                  * should only be returned if iter->seq overflowed. But
5612                  * check it anyway to be safe.
5613                  */
5614                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5615                         iter->seq.seq.len = save_len;
5616                         break;
5617                 }
5618
5619                 count = trace_seq_used(&iter->seq) - save_len;
5620                 if (rem < count) {
5621                         rem = 0;
5622                         iter->seq.seq.len = save_len;
5623                         break;
5624                 }
5625
5626                 if (ret != TRACE_TYPE_NO_CONSUME)
5627                         trace_consume(iter);
5628                 rem -= count;
5629                 if (!trace_find_next_entry_inc(iter))   {
5630                         rem = 0;
5631                         iter->ent = NULL;
5632                         break;
5633                 }
5634         }
5635
5636         return rem;
5637 }
5638
5639 static ssize_t tracing_splice_read_pipe(struct file *filp,
5640                                         loff_t *ppos,
5641                                         struct pipe_inode_info *pipe,
5642                                         size_t len,
5643                                         unsigned int flags)
5644 {
5645         struct page *pages_def[PIPE_DEF_BUFFERS];
5646         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5647         struct trace_iterator *iter = filp->private_data;
5648         struct splice_pipe_desc spd = {
5649                 .pages          = pages_def,
5650                 .partial        = partial_def,
5651                 .nr_pages       = 0, /* This gets updated below. */
5652                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5653                 .ops            = &tracing_pipe_buf_ops,
5654                 .spd_release    = tracing_spd_release_pipe,
5655         };
5656         ssize_t ret;
5657         size_t rem;
5658         unsigned int i;
5659
5660         if (splice_grow_spd(pipe, &spd))
5661                 return -ENOMEM;
5662
5663         mutex_lock(&iter->mutex);
5664
5665         if (iter->trace->splice_read) {
5666                 ret = iter->trace->splice_read(iter, filp,
5667                                                ppos, pipe, len, flags);
5668                 if (ret)
5669                         goto out_err;
5670         }
5671
5672         ret = tracing_wait_pipe(filp);
5673         if (ret <= 0)
5674                 goto out_err;
5675
5676         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5677                 ret = -EFAULT;
5678                 goto out_err;
5679         }
5680
5681         trace_event_read_lock();
5682         trace_access_lock(iter->cpu_file);
5683
5684         /* Fill as many pages as possible. */
5685         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5686                 spd.pages[i] = alloc_page(GFP_KERNEL);
5687                 if (!spd.pages[i])
5688                         break;
5689
5690                 rem = tracing_fill_pipe_page(rem, iter);
5691
5692                 /* Copy the data into the page, so we can start over. */
5693                 ret = trace_seq_to_buffer(&iter->seq,
5694                                           page_address(spd.pages[i]),
5695                                           trace_seq_used(&iter->seq));
5696                 if (ret < 0) {
5697                         __free_page(spd.pages[i]);
5698                         break;
5699                 }
5700                 spd.partial[i].offset = 0;
5701                 spd.partial[i].len = trace_seq_used(&iter->seq);
5702
5703                 trace_seq_init(&iter->seq);
5704         }
5705
5706         trace_access_unlock(iter->cpu_file);
5707         trace_event_read_unlock();
5708         mutex_unlock(&iter->mutex);
5709
5710         spd.nr_pages = i;
5711
5712         if (i)
5713                 ret = splice_to_pipe(pipe, &spd);
5714         else
5715                 ret = 0;
5716 out:
5717         splice_shrink_spd(&spd);
5718         return ret;
5719
5720 out_err:
5721         mutex_unlock(&iter->mutex);
5722         goto out;
5723 }
5724
5725 static ssize_t
5726 tracing_entries_read(struct file *filp, char __user *ubuf,
5727                      size_t cnt, loff_t *ppos)
5728 {
5729         struct inode *inode = file_inode(filp);
5730         struct trace_array *tr = inode->i_private;
5731         int cpu = tracing_get_cpu(inode);
5732         char buf[64];
5733         int r = 0;
5734         ssize_t ret;
5735
5736         mutex_lock(&trace_types_lock);
5737
5738         if (cpu == RING_BUFFER_ALL_CPUS) {
5739                 int cpu, buf_size_same;
5740                 unsigned long size;
5741
5742                 size = 0;
5743                 buf_size_same = 1;
5744                 /* check if all per-cpu buffer sizes are the same */
5745                 for_each_tracing_cpu(cpu) {
5746                         /* fill in the size from first enabled cpu */
5747                         if (size == 0)
5748                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5749                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5750                                 buf_size_same = 0;
5751                                 break;
5752                         }
5753                 }
5754
5755                 if (buf_size_same) {
5756                         if (!ring_buffer_expanded)
5757                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5758                                             size >> 10,
5759                                             trace_buf_size >> 10);
5760                         else
5761                                 r = sprintf(buf, "%lu\n", size >> 10);
5762                 } else
5763                         r = sprintf(buf, "X\n");
5764         } else
5765                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5766
5767         mutex_unlock(&trace_types_lock);
5768
5769         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5770         return ret;
5771 }
5772
5773 static ssize_t
5774 tracing_entries_write(struct file *filp, const char __user *ubuf,
5775                       size_t cnt, loff_t *ppos)
5776 {
5777         struct inode *inode = file_inode(filp);
5778         struct trace_array *tr = inode->i_private;
5779         unsigned long val;
5780         int ret;
5781
5782         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5783         if (ret)
5784                 return ret;
5785
5786         /* must have at least 1 entry */
5787         if (!val)
5788                 return -EINVAL;
5789
5790         /* value is in KB */
5791         val <<= 10;
5792         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
5793         if (ret < 0)
5794                 return ret;
5795
5796         *ppos += cnt;
5797
5798         return cnt;
5799 }
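/*
 * Usage sketch for the "buffer_size_kb" file that the read/write handlers
 * above implement (path assumes the usual tracefs mount point and may
 * differ):
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 * The value is interpreted in kilobytes per CPU; writing through a
 * per_cpu/cpuN/buffer_size_kb file resizes only that CPU's buffer.
 */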
5800
5801 static ssize_t
5802 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5803                                 size_t cnt, loff_t *ppos)
5804 {
5805         struct trace_array *tr = filp->private_data;
5806         char buf[64];
5807         int r, cpu;
5808         unsigned long size = 0, expanded_size = 0;
5809
5810         mutex_lock(&trace_types_lock);
5811         for_each_tracing_cpu(cpu) {
5812                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5813                 if (!ring_buffer_expanded)
5814                         expanded_size += trace_buf_size >> 10;
5815         }
5816         if (ring_buffer_expanded)
5817                 r = sprintf(buf, "%lu\n", size);
5818         else
5819                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5820         mutex_unlock(&trace_types_lock);
5821
5822         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5823 }
5824
5825 static ssize_t
5826 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5827                           size_t cnt, loff_t *ppos)
5828 {
5829         /*
5830          * There is no need to read what the user has written; this function
5831          * exists only so that using "echo" on this file does not return an error.
5832          */
5833
5834         *ppos += cnt;
5835
5836         return cnt;
5837 }
5838
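/*
 * Closing the "free_buffer" file shrinks the ring buffer to zero and,
 * if the instance has the stop-on-free option set, turns tracing off.
 * Writes to the file are accepted but ignored (see above).
 */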
5839 static int
5840 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5841 {
5842         struct trace_array *tr = inode->i_private;
5843
5844         /* disable tracing? */
5845         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5846                 tracer_tracing_off(tr);
5847         /* resize the ring buffer to 0 */
5848         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5849
5850         trace_array_put(tr);
5851
5852         return 0;
5853 }
5854
5855 static ssize_t
5856 tracing_mark_write(struct file *filp, const char __user *ubuf,
5857                                         size_t cnt, loff_t *fpos)
5858 {
5859         struct trace_array *tr = filp->private_data;
5860         struct ring_buffer_event *event;
5861         struct ring_buffer *buffer;
5862         struct print_entry *entry;
5863         unsigned long irq_flags;
5864         const char faulted[] = "<faulted>";
5865         ssize_t written;
5866         int size;
5867         int len;
5868
5869 /* Used in tracing_mark_raw_write() as well */
5870 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5871
5872         if (tracing_disabled)
5873                 return -EINVAL;
5874
5875         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5876                 return -EINVAL;
5877
5878         if (cnt > TRACE_BUF_SIZE)
5879                 cnt = TRACE_BUF_SIZE;
5880
5881         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5882
5883         local_save_flags(irq_flags);
5884         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5885
5886         /* If the write is shorter than "<faulted>", make sure we can still store that */
5887         if (cnt < FAULTED_SIZE)
5888                 size += FAULTED_SIZE - cnt;
5889
5890         buffer = tr->trace_buffer.buffer;
5891         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5892                                             irq_flags, preempt_count());
5893         if (unlikely(!event))
5894                 /* Ring buffer disabled, return as if not open for write */
5895                 return -EBADF;
5896
5897         entry = ring_buffer_event_data(event);
5898         entry->ip = _THIS_IP_;
5899
5900         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5901         if (len) {
5902                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5903                 cnt = FAULTED_SIZE;
5904                 written = -EFAULT;
5905         } else
5906                 written = cnt;
5907         len = cnt;
5908
5909         if (entry->buf[cnt - 1] != '\n') {
5910                 entry->buf[cnt] = '\n';
5911                 entry->buf[cnt + 1] = '\0';
5912         } else
5913                 entry->buf[cnt] = '\0';
5914
5915         __buffer_unlock_commit(buffer, event);
5916
5917         if (written > 0)
5918                 *fpos += written;
5919
5920         return written;
5921 }
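/*
 * Usage sketch for the "trace_marker" file implemented above (path assumes
 * the usual tracefs mount point and may differ):
 *
 *   # echo "hit interesting point" > /sys/kernel/tracing/trace_marker
 *
 * The string is recorded in the ring buffer as a TRACE_PRINT event and
 * shows up in the trace output interleaved with other events.
 */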
5922
5923 /* Limit it for now to 3K (including tag) */
5924 #define RAW_DATA_MAX_SIZE (1024*3)
5925
5926 static ssize_t
5927 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5928                                         size_t cnt, loff_t *fpos)
5929 {
5930         struct trace_array *tr = filp->private_data;
5931         struct ring_buffer_event *event;
5932         struct ring_buffer *buffer;
5933         struct raw_data_entry *entry;
5934         const char faulted[] = "<faulted>";
5935         unsigned long irq_flags;
5936         ssize_t written;
5937         int size;
5938         int len;
5939
5940 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5941
5942         if (tracing_disabled)
5943                 return -EINVAL;
5944
5945         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5946                 return -EINVAL;
5947
5948         /* The marker must at least have a tag id */
5949         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5950                 return -EINVAL;
5951
5952         if (cnt > TRACE_BUF_SIZE)
5953                 cnt = TRACE_BUF_SIZE;
5954
5955         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5956
5957         local_save_flags(irq_flags);
5958         size = sizeof(*entry) + cnt;
5959         if (cnt < FAULT_SIZE_ID)
5960                 size += FAULT_SIZE_ID - cnt;
5961
5962         buffer = tr->trace_buffer.buffer;
5963         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5964                                             irq_flags, preempt_count());
5965         if (!event)
5966                 /* Ring buffer disabled, return as if not open for write */
5967                 return -EBADF;
5968
5969         entry = ring_buffer_event_data(event);
5970
5971         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5972         if (len) {
5973                 entry->id = -1;
5974                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5975                 written = -EFAULT;
5976         } else
5977                 written = cnt;
5978
5979         __buffer_unlock_commit(buffer, event);
5980
5981         if (written > 0)
5982                 *fpos += written;
5983
5984         return written;
5985 }
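/*
 * Writes to "trace_marker_raw" must start with a tag id of
 * sizeof(unsigned int) bytes, followed by the raw payload. A minimal
 * user-space sketch (the file path and tag value are illustrative
 * assumptions):
 *
 *   struct { unsigned int id; char payload[8]; } m = { 42, "rawdata" };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, &m, sizeof(m));
 *
 * The entry is stored verbatim as a TRACE_RAW_DATA event.
 */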
5986
5987 static int tracing_clock_show(struct seq_file *m, void *v)
5988 {
5989         struct trace_array *tr = m->private;
5990         int i;
5991
5992         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5993                 seq_printf(m,
5994                         "%s%s%s%s", i ? " " : "",
5995                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5996                         i == tr->clock_id ? "]" : "");
5997         seq_putc(m, '\n');
5998
5999         return 0;
6000 }
6001
6002 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6003 {
6004         int i;
6005
6006         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6007                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6008                         break;
6009         }
6010         if (i == ARRAY_SIZE(trace_clocks))
6011                 return -EINVAL;
6012
6013         mutex_lock(&trace_types_lock);
6014
6015         tr->clock_id = i;
6016
6017         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6018
6019         /*
6020          * New clock may not be consistent with the previous clock.
6021          * Reset the buffer so that it doesn't have incomparable timestamps.
6022          */
6023         tracing_reset_online_cpus(&tr->trace_buffer);
6024
6025 #ifdef CONFIG_TRACER_MAX_TRACE
6026         if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
6027                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6028         tracing_reset_online_cpus(&tr->max_buffer);
6029 #endif
6030
6031         mutex_unlock(&trace_types_lock);
6032
6033         return 0;
6034 }
6035
6036 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6037                                    size_t cnt, loff_t *fpos)
6038 {
6039         struct seq_file *m = filp->private_data;
6040         struct trace_array *tr = m->private;
6041         char buf[64];
6042         const char *clockstr;
6043         int ret;
6044
6045         if (cnt >= sizeof(buf))
6046                 return -EINVAL;
6047
6048         if (copy_from_user(buf, ubuf, cnt))
6049                 return -EFAULT;
6050
6051         buf[cnt] = 0;
6052
6053         clockstr = strstrip(buf);
6054
6055         ret = tracing_set_clock(tr, clockstr);
6056         if (ret)
6057                 return ret;
6058
6059         *fpos += cnt;
6060
6061         return cnt;
6062 }
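/*
 * Usage sketch for the "trace_clock" file (path assumes the usual tracefs
 * mount point; the output below is abridged and illustrative, as the
 * available clocks depend on the kernel version and configuration):
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter uptime perf mono ...
 *   # echo mono > /sys/kernel/tracing/trace_clock
 *
 * The clock currently in use is shown in brackets; selecting a new clock
 * resets the buffers, since timestamps from different clocks do not compare.
 */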
6063
6064 static int tracing_clock_open(struct inode *inode, struct file *file)
6065 {
6066         struct trace_array *tr = inode->i_private;
6067         int ret;
6068
6069         if (tracing_disabled)
6070                 return -ENODEV;
6071
6072         if (trace_array_get(tr))
6073                 return -ENODEV;
6074
6075         ret = single_open(file, tracing_clock_show, inode->i_private);
6076         if (ret < 0)
6077                 trace_array_put(tr);
6078
6079         return ret;
6080 }
6081
6082 struct ftrace_buffer_info {
6083         struct trace_iterator   iter;
6084         void                    *spare;
6085         unsigned int            spare_cpu;
6086         unsigned int            read;
6087 };
6088
6089 #ifdef CONFIG_TRACER_SNAPSHOT
6090 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6091 {
6092         struct trace_array *tr = inode->i_private;
6093         struct trace_iterator *iter;
6094         struct seq_file *m;
6095         int ret = 0;
6096
6097         if (trace_array_get(tr) < 0)
6098                 return -ENODEV;
6099
6100         if (file->f_mode & FMODE_READ) {
6101                 iter = __tracing_open(inode, file, true);
6102                 if (IS_ERR(iter))
6103                         ret = PTR_ERR(iter);
6104         } else {
6105                 /* Writes still need the seq_file to hold the private data */
6106                 ret = -ENOMEM;
6107                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6108                 if (!m)
6109                         goto out;
6110                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6111                 if (!iter) {
6112                         kfree(m);
6113                         goto out;
6114                 }
6115                 ret = 0;
6116
6117                 iter->tr = tr;
6118                 iter->trace_buffer = &tr->max_buffer;
6119                 iter->cpu_file = tracing_get_cpu(inode);
6120                 m->private = iter;
6121                 file->private_data = m;
6122         }
6123 out:
6124         if (ret < 0)
6125                 trace_array_put(tr);
6126
6127         return ret;
6128 }
6129
6130 static ssize_t
6131 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6132                        loff_t *ppos)
6133 {
6134         struct seq_file *m = filp->private_data;
6135         struct trace_iterator *iter = m->private;
6136         struct trace_array *tr = iter->tr;
6137         unsigned long val;
6138         int ret;
6139
6140         ret = tracing_update_buffers();
6141         if (ret < 0)
6142                 return ret;
6143
6144         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6145         if (ret)
6146                 return ret;
6147
6148         mutex_lock(&trace_types_lock);
6149
6150         if (tr->current_trace->use_max_tr) {
6151                 ret = -EBUSY;
6152                 goto out;
6153         }
6154
6155         switch (val) {
6156         case 0:
6157                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6158                         ret = -EINVAL;
6159                         break;
6160                 }
6161                 if (tr->allocated_snapshot)
6162                         free_snapshot(tr);
6163                 break;
6164         case 1:
6165 /* Only allow per-cpu swap if the ring buffer supports it */
6166 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6167                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6168                         ret = -EINVAL;
6169                         break;
6170                 }
6171 #endif
6172                 if (!tr->allocated_snapshot) {
6173                         ret = alloc_snapshot(tr);
6174                         if (ret < 0)
6175                                 break;
6176                 }
6177                 local_irq_disable();
6178                 /* Now, we're going to swap */
6179                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6180                         update_max_tr(tr, current, smp_processor_id());
6181                 else
6182                         update_max_tr_single(tr, current, iter->cpu_file);
6183                 local_irq_enable();
6184                 break;
6185         default:
6186                 if (tr->allocated_snapshot) {
6187                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6188                                 tracing_reset_online_cpus(&tr->max_buffer);
6189                         else
6190                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6191                 }
6192                 break;
6193         }
6194
6195         if (ret >= 0) {
6196                 *ppos += cnt;
6197                 ret = cnt;
6198         }
6199 out:
6200         mutex_unlock(&trace_types_lock);
6201         return ret;
6202 }
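/*
 * Summary of values accepted by the "snapshot" file handled above:
 *   0  - free the snapshot buffer (only valid for the all-CPU file)
 *   1  - allocate the snapshot buffer if needed and swap it with the
 *        live buffer (per-CPU swap only if the ring buffer supports it)
 *   2+ - clear the snapshot buffer without taking a new snapshot
 */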
6203
6204 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6205 {
6206         struct seq_file *m = file->private_data;
6207         int ret;
6208
6209         ret = tracing_release(inode, file);
6210
6211         if (file->f_mode & FMODE_READ)
6212                 return ret;
6213
6214         /* If write only, the seq_file is just a stub */
6215         if (m)
6216                 kfree(m->private);
6217         kfree(m);
6218
6219         return 0;
6220 }
6221
6222 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6223 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6224                                     size_t count, loff_t *ppos);
6225 static int tracing_buffers_release(struct inode *inode, struct file *file);
6226 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6227                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6228
6229 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6230 {
6231         struct ftrace_buffer_info *info;
6232         int ret;
6233
6234         ret = tracing_buffers_open(inode, filp);
6235         if (ret < 0)
6236                 return ret;
6237
6238         info = filp->private_data;
6239
6240         if (info->iter.trace->use_max_tr) {
6241                 tracing_buffers_release(inode, filp);
6242                 return -EBUSY;
6243         }
6244
6245         info->iter.snapshot = true;
6246         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6247
6248         return ret;
6249 }
6250
6251 #endif /* CONFIG_TRACER_SNAPSHOT */
6252
6253
6254 static const struct file_operations tracing_thresh_fops = {
6255         .open           = tracing_open_generic,
6256         .read           = tracing_thresh_read,
6257         .write          = tracing_thresh_write,
6258         .llseek         = generic_file_llseek,
6259 };
6260
6261 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6262 static const struct file_operations tracing_max_lat_fops = {
6263         .open           = tracing_open_generic,
6264         .read           = tracing_max_lat_read,
6265         .write          = tracing_max_lat_write,
6266         .llseek         = generic_file_llseek,
6267 };
6268 #endif
6269
6270 static const struct file_operations set_tracer_fops = {
6271         .open           = tracing_open_generic,
6272         .read           = tracing_set_trace_read,
6273         .write          = tracing_set_trace_write,
6274         .llseek         = generic_file_llseek,
6275 };
6276
6277 static const struct file_operations tracing_pipe_fops = {
6278         .open           = tracing_open_pipe,
6279         .poll           = tracing_poll_pipe,
6280         .read           = tracing_read_pipe,
6281         .splice_read    = tracing_splice_read_pipe,
6282         .release        = tracing_release_pipe,
6283         .llseek         = no_llseek,
6284 };
6285
6286 static const struct file_operations tracing_entries_fops = {
6287         .open           = tracing_open_generic_tr,
6288         .read           = tracing_entries_read,
6289         .write          = tracing_entries_write,
6290         .llseek         = generic_file_llseek,
6291         .release        = tracing_release_generic_tr,
6292 };
6293
6294 static const struct file_operations tracing_total_entries_fops = {
6295         .open           = tracing_open_generic_tr,
6296         .read           = tracing_total_entries_read,
6297         .llseek         = generic_file_llseek,
6298         .release        = tracing_release_generic_tr,
6299 };
6300
6301 static const struct file_operations tracing_free_buffer_fops = {
6302         .open           = tracing_open_generic_tr,
6303         .write          = tracing_free_buffer_write,
6304         .release        = tracing_free_buffer_release,
6305 };
6306
6307 static const struct file_operations tracing_mark_fops = {
6308         .open           = tracing_open_generic_tr,
6309         .write          = tracing_mark_write,
6310         .llseek         = generic_file_llseek,
6311         .release        = tracing_release_generic_tr,
6312 };
6313
6314 static const struct file_operations tracing_mark_raw_fops = {
6315         .open           = tracing_open_generic_tr,
6316         .write          = tracing_mark_raw_write,
6317         .llseek         = generic_file_llseek,
6318         .release        = tracing_release_generic_tr,
6319 };
6320
6321 static const struct file_operations trace_clock_fops = {
6322         .open           = tracing_clock_open,
6323         .read           = seq_read,
6324         .llseek         = seq_lseek,
6325         .release        = tracing_single_release_tr,
6326         .write          = tracing_clock_write,
6327 };
6328
6329 #ifdef CONFIG_TRACER_SNAPSHOT
6330 static const struct file_operations snapshot_fops = {
6331         .open           = tracing_snapshot_open,
6332         .read           = seq_read,
6333         .write          = tracing_snapshot_write,
6334         .llseek         = tracing_lseek,
6335         .release        = tracing_snapshot_release,
6336 };
6337
6338 static const struct file_operations snapshot_raw_fops = {
6339         .open           = snapshot_raw_open,
6340         .read           = tracing_buffers_read,
6341         .release        = tracing_buffers_release,
6342         .splice_read    = tracing_buffers_splice_read,
6343         .llseek         = no_llseek,
6344 };
6345
6346 #endif /* CONFIG_TRACER_SNAPSHOT */
6347
6348 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6349 {
6350         struct trace_array *tr = inode->i_private;
6351         struct ftrace_buffer_info *info;
6352         int ret;
6353
6354         if (tracing_disabled)
6355                 return -ENODEV;
6356
6357         if (trace_array_get(tr) < 0)
6358                 return -ENODEV;
6359
6360         info = kzalloc(sizeof(*info), GFP_KERNEL);
6361         if (!info) {
6362                 trace_array_put(tr);
6363                 return -ENOMEM;
6364         }
6365
6366         mutex_lock(&trace_types_lock);
6367
6368         info->iter.tr           = tr;
6369         info->iter.cpu_file     = tracing_get_cpu(inode);
6370         info->iter.trace        = tr->current_trace;
6371         info->iter.trace_buffer = &tr->trace_buffer;
6372         info->spare             = NULL;
6373         /* Force reading ring buffer for first read */
6374         info->read              = (unsigned int)-1;
6375
6376         filp->private_data = info;
6377
6378         tr->current_trace->ref++;
6379
6380         mutex_unlock(&trace_types_lock);
6381
6382         ret = nonseekable_open(inode, filp);
6383         if (ret < 0)
6384                 trace_array_put(tr);
6385
6386         return ret;
6387 }
6388
6389 static unsigned int
6390 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6391 {
6392         struct ftrace_buffer_info *info = filp->private_data;
6393         struct trace_iterator *iter = &info->iter;
6394
6395         return trace_poll(iter, filp, poll_table);
6396 }
6397
6398 static ssize_t
6399 tracing_buffers_read(struct file *filp, char __user *ubuf,
6400                      size_t count, loff_t *ppos)
6401 {
6402         struct ftrace_buffer_info *info = filp->private_data;
6403         struct trace_iterator *iter = &info->iter;
6404         ssize_t ret;
6405         ssize_t size;
6406
6407         if (!count)
6408                 return 0;
6409
6410 #ifdef CONFIG_TRACER_MAX_TRACE
6411         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6412                 return -EBUSY;
6413 #endif
6414
6415         if (!info->spare) {
6416                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6417                                                           iter->cpu_file);
6418                 info->spare_cpu = iter->cpu_file;
6419         }
6420         if (!info->spare)
6421                 return -ENOMEM;
6422
6423         /* Do we have previous read data to read? */
6424         if (info->read < PAGE_SIZE)
6425                 goto read;
6426
6427  again:
6428         trace_access_lock(iter->cpu_file);
6429         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6430                                     &info->spare,
6431                                     count,
6432                                     iter->cpu_file, 0);
6433         trace_access_unlock(iter->cpu_file);
6434
6435         if (ret < 0) {
6436                 if (trace_empty(iter)) {
6437                         if ((filp->f_flags & O_NONBLOCK))
6438                                 return -EAGAIN;
6439
6440                         ret = wait_on_pipe(iter, false);
6441                         if (ret)
6442                                 return ret;
6443
6444                         goto again;
6445                 }
6446                 return 0;
6447         }
6448
6449         info->read = 0;
6450  read:
6451         size = PAGE_SIZE - info->read;
6452         if (size > count)
6453                 size = count;
6454
6455         ret = copy_to_user(ubuf, info->spare + info->read, size);
6456         if (ret == size)
6457                 return -EFAULT;
6458
6459         size -= ret;
6460
6461         *ppos += size;
6462         info->read += size;
6463
6464         return size;
6465 }
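
/*
 * A minimal user-space sketch of driving the read path above through the
 * per_cpu/cpuN/trace_pipe_raw file: each read() returns data from one
 * binary ring-buffer page at a time.  The path and the 4096-byte page
 * size are assumptions.
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int main(void)
 *	{
 *		char page[4096];
 *		ssize_t n;
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY | O_NONBLOCK);
 *
 *		if (fd < 0)
 *			return 1;
 *		// with O_NONBLOCK, an empty buffer ends the loop with EAGAIN
 *		while ((n = read(fd, page, sizeof(page))) > 0)
 *			fwrite(page, 1, n, stdout);
 *		close(fd);
 *		return 0;
 *	}
 */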
6466
6467 static int tracing_buffers_release(struct inode *inode, struct file *file)
6468 {
6469         struct ftrace_buffer_info *info = file->private_data;
6470         struct trace_iterator *iter = &info->iter;
6471
6472         mutex_lock(&trace_types_lock);
6473
6474         iter->tr->current_trace->ref--;
6475
6476         __trace_array_put(iter->tr);
6477
6478         if (info->spare)
6479                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6480                                            info->spare_cpu, info->spare);
6481         kfree(info);
6482
6483         mutex_unlock(&trace_types_lock);
6484
6485         return 0;
6486 }
6487
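/*
 * A buffer_ref pins one ring-buffer page while that page sits in a pipe:
 * the reference is taken in tracing_buffers_splice_read() (and bumped by
 * buffer_pipe_buf_get()), and dropped by buffer_pipe_buf_release() or
 * buffer_spd_release(), at which point the page is handed back to the
 * ring buffer.
 */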
6488 struct buffer_ref {
6489         struct ring_buffer      *buffer;
6490         void                    *page;
6491         int                     cpu;
6492         int                     ref;
6493 };
6494
6495 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6496                                     struct pipe_buffer *buf)
6497 {
6498         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6499
6500         if (--ref->ref)
6501                 return;
6502
6503         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6504         kfree(ref);
6505         buf->private = 0;
6506 }
6507
6508 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6509                                 struct pipe_buffer *buf)
6510 {
6511         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6512
6513         ref->ref++;
6514 }
6515
6516 /* Pipe buffer operations for a buffer. */
6517 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6518         .can_merge              = 0,
6519         .confirm                = generic_pipe_buf_confirm,
6520         .release                = buffer_pipe_buf_release,
6521         .steal                  = generic_pipe_buf_steal,
6522         .get                    = buffer_pipe_buf_get,
6523 };
6524
6525 /*
6526  * Callback from splice_to_pipe(); releases a page still referenced by the
6527  * spd in case we errored out while filling the pipe.
6528  */
6529 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6530 {
6531         struct buffer_ref *ref =
6532                 (struct buffer_ref *)spd->partial[i].private;
6533
6534         if (--ref->ref)
6535                 return;
6536
6537         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6538         kfree(ref);
6539         spd->partial[i].private = 0;
6540 }
6541
6542 static ssize_t
6543 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6544                             struct pipe_inode_info *pipe, size_t len,
6545                             unsigned int flags)
6546 {
6547         struct ftrace_buffer_info *info = file->private_data;
6548         struct trace_iterator *iter = &info->iter;
6549         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6550         struct page *pages_def[PIPE_DEF_BUFFERS];
6551         struct splice_pipe_desc spd = {
6552                 .pages          = pages_def,
6553                 .partial        = partial_def,
6554                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6555                 .ops            = &buffer_pipe_buf_ops,
6556                 .spd_release    = buffer_spd_release,
6557         };
6558         struct buffer_ref *ref;
6559         int entries, size, i;
6560         ssize_t ret = 0;
6561
6562 #ifdef CONFIG_TRACER_MAX_TRACE
6563         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6564                 return -EBUSY;
6565 #endif
6566
6567         if (*ppos & (PAGE_SIZE - 1))
6568                 return -EINVAL;
6569
6570         if (len & (PAGE_SIZE - 1)) {
6571                 if (len < PAGE_SIZE)
6572                         return -EINVAL;
6573                 len &= PAGE_MASK;
6574         }
6575
6576         if (splice_grow_spd(pipe, &spd))
6577                 return -ENOMEM;
6578
6579  again:
6580         trace_access_lock(iter->cpu_file);
6581         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6582
6583         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6584                 struct page *page;
6585                 int r;
6586
6587                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6588                 if (!ref) {
6589                         ret = -ENOMEM;
6590                         break;
6591                 }
6592
6593                 ref->ref = 1;
6594                 ref->buffer = iter->trace_buffer->buffer;
6595                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6596                 if (!ref->page) {
6597                         ret = -ENOMEM;
6598                         kfree(ref);
6599                         break;
6600                 }
6601                 ref->cpu = iter->cpu_file;
6602
6603                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6604                                           len, iter->cpu_file, 1);
6605                 if (r < 0) {
6606                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6607                                                    ref->page);
6608                         kfree(ref);
6609                         break;
6610                 }
6611
6612                 /*
6613                  * Zero out any leftover data; this page is going
6614                  * to user land.
6615                  */
6616                 size = ring_buffer_page_len(ref->page);
6617                 if (size < PAGE_SIZE)
6618                         memset(ref->page + size, 0, PAGE_SIZE - size);
6619
6620                 page = virt_to_page(ref->page);
6621
6622                 spd.pages[i] = page;
6623                 spd.partial[i].len = PAGE_SIZE;
6624                 spd.partial[i].offset = 0;
6625                 spd.partial[i].private = (unsigned long)ref;
6626                 spd.nr_pages++;
6627                 *ppos += PAGE_SIZE;
6628
6629                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6630         }
6631
6632         trace_access_unlock(iter->cpu_file);
6633         spd.nr_pages = i;
6634
6635         /* did we read anything? */
6636         if (!spd.nr_pages) {
6637                 if (ret)
6638                         goto out;
6639
6640                 ret = -EAGAIN;
6641                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6642                         goto out;
6643
6644                 ret = wait_on_pipe(iter, true);
6645                 if (ret)
6646                         goto out;
6647
6648                 goto again;
6649         }
6650
6651         ret = splice_to_pipe(pipe, &spd);
6652 out:
6653         splice_shrink_spd(&spd);
6654
6655         return ret;
6656 }
6657
6658 static const struct file_operations tracing_buffers_fops = {
6659         .open           = tracing_buffers_open,
6660         .read           = tracing_buffers_read,
6661         .poll           = tracing_buffers_poll,
6662         .release        = tracing_buffers_release,
6663         .splice_read    = tracing_buffers_splice_read,
6664         .llseek         = no_llseek,
6665 };
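
/*
 * For comparison with the splice path above, a rough user-space sketch of
 * what a recorder such as trace-cmd does: splice whole pages from
 * trace_pipe_raw through a pipe into a file, avoiding the copy_to_user()
 * in tracing_buffers_read().  Paths and the 4096-byte page size are
 * assumptions.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int dump_cpu0(const char *outpath)
 *	{
 *		int p[2];
 *		ssize_t n;
 *		int raw = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			       O_RDONLY | O_NONBLOCK);
 *		int out = open(outpath, O_WRONLY | O_CREAT | O_TRUNC, 0644);
 *
 *		if (raw < 0 || out < 0 || pipe(p) < 0)
 *			return -1;
 *		// one page at a time: trace_pipe_raw -> pipe -> output file
 *		while ((n = splice(raw, NULL, p[1], NULL, 4096,
 *				   SPLICE_F_NONBLOCK)) > 0)
 *			splice(p[0], NULL, out, NULL, n, 0);
 *		close(raw);
 *		close(out);
 *		close(p[0]);
 *		close(p[1]);
 *		return 0;
 *	}
 */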
6666
6667 static ssize_t
6668 tracing_stats_read(struct file *filp, char __user *ubuf,
6669                    size_t count, loff_t *ppos)
6670 {
6671         struct inode *inode = file_inode(filp);
6672         struct trace_array *tr = inode->i_private;
6673         struct trace_buffer *trace_buf = &tr->trace_buffer;
6674         int cpu = tracing_get_cpu(inode);
6675         struct trace_seq *s;
6676         unsigned long cnt;
6677         unsigned long long t;
6678         unsigned long usec_rem;
6679
6680         s = kmalloc(sizeof(*s), GFP_KERNEL);
6681         if (!s)
6682                 return -ENOMEM;
6683
6684         trace_seq_init(s);
6685
6686         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6687         trace_seq_printf(s, "entries: %ld\n", cnt);
6688
6689         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6690         trace_seq_printf(s, "overrun: %ld\n", cnt);
6691
6692         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6693         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6694
6695         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6696         trace_seq_printf(s, "bytes: %ld\n", cnt);
6697
6698         if (trace_clocks[tr->clock_id].in_ns) {
6699                 /* local or global for trace_clock */
6700                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6701                 usec_rem = do_div(t, USEC_PER_SEC);
6702                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6703                                                                 t, usec_rem);
6704
6705                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6706                 usec_rem = do_div(t, USEC_PER_SEC);
6707                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6708         } else {
6709                 /* counter or tsc mode for trace_clock */
6710                 trace_seq_printf(s, "oldest event ts: %llu\n",
6711                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6712
6713                 trace_seq_printf(s, "now ts: %llu\n",
6714                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6715         }
6716
6717         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6718         trace_seq_printf(s, "dropped events: %ld\n", cnt);
6719
6720         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6721         trace_seq_printf(s, "read events: %ld\n", cnt);
6722
6723         count = simple_read_from_buffer(ubuf, count, ppos,
6724                                         s->buffer, trace_seq_used(s));
6725
6726         kfree(s);
6727
6728         return count;
6729 }
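
/*
 * Reading per_cpu/cpuN/stats therefore yields a small text report built
 * from the trace_seq above, along the lines of (values illustrative):
 *
 *	entries: 510
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 22040
 *	oldest event ts: 351626.373804
 *	now ts: 351651.415943
 *	dropped events: 0
 *	read events: 0
 *
 * The two "ts" lines lose their sub-second part when the selected clock
 * does not count in nanoseconds.
 */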
6730
6731 static const struct file_operations tracing_stats_fops = {
6732         .open           = tracing_open_generic_tr,
6733         .read           = tracing_stats_read,
6734         .llseek         = generic_file_llseek,
6735         .release        = tracing_release_generic_tr,
6736 };
6737
6738 #ifdef CONFIG_DYNAMIC_FTRACE
6739
6740 static ssize_t
6741 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6742                   size_t cnt, loff_t *ppos)
6743 {
6744         unsigned long *p = filp->private_data;
6745         char buf[64]; /* Not too big for a shallow stack */
6746         int r;
6747
6748         r = scnprintf(buf, 63, "%ld", *p);
6749         buf[r++] = '\n';
6750
6751         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6752 }
6753
6754 static const struct file_operations tracing_dyn_info_fops = {
6755         .open           = tracing_open_generic,
6756         .read           = tracing_read_dyn_info,
6757         .llseek         = generic_file_llseek,
6758 };
6759 #endif /* CONFIG_DYNAMIC_FTRACE */
6760
6761 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6762 static void
6763 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6764                 struct trace_array *tr, struct ftrace_probe_ops *ops,
6765                 void *data)
6766 {
6767         tracing_snapshot_instance(tr);
6768 }
6769
6770 static void
6771 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6772                       struct trace_array *tr, struct ftrace_probe_ops *ops,
6773                       void *data)
6774 {
6775         struct ftrace_func_mapper *mapper = data;
6776         long *count = NULL;
6777
6778         if (mapper)
6779                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6780
6781         if (count) {
6782
6783                 if (*count <= 0)
6784                         return;
6785
6786                 (*count)--;
6787         }
6788
6789         tracing_snapshot_instance(tr);
6790 }
6791
6792 static int
6793 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6794                       struct ftrace_probe_ops *ops, void *data)
6795 {
6796         struct ftrace_func_mapper *mapper = data;
6797         long *count = NULL;
6798
6799         seq_printf(m, "%ps:", (void *)ip);
6800
6801         seq_puts(m, "snapshot");
6802
6803         if (mapper)
6804                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6805
6806         if (count)
6807                 seq_printf(m, ":count=%ld\n", *count);
6808         else
6809                 seq_puts(m, ":unlimited\n");
6810
6811         return 0;
6812 }
6813
6814 static int
6815 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
6816                      unsigned long ip, void *init_data, void **data)
6817 {
6818         struct ftrace_func_mapper *mapper = *data;
6819
6820         if (!mapper) {
6821                 mapper = allocate_ftrace_func_mapper();
6822                 if (!mapper)
6823                         return -ENOMEM;
6824                 *data = mapper;
6825         }
6826
6827         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
6828 }
6829
6830 static void
6831 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
6832                      unsigned long ip, void *data)
6833 {
6834         struct ftrace_func_mapper *mapper = data;
6835
6836         if (!ip) {
6837                 if (!mapper)
6838                         return;
6839                 free_ftrace_func_mapper(mapper, NULL);
6840                 return;
6841         }
6842
6843         ftrace_func_mapper_remove_ip(mapper, ip);
6844 }
6845
6846 static struct ftrace_probe_ops snapshot_probe_ops = {
6847         .func                   = ftrace_snapshot,
6848         .print                  = ftrace_snapshot_print,
6849 };
6850
6851 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6852         .func                   = ftrace_count_snapshot,
6853         .print                  = ftrace_snapshot_print,
6854         .init                   = ftrace_snapshot_init,
6855         .free                   = ftrace_snapshot_free,
6856 };
6857
6858 static int
6859 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
6860                                char *glob, char *cmd, char *param, int enable)
6861 {
6862         struct ftrace_probe_ops *ops;
6863         void *count = (void *)-1;
6864         char *number;
6865         int ret;
6866
6867         /* hash funcs only work with set_ftrace_filter */
6868         if (!enable)
6869                 return -EINVAL;
6870
6871         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
6872
6873         if (glob[0] == '!')
6874                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
6875
6876         if (!param)
6877                 goto out_reg;
6878
6879         number = strsep(&param, ":");
6880
6881         if (!strlen(number))
6882                 goto out_reg;
6883
6884         /*
6885          * We use the callback data field (which is a pointer)
6886          * as our counter.
6887          */
6888         ret = kstrtoul(number, 0, (unsigned long *)&count);
6889         if (ret)
6890                 return ret;
6891
6892  out_reg:
6893         ret = alloc_snapshot(tr);
6894         if (ret < 0)
6895                 goto out;
6896
6897         ret = register_ftrace_function_probe(glob, tr, ops, count);
6898
6899  out:
6900         return ret < 0 ? ret : 0;
6901 }
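
/*
 * Registered below as the "snapshot" command of set_ftrace_filter, so the
 * syntax parsed above looks like (assuming the usual tracefs layout):
 *
 *	echo '<function>:snapshot' > set_ftrace_filter
 *	echo '<function>:snapshot:3' > set_ftrace_filter
 *	echo '!<function>:snapshot' > set_ftrace_filter
 *
 * An optional ":count" limits how many snapshots the probe may take, and
 * a leading '!' unregisters the probe again.
 */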
6902
6903 static struct ftrace_func_command ftrace_snapshot_cmd = {
6904         .name                   = "snapshot",
6905         .func                   = ftrace_trace_snapshot_callback,
6906 };
6907
6908 static __init int register_snapshot_cmd(void)
6909 {
6910         return register_ftrace_command(&ftrace_snapshot_cmd);
6911 }
6912 #else
6913 static inline __init int register_snapshot_cmd(void) { return 0; }
6914 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6915
6916 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6917 {
6918         if (WARN_ON(!tr->dir))
6919                 return ERR_PTR(-ENODEV);
6920
6921         /* Top directory uses NULL as the parent */
6922         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6923                 return NULL;
6924
6925         /* All sub buffers have a descriptor */
6926         return tr->dir;
6927 }
6928
6929 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6930 {
6931         struct dentry *d_tracer;
6932
6933         if (tr->percpu_dir)
6934                 return tr->percpu_dir;
6935
6936         d_tracer = tracing_get_dentry(tr);
6937         if (IS_ERR(d_tracer))
6938                 return NULL;
6939
6940         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6941
6942         WARN_ONCE(!tr->percpu_dir,
6943                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6944
6945         return tr->percpu_dir;
6946 }
6947
6948 static struct dentry *
6949 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6950                       void *data, long cpu, const struct file_operations *fops)
6951 {
6952         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6953
6954         if (ret) /* See tracing_get_cpu() */
6955                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6956         return ret;
6957 }
6958
6959 static void
6960 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6961 {
6962         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6963         struct dentry *d_cpu;
6964         char cpu_dir[30]; /* 30 characters should be more than enough */
6965
6966         if (!d_percpu)
6967                 return;
6968
6969         snprintf(cpu_dir, 30, "cpu%ld", cpu);
6970         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6971         if (!d_cpu) {
6972                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6973                 return;
6974         }
6975
6976         /* per cpu trace_pipe */
6977         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6978                                 tr, cpu, &tracing_pipe_fops);
6979
6980         /* per cpu trace */
6981         trace_create_cpu_file("trace", 0644, d_cpu,
6982                                 tr, cpu, &tracing_fops);
6983
6984         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6985                                 tr, cpu, &tracing_buffers_fops);
6986
6987         trace_create_cpu_file("stats", 0444, d_cpu,
6988                                 tr, cpu, &tracing_stats_fops);
6989
6990         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6991                                 tr, cpu, &tracing_entries_fops);
6992
6993 #ifdef CONFIG_TRACER_SNAPSHOT
6994         trace_create_cpu_file("snapshot", 0644, d_cpu,
6995                                 tr, cpu, &snapshot_fops);
6996
6997         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6998                                 tr, cpu, &snapshot_raw_fops);
6999 #endif
7000 }
7001
7002 #ifdef CONFIG_FTRACE_SELFTEST
7003 /* Let selftest have access to static functions in this file */
7004 #include "trace_selftest.c"
7005 #endif
7006
7007 static ssize_t
7008 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7009                         loff_t *ppos)
7010 {
7011         struct trace_option_dentry *topt = filp->private_data;
7012         char *buf;
7013
7014         if (topt->flags->val & topt->opt->bit)
7015                 buf = "1\n";
7016         else
7017                 buf = "0\n";
7018
7019         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7020 }
7021
7022 static ssize_t
7023 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7024                          loff_t *ppos)
7025 {
7026         struct trace_option_dentry *topt = filp->private_data;
7027         unsigned long val;
7028         int ret;
7029
7030         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7031         if (ret)
7032                 return ret;
7033
7034         if (val != 0 && val != 1)
7035                 return -EINVAL;
7036
7037         if (!!(topt->flags->val & topt->opt->bit) != val) {
7038                 mutex_lock(&trace_types_lock);
7039                 ret = __set_tracer_option(topt->tr, topt->flags,
7040                                           topt->opt, !val);
7041                 mutex_unlock(&trace_types_lock);
7042                 if (ret)
7043                         return ret;
7044         }
7045
7046         *ppos += cnt;
7047
7048         return cnt;
7049 }
7050
7051
7052 static const struct file_operations trace_options_fops = {
7053         .open = tracing_open_generic,
7054         .read = trace_options_read,
7055         .write = trace_options_write,
7056         .llseek = generic_file_llseek,
7057 };
7058
7059 /*
7060  * In order to pass in both the trace_array descriptor as well as the index
7061  * to the flag that the trace option file represents, the trace_array
7062  * has a character array of trace_flags_index[], which holds the index
7063  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7064  * The address of this character array is passed to the flag option file
7065  * read/write callbacks.
7066  *
7067  * In order to extract both the index and the trace_array descriptor,
7068  * get_tr_index() uses the following algorithm.
7069  *
7070  *   idx = *ptr;
7071  *
7072  * because the pointer passed in is the address of one entry in that
7073  * array, and each entry holds its own index value (remember index[1] == 1).
7074  *
7075  * Then, to get the trace_array descriptor, subtracting that index
7076  * from the pointer gives the start of the array itself:
7077  *
7078  *   ptr - idx == &index[0]
7079  *
7080  * Then a simple container_of() from that pointer gets us to the
7081  * trace_array descriptor.
7082  */
7083 static void get_tr_index(void *data, struct trace_array **ptr,
7084                          unsigned int *pindex)
7085 {
7086         *pindex = *(unsigned char *)data;
7087
7088         *ptr = container_of(data - *pindex, struct trace_array,
7089                             trace_flags_index);
7090 }
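
/*
 * For example, if the option file for bit 3 was created with
 * data == &tr->trace_flags_index[3], then *data == 3,
 * data - 3 == &tr->trace_flags_index[0], and container_of() recovers
 * tr from that address.
 */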
7091
7092 static ssize_t
7093 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7094                         loff_t *ppos)
7095 {
7096         void *tr_index = filp->private_data;
7097         struct trace_array *tr;
7098         unsigned int index;
7099         char *buf;
7100
7101         get_tr_index(tr_index, &tr, &index);
7102
7103         if (tr->trace_flags & (1 << index))
7104                 buf = "1\n";
7105         else
7106                 buf = "0\n";
7107
7108         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7109 }
7110
7111 static ssize_t
7112 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7113                          loff_t *ppos)
7114 {
7115         void *tr_index = filp->private_data;
7116         struct trace_array *tr;
7117         unsigned int index;
7118         unsigned long val;
7119         int ret;
7120
7121         get_tr_index(tr_index, &tr, &index);
7122
7123         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7124         if (ret)
7125                 return ret;
7126
7127         if (val != 0 && val != 1)
7128                 return -EINVAL;
7129
7130         mutex_lock(&trace_types_lock);
7131         ret = set_tracer_flag(tr, 1 << index, val);
7132         mutex_unlock(&trace_types_lock);
7133
7134         if (ret < 0)
7135                 return ret;
7136
7137         *ppos += cnt;
7138
7139         return cnt;
7140 }
7141
7142 static const struct file_operations trace_options_core_fops = {
7143         .open = tracing_open_generic,
7144         .read = trace_options_core_read,
7145         .write = trace_options_core_write,
7146         .llseek = generic_file_llseek,
7147 };
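
/*
 * Each core flag therefore appears as a 0/1 file in the options/
 * directory, e.g. (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	cat /sys/kernel/tracing/options/print-parent
 *	echo 0 > /sys/kernel/tracing/options/print-parent
 */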
7148
7149 struct dentry *trace_create_file(const char *name,
7150                                  umode_t mode,
7151                                  struct dentry *parent,
7152                                  void *data,
7153                                  const struct file_operations *fops)
7154 {
7155         struct dentry *ret;
7156
7157         ret = tracefs_create_file(name, mode, parent, data, fops);
7158         if (!ret)
7159                 pr_warn("Could not create tracefs '%s' entry\n", name);
7160
7161         return ret;
7162 }
7163
7164
7165 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7166 {
7167         struct dentry *d_tracer;
7168
7169         if (tr->options)
7170                 return tr->options;
7171
7172         d_tracer = tracing_get_dentry(tr);
7173         if (IS_ERR(d_tracer))
7174                 return NULL;
7175
7176         tr->options = tracefs_create_dir("options", d_tracer);
7177         if (!tr->options) {
7178                 pr_warn("Could not create tracefs directory 'options'\n");
7179                 return NULL;
7180         }
7181
7182         return tr->options;
7183 }
7184
7185 static void
7186 create_trace_option_file(struct trace_array *tr,
7187                          struct trace_option_dentry *topt,
7188                          struct tracer_flags *flags,
7189                          struct tracer_opt *opt)
7190 {
7191         struct dentry *t_options;
7192
7193         t_options = trace_options_init_dentry(tr);
7194         if (!t_options)
7195                 return;
7196
7197         topt->flags = flags;
7198         topt->opt = opt;
7199         topt->tr = tr;
7200
7201         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7202                                     &trace_options_fops);
7203
7204 }
7205
7206 static void
7207 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7208 {
7209         struct trace_option_dentry *topts;
7210         struct trace_options *tr_topts;
7211         struct tracer_flags *flags;
7212         struct tracer_opt *opts;
7213         int cnt;
7214         int i;
7215
7216         if (!tracer)
7217                 return;
7218
7219         flags = tracer->flags;
7220
7221         if (!flags || !flags->opts)
7222                 return;
7223
7224         /*
7225          * If this is an instance, only create flags for tracers
7226          * the instance may have.
7227          */
7228         if (!trace_ok_for_array(tracer, tr))
7229                 return;
7230
7231         for (i = 0; i < tr->nr_topts; i++) {
7232                 /* Make sure there are no duplicate flags. */
7233                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7234                         return;
7235         }
7236
7237         opts = flags->opts;
7238
7239         for (cnt = 0; opts[cnt].name; cnt++)
7240                 ;
7241
7242         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7243         if (!topts)
7244                 return;
7245
7246         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7247                             GFP_KERNEL);
7248         if (!tr_topts) {
7249                 kfree(topts);
7250                 return;
7251         }
7252
7253         tr->topts = tr_topts;
7254         tr->topts[tr->nr_topts].tracer = tracer;
7255         tr->topts[tr->nr_topts].topts = topts;
7256         tr->nr_topts++;
7257
7258         for (cnt = 0; opts[cnt].name; cnt++) {
7259                 create_trace_option_file(tr, &topts[cnt], flags,
7260                                          &opts[cnt]);
7261                 WARN_ONCE(topts[cnt].entry == NULL,
7262                           "Failed to create trace option: %s",
7263                           opts[cnt].name);
7264         }
7265 }
7266
7267 static struct dentry *
7268 create_trace_option_core_file(struct trace_array *tr,
7269                               const char *option, long index)
7270 {
7271         struct dentry *t_options;
7272
7273         t_options = trace_options_init_dentry(tr);
7274         if (!t_options)
7275                 return NULL;
7276
7277         return trace_create_file(option, 0644, t_options,
7278                                  (void *)&tr->trace_flags_index[index],
7279                                  &trace_options_core_fops);
7280 }
7281
7282 static void create_trace_options_dir(struct trace_array *tr)
7283 {
7284         struct dentry *t_options;
7285         bool top_level = tr == &global_trace;
7286         int i;
7287
7288         t_options = trace_options_init_dentry(tr);
7289         if (!t_options)
7290                 return;
7291
7292         for (i = 0; trace_options[i]; i++) {
7293                 if (top_level ||
7294                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7295                         create_trace_option_core_file(tr, trace_options[i], i);
7296         }
7297 }
7298
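/*
 * These two callbacks back the per-instance tracing_on file: writing '0'
 * stops events from being recorded into the ring buffer (and calls the
 * tracer's stop() hook), while writing '1' turns recording back on,
 * without freeing or resizing the buffers themselves.
 */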
7299 static ssize_t
7300 rb_simple_read(struct file *filp, char __user *ubuf,
7301                size_t cnt, loff_t *ppos)
7302 {
7303         struct trace_array *tr = filp->private_data;
7304         char buf[64];
7305         int r;
7306
7307         r = tracer_tracing_is_on(tr);
7308         r = sprintf(buf, "%d\n", r);
7309
7310         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7311 }
7312
7313 static ssize_t
7314 rb_simple_write(struct file *filp, const char __user *ubuf,
7315                 size_t cnt, loff_t *ppos)
7316 {
7317         struct trace_array *tr = filp->private_data;
7318         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7319         unsigned long val;
7320         int ret;
7321
7322         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7323         if (ret)
7324                 return ret;
7325
7326         if (buffer) {
7327                 mutex_lock(&trace_types_lock);
7328                 if (val) {
7329                         tracer_tracing_on(tr);
7330                         if (tr->current_trace->start)
7331                                 tr->current_trace->start(tr);
7332                 } else {
7333                         tracer_tracing_off(tr);
7334                         if (tr->current_trace->stop)
7335                                 tr->current_trace->stop(tr);
7336                 }
7337                 mutex_unlock(&trace_types_lock);
7338         }
7339
7340         (*ppos)++;
7341
7342         return cnt;
7343 }
7344
7345 static const struct file_operations rb_simple_fops = {
7346         .open           = tracing_open_generic_tr,
7347         .read           = rb_simple_read,
7348         .write          = rb_simple_write,
7349         .release        = tracing_release_generic_tr,
7350         .llseek         = default_llseek,
7351 };
7352
7353 struct dentry *trace_instance_dir;
7354
7355 static void
7356 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7357
7358 static int
7359 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7360 {
7361         enum ring_buffer_flags rb_flags;
7362
7363         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7364
7365         buf->tr = tr;
7366
7367         buf->buffer = ring_buffer_alloc(size, rb_flags);
7368         if (!buf->buffer)
7369                 return -ENOMEM;
7370
7371         buf->data = alloc_percpu(struct trace_array_cpu);
7372         if (!buf->data) {
7373                 ring_buffer_free(buf->buffer);
7374                 return -ENOMEM;
7375         }
7376
7377         /* Allocate the first page for all buffers */
7378         set_buffer_entries(&tr->trace_buffer,
7379                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7380
7381         return 0;
7382 }
7383
7384 static int allocate_trace_buffers(struct trace_array *tr, int size)
7385 {
7386         int ret;
7387
7388         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7389         if (ret)
7390                 return ret;
7391
7392 #ifdef CONFIG_TRACER_MAX_TRACE
7393         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7394                                     allocate_snapshot ? size : 1);
7395         if (WARN_ON(ret)) {
7396                 ring_buffer_free(tr->trace_buffer.buffer);
7397                 free_percpu(tr->trace_buffer.data);
7398                 return -ENOMEM;
7399         }
7400         tr->allocated_snapshot = allocate_snapshot;
7401
7402         /*
7403          * Only the top level trace array gets its snapshot allocated
7404          * from the kernel command line.
7405          */
7406         allocate_snapshot = false;
7407 #endif
7408         return 0;
7409 }
7410
7411 static void free_trace_buffer(struct trace_buffer *buf)
7412 {
7413         if (buf->buffer) {
7414                 ring_buffer_free(buf->buffer);
7415                 buf->buffer = NULL;
7416                 free_percpu(buf->data);
7417                 buf->data = NULL;
7418         }
7419 }
7420
7421 static void free_trace_buffers(struct trace_array *tr)
7422 {
7423         if (!tr)
7424                 return;
7425
7426         free_trace_buffer(&tr->trace_buffer);
7427
7428 #ifdef CONFIG_TRACER_MAX_TRACE
7429         free_trace_buffer(&tr->max_buffer);
7430 #endif
7431 }
7432
7433 static void init_trace_flags_index(struct trace_array *tr)
7434 {
7435         int i;
7436
7437         /* Used by the trace options files */
7438         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7439                 tr->trace_flags_index[i] = i;
7440 }
7441
7442 static void __update_tracer_options(struct trace_array *tr)
7443 {
7444         struct tracer *t;
7445
7446         for (t = trace_types; t; t = t->next)
7447                 add_tracer_options(tr, t);
7448 }
7449
7450 static void update_tracer_options(struct trace_array *tr)
7451 {
7452         mutex_lock(&trace_types_lock);
7453         __update_tracer_options(tr);
7454         mutex_unlock(&trace_types_lock);
7455 }
7456
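/*
 * These two callbacks back the instances/ directory created by
 * create_trace_instances() below, so a complete trace array can be set up
 * and torn down from user space with plain mkdir/rmdir, e.g. (path
 * assumes the usual tracefs mount point):
 *
 *	mkdir /sys/kernel/tracing/instances/foo
 *	rmdir /sys/kernel/tracing/instances/foo
 *
 * instance_rmdir() returns -EBUSY while the instance still has open
 * readers holding a reference, so the rmdir fails until they are closed.
 */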
7457 static int instance_mkdir(const char *name)
7458 {
7459         struct trace_array *tr;
7460         int ret;
7461
7462         mutex_lock(&trace_types_lock);
7463
7464         ret = -EEXIST;
7465         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7466                 if (tr->name && strcmp(tr->name, name) == 0)
7467                         goto out_unlock;
7468         }
7469
7470         ret = -ENOMEM;
7471         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7472         if (!tr)
7473                 goto out_unlock;
7474
7475         tr->name = kstrdup(name, GFP_KERNEL);
7476         if (!tr->name)
7477                 goto out_free_tr;
7478
7479         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7480                 goto out_free_tr;
7481
7482         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7483
7484         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7485
7486         raw_spin_lock_init(&tr->start_lock);
7487
7488         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7489
7490         tr->current_trace = &nop_trace;
7491
7492         INIT_LIST_HEAD(&tr->systems);
7493         INIT_LIST_HEAD(&tr->events);
7494
7495         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7496                 goto out_free_tr;
7497
7498         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7499         if (!tr->dir)
7500                 goto out_free_tr;
7501
7502         ret = event_trace_add_tracer(tr->dir, tr);
7503         if (ret) {
7504                 tracefs_remove_recursive(tr->dir);
7505                 goto out_free_tr;
7506         }
7507
7508         ftrace_init_trace_array(tr);
7509
7510         init_tracer_tracefs(tr, tr->dir);
7511         init_trace_flags_index(tr);
7512         __update_tracer_options(tr);
7513
7514         list_add(&tr->list, &ftrace_trace_arrays);
7515
7516         mutex_unlock(&trace_types_lock);
7517
7518         return 0;
7519
7520  out_free_tr:
7521         free_trace_buffers(tr);
7522         free_cpumask_var(tr->tracing_cpumask);
7523         kfree(tr->name);
7524         kfree(tr);
7525
7526  out_unlock:
7527         mutex_unlock(&trace_types_lock);
7528
7529         return ret;
7530
7531 }
7532
7533 static int instance_rmdir(const char *name)
7534 {
7535         struct trace_array *tr;
7536         int found = 0;
7537         int ret;
7538         int i;
7539
7540         mutex_lock(&trace_types_lock);
7541
7542         ret = -ENODEV;
7543         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7544                 if (tr->name && strcmp(tr->name, name) == 0) {
7545                         found = 1;
7546                         break;
7547                 }
7548         }
7549         if (!found)
7550                 goto out_unlock;
7551
7552         ret = -EBUSY;
7553         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7554                 goto out_unlock;
7555
7556         list_del(&tr->list);
7557
7558         /* Disable all the flags that were enabled coming in */
7559         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7560                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7561                         set_tracer_flag(tr, 1 << i, 0);
7562         }
7563
7564         tracing_set_nop(tr);
7565         clear_ftrace_function_probes(tr);
7566         event_trace_del_tracer(tr);
7567         ftrace_clear_pids(tr);
7568         ftrace_destroy_function_files(tr);
7569         tracefs_remove_recursive(tr->dir);
7570         free_trace_buffers(tr);
7571
7572         for (i = 0; i < tr->nr_topts; i++) {
7573                 kfree(tr->topts[i].topts);
7574         }
7575         kfree(tr->topts);
7576
7577         kfree(tr->name);
7578         kfree(tr);
7579
7580         ret = 0;
7581
7582  out_unlock:
7583         mutex_unlock(&trace_types_lock);
7584
7585         return ret;
7586 }
7587
7588 static __init void create_trace_instances(struct dentry *d_tracer)
7589 {
7590         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7591                                                          instance_mkdir,
7592                                                          instance_rmdir);
7593         if (WARN_ON(!trace_instance_dir))
7594                 return;
7595 }
7596
7597 static void
7598 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7599 {
7600         int cpu;
7601
7602         trace_create_file("available_tracers", 0444, d_tracer,
7603                         tr, &show_traces_fops);
7604
7605         trace_create_file("current_tracer", 0644, d_tracer,
7606                         tr, &set_tracer_fops);
7607
7608         trace_create_file("tracing_cpumask", 0644, d_tracer,
7609                           tr, &tracing_cpumask_fops);
7610
7611         trace_create_file("trace_options", 0644, d_tracer,
7612                           tr, &tracing_iter_fops);
7613
7614         trace_create_file("trace", 0644, d_tracer,
7615                           tr, &tracing_fops);
7616
7617         trace_create_file("trace_pipe", 0444, d_tracer,
7618                           tr, &tracing_pipe_fops);
7619
7620         trace_create_file("buffer_size_kb", 0644, d_tracer,
7621                           tr, &tracing_entries_fops);
7622
7623         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7624                           tr, &tracing_total_entries_fops);
7625
7626         trace_create_file("free_buffer", 0200, d_tracer,
7627                           tr, &tracing_free_buffer_fops);
7628
7629         trace_create_file("trace_marker", 0220, d_tracer,
7630                           tr, &tracing_mark_fops);
7631
7632         trace_create_file("trace_marker_raw", 0220, d_tracer,
7633                           tr, &tracing_mark_raw_fops);
7634
7635         trace_create_file("trace_clock", 0644, d_tracer, tr,
7636                           &trace_clock_fops);
7637
7638         trace_create_file("tracing_on", 0644, d_tracer,
7639                           tr, &rb_simple_fops);
7640
7641         create_trace_options_dir(tr);
7642
7643 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7644         trace_create_file("tracing_max_latency", 0644, d_tracer,
7645                         &tr->max_latency, &tracing_max_lat_fops);
7646 #endif
7647
7648         if (ftrace_create_function_files(tr, d_tracer))
7649                 WARN(1, "Could not allocate function filter files");
7650
7651 #ifdef CONFIG_TRACER_SNAPSHOT
7652         trace_create_file("snapshot", 0644, d_tracer,
7653                           tr, &snapshot_fops);
7654 #endif
7655
7656         for_each_tracing_cpu(cpu)
7657                 tracing_init_tracefs_percpu(tr, cpu);
7658
7659         ftrace_init_tracefs(tr, d_tracer);
7660 }
7661
7662 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7663 {
7664         struct vfsmount *mnt;
7665         struct file_system_type *type;
7666
7667         /*
7668          * To maintain backward compatibility for tools that mount
7669          * debugfs to get to the tracing facility, tracefs is automatically
7670          * mounted to the debugfs/tracing directory.
7671          */
7672         type = get_fs_type("tracefs");
7673         if (!type)
7674                 return NULL;
7675         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7676         put_filesystem(type);
7677         if (IS_ERR(mnt))
7678                 return NULL;
7679         mntget(mnt);
7680
7681         return mnt;
7682 }
7683
7684 /**
7685  * tracing_init_dentry - initialize top level trace array
7686  *
7687  * This is called when creating files or directories in the tracing
7688  * directory. It is called via fs_initcall() by any of the boot up code
7689  * and expects to return the dentry of the top level tracing directory.
7690  */
7691 struct dentry *tracing_init_dentry(void)
7692 {
7693         struct trace_array *tr = &global_trace;
7694
7695         /* The top level trace array uses NULL as parent */
7696         if (tr->dir)
7697                 return NULL;
7698
7699         if (WARN_ON(!tracefs_initialized()) ||
7700                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
7701                  WARN_ON(!debugfs_initialized())))
7702                 return ERR_PTR(-ENODEV);
7703
7704         /*
7705          * As there may still be users that expect the tracing
7706          * files to exist in debugfs/tracing, we must automount
7707          * the tracefs file system there, so older tools still
7708          * work with the newer kernel.
7709          */
7710         tr->dir = debugfs_create_automount("tracing", NULL,
7711                                            trace_automount, NULL);
7712         if (!tr->dir) {
7713                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
7714                 return ERR_PTR(-ENOMEM);
7715         }
7716
7717         return NULL;
7718 }
7719
7720 extern struct trace_eval_map *__start_ftrace_eval_maps[];
7721 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
7722
7723 static void __init trace_eval_init(void)
7724 {
7725         int len;
7726
7727         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
7728         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
7729 }
7730
7731 #ifdef CONFIG_MODULES
7732 static void trace_module_add_evals(struct module *mod)
7733 {
7734         if (!mod->num_trace_evals)
7735                 return;
7736
7737         /*
7738          * Modules with bad taint do not have events created; do
7739          * not bother with enums either.
7740          */
7741         if (trace_module_has_bad_taint(mod))
7742                 return;
7743
7744         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
7745 }
7746
7747 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
7748 static void trace_module_remove_evals(struct module *mod)
7749 {
7750         union trace_eval_map_item *map;
7751         union trace_eval_map_item **last = &trace_eval_maps;
7752
7753         if (!mod->num_trace_evals)
7754                 return;
7755
7756         mutex_lock(&trace_eval_mutex);
7757
7758         map = trace_eval_maps;
7759
7760         while (map) {
7761                 if (map->head.mod == mod)
7762                         break;
7763                 map = trace_eval_jmp_to_tail(map);
7764                 last = &map->tail.next;
7765                 map = map->tail.next;
7766         }
7767         if (!map)
7768                 goto out;
7769
7770         *last = trace_eval_jmp_to_tail(map)->tail.next;
7771         kfree(map);
7772  out:
7773         mutex_unlock(&trace_eval_mutex);
7774 }
7775 #else
7776 static inline void trace_module_remove_evals(struct module *mod) { }
7777 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
7778
7779 static int trace_module_notify(struct notifier_block *self,
7780                                unsigned long val, void *data)
7781 {
7782         struct module *mod = data;
7783
7784         switch (val) {
7785         case MODULE_STATE_COMING:
7786                 trace_module_add_evals(mod);
7787                 break;
7788         case MODULE_STATE_GOING:
7789                 trace_module_remove_evals(mod);
7790                 break;
7791         }
7792
7793         return 0;
7794 }
7795
7796 static struct notifier_block trace_module_nb = {
7797         .notifier_call = trace_module_notify,
7798         .priority = 0,
7799 };
7800 #endif /* CONFIG_MODULES */
7801
7802 static __init int tracer_init_tracefs(void)
7803 {
7804         struct dentry *d_tracer;
7805
7806         trace_access_lock_init();
7807
7808         d_tracer = tracing_init_dentry();
7809         if (IS_ERR(d_tracer))
7810                 return 0;
7811
7812         init_tracer_tracefs(&global_trace, d_tracer);
7813         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
7814
7815         trace_create_file("tracing_thresh", 0644, d_tracer,
7816                         &global_trace, &tracing_thresh_fops);
7817
7818         trace_create_file("README", 0444, d_tracer,
7819                         NULL, &tracing_readme_fops);
7820
7821         trace_create_file("saved_cmdlines", 0444, d_tracer,
7822                         NULL, &tracing_saved_cmdlines_fops);
7823
7824         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
7825                           NULL, &tracing_saved_cmdlines_size_fops);
7826
7827         trace_eval_init();
7828
7829         trace_create_eval_file(d_tracer);
7830
7831 #ifdef CONFIG_MODULES
7832         register_module_notifier(&trace_module_nb);
7833 #endif
7834
7835 #ifdef CONFIG_DYNAMIC_FTRACE
7836         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
7837                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
7838 #endif
7839
7840         create_trace_instances(d_tracer);
7841
7842         update_tracer_options(&global_trace);
7843
7844         return 0;
7845 }
7846
7847 static int trace_panic_handler(struct notifier_block *this,
7848                                unsigned long event, void *unused)
7849 {
7850         if (ftrace_dump_on_oops)
7851                 ftrace_dump(ftrace_dump_on_oops);
7852         return NOTIFY_OK;
7853 }
7854
7855 static struct notifier_block trace_panic_notifier = {
7856         .notifier_call  = trace_panic_handler,
7857         .next           = NULL,
7858         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
7859 };
7860
7861 static int trace_die_handler(struct notifier_block *self,
7862                              unsigned long val,
7863                              void *data)
7864 {
7865         switch (val) {
7866         case DIE_OOPS:
7867                 if (ftrace_dump_on_oops)
7868                         ftrace_dump(ftrace_dump_on_oops);
7869                 break;
7870         default:
7871                 break;
7872         }
7873         return NOTIFY_OK;
7874 }
7875
7876 static struct notifier_block trace_die_notifier = {
7877         .notifier_call = trace_die_handler,
7878         .priority = 200
7879 };
7880
7881 /*
7882  * printk is capped at 1024 characters; we really don't need it that big.
7883  * Nothing should be printing 1000 characters anyway.
7884  */
7885 #define TRACE_MAX_PRINT         1000
7886
7887 /*
7888  * Define here KERN_TRACE so that we have one place to modify
7889  * it if we decide to change what log level the ftrace dump
7890  * should be at.
7891  */
7892 #define KERN_TRACE              KERN_EMERG
7893
7894 void
7895 trace_printk_seq(struct trace_seq *s)
7896 {
7897         /* Probably should print a warning here. */
7898         if (s->seq.len >= TRACE_MAX_PRINT)
7899                 s->seq.len = TRACE_MAX_PRINT;
7900
7901         /*
7902          * More paranoid code. Although the buffer size is set to
7903          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
7904          * an extra layer of protection.
7905          */
7906         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
7907                 s->seq.len = s->seq.size - 1;
7908
7909         /* should be zero terminated, but we are paranoid. */
7910         s->buffer[s->seq.len] = 0;
7911
7912         printk(KERN_TRACE "%s", s->buffer);
7913
7914         trace_seq_init(s);
7915 }
7916
7917 void trace_init_global_iter(struct trace_iterator *iter)
7918 {
7919         iter->tr = &global_trace;
7920         iter->trace = iter->tr->current_trace;
7921         iter->cpu_file = RING_BUFFER_ALL_CPUS;
7922         iter->trace_buffer = &global_trace.trace_buffer;
7923
7924         if (iter->trace && iter->trace->open)
7925                 iter->trace->open(iter);
7926
7927         /* Annotate start of buffers if we had overruns */
7928         if (ring_buffer_overruns(iter->trace_buffer->buffer))
7929                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
7930
7931         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
7932         if (trace_clocks[iter->tr->clock_id].in_ns)
7933                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
7934 }
7935
7936 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
7937 {
7938         /* use static because iter can be a bit big for the stack */
7939         static struct trace_iterator iter;
7940         static atomic_t dump_running;
7941         struct trace_array *tr = &global_trace;
7942         unsigned int old_userobj;
7943         unsigned long flags;
7944         int cnt = 0, cpu;
7945
7946         /* Only allow one dump user at a time. */
7947         if (atomic_inc_return(&dump_running) != 1) {
7948                 atomic_dec(&dump_running);
7949                 return;
7950         }
7951
7952         /*
7953          * Always turn off tracing when we dump.
7954          * We don't need to show trace output of what happens
7955          * between multiple crashes.
7956          *
7957          * If the user does a sysrq-z, then they can re-enable
7958          * tracing with echo 1 > tracing_on.
7959          */
7960         tracing_off();
7961
7962         local_irq_save(flags);
7963
7964         /* Simulate the iterator */
7965         trace_init_global_iter(&iter);
7966
7967         for_each_tracing_cpu(cpu) {
7968                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
7969         }
7970
7971         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
7972
7973         /* don't look at user memory in panic mode */
7974         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
7975
7976         switch (oops_dump_mode) {
7977         case DUMP_ALL:
7978                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7979                 break;
7980         case DUMP_ORIG:
7981                 iter.cpu_file = raw_smp_processor_id();
7982                 break;
7983         case DUMP_NONE:
7984                 goto out_enable;
7985         default:
7986                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
7987                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
7988         }
7989
7990         printk(KERN_TRACE "Dumping ftrace buffer:\n");
7991
7992         /* Did function tracer already get disabled? */
7993         if (ftrace_is_dead()) {
7994                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
7995                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
7996         }
7997
7998         /*
7999          * We need to stop all tracing on all CPUs to read
8000          * the next buffer. This is a bit expensive, but is
8001          * not done often. We fill all that we can read,
8002          * and then release the locks again.
8003          */
8004
8005         while (!trace_empty(&iter)) {
8006
8007                 if (!cnt)
8008                         printk(KERN_TRACE "---------------------------------\n");
8009
8010                 cnt++;
8011
8012                 /* reset all but tr, trace, and overruns */
8013                 memset(&iter.seq, 0,
8014                        sizeof(struct trace_iterator) -
8015                        offsetof(struct trace_iterator, seq));
8016                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8017                 iter.pos = -1;
8018
8019                 if (trace_find_next_entry_inc(&iter) != NULL) {
8020                         int ret;
8021
8022                         ret = print_trace_line(&iter);
8023                         if (ret != TRACE_TYPE_NO_CONSUME)
8024                                 trace_consume(&iter);
8025                 }
8026                 touch_nmi_watchdog();
8027
8028                 trace_printk_seq(&iter.seq);
8029         }
8030
8031         if (!cnt)
8032                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8033         else
8034                 printk(KERN_TRACE "---------------------------------\n");
8035
8036  out_enable:
8037         tr->trace_flags |= old_userobj;
8038
8039         for_each_tracing_cpu(cpu) {
8040                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8041         }
8042         atomic_dec(&dump_running);
8043         local_irq_restore(flags);
8044 }
8045 EXPORT_SYMBOL_GPL(ftrace_dump);
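
/*
 * Rough usage sketch (illustrative, not part of this file): built-in kernel
 * code that detects a fatal condition can dump the trace buffers from its
 * own error path, e.g.:
 *
 *	if (WARN_ON(device_state_corrupted(dev)))
 *		ftrace_dump(DUMP_ALL);
 *
 * where device_state_corrupted() stands in for whatever check the caller
 * already has.  DUMP_ALL dumps every CPU's buffer, DUMP_ORIG limits the dump
 * to the CPU that called ftrace_dump(), and DUMP_NONE does nothing.
 */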
8046
8047 __init static int tracer_alloc_buffers(void)
8048 {
8049         int ring_buf_size;
8050         int ret = -ENOMEM;
8051
8052         /*
8053          * Make sure we don't accidentally add more trace options
8054          * than we have bits for.
8055          */
8056         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8057
8058         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8059                 goto out;
8060
8061         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8062                 goto out_free_buffer_mask;
8063
8064         /* Only allocate trace_printk buffers if a trace_printk exists */
8065         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8066                 /* Must be called before global_trace.buffer is allocated */
8067                 trace_printk_init_buffers();
8068
8069         /* To save memory, keep the ring buffer size to its minimum */
8070         if (ring_buffer_expanded)
8071                 ring_buf_size = trace_buf_size;
8072         else
8073                 ring_buf_size = 1;
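        /*
         * A request of 1 is rounded up to the ring buffer's minimum size; the
         * buffer is only grown to trace_buf_size once tracing is actually
         * used, at which point ring_buffer_expanded is set.
         */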
8074
8075         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8076         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8077
8078         raw_spin_lock_init(&global_trace.start_lock);
8079
8080         /*
8081          * The prepare callback allocates some memory for the ring buffer. We
8082          * don't free the buffer if the CPU goes down. If we were to free
8083          * the buffer, then the user would lose any trace that was in the
8084          * buffer. The memory will be removed once the "instance" is removed.
8085          */
8086         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8087                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8088                                       NULL);
8089         if (ret < 0)
8090                 goto out_free_cpumask;
8091         /* Used for event triggers */
8092         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8093         if (!temp_buffer)
8094                 goto out_rm_hp_state;
8095
8096         if (trace_create_savedcmd() < 0)
8097                 goto out_free_temp_buffer;
8098
8099         /* TODO: make the number of buffers hot pluggable with CPUs */
8100         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8101                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8102                 WARN_ON(1);
8103                 goto out_free_savedcmd;
8104         }
8105
8106         if (global_trace.buffer_disabled)
8107                 tracing_off();
8108
8109         if (trace_boot_clock) {
8110                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8111                 if (ret < 0)
8112                         pr_warn("Trace clock %s not defined, going back to default\n",
8113                                 trace_boot_clock);
8114         }
8115
8116         /*
8117          * register_tracer() might reference current_trace, so it
8118          * needs to be set before we register anything. This is
8119          * just a bootstrap of current_trace anyway.
8120          */
8121         global_trace.current_trace = &nop_trace;
8122
8123         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8124
8125         ftrace_init_global_array_ops(&global_trace);
8126
8127         init_trace_flags_index(&global_trace);
8128
8129         register_tracer(&nop_trace);
8130
8131         /* Function tracing may start here (via kernel command line) */
8132         init_function_trace();
8133
8134         /* All seems OK, enable tracing */
8135         tracing_disabled = 0;
8136
8137         atomic_notifier_chain_register(&panic_notifier_list,
8138                                        &trace_panic_notifier);
8139
8140         register_die_notifier(&trace_die_notifier);
8141
8142         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8143
8144         INIT_LIST_HEAD(&global_trace.systems);
8145         INIT_LIST_HEAD(&global_trace.events);
8146         list_add(&global_trace.list, &ftrace_trace_arrays);
8147
8148         apply_trace_boot_options();
8149
8150         register_snapshot_cmd();
8151
8152         return 0;
8153
8154 out_free_savedcmd:
8155         free_saved_cmdlines_buffer(savedcmd);
8156 out_free_temp_buffer:
8157         ring_buffer_free(temp_buffer);
8158 out_rm_hp_state:
8159         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8160 out_free_cpumask:
8161         free_cpumask_var(global_trace.tracing_cpumask);
8162 out_free_buffer_mask:
8163         free_cpumask_var(tracing_buffer_mask);
8164 out:
8165         return ret;
8166 }
8167
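/*
 * tracepoint_printk is normally set with the "tp_printk" kernel command-line
 * option.  When it is set, the iterator allocated below is used to echo
 * trace events to printk() as they are recorded.
 */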
8168 void __init early_trace_init(void)
8169 {
8170         if (tracepoint_printk) {
8171                 tracepoint_print_iter =
8172                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8173                 if (WARN_ON(!tracepoint_print_iter))
8174                         tracepoint_printk = 0;
8175                 else
8176                         static_key_enable(&tracepoint_printk_key.key);
8177         }
8178         tracer_alloc_buffers();
8179 }
8180
8181 void __init trace_init(void)
8182 {
8183         trace_event_init();
8184 }
8185
8186 __init static int clear_boot_tracer(void)
8187 {
8188         /*
8189          * The default bootup tracer name is kept in an init-section buffer.
8190          * This function is called as a late_initcall. If we did not
8191          * find the boot tracer, then clear it out, to prevent
8192          * later registration from accessing the buffer that is
8193          * about to be freed.
8194          */
8195         if (!default_bootup_tracer)
8196                 return 0;
8197
8198         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8199                default_bootup_tracer);
8200         default_bootup_tracer = NULL;
8201
8202         return 0;
8203 }
8204
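/*
 * Initcall ordering: tracer_init_tracefs() runs at fs_initcall time, which is
 * before late_initcall, so the tracefs files are created before
 * clear_boot_tracer() drops the init-section bootup tracer name.
 */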
8205 fs_initcall(tracer_init_tracefs);
8206 late_initcall(clear_boot_tracer);