treewide: kmalloc() -> kmalloc_array()
[linux-block.git] / kernel / trace / trace.c
1 /*
2  * ring buffer based function tracer
3  *
4  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
6  *
7  * Originally taken from the RT patch by:
8  *    Arnaldo Carvalho de Melo <acme@redhat.com>
9  *
10  * Based on code from the latency_tracer, that is:
11  *  Copyright (C) 2004-2006 Ingo Molnar
12  *  Copyright (C) 2004 Nadia Yvette Chambers
13  */
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
42 #include <linux/fs.h>
43 #include <linux/trace.h>
44 #include <linux/sched/clock.h>
45 #include <linux/sched/rt.h>
46
47 #include "trace.h"
48 #include "trace_output.h"
49
50 /*
51  * On boot up, the ring buffer is set to the minimum size, so that
52  * we do not waste memory on systems that are not using tracing.
53  */
54 bool ring_buffer_expanded;
55
56 /*
57  * We need to change this state when a selftest is running.
58  * A selftest will look into the ring-buffer to count the
59  * entries inserted during the selftest, although concurrent
60  * insertions into the ring-buffer, such as trace_printk(), could occur
61  * at the same time, giving false positive or negative results.
62  */
63 static bool __read_mostly tracing_selftest_running;
64
65 /*
66  * If a tracer is running, we do not want to run SELFTEST.
67  */
68 bool __read_mostly tracing_selftest_disabled;
69
70 /* Pipe tracepoints to printk */
71 struct trace_iterator *tracepoint_print_iter;
72 int tracepoint_printk;
73 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
74
75 /* For tracers that don't implement custom flags */
76 static struct tracer_opt dummy_tracer_opt[] = {
77         { }
78 };
79
80 static int
81 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
82 {
83         return 0;
84 }
85
86 /*
87  * To prevent the comm cache from being overwritten when no
88  * tracing is active, only save the comm when a trace event
89  * occurred.
90  */
91 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
92
93 /*
94  * Kill all tracing for good (never come back).
95  * It is initialized to 1 and is set back to zero only when the
96  * tracer initializes successfully. That is the only place that
97  * clears it.
98  */
99 static int tracing_disabled = 1;
100
101 cpumask_var_t __read_mostly     tracing_buffer_mask;
102
103 /*
104  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105  *
106  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
107  * is set, then ftrace_dump is called. This will output the contents
108  * of the ftrace buffers to the console.  This is very useful for
109  * capturing traces that lead to crashes and outputing it to a
110  * serial console.
111  *
112  * It is default off, but you can enable it with either specifying
113  * "ftrace_dump_on_oops" in the kernel command line, or setting
114  * /proc/sys/kernel/ftrace_dump_on_oops
115  * Set 1 if you want to dump buffers of all CPUs
116  * Set 2 if you want to dump the buffer of the CPU that triggered oops
117  */
118
119 enum ftrace_dump_mode ftrace_dump_on_oops;
120
121 /* When set, tracing will stop when a WARN*() is hit */
122 int __disable_trace_on_warning;
123
124 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
125 /* Map of enums to their values, for "eval_map" file */
126 struct trace_eval_map_head {
127         struct module                   *mod;
128         unsigned long                   length;
129 };
130
131 union trace_eval_map_item;
132
133 struct trace_eval_map_tail {
134         /*
135          * "end" is first and points to NULL as it must be different
136          * than "mod" or "eval_string"
137          */
138         union trace_eval_map_item       *next;
139         const char                      *end;   /* points to NULL */
140 };
141
142 static DEFINE_MUTEX(trace_eval_mutex);
143
144 /*
145  * The trace_eval_maps are saved in an array with two extra elements,
146  * one at the beginning, and one at the end. The beginning item contains
147  * the count of the saved maps (head.length), and the module they
148  * belong to if not built in (head.mod). The ending item contains a
149  * pointer to the next array of saved eval_map items.
150  */
151 union trace_eval_map_item {
152         struct trace_eval_map           map;
153         struct trace_eval_map_head      head;
154         struct trace_eval_map_tail      tail;
155 };
156
157 static union trace_eval_map_item *trace_eval_maps;
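/*
 * Illustrative sketch (not part of the original file): given the layout
 * described above, one chain of saved maps could be walked roughly as
 *
 *	union trace_eval_map_item *ptr = trace_eval_maps;
 *	int len, i;
 *
 *	while (ptr) {
 *		len = ptr->head.length;
 *		for (i = 1; i <= len; i++)
 *			do_something(&ptr[i].map);	// hypothetical helper
 *		ptr = ptr[len + 1].tail.next;		// tail links the next array
 *	}
 *
 * i.e. ptr[0] is the head, ptr[1]..ptr[len] are the maps, and
 * ptr[len + 1] is the tail whose ->next points to the next saved array.
 * A real walker would presumably do this under trace_eval_mutex.
 */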
158 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159
160 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161
162 #define MAX_TRACER_SIZE         100
163 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
164 static char *default_bootup_tracer;
165
166 static bool allocate_snapshot;
167
168 static int __init set_cmdline_ftrace(char *str)
169 {
170         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
171         default_bootup_tracer = bootup_tracer_buf;
172         /* We are using ftrace early, expand it */
173         ring_buffer_expanded = true;
174         return 1;
175 }
176 __setup("ftrace=", set_cmdline_ftrace);
177
178 static int __init set_ftrace_dump_on_oops(char *str)
179 {
180         if (*str++ != '=' || !*str) {
181                 ftrace_dump_on_oops = DUMP_ALL;
182                 return 1;
183         }
184
185         if (!strcmp("orig_cpu", str)) {
186                 ftrace_dump_on_oops = DUMP_ORIG;
187                 return 1;
188         }
189
190         return 0;
191 }
192 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
193
194 static int __init stop_trace_on_warning(char *str)
195 {
196         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
197                 __disable_trace_on_warning = 1;
198         return 1;
199 }
200 __setup("traceoff_on_warning", stop_trace_on_warning);
201
202 static int __init boot_alloc_snapshot(char *str)
203 {
204         allocate_snapshot = true;
205         /* We also need the main ring buffer expanded */
206         ring_buffer_expanded = true;
207         return 1;
208 }
209 __setup("alloc_snapshot", boot_alloc_snapshot);
210
211
212 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213
214 static int __init set_trace_boot_options(char *str)
215 {
216         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
217         return 0;
218 }
219 __setup("trace_options=", set_trace_boot_options);
220
221 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
222 static char *trace_boot_clock __initdata;
223
224 static int __init set_trace_boot_clock(char *str)
225 {
226         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
227         trace_boot_clock = trace_boot_clock_buf;
228         return 0;
229 }
230 __setup("trace_clock=", set_trace_boot_clock);
231
232 static int __init set_tracepoint_printk(char *str)
233 {
234         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
235                 tracepoint_printk = 1;
236         return 1;
237 }
238 __setup("tp_printk", set_tracepoint_printk);
239
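/*
 * Illustrative example (added for clarity, not from the original file):
 * the __setup() handlers above and around this block mean a kernel
 * command line such as
 *
 *   ftrace=function trace_options=sym-addr trace_clock=global tp_printk
 *   ftrace_dump_on_oops=orig_cpu traceoff_on_warning
 *
 * (all on one line) would start the "function" tracer at boot, apply the
 * given trace option and clock, pipe tracepoints to printk, dump only
 * the oopsing CPU's buffer on an oops, and stop tracing on the first
 * WARN*().  The tracer name and option value are placeholders; any
 * registered tracer or option can be used.
 */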
240 unsigned long long ns2usecs(u64 nsec)
241 {
242         nsec += 500;
243         do_div(nsec, 1000);
244         return nsec;
245 }
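/*
 * Worked example (added for illustration): ns2usecs() rounds to the
 * nearest microsecond, e.g. ns2usecs(1499) == 1 and ns2usecs(1500) == 2,
 * whereas nsecs_to_usecs() further below simply truncates.
 */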
246
247 /* trace_flags holds trace_options default values */
248 #define TRACE_DEFAULT_FLAGS                                             \
249         (FUNCTION_DEFAULT_FLAGS |                                       \
250          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
251          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
252          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
253          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254
255 /* trace_options that are only supported by global_trace */
256 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
257                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258
259 /* trace_flags that are default zero for instances */
260 #define ZEROED_TRACE_FLAGS \
261         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
262
263 /*
264  * The global_trace is the descriptor that holds the top-level tracing
265  * buffers for the live tracing.
266  */
267 static struct trace_array global_trace = {
268         .trace_flags = TRACE_DEFAULT_FLAGS,
269 };
270
271 LIST_HEAD(ftrace_trace_arrays);
272
273 int trace_array_get(struct trace_array *this_tr)
274 {
275         struct trace_array *tr;
276         int ret = -ENODEV;
277
278         mutex_lock(&trace_types_lock);
279         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
280                 if (tr == this_tr) {
281                         tr->ref++;
282                         ret = 0;
283                         break;
284                 }
285         }
286         mutex_unlock(&trace_types_lock);
287
288         return ret;
289 }
290
291 static void __trace_array_put(struct trace_array *this_tr)
292 {
293         WARN_ON(!this_tr->ref);
294         this_tr->ref--;
295 }
296
297 void trace_array_put(struct trace_array *this_tr)
298 {
299         mutex_lock(&trace_types_lock);
300         __trace_array_put(this_tr);
301         mutex_unlock(&trace_types_lock);
302 }
303
304 int call_filter_check_discard(struct trace_event_call *call, void *rec,
305                               struct ring_buffer *buffer,
306                               struct ring_buffer_event *event)
307 {
308         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
309             !filter_match_preds(call->filter, rec)) {
310                 __trace_event_discard_commit(buffer, event);
311                 return 1;
312         }
313
314         return 0;
315 }
316
317 void trace_free_pid_list(struct trace_pid_list *pid_list)
318 {
319         vfree(pid_list->pids);
320         kfree(pid_list);
321 }
322
323 /**
324  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
325  * @filtered_pids: The list of pids to check
326  * @search_pid: The PID to find in @filtered_pids
327  *
328  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
329  */
330 bool
331 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
332 {
333         /*
334          * If pid_max changed after filtered_pids was created, we
335          * by default ignore all pids greater than the previous pid_max.
336          */
337         if (search_pid >= filtered_pids->pid_max)
338                 return false;
339
340         return test_bit(search_pid, filtered_pids->pids);
341 }
342
343 /**
344  * trace_ignore_this_task - should a task be ignored for tracing
345  * @filtered_pids: The list of pids to check
346  * @task: The task that should be ignored if not filtered
347  *
348  * Checks if @task should be traced or not from @filtered_pids.
349  * Returns true if @task should *NOT* be traced.
350  * Returns false if @task should be traced.
351  */
352 bool
353 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
354 {
355         /*
356          * Return false, because if filtered_pids does not exist,
357          * all pids are good to trace.
358          */
359         if (!filtered_pids)
360                 return false;
361
362         return !trace_find_filtered_pid(filtered_pids, task->pid);
363 }
364
365 /**
366  * trace_filter_add_remove_task - Add or remove a task from a pid_list
367  * @pid_list: The list to modify
368  * @self: The current task for fork or NULL for exit
369  * @task: The task to add or remove
370  *
371  * When adding a task, if @self is defined, the task is only added if @self
372  * is also included in @pid_list. This happens on fork, where tasks should
373  * only be added when the parent is listed. If @self is NULL, then the
374  * @task pid will be removed from the list, which happens on exit
375  * of a task.
376  */
377 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
378                                   struct task_struct *self,
379                                   struct task_struct *task)
380 {
381         if (!pid_list)
382                 return;
383
384         /* For forks, we only add if the forking task is listed */
385         if (self) {
386                 if (!trace_find_filtered_pid(pid_list, self->pid))
387                         return;
388         }
389
390         /* Sorry, but we don't support pid_max changing after setting */
391         if (task->pid >= pid_list->pid_max)
392                 return;
393
394         /* "self" is set for forks, and NULL for exits */
395         if (self)
396                 set_bit(task->pid, pid_list->pids);
397         else
398                 clear_bit(task->pid, pid_list->pids);
399 }
400
401 /**
402  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
403  * @pid_list: The pid list to show
404  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
405  * @pos: The position of the file
406  *
407  * This is used by the seq_file "next" operation to iterate the pids
408  * listed in a trace_pid_list structure.
409  *
410  * Returns the pid+1 as we want to display pid of zero, but NULL would
411  * stop the iteration.
412  */
413 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
414 {
415         unsigned long pid = (unsigned long)v;
416
417         (*pos)++;
418
419         /* pid is already +1 of the actual previous bit */
420         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
421
422         /* Return pid + 1 to allow zero to be represented */
423         if (pid < pid_list->pid_max)
424                 return (void *)(pid + 1);
425
426         return NULL;
427 }
428
429 /**
430  * trace_pid_start - Used for seq_file to start reading pid lists
431  * @pid_list: The pid list to show
432  * @pos: The position of the file
433  *
434  * This is used by seq_file "start" operation to start the iteration
435  * of listing pids.
436  *
437  * Returns the pid+1 as we want to display pid of zero, but NULL would
438  * stop the iteration.
439  */
440 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
441 {
442         unsigned long pid;
443         loff_t l = 0;
444
445         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
446         if (pid >= pid_list->pid_max)
447                 return NULL;
448
449         /* Return pid + 1 so that zero can be the exit value */
450         for (pid++; pid && l < *pos;
451              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
452                 ;
453         return (void *)pid;
454 }
455
456 /**
457  * trace_pid_show - show the current pid in seq_file processing
458  * @m: The seq_file structure to write into
459  * @v: A void pointer of the pid (+1) value to display
460  *
461  * Can be directly used by seq_file operations to display the current
462  * pid value.
463  */
464 int trace_pid_show(struct seq_file *m, void *v)
465 {
466         unsigned long pid = (unsigned long)v - 1;
467
468         seq_printf(m, "%lu\n", pid);
469         return 0;
470 }
471
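/*
 * Illustrative sketch (hypothetical wrappers, not the actual users in
 * this file): trace_pid_start/next/show are shaped so a pid list can be
 * exposed through seq_file with thin adapters, e.g.
 *
 *	static void *my_pid_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;	// assumption
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *my_pid_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		return trace_pid_next(m->private, v, pos);
 *	}
 *
 *	static const struct seq_operations my_pid_seq_ops = {
 *		.start	= my_pid_start,
 *		.next	= my_pid_next,
 *		.stop	= my_pid_stop,		// release any locks taken in start
 *		.show	= trace_pid_show,
 *	};
 *
 * The real callers also take locks (and typically RCU) around the
 * iteration; this sketch only shows how the pid+1 convention threads
 * through start/next/show.
 */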
472 /* 128 should be much more than enough */
473 #define PID_BUF_SIZE            127
474
475 int trace_pid_write(struct trace_pid_list *filtered_pids,
476                     struct trace_pid_list **new_pid_list,
477                     const char __user *ubuf, size_t cnt)
478 {
479         struct trace_pid_list *pid_list;
480         struct trace_parser parser;
481         unsigned long val;
482         int nr_pids = 0;
483         ssize_t read = 0;
484         ssize_t ret = 0;
485         loff_t pos;
486         pid_t pid;
487
488         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
489                 return -ENOMEM;
490
491         /*
492          * Always create a new array: the write is an all-or-nothing
493          * operation, so when the user adds new pids a fresh list is
494          * built, and if the operation fails the current list is left
495          * unmodified.
496          */
497         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
498         if (!pid_list)
499                 return -ENOMEM;
500
501         pid_list->pid_max = READ_ONCE(pid_max);
502
503         /* Only truncating will shrink pid_max */
504         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
505                 pid_list->pid_max = filtered_pids->pid_max;
506
507         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
508         if (!pid_list->pids) {
509                 kfree(pid_list);
510                 return -ENOMEM;
511         }
512
513         if (filtered_pids) {
514                 /* copy the current bits to the new max */
515                 for_each_set_bit(pid, filtered_pids->pids,
516                                  filtered_pids->pid_max) {
517                         set_bit(pid, pid_list->pids);
518                         nr_pids++;
519                 }
520         }
521
522         while (cnt > 0) {
523
524                 pos = 0;
525
526                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
527                 if (ret < 0 || !trace_parser_loaded(&parser))
528                         break;
529
530                 read += ret;
531                 ubuf += ret;
532                 cnt -= ret;
533
534                 ret = -EINVAL;
535                 if (kstrtoul(parser.buffer, 0, &val))
536                         break;
537                 if (val >= pid_list->pid_max)
538                         break;
539
540                 pid = (pid_t)val;
541
542                 set_bit(pid, pid_list->pids);
543                 nr_pids++;
544
545                 trace_parser_clear(&parser);
546                 ret = 0;
547         }
548         trace_parser_put(&parser);
549
550         if (ret < 0) {
551                 trace_free_pid_list(pid_list);
552                 return ret;
553         }
554
555         if (!nr_pids) {
556                 /* Cleared the list of pids */
557                 trace_free_pid_list(pid_list);
558                 read = ret;
559                 pid_list = NULL;
560         }
561
562         *new_pid_list = pid_list;
563
564         return read;
565 }
566
567 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
568 {
569         u64 ts;
570
571         /* Early boot up does not have a buffer yet */
572         if (!buf->buffer)
573                 return trace_clock_local();
574
575         ts = ring_buffer_time_stamp(buf->buffer, cpu);
576         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
577
578         return ts;
579 }
580
581 u64 ftrace_now(int cpu)
582 {
583         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
584 }
585
586 /**
587  * tracing_is_enabled - Show if global_trace has been disabled
588  *
589  * Shows if the global trace has been enabled or not. It uses the
590  * mirror flag "buffer_disabled" to be used in fast paths such as for
591  * the irqsoff tracer. But it may be inaccurate due to races. If you
592  * need to know the accurate state, use tracing_is_on() which is a little
593  * slower, but accurate.
594  */
595 int tracing_is_enabled(void)
596 {
597         /*
598          * For quick access (irqsoff uses this in fast path), just
599          * return the mirror variable of the state of the ring buffer.
600          * It's a little racy, but we don't really care.
601          */
602         smp_rmb();
603         return !global_trace.buffer_disabled;
604 }
605
606 /*
607  * trace_buf_size is the size in bytes that is allocated
608  * for a buffer. Note, the number of bytes is always rounded
609  * to page size.
610  *
611  * This number is purposely set to the low value of 16384:
612  * if a dump on oops happens, there is much less output to
613  * wait through.  In any case, this is configurable at both
614  * boot time and run time.
615  */
616 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
617
618 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
619
620 /* trace_types holds a link list of available tracers. */
621 static struct tracer            *trace_types __read_mostly;
622
623 /*
624  * trace_types_lock is used to protect the trace_types list.
625  */
626 DEFINE_MUTEX(trace_types_lock);
627
628 /*
629  * serialize the access of the ring buffer
630  *
631  * The ring buffer serializes readers, but that is only low level protection.
632  * The validity of the events (returned by ring_buffer_peek(), etc.)
633  * is not protected by the ring buffer.
634  *
635  * The content of events may become garbage if we allow another process to
636  * consume these events concurrently:
637  *   A) the page of the consumed events may become a normal page
638  *      (not a reader page) in the ring buffer, and this page will be rewritten
639  *      by the events producer.
640  *   B) The page of the consumed events may become a page for splice_read,
641  *      and this page will be returned to the system.
642  *
643  * These primitives allow multiple processes to access different per-cpu ring
644  * buffers concurrently.
645  *
646  * These primitives don't distinguish read-only and read-consume access.
647  * Multiple read-only accesses are also serialized.
648  */
649
650 #ifdef CONFIG_SMP
651 static DECLARE_RWSEM(all_cpu_access_lock);
652 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
653
654 static inline void trace_access_lock(int cpu)
655 {
656         if (cpu == RING_BUFFER_ALL_CPUS) {
657                 /* gain it for accessing the whole ring buffer. */
658                 down_write(&all_cpu_access_lock);
659         } else {
660                 /* gain it for accessing a cpu ring buffer. */
661
662                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
663                 down_read(&all_cpu_access_lock);
664
665                 /* Secondly block other access to this @cpu ring buffer. */
666                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
667         }
668 }
669
670 static inline void trace_access_unlock(int cpu)
671 {
672         if (cpu == RING_BUFFER_ALL_CPUS) {
673                 up_write(&all_cpu_access_lock);
674         } else {
675                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
676                 up_read(&all_cpu_access_lock);
677         }
678 }
679
680 static inline void trace_access_lock_init(void)
681 {
682         int cpu;
683
684         for_each_possible_cpu(cpu)
685                 mutex_init(&per_cpu(cpu_access_lock, cpu));
686 }
687
688 #else
689
690 static DEFINE_MUTEX(access_lock);
691
692 static inline void trace_access_lock(int cpu)
693 {
694         (void)cpu;
695         mutex_lock(&access_lock);
696 }
697
698 static inline void trace_access_unlock(int cpu)
699 {
700         (void)cpu;
701         mutex_unlock(&access_lock);
702 }
703
704 static inline void trace_access_lock_init(void)
705 {
706 }
707
708 #endif
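/*
 * Usage sketch (added for illustration): a reader that consumes events
 * from a single cpu buffer brackets the ring-buffer calls with the
 * primitives above, e.g.
 *
 *	trace_access_lock(cpu);
 *	// ... ring_buffer_consume()/ring_buffer_read_page() etc. ...
 *	trace_access_unlock(cpu);
 *
 * while a reader that touches every cpu passes RING_BUFFER_ALL_CPUS,
 * which (on SMP) takes all_cpu_access_lock for write and so excludes
 * all per-cpu readers.  The ring-buffer calls named here are only
 * examples of what might sit inside the critical section.
 */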
709
710 #ifdef CONFIG_STACKTRACE
711 static void __ftrace_trace_stack(struct ring_buffer *buffer,
712                                  unsigned long flags,
713                                  int skip, int pc, struct pt_regs *regs);
714 static inline void ftrace_trace_stack(struct trace_array *tr,
715                                       struct ring_buffer *buffer,
716                                       unsigned long flags,
717                                       int skip, int pc, struct pt_regs *regs);
718
719 #else
720 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
721                                         unsigned long flags,
722                                         int skip, int pc, struct pt_regs *regs)
723 {
724 }
725 static inline void ftrace_trace_stack(struct trace_array *tr,
726                                       struct ring_buffer *buffer,
727                                       unsigned long flags,
728                                       int skip, int pc, struct pt_regs *regs)
729 {
730 }
731
732 #endif
733
734 static __always_inline void
735 trace_event_setup(struct ring_buffer_event *event,
736                   int type, unsigned long flags, int pc)
737 {
738         struct trace_entry *ent = ring_buffer_event_data(event);
739
740         tracing_generic_entry_update(ent, flags, pc);
741         ent->type = type;
742 }
743
744 static __always_inline struct ring_buffer_event *
745 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
746                           int type,
747                           unsigned long len,
748                           unsigned long flags, int pc)
749 {
750         struct ring_buffer_event *event;
751
752         event = ring_buffer_lock_reserve(buffer, len);
753         if (event != NULL)
754                 trace_event_setup(event, type, flags, pc);
755
756         return event;
757 }
758
759 void tracer_tracing_on(struct trace_array *tr)
760 {
761         if (tr->trace_buffer.buffer)
762                 ring_buffer_record_on(tr->trace_buffer.buffer);
763         /*
764          * This flag is looked at when buffers haven't been allocated
765          * yet, or by some tracers (like irqsoff) that just want to
766          * know if the ring buffer has been disabled, but can handle
767          * races where it gets disabled while we still do a record.
768          * As the check is in the fast path of the tracers, it is more
769          * important to be fast than accurate.
770          */
771         tr->buffer_disabled = 0;
772         /* Make the flag seen by readers */
773         smp_wmb();
774 }
775
776 /**
777  * tracing_on - enable tracing buffers
778  *
779  * This function enables tracing buffers that may have been
780  * disabled with tracing_off.
781  */
782 void tracing_on(void)
783 {
784         tracer_tracing_on(&global_trace);
785 }
786 EXPORT_SYMBOL_GPL(tracing_on);
787
788
789 static __always_inline void
790 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
791 {
792         __this_cpu_write(trace_taskinfo_save, true);
793
794         /* If this is the temp buffer, we need to commit fully */
795         if (this_cpu_read(trace_buffered_event) == event) {
796                 /* Length is in event->array[0] */
797                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
798                 /* Release the temp buffer */
799                 this_cpu_dec(trace_buffered_event_cnt);
800         } else
801                 ring_buffer_unlock_commit(buffer, event);
802 }
803
804 /**
805  * __trace_puts - write a constant string into the trace buffer.
806  * @ip:    The address of the caller
807  * @str:   The constant string to write
808  * @size:  The size of the string.
809  */
810 int __trace_puts(unsigned long ip, const char *str, int size)
811 {
812         struct ring_buffer_event *event;
813         struct ring_buffer *buffer;
814         struct print_entry *entry;
815         unsigned long irq_flags;
816         int alloc;
817         int pc;
818
819         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
820                 return 0;
821
822         pc = preempt_count();
823
824         if (unlikely(tracing_selftest_running || tracing_disabled))
825                 return 0;
826
827         alloc = sizeof(*entry) + size + 2; /* possible \n added */
828
829         local_save_flags(irq_flags);
830         buffer = global_trace.trace_buffer.buffer;
831         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
832                                             irq_flags, pc);
833         if (!event)
834                 return 0;
835
836         entry = ring_buffer_event_data(event);
837         entry->ip = ip;
838
839         memcpy(&entry->buf, str, size);
840
841         /* Add a newline if necessary */
842         if (entry->buf[size - 1] != '\n') {
843                 entry->buf[size] = '\n';
844                 entry->buf[size + 1] = '\0';
845         } else
846                 entry->buf[size] = '\0';
847
848         __buffer_unlock_commit(buffer, event);
849         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
850
851         return size;
852 }
853 EXPORT_SYMBOL_GPL(__trace_puts);
854
855 /**
856  * __trace_bputs - write the pointer to a constant string into trace buffer
857  * @ip:    The address of the caller
858  * @str:   The constant string to write to the buffer
859  */
860 int __trace_bputs(unsigned long ip, const char *str)
861 {
862         struct ring_buffer_event *event;
863         struct ring_buffer *buffer;
864         struct bputs_entry *entry;
865         unsigned long irq_flags;
866         int size = sizeof(struct bputs_entry);
867         int pc;
868
869         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
870                 return 0;
871
872         pc = preempt_count();
873
874         if (unlikely(tracing_selftest_running || tracing_disabled))
875                 return 0;
876
877         local_save_flags(irq_flags);
878         buffer = global_trace.trace_buffer.buffer;
879         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
880                                             irq_flags, pc);
881         if (!event)
882                 return 0;
883
884         entry = ring_buffer_event_data(event);
885         entry->ip                       = ip;
886         entry->str                      = str;
887
888         __buffer_unlock_commit(buffer, event);
889         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
890
891         return 1;
892 }
893 EXPORT_SYMBOL_GPL(__trace_bputs);
894
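/*
 * Usage note (added for illustration): callers normally do not invoke
 * __trace_puts()/__trace_bputs() directly but go through the
 * trace_puts() macro declared alongside trace_printk(), e.g.
 *
 *	trace_puts("reached the slow path\n");
 *
 * which resolves to __trace_bputs() for compile-time constant strings
 * (only the pointer is recorded) and to __trace_puts() otherwise.
 */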
895 #ifdef CONFIG_TRACER_SNAPSHOT
896 void tracing_snapshot_instance(struct trace_array *tr)
897 {
898         struct tracer *tracer = tr->current_trace;
899         unsigned long flags;
900
901         if (in_nmi()) {
902                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
903                 internal_trace_puts("*** snapshot is being ignored        ***\n");
904                 return;
905         }
906
907         if (!tr->allocated_snapshot) {
908                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
909                 internal_trace_puts("*** stopping trace here!   ***\n");
910                 tracing_off();
911                 return;
912         }
913
914         /* Note, snapshot can not be used when the tracer uses it */
915         if (tracer->use_max_tr) {
916                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
917                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
918                 return;
919         }
920
921         local_irq_save(flags);
922         update_max_tr(tr, current, smp_processor_id());
923         local_irq_restore(flags);
924 }
925
926 /**
927  * tracing_snapshot - take a snapshot of the current buffer.
928  *
929  * This causes a swap between the snapshot buffer and the current live
930  * tracing buffer. You can use this to take snapshots of the live
931  * trace when some condition is triggered, but continue to trace.
932  *
933  * Note, make sure to allocate the snapshot first, either with
934  * tracing_snapshot_alloc(), or manually
935  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
936  *
937  * If the snapshot buffer is not allocated, it will stop tracing.
938  * Basically making a permanent snapshot.
939  */
940 void tracing_snapshot(void)
941 {
942         struct trace_array *tr = &global_trace;
943
944         tracing_snapshot_instance(tr);
945 }
946 EXPORT_SYMBOL_GPL(tracing_snapshot);
947
948 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
949                                         struct trace_buffer *size_buf, int cpu_id);
950 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
951
952 int tracing_alloc_snapshot_instance(struct trace_array *tr)
953 {
954         int ret;
955
956         if (!tr->allocated_snapshot) {
957
958                 /* allocate spare buffer */
959                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
960                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
961                 if (ret < 0)
962                         return ret;
963
964                 tr->allocated_snapshot = true;
965         }
966
967         return 0;
968 }
969
970 static void free_snapshot(struct trace_array *tr)
971 {
972         /*
973          * We don't free the ring buffer; instead, we resize it because
974          * the max_tr ring buffer has some state (e.g. ring->clock) and
975          * we want to preserve it.
976          */
977         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
978         set_buffer_entries(&tr->max_buffer, 1);
979         tracing_reset_online_cpus(&tr->max_buffer);
980         tr->allocated_snapshot = false;
981 }
982
983 /**
984  * tracing_alloc_snapshot - allocate snapshot buffer.
985  *
986  * This only allocates the snapshot buffer if it isn't already
987  * allocated - it doesn't also take a snapshot.
988  *
989  * This is meant to be used in cases where the snapshot buffer needs
990  * to be set up for events that can't sleep but need to be able to
991  * trigger a snapshot.
992  */
993 int tracing_alloc_snapshot(void)
994 {
995         struct trace_array *tr = &global_trace;
996         int ret;
997
998         ret = tracing_alloc_snapshot_instance(tr);
999         WARN_ON(ret < 0);
1000
1001         return ret;
1002 }
1003 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1004
1005 /**
1006  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1007  *
1008  * This is similar to tracing_snapshot(), but it will allocate the
1009  * snapshot buffer if it isn't already allocated. Use this only
1010  * where it is safe to sleep, as the allocation may sleep.
1011  *
1012  * This causes a swap between the snapshot buffer and the current live
1013  * tracing buffer. You can use this to take snapshots of the live
1014  * trace when some condition is triggered, but continue to trace.
1015  */
1016 void tracing_snapshot_alloc(void)
1017 {
1018         int ret;
1019
1020         ret = tracing_alloc_snapshot();
1021         if (ret < 0)
1022                 return;
1023
1024         tracing_snapshot();
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
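/*
 * Illustrative example (hypothetical caller, not from this file): a
 * debugging hook that wants to freeze the trace around a rare condition
 * could pair the exported helpers above:
 *
 *	// somewhere that may sleep, e.g. module/boot init
 *	if (tracing_alloc_snapshot() < 0)
 *		return;				// no snapshot buffer available
 *
 *	// later, when the condition of interest hits
 *	if (unlikely(saw_bad_state))		// placeholder condition
 *		tracing_snapshot();
 *
 * tracing_snapshot_alloc() combines the two steps for callers that are
 * always allowed to sleep.
 */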
1027 #else
1028 void tracing_snapshot(void)
1029 {
1030         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1031 }
1032 EXPORT_SYMBOL_GPL(tracing_snapshot);
1033 int tracing_alloc_snapshot(void)
1034 {
1035         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1036         return -ENODEV;
1037 }
1038 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1039 void tracing_snapshot_alloc(void)
1040 {
1041         /* Give warning */
1042         tracing_snapshot();
1043 }
1044 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1045 #endif /* CONFIG_TRACER_SNAPSHOT */
1046
1047 void tracer_tracing_off(struct trace_array *tr)
1048 {
1049         if (tr->trace_buffer.buffer)
1050                 ring_buffer_record_off(tr->trace_buffer.buffer);
1051         /*
1052          * This flag is looked at when buffers haven't been allocated
1053          * yet, or by some tracers (like irqsoff) that just want to
1054          * know if the ring buffer has been disabled, but can handle
1055          * races where it gets disabled while we still do a record.
1056          * As the check is in the fast path of the tracers, it is more
1057          * important to be fast than accurate.
1058          */
1059         tr->buffer_disabled = 1;
1060         /* Make the flag seen by readers */
1061         smp_wmb();
1062 }
1063
1064 /**
1065  * tracing_off - turn off tracing buffers
1066  *
1067  * This function stops the tracing buffers from recording data.
1068  * It does not disable any overhead the tracers themselves may
1069  * be causing. This function simply causes all recording to
1070  * the ring buffers to fail.
1071  */
1072 void tracing_off(void)
1073 {
1074         tracer_tracing_off(&global_trace);
1075 }
1076 EXPORT_SYMBOL_GPL(tracing_off);
1077
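/*
 * Usage sketch (added for illustration): tracing_off()/tracing_on() are
 * meant to be sprinkled into code under debug so the ring buffer stops
 * recording exactly at an interesting point, e.g.
 *
 *	if (suspicious_condition)	// placeholder condition
 *		tracing_off();		// freeze the buffers, keep the data
 *
 * Recording can later be re-enabled by calling tracing_on() or from
 * user space (via the tracing_on control file).
 */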
1078 void disable_trace_on_warning(void)
1079 {
1080         if (__disable_trace_on_warning)
1081                 tracing_off();
1082 }
1083
1084 /**
1085  * tracer_tracing_is_on - show real state of ring buffer enabled
1086  * @tr : the trace array to know if ring buffer is enabled
1087  *
1088  * Shows real state of the ring buffer if it is enabled or not.
1089  */
1090 int tracer_tracing_is_on(struct trace_array *tr)
1091 {
1092         if (tr->trace_buffer.buffer)
1093                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1094         return !tr->buffer_disabled;
1095 }
1096
1097 /**
1098  * tracing_is_on - show state of ring buffers enabled
1099  */
1100 int tracing_is_on(void)
1101 {
1102         return tracer_tracing_is_on(&global_trace);
1103 }
1104 EXPORT_SYMBOL_GPL(tracing_is_on);
1105
1106 static int __init set_buf_size(char *str)
1107 {
1108         unsigned long buf_size;
1109
1110         if (!str)
1111                 return 0;
1112         buf_size = memparse(str, &str);
1113         /* nr_entries can not be zero */
1114         if (buf_size == 0)
1115                 return 0;
1116         trace_buf_size = buf_size;
1117         return 1;
1118 }
1119 __setup("trace_buf_size=", set_buf_size);
1120
1121 static int __init set_tracing_thresh(char *str)
1122 {
1123         unsigned long threshold;
1124         int ret;
1125
1126         if (!str)
1127                 return 0;
1128         ret = kstrtoul(str, 0, &threshold);
1129         if (ret < 0)
1130                 return 0;
1131         tracing_thresh = threshold * 1000;
1132         return 1;
1133 }
1134 __setup("tracing_thresh=", set_tracing_thresh);
1135
1136 unsigned long nsecs_to_usecs(unsigned long nsecs)
1137 {
1138         return nsecs / 1000;
1139 }
1140
1141 /*
1142  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1143  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1144  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1145  * of strings in the order that the evals (enum) were defined.
1146  */
1147 #undef C
1148 #define C(a, b) b
1149
1150 /* These must match the bit positions in trace_iterator_flags */
1151 static const char *trace_options[] = {
1152         TRACE_FLAGS
1153         NULL
1154 };
1155
1156 static struct {
1157         u64 (*func)(void);
1158         const char *name;
1159         int in_ns;              /* is this clock in nanoseconds? */
1160 } trace_clocks[] = {
1161         { trace_clock_local,            "local",        1 },
1162         { trace_clock_global,           "global",       1 },
1163         { trace_clock_counter,          "counter",      0 },
1164         { trace_clock_jiffies,          "uptime",       0 },
1165         { trace_clock,                  "perf",         1 },
1166         { ktime_get_mono_fast_ns,       "mono",         1 },
1167         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1168         { ktime_get_boot_fast_ns,       "boot",         1 },
1169         ARCH_TRACE_CLOCKS
1170 };
1171
1172 bool trace_clock_in_ns(struct trace_array *tr)
1173 {
1174         if (trace_clocks[tr->clock_id].in_ns)
1175                 return true;
1176
1177         return false;
1178 }
1179
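/*
 * Example (added for illustration): one of the names in trace_clocks[]
 * above can be selected at boot with the trace_clock= parameter handled
 * earlier in this file, e.g. "trace_clock=global", or at run time by
 * writing the name to the trace_clock file in tracefs.  The entries
 * with .in_ns == 0 ("counter" and "uptime") are not in nanoseconds,
 * which is exactly what trace_clock_in_ns() reports.
 */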
1180 /*
1181  * trace_parser_get_init - gets the buffer for trace parser
1182  */
1183 int trace_parser_get_init(struct trace_parser *parser, int size)
1184 {
1185         memset(parser, 0, sizeof(*parser));
1186
1187         parser->buffer = kmalloc(size, GFP_KERNEL);
1188         if (!parser->buffer)
1189                 return 1;
1190
1191         parser->size = size;
1192         return 0;
1193 }
1194
1195 /*
1196  * trace_parser_put - frees the buffer for trace parser
1197  */
1198 void trace_parser_put(struct trace_parser *parser)
1199 {
1200         kfree(parser->buffer);
1201         parser->buffer = NULL;
1202 }
1203
1204 /*
1205  * trace_get_user - reads the user input string separated by space
1206  * (matched by isspace(ch))
1207  *
1208  * For each string found the 'struct trace_parser' is updated,
1209  * and the function returns.
1210  *
1211  * Returns number of bytes read.
1212  *
1213  * See kernel/trace/trace.h for 'struct trace_parser' details.
1214  */
1215 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1216         size_t cnt, loff_t *ppos)
1217 {
1218         char ch;
1219         size_t read = 0;
1220         ssize_t ret;
1221
1222         if (!*ppos)
1223                 trace_parser_clear(parser);
1224
1225         ret = get_user(ch, ubuf++);
1226         if (ret)
1227                 goto out;
1228
1229         read++;
1230         cnt--;
1231
1232         /*
1233          * If the parser is not finished with the last write,
1234          * continue reading the user input without skipping spaces.
1235          */
1236         if (!parser->cont) {
1237                 /* skip white space */
1238                 while (cnt && isspace(ch)) {
1239                         ret = get_user(ch, ubuf++);
1240                         if (ret)
1241                                 goto out;
1242                         read++;
1243                         cnt--;
1244                 }
1245
1246                 parser->idx = 0;
1247
1248                 /* only spaces were written */
1249                 if (isspace(ch) || !ch) {
1250                         *ppos += read;
1251                         ret = read;
1252                         goto out;
1253                 }
1254         }
1255
1256         /* read the non-space input */
1257         while (cnt && !isspace(ch) && ch) {
1258                 if (parser->idx < parser->size - 1)
1259                         parser->buffer[parser->idx++] = ch;
1260                 else {
1261                         ret = -EINVAL;
1262                         goto out;
1263                 }
1264                 ret = get_user(ch, ubuf++);
1265                 if (ret)
1266                         goto out;
1267                 read++;
1268                 cnt--;
1269         }
1270
1271         /* We either got finished input or we have to wait for another call. */
1272         if (isspace(ch) || !ch) {
1273                 parser->buffer[parser->idx] = 0;
1274                 parser->cont = false;
1275         } else if (parser->idx < parser->size - 1) {
1276                 parser->cont = true;
1277                 parser->buffer[parser->idx++] = ch;
1278                 /* Make sure the parsed string always terminates with '\0'. */
1279                 parser->buffer[parser->idx] = 0;
1280         } else {
1281                 ret = -EINVAL;
1282                 goto out;
1283         }
1284
1285         *ppos += read;
1286         ret = read;
1287
1288 out:
1289         return ret;
1290 }
1291
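/*
 * Usage sketch (added for illustration): the typical pattern, as used by
 * trace_pid_write() above, is
 *
 *	struct trace_parser parser;
 *
 *	if (trace_parser_get_init(&parser, SOME_SIZE))	// SOME_SIZE: caller's choice
 *		return -ENOMEM;
 *	while (cnt > 0) {
 *		pos = 0;
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		read += ret; ubuf += ret; cnt -= ret;
 *		// parser.buffer now holds one whitespace-separated token
 *		trace_parser_clear(&parser);
 *	}
 *	trace_parser_put(&parser);
 *
 * i.e. each call extracts one token from the user buffer and the caller
 * decides what to do with it.
 */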
1292 /* TODO add a seq_buf_to_buffer() */
1293 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1294 {
1295         int len;
1296
1297         if (trace_seq_used(s) <= s->seq.readpos)
1298                 return -EBUSY;
1299
1300         len = trace_seq_used(s) - s->seq.readpos;
1301         if (cnt > len)
1302                 cnt = len;
1303         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1304
1305         s->seq.readpos += cnt;
1306         return cnt;
1307 }
1308
1309 unsigned long __read_mostly     tracing_thresh;
1310
1311 #ifdef CONFIG_TRACER_MAX_TRACE
1312 /*
1313  * Copy the new maximum trace into the separate maximum-trace
1314  * structure. (this way the maximum trace is permanently saved,
1315  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1316  */
1317 static void
1318 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1319 {
1320         struct trace_buffer *trace_buf = &tr->trace_buffer;
1321         struct trace_buffer *max_buf = &tr->max_buffer;
1322         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1323         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1324
1325         max_buf->cpu = cpu;
1326         max_buf->time_start = data->preempt_timestamp;
1327
1328         max_data->saved_latency = tr->max_latency;
1329         max_data->critical_start = data->critical_start;
1330         max_data->critical_end = data->critical_end;
1331
1332         memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1333         max_data->pid = tsk->pid;
1334         /*
1335          * If tsk == current, then use current_uid(), as that does not use
1336          * RCU. The irq tracer can be called out of RCU scope.
1337          */
1338         if (tsk == current)
1339                 max_data->uid = current_uid();
1340         else
1341                 max_data->uid = task_uid(tsk);
1342
1343         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1344         max_data->policy = tsk->policy;
1345         max_data->rt_priority = tsk->rt_priority;
1346
1347         /* record this task's comm */
1348         tracing_record_cmdline(tsk);
1349 }
1350
1351 /**
1352  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1353  * @tr: tracer
1354  * @tsk: the task with the latency
1355  * @cpu: The cpu that initiated the trace.
1356  *
1357  * Flip the buffers between the @tr and the max_tr and record information
1358  * about which task was the cause of this latency.
1359  */
1360 void
1361 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1362 {
1363         struct ring_buffer *buf;
1364
1365         if (tr->stop_count)
1366                 return;
1367
1368         WARN_ON_ONCE(!irqs_disabled());
1369
1370         if (!tr->allocated_snapshot) {
1371                 /* Only the nop tracer should hit this when disabling */
1372                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1373                 return;
1374         }
1375
1376         arch_spin_lock(&tr->max_lock);
1377
1378         buf = tr->trace_buffer.buffer;
1379         tr->trace_buffer.buffer = tr->max_buffer.buffer;
1380         tr->max_buffer.buffer = buf;
1381
1382         __update_max_tr(tr, tsk, cpu);
1383         arch_spin_unlock(&tr->max_lock);
1384 }
1385
1386 /**
1387  * update_max_tr_single - only copy one trace over, and reset the rest
1388  * @tr: tracer
1389  * @tsk: task with the latency
1390  * @cpu: the cpu of the buffer to copy.
1391  *
1392  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1393  */
1394 void
1395 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1396 {
1397         int ret;
1398
1399         if (tr->stop_count)
1400                 return;
1401
1402         WARN_ON_ONCE(!irqs_disabled());
1403         if (!tr->allocated_snapshot) {
1404                 /* Only the nop tracer should hit this when disabling */
1405                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1406                 return;
1407         }
1408
1409         arch_spin_lock(&tr->max_lock);
1410
1411         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1412
1413         if (ret == -EBUSY) {
1414                 /*
1415                  * We failed to swap the buffer due to a commit taking
1416                  * place on this CPU. We fail to record, but we reset
1417                  * the max trace buffer (no one writes directly to it)
1418                  * and flag that it failed.
1419                  */
1420                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1421                         "Failed to swap buffers due to commit in progress\n");
1422         }
1423
1424         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1425
1426         __update_max_tr(tr, tsk, cpu);
1427         arch_spin_unlock(&tr->max_lock);
1428 }
1429 #endif /* CONFIG_TRACER_MAX_TRACE */
1430
1431 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1432 {
1433         /* Iterators are static; they should be either filled or empty */
1434         if (trace_buffer_iter(iter, iter->cpu_file))
1435                 return 0;
1436
1437         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1438                                 full);
1439 }
1440
1441 #ifdef CONFIG_FTRACE_STARTUP_TEST
1442 static bool selftests_can_run;
1443
1444 struct trace_selftests {
1445         struct list_head                list;
1446         struct tracer                   *type;
1447 };
1448
1449 static LIST_HEAD(postponed_selftests);
1450
1451 static int save_selftest(struct tracer *type)
1452 {
1453         struct trace_selftests *selftest;
1454
1455         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1456         if (!selftest)
1457                 return -ENOMEM;
1458
1459         selftest->type = type;
1460         list_add(&selftest->list, &postponed_selftests);
1461         return 0;
1462 }
1463
1464 static int run_tracer_selftest(struct tracer *type)
1465 {
1466         struct trace_array *tr = &global_trace;
1467         struct tracer *saved_tracer = tr->current_trace;
1468         int ret;
1469
1470         if (!type->selftest || tracing_selftest_disabled)
1471                 return 0;
1472
1473         /*
1474          * If a tracer registers early in boot up (before scheduling is
1475          * initialized and such), then do not run its selftest yet.
1476          * Instead, run it a little later in the boot process.
1477          */
1478         if (!selftests_can_run)
1479                 return save_selftest(type);
1480
1481         /*
1482          * Run a selftest on this tracer.
1483          * Here we reset the trace buffer, and set the current
1484          * tracer to be this tracer. The tracer can then run some
1485          * internal tracing to verify that everything is in order.
1486          * If we fail, we do not register this tracer.
1487          */
1488         tracing_reset_online_cpus(&tr->trace_buffer);
1489
1490         tr->current_trace = type;
1491
1492 #ifdef CONFIG_TRACER_MAX_TRACE
1493         if (type->use_max_tr) {
1494                 /* If we expanded the buffers, make sure the max is expanded too */
1495                 if (ring_buffer_expanded)
1496                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1497                                            RING_BUFFER_ALL_CPUS);
1498                 tr->allocated_snapshot = true;
1499         }
1500 #endif
1501
1502         /* the test is responsible for initializing and enabling */
1503         pr_info("Testing tracer %s: ", type->name);
1504         ret = type->selftest(type, tr);
1505         /* the test is responsible for resetting too */
1506         tr->current_trace = saved_tracer;
1507         if (ret) {
1508                 printk(KERN_CONT "FAILED!\n");
1509                 /* Add the warning after printing 'FAILED' */
1510                 WARN_ON(1);
1511                 return -1;
1512         }
1513         /* Only reset on passing, to avoid touching corrupted buffers */
1514         tracing_reset_online_cpus(&tr->trace_buffer);
1515
1516 #ifdef CONFIG_TRACER_MAX_TRACE
1517         if (type->use_max_tr) {
1518                 tr->allocated_snapshot = false;
1519
1520                 /* Shrink the max buffer again */
1521                 if (ring_buffer_expanded)
1522                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1523                                            RING_BUFFER_ALL_CPUS);
1524         }
1525 #endif
1526
1527         printk(KERN_CONT "PASSED\n");
1528         return 0;
1529 }
1530
1531 static __init int init_trace_selftests(void)
1532 {
1533         struct trace_selftests *p, *n;
1534         struct tracer *t, **last;
1535         int ret;
1536
1537         selftests_can_run = true;
1538
1539         mutex_lock(&trace_types_lock);
1540
1541         if (list_empty(&postponed_selftests))
1542                 goto out;
1543
1544         pr_info("Running postponed tracer tests:\n");
1545
1546         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1547                 ret = run_tracer_selftest(p->type);
1548                 /* If the test fails, then warn and remove from available_tracers */
1549                 if (ret < 0) {
1550                         WARN(1, "tracer: %s failed selftest, disabling\n",
1551                              p->type->name);
1552                         last = &trace_types;
1553                         for (t = trace_types; t; t = t->next) {
1554                                 if (t == p->type) {
1555                                         *last = t->next;
1556                                         break;
1557                                 }
1558                                 last = &t->next;
1559                         }
1560                 }
1561                 list_del(&p->list);
1562                 kfree(p);
1563         }
1564
1565  out:
1566         mutex_unlock(&trace_types_lock);
1567
1568         return 0;
1569 }
1570 core_initcall(init_trace_selftests);
1571 #else
1572 static inline int run_tracer_selftest(struct tracer *type)
1573 {
1574         return 0;
1575 }
1576 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1577
1578 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1579
1580 static void __init apply_trace_boot_options(void);
1581
1582 /**
1583  * register_tracer - register a tracer with the ftrace system.
1584  * @type: the plugin for the tracer
1585  *
1586  * Register a new plugin tracer.
1587  */
1588 int __init register_tracer(struct tracer *type)
1589 {
1590         struct tracer *t;
1591         int ret = 0;
1592
1593         if (!type->name) {
1594                 pr_info("Tracer must have a name\n");
1595                 return -1;
1596         }
1597
1598         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1599                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1600                 return -1;
1601         }
1602
1603         mutex_lock(&trace_types_lock);
1604
1605         tracing_selftest_running = true;
1606
1607         for (t = trace_types; t; t = t->next) {
1608                 if (strcmp(type->name, t->name) == 0) {
1609                         /* already found */
1610                         pr_info("Tracer %s already registered\n",
1611                                 type->name);
1612                         ret = -1;
1613                         goto out;
1614                 }
1615         }
1616
1617         if (!type->set_flag)
1618                 type->set_flag = &dummy_set_flag;
1619         if (!type->flags) {
1620                 /* allocate a dummy tracer_flags */
1621                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1622                 if (!type->flags) {
1623                         ret = -ENOMEM;
1624                         goto out;
1625                 }
1626                 type->flags->val = 0;
1627                 type->flags->opts = dummy_tracer_opt;
1628         } else
1629                 if (!type->flags->opts)
1630                         type->flags->opts = dummy_tracer_opt;
1631
1632         /* store the tracer for __set_tracer_option */
1633         type->flags->trace = type;
1634
1635         ret = run_tracer_selftest(type);
1636         if (ret < 0)
1637                 goto out;
1638
1639         type->next = trace_types;
1640         trace_types = type;
1641         add_tracer_options(&global_trace, type);
1642
1643  out:
1644         tracing_selftest_running = false;
1645         mutex_unlock(&trace_types_lock);
1646
1647         if (ret || !default_bootup_tracer)
1648                 goto out_unlock;
1649
1650         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1651                 goto out_unlock;
1652
1653         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1654         /* Do we want this tracer to start on bootup? */
1655         tracing_set_tracer(&global_trace, type->name);
1656         default_bootup_tracer = NULL;
1657
1658         apply_trace_boot_options();
1659
1660         /* disable other selftests, since they would break the tracer now running. */
1661         tracing_selftest_disabled = true;
1662 #ifdef CONFIG_FTRACE_STARTUP_TEST
1663         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1664                type->name);
1665 #endif
1666
1667  out_unlock:
1668         return ret;
1669 }
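/*
 * Illustrative sketch, not part of the original file: a minimal built-in
 * tracer plugin registering itself.  All "example_" names are hypothetical;
 * in-tree tracers follow this pattern from their own __init code.  Since
 * register_tracer() is __init, this only works for built-in tracers.
 */
static int example_tracer_init(struct trace_array *tr)
{
	return 0;	/* nothing to set up for this sketch */
}

static void example_tracer_reset(struct trace_array *tr)
{
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static __init int example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);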
1670
1671 void tracing_reset(struct trace_buffer *buf, int cpu)
1672 {
1673         struct ring_buffer *buffer = buf->buffer;
1674
1675         if (!buffer)
1676                 return;
1677
1678         ring_buffer_record_disable(buffer);
1679
1680         /* Make sure all commits have finished */
1681         synchronize_sched();
1682         ring_buffer_reset_cpu(buffer, cpu);
1683
1684         ring_buffer_record_enable(buffer);
1685 }
1686
1687 void tracing_reset_online_cpus(struct trace_buffer *buf)
1688 {
1689         struct ring_buffer *buffer = buf->buffer;
1690         int cpu;
1691
1692         if (!buffer)
1693                 return;
1694
1695         ring_buffer_record_disable(buffer);
1696
1697         /* Make sure all commits have finished */
1698         synchronize_sched();
1699
1700         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1701
1702         for_each_online_cpu(cpu)
1703                 ring_buffer_reset_cpu(buffer, cpu);
1704
1705         ring_buffer_record_enable(buffer);
1706 }
1707
1708 /* Must have trace_types_lock held */
1709 void tracing_reset_all_online_cpus(void)
1710 {
1711         struct trace_array *tr;
1712
1713         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1714                 if (!tr->clear_trace)
1715                         continue;
1716                 tr->clear_trace = false;
1717                 tracing_reset_online_cpus(&tr->trace_buffer);
1718 #ifdef CONFIG_TRACER_MAX_TRACE
1719                 tracing_reset_online_cpus(&tr->max_buffer);
1720 #endif
1721         }
1722 }
1723
1724 static int *tgid_map;
1725
1726 #define SAVED_CMDLINES_DEFAULT 128
1727 #define NO_CMDLINE_MAP UINT_MAX
1728 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1729 struct saved_cmdlines_buffer {
1730         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1731         unsigned *map_cmdline_to_pid;
1732         unsigned cmdline_num;
1733         int cmdline_idx;
1734         char *saved_cmdlines;
1735 };
1736 static struct saved_cmdlines_buffer *savedcmd;
1737
1738 /* temporary disable recording */
1739 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1740
1741 static inline char *get_saved_cmdlines(int idx)
1742 {
1743         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1744 }
1745
1746 static inline void set_cmdline(int idx, const char *cmdline)
1747 {
1748         memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1749 }
1750
1751 static int allocate_cmdlines_buffer(unsigned int val,
1752                                     struct saved_cmdlines_buffer *s)
1753 {
1754         s->map_cmdline_to_pid = kmalloc_array(val,
1755                                               sizeof(*s->map_cmdline_to_pid),
1756                                               GFP_KERNEL);
1757         if (!s->map_cmdline_to_pid)
1758                 return -ENOMEM;
1759
1760         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1761         if (!s->saved_cmdlines) {
1762                 kfree(s->map_cmdline_to_pid);
1763                 return -ENOMEM;
1764         }
1765
1766         s->cmdline_idx = 0;
1767         s->cmdline_num = val;
1768         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1769                sizeof(s->map_pid_to_cmdline));
1770         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1771                val * sizeof(*s->map_cmdline_to_pid));
1772
1773         return 0;
1774 }
1775
1776 static int trace_create_savedcmd(void)
1777 {
1778         int ret;
1779
1780         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1781         if (!savedcmd)
1782                 return -ENOMEM;
1783
1784         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1785         if (ret < 0) {
1786                 kfree(savedcmd);
1787                 savedcmd = NULL;
1788                 return -ENOMEM;
1789         }
1790
1791         return 0;
1792 }
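/*
 * Illustrative sketch, not part of the original file: how the two maps in
 * saved_cmdlines_buffer resolve a pid to its saved comm.  This is, in
 * essence, the fast path of __trace_find_cmdline() further below; the
 * example_ helper is hypothetical.  With the default of 128 entries the
 * comm storage costs 128 * TASK_COMM_LEN bytes.
 */
static inline const char *example_lookup_saved_comm(int pid)
{
	unsigned map = savedcmd->map_pid_to_cmdline[pid];

	if (map == NO_CMDLINE_MAP)
		return NULL;		/* never recorded, or slot was recycled */

	return get_saved_cmdlines(map);	/* &saved_cmdlines[map * TASK_COMM_LEN] */
}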
1793
1794 int is_tracing_stopped(void)
1795 {
1796         return global_trace.stop_count;
1797 }
1798
1799 /**
1800  * tracing_start - quick start of the tracer
1801  *
1802  * If tracing is enabled but was stopped by tracing_stop,
1803  * this will start the tracer back up.
1804  */
1805 void tracing_start(void)
1806 {
1807         struct ring_buffer *buffer;
1808         unsigned long flags;
1809
1810         if (tracing_disabled)
1811                 return;
1812
1813         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1814         if (--global_trace.stop_count) {
1815                 if (global_trace.stop_count < 0) {
1816                         /* Someone screwed up their debugging */
1817                         WARN_ON_ONCE(1);
1818                         global_trace.stop_count = 0;
1819                 }
1820                 goto out;
1821         }
1822
1823         /* Prevent the buffers from switching */
1824         arch_spin_lock(&global_trace.max_lock);
1825
1826         buffer = global_trace.trace_buffer.buffer;
1827         if (buffer)
1828                 ring_buffer_record_enable(buffer);
1829
1830 #ifdef CONFIG_TRACER_MAX_TRACE
1831         buffer = global_trace.max_buffer.buffer;
1832         if (buffer)
1833                 ring_buffer_record_enable(buffer);
1834 #endif
1835
1836         arch_spin_unlock(&global_trace.max_lock);
1837
1838  out:
1839         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1840 }
1841
1842 static void tracing_start_tr(struct trace_array *tr)
1843 {
1844         struct ring_buffer *buffer;
1845         unsigned long flags;
1846
1847         if (tracing_disabled)
1848                 return;
1849
1850         /* If global, we need to also start the max tracer */
1851         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1852                 return tracing_start();
1853
1854         raw_spin_lock_irqsave(&tr->start_lock, flags);
1855
1856         if (--tr->stop_count) {
1857                 if (tr->stop_count < 0) {
1858                         /* Someone screwed up their debugging */
1859                         WARN_ON_ONCE(1);
1860                         tr->stop_count = 0;
1861                 }
1862                 goto out;
1863         }
1864
1865         buffer = tr->trace_buffer.buffer;
1866         if (buffer)
1867                 ring_buffer_record_enable(buffer);
1868
1869  out:
1870         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1871 }
1872
1873 /**
1874  * tracing_stop - quick stop of the tracer
1875  *
1876  * Light weight way to stop tracing. Use in conjunction with
1877  * tracing_start.
1878  */
1879 void tracing_stop(void)
1880 {
1881         struct ring_buffer *buffer;
1882         unsigned long flags;
1883
1884         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1885         if (global_trace.stop_count++)
1886                 goto out;
1887
1888         /* Prevent the buffers from switching */
1889         arch_spin_lock(&global_trace.max_lock);
1890
1891         buffer = global_trace.trace_buffer.buffer;
1892         if (buffer)
1893                 ring_buffer_record_disable(buffer);
1894
1895 #ifdef CONFIG_TRACER_MAX_TRACE
1896         buffer = global_trace.max_buffer.buffer;
1897         if (buffer)
1898                 ring_buffer_record_disable(buffer);
1899 #endif
1900
1901         arch_spin_unlock(&global_trace.max_lock);
1902
1903  out:
1904         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1905 }
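/*
 * Illustrative sketch, not part of the original file: tracing_stop() and
 * tracing_start() nest via stop_count, so a debugging path can freeze the
 * ring buffer around a suspect region and resume afterwards.  The
 * example_ function is hypothetical.
 */
static void example_freeze_trace_around(void (*suspect)(void))
{
	tracing_stop();		/* stop recording, keep existing entries */
	suspect();
	tracing_start();	/* resumes once the stop count drops back to zero */
}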
1906
1907 static void tracing_stop_tr(struct trace_array *tr)
1908 {
1909         struct ring_buffer *buffer;
1910         unsigned long flags;
1911
1912         /* If global, we need to also stop the max tracer */
1913         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1914                 return tracing_stop();
1915
1916         raw_spin_lock_irqsave(&tr->start_lock, flags);
1917         if (tr->stop_count++)
1918                 goto out;
1919
1920         buffer = tr->trace_buffer.buffer;
1921         if (buffer)
1922                 ring_buffer_record_disable(buffer);
1923
1924  out:
1925         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1926 }
1927
1928 static int trace_save_cmdline(struct task_struct *tsk)
1929 {
1930         unsigned pid, idx;
1931
1932         /* treat recording of idle task as a success */
1933         if (!tsk->pid)
1934                 return 1;
1935
1936         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
1937                 return 0;
1938
1939         /*
1940          * It's not the end of the world if we don't get
1941          * the lock, but we also don't want to spin
1942          * nor do we want to disable interrupts,
1943          * so if we miss here, then better luck next time.
1944          */
1945         if (!arch_spin_trylock(&trace_cmdline_lock))
1946                 return 0;
1947
1948         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1949         if (idx == NO_CMDLINE_MAP) {
1950                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1951
1952                 /*
1953                  * Check whether the cmdline buffer at idx has a pid
1954                  * mapped. We are going to overwrite that entry so we
1955                  * need to clear the map_pid_to_cmdline. Otherwise we
1956                  * would read the new comm for the old pid.
1957                  */
1958                 pid = savedcmd->map_cmdline_to_pid[idx];
1959                 if (pid != NO_CMDLINE_MAP)
1960                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1961
1962                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1963                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1964
1965                 savedcmd->cmdline_idx = idx;
1966         }
1967
1968         set_cmdline(idx, tsk->comm);
1969
1970         arch_spin_unlock(&trace_cmdline_lock);
1971
1972         return 1;
1973 }
1974
1975 static void __trace_find_cmdline(int pid, char comm[])
1976 {
1977         unsigned map;
1978
1979         if (!pid) {
1980                 strcpy(comm, "<idle>");
1981                 return;
1982         }
1983
1984         if (WARN_ON_ONCE(pid < 0)) {
1985                 strcpy(comm, "<XXX>");
1986                 return;
1987         }
1988
1989         if (pid > PID_MAX_DEFAULT) {
1990                 strcpy(comm, "<...>");
1991                 return;
1992         }
1993
1994         map = savedcmd->map_pid_to_cmdline[pid];
1995         if (map != NO_CMDLINE_MAP)
1996                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1997         else
1998                 strcpy(comm, "<...>");
1999 }
2000
2001 void trace_find_cmdline(int pid, char comm[])
2002 {
2003         preempt_disable();
2004         arch_spin_lock(&trace_cmdline_lock);
2005
2006         __trace_find_cmdline(pid, comm);
2007
2008         arch_spin_unlock(&trace_cmdline_lock);
2009         preempt_enable();
2010 }
2011
2012 int trace_find_tgid(int pid)
2013 {
2014         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2015                 return 0;
2016
2017         return tgid_map[pid];
2018 }
2019
2020 static int trace_save_tgid(struct task_struct *tsk)
2021 {
2022         /* treat recording of idle task as a success */
2023         if (!tsk->pid)
2024                 return 1;
2025
2026         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2027                 return 0;
2028
2029         tgid_map[tsk->pid] = tsk->tgid;
2030         return 1;
2031 }
2032
2033 static bool tracing_record_taskinfo_skip(int flags)
2034 {
2035         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2036                 return true;
2037         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2038                 return true;
2039         if (!__this_cpu_read(trace_taskinfo_save))
2040                 return true;
2041         return false;
2042 }
2043
2044 /**
2045  * tracing_record_taskinfo - record the task info of a task
2046  *
2047  * @task: task to record
2048  * @flags: TRACE_RECORD_CMDLINE for recording comm,
2049  *         TRACE_RECORD_TGID for recording tgid
2050  */
2051 void tracing_record_taskinfo(struct task_struct *task, int flags)
2052 {
2053         bool done;
2054
2055         if (tracing_record_taskinfo_skip(flags))
2056                 return;
2057
2058         /*
2059          * Record as much task information as possible. If some fail, continue
2060          * to try to record the others.
2061          */
2062         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2063         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2064
2065         /* If recording any information failed, retry again soon. */
2066         if (!done)
2067                 return;
2068
2069         __this_cpu_write(trace_taskinfo_save, false);
2070 }
2071
2072 /**
2073  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2074  *
2075  * @prev: previous task during sched_switch
2076  * @next: next task during sched_switch
2077  * @flags: TRACE_RECORD_CMDLINE for recording comm,
2078  *         TRACE_RECORD_TGID for recording tgid
2079  */
2080 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2081                                           struct task_struct *next, int flags)
2082 {
2083         bool done;
2084
2085         if (tracing_record_taskinfo_skip(flags))
2086                 return;
2087
2088         /*
2089          * Record as much task information as possible. If some fail, continue
2090          * to try to record the others.
2091          */
2092         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2093         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2094         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2095         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2096
2097         /* If recording any information failed, retry again soon. */
2098         if (!done)
2099                 return;
2100
2101         __this_cpu_write(trace_taskinfo_save, false);
2102 }
2103
2104 /* Helpers to record a specific task information */
2105 void tracing_record_cmdline(struct task_struct *task)
2106 {
2107         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2108 }
2109
2110 void tracing_record_tgid(struct task_struct *task)
2111 {
2112         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2113 }
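/*
 * Illustrative sketch, not part of the original file: a sched_switch probe
 * would record both tasks in one call, OR-ing together the flag bits for
 * whatever information it wants saved.  The example_ function is
 * hypothetical; the real probe lives in trace_sched_switch.c.
 */
static void example_sched_switch_probe(struct task_struct *prev,
				       struct task_struct *next)
{
	tracing_record_taskinfo_sched_switch(prev, next,
					     TRACE_RECORD_CMDLINE |
					     TRACE_RECORD_TGID);
}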
2114
2115 /*
2116  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2117  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2118  * simplifies those functions and keeps them in sync.
2119  */
2120 enum print_line_t trace_handle_return(struct trace_seq *s)
2121 {
2122         return trace_seq_has_overflowed(s) ?
2123                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2124 }
2125 EXPORT_SYMBOL_GPL(trace_handle_return);
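/*
 * Illustrative sketch, not part of the original file: a typical trace_event
 * output callback writes into iter->seq and lets trace_handle_return() pick
 * the right return value.  The example_ function is hypothetical.
 */
static enum print_line_t example_event_print(struct trace_iterator *iter,
					     int flags,
					     struct trace_event *event)
{
	trace_seq_printf(&iter->seq, "example event\n");

	return trace_handle_return(&iter->seq);
}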
2126
2127 void
2128 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2129                              int pc)
2130 {
2131         struct task_struct *tsk = current;
2132
2133         entry->preempt_count            = pc & 0xff;
2134         entry->pid                      = (tsk) ? tsk->pid : 0;
2135         entry->flags =
2136 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2137                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2138 #else
2139                 TRACE_FLAG_IRQS_NOSUPPORT |
2140 #endif
2141                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2142                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2143                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2144                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2145                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2146 }
2147 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
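/*
 * Illustrative sketch, not part of the original file: every event writer
 * fills in the common trace_entry header this way before adding its own
 * payload; trace_event_setup() earlier in this file is the real helper.
 * The example_ function is hypothetical.
 */
static void example_setup_entry(struct trace_entry *ent, int type,
				unsigned long flags, int pc)
{
	tracing_generic_entry_update(ent, flags, pc);	/* pid, irq/preempt state */
	ent->type = type;				/* which event this is */
}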
2148
2149 struct ring_buffer_event *
2150 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2151                           int type,
2152                           unsigned long len,
2153                           unsigned long flags, int pc)
2154 {
2155         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2156 }
2157
2158 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2159 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2160 static int trace_buffered_event_ref;
2161
2162 /**
2163  * trace_buffered_event_enable - enable buffering events
2164  *
2165  * When events are being filtered, it is quicker to use a temporary
2166  * buffer to write the event data into if there's a likely chance
2167  * that it will not be committed. Discarding an event from the ring
2168  * buffer is not as fast as committing it, and is much slower than
2169  * copying the data and then committing it.
2170  *
2171  * When an event is to be filtered, allocate per-cpu buffers to
2172  * write the event data into. If the event is filtered and discarded,
2173  * it is simply dropped; otherwise, the entire data is committed
2174  * in one shot.
2175  */
2176 void trace_buffered_event_enable(void)
2177 {
2178         struct ring_buffer_event *event;
2179         struct page *page;
2180         int cpu;
2181
2182         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2183
2184         if (trace_buffered_event_ref++)
2185                 return;
2186
2187         for_each_tracing_cpu(cpu) {
2188                 page = alloc_pages_node(cpu_to_node(cpu),
2189                                         GFP_KERNEL | __GFP_NORETRY, 0);
2190                 if (!page)
2191                         goto failed;
2192
2193                 event = page_address(page);
2194                 memset(event, 0, sizeof(*event));
2195
2196                 per_cpu(trace_buffered_event, cpu) = event;
2197
2198                 preempt_disable();
2199                 if (cpu == smp_processor_id() &&
2200                     this_cpu_read(trace_buffered_event) !=
2201                     per_cpu(trace_buffered_event, cpu))
2202                         WARN_ON_ONCE(1);
2203                 preempt_enable();
2204         }
2205
2206         return;
2207  failed:
2208         trace_buffered_event_disable();
2209 }
2210
2211 static void enable_trace_buffered_event(void *data)
2212 {
2213         /* Probably not needed, but do it anyway */
2214         smp_rmb();
2215         this_cpu_dec(trace_buffered_event_cnt);
2216 }
2217
2218 static void disable_trace_buffered_event(void *data)
2219 {
2220         this_cpu_inc(trace_buffered_event_cnt);
2221 }
2222
2223 /**
2224  * trace_buffered_event_disable - disable buffering events
2225  *
2226  * When a filter is removed, it is faster to not use the buffered
2227  * events, and to commit directly into the ring buffer. Free up
2228  * the temp buffers when there are no more users. This requires
2229  * special synchronization with current events.
2230  */
2231 void trace_buffered_event_disable(void)
2232 {
2233         int cpu;
2234
2235         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2236
2237         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2238                 return;
2239
2240         if (--trace_buffered_event_ref)
2241                 return;
2242
2243         preempt_disable();
2244         /* For each CPU, set the buffer as used. */
2245         smp_call_function_many(tracing_buffer_mask,
2246                                disable_trace_buffered_event, NULL, 1);
2247         preempt_enable();
2248
2249         /* Wait for all current users to finish */
2250         synchronize_sched();
2251
2252         for_each_tracing_cpu(cpu) {
2253                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2254                 per_cpu(trace_buffered_event, cpu) = NULL;
2255         }
2256         /*
2257          * Make sure trace_buffered_event is NULL before clearing
2258          * trace_buffered_event_cnt.
2259          */
2260         smp_wmb();
2261
2262         preempt_disable();
2263         /* Do the work on each cpu */
2264         smp_call_function_many(tracing_buffer_mask,
2265                                enable_trace_buffered_event, NULL, 1);
2266         preempt_enable();
2267 }
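/*
 * Illustrative sketch, not part of the original file: callers pair the
 * enable/disable under event_mutex, typically when an event filter is
 * attached and later removed.  The example_ function is hypothetical.
 */
static void example_filter_attach_detach(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();		/* filter added: buffer first */
	mutex_unlock(&event_mutex);

	/* ... filtered events now go through the per-cpu buffers ... */

	mutex_lock(&event_mutex);
	trace_buffered_event_disable();		/* filter gone: commit directly */
	mutex_unlock(&event_mutex);
}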
2268
2269 static struct ring_buffer *temp_buffer;
2270
2271 struct ring_buffer_event *
2272 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2273                           struct trace_event_file *trace_file,
2274                           int type, unsigned long len,
2275                           unsigned long flags, int pc)
2276 {
2277         struct ring_buffer_event *entry;
2278         int val;
2279
2280         *current_rb = trace_file->tr->trace_buffer.buffer;
2281
2282         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2283              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2284             (entry = this_cpu_read(trace_buffered_event))) {
2285                 /* Try to use the per cpu buffer first */
2286                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2287                 if (val == 1) {
2288                         trace_event_setup(entry, type, flags, pc);
2289                         entry->array[0] = len;
2290                         return entry;
2291                 }
2292                 this_cpu_dec(trace_buffered_event_cnt);
2293         }
2294
2295         entry = __trace_buffer_lock_reserve(*current_rb,
2296                                             type, len, flags, pc);
2297         /*
2298          * If tracing is off, but we have triggers enabled
2299          * we still need to look at the event data. Use the temp_buffer
2300          * to store the trace event for the trigger to use. It's recursion
2301          * safe and will not be recorded anywhere.
2302          */
2303         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2304                 *current_rb = temp_buffer;
2305                 entry = __trace_buffer_lock_reserve(*current_rb,
2306                                                     type, len, flags, pc);
2307         }
2308         return entry;
2309 }
2310 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2311
2312 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2313 static DEFINE_MUTEX(tracepoint_printk_mutex);
2314
2315 static void output_printk(struct trace_event_buffer *fbuffer)
2316 {
2317         struct trace_event_call *event_call;
2318         struct trace_event *event;
2319         unsigned long flags;
2320         struct trace_iterator *iter = tracepoint_print_iter;
2321
2322         /* We should never get here if iter is NULL */
2323         if (WARN_ON_ONCE(!iter))
2324                 return;
2325
2326         event_call = fbuffer->trace_file->event_call;
2327         if (!event_call || !event_call->event.funcs ||
2328             !event_call->event.funcs->trace)
2329                 return;
2330
2331         event = &fbuffer->trace_file->event_call->event;
2332
2333         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2334         trace_seq_init(&iter->seq);
2335         iter->ent = fbuffer->entry;
2336         event_call->event.funcs->trace(iter, 0, event);
2337         trace_seq_putc(&iter->seq, 0);
2338         printk("%s", iter->seq.buffer);
2339
2340         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2341 }
2342
2343 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2344                              void __user *buffer, size_t *lenp,
2345                              loff_t *ppos)
2346 {
2347         int save_tracepoint_printk;
2348         int ret;
2349
2350         mutex_lock(&tracepoint_printk_mutex);
2351         save_tracepoint_printk = tracepoint_printk;
2352
2353         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2354
2355         /*
2356          * This will force exiting early, as tracepoint_printk
2357          * is always zero when tracepoint_print_iter is not allocated.
2358          */
2359         if (!tracepoint_print_iter)
2360                 tracepoint_printk = 0;
2361
2362         if (save_tracepoint_printk == tracepoint_printk)
2363                 goto out;
2364
2365         if (tracepoint_printk)
2366                 static_key_enable(&tracepoint_printk_key.key);
2367         else
2368                 static_key_disable(&tracepoint_printk_key.key);
2369
2370  out:
2371         mutex_unlock(&tracepoint_printk_mutex);
2372
2373         return ret;
2374 }
2375
2376 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2377 {
2378         if (static_key_false(&tracepoint_printk_key.key))
2379                 output_printk(fbuffer);
2380
2381         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2382                                     fbuffer->event, fbuffer->entry,
2383                                     fbuffer->flags, fbuffer->pc);
2384 }
2385 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2386
2387 /*
2388  * Skip 3:
2389  *
2390  *   trace_buffer_unlock_commit_regs()
2391  *   trace_event_buffer_commit()
2392  *   trace_event_raw_event_xxx()
2393  */
2394 # define STACK_SKIP 3
2395
2396 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2397                                      struct ring_buffer *buffer,
2398                                      struct ring_buffer_event *event,
2399                                      unsigned long flags, int pc,
2400                                      struct pt_regs *regs)
2401 {
2402         __buffer_unlock_commit(buffer, event);
2403
2404         /*
2405          * If regs is not set, then skip the necessary functions.
2406          * Note, we can still get here via blktrace, wakeup tracer
2407          * and mmiotrace, but that's ok if they lose a function or
2408          * two. They are not that meaningful.
2409          */
2410         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2411         ftrace_trace_userstack(buffer, flags, pc);
2412 }
2413
2414 /*
2415  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2416  */
2417 void
2418 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2419                                    struct ring_buffer_event *event)
2420 {
2421         __buffer_unlock_commit(buffer, event);
2422 }
2423
2424 static void
2425 trace_process_export(struct trace_export *export,
2426                struct ring_buffer_event *event)
2427 {
2428         struct trace_entry *entry;
2429         unsigned int size = 0;
2430
2431         entry = ring_buffer_event_data(event);
2432         size = ring_buffer_event_length(event);
2433         export->write(export, entry, size);
2434 }
2435
2436 static DEFINE_MUTEX(ftrace_export_lock);
2437
2438 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2439
2440 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2441
2442 static inline void ftrace_exports_enable(void)
2443 {
2444         static_branch_enable(&ftrace_exports_enabled);
2445 }
2446
2447 static inline void ftrace_exports_disable(void)
2448 {
2449         static_branch_disable(&ftrace_exports_enabled);
2450 }
2451
2452 void ftrace_exports(struct ring_buffer_event *event)
2453 {
2454         struct trace_export *export;
2455
2456         preempt_disable_notrace();
2457
2458         export = rcu_dereference_raw_notrace(ftrace_exports_list);
2459         while (export) {
2460                 trace_process_export(export, event);
2461                 export = rcu_dereference_raw_notrace(export->next);
2462         }
2463
2464         preempt_enable_notrace();
2465 }
2466
2467 static inline void
2468 add_trace_export(struct trace_export **list, struct trace_export *export)
2469 {
2470         rcu_assign_pointer(export->next, *list);
2471         /*
2472          * We are adding the export to the list, but another
2473          * CPU might be walking that list. We need to make sure
2474          * the export->next pointer is valid before another CPU sees
2475          * the export pointer in the list.
2476          */
2477         rcu_assign_pointer(*list, export);
2478 }
2479
2480 static inline int
2481 rm_trace_export(struct trace_export **list, struct trace_export *export)
2482 {
2483         struct trace_export **p;
2484
2485         for (p = list; *p != NULL; p = &(*p)->next)
2486                 if (*p == export)
2487                         break;
2488
2489         if (*p != export)
2490                 return -1;
2491
2492         rcu_assign_pointer(*p, (*p)->next);
2493
2494         return 0;
2495 }
2496
2497 static inline void
2498 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2499 {
2500         if (*list == NULL)
2501                 ftrace_exports_enable();
2502
2503         add_trace_export(list, export);
2504 }
2505
2506 static inline int
2507 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2508 {
2509         int ret;
2510
2511         ret = rm_trace_export(list, export);
2512         if (*list == NULL)
2513                 ftrace_exports_disable();
2514
2515         return ret;
2516 }
2517
2518 int register_ftrace_export(struct trace_export *export)
2519 {
2520         if (WARN_ON_ONCE(!export->write))
2521                 return -1;
2522
2523         mutex_lock(&ftrace_export_lock);
2524
2525         add_ftrace_export(&ftrace_exports_list, export);
2526
2527         mutex_unlock(&ftrace_export_lock);
2528
2529         return 0;
2530 }
2531 EXPORT_SYMBOL_GPL(register_ftrace_export);
2532
2533 int unregister_ftrace_export(struct trace_export *export)
2534 {
2535         int ret;
2536
2537         mutex_lock(&ftrace_export_lock);
2538
2539         ret = rm_ftrace_export(&ftrace_exports_list, export);
2540
2541         mutex_unlock(&ftrace_export_lock);
2542
2543         return ret;
2544 }
2545 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
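/*
 * Illustrative sketch, not part of the original file: an export mirrors
 * every function-trace event to some out-of-band channel (the STM ftrace
 * bridge is an in-tree user).  The example_ names are hypothetical and the
 * ->write() prototype is an assumption based on how trace_process_export()
 * above invokes it; the authoritative definition is in <linux/trace.h>.
 */
static void example_export_write(struct trace_export *export,
				 void *entry, unsigned int size)
{
	/* push the raw trace_entry bytes somewhere else */
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

static int example_export_start(void)
{
	return register_ftrace_export(&example_export);
}

static void example_export_stop(void)
{
	unregister_ftrace_export(&example_export);
}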
2546
2547 void
2548 trace_function(struct trace_array *tr,
2549                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2550                int pc)
2551 {
2552         struct trace_event_call *call = &event_function;
2553         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2554         struct ring_buffer_event *event;
2555         struct ftrace_entry *entry;
2556
2557         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2558                                             flags, pc);
2559         if (!event)
2560                 return;
2561         entry   = ring_buffer_event_data(event);
2562         entry->ip                       = ip;
2563         entry->parent_ip                = parent_ip;
2564
2565         if (!call_filter_check_discard(call, entry, buffer, event)) {
2566                 if (static_branch_unlikely(&ftrace_exports_enabled))
2567                         ftrace_exports(event);
2568                 __buffer_unlock_commit(buffer, event);
2569         }
2570 }
2571
2572 #ifdef CONFIG_STACKTRACE
2573
2574 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2575 struct ftrace_stack {
2576         unsigned long           calls[FTRACE_STACK_MAX_ENTRIES];
2577 };
2578
2579 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2580 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2581
2582 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2583                                  unsigned long flags,
2584                                  int skip, int pc, struct pt_regs *regs)
2585 {
2586         struct trace_event_call *call = &event_kernel_stack;
2587         struct ring_buffer_event *event;
2588         struct stack_entry *entry;
2589         struct stack_trace trace;
2590         int use_stack;
2591         int size = FTRACE_STACK_ENTRIES;
2592
2593         trace.nr_entries        = 0;
2594         trace.skip              = skip;
2595
2596         /*
2597          * Add one for this function and the call to save_stack_trace().
2598          * If regs is set, then these functions will not be in the way.
2599          */
2600 #ifndef CONFIG_UNWINDER_ORC
2601         if (!regs)
2602                 trace.skip++;
2603 #endif
2604
2605         /*
2606          * Since events can happen in NMIs, there's no safe way to
2607          * use the per-cpu ftrace_stacks. We reserve it and if an interrupt
2608          * or NMI comes in, it will just have to use the default
2609          * FTRACE_STACK_ENTRIES.
2610          */
2611         preempt_disable_notrace();
2612
2613         use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2614         /*
2615          * We don't need any atomic variables, just a barrier.
2616          * If an interrupt comes in, we don't care, because it would
2617          * have exited and put the counter back to what we want.
2618          * We just need a barrier to keep gcc from moving things
2619          * around.
2620          */
2621         barrier();
2622         if (use_stack == 1) {
2623                 trace.entries           = this_cpu_ptr(ftrace_stack.calls);
2624                 trace.max_entries       = FTRACE_STACK_MAX_ENTRIES;
2625
2626                 if (regs)
2627                         save_stack_trace_regs(regs, &trace);
2628                 else
2629                         save_stack_trace(&trace);
2630
2631                 if (trace.nr_entries > size)
2632                         size = trace.nr_entries;
2633         } else
2634                 /* From now on, use_stack is a boolean */
2635                 use_stack = 0;
2636
2637         size *= sizeof(unsigned long);
2638
2639         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2640                                             sizeof(*entry) + size, flags, pc);
2641         if (!event)
2642                 goto out;
2643         entry = ring_buffer_event_data(event);
2644
2645         memset(&entry->caller, 0, size);
2646
2647         if (use_stack)
2648                 memcpy(&entry->caller, trace.entries,
2649                        trace.nr_entries * sizeof(unsigned long));
2650         else {
2651                 trace.max_entries       = FTRACE_STACK_ENTRIES;
2652                 trace.entries           = entry->caller;
2653                 if (regs)
2654                         save_stack_trace_regs(regs, &trace);
2655                 else
2656                         save_stack_trace(&trace);
2657         }
2658
2659         entry->size = trace.nr_entries;
2660
2661         if (!call_filter_check_discard(call, entry, buffer, event))
2662                 __buffer_unlock_commit(buffer, event);
2663
2664  out:
2665         /* Again, don't let gcc optimize things here */
2666         barrier();
2667         __this_cpu_dec(ftrace_stack_reserve);
2668         preempt_enable_notrace();
2669
2670 }
2671
2672 static inline void ftrace_trace_stack(struct trace_array *tr,
2673                                       struct ring_buffer *buffer,
2674                                       unsigned long flags,
2675                                       int skip, int pc, struct pt_regs *regs)
2676 {
2677         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2678                 return;
2679
2680         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2681 }
2682
2683 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2684                    int pc)
2685 {
2686         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2687
2688         if (rcu_is_watching()) {
2689                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2690                 return;
2691         }
2692
2693         /*
2694          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2695          * but if the above rcu_is_watching() failed, then the NMI
2696          * triggered someplace critical, and rcu_irq_enter() should
2697          * not be called from NMI.
2698          */
2699         if (unlikely(in_nmi()))
2700                 return;
2701
2702         rcu_irq_enter_irqson();
2703         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2704         rcu_irq_exit_irqson();
2705 }
2706
2707 /**
2708  * trace_dump_stack - record a stack back trace in the trace buffer
2709  * @skip: Number of functions to skip (helper handlers)
2710  */
2711 void trace_dump_stack(int skip)
2712 {
2713         unsigned long flags;
2714
2715         if (tracing_disabled || tracing_selftest_running)
2716                 return;
2717
2718         local_save_flags(flags);
2719
2720 #ifndef CONFIG_UNWINDER_ORC
2721         /* Skip 1 to skip this function. */
2722         skip++;
2723 #endif
2724         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2725                              flags, skip, preempt_count(), NULL);
2726 }
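/*
 * Illustrative sketch, not part of the original file: dropping a kernel
 * stack trace into the ring buffer from a code path under investigation,
 * without printing anything to the console.  The example_ function is
 * hypothetical.
 */
static void example_debug_hook(void)
{
	trace_dump_stack(0);	/* 0: don't skip any additional callers */
}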
2727
2728 static DEFINE_PER_CPU(int, user_stack_count);
2729
2730 void
2731 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2732 {
2733         struct trace_event_call *call = &event_user_stack;
2734         struct ring_buffer_event *event;
2735         struct userstack_entry *entry;
2736         struct stack_trace trace;
2737
2738         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2739                 return;
2740
2741         /*
2742          * NMIs cannot handle page faults, even with fixups.
2743          * Saving the user stack can (and often does) fault.
2744          */
2745         if (unlikely(in_nmi()))
2746                 return;
2747
2748         /*
2749          * prevent recursion, since the user stack tracing may
2750          * trigger other kernel events.
2751          */
2752         preempt_disable();
2753         if (__this_cpu_read(user_stack_count))
2754                 goto out;
2755
2756         __this_cpu_inc(user_stack_count);
2757
2758         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2759                                             sizeof(*entry), flags, pc);
2760         if (!event)
2761                 goto out_drop_count;
2762         entry   = ring_buffer_event_data(event);
2763
2764         entry->tgid             = current->tgid;
2765         memset(&entry->caller, 0, sizeof(entry->caller));
2766
2767         trace.nr_entries        = 0;
2768         trace.max_entries       = FTRACE_STACK_ENTRIES;
2769         trace.skip              = 0;
2770         trace.entries           = entry->caller;
2771
2772         save_stack_trace_user(&trace);
2773         if (!call_filter_check_discard(call, entry, buffer, event))
2774                 __buffer_unlock_commit(buffer, event);
2775
2776  out_drop_count:
2777         __this_cpu_dec(user_stack_count);
2778  out:
2779         preempt_enable();
2780 }
2781
2782 #ifdef UNUSED
2783 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2784 {
2785         ftrace_trace_userstack(tr, flags, preempt_count());
2786 }
2787 #endif /* UNUSED */
2788
2789 #endif /* CONFIG_STACKTRACE */
2790
2791 /* created for use with alloc_percpu */
2792 struct trace_buffer_struct {
2793         int nesting;
2794         char buffer[4][TRACE_BUF_SIZE];
2795 };
2796
2797 static struct trace_buffer_struct *trace_percpu_buffer;
2798
2799 /*
2800  * This allows for lockless recording.  If we're nested too deeply, then
2801  * this returns NULL.
2802  */
2803 static char *get_trace_buf(void)
2804 {
2805         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2806
2807         if (!buffer || buffer->nesting >= 4)
2808                 return NULL;
2809
2810         buffer->nesting++;
2811
2812         /* Interrupts must see nesting incremented before we use the buffer */
2813         barrier();
2814         return &buffer->buffer[buffer->nesting][0];
2815 }
2816
2817 static void put_trace_buf(void)
2818 {
2819         /* Don't let the decrement of nesting leak before this */
2820         barrier();
2821         this_cpu_dec(trace_percpu_buffer->nesting);
2822 }
2823
2824 static int alloc_percpu_trace_buffer(void)
2825 {
2826         struct trace_buffer_struct *buffers;
2827
2828         buffers = alloc_percpu(struct trace_buffer_struct);
2829         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2830                 return -ENOMEM;
2831
2832         trace_percpu_buffer = buffers;
2833         return 0;
2834 }
2835
2836 static int buffers_allocated;
2837
2838 void trace_printk_init_buffers(void)
2839 {
2840         if (buffers_allocated)
2841                 return;
2842
2843         if (alloc_percpu_trace_buffer())
2844                 return;
2845
2846         /* trace_printk() is for debug use only. Don't use it in production. */
2847
2848         pr_warn("\n");
2849         pr_warn("**********************************************************\n");
2850         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2851         pr_warn("**                                                      **\n");
2852         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
2853         pr_warn("**                                                      **\n");
2854         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
2855         pr_warn("** unsafe for production use.                           **\n");
2856         pr_warn("**                                                      **\n");
2857         pr_warn("** If you see this message and you are not debugging    **\n");
2858         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
2859         pr_warn("**                                                      **\n");
2860         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
2861         pr_warn("**********************************************************\n");
2862
2863         /* Expand the buffers to set size */
2864         tracing_update_buffers();
2865
2866         buffers_allocated = 1;
2867
2868         /*
2869          * trace_printk_init_buffers() can be called by modules.
2870          * If that happens, then we need to start cmdline recording
2871          * directly here. If the global_trace.buffer is already
2872          * allocated here, then this was called by module code.
2873          */
2874         if (global_trace.trace_buffer.buffer)
2875                 tracing_start_cmdline_record();
2876 }
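/*
 * Illustrative sketch, not part of the original file: trace_printk() is the
 * debug facility whose use triggers the extra allocation and the banner
 * above.  The example_ function is hypothetical.
 */
static void example_debug_print(int value)
{
	trace_printk("value is now %d\n", value);
}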
2877
2878 void trace_printk_start_comm(void)
2879 {
2880         /* Start tracing comms if trace printk is set */
2881         if (!buffers_allocated)
2882                 return;
2883         tracing_start_cmdline_record();
2884 }
2885
2886 static void trace_printk_start_stop_comm(int enabled)
2887 {
2888         if (!buffers_allocated)
2889                 return;
2890
2891         if (enabled)
2892                 tracing_start_cmdline_record();
2893         else
2894                 tracing_stop_cmdline_record();
2895 }
2896
2897 /**
2898  * trace_vbprintk - write a binary message to the tracing buffer
2899  *
2900  */
2901 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2902 {
2903         struct trace_event_call *call = &event_bprint;
2904         struct ring_buffer_event *event;
2905         struct ring_buffer *buffer;
2906         struct trace_array *tr = &global_trace;
2907         struct bprint_entry *entry;
2908         unsigned long flags;
2909         char *tbuffer;
2910         int len = 0, size, pc;
2911
2912         if (unlikely(tracing_selftest_running || tracing_disabled))
2913                 return 0;
2914
2915         /* Don't pollute graph traces with trace_vprintk internals */
2916         pause_graph_tracing();
2917
2918         pc = preempt_count();
2919         preempt_disable_notrace();
2920
2921         tbuffer = get_trace_buf();
2922         if (!tbuffer) {
2923                 len = 0;
2924                 goto out_nobuffer;
2925         }
2926
2927         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2928
2929         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2930                 goto out;
2931
2932         local_save_flags(flags);
2933         size = sizeof(*entry) + sizeof(u32) * len;
2934         buffer = tr->trace_buffer.buffer;
2935         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2936                                             flags, pc);
2937         if (!event)
2938                 goto out;
2939         entry = ring_buffer_event_data(event);
2940         entry->ip                       = ip;
2941         entry->fmt                      = fmt;
2942
2943         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2944         if (!call_filter_check_discard(call, entry, buffer, event)) {
2945                 __buffer_unlock_commit(buffer, event);
2946                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2947         }
2948
2949 out:
2950         put_trace_buf();
2951
2952 out_nobuffer:
2953         preempt_enable_notrace();
2954         unpause_graph_tracing();
2955
2956         return len;
2957 }
2958 EXPORT_SYMBOL_GPL(trace_vbprintk);
2959
2960 static int
2961 __trace_array_vprintk(struct ring_buffer *buffer,
2962                       unsigned long ip, const char *fmt, va_list args)
2963 {
2964         struct trace_event_call *call = &event_print;
2965         struct ring_buffer_event *event;
2966         int len = 0, size, pc;
2967         struct print_entry *entry;
2968         unsigned long flags;
2969         char *tbuffer;
2970
2971         if (tracing_disabled || tracing_selftest_running)
2972                 return 0;
2973
2974         /* Don't pollute graph traces with trace_vprintk internals */
2975         pause_graph_tracing();
2976
2977         pc = preempt_count();
2978         preempt_disable_notrace();
2979
2980
2981         tbuffer = get_trace_buf();
2982         if (!tbuffer) {
2983                 len = 0;
2984                 goto out_nobuffer;
2985         }
2986
2987         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2988
2989         local_save_flags(flags);
2990         size = sizeof(*entry) + len + 1;
2991         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2992                                             flags, pc);
2993         if (!event)
2994                 goto out;
2995         entry = ring_buffer_event_data(event);
2996         entry->ip = ip;
2997
2998         memcpy(&entry->buf, tbuffer, len + 1);
2999         if (!call_filter_check_discard(call, entry, buffer, event)) {
3000                 __buffer_unlock_commit(buffer, event);
3001                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3002         }
3003
3004 out:
3005         put_trace_buf();
3006
3007 out_nobuffer:
3008         preempt_enable_notrace();
3009         unpause_graph_tracing();
3010
3011         return len;
3012 }
3013
3014 int trace_array_vprintk(struct trace_array *tr,
3015                         unsigned long ip, const char *fmt, va_list args)
3016 {
3017         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3018 }
3019
3020 int trace_array_printk(struct trace_array *tr,
3021                        unsigned long ip, const char *fmt, ...)
3022 {
3023         int ret;
3024         va_list ap;
3025
3026         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3027                 return 0;
3028
3029         va_start(ap, fmt);
3030         ret = trace_array_vprintk(tr, ip, fmt, ap);
3031         va_end(ap);
3032         return ret;
3033 }
3034
3035 int trace_array_printk_buf(struct ring_buffer *buffer,
3036                            unsigned long ip, const char *fmt, ...)
3037 {
3038         int ret;
3039         va_list ap;
3040
3041         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3042                 return 0;
3043
3044         va_start(ap, fmt);
3045         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3046         va_end(ap);
3047         return ret;
3048 }
3049
3050 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3051 {
3052         return trace_array_vprintk(&global_trace, ip, fmt, args);
3053 }
3054 EXPORT_SYMBOL_GPL(trace_vprintk);
3055
3056 static void trace_iterator_increment(struct trace_iterator *iter)
3057 {
3058         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3059
3060         iter->idx++;
3061         if (buf_iter)
3062                 ring_buffer_read(buf_iter, NULL);
3063 }
3064
3065 static struct trace_entry *
3066 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3067                 unsigned long *lost_events)
3068 {
3069         struct ring_buffer_event *event;
3070         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3071
3072         if (buf_iter)
3073                 event = ring_buffer_iter_peek(buf_iter, ts);
3074         else
3075                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3076                                          lost_events);
3077
3078         if (event) {
3079                 iter->ent_size = ring_buffer_event_length(event);
3080                 return ring_buffer_event_data(event);
3081         }
3082         iter->ent_size = 0;
3083         return NULL;
3084 }
3085
3086 static struct trace_entry *
3087 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3088                   unsigned long *missing_events, u64 *ent_ts)
3089 {
3090         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3091         struct trace_entry *ent, *next = NULL;
3092         unsigned long lost_events = 0, next_lost = 0;
3093         int cpu_file = iter->cpu_file;
3094         u64 next_ts = 0, ts;
3095         int next_cpu = -1;
3096         int next_size = 0;
3097         int cpu;
3098
3099         /*
3100          * If we are in a per_cpu trace file, don't bother iterating over
3101          * all the cpus; just peek at the requested cpu directly.
3102          */
3103         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3104                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3105                         return NULL;
3106                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3107                 if (ent_cpu)
3108                         *ent_cpu = cpu_file;
3109
3110                 return ent;
3111         }
3112
3113         for_each_tracing_cpu(cpu) {
3114
3115                 if (ring_buffer_empty_cpu(buffer, cpu))
3116                         continue;
3117
3118                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3119
3120                 /*
3121                  * Pick the entry with the smallest timestamp:
3122                  */
3123                 if (ent && (!next || ts < next_ts)) {
3124                         next = ent;
3125                         next_cpu = cpu;
3126                         next_ts = ts;
3127                         next_lost = lost_events;
3128                         next_size = iter->ent_size;
3129                 }
3130         }
3131
3132         iter->ent_size = next_size;
3133
3134         if (ent_cpu)
3135                 *ent_cpu = next_cpu;
3136
3137         if (ent_ts)
3138                 *ent_ts = next_ts;
3139
3140         if (missing_events)
3141                 *missing_events = next_lost;
3142
3143         return next;
3144 }
3145
3146 /* Find the next real entry, without updating the iterator itself */
3147 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3148                                           int *ent_cpu, u64 *ent_ts)
3149 {
3150         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3151 }
3152
3153 /* Find the next real entry, and increment the iterator to the next entry */
3154 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3155 {
3156         iter->ent = __find_next_entry(iter, &iter->cpu,
3157                                       &iter->lost_events, &iter->ts);
3158
3159         if (iter->ent)
3160                 trace_iterator_increment(iter);
3161
3162         return iter->ent ? iter : NULL;
3163 }
3164
3165 static void trace_consume(struct trace_iterator *iter)
3166 {
3167         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3168                             &iter->lost_events);
3169 }
3170
3171 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3172 {
3173         struct trace_iterator *iter = m->private;
3174         int i = (int)*pos;
3175         void *ent;
3176
3177         WARN_ON_ONCE(iter->leftover);
3178
3179         (*pos)++;
3180
3181         /* can't go backwards */
3182         if (iter->idx > i)
3183                 return NULL;
3184
3185         if (iter->idx < 0)
3186                 ent = trace_find_next_entry_inc(iter);
3187         else
3188                 ent = iter;
3189
3190         while (ent && iter->idx < i)
3191                 ent = trace_find_next_entry_inc(iter);
3192
3193         iter->pos = *pos;
3194
3195         return ent;
3196 }
3197
3198 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3199 {
3200         struct ring_buffer_event *event;
3201         struct ring_buffer_iter *buf_iter;
3202         unsigned long entries = 0;
3203         u64 ts;
3204
3205         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3206
3207         buf_iter = trace_buffer_iter(iter, cpu);
3208         if (!buf_iter)
3209                 return;
3210
3211         ring_buffer_iter_reset(buf_iter);
3212
3213         /*
3214          * With the max latency tracers, it can happen that a reset
3215          * never took place on a cpu. This is evident when the
3216          * timestamp is before the start of the buffer.
3217          */
3218         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3219                 if (ts >= iter->trace_buffer->time_start)
3220                         break;
3221                 entries++;
3222                 ring_buffer_read(buf_iter, NULL);
3223         }
3224
3225         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3226 }
3227
3228 /*
3229  * The current tracer is copied to avoid taking a global lock
3230  * all around.
3231  */
3232 static void *s_start(struct seq_file *m, loff_t *pos)
3233 {
3234         struct trace_iterator *iter = m->private;
3235         struct trace_array *tr = iter->tr;
3236         int cpu_file = iter->cpu_file;
3237         void *p = NULL;
3238         loff_t l = 0;
3239         int cpu;
3240
3241         /*
3242          * Copy the tracer to avoid using a global lock all around.
3243          * iter->trace is a copy of current_trace; the pointer to the
3244          * name may be used instead of a strcmp(), as iter->trace->name
3245          * will point to the same string as current_trace->name.
3246          */
3247         mutex_lock(&trace_types_lock);
3248         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3249                 *iter->trace = *tr->current_trace;
3250         mutex_unlock(&trace_types_lock);
3251
3252 #ifdef CONFIG_TRACER_MAX_TRACE
3253         if (iter->snapshot && iter->trace->use_max_tr)
3254                 return ERR_PTR(-EBUSY);
3255 #endif
3256
3257         if (!iter->snapshot)
3258                 atomic_inc(&trace_record_taskinfo_disabled);
3259
3260         if (*pos != iter->pos) {
3261                 iter->ent = NULL;
3262                 iter->cpu = 0;
3263                 iter->idx = -1;
3264
3265                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3266                         for_each_tracing_cpu(cpu)
3267                                 tracing_iter_reset(iter, cpu);
3268                 } else
3269                         tracing_iter_reset(iter, cpu_file);
3270
3271                 iter->leftover = 0;
3272                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3273                         ;
3274
3275         } else {
3276                 /*
3277                  * If we overflowed the seq_file before, then we want
3278                  * to just reuse the trace_seq buffer again.
3279                  */
3280                 if (iter->leftover)
3281                         p = iter;
3282                 else {
3283                         l = *pos - 1;
3284                         p = s_next(m, p, &l);
3285                 }
3286         }
3287
3288         trace_event_read_lock();
3289         trace_access_lock(cpu_file);
3290         return p;
3291 }
3292
3293 static void s_stop(struct seq_file *m, void *p)
3294 {
3295         struct trace_iterator *iter = m->private;
3296
3297 #ifdef CONFIG_TRACER_MAX_TRACE
3298         if (iter->snapshot && iter->trace->use_max_tr)
3299                 return;
3300 #endif
3301
3302         if (!iter->snapshot)
3303                 atomic_dec(&trace_record_taskinfo_disabled);
3304
3305         trace_access_unlock(iter->cpu_file);
3306         trace_event_read_unlock();
3307 }
3308
3309 static void
3310 get_total_entries(struct trace_buffer *buf,
3311                   unsigned long *total, unsigned long *entries)
3312 {
3313         unsigned long count;
3314         int cpu;
3315
3316         *total = 0;
3317         *entries = 0;
3318
3319         for_each_tracing_cpu(cpu) {
3320                 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3321                 /*
3322                  * If this buffer has skipped entries, then we hold all
3323                  * entries for the trace and we need to ignore the
3324                  * ones before the time stamp.
3325                  */
3326                 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3327                         count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3328                         /* total is the same as the entries */
3329                         *total += count;
3330                 } else
3331                         *total += count +
3332                                 ring_buffer_overrun_cpu(buf->buffer, cpu);
3333                 *entries += count;
3334         }
3335 }
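
/*
 * Worked example of the accounting above (numbers are made up): if a
 * CPU buffer currently holds 100 readable events and 20 older events
 * were overwritten (overrun), then *entries gains 100 while *total
 * gains 120.  If that CPU instead has skipped_entries set (see
 * tracing_iter_reset()), the overrun is not added; the skipped events
 * are simply subtracted from the per-cpu count before it is summed.
 */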
3336
3337 static void print_lat_help_header(struct seq_file *m)
3338 {
3339         seq_puts(m, "#                  _------=> CPU#            \n"
3340                     "#                 / _-----=> irqs-off        \n"
3341                     "#                | / _----=> need-resched    \n"
3342                     "#                || / _---=> hardirq/softirq \n"
3343                     "#                ||| / _--=> preempt-depth   \n"
3344                     "#                |||| /     delay            \n"
3345                     "#  cmd     pid   ||||| time  |   caller      \n"
3346                     "#     \\   /      |||||  \\    |   /         \n");
3347 }
3348
3349 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3350 {
3351         unsigned long total;
3352         unsigned long entries;
3353
3354         get_total_entries(buf, &total, &entries);
3355         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3356                    entries, total, num_online_cpus());
3357         seq_puts(m, "#\n");
3358 }
3359
3360 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3361                                    unsigned int flags)
3362 {
3363         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3364
3365         print_event_info(buf, m);
3366
3367         seq_printf(m, "#           TASK-PID   CPU#   %s  TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3368         seq_printf(m, "#              | |       |    %s     |         |\n",      tgid ? "  |      " : "");
3369 }
3370
3371 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3372                                        unsigned int flags)
3373 {
3374         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3375         const char tgid_space[] = "          ";
3376         const char space[] = "  ";
3377
3378         seq_printf(m, "#                          %s  _-----=> irqs-off\n",
3379                    tgid ? tgid_space : space);
3380         seq_printf(m, "#                          %s / _----=> need-resched\n",
3381                    tgid ? tgid_space : space);
3382         seq_printf(m, "#                          %s| / _---=> hardirq/softirq\n",
3383                    tgid ? tgid_space : space);
3384         seq_printf(m, "#                          %s|| / _--=> preempt-depth\n",
3385                    tgid ? tgid_space : space);
3386         seq_printf(m, "#                          %s||| /     delay\n",
3387                    tgid ? tgid_space : space);
3388         seq_printf(m, "#           TASK-PID   CPU#%s||||    TIMESTAMP  FUNCTION\n",
3389                    tgid ? "   TGID   " : space);
3390         seq_printf(m, "#              | |       | %s||||       |         |\n",
3391                    tgid ? "     |    " : space);
3392 }
3393
3394 void
3395 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3396 {
3397         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3398         struct trace_buffer *buf = iter->trace_buffer;
3399         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3400         struct tracer *type = iter->trace;
3401         unsigned long entries;
3402         unsigned long total;
3403         const char *name = type->name;
3406
3407         get_total_entries(buf, &total, &entries);
3408
3409         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3410                    name, UTS_RELEASE);
3411         seq_puts(m, "# -----------------------------------"
3412                  "---------------------------------\n");
3413         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3414                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3415                    nsecs_to_usecs(data->saved_latency),
3416                    entries,
3417                    total,
3418                    buf->cpu,
3419 #if defined(CONFIG_PREEMPT_NONE)
3420                    "server",
3421 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3422                    "desktop",
3423 #elif defined(CONFIG_PREEMPT)
3424                    "preempt",
3425 #else
3426                    "unknown",
3427 #endif
3428                    /* These are reserved for later use */
3429                    0, 0, 0, 0);
3430 #ifdef CONFIG_SMP
3431         seq_printf(m, " #P:%d)\n", num_online_cpus());
3432 #else
3433         seq_puts(m, ")\n");
3434 #endif
3435         seq_puts(m, "#    -----------------\n");
3436         seq_printf(m, "#    | task: %.16s-%d "
3437                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3438                    data->comm, data->pid,
3439                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3440                    data->policy, data->rt_priority);
3441         seq_puts(m, "#    -----------------\n");
3442
3443         if (data->critical_start) {
3444                 seq_puts(m, "#  => started at: ");
3445                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3446                 trace_print_seq(m, &iter->seq);
3447                 seq_puts(m, "\n#  => ended at:   ");
3448                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3449                 trace_print_seq(m, &iter->seq);
3450                 seq_puts(m, "\n#\n");
3451         }
3452
3453         seq_puts(m, "#\n");
3454 }
3455
3456 static void test_cpu_buff_start(struct trace_iterator *iter)
3457 {
3458         struct trace_seq *s = &iter->seq;
3459         struct trace_array *tr = iter->tr;
3460
3461         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3462                 return;
3463
3464         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3465                 return;
3466
3467         if (cpumask_available(iter->started) &&
3468             cpumask_test_cpu(iter->cpu, iter->started))
3469                 return;
3470
3471         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3472                 return;
3473
3474         if (cpumask_available(iter->started))
3475                 cpumask_set_cpu(iter->cpu, iter->started);
3476
3477         /* Don't print started cpu buffer for the first entry of the trace */
3478         if (iter->idx > 1)
3479                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3480                                 iter->cpu);
3481 }
3482
3483 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3484 {
3485         struct trace_array *tr = iter->tr;
3486         struct trace_seq *s = &iter->seq;
3487         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3488         struct trace_entry *entry;
3489         struct trace_event *event;
3490
3491         entry = iter->ent;
3492
3493         test_cpu_buff_start(iter);
3494
3495         event = ftrace_find_event(entry->type);
3496
3497         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3498                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3499                         trace_print_lat_context(iter);
3500                 else
3501                         trace_print_context(iter);
3502         }
3503
3504         if (trace_seq_has_overflowed(s))
3505                 return TRACE_TYPE_PARTIAL_LINE;
3506
3507         if (event)
3508                 return event->funcs->trace(iter, sym_flags, event);
3509
3510         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3511
3512         return trace_handle_return(s);
3513 }
3514
3515 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3516 {
3517         struct trace_array *tr = iter->tr;
3518         struct trace_seq *s = &iter->seq;
3519         struct trace_entry *entry;
3520         struct trace_event *event;
3521
3522         entry = iter->ent;
3523
3524         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3525                 trace_seq_printf(s, "%d %d %llu ",
3526                                  entry->pid, iter->cpu, iter->ts);
3527
3528         if (trace_seq_has_overflowed(s))
3529                 return TRACE_TYPE_PARTIAL_LINE;
3530
3531         event = ftrace_find_event(entry->type);
3532         if (event)
3533                 return event->funcs->raw(iter, 0, event);
3534
3535         trace_seq_printf(s, "%d ?\n", entry->type);
3536
3537         return trace_handle_return(s);
3538 }
3539
3540 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3541 {
3542         struct trace_array *tr = iter->tr;
3543         struct trace_seq *s = &iter->seq;
3544         unsigned char newline = '\n';
3545         struct trace_entry *entry;
3546         struct trace_event *event;
3547
3548         entry = iter->ent;
3549
3550         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3551                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3552                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3553                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3554                 if (trace_seq_has_overflowed(s))
3555                         return TRACE_TYPE_PARTIAL_LINE;
3556         }
3557
3558         event = ftrace_find_event(entry->type);
3559         if (event) {
3560                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3561                 if (ret != TRACE_TYPE_HANDLED)
3562                         return ret;
3563         }
3564
3565         SEQ_PUT_FIELD(s, newline);
3566
3567         return trace_handle_return(s);
3568 }
3569
3570 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3571 {
3572         struct trace_array *tr = iter->tr;
3573         struct trace_seq *s = &iter->seq;
3574         struct trace_entry *entry;
3575         struct trace_event *event;
3576
3577         entry = iter->ent;
3578
3579         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3580                 SEQ_PUT_FIELD(s, entry->pid);
3581                 SEQ_PUT_FIELD(s, iter->cpu);
3582                 SEQ_PUT_FIELD(s, iter->ts);
3583                 if (trace_seq_has_overflowed(s))
3584                         return TRACE_TYPE_PARTIAL_LINE;
3585         }
3586
3587         event = ftrace_find_event(entry->type);
3588         return event ? event->funcs->binary(iter, 0, event) :
3589                 TRACE_TYPE_HANDLED;
3590 }
3591
3592 int trace_empty(struct trace_iterator *iter)
3593 {
3594         struct ring_buffer_iter *buf_iter;
3595         int cpu;
3596
3597         /* If we are looking at one CPU buffer, only check that one */
3598         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3599                 cpu = iter->cpu_file;
3600                 buf_iter = trace_buffer_iter(iter, cpu);
3601                 if (buf_iter) {
3602                         if (!ring_buffer_iter_empty(buf_iter))
3603                                 return 0;
3604                 } else {
3605                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3606                                 return 0;
3607                 }
3608                 return 1;
3609         }
3610
3611         for_each_tracing_cpu(cpu) {
3612                 buf_iter = trace_buffer_iter(iter, cpu);
3613                 if (buf_iter) {
3614                         if (!ring_buffer_iter_empty(buf_iter))
3615                                 return 0;
3616                 } else {
3617                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3618                                 return 0;
3619                 }
3620         }
3621
3622         return 1;
3623 }
3624
3625 /*  Called with trace_event_read_lock() held. */
3626 enum print_line_t print_trace_line(struct trace_iterator *iter)
3627 {
3628         struct trace_array *tr = iter->tr;
3629         unsigned long trace_flags = tr->trace_flags;
3630         enum print_line_t ret;
3631
3632         if (iter->lost_events) {
3633                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3634                                  iter->cpu, iter->lost_events);
3635                 if (trace_seq_has_overflowed(&iter->seq))
3636                         return TRACE_TYPE_PARTIAL_LINE;
3637         }
3638
3639         if (iter->trace && iter->trace->print_line) {
3640                 ret = iter->trace->print_line(iter);
3641                 if (ret != TRACE_TYPE_UNHANDLED)
3642                         return ret;
3643         }
3644
3645         if (iter->ent->type == TRACE_BPUTS &&
3646                         trace_flags & TRACE_ITER_PRINTK &&
3647                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3648                 return trace_print_bputs_msg_only(iter);
3649
3650         if (iter->ent->type == TRACE_BPRINT &&
3651                         trace_flags & TRACE_ITER_PRINTK &&
3652                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3653                 return trace_print_bprintk_msg_only(iter);
3654
3655         if (iter->ent->type == TRACE_PRINT &&
3656                         trace_flags & TRACE_ITER_PRINTK &&
3657                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3658                 return trace_print_printk_msg_only(iter);
3659
3660         if (trace_flags & TRACE_ITER_BIN)
3661                 return print_bin_fmt(iter);
3662
3663         if (trace_flags & TRACE_ITER_HEX)
3664                 return print_hex_fmt(iter);
3665
3666         if (trace_flags & TRACE_ITER_RAW)
3667                 return print_raw_fmt(iter);
3668
3669         return print_trace_fmt(iter);
3670 }
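
/*
 * Note on the selection order in print_trace_line() above: the tracer's
 * own print_line() callback gets first chance at the entry, then the
 * printk msg-only shortcuts, and only then the generic formatters, with
 * "bin" taking priority over "hex", "hex" over "raw", and the default
 * human-readable format used as the final fallback.
 */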
3671
3672 void trace_latency_header(struct seq_file *m)
3673 {
3674         struct trace_iterator *iter = m->private;
3675         struct trace_array *tr = iter->tr;
3676
3677         /* print nothing if the buffers are empty */
3678         if (trace_empty(iter))
3679                 return;
3680
3681         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3682                 print_trace_header(m, iter);
3683
3684         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3685                 print_lat_help_header(m);
3686 }
3687
3688 void trace_default_header(struct seq_file *m)
3689 {
3690         struct trace_iterator *iter = m->private;
3691         struct trace_array *tr = iter->tr;
3692         unsigned long trace_flags = tr->trace_flags;
3693
3694         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3695                 return;
3696
3697         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3698                 /* print nothing if the buffers are empty */
3699                 if (trace_empty(iter))
3700                         return;
3701                 print_trace_header(m, iter);
3702                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3703                         print_lat_help_header(m);
3704         } else {
3705                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3706                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3707                                 print_func_help_header_irq(iter->trace_buffer,
3708                                                            m, trace_flags);
3709                         else
3710                                 print_func_help_header(iter->trace_buffer, m,
3711                                                        trace_flags);
3712                 }
3713         }
3714 }
3715
3716 static void test_ftrace_alive(struct seq_file *m)
3717 {
3718         if (!ftrace_is_dead())
3719                 return;
3720         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3721                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3722 }
3723
3724 #ifdef CONFIG_TRACER_MAX_TRACE
3725 static void show_snapshot_main_help(struct seq_file *m)
3726 {
3727         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3728                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3729                     "#                      Takes a snapshot of the main buffer.\n"
3730                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3731                     "#                      (Doesn't have to be '2'; works with any number that\n"
3732                     "#                       is not a '0' or '1')\n");
3733 }
3734
3735 static void show_snapshot_percpu_help(struct seq_file *m)
3736 {
3737         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3738 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3739         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3740                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3741 #else
3742         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3743                     "#                     Must use main snapshot file to allocate.\n");
3744 #endif
3745         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3746                     "#                      (Doesn't have to be '2'; works with any number that\n"
3747                     "#                       is not a '0' or '1')\n");
3748 }
3749
3750 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3751 {
3752         if (iter->tr->allocated_snapshot)
3753                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3754         else
3755                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3756
3757         seq_puts(m, "# Snapshot commands:\n");
3758         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3759                 show_snapshot_main_help(m);
3760         else
3761                 show_snapshot_percpu_help(m);
3762 }
3763 #else
3764 /* Should never be called */
3765 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3766 #endif
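
/*
 * Illustrative shell session for the snapshot interface documented in
 * the help text above (the tracefs mount point is assumed to be
 * /sys/kernel/tracing):
 *
 *   echo 1 > /sys/kernel/tracing/snapshot   # allocate and take a snapshot
 *   cat /sys/kernel/tracing/snapshot        # read the frozen copy
 *   echo 2 > /sys/kernel/tracing/snapshot   # clear it, keep it allocated
 *   echo 0 > /sys/kernel/tracing/snapshot   # clear and free the buffer
 */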
3767
3768 static int s_show(struct seq_file *m, void *v)
3769 {
3770         struct trace_iterator *iter = v;
3771         int ret;
3772
3773         if (iter->ent == NULL) {
3774                 if (iter->tr) {
3775                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
3776                         seq_puts(m, "#\n");
3777                         test_ftrace_alive(m);
3778                 }
3779                 if (iter->snapshot && trace_empty(iter))
3780                         print_snapshot_help(m, iter);
3781                 else if (iter->trace && iter->trace->print_header)
3782                         iter->trace->print_header(m);
3783                 else
3784                         trace_default_header(m);
3785
3786         } else if (iter->leftover) {
3787                 /*
3788                  * If we filled the seq_file buffer earlier, we
3789                  * want to just show it now.
3790                  */
3791                 ret = trace_print_seq(m, &iter->seq);
3792
3793                 /* ret should this time be zero, but you never know */
3794                 iter->leftover = ret;
3795
3796         } else {
3797                 print_trace_line(iter);
3798                 ret = trace_print_seq(m, &iter->seq);
3799                 /*
3800                  * If we overflow the seq_file buffer, then it will
3801                  * ask us for this data again at start up.
3802                  * Use that instead.
3803                  *  ret is 0 if seq_file write succeeded.
3804                  *        -1 otherwise.
3805                  */
3806                 iter->leftover = ret;
3807         }
3808
3809         return 0;
3810 }
3811
3812 /*
3813  * Should be used after trace_array_get(); trace_types_lock
3814  * ensures that i_cdev was already initialized.
3815  */
3816 static inline int tracing_get_cpu(struct inode *inode)
3817 {
3818         if (inode->i_cdev) /* See trace_create_cpu_file() */
3819                 return (long)inode->i_cdev - 1;
3820         return RING_BUFFER_ALL_CPUS;
3821 }
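
/*
 * Note: trace_create_cpu_file() stashes "cpu + 1" in i_cdev, so a NULL
 * i_cdev (the default) can unambiguously mean "all CPUs"; the
 * subtraction above undoes that bias for per-cpu files.
 */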
3822
3823 static const struct seq_operations tracer_seq_ops = {
3824         .start          = s_start,
3825         .next           = s_next,
3826         .stop           = s_stop,
3827         .show           = s_show,
3828 };
3829
3830 static struct trace_iterator *
3831 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3832 {
3833         struct trace_array *tr = inode->i_private;
3834         struct trace_iterator *iter;
3835         int cpu;
3836
3837         if (tracing_disabled)
3838                 return ERR_PTR(-ENODEV);
3839
3840         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3841         if (!iter)
3842                 return ERR_PTR(-ENOMEM);
3843
3844         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3845                                     GFP_KERNEL);
3846         if (!iter->buffer_iter)
3847                 goto release;
3848
3849         /*
3850          * We make a copy of the current tracer to avoid concurrent
3851          * changes on it while we are reading.
3852          */
3853         mutex_lock(&trace_types_lock);
3854         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3855         if (!iter->trace)
3856                 goto fail;
3857
3858         *iter->trace = *tr->current_trace;
3859
3860         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3861                 goto fail;
3862
3863         iter->tr = tr;
3864
3865 #ifdef CONFIG_TRACER_MAX_TRACE
3866         /* Currently only the top directory has a snapshot */
3867         if (tr->current_trace->print_max || snapshot)
3868                 iter->trace_buffer = &tr->max_buffer;
3869         else
3870 #endif
3871                 iter->trace_buffer = &tr->trace_buffer;
3872         iter->snapshot = snapshot;
3873         iter->pos = -1;
3874         iter->cpu_file = tracing_get_cpu(inode);
3875         mutex_init(&iter->mutex);
3876
3877         /* Notify the tracer early; before we stop tracing. */
3878         if (iter->trace && iter->trace->open)
3879                 iter->trace->open(iter);
3880
3881         /* Annotate start of buffers if we had overruns */
3882         if (ring_buffer_overruns(iter->trace_buffer->buffer))
3883                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3884
3885         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3886         if (trace_clocks[tr->clock_id].in_ns)
3887                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3888
3889         /* stop the trace while dumping if we are not opening "snapshot" */
3890         if (!iter->snapshot)
3891                 tracing_stop_tr(tr);
3892
3893         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3894                 for_each_tracing_cpu(cpu) {
3895                         iter->buffer_iter[cpu] =
3896                                 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3897                 }
3898                 ring_buffer_read_prepare_sync();
3899                 for_each_tracing_cpu(cpu) {
3900                         ring_buffer_read_start(iter->buffer_iter[cpu]);
3901                         tracing_iter_reset(iter, cpu);
3902                 }
3903         } else {
3904                 cpu = iter->cpu_file;
3905                 iter->buffer_iter[cpu] =
3906                         ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3907                 ring_buffer_read_prepare_sync();
3908                 ring_buffer_read_start(iter->buffer_iter[cpu]);
3909                 tracing_iter_reset(iter, cpu);
3910         }
3911
3912         mutex_unlock(&trace_types_lock);
3913
3914         return iter;
3915
3916  fail:
3917         mutex_unlock(&trace_types_lock);
3918         kfree(iter->trace);
3919         kfree(iter->buffer_iter);
3920 release:
3921         seq_release_private(inode, file);
3922         return ERR_PTR(-ENOMEM);
3923 }
3924
3925 int tracing_open_generic(struct inode *inode, struct file *filp)
3926 {
3927         if (tracing_disabled)
3928                 return -ENODEV;
3929
3930         filp->private_data = inode->i_private;
3931         return 0;
3932 }
3933
3934 bool tracing_is_disabled(void)
3935 {
3936         return tracing_disabled;
3937 }
3938
3939 /*
3940  * Open and update trace_array ref count.
3941  * Must have the current trace_array passed to it.
3942  */
3943 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3944 {
3945         struct trace_array *tr = inode->i_private;
3946
3947         if (tracing_disabled)
3948                 return -ENODEV;
3949
3950         if (trace_array_get(tr) < 0)
3951                 return -ENODEV;
3952
3953         filp->private_data = inode->i_private;
3954
3955         return 0;
3956 }
3957
3958 static int tracing_release(struct inode *inode, struct file *file)
3959 {
3960         struct trace_array *tr = inode->i_private;
3961         struct seq_file *m = file->private_data;
3962         struct trace_iterator *iter;
3963         int cpu;
3964
3965         if (!(file->f_mode & FMODE_READ)) {
3966                 trace_array_put(tr);
3967                 return 0;
3968         }
3969
3970         /* Writes do not use seq_file */
3971         iter = m->private;
3972         mutex_lock(&trace_types_lock);
3973
3974         for_each_tracing_cpu(cpu) {
3975                 if (iter->buffer_iter[cpu])
3976                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
3977         }
3978
3979         if (iter->trace && iter->trace->close)
3980                 iter->trace->close(iter);
3981
3982         if (!iter->snapshot)
3983                 /* reenable tracing if it was previously enabled */
3984                 tracing_start_tr(tr);
3985
3986         __trace_array_put(tr);
3987
3988         mutex_unlock(&trace_types_lock);
3989
3990         mutex_destroy(&iter->mutex);
3991         free_cpumask_var(iter->started);
3992         kfree(iter->trace);
3993         kfree(iter->buffer_iter);
3994         seq_release_private(inode, file);
3995
3996         return 0;
3997 }
3998
3999 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4000 {
4001         struct trace_array *tr = inode->i_private;
4002
4003         trace_array_put(tr);
4004         return 0;
4005 }
4006
4007 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4008 {
4009         struct trace_array *tr = inode->i_private;
4010
4011         trace_array_put(tr);
4012
4013         return single_release(inode, file);
4014 }
4015
4016 static int tracing_open(struct inode *inode, struct file *file)
4017 {
4018         struct trace_array *tr = inode->i_private;
4019         struct trace_iterator *iter;
4020         int ret = 0;
4021
4022         if (trace_array_get(tr) < 0)
4023                 return -ENODEV;
4024
4025         /* If this file was open for write, then erase contents */
4026         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4027                 int cpu = tracing_get_cpu(inode);
4028                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4029
4030 #ifdef CONFIG_TRACER_MAX_TRACE
4031                 if (tr->current_trace->print_max)
4032                         trace_buf = &tr->max_buffer;
4033 #endif
4034
4035                 if (cpu == RING_BUFFER_ALL_CPUS)
4036                         tracing_reset_online_cpus(trace_buf);
4037                 else
4038                         tracing_reset(trace_buf, cpu);
4039         }
4040
4041         if (file->f_mode & FMODE_READ) {
4042                 iter = __tracing_open(inode, file, false);
4043                 if (IS_ERR(iter))
4044                         ret = PTR_ERR(iter);
4045                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4046                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4047         }
4048
4049         if (ret < 0)
4050                 trace_array_put(tr);
4051
4052         return ret;
4053 }
4054
4055 /*
4056  * Some tracers are not suitable for instance buffers.
4057  * A tracer is always available for the global array (toplevel)
4058  * or if it explicitly states that it may be used in instances.
4059  */
4060 static bool
4061 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4062 {
4063         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4064 }
4065
4066 /* Find the next tracer that this trace array may use */
4067 static struct tracer *
4068 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4069 {
4070         while (t && !trace_ok_for_array(t, tr))
4071                 t = t->next;
4072
4073         return t;
4074 }
4075
4076 static void *
4077 t_next(struct seq_file *m, void *v, loff_t *pos)
4078 {
4079         struct trace_array *tr = m->private;
4080         struct tracer *t = v;
4081
4082         (*pos)++;
4083
4084         if (t)
4085                 t = get_tracer_for_array(tr, t->next);
4086
4087         return t;
4088 }
4089
4090 static void *t_start(struct seq_file *m, loff_t *pos)
4091 {
4092         struct trace_array *tr = m->private;
4093         struct tracer *t;
4094         loff_t l = 0;
4095
4096         mutex_lock(&trace_types_lock);
4097
4098         t = get_tracer_for_array(tr, trace_types);
4099         for (; t && l < *pos; t = t_next(m, t, &l))
4100                         ;
4101                 ;
4102         return t;
4103 }
4104
4105 static void t_stop(struct seq_file *m, void *p)
4106 {
4107         mutex_unlock(&trace_types_lock);
4108 }
4109
4110 static int t_show(struct seq_file *m, void *v)
4111 {
4112         struct tracer *t = v;
4113
4114         if (!t)
4115                 return 0;
4116
4117         seq_puts(m, t->name);
4118         if (t->next)
4119                 seq_putc(m, ' ');
4120         else
4121                 seq_putc(m, '\n');
4122
4123         return 0;
4124 }
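
/*
 * The seq operations below back the "available_tracers" file; t_show()
 * prints the registered tracer names separated by spaces, so a read
 * yields a single line such as (illustrative, configuration dependent):
 *
 *   function_graph wakeup function nop
 */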
4125
4126 static const struct seq_operations show_traces_seq_ops = {
4127         .start          = t_start,
4128         .next           = t_next,
4129         .stop           = t_stop,
4130         .show           = t_show,
4131 };
4132
4133 static int show_traces_open(struct inode *inode, struct file *file)
4134 {
4135         struct trace_array *tr = inode->i_private;
4136         struct seq_file *m;
4137         int ret;
4138
4139         if (tracing_disabled)
4140                 return -ENODEV;
4141
4142         ret = seq_open(file, &show_traces_seq_ops);
4143         if (ret)
4144                 return ret;
4145
4146         m = file->private_data;
4147         m->private = tr;
4148
4149         return 0;
4150 }
4151
4152 static ssize_t
4153 tracing_write_stub(struct file *filp, const char __user *ubuf,
4154                    size_t count, loff_t *ppos)
4155 {
4156         return count;
4157 }
4158
4159 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4160 {
4161         int ret;
4162
4163         if (file->f_mode & FMODE_READ)
4164                 ret = seq_lseek(file, offset, whence);
4165         else
4166                 file->f_pos = ret = 0;
4167
4168         return ret;
4169 }
4170
4171 static const struct file_operations tracing_fops = {
4172         .open           = tracing_open,
4173         .read           = seq_read,
4174         .write          = tracing_write_stub,
4175         .llseek         = tracing_lseek,
4176         .release        = tracing_release,
4177 };
4178
4179 static const struct file_operations show_traces_fops = {
4180         .open           = show_traces_open,
4181         .read           = seq_read,
4182         .release        = seq_release,
4183         .llseek         = seq_lseek,
4184 };
4185
4186 static ssize_t
4187 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4188                      size_t count, loff_t *ppos)
4189 {
4190         struct trace_array *tr = file_inode(filp)->i_private;
4191         char *mask_str;
4192         int len;
4193
4194         len = snprintf(NULL, 0, "%*pb\n",
4195                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4196         mask_str = kmalloc(len, GFP_KERNEL);
4197         if (!mask_str)
4198                 return -ENOMEM;
4199
4200         len = snprintf(mask_str, len, "%*pb\n",
4201                        cpumask_pr_args(tr->tracing_cpumask));
4202         if (len >= count) {
4203                 count = -EINVAL;
4204                 goto out_err;
4205         }
4206         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4207
4208 out_err:
4209         kfree(mask_str);
4210
4211         return count;
4212 }
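
/*
 * Reading "tracing_cpumask" returns the mask in the "%*pb" bitmap
 * format, i.e. a hex string such as "f" for CPUs 0-3 or "ff" for CPUs
 * 0-7 (examples are illustrative; the width depends on nr_cpu_ids).
 */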
4213
4214 static ssize_t
4215 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4216                       size_t count, loff_t *ppos)
4217 {
4218         struct trace_array *tr = file_inode(filp)->i_private;
4219         cpumask_var_t tracing_cpumask_new;
4220         int err, cpu;
4221
4222         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4223                 return -ENOMEM;
4224
4225         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4226         if (err)
4227                 goto err_free;
4228
4229         local_irq_disable();
4230         arch_spin_lock(&tr->max_lock);
4231         for_each_tracing_cpu(cpu) {
4232                 /*
4233                  * Increase/decrease the disabled counter if we are
4234                  * about to flip a bit in the cpumask:
4235                  */
4236                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4237                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4238                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4239                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4240                 }
4241                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4242                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4243                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4244                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4245                 }
4246         }
4247         arch_spin_unlock(&tr->max_lock);
4248         local_irq_enable();
4249
4250         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4251         free_cpumask_var(tracing_cpumask_new);
4252
4253         return count;
4254
4255 err_free:
4256         free_cpumask_var(tracing_cpumask_new);
4257
4258         return err;
4259 }
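
/*
 * Illustrative use of the write path above (tracefs assumed mounted at
 * /sys/kernel/tracing):
 *
 *   echo 3 > /sys/kernel/tracing/tracing_cpumask   # trace only CPUs 0 and 1
 *
 * CPUs whose bit is cleared get their per-cpu "disabled" counter bumped
 * and ring buffer recording switched off; setting a bit again undoes
 * both, as the loop above shows.
 */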
4260
4261 static const struct file_operations tracing_cpumask_fops = {
4262         .open           = tracing_open_generic_tr,
4263         .read           = tracing_cpumask_read,
4264         .write          = tracing_cpumask_write,
4265         .release        = tracing_release_generic_tr,
4266         .llseek         = generic_file_llseek,
4267 };
4268
4269 static int tracing_trace_options_show(struct seq_file *m, void *v)
4270 {
4271         struct tracer_opt *trace_opts;
4272         struct trace_array *tr = m->private;
4273         u32 tracer_flags;
4274         int i;
4275
4276         mutex_lock(&trace_types_lock);
4277         tracer_flags = tr->current_trace->flags->val;
4278         trace_opts = tr->current_trace->flags->opts;
4279
4280         for (i = 0; trace_options[i]; i++) {
4281                 if (tr->trace_flags & (1 << i))
4282                         seq_printf(m, "%s\n", trace_options[i]);
4283                 else
4284                         seq_printf(m, "no%s\n", trace_options[i]);
4285         }
4286
4287         for (i = 0; trace_opts[i].name; i++) {
4288                 if (tracer_flags & trace_opts[i].bit)
4289                         seq_printf(m, "%s\n", trace_opts[i].name);
4290                 else
4291                         seq_printf(m, "no%s\n", trace_opts[i].name);
4292         }
4293         mutex_unlock(&trace_types_lock);
4294
4295         return 0;
4296 }
4297
4298 static int __set_tracer_option(struct trace_array *tr,
4299                                struct tracer_flags *tracer_flags,
4300                                struct tracer_opt *opts, int neg)
4301 {
4302         struct tracer *trace = tracer_flags->trace;
4303         int ret;
4304
4305         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4306         if (ret)
4307                 return ret;
4308
4309         if (neg)
4310                 tracer_flags->val &= ~opts->bit;
4311         else
4312                 tracer_flags->val |= opts->bit;
4313         return 0;
4314 }
4315
4316 /* Try to assign a tracer specific option */
4317 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4318 {
4319         struct tracer *trace = tr->current_trace;
4320         struct tracer_flags *tracer_flags = trace->flags;
4321         struct tracer_opt *opts = NULL;
4322         int i;
4323
4324         for (i = 0; tracer_flags->opts[i].name; i++) {
4325                 opts = &tracer_flags->opts[i];
4326
4327                 if (strcmp(cmp, opts->name) == 0)
4328                         return __set_tracer_option(tr, trace->flags, opts, neg);
4329         }
4330
4331         return -EINVAL;
4332 }
4333
4334 /* Some tracers require overwrite to stay enabled */
4335 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4336 {
4337         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4338                 return -1;
4339
4340         return 0;
4341 }
4342
4343 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4344 {
4345         /* do nothing if flag is already set */
4346         if (!!(tr->trace_flags & mask) == !!enabled)
4347                 return 0;
4348
4349         /* Give the tracer a chance to approve the change */
4350         if (tr->current_trace->flag_changed)
4351                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4352                         return -EINVAL;
4353
4354         if (enabled)
4355                 tr->trace_flags |= mask;
4356         else
4357                 tr->trace_flags &= ~mask;
4358
4359         if (mask == TRACE_ITER_RECORD_CMD)
4360                 trace_event_enable_cmd_record(enabled);
4361
4362         if (mask == TRACE_ITER_RECORD_TGID) {
4363                 if (!tgid_map)
4364                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1, sizeof(*tgid_map),
4365                                            GFP_KERNEL);
4366                 if (!tgid_map) {
4367                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4368                         return -ENOMEM;
4369                 }
4370
4371                 trace_event_enable_tgid_record(enabled);
4372         }
4373
4374         if (mask == TRACE_ITER_EVENT_FORK)
4375                 trace_event_follow_fork(tr, enabled);
4376
4377         if (mask == TRACE_ITER_FUNC_FORK)
4378                 ftrace_pid_follow_fork(tr, enabled);
4379
4380         if (mask == TRACE_ITER_OVERWRITE) {
4381                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4382 #ifdef CONFIG_TRACER_MAX_TRACE
4383                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4384 #endif
4385         }
4386
4387         if (mask == TRACE_ITER_PRINTK) {
4388                 trace_printk_start_stop_comm(enabled);
4389                 trace_printk_control(enabled);
4390         }
4391
4392         return 0;
4393 }
4394
4395 static int trace_set_options(struct trace_array *tr, char *option)
4396 {
4397         char *cmp;
4398         int neg = 0;
4399         int ret;
4400         size_t orig_len = strlen(option);
4401
4402         cmp = strstrip(option);
4403
4404         if (strncmp(cmp, "no", 2) == 0) {
4405                 neg = 1;
4406                 cmp += 2;
4407         }
4408
4409         mutex_lock(&trace_types_lock);
4410
4411         ret = match_string(trace_options, -1, cmp);
4412         /* If no option could be set, test the specific tracer options */
4413         if (ret < 0)
4414                 ret = set_tracer_option(tr, cmp, neg);
4415         else
4416                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4417
4418         mutex_unlock(&trace_types_lock);
4419
4420         /*
4421          * If the first trailing whitespace is replaced with '\0' by strstrip,
4422          * turn it back into a space.
4423          */
4424         if (orig_len > strlen(option))
4425                 option[strlen(option)] = ' ';
4426
4427         return ret;
4428 }
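
/*
 * Illustrative option strings accepted by trace_set_options() (the names
 * are examples; the authoritative lists are trace_options[] and the
 * current tracer's private flags):
 *
 *   "overwrite"        - set a core trace flag
 *   "nooverwrite"      - clear it (the "no" prefix negates)
 *   "funcgraph-tail"   - a tracer-specific option, handled by
 *                        set_tracer_option()
 */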
4429
4430 static void __init apply_trace_boot_options(void)
4431 {
4432         char *buf = trace_boot_options_buf;
4433         char *option;
4434
4435         while (true) {
4436                 option = strsep(&buf, ",");
4437
4438                 if (!option)
4439                         break;
4440
4441                 if (*option)
4442                         trace_set_options(&global_trace, option);
4443
4444                 /* Put back the comma to allow this to be called again */
4445                 if (buf)
4446                         *(buf - 1) = ',';
4447         }
4448 }
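
/*
 * trace_boot_options_buf is filled from the "trace_options=" kernel
 * command line parameter, so a boot line such as
 *
 *   trace_options=sym-addr,nooverwrite
 *
 * (option names illustrative) is split on the commas above and each
 * piece is handed to trace_set_options().
 */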
4449
4450 static ssize_t
4451 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4452                         size_t cnt, loff_t *ppos)
4453 {
4454         struct seq_file *m = filp->private_data;
4455         struct trace_array *tr = m->private;
4456         char buf[64];
4457         int ret;
4458
4459         if (cnt >= sizeof(buf))
4460                 return -EINVAL;
4461
4462         if (copy_from_user(buf, ubuf, cnt))
4463                 return -EFAULT;
4464
4465         buf[cnt] = 0;
4466
4467         ret = trace_set_options(tr, buf);
4468         if (ret < 0)
4469                 return ret;
4470
4471         *ppos += cnt;
4472
4473         return cnt;
4474 }
4475
4476 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4477 {
4478         struct trace_array *tr = inode->i_private;
4479         int ret;
4480
4481         if (tracing_disabled)
4482                 return -ENODEV;
4483
4484         if (trace_array_get(tr) < 0)
4485                 return -ENODEV;
4486
4487         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4488         if (ret < 0)
4489                 trace_array_put(tr);
4490
4491         return ret;
4492 }
4493
4494 static const struct file_operations tracing_iter_fops = {
4495         .open           = tracing_trace_options_open,
4496         .read           = seq_read,
4497         .llseek         = seq_lseek,
4498         .release        = tracing_single_release_tr,
4499         .write          = tracing_trace_options_write,
4500 };
4501
4502 static const char readme_msg[] =
4503         "tracing mini-HOWTO:\n\n"
4504         "# echo 0 > tracing_on : quick way to disable tracing\n"
4505         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4506         " Important files:\n"
4507         "  trace\t\t\t- The static contents of the buffer\n"
4508         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4509         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4510         "  current_tracer\t- function and latency tracers\n"
4511         "  available_tracers\t- list of configured tracers for current_tracer\n"
4512         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4513         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4514         "  trace_clock\t\t- change the clock used to order events\n"
4515         "       local:   Per cpu clock but may not be synced across CPUs\n"
4516         "      global:   Synced across CPUs but slows tracing down.\n"
4517         "     counter:   Not a clock, but just an increment\n"
4518         "      uptime:   Jiffy counter from time of boot\n"
4519         "        perf:   Same clock that perf events use\n"
4520 #ifdef CONFIG_X86_64
4521         "     x86-tsc:   TSC cycle counter\n"
4522 #endif
4523         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4524         "       delta:   Delta difference against a buffer-wide timestamp\n"
4525         "    absolute:   Absolute (standalone) timestamp\n"
4526         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
4527         "\n  trace_marker_raw\t\t- Writes into this file are inserted as binary data into the kernel buffer\n"
4528         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4529         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4530         "\t\t\t  Remove sub-buffer with rmdir\n"
4531         "  trace_options\t\t- Set format or modify how tracing happens\n"
4532         "\t\t\t  Disable an option by adding a suffix 'no' to the\n"
4533         "\t\t\t  option name\n"
4534         "  saved_cmdlines_size\t- echo the number of comm-pid mappings to store in here\n"
4535 #ifdef CONFIG_DYNAMIC_FTRACE
4536         "\n  available_filter_functions - list of functions that can be filtered on\n"
4537         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4538         "\t\t\t  functions\n"
4539         "\t     accepts: func_full_name or glob-matching-pattern\n"
4540         "\t     modules: Can select a group via module\n"
4541         "\t      Format: :mod:<module-name>\n"
4542         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4543         "\t    triggers: a command to perform when function is hit\n"
4544         "\t      Format: <function>:<trigger>[:count]\n"
4545         "\t     trigger: traceon, traceoff\n"
4546         "\t\t      enable_event:<system>:<event>\n"
4547         "\t\t      disable_event:<system>:<event>\n"
4548 #ifdef CONFIG_STACKTRACE
4549         "\t\t      stacktrace\n"
4550 #endif
4551 #ifdef CONFIG_TRACER_SNAPSHOT
4552         "\t\t      snapshot\n"
4553 #endif
4554         "\t\t      dump\n"
4555         "\t\t      cpudump\n"
4556         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4557         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4558         "\t     The first one will disable tracing every time do_fault is hit\n"
4559         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4560         "\t       The first time do_trap is hit and it disables tracing, the\n"
4561         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4562         "\t       the counter will not decrement. It only decrements when the\n"
4563         "\t       trigger did work\n"
4564         "\t     To remove trigger without count:\n"
4565         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4566         "\t     To remove trigger with a count:\n"
4567         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4568         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4569         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4570         "\t    modules: Can select a group via module command :mod:\n"
4571         "\t    Does not accept triggers\n"
4572 #endif /* CONFIG_DYNAMIC_FTRACE */
4573 #ifdef CONFIG_FUNCTION_TRACER
4574         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4575         "\t\t    (function)\n"
4576 #endif
4577 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4578         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4579         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4580         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4581 #endif
4582 #ifdef CONFIG_TRACER_SNAPSHOT
4583         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4584         "\t\t\t  snapshot buffer. Read the contents for more\n"
4585         "\t\t\t  information\n"
4586 #endif
4587 #ifdef CONFIG_STACK_TRACER
4588         "  stack_trace\t\t- Shows the max stack trace when active\n"
4589         "  stack_max_size\t- Shows current max stack size that was traced\n"
4590         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4591         "\t\t\t  new trace)\n"
4592 #ifdef CONFIG_DYNAMIC_FTRACE
4593         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4594         "\t\t\t  traces\n"
4595 #endif
4596 #endif /* CONFIG_STACK_TRACER */
4597 #ifdef CONFIG_KPROBE_EVENTS
4598         "  kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4599         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4600 #endif
4601 #ifdef CONFIG_UPROBE_EVENTS
4602         "  uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4603         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4604 #endif
4605 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4606         "\t  accepts: event-definitions (one definition per line)\n"
4607         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4608         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4609         "\t           -:[<group>/]<event>\n"
4610 #ifdef CONFIG_KPROBE_EVENTS
4611         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4612         "\t    place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4613 #endif
4614 #ifdef CONFIG_UPROBE_EVENTS
4615         "\t    place: <path>:<offset>\n"
4616 #endif
4617         "\t     args: <name>=fetcharg[:type]\n"
4618         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4619         "\t           $stack<index>, $stack, $retval, $comm\n"
4620         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4621         "\t           b<bit-width>@<bit-offset>/<container-size>\n"
4622 #endif
4623         "  events/\t\t- Directory containing all trace event subsystems:\n"
4624         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4625         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4626         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4627         "\t\t\t  events\n"
4628         "      filter\t\t- If set, only events passing filter are traced\n"
4629         "  events/<system>/<event>/\t- Directory containing control files for\n"
4630         "\t\t\t  <event>:\n"
4631         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4632         "      filter\t\t- If set, only events passing filter are traced\n"
4633         "      trigger\t\t- If set, a command to perform when event is hit\n"
4634         "\t    Format: <trigger>[:count][if <filter>]\n"
4635         "\t   trigger: traceon, traceoff\n"
4636         "\t            enable_event:<system>:<event>\n"
4637         "\t            disable_event:<system>:<event>\n"
4638 #ifdef CONFIG_HIST_TRIGGERS
4639         "\t            enable_hist:<system>:<event>\n"
4640         "\t            disable_hist:<system>:<event>\n"
4641 #endif
4642 #ifdef CONFIG_STACKTRACE
4643         "\t\t    stacktrace\n"
4644 #endif
4645 #ifdef CONFIG_TRACER_SNAPSHOT
4646         "\t\t    snapshot\n"
4647 #endif
4648 #ifdef CONFIG_HIST_TRIGGERS
4649         "\t\t    hist (see below)\n"
4650 #endif
4651         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4652         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4653         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4654         "\t                  events/block/block_unplug/trigger\n"
4655         "\t   The first disables tracing every time block_unplug is hit.\n"
4656         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4657         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4658         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4659         "\t   Like function triggers, the counter is only decremented if it\n"
4660         "\t    enabled or disabled tracing.\n"
4661         "\t   To remove a trigger without a count:\n"
4662         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4663         "\t   To remove a trigger with a count:\n"
4664         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4665         "\t   Filters can be ignored when removing a trigger.\n"
4666 #ifdef CONFIG_HIST_TRIGGERS
4667         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4668         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4669         "\t            [:values=<field1[,field2,...]>]\n"
4670         "\t            [:sort=<field1[,field2,...]>]\n"
4671         "\t            [:size=#entries]\n"
4672         "\t            [:pause][:continue][:clear]\n"
4673         "\t            [:name=histname1]\n"
4674         "\t            [if <filter>]\n\n"
4675         "\t    When a matching event is hit, an entry is added to a hash\n"
4676         "\t    table using the key(s) and value(s) named, and the value of a\n"
4677         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4678         "\t    correspond to fields in the event's format description.  Keys\n"
4679         "\t    can be any field, or the special string 'stacktrace'.\n"
4680         "\t    Compound keys consisting of up to two fields can be specified\n"
4681         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4682         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4683         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4684         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4685         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4686         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4687         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4688         "\t    its histogram data will be shared with other triggers of the\n"
4689         "\t    same name, and trigger hits will update this common data.\n\n"
4690         "\t    Reading the 'hist' file for the event will dump the hash\n"
4691         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4692         "\t    triggers attached to an event, there will be a table for each\n"
4693         "\t    trigger in the output.  The table displayed for a named\n"
4694         "\t    trigger will be the same as any other instance having the\n"
4695         "\t    same name.  The default format used to display a given field\n"
4696         "\t    can be modified by appending any of the following modifiers\n"
4697         "\t    to the field name, as applicable:\n\n"
4698         "\t            .hex        display a number as a hex value\n"
4699         "\t            .sym        display an address as a symbol\n"
4700         "\t            .sym-offset display an address as a symbol and offset\n"
4701         "\t            .execname   display a common_pid as a program name\n"
4702         "\t            .syscall    display a syscall id as a syscall name\n"
4703         "\t            .log2       display log2 value rather than raw number\n"
4704         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4705         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4706         "\t    trigger or to start a hist trigger but not log any events\n"
4707         "\t    until told to do so.  'continue' can be used to start or\n"
4708         "\t    restart a paused hist trigger.\n\n"
4709         "\t    The 'clear' parameter will clear the contents of a running\n"
4710         "\t    hist trigger and leave its current paused/active state\n"
4711         "\t    unchanged.\n\n"
4712         "\t    The enable_hist and disable_hist triggers can be used to\n"
4713         "\t    have one event conditionally start and stop another event's\n"
4714         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4715         "\t    the enable_event and disable_event triggers.\n"
4716 #endif
4717 ;
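
/*
 * Illustrative sketch only (not part of the help text above): one way the
 * hist trigger pieces described in readme_msg combine, assuming tracefs is
 * mounted at /sys/kernel/tracing and the kmem:kmalloc event (with its
 * call_site and bytes_req fields) is available:
 *
 *   # cd /sys/kernel/tracing
 *   # echo 'hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *           > events/kmem/kmalloc/trigger
 *   # cat events/kmem/kmalloc/hist
 *   # echo '!hist:keys=call_site.sym:values=bytes_req:sort=bytes_req.descending' \
 *           > events/kmem/kmalloc/trigger
 */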
4718
4719 static ssize_t
4720 tracing_readme_read(struct file *filp, char __user *ubuf,
4721                        size_t cnt, loff_t *ppos)
4722 {
4723         return simple_read_from_buffer(ubuf, cnt, ppos,
4724                                         readme_msg, strlen(readme_msg));
4725 }
4726
4727 static const struct file_operations tracing_readme_fops = {
4728         .open           = tracing_open_generic,
4729         .read           = tracing_readme_read,
4730         .llseek         = generic_file_llseek,
4731 };
4732
4733 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
4734 {
4735         int *ptr = v;
4736
4737         if (*pos || m->count)
4738                 ptr++;
4739
4740         (*pos)++;
4741
4742         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
4743                 if (trace_find_tgid(*ptr))
4744                         return ptr;
4745         }
4746
4747         return NULL;
4748 }
4749
4750 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
4751 {
4752         void *v;
4753         loff_t l = 0;
4754
4755         if (!tgid_map)
4756                 return NULL;
4757
4758         v = &tgid_map[0];
4759         while (l <= *pos) {
4760                 v = saved_tgids_next(m, v, &l);
4761                 if (!v)
4762                         return NULL;
4763         }
4764
4765         return v;
4766 }
4767
4768 static void saved_tgids_stop(struct seq_file *m, void *v)
4769 {
4770 }
4771
4772 static int saved_tgids_show(struct seq_file *m, void *v)
4773 {
4774         int pid = (int *)v - tgid_map;
4775
4776         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
4777         return 0;
4778 }
4779
4780 static const struct seq_operations tracing_saved_tgids_seq_ops = {
4781         .start          = saved_tgids_start,
4782         .stop           = saved_tgids_stop,
4783         .next           = saved_tgids_next,
4784         .show           = saved_tgids_show,
4785 };
4786
4787 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
4788 {
4789         if (tracing_disabled)
4790                 return -ENODEV;
4791
4792         return seq_open(filp, &tracing_saved_tgids_seq_ops);
4793 }
4794
4795
4796 static const struct file_operations tracing_saved_tgids_fops = {
4797         .open           = tracing_saved_tgids_open,
4798         .read           = seq_read,
4799         .llseek         = seq_lseek,
4800         .release        = seq_release,
4801 };
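
/*
 * A minimal usage sketch for the saved_tgids file implemented above,
 * assuming tracefs is mounted at /sys/kernel/tracing and the record-tgid
 * trace option is available; each output line is "<pid> <tgid>" as printed
 * by saved_tgids_show():
 *
 *   # echo 1 > /sys/kernel/tracing/options/record-tgid
 *   # cat /sys/kernel/tracing/saved_tgids
 */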
4802
4803 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4804 {
4805         unsigned int *ptr = v;
4806
4807         if (*pos || m->count)
4808                 ptr++;
4809
4810         (*pos)++;
4811
4812         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4813              ptr++) {
4814                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4815                         continue;
4816
4817                 return ptr;
4818         }
4819
4820         return NULL;
4821 }
4822
4823 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4824 {
4825         void *v;
4826         loff_t l = 0;
4827
4828         preempt_disable();
4829         arch_spin_lock(&trace_cmdline_lock);
4830
4831         v = &savedcmd->map_cmdline_to_pid[0];
4832         while (l <= *pos) {
4833                 v = saved_cmdlines_next(m, v, &l);
4834                 if (!v)
4835                         return NULL;
4836         }
4837
4838         return v;
4839 }
4840
4841 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4842 {
4843         arch_spin_unlock(&trace_cmdline_lock);
4844         preempt_enable();
4845 }
4846
4847 static int saved_cmdlines_show(struct seq_file *m, void *v)
4848 {
4849         char buf[TASK_COMM_LEN];
4850         unsigned int *pid = v;
4851
4852         __trace_find_cmdline(*pid, buf);
4853         seq_printf(m, "%d %s\n", *pid, buf);
4854         return 0;
4855 }
4856
4857 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4858         .start          = saved_cmdlines_start,
4859         .next           = saved_cmdlines_next,
4860         .stop           = saved_cmdlines_stop,
4861         .show           = saved_cmdlines_show,
4862 };
4863
4864 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4865 {
4866         if (tracing_disabled)
4867                 return -ENODEV;
4868
4869         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4870 }
4871
4872 static const struct file_operations tracing_saved_cmdlines_fops = {
4873         .open           = tracing_saved_cmdlines_open,
4874         .read           = seq_read,
4875         .llseek         = seq_lseek,
4876         .release        = seq_release,
4877 };
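
/*
 * A minimal usage sketch for the saved_cmdlines file implemented above,
 * assuming tracefs is mounted at /sys/kernel/tracing; each output line is
 * "<pid> <comm>" as printed by saved_cmdlines_show():
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines
 */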
4878
4879 static ssize_t
4880 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4881                                  size_t cnt, loff_t *ppos)
4882 {
4883         char buf[64];
4884         int r;
4885
4886         arch_spin_lock(&trace_cmdline_lock);
4887         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4888         arch_spin_unlock(&trace_cmdline_lock);
4889
4890         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4891 }
4892
4893 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4894 {
4895         kfree(s->saved_cmdlines);
4896         kfree(s->map_cmdline_to_pid);
4897         kfree(s);
4898 }
4899
4900 static int tracing_resize_saved_cmdlines(unsigned int val)
4901 {
4902         struct saved_cmdlines_buffer *s, *savedcmd_temp;
4903
4904         s = kmalloc(sizeof(*s), GFP_KERNEL);
4905         if (!s)
4906                 return -ENOMEM;
4907
4908         if (allocate_cmdlines_buffer(val, s) < 0) {
4909                 kfree(s);
4910                 return -ENOMEM;
4911         }
4912
4913         arch_spin_lock(&trace_cmdline_lock);
4914         savedcmd_temp = savedcmd;
4915         savedcmd = s;
4916         arch_spin_unlock(&trace_cmdline_lock);
4917         free_saved_cmdlines_buffer(savedcmd_temp);
4918
4919         return 0;
4920 }
4921
4922 static ssize_t
4923 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4924                                   size_t cnt, loff_t *ppos)
4925 {
4926         unsigned long val;
4927         int ret;
4928
4929         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4930         if (ret)
4931                 return ret;
4932
4933         /* must have at least 1 entry and no more than PID_MAX_DEFAULT entries */
4934         if (!val || val > PID_MAX_DEFAULT)
4935                 return -EINVAL;
4936
4937         ret = tracing_resize_saved_cmdlines((unsigned int)val);
4938         if (ret < 0)
4939                 return ret;
4940
4941         *ppos += cnt;
4942
4943         return cnt;
4944 }
4945
4946 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4947         .open           = tracing_open_generic,
4948         .read           = tracing_saved_cmdlines_size_read,
4949         .write          = tracing_saved_cmdlines_size_write,
4950 };
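
/*
 * A minimal usage sketch for saved_cmdlines_size, assuming tracefs is
 * mounted at /sys/kernel/tracing; the write path above accepts between 1
 * and PID_MAX_DEFAULT entries and reallocates the cmdline cache:
 *
 *   # cat /sys/kernel/tracing/saved_cmdlines_size
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 */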
4951
4952 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4953 static union trace_eval_map_item *
4954 update_eval_map(union trace_eval_map_item *ptr)
4955 {
4956         if (!ptr->map.eval_string) {
4957                 if (ptr->tail.next) {
4958                         ptr = ptr->tail.next;
4959                         /* Set ptr to the next real item (skip head) */
4960                         ptr++;
4961                 } else
4962                         return NULL;
4963         }
4964         return ptr;
4965 }
4966
4967 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4968 {
4969         union trace_eval_map_item *ptr = v;
4970
4971         /*
4972          * Paranoid! If ptr points to end, we don't want to increment past it.
4973          * This really should never happen.
4974          */
4975         ptr = update_eval_map(ptr);
4976         if (WARN_ON_ONCE(!ptr))
4977                 return NULL;
4978
4979         ptr++;
4980
4981         (*pos)++;
4982
4983         ptr = update_eval_map(ptr);
4984
4985         return ptr;
4986 }
4987
4988 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4989 {
4990         union trace_eval_map_item *v;
4991         loff_t l = 0;
4992
4993         mutex_lock(&trace_eval_mutex);
4994
4995         v = trace_eval_maps;
4996         if (v)
4997                 v++;
4998
4999         while (v && l < *pos) {
5000                 v = eval_map_next(m, v, &l);
5001         }
5002
5003         return v;
5004 }
5005
5006 static void eval_map_stop(struct seq_file *m, void *v)
5007 {
5008         mutex_unlock(&trace_eval_mutex);
5009 }
5010
5011 static int eval_map_show(struct seq_file *m, void *v)
5012 {
5013         union trace_eval_map_item *ptr = v;
5014
5015         seq_printf(m, "%s %ld (%s)\n",
5016                    ptr->map.eval_string, ptr->map.eval_value,
5017                    ptr->map.system);
5018
5019         return 0;
5020 }
5021
5022 static const struct seq_operations tracing_eval_map_seq_ops = {
5023         .start          = eval_map_start,
5024         .next           = eval_map_next,
5025         .stop           = eval_map_stop,
5026         .show           = eval_map_show,
5027 };
5028
5029 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5030 {
5031         if (tracing_disabled)
5032                 return -ENODEV;
5033
5034         return seq_open(filp, &tracing_eval_map_seq_ops);
5035 }
5036
5037 static const struct file_operations tracing_eval_map_fops = {
5038         .open           = tracing_eval_map_open,
5039         .read           = seq_read,
5040         .llseek         = seq_lseek,
5041         .release        = seq_release,
5042 };
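
/*
 * A minimal usage sketch for the eval_map file created by
 * trace_create_eval_file() below, assuming tracefs is mounted at
 * /sys/kernel/tracing; each output line is "<name> <value> (<system>)"
 * as printed by eval_map_show():
 *
 *   # cat /sys/kernel/tracing/eval_map
 */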
5043
5044 static inline union trace_eval_map_item *
5045 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5046 {
5047         /* Return tail of array given the head */
5048         return ptr + ptr->head.length + 1;
5049 }
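
/*
 * Layout assumed by trace_eval_jmp_to_tail() above and
 * trace_insert_eval_map_file() below: each chunk is a head item (module
 * and length), followed by 'length' map items, followed by a tail item
 * whose ->next points at the next chunk (or is zeroed for the last one):
 *
 *   [ head: mod, length=N ][ map 0 ] ... [ map N-1 ][ tail: next ]
 */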
5050
5051 static void
5052 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5053                            int len)
5054 {
5055         struct trace_eval_map **stop;
5056         struct trace_eval_map **map;
5057         union trace_eval_map_item *map_array;
5058         union trace_eval_map_item *ptr;
5059
5060         stop = start + len;
5061
5062         /*
5063          * The trace_eval_maps contains the map plus a head and tail item,
5064          * where the head holds the module and the length of the array, and the
5065          * tail holds a pointer to the next list.
5066          */
5067         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5068         if (!map_array) {
5069                 pr_warn("Unable to allocate trace eval mapping\n");
5070                 return;
5071         }
5072
5073         mutex_lock(&trace_eval_mutex);
5074
5075         if (!trace_eval_maps)
5076                 trace_eval_maps = map_array;
5077         else {
5078                 ptr = trace_eval_maps;
5079                 for (;;) {
5080                         ptr = trace_eval_jmp_to_tail(ptr);
5081                         if (!ptr->tail.next)
5082                                 break;
5083                         ptr = ptr->tail.next;
5084
5085                 }
5086                 ptr->tail.next = map_array;
5087         }
5088         map_array->head.mod = mod;
5089         map_array->head.length = len;
5090         map_array++;
5091
5092         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5093                 map_array->map = **map;
5094                 map_array++;
5095         }
5096         memset(map_array, 0, sizeof(*map_array));
5097
5098         mutex_unlock(&trace_eval_mutex);
5099 }
5100
5101 static void trace_create_eval_file(struct dentry *d_tracer)
5102 {
5103         trace_create_file("eval_map", 0444, d_tracer,
5104                           NULL, &tracing_eval_map_fops);
5105 }
5106
5107 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5108 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5109 static inline void trace_insert_eval_map_file(struct module *mod,
5110                               struct trace_eval_map **start, int len) { }
5111 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5112
5113 static void trace_insert_eval_map(struct module *mod,
5114                                   struct trace_eval_map **start, int len)
5115 {
5116         struct trace_eval_map **map;
5117
5118         if (len <= 0)
5119                 return;
5120
5121         map = start;
5122
5123         trace_event_eval_update(map, len);
5124
5125         trace_insert_eval_map_file(mod, start, len);
5126 }
5127
5128 static ssize_t
5129 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5130                        size_t cnt, loff_t *ppos)
5131 {
5132         struct trace_array *tr = filp->private_data;
5133         char buf[MAX_TRACER_SIZE+2];
5134         int r;
5135
5136         mutex_lock(&trace_types_lock);
5137         r = sprintf(buf, "%s\n", tr->current_trace->name);
5138         mutex_unlock(&trace_types_lock);
5139
5140         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5141 }
5142
5143 int tracer_init(struct tracer *t, struct trace_array *tr)
5144 {
5145         tracing_reset_online_cpus(&tr->trace_buffer);
5146         return t->init(tr);
5147 }
5148
5149 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5150 {
5151         int cpu;
5152
5153         for_each_tracing_cpu(cpu)
5154                 per_cpu_ptr(buf->data, cpu)->entries = val;
5155 }
5156
5157 #ifdef CONFIG_TRACER_MAX_TRACE
5158 /* resize @trace_buf's per-cpu entries to match @size_buf's entries */
5159 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5160                                         struct trace_buffer *size_buf, int cpu_id)
5161 {
5162         int cpu, ret = 0;
5163
5164         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5165                 for_each_tracing_cpu(cpu) {
5166                         ret = ring_buffer_resize(trace_buf->buffer,
5167                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5168                         if (ret < 0)
5169                                 break;
5170                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5171                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5172                 }
5173         } else {
5174                 ret = ring_buffer_resize(trace_buf->buffer,
5175                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5176                 if (ret == 0)
5177                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5178                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5179         }
5180
5181         return ret;
5182 }
5183 #endif /* CONFIG_TRACER_MAX_TRACE */
5184
5185 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5186                                         unsigned long size, int cpu)
5187 {
5188         int ret;
5189
5190         /*
5191          * If kernel or user changes the size of the ring buffer
5192          * we use the size that was given, and we can forget about
5193          * expanding it later.
5194          */
5195         ring_buffer_expanded = true;
5196
5197         /* May be called before buffers are initialized */
5198         if (!tr->trace_buffer.buffer)
5199                 return 0;
5200
5201         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5202         if (ret < 0)
5203                 return ret;
5204
5205 #ifdef CONFIG_TRACER_MAX_TRACE
5206         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5207             !tr->current_trace->use_max_tr)
5208                 goto out;
5209
5210         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5211         if (ret < 0) {
5212                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5213                                                      &tr->trace_buffer, cpu);
5214                 if (r < 0) {
5215                         /*
5216                          * AARGH! We are left with a max buffer of a
5217                          * different size!!!!
5218                          * The max buffer is our "snapshot" buffer.
5219                          * When a tracer needs a snapshot (one of the
5220                          * latency tracers), it swaps the max buffer
5221                          * with the saved snapshot. We succeeded in
5222                          * updating the size of the main buffer, but failed to
5223                          * update the size of the max buffer. But when we tried
5224                          * to reset the main buffer to the original size, we
5225                          * failed there too. This is very unlikely to
5226                          * happen, but if it does, warn and kill all
5227                          * tracing.
5228                          */
5229                         WARN_ON(1);
5230                         tracing_disabled = 1;
5231                 }
5232                 return ret;
5233         }
5234
5235         if (cpu == RING_BUFFER_ALL_CPUS)
5236                 set_buffer_entries(&tr->max_buffer, size);
5237         else
5238                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5239
5240  out:
5241 #endif /* CONFIG_TRACER_MAX_TRACE */
5242
5243         if (cpu == RING_BUFFER_ALL_CPUS)
5244                 set_buffer_entries(&tr->trace_buffer, size);
5245         else
5246                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5247
5248         return ret;
5249 }
5250
5251 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5252                                           unsigned long size, int cpu_id)
5253 {
5254         int ret = size;
5255
5256         mutex_lock(&trace_types_lock);
5257
5258         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5259                 /* make sure this cpu is enabled in the mask */
5260                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5261                         ret = -EINVAL;
5262                         goto out;
5263                 }
5264         }
5265
5266         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5267         if (ret < 0)
5268                 ret = -ENOMEM;
5269
5270 out:
5271         mutex_unlock(&trace_types_lock);
5272
5273         return ret;
5274 }
5275
5276
5277 /**
5278  * tracing_update_buffers - used by tracing facility to expand ring buffers
5279  *
5280  * To save memory on systems where tracing is configured in but never
5281  * used, the ring buffers are initially set to a minimum size. Once a
5282  * user starts to use the tracing facility, they need to grow to their
5283  * default size.
5284  *
5285  * This function is to be called when a tracer is about to be used.
5286  */
5287 int tracing_update_buffers(void)
5288 {
5289         int ret = 0;
5290
5291         mutex_lock(&trace_types_lock);
5292         if (!ring_buffer_expanded)
5293                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5294                                                 RING_BUFFER_ALL_CPUS);
5295         mutex_unlock(&trace_types_lock);
5296
5297         return ret;
5298 }
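
/*
 * Illustrative only: the deferred expansion handled by
 * tracing_update_buffers() can be observed from user space, assuming
 * tracefs is mounted at /sys/kernel/tracing; buffer_size_kb reports
 * "(expanded: ...)" until something (e.g. enabling a tracer such as the
 * function tracer, if built in) triggers the resize:
 *
 *   # cat /sys/kernel/tracing/buffer_size_kb
 *   # echo function > /sys/kernel/tracing/current_tracer
 *   # cat /sys/kernel/tracing/buffer_size_kb
 */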
5299
5300 struct trace_option_dentry;
5301
5302 static void
5303 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5304
5305 /*
5306  * Used to clear out the tracer before deletion of an instance.
5307  * Must have trace_types_lock held.
5308  */
5309 static void tracing_set_nop(struct trace_array *tr)
5310 {
5311         if (tr->current_trace == &nop_trace)
5312                 return;
5313
5314         tr->current_trace->enabled--;
5315
5316         if (tr->current_trace->reset)
5317                 tr->current_trace->reset(tr);
5318
5319         tr->current_trace = &nop_trace;
5320 }
5321
5322 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5323 {
5324         /* Only enable if the directory has been created already. */
5325         if (!tr->dir)
5326                 return;
5327
5328         create_trace_option_files(tr, t);
5329 }
5330
5331 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5332 {
5333         struct tracer *t;
5334 #ifdef CONFIG_TRACER_MAX_TRACE
5335         bool had_max_tr;
5336 #endif
5337         int ret = 0;
5338
5339         mutex_lock(&trace_types_lock);
5340
5341         if (!ring_buffer_expanded) {
5342                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5343                                                 RING_BUFFER_ALL_CPUS);
5344                 if (ret < 0)
5345                         goto out;
5346                 ret = 0;
5347         }
5348
5349         for (t = trace_types; t; t = t->next) {
5350                 if (strcmp(t->name, buf) == 0)
5351                         break;
5352         }
5353         if (!t) {
5354                 ret = -EINVAL;
5355                 goto out;
5356         }
5357         if (t == tr->current_trace)
5358                 goto out;
5359
5360         /* Some tracers won't work on kernel command line */
5361         if (system_state < SYSTEM_RUNNING && t->noboot) {
5362                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5363                         t->name);
5364                 goto out;
5365         }
5366
5367         /* Some tracers are only allowed for the top level buffer */
5368         if (!trace_ok_for_array(t, tr)) {
5369                 ret = -EINVAL;
5370                 goto out;
5371         }
5372
5373         /* If trace pipe files are being read, we can't change the tracer */
5374         if (tr->current_trace->ref) {
5375                 ret = -EBUSY;
5376                 goto out;
5377         }
5378
5379         trace_branch_disable();
5380
5381         tr->current_trace->enabled--;
5382
5383         if (tr->current_trace->reset)
5384                 tr->current_trace->reset(tr);
5385
5386         /* Current trace needs to be nop_trace before synchronize_sched */
5387         tr->current_trace = &nop_trace;
5388
5389 #ifdef CONFIG_TRACER_MAX_TRACE
5390         had_max_tr = tr->allocated_snapshot;
5391
5392         if (had_max_tr && !t->use_max_tr) {
5393                 /*
5394                  * We need to make sure that the update_max_tr sees that
5395                  * current_trace changed to nop_trace to keep it from
5396                  * swapping the buffers after we resize it.
5397                  * The update_max_tr is called from interrupts disabled
5398                  * The update_max_tr is called with interrupts disabled,
5399                  * so a synchronize_sched() is sufficient.
5400                 synchronize_sched();
5401                 free_snapshot(tr);
5402         }
5403 #endif
5404
5405 #ifdef CONFIG_TRACER_MAX_TRACE
5406         if (t->use_max_tr && !had_max_tr) {
5407                 ret = tracing_alloc_snapshot_instance(tr);
5408                 if (ret < 0)
5409                         goto out;
5410         }
5411 #endif
5412
5413         if (t->init) {
5414                 ret = tracer_init(t, tr);
5415                 if (ret)
5416                         goto out;
5417         }
5418
5419         tr->current_trace = t;
5420         tr->current_trace->enabled++;
5421         trace_branch_enable(tr);
5422  out:
5423         mutex_unlock(&trace_types_lock);
5424
5425         return ret;
5426 }
5427
5428 static ssize_t
5429 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5430                         size_t cnt, loff_t *ppos)
5431 {
5432         struct trace_array *tr = filp->private_data;
5433         char buf[MAX_TRACER_SIZE+1];
5434         int i;
5435         size_t ret;
5436         int err;
5437
5438         ret = cnt;
5439
5440         if (cnt > MAX_TRACER_SIZE)
5441                 cnt = MAX_TRACER_SIZE;
5442
5443         if (copy_from_user(buf, ubuf, cnt))
5444                 return -EFAULT;
5445
5446         buf[cnt] = 0;
5447
5448         /* strip trailing whitespace */
5449         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5450                 buf[i] = 0;
5451
5452         err = tracing_set_tracer(tr, buf);
5453         if (err)
5454                 return err;
5455
5456         *ppos += ret;
5457
5458         return ret;
5459 }
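
/*
 * A minimal usage sketch for the current_tracer file backed by the
 * read/write handlers above, assuming tracefs is mounted at
 * /sys/kernel/tracing (the available names are listed in
 * available_tracers; trailing whitespace in the write is stripped):
 *
 *   # cat /sys/kernel/tracing/current_tracer
 *   # echo nop > /sys/kernel/tracing/current_tracer
 */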
5460
5461 static ssize_t
5462 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5463                    size_t cnt, loff_t *ppos)
5464 {
5465         char buf[64];
5466         int r;
5467
5468         r = snprintf(buf, sizeof(buf), "%ld\n",
5469                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5470         if (r > sizeof(buf))
5471                 r = sizeof(buf);
5472         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5473 }
5474
5475 static ssize_t
5476 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5477                     size_t cnt, loff_t *ppos)
5478 {
5479         unsigned long val;
5480         int ret;
5481
5482         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5483         if (ret)
5484                 return ret;
5485
5486         *ptr = val * 1000;
5487
5488         return cnt;
5489 }
5490
5491 static ssize_t
5492 tracing_thresh_read(struct file *filp, char __user *ubuf,
5493                     size_t cnt, loff_t *ppos)
5494 {
5495         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5496 }
5497
5498 static ssize_t
5499 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5500                      size_t cnt, loff_t *ppos)
5501 {
5502         struct trace_array *tr = filp->private_data;
5503         int ret;
5504
5505         mutex_lock(&trace_types_lock);
5506         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5507         if (ret < 0)
5508                 goto out;
5509
5510         if (tr->current_trace->update_thresh) {
5511                 ret = tr->current_trace->update_thresh(tr);
5512                 if (ret < 0)
5513                         goto out;
5514         }
5515
5516         ret = cnt;
5517 out:
5518         mutex_unlock(&trace_types_lock);
5519
5520         return ret;
5521 }
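
/*
 * A minimal usage sketch for tracing_thresh, assuming tracefs is mounted
 * at /sys/kernel/tracing; values are written and read in microseconds
 * (tracing_nsecs_write() stores them internally in nanoseconds):
 *
 *   # echo 100 > /sys/kernel/tracing/tracing_thresh
 *   # cat /sys/kernel/tracing/tracing_thresh
 */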
5522
5523 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5524
5525 static ssize_t
5526 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5527                      size_t cnt, loff_t *ppos)
5528 {
5529         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5530 }
5531
5532 static ssize_t
5533 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5534                       size_t cnt, loff_t *ppos)
5535 {
5536         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5537 }
5538
5539 #endif
5540
5541 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5542 {
5543         struct trace_array *tr = inode->i_private;
5544         struct trace_iterator *iter;
5545         int ret = 0;
5546
5547         if (tracing_disabled)
5548                 return -ENODEV;
5549
5550         if (trace_array_get(tr) < 0)
5551                 return -ENODEV;
5552
5553         mutex_lock(&trace_types_lock);
5554
5555         /* create a buffer to store the information to pass to userspace */
5556         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5557         if (!iter) {
5558                 ret = -ENOMEM;
5559                 __trace_array_put(tr);
5560                 goto out;
5561         }
5562
5563         trace_seq_init(&iter->seq);
5564         iter->trace = tr->current_trace;
5565
5566         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5567                 ret = -ENOMEM;
5568                 goto fail;
5569         }
5570
5571         /* trace pipe does not show start of buffer */
5572         cpumask_setall(iter->started);
5573
5574         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5575                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5576
5577         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5578         if (trace_clocks[tr->clock_id].in_ns)
5579                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5580
5581         iter->tr = tr;
5582         iter->trace_buffer = &tr->trace_buffer;
5583         iter->cpu_file = tracing_get_cpu(inode);
5584         mutex_init(&iter->mutex);
5585         filp->private_data = iter;
5586
5587         if (iter->trace->pipe_open)
5588                 iter->trace->pipe_open(iter);
5589
5590         nonseekable_open(inode, filp);
5591
5592         tr->current_trace->ref++;
5593 out:
5594         mutex_unlock(&trace_types_lock);
5595         return ret;
5596
5597 fail:
5598         /* iter->trace points at the live tracer; it must not be freed */
5599         kfree(iter);
5600         __trace_array_put(tr);
5601         mutex_unlock(&trace_types_lock);
5602         return ret;
5603 }
5604
5605 static int tracing_release_pipe(struct inode *inode, struct file *file)
5606 {
5607         struct trace_iterator *iter = file->private_data;
5608         struct trace_array *tr = inode->i_private;
5609
5610         mutex_lock(&trace_types_lock);
5611
5612         tr->current_trace->ref--;
5613
5614         if (iter->trace->pipe_close)
5615                 iter->trace->pipe_close(iter);
5616
5617         mutex_unlock(&trace_types_lock);
5618
5619         free_cpumask_var(iter->started);
5620         mutex_destroy(&iter->mutex);
5621         kfree(iter);
5622
5623         trace_array_put(tr);
5624
5625         return 0;
5626 }
5627
5628 static __poll_t
5629 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5630 {
5631         struct trace_array *tr = iter->tr;
5632
5633         /* Iterators are static, they should be filled or empty */
5634         if (trace_buffer_iter(iter, iter->cpu_file))
5635                 return EPOLLIN | EPOLLRDNORM;
5636
5637         if (tr->trace_flags & TRACE_ITER_BLOCK)
5638                 /*
5639                  * Always select as readable when in blocking mode
5640                  */
5641                 return EPOLLIN | EPOLLRDNORM;
5642         else
5643                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5644                                              filp, poll_table);
5645 }
5646
5647 static __poll_t
5648 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5649 {
5650         struct trace_iterator *iter = filp->private_data;
5651
5652         return trace_poll(iter, filp, poll_table);
5653 }
5654
5655 /* Must be called with iter->mutex held. */
5656 static int tracing_wait_pipe(struct file *filp)
5657 {
5658         struct trace_iterator *iter = filp->private_data;
5659         int ret;
5660
5661         while (trace_empty(iter)) {
5662
5663                 if ((filp->f_flags & O_NONBLOCK)) {
5664                         return -EAGAIN;
5665                 }
5666
5667                 /*
5668                  * We block until we read something and tracing is disabled.
5669                  * We still block if tracing is disabled, but we have never
5670                  * read anything. This allows a user to cat this file, and
5671                  * then enable tracing. But after we have read something,
5672                  * we give an EOF when tracing is again disabled.
5673                  *
5674                  * iter->pos will be 0 if we haven't read anything.
5675                  */
5676                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5677                         break;
5678
5679                 mutex_unlock(&iter->mutex);
5680
5681                 ret = wait_on_pipe(iter, false);
5682
5683                 mutex_lock(&iter->mutex);
5684
5685                 if (ret)
5686                         return ret;
5687         }
5688
5689         return 1;
5690 }
5691
5692 /*
5693  * Consumer reader.
5694  */
5695 static ssize_t
5696 tracing_read_pipe(struct file *filp, char __user *ubuf,
5697                   size_t cnt, loff_t *ppos)
5698 {
5699         struct trace_iterator *iter = filp->private_data;
5700         ssize_t sret;
5701
5702         /*
5703          * Avoid more than one consumer on a single file descriptor
5704          * This is just a matter of traces coherency, the ring buffer itself
5705          * is protected.
5706          */
5707         mutex_lock(&iter->mutex);
5708
5709         /* return any leftover data */
5710         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5711         if (sret != -EBUSY)
5712                 goto out;
5713
5714         trace_seq_init(&iter->seq);
5715
5716         if (iter->trace->read) {
5717                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5718                 if (sret)
5719                         goto out;
5720         }
5721
5722 waitagain:
5723         sret = tracing_wait_pipe(filp);
5724         if (sret <= 0)
5725                 goto out;
5726
5727         /* stop when tracing is finished */
5728         if (trace_empty(iter)) {
5729                 sret = 0;
5730                 goto out;
5731         }
5732
5733         if (cnt >= PAGE_SIZE)
5734                 cnt = PAGE_SIZE - 1;
5735
5736         /* reset all but tr, trace, and overruns */
5737         memset(&iter->seq, 0,
5738                sizeof(struct trace_iterator) -
5739                offsetof(struct trace_iterator, seq));
5740         cpumask_clear(iter->started);
5741         iter->pos = -1;
5742
5743         trace_event_read_lock();
5744         trace_access_lock(iter->cpu_file);
5745         while (trace_find_next_entry_inc(iter) != NULL) {
5746                 enum print_line_t ret;
5747                 int save_len = iter->seq.seq.len;
5748
5749                 ret = print_trace_line(iter);
5750                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5751                         /* don't print partial lines */
5752                         iter->seq.seq.len = save_len;
5753                         break;
5754                 }
5755                 if (ret != TRACE_TYPE_NO_CONSUME)
5756                         trace_consume(iter);
5757
5758                 if (trace_seq_used(&iter->seq) >= cnt)
5759                         break;
5760
5761                 /*
5762                  * Setting the full flag means we reached the trace_seq buffer
5763                  * size and we should have left via the partial-line check above.
5764                  * One of the trace_seq_* functions is not used properly.
5765                  */
5766                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5767                           iter->ent->type);
5768         }
5769         trace_access_unlock(iter->cpu_file);
5770         trace_event_read_unlock();
5771
5772         /* Now copy what we have to the user */
5773         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5774         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5775                 trace_seq_init(&iter->seq);
5776
5777         /*
5778          * If there was nothing to send to user, in spite of consuming trace
5779          * entries, go back to wait for more entries.
5780          */
5781         if (sret == -EBUSY)
5782                 goto waitagain;
5783
5784 out:
5785         mutex_unlock(&iter->mutex);
5786
5787         return sret;
5788 }
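
/*
 * A minimal usage sketch for the trace_pipe reader implemented above,
 * assuming tracefs is mounted at /sys/kernel/tracing: the read is
 * consuming and blocks until data is available (or returns -EAGAIN when
 * the file is opened with O_NONBLOCK):
 *
 *   # cat /sys/kernel/tracing/trace_pipe
 *   # cat /sys/kernel/tracing/per_cpu/cpu0/trace_pipe
 */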
5789
5790 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5791                                      unsigned int idx)
5792 {
5793         __free_page(spd->pages[idx]);
5794 }
5795
5796 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5797         .can_merge              = 0,
5798         .confirm                = generic_pipe_buf_confirm,
5799         .release                = generic_pipe_buf_release,
5800         .steal                  = generic_pipe_buf_steal,
5801         .get                    = generic_pipe_buf_get,
5802 };
5803
5804 static size_t
5805 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5806 {
5807         size_t count;
5808         int save_len;
5809         int ret;
5810
5811         /* Seq buffer is page-sized, exactly what we need. */
5812         for (;;) {
5813                 save_len = iter->seq.seq.len;
5814                 ret = print_trace_line(iter);
5815
5816                 if (trace_seq_has_overflowed(&iter->seq)) {
5817                         iter->seq.seq.len = save_len;
5818                         break;
5819                 }
5820
5821                 /*
5822                  * This should not be hit, because it should only
5823                  * be set if the iter->seq overflowed. But check it
5824                  * anyway to be safe.
5825                  */
5826                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5827                         iter->seq.seq.len = save_len;
5828                         break;
5829                 }
5830
5831                 count = trace_seq_used(&iter->seq) - save_len;
5832                 if (rem < count) {
5833                         rem = 0;
5834                         iter->seq.seq.len = save_len;
5835                         break;
5836                 }
5837
5838                 if (ret != TRACE_TYPE_NO_CONSUME)
5839                         trace_consume(iter);
5840                 rem -= count;
5841                 if (!trace_find_next_entry_inc(iter))   {
5842                         rem = 0;
5843                         iter->ent = NULL;
5844                         break;
5845                 }
5846         }
5847
5848         return rem;
5849 }
5850
5851 static ssize_t tracing_splice_read_pipe(struct file *filp,
5852                                         loff_t *ppos,
5853                                         struct pipe_inode_info *pipe,
5854                                         size_t len,
5855                                         unsigned int flags)
5856 {
5857         struct page *pages_def[PIPE_DEF_BUFFERS];
5858         struct partial_page partial_def[PIPE_DEF_BUFFERS];
5859         struct trace_iterator *iter = filp->private_data;
5860         struct splice_pipe_desc spd = {
5861                 .pages          = pages_def,
5862                 .partial        = partial_def,
5863                 .nr_pages       = 0, /* This gets updated below. */
5864                 .nr_pages_max   = PIPE_DEF_BUFFERS,
5865                 .ops            = &tracing_pipe_buf_ops,
5866                 .spd_release    = tracing_spd_release_pipe,
5867         };
5868         ssize_t ret;
5869         size_t rem;
5870         unsigned int i;
5871
5872         if (splice_grow_spd(pipe, &spd))
5873                 return -ENOMEM;
5874
5875         mutex_lock(&iter->mutex);
5876
5877         if (iter->trace->splice_read) {
5878                 ret = iter->trace->splice_read(iter, filp,
5879                                                ppos, pipe, len, flags);
5880                 if (ret)
5881                         goto out_err;
5882         }
5883
5884         ret = tracing_wait_pipe(filp);
5885         if (ret <= 0)
5886                 goto out_err;
5887
5888         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5889                 ret = -EFAULT;
5890                 goto out_err;
5891         }
5892
5893         trace_event_read_lock();
5894         trace_access_lock(iter->cpu_file);
5895
5896         /* Fill as many pages as possible. */
5897         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5898                 spd.pages[i] = alloc_page(GFP_KERNEL);
5899                 if (!spd.pages[i])
5900                         break;
5901
5902                 rem = tracing_fill_pipe_page(rem, iter);
5903
5904                 /* Copy the data into the page, so we can start over. */
5905                 ret = trace_seq_to_buffer(&iter->seq,
5906                                           page_address(spd.pages[i]),
5907                                           trace_seq_used(&iter->seq));
5908                 if (ret < 0) {
5909                         __free_page(spd.pages[i]);
5910                         break;
5911                 }
5912                 spd.partial[i].offset = 0;
5913                 spd.partial[i].len = trace_seq_used(&iter->seq);
5914
5915                 trace_seq_init(&iter->seq);
5916         }
5917
5918         trace_access_unlock(iter->cpu_file);
5919         trace_event_read_unlock();
5920         mutex_unlock(&iter->mutex);
5921
5922         spd.nr_pages = i;
5923
5924         if (i)
5925                 ret = splice_to_pipe(pipe, &spd);
5926         else
5927                 ret = 0;
5928 out:
5929         splice_shrink_spd(&spd);
5930         return ret;
5931
5932 out_err:
5933         mutex_unlock(&iter->mutex);
5934         goto out;
5935 }
5936
5937 static ssize_t
5938 tracing_entries_read(struct file *filp, char __user *ubuf,
5939                      size_t cnt, loff_t *ppos)
5940 {
5941         struct inode *inode = file_inode(filp);
5942         struct trace_array *tr = inode->i_private;
5943         int cpu = tracing_get_cpu(inode);
5944         char buf[64];
5945         int r = 0;
5946         ssize_t ret;
5947
5948         mutex_lock(&trace_types_lock);
5949
5950         if (cpu == RING_BUFFER_ALL_CPUS) {
5951                 int cpu, buf_size_same;
5952                 unsigned long size;
5953
5954                 size = 0;
5955                 buf_size_same = 1;
5956                 /* check if all cpu sizes are same */
5957                 for_each_tracing_cpu(cpu) {
5958                         /* fill in the size from first enabled cpu */
5959                         if (size == 0)
5960                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5961                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5962                                 buf_size_same = 0;
5963                                 break;
5964                         }
5965                 }
5966
5967                 if (buf_size_same) {
5968                         if (!ring_buffer_expanded)
5969                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
5970                                             size >> 10,
5971                                             trace_buf_size >> 10);
5972                         else
5973                                 r = sprintf(buf, "%lu\n", size >> 10);
5974                 } else
5975                         r = sprintf(buf, "X\n");
5976         } else
5977                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5978
5979         mutex_unlock(&trace_types_lock);
5980
5981         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5982         return ret;
5983 }
5984
5985 static ssize_t
5986 tracing_entries_write(struct file *filp, const char __user *ubuf,
5987                       size_t cnt, loff_t *ppos)
5988 {
5989         struct inode *inode = file_inode(filp);
5990         struct trace_array *tr = inode->i_private;
5991         unsigned long val;
5992         int ret;
5993
5994         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5995         if (ret)
5996                 return ret;
5997
5998         /* must have at least 1 entry */
5999         if (!val)
6000                 return -EINVAL;
6001
6002         /* value is in KB */
6003         val <<= 10;
6004         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6005         if (ret < 0)
6006                 return ret;
6007
6008         *ppos += cnt;
6009
6010         return cnt;
6011 }
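
/*
 * A minimal usage sketch for buffer_size_kb, assuming tracefs is mounted
 * at /sys/kernel/tracing; values are per CPU and interpreted in KB, the
 * top-level file resizes all CPUs while the per_cpu variants resize a
 * single one:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # echo 1408 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 */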
6012
6013 static ssize_t
6014 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6015                                 size_t cnt, loff_t *ppos)
6016 {
6017         struct trace_array *tr = filp->private_data;
6018         char buf[64];
6019         int r, cpu;
6020         unsigned long size = 0, expanded_size = 0;
6021
6022         mutex_lock(&trace_types_lock);
6023         for_each_tracing_cpu(cpu) {
6024                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6025                 if (!ring_buffer_expanded)
6026                         expanded_size += trace_buf_size >> 10;
6027         }
6028         if (ring_buffer_expanded)
6029                 r = sprintf(buf, "%lu\n", size);
6030         else
6031                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6032         mutex_unlock(&trace_types_lock);
6033
6034         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6035 }
6036
6037 static ssize_t
6038 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6039                           size_t cnt, loff_t *ppos)
6040 {
6041         /*
6042          * There is no need to read what the user has written; this function
6043          * just makes sure that there is no error when "echo" is used.
6044          */
6045
6046         *ppos += cnt;
6047
6048         return cnt;
6049 }
6050
6051 static int
6052 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6053 {
6054         struct trace_array *tr = inode->i_private;
6055
6056         /* disable tracing? */
6057         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6058                 tracer_tracing_off(tr);
6059         /* resize the ring buffer to 0 */
6060         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6061
6062         trace_array_put(tr);
6063
6064         return 0;
6065 }
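
/*
 * A minimal usage sketch for free_buffer, whose release handler above
 * shrinks the ring buffer to zero (and turns tracing off first if the
 * TRACE_ITER_STOP_ON_FREE option is set), assuming tracefs is mounted
 * at /sys/kernel/tracing:
 *
 *   # echo > /sys/kernel/tracing/free_buffer
 */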
6066
6067 static ssize_t
6068 tracing_mark_write(struct file *filp, const char __user *ubuf,
6069                                         size_t cnt, loff_t *fpos)
6070 {
6071         struct trace_array *tr = filp->private_data;
6072         struct ring_buffer_event *event;
6073         enum event_trigger_type tt = ETT_NONE;
6074         struct ring_buffer *buffer;
6075         struct print_entry *entry;
6076         unsigned long irq_flags;
6077         const char faulted[] = "<faulted>";
6078         ssize_t written;
6079         int size;
6080         int len;
6081
6082 /* Used in tracing_mark_raw_write() as well */
6083 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
6084
6085         if (tracing_disabled)
6086                 return -EINVAL;
6087
6088         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6089                 return -EINVAL;
6090
6091         if (cnt > TRACE_BUF_SIZE)
6092                 cnt = TRACE_BUF_SIZE;
6093
6094         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6095
6096         local_save_flags(irq_flags);
6097         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6098
6099         /* If less than "<faulted>", then make sure we can still add that */
6100         if (cnt < FAULTED_SIZE)
6101                 size += FAULTED_SIZE - cnt;
6102
6103         buffer = tr->trace_buffer.buffer;
6104         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6105                                             irq_flags, preempt_count());
6106         if (unlikely(!event))
6107                 /* Ring buffer disabled, return as if not open for write */
6108                 return -EBADF;
6109
6110         entry = ring_buffer_event_data(event);
6111         entry->ip = _THIS_IP_;
6112
6113         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6114         if (len) {
6115                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6116                 cnt = FAULTED_SIZE;
6117                 written = -EFAULT;
6118         } else
6119                 written = cnt;
6120         len = cnt;
6121
6122         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6123                 /* do not add \n before testing triggers, but add \0 */
6124                 entry->buf[cnt] = '\0';
6125                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6126         }
6127
6128         if (entry->buf[cnt - 1] != '\n') {
6129                 entry->buf[cnt] = '\n';
6130                 entry->buf[cnt + 1] = '\0';
6131         } else
6132                 entry->buf[cnt] = '\0';
6133
6134         __buffer_unlock_commit(buffer, event);
6135
6136         if (tt)
6137                 event_triggers_post_call(tr->trace_marker_file, tt);
6138
6139         if (written > 0)
6140                 *fpos += written;
6141
6142         return written;
6143 }
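
/*
 * A minimal usage sketch for trace_marker, assuming tracefs is mounted at
 * /sys/kernel/tracing; the written string shows up in the trace output as
 * a print event (a trailing newline is added if missing):
 *
 *   # echo "hello from user space" > /sys/kernel/tracing/trace_marker
 *   # cat /sys/kernel/tracing/trace
 */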
6144
6145 /* Limit it for now to 3K (including tag) */
6146 #define RAW_DATA_MAX_SIZE (1024*3)
6147
6148 static ssize_t
6149 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6150                                         size_t cnt, loff_t *fpos)
6151 {
6152         struct trace_array *tr = filp->private_data;
6153         struct ring_buffer_event *event;
6154         struct ring_buffer *buffer;
6155         struct raw_data_entry *entry;
6156         const char faulted[] = "<faulted>";
6157         unsigned long irq_flags;
6158         ssize_t written;
6159         int size;
6160         int len;
6161
6162 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6163
6164         if (tracing_disabled)
6165                 return -EINVAL;
6166
6167         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6168                 return -EINVAL;
6169
6170         /* The marker must at least have a tag id */
6171         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6172                 return -EINVAL;
6173
6174         if (cnt > TRACE_BUF_SIZE)
6175                 cnt = TRACE_BUF_SIZE;
6176
6177         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6178
6179         local_save_flags(irq_flags);
6180         size = sizeof(*entry) + cnt;
6181         if (cnt < FAULT_SIZE_ID)
6182                 size += FAULT_SIZE_ID - cnt;
6183
6184         buffer = tr->trace_buffer.buffer;
6185         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6186                                             irq_flags, preempt_count());
6187         if (!event)
6188                 /* Ring buffer disabled, return as if not open for write */
6189                 return -EBADF;
6190
6191         entry = ring_buffer_event_data(event);
6192
6193         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6194         if (len) {
6195                 entry->id = -1;
6196                 memcpy(&entry->buf, faulted, FAULTED_SIZE);
6197                 written = -EFAULT;
6198         } else
6199                 written = cnt;
6200
6201         __buffer_unlock_commit(buffer, event);
6202
6203         if (written > 0)
6204                 *fpos += written;
6205
6206         return written;
6207 }
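
/*
 * Usage note for trace_marker_raw (illustrative sketch only): unlike
 * trace_marker, the payload is binary and must begin with a native-endian
 * unsigned int tag id, so it is normally written from a program, e.g.
 * (error handling omitted, tracefs path assumed):
 *
 *   struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, &rec, sizeof(rec));
 */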
6208
6209 static int tracing_clock_show(struct seq_file *m, void *v)
6210 {
6211         struct trace_array *tr = m->private;
6212         int i;
6213
6214         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6215                 seq_printf(m,
6216                         "%s%s%s%s", i ? " " : "",
6217                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6218                         i == tr->clock_id ? "]" : "");
6219         seq_putc(m, '\n');
6220
6221         return 0;
6222 }
6223
6224 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6225 {
6226         int i;
6227
6228         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6229                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6230                         break;
6231         }
6232         if (i == ARRAY_SIZE(trace_clocks))
6233                 return -EINVAL;
6234
6235         mutex_lock(&trace_types_lock);
6236
6237         tr->clock_id = i;
6238
6239         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6240
6241         /*
6242          * New clock may not be consistent with the previous clock.
6243          * Reset the buffer so that it doesn't have incomparable timestamps.
6244          */
6245         tracing_reset_online_cpus(&tr->trace_buffer);
6246
6247 #ifdef CONFIG_TRACER_MAX_TRACE
6248         if (tr->max_buffer.buffer)
6249                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6250         tracing_reset_online_cpus(&tr->max_buffer);
6251 #endif
6252
6253         mutex_unlock(&trace_types_lock);
6254
6255         return 0;
6256 }
6257
6258 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6259                                    size_t cnt, loff_t *fpos)
6260 {
6261         struct seq_file *m = filp->private_data;
6262         struct trace_array *tr = m->private;
6263         char buf[64];
6264         const char *clockstr;
6265         int ret;
6266
6267         if (cnt >= sizeof(buf))
6268                 return -EINVAL;
6269
6270         if (copy_from_user(buf, ubuf, cnt))
6271                 return -EFAULT;
6272
6273         buf[cnt] = 0;
6274
6275         clockstr = strstrip(buf);
6276
6277         ret = tracing_set_clock(tr, clockstr);
6278         if (ret)
6279                 return ret;
6280
6281         *fpos += cnt;
6282
6283         return cnt;
6284 }
6285
6286 static int tracing_clock_open(struct inode *inode, struct file *file)
6287 {
6288         struct trace_array *tr = inode->i_private;
6289         int ret;
6290
6291         if (tracing_disabled)
6292                 return -ENODEV;
6293
6294         if (trace_array_get(tr))
6295                 return -ENODEV;
6296
6297         ret = single_open(file, tracing_clock_show, inode->i_private);
6298         if (ret < 0)
6299                 trace_array_put(tr);
6300
6301         return ret;
6302 }
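
/*
 * A minimal usage sketch for trace_clock, assuming tracefs is mounted at
 * /sys/kernel/tracing; reading lists the available clocks with the
 * current one in brackets, and writing one of the listed names switches
 * the clock (and resets the buffers, see tracing_set_clock() above):
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   # echo global > /sys/kernel/tracing/trace_clock
 */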
6303
6304 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6305 {
6306         struct trace_array *tr = m->private;
6307
6308         mutex_lock(&trace_types_lock);
6309
6310         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6311                 seq_puts(m, "delta [absolute]\n");
6312         else
6313                 seq_puts(m, "[delta] absolute\n");
6314
6315         mutex_unlock(&trace_types_lock);
6316
6317         return 0;
6318 }
6319
6320 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6321 {
6322         struct trace_array *tr = inode->i_private;
6323         int ret;
6324
6325         if (tracing_disabled)
6326                 return -ENODEV;
6327
6328         if (trace_array_get(tr))
6329                 return -ENODEV;
6330
6331         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6332         if (ret < 0)
6333                 trace_array_put(tr);
6334
6335         return ret;
6336 }
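
/*
 * A minimal usage sketch for timestamp_mode, assuming tracefs is mounted
 * at /sys/kernel/tracing; the current mode is shown in brackets, as
 * printed by tracing_time_stamp_mode_show() above:
 *
 *   # cat /sys/kernel/tracing/timestamp_mode
 */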
6337
6338 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6339 {
6340         int ret = 0;
6341
6342         mutex_lock(&trace_types_lock);
6343
6344         if (abs && tr->time_stamp_abs_ref++)
6345                 goto out;
6346
6347         if (!abs) {
6348                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6349                         ret = -EINVAL;
6350                         goto out;
6351                 }
6352
6353                 if (--tr->time_stamp_abs_ref)
6354                         goto out;
6355         }
6356
6357         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6358
6359 #ifdef CONFIG_TRACER_MAX_TRACE
6360         if (tr->max_buffer.buffer)
6361                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6362 #endif
6363  out:
6364         mutex_unlock(&trace_types_lock);
6365
6366         return ret;
6367 }
6368
6369 struct ftrace_buffer_info {
6370         struct trace_iterator   iter;
6371         void                    *spare;
6372         unsigned int            spare_cpu;
6373         unsigned int            read;
6374 };
6375
6376 #ifdef CONFIG_TRACER_SNAPSHOT
6377 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6378 {
6379         struct trace_array *tr = inode->i_private;
6380         struct trace_iterator *iter;
6381         struct seq_file *m;
6382         int ret = 0;
6383
6384         if (trace_array_get(tr) < 0)
6385                 return -ENODEV;
6386
6387         if (file->f_mode & FMODE_READ) {
6388                 iter = __tracing_open(inode, file, true);
6389                 if (IS_ERR(iter))
6390                         ret = PTR_ERR(iter);
6391         } else {
6392                 /* Writes still need the seq_file to hold the private data */
6393                 ret = -ENOMEM;
6394                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6395                 if (!m)
6396                         goto out;
6397                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6398                 if (!iter) {
6399                         kfree(m);
6400                         goto out;
6401                 }
6402                 ret = 0;
6403
6404                 iter->tr = tr;
6405                 iter->trace_buffer = &tr->max_buffer;
6406                 iter->cpu_file = tracing_get_cpu(inode);
6407                 m->private = iter;
6408                 file->private_data = m;
6409         }
6410 out:
6411         if (ret < 0)
6412                 trace_array_put(tr);
6413
6414         return ret;
6415 }
6416
6417 static ssize_t
6418 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6419                        loff_t *ppos)
6420 {
6421         struct seq_file *m = filp->private_data;
6422         struct trace_iterator *iter = m->private;
6423         struct trace_array *tr = iter->tr;
6424         unsigned long val;
6425         int ret;
6426
6427         ret = tracing_update_buffers();
6428         if (ret < 0)
6429                 return ret;
6430
6431         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6432         if (ret)
6433                 return ret;
6434
6435         mutex_lock(&trace_types_lock);
6436
6437         if (tr->current_trace->use_max_tr) {
6438                 ret = -EBUSY;
6439                 goto out;
6440         }
6441
6442         switch (val) {
6443         case 0:
6444                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6445                         ret = -EINVAL;
6446                         break;
6447                 }
6448                 if (tr->allocated_snapshot)
6449                         free_snapshot(tr);
6450                 break;
6451         case 1:
6452 /* Only allow per-cpu swap if the ring buffer supports it */
6453 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6454                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6455                         ret = -EINVAL;
6456                         break;
6457                 }
6458 #endif
6459                 if (!tr->allocated_snapshot) {
6460                         ret = tracing_alloc_snapshot_instance(tr);
6461                         if (ret < 0)
6462                                 break;
6463                 }
6464                 local_irq_disable();
6465                 /* Now, we're going to swap */
6466                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6467                         update_max_tr(tr, current, smp_processor_id());
6468                 else
6469                         update_max_tr_single(tr, current, iter->cpu_file);
6470                 local_irq_enable();
6471                 break;
6472         default:
6473                 if (tr->allocated_snapshot) {
6474                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6475                                 tracing_reset_online_cpus(&tr->max_buffer);
6476                         else
6477                                 tracing_reset(&tr->max_buffer, iter->cpu_file);
6478                 }
6479                 break;
6480         }
6481
6482         if (ret >= 0) {
6483                 *ppos += cnt;
6484                 ret = cnt;
6485         }
6486 out:
6487         mutex_unlock(&trace_types_lock);
6488         return ret;
6489 }
6490
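/*
 * Editorial summary (not part of the original source) of the values the
 * "snapshot" file accepts, matching the switch above:
 *
 *	0 - free the snapshot buffer (only valid on the all-CPUs file)
 *	1 - allocate the snapshot buffer if needed and swap it with the live
 *	    buffer (per-CPU swap needs CONFIG_RING_BUFFER_ALLOW_SWAP)
 *	any other value - clear the snapshot buffer contents without freeing it
 */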
6491 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6492 {
6493         struct seq_file *m = file->private_data;
6494         int ret;
6495
6496         ret = tracing_release(inode, file);
6497
6498         if (file->f_mode & FMODE_READ)
6499                 return ret;
6500
6501         /* If write only, the seq_file is just a stub */
6502         if (m)
6503                 kfree(m->private);
6504         kfree(m);
6505
6506         return 0;
6507 }
6508
6509 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6510 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6511                                     size_t count, loff_t *ppos);
6512 static int tracing_buffers_release(struct inode *inode, struct file *file);
6513 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6514                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6515
6516 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6517 {
6518         struct ftrace_buffer_info *info;
6519         int ret;
6520
6521         ret = tracing_buffers_open(inode, filp);
6522         if (ret < 0)
6523                 return ret;
6524
6525         info = filp->private_data;
6526
6527         if (info->iter.trace->use_max_tr) {
6528                 tracing_buffers_release(inode, filp);
6529                 return -EBUSY;
6530         }
6531
6532         info->iter.snapshot = true;
6533         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6534
6535         return ret;
6536 }
6537
6538 #endif /* CONFIG_TRACER_SNAPSHOT */
6539
6540
6541 static const struct file_operations tracing_thresh_fops = {
6542         .open           = tracing_open_generic,
6543         .read           = tracing_thresh_read,
6544         .write          = tracing_thresh_write,
6545         .llseek         = generic_file_llseek,
6546 };
6547
6548 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6549 static const struct file_operations tracing_max_lat_fops = {
6550         .open           = tracing_open_generic,
6551         .read           = tracing_max_lat_read,
6552         .write          = tracing_max_lat_write,
6553         .llseek         = generic_file_llseek,
6554 };
6555 #endif
6556
6557 static const struct file_operations set_tracer_fops = {
6558         .open           = tracing_open_generic,
6559         .read           = tracing_set_trace_read,
6560         .write          = tracing_set_trace_write,
6561         .llseek         = generic_file_llseek,
6562 };
6563
6564 static const struct file_operations tracing_pipe_fops = {
6565         .open           = tracing_open_pipe,
6566         .poll           = tracing_poll_pipe,
6567         .read           = tracing_read_pipe,
6568         .splice_read    = tracing_splice_read_pipe,
6569         .release        = tracing_release_pipe,
6570         .llseek         = no_llseek,
6571 };
6572
6573 static const struct file_operations tracing_entries_fops = {
6574         .open           = tracing_open_generic_tr,
6575         .read           = tracing_entries_read,
6576         .write          = tracing_entries_write,
6577         .llseek         = generic_file_llseek,
6578         .release        = tracing_release_generic_tr,
6579 };
6580
6581 static const struct file_operations tracing_total_entries_fops = {
6582         .open           = tracing_open_generic_tr,
6583         .read           = tracing_total_entries_read,
6584         .llseek         = generic_file_llseek,
6585         .release        = tracing_release_generic_tr,
6586 };
6587
6588 static const struct file_operations tracing_free_buffer_fops = {
6589         .open           = tracing_open_generic_tr,
6590         .write          = tracing_free_buffer_write,
6591         .release        = tracing_free_buffer_release,
6592 };
6593
6594 static const struct file_operations tracing_mark_fops = {
6595         .open           = tracing_open_generic_tr,
6596         .write          = tracing_mark_write,
6597         .llseek         = generic_file_llseek,
6598         .release        = tracing_release_generic_tr,
6599 };
6600
6601 static const struct file_operations tracing_mark_raw_fops = {
6602         .open           = tracing_open_generic_tr,
6603         .write          = tracing_mark_raw_write,
6604         .llseek         = generic_file_llseek,
6605         .release        = tracing_release_generic_tr,
6606 };
6607
6608 static const struct file_operations trace_clock_fops = {
6609         .open           = tracing_clock_open,
6610         .read           = seq_read,
6611         .llseek         = seq_lseek,
6612         .release        = tracing_single_release_tr,
6613         .write          = tracing_clock_write,
6614 };
6615
6616 static const struct file_operations trace_time_stamp_mode_fops = {
6617         .open           = tracing_time_stamp_mode_open,
6618         .read           = seq_read,
6619         .llseek         = seq_lseek,
6620         .release        = tracing_single_release_tr,
6621 };
6622
6623 #ifdef CONFIG_TRACER_SNAPSHOT
6624 static const struct file_operations snapshot_fops = {
6625         .open           = tracing_snapshot_open,
6626         .read           = seq_read,
6627         .write          = tracing_snapshot_write,
6628         .llseek         = tracing_lseek,
6629         .release        = tracing_snapshot_release,
6630 };
6631
6632 static const struct file_operations snapshot_raw_fops = {
6633         .open           = snapshot_raw_open,
6634         .read           = tracing_buffers_read,
6635         .release        = tracing_buffers_release,
6636         .splice_read    = tracing_buffers_splice_read,
6637         .llseek         = no_llseek,
6638 };
6639
6640 #endif /* CONFIG_TRACER_SNAPSHOT */
6641
6642 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6643 {
6644         struct trace_array *tr = inode->i_private;
6645         struct ftrace_buffer_info *info;
6646         int ret;
6647
6648         if (tracing_disabled)
6649                 return -ENODEV;
6650
6651         if (trace_array_get(tr) < 0)
6652                 return -ENODEV;
6653
6654         info = kzalloc(sizeof(*info), GFP_KERNEL);
6655         if (!info) {
6656                 trace_array_put(tr);
6657                 return -ENOMEM;
6658         }
6659
6660         mutex_lock(&trace_types_lock);
6661
6662         info->iter.tr           = tr;
6663         info->iter.cpu_file     = tracing_get_cpu(inode);
6664         info->iter.trace        = tr->current_trace;
6665         info->iter.trace_buffer = &tr->trace_buffer;
6666         info->spare             = NULL;
6667         /* Force reading ring buffer for first read */
6668         info->read              = (unsigned int)-1;
6669
6670         filp->private_data = info;
6671
6672         tr->current_trace->ref++;
6673
6674         mutex_unlock(&trace_types_lock);
6675
6676         ret = nonseekable_open(inode, filp);
6677         if (ret < 0)
6678                 trace_array_put(tr);
6679
6680         return ret;
6681 }
6682
6683 static __poll_t
6684 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6685 {
6686         struct ftrace_buffer_info *info = filp->private_data;
6687         struct trace_iterator *iter = &info->iter;
6688
6689         return trace_poll(iter, filp, poll_table);
6690 }
6691
6692 static ssize_t
6693 tracing_buffers_read(struct file *filp, char __user *ubuf,
6694                      size_t count, loff_t *ppos)
6695 {
6696         struct ftrace_buffer_info *info = filp->private_data;
6697         struct trace_iterator *iter = &info->iter;
6698         ssize_t ret = 0;
6699         ssize_t size;
6700
6701         if (!count)
6702                 return 0;
6703
6704 #ifdef CONFIG_TRACER_MAX_TRACE
6705         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6706                 return -EBUSY;
6707 #endif
6708
6709         if (!info->spare) {
6710                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6711                                                           iter->cpu_file);
6712                 if (IS_ERR(info->spare)) {
6713                         ret = PTR_ERR(info->spare);
6714                         info->spare = NULL;
6715                 } else {
6716                         info->spare_cpu = iter->cpu_file;
6717                 }
6718         }
6719         if (!info->spare)
6720                 return ret;
6721
6722         /* Do we have previous read data to read? */
6723         if (info->read < PAGE_SIZE)
6724                 goto read;
6725
6726  again:
6727         trace_access_lock(iter->cpu_file);
6728         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6729                                     &info->spare,
6730                                     count,
6731                                     iter->cpu_file, 0);
6732         trace_access_unlock(iter->cpu_file);
6733
6734         if (ret < 0) {
6735                 if (trace_empty(iter)) {
6736                         if ((filp->f_flags & O_NONBLOCK))
6737                                 return -EAGAIN;
6738
6739                         ret = wait_on_pipe(iter, false);
6740                         if (ret)
6741                                 return ret;
6742
6743                         goto again;
6744                 }
6745                 return 0;
6746         }
6747
6748         info->read = 0;
6749  read:
6750         size = PAGE_SIZE - info->read;
6751         if (size > count)
6752                 size = count;
6753
6754         ret = copy_to_user(ubuf, info->spare + info->read, size);
6755         if (ret == size)
6756                 return -EFAULT;
6757
6758         size -= ret;
6759
6760         *ppos += size;
6761         info->read += size;
6762
6763         return size;
6764 }
6765
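/*
 * Editorial walk-through (not part of the original source), assuming the
 * usual 4096-byte PAGE_SIZE: the first read() allocates info->spare, fills
 * it from the ring buffer and copies up to one page out; a caller asking
 * for 2048 bytes twice consumes that single page (info->read goes
 * 0 -> 2048 -> 4096), and only the third read() calls
 * ring_buffer_read_page() again for more data.
 */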
6766 static int tracing_buffers_release(struct inode *inode, struct file *file)
6767 {
6768         struct ftrace_buffer_info *info = file->private_data;
6769         struct trace_iterator *iter = &info->iter;
6770
6771         mutex_lock(&trace_types_lock);
6772
6773         iter->tr->current_trace->ref--;
6774
6775         __trace_array_put(iter->tr);
6776
6777         if (info->spare)
6778                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6779                                            info->spare_cpu, info->spare);
6780         kfree(info);
6781
6782         mutex_unlock(&trace_types_lock);
6783
6784         return 0;
6785 }
6786
6787 struct buffer_ref {
6788         struct ring_buffer      *buffer;
6789         void                    *page;
6790         int                     cpu;
6791         int                     ref;
6792 };
6793
6794 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6795                                     struct pipe_buffer *buf)
6796 {
6797         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6798
6799         if (--ref->ref)
6800                 return;
6801
6802         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6803         kfree(ref);
6804         buf->private = 0;
6805 }
6806
6807 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6808                                 struct pipe_buffer *buf)
6809 {
6810         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6811
6812         ref->ref++;
6813 }
6814
6815 /* Pipe buffer operations for a buffer. */
6816 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6817         .can_merge              = 0,
6818         .confirm                = generic_pipe_buf_confirm,
6819         .release                = buffer_pipe_buf_release,
6820         .steal                  = generic_pipe_buf_steal,
6821         .get                    = buffer_pipe_buf_get,
6822 };
6823
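/*
 * Editorial note (not part of the original source) on the buffer_ref
 * lifetime: tracing_buffers_splice_read() below creates each ref with
 * ->ref == 1 and hands its page to the pipe; buffer_pipe_buf_get() bumps
 * the count when the pipe buffer is duplicated (e.g. by tee(2)), and the
 * page only goes back to the ring buffer once buffer_pipe_buf_release()
 * or buffer_spd_release() drops the last reference.
 */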
6824 /*
6825  * Callback from splice_to_pipe(), if we need to release some pages
6826  * at the end of the spd in case we errored out while filling the pipe.
6827  */
6828 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6829 {
6830         struct buffer_ref *ref =
6831                 (struct buffer_ref *)spd->partial[i].private;
6832
6833         if (--ref->ref)
6834                 return;
6835
6836         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6837         kfree(ref);
6838         spd->partial[i].private = 0;
6839 }
6840
6841 static ssize_t
6842 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6843                             struct pipe_inode_info *pipe, size_t len,
6844                             unsigned int flags)
6845 {
6846         struct ftrace_buffer_info *info = file->private_data;
6847         struct trace_iterator *iter = &info->iter;
6848         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6849         struct page *pages_def[PIPE_DEF_BUFFERS];
6850         struct splice_pipe_desc spd = {
6851                 .pages          = pages_def,
6852                 .partial        = partial_def,
6853                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6854                 .ops            = &buffer_pipe_buf_ops,
6855                 .spd_release    = buffer_spd_release,
6856         };
6857         struct buffer_ref *ref;
6858         int entries, i;
6859         ssize_t ret = 0;
6860
6861 #ifdef CONFIG_TRACER_MAX_TRACE
6862         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6863                 return -EBUSY;
6864 #endif
6865
6866         if (*ppos & (PAGE_SIZE - 1))
6867                 return -EINVAL;
6868
6869         if (len & (PAGE_SIZE - 1)) {
6870                 if (len < PAGE_SIZE)
6871                         return -EINVAL;
6872                 len &= PAGE_MASK;
6873         }
6874
6875         if (splice_grow_spd(pipe, &spd))
6876                 return -ENOMEM;
6877
6878  again:
6879         trace_access_lock(iter->cpu_file);
6880         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6881
6882         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6883                 struct page *page;
6884                 int r;
6885
6886                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6887                 if (!ref) {
6888                         ret = -ENOMEM;
6889                         break;
6890                 }
6891
6892                 ref->ref = 1;
6893                 ref->buffer = iter->trace_buffer->buffer;
6894                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6895                 if (IS_ERR(ref->page)) {
6896                         ret = PTR_ERR(ref->page);
6897                         ref->page = NULL;
6898                         kfree(ref);
6899                         break;
6900                 }
6901                 ref->cpu = iter->cpu_file;
6902
6903                 r = ring_buffer_read_page(ref->buffer, &ref->page,
6904                                           len, iter->cpu_file, 1);
6905                 if (r < 0) {
6906                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
6907                                                    ref->page);
6908                         kfree(ref);
6909                         break;
6910                 }
6911
6912                 page = virt_to_page(ref->page);
6913
6914                 spd.pages[i] = page;
6915                 spd.partial[i].len = PAGE_SIZE;
6916                 spd.partial[i].offset = 0;
6917                 spd.partial[i].private = (unsigned long)ref;
6918                 spd.nr_pages++;
6919                 *ppos += PAGE_SIZE;
6920
6921                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6922         }
6923
6924         trace_access_unlock(iter->cpu_file);
6925         spd.nr_pages = i;
6926
6927         /* did we read anything? */
6928         if (!spd.nr_pages) {
6929                 if (ret)
6930                         goto out;
6931
6932                 ret = -EAGAIN;
6933                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6934                         goto out;
6935
6936                 ret = wait_on_pipe(iter, true);
6937                 if (ret)
6938                         goto out;
6939
6940                 goto again;
6941         }
6942
6943         ret = splice_to_pipe(pipe, &spd);
6944 out:
6945         splice_shrink_spd(&spd);
6946
6947         return ret;
6948 }
6949
6950 static const struct file_operations tracing_buffers_fops = {
6951         .open           = tracing_buffers_open,
6952         .read           = tracing_buffers_read,
6953         .poll           = tracing_buffers_poll,
6954         .release        = tracing_buffers_release,
6955         .splice_read    = tracing_buffers_splice_read,
6956         .llseek         = no_llseek,
6957 };
6958
6959 static ssize_t
6960 tracing_stats_read(struct file *filp, char __user *ubuf,
6961                    size_t count, loff_t *ppos)
6962 {
6963         struct inode *inode = file_inode(filp);
6964         struct trace_array *tr = inode->i_private;
6965         struct trace_buffer *trace_buf = &tr->trace_buffer;
6966         int cpu = tracing_get_cpu(inode);
6967         struct trace_seq *s;
6968         unsigned long cnt;
6969         unsigned long long t;
6970         unsigned long usec_rem;
6971
6972         s = kmalloc(sizeof(*s), GFP_KERNEL);
6973         if (!s)
6974                 return -ENOMEM;
6975
6976         trace_seq_init(s);
6977
6978         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6979         trace_seq_printf(s, "entries: %ld\n", cnt);
6980
6981         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6982         trace_seq_printf(s, "overrun: %ld\n", cnt);
6983
6984         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6985         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6986
6987         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6988         trace_seq_printf(s, "bytes: %ld\n", cnt);
6989
6990         if (trace_clocks[tr->clock_id].in_ns) {
6991                 /* local or global for trace_clock */
6992                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6993                 usec_rem = do_div(t, USEC_PER_SEC);
6994                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6995                                                                 t, usec_rem);
6996
6997                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6998                 usec_rem = do_div(t, USEC_PER_SEC);
6999                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7000         } else {
7001                 /* counter or tsc mode for trace_clock */
7002                 trace_seq_printf(s, "oldest event ts: %llu\n",
7003                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7004
7005                 trace_seq_printf(s, "now ts: %llu\n",
7006                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7007         }
7008
7009         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7010         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7011
7012         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7013         trace_seq_printf(s, "read events: %ld\n", cnt);
7014
7015         count = simple_read_from_buffer(ubuf, count, ppos,
7016                                         s->buffer, trace_seq_used(s));
7017
7018         kfree(s);
7019
7020         return count;
7021 }
7022
7023 static const struct file_operations tracing_stats_fops = {
7024         .open           = tracing_open_generic_tr,
7025         .read           = tracing_stats_read,
7026         .llseek         = generic_file_llseek,
7027         .release        = tracing_release_generic_tr,
7028 };
7029
7030 #ifdef CONFIG_DYNAMIC_FTRACE
7031
7032 static ssize_t
7033 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7034                   size_t cnt, loff_t *ppos)
7035 {
7036         unsigned long *p = filp->private_data;
7037         char buf[64]; /* Not too big for a shallow stack */
7038         int r;
7039
7040         r = scnprintf(buf, 63, "%ld", *p);
7041         buf[r++] = '\n';
7042
7043         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7044 }
7045
7046 static const struct file_operations tracing_dyn_info_fops = {
7047         .open           = tracing_open_generic,
7048         .read           = tracing_read_dyn_info,
7049         .llseek         = generic_file_llseek,
7050 };
7051 #endif /* CONFIG_DYNAMIC_FTRACE */
7052
7053 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7054 static void
7055 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7056                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7057                 void *data)
7058 {
7059         tracing_snapshot_instance(tr);
7060 }
7061
7062 static void
7063 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7064                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7065                       void *data)
7066 {
7067         struct ftrace_func_mapper *mapper = data;
7068         long *count = NULL;
7069
7070         if (mapper)
7071                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7072
7073         if (count) {
7074
7075                 if (*count <= 0)
7076                         return;
7077
7078                 (*count)--;
7079         }
7080
7081         tracing_snapshot_instance(tr);
7082 }
7083
7084 static int
7085 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7086                       struct ftrace_probe_ops *ops, void *data)
7087 {
7088         struct ftrace_func_mapper *mapper = data;
7089         long *count = NULL;
7090
7091         seq_printf(m, "%ps:", (void *)ip);
7092
7093         seq_puts(m, "snapshot");
7094
7095         if (mapper)
7096                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7097
7098         if (count)
7099                 seq_printf(m, ":count=%ld\n", *count);
7100         else
7101                 seq_puts(m, ":unlimited\n");
7102
7103         return 0;
7104 }
7105
7106 static int
7107 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7108                      unsigned long ip, void *init_data, void **data)
7109 {
7110         struct ftrace_func_mapper *mapper = *data;
7111
7112         if (!mapper) {
7113                 mapper = allocate_ftrace_func_mapper();
7114                 if (!mapper)
7115                         return -ENOMEM;
7116                 *data = mapper;
7117         }
7118
7119         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7120 }
7121
7122 static void
7123 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7124                      unsigned long ip, void *data)
7125 {
7126         struct ftrace_func_mapper *mapper = data;
7127
7128         if (!ip) {
7129                 if (!mapper)
7130                         return;
7131                 free_ftrace_func_mapper(mapper, NULL);
7132                 return;
7133         }
7134
7135         ftrace_func_mapper_remove_ip(mapper, ip);
7136 }
7137
7138 static struct ftrace_probe_ops snapshot_probe_ops = {
7139         .func                   = ftrace_snapshot,
7140         .print                  = ftrace_snapshot_print,
7141 };
7142
7143 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7144         .func                   = ftrace_count_snapshot,
7145         .print                  = ftrace_snapshot_print,
7146         .init                   = ftrace_snapshot_init,
7147         .free                   = ftrace_snapshot_free,
7148 };
7149
7150 static int
7151 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7152                                char *glob, char *cmd, char *param, int enable)
7153 {
7154         struct ftrace_probe_ops *ops;
7155         void *count = (void *)-1;
7156         char *number;
7157         int ret;
7158
7159         if (!tr)
7160                 return -ENODEV;
7161
7162         /* hash funcs only work with set_ftrace_filter */
7163         if (!enable)
7164                 return -EINVAL;
7165
7166         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7167
7168         if (glob[0] == '!')
7169                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7170
7171         if (!param)
7172                 goto out_reg;
7173
7174         number = strsep(&param, ":");
7175
7176         if (!strlen(number))
7177                 goto out_reg;
7178
7179         /*
7180          * We use the callback data field (which is a pointer)
7181          * as our counter.
7182          */
7183         ret = kstrtoul(number, 0, (unsigned long *)&count);
7184         if (ret)
7185                 return ret;
7186
7187  out_reg:
7188         ret = tracing_alloc_snapshot_instance(tr);
7189         if (ret < 0)
7190                 goto out;
7191
7192         ret = register_ftrace_function_probe(glob, tr, ops, count);
7193
7194  out:
7195         return ret < 0 ? ret : 0;
7196 }
7197
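/*
 * Editorial example (not part of the original source): this callback is
 * reached through the "snapshot" ftrace command registered below. Writing
 * "schedule:snapshot:5" to set_ftrace_filter arrives here with
 * glob == "schedule", cmd == "snapshot" and param == "5", limiting the
 * probe to five snapshots, while a leading '!' ("!schedule:snapshot")
 * unregisters the probe again.
 */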
7198 static struct ftrace_func_command ftrace_snapshot_cmd = {
7199         .name                   = "snapshot",
7200         .func                   = ftrace_trace_snapshot_callback,
7201 };
7202
7203 static __init int register_snapshot_cmd(void)
7204 {
7205         return register_ftrace_command(&ftrace_snapshot_cmd);
7206 }
7207 #else
7208 static inline __init int register_snapshot_cmd(void) { return 0; }
7209 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7210
7211 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7212 {
7213         if (WARN_ON(!tr->dir))
7214                 return ERR_PTR(-ENODEV);
7215
7216         /* Top directory uses NULL as the parent */
7217         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7218                 return NULL;
7219
7220         /* All sub buffers have a descriptor */
7221         return tr->dir;
7222 }
7223
7224 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7225 {
7226         struct dentry *d_tracer;
7227
7228         if (tr->percpu_dir)
7229                 return tr->percpu_dir;
7230
7231         d_tracer = tracing_get_dentry(tr);
7232         if (IS_ERR(d_tracer))
7233                 return NULL;
7234
7235         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7236
7237         WARN_ONCE(!tr->percpu_dir,
7238                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7239
7240         return tr->percpu_dir;
7241 }
7242
7243 static struct dentry *
7244 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7245                       void *data, long cpu, const struct file_operations *fops)
7246 {
7247         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7248
7249         if (ret) /* See tracing_get_cpu() */
7250                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7251         return ret;
7252 }
7253
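/*
 * Editorial note (not part of the original source): the "+ 1" lets
 * tracing_get_cpu() tell a per-CPU file (i_cdev == cpu + 1) apart from
 * files not created through this helper, whose i_cdev stays NULL and
 * which therefore fall back to RING_BUFFER_ALL_CPUS.
 */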
7254 static void
7255 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7256 {
7257         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7258         struct dentry *d_cpu;
7259         char cpu_dir[30]; /* 30 characters should be more than enough */
7260
7261         if (!d_percpu)
7262                 return;
7263
7264         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7265         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7266         if (!d_cpu) {
7267                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7268                 return;
7269         }
7270
7271         /* per cpu trace_pipe */
7272         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7273                                 tr, cpu, &tracing_pipe_fops);
7274
7275         /* per cpu trace */
7276         trace_create_cpu_file("trace", 0644, d_cpu,
7277                                 tr, cpu, &tracing_fops);
7278
7279         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7280                                 tr, cpu, &tracing_buffers_fops);
7281
7282         trace_create_cpu_file("stats", 0444, d_cpu,
7283                                 tr, cpu, &tracing_stats_fops);
7284
7285         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7286                                 tr, cpu, &tracing_entries_fops);
7287
7288 #ifdef CONFIG_TRACER_SNAPSHOT
7289         trace_create_cpu_file("snapshot", 0644, d_cpu,
7290                                 tr, cpu, &snapshot_fops);
7291
7292         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7293                                 tr, cpu, &snapshot_raw_fops);
7294 #endif
7295 }
7296
7297 #ifdef CONFIG_FTRACE_SELFTEST
7298 /* Let selftest have access to static functions in this file */
7299 #include "trace_selftest.c"
7300 #endif
7301
7302 static ssize_t
7303 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7304                         loff_t *ppos)
7305 {
7306         struct trace_option_dentry *topt = filp->private_data;
7307         char *buf;
7308
7309         if (topt->flags->val & topt->opt->bit)
7310                 buf = "1\n";
7311         else
7312                 buf = "0\n";
7313
7314         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7315 }
7316
7317 static ssize_t
7318 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7319                          loff_t *ppos)
7320 {
7321         struct trace_option_dentry *topt = filp->private_data;
7322         unsigned long val;
7323         int ret;
7324
7325         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7326         if (ret)
7327                 return ret;
7328
7329         if (val != 0 && val != 1)
7330                 return -EINVAL;
7331
7332         if (!!(topt->flags->val & topt->opt->bit) != val) {
7333                 mutex_lock(&trace_types_lock);
7334                 ret = __set_tracer_option(topt->tr, topt->flags,
7335                                           topt->opt, !val);
7336                 mutex_unlock(&trace_types_lock);
7337                 if (ret)
7338                         return ret;
7339         }
7340
7341         *ppos += cnt;
7342
7343         return cnt;
7344 }
7345
7346
7347 static const struct file_operations trace_options_fops = {
7348         .open = tracing_open_generic,
7349         .read = trace_options_read,
7350         .write = trace_options_write,
7351         .llseek = generic_file_llseek,
7352 };
7353
7354 /*
7355  * In order to pass in both the trace_array descriptor as well as the index
7356  * to the flag that the trace option file represents, the trace_array
7357  * has a character array of trace_flags_index[], which holds the index
7358  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7359  * The address of this character array is passed to the flag option file
7360  * read/write callbacks.
7361  *
7362  * In order to extract both the index and the trace_array descriptor,
7363  * get_tr_index() uses the following algorithm.
7364  *
7365  *   idx = *ptr;
7366  *
7367  * This works because each entry of the index array holds its own
7368  * position (remember, index[1] == 1).
7369  *
7370  * Then, to get the trace_array descriptor, we subtract that index from
7371  * the pointer, which takes us back to the start of the index array:
7372  *
7373  *   ptr - idx == &index[0]
7374  *
7375  * Then a simple container_of() from that pointer gets us to the
7376  * trace_array descriptor.
7377  */
7378 static void get_tr_index(void *data, struct trace_array **ptr,
7379                          unsigned int *pindex)
7380 {
7381         *pindex = *(unsigned char *)data;
7382
7383         *ptr = container_of(data - *pindex, struct trace_array,
7384                             trace_flags_index);
7385 }
7386
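/*
 * Editorial sketch (not part of the original source), assuming "tr" had its
 * trace_flags_index[] filled in by init_trace_flags_index() further below:
 *
 *	void *data = &tr->trace_flags_index[3];
 *	struct trace_array *found;
 *	unsigned int idx;
 *
 *	get_tr_index(data, &found, &idx);
 *	idx is now 3 and found == tr, since data - 3 == &tr->trace_flags_index[0]
 */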
7387 static ssize_t
7388 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7389                         loff_t *ppos)
7390 {
7391         void *tr_index = filp->private_data;
7392         struct trace_array *tr;
7393         unsigned int index;
7394         char *buf;
7395
7396         get_tr_index(tr_index, &tr, &index);
7397
7398         if (tr->trace_flags & (1 << index))
7399                 buf = "1\n";
7400         else
7401                 buf = "0\n";
7402
7403         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7404 }
7405
7406 static ssize_t
7407 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7408                          loff_t *ppos)
7409 {
7410         void *tr_index = filp->private_data;
7411         struct trace_array *tr;
7412         unsigned int index;
7413         unsigned long val;
7414         int ret;
7415
7416         get_tr_index(tr_index, &tr, &index);
7417
7418         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7419         if (ret)
7420                 return ret;
7421
7422         if (val != 0 && val != 1)
7423                 return -EINVAL;
7424
7425         mutex_lock(&trace_types_lock);
7426         ret = set_tracer_flag(tr, 1 << index, val);
7427         mutex_unlock(&trace_types_lock);
7428
7429         if (ret < 0)
7430                 return ret;
7431
7432         *ppos += cnt;
7433
7434         return cnt;
7435 }
7436
7437 static const struct file_operations trace_options_core_fops = {
7438         .open = tracing_open_generic,
7439         .read = trace_options_core_read,
7440         .write = trace_options_core_write,
7441         .llseek = generic_file_llseek,
7442 };
7443
7444 struct dentry *trace_create_file(const char *name,
7445                                  umode_t mode,
7446                                  struct dentry *parent,
7447                                  void *data,
7448                                  const struct file_operations *fops)
7449 {
7450         struct dentry *ret;
7451
7452         ret = tracefs_create_file(name, mode, parent, data, fops);
7453         if (!ret)
7454                 pr_warn("Could not create tracefs '%s' entry\n", name);
7455
7456         return ret;
7457 }
7458
7459
7460 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7461 {
7462         struct dentry *d_tracer;
7463
7464         if (tr->options)
7465                 return tr->options;
7466
7467         d_tracer = tracing_get_dentry(tr);
7468         if (IS_ERR(d_tracer))
7469                 return NULL;
7470
7471         tr->options = tracefs_create_dir("options", d_tracer);
7472         if (!tr->options) {
7473                 pr_warn("Could not create tracefs directory 'options'\n");
7474                 return NULL;
7475         }
7476
7477         return tr->options;
7478 }
7479
7480 static void
7481 create_trace_option_file(struct trace_array *tr,
7482                          struct trace_option_dentry *topt,
7483                          struct tracer_flags *flags,
7484                          struct tracer_opt *opt)
7485 {
7486         struct dentry *t_options;
7487
7488         t_options = trace_options_init_dentry(tr);
7489         if (!t_options)
7490                 return;
7491
7492         topt->flags = flags;
7493         topt->opt = opt;
7494         topt->tr = tr;
7495
7496         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7497                                     &trace_options_fops);
7498
7499 }
7500
7501 static void
7502 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7503 {
7504         struct trace_option_dentry *topts;
7505         struct trace_options *tr_topts;
7506         struct tracer_flags *flags;
7507         struct tracer_opt *opts;
7508         int cnt;
7509         int i;
7510
7511         if (!tracer)
7512                 return;
7513
7514         flags = tracer->flags;
7515
7516         if (!flags || !flags->opts)
7517                 return;
7518
7519         /*
7520          * If this is an instance, only create flags for tracers
7521          * the instance may have.
7522          */
7523         if (!trace_ok_for_array(tracer, tr))
7524                 return;
7525
7526         for (i = 0; i < tr->nr_topts; i++) {
7527                 /* Make sure there are no duplicate flags. */
7528                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7529                         return;
7530         }
7531
7532         opts = flags->opts;
7533
7534         for (cnt = 0; opts[cnt].name; cnt++)
7535                 ;
7536
7537         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7538         if (!topts)
7539                 return;
7540
7541         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7542                             GFP_KERNEL);
7543         if (!tr_topts) {
7544                 kfree(topts);
7545                 return;
7546         }
7547
7548         tr->topts = tr_topts;
7549         tr->topts[tr->nr_topts].tracer = tracer;
7550         tr->topts[tr->nr_topts].topts = topts;
7551         tr->nr_topts++;
7552
7553         for (cnt = 0; opts[cnt].name; cnt++) {
7554                 create_trace_option_file(tr, &topts[cnt], flags,
7555                                          &opts[cnt]);
7556                 WARN_ONCE(topts[cnt].entry == NULL,
7557                           "Failed to create trace option: %s",
7558                           opts[cnt].name);
7559         }
7560 }
7561
7562 static struct dentry *
7563 create_trace_option_core_file(struct trace_array *tr,
7564                               const char *option, long index)
7565 {
7566         struct dentry *t_options;
7567
7568         t_options = trace_options_init_dentry(tr);
7569         if (!t_options)
7570                 return NULL;
7571
7572         return trace_create_file(option, 0644, t_options,
7573                                  (void *)&tr->trace_flags_index[index],
7574                                  &trace_options_core_fops);
7575 }
7576
7577 static void create_trace_options_dir(struct trace_array *tr)
7578 {
7579         struct dentry *t_options;
7580         bool top_level = tr == &global_trace;
7581         int i;
7582
7583         t_options = trace_options_init_dentry(tr);
7584         if (!t_options)
7585                 return;
7586
7587         for (i = 0; trace_options[i]; i++) {
7588                 if (top_level ||
7589                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7590                         create_trace_option_core_file(tr, trace_options[i], i);
7591         }
7592 }
7593
7594 static ssize_t
7595 rb_simple_read(struct file *filp, char __user *ubuf,
7596                size_t cnt, loff_t *ppos)
7597 {
7598         struct trace_array *tr = filp->private_data;
7599         char buf[64];
7600         int r;
7601
7602         r = tracer_tracing_is_on(tr);
7603         r = sprintf(buf, "%d\n", r);
7604
7605         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7606 }
7607
7608 static ssize_t
7609 rb_simple_write(struct file *filp, const char __user *ubuf,
7610                 size_t cnt, loff_t *ppos)
7611 {
7612         struct trace_array *tr = filp->private_data;
7613         struct ring_buffer *buffer = tr->trace_buffer.buffer;
7614         unsigned long val;
7615         int ret;
7616
7617         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7618         if (ret)
7619                 return ret;
7620
7621         if (buffer) {
7622                 mutex_lock(&trace_types_lock);
7623                 if (val) {
7624                         tracer_tracing_on(tr);
7625                         if (tr->current_trace->start)
7626                                 tr->current_trace->start(tr);
7627                 } else {
7628                         tracer_tracing_off(tr);
7629                         if (tr->current_trace->stop)
7630                                 tr->current_trace->stop(tr);
7631                 }
7632                 mutex_unlock(&trace_types_lock);
7633         }
7634
7635         (*ppos)++;
7636
7637         return cnt;
7638 }
7639
7640 static const struct file_operations rb_simple_fops = {
7641         .open           = tracing_open_generic_tr,
7642         .read           = rb_simple_read,
7643         .write          = rb_simple_write,
7644         .release        = tracing_release_generic_tr,
7645         .llseek         = default_llseek,
7646 };
7647
7648 struct dentry *trace_instance_dir;
7649
7650 static void
7651 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7652
7653 static int
7654 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7655 {
7656         enum ring_buffer_flags rb_flags;
7657
7658         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7659
7660         buf->tr = tr;
7661
7662         buf->buffer = ring_buffer_alloc(size, rb_flags);
7663         if (!buf->buffer)
7664                 return -ENOMEM;
7665
7666         buf->data = alloc_percpu(struct trace_array_cpu);
7667         if (!buf->data) {
7668                 ring_buffer_free(buf->buffer);
7669                 buf->buffer = NULL;
7670                 return -ENOMEM;
7671         }
7672
7673         /* Allocate the first page for all buffers */
7674         set_buffer_entries(&tr->trace_buffer,
7675                            ring_buffer_size(tr->trace_buffer.buffer, 0));
7676
7677         return 0;
7678 }
7679
7680 static int allocate_trace_buffers(struct trace_array *tr, int size)
7681 {
7682         int ret;
7683
7684         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7685         if (ret)
7686                 return ret;
7687
7688 #ifdef CONFIG_TRACER_MAX_TRACE
7689         ret = allocate_trace_buffer(tr, &tr->max_buffer,
7690                                     allocate_snapshot ? size : 1);
7691         if (WARN_ON(ret)) {
7692                 ring_buffer_free(tr->trace_buffer.buffer);
7693                 tr->trace_buffer.buffer = NULL;
7694                 free_percpu(tr->trace_buffer.data);
7695                 tr->trace_buffer.data = NULL;
7696                 return -ENOMEM;
7697         }
7698         tr->allocated_snapshot = allocate_snapshot;
7699
7700         /*
7701          * Only the top level trace array gets its snapshot allocated
7702          * from the kernel command line.
7703          */
7704         allocate_snapshot = false;
7705 #endif
7706         return 0;
7707 }
7708
7709 static void free_trace_buffer(struct trace_buffer *buf)
7710 {
7711         if (buf->buffer) {
7712                 ring_buffer_free(buf->buffer);
7713                 buf->buffer = NULL;
7714                 free_percpu(buf->data);
7715                 buf->data = NULL;
7716         }
7717 }
7718
7719 static void free_trace_buffers(struct trace_array *tr)
7720 {
7721         if (!tr)
7722                 return;
7723
7724         free_trace_buffer(&tr->trace_buffer);
7725
7726 #ifdef CONFIG_TRACER_MAX_TRACE
7727         free_trace_buffer(&tr->max_buffer);
7728 #endif
7729 }
7730
7731 static void init_trace_flags_index(struct trace_array *tr)
7732 {
7733         int i;
7734
7735         /* Used by the trace options files */
7736         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7737                 tr->trace_flags_index[i] = i;
7738 }
7739
7740 static void __update_tracer_options(struct trace_array *tr)
7741 {
7742         struct tracer *t;
7743
7744         for (t = trace_types; t; t = t->next)
7745                 add_tracer_options(tr, t);
7746 }
7747
7748 static void update_tracer_options(struct trace_array *tr)
7749 {
7750         mutex_lock(&trace_types_lock);
7751         __update_tracer_options(tr);
7752         mutex_unlock(&trace_types_lock);
7753 }
7754
7755 static int instance_mkdir(const char *name)
7756 {
7757         struct trace_array *tr;
7758         int ret;
7759
7760         mutex_lock(&event_mutex);
7761         mutex_lock(&trace_types_lock);
7762
7763         ret = -EEXIST;
7764         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7765                 if (tr->name && strcmp(tr->name, name) == 0)
7766                         goto out_unlock;
7767         }
7768
7769         ret = -ENOMEM;
7770         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7771         if (!tr)
7772                 goto out_unlock;
7773
7774         tr->name = kstrdup(name, GFP_KERNEL);
7775         if (!tr->name)
7776                 goto out_free_tr;
7777
7778         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7779                 goto out_free_tr;
7780
7781         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7782
7783         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7784
7785         raw_spin_lock_init(&tr->start_lock);
7786
7787         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7788
7789         tr->current_trace = &nop_trace;
7790
7791         INIT_LIST_HEAD(&tr->systems);
7792         INIT_LIST_HEAD(&tr->events);
7793         INIT_LIST_HEAD(&tr->hist_vars);
7794
7795         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7796                 goto out_free_tr;
7797
7798         tr->dir = tracefs_create_dir(name, trace_instance_dir);
7799         if (!tr->dir)
7800                 goto out_free_tr;
7801
7802         ret = event_trace_add_tracer(tr->dir, tr);
7803         if (ret) {
7804                 tracefs_remove_recursive(tr->dir);
7805                 goto out_free_tr;
7806         }
7807
7808         ftrace_init_trace_array(tr);
7809
7810         init_tracer_tracefs(tr, tr->dir);
7811         init_trace_flags_index(tr);
7812         __update_tracer_options(tr);
7813
7814         list_add(&tr->list, &ftrace_trace_arrays);
7815
7816         mutex_unlock(&trace_types_lock);
7817         mutex_unlock(&event_mutex);
7818
7819         return 0;
7820
7821  out_free_tr:
7822         free_trace_buffers(tr);
7823         free_cpumask_var(tr->tracing_cpumask);
7824         kfree(tr->name);
7825         kfree(tr);
7826
7827  out_unlock:
7828         mutex_unlock(&trace_types_lock);
7829         mutex_unlock(&event_mutex);
7830
7831         return ret;
7832
7833 }
7834
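/*
 * Editorial note (not part of the original source): instance_mkdir() above
 * and instance_rmdir() below are wired up as the mkdir/rmdir callbacks of
 * the tracefs "instances" directory in create_trace_instances(), so
 * creating or removing a directory under instances/ builds or tears down
 * an entire trace_array.
 */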
7835 static int instance_rmdir(const char *name)
7836 {
7837         struct trace_array *tr;
7838         int found = 0;
7839         int ret;
7840         int i;
7841
7842         mutex_lock(&event_mutex);
7843         mutex_lock(&trace_types_lock);
7844
7845         ret = -ENODEV;
7846         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7847                 if (tr->name && strcmp(tr->name, name) == 0) {
7848                         found = 1;
7849                         break;
7850                 }
7851         }
7852         if (!found)
7853                 goto out_unlock;
7854
7855         ret = -EBUSY;
7856         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7857                 goto out_unlock;
7858
7859         list_del(&tr->list);
7860
7861         /* Disable all the flags that were enabled coming in */
7862         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7863                 if ((1 << i) & ZEROED_TRACE_FLAGS)
7864                         set_tracer_flag(tr, 1 << i, 0);
7865         }
7866
7867         tracing_set_nop(tr);
7868         clear_ftrace_function_probes(tr);
7869         event_trace_del_tracer(tr);
7870         ftrace_clear_pids(tr);
7871         ftrace_destroy_function_files(tr);
7872         tracefs_remove_recursive(tr->dir);
7873         free_trace_buffers(tr);
7874
7875         for (i = 0; i < tr->nr_topts; i++) {
7876                 kfree(tr->topts[i].topts);
7877         }
7878         kfree(tr->topts);
7879
7880         free_cpumask_var(tr->tracing_cpumask);
7881         kfree(tr->name);
7882         kfree(tr);
7883
7884         ret = 0;
7885
7886  out_unlock:
7887         mutex_unlock(&trace_types_lock);
7888         mutex_unlock(&event_mutex);
7889
7890         return ret;
7891 }
7892
7893 static __init void create_trace_instances(struct dentry *d_tracer)
7894 {
7895         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7896                                                          instance_mkdir,
7897                                                          instance_rmdir);
7898         if (WARN_ON(!trace_instance_dir))
7899                 return;
7900 }
7901
7902 static void
7903 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
7904 {
7905         struct trace_event_file *file;
7906         int cpu;
7907
7908         trace_create_file("available_tracers", 0444, d_tracer,
7909                         tr, &show_traces_fops);
7910
7911         trace_create_file("current_tracer", 0644, d_tracer,
7912                         tr, &set_tracer_fops);
7913
7914         trace_create_file("tracing_cpumask", 0644, d_tracer,
7915                           tr, &tracing_cpumask_fops);
7916
7917         trace_create_file("trace_options", 0644, d_tracer,
7918                           tr, &tracing_iter_fops);
7919
7920         trace_create_file("trace", 0644, d_tracer,
7921                           tr, &tracing_fops);
7922
7923         trace_create_file("trace_pipe", 0444, d_tracer,
7924                           tr, &tracing_pipe_fops);
7925
7926         trace_create_file("buffer_size_kb", 0644, d_tracer,
7927                           tr, &tracing_entries_fops);
7928
7929         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
7930                           tr, &tracing_total_entries_fops);
7931
7932         trace_create_file("free_buffer", 0200, d_tracer,
7933                           tr, &tracing_free_buffer_fops);
7934
7935         trace_create_file("trace_marker", 0220, d_tracer,
7936                           tr, &tracing_mark_fops);
7937
7938         file = __find_event_file(tr, "ftrace", "print");
7939         if (file && file->dir)
7940                 trace_create_file("trigger", 0644, file->dir, file,
7941                                   &event_trigger_fops);
7942         tr->trace_marker_file = file;
7943
7944         trace_create_file("trace_marker_raw", 0220, d_tracer,
7945                           tr, &tracing_mark_raw_fops);
7946
7947         trace_create_file("trace_clock", 0644, d_tracer, tr,
7948                           &trace_clock_fops);
7949
7950         trace_create_file("tracing_on", 0644, d_tracer,
7951                           tr, &rb_simple_fops);
7952
7953         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
7954                           &trace_time_stamp_mode_fops);
7955
7956         create_trace_options_dir(tr);
7957
7958 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7959         trace_create_file("tracing_max_latency", 0644, d_tracer,
7960                         &tr->max_latency, &tracing_max_lat_fops);
7961 #endif
7962
7963         if (ftrace_create_function_files(tr, d_tracer))
7964                 WARN(1, "Could not allocate function filter files");
7965
7966 #ifdef CONFIG_TRACER_SNAPSHOT
7967         trace_create_file("snapshot", 0644, d_tracer,
7968                           tr, &snapshot_fops);
7969 #endif
7970
7971         for_each_tracing_cpu(cpu)
7972                 tracing_init_tracefs_percpu(tr, cpu);
7973
7974         ftrace_init_tracefs(tr, d_tracer);
7975 }
7976
7977 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
7978 {
7979         struct vfsmount *mnt;
7980         struct file_system_type *type;
7981
7982         /*
7983          * To maintain backward compatibility for tools that mount
7984          * debugfs to get to the tracing facility, tracefs is automatically
7985          * mounted to the debugfs/tracing directory.
7986          */
7987         type = get_fs_type("tracefs");
7988         if (!type)
7989                 return NULL;
7990         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
7991         put_filesystem(type);
7992         if (IS_ERR(mnt))
7993                 return NULL;
7994         mntget(mnt);
7995
7996         return mnt;
7997 }
7998
7999 /**
8000  * tracing_init_dentry - initialize top level trace array
8001  *
8002  * This is called when creating files or directories in the tracing
8003  * directory. It is called via fs_initcall() by any of the boot up code
8004  * and is expected to return the dentry of the top level tracing directory.
8005  */
8006 struct dentry *tracing_init_dentry(void)
8007 {
8008         struct trace_array *tr = &global_trace;
8009
8010         /* The top level trace array uses  NULL as parent */
8011         if (tr->dir)
8012                 return NULL;
8013
8014         if (WARN_ON(!tracefs_initialized()) ||
8015                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8016                  WARN_ON(!debugfs_initialized())))
8017                 return ERR_PTR(-ENODEV);
8018
8019         /*
8020          * As there may still be users that expect the tracing
8021          * files to exist in debugfs/tracing, we must automount
8022          * the tracefs file system there, so older tools still
8023          * work with the newer kernel.
8024          */
8025         tr->dir = debugfs_create_automount("tracing", NULL,
8026                                            trace_automount, NULL);
8027         if (!tr->dir) {
8028                 pr_warn_once("Could not create debugfs directory 'tracing'\n");
8029                 return ERR_PTR(-ENOMEM);
8030         }
8031
8032         return NULL;
8033 }
8034
8035 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8036 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8037
8038 static void __init trace_eval_init(void)
8039 {
8040         int len;
8041
8042         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8043         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8044 }
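/*
 * A minimal sketch of where these section symbols come from: trace event
 * headers use TRACE_DEFINE_ENUM() to emit a struct trace_eval_map entry
 * that the linker gathers between __start_ftrace_eval_maps and
 * __stop_ftrace_eval_maps. MY_STATE below is a made-up enumerator, used
 * purely for illustration:
 *
 *	TRACE_DEFINE_ENUM(MY_STATE);
 *
 * Each such entry is what trace_insert_eval_map() consumes so that the
 * enumerator name can be resolved when an event's print format is shown.
 */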
8045
8046 #ifdef CONFIG_MODULES
8047 static void trace_module_add_evals(struct module *mod)
8048 {
8049         if (!mod->num_trace_evals)
8050                 return;
8051
8052         /*
8053          * Modules with bad taint do not have events created; do
8054          * not bother with their eval maps either.
8055          */
8056         if (trace_module_has_bad_taint(mod))
8057                 return;
8058
8059         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8060 }
8061
8062 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8063 static void trace_module_remove_evals(struct module *mod)
8064 {
8065         union trace_eval_map_item *map;
8066         union trace_eval_map_item **last = &trace_eval_maps;
8067
8068         if (!mod->num_trace_evals)
8069                 return;
8070
8071         mutex_lock(&trace_eval_mutex);
8072
8073         map = trace_eval_maps;
8074
8075         while (map) {
8076                 if (map->head.mod == mod)
8077                         break;
8078                 map = trace_eval_jmp_to_tail(map);
8079                 last = &map->tail.next;
8080                 map = map->tail.next;
8081         }
8082         if (!map)
8083                 goto out;
8084
8085         *last = trace_eval_jmp_to_tail(map)->tail.next;
8086         kfree(map);
8087  out:
8088         mutex_unlock(&trace_eval_mutex);
8089 }
8090 #else
8091 static inline void trace_module_remove_evals(struct module *mod) { }
8092 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8093
8094 static int trace_module_notify(struct notifier_block *self,
8095                                unsigned long val, void *data)
8096 {
8097         struct module *mod = data;
8098
8099         switch (val) {
8100         case MODULE_STATE_COMING:
8101                 trace_module_add_evals(mod);
8102                 break;
8103         case MODULE_STATE_GOING:
8104                 trace_module_remove_evals(mod);
8105                 break;
8106         }
8107
8108         return 0;
8109 }
8110
8111 static struct notifier_block trace_module_nb = {
8112         .notifier_call = trace_module_notify,
8113         .priority = 0,
8114 };
8115 #endif /* CONFIG_MODULES */
8116
8117 static __init int tracer_init_tracefs(void)
8118 {
8119         struct dentry *d_tracer;
8120
8121         trace_access_lock_init();
8122
8123         d_tracer = tracing_init_dentry();
8124         if (IS_ERR(d_tracer))
8125                 return 0;
8126
8127         event_trace_init();
8128
8129         init_tracer_tracefs(&global_trace, d_tracer);
8130         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8131
8132         trace_create_file("tracing_thresh", 0644, d_tracer,
8133                         &global_trace, &tracing_thresh_fops);
8134
8135         trace_create_file("README", 0444, d_tracer,
8136                         NULL, &tracing_readme_fops);
8137
8138         trace_create_file("saved_cmdlines", 0444, d_tracer,
8139                         NULL, &tracing_saved_cmdlines_fops);
8140
8141         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8142                           NULL, &tracing_saved_cmdlines_size_fops);
8143
8144         trace_create_file("saved_tgids", 0444, d_tracer,
8145                         NULL, &tracing_saved_tgids_fops);
8146
8147         trace_eval_init();
8148
8149         trace_create_eval_file(d_tracer);
8150
8151 #ifdef CONFIG_MODULES
8152         register_module_notifier(&trace_module_nb);
8153 #endif
8154
8155 #ifdef CONFIG_DYNAMIC_FTRACE
8156         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8157                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8158 #endif
8159
8160         create_trace_instances(d_tracer);
8161
8162         update_tracer_options(&global_trace);
8163
8164         return 0;
8165 }
8166
8167 static int trace_panic_handler(struct notifier_block *this,
8168                                unsigned long event, void *unused)
8169 {
8170         if (ftrace_dump_on_oops)
8171                 ftrace_dump(ftrace_dump_on_oops);
8172         return NOTIFY_OK;
8173 }
8174
8175 static struct notifier_block trace_panic_notifier = {
8176         .notifier_call  = trace_panic_handler,
8177         .next           = NULL,
8178         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8179 };
8180
8181 static int trace_die_handler(struct notifier_block *self,
8182                              unsigned long val,
8183                              void *data)
8184 {
8185         switch (val) {
8186         case DIE_OOPS:
8187                 if (ftrace_dump_on_oops)
8188                         ftrace_dump(ftrace_dump_on_oops);
8189                 break;
8190         default:
8191                 break;
8192         }
8193         return NOTIFY_OK;
8194 }
8195
8196 static struct notifier_block trace_die_notifier = {
8197         .notifier_call = trace_die_handler,
8198         .priority = 200
8199 };
8200
8201 /*
8202  * printk is limited to a maximum of 1024 characters; we really don't
8203  * need it that big. Nothing should be printing 1000 characters anyway.
8204  */
8205 #define TRACE_MAX_PRINT         1000
8206
8207 /*
8208  * Define here KERN_TRACE so that we have one place to modify
8209  * it if we decide to change what log level the ftrace dump
8210  * should be at.
8211  */
8212 #define KERN_TRACE              KERN_EMERG
8213
8214 void
8215 trace_printk_seq(struct trace_seq *s)
8216 {
8217         /* Probably should print a warning here. */
8218         if (s->seq.len >= TRACE_MAX_PRINT)
8219                 s->seq.len = TRACE_MAX_PRINT;
8220
8221         /*
8222          * More paranoid code. Although the buffer size is set to
8223          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8224          * an extra layer of protection.
8225          */
8226         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8227                 s->seq.len = s->seq.size - 1;
8228
8229         /* should be zero terminated, but we are paranoid. */
8230         s->buffer[s->seq.len] = 0;
8231
8232         printk(KERN_TRACE "%s", s->buffer);
8233
8234         trace_seq_init(s);
8235 }
8236
8237 void trace_init_global_iter(struct trace_iterator *iter)
8238 {
8239         iter->tr = &global_trace;
8240         iter->trace = iter->tr->current_trace;
8241         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8242         iter->trace_buffer = &global_trace.trace_buffer;
8243
8244         if (iter->trace && iter->trace->open)
8245                 iter->trace->open(iter);
8246
8247         /* Annotate start of buffers if we had overruns */
8248         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8249                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8250
8251         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8252         if (trace_clocks[iter->tr->clock_id].in_ns)
8253                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8254 }
8255
8256 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8257 {
8258         /* use static because iter can be a bit big for the stack */
8259         static struct trace_iterator iter;
8260         static atomic_t dump_running;
8261         struct trace_array *tr = &global_trace;
8262         unsigned int old_userobj;
8263         unsigned long flags;
8264         int cnt = 0, cpu;
8265
8266         /* Only allow one dump user at a time. */
8267         if (atomic_inc_return(&dump_running) != 1) {
8268                 atomic_dec(&dump_running);
8269                 return;
8270         }
8271
8272         /*
8273          * Always turn off tracing when we dump.
8274          * We don't need to show trace output of what happens
8275          * between multiple crashes.
8276          *
8277          * If the user does a sysrq-z, then they can re-enable
8278          * tracing with echo 1 > tracing_on.
8279          */
8280         tracing_off();
8281
8282         local_irq_save(flags);
8283
8284         /* Simulate the iterator */
8285         trace_init_global_iter(&iter);
8286
8287         for_each_tracing_cpu(cpu) {
8288                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8289         }
8290
8291         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8292
8293         /* don't look at user memory in panic mode */
8294         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8295
8296         switch (oops_dump_mode) {
8297         case DUMP_ALL:
8298                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8299                 break;
8300         case DUMP_ORIG:
8301                 iter.cpu_file = raw_smp_processor_id();
8302                 break;
8303         case DUMP_NONE:
8304                 goto out_enable;
8305         default:
8306                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8307                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8308         }
8309
8310         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8311
8312         /* Did function tracer already get disabled? */
8313         if (ftrace_is_dead()) {
8314                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8315                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8316         }
8317
8318         /*
8319          * We need to stop all tracing on all CPUS to read
8320          * the next buffer. This is a bit expensive, but is
8321          * not done often. We fill in all that we can read,
8322          * and then release the locks again.
8323          */
8324
8325         while (!trace_empty(&iter)) {
8326
8327                 if (!cnt)
8328                         printk(KERN_TRACE "---------------------------------\n");
8329
8330                 cnt++;
8331
8332                 /* reset all but tr, trace, and overruns */
8333                 memset(&iter.seq, 0,
8334                        sizeof(struct trace_iterator) -
8335                        offsetof(struct trace_iterator, seq));
8336                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8337                 iter.pos = -1;
8338
8339                 if (trace_find_next_entry_inc(&iter) != NULL) {
8340                         int ret;
8341
8342                         ret = print_trace_line(&iter);
8343                         if (ret != TRACE_TYPE_NO_CONSUME)
8344                                 trace_consume(&iter);
8345                 }
8346                 touch_nmi_watchdog();
8347
8348                 trace_printk_seq(&iter.seq);
8349         }
8350
8351         if (!cnt)
8352                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8353         else
8354                 printk(KERN_TRACE "---------------------------------\n");
8355
8356  out_enable:
8357         tr->trace_flags |= old_userobj;
8358
8359         for_each_tracing_cpu(cpu) {
8360                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8361         }
8362         atomic_dec(&dump_running);
8363         local_irq_restore(flags);
8364 }
8365 EXPORT_SYMBOL_GPL(ftrace_dump);
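/*
 * As ftrace_dump() is exported, other kernel code may dump the ring
 * buffers to the console from a fatal error path. A minimal sketch
 * (my_driver_die() is a hypothetical caller, not part of this file):
 *
 *	static void my_driver_die(void)
 *	{
 *		pr_emerg("my_driver: fatal state, dumping trace\n");
 *		ftrace_dump(DUMP_ALL);
 *	}
 *
 * DUMP_ORIG dumps only the CPU that triggered the error instead.
 */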
8366
8367 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8368 {
8369         char **argv;
8370         int argc, ret;
8371
8372         argc = 0;
8373         ret = 0;
8374         argv = argv_split(GFP_KERNEL, buf, &argc);
8375         if (!argv)
8376                 return -ENOMEM;
8377
8378         if (argc)
8379                 ret = createfn(argc, argv);
8380
8381         argv_free(argv);
8382
8383         return ret;
8384 }
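/*
 * The createfn callback receives the command line already split into an
 * argv array. A minimal sketch of a conforming callback (my_create_cmd
 * is a hypothetical name, not part of this file):
 *
 *	static int my_create_cmd(int argc, char **argv)
 *	{
 *		if (argc < 1)
 *			return -EINVAL;
 *		pr_info("cmd '%s' with %d argument(s)\n", argv[0], argc - 1);
 *		return 0;
 *	}
 *
 *	ret = trace_run_command("foo bar baz", my_create_cmd);
 */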
8385
8386 #define WRITE_BUFSIZE  4096
8387
8388 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
8389                                 size_t count, loff_t *ppos,
8390                                 int (*createfn)(int, char **))
8391 {
8392         char *kbuf, *buf, *tmp;
8393         int ret = 0;
8394         size_t done = 0;
8395         size_t size;
8396
8397         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
8398         if (!kbuf)
8399                 return -ENOMEM;
8400
8401         while (done < count) {
8402                 size = count - done;
8403
8404                 if (size >= WRITE_BUFSIZE)
8405                         size = WRITE_BUFSIZE - 1;
8406
8407                 if (copy_from_user(kbuf, buffer + done, size)) {
8408                         ret = -EFAULT;
8409                         goto out;
8410                 }
8411                 kbuf[size] = '\0';
8412                 buf = kbuf;
8413                 do {
8414                         tmp = strchr(buf, '\n');
8415                         if (tmp) {
8416                                 *tmp = '\0';
8417                                 size = tmp - buf + 1;
8418                         } else {
8419                                 size = strlen(buf);
8420                                 if (done + size < count) {
8421                                         if (buf != kbuf)
8422                                                 break;
8423                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
8424                                         pr_warn("Line length is too long: Should be less than %d\n",
8425                                                 WRITE_BUFSIZE - 2);
8426                                         ret = -EINVAL;
8427                                         goto out;
8428                                 }
8429                         }
8430                         done += size;
8431
8432                         /* Remove comments */
8433                         tmp = strchr(buf, '#');
8434
8435                         if (tmp)
8436                                 *tmp = '\0';
8437
8438                         ret = trace_run_command(buf, createfn);
8439                         if (ret)
8440                                 goto out;
8441                         buf += size;
8442
8443                 } while (done < count);
8444         }
8445         ret = done;
8446
8447 out:
8448         kfree(kbuf);
8449
8450         return ret;
8451 }
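/*
 * Probe definition files typically delegate their ->write handler to
 * trace_parse_run_command(). A hedged sketch of such a handler, with
 * my_probes_write() and my_create_probe() as hypothetical names:
 *
 *	static ssize_t my_probes_write(struct file *file,
 *				       const char __user *buffer,
 *				       size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       my_create_probe);
 *	}
 */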
8452
8453 __init static int tracer_alloc_buffers(void)
8454 {
8455         int ring_buf_size;
8456         int ret = -ENOMEM;
8457
8458         /*
8459          * Make sure we don't accidentally add more trace options
8460          * than we have bits for.
8461          */
8462         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
8463
8464         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
8465                 goto out;
8466
8467         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
8468                 goto out_free_buffer_mask;
8469
8470         /* Only allocate trace_printk buffers if a trace_printk exists */
8471         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
8472                 /* Must be called before global_trace.buffer is allocated */
8473                 trace_printk_init_buffers();
8474
8475         /* To save memory, keep the ring buffer size to its minimum */
8476         if (ring_buffer_expanded)
8477                 ring_buf_size = trace_buf_size;
8478         else
8479                 ring_buf_size = 1;
8480
8481         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
8482         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
8483
8484         raw_spin_lock_init(&global_trace.start_lock);
8485
8486         /*
8487          * The prepare callbacks allocate some memory for the ring buffer. We
8488          * don't free the buffer if the CPU goes down. If we were to free
8489          * the buffer, then the user would lose any trace that was in the
8490          * buffer. The memory will be removed once the "instance" is removed.
8491          */
8492         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
8493                                       "trace/RB:prepare", trace_rb_cpu_prepare,
8494                                       NULL);
8495         if (ret < 0)
8496                 goto out_free_cpumask;
8497         /* Used for event triggers */
8498         ret = -ENOMEM;
8499         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
8500         if (!temp_buffer)
8501                 goto out_rm_hp_state;
8502
8503         if (trace_create_savedcmd() < 0)
8504                 goto out_free_temp_buffer;
8505
8506         /* TODO: make the number of buffers hot pluggable with CPUS */
8507         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
8508                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
8509                 WARN_ON(1);
8510                 goto out_free_savedcmd;
8511         }
8512
8513         if (global_trace.buffer_disabled)
8514                 tracing_off();
8515
8516         if (trace_boot_clock) {
8517                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
8518                 if (ret < 0)
8519                         pr_warn("Trace clock %s not defined, going back to default\n",
8520                                 trace_boot_clock);
8521         }
8522
8523         /*
8524          * register_tracer() might reference current_trace, so it
8525          * needs to be set before we register anything. This is
8526          * just a bootstrap of current_trace anyway.
8527          */
8528         global_trace.current_trace = &nop_trace;
8529
8530         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8531
8532         ftrace_init_global_array_ops(&global_trace);
8533
8534         init_trace_flags_index(&global_trace);
8535
8536         register_tracer(&nop_trace);
8537
8538         /* Function tracing may start here (via kernel command line) */
8539         init_function_trace();
8540
8541         /* All seems OK, enable tracing */
8542         tracing_disabled = 0;
8543
8544         atomic_notifier_chain_register(&panic_notifier_list,
8545                                        &trace_panic_notifier);
8546
8547         register_die_notifier(&trace_die_notifier);
8548
8549         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
8550
8551         INIT_LIST_HEAD(&global_trace.systems);
8552         INIT_LIST_HEAD(&global_trace.events);
8553         INIT_LIST_HEAD(&global_trace.hist_vars);
8554         list_add(&global_trace.list, &ftrace_trace_arrays);
8555
8556         apply_trace_boot_options();
8557
8558         register_snapshot_cmd();
8559
8560         return 0;
8561
8562 out_free_savedcmd:
8563         free_saved_cmdlines_buffer(savedcmd);
8564 out_free_temp_buffer:
8565         ring_buffer_free(temp_buffer);
8566 out_rm_hp_state:
8567         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
8568 out_free_cpumask:
8569         free_cpumask_var(global_trace.tracing_cpumask);
8570 out_free_buffer_mask:
8571         free_cpumask_var(tracing_buffer_mask);
8572 out:
8573         return ret;
8574 }
8575
8576 void __init early_trace_init(void)
8577 {
8578         if (tracepoint_printk) {
8579                 tracepoint_print_iter =
8580                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
8581                 if (WARN_ON(!tracepoint_print_iter))
8582                         tracepoint_printk = 0;
8583                 else
8584                         static_key_enable(&tracepoint_printk_key.key);
8585         }
8586         tracer_alloc_buffers();
8587 }
8588
8589 void __init trace_init(void)
8590 {
8591         trace_event_init();
8592 }
8593
8594 __init static int clear_boot_tracer(void)
8595 {
8596         /*
8597          * The default bootup tracer name points into an init section.
8598          * This function is called from a late initcall. If we did not
8599          * find the boot tracer, then clear it out, to prevent
8600          * later registration from accessing the buffer that is
8601          * about to be freed.
8602          */
8603         if (!default_bootup_tracer)
8604                 return 0;
8605
8606         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
8607                default_bootup_tracer);
8608         default_bootup_tracer = NULL;
8609
8610         return 0;
8611 }
8612
8613 fs_initcall(tracer_init_tracefs);
8614 late_initcall_sync(clear_boot_tracer);
8615
8616 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
8617 __init static int tracing_set_default_clock(void)
8618 {
8619         /* sched_clock_stable() is determined in late_initcall */
8620         if (!trace_boot_clock && !sched_clock_stable()) {
8621                 printk(KERN_WARNING
8622                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
8623                        "If you want to keep using the local clock, then add:\n"
8624                        "  \"trace_clock=local\"\n"
8625                        "on the kernel command line\n");
8626                 tracing_set_clock(&global_trace, "global");
8627         }
8628
8629         return 0;
8630 }
8631 late_initcall_sync(tracing_set_default_clock);
8632 #endif