kernel/trace/trace.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/notifier.h>
23 #include <linux/irqflags.h>
24 #include <linux/debugfs.h>
25 #include <linux/tracefs.h>
26 #include <linux/pagemap.h>
27 #include <linux/hardirq.h>
28 #include <linux/linkage.h>
29 #include <linux/uaccess.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48
49 #include "trace.h"
50 #include "trace_output.h"
51
52 /*
53  * On boot up, the ring buffer is set to the minimum size, so that
54  * we do not waste memory on systems that are not using tracing.
55  */
56 bool ring_buffer_expanded;
57
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring buffer to count the
61  * entries inserted during the selftest, although some concurrent
62  * insertions into the ring buffer, such as trace_printk(), could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66
67 /*
68  * If a tracer is running, we do not want to run SELFTEST.
69  */
70 bool __read_mostly tracing_selftest_disabled;
71
72 /* Pipe tracepoints to printk */
73 struct trace_iterator *tracepoint_print_iter;
74 int tracepoint_printk;
75 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
76
77 /* For tracers that don't implement custom flags */
78 static struct tracer_opt dummy_tracer_opt[] = {
79         { }
80 };
81
82 static int
83 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
84 {
85         return 0;
86 }
87
88 /*
89  * To prevent the comm cache from being overwritten when no
90  * tracing is active, only save the comm when a trace event
91  * occurred.
92  */
93 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
94
95 /*
96  * Kill all tracing for good (never come back).
97  * It is initialized to 1 but will turn to zero if the initialization
98  * of the tracer is successful. But that is the only place that sets
99  * this back to zero.
100  */
101 static int tracing_disabled = 1;
102
103 cpumask_var_t __read_mostly     tracing_buffer_mask;
104
105 /*
106  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
107  *
108  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
109  * is set, then ftrace_dump is called. This will output the contents
110  * of the ftrace buffers to the console.  This is very useful for
111  * capturing traces that lead to crashes and outputting them to a
112  * serial console.
113  *
114  * It is default off, but you can enable it with either specifying
115  * "ftrace_dump_on_oops" in the kernel command line, or setting
116  * /proc/sys/kernel/ftrace_dump_on_oops
117  * Set 1 if you want to dump buffers of all CPUs
118  * Set 2 if you want to dump the buffer of the CPU that triggered oops
119  */
120
121 enum ftrace_dump_mode ftrace_dump_on_oops;
122
123 /* When set, tracing will stop when a WARN*() is hit */
124 int __disable_trace_on_warning;
125
126 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
127 /* Map of enums to their values, for "eval_map" file */
128 struct trace_eval_map_head {
129         struct module                   *mod;
130         unsigned long                   length;
131 };
132
133 union trace_eval_map_item;
134
135 struct trace_eval_map_tail {
136         /*
137          * "end" is first and points to NULL as it must be different
138          * than "mod" or "eval_string"
139          */
140         union trace_eval_map_item       *next;
141         const char                      *end;   /* points to NULL */
142 };
143
144 static DEFINE_MUTEX(trace_eval_mutex);
145
146 /*
147  * The trace_eval_maps are saved in an array with two extra elements,
148  * one at the beginning, and one at the end. The beginning item contains
149  * the count of the saved maps (head.length), and the module they
150  * belong to if not built in (head.mod). The ending item contains a
151  * pointer to the next array of saved eval_map items.
152  */
153 union trace_eval_map_item {
154         struct trace_eval_map           map;
155         struct trace_eval_map_head      head;
156         struct trace_eval_map_tail      tail;
157 };
158
159 static union trace_eval_map_item *trace_eval_maps;
160 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
161
162 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
163 static void ftrace_trace_userstack(struct ring_buffer *buffer,
164                                    unsigned long flags, int pc);
165
166 #define MAX_TRACER_SIZE         100
167 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
168 static char *default_bootup_tracer;
169
170 static bool allocate_snapshot;
171
172 static int __init set_cmdline_ftrace(char *str)
173 {
174         strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
175         default_bootup_tracer = bootup_tracer_buf;
176         /* We are using ftrace early, expand it */
177         ring_buffer_expanded = true;
178         return 1;
179 }
180 __setup("ftrace=", set_cmdline_ftrace);
181
182 static int __init set_ftrace_dump_on_oops(char *str)
183 {
184         if (*str++ != '=' || !*str) {
185                 ftrace_dump_on_oops = DUMP_ALL;
186                 return 1;
187         }
188
189         if (!strcmp("orig_cpu", str)) {
190                 ftrace_dump_on_oops = DUMP_ORIG;
191                 return 1;
192         }
193
194         return 0;
195 }
196 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
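/*
 * Editorial example (not part of the original file): the handler above
 * accepts either "ftrace_dump_on_oops" on the kernel command line, which
 * selects DUMP_ALL, or "ftrace_dump_on_oops=orig_cpu", which selects
 * DUMP_ORIG. At run time the same choice can be made through the sysctl
 * mentioned in the comment further up, e.g.:
 *
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops   # dump all CPUs
 *   echo 2 > /proc/sys/kernel/ftrace_dump_on_oops   # only the oops CPU
 */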
197
198 static int __init stop_trace_on_warning(char *str)
199 {
200         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
201                 __disable_trace_on_warning = 1;
202         return 1;
203 }
204 __setup("traceoff_on_warning", stop_trace_on_warning);
205
206 static int __init boot_alloc_snapshot(char *str)
207 {
208         allocate_snapshot = true;
209         /* We also need the main ring buffer expanded */
210         ring_buffer_expanded = true;
211         return 1;
212 }
213 __setup("alloc_snapshot", boot_alloc_snapshot);
214
215
216 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
217
218 static int __init set_trace_boot_options(char *str)
219 {
220         strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
221         return 0;
222 }
223 __setup("trace_options=", set_trace_boot_options);
224
225 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
226 static char *trace_boot_clock __initdata;
227
228 static int __init set_trace_boot_clock(char *str)
229 {
230         strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
231         trace_boot_clock = trace_boot_clock_buf;
232         return 0;
233 }
234 __setup("trace_clock=", set_trace_boot_clock);
235
236 static int __init set_tracepoint_printk(char *str)
237 {
238         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
239                 tracepoint_printk = 1;
240         return 1;
241 }
242 __setup("tp_printk", set_tracepoint_printk);
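/*
 * Editorial example (not part of the original file): the __setup() hooks
 * above can be combined on the kernel command line, for instance:
 *
 *   ftrace=function trace_options=nooverwrite trace_clock=global \
 *   alloc_snapshot traceoff_on_warning tp_printk
 *
 * "function" is one of the built-in tracers, "overwrite" is one of the
 * trace_options flags (prefixed with "no" to clear it), and "global" is
 * one of the clocks listed in trace_clocks[] further down in this file.
 */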
243
244 unsigned long long ns2usecs(u64 nsec)
245 {
246         nsec += 500;
247         do_div(nsec, 1000);
248         return nsec;
249 }
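/*
 * Editorial note: with the rounding above, ns2usecs(1500) returns 2,
 * since (1500 + 500) / 1000 == 2; values are rounded to the nearest
 * microsecond rather than truncated.
 */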
250
251 /* trace_flags holds trace_options default values */
252 #define TRACE_DEFAULT_FLAGS                                             \
253         (FUNCTION_DEFAULT_FLAGS |                                       \
254          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
255          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
256          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
257          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
258
259 /* trace_options that are only supported by global_trace */
260 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
261                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
262
263 /* trace_flags that are default zero for instances */
264 #define ZEROED_TRACE_FLAGS \
265         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
266
267 /*
268  * The global_trace is the descriptor that holds the top-level tracing
269  * buffers for the live tracing.
270  */
271 static struct trace_array global_trace = {
272         .trace_flags = TRACE_DEFAULT_FLAGS,
273 };
274
275 LIST_HEAD(ftrace_trace_arrays);
276
277 int trace_array_get(struct trace_array *this_tr)
278 {
279         struct trace_array *tr;
280         int ret = -ENODEV;
281
282         mutex_lock(&trace_types_lock);
283         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
284                 if (tr == this_tr) {
285                         tr->ref++;
286                         ret = 0;
287                         break;
288                 }
289         }
290         mutex_unlock(&trace_types_lock);
291
292         return ret;
293 }
294
295 static void __trace_array_put(struct trace_array *this_tr)
296 {
297         WARN_ON(!this_tr->ref);
298         this_tr->ref--;
299 }
300
301 void trace_array_put(struct trace_array *this_tr)
302 {
303         mutex_lock(&trace_types_lock);
304         __trace_array_put(this_tr);
305         mutex_unlock(&trace_types_lock);
306 }
307
308 int tracing_check_open_get_tr(struct trace_array *tr)
309 {
310         int ret;
311
312         ret = security_locked_down(LOCKDOWN_TRACEFS);
313         if (ret)
314                 return ret;
315
316         if (tracing_disabled)
317                 return -ENODEV;
318
319         if (tr && trace_array_get(tr) < 0)
320                 return -ENODEV;
321
322         return 0;
323 }
324
325 int call_filter_check_discard(struct trace_event_call *call, void *rec,
326                               struct ring_buffer *buffer,
327                               struct ring_buffer_event *event)
328 {
329         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
330             !filter_match_preds(call->filter, rec)) {
331                 __trace_event_discard_commit(buffer, event);
332                 return 1;
333         }
334
335         return 0;
336 }
337
338 void trace_free_pid_list(struct trace_pid_list *pid_list)
339 {
340         vfree(pid_list->pids);
341         kfree(pid_list);
342 }
343
344 /**
345  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
346  * @filtered_pids: The list of pids to check
347  * @search_pid: The PID to find in @filtered_pids
348  *
349  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
350  */
351 bool
352 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
353 {
354         /*
355          * If pid_max changed after filtered_pids was created, we
356          * by default ignore all pids greater than the previous pid_max.
357          */
358         if (search_pid >= filtered_pids->pid_max)
359                 return false;
360
361         return test_bit(search_pid, filtered_pids->pids);
362 }
363
364 /**
365  * trace_ignore_this_task - should a task be ignored for tracing
366  * @filtered_pids: The list of pids to check
367  * @task: The task that should be ignored if not filtered
368  *
369  * Checks if @task should be traced or not from @filtered_pids.
370  * Returns true if @task should *NOT* be traced.
371  * Returns false if @task should be traced.
372  */
373 bool
374 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
375 {
376         /*
377          * Return false, because if filtered_pids does not exist,
378          * all pids are good to trace.
379          */
380         if (!filtered_pids)
381                 return false;
382
383         return !trace_find_filtered_pid(filtered_pids, task->pid);
384 }
385
386 /**
387  * trace_filter_add_remove_task - Add or remove a task from a pid_list
388  * @pid_list: The list to modify
389  * @self: The current task for fork or NULL for exit
390  * @task: The task to add or remove
391  *
392  * If adding a task, if @self is defined, the task is only added if @self
393  * is also included in @pid_list. This happens on fork and tasks should
394  * only be added when the parent is listed. If @self is NULL, then the
395  * @task pid will be removed from the list, which would happen on exit
396  * of a task.
397  */
398 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
399                                   struct task_struct *self,
400                                   struct task_struct *task)
401 {
402         if (!pid_list)
403                 return;
404
405         /* For forks, we only add if the forking task is listed */
406         if (self) {
407                 if (!trace_find_filtered_pid(pid_list, self->pid))
408                         return;
409         }
410
411         /* Sorry, but we don't support pid_max changing after setting */
412         if (task->pid >= pid_list->pid_max)
413                 return;
414
415         /* "self" is set for forks, and NULL for exits */
416         if (self)
417                 set_bit(task->pid, pid_list->pids);
418         else
419                 clear_bit(task->pid, pid_list->pids);
420 }
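/*
 * Editorial sketch (guarded out, not part of the original file): how a
 * fork/exit hook might keep a pid list in sync using the helper above.
 * "example_pid_list" is a hypothetical list maintained by the caller.
 */
#if 0
static struct trace_pid_list *example_pid_list;

static void example_sched_process_fork(void *data,
					struct task_struct *self,
					struct task_struct *task)
{
	/* The child is added only if the forking parent is already listed. */
	trace_filter_add_remove_task(example_pid_list, self, task);
}

static void example_sched_process_exit(void *data, struct task_struct *task)
{
	/* self == NULL means "remove task->pid from the list". */
	trace_filter_add_remove_task(example_pid_list, NULL, task);
}
#endif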
421
422 /**
423  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
424  * @pid_list: The pid list to show
425  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
426  * @pos: The position of the file
427  *
428  * This is used by the seq_file "next" operation to iterate the pids
429  * listed in a trace_pid_list structure.
430  *
431  * Returns the pid+1 as we want to display pid of zero, but NULL would
432  * stop the iteration.
433  */
434 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
435 {
436         unsigned long pid = (unsigned long)v;
437
438         (*pos)++;
439
440         /* pid is already +1 of the actual previous bit */
441         pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
442
443         /* Return pid + 1 to allow zero to be represented */
444         if (pid < pid_list->pid_max)
445                 return (void *)(pid + 1);
446
447         return NULL;
448 }
449
450 /**
451  * trace_pid_start - Used for seq_file to start reading pid lists
452  * @pid_list: The pid list to show
453  * @pos: The position of the file
454  *
455  * This is used by seq_file "start" operation to start the iteration
456  * of listing pids.
457  *
458  * Returns the pid+1 as we want to display pid of zero, but NULL would
459  * stop the iteration.
460  */
461 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
462 {
463         unsigned long pid;
464         loff_t l = 0;
465
466         pid = find_first_bit(pid_list->pids, pid_list->pid_max);
467         if (pid >= pid_list->pid_max)
468                 return NULL;
469
470         /* Return pid + 1 so that zero can be the exit value */
471         for (pid++; pid && l < *pos;
472              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
473                 ;
474         return (void *)pid;
475 }
476
477 /**
478  * trace_pid_show - show the current pid in seq_file processing
479  * @m: The seq_file structure to write into
480  * @v: A void pointer of the pid (+1) value to display
481  *
482  * Can be directly used by seq_file operations to display the current
483  * pid value.
484  */
485 int trace_pid_show(struct seq_file *m, void *v)
486 {
487         unsigned long pid = (unsigned long)v - 1;
488
489         seq_printf(m, "%lu\n", pid);
490         return 0;
491 }
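/*
 * Editorial sketch (guarded out, not part of the original file): wiring
 * the three helpers above into seq_operations. "example_pid_list" is a
 * hypothetical list kept by the caller; real users also take the locks
 * that protect their list in the start/stop callbacks.
 */
#if 0
static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(example_pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(example_pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};
#endif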
492
493 /* 128 should be much more than enough */
494 #define PID_BUF_SIZE            127
495
496 int trace_pid_write(struct trace_pid_list *filtered_pids,
497                     struct trace_pid_list **new_pid_list,
498                     const char __user *ubuf, size_t cnt)
499 {
500         struct trace_pid_list *pid_list;
501         struct trace_parser parser;
502         unsigned long val;
503         int nr_pids = 0;
504         ssize_t read = 0;
505         ssize_t ret = 0;
506         loff_t pos;
507         pid_t pid;
508
509         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
510                 return -ENOMEM;
511
512         /*
513          * Always recreate a new array. The write is an all or nothing
514          * operation. Always create a new array when adding new pids by
515          * the user. If the operation fails, then the current list is
516          * not modified.
517          */
518         pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
519         if (!pid_list) {
520                 trace_parser_put(&parser);
521                 return -ENOMEM;
522         }
523
524         pid_list->pid_max = READ_ONCE(pid_max);
525
526         /* Only truncating will shrink pid_max */
527         if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
528                 pid_list->pid_max = filtered_pids->pid_max;
529
530         pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
531         if (!pid_list->pids) {
532                 trace_parser_put(&parser);
533                 kfree(pid_list);
534                 return -ENOMEM;
535         }
536
537         if (filtered_pids) {
538                 /* copy the current bits to the new max */
539                 for_each_set_bit(pid, filtered_pids->pids,
540                                  filtered_pids->pid_max) {
541                         set_bit(pid, pid_list->pids);
542                         nr_pids++;
543                 }
544         }
545
546         while (cnt > 0) {
547
548                 pos = 0;
549
550                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
551                 if (ret < 0 || !trace_parser_loaded(&parser))
552                         break;
553
554                 read += ret;
555                 ubuf += ret;
556                 cnt -= ret;
557
558                 ret = -EINVAL;
559                 if (kstrtoul(parser.buffer, 0, &val))
560                         break;
561                 if (val >= pid_list->pid_max)
562                         break;
563
564                 pid = (pid_t)val;
565
566                 set_bit(pid, pid_list->pids);
567                 nr_pids++;
568
569                 trace_parser_clear(&parser);
570                 ret = 0;
571         }
572         trace_parser_put(&parser);
573
574         if (ret < 0) {
575                 trace_free_pid_list(pid_list);
576                 return ret;
577         }
578
579         if (!nr_pids) {
580                 /* Cleared the list of pids */
581                 trace_free_pid_list(pid_list);
582                 read = ret;
583                 pid_list = NULL;
584         }
585
586         *new_pid_list = pid_list;
587
588         return read;
589 }
590
591 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
592 {
593         u64 ts;
594
595         /* Early boot up does not have a buffer yet */
596         if (!buf->buffer)
597                 return trace_clock_local();
598
599         ts = ring_buffer_time_stamp(buf->buffer, cpu);
600         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
601
602         return ts;
603 }
604
605 u64 ftrace_now(int cpu)
606 {
607         return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
608 }
609
610 /**
611  * tracing_is_enabled - Show if global_trace has been enabled
612  *
613  * Shows if the global trace has been enabled or not. It uses the
614  * mirror flag "buffer_disabled" to be used in fast paths such as for
615  * the irqsoff tracer. But it may be inaccurate due to races. If you
616  * need to know the accurate state, use tracing_is_on() which is a little
617  * slower, but accurate.
618  */
619 int tracing_is_enabled(void)
620 {
621         /*
622          * For quick access (irqsoff uses this in fast path), just
623          * return the mirror variable of the state of the ring buffer.
624          * It's a little racy, but we don't really care.
625          */
626         smp_rmb();
627         return !global_trace.buffer_disabled;
628 }
629
630 /*
631  * trace_buf_size is the size in bytes that is allocated
632  * for a buffer. Note, the number of bytes is always rounded
633  * to page size.
634  *
635  * This number is purposely set to a low number of 16384.
636  * If the dump on oops happens, it will be much appreciated
637  * to not have to wait for all that output. Anyway this can be
638  * boot time and run time configurable.
639  */
640 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
641
642 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
643
644 /* trace_types holds a link list of available tracers. */
645 static struct tracer            *trace_types __read_mostly;
646
647 /*
648  * trace_types_lock is used to protect the trace_types list.
649  */
650 DEFINE_MUTEX(trace_types_lock);
651
652 /*
653  * serialize the access of the ring buffer
654  *
655  * The ring buffer serializes readers, but that is only low-level protection.
656  * The validity of the events (which are returned by ring_buffer_peek() etc.)
657  * is not protected by the ring buffer.
658  *
659  * The content of events may become garbage if we allow another process to
660  * consume these events concurrently:
661  *   A) the page of the consumed events may become a normal page
662  *      (not a reader page) in the ring buffer, and this page will be
663  *      rewritten by the events producer.
664  *   B) The page of the consumed events may become a page for splice_read,
665  *      and this page will be returned to the system.
666  *
667  * These primitives allow multiple processes to access different per-cpu
668  * ring buffers concurrently.
669  *
670  * These primitives don't distinguish read-only and read-consume access.
671  * Multiple read-only accesses are also serialized.
672  */
673
674 #ifdef CONFIG_SMP
675 static DECLARE_RWSEM(all_cpu_access_lock);
676 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
677
678 static inline void trace_access_lock(int cpu)
679 {
680         if (cpu == RING_BUFFER_ALL_CPUS) {
681                 /* gain it for accessing the whole ring buffer. */
682                 down_write(&all_cpu_access_lock);
683         } else {
684                 /* gain it for accessing a cpu ring buffer. */
685
686                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
687                 down_read(&all_cpu_access_lock);
688
689                 /* Secondly block other access to this @cpu ring buffer. */
690                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
691         }
692 }
693
694 static inline void trace_access_unlock(int cpu)
695 {
696         if (cpu == RING_BUFFER_ALL_CPUS) {
697                 up_write(&all_cpu_access_lock);
698         } else {
699                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
700                 up_read(&all_cpu_access_lock);
701         }
702 }
703
704 static inline void trace_access_lock_init(void)
705 {
706         int cpu;
707
708         for_each_possible_cpu(cpu)
709                 mutex_init(&per_cpu(cpu_access_lock, cpu));
710 }
711
712 #else
713
714 static DEFINE_MUTEX(access_lock);
715
716 static inline void trace_access_lock(int cpu)
717 {
718         (void)cpu;
719         mutex_lock(&access_lock);
720 }
721
722 static inline void trace_access_unlock(int cpu)
723 {
724         (void)cpu;
725         mutex_unlock(&access_lock);
726 }
727
728 static inline void trace_access_lock_init(void)
729 {
730 }
731
732 #endif
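/*
 * Editorial sketch (guarded out, not part of the original file): a
 * consuming reader in this file typically brackets its ring-buffer
 * access with the primitives above; cpu may be RING_BUFFER_ALL_CPUS
 * for a whole-buffer reader.
 */
#if 0
static void example_consume(int cpu)
{
	trace_access_lock(cpu);		/* serialize against other consumers */
	/* ... consume events for this cpu, e.g. via ring_buffer_consume() ... */
	trace_access_unlock(cpu);
}
#endif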
733
734 #ifdef CONFIG_STACKTRACE
735 static void __ftrace_trace_stack(struct ring_buffer *buffer,
736                                  unsigned long flags,
737                                  int skip, int pc, struct pt_regs *regs);
738 static inline void ftrace_trace_stack(struct trace_array *tr,
739                                       struct ring_buffer *buffer,
740                                       unsigned long flags,
741                                       int skip, int pc, struct pt_regs *regs);
742
743 #else
744 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
745                                         unsigned long flags,
746                                         int skip, int pc, struct pt_regs *regs)
747 {
748 }
749 static inline void ftrace_trace_stack(struct trace_array *tr,
750                                       struct ring_buffer *buffer,
751                                       unsigned long flags,
752                                       int skip, int pc, struct pt_regs *regs)
753 {
754 }
755
756 #endif
757
758 static __always_inline void
759 trace_event_setup(struct ring_buffer_event *event,
760                   int type, unsigned long flags, int pc)
761 {
762         struct trace_entry *ent = ring_buffer_event_data(event);
763
764         tracing_generic_entry_update(ent, type, flags, pc);
765 }
766
767 static __always_inline struct ring_buffer_event *
768 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
769                           int type,
770                           unsigned long len,
771                           unsigned long flags, int pc)
772 {
773         struct ring_buffer_event *event;
774
775         event = ring_buffer_lock_reserve(buffer, len);
776         if (event != NULL)
777                 trace_event_setup(event, type, flags, pc);
778
779         return event;
780 }
781
782 void tracer_tracing_on(struct trace_array *tr)
783 {
784         if (tr->trace_buffer.buffer)
785                 ring_buffer_record_on(tr->trace_buffer.buffer);
786         /*
787          * This flag is looked at when buffers haven't been allocated
788          * yet, or by some tracers (like irqsoff), that just want to
789          * know if the ring buffer has been disabled, but it can handle
790          * races of where it gets disabled but we still do a record.
791          * As the check is in the fast path of the tracers, it is more
792          * important to be fast than accurate.
793          */
794         tr->buffer_disabled = 0;
795         /* Make the flag seen by readers */
796         smp_wmb();
797 }
798
799 /**
800  * tracing_on - enable tracing buffers
801  *
802  * This function enables tracing buffers that may have been
803  * disabled with tracing_off.
804  */
805 void tracing_on(void)
806 {
807         tracer_tracing_on(&global_trace);
808 }
809 EXPORT_SYMBOL_GPL(tracing_on);
810
811
812 static __always_inline void
813 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
814 {
815         __this_cpu_write(trace_taskinfo_save, true);
816
817         /* If this is the temp buffer, we need to commit fully */
818         if (this_cpu_read(trace_buffered_event) == event) {
819                 /* Length is in event->array[0] */
820                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
821                 /* Release the temp buffer */
822                 this_cpu_dec(trace_buffered_event_cnt);
823         } else
824                 ring_buffer_unlock_commit(buffer, event);
825 }
826
827 /**
828  * __trace_puts - write a constant string into the trace buffer.
829  * @ip:    The address of the caller
830  * @str:   The constant string to write
831  * @size:  The size of the string.
832  */
833 int __trace_puts(unsigned long ip, const char *str, int size)
834 {
835         struct ring_buffer_event *event;
836         struct ring_buffer *buffer;
837         struct print_entry *entry;
838         unsigned long irq_flags;
839         int alloc;
840         int pc;
841
842         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
843                 return 0;
844
845         pc = preempt_count();
846
847         if (unlikely(tracing_selftest_running || tracing_disabled))
848                 return 0;
849
850         alloc = sizeof(*entry) + size + 2; /* possible \n added */
851
852         local_save_flags(irq_flags);
853         buffer = global_trace.trace_buffer.buffer;
854         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, 
855                                             irq_flags, pc);
856         if (!event)
857                 return 0;
858
859         entry = ring_buffer_event_data(event);
860         entry->ip = ip;
861
862         memcpy(&entry->buf, str, size);
863
864         /* Add a newline if necessary */
865         if (entry->buf[size - 1] != '\n') {
866                 entry->buf[size] = '\n';
867                 entry->buf[size + 1] = '\0';
868         } else
869                 entry->buf[size] = '\0';
870
871         __buffer_unlock_commit(buffer, event);
872         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
873
874         return size;
875 }
876 EXPORT_SYMBOL_GPL(__trace_puts);
877
878 /**
879  * __trace_bputs - write the pointer to a constant string into trace buffer
880  * @ip:    The address of the caller
881  * @str:   The constant string to write to the buffer to
882  */
883 int __trace_bputs(unsigned long ip, const char *str)
884 {
885         struct ring_buffer_event *event;
886         struct ring_buffer *buffer;
887         struct bputs_entry *entry;
888         unsigned long irq_flags;
889         int size = sizeof(struct bputs_entry);
890         int pc;
891
892         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
893                 return 0;
894
895         pc = preempt_count();
896
897         if (unlikely(tracing_selftest_running || tracing_disabled))
898                 return 0;
899
900         local_save_flags(irq_flags);
901         buffer = global_trace.trace_buffer.buffer;
902         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
903                                             irq_flags, pc);
904         if (!event)
905                 return 0;
906
907         entry = ring_buffer_event_data(event);
908         entry->ip                       = ip;
909         entry->str                      = str;
910
911         __buffer_unlock_commit(buffer, event);
912         ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
913
914         return 1;
915 }
916 EXPORT_SYMBOL_GPL(__trace_bputs);
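/*
 * Editorial sketch (guarded out, not part of the original file): callers
 * normally reach the two functions above through the trace_puts() macro,
 * which is expected to pick __trace_bputs() for string literals and fall
 * back to __trace_puts() otherwise.
 */
#if 0
static void example_mark(void)
{
	trace_puts("example: reached the interesting point\n");
}
#endif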
917
918 #ifdef CONFIG_TRACER_SNAPSHOT
919 void tracing_snapshot_instance_cond(struct trace_array *tr, void *cond_data)
920 {
921         struct tracer *tracer = tr->current_trace;
922         unsigned long flags;
923
924         if (in_nmi()) {
925                 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
926                 internal_trace_puts("*** snapshot is being ignored        ***\n");
927                 return;
928         }
929
930         if (!tr->allocated_snapshot) {
931                 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
932                 internal_trace_puts("*** stopping trace here!   ***\n");
933                 tracing_off();
934                 return;
935         }
936
937         /* Note, snapshot can not be used when the tracer uses it */
938         if (tracer->use_max_tr) {
939                 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
940                 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
941                 return;
942         }
943
944         local_irq_save(flags);
945         update_max_tr(tr, current, smp_processor_id(), cond_data);
946         local_irq_restore(flags);
947 }
948
949 void tracing_snapshot_instance(struct trace_array *tr)
950 {
951         tracing_snapshot_instance_cond(tr, NULL);
952 }
953
954 /**
955  * tracing_snapshot - take a snapshot of the current buffer.
956  *
957  * This causes a swap between the snapshot buffer and the current live
958  * tracing buffer. You can use this to take snapshots of the live
959  * trace when some condition is triggered, but continue to trace.
960  *
961  * Note, make sure to allocate the snapshot with either
962  * a tracing_snapshot_alloc(), or by doing it manually
963  * with: echo 1 > /sys/kernel/debug/tracing/snapshot
964  *
965  * If the snapshot buffer is not allocated, it will stop tracing.
966  * Basically making a permanent snapshot.
967  */
968 void tracing_snapshot(void)
969 {
970         struct trace_array *tr = &global_trace;
971
972         tracing_snapshot_instance(tr);
973 }
974 EXPORT_SYMBOL_GPL(tracing_snapshot);
975
976 /**
977  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
978  * @tr:         The tracing instance to snapshot
979  * @cond_data:  The data to be tested conditionally, and possibly saved
980  *
981  * This is the same as tracing_snapshot() except that the snapshot is
982  * conditional - the snapshot will only happen if the
983  * cond_snapshot.update() implementation receiving the cond_data
984  * returns true, which means that the trace array's cond_snapshot
985  * update() operation used the cond_data to determine whether the
986  * snapshot should be taken, and if it was, presumably saved it along
987  * with the snapshot.
988  */
989 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
990 {
991         tracing_snapshot_instance_cond(tr, cond_data);
992 }
993 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
994
995 /**
996  * tracing_snapshot_cond_data - get the user data associated with a snapshot
997  * @tr:         The tracing instance
998  *
999  * When the user enables a conditional snapshot using
1000  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1001  * with the snapshot.  This accessor is used to retrieve it.
1002  *
1003  * Should not be called from cond_snapshot.update(), since it takes
1004  * the tr->max_lock, which the code calling
1005  * cond_snapshot.update() has already taken.
1006  *
1007  * Returns the cond_data associated with the trace array's snapshot.
1008  */
1009 void *tracing_cond_snapshot_data(struct trace_array *tr)
1010 {
1011         void *cond_data = NULL;
1012
1013         arch_spin_lock(&tr->max_lock);
1014
1015         if (tr->cond_snapshot)
1016                 cond_data = tr->cond_snapshot->cond_data;
1017
1018         arch_spin_unlock(&tr->max_lock);
1019
1020         return cond_data;
1021 }
1022 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1023
1024 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
1025                                         struct trace_buffer *size_buf, int cpu_id);
1026 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
1027
1028 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1029 {
1030         int ret;
1031
1032         if (!tr->allocated_snapshot) {
1033
1034                 /* allocate spare buffer */
1035                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1036                                    &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
1037                 if (ret < 0)
1038                         return ret;
1039
1040                 tr->allocated_snapshot = true;
1041         }
1042
1043         return 0;
1044 }
1045
1046 static void free_snapshot(struct trace_array *tr)
1047 {
1048         /*
1049          * We don't free the ring buffer; instead, we resize it because
1050          * the max_tr ring buffer has some state (e.g. ring->clock) and
1051          * we want to preserve it.
1052          */
1053         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1054         set_buffer_entries(&tr->max_buffer, 1);
1055         tracing_reset_online_cpus(&tr->max_buffer);
1056         tr->allocated_snapshot = false;
1057 }
1058
1059 /**
1060  * tracing_alloc_snapshot - allocate snapshot buffer.
1061  *
1062  * This only allocates the snapshot buffer if it isn't already
1063  * allocated - it doesn't also take a snapshot.
1064  *
1065  * This is meant to be used in cases where the snapshot buffer needs
1066  * to be set up for events that can't sleep but need to be able to
1067  * trigger a snapshot.
1068  */
1069 int tracing_alloc_snapshot(void)
1070 {
1071         struct trace_array *tr = &global_trace;
1072         int ret;
1073
1074         ret = tracing_alloc_snapshot_instance(tr);
1075         WARN_ON(ret < 0);
1076
1077         return ret;
1078 }
1079 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1080
1081 /**
1082  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1083  *
1084  * This is similar to tracing_snapshot(), but it will allocate the
1085  * snapshot buffer if it isn't already allocated. Use this only
1086  * where it is safe to sleep, as the allocation may sleep.
1087  *
1088  * This causes a swap between the snapshot buffer and the current live
1089  * tracing buffer. You can use this to take snapshots of the live
1090  * trace when some condition is triggered, but continue to trace.
1091  */
1092 void tracing_snapshot_alloc(void)
1093 {
1094         int ret;
1095
1096         ret = tracing_alloc_snapshot();
1097         if (ret < 0)
1098                 return;
1099
1100         tracing_snapshot();
1101 }
1102 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
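/*
 * Editorial sketch (guarded out, not part of the original file): a
 * debugging hook might allocate the spare buffer once from a context
 * that can sleep and then snapshot from a hot path when a condition
 * of interest fires.
 */
#if 0
static int example_setup(void)
{
	return tracing_alloc_snapshot();	/* may sleep, allocates only */
}

static void example_on_condition(void)
{
	tracing_snapshot();	/* swap the live buffer with the spare one */
}
#endif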
1103
1104 /**
1105  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1106  * @tr:         The tracing instance
1107  * @cond_data:  User data to associate with the snapshot
1108  * @update:     Implementation of the cond_snapshot update function
1109  *
1110  * Check whether the conditional snapshot for the given instance has
1111  * already been enabled, or if the current tracer is already using a
1112  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1113  * save the cond_data and update function inside.
1114  *
1115  * Returns 0 if successful, error otherwise.
1116  */
1117 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1118                                  cond_update_fn_t update)
1119 {
1120         struct cond_snapshot *cond_snapshot;
1121         int ret = 0;
1122
1123         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1124         if (!cond_snapshot)
1125                 return -ENOMEM;
1126
1127         cond_snapshot->cond_data = cond_data;
1128         cond_snapshot->update = update;
1129
1130         mutex_lock(&trace_types_lock);
1131
1132         ret = tracing_alloc_snapshot_instance(tr);
1133         if (ret)
1134                 goto fail_unlock;
1135
1136         if (tr->current_trace->use_max_tr) {
1137                 ret = -EBUSY;
1138                 goto fail_unlock;
1139         }
1140
1141         /*
1142          * The cond_snapshot can only change to NULL without the
1143          * trace_types_lock. We don't care if we race with it going
1144          * to NULL, but we want to make sure that it's not set to
1145          * something other than NULL when we get here, which we can
1146          * do safely with only holding the trace_types_lock and not
1147          * having to take the max_lock.
1148          */
1149         if (tr->cond_snapshot) {
1150                 ret = -EBUSY;
1151                 goto fail_unlock;
1152         }
1153
1154         arch_spin_lock(&tr->max_lock);
1155         tr->cond_snapshot = cond_snapshot;
1156         arch_spin_unlock(&tr->max_lock);
1157
1158         mutex_unlock(&trace_types_lock);
1159
1160         return ret;
1161
1162  fail_unlock:
1163         mutex_unlock(&trace_types_lock);
1164         kfree(cond_snapshot);
1165         return ret;
1166 }
1167 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
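/*
 * Editorial sketch (guarded out, not part of the original file), assuming
 * the (tr, cond_data) -> bool shape of cond_update_fn_t from trace.h: the
 * update callback decides whether a given tracing_snapshot_cond() call
 * really swaps the buffers.
 */
#if 0
struct example_cond {
	int	armed;		/* hypothetical user data */
};

static bool example_update(struct trace_array *tr, void *cond_data)
{
	struct example_cond *c = cond_data;

	return c->armed;	/* snapshot only while armed */
}

static int example_enable(struct trace_array *tr, struct example_cond *c)
{
	return tracing_snapshot_cond_enable(tr, c, example_update);
}
#endif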
1168
1169 /**
1170  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1171  * @tr:         The tracing instance
1172  *
1173  * Check whether the conditional snapshot for the given instance is
1174  * enabled; if so, free the cond_snapshot associated with it,
1175  * otherwise return -EINVAL.
1176  *
1177  * Returns 0 if successful, error otherwise.
1178  */
1179 int tracing_snapshot_cond_disable(struct trace_array *tr)
1180 {
1181         int ret = 0;
1182
1183         arch_spin_lock(&tr->max_lock);
1184
1185         if (!tr->cond_snapshot)
1186                 ret = -EINVAL;
1187         else {
1188                 kfree(tr->cond_snapshot);
1189                 tr->cond_snapshot = NULL;
1190         }
1191
1192         arch_spin_unlock(&tr->max_lock);
1193
1194         return ret;
1195 }
1196 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1197 #else
1198 void tracing_snapshot(void)
1199 {
1200         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1201 }
1202 EXPORT_SYMBOL_GPL(tracing_snapshot);
1203 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1204 {
1205         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1206 }
1207 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1208 int tracing_alloc_snapshot(void)
1209 {
1210         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1211         return -ENODEV;
1212 }
1213 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1214 void tracing_snapshot_alloc(void)
1215 {
1216         /* Give warning */
1217         tracing_snapshot();
1218 }
1219 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1220 void *tracing_cond_snapshot_data(struct trace_array *tr)
1221 {
1222         return NULL;
1223 }
1224 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1225 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1226 {
1227         return -ENODEV;
1228 }
1229 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1230 int tracing_snapshot_cond_disable(struct trace_array *tr)
1231 {
1232         return false;
1233 }
1234 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1235 #endif /* CONFIG_TRACER_SNAPSHOT */
1236
1237 void tracer_tracing_off(struct trace_array *tr)
1238 {
1239         if (tr->trace_buffer.buffer)
1240                 ring_buffer_record_off(tr->trace_buffer.buffer);
1241         /*
1242          * This flag is looked at when buffers haven't been allocated
1243          * yet, or by some tracers (like irqsoff), that just want to
1244          * know if the ring buffer has been disabled, but it can handle
1245          * races of where it gets disabled but we still do a record.
1246          * As the check is in the fast path of the tracers, it is more
1247          * important to be fast than accurate.
1248          */
1249         tr->buffer_disabled = 1;
1250         /* Make the flag seen by readers */
1251         smp_wmb();
1252 }
1253
1254 /**
1255  * tracing_off - turn off tracing buffers
1256  *
1257  * This function stops the tracing buffers from recording data.
1258  * It does not disable any overhead the tracers themselves may
1259  * be causing. This function simply causes all recording to
1260  * the ring buffers to fail.
1261  */
1262 void tracing_off(void)
1263 {
1264         tracer_tracing_off(&global_trace);
1265 }
1266 EXPORT_SYMBOL_GPL(tracing_off);
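/*
 * Editorial sketch (guarded out, not part of the original file): kernel
 * code can bracket a region of interest so that the ring buffer stops
 * recording right after the code path being debugged.
 */
#if 0
static void example_window(void)
{
	tracing_on();		/* (re)enable recording */
	/* ... the code path being investigated ... */
	tracing_off();		/* freeze the buffer for later inspection */
}
#endif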
1267
1268 void disable_trace_on_warning(void)
1269 {
1270         if (__disable_trace_on_warning)
1271                 tracing_off();
1272 }
1273
1274 /**
1275  * tracer_tracing_is_on - show real state of ring buffer enabled
1276  * @tr : the trace array to know if ring buffer is enabled
1277  *
1278  * Shows real state of the ring buffer if it is enabled or not.
1279  */
1280 bool tracer_tracing_is_on(struct trace_array *tr)
1281 {
1282         if (tr->trace_buffer.buffer)
1283                 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1284         return !tr->buffer_disabled;
1285 }
1286
1287 /**
1288  * tracing_is_on - show state of ring buffers enabled
1289  */
1290 int tracing_is_on(void)
1291 {
1292         return tracer_tracing_is_on(&global_trace);
1293 }
1294 EXPORT_SYMBOL_GPL(tracing_is_on);
1295
1296 static int __init set_buf_size(char *str)
1297 {
1298         unsigned long buf_size;
1299
1300         if (!str)
1301                 return 0;
1302         buf_size = memparse(str, &str);
1303         /* nr_entries can not be zero */
1304         if (buf_size == 0)
1305                 return 0;
1306         trace_buf_size = buf_size;
1307         return 1;
1308 }
1309 __setup("trace_buf_size=", set_buf_size);
1310
1311 static int __init set_tracing_thresh(char *str)
1312 {
1313         unsigned long threshold;
1314         int ret;
1315
1316         if (!str)
1317                 return 0;
1318         ret = kstrtoul(str, 0, &threshold);
1319         if (ret < 0)
1320                 return 0;
1321         tracing_thresh = threshold * 1000;
1322         return 1;
1323 }
1324 __setup("tracing_thresh=", set_tracing_thresh);
1325
1326 unsigned long nsecs_to_usecs(unsigned long nsecs)
1327 {
1328         return nsecs / 1000;
1329 }
1330
1331 /*
1332  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1333  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1334  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1335  * of strings in the order that the evals (enum) were defined.
1336  */
1337 #undef C
1338 #define C(a, b) b
1339
1340 /* These must match the bit positions in trace_iterator_flags */
1341 static const char *trace_options[] = {
1342         TRACE_FLAGS
1343         NULL
1344 };
1345
1346 static struct {
1347         u64 (*func)(void);
1348         const char *name;
1349         int in_ns;              /* is this clock in nanoseconds? */
1350 } trace_clocks[] = {
1351         { trace_clock_local,            "local",        1 },
1352         { trace_clock_global,           "global",       1 },
1353         { trace_clock_counter,          "counter",      0 },
1354         { trace_clock_jiffies,          "uptime",       0 },
1355         { trace_clock,                  "perf",         1 },
1356         { ktime_get_mono_fast_ns,       "mono",         1 },
1357         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1358         { ktime_get_boot_fast_ns,       "boot",         1 },
1359         ARCH_TRACE_CLOCKS
1360 };
1361
1362 bool trace_clock_in_ns(struct trace_array *tr)
1363 {
1364         if (trace_clocks[tr->clock_id].in_ns)
1365                 return true;
1366
1367         return false;
1368 }
1369
1370 /*
1371  * trace_parser_get_init - gets the buffer for trace parser
1372  */
1373 int trace_parser_get_init(struct trace_parser *parser, int size)
1374 {
1375         memset(parser, 0, sizeof(*parser));
1376
1377         parser->buffer = kmalloc(size, GFP_KERNEL);
1378         if (!parser->buffer)
1379                 return 1;
1380
1381         parser->size = size;
1382         return 0;
1383 }
1384
1385 /*
1386  * trace_parser_put - frees the buffer for trace parser
1387  */
1388 void trace_parser_put(struct trace_parser *parser)
1389 {
1390         kfree(parser->buffer);
1391         parser->buffer = NULL;
1392 }
1393
1394 /*
1395  * trace_get_user - reads the user input string separated by space
1396  * (matched by isspace(ch))
1397  *
1398  * For each string found the 'struct trace_parser' is updated,
1399  * and the function returns.
1400  *
1401  * Returns number of bytes read.
1402  *
1403  * See kernel/trace/trace.h for 'struct trace_parser' details.
1404  */
1405 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1406         size_t cnt, loff_t *ppos)
1407 {
1408         char ch;
1409         size_t read = 0;
1410         ssize_t ret;
1411
1412         if (!*ppos)
1413                 trace_parser_clear(parser);
1414
1415         ret = get_user(ch, ubuf++);
1416         if (ret)
1417                 goto out;
1418
1419         read++;
1420         cnt--;
1421
1422         /*
1423          * The parser is not finished with the last write,
1424          * continue reading the user input without skipping spaces.
1425          */
1426         if (!parser->cont) {
1427                 /* skip white space */
1428                 while (cnt && isspace(ch)) {
1429                         ret = get_user(ch, ubuf++);
1430                         if (ret)
1431                                 goto out;
1432                         read++;
1433                         cnt--;
1434                 }
1435
1436                 parser->idx = 0;
1437
1438                 /* only spaces were written */
1439                 if (isspace(ch) || !ch) {
1440                         *ppos += read;
1441                         ret = read;
1442                         goto out;
1443                 }
1444         }
1445
1446         /* read the non-space input */
1447         while (cnt && !isspace(ch) && ch) {
1448                 if (parser->idx < parser->size - 1)
1449                         parser->buffer[parser->idx++] = ch;
1450                 else {
1451                         ret = -EINVAL;
1452                         goto out;
1453                 }
1454                 ret = get_user(ch, ubuf++);
1455                 if (ret)
1456                         goto out;
1457                 read++;
1458                 cnt--;
1459         }
1460
1461         /* We either got finished input or we have to wait for another call. */
1462         if (isspace(ch) || !ch) {
1463                 parser->buffer[parser->idx] = 0;
1464                 parser->cont = false;
1465         } else if (parser->idx < parser->size - 1) {
1466                 parser->cont = true;
1467                 parser->buffer[parser->idx++] = ch;
1468                 /* Make sure the parsed string always terminates with '\0'. */
1469                 parser->buffer[parser->idx] = 0;
1470         } else {
1471                 ret = -EINVAL;
1472                 goto out;
1473         }
1474
1475         *ppos += read;
1476         ret = read;
1477
1478 out:
1479         return ret;
1480 }
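/*
 * Editorial sketch (guarded out, not part of the original file): the
 * usual pattern for consuming a user buffer with the parser helpers
 * above; trace_pid_write() earlier in this file follows the same shape.
 * The token size of 64 is arbitrary.
 */
#if 0
static ssize_t example_parse_write(const char __user *ubuf, size_t cnt)
{
	struct trace_parser parser;
	ssize_t read = 0;
	loff_t pos;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	while (cnt > 0) {
		pos = 0;
		read = trace_get_user(&parser, ubuf, cnt, &pos);
		if (read < 0 || !trace_parser_loaded(&parser))
			break;

		ubuf += read;
		cnt -= read;

		/* ... act on the NUL-terminated token in parser.buffer ... */

		trace_parser_clear(&parser);
	}

	trace_parser_put(&parser);
	return read;
}
#endif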
1481
1482 /* TODO add a seq_buf_to_buffer() */
1483 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1484 {
1485         int len;
1486
1487         if (trace_seq_used(s) <= s->seq.readpos)
1488                 return -EBUSY;
1489
1490         len = trace_seq_used(s) - s->seq.readpos;
1491         if (cnt > len)
1492                 cnt = len;
1493         memcpy(buf, s->buffer + s->seq.readpos, cnt);
1494
1495         s->seq.readpos += cnt;
1496         return cnt;
1497 }
1498
1499 unsigned long __read_mostly     tracing_thresh;
1500
1501 #ifdef CONFIG_TRACER_MAX_TRACE
1502 /*
1503  * Copy the new maximum trace into the separate maximum-trace
1504  * structure. (this way the maximum trace is permanently saved,
1505  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1506  */
1507 static void
1508 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1509 {
1510         struct trace_buffer *trace_buf = &tr->trace_buffer;
1511         struct trace_buffer *max_buf = &tr->max_buffer;
1512         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1513         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1514
1515         max_buf->cpu = cpu;
1516         max_buf->time_start = data->preempt_timestamp;
1517
1518         max_data->saved_latency = tr->max_latency;
1519         max_data->critical_start = data->critical_start;
1520         max_data->critical_end = data->critical_end;
1521
1522         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1523         max_data->pid = tsk->pid;
1524         /*
1525          * If tsk == current, then use current_uid(), as that does not use
1526          * RCU. The irq tracer can be called out of RCU scope.
1527          */
1528         if (tsk == current)
1529                 max_data->uid = current_uid();
1530         else
1531                 max_data->uid = task_uid(tsk);
1532
1533         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1534         max_data->policy = tsk->policy;
1535         max_data->rt_priority = tsk->rt_priority;
1536
1537         /* record this tasks comm */
1538         tracing_record_cmdline(tsk);
1539 }
1540
1541 /**
1542  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1543  * @tr: tracer
1544  * @tsk: the task with the latency
1545  * @cpu: The cpu that initiated the trace.
1546  * @cond_data: User data associated with a conditional snapshot
1547  *
1548  * Flip the buffers between the @tr and the max_tr and record information
1549  * about which task was the cause of this latency.
1550  */
1551 void
1552 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1553               void *cond_data)
1554 {
1555         if (tr->stop_count)
1556                 return;
1557
1558         WARN_ON_ONCE(!irqs_disabled());
1559
1560         if (!tr->allocated_snapshot) {
1561                 /* Only the nop tracer should hit this when disabling */
1562                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1563                 return;
1564         }
1565
1566         arch_spin_lock(&tr->max_lock);
1567
1568         /* Inherit the recordable setting from trace_buffer */
1569         if (ring_buffer_record_is_set_on(tr->trace_buffer.buffer))
1570                 ring_buffer_record_on(tr->max_buffer.buffer);
1571         else
1572                 ring_buffer_record_off(tr->max_buffer.buffer);
1573
1574 #ifdef CONFIG_TRACER_SNAPSHOT
1575         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1576                 goto out_unlock;
1577 #endif
1578         swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
1579
1580         __update_max_tr(tr, tsk, cpu);
1581
1582  out_unlock:
1583         arch_spin_unlock(&tr->max_lock);
1584 }
1585
1586 /**
1587  * update_max_tr_single - only copy one trace over, and reset the rest
1588  * @tr: tracer
1589  * @tsk: task with the latency
1590  * @cpu: the cpu of the buffer to copy.
1591  *
1592  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1593  */
1594 void
1595 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1596 {
1597         int ret;
1598
1599         if (tr->stop_count)
1600                 return;
1601
1602         WARN_ON_ONCE(!irqs_disabled());
1603         if (!tr->allocated_snapshot) {
1604                 /* Only the nop tracer should hit this when disabling */
1605                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1606                 return;
1607         }
1608
1609         arch_spin_lock(&tr->max_lock);
1610
1611         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1612
1613         if (ret == -EBUSY) {
1614                 /*
1615                  * We failed to swap the buffer due to a commit taking
1616                  * place on this CPU. We fail to record, but we reset
1617                  * the max trace buffer (no one writes directly to it)
1618                  * and flag that it failed.
1619                  */
1620                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1621                         "Failed to swap buffers due to commit in progress\n");
1622         }
1623
1624         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1625
1626         __update_max_tr(tr, tsk, cpu);
1627         arch_spin_unlock(&tr->max_lock);
1628 }
1629 #endif /* CONFIG_TRACER_MAX_TRACE */
1630
1631 static int wait_on_pipe(struct trace_iterator *iter, int full)
1632 {
1633         /* Iterators are static, they should be filled or empty */
1634         if (trace_buffer_iter(iter, iter->cpu_file))
1635                 return 0;
1636
1637         return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1638                                 full);
1639 }
1640
1641 #ifdef CONFIG_FTRACE_STARTUP_TEST
1642 static bool selftests_can_run;
1643
1644 struct trace_selftests {
1645         struct list_head                list;
1646         struct tracer                   *type;
1647 };
1648
1649 static LIST_HEAD(postponed_selftests);
1650
1651 static int save_selftest(struct tracer *type)
1652 {
1653         struct trace_selftests *selftest;
1654
1655         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1656         if (!selftest)
1657                 return -ENOMEM;
1658
1659         selftest->type = type;
1660         list_add(&selftest->list, &postponed_selftests);
1661         return 0;
1662 }
1663
1664 static int run_tracer_selftest(struct tracer *type)
1665 {
1666         struct trace_array *tr = &global_trace;
1667         struct tracer *saved_tracer = tr->current_trace;
1668         int ret;
1669
1670         if (!type->selftest || tracing_selftest_disabled)
1671                 return 0;
1672
1673         /*
1674          * If a tracer registers early in boot up (before scheduling is
1675          * initialized and such), then do not run its selftests yet.
1676          * Instead, run it a little later in the boot process.
1677          */
1678         if (!selftests_can_run)
1679                 return save_selftest(type);
1680
1681         /*
1682          * Run a selftest on this tracer.
1683          * Here we reset the trace buffer, and set the current
1684          * tracer to be this tracer. The tracer can then run some
1685          * internal tracing to verify that everything is in order.
1686          * If we fail, we do not register this tracer.
1687          */
1688         tracing_reset_online_cpus(&tr->trace_buffer);
1689
1690         tr->current_trace = type;
1691
1692 #ifdef CONFIG_TRACER_MAX_TRACE
1693         if (type->use_max_tr) {
1694                 /* If we expanded the buffers, make sure the max is expanded too */
1695                 if (ring_buffer_expanded)
1696                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1697                                            RING_BUFFER_ALL_CPUS);
1698                 tr->allocated_snapshot = true;
1699         }
1700 #endif
1701
1702         /* the test is responsible for initializing and enabling */
1703         pr_info("Testing tracer %s: ", type->name);
1704         ret = type->selftest(type, tr);
1705         /* the test is responsible for resetting too */
1706         tr->current_trace = saved_tracer;
1707         if (ret) {
1708                 printk(KERN_CONT "FAILED!\n");
1709                 /* Add the warning after printing 'FAILED' */
1710                 WARN_ON(1);
1711                 return -1;
1712         }
1713         /* Only reset on passing, to avoid touching corrupted buffers */
1714         tracing_reset_online_cpus(&tr->trace_buffer);
1715
1716 #ifdef CONFIG_TRACER_MAX_TRACE
1717         if (type->use_max_tr) {
1718                 tr->allocated_snapshot = false;
1719
1720                 /* Shrink the max buffer again */
1721                 if (ring_buffer_expanded)
1722                         ring_buffer_resize(tr->max_buffer.buffer, 1,
1723                                            RING_BUFFER_ALL_CPUS);
1724         }
1725 #endif
1726
1727         printk(KERN_CONT "PASSED\n");
1728         return 0;
1729 }
1730
1731 static __init int init_trace_selftests(void)
1732 {
1733         struct trace_selftests *p, *n;
1734         struct tracer *t, **last;
1735         int ret;
1736
1737         selftests_can_run = true;
1738
1739         mutex_lock(&trace_types_lock);
1740
1741         if (list_empty(&postponed_selftests))
1742                 goto out;
1743
1744         pr_info("Running postponed tracer tests:\n");
1745
1746         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1747                 /* This loop can take minutes when sanitizers are enabled, so
1748                  * let's make sure we allow RCU processing.
1749                  */
1750                 cond_resched();
1751                 ret = run_tracer_selftest(p->type);
1752                 /* If the test fails, then warn and remove from available_tracers */
1753                 if (ret < 0) {
1754                         WARN(1, "tracer: %s failed selftest, disabling\n",
1755                              p->type->name);
1756                         last = &trace_types;
1757                         for (t = trace_types; t; t = t->next) {
1758                                 if (t == p->type) {
1759                                         *last = t->next;
1760                                         break;
1761                                 }
1762                                 last = &t->next;
1763                         }
1764                 }
1765                 list_del(&p->list);
1766                 kfree(p);
1767         }
1768
1769  out:
1770         mutex_unlock(&trace_types_lock);
1771
1772         return 0;
1773 }
1774 core_initcall(init_trace_selftests);
1775 #else
1776 static inline int run_tracer_selftest(struct tracer *type)
1777 {
1778         return 0;
1779 }
1780 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1781
1782 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1783
1784 static void __init apply_trace_boot_options(void);
1785
1786 /**
1787  * register_tracer - register a tracer with the ftrace system.
1788  * @type: the plugin for the tracer
1789  *
1790  * Register a new plugin tracer.
1791  */
1792 int __init register_tracer(struct tracer *type)
1793 {
1794         struct tracer *t;
1795         int ret = 0;
1796
1797         if (!type->name) {
1798                 pr_info("Tracer must have a name\n");
1799                 return -1;
1800         }
1801
1802         if (strlen(type->name) >= MAX_TRACER_SIZE) {
1803                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1804                 return -1;
1805         }
1806
1807         mutex_lock(&trace_types_lock);
1808
1809         tracing_selftest_running = true;
1810
1811         for (t = trace_types; t; t = t->next) {
1812                 if (strcmp(type->name, t->name) == 0) {
1813                         /* already found */
1814                         pr_info("Tracer %s already registered\n",
1815                                 type->name);
1816                         ret = -1;
1817                         goto out;
1818                 }
1819         }
1820
1821         if (!type->set_flag)
1822                 type->set_flag = &dummy_set_flag;
1823         if (!type->flags) {
1824                 /* Allocate a dummy tracer_flags */
1825                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1826                 if (!type->flags) {
1827                         ret = -ENOMEM;
1828                         goto out;
1829                 }
1830                 type->flags->val = 0;
1831                 type->flags->opts = dummy_tracer_opt;
1832         } else
1833                 if (!type->flags->opts)
1834                         type->flags->opts = dummy_tracer_opt;
1835
1836         /* store the tracer for __set_tracer_option */
1837         type->flags->trace = type;
1838
1839         ret = run_tracer_selftest(type);
1840         if (ret < 0)
1841                 goto out;
1842
1843         type->next = trace_types;
1844         trace_types = type;
1845         add_tracer_options(&global_trace, type);
1846
1847  out:
1848         tracing_selftest_running = false;
1849         mutex_unlock(&trace_types_lock);
1850
1851         if (ret || !default_bootup_tracer)
1852                 goto out_unlock;
1853
1854         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1855                 goto out_unlock;
1856
1857         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1858         /* Do we want this tracer to start on bootup? */
1859         tracing_set_tracer(&global_trace, type->name);
1860         default_bootup_tracer = NULL;
1861
1862         apply_trace_boot_options();
1863
1864         /* Disable other selftests, since running this tracer would break them. */
1865         tracing_selftest_disabled = true;
1866 #ifdef CONFIG_FTRACE_STARTUP_TEST
1867         printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
1868                type->name);
1869 #endif
1870
1871  out_unlock:
1872         return ret;
1873 }
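
/*
 * Illustrative sketch (not part of the original file): roughly how a
 * minimal tracer plugin could be wired up with register_tracer().  The
 * "example" name and the empty init/reset callbacks are hypothetical;
 * real tracers (see trace_nop.c, trace_functions.c) do real work here.
 */
#if 0
static int example_tracer_init(struct trace_array *tr)
{
	/* Set up per-instance state here; returning 0 means success. */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	/* Undo whatever example_tracer_init() set up. */
}

static struct tracer example_tracer __read_mostly = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};

static int __init example_tracer_register(void)
{
	return register_tracer(&example_tracer);
}
core_initcall(example_tracer_register);
#endif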
1874
1875 static void tracing_reset_cpu(struct trace_buffer *buf, int cpu)
1876 {
1877         struct ring_buffer *buffer = buf->buffer;
1878
1879         if (!buffer)
1880                 return;
1881
1882         ring_buffer_record_disable(buffer);
1883
1884         /* Make sure all commits have finished */
1885         synchronize_rcu();
1886         ring_buffer_reset_cpu(buffer, cpu);
1887
1888         ring_buffer_record_enable(buffer);
1889 }
1890
1891 void tracing_reset_online_cpus(struct trace_buffer *buf)
1892 {
1893         struct ring_buffer *buffer = buf->buffer;
1894         int cpu;
1895
1896         if (!buffer)
1897                 return;
1898
1899         ring_buffer_record_disable(buffer);
1900
1901         /* Make sure all commits have finished */
1902         synchronize_rcu();
1903
1904         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1905
1906         for_each_online_cpu(cpu)
1907                 ring_buffer_reset_cpu(buffer, cpu);
1908
1909         ring_buffer_record_enable(buffer);
1910 }
1911
1912 /* Must have trace_types_lock held */
1913 void tracing_reset_all_online_cpus(void)
1914 {
1915         struct trace_array *tr;
1916
1917         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1918                 if (!tr->clear_trace)
1919                         continue;
1920                 tr->clear_trace = false;
1921                 tracing_reset_online_cpus(&tr->trace_buffer);
1922 #ifdef CONFIG_TRACER_MAX_TRACE
1923                 tracing_reset_online_cpus(&tr->max_buffer);
1924 #endif
1925         }
1926 }
1927
1928 static int *tgid_map;
1929
1930 #define SAVED_CMDLINES_DEFAULT 128
1931 #define NO_CMDLINE_MAP UINT_MAX
1932 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1933 struct saved_cmdlines_buffer {
1934         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1935         unsigned *map_cmdline_to_pid;
1936         unsigned cmdline_num;
1937         int cmdline_idx;
1938         char *saved_cmdlines;
1939 };
1940 static struct saved_cmdlines_buffer *savedcmd;
1941
1942 /* temporary disable recording */
1943 static atomic_t trace_record_taskinfo_disabled __read_mostly;
1944
1945 static inline char *get_saved_cmdlines(int idx)
1946 {
1947         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1948 }
1949
1950 static inline void set_cmdline(int idx, const char *cmdline)
1951 {
1952         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1953 }
1954
1955 static int allocate_cmdlines_buffer(unsigned int val,
1956                                     struct saved_cmdlines_buffer *s)
1957 {
1958         s->map_cmdline_to_pid = kmalloc_array(val,
1959                                               sizeof(*s->map_cmdline_to_pid),
1960                                               GFP_KERNEL);
1961         if (!s->map_cmdline_to_pid)
1962                 return -ENOMEM;
1963
1964         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
1965         if (!s->saved_cmdlines) {
1966                 kfree(s->map_cmdline_to_pid);
1967                 return -ENOMEM;
1968         }
1969
1970         s->cmdline_idx = 0;
1971         s->cmdline_num = val;
1972         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1973                sizeof(s->map_pid_to_cmdline));
1974         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1975                val * sizeof(*s->map_cmdline_to_pid));
1976
1977         return 0;
1978 }
1979
1980 static int trace_create_savedcmd(void)
1981 {
1982         int ret;
1983
1984         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1985         if (!savedcmd)
1986                 return -ENOMEM;
1987
1988         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1989         if (ret < 0) {
1990                 kfree(savedcmd);
1991                 savedcmd = NULL;
1992                 return -ENOMEM;
1993         }
1994
1995         return 0;
1996 }
1997
1998 int is_tracing_stopped(void)
1999 {
2000         return global_trace.stop_count;
2001 }
2002
2003 /**
2004  * tracing_start - quick start of the tracer
2005  *
2006  * If tracing is enabled but was stopped by tracing_stop,
2007  * this will start the tracer back up.
2008  */
2009 void tracing_start(void)
2010 {
2011         struct ring_buffer *buffer;
2012         unsigned long flags;
2013
2014         if (tracing_disabled)
2015                 return;
2016
2017         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2018         if (--global_trace.stop_count) {
2019                 if (global_trace.stop_count < 0) {
2020                         /* Someone screwed up their debugging */
2021                         WARN_ON_ONCE(1);
2022                         global_trace.stop_count = 0;
2023                 }
2024                 goto out;
2025         }
2026
2027         /* Prevent the buffers from switching */
2028         arch_spin_lock(&global_trace.max_lock);
2029
2030         buffer = global_trace.trace_buffer.buffer;
2031         if (buffer)
2032                 ring_buffer_record_enable(buffer);
2033
2034 #ifdef CONFIG_TRACER_MAX_TRACE
2035         buffer = global_trace.max_buffer.buffer;
2036         if (buffer)
2037                 ring_buffer_record_enable(buffer);
2038 #endif
2039
2040         arch_spin_unlock(&global_trace.max_lock);
2041
2042  out:
2043         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2044 }
2045
2046 static void tracing_start_tr(struct trace_array *tr)
2047 {
2048         struct ring_buffer *buffer;
2049         unsigned long flags;
2050
2051         if (tracing_disabled)
2052                 return;
2053
2054         /* If global, we need to also start the max tracer */
2055         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2056                 return tracing_start();
2057
2058         raw_spin_lock_irqsave(&tr->start_lock, flags);
2059
2060         if (--tr->stop_count) {
2061                 if (tr->stop_count < 0) {
2062                         /* Someone screwed up their debugging */
2063                         WARN_ON_ONCE(1);
2064                         tr->stop_count = 0;
2065                 }
2066                 goto out;
2067         }
2068
2069         buffer = tr->trace_buffer.buffer;
2070         if (buffer)
2071                 ring_buffer_record_enable(buffer);
2072
2073  out:
2074         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2075 }
2076
2077 /**
2078  * tracing_stop - quick stop of the tracer
2079  *
2080  * Lightweight way to stop tracing. Use in conjunction with
2081  * tracing_start.
2082  */
2083 void tracing_stop(void)
2084 {
2085         struct ring_buffer *buffer;
2086         unsigned long flags;
2087
2088         raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2089         if (global_trace.stop_count++)
2090                 goto out;
2091
2092         /* Prevent the buffers from switching */
2093         arch_spin_lock(&global_trace.max_lock);
2094
2095         buffer = global_trace.trace_buffer.buffer;
2096         if (buffer)
2097                 ring_buffer_record_disable(buffer);
2098
2099 #ifdef CONFIG_TRACER_MAX_TRACE
2100         buffer = global_trace.max_buffer.buffer;
2101         if (buffer)
2102                 ring_buffer_record_disable(buffer);
2103 #endif
2104
2105         arch_spin_unlock(&global_trace.max_lock);
2106
2107  out:
2108         raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2109 }
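
/*
 * Illustrative sketch (not part of the original file): tracing_stop() and
 * tracing_start() nest via stop_count, so a caller that quiesces tracing
 * around a critical region simply pairs the two.  The function name below
 * is hypothetical.
 */
#if 0
static void example_quiesce_tracing(void)
{
	tracing_stop();
	/* ... do work that should not show up in the trace ... */
	tracing_start();
}
#endif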
2110
2111 static void tracing_stop_tr(struct trace_array *tr)
2112 {
2113         struct ring_buffer *buffer;
2114         unsigned long flags;
2115
2116         /* If global, we need to also stop the max tracer */
2117         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2118                 return tracing_stop();
2119
2120         raw_spin_lock_irqsave(&tr->start_lock, flags);
2121         if (tr->stop_count++)
2122                 goto out;
2123
2124         buffer = tr->trace_buffer.buffer;
2125         if (buffer)
2126                 ring_buffer_record_disable(buffer);
2127
2128  out:
2129         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2130 }
2131
2132 static int trace_save_cmdline(struct task_struct *tsk)
2133 {
2134         unsigned pid, idx;
2135
2136         /* treat recording of idle task as a success */
2137         if (!tsk->pid)
2138                 return 1;
2139
2140         if (unlikely(tsk->pid > PID_MAX_DEFAULT))
2141                 return 0;
2142
2143         /*
2144          * It's not the end of the world if we don't get
2145          * the lock, but we also don't want to spin
2146          * nor do we want to disable interrupts,
2147          * so if we miss here, then better luck next time.
2148          */
2149         if (!arch_spin_trylock(&trace_cmdline_lock))
2150                 return 0;
2151
2152         idx = savedcmd->map_pid_to_cmdline[tsk->pid];
2153         if (idx == NO_CMDLINE_MAP) {
2154                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2155
2156                 /*
2157                  * Check whether the cmdline buffer at idx has a pid
2158                  * mapped. We are going to overwrite that entry so we
2159                  * need to clear the map_pid_to_cmdline. Otherwise we
2160                  * would read the new comm for the old pid.
2161                  */
2162                 pid = savedcmd->map_cmdline_to_pid[idx];
2163                 if (pid != NO_CMDLINE_MAP)
2164                         savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
2165
2166                 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2167                 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
2168
2169                 savedcmd->cmdline_idx = idx;
2170         }
2171
2172         set_cmdline(idx, tsk->comm);
2173
2174         arch_spin_unlock(&trace_cmdline_lock);
2175
2176         return 1;
2177 }
2178
2179 static void __trace_find_cmdline(int pid, char comm[])
2180 {
2181         unsigned map;
2182
2183         if (!pid) {
2184                 strcpy(comm, "<idle>");
2185                 return;
2186         }
2187
2188         if (WARN_ON_ONCE(pid < 0)) {
2189                 strcpy(comm, "<XXX>");
2190                 return;
2191         }
2192
2193         if (pid > PID_MAX_DEFAULT) {
2194                 strcpy(comm, "<...>");
2195                 return;
2196         }
2197
2198         map = savedcmd->map_pid_to_cmdline[pid];
2199         if (map != NO_CMDLINE_MAP)
2200                 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2201         else
2202                 strcpy(comm, "<...>");
2203 }
2204
2205 void trace_find_cmdline(int pid, char comm[])
2206 {
2207         preempt_disable();
2208         arch_spin_lock(&trace_cmdline_lock);
2209
2210         __trace_find_cmdline(pid, comm);
2211
2212         arch_spin_unlock(&trace_cmdline_lock);
2213         preempt_enable();
2214 }
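
/*
 * Illustrative sketch (not part of the original file): how a consumer of
 * the saved cmdline cache typically resolves a pid to a comm.  The buffer
 * must be at least TASK_COMM_LEN bytes; an unknown pid comes back as
 * "<...>".
 */
#if 0
static void example_show_comm(int pid)
{
	char comm[TASK_COMM_LEN];

	trace_find_cmdline(pid, comm);
	pr_info("pid %d last ran as %s\n", pid, comm);
}
#endif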
2215
2216 int trace_find_tgid(int pid)
2217 {
2218         if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2219                 return 0;
2220
2221         return tgid_map[pid];
2222 }
2223
2224 static int trace_save_tgid(struct task_struct *tsk)
2225 {
2226         /* treat recording of idle task as a success */
2227         if (!tsk->pid)
2228                 return 1;
2229
2230         if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2231                 return 0;
2232
2233         tgid_map[tsk->pid] = tsk->tgid;
2234         return 1;
2235 }
2236
2237 static bool tracing_record_taskinfo_skip(int flags)
2238 {
2239         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2240                 return true;
2241         if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
2242                 return true;
2243         if (!__this_cpu_read(trace_taskinfo_save))
2244                 return true;
2245         return false;
2246 }
2247
2248 /**
2249  * tracing_record_taskinfo - record the task info of a task
2250  *
2251  * @task:  task to record
2252  * @flags: TRACE_RECORD_CMDLINE for recording comm
2253  *         TRACE_RECORD_TGID for recording tgid
2254  */
2255 void tracing_record_taskinfo(struct task_struct *task, int flags)
2256 {
2257         bool done;
2258
2259         if (tracing_record_taskinfo_skip(flags))
2260                 return;
2261
2262         /*
2263          * Record as much task information as possible. If some fail, continue
2264          * to try to record the others.
2265          */
2266         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2267         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2268
2269         /* If recording any information failed, retry again soon. */
2270         if (!done)
2271                 return;
2272
2273         __this_cpu_write(trace_taskinfo_save, false);
2274 }
2275
2276 /**
2277  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2278  *
2279  * @prev: previous task during sched_switch
2280  * @next: next task during sched_switch
2281  * @flags: TRACE_RECORD_CMDLINE for recording comm
2282  *         TRACE_RECORD_TGID for recording tgid
2283  */
2284 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2285                                           struct task_struct *next, int flags)
2286 {
2287         bool done;
2288
2289         if (tracing_record_taskinfo_skip(flags))
2290                 return;
2291
2292         /*
2293          * Record as much task information as possible. If some fail, continue
2294          * to try to record the others.
2295          */
2296         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2297         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2298         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2299         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2300
2301         /* If recording any information failed, retry again soon. */
2302         if (!done)
2303                 return;
2304
2305         __this_cpu_write(trace_taskinfo_save, false);
2306 }
2307
2308 /* Helpers to record specific task information */
2309 void tracing_record_cmdline(struct task_struct *task)
2310 {
2311         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2312 }
2313
2314 void tracing_record_tgid(struct task_struct *task)
2315 {
2316         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2317 }
2318
2319 /*
2320  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2321  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2322  * simplifies those functions and keeps them in sync.
2323  */
2324 enum print_line_t trace_handle_return(struct trace_seq *s)
2325 {
2326         return trace_seq_has_overflowed(s) ?
2327                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2328 }
2329 EXPORT_SYMBOL_GPL(trace_handle_return);
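
/*
 * Illustrative sketch (not part of the original file): a trace_event
 * output callback would typically end with trace_handle_return() so that
 * an overflowed trace_seq is reported as TRACE_TYPE_PARTIAL_LINE.  The
 * callback name and the text it prints are hypothetical.
 */
#if 0
static enum print_line_t
example_event_print(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
{
	struct trace_seq *s = &iter->seq;

	trace_seq_printf(s, "example event on cpu %d\n", iter->cpu);
	return trace_handle_return(s);
}
#endif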
2330
2331 void
2332 tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
2333                              unsigned long flags, int pc)
2334 {
2335         struct task_struct *tsk = current;
2336
2337         entry->preempt_count            = pc & 0xff;
2338         entry->pid                      = (tsk) ? tsk->pid : 0;
2339         entry->type                     = type;
2340         entry->flags =
2341 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2342                 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2343 #else
2344                 TRACE_FLAG_IRQS_NOSUPPORT |
2345 #endif
2346                 ((pc & NMI_MASK    ) ? TRACE_FLAG_NMI     : 0) |
2347                 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2348                 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2349                 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2350                 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2351 }
2352 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
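
/*
 * Illustrative sketch (not part of the original file): a caller that has
 * reserved an event typically fills the common trace_entry header with
 * tracing_generic_entry_update() before writing its own fields.  The
 * helper below is hypothetical.
 */
#if 0
static void example_fill_entry(struct trace_entry *ent, int type)
{
	unsigned long flags;

	local_save_flags(flags);
	tracing_generic_entry_update(ent, type, flags, preempt_count());
}
#endif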
2353
2354 struct ring_buffer_event *
2355 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2356                           int type,
2357                           unsigned long len,
2358                           unsigned long flags, int pc)
2359 {
2360         return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2361 }
2362
2363 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2364 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2365 static int trace_buffered_event_ref;
2366
2367 /**
2368  * trace_buffered_event_enable - enable buffering events
2369  *
2370  * When events are being filtered, it is quicker to write the event
2371  * data into a temporary per-CPU buffer if there is a likely chance
2372  * that it will not be committed. Discarding an event from the ring
2373  * buffer is not as fast as committing it, and is much slower than
2374  * copying from a temporary buffer into a commit.
2375  *
2376  * When an event is to be filtered, per-CPU buffers are allocated to
2377  * write the event data into. If the event is filtered and discarded,
2378  * it is simply dropped; otherwise, the entire data is committed
2379  * in one shot.
2380  */
2381 void trace_buffered_event_enable(void)
2382 {
2383         struct ring_buffer_event *event;
2384         struct page *page;
2385         int cpu;
2386
2387         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2388
2389         if (trace_buffered_event_ref++)
2390                 return;
2391
2392         for_each_tracing_cpu(cpu) {
2393                 page = alloc_pages_node(cpu_to_node(cpu),
2394                                         GFP_KERNEL | __GFP_NORETRY, 0);
2395                 if (!page)
2396                         goto failed;
2397
2398                 event = page_address(page);
2399                 memset(event, 0, sizeof(*event));
2400
2401                 per_cpu(trace_buffered_event, cpu) = event;
2402
2403                 preempt_disable();
2404                 if (cpu == smp_processor_id() &&
2405                     this_cpu_read(trace_buffered_event) !=
2406                     per_cpu(trace_buffered_event, cpu))
2407                         WARN_ON_ONCE(1);
2408                 preempt_enable();
2409         }
2410
2411         return;
2412  failed:
2413         trace_buffered_event_disable();
2414 }
2415
2416 static void enable_trace_buffered_event(void *data)
2417 {
2418         /* Probably not needed, but do it anyway */
2419         smp_rmb();
2420         this_cpu_dec(trace_buffered_event_cnt);
2421 }
2422
2423 static void disable_trace_buffered_event(void *data)
2424 {
2425         this_cpu_inc(trace_buffered_event_cnt);
2426 }
2427
2428 /**
2429  * trace_buffered_event_disable - disable buffering events
2430  *
2431  * When a filter is removed, it is faster to not use the buffered
2432  * events, and to commit directly into the ring buffer. Free up
2433  * the temp buffers when there are no more users. This requires
2434  * special synchronization with current events.
2435  */
2436 void trace_buffered_event_disable(void)
2437 {
2438         int cpu;
2439
2440         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2441
2442         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2443                 return;
2444
2445         if (--trace_buffered_event_ref)
2446                 return;
2447
2448         preempt_disable();
2449         /* For each CPU, set the buffer as used. */
2450         smp_call_function_many(tracing_buffer_mask,
2451                                disable_trace_buffered_event, NULL, 1);
2452         preempt_enable();
2453
2454         /* Wait for all current users to finish */
2455         synchronize_rcu();
2456
2457         for_each_tracing_cpu(cpu) {
2458                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2459                 per_cpu(trace_buffered_event, cpu) = NULL;
2460         }
2461         /*
2462          * Make sure trace_buffered_event is NULL before clearing
2463          * trace_buffered_event_cnt.
2464          */
2465         smp_wmb();
2466
2467         preempt_disable();
2468         /* Do the work on each cpu */
2469         smp_call_function_many(tracing_buffer_mask,
2470                                enable_trace_buffered_event, NULL, 1);
2471         preempt_enable();
2472 }
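
/*
 * Illustrative sketch (not part of the original file): the enable/disable
 * pair is reference counted and must be called with event_mutex held,
 * e.g. around installing and removing an event filter.  The function name
 * below is hypothetical.
 */
#if 0
static void example_filter_lifetime(void)
{
	mutex_lock(&event_mutex);
	trace_buffered_event_enable();
	/* ... install a filter that may discard events ... */
	mutex_unlock(&event_mutex);

	/* ... later, when the filter is removed ... */
	mutex_lock(&event_mutex);
	trace_buffered_event_disable();
	mutex_unlock(&event_mutex);
}
#endif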
2473
2474 static struct ring_buffer *temp_buffer;
2475
2476 struct ring_buffer_event *
2477 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2478                           struct trace_event_file *trace_file,
2479                           int type, unsigned long len,
2480                           unsigned long flags, int pc)
2481 {
2482         struct ring_buffer_event *entry;
2483         int val;
2484
2485         *current_rb = trace_file->tr->trace_buffer.buffer;
2486
2487         if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
2488              (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2489             (entry = this_cpu_read(trace_buffered_event))) {
2490                 /* Try to use the per cpu buffer first */
2491                 val = this_cpu_inc_return(trace_buffered_event_cnt);
2492                 if (val == 1) {
2493                         trace_event_setup(entry, type, flags, pc);
2494                         entry->array[0] = len;
2495                         return entry;
2496                 }
2497                 this_cpu_dec(trace_buffered_event_cnt);
2498         }
2499
2500         entry = __trace_buffer_lock_reserve(*current_rb,
2501                                             type, len, flags, pc);
2502         /*
2503          * If tracing is off, but we have triggers enabled
2504          * we still need to look at the event data. Use the temp_buffer
2505          * to store the trace event for the trigger to use. It is recursion
2506          * safe and will not be recorded anywhere.
2507          */
2508         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2509                 *current_rb = temp_buffer;
2510                 entry = __trace_buffer_lock_reserve(*current_rb,
2511                                                     type, len, flags, pc);
2512         }
2513         return entry;
2514 }
2515 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2516
2517 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2518 static DEFINE_MUTEX(tracepoint_printk_mutex);
2519
2520 static void output_printk(struct trace_event_buffer *fbuffer)
2521 {
2522         struct trace_event_call *event_call;
2523         struct trace_event *event;
2524         unsigned long flags;
2525         struct trace_iterator *iter = tracepoint_print_iter;
2526
2527         /* We should never get here if iter is NULL */
2528         if (WARN_ON_ONCE(!iter))
2529                 return;
2530
2531         event_call = fbuffer->trace_file->event_call;
2532         if (!event_call || !event_call->event.funcs ||
2533             !event_call->event.funcs->trace)
2534                 return;
2535
2536         event = &fbuffer->trace_file->event_call->event;
2537
2538         spin_lock_irqsave(&tracepoint_iter_lock, flags);
2539         trace_seq_init(&iter->seq);
2540         iter->ent = fbuffer->entry;
2541         event_call->event.funcs->trace(iter, 0, event);
2542         trace_seq_putc(&iter->seq, 0);
2543         printk("%s", iter->seq.buffer);
2544
2545         spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2546 }
2547
2548 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2549                              void __user *buffer, size_t *lenp,
2550                              loff_t *ppos)
2551 {
2552         int save_tracepoint_printk;
2553         int ret;
2554
2555         mutex_lock(&tracepoint_printk_mutex);
2556         save_tracepoint_printk = tracepoint_printk;
2557
2558         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2559
2560         /*
2561          * This will force an early exit, as tracepoint_printk
2562          * is always zero when tracepoint_print_iter is not allocated.
2563          */
2564         if (!tracepoint_print_iter)
2565                 tracepoint_printk = 0;
2566
2567         if (save_tracepoint_printk == tracepoint_printk)
2568                 goto out;
2569
2570         if (tracepoint_printk)
2571                 static_key_enable(&tracepoint_printk_key.key);
2572         else
2573                 static_key_disable(&tracepoint_printk_key.key);
2574
2575  out:
2576         mutex_unlock(&tracepoint_printk_mutex);
2577
2578         return ret;
2579 }
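
/*
 * Illustrative sketch (not part of the original file): roughly how the
 * handler above gets exposed as /proc/sys/kernel/tracepoint_printk via a
 * ctl_table entry (the real entry lives in kernel/sysctl.c).
 */
#if 0
static struct ctl_table example_tracepoint_printk_table[] = {
	{
		.procname	= "tracepoint_printk",
		.data		= &tracepoint_printk,
		.maxlen		= sizeof(tracepoint_printk),
		.mode		= 0644,
		.proc_handler	= tracepoint_printk_sysctl,
	},
	{ }
};
#endif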
2580
2581 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2582 {
2583         if (static_key_false(&tracepoint_printk_key.key))
2584                 output_printk(fbuffer);
2585
2586         event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2587                                     fbuffer->event, fbuffer->entry,
2588                                     fbuffer->flags, fbuffer->pc);
2589 }
2590 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2591
2592 /*
2593  * Skip 3:
2594  *
2595  *   trace_buffer_unlock_commit_regs()
2596  *   trace_event_buffer_commit()
2597  *   trace_event_raw_event_xxx()
2598  */
2599 # define STACK_SKIP 3
2600
2601 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2602                                      struct ring_buffer *buffer,
2603                                      struct ring_buffer_event *event,
2604                                      unsigned long flags, int pc,
2605                                      struct pt_regs *regs)
2606 {
2607         __buffer_unlock_commit(buffer, event);
2608
2609         /*
2610          * If regs is not set, then skip the necessary functions.
2611          * Note, we can still get here via blktrace, wakeup tracer
2612          * and mmiotrace, but that's ok if they lose a function or
2613          * two. They are not that meaningful.
2614          */
2615         ftrace_trace_stack(tr, buffer, flags, regs ? 0 : STACK_SKIP, pc, regs);
2616         ftrace_trace_userstack(buffer, flags, pc);
2617 }
2618
2619 /*
2620  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2621  */
2622 void
2623 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2624                                    struct ring_buffer_event *event)
2625 {
2626         __buffer_unlock_commit(buffer, event);
2627 }
2628
2629 static void
2630 trace_process_export(struct trace_export *export,
2631                struct ring_buffer_event *event)
2632 {
2633         struct trace_entry *entry;
2634         unsigned int size = 0;
2635
2636         entry = ring_buffer_event_data(event);
2637         size = ring_buffer_event_length(event);
2638         export->write(export, entry, size);
2639 }
2640
2641 static DEFINE_MUTEX(ftrace_export_lock);
2642
2643 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2644
2645 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2646
2647 static inline void ftrace_exports_enable(void)
2648 {
2649         static_branch_enable(&ftrace_exports_enabled);
2650 }
2651
2652 static inline void ftrace_exports_disable(void)
2653 {
2654         static_branch_disable(&ftrace_exports_enabled);
2655 }
2656
2657 static void ftrace_exports(struct ring_buffer_event *event)
2658 {
2659         struct trace_export *export;
2660
2661         preempt_disable_notrace();
2662
2663         export = rcu_dereference_raw_check(ftrace_exports_list);
2664         while (export) {
2665                 trace_process_export(export, event);
2666                 export = rcu_dereference_raw_check(export->next);
2667         }
2668
2669         preempt_enable_notrace();
2670 }
2671
2672 static inline void
2673 add_trace_export(struct trace_export **list, struct trace_export *export)
2674 {
2675         rcu_assign_pointer(export->next, *list);
2676         /*
2677          * We are entering export into the list but another
2678          * CPU might be walking that list. We need to make sure
2679          * the export->next pointer is valid before another CPU sees
2680          * the export pointer inserted into the list.
2681          */
2682         rcu_assign_pointer(*list, export);
2683 }
2684
2685 static inline int
2686 rm_trace_export(struct trace_export **list, struct trace_export *export)
2687 {
2688         struct trace_export **p;
2689
2690         for (p = list; *p != NULL; p = &(*p)->next)
2691                 if (*p == export)
2692                         break;
2693
2694         if (*p != export)
2695                 return -1;
2696
2697         rcu_assign_pointer(*p, (*p)->next);
2698
2699         return 0;
2700 }
2701
2702 static inline void
2703 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2704 {
2705         if (*list == NULL)
2706                 ftrace_exports_enable();
2707
2708         add_trace_export(list, export);
2709 }
2710
2711 static inline int
2712 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2713 {
2714         int ret;
2715
2716         ret = rm_trace_export(list, export);
2717         if (*list == NULL)
2718                 ftrace_exports_disable();
2719
2720         return ret;
2721 }
2722
2723 int register_ftrace_export(struct trace_export *export)
2724 {
2725         if (WARN_ON_ONCE(!export->write))
2726                 return -1;
2727
2728         mutex_lock(&ftrace_export_lock);
2729
2730         add_ftrace_export(&ftrace_exports_list, export);
2731
2732         mutex_unlock(&ftrace_export_lock);
2733
2734         return 0;
2735 }
2736 EXPORT_SYMBOL_GPL(register_ftrace_export);
2737
2738 int unregister_ftrace_export(struct trace_export *export)
2739 {
2740         int ret;
2741
2742         mutex_lock(&ftrace_export_lock);
2743
2744         ret = rm_ftrace_export(&ftrace_exports_list, export);
2745
2746         mutex_unlock(&ftrace_export_lock);
2747
2748         return ret;
2749 }
2750 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
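
/*
 * Illustrative sketch (not part of the original file): a module that wants
 * every function-trace entry pushed to it registers a struct trace_export
 * with a write() callback.  The callback body and the names used here are
 * hypothetical.
 */
#if 0
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Copy the raw trace entry of @size bytes to some other transport. */
}

static struct trace_export example_export = {
	.write	= example_export_write,
};

/*
 * register_ftrace_export(&example_export);
 * ...
 * unregister_ftrace_export(&example_export);
 */
#endif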
2751
2752 void
2753 trace_function(struct trace_array *tr,
2754                unsigned long ip, unsigned long parent_ip, unsigned long flags,
2755                int pc)
2756 {
2757         struct trace_event_call *call = &event_function;
2758         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2759         struct ring_buffer_event *event;
2760         struct ftrace_entry *entry;
2761
2762         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2763                                             flags, pc);
2764         if (!event)
2765                 return;
2766         entry   = ring_buffer_event_data(event);
2767         entry->ip                       = ip;
2768         entry->parent_ip                = parent_ip;
2769
2770         if (!call_filter_check_discard(call, entry, buffer, event)) {
2771                 if (static_branch_unlikely(&ftrace_exports_enabled))
2772                         ftrace_exports(event);
2773                 __buffer_unlock_commit(buffer, event);
2774         }
2775 }
2776
2777 #ifdef CONFIG_STACKTRACE
2778
2779 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2780 #define FTRACE_KSTACK_NESTING   4
2781
2782 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2783
2784 struct ftrace_stack {
2785         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2786 };
2787
2788
2789 struct ftrace_stacks {
2790         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2791 };
2792
2793 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2794 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2795
2796 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2797                                  unsigned long flags,
2798                                  int skip, int pc, struct pt_regs *regs)
2799 {
2800         struct trace_event_call *call = &event_kernel_stack;
2801         struct ring_buffer_event *event;
2802         unsigned int size, nr_entries;
2803         struct ftrace_stack *fstack;
2804         struct stack_entry *entry;
2805         int stackidx;
2806
2807         /*
2808          * Add one, for this function and the call to stack_trace_save().
2809          * If regs is set, then these functions will not be in the way.
2810          */
2811 #ifndef CONFIG_UNWINDER_ORC
2812         if (!regs)
2813                 skip++;
2814 #endif
2815
2816         /*
2817          * Since events can happen in NMIs there's no safe way to
2818          * use a single per-CPU stack buffer. We reserve a nesting slot
2819          * of ftrace_stacks here; if an interrupt or NMI comes in while
2820          * we use it, the nested context simply takes the next slot.
2821          */
2822         preempt_disable_notrace();
2823
2824         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2825
2826         /* This should never happen. If it does, yell once and skip */
2827         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2828                 goto out;
2829
2830         /*
2831          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2832          * interrupt will either see the value pre increment or post
2833          * increment. If the interrupt happens pre increment it will have
2834          * restored the counter when it returns.  We just need a barrier to
2835          * keep gcc from moving things around.
2836          */
2837         barrier();
2838
2839         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2840         size = ARRAY_SIZE(fstack->calls);
2841
2842         if (regs) {
2843                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2844                                                    size, skip);
2845         } else {
2846                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2847         }
2848
2849         size = nr_entries * sizeof(unsigned long);
2850         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2851                                             sizeof(*entry) + size, flags, pc);
2852         if (!event)
2853                 goto out;
2854         entry = ring_buffer_event_data(event);
2855
2856         memcpy(&entry->caller, fstack->calls, size);
2857         entry->size = nr_entries;
2858
2859         if (!call_filter_check_discard(call, entry, buffer, event))
2860                 __buffer_unlock_commit(buffer, event);
2861
2862  out:
2863         /* Again, don't let gcc optimize things here */
2864         barrier();
2865         __this_cpu_dec(ftrace_stack_reserve);
2866         preempt_enable_notrace();
2867
2868 }
2869
2870 static inline void ftrace_trace_stack(struct trace_array *tr,
2871                                       struct ring_buffer *buffer,
2872                                       unsigned long flags,
2873                                       int skip, int pc, struct pt_regs *regs)
2874 {
2875         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2876                 return;
2877
2878         __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2879 }
2880
2881 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2882                    int pc)
2883 {
2884         struct ring_buffer *buffer = tr->trace_buffer.buffer;
2885
2886         if (rcu_is_watching()) {
2887                 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2888                 return;
2889         }
2890
2891         /*
2892          * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2893          * but if the above rcu_is_watching() failed, then the NMI
2894          * triggered someplace critical, and rcu_irq_enter() should
2895          * not be called from NMI.
2896          */
2897         if (unlikely(in_nmi()))
2898                 return;
2899
2900         rcu_irq_enter_irqson();
2901         __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2902         rcu_irq_exit_irqson();
2903 }
2904
2905 /**
2906  * trace_dump_stack - record a stack back trace in the trace buffer
2907  * @skip: Number of functions to skip (helper handlers)
2908  */
2909 void trace_dump_stack(int skip)
2910 {
2911         unsigned long flags;
2912
2913         if (tracing_disabled || tracing_selftest_running)
2914                 return;
2915
2916         local_save_flags(flags);
2917
2918 #ifndef CONFIG_UNWINDER_ORC
2919         /* Skip 1 to skip this function. */
2920         skip++;
2921 #endif
2922         __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2923                              flags, skip, preempt_count(), NULL);
2924 }
2925 EXPORT_SYMBOL_GPL(trace_dump_stack);
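
/*
 * Illustrative sketch (not part of the original file): a debugging site
 * that wants a backtrace recorded in the trace buffer (rather than dmesg)
 * can simply call trace_dump_stack(); a non-zero skip drops that many
 * helper frames from the top.  The function name below is hypothetical.
 */
#if 0
static void example_debug_path(void)
{
	trace_dump_stack(0);	/* record this call chain in the ring buffer */
}
#endif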
2926
2927 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
2928 static DEFINE_PER_CPU(int, user_stack_count);
2929
2930 static void
2931 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2932 {
2933         struct trace_event_call *call = &event_user_stack;
2934         struct ring_buffer_event *event;
2935         struct userstack_entry *entry;
2936
2937         if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2938                 return;
2939
2940         /*
2941          * NMIs cannot handle page faults, even with fixups.
2942          * Saving the user stack can (and often does) fault.
2943          */
2944         if (unlikely(in_nmi()))
2945                 return;
2946
2947         /*
2948          * prevent recursion, since the user stack tracing may
2949          * trigger other kernel events.
2950          */
2951         preempt_disable();
2952         if (__this_cpu_read(user_stack_count))
2953                 goto out;
2954
2955         __this_cpu_inc(user_stack_count);
2956
2957         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2958                                             sizeof(*entry), flags, pc);
2959         if (!event)
2960                 goto out_drop_count;
2961         entry   = ring_buffer_event_data(event);
2962
2963         entry->tgid             = current->tgid;
2964         memset(&entry->caller, 0, sizeof(entry->caller));
2965
2966         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
2967         if (!call_filter_check_discard(call, entry, buffer, event))
2968                 __buffer_unlock_commit(buffer, event);
2969
2970  out_drop_count:
2971         __this_cpu_dec(user_stack_count);
2972  out:
2973         preempt_enable();
2974 }
2975 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
2976 static void ftrace_trace_userstack(struct ring_buffer *buffer,
2977                                    unsigned long flags, int pc)
2978 {
2979 }
2980 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
2981
2982 #endif /* CONFIG_STACKTRACE */
2983
2984 /* created for use with alloc_percpu */
2985 struct trace_buffer_struct {
2986         int nesting;
2987         char buffer[4][TRACE_BUF_SIZE];
2988 };
2989
2990 static struct trace_buffer_struct *trace_percpu_buffer;
2991
2992 /*
2993  * This allows for lockless recording.  If we're nested too deeply, then
2994  * this returns NULL.
2995  */
2996 static char *get_trace_buf(void)
2997 {
2998         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2999
3000         if (!buffer || buffer->nesting >= 4)
3001                 return NULL;
3002
3003         buffer->nesting++;
3004
3005         /* Interrupts must see nesting incremented before we use the buffer */
3006         barrier();
3007         return &buffer->buffer[buffer->nesting][0];
3008 }
3009
3010 static void put_trace_buf(void)
3011 {
3012         /* Don't let the decrement of nesting leak before this */
3013         barrier();
3014         this_cpu_dec(trace_percpu_buffer->nesting);
3015 }
3016
3017 static int alloc_percpu_trace_buffer(void)
3018 {
3019         struct trace_buffer_struct *buffers;
3020
3021         buffers = alloc_percpu(struct trace_buffer_struct);
3022         if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
3023                 return -ENOMEM;
3024
3025         trace_percpu_buffer = buffers;
3026         return 0;
3027 }
3028
3029 static int buffers_allocated;
3030
3031 void trace_printk_init_buffers(void)
3032 {
3033         if (buffers_allocated)
3034                 return;
3035
3036         if (alloc_percpu_trace_buffer())
3037                 return;
3038
3039         /* trace_printk() is for debug use only. Don't use it in production. */
3040
3041         pr_warn("\n");
3042         pr_warn("**********************************************************\n");
3043         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3044         pr_warn("**                                                      **\n");
3045         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3046         pr_warn("**                                                      **\n");
3047         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3048         pr_warn("** unsafe for production use.                           **\n");
3049         pr_warn("**                                                      **\n");
3050         pr_warn("** If you see this message and you are not debugging    **\n");
3051         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3052         pr_warn("**                                                      **\n");
3053         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3054         pr_warn("**********************************************************\n");
3055
3056         /* Expand the buffers to set size */
3057         tracing_update_buffers();
3058
3059         buffers_allocated = 1;
3060
3061         /*
3062          * trace_printk_init_buffers() can be called by modules.
3063          * If that happens, then we need to start cmdline recording
3064          * directly here. If the global_trace.buffer is already
3065          * allocated here, then this was called by module code.
3066          */
3067         if (global_trace.trace_buffer.buffer)
3068                 tracing_start_cmdline_record();
3069 }
3070 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3071
3072 void trace_printk_start_comm(void)
3073 {
3074         /* Start tracing comms if trace printk is set */
3075         if (!buffers_allocated)
3076                 return;
3077         tracing_start_cmdline_record();
3078 }
3079
3080 static void trace_printk_start_stop_comm(int enabled)
3081 {
3082         if (!buffers_allocated)
3083                 return;
3084
3085         if (enabled)
3086                 tracing_start_cmdline_record();
3087         else
3088                 tracing_stop_cmdline_record();
3089 }
3090
3091 /**
3092  * trace_vbprintk - write binary msg to tracing buffer
3093  * @ip:    The address of the caller
3094  * @fmt:   The string format to write to the buffer
3095  * @args:  Arguments for @fmt
3096  */
3097 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3098 {
3099         struct trace_event_call *call = &event_bprint;
3100         struct ring_buffer_event *event;
3101         struct ring_buffer *buffer;
3102         struct trace_array *tr = &global_trace;
3103         struct bprint_entry *entry;
3104         unsigned long flags;
3105         char *tbuffer;
3106         int len = 0, size, pc;
3107
3108         if (unlikely(tracing_selftest_running || tracing_disabled))
3109                 return 0;
3110
3111         /* Don't pollute graph traces with trace_vprintk internals */
3112         pause_graph_tracing();
3113
3114         pc = preempt_count();
3115         preempt_disable_notrace();
3116
3117         tbuffer = get_trace_buf();
3118         if (!tbuffer) {
3119                 len = 0;
3120                 goto out_nobuffer;
3121         }
3122
3123         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3124
3125         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3126                 goto out;
3127
3128         local_save_flags(flags);
3129         size = sizeof(*entry) + sizeof(u32) * len;
3130         buffer = tr->trace_buffer.buffer;
3131         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3132                                             flags, pc);
3133         if (!event)
3134                 goto out;
3135         entry = ring_buffer_event_data(event);
3136         entry->ip                       = ip;
3137         entry->fmt                      = fmt;
3138
3139         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3140         if (!call_filter_check_discard(call, entry, buffer, event)) {
3141                 __buffer_unlock_commit(buffer, event);
3142                 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
3143         }
3144
3145 out:
3146         put_trace_buf();
3147
3148 out_nobuffer:
3149         preempt_enable_notrace();
3150         unpause_graph_tracing();
3151
3152         return len;
3153 }
3154 EXPORT_SYMBOL_GPL(trace_vbprintk);
3155
3156 __printf(3, 0)
3157 static int
3158 __trace_array_vprintk(struct ring_buffer *buffer,
3159                       unsigned long ip, const char *fmt, va_list args)
3160 {
3161         struct trace_event_call *call = &event_print;
3162         struct ring_buffer_event *event;
3163         int len = 0, size, pc;
3164         struct print_entry *entry;
3165         unsigned long flags;
3166         char *tbuffer;
3167
3168         if (tracing_disabled || tracing_selftest_running)
3169                 return 0;
3170
3171         /* Don't pollute graph traces with trace_vprintk internals */
3172         pause_graph_tracing();
3173
3174         pc = preempt_count();
3175         preempt_disable_notrace();
3176
3177
3178         tbuffer = get_trace_buf();
3179         if (!tbuffer) {
3180                 len = 0;
3181                 goto out_nobuffer;
3182         }
3183
3184         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3185
3186         local_save_flags(flags);
3187         size = sizeof(*entry) + len + 1;
3188         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3189                                             flags, pc);
3190         if (!event)
3191                 goto out;
3192         entry = ring_buffer_event_data(event);
3193         entry->ip = ip;
3194
3195         memcpy(&entry->buf, tbuffer, len + 1);
3196         if (!call_filter_check_discard(call, entry, buffer, event)) {
3197                 __buffer_unlock_commit(buffer, event);
3198                 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
3199         }
3200
3201 out:
3202         put_trace_buf();
3203
3204 out_nobuffer:
3205         preempt_enable_notrace();
3206         unpause_graph_tracing();
3207
3208         return len;
3209 }
3210
3211 __printf(3, 0)
3212 int trace_array_vprintk(struct trace_array *tr,
3213                         unsigned long ip, const char *fmt, va_list args)
3214 {
3215         return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
3216 }
3217
3218 __printf(3, 0)
3219 int trace_array_printk(struct trace_array *tr,
3220                        unsigned long ip, const char *fmt, ...)
3221 {
3222         int ret;
3223         va_list ap;
3224
3225         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3226                 return 0;
3227
3228         va_start(ap, fmt);
3229         ret = trace_array_vprintk(tr, ip, fmt, ap);
3230         va_end(ap);
3231         return ret;
3232 }
3233 EXPORT_SYMBOL_GPL(trace_array_printk);
3234
3235 __printf(3, 4)
3236 int trace_array_printk_buf(struct ring_buffer *buffer,
3237                            unsigned long ip, const char *fmt, ...)
3238 {
3239         int ret;
3240         va_list ap;
3241
3242         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3243                 return 0;
3244
3245         va_start(ap, fmt);
3246         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3247         va_end(ap);
3248         return ret;
3249 }
3250
3251 __printf(2, 0)
3252 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3253 {
3254         return trace_array_vprintk(&global_trace, ip, fmt, args);
3255 }
3256 EXPORT_SYMBOL_GPL(trace_vprintk);
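
/*
 * Illustrative sketch (not part of the original file): the usual consumer
 * of the printk paths above is the trace_printk() macro, dropped into a
 * code path while debugging.  Constant format strings are handled by the
 * binary trace_vbprintk() path, non-constant ones by trace_vprintk().
 * The function and variable names below are hypothetical.
 */
#if 0
static void example_debug_event(int cpu, u64 delta)
{
	trace_printk("cpu %d saw a delta of %llu ns\n",
		     cpu, (unsigned long long)delta);
}
#endif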
3257
3258 static void trace_iterator_increment(struct trace_iterator *iter)
3259 {
3260         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3261
3262         iter->idx++;
3263         if (buf_iter)
3264                 ring_buffer_read(buf_iter, NULL);
3265 }
3266
3267 static struct trace_entry *
3268 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3269                 unsigned long *lost_events)
3270 {
3271         struct ring_buffer_event *event;
3272         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3273
3274         if (buf_iter)
3275                 event = ring_buffer_iter_peek(buf_iter, ts);
3276         else
3277                 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
3278                                          lost_events);
3279
3280         if (event) {
3281                 iter->ent_size = ring_buffer_event_length(event);
3282                 return ring_buffer_event_data(event);
3283         }
3284         iter->ent_size = 0;
3285         return NULL;
3286 }
3287
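/*
 * Find the oldest pending entry across the buffers this iterator covers:
 * peek every tracing CPU (or just iter->cpu_file for a per_cpu file) and
 * pick the entry with the smallest timestamp.  The chosen entry's CPU,
 * timestamp and lost-event count are reported through the out parameters.
 */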
3288 static struct trace_entry *
3289 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3290                   unsigned long *missing_events, u64 *ent_ts)
3291 {
3292         struct ring_buffer *buffer = iter->trace_buffer->buffer;
3293         struct trace_entry *ent, *next = NULL;
3294         unsigned long lost_events = 0, next_lost = 0;
3295         int cpu_file = iter->cpu_file;
3296         u64 next_ts = 0, ts;
3297         int next_cpu = -1;
3298         int next_size = 0;
3299         int cpu;
3300
3301         /*
3302          * If we are in a per_cpu trace file, don't bother iterating over
3303          * all CPUs; peek directly at that CPU.
3304          */
3305         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3306                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3307                         return NULL;
3308                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3309                 if (ent_cpu)
3310                         *ent_cpu = cpu_file;
3311
3312                 return ent;
3313         }
3314
3315         for_each_tracing_cpu(cpu) {
3316
3317                 if (ring_buffer_empty_cpu(buffer, cpu))
3318                         continue;
3319
3320                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3321
3322                 /*
3323                  * Pick the entry with the smallest timestamp:
3324                  */
3325                 if (ent && (!next || ts < next_ts)) {
3326                         next = ent;
3327                         next_cpu = cpu;
3328                         next_ts = ts;
3329                         next_lost = lost_events;
3330                         next_size = iter->ent_size;
3331                 }
3332         }
3333
3334         iter->ent_size = next_size;
3335
3336         if (ent_cpu)
3337                 *ent_cpu = next_cpu;
3338
3339         if (ent_ts)
3340                 *ent_ts = next_ts;
3341
3342         if (missing_events)
3343                 *missing_events = next_lost;
3344
3345         return next;
3346 }
3347
3348 /* Find the next real entry, without updating the iterator itself */
3349 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3350                                           int *ent_cpu, u64 *ent_ts)
3351 {
3352         return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3353 }
3354
3355 /* Find the next real entry, and increment the iterator to the next entry */
3356 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3357 {
3358         iter->ent = __find_next_entry(iter, &iter->cpu,
3359                                       &iter->lost_events, &iter->ts);
3360
3361         if (iter->ent)
3362                 trace_iterator_increment(iter);
3363
3364         return iter->ent ? iter : NULL;
3365 }
3366
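/*
 * Consume (remove) the current entry on iter->cpu from the live ring
 * buffer, recording its timestamp and any lost events in the iterator.
 * Used for trace_pipe style reads.
 */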
3367 static void trace_consume(struct trace_iterator *iter)
3368 {
3369         ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3370                             &iter->lost_events);
3371 }
3372
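/*
 * seq_file ->next() callback for the "trace" file: advance the iterator
 * until it reaches position *pos.  Walking backwards is not supported;
 * s_start() resets and re-walks the buffer when *pos does not match the
 * iterator's last position.
 */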
3373 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3374 {
3375         struct trace_iterator *iter = m->private;
3376         int i = (int)*pos;
3377         void *ent;
3378
3379         WARN_ON_ONCE(iter->leftover);
3380
3381         (*pos)++;
3382
3383         /* can't go backwards */
3384         if (iter->idx > i)
3385                 return NULL;
3386
3387         if (iter->idx < 0)
3388                 ent = trace_find_next_entry_inc(iter);
3389         else
3390                 ent = iter;
3391
3392         while (ent && iter->idx < i)
3393                 ent = trace_find_next_entry_inc(iter);
3394
3395         iter->pos = *pos;
3396
3397         return ent;
3398 }
3399
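/*
 * Reset the per-cpu ring buffer iterator and skip over any entries
 * stamped before the buffer's time_start (possible with the max latency
 * tracers).  The number of skipped entries is recorded so the entry
 * statistics in the header stay correct.
 */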
3400 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3401 {
3402         struct ring_buffer_event *event;
3403         struct ring_buffer_iter *buf_iter;
3404         unsigned long entries = 0;
3405         u64 ts;
3406
3407         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3408
3409         buf_iter = trace_buffer_iter(iter, cpu);
3410         if (!buf_iter)
3411                 return;
3412
3413         ring_buffer_iter_reset(buf_iter);
3414
3415         /*
3416          * With the max latency tracers it is possible that a reset
3417          * never took place on a CPU. This is evident when a
3418          * timestamp is before the start of the buffer.
3419          */
3420         while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3421                 if (ts >= iter->trace_buffer->time_start)
3422                         break;
3423                 entries++;
3424                 ring_buffer_read(buf_iter, NULL);
3425         }
3426
3427         per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3428 }
3429
3430 /*
3431  * The current tracer is copied to avoid taking a global lock
3432  * all around.
3433  */
3434 static void *s_start(struct seq_file *m, loff_t *pos)
3435 {
3436         struct trace_iterator *iter = m->private;
3437         struct trace_array *tr = iter->tr;
3438         int cpu_file = iter->cpu_file;
3439         void *p = NULL;
3440         loff_t l = 0;
3441         int cpu;
3442
3443         /*
3444          * Copy the tracer to avoid using a global lock all around.
3445          * iter->trace is a copy of current_trace; the name pointer may
3446          * be compared directly instead of using strcmp(), as
3447          * iter->trace->name points to the same string as current_trace->name.
3448          */
3449         mutex_lock(&trace_types_lock);
3450         if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3451                 *iter->trace = *tr->current_trace;
3452         mutex_unlock(&trace_types_lock);
3453
3454 #ifdef CONFIG_TRACER_MAX_TRACE
3455         if (iter->snapshot && iter->trace->use_max_tr)
3456                 return ERR_PTR(-EBUSY);
3457 #endif
3458
3459         if (!iter->snapshot)
3460                 atomic_inc(&trace_record_taskinfo_disabled);
3461
3462         if (*pos != iter->pos) {
3463                 iter->ent = NULL;
3464                 iter->cpu = 0;
3465                 iter->idx = -1;
3466
3467                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3468                         for_each_tracing_cpu(cpu)
3469                                 tracing_iter_reset(iter, cpu);
3470                 } else
3471                         tracing_iter_reset(iter, cpu_file);
3472
3473                 iter->leftover = 0;
3474                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3475                         ;
3476
3477         } else {
3478                 /*
3479                  * If we overflowed the seq_file before, then we want
3480                  * to just reuse the trace_seq buffer again.
3481                  */
3482                 if (iter->leftover)
3483                         p = iter;
3484                 else {
3485                         l = *pos - 1;
3486                         p = s_next(m, p, &l);
3487                 }
3488         }
3489
3490         trace_event_read_lock();
3491         trace_access_lock(cpu_file);
3492         return p;
3493 }
3494
3495 static void s_stop(struct seq_file *m, void *p)
3496 {
3497         struct trace_iterator *iter = m->private;
3498
3499 #ifdef CONFIG_TRACER_MAX_TRACE
3500         if (iter->snapshot && iter->trace->use_max_tr)
3501                 return;
3502 #endif
3503
3504         if (!iter->snapshot)
3505                 atomic_dec(&trace_record_taskinfo_disabled);
3506
3507         trace_access_unlock(iter->cpu_file);
3508         trace_event_read_unlock();
3509 }
3510
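/*
 * For one CPU, report both the number of entries still in the buffer
 * (*entries) and the total written (*total = entries + overruns).  If the
 * iterator skipped entries recorded before time_start, those are
 * subtracted and the overrun count is not added, since the buffer then
 * holds the whole trace.
 */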
3511 static void
3512 get_total_entries_cpu(struct trace_buffer *buf, unsigned long *total,
3513                       unsigned long *entries, int cpu)
3514 {
3515         unsigned long count;
3516
3517         count = ring_buffer_entries_cpu(buf->buffer, cpu);
3518         /*
3519          * If this buffer has skipped entries, then we hold all
3520          * entries for the trace and we need to ignore the
3521          * ones before the time stamp.
3522          */
3523         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3524                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3525                 /* total is the same as the entries */
3526                 *total = count;
3527         } else
3528                 *total = count +
3529                         ring_buffer_overrun_cpu(buf->buffer, cpu);
3530         *entries = count;
3531 }
3532
3533 static void
3534 get_total_entries(struct trace_buffer *buf,
3535                   unsigned long *total, unsigned long *entries)
3536 {
3537         unsigned long t, e;
3538         int cpu;
3539
3540         *total = 0;
3541         *entries = 0;
3542
3543         for_each_tracing_cpu(cpu) {
3544                 get_total_entries_cpu(buf, &t, &e, cpu);
3545                 *total += t;
3546                 *entries += e;
3547         }
3548 }
3549
3550 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
3551 {
3552         unsigned long total, entries;
3553
3554         if (!tr)
3555                 tr = &global_trace;
3556
3557         get_total_entries_cpu(&tr->trace_buffer, &total, &entries, cpu);
3558
3559         return entries;
3560 }
3561
3562 unsigned long trace_total_entries(struct trace_array *tr)
3563 {
3564         unsigned long total, entries;
3565
3566         if (!tr)
3567                 tr = &global_trace;
3568
3569         get_total_entries(&tr->trace_buffer, &total, &entries);
3570
3571         return entries;
3572 }
3573
3574 static void print_lat_help_header(struct seq_file *m)
3575 {
3576         seq_puts(m, "#                  _------=> CPU#            \n"
3577                     "#                 / _-----=> irqs-off        \n"
3578                     "#                | / _----=> need-resched    \n"
3579                     "#                || / _---=> hardirq/softirq \n"
3580                     "#                ||| / _--=> preempt-depth   \n"
3581                     "#                |||| /     delay            \n"
3582                     "#  cmd     pid   ||||| time  |   caller      \n"
3583                     "#     \\   /      |||||  \\    |   /         \n");
3584 }
3585
3586 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3587 {
3588         unsigned long total;
3589         unsigned long entries;
3590
3591         get_total_entries(buf, &total, &entries);
3592         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
3593                    entries, total, num_online_cpus());
3594         seq_puts(m, "#\n");
3595 }
3596
3597 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m,
3598                                    unsigned int flags)
3599 {
3600         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3601
3602         print_event_info(buf, m);
3603
3604         seq_printf(m, "#           TASK-PID   %s  CPU#   TIMESTAMP  FUNCTION\n", tgid ? "TGID     " : "");
3605         seq_printf(m, "#              | |     %s    |       |         |\n",      tgid ? "  |      " : "");
3606 }
3607
3608 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m,
3609                                        unsigned int flags)
3610 {
3611         bool tgid = flags & TRACE_ITER_RECORD_TGID;
3612         const char *space = "          ";
3613         int prec = tgid ? 10 : 2;
3614
3615         print_event_info(buf, m);
3616
3617         seq_printf(m, "#                          %.*s  _-----=> irqs-off\n", prec, space);
3618         seq_printf(m, "#                          %.*s / _----=> need-resched\n", prec, space);
3619         seq_printf(m, "#                          %.*s| / _---=> hardirq/softirq\n", prec, space);
3620         seq_printf(m, "#                          %.*s|| / _--=> preempt-depth\n", prec, space);
3621         seq_printf(m, "#                          %.*s||| /     delay\n", prec, space);
3622         seq_printf(m, "#           TASK-PID %.*sCPU#  ||||    TIMESTAMP  FUNCTION\n", prec, "   TGID   ");
3623         seq_printf(m, "#              | |   %.*s  |   ||||       |         |\n", prec, "     |    ");
3624 }
3625
3626 void
3627 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3628 {
3629         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3630         struct trace_buffer *buf = iter->trace_buffer;
3631         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3632         struct tracer *type = iter->trace;
3633         unsigned long entries;
3634         unsigned long total;
3635         const char *name = "preemption";
3636
3637         name = type->name;
3638
3639         get_total_entries(buf, &total, &entries);
3640
3641         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3642                    name, UTS_RELEASE);
3643         seq_puts(m, "# -----------------------------------"
3644                  "---------------------------------\n");
3645         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3646                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3647                    nsecs_to_usecs(data->saved_latency),
3648                    entries,
3649                    total,
3650                    buf->cpu,
3651 #if defined(CONFIG_PREEMPT_NONE)
3652                    "server",
3653 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3654                    "desktop",
3655 #elif defined(CONFIG_PREEMPT)
3656                    "preempt",
3657 #else
3658                    "unknown",
3659 #endif
3660                    /* These are reserved for later use */
3661                    0, 0, 0, 0);
3662 #ifdef CONFIG_SMP
3663         seq_printf(m, " #P:%d)\n", num_online_cpus());
3664 #else
3665         seq_puts(m, ")\n");
3666 #endif
3667         seq_puts(m, "#    -----------------\n");
3668         seq_printf(m, "#    | task: %.16s-%d "
3669                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3670                    data->comm, data->pid,
3671                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3672                    data->policy, data->rt_priority);
3673         seq_puts(m, "#    -----------------\n");
3674
3675         if (data->critical_start) {
3676                 seq_puts(m, "#  => started at: ");
3677                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3678                 trace_print_seq(m, &iter->seq);
3679                 seq_puts(m, "\n#  => ended at:   ");
3680                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3681                 trace_print_seq(m, &iter->seq);
3682                 seq_puts(m, "\n#\n");
3683         }
3684
3685         seq_puts(m, "#\n");
3686 }
3687
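/*
 * With the "annotate" option set and buffers that have overrun, emit a
 * one-time "##### CPU N buffer started ####" line the first time entries
 * from a given CPU show up, so the reader knows earlier events from that
 * CPU may be missing.  CPUs whose buffer covers the whole trace (skipped
 * entries recorded) and the very first entry are not annotated.
 */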
3688 static void test_cpu_buff_start(struct trace_iterator *iter)
3689 {
3690         struct trace_seq *s = &iter->seq;
3691         struct trace_array *tr = iter->tr;
3692
3693         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3694                 return;
3695
3696         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3697                 return;
3698
3699         if (cpumask_available(iter->started) &&
3700             cpumask_test_cpu(iter->cpu, iter->started))
3701                 return;
3702
3703         if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3704                 return;
3705
3706         if (cpumask_available(iter->started))
3707                 cpumask_set_cpu(iter->cpu, iter->started);
3708
3709         /* Don't print started cpu buffer for the first entry of the trace */
3710         if (iter->idx > 1)
3711                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3712                                 iter->cpu);
3713 }
3714
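/*
 * Default human-readable output: print the context (comm, pid, CPU,
 * timestamp or latency info) and then hand the entry to its registered
 * trace() handler, falling back to an "Unknown type" line if no event is
 * registered for the type.
 */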
3715 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3716 {
3717         struct trace_array *tr = iter->tr;
3718         struct trace_seq *s = &iter->seq;
3719         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3720         struct trace_entry *entry;
3721         struct trace_event *event;
3722
3723         entry = iter->ent;
3724
3725         test_cpu_buff_start(iter);
3726
3727         event = ftrace_find_event(entry->type);
3728
3729         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3730                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3731                         trace_print_lat_context(iter);
3732                 else
3733                         trace_print_context(iter);
3734         }
3735
3736         if (trace_seq_has_overflowed(s))
3737                 return TRACE_TYPE_PARTIAL_LINE;
3738
3739         if (event)
3740                 return event->funcs->trace(iter, sym_flags, event);
3741
3742         trace_seq_printf(s, "Unknown type %d\n", entry->type);
3743
3744         return trace_handle_return(s);
3745 }
3746
3747 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3748 {
3749         struct trace_array *tr = iter->tr;
3750         struct trace_seq *s = &iter->seq;
3751         struct trace_entry *entry;
3752         struct trace_event *event;
3753
3754         entry = iter->ent;
3755
3756         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3757                 trace_seq_printf(s, "%d %d %llu ",
3758                                  entry->pid, iter->cpu, iter->ts);
3759
3760         if (trace_seq_has_overflowed(s))
3761                 return TRACE_TYPE_PARTIAL_LINE;
3762
3763         event = ftrace_find_event(entry->type);
3764         if (event)
3765                 return event->funcs->raw(iter, 0, event);
3766
3767         trace_seq_printf(s, "%d ?\n", entry->type);
3768
3769         return trace_handle_return(s);
3770 }
3771
3772 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3773 {
3774         struct trace_array *tr = iter->tr;
3775         struct trace_seq *s = &iter->seq;
3776         unsigned char newline = '\n';
3777         struct trace_entry *entry;
3778         struct trace_event *event;
3779
3780         entry = iter->ent;
3781
3782         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3783                 SEQ_PUT_HEX_FIELD(s, entry->pid);
3784                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3785                 SEQ_PUT_HEX_FIELD(s, iter->ts);
3786                 if (trace_seq_has_overflowed(s))
3787                         return TRACE_TYPE_PARTIAL_LINE;
3788         }
3789
3790         event = ftrace_find_event(entry->type);
3791         if (event) {
3792                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3793                 if (ret != TRACE_TYPE_HANDLED)
3794                         return ret;
3795         }
3796
3797         SEQ_PUT_FIELD(s, newline);
3798
3799         return trace_handle_return(s);
3800 }
3801
3802 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3803 {
3804         struct trace_array *tr = iter->tr;
3805         struct trace_seq *s = &iter->seq;
3806         struct trace_entry *entry;
3807         struct trace_event *event;
3808
3809         entry = iter->ent;
3810
3811         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3812                 SEQ_PUT_FIELD(s, entry->pid);
3813                 SEQ_PUT_FIELD(s, iter->cpu);
3814                 SEQ_PUT_FIELD(s, iter->ts);
3815                 if (trace_seq_has_overflowed(s))
3816                         return TRACE_TYPE_PARTIAL_LINE;
3817         }
3818
3819         event = ftrace_find_event(entry->type);
3820         return event ? event->funcs->binary(iter, 0, event) :
3821                 TRACE_TYPE_HANDLED;
3822 }
3823
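/*
 * Return 1 if there is nothing left to read in the buffers this iterator
 * covers: either the single CPU of a per_cpu file or all tracing CPUs.
 */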
3824 int trace_empty(struct trace_iterator *iter)
3825 {
3826         struct ring_buffer_iter *buf_iter;
3827         int cpu;
3828
3829         /* If we are looking at one CPU buffer, only check that one */
3830         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3831                 cpu = iter->cpu_file;
3832                 buf_iter = trace_buffer_iter(iter, cpu);
3833                 if (buf_iter) {
3834                         if (!ring_buffer_iter_empty(buf_iter))
3835                                 return 0;
3836                 } else {
3837                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3838                                 return 0;
3839                 }
3840                 return 1;
3841         }
3842
3843         for_each_tracing_cpu(cpu) {
3844                 buf_iter = trace_buffer_iter(iter, cpu);
3845                 if (buf_iter) {
3846                         if (!ring_buffer_iter_empty(buf_iter))
3847                                 return 0;
3848                 } else {
3849                         if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3850                                 return 0;
3851                 }
3852         }
3853
3854         return 1;
3855 }
3856
3857 /*  Called with trace_event_read_lock() held. */
3858 enum print_line_t print_trace_line(struct trace_iterator *iter)
3859 {
3860         struct trace_array *tr = iter->tr;
3861         unsigned long trace_flags = tr->trace_flags;
3862         enum print_line_t ret;
3863
3864         if (iter->lost_events) {
3865                 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3866                                  iter->cpu, iter->lost_events);
3867                 if (trace_seq_has_overflowed(&iter->seq))
3868                         return TRACE_TYPE_PARTIAL_LINE;
3869         }
3870
3871         if (iter->trace && iter->trace->print_line) {
3872                 ret = iter->trace->print_line(iter);
3873                 if (ret != TRACE_TYPE_UNHANDLED)
3874                         return ret;
3875         }
3876
3877         if (iter->ent->type == TRACE_BPUTS &&
3878                         trace_flags & TRACE_ITER_PRINTK &&
3879                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3880                 return trace_print_bputs_msg_only(iter);
3881
3882         if (iter->ent->type == TRACE_BPRINT &&
3883                         trace_flags & TRACE_ITER_PRINTK &&
3884                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3885                 return trace_print_bprintk_msg_only(iter);
3886
3887         if (iter->ent->type == TRACE_PRINT &&
3888                         trace_flags & TRACE_ITER_PRINTK &&
3889                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3890                 return trace_print_printk_msg_only(iter);
3891
3892         if (trace_flags & TRACE_ITER_BIN)
3893                 return print_bin_fmt(iter);
3894
3895         if (trace_flags & TRACE_ITER_HEX)
3896                 return print_hex_fmt(iter);
3897
3898         if (trace_flags & TRACE_ITER_RAW)
3899                 return print_raw_fmt(iter);
3900
3901         return print_trace_fmt(iter);
3902 }
3903
3904 void trace_latency_header(struct seq_file *m)
3905 {
3906         struct trace_iterator *iter = m->private;
3907         struct trace_array *tr = iter->tr;
3908
3909         /* print nothing if the buffers are empty */
3910         if (trace_empty(iter))
3911                 return;
3912
3913         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3914                 print_trace_header(m, iter);
3915
3916         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3917                 print_lat_help_header(m);
3918 }
3919
3920 void trace_default_header(struct seq_file *m)
3921 {
3922         struct trace_iterator *iter = m->private;
3923         struct trace_array *tr = iter->tr;
3924         unsigned long trace_flags = tr->trace_flags;
3925
3926         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3927                 return;
3928
3929         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3930                 /* print nothing if the buffers are empty */
3931                 if (trace_empty(iter))
3932                         return;
3933                 print_trace_header(m, iter);
3934                 if (!(trace_flags & TRACE_ITER_VERBOSE))
3935                         print_lat_help_header(m);
3936         } else {
3937                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3938                         if (trace_flags & TRACE_ITER_IRQ_INFO)
3939                                 print_func_help_header_irq(iter->trace_buffer,
3940                                                            m, trace_flags);
3941                         else
3942                                 print_func_help_header(iter->trace_buffer, m,
3943                                                        trace_flags);
3944                 }
3945         }
3946 }
3947
3948 static void test_ftrace_alive(struct seq_file *m)
3949 {
3950         if (!ftrace_is_dead())
3951                 return;
3952         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3953                     "#          MAY BE MISSING FUNCTION EVENTS\n");
3954 }
3955
3956 #ifdef CONFIG_TRACER_MAX_TRACE
3957 static void show_snapshot_main_help(struct seq_file *m)
3958 {
3959         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3960                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3961                     "#                      Takes a snapshot of the main buffer.\n"
3962                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3963                     "#                      (Doesn't have to be '2'; works with any number that\n"
3964                     "#                       is not a '0' or '1')\n");
3965 }
3966
3967 static void show_snapshot_percpu_help(struct seq_file *m)
3968 {
3969         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3970 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3971         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3972                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
3973 #else
3974         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3975                     "#                     Must use main snapshot file to allocate.\n");
3976 #endif
3977         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3978                     "#                      (Doesn't have to be '2'; works with any number that\n"
3979                     "#                       is not a '0' or '1')\n");
3980 }
3981
3982 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3983 {
3984         if (iter->tr->allocated_snapshot)
3985                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3986         else
3987                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3988
3989         seq_puts(m, "# Snapshot commands:\n");
3990         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3991                 show_snapshot_main_help(m);
3992         else
3993                 show_snapshot_percpu_help(m);
3994 }
3995 #else
3996 /* Should never be called */
3997 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3998 #endif
3999
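/*
 * seq_file ->show() callback: print the headers when there is no current
 * entry, re-emit leftover output if the previous line overflowed the
 * seq_file buffer, otherwise format the current trace line.
 */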
4000 static int s_show(struct seq_file *m, void *v)
4001 {
4002         struct trace_iterator *iter = v;
4003         int ret;
4004
4005         if (iter->ent == NULL) {
4006                 if (iter->tr) {
4007                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4008                         seq_puts(m, "#\n");
4009                         test_ftrace_alive(m);
4010                 }
4011                 if (iter->snapshot && trace_empty(iter))
4012                         print_snapshot_help(m, iter);
4013                 else if (iter->trace && iter->trace->print_header)
4014                         iter->trace->print_header(m);
4015                 else
4016                         trace_default_header(m);
4017
4018         } else if (iter->leftover) {
4019                 /*
4020                  * If we filled the seq_file buffer earlier, we
4021                  * want to just show it now.
4022                  */
4023                 ret = trace_print_seq(m, &iter->seq);
4024
4025                 /* ret should this time be zero, but you never know */
4026                 iter->leftover = ret;
4027
4028         } else {
4029                 print_trace_line(iter);
4030                 ret = trace_print_seq(m, &iter->seq);
4031                 /*
4032                  * If we overflow the seq_file buffer, then it will
4033                  * ask us for this data again at start up.
4034                  * Use that instead.
4035                  *  ret is 0 if seq_file write succeeded.
4036                  *        -1 otherwise.
4037                  */
4038                 iter->leftover = ret;
4039         }
4040
4041         return 0;
4042 }
4043
4044 /*
4045  * Should be used after trace_array_get(); trace_types_lock
4046  * ensures that i_cdev was already initialized.
4047  */
4048 static inline int tracing_get_cpu(struct inode *inode)
4049 {
4050         if (inode->i_cdev) /* See trace_create_cpu_file() */
4051                 return (long)inode->i_cdev - 1;
4052         return RING_BUFFER_ALL_CPUS;
4053 }
4054
4055 static const struct seq_operations tracer_seq_ops = {
4056         .start          = s_start,
4057         .next           = s_next,
4058         .stop           = s_stop,
4059         .show           = s_show,
4060 };
4061
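/*
 * Set up a trace_iterator for reading the "trace" file (or the snapshot
 * buffer when @snapshot is true): copy the current tracer, allocate a
 * ring buffer iterator per CPU, and stop tracing while the buffer is
 * being dumped unless a snapshot is being read.
 */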
4062 static struct trace_iterator *
4063 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4064 {
4065         struct trace_array *tr = inode->i_private;
4066         struct trace_iterator *iter;
4067         int cpu;
4068
4069         if (tracing_disabled)
4070                 return ERR_PTR(-ENODEV);
4071
4072         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4073         if (!iter)
4074                 return ERR_PTR(-ENOMEM);
4075
4076         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4077                                     GFP_KERNEL);
4078         if (!iter->buffer_iter)
4079                 goto release;
4080
4081         /*
4082          * We make a copy of the current tracer to avoid concurrent
4083          * changes on it while we are reading.
4084          */
4085         mutex_lock(&trace_types_lock);
4086         iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4087         if (!iter->trace)
4088                 goto fail;
4089
4090         *iter->trace = *tr->current_trace;
4091
4092         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4093                 goto fail;
4094
4095         iter->tr = tr;
4096
4097 #ifdef CONFIG_TRACER_MAX_TRACE
4098         /* Currently only the top directory has a snapshot */
4099         if (tr->current_trace->print_max || snapshot)
4100                 iter->trace_buffer = &tr->max_buffer;
4101         else
4102 #endif
4103                 iter->trace_buffer = &tr->trace_buffer;
4104         iter->snapshot = snapshot;
4105         iter->pos = -1;
4106         iter->cpu_file = tracing_get_cpu(inode);
4107         mutex_init(&iter->mutex);
4108
4109         /* Notify the tracer early; before we stop tracing. */
4110         if (iter->trace && iter->trace->open)
4111                 iter->trace->open(iter);
4112
4113         /* Annotate start of buffers if we had overruns */
4114         if (ring_buffer_overruns(iter->trace_buffer->buffer))
4115                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4116
4117         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4118         if (trace_clocks[tr->clock_id].in_ns)
4119                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4120
4121         /* stop the trace while dumping if we are not opening "snapshot" */
4122         if (!iter->snapshot)
4123                 tracing_stop_tr(tr);
4124
4125         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4126                 for_each_tracing_cpu(cpu) {
4127                         iter->buffer_iter[cpu] =
4128                                 ring_buffer_read_prepare(iter->trace_buffer->buffer,
4129                                                          cpu, GFP_KERNEL);
4130                 }
4131                 ring_buffer_read_prepare_sync();
4132                 for_each_tracing_cpu(cpu) {
4133                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4134                         tracing_iter_reset(iter, cpu);
4135                 }
4136         } else {
4137                 cpu = iter->cpu_file;
4138                 iter->buffer_iter[cpu] =
4139                         ring_buffer_read_prepare(iter->trace_buffer->buffer,
4140                                                  cpu, GFP_KERNEL);
4141                 ring_buffer_read_prepare_sync();
4142                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4143                 tracing_iter_reset(iter, cpu);
4144         }
4145
4146         mutex_unlock(&trace_types_lock);
4147
4148         return iter;
4149
4150  fail:
4151         mutex_unlock(&trace_types_lock);
4152         kfree(iter->trace);
4153         kfree(iter->buffer_iter);
4154 release:
4155         seq_release_private(inode, file);
4156         return ERR_PTR(-ENOMEM);
4157 }
4158
4159 int tracing_open_generic(struct inode *inode, struct file *filp)
4160 {
4161         int ret;
4162
4163         ret = tracing_check_open_get_tr(NULL);
4164         if (ret)
4165                 return ret;
4166
4167         filp->private_data = inode->i_private;
4168         return 0;
4169 }
4170
4171 bool tracing_is_disabled(void)
4172 {
4173         return tracing_disabled;
4174 }
4175
4176 /*
4177  * Open and update trace_array ref count.
4178  * Must have the current trace_array passed to it.
4179  */
4180 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4181 {
4182         struct trace_array *tr = inode->i_private;
4183         int ret;
4184
4185         ret = tracing_check_open_get_tr(tr);
4186         if (ret)
4187                 return ret;
4188
4189         filp->private_data = inode->i_private;
4190
4191         return 0;
4192 }
4193
4194 static int tracing_release(struct inode *inode, struct file *file)
4195 {
4196         struct trace_array *tr = inode->i_private;
4197         struct seq_file *m = file->private_data;
4198         struct trace_iterator *iter;
4199         int cpu;
4200
4201         if (!(file->f_mode & FMODE_READ)) {
4202                 trace_array_put(tr);
4203                 return 0;
4204         }
4205
4206         /* Writes do not use seq_file */
4207         iter = m->private;
4208         mutex_lock(&trace_types_lock);
4209
4210         for_each_tracing_cpu(cpu) {
4211                 if (iter->buffer_iter[cpu])
4212                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4213         }
4214
4215         if (iter->trace && iter->trace->close)
4216                 iter->trace->close(iter);
4217
4218         if (!iter->snapshot)
4219                 /* reenable tracing if it was previously enabled */
4220                 tracing_start_tr(tr);
4221
4222         __trace_array_put(tr);
4223
4224         mutex_unlock(&trace_types_lock);
4225
4226         mutex_destroy(&iter->mutex);
4227         free_cpumask_var(iter->started);
4228         kfree(iter->trace);
4229         kfree(iter->buffer_iter);
4230         seq_release_private(inode, file);
4231
4232         return 0;
4233 }
4234
4235 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4236 {
4237         struct trace_array *tr = inode->i_private;
4238
4239         trace_array_put(tr);
4240         return 0;
4241 }
4242
4243 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4244 {
4245         struct trace_array *tr = inode->i_private;
4246
4247         trace_array_put(tr);
4248
4249         return single_release(inode, file);
4250 }
4251
4252 static int tracing_open(struct inode *inode, struct file *file)
4253 {
4254         struct trace_array *tr = inode->i_private;
4255         struct trace_iterator *iter;
4256         int ret;
4257
4258         ret = tracing_check_open_get_tr(tr);
4259         if (ret)
4260                 return ret;
4261
4262         /* If this file was open for write, then erase contents */
4263         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4264                 int cpu = tracing_get_cpu(inode);
4265                 struct trace_buffer *trace_buf = &tr->trace_buffer;
4266
4267 #ifdef CONFIG_TRACER_MAX_TRACE
4268                 if (tr->current_trace->print_max)
4269                         trace_buf = &tr->max_buffer;
4270 #endif
4271
4272                 if (cpu == RING_BUFFER_ALL_CPUS)
4273                         tracing_reset_online_cpus(trace_buf);
4274                 else
4275                         tracing_reset_cpu(trace_buf, cpu);
4276         }
4277
4278         if (file->f_mode & FMODE_READ) {
4279                 iter = __tracing_open(inode, file, false);
4280                 if (IS_ERR(iter))
4281                         ret = PTR_ERR(iter);
4282                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4283                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4284         }
4285
4286         if (ret < 0)
4287                 trace_array_put(tr);
4288
4289         return ret;
4290 }
4291
4292 /*
4293  * Some tracers are not suitable for instance buffers.
4294  * A tracer is always available for the global array (toplevel)
4295  * or if it explicitly states that it is.
4296  */
4297 static bool
4298 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4299 {
4300         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4301 }
4302
4303 /* Find the next tracer that this trace array may use */
4304 static struct tracer *
4305 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4306 {
4307         while (t && !trace_ok_for_array(t, tr))
4308                 t = t->next;
4309
4310         return t;
4311 }
4312
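/*
 * seq_file iteration over the registered tracers, as exposed through the
 * "available_tracers" file.  Only tracers that may be used by this trace
 * array (see trace_ok_for_array()) are listed.
 */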
4313 static void *
4314 t_next(struct seq_file *m, void *v, loff_t *pos)
4315 {
4316         struct trace_array *tr = m->private;
4317         struct tracer *t = v;
4318
4319         (*pos)++;
4320
4321         if (t)
4322                 t = get_tracer_for_array(tr, t->next);
4323
4324         return t;
4325 }
4326
4327 static void *t_start(struct seq_file *m, loff_t *pos)
4328 {
4329         struct trace_array *tr = m->private;
4330         struct tracer *t;
4331         loff_t l = 0;
4332
4333         mutex_lock(&trace_types_lock);
4334
4335         t = get_tracer_for_array(tr, trace_types);
4336         for (; t && l < *pos; t = t_next(m, t, &l))
4337                         ;
4338
4339         return t;
4340 }
4341
4342 static void t_stop(struct seq_file *m, void *p)
4343 {
4344         mutex_unlock(&trace_types_lock);
4345 }
4346
4347 static int t_show(struct seq_file *m, void *v)
4348 {
4349         struct tracer *t = v;
4350
4351         if (!t)
4352                 return 0;
4353
4354         seq_puts(m, t->name);
4355         if (t->next)
4356                 seq_putc(m, ' ');
4357         else
4358                 seq_putc(m, '\n');
4359
4360         return 0;
4361 }
4362
4363 static const struct seq_operations show_traces_seq_ops = {
4364         .start          = t_start,
4365         .next           = t_next,
4366         .stop           = t_stop,
4367         .show           = t_show,
4368 };
4369
4370 static int show_traces_open(struct inode *inode, struct file *file)
4371 {
4372         struct trace_array *tr = inode->i_private;
4373         struct seq_file *m;
4374         int ret;
4375
4376         ret = tracing_check_open_get_tr(tr);
4377         if (ret)
4378                 return ret;
4379
4380         ret = seq_open(file, &show_traces_seq_ops);
4381         if (ret) {
4382                 trace_array_put(tr);
4383                 return ret;
4384         }
4385
4386         m = file->private_data;
4387         m->private = tr;
4388
4389         return 0;
4390 }
4391
4392 static int show_traces_release(struct inode *inode, struct file *file)
4393 {
4394         struct trace_array *tr = inode->i_private;
4395
4396         trace_array_put(tr);
4397         return seq_release(inode, file);
4398 }
4399
4400 static ssize_t
4401 tracing_write_stub(struct file *filp, const char __user *ubuf,
4402                    size_t count, loff_t *ppos)
4403 {
4404         return count;
4405 }
4406
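/*
 * Seeking is only meaningful when the file was opened for reading and
 * therefore uses seq_file; writers have their position pinned at zero.
 */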
4407 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4408 {
4409         int ret;
4410
4411         if (file->f_mode & FMODE_READ)
4412                 ret = seq_lseek(file, offset, whence);
4413         else
4414                 file->f_pos = ret = 0;
4415
4416         return ret;
4417 }
4418
4419 static const struct file_operations tracing_fops = {
4420         .open           = tracing_open,
4421         .read           = seq_read,
4422         .write          = tracing_write_stub,
4423         .llseek         = tracing_lseek,
4424         .release        = tracing_release,
4425 };
4426
4427 static const struct file_operations show_traces_fops = {
4428         .open           = show_traces_open,
4429         .read           = seq_read,
4430         .llseek         = seq_lseek,
4431         .release        = show_traces_release,
4432 };
4433
4434 static ssize_t
4435 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4436                      size_t count, loff_t *ppos)
4437 {
4438         struct trace_array *tr = file_inode(filp)->i_private;
4439         char *mask_str;
4440         int len;
4441
4442         len = snprintf(NULL, 0, "%*pb\n",
4443                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
4444         mask_str = kmalloc(len, GFP_KERNEL);
4445         if (!mask_str)
4446                 return -ENOMEM;
4447
4448         len = snprintf(mask_str, len, "%*pb\n",
4449                        cpumask_pr_args(tr->tracing_cpumask));
4450         if (len >= count) {
4451                 count = -EINVAL;
4452                 goto out_err;
4453         }
4454         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
4455
4456 out_err:
4457         kfree(mask_str);
4458
4459         return count;
4460 }
4461
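/*
 * Update which CPUs are traced, e.g. "echo 3 > tracing_cpumask" limits
 * tracing to CPUs 0 and 1.  CPUs being removed from the mask get their
 * per-cpu recording disabled (and their disabled counter bumped) before
 * the new mask is committed, and vice versa for CPUs being added.
 */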
4462 static ssize_t
4463 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4464                       size_t count, loff_t *ppos)
4465 {
4466         struct trace_array *tr = file_inode(filp)->i_private;
4467         cpumask_var_t tracing_cpumask_new;
4468         int err, cpu;
4469
4470         if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4471                 return -ENOMEM;
4472
4473         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4474         if (err)
4475                 goto err_unlock;
4476
4477         local_irq_disable();
4478         arch_spin_lock(&tr->max_lock);
4479         for_each_tracing_cpu(cpu) {
4480                 /*
4481                  * Increase/decrease the disabled counter if we are
4482                  * about to flip a bit in the cpumask:
4483                  */
4484                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4485                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4486                         atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4487                         ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4488                 }
4489                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4490                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4491                         atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4492                         ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4493                 }
4494         }
4495         arch_spin_unlock(&tr->max_lock);
4496         local_irq_enable();
4497
4498         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4499         free_cpumask_var(tracing_cpumask_new);
4500
4501         return count;
4502
4503 err_unlock:
4504         free_cpumask_var(tracing_cpumask_new);
4505
4506         return err;
4507 }
4508
4509 static const struct file_operations tracing_cpumask_fops = {
4510         .open           = tracing_open_generic_tr,
4511         .read           = tracing_cpumask_read,
4512         .write          = tracing_cpumask_write,
4513         .release        = tracing_release_generic_tr,
4514         .llseek         = generic_file_llseek,
4515 };
4516
4517 static int tracing_trace_options_show(struct seq_file *m, void *v)
4518 {
4519         struct tracer_opt *trace_opts;
4520         struct trace_array *tr = m->private;
4521         u32 tracer_flags;
4522         int i;
4523
4524         mutex_lock(&trace_types_lock);
4525         tracer_flags = tr->current_trace->flags->val;
4526         trace_opts = tr->current_trace->flags->opts;
4527
4528         for (i = 0; trace_options[i]; i++) {
4529                 if (tr->trace_flags & (1 << i))
4530                         seq_printf(m, "%s\n", trace_options[i]);
4531                 else
4532                         seq_printf(m, "no%s\n", trace_options[i]);
4533         }
4534
4535         for (i = 0; trace_opts[i].name; i++) {
4536                 if (tracer_flags & trace_opts[i].bit)
4537                         seq_printf(m, "%s\n", trace_opts[i].name);
4538                 else
4539                         seq_printf(m, "no%s\n", trace_opts[i].name);
4540         }
4541         mutex_unlock(&trace_types_lock);
4542
4543         return 0;
4544 }
4545
4546 static int __set_tracer_option(struct trace_array *tr,
4547                                struct tracer_flags *tracer_flags,
4548                                struct tracer_opt *opts, int neg)
4549 {
4550         struct tracer *trace = tracer_flags->trace;
4551         int ret;
4552
4553         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4554         if (ret)
4555                 return ret;
4556
4557         if (neg)
4558                 tracer_flags->val &= ~opts->bit;
4559         else
4560                 tracer_flags->val |= opts->bit;
4561         return 0;
4562 }
4563
4564 /* Try to assign a tracer specific option */
4565 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4566 {
4567         struct tracer *trace = tr->current_trace;
4568         struct tracer_flags *tracer_flags = trace->flags;
4569         struct tracer_opt *opts = NULL;
4570         int i;
4571
4572         for (i = 0; tracer_flags->opts[i].name; i++) {
4573                 opts = &tracer_flags->opts[i];
4574
4575                 if (strcmp(cmp, opts->name) == 0)
4576                         return __set_tracer_option(tr, trace->flags, opts, neg);
4577         }
4578
4579         return -EINVAL;
4580 }
4581
4582 /* Some tracers require overwrite to stay enabled */
4583 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4584 {
4585         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4586                 return -1;
4587
4588         return 0;
4589 }
4590
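/*
 * Set or clear one global trace option flag on @tr.  The current tracer
 * may veto the change via ->flag_changed(), and flags with side effects
 * (cmdline/tgid recording, fork following, overwrite mode, printk) are
 * propagated to the relevant subsystems here.
 */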
4591 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4592 {
4593         /* do nothing if flag is already set */
4594         if (!!(tr->trace_flags & mask) == !!enabled)
4595                 return 0;
4596
4597         /* Give the tracer a chance to approve the change */
4598         if (tr->current_trace->flag_changed)
4599                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4600                         return -EINVAL;
4601
4602         if (enabled)
4603                 tr->trace_flags |= mask;
4604         else
4605                 tr->trace_flags &= ~mask;
4606
4607         if (mask == TRACE_ITER_RECORD_CMD)
4608                 trace_event_enable_cmd_record(enabled);
4609
4610         if (mask == TRACE_ITER_RECORD_TGID) {
4611                 if (!tgid_map)
4612                         tgid_map = kcalloc(PID_MAX_DEFAULT + 1,
4613                                            sizeof(*tgid_map),
4614                                            GFP_KERNEL);
4615                 if (!tgid_map) {
4616                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
4617                         return -ENOMEM;
4618                 }
4619
4620                 trace_event_enable_tgid_record(enabled);
4621         }
4622
4623         if (mask == TRACE_ITER_EVENT_FORK)
4624                 trace_event_follow_fork(tr, enabled);
4625
4626         if (mask == TRACE_ITER_FUNC_FORK)
4627                 ftrace_pid_follow_fork(tr, enabled);
4628
4629         if (mask == TRACE_ITER_OVERWRITE) {
4630                 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4631 #ifdef CONFIG_TRACER_MAX_TRACE
4632                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4633 #endif
4634         }
4635
4636         if (mask == TRACE_ITER_PRINTK) {
4637                 trace_printk_start_stop_comm(enabled);
4638                 trace_printk_control(enabled);
4639         }
4640
4641         return 0;
4642 }
4643
4644 static int trace_set_options(struct trace_array *tr, char *option)
4645 {
4646         char *cmp;
4647         int neg = 0;
4648         int ret;
4649         size_t orig_len = strlen(option);
4650         int len;
4651
4652         cmp = strstrip(option);
4653
4654         len = str_has_prefix(cmp, "no");
4655         if (len)
4656                 neg = 1;
4657
4658         cmp += len;
4659
4660         mutex_lock(&trace_types_lock);
4661
4662         ret = match_string(trace_options, -1, cmp);
4663         /* If no option could be set, test the specific tracer options */
4664         if (ret < 0)
4665                 ret = set_tracer_option(tr, cmp, neg);
4666         else
4667                 ret = set_tracer_flag(tr, 1 << ret, !neg);
4668
4669         mutex_unlock(&trace_types_lock);
4670
4671         /*
4672          * If the first trailing whitespace is replaced with '\0' by strstrip,
4673          * turn it back into a space.
4674          */
4675         if (orig_len > strlen(option))
4676                 option[strlen(option)] = ' ';
4677
4678         return ret;
4679 }
4680
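/*
 * Apply the comma-separated option list saved from the boot command line
 * (trace_boot_options_buf) to the global trace array, one option at a
 * time.  The commas are restored afterwards so the buffer can be parsed
 * again later.
 */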
4681 static void __init apply_trace_boot_options(void)
4682 {
4683         char *buf = trace_boot_options_buf;
4684         char *option;
4685
4686         while (true) {
4687                 option = strsep(&buf, ",");
4688
4689                 if (!option)
4690                         break;
4691
4692                 if (*option)
4693                         trace_set_options(&global_trace, option);
4694
4695                 /* Put back the comma to allow this to be called again */
4696                 if (buf)
4697                         *(buf - 1) = ',';
4698         }
4699 }
4700
4701 static ssize_t
4702 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4703                         size_t cnt, loff_t *ppos)
4704 {
4705         struct seq_file *m = filp->private_data;
4706         struct trace_array *tr = m->private;
4707         char buf[64];
4708         int ret;
4709
4710         if (cnt >= sizeof(buf))
4711                 return -EINVAL;
4712
4713         if (copy_from_user(buf, ubuf, cnt))
4714                 return -EFAULT;
4715
4716         buf[cnt] = 0;
4717
4718         ret = trace_set_options(tr, buf);
4719         if (ret < 0)
4720                 return ret;
4721
4722         *ppos += cnt;
4723
4724         return cnt;
4725 }
4726
4727 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4728 {
4729         struct trace_array *tr = inode->i_private;
4730         int ret;
4731
4732         ret = tracing_check_open_get_tr(tr);
4733         if (ret)
4734                 return ret;
4735
4736         ret = single_open(file, tracing_trace_options_show, inode->i_private);
4737         if (ret < 0)
4738                 trace_array_put(tr);
4739
4740         return ret;
4741 }
4742
4743 static const struct file_operations tracing_iter_fops = {
4744         .open           = tracing_trace_options_open,
4745         .read           = seq_read,
4746         .llseek         = seq_lseek,
4747         .release        = tracing_single_release_tr,
4748         .write          = tracing_trace_options_write,
4749 };
4750
4751 static const char readme_msg[] =
4752         "tracing mini-HOWTO:\n\n"
4753         "# echo 0 > tracing_on : quick way to disable tracing\n"
4754         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4755         " Important files:\n"
4756         "  trace\t\t\t- The static contents of the buffer\n"
4757         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
4758         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4759         "  current_tracer\t- function and latency tracers\n"
4760         "  available_tracers\t- list of configured tracers for current_tracer\n"
4761         "  error_log\t- error log for failed commands (that support it)\n"
4762         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
4763         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
4764         "  trace_clock\t\t- change the clock used to order events\n"
4765         "       local:   Per cpu clock but may not be synced across CPUs\n"
4766         "      global:   Synced across CPUs but slows tracing down.\n"
4767         "     counter:   Not a clock, but just an increment\n"
4768         "      uptime:   Jiffy counter from time of boot\n"
4769         "        perf:   Same clock that perf events use\n"
4770 #ifdef CONFIG_X86_64
4771         "     x86-tsc:   TSC cycle counter\n"
4772 #endif
4773         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
4774         "       delta:   Delta difference against a buffer-wide timestamp\n"
4775         "    absolute:   Absolute (standalone) timestamp\n"
4776         "\n  trace_marker\t\t- Writes into this file go into the kernel buffer\n"
4777         "\n  trace_marker_raw\t\t- Writes into this file go into the kernel buffer as binary data\n"
4778         "  tracing_cpumask\t- Limit which CPUs to trace\n"
4779         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4780         "\t\t\t  Remove sub-buffer with rmdir\n"
4781         "  trace_options\t\t- Set format or modify how tracing happens\n"
4782         "\t\t\t  Disable an option by prefixing 'no' to the\n"
4783         "\t\t\t  option name\n"
4784         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4785 #ifdef CONFIG_DYNAMIC_FTRACE
4786         "\n  available_filter_functions - list of functions that can be filtered on\n"
4787         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
4788         "\t\t\t  functions\n"
4789         "\t     accepts: func_full_name or glob-matching-pattern\n"
4790         "\t     modules: Can select a group via module\n"
4791         "\t      Format: :mod:<module-name>\n"
4792         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
4793         "\t    triggers: a command to perform when function is hit\n"
4794         "\t      Format: <function>:<trigger>[:count]\n"
4795         "\t     trigger: traceon, traceoff\n"
4796         "\t\t      enable_event:<system>:<event>\n"
4797         "\t\t      disable_event:<system>:<event>\n"
4798 #ifdef CONFIG_STACKTRACE
4799         "\t\t      stacktrace\n"
4800 #endif
4801 #ifdef CONFIG_TRACER_SNAPSHOT
4802         "\t\t      snapshot\n"
4803 #endif
4804         "\t\t      dump\n"
4805         "\t\t      cpudump\n"
4806         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
4807         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
4808         "\t     The first one will disable tracing every time do_fault is hit\n"
4809         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
4810         "\t       The first time do_trap is hit and it disables tracing, the\n"
4811         "\t       counter will decrement to 2. If tracing is already disabled,\n"
4812         "\t       the counter will not decrement. It only decrements when the\n"
4813         "\t       trigger did work\n"
4814         "\t     To remove trigger without count:\n"
4815         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
4816         "\t     To remove trigger with a count:\n"
4817         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
4818         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
4819         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4820         "\t    modules: Can select a group via module command :mod:\n"
4821         "\t    Does not accept triggers\n"
4822 #endif /* CONFIG_DYNAMIC_FTRACE */
4823 #ifdef CONFIG_FUNCTION_TRACER
4824         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4825         "\t\t    (function)\n"
4826 #endif
4827 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4828         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4829         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4830         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4831 #endif
4832 #ifdef CONFIG_TRACER_SNAPSHOT
4833         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
4834         "\t\t\t  snapshot buffer. Read the contents for more\n"
4835         "\t\t\t  information\n"
4836 #endif
4837 #ifdef CONFIG_STACK_TRACER
4838         "  stack_trace\t\t- Shows the max stack trace when active\n"
4839         "  stack_max_size\t- Shows current max stack size that was traced\n"
4840         "\t\t\t  Write into this file to reset the max size (trigger a\n"
4841         "\t\t\t  new trace)\n"
4842 #ifdef CONFIG_DYNAMIC_FTRACE
4843         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4844         "\t\t\t  traces\n"
4845 #endif
4846 #endif /* CONFIG_STACK_TRACER */
4847 #ifdef CONFIG_DYNAMIC_EVENTS
4848         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
4849         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4850 #endif
4851 #ifdef CONFIG_KPROBE_EVENTS
4852         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
4853         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4854 #endif
4855 #ifdef CONFIG_UPROBE_EVENTS
4856         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
4857         "\t\t\t  Write into this file to define/undefine new trace events.\n"
4858 #endif
4859 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4860         "\t  accepts: event-definitions (one definition per line)\n"
4861         "\t   Format: p[:[<group>/]<event>] <place> [<args>]\n"
4862         "\t           r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
4863 #ifdef CONFIG_HIST_TRIGGERS
4864         "\t           s:[synthetic/]<event> <field> [<field>]\n"
4865 #endif
4866         "\t           -:[<group>/]<event>\n"
4867 #ifdef CONFIG_KPROBE_EVENTS
4868         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4869   "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4870 #endif
4871 #ifdef CONFIG_UPROBE_EVENTS
4872   "   place (uprobe): <path>:<offset>[(ref_ctr_offset)]\n"
4873 #endif
4874         "\t     args: <name>=fetcharg[:type]\n"
4875         "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4876 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
4877         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
4878 #else
4879         "\t           $stack<index>, $stack, $retval, $comm,\n"
4880 #endif
4881         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
4882         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
4883         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
4884         "\t           <type>\\[<array-size>\\]\n"
4885 #ifdef CONFIG_HIST_TRIGGERS
4886         "\t    field: <stype> <name>;\n"
4887         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
4888         "\t           [unsigned] char/int/long\n"
4889 #endif
4890 #endif
4891         "  events/\t\t- Directory containing all trace event subsystems:\n"
4892         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4893         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
4894         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4895         "\t\t\t  events\n"
4896         "      filter\t\t- If set, only events passing filter are traced\n"
4897         "  events/<system>/<event>/\t- Directory containing control files for\n"
4898         "\t\t\t  <event>:\n"
4899         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4900         "      filter\t\t- If set, only events passing filter are traced\n"
4901         "      trigger\t\t- If set, a command to perform when event is hit\n"
4902         "\t    Format: <trigger>[:count][if <filter>]\n"
4903         "\t   trigger: traceon, traceoff\n"
4904         "\t            enable_event:<system>:<event>\n"
4905         "\t            disable_event:<system>:<event>\n"
4906 #ifdef CONFIG_HIST_TRIGGERS
4907         "\t            enable_hist:<system>:<event>\n"
4908         "\t            disable_hist:<system>:<event>\n"
4909 #endif
4910 #ifdef CONFIG_STACKTRACE
4911         "\t\t    stacktrace\n"
4912 #endif
4913 #ifdef CONFIG_TRACER_SNAPSHOT
4914         "\t\t    snapshot\n"
4915 #endif
4916 #ifdef CONFIG_HIST_TRIGGERS
4917         "\t\t    hist (see below)\n"
4918 #endif
4919         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
4920         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
4921         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4922         "\t                  events/block/block_unplug/trigger\n"
4923         "\t   The first disables tracing every time block_unplug is hit.\n"
4924         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
4925         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
4926         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
4927         "\t   Like function triggers, the counter is only decremented if it\n"
4928         "\t    enabled or disabled tracing.\n"
4929         "\t   To remove a trigger without a count:\n"
4930         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
4931         "\t   To remove a trigger with a count:\n"
4932         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
4933         "\t   Filters can be ignored when removing a trigger.\n"
4934 #ifdef CONFIG_HIST_TRIGGERS
4935         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
4936         "\t    Format: hist:keys=<field1[,field2,...]>\n"
4937         "\t            [:values=<field1[,field2,...]>]\n"
4938         "\t            [:sort=<field1[,field2,...]>]\n"
4939         "\t            [:size=#entries]\n"
4940         "\t            [:pause][:continue][:clear]\n"
4941         "\t            [:name=histname1]\n"
4942         "\t            [:<handler>.<action>]\n"
4943         "\t            [if <filter>]\n\n"
4944         "\t    When a matching event is hit, an entry is added to a hash\n"
4945         "\t    table using the key(s) and value(s) named, and the value of a\n"
4946         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
4947         "\t    correspond to fields in the event's format description.  Keys\n"
4948         "\t    can be any field, or the special string 'stacktrace'.\n"
4949         "\t    Compound keys consisting of up to two fields can be specified\n"
4950         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
4951         "\t    fields.  Sort keys consisting of up to two fields can be\n"
4952         "\t    specified using the 'sort' keyword.  The sort direction can\n"
4953         "\t    be modified by appending '.descending' or '.ascending' to a\n"
4954         "\t    sort field.  The 'size' parameter can be used to specify more\n"
4955         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
4956         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
4957         "\t    its histogram data will be shared with other triggers of the\n"
4958         "\t    same name, and trigger hits will update this common data.\n\n"
4959         "\t    Reading the 'hist' file for the event will dump the hash\n"
4960         "\t    table in its entirety to stdout.  If there are multiple hist\n"
4961         "\t    triggers attached to an event, there will be a table for each\n"
4962         "\t    trigger in the output.  The table displayed for a named\n"
4963         "\t    trigger will be the same as any other instance having the\n"
4964         "\t    same name.  The default format used to display a given field\n"
4965         "\t    can be modified by appending any of the following modifiers\n"
4966         "\t    to the field name, as applicable:\n\n"
4967         "\t            .hex        display a number as a hex value\n"
4968         "\t            .sym        display an address as a symbol\n"
4969         "\t            .sym-offset display an address as a symbol and offset\n"
4970         "\t            .execname   display a common_pid as a program name\n"
4971         "\t            .syscall    display a syscall id as a syscall name\n"
4972         "\t            .log2       display log2 value rather than raw number\n"
4973         "\t            .usecs      display a common_timestamp in microseconds\n\n"
4974         "\t    The 'pause' parameter can be used to pause an existing hist\n"
4975         "\t    trigger or to start a hist trigger but not log any events\n"
4976         "\t    until told to do so.  'continue' can be used to start or\n"
4977         "\t    restart a paused hist trigger.\n\n"
4978         "\t    The 'clear' parameter will clear the contents of a running\n"
4979         "\t    hist trigger and leave its current paused/active state\n"
4980         "\t    unchanged.\n\n"
4981         "\t    The enable_hist and disable_hist triggers can be used to\n"
4982         "\t    have one event conditionally start and stop another event's\n"
4983         "\t    already-attached hist trigger.  The syntax is analogous to\n"
4984         "\t    the enable_event and disable_event triggers.\n\n"
4985         "\t    Hist trigger handlers and actions are executed whenever a\n"
4986         "\t    histogram entry is added or updated.  They take the form:\n\n"
4987         "\t        <handler>.<action>\n\n"
4988         "\t    The available handlers are:\n\n"
4989         "\t        onmatch(matching.event)  - invoke on addition or update\n"
4990         "\t        onmax(var)               - invoke if var exceeds current max\n"
4991         "\t        onchange(var)            - invoke action if var changes\n\n"
4992         "\t    The available actions are:\n\n"
4993         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
4994         "\t        save(field,...)                      - save current event fields\n"
4995 #ifdef CONFIG_TRACER_SNAPSHOT
4996         "\t        snapshot()                           - snapshot the trace buffer\n"
4997 #endif
4998 #endif
4999 ;
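
/*
 * A minimal user-space sketch (not part of this file, kept out of the build
 * by #if 0) of driving the interface documented by readme_msg above: it
 * defines a kprobe event and attaches a hist trigger keyed on 'nr_rq' to the
 * block_unplug event, following the "p[:[<group>/]<event>] <place>" and
 * "hist:keys=<field>" formats described above.  The tracefs mount point and
 * the probed symbol are assumptions chosen for illustration only.
 */
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *str)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, str, strlen(str)) < 0) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* Define a kprobe event (needs CONFIG_KPROBE_EVENTS); the probed
	 * symbol is arbitrary for this example. */
	write_str("/sys/kernel/tracing/kprobe_events",
		  "p:myprobe do_sys_open\n");
	/* Attach a hist trigger (needs CONFIG_HIST_TRIGGERS). */
	write_str("/sys/kernel/tracing/events/block/block_unplug/trigger",
		  "hist:keys=nr_rq\n");
	return 0;
}
#endif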
5000
5001 static ssize_t
5002 tracing_readme_read(struct file *filp, char __user *ubuf,
5003                        size_t cnt, loff_t *ppos)
5004 {
5005         return simple_read_from_buffer(ubuf, cnt, ppos,
5006                                         readme_msg, strlen(readme_msg));
5007 }
5008
5009 static const struct file_operations tracing_readme_fops = {
5010         .open           = tracing_open_generic,
5011         .read           = tracing_readme_read,
5012         .llseek         = generic_file_llseek,
5013 };
5014
5015 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5016 {
5017         int *ptr = v;
5018
5019         if (*pos || m->count)
5020                 ptr++;
5021
5022         (*pos)++;
5023
5024         for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5025                 if (trace_find_tgid(*ptr))
5026                         return ptr;
5027         }
5028
5029         return NULL;
5030 }
5031
5032 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5033 {
5034         void *v;
5035         loff_t l = 0;
5036
5037         if (!tgid_map)
5038                 return NULL;
5039
5040         v = &tgid_map[0];
5041         while (l <= *pos) {
5042                 v = saved_tgids_next(m, v, &l);
5043                 if (!v)
5044                         return NULL;
5045         }
5046
5047         return v;
5048 }
5049
5050 static void saved_tgids_stop(struct seq_file *m, void *v)
5051 {
5052 }
5053
5054 static int saved_tgids_show(struct seq_file *m, void *v)
5055 {
5056         int pid = (int *)v - tgid_map;
5057
5058         seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5059         return 0;
5060 }
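
/* Each line of the resulting saved_tgids file is a "<pid> <tgid>" pair. */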
5061
5062 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5063         .start          = saved_tgids_start,
5064         .stop           = saved_tgids_stop,
5065         .next           = saved_tgids_next,
5066         .show           = saved_tgids_show,
5067 };
5068
5069 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5070 {
5071         int ret;
5072
5073         ret = tracing_check_open_get_tr(NULL);
5074         if (ret)
5075                 return ret;
5076
5077         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5078 }
5079
5080
5081 static const struct file_operations tracing_saved_tgids_fops = {
5082         .open           = tracing_saved_tgids_open,
5083         .read           = seq_read,
5084         .llseek         = seq_lseek,
5085         .release        = seq_release,
5086 };
5087
5088 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5089 {
5090         unsigned int *ptr = v;
5091
5092         if (*pos || m->count)
5093                 ptr++;
5094
5095         (*pos)++;
5096
5097         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5098              ptr++) {
5099                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5100                         continue;
5101
5102                 return ptr;
5103         }
5104
5105         return NULL;
5106 }
5107
5108 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5109 {
5110         void *v;
5111         loff_t l = 0;
5112
5113         preempt_disable();
5114         arch_spin_lock(&trace_cmdline_lock);
5115
5116         v = &savedcmd->map_cmdline_to_pid[0];
5117         while (l <= *pos) {
5118                 v = saved_cmdlines_next(m, v, &l);
5119                 if (!v)
5120                         return NULL;
5121         }
5122
5123         return v;
5124 }
5125
5126 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5127 {
5128         arch_spin_unlock(&trace_cmdline_lock);
5129         preempt_enable();
5130 }
5131
5132 static int saved_cmdlines_show(struct seq_file *m, void *v)
5133 {
5134         char buf[TASK_COMM_LEN];
5135         unsigned int *pid = v;
5136
5137         __trace_find_cmdline(*pid, buf);
5138         seq_printf(m, "%d %s\n", *pid, buf);
5139         return 0;
5140 }
5141
5142 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5143         .start          = saved_cmdlines_start,
5144         .next           = saved_cmdlines_next,
5145         .stop           = saved_cmdlines_stop,
5146         .show           = saved_cmdlines_show,
5147 };
5148
5149 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5150 {
5151         int ret;
5152
5153         ret = tracing_check_open_get_tr(NULL);
5154         if (ret)
5155                 return ret;
5156
5157         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5158 }
5159
5160 static const struct file_operations tracing_saved_cmdlines_fops = {
5161         .open           = tracing_saved_cmdlines_open,
5162         .read           = seq_read,
5163         .llseek         = seq_lseek,
5164         .release        = seq_release,
5165 };
5166
5167 static ssize_t
5168 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5169                                  size_t cnt, loff_t *ppos)
5170 {
5171         char buf[64];
5172         int r;
5173
5174         arch_spin_lock(&trace_cmdline_lock);
5175         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5176         arch_spin_unlock(&trace_cmdline_lock);
5177
5178         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5179 }
5180
5181 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5182 {
5183         kfree(s->saved_cmdlines);
5184         kfree(s->map_cmdline_to_pid);
5185         kfree(s);
5186 }
5187
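/*
 * Swap in a freshly allocated saved_cmdlines buffer of @val entries and free
 * the old one.  The pointer swap is done under trace_cmdline_lock so that it
 * cannot race with cmdline lookups.
 */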
5188 static int tracing_resize_saved_cmdlines(unsigned int val)
5189 {
5190         struct saved_cmdlines_buffer *s, *savedcmd_temp;
5191
5192         s = kmalloc(sizeof(*s), GFP_KERNEL);
5193         if (!s)
5194                 return -ENOMEM;
5195
5196         if (allocate_cmdlines_buffer(val, s) < 0) {
5197                 kfree(s);
5198                 return -ENOMEM;
5199         }
5200
5201         arch_spin_lock(&trace_cmdline_lock);
5202         savedcmd_temp = savedcmd;
5203         savedcmd = s;
5204         arch_spin_unlock(&trace_cmdline_lock);
5205         free_saved_cmdlines_buffer(savedcmd_temp);
5206
5207         return 0;
5208 }
5209
5210 static ssize_t
5211 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5212                                   size_t cnt, loff_t *ppos)
5213 {
5214         unsigned long val;
5215         int ret;
5216
5217         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5218         if (ret)
5219                 return ret;
5220
5221         /* must have at least 1 entry and no more than PID_MAX_DEFAULT */
5222         if (!val || val > PID_MAX_DEFAULT)
5223                 return -EINVAL;
5224
5225         ret = tracing_resize_saved_cmdlines((unsigned int)val);
5226         if (ret < 0)
5227                 return ret;
5228
5229         *ppos += cnt;
5230
5231         return cnt;
5232 }
5233
5234 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5235         .open           = tracing_open_generic,
5236         .read           = tracing_saved_cmdlines_size_read,
5237         .write          = tracing_saved_cmdlines_size_write,
5238 };
5239
5240 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5241 static union trace_eval_map_item *
5242 update_eval_map(union trace_eval_map_item *ptr)
5243 {
5244         if (!ptr->map.eval_string) {
5245                 if (ptr->tail.next) {
5246                         ptr = ptr->tail.next;
5247                         /* Set ptr to the next real item (skip head) */
5248                         ptr++;
5249                 } else
5250                         return NULL;
5251         }
5252         return ptr;
5253 }
5254
5255 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5256 {
5257         union trace_eval_map_item *ptr = v;
5258
5259         /*
5260          * Paranoid! If ptr points to end, we don't want to increment past it.
5261          * This really should never happen.
5262          */
5263         ptr = update_eval_map(ptr);
5264         if (WARN_ON_ONCE(!ptr))
5265                 return NULL;
5266
5267         ptr++;
5268
5269         (*pos)++;
5270
5271         ptr = update_eval_map(ptr);
5272
5273         return ptr;
5274 }
5275
5276 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5277 {
5278         union trace_eval_map_item *v;
5279         loff_t l = 0;
5280
5281         mutex_lock(&trace_eval_mutex);
5282
5283         v = trace_eval_maps;
5284         if (v)
5285                 v++;
5286
5287         while (v && l < *pos) {
5288                 v = eval_map_next(m, v, &l);
5289         }
5290
5291         return v;
5292 }
5293
5294 static void eval_map_stop(struct seq_file *m, void *v)
5295 {
5296         mutex_unlock(&trace_eval_mutex);
5297 }
5298
5299 static int eval_map_show(struct seq_file *m, void *v)
5300 {
5301         union trace_eval_map_item *ptr = v;
5302
5303         seq_printf(m, "%s %ld (%s)\n",
5304                    ptr->map.eval_string, ptr->map.eval_value,
5305                    ptr->map.system);
5306
5307         return 0;
5308 }
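
/* Each line of the eval_map file is "<eval_string> <value> (<system>)". */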
5309
5310 static const struct seq_operations tracing_eval_map_seq_ops = {
5311         .start          = eval_map_start,
5312         .next           = eval_map_next,
5313         .stop           = eval_map_stop,
5314         .show           = eval_map_show,
5315 };
5316
5317 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5318 {
5319         int ret;
5320
5321         ret = tracing_check_open_get_tr(NULL);
5322         if (ret)
5323                 return ret;
5324
5325         return seq_open(filp, &tracing_eval_map_seq_ops);
5326 }
5327
5328 static const struct file_operations tracing_eval_map_fops = {
5329         .open           = tracing_eval_map_open,
5330         .read           = seq_read,
5331         .llseek         = seq_lseek,
5332         .release        = seq_release,
5333 };
5334
5335 static inline union trace_eval_map_item *
5336 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5337 {
5338         /* Return tail of array given the head */
5339         return ptr + ptr->head.length + 1;
5340 }
5341
5342 static void
5343 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5344                            int len)
5345 {
5346         struct trace_eval_map **stop;
5347         struct trace_eval_map **map;
5348         union trace_eval_map_item *map_array;
5349         union trace_eval_map_item *ptr;
5350
5351         stop = start + len;
5352
5353         /*
5354          * The trace_eval_maps contains the map plus a head and tail item,
5355          * where the head holds the module and length of array, and the
5356          * tail holds a pointer to the next list.
5357          */
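	/*
	 * For example, with len == 3 the array built below is laid out as:
	 *
	 *   map_array[0]  head { .mod, .length = 3 }
	 *   map_array[1]  map  (copied from *start[0])
	 *   map_array[2]  map
	 *   map_array[3]  map
	 *   map_array[4]  tail (zeroed here; .next is later pointed at the
	 *                       next module's array, if any)
	 */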
5358         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5359         if (!map_array) {
5360                 pr_warn("Unable to allocate trace eval mapping\n");
5361                 return;
5362         }
5363
5364         mutex_lock(&trace_eval_mutex);
5365
5366         if (!trace_eval_maps)
5367                 trace_eval_maps = map_array;
5368         else {
5369                 ptr = trace_eval_maps;
5370                 for (;;) {
5371                         ptr = trace_eval_jmp_to_tail(ptr);
5372                         if (!ptr->tail.next)
5373                                 break;
5374                         ptr = ptr->tail.next;
5375
5376                 }
5377                 ptr->tail.next = map_array;
5378         }
5379         map_array->head.mod = mod;
5380         map_array->head.length = len;
5381         map_array++;
5382
5383         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5384                 map_array->map = **map;
5385                 map_array++;
5386         }
5387         memset(map_array, 0, sizeof(*map_array));
5388
5389         mutex_unlock(&trace_eval_mutex);
5390 }
5391
5392 static void trace_create_eval_file(struct dentry *d_tracer)
5393 {
5394         trace_create_file("eval_map", 0444, d_tracer,
5395                           NULL, &tracing_eval_map_fops);
5396 }
5397
5398 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5399 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5400 static inline void trace_insert_eval_map_file(struct module *mod,
5401                               struct trace_eval_map **start, int len) { }
5402 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5403
5404 static void trace_insert_eval_map(struct module *mod,
5405                                   struct trace_eval_map **start, int len)
5406 {
5407         struct trace_eval_map **map;
5408
5409         if (len <= 0)
5410                 return;
5411
5412         map = start;
5413
5414         trace_event_eval_update(map, len);
5415
5416         trace_insert_eval_map_file(mod, start, len);
5417 }
5418
5419 static ssize_t
5420 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5421                        size_t cnt, loff_t *ppos)
5422 {
5423         struct trace_array *tr = filp->private_data;
5424         char buf[MAX_TRACER_SIZE+2];
5425         int r;
5426
5427         mutex_lock(&trace_types_lock);
5428         r = sprintf(buf, "%s\n", tr->current_trace->name);
5429         mutex_unlock(&trace_types_lock);
5430
5431         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5432 }
5433
5434 int tracer_init(struct tracer *t, struct trace_array *tr)
5435 {
5436         tracing_reset_online_cpus(&tr->trace_buffer);
5437         return t->init(tr);
5438 }
5439
5440 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
5441 {
5442         int cpu;
5443
5444         for_each_tracing_cpu(cpu)
5445                 per_cpu_ptr(buf->data, cpu)->entries = val;
5446 }
5447
5448 #ifdef CONFIG_TRACER_MAX_TRACE
5449 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5450 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
5451                                         struct trace_buffer *size_buf, int cpu_id)
5452 {
5453         int cpu, ret = 0;
5454
5455         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5456                 for_each_tracing_cpu(cpu) {
5457                         ret = ring_buffer_resize(trace_buf->buffer,
5458                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5459                         if (ret < 0)
5460                                 break;
5461                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5462                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5463                 }
5464         } else {
5465                 ret = ring_buffer_resize(trace_buf->buffer,
5466                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5467                 if (ret == 0)
5468                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5469                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5470         }
5471
5472         return ret;
5473 }
5474 #endif /* CONFIG_TRACER_MAX_TRACE */
5475
5476 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5477                                         unsigned long size, int cpu)
5478 {
5479         int ret;
5480
5481         /*
5482          * If kernel or user changes the size of the ring buffer
5483          * we use the size that was given, and we can forget about
5484          * expanding it later.
5485          */
5486         ring_buffer_expanded = true;
5487
5488         /* May be called before buffers are initialized */
5489         if (!tr->trace_buffer.buffer)
5490                 return 0;
5491
5492         ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5493         if (ret < 0)
5494                 return ret;
5495
5496 #ifdef CONFIG_TRACER_MAX_TRACE
5497         if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5498             !tr->current_trace->use_max_tr)
5499                 goto out;
5500
5501         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5502         if (ret < 0) {
5503                 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5504                                                      &tr->trace_buffer, cpu);
5505                 if (r < 0) {
5506                         /*
5507                          * AARGH! We are left with a different
5508                          * sized max buffer!
5509                          * The max buffer is our "snapshot" buffer.
5510                          * When a tracer needs a snapshot (one of the
5511                          * latency tracers), it swaps the max buffer
5512                          * with the saved snapshot. We succeeded in
5513                          * updating the size of the main buffer, but failed to
5514                          * update the size of the max buffer. But when we tried
5515                          * to reset the main buffer to the original size, we
5516                          * failed there too. This is very unlikely to
5517                          * happen, but if it does, warn and kill all
5518                          * tracing.
5519                          */
5520                         WARN_ON(1);
5521                         tracing_disabled = 1;
5522                 }
5523                 return ret;
5524         }
5525
5526         if (cpu == RING_BUFFER_ALL_CPUS)
5527                 set_buffer_entries(&tr->max_buffer, size);
5528         else
5529                 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5530
5531  out:
5532 #endif /* CONFIG_TRACER_MAX_TRACE */
5533
5534         if (cpu == RING_BUFFER_ALL_CPUS)
5535                 set_buffer_entries(&tr->trace_buffer, size);
5536         else
5537                 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5538
5539         return ret;
5540 }
5541
5542 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5543                                           unsigned long size, int cpu_id)
5544 {
5545         int ret = size;
5546
5547         mutex_lock(&trace_types_lock);
5548
5549         if (cpu_id != RING_BUFFER_ALL_CPUS) {
5550                 /* make sure this cpu is enabled in the mask */
5551                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5552                         ret = -EINVAL;
5553                         goto out;
5554                 }
5555         }
5556
5557         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5558         if (ret < 0)
5559                 ret = -ENOMEM;
5560
5561 out:
5562         mutex_unlock(&trace_types_lock);
5563
5564         return ret;
5565 }
5566
5567
5568 /**
5569  * tracing_update_buffers - used by tracing facility to expand ring buffers
5570  *
5571  * To save memory when tracing is never used on a system that has it
5572  * configured in, the ring buffers are set to a minimum size. Once a
5573  * user starts to use the tracing facility, the buffers need to grow
5574  * to their default size.
5575  *
5576  * This function is to be called when a tracer is about to be used.
5577  */
5578 int tracing_update_buffers(void)
5579 {
5580         int ret = 0;
5581
5582         mutex_lock(&trace_types_lock);
5583         if (!ring_buffer_expanded)
5584                 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5585                                                 RING_BUFFER_ALL_CPUS);
5586         mutex_unlock(&trace_types_lock);
5587
5588         return ret;
5589 }
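
/*
 * A hypothetical caller sketch (illustrative only, not taken from this
 * file): anything that is about to enable tracing expands the buffers
 * first and backs out if the allocation fails.
 */
#if 0
static int example_start_tracing(void)
{
	int ret = tracing_update_buffers();

	if (ret < 0)
		return ret;
	/* From here on the ring buffers are at their full size. */
	return 0;
}
#endif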
5590
5591 struct trace_option_dentry;
5592
5593 static void
5594 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5595
5596 /*
5597  * Used to clear out the tracer before deletion of an instance.
5598  * Must have trace_types_lock held.
5599  */
5600 static void tracing_set_nop(struct trace_array *tr)
5601 {
5602         if (tr->current_trace == &nop_trace)
5603                 return;
5604
5605         tr->current_trace->enabled--;
5606
5607         if (tr->current_trace->reset)
5608                 tr->current_trace->reset(tr);
5609
5610         tr->current_trace = &nop_trace;
5611 }
5612
5613 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5614 {
5615         /* Only enable if the directory has been created already. */
5616         if (!tr->dir)
5617                 return;
5618
5619         create_trace_option_files(tr, t);
5620 }
5621
5622 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5623 {
5624         struct tracer *t;
5625 #ifdef CONFIG_TRACER_MAX_TRACE
5626         bool had_max_tr;
5627 #endif
5628         int ret = 0;
5629
5630         mutex_lock(&trace_types_lock);
5631
5632         if (!ring_buffer_expanded) {
5633                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5634                                                 RING_BUFFER_ALL_CPUS);
5635                 if (ret < 0)
5636                         goto out;
5637                 ret = 0;
5638         }
5639
5640         for (t = trace_types; t; t = t->next) {
5641                 if (strcmp(t->name, buf) == 0)
5642                         break;
5643         }
5644         if (!t) {
5645                 ret = -EINVAL;
5646                 goto out;
5647         }
5648         if (t == tr->current_trace)
5649                 goto out;
5650
5651 #ifdef CONFIG_TRACER_SNAPSHOT
5652         if (t->use_max_tr) {
5653                 arch_spin_lock(&tr->max_lock);
5654                 if (tr->cond_snapshot)
5655                         ret = -EBUSY;
5656                 arch_spin_unlock(&tr->max_lock);
5657                 if (ret)
5658                         goto out;
5659         }
5660 #endif
5661         /* Some tracers won't work from the kernel command line */
5662         if (system_state < SYSTEM_RUNNING && t->noboot) {
5663                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
5664                         t->name);
5665                 goto out;
5666         }
5667
5668         /* Some tracers are only allowed for the top level buffer */
5669         if (!trace_ok_for_array(t, tr)) {
5670                 ret = -EINVAL;
5671                 goto out;
5672         }
5673
5674         /* If trace pipe files are being read, we can't change the tracer */
5675         if (tr->current_trace->ref) {
5676                 ret = -EBUSY;
5677                 goto out;
5678         }
5679
5680         trace_branch_disable();
5681
5682         tr->current_trace->enabled--;
5683
5684         if (tr->current_trace->reset)
5685                 tr->current_trace->reset(tr);
5686
5687         /* Current trace needs to be nop_trace before synchronize_rcu */
5688         tr->current_trace = &nop_trace;
5689
5690 #ifdef CONFIG_TRACER_MAX_TRACE
5691         had_max_tr = tr->allocated_snapshot;
5692
5693         if (had_max_tr && !t->use_max_tr) {
5694                 /*
5695                  * We need to make sure that the update_max_tr sees that
5696                  * current_trace changed to nop_trace to keep it from
5697                  * swapping the buffers after we resize it.
5698                  * update_max_tr() is called with interrupts disabled,
5699                  * so a synchronize_rcu() is sufficient.
5700                  */
5701                 synchronize_rcu();
5702                 free_snapshot(tr);
5703         }
5704 #endif
5705
5706 #ifdef CONFIG_TRACER_MAX_TRACE
5707         if (t->use_max_tr && !had_max_tr) {
5708                 ret = tracing_alloc_snapshot_instance(tr);
5709                 if (ret < 0)
5710                         goto out;
5711         }
5712 #endif
5713
5714         if (t->init) {
5715                 ret = tracer_init(t, tr);
5716                 if (ret)
5717                         goto out;
5718         }
5719
5720         tr->current_trace = t;
5721         tr->current_trace->enabled++;
5722         trace_branch_enable(tr);
5723  out:
5724         mutex_unlock(&trace_types_lock);
5725
5726         return ret;
5727 }
5728
5729 static ssize_t
5730 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5731                         size_t cnt, loff_t *ppos)
5732 {
5733         struct trace_array *tr = filp->private_data;
5734         char buf[MAX_TRACER_SIZE+1];
5735         int i;
5736         size_t ret;
5737         int err;
5738
5739         ret = cnt;
5740
5741         if (cnt > MAX_TRACER_SIZE)
5742                 cnt = MAX_TRACER_SIZE;
5743
5744         if (copy_from_user(buf, ubuf, cnt))
5745                 return -EFAULT;
5746
5747         buf[cnt] = 0;
5748
5749         /* strip ending whitespace. */
5750         for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5751                 buf[i] = 0;
5752
5753         err = tracing_set_tracer(tr, buf);
5754         if (err)
5755                 return err;
5756
5757         *ppos += ret;
5758
5759         return ret;
5760 }
5761
5762 static ssize_t
5763 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5764                    size_t cnt, loff_t *ppos)
5765 {
5766         char buf[64];
5767         int r;
5768
5769         r = snprintf(buf, sizeof(buf), "%ld\n",
5770                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5771         if (r > sizeof(buf))
5772                 r = sizeof(buf);
5773         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5774 }
5775
5776 static ssize_t
5777 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5778                     size_t cnt, loff_t *ppos)
5779 {
5780         unsigned long val;
5781         int ret;
5782
5783         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5784         if (ret)
5785                 return ret;
5786
5787         *ptr = val * 1000;
5788
5789         return cnt;
5790 }
5791
5792 static ssize_t
5793 tracing_thresh_read(struct file *filp, char __user *ubuf,
5794                     size_t cnt, loff_t *ppos)
5795 {
5796         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5797 }
5798
5799 static ssize_t
5800 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5801                      size_t cnt, loff_t *ppos)
5802 {
5803         struct trace_array *tr = filp->private_data;
5804         int ret;
5805
5806         mutex_lock(&trace_types_lock);
5807         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5808         if (ret < 0)
5809                 goto out;
5810
5811         if (tr->current_trace->update_thresh) {
5812                 ret = tr->current_trace->update_thresh(tr);
5813                 if (ret < 0)
5814                         goto out;
5815         }
5816
5817         ret = cnt;
5818 out:
5819         mutex_unlock(&trace_types_lock);
5820
5821         return ret;
5822 }
5823
5824 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5825
5826 static ssize_t
5827 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5828                      size_t cnt, loff_t *ppos)
5829 {
5830         return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5831 }
5832
5833 static ssize_t
5834 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5835                       size_t cnt, loff_t *ppos)
5836 {
5837         return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5838 }
5839
5840 #endif
5841
5842 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5843 {
5844         struct trace_array *tr = inode->i_private;
5845         struct trace_iterator *iter;
5846         int ret;
5847
5848         ret = tracing_check_open_get_tr(tr);
5849         if (ret)
5850                 return ret;
5851
5852         mutex_lock(&trace_types_lock);
5853
5854         /* create a buffer to store the information to pass to userspace */
5855         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5856         if (!iter) {
5857                 ret = -ENOMEM;
5858                 __trace_array_put(tr);
5859                 goto out;
5860         }
5861
5862         trace_seq_init(&iter->seq);
5863         iter->trace = tr->current_trace;
5864
5865         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5866                 ret = -ENOMEM;
5867                 goto fail;
5868         }
5869
5870         /* trace pipe does not show start of buffer */
5871         cpumask_setall(iter->started);
5872
5873         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5874                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5875
5876         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5877         if (trace_clocks[tr->clock_id].in_ns)
5878                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5879
5880         iter->tr = tr;
5881         iter->trace_buffer = &tr->trace_buffer;
5882         iter->cpu_file = tracing_get_cpu(inode);
5883         mutex_init(&iter->mutex);
5884         filp->private_data = iter;
5885
5886         if (iter->trace->pipe_open)
5887                 iter->trace->pipe_open(iter);
5888
5889         nonseekable_open(inode, filp);
5890
5891         tr->current_trace->ref++;
5892 out:
5893         mutex_unlock(&trace_types_lock);
5894         return ret;
5895
5896 fail:
5897         kfree(iter);
5898         __trace_array_put(tr);
5899         mutex_unlock(&trace_types_lock);
5900         return ret;
5901 }
5902
5903 static int tracing_release_pipe(struct inode *inode, struct file *file)
5904 {
5905         struct trace_iterator *iter = file->private_data;
5906         struct trace_array *tr = inode->i_private;
5907
5908         mutex_lock(&trace_types_lock);
5909
5910         tr->current_trace->ref--;
5911
5912         if (iter->trace->pipe_close)
5913                 iter->trace->pipe_close(iter);
5914
5915         mutex_unlock(&trace_types_lock);
5916
5917         free_cpumask_var(iter->started);
5918         mutex_destroy(&iter->mutex);
5919         kfree(iter);
5920
5921         trace_array_put(tr);
5922
5923         return 0;
5924 }
5925
5926 static __poll_t
5927 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5928 {
5929         struct trace_array *tr = iter->tr;
5930
5931         /* Iterators are static; they should be either filled or empty */
5932         if (trace_buffer_iter(iter, iter->cpu_file))
5933                 return EPOLLIN | EPOLLRDNORM;
5934
5935         if (tr->trace_flags & TRACE_ITER_BLOCK)
5936                 /*
5937                  * Always select as readable when in blocking mode
5938                  */
5939                 return EPOLLIN | EPOLLRDNORM;
5940         else
5941                 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5942                                              filp, poll_table);
5943 }
5944
5945 static __poll_t
5946 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5947 {
5948         struct trace_iterator *iter = filp->private_data;
5949
5950         return trace_poll(iter, filp, poll_table);
5951 }
5952
5953 /* Must be called with iter->mutex held. */
5954 static int tracing_wait_pipe(struct file *filp)
5955 {
5956         struct trace_iterator *iter = filp->private_data;
5957         int ret;
5958
5959         while (trace_empty(iter)) {
5960
5961                 if ((filp->f_flags & O_NONBLOCK)) {
5962                         return -EAGAIN;
5963                 }
5964
5965                 /*
5966                  * We block until we read something and tracing is disabled.
5967                  * We still block if tracing is disabled, but we have never
5968                  * read anything. This allows a user to cat this file, and
5969                  * then enable tracing. But after we have read something,
5970                  * we give an EOF when tracing is again disabled.
5971                  *
5972                  * iter->pos will be 0 if we haven't read anything.
5973                  */
5974                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
5975                         break;
5976
5977                 mutex_unlock(&iter->mutex);
5978
5979                 ret = wait_on_pipe(iter, 0);
5980
5981                 mutex_lock(&iter->mutex);
5982
5983                 if (ret)
5984                         return ret;
5985         }
5986
5987         return 1;
5988 }
5989
5990 /*
5991  * Consumer reader.
5992  */
5993 static ssize_t
5994 tracing_read_pipe(struct file *filp, char __user *ubuf,
5995                   size_t cnt, loff_t *ppos)
5996 {
5997         struct trace_iterator *iter = filp->private_data;
5998         ssize_t sret;
5999
6000         /*
6001          * Avoid more than one consumer on a single file descriptor.
6002          * This is just a matter of trace coherency; the ring buffer itself
6003          * is protected.
6004          */
6005         mutex_lock(&iter->mutex);
6006
6007         /* return any leftover data */
6008         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6009         if (sret != -EBUSY)
6010                 goto out;
6011
6012         trace_seq_init(&iter->seq);
6013
6014         if (iter->trace->read) {
6015                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6016                 if (sret)
6017                         goto out;
6018         }
6019
6020 waitagain:
6021         sret = tracing_wait_pipe(filp);
6022         if (sret <= 0)
6023                 goto out;
6024
6025         /* stop when tracing is finished */
6026         if (trace_empty(iter)) {
6027                 sret = 0;
6028                 goto out;
6029         }
6030
6031         if (cnt >= PAGE_SIZE)
6032                 cnt = PAGE_SIZE - 1;
6033
6034         /* reset all but tr, trace, and overruns */
6035         memset(&iter->seq, 0,
6036                sizeof(struct trace_iterator) -
6037                offsetof(struct trace_iterator, seq));
6038         cpumask_clear(iter->started);
6039         trace_seq_init(&iter->seq);
6040         iter->pos = -1;
6041
6042         trace_event_read_lock();
6043         trace_access_lock(iter->cpu_file);
6044         while (trace_find_next_entry_inc(iter) != NULL) {
6045                 enum print_line_t ret;
6046                 int save_len = iter->seq.seq.len;
6047
6048                 ret = print_trace_line(iter);
6049                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6050                         /* don't print partial lines */
6051                         iter->seq.seq.len = save_len;
6052                         break;
6053                 }
6054                 if (ret != TRACE_TYPE_NO_CONSUME)
6055                         trace_consume(iter);
6056
6057                 if (trace_seq_used(&iter->seq) >= cnt)
6058                         break;
6059
6060                 /*
6061                  * Setting the full flag means we reached the trace_seq buffer
6062                  * size and should have exited via the partial output condition
6063                  * above.  One of the trace_seq_* functions is not used properly.
6064                  */
6065                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6066                           iter->ent->type);
6067         }
6068         trace_access_unlock(iter->cpu_file);
6069         trace_event_read_unlock();
6070
6071         /* Now copy what we have to the user */
6072         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6073         if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6074                 trace_seq_init(&iter->seq);
6075
6076         /*
6077          * If there was nothing to send to user, in spite of consuming trace
6078          * entries, go back to wait for more entries.
6079          */
6080         if (sret == -EBUSY)
6081                 goto waitagain;
6082
6083 out:
6084         mutex_unlock(&iter->mutex);
6085
6086         return sret;
6087 }
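
/*
 * A minimal user-space consumer sketch (illustrative only, not part of this
 * file) for trace_pipe as implemented above: reads block until data is
 * available (or fail with EAGAIN when the file is opened O_NONBLOCK), and
 * each read consumes the entries it returns.  The tracefs mount point is an
 * assumption.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);

	if (fd < 0)
		return 1;
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
#endif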
6088
6089 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6090                                      unsigned int idx)
6091 {
6092         __free_page(spd->pages[idx]);
6093 }
6094
6095 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
6096         .confirm                = generic_pipe_buf_confirm,
6097         .release                = generic_pipe_buf_release,
6098         .steal                  = generic_pipe_buf_steal,
6099         .get                    = generic_pipe_buf_get,
6100 };
6101
6102 static size_t
6103 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6104 {
6105         size_t count;
6106         int save_len;
6107         int ret;
6108
6109         /* Seq buffer is page-sized, exactly what we need. */
6110         for (;;) {
6111                 save_len = iter->seq.seq.len;
6112                 ret = print_trace_line(iter);
6113
6114                 if (trace_seq_has_overflowed(&iter->seq)) {
6115                         iter->seq.seq.len = save_len;
6116                         break;
6117                 }
6118
6119                 /*
6120                  * This should not be hit: TRACE_TYPE_PARTIAL_LINE should only
6121                  * be returned if the iter->seq overflowed. But check it
6122                  * anyway to be safe.
6123                  */
6124                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6125                         iter->seq.seq.len = save_len;
6126                         break;
6127                 }
6128
6129                 count = trace_seq_used(&iter->seq) - save_len;
6130                 if (rem < count) {
6131                         rem = 0;
6132                         iter->seq.seq.len = save_len;
6133                         break;
6134                 }
6135
6136                 if (ret != TRACE_TYPE_NO_CONSUME)
6137                         trace_consume(iter);
6138                 rem -= count;
6139                 if (!trace_find_next_entry_inc(iter))   {
6140                         rem = 0;
6141                         iter->ent = NULL;
6142                         break;
6143                 }
6144         }
6145
6146         return rem;
6147 }
6148
6149 static ssize_t tracing_splice_read_pipe(struct file *filp,
6150                                         loff_t *ppos,
6151                                         struct pipe_inode_info *pipe,
6152                                         size_t len,
6153                                         unsigned int flags)
6154 {
6155         struct page *pages_def[PIPE_DEF_BUFFERS];
6156         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6157         struct trace_iterator *iter = filp->private_data;
6158         struct splice_pipe_desc spd = {
6159                 .pages          = pages_def,
6160                 .partial        = partial_def,
6161                 .nr_pages       = 0, /* This gets updated below. */
6162                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6163                 .ops            = &tracing_pipe_buf_ops,
6164                 .spd_release    = tracing_spd_release_pipe,
6165         };
6166         ssize_t ret;
6167         size_t rem;
6168         unsigned int i;
6169
6170         if (splice_grow_spd(pipe, &spd))
6171                 return -ENOMEM;
6172
6173         mutex_lock(&iter->mutex);
6174
6175         if (iter->trace->splice_read) {
6176                 ret = iter->trace->splice_read(iter, filp,
6177                                                ppos, pipe, len, flags);
6178                 if (ret)
6179                         goto out_err;
6180         }
6181
6182         ret = tracing_wait_pipe(filp);
6183         if (ret <= 0)
6184                 goto out_err;
6185
6186         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6187                 ret = -EFAULT;
6188                 goto out_err;
6189         }
6190
6191         trace_event_read_lock();
6192         trace_access_lock(iter->cpu_file);
6193
6194         /* Fill as many pages as possible. */
6195         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6196                 spd.pages[i] = alloc_page(GFP_KERNEL);
6197                 if (!spd.pages[i])
6198                         break;
6199
6200                 rem = tracing_fill_pipe_page(rem, iter);
6201
6202                 /* Copy the data into the page, so we can start over. */
6203                 ret = trace_seq_to_buffer(&iter->seq,
6204                                           page_address(spd.pages[i]),
6205                                           trace_seq_used(&iter->seq));
6206                 if (ret < 0) {
6207                         __free_page(spd.pages[i]);
6208                         break;
6209                 }
6210                 spd.partial[i].offset = 0;
6211                 spd.partial[i].len = trace_seq_used(&iter->seq);
6212
6213                 trace_seq_init(&iter->seq);
6214         }
6215
6216         trace_access_unlock(iter->cpu_file);
6217         trace_event_read_unlock();
6218         mutex_unlock(&iter->mutex);
6219
6220         spd.nr_pages = i;
6221
6222         if (i)
6223                 ret = splice_to_pipe(pipe, &spd);
6224         else
6225                 ret = 0;
6226 out:
6227         splice_shrink_spd(&spd);
6228         return ret;
6229
6230 out_err:
6231         mutex_unlock(&iter->mutex);
6232         goto out;
6233 }
6234
6235 static ssize_t
6236 tracing_entries_read(struct file *filp, char __user *ubuf,
6237                      size_t cnt, loff_t *ppos)
6238 {
6239         struct inode *inode = file_inode(filp);
6240         struct trace_array *tr = inode->i_private;
6241         int cpu = tracing_get_cpu(inode);
6242         char buf[64];
6243         int r = 0;
6244         ssize_t ret;
6245
6246         mutex_lock(&trace_types_lock);
6247
6248         if (cpu == RING_BUFFER_ALL_CPUS) {
6249                 int cpu, buf_size_same;
6250                 unsigned long size;
6251
6252                 size = 0;
6253                 buf_size_same = 1;
6254                 /* check if all cpu sizes are same */
6255                 for_each_tracing_cpu(cpu) {
6256                         /* fill in the size from first enabled cpu */
6257                         if (size == 0)
6258                                 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
6259                         if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
6260                                 buf_size_same = 0;
6261                                 break;
6262                         }
6263                 }
6264
6265                 if (buf_size_same) {
6266                         if (!ring_buffer_expanded)
6267                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6268                                             size >> 10,
6269                                             trace_buf_size >> 10);
6270                         else
6271                                 r = sprintf(buf, "%lu\n", size >> 10);
6272                 } else
6273                         r = sprintf(buf, "X\n");
6274         } else
6275                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
6276
6277         mutex_unlock(&trace_types_lock);
6278
6279         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6280         return ret;
6281 }
6282
6283 static ssize_t
6284 tracing_entries_write(struct file *filp, const char __user *ubuf,
6285                       size_t cnt, loff_t *ppos)
6286 {
6287         struct inode *inode = file_inode(filp);
6288         struct trace_array *tr = inode->i_private;
6289         unsigned long val;
6290         int ret;
6291
6292         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6293         if (ret)
6294                 return ret;
6295
6296         /* must have at least 1 entry */
6297         if (!val)
6298                 return -EINVAL;
6299
6300         /* value is in KB */
6301         val <<= 10;
6302         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6303         if (ret < 0)
6304                 return ret;
6305
6306         *ppos += cnt;
6307
6308         return cnt;
6309 }
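
/*
 * Worked example for the write path above: writing "1408" to a
 * buffer_size_kb file arrives here as val = 1408, is shifted to
 * 1408 << 10 = 1441792 bytes, and is applied by
 * tracing_resize_ring_buffer() to the cpu selected by
 * tracing_get_cpu(inode).
 */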
6310
6311 static ssize_t
6312 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6313                                 size_t cnt, loff_t *ppos)
6314 {
6315         struct trace_array *tr = filp->private_data;
6316         char buf[64];
6317         int r, cpu;
6318         unsigned long size = 0, expanded_size = 0;
6319
6320         mutex_lock(&trace_types_lock);
6321         for_each_tracing_cpu(cpu) {
6322                 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
6323                 if (!ring_buffer_expanded)
6324                         expanded_size += trace_buf_size >> 10;
6325         }
6326         if (ring_buffer_expanded)
6327                 r = sprintf(buf, "%lu\n", size);
6328         else
6329                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6330         mutex_unlock(&trace_types_lock);
6331
6332         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6333 }
6334
6335 static ssize_t
6336 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6337                           size_t cnt, loff_t *ppos)
6338 {
6339         /*
6340          * There is no need to read what the user has written; this function
6341          * only exists so that "echo" into this file does not return an error
6342          */
6343
6344         *ppos += cnt;
6345
6346         return cnt;
6347 }
6348
6349 static int
6350 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6351 {
6352         struct trace_array *tr = inode->i_private;
6353
6354         /* disable tracing? */
6355         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6356                 tracer_tracing_off(tr);
6357         /* resize the ring buffer to 0 */
6358         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6359
6360         trace_array_put(tr);
6361
6362         return 0;
6363 }
6364
6365 static ssize_t
6366 tracing_mark_write(struct file *filp, const char __user *ubuf,
6367                                         size_t cnt, loff_t *fpos)
6368 {
6369         struct trace_array *tr = filp->private_data;
6370         struct ring_buffer_event *event;
6371         enum event_trigger_type tt = ETT_NONE;
6372         struct ring_buffer *buffer;
6373         struct print_entry *entry;
6374         unsigned long irq_flags;
6375         ssize_t written;
6376         int size;
6377         int len;
6378
6379 /* Used in tracing_mark_raw_write() as well */
6380 #define FAULTED_STR "<faulted>"
6381 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6382
6383         if (tracing_disabled)
6384                 return -EINVAL;
6385
6386         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6387                 return -EINVAL;
6388
6389         if (cnt > TRACE_BUF_SIZE)
6390                 cnt = TRACE_BUF_SIZE;
6391
6392         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6393
6394         local_save_flags(irq_flags);
6395         size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6396
6397         /* If less than "<faulted>", then make sure we can still add that */
6398         if (cnt < FAULTED_SIZE)
6399                 size += FAULTED_SIZE - cnt;
6400
6401         buffer = tr->trace_buffer.buffer;
6402         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6403                                             irq_flags, preempt_count());
6404         if (unlikely(!event))
6405                 /* Ring buffer disabled, return as if not open for write */
6406                 return -EBADF;
6407
6408         entry = ring_buffer_event_data(event);
6409         entry->ip = _THIS_IP_;
6410
6411         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6412         if (len) {
6413                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6414                 cnt = FAULTED_SIZE;
6415                 written = -EFAULT;
6416         } else
6417                 written = cnt;
6418         len = cnt;
6419
6420         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6421                 /* do not add \n before testing triggers, but add \0 */
6422                 entry->buf[cnt] = '\0';
6423                 tt = event_triggers_call(tr->trace_marker_file, entry, event);
6424         }
6425
6426         if (entry->buf[cnt - 1] != '\n') {
6427                 entry->buf[cnt] = '\n';
6428                 entry->buf[cnt + 1] = '\0';
6429         } else
6430                 entry->buf[cnt] = '\0';
6431
6432         __buffer_unlock_commit(buffer, event);
6433
6434         if (tt)
6435                 event_triggers_post_call(tr->trace_marker_file, tt);
6436
6437         if (written > 0)
6438                 *fpos += written;
6439
6440         return written;
6441 }
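
/*
 * For illustration only (not part of the original file): tracing_mark_write()
 * above backs the trace_marker file, which lets userspace inject a text
 * marker (a TRACE_PRINT event) into the ring buffer, typically to correlate
 * application activity with kernel events.  A minimal sketch, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *        #include <fcntl.h>
 *        #include <string.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                const char *msg = "request 1234 started\n";
 *                int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, msg, strlen(msg));    // shows up as a print event
 *                close(fd);
 *                return 0;
 *        }
 */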
6442
6443 /* Limit it for now to 3K (including tag) */
6444 #define RAW_DATA_MAX_SIZE (1024*3)
6445
6446 static ssize_t
6447 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6448                                         size_t cnt, loff_t *fpos)
6449 {
6450         struct trace_array *tr = filp->private_data;
6451         struct ring_buffer_event *event;
6452         struct ring_buffer *buffer;
6453         struct raw_data_entry *entry;
6454         unsigned long irq_flags;
6455         ssize_t written;
6456         int size;
6457         int len;
6458
6459 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6460
6461         if (tracing_disabled)
6462                 return -EINVAL;
6463
6464         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6465                 return -EINVAL;
6466
6467         /* The marker must at least have a tag id */
6468         if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
6469                 return -EINVAL;
6470
6471         if (cnt > TRACE_BUF_SIZE)
6472                 cnt = TRACE_BUF_SIZE;
6473
6474         BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6475
6476         local_save_flags(irq_flags);
6477         size = sizeof(*entry) + cnt;
6478         if (cnt < FAULT_SIZE_ID)
6479                 size += FAULT_SIZE_ID - cnt;
6480
6481         buffer = tr->trace_buffer.buffer;
6482         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
6483                                             irq_flags, preempt_count());
6484         if (!event)
6485                 /* Ring buffer disabled, return as if not open for write */
6486                 return -EBADF;
6487
6488         entry = ring_buffer_event_data(event);
6489
6490         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
6491         if (len) {
6492                 entry->id = -1;
6493                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6494                 written = -EFAULT;
6495         } else
6496                 written = cnt;
6497
6498         __buffer_unlock_commit(buffer, event);
6499
6500         if (written > 0)
6501                 *fpos += written;
6502
6503         return written;
6504 }
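
/*
 * For illustration only (not part of the original file):
 * tracing_mark_raw_write() above backs trace_marker_raw, the binary
 * counterpart of trace_marker.  As the size check above shows, the payload
 * must start with an unsigned int tag id; everything after it is opaque data
 * for whatever userspace tool reads the raw buffer.  A minimal sketch; the id
 * value, payload and tracefs path are arbitrary assumptions:
 *
 *        #include <fcntl.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                struct {
 *                        unsigned int id;        // tag understood by the reader
 *                        char payload[8];
 *                } msg = { .id = 42, .payload = "rawdata" };
 *                int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, &msg, sizeof(msg));   // id + payload in a single write
 *                close(fd);
 *                return 0;
 *        }
 */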
6505
6506 static int tracing_clock_show(struct seq_file *m, void *v)
6507 {
6508         struct trace_array *tr = m->private;
6509         int i;
6510
6511         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
6512                 seq_printf(m,
6513                         "%s%s%s%s", i ? " " : "",
6514                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
6515                         i == tr->clock_id ? "]" : "");
6516         seq_putc(m, '\n');
6517
6518         return 0;
6519 }
6520
6521 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6522 {
6523         int i;
6524
6525         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6526                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6527                         break;
6528         }
6529         if (i == ARRAY_SIZE(trace_clocks))
6530                 return -EINVAL;
6531
6532         mutex_lock(&trace_types_lock);
6533
6534         tr->clock_id = i;
6535
6536         ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6537
6538         /*
6539          * New clock may not be consistent with the previous clock.
6540          * Reset the buffer so that it doesn't have incomparable timestamps.
6541          */
6542         tracing_reset_online_cpus(&tr->trace_buffer);
6543
6544 #ifdef CONFIG_TRACER_MAX_TRACE
6545         if (tr->max_buffer.buffer)
6546                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6547         tracing_reset_online_cpus(&tr->max_buffer);
6548 #endif
6549
6550         mutex_unlock(&trace_types_lock);
6551
6552         return 0;
6553 }
6554
6555 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6556                                    size_t cnt, loff_t *fpos)
6557 {
6558         struct seq_file *m = filp->private_data;
6559         struct trace_array *tr = m->private;
6560         char buf[64];
6561         const char *clockstr;
6562         int ret;
6563
6564         if (cnt >= sizeof(buf))
6565                 return -EINVAL;
6566
6567         if (copy_from_user(buf, ubuf, cnt))
6568                 return -EFAULT;
6569
6570         buf[cnt] = 0;
6571
6572         clockstr = strstrip(buf);
6573
6574         ret = tracing_set_clock(tr, clockstr);
6575         if (ret)
6576                 return ret;
6577
6578         *fpos += cnt;
6579
6580         return cnt;
6581 }
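
/*
 * For illustration only (not part of the original file): tracing_clock_write()
 * above accepts one of the clock names shown by reading trace_clock (the
 * current clock is listed in brackets).  Note from tracing_set_clock() that
 * switching clocks resets the existing buffer contents.  A minimal sketch
 * selecting the "mono" clock; the tracefs path is an assumption:
 *
 *        #include <fcntl.h>
 *        #include <string.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                const char *clock = "mono";
 *                int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, clock, strlen(clock));    // reading now shows [mono]
 *                close(fd);
 *                return 0;
 *        }
 */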
6582
6583 static int tracing_clock_open(struct inode *inode, struct file *file)
6584 {
6585         struct trace_array *tr = inode->i_private;
6586         int ret;
6587
6588         ret = tracing_check_open_get_tr(tr);
6589         if (ret)
6590                 return ret;
6591
6592         ret = single_open(file, tracing_clock_show, inode->i_private);
6593         if (ret < 0)
6594                 trace_array_put(tr);
6595
6596         return ret;
6597 }
6598
6599 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
6600 {
6601         struct trace_array *tr = m->private;
6602
6603         mutex_lock(&trace_types_lock);
6604
6605         if (ring_buffer_time_stamp_abs(tr->trace_buffer.buffer))
6606                 seq_puts(m, "delta [absolute]\n");
6607         else
6608                 seq_puts(m, "[delta] absolute\n");
6609
6610         mutex_unlock(&trace_types_lock);
6611
6612         return 0;
6613 }
6614
6615 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
6616 {
6617         struct trace_array *tr = inode->i_private;
6618         int ret;
6619
6620         ret = tracing_check_open_get_tr(tr);
6621         if (ret)
6622                 return ret;
6623
6624         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
6625         if (ret < 0)
6626                 trace_array_put(tr);
6627
6628         return ret;
6629 }
6630
6631 int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
6632 {
6633         int ret = 0;
6634
6635         mutex_lock(&trace_types_lock);
6636
6637         if (abs && tr->time_stamp_abs_ref++)
6638                 goto out;
6639
6640         if (!abs) {
6641                 if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
6642                         ret = -EINVAL;
6643                         goto out;
6644                 }
6645
6646                 if (--tr->time_stamp_abs_ref)
6647                         goto out;
6648         }
6649
6650         ring_buffer_set_time_stamp_abs(tr->trace_buffer.buffer, abs);
6651
6652 #ifdef CONFIG_TRACER_MAX_TRACE
6653         if (tr->max_buffer.buffer)
6654                 ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
6655 #endif
6656  out:
6657         mutex_unlock(&trace_types_lock);
6658
6659         return ret;
6660 }
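
/*
 * For illustration only: a hypothetical in-kernel caller (the names below are
 * made up, not from this file) showing how the reference count above is meant
 * to be used.  A feature that needs absolute timestamps takes a reference for
 * as long as it is active; the buffer only falls back to delta timestamps
 * when the last such user drops its reference.
 *
 *        static int my_feature_enable(struct trace_array *tr)
 *        {
 *                return tracing_set_time_stamp_abs(tr, true);    // take a ref
 *        }
 *
 *        static void my_feature_disable(struct trace_array *tr)
 *        {
 *                tracing_set_time_stamp_abs(tr, false);          // drop it
 *        }
 */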
6661
6662 struct ftrace_buffer_info {
6663         struct trace_iterator   iter;
6664         void                    *spare;
6665         unsigned int            spare_cpu;
6666         unsigned int            read;
6667 };
6668
6669 #ifdef CONFIG_TRACER_SNAPSHOT
6670 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6671 {
6672         struct trace_array *tr = inode->i_private;
6673         struct trace_iterator *iter;
6674         struct seq_file *m;
6675         int ret;
6676
6677         ret = tracing_check_open_get_tr(tr);
6678         if (ret)
6679                 return ret;
6680
6681         if (file->f_mode & FMODE_READ) {
6682                 iter = __tracing_open(inode, file, true);
6683                 if (IS_ERR(iter))
6684                         ret = PTR_ERR(iter);
6685         } else {
6686                 /* Writes still need the seq_file to hold the private data */
6687                 ret = -ENOMEM;
6688                 m = kzalloc(sizeof(*m), GFP_KERNEL);
6689                 if (!m)
6690                         goto out;
6691                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6692                 if (!iter) {
6693                         kfree(m);
6694                         goto out;
6695                 }
6696                 ret = 0;
6697
6698                 iter->tr = tr;
6699                 iter->trace_buffer = &tr->max_buffer;
6700                 iter->cpu_file = tracing_get_cpu(inode);
6701                 m->private = iter;
6702                 file->private_data = m;
6703         }
6704 out:
6705         if (ret < 0)
6706                 trace_array_put(tr);
6707
6708         return ret;
6709 }
6710
6711 static ssize_t
6712 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6713                        loff_t *ppos)
6714 {
6715         struct seq_file *m = filp->private_data;
6716         struct trace_iterator *iter = m->private;
6717         struct trace_array *tr = iter->tr;
6718         unsigned long val;
6719         int ret;
6720
6721         ret = tracing_update_buffers();
6722         if (ret < 0)
6723                 return ret;
6724
6725         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6726         if (ret)
6727                 return ret;
6728
6729         mutex_lock(&trace_types_lock);
6730
6731         if (tr->current_trace->use_max_tr) {
6732                 ret = -EBUSY;
6733                 goto out;
6734         }
6735
6736         arch_spin_lock(&tr->max_lock);
6737         if (tr->cond_snapshot)
6738                 ret = -EBUSY;
6739         arch_spin_unlock(&tr->max_lock);
6740         if (ret)
6741                 goto out;
6742
6743         switch (val) {
6744         case 0:
6745                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6746                         ret = -EINVAL;
6747                         break;
6748                 }
6749                 if (tr->allocated_snapshot)
6750                         free_snapshot(tr);
6751                 break;
6752         case 1:
6753 /* Only allow per-cpu swap if the ring buffer supports it */
6754 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6755                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6756                         ret = -EINVAL;
6757                         break;
6758                 }
6759 #endif
6760                 if (tr->allocated_snapshot)
6761                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
6762                                         &tr->trace_buffer, iter->cpu_file);
6763                 else
6764                         ret = tracing_alloc_snapshot_instance(tr);
6765                 if (ret < 0)
6766                         break;
6767                 local_irq_disable();
6768                 /* Now, we're going to swap */
6769                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6770                         update_max_tr(tr, current, smp_processor_id(), NULL);
6771                 else
6772                         update_max_tr_single(tr, current, iter->cpu_file);
6773                 local_irq_enable();
6774                 break;
6775         default:
6776                 if (tr->allocated_snapshot) {
6777                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6778                                 tracing_reset_online_cpus(&tr->max_buffer);
6779                         else
6780                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
6781                 }
6782                 break;
6783         }
6784
6785         if (ret >= 0) {
6786                 *ppos += cnt;
6787                 ret = cnt;
6788         }
6789 out:
6790         mutex_unlock(&trace_types_lock);
6791         return ret;
6792 }
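
/*
 * For illustration only (not part of the original file): the switch above
 * defines the snapshot file's protocol: writing 0 frees the snapshot buffer,
 * 1 allocates it (if needed) and swaps it with the live buffer, and any other
 * value clears the snapshot contents.  The frozen data is then read back from
 * the same file.  A minimal sketch; the tracefs path is an assumption:
 *
 *        #include <fcntl.h>
 *        #include <stdio.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                const char *path = "/sys/kernel/tracing/snapshot";
 *                char buf[4096];
 *                ssize_t n;
 *                int fd = open(path, O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, "1", 1);              // take a snapshot now
 *                close(fd);
 *
 *                fd = open(path, O_RDONLY);
 *                if (fd < 0)
 *                        return 1;
 *                while ((n = read(fd, buf, sizeof(buf))) > 0)
 *                        fwrite(buf, 1, n, stdout);      // dump the frozen buffer
 *                close(fd);
 *                return 0;
 *        }
 */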
6793
6794 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6795 {
6796         struct seq_file *m = file->private_data;
6797         int ret;
6798
6799         ret = tracing_release(inode, file);
6800
6801         if (file->f_mode & FMODE_READ)
6802                 return ret;
6803
6804         /* If write only, the seq_file is just a stub */
6805         if (m)
6806                 kfree(m->private);
6807         kfree(m);
6808
6809         return 0;
6810 }
6811
6812 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6813 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6814                                     size_t count, loff_t *ppos);
6815 static int tracing_buffers_release(struct inode *inode, struct file *file);
6816 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6817                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6818
6819 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6820 {
6821         struct ftrace_buffer_info *info;
6822         int ret;
6823
6824         /* The following checks for tracefs lockdown */
6825         ret = tracing_buffers_open(inode, filp);
6826         if (ret < 0)
6827                 return ret;
6828
6829         info = filp->private_data;
6830
6831         if (info->iter.trace->use_max_tr) {
6832                 tracing_buffers_release(inode, filp);
6833                 return -EBUSY;
6834         }
6835
6836         info->iter.snapshot = true;
6837         info->iter.trace_buffer = &info->iter.tr->max_buffer;
6838
6839         return ret;
6840 }
6841
6842 #endif /* CONFIG_TRACER_SNAPSHOT */
6843
6844
6845 static const struct file_operations tracing_thresh_fops = {
6846         .open           = tracing_open_generic,
6847         .read           = tracing_thresh_read,
6848         .write          = tracing_thresh_write,
6849         .llseek         = generic_file_llseek,
6850 };
6851
6852 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6853 static const struct file_operations tracing_max_lat_fops = {
6854         .open           = tracing_open_generic,
6855         .read           = tracing_max_lat_read,
6856         .write          = tracing_max_lat_write,
6857         .llseek         = generic_file_llseek,
6858 };
6859 #endif
6860
6861 static const struct file_operations set_tracer_fops = {
6862         .open           = tracing_open_generic,
6863         .read           = tracing_set_trace_read,
6864         .write          = tracing_set_trace_write,
6865         .llseek         = generic_file_llseek,
6866 };
6867
6868 static const struct file_operations tracing_pipe_fops = {
6869         .open           = tracing_open_pipe,
6870         .poll           = tracing_poll_pipe,
6871         .read           = tracing_read_pipe,
6872         .splice_read    = tracing_splice_read_pipe,
6873         .release        = tracing_release_pipe,
6874         .llseek         = no_llseek,
6875 };
6876
6877 static const struct file_operations tracing_entries_fops = {
6878         .open           = tracing_open_generic_tr,
6879         .read           = tracing_entries_read,
6880         .write          = tracing_entries_write,
6881         .llseek         = generic_file_llseek,
6882         .release        = tracing_release_generic_tr,
6883 };
6884
6885 static const struct file_operations tracing_total_entries_fops = {
6886         .open           = tracing_open_generic_tr,
6887         .read           = tracing_total_entries_read,
6888         .llseek         = generic_file_llseek,
6889         .release        = tracing_release_generic_tr,
6890 };
6891
6892 static const struct file_operations tracing_free_buffer_fops = {
6893         .open           = tracing_open_generic_tr,
6894         .write          = tracing_free_buffer_write,
6895         .release        = tracing_free_buffer_release,
6896 };
6897
6898 static const struct file_operations tracing_mark_fops = {
6899         .open           = tracing_open_generic_tr,
6900         .write          = tracing_mark_write,
6901         .llseek         = generic_file_llseek,
6902         .release        = tracing_release_generic_tr,
6903 };
6904
6905 static const struct file_operations tracing_mark_raw_fops = {
6906         .open           = tracing_open_generic_tr,
6907         .write          = tracing_mark_raw_write,
6908         .llseek         = generic_file_llseek,
6909         .release        = tracing_release_generic_tr,
6910 };
6911
6912 static const struct file_operations trace_clock_fops = {
6913         .open           = tracing_clock_open,
6914         .read           = seq_read,
6915         .llseek         = seq_lseek,
6916         .release        = tracing_single_release_tr,
6917         .write          = tracing_clock_write,
6918 };
6919
6920 static const struct file_operations trace_time_stamp_mode_fops = {
6921         .open           = tracing_time_stamp_mode_open,
6922         .read           = seq_read,
6923         .llseek         = seq_lseek,
6924         .release        = tracing_single_release_tr,
6925 };
6926
6927 #ifdef CONFIG_TRACER_SNAPSHOT
6928 static const struct file_operations snapshot_fops = {
6929         .open           = tracing_snapshot_open,
6930         .read           = seq_read,
6931         .write          = tracing_snapshot_write,
6932         .llseek         = tracing_lseek,
6933         .release        = tracing_snapshot_release,
6934 };
6935
6936 static const struct file_operations snapshot_raw_fops = {
6937         .open           = snapshot_raw_open,
6938         .read           = tracing_buffers_read,
6939         .release        = tracing_buffers_release,
6940         .splice_read    = tracing_buffers_splice_read,
6941         .llseek         = no_llseek,
6942 };
6943
6944 #endif /* CONFIG_TRACER_SNAPSHOT */
6945
6946 #define TRACING_LOG_ERRS_MAX    8
6947 #define TRACING_LOG_LOC_MAX     128
6948
6949 #define CMD_PREFIX "  Command: "
6950
6951 struct err_info {
6952         const char      **errs; /* ptr to loc-specific array of err strings */
6953         u8              type;   /* index into errs -> specific err string */
6954         u8              pos;    /* caret position (MAX_FILTER_STR_VAL = 256 fits in u8) */
6955         u64             ts;
6956 };
6957
6958 struct tracing_log_err {
6959         struct list_head        list;
6960         struct err_info         info;
6961         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
6962         char                    cmd[MAX_FILTER_STR_VAL]; /* what caused err */
6963 };
6964
6965 static DEFINE_MUTEX(tracing_err_log_lock);
6966
6967 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
6968 {
6969         struct tracing_log_err *err;
6970
6971         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
6972                 err = kzalloc(sizeof(*err), GFP_KERNEL);
6973                 if (!err)
6974                         err = ERR_PTR(-ENOMEM);
6975                 tr->n_err_log_entries++;
6976
6977                 return err;
6978         }
6979
6980         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
6981         list_del(&err->list);
6982
6983         return err;
6984 }
6985
6986 /**
6987  * err_pos - find the position of a string within a command for error careting
6988  * @cmd: The tracing command that caused the error
6989  * @str: The string to position the caret at within @cmd
6990  *
6991  * Finds the position of the first occurrence of @str within @cmd.  The
6992  * return value can be passed to tracing_log_err() for caret placement
6993  * within @cmd.
6994  *
6995  * Returns the index within @cmd of the first occurrence of @str or 0
6996  * if @str was not found.
6997  */
6998 unsigned int err_pos(char *cmd, const char *str)
6999 {
7000         char *found;
7001
7002         if (WARN_ON(!strlen(cmd)))
7003                 return 0;
7004
7005         found = strstr(cmd, str);
7006         if (found)
7007                 return found - cmd;
7008
7009         return 0;
7010 }
7011
7012 /**
7013  * tracing_log_err - write an error to the tracing error log
7014  * @tr: The associated trace array for the error (NULL for top level array)
7015  * @loc: A string describing where the error occurred
7016  * @cmd: The tracing command that caused the error
7017  * @errs: The array of loc-specific static error strings
7018  * @type: The index into errs[], which produces the specific static err string
7019  * @pos: The position the caret should be placed in the cmd
7020  *
7021  * Writes an error into tracing/error_log of the form:
7022  *
7023  * <loc>: error: <text>
7024  *   Command: <cmd>
7025  *              ^
7026  *
7027  * tracing/error_log is a small log file containing the last
7028  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7029  * unless there has been a tracing error, and the error log can be
7030  * cleared and have its memory freed by writing the empty string in
7031  * truncation mode to it, i.e. echo > tracing/error_log.
7032  *
7033  * NOTE: the @errs array along with the @type param are used to
7034  * produce a static error string - this string is not copied and saved
7035  * when the error is logged - only a pointer to it is saved.  See
7036  * existing callers for examples of how static strings are typically
7037  * defined for use with tracing_log_err().
7038  */
7039 void tracing_log_err(struct trace_array *tr,
7040                      const char *loc, const char *cmd,
7041                      const char **errs, u8 type, u8 pos)
7042 {
7043         struct tracing_log_err *err;
7044
7045         if (!tr)
7046                 tr = &global_trace;
7047
7048         mutex_lock(&tracing_err_log_lock);
7049         err = get_tracing_log_err(tr);
7050         if (PTR_ERR(err) == -ENOMEM) {
7051                 mutex_unlock(&tracing_err_log_lock);
7052                 return;
7053         }
7054
7055         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7056         snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7057
7058         err->info.errs = errs;
7059         err->info.type = type;
7060         err->info.pos = pos;
7061         err->info.ts = local_clock();
7062
7063         list_add_tail(&err->list, &tr->err_log);
7064         mutex_unlock(&tracing_err_log_lock);
7065 }
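
/*
 * For illustration only: a hypothetical caller sketch (none of the names
 * below exist in the kernel) of the convention the kerneldoc above describes.
 * The error strings live in a static array, @type selects one of them, and
 * err_pos() supplies the caret position within the failing command:
 *
 *        static const char *my_parse_errs[] = {
 *                "Unknown keyword",              // type 0
 *                "Missing argument",             // type 1
 *        };
 *
 *        static void my_parse(struct trace_array *tr, char *cmd)
 *        {
 *                if (strstr(cmd, "bogus"))
 *                        tracing_log_err(tr, "my_parser", cmd, my_parse_errs,
 *                                        0, err_pos(cmd, "bogus"));
 *        }
 *
 * which would show up in tracing/error_log roughly as:
 *
 *        [   12.345678] my_parser: error: Unknown keyword
 *          Command: <the failing command line>
 *                   ^
 */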
7066
7067 static void clear_tracing_err_log(struct trace_array *tr)
7068 {
7069         struct tracing_log_err *err, *next;
7070
7071         mutex_lock(&tracing_err_log_lock);
7072         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7073                 list_del(&err->list);
7074                 kfree(err);
7075         }
7076
7077         tr->n_err_log_entries = 0;
7078         mutex_unlock(&tracing_err_log_lock);
7079 }
7080
7081 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7082 {
7083         struct trace_array *tr = m->private;
7084
7085         mutex_lock(&tracing_err_log_lock);
7086
7087         return seq_list_start(&tr->err_log, *pos);
7088 }
7089
7090 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7091 {
7092         struct trace_array *tr = m->private;
7093
7094         return seq_list_next(v, &tr->err_log, pos);
7095 }
7096
7097 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7098 {
7099         mutex_unlock(&tracing_err_log_lock);
7100 }
7101
7102 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7103 {
7104         u8 i;
7105
7106         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7107                 seq_putc(m, ' ');
7108         for (i = 0; i < pos; i++)
7109                 seq_putc(m, ' ');
7110         seq_puts(m, "^\n");
7111 }
7112
7113 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7114 {
7115         struct tracing_log_err *err = v;
7116
7117         if (err) {
7118                 const char *err_text = err->info.errs[err->info.type];
7119                 u64 sec = err->info.ts;
7120                 u32 nsec;
7121
7122                 nsec = do_div(sec, NSEC_PER_SEC);
7123                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7124                            err->loc, err_text);
7125                 seq_printf(m, "%s", err->cmd);
7126                 tracing_err_log_show_pos(m, err->info.pos);
7127         }
7128
7129         return 0;
7130 }
7131
7132 static const struct seq_operations tracing_err_log_seq_ops = {
7133         .start  = tracing_err_log_seq_start,
7134         .next   = tracing_err_log_seq_next,
7135         .stop   = tracing_err_log_seq_stop,
7136         .show   = tracing_err_log_seq_show
7137 };
7138
7139 static int tracing_err_log_open(struct inode *inode, struct file *file)
7140 {
7141         struct trace_array *tr = inode->i_private;
7142         int ret = 0;
7143
7144         ret = tracing_check_open_get_tr(tr);
7145         if (ret)
7146                 return ret;
7147
7148         /* If this file was opened for write, then erase contents */
7149         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7150                 clear_tracing_err_log(tr);
7151
7152         if (file->f_mode & FMODE_READ) {
7153                 ret = seq_open(file, &tracing_err_log_seq_ops);
7154                 if (!ret) {
7155                         struct seq_file *m = file->private_data;
7156                         m->private = tr;
7157                 } else {
7158                         trace_array_put(tr);
7159                 }
7160         }
7161         return ret;
7162 }
7163
7164 static ssize_t tracing_err_log_write(struct file *file,
7165                                      const char __user *buffer,
7166                                      size_t count, loff_t *ppos)
7167 {
7168         return count;
7169 }
7170
7171 static int tracing_err_log_release(struct inode *inode, struct file *file)
7172 {
7173         struct trace_array *tr = inode->i_private;
7174
7175         trace_array_put(tr);
7176
7177         if (file->f_mode & FMODE_READ)
7178                 seq_release(inode, file);
7179
7180         return 0;
7181 }
7182
7183 static const struct file_operations tracing_err_log_fops = {
7184         .open           = tracing_err_log_open,
7185         .write          = tracing_err_log_write,
7186         .read           = seq_read,
7187         .llseek         = seq_lseek,
7188         .release        = tracing_err_log_release,
7189 };
7190
7191 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7192 {
7193         struct trace_array *tr = inode->i_private;
7194         struct ftrace_buffer_info *info;
7195         int ret;
7196
7197         ret = tracing_check_open_get_tr(tr);
7198         if (ret)
7199                 return ret;
7200
7201         info = kzalloc(sizeof(*info), GFP_KERNEL);
7202         if (!info) {
7203                 trace_array_put(tr);
7204                 return -ENOMEM;
7205         }
7206
7207         mutex_lock(&trace_types_lock);
7208
7209         info->iter.tr           = tr;
7210         info->iter.cpu_file     = tracing_get_cpu(inode);
7211         info->iter.trace        = tr->current_trace;
7212         info->iter.trace_buffer = &tr->trace_buffer;
7213         info->spare             = NULL;
7214         /* Force reading ring buffer for first read */
7215         info->read              = (unsigned int)-1;
7216
7217         filp->private_data = info;
7218
7219         tr->current_trace->ref++;
7220
7221         mutex_unlock(&trace_types_lock);
7222
7223         ret = nonseekable_open(inode, filp);
7224         if (ret < 0)
7225                 trace_array_put(tr);
7226
7227         return ret;
7228 }
7229
7230 static __poll_t
7231 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7232 {
7233         struct ftrace_buffer_info *info = filp->private_data;
7234         struct trace_iterator *iter = &info->iter;
7235
7236         return trace_poll(iter, filp, poll_table);
7237 }
7238
7239 static ssize_t
7240 tracing_buffers_read(struct file *filp, char __user *ubuf,
7241                      size_t count, loff_t *ppos)
7242 {
7243         struct ftrace_buffer_info *info = filp->private_data;
7244         struct trace_iterator *iter = &info->iter;
7245         ssize_t ret = 0;
7246         ssize_t size;
7247
7248         if (!count)
7249                 return 0;
7250
7251 #ifdef CONFIG_TRACER_MAX_TRACE
7252         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7253                 return -EBUSY;
7254 #endif
7255
7256         if (!info->spare) {
7257                 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
7258                                                           iter->cpu_file);
7259                 if (IS_ERR(info->spare)) {
7260                         ret = PTR_ERR(info->spare);
7261                         info->spare = NULL;
7262                 } else {
7263                         info->spare_cpu = iter->cpu_file;
7264                 }
7265         }
7266         if (!info->spare)
7267                 return ret;
7268
7269         /* Do we have previous read data to read? */
7270         if (info->read < PAGE_SIZE)
7271                 goto read;
7272
7273  again:
7274         trace_access_lock(iter->cpu_file);
7275         ret = ring_buffer_read_page(iter->trace_buffer->buffer,
7276                                     &info->spare,
7277                                     count,
7278                                     iter->cpu_file, 0);
7279         trace_access_unlock(iter->cpu_file);
7280
7281         if (ret < 0) {
7282                 if (trace_empty(iter)) {
7283                         if ((filp->f_flags & O_NONBLOCK))
7284                                 return -EAGAIN;
7285
7286                         ret = wait_on_pipe(iter, 0);
7287                         if (ret)
7288                                 return ret;
7289
7290                         goto again;
7291                 }
7292                 return 0;
7293         }
7294
7295         info->read = 0;
7296  read:
7297         size = PAGE_SIZE - info->read;
7298         if (size > count)
7299                 size = count;
7300
7301         ret = copy_to_user(ubuf, info->spare + info->read, size);
7302         if (ret == size)
7303                 return -EFAULT;
7304
7305         size -= ret;
7306
7307         *ppos += size;
7308         info->read += size;
7309
7310         return size;
7311 }
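
/*
 * For illustration only (not part of the original file): this read handler
 * serves per_cpu/cpuN/trace_pipe_raw, which hands out whole ring-buffer pages
 * that must be read in page-sized chunks and decoded by a tool that knows the
 * binary sub-buffer format (e.g. what trace-cmd uses).  A minimal sketch that
 * only drains and counts the raw bytes for CPU 0; the path and CPU number are
 * assumptions:
 *
 *        #include <fcntl.h>
 *        #include <stdio.h>
 *        #include <stdlib.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                long psize = sysconf(_SC_PAGESIZE);
 *                char *page = malloc(psize);
 *                long total = 0;
 *                ssize_t n;
 *                int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *                              O_RDONLY | O_NONBLOCK);
 *
 *                if (fd < 0 || !page)
 *                        return 1;
 *                while ((n = read(fd, page, psize)) > 0)
 *                        total += n;     // raw sub-buffer pages, page at a time
 *                printf("drained %ld bytes of raw buffer data\n", total);
 *                close(fd);
 *                free(page);
 *                return 0;
 *        }
 */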
7312
7313 static int tracing_buffers_release(struct inode *inode, struct file *file)
7314 {
7315         struct ftrace_buffer_info *info = file->private_data;
7316         struct trace_iterator *iter = &info->iter;
7317
7318         mutex_lock(&trace_types_lock);
7319
7320         iter->tr->current_trace->ref--;
7321
7322         __trace_array_put(iter->tr);
7323
7324         if (info->spare)
7325                 ring_buffer_free_read_page(iter->trace_buffer->buffer,
7326                                            info->spare_cpu, info->spare);
7327         kfree(info);
7328
7329         mutex_unlock(&trace_types_lock);
7330
7331         return 0;
7332 }
7333
7334 struct buffer_ref {
7335         struct ring_buffer      *buffer;
7336         void                    *page;
7337         int                     cpu;
7338         refcount_t              refcount;
7339 };
7340
7341 static void buffer_ref_release(struct buffer_ref *ref)
7342 {
7343         if (!refcount_dec_and_test(&ref->refcount))
7344                 return;
7345         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7346         kfree(ref);
7347 }
7348
7349 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7350                                     struct pipe_buffer *buf)
7351 {
7352         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7353
7354         buffer_ref_release(ref);
7355         buf->private = 0;
7356 }
7357
7358 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7359                                 struct pipe_buffer *buf)
7360 {
7361         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7362
7363         if (refcount_read(&ref->refcount) > INT_MAX/2)
7364                 return false;
7365
7366         refcount_inc(&ref->refcount);
7367         return true;
7368 }
7369
7370 /* Pipe buffer operations for a buffer. */
7371 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7372         .confirm                = generic_pipe_buf_confirm,
7373         .release                = buffer_pipe_buf_release,
7374         .steal                  = generic_pipe_buf_nosteal,
7375         .get                    = buffer_pipe_buf_get,
7376 };
7377
7378 /*
7379  * Callback from splice_to_pipe(), if we need to release some pages
7380  * at the end of the spd in case we error'ed out in filling the pipe.
7381  * at the end of the spd in case we errored out while filling the pipe.
7382 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7383 {
7384         struct buffer_ref *ref =
7385                 (struct buffer_ref *)spd->partial[i].private;
7386
7387         buffer_ref_release(ref);
7388         spd->partial[i].private = 0;
7389 }
7390
7391 static ssize_t
7392 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7393                             struct pipe_inode_info *pipe, size_t len,
7394                             unsigned int flags)
7395 {
7396         struct ftrace_buffer_info *info = file->private_data;
7397         struct trace_iterator *iter = &info->iter;
7398         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7399         struct page *pages_def[PIPE_DEF_BUFFERS];
7400         struct splice_pipe_desc spd = {
7401                 .pages          = pages_def,
7402                 .partial        = partial_def,
7403                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7404                 .ops            = &buffer_pipe_buf_ops,
7405                 .spd_release    = buffer_spd_release,
7406         };
7407         struct buffer_ref *ref;
7408         int entries, i;
7409         ssize_t ret = 0;
7410
7411 #ifdef CONFIG_TRACER_MAX_TRACE
7412         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7413                 return -EBUSY;
7414 #endif
7415
7416         if (*ppos & (PAGE_SIZE - 1))
7417                 return -EINVAL;
7418
7419         if (len & (PAGE_SIZE - 1)) {
7420                 if (len < PAGE_SIZE)
7421                         return -EINVAL;
7422                 len &= PAGE_MASK;
7423         }
7424
7425         if (splice_grow_spd(pipe, &spd))
7426                 return -ENOMEM;
7427
7428  again:
7429         trace_access_lock(iter->cpu_file);
7430         entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7431
7432         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
7433                 struct page *page;
7434                 int r;
7435
7436                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
7437                 if (!ref) {
7438                         ret = -ENOMEM;
7439                         break;
7440                 }
7441
7442                 refcount_set(&ref->refcount, 1);
7443                 ref->buffer = iter->trace_buffer->buffer;
7444                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
7445                 if (IS_ERR(ref->page)) {
7446                         ret = PTR_ERR(ref->page);
7447                         ref->page = NULL;
7448                         kfree(ref);
7449                         break;
7450                 }
7451                 ref->cpu = iter->cpu_file;
7452
7453                 r = ring_buffer_read_page(ref->buffer, &ref->page,
7454                                           len, iter->cpu_file, 1);
7455                 if (r < 0) {
7456                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
7457                                                    ref->page);
7458                         kfree(ref);
7459                         break;
7460                 }
7461
7462                 page = virt_to_page(ref->page);
7463
7464                 spd.pages[i] = page;
7465                 spd.partial[i].len = PAGE_SIZE;
7466                 spd.partial[i].offset = 0;
7467                 spd.partial[i].private = (unsigned long)ref;
7468                 spd.nr_pages++;
7469                 *ppos += PAGE_SIZE;
7470
7471                 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
7472         }
7473
7474         trace_access_unlock(iter->cpu_file);
7475         spd.nr_pages = i;
7476
7477         /* did we read anything? */
7478         if (!spd.nr_pages) {
7479                 if (ret)
7480                         goto out;
7481
7482                 ret = -EAGAIN;
7483                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
7484                         goto out;
7485
7486                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
7487                 if (ret)
7488                         goto out;
7489
7490                 goto again;
7491         }
7492
7493         ret = splice_to_pipe(pipe, &spd);
7494 out:
7495         splice_shrink_spd(&spd);
7496
7497         return ret;
7498 }
7499
7500 static const struct file_operations tracing_buffers_fops = {
7501         .open           = tracing_buffers_open,
7502         .read           = tracing_buffers_read,
7503         .poll           = tracing_buffers_poll,
7504         .release        = tracing_buffers_release,
7505         .splice_read    = tracing_buffers_splice_read,
7506         .llseek         = no_llseek,
7507 };
7508
7509 static ssize_t
7510 tracing_stats_read(struct file *filp, char __user *ubuf,
7511                    size_t count, loff_t *ppos)
7512 {
7513         struct inode *inode = file_inode(filp);
7514         struct trace_array *tr = inode->i_private;
7515         struct trace_buffer *trace_buf = &tr->trace_buffer;
7516         int cpu = tracing_get_cpu(inode);
7517         struct trace_seq *s;
7518         unsigned long cnt;
7519         unsigned long long t;
7520         unsigned long usec_rem;
7521
7522         s = kmalloc(sizeof(*s), GFP_KERNEL);
7523         if (!s)
7524                 return -ENOMEM;
7525
7526         trace_seq_init(s);
7527
7528         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
7529         trace_seq_printf(s, "entries: %ld\n", cnt);
7530
7531         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
7532         trace_seq_printf(s, "overrun: %ld\n", cnt);
7533
7534         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
7535         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
7536
7537         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
7538         trace_seq_printf(s, "bytes: %ld\n", cnt);
7539
7540         if (trace_clocks[tr->clock_id].in_ns) {
7541                 /* local or global for trace_clock */
7542                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7543                 usec_rem = do_div(t, USEC_PER_SEC);
7544                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
7545                                                                 t, usec_rem);
7546
7547                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
7548                 usec_rem = do_div(t, USEC_PER_SEC);
7549                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
7550         } else {
7551                 /* counter or tsc mode for trace_clock */
7552                 trace_seq_printf(s, "oldest event ts: %llu\n",
7553                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
7554
7555                 trace_seq_printf(s, "now ts: %llu\n",
7556                                 ring_buffer_time_stamp(trace_buf->buffer, cpu));
7557         }
7558
7559         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
7560         trace_seq_printf(s, "dropped events: %ld\n", cnt);
7561
7562         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
7563         trace_seq_printf(s, "read events: %ld\n", cnt);
7564
7565         count = simple_read_from_buffer(ubuf, count, ppos,
7566                                         s->buffer, trace_seq_used(s));
7567
7568         kfree(s);
7569
7570         return count;
7571 }
7572
7573 static const struct file_operations tracing_stats_fops = {
7574         .open           = tracing_open_generic_tr,
7575         .read           = tracing_stats_read,
7576         .llseek         = generic_file_llseek,
7577         .release        = tracing_release_generic_tr,
7578 };
7579
7580 #ifdef CONFIG_DYNAMIC_FTRACE
7581
7582 static ssize_t
7583 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
7584                   size_t cnt, loff_t *ppos)
7585 {
7586         unsigned long *p = filp->private_data;
7587         char buf[64]; /* Not too big for a shallow stack */
7588         int r;
7589
7590         r = scnprintf(buf, 63, "%ld", *p);
7591         buf[r++] = '\n';
7592
7593         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7594 }
7595
7596 static const struct file_operations tracing_dyn_info_fops = {
7597         .open           = tracing_open_generic,
7598         .read           = tracing_read_dyn_info,
7599         .llseek         = generic_file_llseek,
7600 };
7601 #endif /* CONFIG_DYNAMIC_FTRACE */
7602
7603 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
7604 static void
7605 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
7606                 struct trace_array *tr, struct ftrace_probe_ops *ops,
7607                 void *data)
7608 {
7609         tracing_snapshot_instance(tr);
7610 }
7611
7612 static void
7613 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
7614                       struct trace_array *tr, struct ftrace_probe_ops *ops,
7615                       void *data)
7616 {
7617         struct ftrace_func_mapper *mapper = data;
7618         long *count = NULL;
7619
7620         if (mapper)
7621                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7622
7623         if (count) {
7624
7625                 if (*count <= 0)
7626                         return;
7627
7628                 (*count)--;
7629         }
7630
7631         tracing_snapshot_instance(tr);
7632 }
7633
7634 static int
7635 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
7636                       struct ftrace_probe_ops *ops, void *data)
7637 {
7638         struct ftrace_func_mapper *mapper = data;
7639         long *count = NULL;
7640
7641         seq_printf(m, "%ps:", (void *)ip);
7642
7643         seq_puts(m, "snapshot");
7644
7645         if (mapper)
7646                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
7647
7648         if (count)
7649                 seq_printf(m, ":count=%ld\n", *count);
7650         else
7651                 seq_puts(m, ":unlimited\n");
7652
7653         return 0;
7654 }
7655
7656 static int
7657 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
7658                      unsigned long ip, void *init_data, void **data)
7659 {
7660         struct ftrace_func_mapper *mapper = *data;
7661
7662         if (!mapper) {
7663                 mapper = allocate_ftrace_func_mapper();
7664                 if (!mapper)
7665                         return -ENOMEM;
7666                 *data = mapper;
7667         }
7668
7669         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
7670 }
7671
7672 static void
7673 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
7674                      unsigned long ip, void *data)
7675 {
7676         struct ftrace_func_mapper *mapper = data;
7677
7678         if (!ip) {
7679                 if (!mapper)
7680                         return;
7681                 free_ftrace_func_mapper(mapper, NULL);
7682                 return;
7683         }
7684
7685         ftrace_func_mapper_remove_ip(mapper, ip);
7686 }
7687
7688 static struct ftrace_probe_ops snapshot_probe_ops = {
7689         .func                   = ftrace_snapshot,
7690         .print                  = ftrace_snapshot_print,
7691 };
7692
7693 static struct ftrace_probe_ops snapshot_count_probe_ops = {
7694         .func                   = ftrace_count_snapshot,
7695         .print                  = ftrace_snapshot_print,
7696         .init                   = ftrace_snapshot_init,
7697         .free                   = ftrace_snapshot_free,
7698 };
7699
7700 static int
7701 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
7702                                char *glob, char *cmd, char *param, int enable)
7703 {
7704         struct ftrace_probe_ops *ops;
7705         void *count = (void *)-1;
7706         char *number;
7707         int ret;
7708
7709         if (!tr)
7710                 return -ENODEV;
7711
7712         /* hash funcs only work with set_ftrace_filter */
7713         if (!enable)
7714                 return -EINVAL;
7715
7716         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
7717
7718         if (glob[0] == '!')
7719                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
7720
7721         if (!param)
7722                 goto out_reg;
7723
7724         number = strsep(&param, ":");
7725
7726         if (!strlen(number))
7727                 goto out_reg;
7728
7729         /*
7730          * We use the callback data field (which is a pointer)
7731          * as our counter.
7732          */
7733         ret = kstrtoul(number, 0, (unsigned long *)&count);
7734         if (ret)
7735                 return ret;
7736
7737  out_reg:
7738         ret = tracing_alloc_snapshot_instance(tr);
7739         if (ret < 0)
7740                 goto out;
7741
7742         ret = register_ftrace_function_probe(glob, tr, ops, count);
7743
7744  out:
7745         return ret < 0 ? ret : 0;
7746 }
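
/*
 * For illustration only (not part of the original file): the callback above
 * implements the "snapshot" command parsed from set_ftrace_filter.  The
 * format is "<function>:snapshot[:count]"; the optional count limits how many
 * snapshots are taken, and a leading '!' removes the probe again.  A minimal
 * sketch; "schedule" is just an example of a traceable function name and the
 * tracefs path is an assumption:
 *
 *        #include <fcntl.h>
 *        #include <string.h>
 *        #include <unistd.h>
 *
 *        int main(void)
 *        {
 *                const char *cmd = "schedule:snapshot:1";
 *                int fd = open("/sys/kernel/tracing/set_ftrace_filter", O_WRONLY);
 *
 *                if (fd < 0)
 *                        return 1;
 *                write(fd, cmd, strlen(cmd));    // snapshot once when schedule() is hit
 *                close(fd);
 *                return 0;
 *        }
 */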
7747
7748 static struct ftrace_func_command ftrace_snapshot_cmd = {
7749         .name                   = "snapshot",
7750         .func                   = ftrace_trace_snapshot_callback,
7751 };
7752
7753 static __init int register_snapshot_cmd(void)
7754 {
7755         return register_ftrace_command(&ftrace_snapshot_cmd);
7756 }
7757 #else
7758 static inline __init int register_snapshot_cmd(void) { return 0; }
7759 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
7760
7761 static struct dentry *tracing_get_dentry(struct trace_array *tr)
7762 {
7763         if (WARN_ON(!tr->dir))
7764                 return ERR_PTR(-ENODEV);
7765
7766         /* Top directory uses NULL as the parent */
7767         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
7768                 return NULL;
7769
7770         /* All sub buffers have a descriptor */
7771         return tr->dir;
7772 }
7773
7774 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
7775 {
7776         struct dentry *d_tracer;
7777
7778         if (tr->percpu_dir)
7779                 return tr->percpu_dir;
7780
7781         d_tracer = tracing_get_dentry(tr);
7782         if (IS_ERR(d_tracer))
7783                 return NULL;
7784
7785         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
7786
7787         WARN_ONCE(!tr->percpu_dir,
7788                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
7789
7790         return tr->percpu_dir;
7791 }
7792
7793 static struct dentry *
7794 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
7795                       void *data, long cpu, const struct file_operations *fops)
7796 {
7797         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
7798
7799         if (ret) /* See tracing_get_cpu() */
7800                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
7801         return ret;
7802 }
7803
7804 static void
7805 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
7806 {
7807         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
7808         struct dentry *d_cpu;
7809         char cpu_dir[30]; /* 30 characters should be more than enough */
7810
7811         if (!d_percpu)
7812                 return;
7813
7814         snprintf(cpu_dir, 30, "cpu%ld", cpu);
7815         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
7816         if (!d_cpu) {
7817                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
7818                 return;
7819         }
7820
7821         /* per cpu trace_pipe */
7822         trace_create_cpu_file("trace_pipe", 0444, d_cpu,
7823                                 tr, cpu, &tracing_pipe_fops);
7824
7825         /* per cpu trace */
7826         trace_create_cpu_file("trace", 0644, d_cpu,
7827                                 tr, cpu, &tracing_fops);
7828
7829         trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
7830                                 tr, cpu, &tracing_buffers_fops);
7831
7832         trace_create_cpu_file("stats", 0444, d_cpu,
7833                                 tr, cpu, &tracing_stats_fops);
7834
7835         trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
7836                                 tr, cpu, &tracing_entries_fops);
7837
7838 #ifdef CONFIG_TRACER_SNAPSHOT
7839         trace_create_cpu_file("snapshot", 0644, d_cpu,
7840                                 tr, cpu, &snapshot_fops);
7841
7842         trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
7843                                 tr, cpu, &snapshot_raw_fops);
7844 #endif
7845 }
7846
7847 #ifdef CONFIG_FTRACE_SELFTEST
7848 /* Let selftest have access to static functions in this file */
7849 #include "trace_selftest.c"
7850 #endif
7851
7852 static ssize_t
7853 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7854                         loff_t *ppos)
7855 {
7856         struct trace_option_dentry *topt = filp->private_data;
7857         char *buf;
7858
7859         if (topt->flags->val & topt->opt->bit)
7860                 buf = "1\n";
7861         else
7862                 buf = "0\n";
7863
7864         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7865 }
7866
7867 static ssize_t
7868 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7869                          loff_t *ppos)
7870 {
7871         struct trace_option_dentry *topt = filp->private_data;
7872         unsigned long val;
7873         int ret;
7874
7875         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7876         if (ret)
7877                 return ret;
7878
7879         if (val != 0 && val != 1)
7880                 return -EINVAL;
7881
7882         if (!!(topt->flags->val & topt->opt->bit) != val) {
7883                 mutex_lock(&trace_types_lock);
7884                 ret = __set_tracer_option(topt->tr, topt->flags,
7885                                           topt->opt, !val);
7886                 mutex_unlock(&trace_types_lock);
7887                 if (ret)
7888                         return ret;
7889         }
7890
7891         *ppos += cnt;
7892
7893         return cnt;
7894 }
7895
7896
7897 static const struct file_operations trace_options_fops = {
7898         .open = tracing_open_generic,
7899         .read = trace_options_read,
7900         .write = trace_options_write,
7901         .llseek = generic_file_llseek,
7902 };
7903
7904 /*
7905  * In order to pass in both the trace_array descriptor as well as the index
7906  * to the flag that the trace option file represents, the trace_array
7907  * has a character array of trace_flags_index[], which holds the index
7908  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7909  * The address of the array element for a given flag is passed to that
7910  * flag's option file read/write callbacks.
7911  *
7912  * In order to extract both the index and the trace_array descriptor,
7913  * get_tr_index() uses the following algorithm.
7914  *
7915  *   idx = *ptr;
7916  *
7917  * Because ptr holds the address of one of the index[] slots, and each
7918  * slot stores its own position (index[i] == i), dereferencing ptr
7919  * yields the flag index directly.
7920  *
7921  * Subtracting that index from ptr then gives the start of the array:
7922  *
7923  *   ptr - idx == &index[0]
7924  *
7925  * Then a simple container_of() from that pointer gets us to the
7926  * trace_array descriptor.
7927  */
7928 static void get_tr_index(void *data, struct trace_array **ptr,
7929                          unsigned int *pindex)
7930 {
7931         *pindex = *(unsigned char *)data;
7932
7933         *ptr = container_of(data - *pindex, struct trace_array,
7934                             trace_flags_index);
7935 }
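
/*
 * For illustration only: a standalone userspace model (not kernel code) of
 * the trick described in the comment above.  Each slot of a small index array
 * stores its own position, so a pointer to one slot encodes the flag index,
 * and container_of()-style arithmetic recovers the enclosing structure.  The
 * struct and field names below are invented for the demo:
 *
 *        #include <stddef.h>
 *        #include <stdio.h>
 *
 *        #define container_of(ptr, type, member) \
 *                ((type *)((char *)(ptr) - offsetof(type, member)))
 *
 *        struct demo_array {
 *                const char *name;
 *                unsigned char flags_index[8];
 *        };
 *
 *        int main(void)
 *        {
 *                struct demo_array tr = { .name = "demo" };
 *                struct demo_array *back;
 *                unsigned int i, idx;
 *                void *data;
 *
 *                for (i = 0; i < 8; i++)
 *                        tr.flags_index[i] = i;          // index[i] == i
 *
 *                data = &tr.flags_index[3];      // what private_data would hold
 *                idx = *(unsigned char *)data;   // recover the flag index (3)
 *                back = container_of((char *)data - idx,
 *                                    struct demo_array, flags_index);
 *                printf("idx=%u name=%s\n", idx, back->name);    // idx=3 name=demo
 *                return 0;
 *        }
 */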
7936
7937 static ssize_t
7938 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7939                         loff_t *ppos)
7940 {
7941         void *tr_index = filp->private_data;
7942         struct trace_array *tr;
7943         unsigned int index;
7944         char *buf;
7945
7946         get_tr_index(tr_index, &tr, &index);
7947
7948         if (tr->trace_flags & (1 << index))
7949                 buf = "1\n";
7950         else
7951                 buf = "0\n";
7952
7953         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7954 }
7955
7956 static ssize_t
7957 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7958                          loff_t *ppos)
7959 {
7960         void *tr_index = filp->private_data;
7961         struct trace_array *tr;
7962         unsigned int index;
7963         unsigned long val;
7964         int ret;
7965
7966         get_tr_index(tr_index, &tr, &index);
7967
7968         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7969         if (ret)
7970                 return ret;
7971
7972         if (val != 0 && val != 1)
7973                 return -EINVAL;
7974
7975         mutex_lock(&trace_types_lock);
7976         ret = set_tracer_flag(tr, 1 << index, val);
7977         mutex_unlock(&trace_types_lock);
7978
7979         if (ret < 0)
7980                 return ret;
7981
7982         *ppos += cnt;
7983
7984         return cnt;
7985 }
7986
7987 static const struct file_operations trace_options_core_fops = {
7988         .open = tracing_open_generic,
7989         .read = trace_options_core_read,
7990         .write = trace_options_core_write,
7991         .llseek = generic_file_llseek,
7992 };
7993
7994 struct dentry *trace_create_file(const char *name,
7995                                  umode_t mode,
7996                                  struct dentry *parent,
7997                                  void *data,
7998                                  const struct file_operations *fops)
7999 {
8000         struct dentry *ret;
8001
8002         ret = tracefs_create_file(name, mode, parent, data, fops);
8003         if (!ret)
8004                 pr_warn("Could not create tracefs '%s' entry\n", name);
8005
8006         return ret;
8007 }
8008
8009
8010 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8011 {
8012         struct dentry *d_tracer;
8013
8014         if (tr->options)
8015                 return tr->options;
8016
8017         d_tracer = tracing_get_dentry(tr);
8018         if (IS_ERR(d_tracer))
8019                 return NULL;
8020
8021         tr->options = tracefs_create_dir("options", d_tracer);
8022         if (!tr->options) {
8023                 pr_warn("Could not create tracefs directory 'options'\n");
8024                 return NULL;
8025         }
8026
8027         return tr->options;
8028 }
8029
8030 static void
8031 create_trace_option_file(struct trace_array *tr,
8032                          struct trace_option_dentry *topt,
8033                          struct tracer_flags *flags,
8034                          struct tracer_opt *opt)
8035 {
8036         struct dentry *t_options;
8037
8038         t_options = trace_options_init_dentry(tr);
8039         if (!t_options)
8040                 return;
8041
8042         topt->flags = flags;
8043         topt->opt = opt;
8044         topt->tr = tr;
8045
8046         topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8047                                     &trace_options_fops);
8048
8049 }
8050
8051 static void
8052 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8053 {
8054         struct trace_option_dentry *topts;
8055         struct trace_options *tr_topts;
8056         struct tracer_flags *flags;
8057         struct tracer_opt *opts;
8058         int cnt;
8059         int i;
8060
8061         if (!tracer)
8062                 return;
8063
8064         flags = tracer->flags;
8065
8066         if (!flags || !flags->opts)
8067                 return;
8068
8069         /*
8070          * If this is an instance, only create flags for tracers
8071          * the instance may have.
8072          */
8073         if (!trace_ok_for_array(tracer, tr))
8074                 return;
8075
8076         for (i = 0; i < tr->nr_topts; i++) {
8077                 /* Make sure there are no duplicate flags. */
8078                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8079                         return;
8080         }
8081
8082         opts = flags->opts;
8083
8084         for (cnt = 0; opts[cnt].name; cnt++)
8085                 ;
8086
8087         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8088         if (!topts)
8089                 return;
8090
8091         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8092                             GFP_KERNEL);
8093         if (!tr_topts) {
8094                 kfree(topts);
8095                 return;
8096         }
8097
8098         tr->topts = tr_topts;
8099         tr->topts[tr->nr_topts].tracer = tracer;
8100         tr->topts[tr->nr_topts].topts = topts;
8101         tr->nr_topts++;
8102
8103         for (cnt = 0; opts[cnt].name; cnt++) {
8104                 create_trace_option_file(tr, &topts[cnt], flags,
8105                                          &opts[cnt]);
8106                 WARN_ONCE(topts[cnt].entry == NULL,
8107                           "Failed to create trace option: %s",
8108                           opts[cnt].name);
8109         }
8110 }
8111
8112 static struct dentry *
8113 create_trace_option_core_file(struct trace_array *tr,
8114                               const char *option, long index)
8115 {
8116         struct dentry *t_options;
8117
8118         t_options = trace_options_init_dentry(tr);
8119         if (!t_options)
8120                 return NULL;
8121
8122         return trace_create_file(option, 0644, t_options,
8123                                  (void *)&tr->trace_flags_index[index],
8124                                  &trace_options_core_fops);
8125 }
8126
8127 static void create_trace_options_dir(struct trace_array *tr)
8128 {
8129         struct dentry *t_options;
8130         bool top_level = tr == &global_trace;
8131         int i;
8132
8133         t_options = trace_options_init_dentry(tr);
8134         if (!t_options)
8135                 return;
8136
8137         for (i = 0; trace_options[i]; i++) {
8138                 if (top_level ||
8139                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8140                         create_trace_option_core_file(tr, trace_options[i], i);
8141         }
8142 }
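
/*
 * Usage sketch (illustrative, not part of this file): the files created
 * above land in the per-instance "options" directory, one file per core
 * trace flag, so individual flags can be inspected and flipped from
 * userspace, e.g.:
 *
 *   cat /sys/kernel/tracing/options/sym-offset      # prints 0 or 1
 *   echo 1 > /sys/kernel/tracing/options/sym-offset
 *
 * Tracer specific options added by create_trace_option_files() show up
 * in the same directory alongside the core ones.
 */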
8143
8144 static ssize_t
8145 rb_simple_read(struct file *filp, char __user *ubuf,
8146                size_t cnt, loff_t *ppos)
8147 {
8148         struct trace_array *tr = filp->private_data;
8149         char buf[64];
8150         int r;
8151
8152         r = tracer_tracing_is_on(tr);
8153         r = sprintf(buf, "%d\n", r);
8154
8155         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8156 }
8157
8158 static ssize_t
8159 rb_simple_write(struct file *filp, const char __user *ubuf,
8160                 size_t cnt, loff_t *ppos)
8161 {
8162         struct trace_array *tr = filp->private_data;
8163         struct ring_buffer *buffer = tr->trace_buffer.buffer;
8164         unsigned long val;
8165         int ret;
8166
8167         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8168         if (ret)
8169                 return ret;
8170
8171         if (buffer) {
8172                 mutex_lock(&trace_types_lock);
8173                 if (!!val == tracer_tracing_is_on(tr)) {
8174                         val = 0; /* do nothing */
8175                 } else if (val) {
8176                         tracer_tracing_on(tr);
8177                         if (tr->current_trace->start)
8178                                 tr->current_trace->start(tr);
8179                 } else {
8180                         tracer_tracing_off(tr);
8181                         if (tr->current_trace->stop)
8182                                 tr->current_trace->stop(tr);
8183                 }
8184                 mutex_unlock(&trace_types_lock);
8185         }
8186
8187         (*ppos)++;
8188
8189         return cnt;
8190 }
8191
8192 static const struct file_operations rb_simple_fops = {
8193         .open           = tracing_open_generic_tr,
8194         .read           = rb_simple_read,
8195         .write          = rb_simple_write,
8196         .release        = tracing_release_generic_tr,
8197         .llseek         = default_llseek,
8198 };
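
/*
 * Usage sketch (illustrative): rb_simple_fops backs the per-instance
 * "tracing_on" file created in init_tracer_tracefs() below.  Writing
 * 0 or 1 pauses or resumes recording into the ring buffer without
 * switching tracers, e.g.:
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on    # stop recording
 *   cat /sys/kernel/tracing/trace              # inspect what was captured
 *   echo 1 > /sys/kernel/tracing/tracing_on    # resume recording
 *
 * The same files are reachable under /sys/kernel/debug/tracing via the
 * automount set up in trace_automount() further down.
 */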
8199
8200 static ssize_t
8201 buffer_percent_read(struct file *filp, char __user *ubuf,
8202                     size_t cnt, loff_t *ppos)
8203 {
8204         struct trace_array *tr = filp->private_data;
8205         char buf[64];
8206         int r;
8207
8208         r = tr->buffer_percent;
8209         r = sprintf(buf, "%d\n", r);
8210
8211         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8212 }
8213
8214 static ssize_t
8215 buffer_percent_write(struct file *filp, const char __user *ubuf,
8216                      size_t cnt, loff_t *ppos)
8217 {
8218         struct trace_array *tr = filp->private_data;
8219         unsigned long val;
8220         int ret;
8221
8222         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8223         if (ret)
8224                 return ret;
8225
8226         if (val > 100)
8227                 return -EINVAL;
8228
8229         if (!val)
8230                 val = 1;
8231
8232         tr->buffer_percent = val;
8233
8234         (*ppos)++;
8235
8236         return cnt;
8237 }
8238
8239 static const struct file_operations buffer_percent_fops = {
8240         .open           = tracing_open_generic_tr,
8241         .read           = buffer_percent_read,
8242         .write          = buffer_percent_write,
8243         .release        = tracing_release_generic_tr,
8244         .llseek         = default_llseek,
8245 };
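
/*
 * Usage sketch (illustrative): buffer_percent_fops backs the
 * "buffer_percent" file (initialized to 50 in init_tracer_tracefs()).
 * It sets how full the ring buffer should be before waiting readers of
 * the buffer are woken, e.g.:
 *
 *   echo 10 > /sys/kernel/tracing/buffer_percent   # wake readers earlier
 *
 * Values above 100 are rejected with -EINVAL, and a write of 0 is
 * bumped to 1 by buffer_percent_write() above.
 */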
8246
8247 static struct dentry *trace_instance_dir;
8248
8249 static void
8250 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8251
8252 static int
8253 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
8254 {
8255         enum ring_buffer_flags rb_flags;
8256
8257         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8258
8259         buf->tr = tr;
8260
8261         buf->buffer = ring_buffer_alloc(size, rb_flags);
8262         if (!buf->buffer)
8263                 return -ENOMEM;
8264
8265         buf->data = alloc_percpu(struct trace_array_cpu);
8266         if (!buf->data) {
8267                 ring_buffer_free(buf->buffer);
8268                 buf->buffer = NULL;
8269                 return -ENOMEM;
8270         }
8271
8272         /* Allocate the first page for all buffers */
8273         set_buffer_entries(&tr->trace_buffer,
8274                            ring_buffer_size(tr->trace_buffer.buffer, 0));
8275
8276         return 0;
8277 }
8278
8279 static int allocate_trace_buffers(struct trace_array *tr, int size)
8280 {
8281         int ret;
8282
8283         ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
8284         if (ret)
8285                 return ret;
8286
8287 #ifdef CONFIG_TRACER_MAX_TRACE
8288         ret = allocate_trace_buffer(tr, &tr->max_buffer,
8289                                     allocate_snapshot ? size : 1);
8290         if (WARN_ON(ret)) {
8291                 ring_buffer_free(tr->trace_buffer.buffer);
8292                 tr->trace_buffer.buffer = NULL;
8293                 free_percpu(tr->trace_buffer.data);
8294                 tr->trace_buffer.data = NULL;
8295                 return -ENOMEM;
8296         }
8297         tr->allocated_snapshot = allocate_snapshot;
8298
8299         /*
8300          * Only the top level trace array gets its snapshot allocated
8301          * from the kernel command line.
8302          */
8303         allocate_snapshot = false;
8304 #endif
8305         return 0;
8306 }
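
/*
 * Boot-time sketch (illustrative): allocate_snapshot is normally set by
 * the "alloc_snapshot" kernel command line option, so the top level
 * max/snapshot buffer can be sized at boot, e.g.:
 *
 *   alloc_snapshot ftrace=function
 *
 * Instances created later start with a one page snapshot buffer (the
 * "allocate_snapshot ? size : 1" above) that is only grown when a
 * snapshot is actually taken.
 */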
8307
8308 static void free_trace_buffer(struct trace_buffer *buf)
8309 {
8310         if (buf->buffer) {
8311                 ring_buffer_free(buf->buffer);
8312                 buf->buffer = NULL;
8313                 free_percpu(buf->data);
8314                 buf->data = NULL;
8315         }
8316 }
8317
8318 static void free_trace_buffers(struct trace_array *tr)
8319 {
8320         if (!tr)
8321                 return;
8322
8323         free_trace_buffer(&tr->trace_buffer);
8324
8325 #ifdef CONFIG_TRACER_MAX_TRACE
8326         free_trace_buffer(&tr->max_buffer);
8327 #endif
8328 }
8329
8330 static void init_trace_flags_index(struct trace_array *tr)
8331 {
8332         int i;
8333
8334         /* Used by the trace options files */
8335         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8336                 tr->trace_flags_index[i] = i;
8337 }
8338
8339 static void __update_tracer_options(struct trace_array *tr)
8340 {
8341         struct tracer *t;
8342
8343         for (t = trace_types; t; t = t->next)
8344                 add_tracer_options(tr, t);
8345 }
8346
8347 static void update_tracer_options(struct trace_array *tr)
8348 {
8349         mutex_lock(&trace_types_lock);
8350         __update_tracer_options(tr);
8351         mutex_unlock(&trace_types_lock);
8352 }
8353
8354 struct trace_array *trace_array_create(const char *name)
8355 {
8356         struct trace_array *tr;
8357         int ret;
8358
8359         mutex_lock(&event_mutex);
8360         mutex_lock(&trace_types_lock);
8361
8362         ret = -EEXIST;
8363         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8364                 if (tr->name && strcmp(tr->name, name) == 0)
8365                         goto out_unlock;
8366         }
8367
8368         ret = -ENOMEM;
8369         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
8370         if (!tr)
8371                 goto out_unlock;
8372
8373         tr->name = kstrdup(name, GFP_KERNEL);
8374         if (!tr->name)
8375                 goto out_free_tr;
8376
8377         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
8378                 goto out_free_tr;
8379
8380         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
8381
8382         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
8383
8384         raw_spin_lock_init(&tr->start_lock);
8385
8386         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
8387
8388         tr->current_trace = &nop_trace;
8389
8390         INIT_LIST_HEAD(&tr->systems);
8391         INIT_LIST_HEAD(&tr->events);
8392         INIT_LIST_HEAD(&tr->hist_vars);
8393         INIT_LIST_HEAD(&tr->err_log);
8394
8395         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
8396                 goto out_free_tr;
8397
8398         tr->dir = tracefs_create_dir(name, trace_instance_dir);
8399         if (!tr->dir)
8400                 goto out_free_tr;
8401
8402         ret = event_trace_add_tracer(tr->dir, tr);
8403         if (ret) {
8404                 tracefs_remove_recursive(tr->dir);
8405                 goto out_free_tr;
8406         }
8407
8408         ftrace_init_trace_array(tr);
8409
8410         init_tracer_tracefs(tr, tr->dir);
8411         init_trace_flags_index(tr);
8412         __update_tracer_options(tr);
8413
8414         list_add(&tr->list, &ftrace_trace_arrays);
8415
8416         mutex_unlock(&trace_types_lock);
8417         mutex_unlock(&event_mutex);
8418
8419         return tr;
8420
8421  out_free_tr:
8422         free_trace_buffers(tr);
8423         free_cpumask_var(tr->tracing_cpumask);
8424         kfree(tr->name);
8425         kfree(tr);
8426
8427  out_unlock:
8428         mutex_unlock(&trace_types_lock);
8429         mutex_unlock(&event_mutex);
8430
8431         return ERR_PTR(ret);
8432 }
8433 EXPORT_SYMBOL_GPL(trace_array_create);
8434
8435 static int instance_mkdir(const char *name)
8436 {
8437         return PTR_ERR_OR_ZERO(trace_array_create(name));
8438 }
8439
8440 static int __remove_instance(struct trace_array *tr)
8441 {
8442         int i;
8443
8444         if (tr->ref || (tr->current_trace && tr->current_trace->ref))
8445                 return -EBUSY;
8446
8447         list_del(&tr->list);
8448
8449         /* Disable all the flags that were enabled coming in */
8450         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
8451                 if ((1 << i) & ZEROED_TRACE_FLAGS)
8452                         set_tracer_flag(tr, 1 << i, 0);
8453         }
8454
8455         tracing_set_nop(tr);
8456         clear_ftrace_function_probes(tr);
8457         event_trace_del_tracer(tr);
8458         ftrace_clear_pids(tr);
8459         ftrace_destroy_function_files(tr);
8460         tracefs_remove_recursive(tr->dir);
8461         free_trace_buffers(tr);
8462
8463         for (i = 0; i < tr->nr_topts; i++) {
8464                 kfree(tr->topts[i].topts);
8465         }
8466         kfree(tr->topts);
8467
8468         free_cpumask_var(tr->tracing_cpumask);
8469         kfree(tr->name);
8470         kfree(tr);
8471         tr = NULL;
8472
8473         return 0;
8474 }
8475
8476 int trace_array_destroy(struct trace_array *tr)
8477 {
8478         int ret;
8479
8480         if (!tr)
8481                 return -EINVAL;
8482
8483         mutex_lock(&event_mutex);
8484         mutex_lock(&trace_types_lock);
8485
8486         ret = __remove_instance(tr);
8487
8488         mutex_unlock(&trace_types_lock);
8489         mutex_unlock(&event_mutex);
8490
8491         return ret;
8492 }
8493 EXPORT_SYMBOL_GPL(trace_array_destroy);
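
/*
 * Illustrative sketch (not part of this file): with the two exports
 * above, a kernel module can own a private tracing instance, which also
 * appears under /sys/kernel/tracing/instances/<name>.  The instance
 * name and the trace_array_printk() call are assumptions made for the
 * example:
 *
 *   struct trace_array *tr;
 *
 *   tr = trace_array_create("my_driver");
 *   if (IS_ERR(tr))
 *           return PTR_ERR(tr);
 *   trace_array_printk(tr, _THIS_IP_, "hello from my_driver\n");
 *   ...
 *   trace_array_destroy(tr);
 */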
8494
8495 static int instance_rmdir(const char *name)
8496 {
8497         struct trace_array *tr;
8498         int ret;
8499
8500         mutex_lock(&event_mutex);
8501         mutex_lock(&trace_types_lock);
8502
8503         ret = -ENODEV;
8504         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8505                 if (tr->name && strcmp(tr->name, name) == 0) {
8506                         ret = __remove_instance(tr);
8507                         break;
8508                 }
8509         }
8510
8511         mutex_unlock(&trace_types_lock);
8512         mutex_unlock(&event_mutex);
8513
8514         return ret;
8515 }
8516
8517 static __init void create_trace_instances(struct dentry *d_tracer)
8518 {
8519         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
8520                                                          instance_mkdir,
8521                                                          instance_rmdir);
8522         if (WARN_ON(!trace_instance_dir))
8523                 return;
8524 }
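
/*
 * Usage sketch (illustrative): the "instances" directory registered
 * above routes mkdir/rmdir to instance_mkdir()/instance_rmdir(), so a
 * separate trace array with its own buffers and files can be created
 * from userspace, e.g.:
 *
 *   mkdir /sys/kernel/tracing/instances/foo
 *   echo function > /sys/kernel/tracing/instances/foo/current_tracer
 *   rmdir /sys/kernel/tracing/instances/foo   # -EBUSY while still in use
 */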
8525
8526 static void
8527 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
8528 {
8529         struct trace_event_file *file;
8530         int cpu;
8531
8532         trace_create_file("available_tracers", 0444, d_tracer,
8533                         tr, &show_traces_fops);
8534
8535         trace_create_file("current_tracer", 0644, d_tracer,
8536                         tr, &set_tracer_fops);
8537
8538         trace_create_file("tracing_cpumask", 0644, d_tracer,
8539                           tr, &tracing_cpumask_fops);
8540
8541         trace_create_file("trace_options", 0644, d_tracer,
8542                           tr, &tracing_iter_fops);
8543
8544         trace_create_file("trace", 0644, d_tracer,
8545                           tr, &tracing_fops);
8546
8547         trace_create_file("trace_pipe", 0444, d_tracer,
8548                           tr, &tracing_pipe_fops);
8549
8550         trace_create_file("buffer_size_kb", 0644, d_tracer,
8551                           tr, &tracing_entries_fops);
8552
8553         trace_create_file("buffer_total_size_kb", 0444, d_tracer,
8554                           tr, &tracing_total_entries_fops);
8555
8556         trace_create_file("free_buffer", 0200, d_tracer,
8557                           tr, &tracing_free_buffer_fops);
8558
8559         trace_create_file("trace_marker", 0220, d_tracer,
8560                           tr, &tracing_mark_fops);
8561
8562         file = __find_event_file(tr, "ftrace", "print");
8563         if (file && file->dir)
8564                 trace_create_file("trigger", 0644, file->dir, file,
8565                                   &event_trigger_fops);
8566         tr->trace_marker_file = file;
8567
8568         trace_create_file("trace_marker_raw", 0220, d_tracer,
8569                           tr, &tracing_mark_raw_fops);
8570
8571         trace_create_file("trace_clock", 0644, d_tracer, tr,
8572                           &trace_clock_fops);
8573
8574         trace_create_file("tracing_on", 0644, d_tracer,
8575                           tr, &rb_simple_fops);
8576
8577         trace_create_file("timestamp_mode", 0444, d_tracer, tr,
8578                           &trace_time_stamp_mode_fops);
8579
8580         tr->buffer_percent = 50;
8581
8582         trace_create_file("buffer_percent", 0444, d_tracer,
8583                         tr, &buffer_percent_fops);
8584
8585         create_trace_options_dir(tr);
8586
8587 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
8588         trace_create_file("tracing_max_latency", 0644, d_tracer,
8589                         &tr->max_latency, &tracing_max_lat_fops);
8590 #endif
8591
8592         if (ftrace_create_function_files(tr, d_tracer))
8593                 WARN(1, "Could not allocate function filter files");
8594
8595 #ifdef CONFIG_TRACER_SNAPSHOT
8596         trace_create_file("snapshot", 0644, d_tracer,
8597                           tr, &snapshot_fops);
8598 #endif
8599
8600         trace_create_file("error_log", 0644, d_tracer,
8601                           tr, &tracing_err_log_fops);
8602
8603         for_each_tracing_cpu(cpu)
8604                 tracing_init_tracefs_percpu(tr, cpu);
8605
8606         ftrace_init_tracefs(tr, d_tracer);
8607 }
8608
8609 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
8610 {
8611         struct vfsmount *mnt;
8612         struct file_system_type *type;
8613
8614         /*
8615          * To maintain backward compatibility for tools that mount
8616          * debugfs to get to the tracing facility, tracefs is automatically
8617          * mounted to the debugfs/tracing directory.
8618          */
8619         type = get_fs_type("tracefs");
8620         if (!type)
8621                 return NULL;
8622         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
8623         put_filesystem(type);
8624         if (IS_ERR(mnt))
8625                 return NULL;
8626         mntget(mnt);
8627
8628         return mnt;
8629 }
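
/*
 * Usage sketch (illustrative): with the automount above in place, both
 * of the following reach the same tracefs instance on a typical system:
 *
 *   ls /sys/kernel/tracing/            # tracefs mounted directly
 *   ls /sys/kernel/debug/tracing/      # walks the automount point
 *
 * Tracefs can also be mounted by hand, e.g.:
 *
 *   mount -t tracefs nodev /sys/kernel/tracing
 */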
8630
8631 /**
8632  * tracing_init_dentry - initialize top level trace array
8633  *
8634  * This is called when creating files or directories in the tracing
8635  * directory. It is called via fs_initcall() by any of the boot up code
8636  * and expects to return the dentry of the top level tracing directory.
8637  */
8638 struct dentry *tracing_init_dentry(void)
8639 {
8640         struct trace_array *tr = &global_trace;
8641
8642         /* The top level trace array uses NULL as parent */
8643         if (tr->dir)
8644                 return NULL;
8645
8646         if (WARN_ON(!tracefs_initialized()) ||
8647                 (IS_ENABLED(CONFIG_DEBUG_FS) &&
8648                  WARN_ON(!debugfs_initialized())))
8649                 return ERR_PTR(-ENODEV);
8650
8651         /*
8652          * As there may still be users that expect the tracing
8653          * files to exist in debugfs/tracing, we must automount
8654          * the tracefs file system there, so older tools still
8655          * work with the newer kernel.
8656          */
8657         tr->dir = debugfs_create_automount("tracing", NULL,
8658                                            trace_automount, NULL);
8659
8660         return NULL;
8661 }
8662
8663 extern struct trace_eval_map *__start_ftrace_eval_maps[];
8664 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
8665
8666 static void __init trace_eval_init(void)
8667 {
8668         int len;
8669
8670         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
8671         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
8672 }
8673
8674 #ifdef CONFIG_MODULES
8675 static void trace_module_add_evals(struct module *mod)
8676 {
8677         if (!mod->num_trace_evals)
8678                 return;
8679
8680         /*
8681          * Modules with bad taint do not have events created, so do
8682          * not bother with enums either.
8683          */
8684         if (trace_module_has_bad_taint(mod))
8685                 return;
8686
8687         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
8688 }
8689
8690 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
8691 static void trace_module_remove_evals(struct module *mod)
8692 {
8693         union trace_eval_map_item *map;
8694         union trace_eval_map_item **last = &trace_eval_maps;
8695
8696         if (!mod->num_trace_evals)
8697                 return;
8698
8699         mutex_lock(&trace_eval_mutex);
8700
8701         map = trace_eval_maps;
8702
8703         while (map) {
8704                 if (map->head.mod == mod)
8705                         break;
8706                 map = trace_eval_jmp_to_tail(map);
8707                 last = &map->tail.next;
8708                 map = map->tail.next;
8709         }
8710         if (!map)
8711                 goto out;
8712
8713         *last = trace_eval_jmp_to_tail(map)->tail.next;
8714         kfree(map);
8715  out:
8716         mutex_unlock(&trace_eval_mutex);
8717 }
8718 #else
8719 static inline void trace_module_remove_evals(struct module *mod) { }
8720 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
8721
8722 static int trace_module_notify(struct notifier_block *self,
8723                                unsigned long val, void *data)
8724 {
8725         struct module *mod = data;
8726
8727         switch (val) {
8728         case MODULE_STATE_COMING:
8729                 trace_module_add_evals(mod);
8730                 break;
8731         case MODULE_STATE_GOING:
8732                 trace_module_remove_evals(mod);
8733                 break;
8734         }
8735
8736         return 0;
8737 }
8738
8739 static struct notifier_block trace_module_nb = {
8740         .notifier_call = trace_module_notify,
8741         .priority = 0,
8742 };
8743 #endif /* CONFIG_MODULES */
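
/*
 * Background sketch (illustrative): the eval maps handled by this
 * notifier are what let trace event output show enum/sizeof symbols by
 * name rather than by raw value.  When CONFIG_TRACE_EVAL_MAP_FILE is
 * enabled they are also exposed through the file created by
 * trace_create_eval_file() below, e.g.:
 *
 *   cat /sys/kernel/tracing/eval_map
 *
 * The MODULE_STATE_COMING/GOING hooks keep that list in sync as modules
 * with their own trace events are loaded and removed.
 */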
8744
8745 static __init int tracer_init_tracefs(void)
8746 {
8747         struct dentry *d_tracer;
8748
8749         trace_access_lock_init();
8750
8751         d_tracer = tracing_init_dentry();
8752         if (IS_ERR(d_tracer))
8753                 return 0;
8754
8755         event_trace_init();
8756
8757         init_tracer_tracefs(&global_trace, d_tracer);
8758         ftrace_init_tracefs_toplevel(&global_trace, d_tracer);
8759
8760         trace_create_file("tracing_thresh", 0644, d_tracer,
8761                         &global_trace, &tracing_thresh_fops);
8762
8763         trace_create_file("README", 0444, d_tracer,
8764                         NULL, &tracing_readme_fops);
8765
8766         trace_create_file("saved_cmdlines", 0444, d_tracer,
8767                         NULL, &tracing_saved_cmdlines_fops);
8768
8769         trace_create_file("saved_cmdlines_size", 0644, d_tracer,
8770                           NULL, &tracing_saved_cmdlines_size_fops);
8771
8772         trace_create_file("saved_tgids", 0444, d_tracer,
8773                         NULL, &tracing_saved_tgids_fops);
8774
8775         trace_eval_init();
8776
8777         trace_create_eval_file(d_tracer);
8778
8779 #ifdef CONFIG_MODULES
8780         register_module_notifier(&trace_module_nb);
8781 #endif
8782
8783 #ifdef CONFIG_DYNAMIC_FTRACE
8784         trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
8785                         &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
8786 #endif
8787
8788         create_trace_instances(d_tracer);
8789
8790         update_tracer_options(&global_trace);
8791
8792         return 0;
8793 }
8794
8795 static int trace_panic_handler(struct notifier_block *this,
8796                                unsigned long event, void *unused)
8797 {
8798         if (ftrace_dump_on_oops)
8799                 ftrace_dump(ftrace_dump_on_oops);
8800         return NOTIFY_OK;
8801 }
8802
8803 static struct notifier_block trace_panic_notifier = {
8804         .notifier_call  = trace_panic_handler,
8805         .next           = NULL,
8806         .priority       = 150   /* priority: INT_MAX >= x >= 0 */
8807 };
8808
8809 static int trace_die_handler(struct notifier_block *self,
8810                              unsigned long val,
8811                              void *data)
8812 {
8813         switch (val) {
8814         case DIE_OOPS:
8815                 if (ftrace_dump_on_oops)
8816                         ftrace_dump(ftrace_dump_on_oops);
8817                 break;
8818         default:
8819                 break;
8820         }
8821         return NOTIFY_OK;
8822 }
8823
8824 static struct notifier_block trace_die_notifier = {
8825         .notifier_call = trace_die_handler,
8826         .priority = 200
8827 };
8828
8829 /*
8830  * printk is set to max of 1024, we really don't need it that big.
8831  * Nothing should be printing 1000 characters anyway.
8832  */
8833 #define TRACE_MAX_PRINT         1000
8834
8835 /*
8836  * Define here KERN_TRACE so that we have one place to modify
8837  * it if we decide to change what log level the ftrace dump
8838  * should be at.
8839  */
8840 #define KERN_TRACE              KERN_EMERG
8841
8842 void
8843 trace_printk_seq(struct trace_seq *s)
8844 {
8845         /* Probably should print a warning here. */
8846         if (s->seq.len >= TRACE_MAX_PRINT)
8847                 s->seq.len = TRACE_MAX_PRINT;
8848
8849         /*
8850          * More paranoid code. Although the buffer size is set to
8851          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
8852          * an extra layer of protection.
8853          */
8854         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
8855                 s->seq.len = s->seq.size - 1;
8856
8857         /* should be zero terminated, but we are paranoid. */
8858         s->buffer[s->seq.len] = 0;
8859
8860         printk(KERN_TRACE "%s", s->buffer);
8861
8862         trace_seq_init(s);
8863 }
8864
8865 void trace_init_global_iter(struct trace_iterator *iter)
8866 {
8867         iter->tr = &global_trace;
8868         iter->trace = iter->tr->current_trace;
8869         iter->cpu_file = RING_BUFFER_ALL_CPUS;
8870         iter->trace_buffer = &global_trace.trace_buffer;
8871
8872         if (iter->trace && iter->trace->open)
8873                 iter->trace->open(iter);
8874
8875         /* Annotate start of buffers if we had overruns */
8876         if (ring_buffer_overruns(iter->trace_buffer->buffer))
8877                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
8878
8879         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
8880         if (trace_clocks[iter->tr->clock_id].in_ns)
8881                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
8882 }
8883
8884 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
8885 {
8886         /* use static because iter can be a bit big for the stack */
8887         static struct trace_iterator iter;
8888         static atomic_t dump_running;
8889         struct trace_array *tr = &global_trace;
8890         unsigned int old_userobj;
8891         unsigned long flags;
8892         int cnt = 0, cpu;
8893
8894         /* Only allow one dump user at a time. */
8895         if (atomic_inc_return(&dump_running) != 1) {
8896                 atomic_dec(&dump_running);
8897                 return;
8898         }
8899
8900         /*
8901          * Always turn off tracing when we dump.
8902          * We don't need to show trace output of what happens
8903          * between multiple crashes.
8904          *
8905          * If the user does a sysrq-z, then they can re-enable
8906          * tracing with echo 1 > tracing_on.
8907          */
8908         tracing_off();
8909
8910         local_irq_save(flags);
8911         printk_nmi_direct_enter();
8912
8913         /* Simulate the iterator */
8914         trace_init_global_iter(&iter);
8915
8916         for_each_tracing_cpu(cpu) {
8917                 atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8918         }
8919
8920         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
8921
8922         /* don't look at user memory in panic mode */
8923         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
8924
8925         switch (oops_dump_mode) {
8926         case DUMP_ALL:
8927                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8928                 break;
8929         case DUMP_ORIG:
8930                 iter.cpu_file = raw_smp_processor_id();
8931                 break;
8932         case DUMP_NONE:
8933                 goto out_enable;
8934         default:
8935                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
8936                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
8937         }
8938
8939         printk(KERN_TRACE "Dumping ftrace buffer:\n");
8940
8941         /* Did function tracer already get disabled? */
8942         if (ftrace_is_dead()) {
8943                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
8944                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
8945         }
8946
8947         /*
8948          * We need to stop all tracing on all CPUs to read
8949          * the next buffer. This is a bit expensive, but is
8950          * not done often. We fill in all that we can read,
8951          * and then release the locks again.
8952          */
8953
8954         while (!trace_empty(&iter)) {
8955
8956                 if (!cnt)
8957                         printk(KERN_TRACE "---------------------------------\n");
8958
8959                 cnt++;
8960
8961                 trace_iterator_reset(&iter);
8962                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
8963
8964                 if (trace_find_next_entry_inc(&iter) != NULL) {
8965                         int ret;
8966
8967                         ret = print_trace_line(&iter);
8968                         if (ret != TRACE_TYPE_NO_CONSUME)
8969                                 trace_consume(&iter);
8970                 }
8971                 touch_nmi_watchdog();
8972
8973                 trace_printk_seq(&iter.seq);
8974         }
8975
8976         if (!cnt)
8977                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
8978         else
8979                 printk(KERN_TRACE "---------------------------------\n");
8980
8981  out_enable:
8982         tr->trace_flags |= old_userobj;
8983
8984         for_each_tracing_cpu(cpu) {
8985                 atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
8986         }
8987         atomic_dec(&dump_running);
8988         printk_nmi_direct_exit();
8989         local_irq_restore(flags);
8990 }
8991 EXPORT_SYMBOL_GPL(ftrace_dump);
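
/*
 * Usage sketch (illustrative): ftrace_dump() is what the panic and die
 * notifiers above invoke when ftrace_dump_on_oops is set.  That can be
 * arranged at boot time or at run time, e.g.:
 *
 *   ftrace_dump_on_oops                              # kernel command line
 *   echo 1 > /proc/sys/kernel/ftrace_dump_on_oops    # sysctl
 *
 * A dump can also be forced from a working console with sysrq-z.
 */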
8992
8993 int trace_run_command(const char *buf, int (*createfn)(int, char **))
8994 {
8995         char **argv;
8996         int argc, ret;
8997
8998         argc = 0;
8999         ret = 0;
9000         argv = argv_split(GFP_KERNEL, buf, &argc);
9001         if (!argv)
9002                 return -ENOMEM;
9003
9004         if (argc)
9005                 ret = createfn(argc, argv);
9006
9007         argv_free(argv);
9008
9009         return ret;
9010 }
9011
9012 #define WRITE_BUFSIZE  4096
9013
9014 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9015                                 size_t count, loff_t *ppos,
9016                                 int (*createfn)(int, char **))
9017 {
9018         char *kbuf, *buf, *tmp;
9019         int ret = 0;
9020         size_t done = 0;
9021         size_t size;
9022
9023         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9024         if (!kbuf)
9025                 return -ENOMEM;
9026
9027         while (done < count) {
9028                 size = count - done;
9029
9030                 if (size >= WRITE_BUFSIZE)
9031                         size = WRITE_BUFSIZE - 1;
9032
9033                 if (copy_from_user(kbuf, buffer + done, size)) {
9034                         ret = -EFAULT;
9035                         goto out;
9036                 }
9037                 kbuf[size] = '\0';
9038                 buf = kbuf;
9039                 do {
9040                         tmp = strchr(buf, '\n');
9041                         if (tmp) {
9042                                 *tmp = '\0';
9043                                 size = tmp - buf + 1;
9044                         } else {
9045                                 size = strlen(buf);
9046                                 if (done + size < count) {
9047                                         if (buf != kbuf)
9048                                                 break;
9049                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9050                                         pr_warn("Line length is too long: Should be less than %d\n",
9051                                                 WRITE_BUFSIZE - 2);
9052                                         ret = -EINVAL;
9053                                         goto out;
9054                                 }
9055                         }
9056                         done += size;
9057
9058                         /* Remove comments */
9059                         tmp = strchr(buf, '#');
9060
9061                         if (tmp)
9062                                 *tmp = '\0';
9063
9064                         ret = trace_run_command(buf, createfn);
9065                         if (ret)
9066                                 goto out;
9067                         buf += size;
9068
9069                 } while (done < count);
9070         }
9071         ret = done;
9072
9073 out:
9074         kfree(kbuf);
9075
9076         return ret;
9077 }
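
/*
 * Usage sketch (illustrative): trace_parse_run_command() is the write
 * helper behind command style files such as kprobe_events, where every
 * written line is split into an argv[] and handed to the createfn
 * callback.  The probe names below are made up for the example:
 *
 *   echo 'p:my_open do_sys_open' >> /sys/kernel/tracing/kprobe_events
 *   echo '-:my_open'             >> /sys/kernel/tracing/kprobe_events
 *
 * Trailing newlines are handled, anything after '#' is dropped as a
 * comment, and a single line longer than WRITE_BUFSIZE - 2 is rejected
 * with -EINVAL.
 */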
9078
9079 __init static int tracer_alloc_buffers(void)
9080 {
9081         int ring_buf_size;
9082         int ret = -ENOMEM;
9083
9084         /*
9085          * Make sure we don't accidentally add more trace options
9086          * than we have bits for.
9087          */
9088         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9089
9090         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9091                 goto out;
9092
9093         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9094                 goto out_free_buffer_mask;
9095
9096         /* Only allocate trace_printk buffers if a trace_printk exists */
9097         if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
9098                 /* Must be called before global_trace.buffer is allocated */
9099                 trace_printk_init_buffers();
9100
9101         /* To save memory, keep the ring buffer size at its minimum */
9102         if (ring_buffer_expanded)
9103                 ring_buf_size = trace_buf_size;
9104         else
9105                 ring_buf_size = 1;
9106
9107         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9108         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9109
9110         raw_spin_lock_init(&global_trace.start_lock);
9111
9112         /*
9113          * The prepare callback allocates some memory for the ring buffer. We
9114          * don't free the buffer if the CPU goes down. If we were to free
9115          * the buffer, then the user would lose any trace that was in the
9116          * buffer. The memory will be removed once the "instance" is removed.
9117          */
9118         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9119                                       "trace/RB:prepare", trace_rb_cpu_prepare,
9120                                       NULL);
9121         if (ret < 0)
9122                 goto out_free_cpumask;
9123         /* Used for event triggers */
9124         ret = -ENOMEM;
9125         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9126         if (!temp_buffer)
9127                 goto out_rm_hp_state;
9128
9129         if (trace_create_savedcmd() < 0)
9130                 goto out_free_temp_buffer;
9131
9132         /* TODO: make the number of buffers hot pluggable with CPUs */
9133         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9134                 printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
9135                 WARN_ON(1);
9136                 goto out_free_savedcmd;
9137         }
9138
9139         if (global_trace.buffer_disabled)
9140                 tracing_off();
9141
9142         if (trace_boot_clock) {
9143                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9144                 if (ret < 0)
9145                         pr_warn("Trace clock %s not defined, going back to default\n",
9146                                 trace_boot_clock);
9147         }
9148
9149         /*
9150          * register_tracer() might reference current_trace, so it
9151          * needs to be set before we register anything. This is
9152          * just a bootstrap of current_trace anyway.
9153          */
9154         global_trace.current_trace = &nop_trace;
9155
9156         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9157
9158         ftrace_init_global_array_ops(&global_trace);
9159
9160         init_trace_flags_index(&global_trace);
9161
9162         register_tracer(&nop_trace);
9163
9164         /* Function tracing may start here (via kernel command line) */
9165         init_function_trace();
9166
9167         /* All seems OK, enable tracing */
9168         tracing_disabled = 0;
9169
9170         atomic_notifier_chain_register(&panic_notifier_list,
9171                                        &trace_panic_notifier);
9172
9173         register_die_notifier(&trace_die_notifier);
9174
9175         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9176
9177         INIT_LIST_HEAD(&global_trace.systems);
9178         INIT_LIST_HEAD(&global_trace.events);
9179         INIT_LIST_HEAD(&global_trace.hist_vars);
9180         INIT_LIST_HEAD(&global_trace.err_log);
9181         list_add(&global_trace.list, &ftrace_trace_arrays);
9182
9183         apply_trace_boot_options();
9184
9185         register_snapshot_cmd();
9186
9187         return 0;
9188
9189 out_free_savedcmd:
9190         free_saved_cmdlines_buffer(savedcmd);
9191 out_free_temp_buffer:
9192         ring_buffer_free(temp_buffer);
9193 out_rm_hp_state:
9194         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9195 out_free_cpumask:
9196         free_cpumask_var(global_trace.tracing_cpumask);
9197 out_free_buffer_mask:
9198         free_cpumask_var(tracing_buffer_mask);
9199 out:
9200         return ret;
9201 }
9202
9203 void __init early_trace_init(void)
9204 {
9205         if (tracepoint_printk) {
9206                 tracepoint_print_iter =
9207                         kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9208                 if (WARN_ON(!tracepoint_print_iter))
9209                         tracepoint_printk = 0;
9210                 else
9211                         static_key_enable(&tracepoint_printk_key.key);
9212         }
9213         tracer_alloc_buffers();
9214 }
9215
9216 void __init trace_init(void)
9217 {
9218         trace_event_init();
9219 }
9220
9221 __init static int clear_boot_tracer(void)
9222 {
9223         /*
9224          * The default bootup tracer name lives in an init section.
9225          * This function is called at late_initcall time. If we did not
9226          * find the boot tracer, then clear it out, to prevent a
9227          * later registration from accessing the init-section buffer
9228          * that is about to be freed.
9229          */
9230         if (!default_bootup_tracer)
9231                 return 0;
9232
9233         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9234                default_bootup_tracer);
9235         default_bootup_tracer = NULL;
9236
9237         return 0;
9238 }
9239
9240 fs_initcall(tracer_init_tracefs);
9241 late_initcall_sync(clear_boot_tracer);
9242
9243 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9244 __init static int tracing_set_default_clock(void)
9245 {
9246         /* sched_clock_stable() is determined in late_initcall */
9247         if (!trace_boot_clock && !sched_clock_stable()) {
9248                 printk(KERN_WARNING
9249                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
9250                        "If you want to keep using the local clock, then add:\n"
9251                        "  \"trace_clock=local\"\n"
9252                        "on the kernel command line\n");
9253                 tracing_set_clock(&global_trace, "global");
9254         }
9255
9256         return 0;
9257 }
9258 late_initcall_sync(tracing_set_default_clock);
9259 #endif