1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring-buffer to count the
61  * entries inserted during the selftest, although some concurrent
62  * insertions into the ring-buffer, such as trace_printk, could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72
73 void __init disable_tracing_selftest(const char *reason)
74 {
75         if (!tracing_selftest_disabled) {
76                 tracing_selftest_disabled = true;
77                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
78         }
79 }
80 #else
81 #define tracing_selftest_running        0
82 #define tracing_selftest_disabled       0
83 #endif
84
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered oops
133  * Set instance name if you want to dump the specific trace instance
134  * Multiple instance dump is also supported, and instances are separated
135  * by commas.
136  */
137 /* Initialized to the string "0" so dumping is disabled by default */
138 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
139
140 /* When set, tracing will stop when a WARN*() is hit */
141 int __disable_trace_on_warning;
142
143 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
144 /* Map of enums to their values, for "eval_map" file */
145 struct trace_eval_map_head {
146         struct module                   *mod;
147         unsigned long                   length;
148 };
149
150 union trace_eval_map_item;
151
152 struct trace_eval_map_tail {
153         /*
154          * "end" is first and points to NULL as it must be different
155          * than "mod" or "eval_string"
156          */
157         union trace_eval_map_item       *next;
158         const char                      *end;   /* points to NULL */
159 };
160
161 static DEFINE_MUTEX(trace_eval_mutex);
162
163 /*
164  * The trace_eval_maps are saved in an array with two extra elements,
165  * one at the beginning, and one at the end. The beginning item contains
166  * the count of the saved maps (head.length), and the module they
167  * belong to if not built in (head.mod). The ending item contains a
168  * pointer to the next array of saved eval_map items.
169  */
170 union trace_eval_map_item {
171         struct trace_eval_map           map;
172         struct trace_eval_map_head      head;
173         struct trace_eval_map_tail      tail;
174 };
175
176 static union trace_eval_map_item *trace_eval_maps;
177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
178
179 int tracing_set_tracer(struct trace_array *tr, const char *buf);
180 static void ftrace_trace_userstack(struct trace_array *tr,
181                                    struct trace_buffer *buffer,
182                                    unsigned int trace_ctx);
183
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189
190 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
191 static int boot_instance_index;
192
193 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
194 static int boot_snapshot_index;
195
196 static int __init set_cmdline_ftrace(char *str)
197 {
198         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
199         default_bootup_tracer = bootup_tracer_buf;
200         /* We are using ftrace early, expand it */
201         trace_set_ring_buffer_expanded(NULL);
202         return 1;
203 }
204 __setup("ftrace=", set_cmdline_ftrace);
205
206 int ftrace_dump_on_oops_enabled(void)
207 {
208         if (!strcmp("0", ftrace_dump_on_oops))
209                 return 0;
210         else
211                 return 1;
212 }
213
214 static int __init set_ftrace_dump_on_oops(char *str)
215 {
216         if (!*str) {
217                 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
218                 return 1;
219         }
220
221         if (*str == ',') {
222                 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
223                 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
224                 return 1;
225         }
226
227         if (*str++ == '=') {
228                 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
229                 return 1;
230         }
231
232         return 0;
233 }
234 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
235
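/*
 * Illustrative command line examples, following the description of
 * ftrace_dump_on_oops above ("foo" and "bar" are hypothetical instance
 * names, not defined anywhere in this file):
 *
 *	ftrace_dump_on_oops		dump the buffers of all CPUs
 *	ftrace_dump_on_oops=2		dump only the CPU that triggered the oops
 *	ftrace_dump_on_oops=foo,bar	dump the "foo" and "bar" instances
 */
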
236 static int __init stop_trace_on_warning(char *str)
237 {
238         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
239                 __disable_trace_on_warning = 1;
240         return 1;
241 }
242 __setup("traceoff_on_warning", stop_trace_on_warning);
243
244 static int __init boot_alloc_snapshot(char *str)
245 {
246         char *slot = boot_snapshot_info + boot_snapshot_index;
247         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
248         int ret;
249
250         if (str[0] == '=') {
251                 str++;
252                 if (strlen(str) >= left)
253                         return -1;
254
255                 ret = snprintf(slot, left, "%s\t", str);
256                 boot_snapshot_index += ret;
257         } else {
258                 allocate_snapshot = true;
259                 /* We also need the main ring buffer expanded */
260                 trace_set_ring_buffer_expanded(NULL);
261         }
262         return 1;
263 }
264 __setup("alloc_snapshot", boot_alloc_snapshot);
265
266
267 static int __init boot_snapshot(char *str)
268 {
269         snapshot_at_boot = true;
270         boot_alloc_snapshot(str);
271         return 1;
272 }
273 __setup("ftrace_boot_snapshot", boot_snapshot);
274
275
276 static int __init boot_instance(char *str)
277 {
278         char *slot = boot_instance_info + boot_instance_index;
279         int left = sizeof(boot_instance_info) - boot_instance_index;
280         int ret;
281
282         if (strlen(str) >= left)
283                 return -1;
284
285         ret = snprintf(slot, left, "%s\t", str);
286         boot_instance_index += ret;
287
288         return 1;
289 }
290 __setup("trace_instance=", boot_instance);
291
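/*
 * Illustrative example: booting with "trace_instance=foo" records the
 * name here so that a tracing instance called "foo" can be created
 * later during boot ("foo" being a hypothetical instance name).
 */
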
292
293 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
294
295 static int __init set_trace_boot_options(char *str)
296 {
297         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
298         return 1;
299 }
300 __setup("trace_options=", set_trace_boot_options);
301
302 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
303 static char *trace_boot_clock __initdata;
304
305 static int __init set_trace_boot_clock(char *str)
306 {
307         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
308         trace_boot_clock = trace_boot_clock_buf;
309         return 1;
310 }
311 __setup("trace_clock=", set_trace_boot_clock);
312
313 static int __init set_tracepoint_printk(char *str)
314 {
315         /* Ignore the "tp_printk_stop_on_boot" param */
316         if (*str == '_')
317                 return 0;
318
319         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
320                 tracepoint_printk = 1;
321         return 1;
322 }
323 __setup("tp_printk", set_tracepoint_printk);
324
325 static int __init set_tracepoint_printk_stop(char *str)
326 {
327         tracepoint_printk_stop_on_boot = true;
328         return 1;
329 }
330 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
331
332 unsigned long long ns2usecs(u64 nsec)
333 {
334         nsec += 500;
335         do_div(nsec, 1000);
336         return nsec;
337 }
338
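/*
 * Worked example: ns2usecs(1500) is (1500 + 500) / 1000 == 2, i.e. the
 * conversion rounds to the nearest microsecond.
 */
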
339 static void
340 trace_process_export(struct trace_export *export,
341                struct ring_buffer_event *event, int flag)
342 {
343         struct trace_entry *entry;
344         unsigned int size = 0;
345
346         if (export->flags & flag) {
347                 entry = ring_buffer_event_data(event);
348                 size = ring_buffer_event_length(event);
349                 export->write(export, entry, size);
350         }
351 }
352
353 static DEFINE_MUTEX(ftrace_export_lock);
354
355 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
356
357 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
358 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
359 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
360
361 static inline void ftrace_exports_enable(struct trace_export *export)
362 {
363         if (export->flags & TRACE_EXPORT_FUNCTION)
364                 static_branch_inc(&trace_function_exports_enabled);
365
366         if (export->flags & TRACE_EXPORT_EVENT)
367                 static_branch_inc(&trace_event_exports_enabled);
368
369         if (export->flags & TRACE_EXPORT_MARKER)
370                 static_branch_inc(&trace_marker_exports_enabled);
371 }
372
373 static inline void ftrace_exports_disable(struct trace_export *export)
374 {
375         if (export->flags & TRACE_EXPORT_FUNCTION)
376                 static_branch_dec(&trace_function_exports_enabled);
377
378         if (export->flags & TRACE_EXPORT_EVENT)
379                 static_branch_dec(&trace_event_exports_enabled);
380
381         if (export->flags & TRACE_EXPORT_MARKER)
382                 static_branch_dec(&trace_marker_exports_enabled);
383 }
384
385 static void ftrace_exports(struct ring_buffer_event *event, int flag)
386 {
387         struct trace_export *export;
388
389         preempt_disable_notrace();
390
391         export = rcu_dereference_raw_check(ftrace_exports_list);
392         while (export) {
393                 trace_process_export(export, event, flag);
394                 export = rcu_dereference_raw_check(export->next);
395         }
396
397         preempt_enable_notrace();
398 }
399
400 static inline void
401 add_trace_export(struct trace_export **list, struct trace_export *export)
402 {
403         rcu_assign_pointer(export->next, *list);
404         /*
405          * We are entering export into the list but another
406          * CPU might be walking that list. We need to make sure
407          * the export->next pointer is valid before another CPU sees
408          * the export pointer included into the list.
409          */
410         rcu_assign_pointer(*list, export);
411 }
412
413 static inline int
414 rm_trace_export(struct trace_export **list, struct trace_export *export)
415 {
416         struct trace_export **p;
417
418         for (p = list; *p != NULL; p = &(*p)->next)
419                 if (*p == export)
420                         break;
421
422         if (*p != export)
423                 return -1;
424
425         rcu_assign_pointer(*p, (*p)->next);
426
427         return 0;
428 }
429
430 static inline void
431 add_ftrace_export(struct trace_export **list, struct trace_export *export)
432 {
433         ftrace_exports_enable(export);
434
435         add_trace_export(list, export);
436 }
437
438 static inline int
439 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
440 {
441         int ret;
442
443         ret = rm_trace_export(list, export);
444         ftrace_exports_disable(export);
445
446         return ret;
447 }
448
449 int register_ftrace_export(struct trace_export *export)
450 {
451         if (WARN_ON_ONCE(!export->write))
452                 return -1;
453
454         mutex_lock(&ftrace_export_lock);
455
456         add_ftrace_export(&ftrace_exports_list, export);
457
458         mutex_unlock(&ftrace_export_lock);
459
460         return 0;
461 }
462 EXPORT_SYMBOL_GPL(register_ftrace_export);
463
464 int unregister_ftrace_export(struct trace_export *export)
465 {
466         int ret;
467
468         mutex_lock(&ftrace_export_lock);
469
470         ret = rm_ftrace_export(&ftrace_exports_list, export);
471
472         mutex_unlock(&ftrace_export_lock);
473
474         return ret;
475 }
476 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
477
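/*
 * A minimal sketch of how a consumer might use the export API above.
 * The callback signature mirrors the call made in trace_process_export()
 * (export, entry, size); my_export_write() and my_export are hypothetical
 * names, not part of this file:
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		... forward the raw trace entry to an external sink ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */
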
478 /* trace_flags holds trace_options default values */
479 #define TRACE_DEFAULT_FLAGS                                             \
480         (FUNCTION_DEFAULT_FLAGS |                                       \
481          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
482          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
483          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
484          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
485          TRACE_ITER_HASH_PTR)
486
487 /* trace_options that are only supported by global_trace */
488 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
489                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
490
491 /* trace_flags that are default zero for instances */
492 #define ZEROED_TRACE_FLAGS \
493         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
494
495 /*
496  * The global_trace is the descriptor that holds the top-level tracing
497  * buffers for the live tracing.
498  */
499 static struct trace_array global_trace = {
500         .trace_flags = TRACE_DEFAULT_FLAGS,
501 };
502
503 void trace_set_ring_buffer_expanded(struct trace_array *tr)
504 {
505         if (!tr)
506                 tr = &global_trace;
507         tr->ring_buffer_expanded = true;
508 }
509
510 LIST_HEAD(ftrace_trace_arrays);
511
512 int trace_array_get(struct trace_array *this_tr)
513 {
514         struct trace_array *tr;
515         int ret = -ENODEV;
516
517         mutex_lock(&trace_types_lock);
518         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
519                 if (tr == this_tr) {
520                         tr->ref++;
521                         ret = 0;
522                         break;
523                 }
524         }
525         mutex_unlock(&trace_types_lock);
526
527         return ret;
528 }
529
530 static void __trace_array_put(struct trace_array *this_tr)
531 {
532         WARN_ON(!this_tr->ref);
533         this_tr->ref--;
534 }
535
536 /**
537  * trace_array_put - Decrement the reference counter for this trace array.
538  * @this_tr : pointer to the trace array
539  *
540  * NOTE: Use this when we no longer need the trace array returned by
541  * trace_array_get_by_name(). This ensures the trace array can be later
542  * destroyed.
543  *
544  */
545 void trace_array_put(struct trace_array *this_tr)
546 {
547         if (!this_tr)
548                 return;
549
550         mutex_lock(&trace_types_lock);
551         __trace_array_put(this_tr);
552         mutex_unlock(&trace_types_lock);
553 }
554 EXPORT_SYMBOL_GPL(trace_array_put);
555
556 int tracing_check_open_get_tr(struct trace_array *tr)
557 {
558         int ret;
559
560         ret = security_locked_down(LOCKDOWN_TRACEFS);
561         if (ret)
562                 return ret;
563
564         if (tracing_disabled)
565                 return -ENODEV;
566
567         if (tr && trace_array_get(tr) < 0)
568                 return -ENODEV;
569
570         return 0;
571 }
572
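/*
 * Typical pairing, following the kernel-doc for trace_array_put() above:
 * an open path that succeeds with tracing_check_open_get_tr(tr) should
 * be matched by a release path that calls trace_array_put(tr), so the
 * instance can later be destroyed.
 */
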
573 int call_filter_check_discard(struct trace_event_call *call, void *rec,
574                               struct trace_buffer *buffer,
575                               struct ring_buffer_event *event)
576 {
577         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
578             !filter_match_preds(call->filter, rec)) {
579                 __trace_event_discard_commit(buffer, event);
580                 return 1;
581         }
582
583         return 0;
584 }
585
586 /**
587  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
588  * @filtered_pids: The list of pids to check
589  * @search_pid: The PID to find in @filtered_pids
590  *
591  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
592  */
593 bool
594 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
595 {
596         return trace_pid_list_is_set(filtered_pids, search_pid);
597 }
598
599 /**
600  * trace_ignore_this_task - should a task be ignored for tracing
601  * @filtered_pids: The list of pids to check
602  * @filtered_no_pids: The list of pids not to be traced
603  * @task: The task that should be ignored if not filtered
604  *
605  * Checks if @task should be traced or not from @filtered_pids.
606  * Returns true if @task should *NOT* be traced.
607  * Returns false if @task should be traced.
608  */
609 bool
610 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
611                        struct trace_pid_list *filtered_no_pids,
612                        struct task_struct *task)
613 {
614         /*
615          * If filtered_no_pids is not empty, and the task's pid is listed
616          * in filtered_no_pids, then return true.
617          * Otherwise, if filtered_pids is empty, that means we can
618          * trace all tasks. If it has content, then only trace pids
619          * within filtered_pids.
620          */
621
622         return (filtered_pids &&
623                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
624                 (filtered_no_pids &&
625                  trace_find_filtered_pid(filtered_no_pids, task->pid));
626 }
627
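/*
 * Example of the rules above, with illustrative pid values: if
 * @filtered_pids contains 42 and @filtered_no_pids is empty, only the
 * task with pid 42 is traced. If instead @filtered_no_pids contains 42
 * and @filtered_pids is empty, every task except pid 42 is traced.
 */
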
628 /**
629  * trace_filter_add_remove_task - Add or remove a task from a pid_list
630  * @pid_list: The list to modify
631  * @self: The current task for fork or NULL for exit
632  * @task: The task to add or remove
633  *
634  * If adding a task, if @self is defined, the task is only added if @self
635  * is also included in @pid_list. This happens on fork and tasks should
636  * only be added when the parent is listed. If @self is NULL, then the
637  * @task pid will be removed from the list, which would happen on exit
638  * of a task.
639  */
640 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
641                                   struct task_struct *self,
642                                   struct task_struct *task)
643 {
644         if (!pid_list)
645                 return;
646
647         /* For forks, we only add if the forking task is listed */
648         if (self) {
649                 if (!trace_find_filtered_pid(pid_list, self->pid))
650                         return;
651         }
652
653         /* "self" is set for forks, and NULL for exits */
654         if (self)
655                 trace_pid_list_set(pid_list, task->pid);
656         else
657                 trace_pid_list_clear(pid_list, task->pid);
658 }
659
660 /**
661  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
662  * @pid_list: The pid list to show
663  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
664  * @pos: The position of the file
665  *
666  * This is used by the seq_file "next" operation to iterate the pids
667  * listed in a trace_pid_list structure.
668  *
669  * Returns the pid+1 as we want to display pid of zero, but NULL would
670  * stop the iteration.
671  */
672 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
673 {
674         long pid = (unsigned long)v;
675         unsigned int next;
676
677         (*pos)++;
678
679         /* pid already is +1 of the actual previous bit */
680         if (trace_pid_list_next(pid_list, pid, &next) < 0)
681                 return NULL;
682
683         pid = next;
684
685         /* Return pid + 1 to allow zero to be represented */
686         return (void *)(pid + 1);
687 }
688
689 /**
690  * trace_pid_start - Used for seq_file to start reading pid lists
691  * @pid_list: The pid list to show
692  * @pos: The position of the file
693  *
694  * This is used by seq_file "start" operation to start the iteration
695  * of listing pids.
696  *
697  * Returns the pid+1 as we want to display pid of zero, but NULL would
698  * stop the iteration.
699  */
700 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
701 {
702         unsigned long pid;
703         unsigned int first;
704         loff_t l = 0;
705
706         if (trace_pid_list_first(pid_list, &first) < 0)
707                 return NULL;
708
709         pid = first;
710
711         /* Return pid + 1 so that zero can be the exit value */
712         for (pid++; pid && l < *pos;
713              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
714                 ;
715         return (void *)pid;
716 }
717
718 /**
719  * trace_pid_show - show the current pid in seq_file processing
720  * @m: The seq_file structure to write into
721  * @v: A void pointer of the pid (+1) value to display
722  *
723  * Can be directly used by seq_file operations to display the current
724  * pid value.
725  */
726 int trace_pid_show(struct seq_file *m, void *v)
727 {
728         unsigned long pid = (unsigned long)v - 1;
729
730         seq_printf(m, "%lu\n", pid);
731         return 0;
732 }
733
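/*
 * These pid helpers are designed to back the seq_file operations of the
 * pid filter files. A rough sketch (the wrapper names are hypothetical;
 * the wrappers pass the appropriate trace_pid_list to the helpers):
 *
 *	static const struct seq_operations my_pid_sops = {
 *		.start	= my_pid_start,	(wraps trace_pid_start())
 *		.next	= my_pid_next,	(wraps trace_pid_next())
 *		.stop	= my_pid_stop,
 *		.show	= trace_pid_show,
 *	};
 */
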
734 /* 128 should be much more than enough */
735 #define PID_BUF_SIZE            127
736
737 int trace_pid_write(struct trace_pid_list *filtered_pids,
738                     struct trace_pid_list **new_pid_list,
739                     const char __user *ubuf, size_t cnt)
740 {
741         struct trace_pid_list *pid_list;
742         struct trace_parser parser;
743         unsigned long val;
744         int nr_pids = 0;
745         ssize_t read = 0;
746         ssize_t ret;
747         loff_t pos;
748         pid_t pid;
749
750         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
751                 return -ENOMEM;
752
753         /*
754          * Always create a new array. The write is an all-or-nothing
755          * operation: a new array is always built when the user adds new
756          * pids, and if the operation fails, the current list is not
757          * modified.
758          */
759         pid_list = trace_pid_list_alloc();
760         if (!pid_list) {
761                 trace_parser_put(&parser);
762                 return -ENOMEM;
763         }
764
765         if (filtered_pids) {
766                 /* copy the current bits to the new max */
767                 ret = trace_pid_list_first(filtered_pids, &pid);
768                 while (!ret) {
769                         trace_pid_list_set(pid_list, pid);
770                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
771                         nr_pids++;
772                 }
773         }
774
775         ret = 0;
776         while (cnt > 0) {
777
778                 pos = 0;
779
780                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
781                 if (ret < 0)
782                         break;
783
784                 read += ret;
785                 ubuf += ret;
786                 cnt -= ret;
787
788                 if (!trace_parser_loaded(&parser))
789                         break;
790
791                 ret = -EINVAL;
792                 if (kstrtoul(parser.buffer, 0, &val))
793                         break;
794
795                 pid = (pid_t)val;
796
797                 if (trace_pid_list_set(pid_list, pid) < 0) {
798                         ret = -1;
799                         break;
800                 }
801                 nr_pids++;
802
803                 trace_parser_clear(&parser);
804                 ret = 0;
805         }
806         trace_parser_put(&parser);
807
808         if (ret < 0) {
809                 trace_pid_list_free(pid_list);
810                 return ret;
811         }
812
813         if (!nr_pids) {
814                 /* Cleared the list of pids */
815                 trace_pid_list_free(pid_list);
816                 pid_list = NULL;
817         }
818
819         *new_pid_list = pid_list;
820
821         return read;
822 }
823
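/*
 * Example of the resulting semantics (illustrative values): a write of
 * "123 456" through a file backed by this helper builds a new list
 * containing 123 and 456, plus whatever was already in @filtered_pids
 * when one is passed in. A write that ends up setting no pids at all
 * leaves *new_pid_list set to NULL, clearing the filter.
 */
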
824 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
825 {
826         u64 ts;
827
828         /* Early boot up does not have a buffer yet */
829         if (!buf->buffer)
830                 return trace_clock_local();
831
832         ts = ring_buffer_time_stamp(buf->buffer);
833         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
834
835         return ts;
836 }
837
838 u64 ftrace_now(int cpu)
839 {
840         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
841 }
842
843 /**
844  * tracing_is_enabled - Show if global_trace has been enabled
845  *
846  * Shows if the global trace has been enabled or not. It uses the
847  * mirror flag "buffer_disabled", which is used in fast paths such as
848  * the irqsoff tracer. But it may be inaccurate due to races. If you
849  * need to know the accurate state, use tracing_is_on() which is a little
850  * slower, but accurate.
851  */
852 int tracing_is_enabled(void)
853 {
854         /*
855          * For quick access (irqsoff uses this in fast path), just
856          * return the mirror variable of the state of the ring buffer.
857          * It's a little racy, but we don't really care.
858          */
859         smp_rmb();
860         return !global_trace.buffer_disabled;
861 }
862
863 /*
864  * trace_buf_size is the size in bytes that is allocated
865  * for a buffer. Note, the number of bytes is always rounded
866  * to page size.
867  *
868  * This number is purposely set to a low value of 16384.
869  * If a dump on oops happens, it is much appreciated not to have
870  * to wait for all that output. Anyway, this is configurable at
871  * both boot time and run time.
872  */
873 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
874
875 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
876
877 /* trace_types holds a link list of available tracers. */
878 static struct tracer            *trace_types __read_mostly;
879
880 /*
881  * trace_types_lock is used to protect the trace_types list.
882  */
883 DEFINE_MUTEX(trace_types_lock);
884
885 /*
886  * Serialize access to the ring buffer.
887  *
888  * The ring buffer serializes readers, but that is only low-level protection.
889  * The validity of the events (returned by ring_buffer_peek(), etc.)
890  * is not protected by the ring buffer.
891  *
892  * The content of events may become garbage if we allow other processes to
893  * consume these events concurrently:
894  *   A) the page of the consumed events may become a normal page
895  *      (not a reader page) in the ring buffer, and this page will be
896  *      rewritten by the events producer.
897  *   B) the page of the consumed events may become a page for splice_read,
898  *      and this page will be returned to the system.
899  *
900  * These primitives allow multiple processes to access different cpu
901  * ring buffers concurrently.
902  *
903  * These primitives don't distinguish read-only and read-consume access.
904  * Multiple read-only accesses are also serialized.
905  */
906
907 #ifdef CONFIG_SMP
908 static DECLARE_RWSEM(all_cpu_access_lock);
909 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
910
911 static inline void trace_access_lock(int cpu)
912 {
913         if (cpu == RING_BUFFER_ALL_CPUS) {
914                 /* gain it for accessing the whole ring buffer. */
915                 down_write(&all_cpu_access_lock);
916         } else {
917                 /* gain it for accessing a cpu ring buffer. */
918
919                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
920                 down_read(&all_cpu_access_lock);
921
922                 /* Secondly block other access to this @cpu ring buffer. */
923                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
924         }
925 }
926
927 static inline void trace_access_unlock(int cpu)
928 {
929         if (cpu == RING_BUFFER_ALL_CPUS) {
930                 up_write(&all_cpu_access_lock);
931         } else {
932                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
933                 up_read(&all_cpu_access_lock);
934         }
935 }
936
937 static inline void trace_access_lock_init(void)
938 {
939         int cpu;
940
941         for_each_possible_cpu(cpu)
942                 mutex_init(&per_cpu(cpu_access_lock, cpu));
943 }
944
945 #else
946
947 static DEFINE_MUTEX(access_lock);
948
949 static inline void trace_access_lock(int cpu)
950 {
951         (void)cpu;
952         mutex_lock(&access_lock);
953 }
954
955 static inline void trace_access_unlock(int cpu)
956 {
957         (void)cpu;
958         mutex_unlock(&access_lock);
959 }
960
961 static inline void trace_access_lock_init(void)
962 {
963 }
964
965 #endif
966
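/*
 * Typical usage of the helpers above (a sketch): a reader of a single
 * cpu buffer brackets its accesses with
 *
 *	trace_access_lock(cpu);
 *	... consume events from that cpu's ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * while code that touches all buffers at once passes
 * RING_BUFFER_ALL_CPUS, taking all_cpu_access_lock for write and thus
 * excluding every per-cpu reader.
 */
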
967 #ifdef CONFIG_STACKTRACE
968 static void __ftrace_trace_stack(struct trace_buffer *buffer,
969                                  unsigned int trace_ctx,
970                                  int skip, struct pt_regs *regs);
971 static inline void ftrace_trace_stack(struct trace_array *tr,
972                                       struct trace_buffer *buffer,
973                                       unsigned int trace_ctx,
974                                       int skip, struct pt_regs *regs);
975
976 #else
977 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
978                                         unsigned int trace_ctx,
979                                         int skip, struct pt_regs *regs)
980 {
981 }
982 static inline void ftrace_trace_stack(struct trace_array *tr,
983                                       struct trace_buffer *buffer,
984                                       unsigned long trace_ctx,
985                                       int skip, struct pt_regs *regs)
986 {
987 }
988
989 #endif
990
991 static __always_inline void
992 trace_event_setup(struct ring_buffer_event *event,
993                   int type, unsigned int trace_ctx)
994 {
995         struct trace_entry *ent = ring_buffer_event_data(event);
996
997         tracing_generic_entry_update(ent, type, trace_ctx);
998 }
999
1000 static __always_inline struct ring_buffer_event *
1001 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1002                           int type,
1003                           unsigned long len,
1004                           unsigned int trace_ctx)
1005 {
1006         struct ring_buffer_event *event;
1007
1008         event = ring_buffer_lock_reserve(buffer, len);
1009         if (event != NULL)
1010                 trace_event_setup(event, type, trace_ctx);
1011
1012         return event;
1013 }
1014
1015 void tracer_tracing_on(struct trace_array *tr)
1016 {
1017         if (tr->array_buffer.buffer)
1018                 ring_buffer_record_on(tr->array_buffer.buffer);
1019         /*
1020          * This flag is looked at when buffers haven't been allocated
1021          * yet, or by some tracers (like irqsoff), that just want to
1022          * know if the ring buffer has been disabled, but it can handle
1023          * races of where it gets disabled but we still do a record.
1024          * As the check is in the fast path of the tracers, it is more
1025          * important to be fast than accurate.
1026          */
1027         tr->buffer_disabled = 0;
1028         /* Make the flag seen by readers */
1029         smp_wmb();
1030 }
1031
1032 /**
1033  * tracing_on - enable tracing buffers
1034  *
1035  * This function enables tracing buffers that may have been
1036  * disabled with tracing_off.
1037  */
1038 void tracing_on(void)
1039 {
1040         tracer_tracing_on(&global_trace);
1041 }
1042 EXPORT_SYMBOL_GPL(tracing_on);
1043
1044
1045 static __always_inline void
1046 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1047 {
1048         __this_cpu_write(trace_taskinfo_save, true);
1049
1050         /* If this is the temp buffer, we need to commit fully */
1051         if (this_cpu_read(trace_buffered_event) == event) {
1052                 /* Length is in event->array[0] */
1053                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1054                 /* Release the temp buffer */
1055                 this_cpu_dec(trace_buffered_event_cnt);
1056                 /* ring_buffer_unlock_commit() enables preemption */
1057                 preempt_enable_notrace();
1058         } else
1059                 ring_buffer_unlock_commit(buffer);
1060 }
1061
1062 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1063                        const char *str, int size)
1064 {
1065         struct ring_buffer_event *event;
1066         struct trace_buffer *buffer;
1067         struct print_entry *entry;
1068         unsigned int trace_ctx;
1069         int alloc;
1070
1071         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1072                 return 0;
1073
1074         if (unlikely(tracing_selftest_running && tr == &global_trace))
1075                 return 0;
1076
1077         if (unlikely(tracing_disabled))
1078                 return 0;
1079
1080         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1081
1082         trace_ctx = tracing_gen_ctx();
1083         buffer = tr->array_buffer.buffer;
1084         ring_buffer_nest_start(buffer);
1085         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1086                                             trace_ctx);
1087         if (!event) {
1088                 size = 0;
1089                 goto out;
1090         }
1091
1092         entry = ring_buffer_event_data(event);
1093         entry->ip = ip;
1094
1095         memcpy(&entry->buf, str, size);
1096
1097         /* Add a newline if necessary */
1098         if (entry->buf[size - 1] != '\n') {
1099                 entry->buf[size] = '\n';
1100                 entry->buf[size + 1] = '\0';
1101         } else
1102                 entry->buf[size] = '\0';
1103
1104         __buffer_unlock_commit(buffer, event);
1105         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1106  out:
1107         ring_buffer_nest_end(buffer);
1108         return size;
1109 }
1110 EXPORT_SYMBOL_GPL(__trace_array_puts);
1111
1112 /**
1113  * __trace_puts - write a constant string into the trace buffer.
1114  * @ip:    The address of the caller
1115  * @str:   The constant string to write
1116  * @size:  The size of the string.
1117  */
1118 int __trace_puts(unsigned long ip, const char *str, int size)
1119 {
1120         return __trace_array_puts(&global_trace, ip, str, size);
1121 }
1122 EXPORT_SYMBOL_GPL(__trace_puts);
1123
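/*
 * Callers normally reach this through the trace_puts() macro in the
 * tracing headers, which (roughly, ignoring the constant-string
 * optimization that routes to __trace_bputs()) expands to:
 *
 *	__trace_puts(_THIS_IP_, str, strlen(str))
 *
 * which is how @ip and @size are obtained.
 */
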
1124 /**
1125  * __trace_bputs - write the pointer to a constant string into trace buffer
1126  * @ip:    The address of the caller
1127  * @str:   The constant string whose pointer is written into the buffer
1128  */
1129 int __trace_bputs(unsigned long ip, const char *str)
1130 {
1131         struct ring_buffer_event *event;
1132         struct trace_buffer *buffer;
1133         struct bputs_entry *entry;
1134         unsigned int trace_ctx;
1135         int size = sizeof(struct bputs_entry);
1136         int ret = 0;
1137
1138         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1139                 return 0;
1140
1141         if (unlikely(tracing_selftest_running || tracing_disabled))
1142                 return 0;
1143
1144         trace_ctx = tracing_gen_ctx();
1145         buffer = global_trace.array_buffer.buffer;
1146
1147         ring_buffer_nest_start(buffer);
1148         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1149                                             trace_ctx);
1150         if (!event)
1151                 goto out;
1152
1153         entry = ring_buffer_event_data(event);
1154         entry->ip                       = ip;
1155         entry->str                      = str;
1156
1157         __buffer_unlock_commit(buffer, event);
1158         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1159
1160         ret = 1;
1161  out:
1162         ring_buffer_nest_end(buffer);
1163         return ret;
1164 }
1165 EXPORT_SYMBOL_GPL(__trace_bputs);
1166
1167 #ifdef CONFIG_TRACER_SNAPSHOT
1168 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1169                                            void *cond_data)
1170 {
1171         struct tracer *tracer = tr->current_trace;
1172         unsigned long flags;
1173
1174         if (in_nmi()) {
1175                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1176                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1177                 return;
1178         }
1179
1180         if (!tr->allocated_snapshot) {
1181                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1182                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1183                 tracer_tracing_off(tr);
1184                 return;
1185         }
1186
1187         /* Note, snapshot can not be used when the tracer uses it */
1188         if (tracer->use_max_tr) {
1189                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1190                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1191                 return;
1192         }
1193
1194         if (tr->mapped) {
1195                 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1196                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1197                 return;
1198         }
1199
1200         local_irq_save(flags);
1201         update_max_tr(tr, current, smp_processor_id(), cond_data);
1202         local_irq_restore(flags);
1203 }
1204
1205 void tracing_snapshot_instance(struct trace_array *tr)
1206 {
1207         tracing_snapshot_instance_cond(tr, NULL);
1208 }
1209
1210 /**
1211  * tracing_snapshot - take a snapshot of the current buffer.
1212  *
1213  * This causes a swap between the snapshot buffer and the current live
1214  * tracing buffer. You can use this to take snapshots of the live
1215  * trace when some condition is triggered, but continue to trace.
1216  *
1217  * Note, make sure to allocate the snapshot with either
1218  * a tracing_snapshot_alloc(), or by doing it manually
1219  * with: echo 1 > /sys/kernel/tracing/snapshot
1220  *
1221  * If the snapshot buffer is not allocated, it will stop tracing.
1222  * Basically making a permanent snapshot.
1223  */
1224 void tracing_snapshot(void)
1225 {
1226         struct trace_array *tr = &global_trace;
1227
1228         tracing_snapshot_instance(tr);
1229 }
1230 EXPORT_SYMBOL_GPL(tracing_snapshot);
1231
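/*
 * A usage sketch based on the kernel-doc above (the condition is
 * hypothetical): allocate the snapshot buffer where sleeping is
 * allowed, then trigger the swap from the point of interest:
 *
 *	tracing_alloc_snapshot();
 *	...
 *	if (something_interesting())
 *		tracing_snapshot();
 *
 * Alternatively the buffer can be allocated up front with
 * "echo 1 > /sys/kernel/tracing/snapshot".
 */
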
1232 /**
1233  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1234  * @tr:         The tracing instance to snapshot
1235  * @cond_data:  The data to be tested conditionally, and possibly saved
1236  *
1237  * This is the same as tracing_snapshot() except that the snapshot is
1238  * conditional - the snapshot will only happen if the
1239  * cond_snapshot.update() implementation receiving the cond_data
1240  * returns true, which means that the trace array's cond_snapshot
1241  * update() operation used the cond_data to determine whether the
1242  * snapshot should be taken, and if it was, presumably saved it along
1243  * with the snapshot.
1244  */
1245 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1246 {
1247         tracing_snapshot_instance_cond(tr, cond_data);
1248 }
1249 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1250
1251 /**
1252  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1253  * @tr:         The tracing instance
1254  *
1255  * When the user enables a conditional snapshot using
1256  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1257  * with the snapshot.  This accessor is used to retrieve it.
1258  *
1259  * Should not be called from cond_snapshot.update(), since it takes
1260  * the tr->max_lock lock, which the code calling
1261  * cond_snapshot.update() has already done.
1262  *
1263  * Returns the cond_data associated with the trace array's snapshot.
1264  */
1265 void *tracing_cond_snapshot_data(struct trace_array *tr)
1266 {
1267         void *cond_data = NULL;
1268
1269         local_irq_disable();
1270         arch_spin_lock(&tr->max_lock);
1271
1272         if (tr->cond_snapshot)
1273                 cond_data = tr->cond_snapshot->cond_data;
1274
1275         arch_spin_unlock(&tr->max_lock);
1276         local_irq_enable();
1277
1278         return cond_data;
1279 }
1280 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1281
1282 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1283                                         struct array_buffer *size_buf, int cpu_id);
1284 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1285
1286 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1287 {
1288         int order;
1289         int ret;
1290
1291         if (!tr->allocated_snapshot) {
1292
1293                 /* Make the snapshot buffer have the same order as main buffer */
1294                 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1295                 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1296                 if (ret < 0)
1297                         return ret;
1298
1299                 /* allocate spare buffer */
1300                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1301                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1302                 if (ret < 0)
1303                         return ret;
1304
1305                 tr->allocated_snapshot = true;
1306         }
1307
1308         return 0;
1309 }
1310
1311 static void free_snapshot(struct trace_array *tr)
1312 {
1313         /*
1314          * We don't free the ring buffer. Instead, resize it, because
1315          * the max_tr ring buffer has some state (e.g. ring->clock) and
1316          * we want to preserve it.
1317          */
1318         ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1319         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1320         set_buffer_entries(&tr->max_buffer, 1);
1321         tracing_reset_online_cpus(&tr->max_buffer);
1322         tr->allocated_snapshot = false;
1323 }
1324
1325 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1326 {
1327         int ret;
1328
1329         lockdep_assert_held(&trace_types_lock);
1330
1331         spin_lock(&tr->snapshot_trigger_lock);
1332         if (tr->snapshot == UINT_MAX || tr->mapped) {
1333                 spin_unlock(&tr->snapshot_trigger_lock);
1334                 return -EBUSY;
1335         }
1336
1337         tr->snapshot++;
1338         spin_unlock(&tr->snapshot_trigger_lock);
1339
1340         ret = tracing_alloc_snapshot_instance(tr);
1341         if (ret) {
1342                 spin_lock(&tr->snapshot_trigger_lock);
1343                 tr->snapshot--;
1344                 spin_unlock(&tr->snapshot_trigger_lock);
1345         }
1346
1347         return ret;
1348 }
1349
1350 int tracing_arm_snapshot(struct trace_array *tr)
1351 {
1352         int ret;
1353
1354         mutex_lock(&trace_types_lock);
1355         ret = tracing_arm_snapshot_locked(tr);
1356         mutex_unlock(&trace_types_lock);
1357
1358         return ret;
1359 }
1360
1361 void tracing_disarm_snapshot(struct trace_array *tr)
1362 {
1363         spin_lock(&tr->snapshot_trigger_lock);
1364         if (!WARN_ON(!tr->snapshot))
1365                 tr->snapshot--;
1366         spin_unlock(&tr->snapshot_trigger_lock);
1367 }
1368
1369 /**
1370  * tracing_alloc_snapshot - allocate snapshot buffer.
1371  *
1372  * This only allocates the snapshot buffer if it isn't already
1373  * allocated - it doesn't also take a snapshot.
1374  *
1375  * This is meant to be used in cases where the snapshot buffer needs
1376  * to be set up for events that can't sleep but need to be able to
1377  * trigger a snapshot.
1378  */
1379 int tracing_alloc_snapshot(void)
1380 {
1381         struct trace_array *tr = &global_trace;
1382         int ret;
1383
1384         ret = tracing_alloc_snapshot_instance(tr);
1385         WARN_ON(ret < 0);
1386
1387         return ret;
1388 }
1389 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1390
1391 /**
1392  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1393  *
1394  * This is similar to tracing_snapshot(), but it will allocate the
1395  * snapshot buffer if it isn't already allocated. Use this only
1396  * where it is safe to sleep, as the allocation may sleep.
1397  *
1398  * This causes a swap between the snapshot buffer and the current live
1399  * tracing buffer. You can use this to take snapshots of the live
1400  * trace when some condition is triggered, but continue to trace.
1401  */
1402 void tracing_snapshot_alloc(void)
1403 {
1404         int ret;
1405
1406         ret = tracing_alloc_snapshot();
1407         if (ret < 0)
1408                 return;
1409
1410         tracing_snapshot();
1411 }
1412 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1413
1414 /**
1415  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1416  * @tr:         The tracing instance
1417  * @cond_data:  User data to associate with the snapshot
1418  * @update:     Implementation of the cond_snapshot update function
1419  *
1420  * Check whether the conditional snapshot for the given instance has
1421  * already been enabled, or if the current tracer is already using a
1422  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1423  * save the cond_data and update function inside.
1424  *
1425  * Returns 0 if successful, error otherwise.
1426  */
1427 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1428                                  cond_update_fn_t update)
1429 {
1430         struct cond_snapshot *cond_snapshot;
1431         int ret = 0;
1432
1433         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1434         if (!cond_snapshot)
1435                 return -ENOMEM;
1436
1437         cond_snapshot->cond_data = cond_data;
1438         cond_snapshot->update = update;
1439
1440         mutex_lock(&trace_types_lock);
1441
1442         if (tr->current_trace->use_max_tr) {
1443                 ret = -EBUSY;
1444                 goto fail_unlock;
1445         }
1446
1447         /*
1448          * The cond_snapshot can only change to NULL without the
1449          * trace_types_lock. We don't care if we race with it going
1450          * to NULL, but we want to make sure that it's not set to
1451          * something other than NULL when we get here, which we can
1452          * do safely with only holding the trace_types_lock and not
1453          * having to take the max_lock.
1454          */
1455         if (tr->cond_snapshot) {
1456                 ret = -EBUSY;
1457                 goto fail_unlock;
1458         }
1459
1460         ret = tracing_arm_snapshot_locked(tr);
1461         if (ret)
1462                 goto fail_unlock;
1463
1464         local_irq_disable();
1465         arch_spin_lock(&tr->max_lock);
1466         tr->cond_snapshot = cond_snapshot;
1467         arch_spin_unlock(&tr->max_lock);
1468         local_irq_enable();
1469
1470         mutex_unlock(&trace_types_lock);
1471
1472         return ret;
1473
1474  fail_unlock:
1475         mutex_unlock(&trace_types_lock);
1476         kfree(cond_snapshot);
1477         return ret;
1478 }
1479 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1480
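/*
 * Sketch of conditional snapshot usage. The names are hypothetical, and
 * the callback signature shown is an assumption based on how
 * cond_update_fn_t is used (take the trace array and cond_data, return
 * whether to snapshot):
 *
 *	static bool my_update(struct trace_array *tr, void *cond_data)
 *	{
 *		return some_condition(cond_data);
 *	}
 *
 *	tracing_snapshot_cond_enable(tr, my_data, my_update);
 *	...
 *	tracing_snapshot_cond(tr, my_data);
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */
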
1481 /**
1482  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1483  * @tr:         The tracing instance
1484  *
1485  * Check whether the conditional snapshot for the given instance is
1486  * enabled; if so, free the cond_snapshot associated with it,
1487  * otherwise return -EINVAL.
1488  *
1489  * Returns 0 if successful, error otherwise.
1490  */
1491 int tracing_snapshot_cond_disable(struct trace_array *tr)
1492 {
1493         int ret = 0;
1494
1495         local_irq_disable();
1496         arch_spin_lock(&tr->max_lock);
1497
1498         if (!tr->cond_snapshot)
1499                 ret = -EINVAL;
1500         else {
1501                 kfree(tr->cond_snapshot);
1502                 tr->cond_snapshot = NULL;
1503         }
1504
1505         arch_spin_unlock(&tr->max_lock);
1506         local_irq_enable();
1507
1508         tracing_disarm_snapshot(tr);
1509
1510         return ret;
1511 }
1512 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1513 #else
1514 void tracing_snapshot(void)
1515 {
1516         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1517 }
1518 EXPORT_SYMBOL_GPL(tracing_snapshot);
1519 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1520 {
1521         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1522 }
1523 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1524 int tracing_alloc_snapshot(void)
1525 {
1526         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1527         return -ENODEV;
1528 }
1529 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1530 void tracing_snapshot_alloc(void)
1531 {
1532         /* Give warning */
1533         tracing_snapshot();
1534 }
1535 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1536 void *tracing_cond_snapshot_data(struct trace_array *tr)
1537 {
1538         return NULL;
1539 }
1540 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1541 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1542 {
1543         return -ENODEV;
1544 }
1545 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1546 int tracing_snapshot_cond_disable(struct trace_array *tr)
1547 {
1548         return false;
1549 }
1550 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1551 #define free_snapshot(tr)       do { } while (0)
1552 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1553 #endif /* CONFIG_TRACER_SNAPSHOT */
1554
1555 void tracer_tracing_off(struct trace_array *tr)
1556 {
1557         if (tr->array_buffer.buffer)
1558                 ring_buffer_record_off(tr->array_buffer.buffer);
1559         /*
1560          * This flag is looked at when buffers haven't been allocated
1561          * yet, or by some tracers (like irqsoff), that just want to
1562          * know if the ring buffer has been disabled, but it can handle
1563          * races of where it gets disabled but we still do a record.
1564          * As the check is in the fast path of the tracers, it is more
1565          * important to be fast than accurate.
1566          */
1567         tr->buffer_disabled = 1;
1568         /* Make the flag seen by readers */
1569         smp_wmb();
1570 }
1571
1572 /**
1573  * tracing_off - turn off tracing buffers
1574  *
1575  * This function stops the tracing buffers from recording data.
1576  * It does not disable any overhead the tracers themselves may
1577  * be causing. This function simply causes all recording to
1578  * the ring buffers to fail.
1579  */
1580 void tracing_off(void)
1581 {
1582         tracer_tracing_off(&global_trace);
1583 }
1584 EXPORT_SYMBOL_GPL(tracing_off);
1585
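/*
 * A common pattern (a sketch, not prescribed by this file): call
 * tracing_off() from code that has just detected a problem, so the ring
 * buffer stops overwriting the events that led up to it and they can be
 * read later from the "trace" file.
 */
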
1586 void disable_trace_on_warning(void)
1587 {
1588         if (__disable_trace_on_warning) {
1589                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1590                         "Disabling tracing due to warning\n");
1591                 tracing_off();
1592         }
1593 }
1594
1595 /**
1596  * tracer_tracing_is_on - show real state of ring buffer enabled
1597  * @tr : the trace array to know if ring buffer is enabled
1598  *
1599  * Shows real state of the ring buffer if it is enabled or not.
1600  */
1601 bool tracer_tracing_is_on(struct trace_array *tr)
1602 {
1603         if (tr->array_buffer.buffer)
1604                 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1605         return !tr->buffer_disabled;
1606 }
1607
1608 /**
1609  * tracing_is_on - show state of ring buffers enabled
1610  */
1611 int tracing_is_on(void)
1612 {
1613         return tracer_tracing_is_on(&global_trace);
1614 }
1615 EXPORT_SYMBOL_GPL(tracing_is_on);
1616
1617 static int __init set_buf_size(char *str)
1618 {
1619         unsigned long buf_size;
1620
1621         if (!str)
1622                 return 0;
1623         buf_size = memparse(str, &str);
1624         /*
1625          * nr_entries cannot be zero, and the startup
1626          * tests require some buffer space. Therefore
1627          * ensure we have at least 4096 bytes of buffer.
1628          */
1629         trace_buf_size = max(4096UL, buf_size);
1630         return 1;
1631 }
1632 __setup("trace_buf_size=", set_buf_size);
1633
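/*
 * Example (illustrative values): booting with "trace_buf_size=1441792"
 * requests that many bytes for the trace buffer; since memparse()
 * accepts size suffixes, "trace_buf_size=16M" works as well. Values
 * below 4096 are raised to 4096 as noted above.
 */
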
1634 static int __init set_tracing_thresh(char *str)
1635 {
1636         unsigned long threshold;
1637         int ret;
1638
1639         if (!str)
1640                 return 0;
1641         ret = kstrtoul(str, 0, &threshold);
1642         if (ret < 0)
1643                 return 0;
1644         tracing_thresh = threshold * 1000;
1645         return 1;
1646 }
1647 __setup("tracing_thresh=", set_tracing_thresh);
1648
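/*
 * Example (illustrative value): "tracing_thresh=100" on the kernel
 * command line sets the threshold to 100 microseconds; it is stored in
 * nanoseconds, hence the multiplication by 1000 above.
 */
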
1649 unsigned long nsecs_to_usecs(unsigned long nsecs)
1650 {
1651         return nsecs / 1000;
1652 }
1653
1654 /*
1655  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1656  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1657  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1658  * of strings in the order that the evals (enum) were defined.
1659  */
1660 #undef C
1661 #define C(a, b) b
1662
1663 /* These must match the bit positions in trace_iterator_flags */
1664 static const char *trace_options[] = {
1665         TRACE_FLAGS
1666         NULL
1667 };
1668
1669 static struct {
1670         u64 (*func)(void);
1671         const char *name;
1672         int in_ns;              /* is this clock in nanoseconds? */
1673 } trace_clocks[] = {
1674         { trace_clock_local,            "local",        1 },
1675         { trace_clock_global,           "global",       1 },
1676         { trace_clock_counter,          "counter",      0 },
1677         { trace_clock_jiffies,          "uptime",       0 },
1678         { trace_clock,                  "perf",         1 },
1679         { ktime_get_mono_fast_ns,       "mono",         1 },
1680         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1681         { ktime_get_boot_fast_ns,       "boot",         1 },
1682         { ktime_get_tai_fast_ns,        "tai",          1 },
1683         ARCH_TRACE_CLOCKS
1684 };
1685
1686 bool trace_clock_in_ns(struct trace_array *tr)
1687 {
1688         if (trace_clocks[tr->clock_id].in_ns)
1689                 return true;
1690
1691         return false;
1692 }
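
/*
 * Editorial usage note: the names in trace_clocks[] above are what a user
 * can select at run time through the tracefs "trace_clock" file, e.g.
 * "echo mono > /sys/kernel/tracing/trace_clock". trace_clock_in_ns() then
 * tells the output code whether timestamps from the selected clock can be
 * treated as nanoseconds (clocks such as "counter" and "uptime" cannot).
 */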
1693
1694 /*
1695  * trace_parser_get_init - allocate the parser buffer (returns 1 on failure)
1696  */
1697 int trace_parser_get_init(struct trace_parser *parser, int size)
1698 {
1699         memset(parser, 0, sizeof(*parser));
1700
1701         parser->buffer = kmalloc(size, GFP_KERNEL);
1702         if (!parser->buffer)
1703                 return 1;
1704
1705         parser->size = size;
1706         return 0;
1707 }
1708
1709 /*
1710  * trace_parser_put - frees the buffer for trace parser
1711  */
1712 void trace_parser_put(struct trace_parser *parser)
1713 {
1714         kfree(parser->buffer);
1715         parser->buffer = NULL;
1716 }
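
/*
 * Editorial sketch (illustrative, not from the original source): a typical
 * caller pairs the two helpers above around trace_get_user() below:
 *
 *	struct trace_parser parser;
 *
 *	if (trace_parser_get_init(&parser, PAGE_SIZE))
 *		return -ENOMEM;
 *	... feed user input through trace_get_user(&parser, ...) ...
 *	trace_parser_put(&parser);
 *
 * The buffer size (PAGE_SIZE here) is only an example value.
 */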
1717
1718 /*
1719  * trace_get_user - reads the user input string separated by space
1720  * (matched by isspace(ch))
1721  *
1722  * For each string found, the 'struct trace_parser' is updated,
1723  * and the function returns.
1724  *
1725  * Returns the number of bytes read.
1726  *
1727  * See kernel/trace/trace.h for 'struct trace_parser' details.
1728  */
1729 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1730         size_t cnt, loff_t *ppos)
1731 {
1732         char ch;
1733         size_t read = 0;
1734         ssize_t ret;
1735
1736         if (!*ppos)
1737                 trace_parser_clear(parser);
1738
1739         ret = get_user(ch, ubuf++);
1740         if (ret)
1741                 goto out;
1742
1743         read++;
1744         cnt--;
1745
1746         /*
1747          * If the parser is not finished with the last write,
1748          * continue reading the user input without skipping spaces.
1749          */
1750         if (!parser->cont) {
1751                 /* skip white space */
1752                 while (cnt && isspace(ch)) {
1753                         ret = get_user(ch, ubuf++);
1754                         if (ret)
1755                                 goto out;
1756                         read++;
1757                         cnt--;
1758                 }
1759
1760                 parser->idx = 0;
1761
1762                 /* only spaces were written */
1763                 if (isspace(ch) || !ch) {
1764                         *ppos += read;
1765                         ret = read;
1766                         goto out;
1767                 }
1768         }
1769
1770         /* read the non-space input */
1771         while (cnt && !isspace(ch) && ch) {
1772                 if (parser->idx < parser->size - 1)
1773                         parser->buffer[parser->idx++] = ch;
1774                 else {
1775                         ret = -EINVAL;
1776                         goto out;
1777                 }
1778                 ret = get_user(ch, ubuf++);
1779                 if (ret)
1780                         goto out;
1781                 read++;
1782                 cnt--;
1783         }
1784
1785         /* We either got finished input or we have to wait for another call. */
1786         if (isspace(ch) || !ch) {
1787                 parser->buffer[parser->idx] = 0;
1788                 parser->cont = false;
1789         } else if (parser->idx < parser->size - 1) {
1790                 parser->cont = true;
1791                 parser->buffer[parser->idx++] = ch;
1792                 /* Make sure the parsed string always terminates with '\0'. */
1793                 parser->buffer[parser->idx] = 0;
1794         } else {
1795                 ret = -EINVAL;
1796                 goto out;
1797         }
1798
1799         *ppos += read;
1800         ret = read;
1801
1802 out:
1803         return ret;
1804 }
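
/*
 * Editorial sketch (illustrative): trace_get_user() is meant to be driven
 * from a tracefs ->write() handler, one whitespace-separated token per
 * call. A hypothetical handler could look like:
 *
 *	static ssize_t example_write(struct file *filp, const char __user *ubuf,
 *				     size_t cnt, loff_t *ppos)
 *	{
 *		struct trace_parser *parser = filp->private_data;
 *		ssize_t ret;
 *
 *		ret = trace_get_user(parser, ubuf, cnt, ppos);
 *		if (ret > 0 && trace_parser_loaded(parser))
 *			handle_token(parser->buffer);
 *
 *		return ret;
 *	}
 *
 * where example_write() and handle_token() are made up for illustration;
 * trace_parser_loaded() (kernel/trace/trace.h) reports whether a complete
 * token is sitting in parser->buffer.
 */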
1805
1806 /* TODO add a seq_buf_to_buffer() */
1807 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1808 {
1809         int len;
1810
1811         if (trace_seq_used(s) <= s->readpos)
1812                 return -EBUSY;
1813
1814         len = trace_seq_used(s) - s->readpos;
1815         if (cnt > len)
1816                 cnt = len;
1817         memcpy(buf, s->buffer + s->readpos, cnt);
1818
1819         s->readpos += cnt;
1820         return cnt;
1821 }
1822
1823 unsigned long __read_mostly     tracing_thresh;
1824
1825 #ifdef CONFIG_TRACER_MAX_TRACE
1826 static const struct file_operations tracing_max_lat_fops;
1827
1828 #ifdef LATENCY_FS_NOTIFY
1829
1830 static struct workqueue_struct *fsnotify_wq;
1831
1832 static void latency_fsnotify_workfn(struct work_struct *work)
1833 {
1834         struct trace_array *tr = container_of(work, struct trace_array,
1835                                               fsnotify_work);
1836         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1837 }
1838
1839 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1840 {
1841         struct trace_array *tr = container_of(iwork, struct trace_array,
1842                                               fsnotify_irqwork);
1843         queue_work(fsnotify_wq, &tr->fsnotify_work);
1844 }
1845
1846 static void trace_create_maxlat_file(struct trace_array *tr,
1847                                      struct dentry *d_tracer)
1848 {
1849         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1850         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1851         tr->d_max_latency = trace_create_file("tracing_max_latency",
1852                                               TRACE_MODE_WRITE,
1853                                               d_tracer, tr,
1854                                               &tracing_max_lat_fops);
1855 }
1856
1857 __init static int latency_fsnotify_init(void)
1858 {
1859         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1860                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1861         if (!fsnotify_wq) {
1862                 pr_err("Unable to allocate tr_max_lat_wq\n");
1863                 return -ENOMEM;
1864         }
1865         return 0;
1866 }
1867
1868 late_initcall_sync(latency_fsnotify_init);
1869
1870 void latency_fsnotify(struct trace_array *tr)
1871 {
1872         if (!fsnotify_wq)
1873                 return;
1874         /*
1875          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1876          * possible that we are called from __schedule() or do_idle(), which
1877          * could cause a deadlock.
1878          */
1879         irq_work_queue(&tr->fsnotify_irqwork);
1880 }
1881
1882 #else /* !LATENCY_FS_NOTIFY */
1883
1884 #define trace_create_maxlat_file(tr, d_tracer)                          \
1885         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1886                           d_tracer, tr, &tracing_max_lat_fops)
1887
1888 #endif
1889
1890 /*
1891  * Copy the new maximum trace into the separate maximum-trace
1892  * structure. (This way the maximum trace is permanently saved
1893  * for later retrieval via /sys/kernel/tracing/tracing_max_latency.)
1894  */
1895 static void
1896 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1897 {
1898         struct array_buffer *trace_buf = &tr->array_buffer;
1899         struct array_buffer *max_buf = &tr->max_buffer;
1900         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1901         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1902
1903         max_buf->cpu = cpu;
1904         max_buf->time_start = data->preempt_timestamp;
1905
1906         max_data->saved_latency = tr->max_latency;
1907         max_data->critical_start = data->critical_start;
1908         max_data->critical_end = data->critical_end;
1909
1910         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1911         max_data->pid = tsk->pid;
1912         /*
1913          * If tsk == current, then use current_uid(), as that does not use
1914          * RCU. The irq tracer can be called out of RCU scope.
1915          */
1916         if (tsk == current)
1917                 max_data->uid = current_uid();
1918         else
1919                 max_data->uid = task_uid(tsk);
1920
1921         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1922         max_data->policy = tsk->policy;
1923         max_data->rt_priority = tsk->rt_priority;
1924
1925         /* record this task's comm */
1926         tracing_record_cmdline(tsk);
1927         latency_fsnotify(tr);
1928 }
1929
1930 /**
1931  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1932  * @tr: the trace array to snapshot
1933  * @tsk: the task with the latency
1934  * @cpu: The cpu that initiated the trace.
1935  * @cond_data: User data associated with a conditional snapshot
1936  *
1937  * Flip the buffers between the @tr and the max_tr and record information
1938  * about which task was the cause of this latency.
1939  */
1940 void
1941 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1942               void *cond_data)
1943 {
1944         if (tr->stop_count)
1945                 return;
1946
1947         WARN_ON_ONCE(!irqs_disabled());
1948
1949         if (!tr->allocated_snapshot) {
1950                 /* Only the nop tracer should hit this when disabling */
1951                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1952                 return;
1953         }
1954
1955         arch_spin_lock(&tr->max_lock);
1956
1957         /* Inherit the recordable setting from array_buffer */
1958         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1959                 ring_buffer_record_on(tr->max_buffer.buffer);
1960         else
1961                 ring_buffer_record_off(tr->max_buffer.buffer);
1962
1963 #ifdef CONFIG_TRACER_SNAPSHOT
1964         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1965                 arch_spin_unlock(&tr->max_lock);
1966                 return;
1967         }
1968 #endif
1969         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1970
1971         __update_max_tr(tr, tsk, cpu);
1972
1973         arch_spin_unlock(&tr->max_lock);
1974
1975         /* Any waiters on the old snapshot buffer need to wake up */
1976         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1977 }
1978
1979 /**
1980  * update_max_tr_single - only copy one trace over, and reset the rest
1981  * @tr: the trace array to snapshot
1982  * @tsk: task with the latency
1983  * @cpu: the cpu of the buffer to copy.
1984  *
1985  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1986  */
1987 void
1988 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1989 {
1990         int ret;
1991
1992         if (tr->stop_count)
1993                 return;
1994
1995         WARN_ON_ONCE(!irqs_disabled());
1996         if (!tr->allocated_snapshot) {
1997                 /* Only the nop tracer should hit this when disabling */
1998                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1999                 return;
2000         }
2001
2002         arch_spin_lock(&tr->max_lock);
2003
2004         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2005
2006         if (ret == -EBUSY) {
2007                 /*
2008                  * We failed to swap the buffer due to a commit taking
2009                  * place on this CPU. We fail to record, but we reset
2010                  * the max trace buffer (no one writes directly to it)
2011                  * and flag that it failed.
2012                  * Another possible reason is that a resize is in progress.
2013                  */
2014                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2015                         "Failed to swap buffers due to commit or resize in progress\n");
2016         }
2017
2018         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2019
2020         __update_max_tr(tr, tsk, cpu);
2021         arch_spin_unlock(&tr->max_lock);
2022 }
2023
2024 #endif /* CONFIG_TRACER_MAX_TRACE */
2025
2026 struct pipe_wait {
2027         struct trace_iterator           *iter;
2028         int                             wait_index;
2029 };
2030
2031 static bool wait_pipe_cond(void *data)
2032 {
2033         struct pipe_wait *pwait = data;
2034         struct trace_iterator *iter = pwait->iter;
2035
2036         if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2037                 return true;
2038
2039         return iter->closed;
2040 }
2041
2042 static int wait_on_pipe(struct trace_iterator *iter, int full)
2043 {
2044         struct pipe_wait pwait;
2045         int ret;
2046
2047         /* Iterators are static; they should be either filled or empty */
2048         if (trace_buffer_iter(iter, iter->cpu_file))
2049                 return 0;
2050
2051         pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2052         pwait.iter = iter;
2053
2054         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2055                                wait_pipe_cond, &pwait);
2056
2057 #ifdef CONFIG_TRACER_MAX_TRACE
2058         /*
2059          * Make sure this is still the snapshot buffer, as if a snapshot were
2060          * to happen, this would now be the main buffer.
2061          */
2062         if (iter->snapshot)
2063                 iter->array_buffer = &iter->tr->max_buffer;
2064 #endif
2065         return ret;
2066 }
2067
2068 #ifdef CONFIG_FTRACE_STARTUP_TEST
2069 static bool selftests_can_run;
2070
2071 struct trace_selftests {
2072         struct list_head                list;
2073         struct tracer                   *type;
2074 };
2075
2076 static LIST_HEAD(postponed_selftests);
2077
2078 static int save_selftest(struct tracer *type)
2079 {
2080         struct trace_selftests *selftest;
2081
2082         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2083         if (!selftest)
2084                 return -ENOMEM;
2085
2086         selftest->type = type;
2087         list_add(&selftest->list, &postponed_selftests);
2088         return 0;
2089 }
2090
2091 static int run_tracer_selftest(struct tracer *type)
2092 {
2093         struct trace_array *tr = &global_trace;
2094         struct tracer *saved_tracer = tr->current_trace;
2095         int ret;
2096
2097         if (!type->selftest || tracing_selftest_disabled)
2098                 return 0;
2099
2100         /*
2101          * If a tracer registers early in boot up (before scheduling is
2102          * initialized and such), then do not run its selftest yet.
2103          * Instead, run it a little later in the boot process.
2104          */
2105         if (!selftests_can_run)
2106                 return save_selftest(type);
2107
2108         if (!tracing_is_on()) {
2109                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2110                         type->name);
2111                 return 0;
2112         }
2113
2114         /*
2115          * Run a selftest on this tracer.
2116          * Here we reset the trace buffer, and set the current
2117          * tracer to be this tracer. The tracer can then run some
2118          * internal tracing to verify that everything is in order.
2119          * If we fail, we do not register this tracer.
2120          */
2121         tracing_reset_online_cpus(&tr->array_buffer);
2122
2123         tr->current_trace = type;
2124
2125 #ifdef CONFIG_TRACER_MAX_TRACE
2126         if (type->use_max_tr) {
2127                 /* If we expanded the buffers, make sure the max is expanded too */
2128                 if (tr->ring_buffer_expanded)
2129                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2130                                            RING_BUFFER_ALL_CPUS);
2131                 tr->allocated_snapshot = true;
2132         }
2133 #endif
2134
2135         /* the test is responsible for initializing and enabling */
2136         pr_info("Testing tracer %s: ", type->name);
2137         ret = type->selftest(type, tr);
2138         /* the test is responsible for resetting too */
2139         tr->current_trace = saved_tracer;
2140         if (ret) {
2141                 printk(KERN_CONT "FAILED!\n");
2142                 /* Add the warning after printing 'FAILED' */
2143                 WARN_ON(1);
2144                 return -1;
2145         }
2146         /* Only reset on passing, to avoid touching corrupted buffers */
2147         tracing_reset_online_cpus(&tr->array_buffer);
2148
2149 #ifdef CONFIG_TRACER_MAX_TRACE
2150         if (type->use_max_tr) {
2151                 tr->allocated_snapshot = false;
2152
2153                 /* Shrink the max buffer again */
2154                 if (tr->ring_buffer_expanded)
2155                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2156                                            RING_BUFFER_ALL_CPUS);
2157         }
2158 #endif
2159
2160         printk(KERN_CONT "PASSED\n");
2161         return 0;
2162 }
2163
2164 static int do_run_tracer_selftest(struct tracer *type)
2165 {
2166         int ret;
2167
2168         /*
2169          * Tests can take a long time, especially if they are run one after the
2170          * other, as does happen during bootup when all the tracers are
2171          * registered. This could cause the soft lockup watchdog to trigger.
2172          */
2173         cond_resched();
2174
2175         tracing_selftest_running = true;
2176         ret = run_tracer_selftest(type);
2177         tracing_selftest_running = false;
2178
2179         return ret;
2180 }
2181
2182 static __init int init_trace_selftests(void)
2183 {
2184         struct trace_selftests *p, *n;
2185         struct tracer *t, **last;
2186         int ret;
2187
2188         selftests_can_run = true;
2189
2190         mutex_lock(&trace_types_lock);
2191
2192         if (list_empty(&postponed_selftests))
2193                 goto out;
2194
2195         pr_info("Running postponed tracer tests:\n");
2196
2197         tracing_selftest_running = true;
2198         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2199                 /* This loop can take minutes when sanitizers are enabled, so
2200                  * let's make sure we allow RCU processing.
2201                  */
2202                 cond_resched();
2203                 ret = run_tracer_selftest(p->type);
2204                 /* If the test fails, then warn and remove from available_tracers */
2205                 if (ret < 0) {
2206                         WARN(1, "tracer: %s failed selftest, disabling\n",
2207                              p->type->name);
2208                         last = &trace_types;
2209                         for (t = trace_types; t; t = t->next) {
2210                                 if (t == p->type) {
2211                                         *last = t->next;
2212                                         break;
2213                                 }
2214                                 last = &t->next;
2215                         }
2216                 }
2217                 list_del(&p->list);
2218                 kfree(p);
2219         }
2220         tracing_selftest_running = false;
2221
2222  out:
2223         mutex_unlock(&trace_types_lock);
2224
2225         return 0;
2226 }
2227 core_initcall(init_trace_selftests);
2228 #else
2229 static inline int run_tracer_selftest(struct tracer *type)
2230 {
2231         return 0;
2232 }
2233 static inline int do_run_tracer_selftest(struct tracer *type)
2234 {
2235         return 0;
2236 }
2237 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2238
2239 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2240
2241 static void __init apply_trace_boot_options(void);
2242
2243 /**
2244  * register_tracer - register a tracer with the ftrace system.
2245  * @type: the plugin for the tracer
2246  *
2247  * Register a new plugin tracer.
2248  */
2249 int __init register_tracer(struct tracer *type)
2250 {
2251         struct tracer *t;
2252         int ret = 0;
2253
2254         if (!type->name) {
2255                 pr_info("Tracer must have a name\n");
2256                 return -1;
2257         }
2258
2259         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2260                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2261                 return -1;
2262         }
2263
2264         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2265                 pr_warn("Can not register tracer %s due to lockdown\n",
2266                            type->name);
2267                 return -EPERM;
2268         }
2269
2270         mutex_lock(&trace_types_lock);
2271
2272         for (t = trace_types; t; t = t->next) {
2273                 if (strcmp(type->name, t->name) == 0) {
2274                         /* already found */
2275                         pr_info("Tracer %s already registered\n",
2276                                 type->name);
2277                         ret = -1;
2278                         goto out;
2279                 }
2280         }
2281
2282         if (!type->set_flag)
2283                 type->set_flag = &dummy_set_flag;
2284         if (!type->flags) {
2285                 /* allocate a dummy tracer_flags */
2286                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2287                 if (!type->flags) {
2288                         ret = -ENOMEM;
2289                         goto out;
2290                 }
2291                 type->flags->val = 0;
2292                 type->flags->opts = dummy_tracer_opt;
2293         } else
2294                 if (!type->flags->opts)
2295                         type->flags->opts = dummy_tracer_opt;
2296
2297         /* store the tracer for __set_tracer_option */
2298         type->flags->trace = type;
2299
2300         ret = do_run_tracer_selftest(type);
2301         if (ret < 0)
2302                 goto out;
2303
2304         type->next = trace_types;
2305         trace_types = type;
2306         add_tracer_options(&global_trace, type);
2307
2308  out:
2309         mutex_unlock(&trace_types_lock);
2310
2311         if (ret || !default_bootup_tracer)
2312                 goto out_unlock;
2313
2314         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2315                 goto out_unlock;
2316
2317         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2318         /* Do we want this tracer to start on bootup? */
2319         tracing_set_tracer(&global_trace, type->name);
2320         default_bootup_tracer = NULL;
2321
2322         apply_trace_boot_options();
2323
2324         /* Disable other selftests, since this tracer will break them. */
2325         disable_tracing_selftest("running a tracer");
2326
2327  out_unlock:
2328         return ret;
2329 }
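
/*
 * Editorial sketch (illustrative, not a tracer that exists in the tree):
 * a minimal built-in tracer registers itself from an __init function,
 * since register_tracer() is __init:
 *
 *	static int example_tracer_init(struct trace_array *tr)
 *	{
 *		return 0;
 *	}
 *
 *	static void example_tracer_reset(struct trace_array *tr)
 *	{
 *	}
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	static __init int init_example_tracer(void)
 *	{
 *		return register_tracer(&example_tracer);
 *	}
 *	core_initcall(init_example_tracer);
 *
 * Once registered, "example" shows up in the tracefs available_tracers
 * file and can be selected through current_tracer.
 */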
2330
2331 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2332 {
2333         struct trace_buffer *buffer = buf->buffer;
2334
2335         if (!buffer)
2336                 return;
2337
2338         ring_buffer_record_disable(buffer);
2339
2340         /* Make sure all commits have finished */
2341         synchronize_rcu();
2342         ring_buffer_reset_cpu(buffer, cpu);
2343
2344         ring_buffer_record_enable(buffer);
2345 }
2346
2347 void tracing_reset_online_cpus(struct array_buffer *buf)
2348 {
2349         struct trace_buffer *buffer = buf->buffer;
2350
2351         if (!buffer)
2352                 return;
2353
2354         ring_buffer_record_disable(buffer);
2355
2356         /* Make sure all commits have finished */
2357         synchronize_rcu();
2358
2359         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2360
2361         ring_buffer_reset_online_cpus(buffer);
2362
2363         ring_buffer_record_enable(buffer);
2364 }
2365
2366 /* Must have trace_types_lock held */
2367 void tracing_reset_all_online_cpus_unlocked(void)
2368 {
2369         struct trace_array *tr;
2370
2371         lockdep_assert_held(&trace_types_lock);
2372
2373         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2374                 if (!tr->clear_trace)
2375                         continue;
2376                 tr->clear_trace = false;
2377                 tracing_reset_online_cpus(&tr->array_buffer);
2378 #ifdef CONFIG_TRACER_MAX_TRACE
2379                 tracing_reset_online_cpus(&tr->max_buffer);
2380 #endif
2381         }
2382 }
2383
2384 void tracing_reset_all_online_cpus(void)
2385 {
2386         mutex_lock(&trace_types_lock);
2387         tracing_reset_all_online_cpus_unlocked();
2388         mutex_unlock(&trace_types_lock);
2389 }
2390
2391 int is_tracing_stopped(void)
2392 {
2393         return global_trace.stop_count;
2394 }
2395
2396 static void tracing_start_tr(struct trace_array *tr)
2397 {
2398         struct trace_buffer *buffer;
2399         unsigned long flags;
2400
2401         if (tracing_disabled)
2402                 return;
2403
2404         raw_spin_lock_irqsave(&tr->start_lock, flags);
2405         if (--tr->stop_count) {
2406                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2407                         /* Someone screwed up their debugging */
2408                         tr->stop_count = 0;
2409                 }
2410                 goto out;
2411         }
2412
2413         /* Prevent the buffers from switching */
2414         arch_spin_lock(&tr->max_lock);
2415
2416         buffer = tr->array_buffer.buffer;
2417         if (buffer)
2418                 ring_buffer_record_enable(buffer);
2419
2420 #ifdef CONFIG_TRACER_MAX_TRACE
2421         buffer = tr->max_buffer.buffer;
2422         if (buffer)
2423                 ring_buffer_record_enable(buffer);
2424 #endif
2425
2426         arch_spin_unlock(&tr->max_lock);
2427
2428  out:
2429         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2430 }
2431
2432 /**
2433  * tracing_start - quick start of the tracer
2434  *
2435  * If tracing is enabled but was stopped by tracing_stop,
2436  * this will start the tracer back up.
2437  */
2438 void tracing_start(void)
2439 {
2441         return tracing_start_tr(&global_trace);
2442 }
2443
2444 static void tracing_stop_tr(struct trace_array *tr)
2445 {
2446         struct trace_buffer *buffer;
2447         unsigned long flags;
2448
2449         raw_spin_lock_irqsave(&tr->start_lock, flags);
2450         if (tr->stop_count++)
2451                 goto out;
2452
2453         /* Prevent the buffers from switching */
2454         arch_spin_lock(&tr->max_lock);
2455
2456         buffer = tr->array_buffer.buffer;
2457         if (buffer)
2458                 ring_buffer_record_disable(buffer);
2459
2460 #ifdef CONFIG_TRACER_MAX_TRACE
2461         buffer = tr->max_buffer.buffer;
2462         if (buffer)
2463                 ring_buffer_record_disable(buffer);
2464 #endif
2465
2466         arch_spin_unlock(&tr->max_lock);
2467
2468  out:
2469         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2470 }
2471
2472 /**
2473  * tracing_stop - quick stop of the tracer
2474  *
2475  * Lightweight way to stop tracing. Use in conjunction with
2476  * tracing_start.
2477  */
2478 void tracing_stop(void)
2479 {
2480         return tracing_stop_tr(&global_trace);
2481 }
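
/*
 * Editorial note: tracing_stop()/tracing_start() nest through
 * tr->stop_count, so in-kernel users must keep the calls balanced:
 *
 *	tracing_stop();
 *	... inspect or dump the buffers without new events racing in ...
 *	tracing_start();
 *
 * An unbalanced tracing_start() is caught by the WARN_ON_ONCE() in
 * tracing_start_tr() above and the count is clamped back to zero.
 */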
2482
2483 /*
2484  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2485  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2486  * simplifies those functions and keeps them in sync.
2487  */
2488 enum print_line_t trace_handle_return(struct trace_seq *s)
2489 {
2490         return trace_seq_has_overflowed(s) ?
2491                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2492 }
2493 EXPORT_SYMBOL_GPL(trace_handle_return);
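
/*
 * Editorial sketch (illustrative): output callbacks typically end with
 * trace_handle_return() so that a trace_seq overflow is reported
 * uniformly. A hypothetical struct trace_event ->trace() handler:
 *
 *	static enum print_line_t
 *	example_trace_output(struct trace_iterator *iter, int flags,
 *			     struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "example event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */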
2494
2495 static unsigned short migration_disable_value(void)
2496 {
2497 #if defined(CONFIG_SMP)
2498         return current->migration_disabled;
2499 #else
2500         return 0;
2501 #endif
2502 }
2503
2504 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2505 {
2506         unsigned int trace_flags = irqs_status;
2507         unsigned int pc;
2508
2509         pc = preempt_count();
2510
2511         if (pc & NMI_MASK)
2512                 trace_flags |= TRACE_FLAG_NMI;
2513         if (pc & HARDIRQ_MASK)
2514                 trace_flags |= TRACE_FLAG_HARDIRQ;
2515         if (in_serving_softirq())
2516                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2517         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2518                 trace_flags |= TRACE_FLAG_BH_OFF;
2519
2520         if (tif_need_resched())
2521                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2522         if (test_preempt_need_resched())
2523                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2524         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2525                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2526 }
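
/*
 * Editorial note on the value packed by tracing_gen_ctx_irq_test() above
 * (derived directly from the code):
 *
 *	bits  0- 3: preempt_count(), capped at 0xf
 *	bits  4- 7: migration_disable_value(), capped at 0xf
 *	bits  8-15: unused here
 *	bits 16-31: TRACE_FLAG_* bits (irqs_status plus the flags set above)
 */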
2527
2528 struct ring_buffer_event *
2529 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2530                           int type,
2531                           unsigned long len,
2532                           unsigned int trace_ctx)
2533 {
2534         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2535 }
2536
2537 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2538 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2539 static int trace_buffered_event_ref;
2540
2541 /**
2542  * trace_buffered_event_enable - enable buffering events
2543  *
2544  * When events are being filtered, it is quicker to write the event
2545  * data into a temporary buffer if there is a good chance that the
2546  * event will not be committed. Discarding an event from the ring
2547  * buffer is not as fast as committing one, and is much slower than
2548  * copying the data and committing the copy.
2549  *
2550  * When events are to be filtered, allocate per-CPU buffers to write
2551  * the event data into. If an event is filtered and discarded, it is
2552  * simply dropped; otherwise, the entire data is committed in one
2553  * shot.
2554  */
2555 void trace_buffered_event_enable(void)
2556 {
2557         struct ring_buffer_event *event;
2558         struct page *page;
2559         int cpu;
2560
2561         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2562
2563         if (trace_buffered_event_ref++)
2564                 return;
2565
2566         for_each_tracing_cpu(cpu) {
2567                 page = alloc_pages_node(cpu_to_node(cpu),
2568                                         GFP_KERNEL | __GFP_NORETRY, 0);
2569                 /* This is just an optimization and can handle failures */
2570                 if (!page) {
2571                         pr_err("Failed to allocate event buffer\n");
2572                         break;
2573                 }
2574
2575                 event = page_address(page);
2576                 memset(event, 0, sizeof(*event));
2577
2578                 per_cpu(trace_buffered_event, cpu) = event;
2579
2580                 preempt_disable();
2581                 if (cpu == smp_processor_id() &&
2582                     __this_cpu_read(trace_buffered_event) !=
2583                     per_cpu(trace_buffered_event, cpu))
2584                         WARN_ON_ONCE(1);
2585                 preempt_enable();
2586         }
2587 }
2588
2589 static void enable_trace_buffered_event(void *data)
2590 {
2591         /* Probably not needed, but do it anyway */
2592         smp_rmb();
2593         this_cpu_dec(trace_buffered_event_cnt);
2594 }
2595
2596 static void disable_trace_buffered_event(void *data)
2597 {
2598         this_cpu_inc(trace_buffered_event_cnt);
2599 }
2600
2601 /**
2602  * trace_buffered_event_disable - disable buffering events
2603  *
2604  * When a filter is removed, it is faster to not use the buffered
2605  * events, and to commit directly into the ring buffer. Free up
2606  * the temp buffers when there are no more users. This requires
2607  * special synchronization with current events.
2608  */
2609 void trace_buffered_event_disable(void)
2610 {
2611         int cpu;
2612
2613         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2614
2615         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2616                 return;
2617
2618         if (--trace_buffered_event_ref)
2619                 return;
2620
2621         /* For each CPU, set the buffer as used. */
2622         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2623                          NULL, true);
2624
2625         /* Wait for all current users to finish */
2626         synchronize_rcu();
2627
2628         for_each_tracing_cpu(cpu) {
2629                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2630                 per_cpu(trace_buffered_event, cpu) = NULL;
2631         }
2632
2633         /*
2634          * Wait for all CPUs that potentially started checking if they can use
2635          * their event buffer only after the previous synchronize_rcu() call and
2636          * they still read a valid pointer from trace_buffered_event. It must be
2637          * ensured they don't see cleared trace_buffered_event_cnt else they
2638          * could wrongly decide to use the pointed-to buffer which is now freed.
2639          */
2640         synchronize_rcu();
2641
2642         /* For each CPU, relinquish the buffer */
2643         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2644                          true);
2645 }
2646
2647 static struct trace_buffer *temp_buffer;
2648
2649 struct ring_buffer_event *
2650 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2651                           struct trace_event_file *trace_file,
2652                           int type, unsigned long len,
2653                           unsigned int trace_ctx)
2654 {
2655         struct ring_buffer_event *entry;
2656         struct trace_array *tr = trace_file->tr;
2657         int val;
2658
2659         *current_rb = tr->array_buffer.buffer;
2660
2661         if (!tr->no_filter_buffering_ref &&
2662             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2663                 preempt_disable_notrace();
2664                 /*
2665                  * Filtering is on, so try to use the per cpu buffer first.
2666                  * This buffer will simulate a ring_buffer_event,
2667                  * where the type_len is zero and the array[0] will
2668                  * hold the full length.
2669                  * (see include/linux/ring_buffer.h for details on
2670                  *  how the ring_buffer_event is structured).
2671                  *
2672                  * Using a temp buffer during filtering and copying it
2673                  * on a matched filter is quicker than writing directly
2674                  * into the ring buffer and then discarding it when
2675                  * it doesn't match. That is because the discard
2676                  * requires several atomic operations to get right.
2677                  * Copying on match and doing nothing on a failed match
2678                  * is still quicker than no copy on match, but having
2679                  * to discard out of the ring buffer on a failed match.
2680                  */
2681                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2682                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2683
2684                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2685
2686                         /*
2687                          * Preemption is disabled, but interrupts and NMIs
2688                          * can still come in now. If that happens after
2689                          * the above increment, then it will have to go
2690                          * back to the old method of allocating the event
2691                          * on the ring buffer, and if the filter fails, it
2692                          * will have to call ring_buffer_discard_commit()
2693                          * to remove it.
2694                          *
2695                          * Need to also check the unlikely case that the
2696                          * length is bigger than the temp buffer size.
2697                          * If that happens, then the reserve is pretty much
2698                          * guaranteed to fail, as the ring buffer currently
2699                          * only allows events less than a page. But that may
2700                          * change in the future, so let the ring buffer reserve
2701                          * handle the failure in that case.
2702                          */
2703                         if (val == 1 && likely(len <= max_len)) {
2704                                 trace_event_setup(entry, type, trace_ctx);
2705                                 entry->array[0] = len;
2706                                 /* Return with preemption disabled */
2707                                 return entry;
2708                         }
2709                         this_cpu_dec(trace_buffered_event_cnt);
2710                 }
2711                 /* __trace_buffer_lock_reserve() disables preemption */
2712                 preempt_enable_notrace();
2713         }
2714
2715         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2716                                             trace_ctx);
2717         /*
2718          * If tracing is off, but we have triggers enabled,
2719          * we still need to look at the event data. Use the temp_buffer
2720          * to store the trace event for the trigger to use. It's recursion
2721          * safe and will not be recorded anywhere.
2722          */
2723         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2724                 *current_rb = temp_buffer;
2725                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2726                                                     trace_ctx);
2727         }
2728         return entry;
2729 }
2730 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2731
2732 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2733 static DEFINE_MUTEX(tracepoint_printk_mutex);
2734
2735 static void output_printk(struct trace_event_buffer *fbuffer)
2736 {
2737         struct trace_event_call *event_call;
2738         struct trace_event_file *file;
2739         struct trace_event *event;
2740         unsigned long flags;
2741         struct trace_iterator *iter = tracepoint_print_iter;
2742
2743         /* We should never get here if iter is NULL */
2744         if (WARN_ON_ONCE(!iter))
2745                 return;
2746
2747         event_call = fbuffer->trace_file->event_call;
2748         if (!event_call || !event_call->event.funcs ||
2749             !event_call->event.funcs->trace)
2750                 return;
2751
2752         file = fbuffer->trace_file;
2753         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2754             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2755              !filter_match_preds(file->filter, fbuffer->entry)))
2756                 return;
2757
2758         event = &fbuffer->trace_file->event_call->event;
2759
2760         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2761         trace_seq_init(&iter->seq);
2762         iter->ent = fbuffer->entry;
2763         event_call->event.funcs->trace(iter, 0, event);
2764         trace_seq_putc(&iter->seq, 0);
2765         printk("%s", iter->seq.buffer);
2766
2767         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2768 }
2769
2770 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2771                              void *buffer, size_t *lenp,
2772                              loff_t *ppos)
2773 {
2774         int save_tracepoint_printk;
2775         int ret;
2776
2777         mutex_lock(&tracepoint_printk_mutex);
2778         save_tracepoint_printk = tracepoint_printk;
2779
2780         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2781
2782         /*
2783          * This will force exiting early, as tracepoint_printk
2784          * is always zero when tracepoint_print_iter is not allocated.
2785          */
2786         if (!tracepoint_print_iter)
2787                 tracepoint_printk = 0;
2788
2789         if (save_tracepoint_printk == tracepoint_printk)
2790                 goto out;
2791
2792         if (tracepoint_printk)
2793                 static_key_enable(&tracepoint_printk_key.key);
2794         else
2795                 static_key_disable(&tracepoint_printk_key.key);
2796
2797  out:
2798         mutex_unlock(&tracepoint_printk_mutex);
2799
2800         return ret;
2801 }
2802
2803 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2804 {
2805         enum event_trigger_type tt = ETT_NONE;
2806         struct trace_event_file *file = fbuffer->trace_file;
2807
2808         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2809                         fbuffer->entry, &tt))
2810                 goto discard;
2811
2812         if (static_key_false(&tracepoint_printk_key.key))
2813                 output_printk(fbuffer);
2814
2815         if (static_branch_unlikely(&trace_event_exports_enabled))
2816                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2817
2818         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2819                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2820
2821 discard:
2822         if (tt)
2823                 event_triggers_post_call(file, tt);
2824
2825 }
2826 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2827
2828 /*
2829  * Skip 3:
2830  *
2831  *   trace_buffer_unlock_commit_regs()
2832  *   trace_event_buffer_commit()
2833  *   trace_event_raw_event_xxx()
2834  */
2835 # define STACK_SKIP 3
2836
2837 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2838                                      struct trace_buffer *buffer,
2839                                      struct ring_buffer_event *event,
2840                                      unsigned int trace_ctx,
2841                                      struct pt_regs *regs)
2842 {
2843         __buffer_unlock_commit(buffer, event);
2844
2845         /*
2846          * If regs is not set, then skip the necessary functions.
2847          * Note, we can still get here via blktrace, wakeup tracer
2848          * and mmiotrace, but that's ok if they lose a function or
2849          * two. They are not that meaningful.
2850          */
2851         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2852         ftrace_trace_userstack(tr, buffer, trace_ctx);
2853 }
2854
2855 /*
2856  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2857  */
2858 void
2859 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2860                                    struct ring_buffer_event *event)
2861 {
2862         __buffer_unlock_commit(buffer, event);
2863 }
2864
2865 void
2866 trace_function(struct trace_array *tr, unsigned long ip,
2867                unsigned long parent_ip, unsigned int trace_ctx)
2868 {
2869         struct trace_event_call *call = &event_function;
2870         struct trace_buffer *buffer = tr->array_buffer.buffer;
2871         struct ring_buffer_event *event;
2872         struct ftrace_entry *entry;
2873
2874         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2875                                             trace_ctx);
2876         if (!event)
2877                 return;
2878         entry   = ring_buffer_event_data(event);
2879         entry->ip                       = ip;
2880         entry->parent_ip                = parent_ip;
2881
2882         if (!call_filter_check_discard(call, entry, buffer, event)) {
2883                 if (static_branch_unlikely(&trace_function_exports_enabled))
2884                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2885                 __buffer_unlock_commit(buffer, event);
2886         }
2887 }
2888
2889 #ifdef CONFIG_STACKTRACE
2890
2891 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2892 #define FTRACE_KSTACK_NESTING   4
2893
2894 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2895
2896 struct ftrace_stack {
2897         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
2898 };
2899
2900
2901 struct ftrace_stacks {
2902         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
2903 };
2904
2905 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2906 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2907
2908 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2909                                  unsigned int trace_ctx,
2910                                  int skip, struct pt_regs *regs)
2911 {
2912         struct trace_event_call *call = &event_kernel_stack;
2913         struct ring_buffer_event *event;
2914         unsigned int size, nr_entries;
2915         struct ftrace_stack *fstack;
2916         struct stack_entry *entry;
2917         int stackidx;
2918
2919         /*
2920          * Add one, for this function and the call to stack_trace_save().
2921          * If regs is set, then these functions will not be in the way.
2922          */
2923 #ifndef CONFIG_UNWINDER_ORC
2924         if (!regs)
2925                 skip++;
2926 #endif
2927
2928         preempt_disable_notrace();
2929
2930         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2931
2932         /* This should never happen. If it does, yell once and skip */
2933         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2934                 goto out;
2935
2936         /*
2937          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2938          * interrupt will either see the value pre increment or post
2939          * increment. If the interrupt happens pre increment it will have
2940          * restored the counter when it returns.  We just need a barrier to
2941          * keep gcc from moving things around.
2942          */
2943         barrier();
2944
2945         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2946         size = ARRAY_SIZE(fstack->calls);
2947
2948         if (regs) {
2949                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2950                                                    size, skip);
2951         } else {
2952                 nr_entries = stack_trace_save(fstack->calls, size, skip);
2953         }
2954
2955         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2956                                     struct_size(entry, caller, nr_entries),
2957                                     trace_ctx);
2958         if (!event)
2959                 goto out;
2960         entry = ring_buffer_event_data(event);
2961
2962         entry->size = nr_entries;
2963         memcpy(&entry->caller, fstack->calls,
2964                flex_array_size(entry, caller, nr_entries));
2965
2966         if (!call_filter_check_discard(call, entry, buffer, event))
2967                 __buffer_unlock_commit(buffer, event);
2968
2969  out:
2970         /* Again, don't let gcc optimize things here */
2971         barrier();
2972         __this_cpu_dec(ftrace_stack_reserve);
2973         preempt_enable_notrace();
2974
2975 }
2976
2977 static inline void ftrace_trace_stack(struct trace_array *tr,
2978                                       struct trace_buffer *buffer,
2979                                       unsigned int trace_ctx,
2980                                       int skip, struct pt_regs *regs)
2981 {
2982         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2983                 return;
2984
2985         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
2986 }
2987
2988 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
2989                    int skip)
2990 {
2991         struct trace_buffer *buffer = tr->array_buffer.buffer;
2992
2993         if (rcu_is_watching()) {
2994                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
2995                 return;
2996         }
2997
2998         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
2999                 return;
3000
3001         /*
3002          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3003          * but if the above rcu_is_watching() failed, then the NMI
3004          * triggered someplace critical, and ct_irq_enter() should
3005          * not be called from NMI.
3006          */
3007         if (unlikely(in_nmi()))
3008                 return;
3009
3010         ct_irq_enter_irqson();
3011         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3012         ct_irq_exit_irqson();
3013 }
3014
3015 /**
3016  * trace_dump_stack - record a stack back trace in the trace buffer
3017  * @skip: Number of functions to skip (helper handlers)
3018  */
3019 void trace_dump_stack(int skip)
3020 {
3021         if (tracing_disabled || tracing_selftest_running)
3022                 return;
3023
3024 #ifndef CONFIG_UNWINDER_ORC
3025         /* Skip 1 to skip this function. */
3026         skip++;
3027 #endif
3028         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3029                              tracing_gen_ctx(), skip, NULL);
3030 }
3031 EXPORT_SYMBOL_GPL(trace_dump_stack);
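
/*
 * Editorial usage sketch: trace_dump_stack() can be dropped into a code
 * path under investigation to record the kernel backtrace into the trace
 * buffer instead of the console, e.g.:
 *
 *	if (unlikely(suspicious_state))
 *		trace_dump_stack(0);
 *
 * where suspicious_state is whatever condition is being chased. The stack
 * entry then shows up in the tracefs "trace" output alongside the
 * surrounding events.
 */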
3032
3033 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3034 static DEFINE_PER_CPU(int, user_stack_count);
3035
3036 static void
3037 ftrace_trace_userstack(struct trace_array *tr,
3038                        struct trace_buffer *buffer, unsigned int trace_ctx)
3039 {
3040         struct trace_event_call *call = &event_user_stack;
3041         struct ring_buffer_event *event;
3042         struct userstack_entry *entry;
3043
3044         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3045                 return;
3046
3047         /*
3048          * NMIs cannot handle page faults, even with fixups.
3049          * Saving the user stack can (and often does) fault.
3050          */
3051         if (unlikely(in_nmi()))
3052                 return;
3053
3054         /*
3055          * prevent recursion, since the user stack tracing may
3056          * trigger other kernel events.
3057          */
3058         preempt_disable();
3059         if (__this_cpu_read(user_stack_count))
3060                 goto out;
3061
3062         __this_cpu_inc(user_stack_count);
3063
3064         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3065                                             sizeof(*entry), trace_ctx);
3066         if (!event)
3067                 goto out_drop_count;
3068         entry   = ring_buffer_event_data(event);
3069
3070         entry->tgid             = current->tgid;
3071         memset(&entry->caller, 0, sizeof(entry->caller));
3072
3073         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3074         if (!call_filter_check_discard(call, entry, buffer, event))
3075                 __buffer_unlock_commit(buffer, event);
3076
3077  out_drop_count:
3078         __this_cpu_dec(user_stack_count);
3079  out:
3080         preempt_enable();
3081 }
3082 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3083 static void ftrace_trace_userstack(struct trace_array *tr,
3084                                    struct trace_buffer *buffer,
3085                                    unsigned int trace_ctx)
3086 {
3087 }
3088 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3089
3090 #endif /* CONFIG_STACKTRACE */
3091
3092 static inline void
3093 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3094                           unsigned long long delta)
3095 {
3096         entry->bottom_delta_ts = delta & U32_MAX;
3097         entry->top_delta_ts = (delta >> 32);
3098 }
3099
3100 void trace_last_func_repeats(struct trace_array *tr,
3101                              struct trace_func_repeats *last_info,
3102                              unsigned int trace_ctx)
3103 {
3104         struct trace_buffer *buffer = tr->array_buffer.buffer;
3105         struct func_repeats_entry *entry;
3106         struct ring_buffer_event *event;
3107         u64 delta;
3108
3109         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3110                                             sizeof(*entry), trace_ctx);
3111         if (!event)
3112                 return;
3113
3114         delta = ring_buffer_event_time_stamp(buffer, event) -
3115                 last_info->ts_last_call;
3116
3117         entry = ring_buffer_event_data(event);
3118         entry->ip = last_info->ip;
3119         entry->parent_ip = last_info->parent_ip;
3120         entry->count = last_info->count;
3121         func_repeats_set_delta_ts(entry, delta);
3122
3123         __buffer_unlock_commit(buffer, event);
3124 }
3125
3126 /* created for use with alloc_percpu */
3127 struct trace_buffer_struct {
3128         int nesting;
3129         char buffer[4][TRACE_BUF_SIZE];
3130 };
3131
3132 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3133
3134 /*
3135  * This allows for lockless recording.  If we're nested too deeply, then
3136  * this returns NULL.
3137  */
3138 static char *get_trace_buf(void)
3139 {
3140         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3141
3142         if (!trace_percpu_buffer || buffer->nesting >= 4)
3143                 return NULL;
3144
3145         buffer->nesting++;
3146
3147         /* Interrupts must see nesting incremented before we use the buffer */
3148         barrier();
3149         return &buffer->buffer[buffer->nesting - 1][0];
3150 }
3151
3152 static void put_trace_buf(void)
3153 {
3154         /* Don't let the decrement of nesting leak before this */
3155         barrier();
3156         this_cpu_dec(trace_percpu_buffer->nesting);
3157 }
3158
3159 static int alloc_percpu_trace_buffer(void)
3160 {
3161         struct trace_buffer_struct __percpu *buffers;
3162
3163         if (trace_percpu_buffer)
3164                 return 0;
3165
3166         buffers = alloc_percpu(struct trace_buffer_struct);
3167         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3168                 return -ENOMEM;
3169
3170         trace_percpu_buffer = buffers;
3171         return 0;
3172 }
3173
3174 static int buffers_allocated;
3175
3176 void trace_printk_init_buffers(void)
3177 {
3178         if (buffers_allocated)
3179                 return;
3180
3181         if (alloc_percpu_trace_buffer())
3182                 return;
3183
3184         /* trace_printk() is for debug use only. Don't use it in production. */
3185
3186         pr_warn("\n");
3187         pr_warn("**********************************************************\n");
3188         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3189         pr_warn("**                                                      **\n");
3190         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3191         pr_warn("**                                                      **\n");
3192         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3193         pr_warn("** unsafe for production use.                           **\n");
3194         pr_warn("**                                                      **\n");
3195         pr_warn("** If you see this message and you are not debugging    **\n");
3196         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3197         pr_warn("**                                                      **\n");
3198         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3199         pr_warn("**********************************************************\n");
3200
3201         /* Expand the buffers to set size */
3202         tracing_update_buffers(&global_trace);
3203
3204         buffers_allocated = 1;
3205
3206         /*
3207          * trace_printk_init_buffers() can be called by modules.
3208          * If that happens, then we need to start cmdline recording
3209          * directly here. If the global_trace.buffer is already
3210          * allocated here, then this was called by module code.
3211          */
3212         if (global_trace.array_buffer.buffer)
3213                 tracing_start_cmdline_record();
3214 }
3215 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3216
3217 void trace_printk_start_comm(void)
3218 {
3219         /* Start tracing comms if trace printk is set */
3220         if (!buffers_allocated)
3221                 return;
3222         tracing_start_cmdline_record();
3223 }
3224
3225 static void trace_printk_start_stop_comm(int enabled)
3226 {
3227         if (!buffers_allocated)
3228                 return;
3229
3230         if (enabled)
3231                 tracing_start_cmdline_record();
3232         else
3233                 tracing_stop_cmdline_record();
3234 }
3235
3236 /**
3237  * trace_vbprintk - write binary msg to tracing buffer
3238  * @ip:    The address of the caller
3239  * @fmt:   The string format to write to the buffer
3240  * @args:  Arguments for @fmt
3241  */
3242 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3243 {
3244         struct trace_event_call *call = &event_bprint;
3245         struct ring_buffer_event *event;
3246         struct trace_buffer *buffer;
3247         struct trace_array *tr = &global_trace;
3248         struct bprint_entry *entry;
3249         unsigned int trace_ctx;
3250         char *tbuffer;
3251         int len = 0, size;
3252
3253         if (unlikely(tracing_selftest_running || tracing_disabled))
3254                 return 0;
3255
3256         /* Don't pollute graph traces with trace_vprintk internals */
3257         pause_graph_tracing();
3258
3259         trace_ctx = tracing_gen_ctx();
3260         preempt_disable_notrace();
3261
3262         tbuffer = get_trace_buf();
3263         if (!tbuffer) {
3264                 len = 0;
3265                 goto out_nobuffer;
3266         }
3267
3268         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3269
3270         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3271                 goto out_put;
3272
3273         size = sizeof(*entry) + sizeof(u32) * len;
3274         buffer = tr->array_buffer.buffer;
3275         ring_buffer_nest_start(buffer);
3276         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3277                                             trace_ctx);
3278         if (!event)
3279                 goto out;
3280         entry = ring_buffer_event_data(event);
3281         entry->ip                       = ip;
3282         entry->fmt                      = fmt;
3283
3284         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3285         if (!call_filter_check_discard(call, entry, buffer, event)) {
3286                 __buffer_unlock_commit(buffer, event);
3287                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3288         }
3289
3290 out:
3291         ring_buffer_nest_end(buffer);
3292 out_put:
3293         put_trace_buf();
3294
3295 out_nobuffer:
3296         preempt_enable_notrace();
3297         unpause_graph_tracing();
3298
3299         return len;
3300 }
3301 EXPORT_SYMBOL_GPL(trace_vbprintk);
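/*
 * Roughly how data reaches trace_vbprintk(). The wrapper named below is
 * the usual path for a compile-time constant format string, but the exact
 * plumbing depends on the trace_printk() macro in this kernel:
 *
 *	trace_printk("x=%d y=%d\n", x, y);
 *	  -> __trace_bprintk(_THIS_IP_, "x=%d y=%d\n", x, y)
 *	  -> trace_vbprintk(ip, fmt, args)
 *
 * Only the format pointer and the raw argument words (packed by
 * vbin_printf() above) are stored in the ring buffer; the text itself is
 * not formatted until the trace is read back.
 */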
3302
3303 __printf(3, 0)
3304 static int
3305 __trace_array_vprintk(struct trace_buffer *buffer,
3306                       unsigned long ip, const char *fmt, va_list args)
3307 {
3308         struct trace_event_call *call = &event_print;
3309         struct ring_buffer_event *event;
3310         int len = 0, size;
3311         struct print_entry *entry;
3312         unsigned int trace_ctx;
3313         char *tbuffer;
3314
3315         if (tracing_disabled)
3316                 return 0;
3317
3318         /* Don't pollute graph traces with trace_vprintk internals */
3319         pause_graph_tracing();
3320
3321         trace_ctx = tracing_gen_ctx();
3322         preempt_disable_notrace();
3323
3324
3325         tbuffer = get_trace_buf();
3326         if (!tbuffer) {
3327                 len = 0;
3328                 goto out_nobuffer;
3329         }
3330
3331         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3332
3333         size = sizeof(*entry) + len + 1;
3334         ring_buffer_nest_start(buffer);
3335         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3336                                             trace_ctx);
3337         if (!event)
3338                 goto out;
3339         entry = ring_buffer_event_data(event);
3340         entry->ip = ip;
3341
3342         memcpy(&entry->buf, tbuffer, len + 1);
3343         if (!call_filter_check_discard(call, entry, buffer, event)) {
3344                 __buffer_unlock_commit(buffer, event);
3345                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3346         }
3347
3348 out:
3349         ring_buffer_nest_end(buffer);
3350         put_trace_buf();
3351
3352 out_nobuffer:
3353         preempt_enable_notrace();
3354         unpause_graph_tracing();
3355
3356         return len;
3357 }
3358
3359 __printf(3, 0)
3360 int trace_array_vprintk(struct trace_array *tr,
3361                         unsigned long ip, const char *fmt, va_list args)
3362 {
3363         if (tracing_selftest_running && tr == &global_trace)
3364                 return 0;
3365
3366         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3367 }
3368
3369 /**
3370  * trace_array_printk - Print a message to a specific instance
3371  * @tr: The instance trace_array descriptor
3372  * @ip: The instruction pointer that this is called from.
3373  * @fmt: The format to print (printf format)
3374  *
3375  * If a subsystem sets up its own instance, it may printk strings
3376  * into its tracing instance buffer using this function. Note, this
3377  * function will not write into the top level buffer (use
3378  * trace_printk() for that), as the top level buffer should only
3379  * contain events that can be individually disabled. trace_printk()
3380  * is only for debugging a kernel and should never be part of
3381  * normal operation.
3382  *
3383  * trace_array_printk() can be used instead, as it does not add
3384  * noise to the top level tracing buffer.
3385  *
3386  * Note, trace_array_init_printk() must be called on @tr before this
3387  * can be used.
3388  */
3389 __printf(3, 0)
3390 int trace_array_printk(struct trace_array *tr,
3391                        unsigned long ip, const char *fmt, ...)
3392 {
3393         int ret;
3394         va_list ap;
3395
3396         if (!tr)
3397                 return -ENOENT;
3398
3399         /* This is only allowed for created instances */
3400         if (tr == &global_trace)
3401                 return 0;
3402
3403         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3404                 return 0;
3405
3406         va_start(ap, fmt);
3407         ret = trace_array_vprintk(tr, ip, fmt, ap);
3408         va_end(ap);
3409         return ret;
3410 }
3411 EXPORT_SYMBOL_GPL(trace_array_printk);
3412
3413 /**
3414  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3415  * @tr: The trace array to initialize the buffers for
3416  *
3417  * As trace_array_printk() only writes into instances, such calls are
3418  * OK to have in the kernel (unlike trace_printk()). This must be
3419  * called before trace_array_printk() can be used on a trace_array.
3420  */
3421 int trace_array_init_printk(struct trace_array *tr)
3422 {
3423         if (!tr)
3424                 return -ENOENT;
3425
3426         /* This is only allowed for created instances */
3427         if (tr == &global_trace)
3428                 return -EINVAL;
3429
3430         return alloc_percpu_trace_buffer();
3431 }
3432 EXPORT_SYMBOL_GPL(trace_array_init_printk);
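/*
 * A minimal usage sketch for the two exported functions above. The
 * instance name is made up, and the argument list of
 * trace_array_get_by_name() varies between kernel versions, so treat
 * this only as an illustration:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_subsys");
 *	if (!tr)
 *		return -ENOMEM;
 *	if (trace_array_init_printk(tr))
 *		return -ENOMEM;
 *	trace_array_printk(tr, _THIS_IP_, "started, mode=%d\n", mode);
 */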
3433
3434 __printf(3, 4)
3435 int trace_array_printk_buf(struct trace_buffer *buffer,
3436                            unsigned long ip, const char *fmt, ...)
3437 {
3438         int ret;
3439         va_list ap;
3440
3441         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3442                 return 0;
3443
3444         va_start(ap, fmt);
3445         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3446         va_end(ap);
3447         return ret;
3448 }
3449
3450 __printf(2, 0)
3451 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3452 {
3453         return trace_array_vprintk(&global_trace, ip, fmt, args);
3454 }
3455 EXPORT_SYMBOL_GPL(trace_vprintk);
3456
3457 static void trace_iterator_increment(struct trace_iterator *iter)
3458 {
3459         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3460
3461         iter->idx++;
3462         if (buf_iter)
3463                 ring_buffer_iter_advance(buf_iter);
3464 }
3465
3466 static struct trace_entry *
3467 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3468                 unsigned long *lost_events)
3469 {
3470         struct ring_buffer_event *event;
3471         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3472
3473         if (buf_iter) {
3474                 event = ring_buffer_iter_peek(buf_iter, ts);
3475                 if (lost_events)
3476                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3477                                 (unsigned long)-1 : 0;
3478         } else {
3479                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3480                                          lost_events);
3481         }
3482
3483         if (event) {
3484                 iter->ent_size = ring_buffer_event_length(event);
3485                 return ring_buffer_event_data(event);
3486         }
3487         iter->ent_size = 0;
3488         return NULL;
3489 }
3490
3491 static struct trace_entry *
3492 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3493                   unsigned long *missing_events, u64 *ent_ts)
3494 {
3495         struct trace_buffer *buffer = iter->array_buffer->buffer;
3496         struct trace_entry *ent, *next = NULL;
3497         unsigned long lost_events = 0, next_lost = 0;
3498         int cpu_file = iter->cpu_file;
3499         u64 next_ts = 0, ts;
3500         int next_cpu = -1;
3501         int next_size = 0;
3502         int cpu;
3503
3504         /*
3505          * If we are in a per_cpu trace file, don't bother iterating over
3506          * all CPUs; just peek at that CPU directly.
3507          */
3508         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3509                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3510                         return NULL;
3511                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3512                 if (ent_cpu)
3513                         *ent_cpu = cpu_file;
3514
3515                 return ent;
3516         }
3517
3518         for_each_tracing_cpu(cpu) {
3519
3520                 if (ring_buffer_empty_cpu(buffer, cpu))
3521                         continue;
3522
3523                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3524
3525                 /*
3526                  * Pick the entry with the smallest timestamp:
3527                  */
3528                 if (ent && (!next || ts < next_ts)) {
3529                         next = ent;
3530                         next_cpu = cpu;
3531                         next_ts = ts;
3532                         next_lost = lost_events;
3533                         next_size = iter->ent_size;
3534                 }
3535         }
3536
3537         iter->ent_size = next_size;
3538
3539         if (ent_cpu)
3540                 *ent_cpu = next_cpu;
3541
3542         if (ent_ts)
3543                 *ent_ts = next_ts;
3544
3545         if (missing_events)
3546                 *missing_events = next_lost;
3547
3548         return next;
3549 }
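/*
 * In the RING_BUFFER_ALL_CPUS case above, the per-CPU buffers are merged
 * on the fly: each call peeks at every non-empty CPU and hands back the
 * entry with the smallest timestamp, so the reader sees a single stream
 * in global time order even though every CPU records into its own buffer.
 */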
3550
3551 #define STATIC_FMT_BUF_SIZE     128
3552 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3553
3554 char *trace_iter_expand_format(struct trace_iterator *iter)
3555 {
3556         char *tmp;
3557
3558         /*
3559          * iter->tr is NULL when used with tp_printk, in which case this
3560          * can be called from a context where krealloc() is not safe.
3561          */
3562         if (!iter->tr || iter->fmt == static_fmt_buf)
3563                 return NULL;
3564
3565         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3566                        GFP_KERNEL);
3567         if (tmp) {
3568                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3569                 iter->fmt = tmp;
3570         }
3571
3572         return tmp;
3573 }
3574
3575 /* Returns true if the string is safe to dereference from an event */
3576 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3577                            bool star, int len)
3578 {
3579         unsigned long addr = (unsigned long)str;
3580         struct trace_event *trace_event;
3581         struct trace_event_call *event;
3582
3583         /* Ignore strings with no length */
3584         if (star && !len)
3585                 return true;
3586
3587         /* OK if part of the event data */
3588         if ((addr >= (unsigned long)iter->ent) &&
3589             (addr < (unsigned long)iter->ent + iter->ent_size))
3590                 return true;
3591
3592         /* OK if part of the temp seq buffer */
3593         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3594             (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3595                 return true;
3596
3597         /* Core rodata can not be freed */
3598         if (is_kernel_rodata(addr))
3599                 return true;
3600
3601         if (trace_is_tracepoint_string(str))
3602                 return true;
3603
3604         /*
3605          * Now this could be a module event, referencing core module
3606          * data, which is OK.
3607          */
3608         if (!iter->ent)
3609                 return false;
3610
3611         trace_event = ftrace_find_event(iter->ent->type);
3612         if (!trace_event)
3613                 return false;
3614
3615         event = container_of(trace_event, struct trace_event_call, event);
3616         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3617                 return false;
3618
3619         /* Would rather have rodata, but this will suffice */
3620         if (within_module_core(addr, event->module))
3621                 return true;
3622
3623         return false;
3624 }
3625
3626 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3627
3628 static int test_can_verify_check(const char *fmt, ...)
3629 {
3630         char buf[16];
3631         va_list ap;
3632         int ret;
3633
3634         /*
3635          * The verifier depends on vsnprintf() modifying the va_list
3636          * passed to it, i.e. on the va_list being passed by reference.
3637          * Some architectures (like x86_32) pass it by value, which means
3638          * that vsnprintf() does not modify the caller's va_list, and the
3639          * verifier would then need to understand every conversion that
3640          * vsnprintf() can consume. If the va_list is passed by value,
3641          * the verifier is therefore disabled.
3642          */
3643         va_start(ap, fmt);
3644         vsnprintf(buf, 16, "%d", ap);
3645         ret = va_arg(ap, int);
3646         va_end(ap);
3647
3648         return ret;
3649 }
3650
3651 static void test_can_verify(void)
3652 {
3653         if (!test_can_verify_check("%d %d", 0, 1)) {
3654                 pr_info("trace event string verifier disabled\n");
3655                 static_branch_inc(&trace_no_verify);
3656         }
3657 }
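/*
 * Worked example of the check above with test_can_verify_check("%d %d", 0, 1):
 * vsnprintf() consumes the "%d" and with it the first variadic argument (0).
 * If the architecture passes va_list by reference, the following va_arg()
 * returns the second argument (1) and the verifier stays enabled; if va_list
 * is passed by value, va_arg() still returns the first argument (0) and the
 * verifier is disabled.
 */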
3658
3659 /**
3660  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3661  * @iter: The iterator that holds the seq buffer and the event being printed
3662  * @fmt: The format used to print the event
3663  * @ap: The va_list holding the data to print from @fmt.
3664  *
3665  * This writes the data into the @iter->seq buffer using the data from
3666  * @fmt and @ap. If the format has a %s, then the source of the string
3667  * is examined to make sure it is safe to print, otherwise it will
3668  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3669  * pointer.
3670  */
3671 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3672                          va_list ap)
3673 {
3674         const char *p = fmt;
3675         const char *str;
3676         int i, j;
3677
3678         if (WARN_ON_ONCE(!fmt))
3679                 return;
3680
3681         if (static_branch_unlikely(&trace_no_verify))
3682                 goto print;
3683
3684         /* Don't bother checking when doing a ftrace_dump() */
3685         if (iter->fmt == static_fmt_buf)
3686                 goto print;
3687
3688         while (*p) {
3689                 bool star = false;
3690                 int len = 0;
3691
3692                 j = 0;
3693
3694                 /* We only care about %s and variants */
3695                 for (i = 0; p[i]; i++) {
3696                         if (i + 1 >= iter->fmt_size) {
3697                                 /*
3698                                  * If we can't expand the copy buffer,
3699                                  * just print it.
3700                                  */
3701                                 if (!trace_iter_expand_format(iter))
3702                                         goto print;
3703                         }
3704
3705                         if (p[i] == '\\' && p[i+1]) {
3706                                 i++;
3707                                 continue;
3708                         }
3709                         if (p[i] == '%') {
3710                                 /* Need to test cases like %08.*s */
3711                                 for (j = 1; p[i+j]; j++) {
3712                                         if (isdigit(p[i+j]) ||
3713                                             p[i+j] == '.')
3714                                                 continue;
3715                                         if (p[i+j] == '*') {
3716                                                 star = true;
3717                                                 continue;
3718                                         }
3719                                         break;
3720                                 }
3721                                 if (p[i+j] == 's')
3722                                         break;
3723                                 star = false;
3724                         }
3725                         j = 0;
3726                 }
3727                 /* If no %s found then just print normally */
3728                 if (!p[i])
3729                         break;
3730
3731                 /* Copy up to the %s, and print that */
3732                 strncpy(iter->fmt, p, i);
3733                 iter->fmt[i] = '\0';
3734                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3735
3736                 /*
3737                  * If iter->seq is full, the above call no longer guarantees
3738                  * that ap is in sync with fmt processing, and further calls
3739                  * to va_arg() can return wrong positional arguments.
3740                  *
3741                  * Ensure that ap is no longer used in this case.
3742                  */
3743                 if (iter->seq.full) {
3744                         p = "";
3745                         break;
3746                 }
3747
3748                 if (star)
3749                         len = va_arg(ap, int);
3750
3751                 /* The ap now points to the string data of the %s */
3752                 str = va_arg(ap, const char *);
3753
3754                 /*
3755                  * If you hit this warning, it is likely that the
3756                  * trace event in question used %s on a string that
3757                  * was saved at the time of the event, but may not be
3758                  * around when the trace is read. Use __string(),
3759                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3760                  * instead. See samples/trace_events/trace-events-sample.h
3761                  * for reference.
3762                  */
3763                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3764                               "fmt: '%s' current_buffer: '%s'",
3765                               fmt, seq_buf_str(&iter->seq.seq))) {
3766                         int ret;
3767
3768                         /* Try to safely read the string */
3769                         if (star) {
3770                                 if (len + 1 > iter->fmt_size)
3771                                         len = iter->fmt_size - 1;
3772                                 if (len < 0)
3773                                         len = 0;
3774                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3775                                 iter->fmt[len] = 0;
3776                                 star = false;
3777                         } else {
3778                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3779                                                                   iter->fmt_size);
3780                         }
3781                         if (ret < 0)
3782                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3783                         else
3784                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3785                                                  str, iter->fmt);
3786                         str = "[UNSAFE-MEMORY]";
3787                         strcpy(iter->fmt, "%s");
3788                 } else {
3789                         strncpy(iter->fmt, p + i, j + 1);
3790                         iter->fmt[j+1] = '\0';
3791                 }
3792                 if (star)
3793                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3794                 else
3795                         trace_seq_printf(&iter->seq, iter->fmt, str);
3796
3797                 p += i + j + 1;
3798         }
3799  print:
3800         if (*p)
3801                 trace_seq_vprintf(&iter->seq, p, ap);
3802 }
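/*
 * A sketch of the situation the check above is meant to catch. The event is
 * made up for illustration; see samples/trace_events/trace-events-sample.h
 * for the real reference. Recording only the pointer is unsafe, because the
 * string it points to may be gone by the time the trace is read:
 *
 *	TP_STRUCT__entry(__field(const char *, name)),
 *	TP_fast_assign(__entry->name = name;),
 *	TP_printk("name=%s", __entry->name)
 *
 * The safe form copies the string into the event itself (note that the
 * number of arguments __assign_str() takes differs between kernel versions):
 *
 *	TP_STRUCT__entry(__string(name, name)),
 *	TP_fast_assign(__assign_str(name, name);),
 *	TP_printk("name=%s", __get_str(name))
 */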
3803
3804 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3805 {
3806         const char *p, *new_fmt;
3807         char *q;
3808
3809         if (WARN_ON_ONCE(!fmt))
3810                 return fmt;
3811
3812         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3813                 return fmt;
3814
3815         p = fmt;
3816         new_fmt = q = iter->fmt;
3817         while (*p) {
3818                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3819                         if (!trace_iter_expand_format(iter))
3820                                 return fmt;
3821
3822                         q += iter->fmt - new_fmt;
3823                         new_fmt = iter->fmt;
3824                 }
3825
3826                 *q++ = *p++;
3827
3828                 /* Replace %p with %px */
3829                 if (p[-1] == '%') {
3830                         if (p[0] == '%') {
3831                                 *q++ = *p++;
3832                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3833                                 *q++ = *p++;
3834                                 *q++ = 'x';
3835                         }
3836                 }
3837         }
3838         *q = '\0';
3839
3840         return new_fmt;
3841 }
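/*
 * Example of the rewrite performed above when hash-ptr is disabled (the
 * format strings are illustrative):
 *
 *	"comm=%s addr=%p"  ->  "comm=%s addr=%px"
 *	"sym=%pS"          ->  "sym=%pS"      (extended %p forms are kept)
 *	"pct=100%%"        ->  "pct=100%%"    (escaped '%' is left alone)
 *
 * Only a bare "%p" not followed by an alphanumeric extension is turned
 * into "%px", so that raw addresses are printed instead of hashed ones.
 */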
3842
3843 #define STATIC_TEMP_BUF_SIZE    128
3844 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3845
3846 /* Find the next real entry, without updating the iterator itself */
3847 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3848                                           int *ent_cpu, u64 *ent_ts)
3849 {
3850         /* __find_next_entry will reset ent_size */
3851         int ent_size = iter->ent_size;
3852         struct trace_entry *entry;
3853
3854         /*
3855          * If called from ftrace_dump(), then the iter->temp buffer
3856          * will be the static_temp_buf and not created from kmalloc.
3857          * If the entry size is greater than the buffer, we cannot
3858          * save it. Just return NULL in that case. This is only
3859          * used to add markers when two consecutive events' time
3860          * stamps have a large delta. See trace_print_lat_context().
3861          */
3862         if (iter->temp == static_temp_buf &&
3863             STATIC_TEMP_BUF_SIZE < ent_size)
3864                 return NULL;
3865
3866         /*
3867          * __find_next_entry() may call peek_next_entry(), which in turn
3868          * may call ring_buffer_peek(), making the contents of iter->ent
3869          * undefined. Copy iter->ent now.
3870          */
3871         if (iter->ent && iter->ent != iter->temp) {
3872                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3873                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3874                         void *temp;
3875                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3876                         if (!temp)
3877                                 return NULL;
3878                         kfree(iter->temp);
3879                         iter->temp = temp;
3880                         iter->temp_size = iter->ent_size;
3881                 }
3882                 memcpy(iter->temp, iter->ent, iter->ent_size);
3883                 iter->ent = iter->temp;
3884         }
3885         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3886         /* Put back the original ent_size */
3887         iter->ent_size = ent_size;
3888
3889         return entry;
3890 }
3891
3892 /* Find the next real entry, and increment the iterator to the next entry */
3893 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3894 {
3895         iter->ent = __find_next_entry(iter, &iter->cpu,
3896                                       &iter->lost_events, &iter->ts);
3897
3898         if (iter->ent)
3899                 trace_iterator_increment(iter);
3900
3901         return iter->ent ? iter : NULL;
3902 }
3903
3904 static void trace_consume(struct trace_iterator *iter)
3905 {
3906         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3907                             &iter->lost_events);
3908 }
3909
3910 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3911 {
3912         struct trace_iterator *iter = m->private;
3913         int i = (int)*pos;
3914         void *ent;
3915
3916         WARN_ON_ONCE(iter->leftover);
3917
3918         (*pos)++;
3919
3920         /* can't go backwards */
3921         if (iter->idx > i)
3922                 return NULL;
3923
3924         if (iter->idx < 0)
3925                 ent = trace_find_next_entry_inc(iter);
3926         else
3927                 ent = iter;
3928
3929         while (ent && iter->idx < i)
3930                 ent = trace_find_next_entry_inc(iter);
3931
3932         iter->pos = *pos;
3933
3934         return ent;
3935 }
3936
3937 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3938 {
3939         struct ring_buffer_iter *buf_iter;
3940         unsigned long entries = 0;
3941         u64 ts;
3942
3943         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3944
3945         buf_iter = trace_buffer_iter(iter, cpu);
3946         if (!buf_iter)
3947                 return;
3948
3949         ring_buffer_iter_reset(buf_iter);
3950
3951         /*
3952          * With the max latency tracers, it is possible that a reset
3953          * never took place on a cpu. This is evidenced by the
3954          * timestamp being before the start of the buffer.
3955          */
3956         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3957                 if (ts >= iter->array_buffer->time_start)
3958                         break;
3959                 entries++;
3960                 ring_buffer_iter_advance(buf_iter);
3961         }
3962
3963         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3964 }
3965
3966 /*
3967  * The current tracer is copied to the iterator so that a global lock
3968  * does not have to be held for the whole read.
3969  */
3970 static void *s_start(struct seq_file *m, loff_t *pos)
3971 {
3972         struct trace_iterator *iter = m->private;
3973         struct trace_array *tr = iter->tr;
3974         int cpu_file = iter->cpu_file;
3975         void *p = NULL;
3976         loff_t l = 0;
3977         int cpu;
3978
3979         mutex_lock(&trace_types_lock);
3980         if (unlikely(tr->current_trace != iter->trace)) {
3981                 /* Close iter->trace before switching to the new current tracer */
3982                 if (iter->trace->close)
3983                         iter->trace->close(iter);
3984                 iter->trace = tr->current_trace;
3985                 /* Reopen the new current tracer */
3986                 if (iter->trace->open)
3987                         iter->trace->open(iter);
3988         }
3989         mutex_unlock(&trace_types_lock);
3990
3991 #ifdef CONFIG_TRACER_MAX_TRACE
3992         if (iter->snapshot && iter->trace->use_max_tr)
3993                 return ERR_PTR(-EBUSY);
3994 #endif
3995
3996         if (*pos != iter->pos) {
3997                 iter->ent = NULL;
3998                 iter->cpu = 0;
3999                 iter->idx = -1;
4000
4001                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4002                         for_each_tracing_cpu(cpu)
4003                                 tracing_iter_reset(iter, cpu);
4004                 } else
4005                         tracing_iter_reset(iter, cpu_file);
4006
4007                 iter->leftover = 0;
4008                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4009                         ;
4010
4011         } else {
4012                 /*
4013                  * If we overflowed the seq_file before, then we want
4014                  * to just reuse the trace_seq buffer again.
4015                  */
4016                 if (iter->leftover)
4017                         p = iter;
4018                 else {
4019                         l = *pos - 1;
4020                         p = s_next(m, p, &l);
4021                 }
4022         }
4023
4024         trace_event_read_lock();
4025         trace_access_lock(cpu_file);
4026         return p;
4027 }
4028
4029 static void s_stop(struct seq_file *m, void *p)
4030 {
4031         struct trace_iterator *iter = m->private;
4032
4033 #ifdef CONFIG_TRACER_MAX_TRACE
4034         if (iter->snapshot && iter->trace->use_max_tr)
4035                 return;
4036 #endif
4037
4038         trace_access_unlock(iter->cpu_file);
4039         trace_event_read_unlock();
4040 }
4041
4042 static void
4043 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4044                       unsigned long *entries, int cpu)
4045 {
4046         unsigned long count;
4047
4048         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4049         /*
4050          * If this buffer has skipped entries, then we hold all
4051          * entries for the trace and we need to ignore the
4052          * ones before the time stamp.
4053          */
4054         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4055                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4056                 /* total is the same as the entries */
4057                 *total = count;
4058         } else
4059                 *total = count +
4060                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4061         *entries = count;
4062 }
4063
4064 static void
4065 get_total_entries(struct array_buffer *buf,
4066                   unsigned long *total, unsigned long *entries)
4067 {
4068         unsigned long t, e;
4069         int cpu;
4070
4071         *total = 0;
4072         *entries = 0;
4073
4074         for_each_tracing_cpu(cpu) {
4075                 get_total_entries_cpu(buf, &t, &e, cpu);
4076                 *total += t;
4077                 *entries += e;
4078         }
4079 }
4080
4081 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4082 {
4083         unsigned long total, entries;
4084
4085         if (!tr)
4086                 tr = &global_trace;
4087
4088         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4089
4090         return entries;
4091 }
4092
4093 unsigned long trace_total_entries(struct trace_array *tr)
4094 {
4095         unsigned long total, entries;
4096
4097         if (!tr)
4098                 tr = &global_trace;
4099
4100         get_total_entries(&tr->array_buffer, &total, &entries);
4101
4102         return entries;
4103 }
4104
4105 static void print_lat_help_header(struct seq_file *m)
4106 {
4107         seq_puts(m, "#                    _------=> CPU#            \n"
4108                     "#                   / _-----=> irqs-off/BH-disabled\n"
4109                     "#                  | / _----=> need-resched    \n"
4110                     "#                  || / _---=> hardirq/softirq \n"
4111                     "#                  ||| / _--=> preempt-depth   \n"
4112                     "#                  |||| / _-=> migrate-disable \n"
4113                     "#                  ||||| /     delay           \n"
4114                     "#  cmd     pid     |||||| time  |   caller     \n"
4115                     "#     \\   /        ||||||  \\    |    /       \n");
4116 }
4117
4118 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4119 {
4120         unsigned long total;
4121         unsigned long entries;
4122
4123         get_total_entries(buf, &total, &entries);
4124         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4125                    entries, total, num_online_cpus());
4126         seq_puts(m, "#\n");
4127 }
4128
4129 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4130                                    unsigned int flags)
4131 {
4132         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4133
4134         print_event_info(buf, m);
4135
4136         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4137         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4138 }
4139
4140 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4141                                        unsigned int flags)
4142 {
4143         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4144         static const char space[] = "            ";
4145         int prec = tgid ? 12 : 2;
4146
4147         print_event_info(buf, m);
4148
4149         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4150         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4151         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4152         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4153         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4154         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4155         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4156         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4157 }
4158
4159 void
4160 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4161 {
4162         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4163         struct array_buffer *buf = iter->array_buffer;
4164         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4165         struct tracer *type = iter->trace;
4166         unsigned long entries;
4167         unsigned long total;
4168         const char *name = type->name;
4169
4170         get_total_entries(buf, &total, &entries);
4171
4172         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4173                    name, init_utsname()->release);
4174         seq_puts(m, "# -----------------------------------"
4175                  "---------------------------------\n");
4176         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4177                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4178                    nsecs_to_usecs(data->saved_latency),
4179                    entries,
4180                    total,
4181                    buf->cpu,
4182                    preempt_model_none()      ? "server" :
4183                    preempt_model_voluntary() ? "desktop" :
4184                    preempt_model_full()      ? "preempt" :
4185                    preempt_model_rt()        ? "preempt_rt" :
4186                    "unknown",
4187                    /* These are reserved for later use */
4188                    0, 0, 0, 0);
4189 #ifdef CONFIG_SMP
4190         seq_printf(m, " #P:%d)\n", num_online_cpus());
4191 #else
4192         seq_puts(m, ")\n");
4193 #endif
4194         seq_puts(m, "#    -----------------\n");
4195         seq_printf(m, "#    | task: %.16s-%d "
4196                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4197                    data->comm, data->pid,
4198                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4199                    data->policy, data->rt_priority);
4200         seq_puts(m, "#    -----------------\n");
4201
4202         if (data->critical_start) {
4203                 seq_puts(m, "#  => started at: ");
4204                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4205                 trace_print_seq(m, &iter->seq);
4206                 seq_puts(m, "\n#  => ended at:   ");
4207                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4208                 trace_print_seq(m, &iter->seq);
4209                 seq_puts(m, "\n#\n");
4210         }
4211
4212         seq_puts(m, "#\n");
4213 }
4214
4215 static void test_cpu_buff_start(struct trace_iterator *iter)
4216 {
4217         struct trace_seq *s = &iter->seq;
4218         struct trace_array *tr = iter->tr;
4219
4220         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4221                 return;
4222
4223         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4224                 return;
4225
4226         if (cpumask_available(iter->started) &&
4227             cpumask_test_cpu(iter->cpu, iter->started))
4228                 return;
4229
4230         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4231                 return;
4232
4233         if (cpumask_available(iter->started))
4234                 cpumask_set_cpu(iter->cpu, iter->started);
4235
4236         /* Don't print started cpu buffer for the first entry of the trace */
4237         if (iter->idx > 1)
4238                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4239                                 iter->cpu);
4240 }
4241
4242 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4243 {
4244         struct trace_array *tr = iter->tr;
4245         struct trace_seq *s = &iter->seq;
4246         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4247         struct trace_entry *entry;
4248         struct trace_event *event;
4249
4250         entry = iter->ent;
4251
4252         test_cpu_buff_start(iter);
4253
4254         event = ftrace_find_event(entry->type);
4255
4256         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4257                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4258                         trace_print_lat_context(iter);
4259                 else
4260                         trace_print_context(iter);
4261         }
4262
4263         if (trace_seq_has_overflowed(s))
4264                 return TRACE_TYPE_PARTIAL_LINE;
4265
4266         if (event) {
4267                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4268                         return print_event_fields(iter, event);
4269                 return event->funcs->trace(iter, sym_flags, event);
4270         }
4271
4272         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4273
4274         return trace_handle_return(s);
4275 }
4276
4277 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4278 {
4279         struct trace_array *tr = iter->tr;
4280         struct trace_seq *s = &iter->seq;
4281         struct trace_entry *entry;
4282         struct trace_event *event;
4283
4284         entry = iter->ent;
4285
4286         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4287                 trace_seq_printf(s, "%d %d %llu ",
4288                                  entry->pid, iter->cpu, iter->ts);
4289
4290         if (trace_seq_has_overflowed(s))
4291                 return TRACE_TYPE_PARTIAL_LINE;
4292
4293         event = ftrace_find_event(entry->type);
4294         if (event)
4295                 return event->funcs->raw(iter, 0, event);
4296
4297         trace_seq_printf(s, "%d ?\n", entry->type);
4298
4299         return trace_handle_return(s);
4300 }
4301
4302 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4303 {
4304         struct trace_array *tr = iter->tr;
4305         struct trace_seq *s = &iter->seq;
4306         unsigned char newline = '\n';
4307         struct trace_entry *entry;
4308         struct trace_event *event;
4309
4310         entry = iter->ent;
4311
4312         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4313                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4314                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4315                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4316                 if (trace_seq_has_overflowed(s))
4317                         return TRACE_TYPE_PARTIAL_LINE;
4318         }
4319
4320         event = ftrace_find_event(entry->type);
4321         if (event) {
4322                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4323                 if (ret != TRACE_TYPE_HANDLED)
4324                         return ret;
4325         }
4326
4327         SEQ_PUT_FIELD(s, newline);
4328
4329         return trace_handle_return(s);
4330 }
4331
4332 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4333 {
4334         struct trace_array *tr = iter->tr;
4335         struct trace_seq *s = &iter->seq;
4336         struct trace_entry *entry;
4337         struct trace_event *event;
4338
4339         entry = iter->ent;
4340
4341         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4342                 SEQ_PUT_FIELD(s, entry->pid);
4343                 SEQ_PUT_FIELD(s, iter->cpu);
4344                 SEQ_PUT_FIELD(s, iter->ts);
4345                 if (trace_seq_has_overflowed(s))
4346                         return TRACE_TYPE_PARTIAL_LINE;
4347         }
4348
4349         event = ftrace_find_event(entry->type);
4350         return event ? event->funcs->binary(iter, 0, event) :
4351                 TRACE_TYPE_HANDLED;
4352 }
4353
4354 int trace_empty(struct trace_iterator *iter)
4355 {
4356         struct ring_buffer_iter *buf_iter;
4357         int cpu;
4358
4359         /* If we are looking at one CPU buffer, only check that one */
4360         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4361                 cpu = iter->cpu_file;
4362                 buf_iter = trace_buffer_iter(iter, cpu);
4363                 if (buf_iter) {
4364                         if (!ring_buffer_iter_empty(buf_iter))
4365                                 return 0;
4366                 } else {
4367                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4368                                 return 0;
4369                 }
4370                 return 1;
4371         }
4372
4373         for_each_tracing_cpu(cpu) {
4374                 buf_iter = trace_buffer_iter(iter, cpu);
4375                 if (buf_iter) {
4376                         if (!ring_buffer_iter_empty(buf_iter))
4377                                 return 0;
4378                 } else {
4379                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4380                                 return 0;
4381                 }
4382         }
4383
4384         return 1;
4385 }
4386
4387 /*  Called with trace_event_read_lock() held. */
4388 enum print_line_t print_trace_line(struct trace_iterator *iter)
4389 {
4390         struct trace_array *tr = iter->tr;
4391         unsigned long trace_flags = tr->trace_flags;
4392         enum print_line_t ret;
4393
4394         if (iter->lost_events) {
4395                 if (iter->lost_events == (unsigned long)-1)
4396                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4397                                          iter->cpu);
4398                 else
4399                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4400                                          iter->cpu, iter->lost_events);
4401                 if (trace_seq_has_overflowed(&iter->seq))
4402                         return TRACE_TYPE_PARTIAL_LINE;
4403         }
4404
4405         if (iter->trace && iter->trace->print_line) {
4406                 ret = iter->trace->print_line(iter);
4407                 if (ret != TRACE_TYPE_UNHANDLED)
4408                         return ret;
4409         }
4410
4411         if (iter->ent->type == TRACE_BPUTS &&
4412                         trace_flags & TRACE_ITER_PRINTK &&
4413                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4414                 return trace_print_bputs_msg_only(iter);
4415
4416         if (iter->ent->type == TRACE_BPRINT &&
4417                         trace_flags & TRACE_ITER_PRINTK &&
4418                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4419                 return trace_print_bprintk_msg_only(iter);
4420
4421         if (iter->ent->type == TRACE_PRINT &&
4422                         trace_flags & TRACE_ITER_PRINTK &&
4423                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4424                 return trace_print_printk_msg_only(iter);
4425
4426         if (trace_flags & TRACE_ITER_BIN)
4427                 return print_bin_fmt(iter);
4428
4429         if (trace_flags & TRACE_ITER_HEX)
4430                 return print_hex_fmt(iter);
4431
4432         if (trace_flags & TRACE_ITER_RAW)
4433                 return print_raw_fmt(iter);
4434
4435         return print_trace_fmt(iter);
4436 }
4437
4438 void trace_latency_header(struct seq_file *m)
4439 {
4440         struct trace_iterator *iter = m->private;
4441         struct trace_array *tr = iter->tr;
4442
4443         /* print nothing if the buffers are empty */
4444         if (trace_empty(iter))
4445                 return;
4446
4447         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4448                 print_trace_header(m, iter);
4449
4450         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4451                 print_lat_help_header(m);
4452 }
4453
4454 void trace_default_header(struct seq_file *m)
4455 {
4456         struct trace_iterator *iter = m->private;
4457         struct trace_array *tr = iter->tr;
4458         unsigned long trace_flags = tr->trace_flags;
4459
4460         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4461                 return;
4462
4463         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4464                 /* print nothing if the buffers are empty */
4465                 if (trace_empty(iter))
4466                         return;
4467                 print_trace_header(m, iter);
4468                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4469                         print_lat_help_header(m);
4470         } else {
4471                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4472                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4473                                 print_func_help_header_irq(iter->array_buffer,
4474                                                            m, trace_flags);
4475                         else
4476                                 print_func_help_header(iter->array_buffer, m,
4477                                                        trace_flags);
4478                 }
4479         }
4480 }
4481
4482 static void test_ftrace_alive(struct seq_file *m)
4483 {
4484         if (!ftrace_is_dead())
4485                 return;
4486         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4487                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4488 }
4489
4490 #ifdef CONFIG_TRACER_MAX_TRACE
4491 static void show_snapshot_main_help(struct seq_file *m)
4492 {
4493         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4494                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4495                     "#                      Takes a snapshot of the main buffer.\n"
4496                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4497                     "#                      (Doesn't have to be '2'; works with any number that\n"
4498                     "#                       is not a '0' or '1')\n");
4499 }
4500
4501 static void show_snapshot_percpu_help(struct seq_file *m)
4502 {
4503         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4504 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4505         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4506                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4507 #else
4508         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4509                     "#                     Must use main snapshot file to allocate.\n");
4510 #endif
4511         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4512                     "#                      (Doesn't have to be '2'; works with any number that\n"
4513                     "#                       is not a '0' or '1')\n");
4514 }
4515
4516 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4517 {
4518         if (iter->tr->allocated_snapshot)
4519                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4520         else
4521                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4522
4523         seq_puts(m, "# Snapshot commands:\n");
4524         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4525                 show_snapshot_main_help(m);
4526         else
4527                 show_snapshot_percpu_help(m);
4528 }
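/*
 * Typical use of the snapshot file from user space, matching the help text
 * above (paths assume the default tracefs mount point):
 *
 *	# echo 1 > /sys/kernel/tracing/snapshot    allocate and take a snapshot
 *	# cat /sys/kernel/tracing/snapshot         read the frozen copy
 *	# echo 2 > /sys/kernel/tracing/snapshot    clear it, keep the buffer
 *	# echo 0 > /sys/kernel/tracing/snapshot    free the snapshot buffer
 */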
4529 #else
4530 /* Should never be called */
4531 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4532 #endif
4533
4534 static int s_show(struct seq_file *m, void *v)
4535 {
4536         struct trace_iterator *iter = v;
4537         int ret;
4538
4539         if (iter->ent == NULL) {
4540                 if (iter->tr) {
4541                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4542                         seq_puts(m, "#\n");
4543                         test_ftrace_alive(m);
4544                 }
4545                 if (iter->snapshot && trace_empty(iter))
4546                         print_snapshot_help(m, iter);
4547                 else if (iter->trace && iter->trace->print_header)
4548                         iter->trace->print_header(m);
4549                 else
4550                         trace_default_header(m);
4551
4552         } else if (iter->leftover) {
4553                 /*
4554                  * If we filled the seq_file buffer earlier, we
4555                  * want to just show it now.
4556                  */
4557                 ret = trace_print_seq(m, &iter->seq);
4558
4559                 /* ret should this time be zero, but you never know */
4560                 iter->leftover = ret;
4561
4562         } else {
4563                 ret = print_trace_line(iter);
4564                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4565                         iter->seq.full = 0;
4566                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4567                 }
4568                 ret = trace_print_seq(m, &iter->seq);
4569                 /*
4570                  * If we overflow the seq_file buffer, then it will
4571                  * ask us for this data again at start up.
4572                  * Use that instead.
4573                  *  ret is 0 if seq_file write succeeded.
4574                  *        -1 otherwise.
4575                  */
4576                 iter->leftover = ret;
4577         }
4578
4579         return 0;
4580 }
4581
4582 /*
4583  * Should be used after trace_array_get(); trace_types_lock
4584  * ensures that i_cdev was already initialized.
4585  */
4586 static inline int tracing_get_cpu(struct inode *inode)
4587 {
4588         if (inode->i_cdev) /* See trace_create_cpu_file() */
4589                 return (long)inode->i_cdev - 1;
4590         return RING_BUFFER_ALL_CPUS;
4591 }
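/*
 * The decoding above is the counterpart of trace_create_cpu_file(), which
 * stores (cpu + 1) in inode->i_cdev for the per-CPU files:
 *
 *	i_cdev == NULL            ->  RING_BUFFER_ALL_CPUS
 *	i_cdev == (void *)(N + 1) ->  CPU N
 */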
4592
4593 static const struct seq_operations tracer_seq_ops = {
4594         .start          = s_start,
4595         .next           = s_next,
4596         .stop           = s_stop,
4597         .show           = s_show,
4598 };
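/*
 * The seq_file core drives these callbacks in the usual order when the
 * trace file is read:
 *
 *	s_start(m, &pos)                  take the locks, position the iterator
 *	  s_show(m, v)                    print one entry (or the headers)
 *	  s_next(m, v, &pos), s_show()    repeat until the buffer is drained
 *	s_stop(m, v)                      drop the locks taken in s_start()
 */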
4599
4600 /*
4601  * Note, as iter itself can be allocated and freed in different
4602  * ways, this function is only used to free its content, and not
4603  * the iterator itself. The only requirement on all the allocations
4604  * is that they must zero all fields (kzalloc), as freeing works with
4605  * either allocated content or NULL.
4606  */
4607 static void free_trace_iter_content(struct trace_iterator *iter)
4608 {
4609         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4610         if (iter->fmt != static_fmt_buf)
4611                 kfree(iter->fmt);
4612
4613         kfree(iter->temp);
4614         kfree(iter->buffer_iter);
4615         mutex_destroy(&iter->mutex);
4616         free_cpumask_var(iter->started);
4617 }
4618
4619 static struct trace_iterator *
4620 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4621 {
4622         struct trace_array *tr = inode->i_private;
4623         struct trace_iterator *iter;
4624         int cpu;
4625
4626         if (tracing_disabled)
4627                 return ERR_PTR(-ENODEV);
4628
4629         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4630         if (!iter)
4631                 return ERR_PTR(-ENOMEM);
4632
4633         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4634                                     GFP_KERNEL);
4635         if (!iter->buffer_iter)
4636                 goto release;
4637
4638         /*
4639          * trace_find_next_entry() may need to save off iter->ent.
4640          * It will place it into the iter->temp buffer. As most
4641          * events are less than 128 bytes, allocate a buffer of that size.
4642          * If one is greater, then trace_find_next_entry() will
4643          * allocate a new buffer to adjust for the bigger iter->ent.
4644          * It's not critical if it fails to get allocated here.
4645          */
4646         iter->temp = kmalloc(128, GFP_KERNEL);
4647         if (iter->temp)
4648                 iter->temp_size = 128;
4649
4650         /*
4651          * trace_event_printf() may need to modify the given format
4652          * string to replace %p with %px so that it shows the real address
4653          * instead of a hash value. However, that is only needed for event
4654          * tracing; other tracers may not need it. Defer the allocation
4655          * until it is needed.
4656          */
4657         iter->fmt = NULL;
4658         iter->fmt_size = 0;
4659
4660         mutex_lock(&trace_types_lock);
4661         iter->trace = tr->current_trace;
4662
4663         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4664                 goto fail;
4665
4666         iter->tr = tr;
4667
4668 #ifdef CONFIG_TRACER_MAX_TRACE
4669         /* Currently only the top directory has a snapshot */
4670         if (tr->current_trace->print_max || snapshot)
4671                 iter->array_buffer = &tr->max_buffer;
4672         else
4673 #endif
4674                 iter->array_buffer = &tr->array_buffer;
4675         iter->snapshot = snapshot;
4676         iter->pos = -1;
4677         iter->cpu_file = tracing_get_cpu(inode);
4678         mutex_init(&iter->mutex);
4679
4680         /* Notify the tracer early, before we stop tracing. */
4681         if (iter->trace->open)
4682                 iter->trace->open(iter);
4683
4684         /* Annotate start of buffers if we had overruns */
4685         if (ring_buffer_overruns(iter->array_buffer->buffer))
4686                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4687
4688         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4689         if (trace_clocks[tr->clock_id].in_ns)
4690                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4691
4692         /*
4693          * If pause-on-trace is enabled, then stop the trace while
4694          * dumping, unless this is the "snapshot" file
4695          */
4696         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4697                 tracing_stop_tr(tr);
4698
4699         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4700                 for_each_tracing_cpu(cpu) {
4701                         iter->buffer_iter[cpu] =
4702                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4703                                                          cpu, GFP_KERNEL);
4704                 }
4705                 ring_buffer_read_prepare_sync();
4706                 for_each_tracing_cpu(cpu) {
4707                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4708                         tracing_iter_reset(iter, cpu);
4709                 }
4710         } else {
4711                 cpu = iter->cpu_file;
4712                 iter->buffer_iter[cpu] =
4713                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4714                                                  cpu, GFP_KERNEL);
4715                 ring_buffer_read_prepare_sync();
4716                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4717                 tracing_iter_reset(iter, cpu);
4718         }
4719
4720         mutex_unlock(&trace_types_lock);
4721
4722         return iter;
4723
4724  fail:
4725         mutex_unlock(&trace_types_lock);
4726         free_trace_iter_content(iter);
4727 release:
4728         seq_release_private(inode, file);
4729         return ERR_PTR(-ENOMEM);
4730 }
4731
4732 int tracing_open_generic(struct inode *inode, struct file *filp)
4733 {
4734         int ret;
4735
4736         ret = tracing_check_open_get_tr(NULL);
4737         if (ret)
4738                 return ret;
4739
4740         filp->private_data = inode->i_private;
4741         return 0;
4742 }
4743
4744 bool tracing_is_disabled(void)
4745 {
4746         return tracing_disabled ? true : false;
4747 }
4748
4749 /*
4750  * Open and update trace_array ref count.
4751  * Must have the current trace_array passed to it.
4752  */
4753 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4754 {
4755         struct trace_array *tr = inode->i_private;
4756         int ret;
4757
4758         ret = tracing_check_open_get_tr(tr);
4759         if (ret)
4760                 return ret;
4761
4762         filp->private_data = inode->i_private;
4763
4764         return 0;
4765 }
4766
4767 /*
4768  * The private pointer of the inode is the trace_event_file.
4769  * Update the tr ref count associated to it.
4770  */
4771 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4772 {
4773         struct trace_event_file *file = inode->i_private;
4774         int ret;
4775
4776         ret = tracing_check_open_get_tr(file->tr);
4777         if (ret)
4778                 return ret;
4779
4780         mutex_lock(&event_mutex);
4781
4782         /* Fail if the file is marked for removal */
4783         if (file->flags & EVENT_FILE_FL_FREED) {
4784                 trace_array_put(file->tr);
4785                 ret = -ENODEV;
4786         } else {
4787                 event_file_get(file);
4788         }
4789
4790         mutex_unlock(&event_mutex);
4791         if (ret)
4792                 return ret;
4793
4794         filp->private_data = inode->i_private;
4795
4796         return 0;
4797 }
4798
4799 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4800 {
4801         struct trace_event_file *file = inode->i_private;
4802
4803         trace_array_put(file->tr);
4804         event_file_put(file);
4805
4806         return 0;
4807 }
4808
4809 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4810 {
4811         tracing_release_file_tr(inode, filp);
4812         return single_release(inode, filp);
4813 }
4814
4815 static int tracing_mark_open(struct inode *inode, struct file *filp)
4816 {
4817         stream_open(inode, filp);
4818         return tracing_open_generic_tr(inode, filp);
4819 }
4820
4821 static int tracing_release(struct inode *inode, struct file *file)
4822 {
4823         struct trace_array *tr = inode->i_private;
4824         struct seq_file *m = file->private_data;
4825         struct trace_iterator *iter;
4826         int cpu;
4827
4828         if (!(file->f_mode & FMODE_READ)) {
4829                 trace_array_put(tr);
4830                 return 0;
4831         }
4832
4833         /* Writes do not use seq_file */
4834         iter = m->private;
4835         mutex_lock(&trace_types_lock);
4836
4837         for_each_tracing_cpu(cpu) {
4838                 if (iter->buffer_iter[cpu])
4839                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4840         }
4841
4842         if (iter->trace && iter->trace->close)
4843                 iter->trace->close(iter);
4844
4845         if (!iter->snapshot && tr->stop_count)
4846                 /* reenable tracing if it was previously enabled */
4847                 tracing_start_tr(tr);
4848
4849         __trace_array_put(tr);
4850
4851         mutex_unlock(&trace_types_lock);
4852
4853         free_trace_iter_content(iter);
4854         seq_release_private(inode, file);
4855
4856         return 0;
4857 }
4858
4859 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4860 {
4861         struct trace_array *tr = inode->i_private;
4862
4863         trace_array_put(tr);
4864         return 0;
4865 }
4866
4867 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4868 {
4869         struct trace_array *tr = inode->i_private;
4870
4871         trace_array_put(tr);
4872
4873         return single_release(inode, file);
4874 }
4875
4876 static int tracing_open(struct inode *inode, struct file *file)
4877 {
4878         struct trace_array *tr = inode->i_private;
4879         struct trace_iterator *iter;
4880         int ret;
4881
4882         ret = tracing_check_open_get_tr(tr);
4883         if (ret)
4884                 return ret;
4885
4886         /* If this file was open for write, then erase contents */
4887         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4888                 int cpu = tracing_get_cpu(inode);
4889                 struct array_buffer *trace_buf = &tr->array_buffer;
4890
4891 #ifdef CONFIG_TRACER_MAX_TRACE
4892                 if (tr->current_trace->print_max)
4893                         trace_buf = &tr->max_buffer;
4894 #endif
4895
4896                 if (cpu == RING_BUFFER_ALL_CPUS)
4897                         tracing_reset_online_cpus(trace_buf);
4898                 else
4899                         tracing_reset_cpu(trace_buf, cpu);
4900         }
4901
4902         if (file->f_mode & FMODE_READ) {
4903                 iter = __tracing_open(inode, file, false);
4904                 if (IS_ERR(iter))
4905                         ret = PTR_ERR(iter);
4906                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4907                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4908         }
4909
4910         if (ret < 0)
4911                 trace_array_put(tr);
4912
4913         return ret;
4914 }
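/*
 * Example usage from user space (illustrative; paths assume tracefs is
 * mounted at /sys/kernel/tracing):
 *
 *	# cat /sys/kernel/tracing/trace		# read a static snapshot
 *	# echo > /sys/kernel/tracing/trace	# O_TRUNC write: clear the buffer
 */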
4915
4916 /*
4917  * Some tracers are not suitable for instance buffers.
4918  * A tracer is always available for the global array (toplevel)
4919  * or if it explicitly states that it is.
4920  */
4921 static bool
4922 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4923 {
4924         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4925 }
4926
4927 /* Find the next tracer that this trace array may use */
4928 static struct tracer *
4929 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4930 {
4931         while (t && !trace_ok_for_array(t, tr))
4932                 t = t->next;
4933
4934         return t;
4935 }
4936
4937 static void *
4938 t_next(struct seq_file *m, void *v, loff_t *pos)
4939 {
4940         struct trace_array *tr = m->private;
4941         struct tracer *t = v;
4942
4943         (*pos)++;
4944
4945         if (t)
4946                 t = get_tracer_for_array(tr, t->next);
4947
4948         return t;
4949 }
4950
4951 static void *t_start(struct seq_file *m, loff_t *pos)
4952 {
4953         struct trace_array *tr = m->private;
4954         struct tracer *t;
4955         loff_t l = 0;
4956
4957         mutex_lock(&trace_types_lock);
4958
4959         t = get_tracer_for_array(tr, trace_types);
4960         for (; t && l < *pos; t = t_next(m, t, &l))
4961                 ;
4962
4963         return t;
4964 }
4965
4966 static void t_stop(struct seq_file *m, void *p)
4967 {
4968         mutex_unlock(&trace_types_lock);
4969 }
4970
4971 static int t_show(struct seq_file *m, void *v)
4972 {
4973         struct tracer *t = v;
4974
4975         if (!t)
4976                 return 0;
4977
4978         seq_puts(m, t->name);
4979         if (t->next)
4980                 seq_putc(m, ' ');
4981         else
4982                 seq_putc(m, '\n');
4983
4984         return 0;
4985 }
4986
4987 static const struct seq_operations show_traces_seq_ops = {
4988         .start          = t_start,
4989         .next           = t_next,
4990         .stop           = t_stop,
4991         .show           = t_show,
4992 };
4993
4994 static int show_traces_open(struct inode *inode, struct file *file)
4995 {
4996         struct trace_array *tr = inode->i_private;
4997         struct seq_file *m;
4998         int ret;
4999
5000         ret = tracing_check_open_get_tr(tr);
5001         if (ret)
5002                 return ret;
5003
5004         ret = seq_open(file, &show_traces_seq_ops);
5005         if (ret) {
5006                 trace_array_put(tr);
5007                 return ret;
5008         }
5009
5010         m = file->private_data;
5011         m->private = tr;
5012
5013         return 0;
5014 }
5015
5016 static int show_traces_release(struct inode *inode, struct file *file)
5017 {
5018         struct trace_array *tr = inode->i_private;
5019
5020         trace_array_put(tr);
5021         return seq_release(inode, file);
5022 }
5023
5024 static ssize_t
5025 tracing_write_stub(struct file *filp, const char __user *ubuf,
5026                    size_t count, loff_t *ppos)
5027 {
5028         return count;
5029 }
5030
5031 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5032 {
5033         int ret;
5034
5035         if (file->f_mode & FMODE_READ)
5036                 ret = seq_lseek(file, offset, whence);
5037         else
5038                 file->f_pos = ret = 0;
5039
5040         return ret;
5041 }
5042
5043 static const struct file_operations tracing_fops = {
5044         .open           = tracing_open,
5045         .read           = seq_read,
5046         .read_iter      = seq_read_iter,
5047         .splice_read    = copy_splice_read,
5048         .write          = tracing_write_stub,
5049         .llseek         = tracing_lseek,
5050         .release        = tracing_release,
5051 };
5052
5053 static const struct file_operations show_traces_fops = {
5054         .open           = show_traces_open,
5055         .read           = seq_read,
5056         .llseek         = seq_lseek,
5057         .release        = show_traces_release,
5058 };
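/*
 * Example (illustrative; the list depends on which tracers are configured
 * into the kernel). This seq_file backs the available_tracers file:
 *
 *	# cat /sys/kernel/tracing/available_tracers
 *	function_graph function nop
 */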
5059
5060 static ssize_t
5061 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5062                      size_t count, loff_t *ppos)
5063 {
5064         struct trace_array *tr = file_inode(filp)->i_private;
5065         char *mask_str;
5066         int len;
5067
5068         len = snprintf(NULL, 0, "%*pb\n",
5069                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5070         mask_str = kmalloc(len, GFP_KERNEL);
5071         if (!mask_str)
5072                 return -ENOMEM;
5073
5074         len = snprintf(mask_str, len, "%*pb\n",
5075                        cpumask_pr_args(tr->tracing_cpumask));
5076         if (len >= count) {
5077                 count = -EINVAL;
5078                 goto out_err;
5079         }
5080         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5081
5082 out_err:
5083         kfree(mask_str);
5084
5085         return count;
5086 }
5087
5088 int tracing_set_cpumask(struct trace_array *tr,
5089                         cpumask_var_t tracing_cpumask_new)
5090 {
5091         int cpu;
5092
5093         if (!tr)
5094                 return -EINVAL;
5095
5096         local_irq_disable();
5097         arch_spin_lock(&tr->max_lock);
5098         for_each_tracing_cpu(cpu) {
5099                 /*
5100                  * Increase/decrease the disabled counter if we are
5101                  * about to flip a bit in the cpumask:
5102                  */
5103                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5104                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5105                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5106                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5107 #ifdef CONFIG_TRACER_MAX_TRACE
5108                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5109 #endif
5110                 }
5111                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5112                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5113                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5114                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5115 #ifdef CONFIG_TRACER_MAX_TRACE
5116                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5117 #endif
5118                 }
5119         }
5120         arch_spin_unlock(&tr->max_lock);
5121         local_irq_enable();
5122
5123         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5124
5125         return 0;
5126 }
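/*
 * Example (illustrative): user space writes the mask as a hex cpumask
 * string to the tracing_cpumask file, which reaches this function via
 * tracing_cpumask_write() below:
 *
 *	# echo 3 > /sys/kernel/tracing/tracing_cpumask	# trace CPUs 0 and 1 only
 */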
5127
5128 static ssize_t
5129 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5130                       size_t count, loff_t *ppos)
5131 {
5132         struct trace_array *tr = file_inode(filp)->i_private;
5133         cpumask_var_t tracing_cpumask_new;
5134         int err;
5135
5136         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5137                 return -ENOMEM;
5138
5139         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5140         if (err)
5141                 goto err_free;
5142
5143         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5144         if (err)
5145                 goto err_free;
5146
5147         free_cpumask_var(tracing_cpumask_new);
5148
5149         return count;
5150
5151 err_free:
5152         free_cpumask_var(tracing_cpumask_new);
5153
5154         return err;
5155 }
5156
5157 static const struct file_operations tracing_cpumask_fops = {
5158         .open           = tracing_open_generic_tr,
5159         .read           = tracing_cpumask_read,
5160         .write          = tracing_cpumask_write,
5161         .release        = tracing_release_generic_tr,
5162         .llseek         = generic_file_llseek,
5163 };
5164
5165 static int tracing_trace_options_show(struct seq_file *m, void *v)
5166 {
5167         struct tracer_opt *trace_opts;
5168         struct trace_array *tr = m->private;
5169         u32 tracer_flags;
5170         int i;
5171
5172         mutex_lock(&trace_types_lock);
5173         tracer_flags = tr->current_trace->flags->val;
5174         trace_opts = tr->current_trace->flags->opts;
5175
5176         for (i = 0; trace_options[i]; i++) {
5177                 if (tr->trace_flags & (1 << i))
5178                         seq_printf(m, "%s\n", trace_options[i]);
5179                 else
5180                         seq_printf(m, "no%s\n", trace_options[i]);
5181         }
5182
5183         for (i = 0; trace_opts[i].name; i++) {
5184                 if (tracer_flags & trace_opts[i].bit)
5185                         seq_printf(m, "%s\n", trace_opts[i].name);
5186                 else
5187                         seq_printf(m, "no%s\n", trace_opts[i].name);
5188         }
5189         mutex_unlock(&trace_types_lock);
5190
5191         return 0;
5192 }
5193
5194 static int __set_tracer_option(struct trace_array *tr,
5195                                struct tracer_flags *tracer_flags,
5196                                struct tracer_opt *opts, int neg)
5197 {
5198         struct tracer *trace = tracer_flags->trace;
5199         int ret;
5200
5201         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5202         if (ret)
5203                 return ret;
5204
5205         if (neg)
5206                 tracer_flags->val &= ~opts->bit;
5207         else
5208                 tracer_flags->val |= opts->bit;
5209         return 0;
5210 }
5211
5212 /* Try to assign a tracer specific option */
5213 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5214 {
5215         struct tracer *trace = tr->current_trace;
5216         struct tracer_flags *tracer_flags = trace->flags;
5217         struct tracer_opt *opts = NULL;
5218         int i;
5219
5220         for (i = 0; tracer_flags->opts[i].name; i++) {
5221                 opts = &tracer_flags->opts[i];
5222
5223                 if (strcmp(cmp, opts->name) == 0)
5224                         return __set_tracer_option(tr, trace->flags, opts, neg);
5225         }
5226
5227         return -EINVAL;
5228 }
5229
5230 /* Some tracers require overwrite to stay enabled */
5231 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5232 {
5233         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5234                 return -1;
5235
5236         return 0;
5237 }
5238
5239 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5240 {
5241         if ((mask == TRACE_ITER_RECORD_TGID) ||
5242             (mask == TRACE_ITER_RECORD_CMD))
5243                 lockdep_assert_held(&event_mutex);
5244
5245         /* do nothing if flag is already set */
5246         if (!!(tr->trace_flags & mask) == !!enabled)
5247                 return 0;
5248
5249         /* Give the tracer a chance to approve the change */
5250         if (tr->current_trace->flag_changed)
5251                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5252                         return -EINVAL;
5253
5254         if (enabled)
5255                 tr->trace_flags |= mask;
5256         else
5257                 tr->trace_flags &= ~mask;
5258
5259         if (mask == TRACE_ITER_RECORD_CMD)
5260                 trace_event_enable_cmd_record(enabled);
5261
5262         if (mask == TRACE_ITER_RECORD_TGID) {
5263
5264                 if (trace_alloc_tgid_map() < 0) {
5265                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5266                         return -ENOMEM;
5267                 }
5268
5269                 trace_event_enable_tgid_record(enabled);
5270         }
5271
5272         if (mask == TRACE_ITER_EVENT_FORK)
5273                 trace_event_follow_fork(tr, enabled);
5274
5275         if (mask == TRACE_ITER_FUNC_FORK)
5276                 ftrace_pid_follow_fork(tr, enabled);
5277
5278         if (mask == TRACE_ITER_OVERWRITE) {
5279                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5280 #ifdef CONFIG_TRACER_MAX_TRACE
5281                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5282 #endif
5283         }
5284
5285         if (mask == TRACE_ITER_PRINTK) {
5286                 trace_printk_start_stop_comm(enabled);
5287                 trace_printk_control(enabled);
5288         }
5289
5290         return 0;
5291 }
5292
5293 int trace_set_options(struct trace_array *tr, char *option)
5294 {
5295         char *cmp;
5296         int neg = 0;
5297         int ret;
5298         size_t orig_len = strlen(option);
5299         int len;
5300
5301         cmp = strstrip(option);
5302
5303         len = str_has_prefix(cmp, "no");
5304         if (len)
5305                 neg = 1;
5306
5307         cmp += len;
5308
5309         mutex_lock(&event_mutex);
5310         mutex_lock(&trace_types_lock);
5311
5312         ret = match_string(trace_options, -1, cmp);
5313         /* If no option could be set, test the specific tracer options */
5314         if (ret < 0)
5315                 ret = set_tracer_option(tr, cmp, neg);
5316         else
5317                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5318
5319         mutex_unlock(&trace_types_lock);
5320         mutex_unlock(&event_mutex);
5321
5322         /*
5323          * If the first trailing whitespace is replaced with '\0' by strstrip,
5324          * turn it back into a space.
5325          */
5326         if (orig_len > strlen(option))
5327                 option[strlen(option)] = ' ';
5328
5329         return ret;
5330 }
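/*
 * Example (illustrative; option names shown are just examples): the "no"
 * prefix parsed above turns an option off, anything else turns it on:
 *
 *	# echo nooverwrite > /sys/kernel/tracing/trace_options
 *	# echo sym-addr > /sys/kernel/tracing/trace_options
 */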
5331
5332 static void __init apply_trace_boot_options(void)
5333 {
5334         char *buf = trace_boot_options_buf;
5335         char *option;
5336
5337         while (true) {
5338                 option = strsep(&buf, ",");
5339
5340                 if (!option)
5341                         break;
5342
5343                 if (*option)
5344                         trace_set_options(&global_trace, option);
5345
5346                 /* Put back the comma to allow this to be called again */
5347                 if (buf)
5348                         *(buf - 1) = ',';
5349         }
5350 }
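/*
 * Example (illustrative): the buffer parsed above is filled from the
 * "trace_options=" kernel command line parameter, e.g.:
 *
 *	trace_options=sym-addr,nooverwrite
 */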
5351
5352 static ssize_t
5353 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5354                         size_t cnt, loff_t *ppos)
5355 {
5356         struct seq_file *m = filp->private_data;
5357         struct trace_array *tr = m->private;
5358         char buf[64];
5359         int ret;
5360
5361         if (cnt >= sizeof(buf))
5362                 return -EINVAL;
5363
5364         if (copy_from_user(buf, ubuf, cnt))
5365                 return -EFAULT;
5366
5367         buf[cnt] = 0;
5368
5369         ret = trace_set_options(tr, buf);
5370         if (ret < 0)
5371                 return ret;
5372
5373         *ppos += cnt;
5374
5375         return cnt;
5376 }
5377
5378 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5379 {
5380         struct trace_array *tr = inode->i_private;
5381         int ret;
5382
5383         ret = tracing_check_open_get_tr(tr);
5384         if (ret)
5385                 return ret;
5386
5387         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5388         if (ret < 0)
5389                 trace_array_put(tr);
5390
5391         return ret;
5392 }
5393
5394 static const struct file_operations tracing_iter_fops = {
5395         .open           = tracing_trace_options_open,
5396         .read           = seq_read,
5397         .llseek         = seq_lseek,
5398         .release        = tracing_single_release_tr,
5399         .write          = tracing_trace_options_write,
5400 };
5401
5402 static const char readme_msg[] =
5403         "tracing mini-HOWTO:\n\n"
5404         "# echo 0 > tracing_on : quick way to disable tracing\n"
5405         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5406         " Important files:\n"
5407         "  trace\t\t\t- The static contents of the buffer\n"
5408         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5409         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5410         "  current_tracer\t- shows and sets the tracer in use (function, latency tracers, ...)\n"
5411         "  available_tracers\t- list of configured tracers for current_tracer\n"
5412         "  error_log\t- error log for failed commands (that support it)\n"
5413         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5414         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5415         "  trace_clock\t\t- change the clock used to order events\n"
5416         "       local:   Per cpu clock but may not be synced across CPUs\n"
5417         "      global:   Synced across CPUs but slows tracing down.\n"
5418         "     counter:   Not a clock, but just an increment\n"
5419         "      uptime:   Jiffy counter from time of boot\n"
5420         "        perf:   Same clock that perf events use\n"
5421 #ifdef CONFIG_X86_64
5422         "     x86-tsc:   TSC cycle counter\n"
5423 #endif
5424         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5425         "       delta:   Delta difference against a buffer-wide timestamp\n"
5426         "    absolute:   Absolute (standalone) timestamp\n"
5427         "\n  trace_marker\t\t- Writes into this file are inserted into the kernel buffer\n"
5428         "\n  trace_marker_raw\t\t- Writes into this file insert binary data into the kernel buffer\n"
5429         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5430         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5431         "\t\t\t  Remove sub-buffer with rmdir\n"
5432         "  trace_options\t\t- Set format or modify how tracing happens\n"
5433         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5434         "\t\t\t  option name\n"
5435         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
5436 #ifdef CONFIG_DYNAMIC_FTRACE
5437         "\n  available_filter_functions - list of functions that can be filtered on\n"
5438         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5439         "\t\t\t  functions\n"
5440         "\t     accepts: func_full_name or glob-matching-pattern\n"
5441         "\t     modules: Can select a group via module\n"
5442         "\t      Format: :mod:<module-name>\n"
5443         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5444         "\t    triggers: a command to perform when function is hit\n"
5445         "\t      Format: <function>:<trigger>[:count]\n"
5446         "\t     trigger: traceon, traceoff\n"
5447         "\t\t      enable_event:<system>:<event>\n"
5448         "\t\t      disable_event:<system>:<event>\n"
5449 #ifdef CONFIG_STACKTRACE
5450         "\t\t      stacktrace\n"
5451 #endif
5452 #ifdef CONFIG_TRACER_SNAPSHOT
5453         "\t\t      snapshot\n"
5454 #endif
5455         "\t\t      dump\n"
5456         "\t\t      cpudump\n"
5457         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5458         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5459         "\t     The first one will disable tracing every time do_fault is hit\n"
5460         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5461         "\t       The first time do_trap is hit and it disables tracing, the\n"
5462         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5463         "\t       the counter will not decrement. It only decrements when the\n"
5464         "\t       trigger did work\n"
5465         "\t     To remove trigger without count:\n"
5466         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5467         "\t     To remove trigger with a count:\n"
5468         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5469         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5470         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5471         "\t    modules: Can select a group via module command :mod:\n"
5472         "\t    Does not accept triggers\n"
5473 #endif /* CONFIG_DYNAMIC_FTRACE */
5474 #ifdef CONFIG_FUNCTION_TRACER
5475         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5476         "\t\t    (function)\n"
5477         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5478         "\t\t    (function)\n"
5479 #endif
5480 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5481         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5482         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5483         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5484 #endif
5485 #ifdef CONFIG_TRACER_SNAPSHOT
5486         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5487         "\t\t\t  snapshot buffer. Read the contents for more\n"
5488         "\t\t\t  information\n"
5489 #endif
5490 #ifdef CONFIG_STACK_TRACER
5491         "  stack_trace\t\t- Shows the max stack trace when active\n"
5492         "  stack_max_size\t- Shows current max stack size that was traced\n"
5493         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5494         "\t\t\t  new trace)\n"
5495 #ifdef CONFIG_DYNAMIC_FTRACE
5496         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5497         "\t\t\t  traces\n"
5498 #endif
5499 #endif /* CONFIG_STACK_TRACER */
5500 #ifdef CONFIG_DYNAMIC_EVENTS
5501         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5502         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5503 #endif
5504 #ifdef CONFIG_KPROBE_EVENTS
5505         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5506         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5507 #endif
5508 #ifdef CONFIG_UPROBE_EVENTS
5509         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5510         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5511 #endif
5512 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5513     defined(CONFIG_FPROBE_EVENTS)
5514         "\t  accepts: event-definitions (one definition per line)\n"
5515 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5516         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5517         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5518 #endif
5519 #ifdef CONFIG_FPROBE_EVENTS
5520         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5521         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5522 #endif
5523 #ifdef CONFIG_HIST_TRIGGERS
5524         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5525 #endif
5526         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5527         "\t           -:[<group>/][<event>]\n"
5528 #ifdef CONFIG_KPROBE_EVENTS
5529         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5530   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5531 #endif
5532 #ifdef CONFIG_UPROBE_EVENTS
5533   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5534 #endif
5535         "\t     args: <name>=fetcharg[:type]\n"
5536         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5537 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5538         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5539 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5540         "\t           <argname>[->field[->field|.field...]],\n"
5541 #endif
5542 #else
5543         "\t           $stack<index>, $stack, $retval, $comm,\n"
5544 #endif
5545         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5546         "\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5547         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5548         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5549         "\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5550 #ifdef CONFIG_HIST_TRIGGERS
5551         "\t    field: <stype> <name>;\n"
5552         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5553         "\t           [unsigned] char/int/long\n"
5554 #endif
5555         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5556         "\t            of the <attached-group>/<attached-event>.\n"
5557 #endif
5558         "  events/\t\t- Directory containing all trace event subsystems:\n"
5559         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5560         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5561         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5562         "\t\t\t  events\n"
5563         "      filter\t\t- If set, only events passing filter are traced\n"
5564         "  events/<system>/<event>/\t- Directory containing control files for\n"
5565         "\t\t\t  <event>:\n"
5566         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5567         "      filter\t\t- If set, only events passing filter are traced\n"
5568         "      trigger\t\t- If set, a command to perform when event is hit\n"
5569         "\t    Format: <trigger>[:count][if <filter>]\n"
5570         "\t   trigger: traceon, traceoff\n"
5571         "\t            enable_event:<system>:<event>\n"
5572         "\t            disable_event:<system>:<event>\n"
5573 #ifdef CONFIG_HIST_TRIGGERS
5574         "\t            enable_hist:<system>:<event>\n"
5575         "\t            disable_hist:<system>:<event>\n"
5576 #endif
5577 #ifdef CONFIG_STACKTRACE
5578         "\t\t    stacktrace\n"
5579 #endif
5580 #ifdef CONFIG_TRACER_SNAPSHOT
5581         "\t\t    snapshot\n"
5582 #endif
5583 #ifdef CONFIG_HIST_TRIGGERS
5584         "\t\t    hist (see below)\n"
5585 #endif
5586         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5587         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5588         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5589         "\t                  events/block/block_unplug/trigger\n"
5590         "\t   The first disables tracing every time block_unplug is hit.\n"
5591         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5592         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5593         "\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5594         "\t   Like function triggers, the counter is only decremented if it\n"
5595         "\t    enabled or disabled tracing.\n"
5596         "\t   To remove a trigger without a count:\n"
5597         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5598         "\t   To remove a trigger with a count:\n"
5599         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5600         "\t   Filters can be ignored when removing a trigger.\n"
5601 #ifdef CONFIG_HIST_TRIGGERS
5602         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5603         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5604         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5605         "\t            [:values=<field1[,field2,...]>]\n"
5606         "\t            [:sort=<field1[,field2,...]>]\n"
5607         "\t            [:size=#entries]\n"
5608         "\t            [:pause][:continue][:clear]\n"
5609         "\t            [:name=histname1]\n"
5610         "\t            [:nohitcount]\n"
5611         "\t            [:<handler>.<action>]\n"
5612         "\t            [if <filter>]\n\n"
5613         "\t    Note, special fields can be used as well:\n"
5614         "\t            common_timestamp - to record current timestamp\n"
5615         "\t            common_cpu - to record the CPU the event happened on\n"
5616         "\n"
5617         "\t    A hist trigger variable can be:\n"
5618         "\t        - a reference to a field e.g. x=common_timestamp,\n"
5619         "\t        - a reference to another variable e.g. y=$x,\n"
5620         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5621         "\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5622         "\n"
5623         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5624         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5625         "\t    variable reference, field or numeric literal.\n"
5626         "\n"
5627         "\t    When a matching event is hit, an entry is added to a hash\n"
5628         "\t    table using the key(s) and value(s) named, and the value of a\n"
5629         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5630         "\t    correspond to fields in the event's format description.  Keys\n"
5631         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5632         "\t    Compound keys consisting of up to two fields can be specified\n"
5633         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5634         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5635         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5636         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5637         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5638         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5639         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5640         "\t    its histogram data will be shared with other triggers of the\n"
5641         "\t    same name, and trigger hits will update this common data.\n\n"
5642         "\t    Reading the 'hist' file for the event will dump the hash\n"
5643         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5644         "\t    triggers attached to an event, there will be a table for each\n"
5645         "\t    trigger in the output.  The table displayed for a named\n"
5646         "\t    trigger will be the same as any other instance having the\n"
5647         "\t    same name.  The default format used to display a given field\n"
5648         "\t    can be modified by appending any of the following modifiers\n"
5649         "\t    to the field name, as applicable:\n\n"
5650         "\t            .hex        display a number as a hex value\n"
5651         "\t            .sym        display an address as a symbol\n"
5652         "\t            .sym-offset display an address as a symbol and offset\n"
5653         "\t            .execname   display a common_pid as a program name\n"
5654         "\t            .syscall    display a syscall id as a syscall name\n"
5655         "\t            .log2       display log2 value rather than raw number\n"
5656         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5657         "\t            .usecs      display a common_timestamp in microseconds\n"
5658         "\t            .percent    display a number as a percentage value\n"
5659         "\t            .graph      display a bar-graph of a value\n\n"
5660         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5661         "\t    trigger or to start a hist trigger but not log any events\n"
5662         "\t    until told to do so.  'continue' can be used to start or\n"
5663         "\t    restart a paused hist trigger.\n\n"
5664         "\t    The 'clear' parameter will clear the contents of a running\n"
5665         "\t    hist trigger and leave its current paused/active state\n"
5666         "\t    unchanged.\n\n"
5667         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5668         "\t    raw hitcount in the histogram.\n\n"
5669         "\t    The enable_hist and disable_hist triggers can be used to\n"
5670         "\t    have one event conditionally start and stop another event's\n"
5671         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5672         "\t    the enable_event and disable_event triggers.\n\n"
5673         "\t    Hist trigger handlers and actions are executed whenever a\n"
5674         "\t    histogram entry is added or updated.  They take the form:\n\n"
5675         "\t        <handler>.<action>\n\n"
5676         "\t    The available handlers are:\n\n"
5677         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5678         "\t        onmax(var)               - invoke if var exceeds current max\n"
5679         "\t        onchange(var)            - invoke action if var changes\n\n"
5680         "\t    The available actions are:\n\n"
5681         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5682         "\t        save(field,...)                      - save current event fields\n"
5683 #ifdef CONFIG_TRACER_SNAPSHOT
5684         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5685 #endif
5686 #ifdef CONFIG_SYNTH_EVENTS
5687         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5688         "\t  Write into this file to define/undefine new synthetic events.\n"
5689         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5690 #endif
5691 #endif
5692 ;
5693
5694 static ssize_t
5695 tracing_readme_read(struct file *filp, char __user *ubuf,
5696                        size_t cnt, loff_t *ppos)
5697 {
5698         return simple_read_from_buffer(ubuf, cnt, ppos,
5699                                         readme_msg, strlen(readme_msg));
5700 }
5701
5702 static const struct file_operations tracing_readme_fops = {
5703         .open           = tracing_open_generic,
5704         .read           = tracing_readme_read,
5705         .llseek         = generic_file_llseek,
5706 };
5707
5708 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5709 static union trace_eval_map_item *
5710 update_eval_map(union trace_eval_map_item *ptr)
5711 {
5712         if (!ptr->map.eval_string) {
5713                 if (ptr->tail.next) {
5714                         ptr = ptr->tail.next;
5715                         /* Set ptr to the next real item (skip head) */
5716                         ptr++;
5717                 } else
5718                         return NULL;
5719         }
5720         return ptr;
5721 }
5722
5723 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5724 {
5725         union trace_eval_map_item *ptr = v;
5726
5727         /*
5728          * Paranoid! If ptr points to end, we don't want to increment past it.
5729          * This really should never happen.
5730          */
5731         (*pos)++;
5732         ptr = update_eval_map(ptr);
5733         if (WARN_ON_ONCE(!ptr))
5734                 return NULL;
5735
5736         ptr++;
5737         ptr = update_eval_map(ptr);
5738
5739         return ptr;
5740 }
5741
5742 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5743 {
5744         union trace_eval_map_item *v;
5745         loff_t l = 0;
5746
5747         mutex_lock(&trace_eval_mutex);
5748
5749         v = trace_eval_maps;
5750         if (v)
5751                 v++;
5752
5753         while (v && l < *pos)
5754                 v = eval_map_next(m, v, &l);
5756
5757         return v;
5758 }
5759
5760 static void eval_map_stop(struct seq_file *m, void *v)
5761 {
5762         mutex_unlock(&trace_eval_mutex);
5763 }
5764
5765 static int eval_map_show(struct seq_file *m, void *v)
5766 {
5767         union trace_eval_map_item *ptr = v;
5768
5769         seq_printf(m, "%s %ld (%s)\n",
5770                    ptr->map.eval_string, ptr->map.eval_value,
5771                    ptr->map.system);
5772
5773         return 0;
5774 }
5775
5776 static const struct seq_operations tracing_eval_map_seq_ops = {
5777         .start          = eval_map_start,
5778         .next           = eval_map_next,
5779         .stop           = eval_map_stop,
5780         .show           = eval_map_show,
5781 };
5782
5783 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5784 {
5785         int ret;
5786
5787         ret = tracing_check_open_get_tr(NULL);
5788         if (ret)
5789                 return ret;
5790
5791         return seq_open(filp, &tracing_eval_map_seq_ops);
5792 }
5793
5794 static const struct file_operations tracing_eval_map_fops = {
5795         .open           = tracing_eval_map_open,
5796         .read           = seq_read,
5797         .llseek         = seq_lseek,
5798         .release        = seq_release,
5799 };
5800
5801 static inline union trace_eval_map_item *
5802 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5803 {
5804         /* Return tail of array given the head */
5805         return ptr + ptr->head.length + 1;
5806 }
5807
5808 static void
5809 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5810                            int len)
5811 {
5812         struct trace_eval_map **stop;
5813         struct trace_eval_map **map;
5814         union trace_eval_map_item *map_array;
5815         union trace_eval_map_item *ptr;
5816
5817         stop = start + len;
5818
5819         /*
5820          * The trace_eval_maps contains the map plus a head and tail item,
5821          * where the head holds the module and the length of the array, and the
5822          * tail holds a pointer to the next list.
5823          */
5824         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5825         if (!map_array) {
5826                 pr_warn("Unable to allocate trace eval mapping\n");
5827                 return;
5828         }
5829
5830         mutex_lock(&trace_eval_mutex);
5831
5832         if (!trace_eval_maps)
5833                 trace_eval_maps = map_array;
5834         else {
5835                 ptr = trace_eval_maps;
5836                 for (;;) {
5837                         ptr = trace_eval_jmp_to_tail(ptr);
5838                         if (!ptr->tail.next)
5839                                 break;
5840                         ptr = ptr->tail.next;
5842                 }
5843                 ptr->tail.next = map_array;
5844         }
5845         map_array->head.mod = mod;
5846         map_array->head.length = len;
5847         map_array++;
5848
5849         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5850                 map_array->map = **map;
5851                 map_array++;
5852         }
5853         memset(map_array, 0, sizeof(*map_array));
5854
5855         mutex_unlock(&trace_eval_mutex);
5856 }
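/*
 * Illustrative layout of one chunk built above (len == 2 in this sketch):
 *
 *	map_array: [ head: mod, length=2 ][ map 0 ][ map 1 ][ tail: next or 0 ]
 *
 * trace_eval_jmp_to_tail() skips "length + 1" items to hop from the head
 * of one chunk to its tail, which links to the next module's chunk.
 */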
5857
5858 static void trace_create_eval_file(struct dentry *d_tracer)
5859 {
5860         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5861                           NULL, &tracing_eval_map_fops);
5862 }
5863
5864 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5865 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5866 static inline void trace_insert_eval_map_file(struct module *mod,
5867                               struct trace_eval_map **start, int len) { }
5868 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5869
5870 static void trace_insert_eval_map(struct module *mod,
5871                                   struct trace_eval_map **start, int len)
5872 {
5873         struct trace_eval_map **map;
5874
5875         if (len <= 0)
5876                 return;
5877
5878         map = start;
5879
5880         trace_event_eval_update(map, len);
5881
5882         trace_insert_eval_map_file(mod, start, len);
5883 }
5884
5885 static ssize_t
5886 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5887                        size_t cnt, loff_t *ppos)
5888 {
5889         struct trace_array *tr = filp->private_data;
5890         char buf[MAX_TRACER_SIZE+2];
5891         int r;
5892
5893         mutex_lock(&trace_types_lock);
5894         r = sprintf(buf, "%s\n", tr->current_trace->name);
5895         mutex_unlock(&trace_types_lock);
5896
5897         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5898 }
5899
5900 int tracer_init(struct tracer *t, struct trace_array *tr)
5901 {
5902         tracing_reset_online_cpus(&tr->array_buffer);
5903         return t->init(tr);
5904 }
5905
5906 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5907 {
5908         int cpu;
5909
5910         for_each_tracing_cpu(cpu)
5911                 per_cpu_ptr(buf->data, cpu)->entries = val;
5912 }
5913
5914 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5915 {
5916         if (cpu == RING_BUFFER_ALL_CPUS) {
5917                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5918         } else {
5919                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5920         }
5921 }
5922
5923 #ifdef CONFIG_TRACER_MAX_TRACE
5924 /* resize @trace_buf's buffer to the size of @size_buf's entries */
5925 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5926                                         struct array_buffer *size_buf, int cpu_id)
5927 {
5928         int cpu, ret = 0;
5929
5930         if (cpu_id == RING_BUFFER_ALL_CPUS) {
5931                 for_each_tracing_cpu(cpu) {
5932                         ret = ring_buffer_resize(trace_buf->buffer,
5933                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5934                         if (ret < 0)
5935                                 break;
5936                         per_cpu_ptr(trace_buf->data, cpu)->entries =
5937                                 per_cpu_ptr(size_buf->data, cpu)->entries;
5938                 }
5939         } else {
5940                 ret = ring_buffer_resize(trace_buf->buffer,
5941                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5942                 if (ret == 0)
5943                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5944                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
5945         }
5946
5947         return ret;
5948 }
5949 #endif /* CONFIG_TRACER_MAX_TRACE */
5950
5951 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5952                                         unsigned long size, int cpu)
5953 {
5954         int ret;
5955
5956          * If the kernel or the user changes the size of the ring buffer,
5957          * we use the size that was given, and we can forget about
5958          * we use the size that was given, and we can forget about
5959          * expanding it later.
5960          */
5961         trace_set_ring_buffer_expanded(tr);
5962
5963         /* May be called before buffers are initialized */
5964         if (!tr->array_buffer.buffer)
5965                 return 0;
5966
5967         /* Do not allow tracing while resizing ring buffer */
5968         tracing_stop_tr(tr);
5969
5970         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5971         if (ret < 0)
5972                 goto out_start;
5973
5974 #ifdef CONFIG_TRACER_MAX_TRACE
5975         if (!tr->allocated_snapshot)
5976                 goto out;
5977
5978         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5979         if (ret < 0) {
5980                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
5981                                                      &tr->array_buffer, cpu);
5982                 if (r < 0) {
5983                         /*
5984                          * AARGH! We are left with a different
5985                          * sized max buffer!!!!
5986                          * The max buffer is our "snapshot" buffer.
5987                          * When a tracer needs a snapshot (one of the
5988                          * latency tracers), it swaps the max buffer
5989                          * with the saved snapshot. We succeeded in
5990                          * updating the size of the main buffer, but failed to
5991                          * update the size of the max buffer. But when we tried
5992                          * to reset the main buffer to the original size, we
5993                          * failed there too. This is very unlikely to
5994                          * happen, but if it does, warn and kill all
5995                          * tracing.
5996                          */
5997                         WARN_ON(1);
5998                         tracing_disabled = 1;
5999                 }
6000                 goto out_start;
6001         }
6002
6003         update_buffer_entries(&tr->max_buffer, cpu);
6004
6005  out:
6006 #endif /* CONFIG_TRACER_MAX_TRACE */
6007
6008         update_buffer_entries(&tr->array_buffer, cpu);
6009  out_start:
6010         tracing_start_tr(tr);
6011         return ret;
6012 }
6013
6014 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6015                                   unsigned long size, int cpu_id)
6016 {
6017         int ret;
6018
6019         mutex_lock(&trace_types_lock);
6020
6021         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6022                 /* make sure this CPU is enabled in the mask */
6023                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6024                         ret = -EINVAL;
6025                         goto out;
6026                 }
6027         }
6028
6029         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6030         if (ret < 0)
6031                 ret = -ENOMEM;
6032
6033 out:
6034         mutex_unlock(&trace_types_lock);
6035
6036         return ret;
6037 }
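/*
 * Example (illustrative): this path is reached when user space resizes
 * the ring buffer through tracefs, either globally or per CPU:
 *
 *	# echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *	# echo 1408 > /sys/kernel/tracing/per_cpu/cpu0/buffer_size_kb
 */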
6038
6039
6040 /**
6041  * tracing_update_buffers - used by tracing facility to expand ring buffers
6042  * @tr: The tracing instance
6043  *
6044  * To save memory when tracing is never used on a system that has it
6045  * configured in, the ring buffers are set to a minimum size. But once
6046  * a user starts to use the tracing facility, they need to grow
6047  * to their default size.
6048  *
6049  * This function is to be called when a tracer is about to be used.
6050  */
6051 int tracing_update_buffers(struct trace_array *tr)
6052 {
6053         int ret = 0;
6054
6055         mutex_lock(&trace_types_lock);
6056         if (!tr->ring_buffer_expanded)
6057                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6058                                                 RING_BUFFER_ALL_CPUS);
6059         mutex_unlock(&trace_types_lock);
6060
6061         return ret;
6062 }
6063
6064 struct trace_option_dentry;
6065
6066 static void
6067 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6068
6069 /*
6070  * Used to clear out the tracer before deletion of an instance.
6071  * Must have trace_types_lock held.
6072  */
6073 static void tracing_set_nop(struct trace_array *tr)
6074 {
6075         if (tr->current_trace == &nop_trace)
6076                 return;
6077
6078         tr->current_trace->enabled--;
6079
6080         if (tr->current_trace->reset)
6081                 tr->current_trace->reset(tr);
6082
6083         tr->current_trace = &nop_trace;
6084 }
6085
6086 static bool tracer_options_updated;
6087
6088 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6089 {
6090         /* Only enable if the directory has been created already. */
6091         if (!tr->dir)
6092                 return;
6093
6094         /* Only create trace option files after update_tracer_options finishes */
6095         if (!tracer_options_updated)
6096                 return;
6097
6098         create_trace_option_files(tr, t);
6099 }
6100
6101 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6102 {
6103         struct tracer *t;
6104 #ifdef CONFIG_TRACER_MAX_TRACE
6105         bool had_max_tr;
6106 #endif
6107         int ret = 0;
6108
6109         mutex_lock(&trace_types_lock);
6110
6111         if (!tr->ring_buffer_expanded) {
6112                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6113                                                 RING_BUFFER_ALL_CPUS);
6114                 if (ret < 0)
6115                         goto out;
6116                 ret = 0;
6117         }
6118
6119         for (t = trace_types; t; t = t->next) {
6120                 if (strcmp(t->name, buf) == 0)
6121                         break;
6122         }
6123         if (!t) {
6124                 ret = -EINVAL;
6125                 goto out;
6126         }
6127         if (t == tr->current_trace)
6128                 goto out;
6129
6130 #ifdef CONFIG_TRACER_SNAPSHOT
6131         if (t->use_max_tr) {
6132                 local_irq_disable();
6133                 arch_spin_lock(&tr->max_lock);
6134                 if (tr->cond_snapshot)
6135                         ret = -EBUSY;
6136                 arch_spin_unlock(&tr->max_lock);
6137                 local_irq_enable();
6138                 if (ret)
6139                         goto out;
6140         }
6141 #endif
6142         /* Some tracers won't work on kernel command line */
6143         if (system_state < SYSTEM_RUNNING && t->noboot) {
6144                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6145                         t->name);
6146                 goto out;
6147         }
6148
6149         /* Some tracers are only allowed for the top level buffer */
6150         if (!trace_ok_for_array(t, tr)) {
6151                 ret = -EINVAL;
6152                 goto out;
6153         }
6154
6155         /* If trace pipe files are being read, we can't change the tracer */
6156         if (tr->trace_ref) {
6157                 ret = -EBUSY;
6158                 goto out;
6159         }
6160
6161         trace_branch_disable();
6162
6163         tr->current_trace->enabled--;
6164
6165         if (tr->current_trace->reset)
6166                 tr->current_trace->reset(tr);
6167
6168 #ifdef CONFIG_TRACER_MAX_TRACE
6169         had_max_tr = tr->current_trace->use_max_tr;
6170
6171         /* Current trace needs to be nop_trace before synchronize_rcu */
6172         tr->current_trace = &nop_trace;
6173
6174         if (had_max_tr && !t->use_max_tr) {
6175                 /*
6176                  * We need to make sure that the update_max_tr sees that
6177                  * current_trace changed to nop_trace to keep it from
6178                  * swapping the buffers after we resize it.
6179                  * update_max_tr() is called with interrupts disabled,
6180                  * so a synchronize_rcu() is sufficient.
6181                  */
6182                 synchronize_rcu();
6183                 free_snapshot(tr);
6184                 tracing_disarm_snapshot(tr);
6185         }
6186
6187         if (!had_max_tr && t->use_max_tr) {
6188                 ret = tracing_arm_snapshot_locked(tr);
6189                 if (ret)
6190                         goto out;
6191         }
6192 #else
6193         tr->current_trace = &nop_trace;
6194 #endif
6195
6196         if (t->init) {
6197                 ret = tracer_init(t, tr);
6198                 if (ret) {
6199 #ifdef CONFIG_TRACER_MAX_TRACE
6200                         if (t->use_max_tr)
6201                                 tracing_disarm_snapshot(tr);
6202 #endif
6203                         goto out;
6204                 }
6205         }
6206
6207         tr->current_trace = t;
6208         tr->current_trace->enabled++;
6209         trace_branch_enable(tr);
6210  out:
6211         mutex_unlock(&trace_types_lock);
6212
6213         return ret;
6214 }
6215
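     /*
      * Write handler for the "current_tracer" tracefs file, e.g.:
      *     echo function > /sys/kernel/tracing/current_tracer
      * The buffer is trimmed and passed to tracing_set_tracer() above.
      */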
6216 static ssize_t
6217 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6218                         size_t cnt, loff_t *ppos)
6219 {
6220         struct trace_array *tr = filp->private_data;
6221         char buf[MAX_TRACER_SIZE+1];
6222         char *name;
6223         size_t ret;
6224         int err;
6225
6226         ret = cnt;
6227
6228         if (cnt > MAX_TRACER_SIZE)
6229                 cnt = MAX_TRACER_SIZE;
6230
6231         if (copy_from_user(buf, ubuf, cnt))
6232                 return -EFAULT;
6233
6234         buf[cnt] = 0;
6235
6236         name = strim(buf);
6237
6238         err = tracing_set_tracer(tr, name);
6239         if (err)
6240                 return err;
6241
6242         *ppos += ret;
6243
6244         return ret;
6245 }
6246
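     /*
      * Helpers for files that expose a nanosecond-resolution value (such as
      * tracing_thresh and tracing_max_latency below) in microseconds: reads
      * convert with nsecs_to_usecs(), and writes multiply the user's value
      * by 1000 before storing it.
      */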
6247 static ssize_t
6248 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6249                    size_t cnt, loff_t *ppos)
6250 {
6251         char buf[64];
6252         int r;
6253
6254         r = snprintf(buf, sizeof(buf), "%ld\n",
6255                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6256         if (r > sizeof(buf))
6257                 r = sizeof(buf);
6258         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6259 }
6260
6261 static ssize_t
6262 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6263                     size_t cnt, loff_t *ppos)
6264 {
6265         unsigned long val;
6266         int ret;
6267
6268         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6269         if (ret)
6270                 return ret;
6271
6272         *ptr = val * 1000;
6273
6274         return cnt;
6275 }
6276
6277 static ssize_t
6278 tracing_thresh_read(struct file *filp, char __user *ubuf,
6279                     size_t cnt, loff_t *ppos)
6280 {
6281         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6282 }
6283
6284 static ssize_t
6285 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6286                      size_t cnt, loff_t *ppos)
6287 {
6288         struct trace_array *tr = filp->private_data;
6289         int ret;
6290
6291         mutex_lock(&trace_types_lock);
6292         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6293         if (ret < 0)
6294                 goto out;
6295
6296         if (tr->current_trace->update_thresh) {
6297                 ret = tr->current_trace->update_thresh(tr);
6298                 if (ret < 0)
6299                         goto out;
6300         }
6301
6302         ret = cnt;
6303 out:
6304         mutex_unlock(&trace_types_lock);
6305
6306         return ret;
6307 }
6308
6309 #ifdef CONFIG_TRACER_MAX_TRACE
6310
6311 static ssize_t
6312 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6313                      size_t cnt, loff_t *ppos)
6314 {
6315         struct trace_array *tr = filp->private_data;
6316
6317         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6318 }
6319
6320 static ssize_t
6321 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6322                       size_t cnt, loff_t *ppos)
6323 {
6324         struct trace_array *tr = filp->private_data;
6325
6326         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6327 }
6328
6329 #endif
6330
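     /*
      * pipe_cpumask tracks which trace_pipe readers are open: opening the
      * global trace_pipe requires that no per-CPU reader exists (and claims
      * every bit), while opening per_cpu/cpuN/trace_pipe claims only that
      * CPU's bit.  A second, conflicting open fails with -EBUSY.
      */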
6331 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6332 {
6333         if (cpu == RING_BUFFER_ALL_CPUS) {
6334                 if (cpumask_empty(tr->pipe_cpumask)) {
6335                         cpumask_setall(tr->pipe_cpumask);
6336                         return 0;
6337                 }
6338         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6339                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6340                 return 0;
6341         }
6342         return -EBUSY;
6343 }
6344
6345 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6346 {
6347         if (cpu == RING_BUFFER_ALL_CPUS) {
6348                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6349                 cpumask_clear(tr->pipe_cpumask);
6350         } else {
6351                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6352                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6353         }
6354 }
6355
6356 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6357 {
6358         struct trace_array *tr = inode->i_private;
6359         struct trace_iterator *iter;
6360         int cpu;
6361         int ret;
6362
6363         ret = tracing_check_open_get_tr(tr);
6364         if (ret)
6365                 return ret;
6366
6367         mutex_lock(&trace_types_lock);
6368         cpu = tracing_get_cpu(inode);
6369         ret = open_pipe_on_cpu(tr, cpu);
6370         if (ret)
6371                 goto fail_pipe_on_cpu;
6372
6373         /* create a buffer to store the information to pass to userspace */
6374         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6375         if (!iter) {
6376                 ret = -ENOMEM;
6377                 goto fail_alloc_iter;
6378         }
6379
6380         trace_seq_init(&iter->seq);
6381         iter->trace = tr->current_trace;
6382
6383         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6384                 ret = -ENOMEM;
6385                 goto fail;
6386         }
6387
6388         /* trace pipe does not show start of buffer */
6389         cpumask_setall(iter->started);
6390
6391         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6392                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6393
6394         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6395         if (trace_clocks[tr->clock_id].in_ns)
6396                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6397
6398         iter->tr = tr;
6399         iter->array_buffer = &tr->array_buffer;
6400         iter->cpu_file = cpu;
6401         mutex_init(&iter->mutex);
6402         filp->private_data = iter;
6403
6404         if (iter->trace->pipe_open)
6405                 iter->trace->pipe_open(iter);
6406
6407         nonseekable_open(inode, filp);
6408
6409         tr->trace_ref++;
6410
6411         mutex_unlock(&trace_types_lock);
6412         return ret;
6413
6414 fail:
6415         kfree(iter);
6416 fail_alloc_iter:
6417         close_pipe_on_cpu(tr, cpu);
6418 fail_pipe_on_cpu:
6419         __trace_array_put(tr);
6420         mutex_unlock(&trace_types_lock);
6421         return ret;
6422 }
6423
6424 static int tracing_release_pipe(struct inode *inode, struct file *file)
6425 {
6426         struct trace_iterator *iter = file->private_data;
6427         struct trace_array *tr = inode->i_private;
6428
6429         mutex_lock(&trace_types_lock);
6430
6431         tr->trace_ref--;
6432
6433         if (iter->trace->pipe_close)
6434                 iter->trace->pipe_close(iter);
6435         close_pipe_on_cpu(tr, iter->cpu_file);
6436         mutex_unlock(&trace_types_lock);
6437
6438         free_trace_iter_content(iter);
6439         kfree(iter);
6440
6441         trace_array_put(tr);
6442
6443         return 0;
6444 }
6445
6446 static __poll_t
6447 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6448 {
6449         struct trace_array *tr = iter->tr;
6450
6451         /* Iterators are static; they should be either filled or empty */
6452         if (trace_buffer_iter(iter, iter->cpu_file))
6453                 return EPOLLIN | EPOLLRDNORM;
6454
6455         if (tr->trace_flags & TRACE_ITER_BLOCK)
6456                 /*
6457                  * Always select as readable when in blocking mode
6458                  */
6459                 return EPOLLIN | EPOLLRDNORM;
6460         else
6461                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6462                                              filp, poll_table, iter->tr->buffer_percent);
6463 }
6464
6465 static __poll_t
6466 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6467 {
6468         struct trace_iterator *iter = filp->private_data;
6469
6470         return trace_poll(iter, filp, poll_table);
6471 }
6472
6473 /* Must be called with iter->mutex held. */
6474 static int tracing_wait_pipe(struct file *filp)
6475 {
6476         struct trace_iterator *iter = filp->private_data;
6477         int ret;
6478
6479         while (trace_empty(iter)) {
6480
6481                 if ((filp->f_flags & O_NONBLOCK)) {
6482                         return -EAGAIN;
6483                 }
6484
6485                 /*
6486                  * We block until we have read something and tracing is disabled.
6487                  * We still block if tracing is disabled, but we have never
6488                  * read anything. This allows a user to cat this file, and
6489                  * then enable tracing. But after we have read something,
6490                  * we give an EOF when tracing is again disabled.
6491                  *
6492                  * iter->pos will be 0 if we haven't read anything.
6493                  */
6494                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6495                         break;
6496
6497                 mutex_unlock(&iter->mutex);
6498
6499                 ret = wait_on_pipe(iter, 0);
6500
6501                 mutex_lock(&iter->mutex);
6502
6503                 if (ret)
6504                         return ret;
6505         }
6506
6507         return 1;
6508 }
6509
6510 /*
6511  * Consumer reader.
6512  */
6513 static ssize_t
6514 tracing_read_pipe(struct file *filp, char __user *ubuf,
6515                   size_t cnt, loff_t *ppos)
6516 {
6517         struct trace_iterator *iter = filp->private_data;
6518         ssize_t sret;
6519
6520         /*
6521          * Avoid more than one consumer on a single file descriptor.
6522          * This is just a matter of trace coherency; the ring buffer itself
6523          * is protected.
6524          */
6525         mutex_lock(&iter->mutex);
6526
6527         /* return any leftover data */
6528         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6529         if (sret != -EBUSY)
6530                 goto out;
6531
6532         trace_seq_init(&iter->seq);
6533
6534         if (iter->trace->read) {
6535                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6536                 if (sret)
6537                         goto out;
6538         }
6539
6540 waitagain:
6541         sret = tracing_wait_pipe(filp);
6542         if (sret <= 0)
6543                 goto out;
6544
6545         /* stop when tracing is finished */
6546         if (trace_empty(iter)) {
6547                 sret = 0;
6548                 goto out;
6549         }
6550
6551         if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6552                 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6553
6554         /* reset all but tr, trace, and overruns */
6555         trace_iterator_reset(iter);
6556         cpumask_clear(iter->started);
6557         trace_seq_init(&iter->seq);
6558
6559         trace_event_read_lock();
6560         trace_access_lock(iter->cpu_file);
6561         while (trace_find_next_entry_inc(iter) != NULL) {
6562                 enum print_line_t ret;
6563                 int save_len = iter->seq.seq.len;
6564
6565                 ret = print_trace_line(iter);
6566                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6567                         /*
6568                          * If one print_trace_line() fills the entire trace_seq in one shot,
6569                          * trace_seq_to_user() will return -EBUSY because save_len == 0.
6570                          * In this case, we need to consume it; otherwise, the loop will peek
6571                          * this event again next time, resulting in an infinite loop.
6572                          */
6573                         if (save_len == 0) {
6574                                 iter->seq.full = 0;
6575                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6576                                 trace_consume(iter);
6577                                 break;
6578                         }
6579
6580                         /* In other cases, don't print partial lines */
6581                         iter->seq.seq.len = save_len;
6582                         break;
6583                 }
6584                 if (ret != TRACE_TYPE_NO_CONSUME)
6585                         trace_consume(iter);
6586
6587                 if (trace_seq_used(&iter->seq) >= cnt)
6588                         break;
6589
6590                 /*
6591                  * The full flag being set means we reached the trace_seq buffer
6592                  * size and should have left via the partial-output condition above.
6593                  * If we get here, one of the trace_seq_* functions is not being used properly.
6594                  */
6595                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6596                           iter->ent->type);
6597         }
6598         trace_access_unlock(iter->cpu_file);
6599         trace_event_read_unlock();
6600
6601         /* Now copy what we have to the user */
6602         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6603         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6604                 trace_seq_init(&iter->seq);
6605
6606         /*
6607          * If there was nothing to send to user, in spite of consuming trace
6608          * entries, go back to wait for more entries.
6609          */
6610         if (sret == -EBUSY)
6611                 goto waitagain;
6612
6613 out:
6614         mutex_unlock(&iter->mutex);
6615
6616         return sret;
6617 }
6618
6619 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6620                                      unsigned int idx)
6621 {
6622         __free_page(spd->pages[idx]);
6623 }
6624
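     /*
      * Fill iter->seq with formatted trace lines for one splice page,
      * consuming entries until the sequence overflows or the remaining
      * byte budget "rem" is exhausted.  Returns the bytes still wanted.
      */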
6625 static size_t
6626 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6627 {
6628         size_t count;
6629         int save_len;
6630         int ret;
6631
6632         /* Seq buffer is page-sized, exactly what we need. */
6633         for (;;) {
6634                 save_len = iter->seq.seq.len;
6635                 ret = print_trace_line(iter);
6636
6637                 if (trace_seq_has_overflowed(&iter->seq)) {
6638                         iter->seq.seq.len = save_len;
6639                         break;
6640                 }
6641
6642                 /*
6643                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6644                  * should only be returned if iter->seq overflowed. But
6645                  * check it anyway to be safe.
6646                  */
6647                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6648                         iter->seq.seq.len = save_len;
6649                         break;
6650                 }
6651
6652                 count = trace_seq_used(&iter->seq) - save_len;
6653                 if (rem < count) {
6654                         rem = 0;
6655                         iter->seq.seq.len = save_len;
6656                         break;
6657                 }
6658
6659                 if (ret != TRACE_TYPE_NO_CONSUME)
6660                         trace_consume(iter);
6661                 rem -= count;
6662                 if (!trace_find_next_entry_inc(iter))   {
6663                         rem = 0;
6664                         iter->ent = NULL;
6665                         break;
6666                 }
6667         }
6668
6669         return rem;
6670 }
6671
6672 static ssize_t tracing_splice_read_pipe(struct file *filp,
6673                                         loff_t *ppos,
6674                                         struct pipe_inode_info *pipe,
6675                                         size_t len,
6676                                         unsigned int flags)
6677 {
6678         struct page *pages_def[PIPE_DEF_BUFFERS];
6679         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6680         struct trace_iterator *iter = filp->private_data;
6681         struct splice_pipe_desc spd = {
6682                 .pages          = pages_def,
6683                 .partial        = partial_def,
6684                 .nr_pages       = 0, /* This gets updated below. */
6685                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6686                 .ops            = &default_pipe_buf_ops,
6687                 .spd_release    = tracing_spd_release_pipe,
6688         };
6689         ssize_t ret;
6690         size_t rem;
6691         unsigned int i;
6692
6693         if (splice_grow_spd(pipe, &spd))
6694                 return -ENOMEM;
6695
6696         mutex_lock(&iter->mutex);
6697
6698         if (iter->trace->splice_read) {
6699                 ret = iter->trace->splice_read(iter, filp,
6700                                                ppos, pipe, len, flags);
6701                 if (ret)
6702                         goto out_err;
6703         }
6704
6705         ret = tracing_wait_pipe(filp);
6706         if (ret <= 0)
6707                 goto out_err;
6708
6709         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6710                 ret = -EFAULT;
6711                 goto out_err;
6712         }
6713
6714         trace_event_read_lock();
6715         trace_access_lock(iter->cpu_file);
6716
6717         /* Fill as many pages as possible. */
6718         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6719                 spd.pages[i] = alloc_page(GFP_KERNEL);
6720                 if (!spd.pages[i])
6721                         break;
6722
6723                 rem = tracing_fill_pipe_page(rem, iter);
6724
6725                 /* Copy the data into the page, so we can start over. */
6726                 ret = trace_seq_to_buffer(&iter->seq,
6727                                           page_address(spd.pages[i]),
6728                                           trace_seq_used(&iter->seq));
6729                 if (ret < 0) {
6730                         __free_page(spd.pages[i]);
6731                         break;
6732                 }
6733                 spd.partial[i].offset = 0;
6734                 spd.partial[i].len = trace_seq_used(&iter->seq);
6735
6736                 trace_seq_init(&iter->seq);
6737         }
6738
6739         trace_access_unlock(iter->cpu_file);
6740         trace_event_read_unlock();
6741         mutex_unlock(&iter->mutex);
6742
6743         spd.nr_pages = i;
6744
6745         if (i)
6746                 ret = splice_to_pipe(pipe, &spd);
6747         else
6748                 ret = 0;
6749 out:
6750         splice_shrink_spd(&spd);
6751         return ret;
6752
6753 out_err:
6754         mutex_unlock(&iter->mutex);
6755         goto out;
6756 }
6757
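     /*
      * "buffer_size_kb" tracefs file: reads report the per-CPU ring buffer
      * size in KB ("X" if the CPUs differ, with the expanded size noted if
      * the buffer has not been expanded yet); writes resize the buffer and
      * take the new size in KB.
      */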
6758 static ssize_t
6759 tracing_entries_read(struct file *filp, char __user *ubuf,
6760                      size_t cnt, loff_t *ppos)
6761 {
6762         struct inode *inode = file_inode(filp);
6763         struct trace_array *tr = inode->i_private;
6764         int cpu = tracing_get_cpu(inode);
6765         char buf[64];
6766         int r = 0;
6767         ssize_t ret;
6768
6769         mutex_lock(&trace_types_lock);
6770
6771         if (cpu == RING_BUFFER_ALL_CPUS) {
6772                 int cpu, buf_size_same;
6773                 unsigned long size;
6774
6775                 size = 0;
6776                 buf_size_same = 1;
6777                 /* check if all cpu sizes are the same */
6778                 for_each_tracing_cpu(cpu) {
6779                         /* fill in the size from first enabled cpu */
6780                         if (size == 0)
6781                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6782                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6783                                 buf_size_same = 0;
6784                                 break;
6785                         }
6786                 }
6787
6788                 if (buf_size_same) {
6789                         if (!tr->ring_buffer_expanded)
6790                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
6791                                             size >> 10,
6792                                             trace_buf_size >> 10);
6793                         else
6794                                 r = sprintf(buf, "%lu\n", size >> 10);
6795                 } else
6796                         r = sprintf(buf, "X\n");
6797         } else
6798                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6799
6800         mutex_unlock(&trace_types_lock);
6801
6802         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6803         return ret;
6804 }
6805
6806 static ssize_t
6807 tracing_entries_write(struct file *filp, const char __user *ubuf,
6808                       size_t cnt, loff_t *ppos)
6809 {
6810         struct inode *inode = file_inode(filp);
6811         struct trace_array *tr = inode->i_private;
6812         unsigned long val;
6813         int ret;
6814
6815         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6816         if (ret)
6817                 return ret;
6818
6819         /* must have at least 1 entry */
6820         if (!val)
6821                 return -EINVAL;
6822
6823         /* value is in KB */
6824         val <<= 10;
6825         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6826         if (ret < 0)
6827                 return ret;
6828
6829         *ppos += cnt;
6830
6831         return cnt;
6832 }
6833
6834 static ssize_t
6835 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6836                                 size_t cnt, loff_t *ppos)
6837 {
6838         struct trace_array *tr = filp->private_data;
6839         char buf[64];
6840         int r, cpu;
6841         unsigned long size = 0, expanded_size = 0;
6842
6843         mutex_lock(&trace_types_lock);
6844         for_each_tracing_cpu(cpu) {
6845                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6846                 if (!tr->ring_buffer_expanded)
6847                         expanded_size += trace_buf_size >> 10;
6848         }
6849         if (tr->ring_buffer_expanded)
6850                 r = sprintf(buf, "%lu\n", size);
6851         else
6852                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6853         mutex_unlock(&trace_types_lock);
6854
6855         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6856 }
6857
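     /*
      * "free_buffer" tracefs file: the written data is ignored, but closing
      * the file shrinks the ring buffer to zero (and first stops tracing if
      * the TRACE_ITER_STOP_ON_FREE flag is set).
      */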
6858 static ssize_t
6859 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6860                           size_t cnt, loff_t *ppos)
6861 {
6862         /*
6863          * There is no need to read what the user has written; this function
6864          * just makes sure that there is no error when "echo" is used.
6865          */
6866
6867         *ppos += cnt;
6868
6869         return cnt;
6870 }
6871
6872 static int
6873 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6874 {
6875         struct trace_array *tr = inode->i_private;
6876
6877         /* disable tracing ? */
6878         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6879                 tracer_tracing_off(tr);
6880         /* resize the ring buffer to 0 */
6881         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6882
6883         trace_array_put(tr);
6884
6885         return 0;
6886 }
6887
6888 #define TRACE_MARKER_MAX_SIZE           4096
6889
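     /*
      * Write handler for the "trace_marker" tracefs file.  User space can
      * inject a string into the ring buffer as a TRACE_PRINT event, e.g.:
      *     echo "hello world" > /sys/kernel/tracing/trace_marker
      * Writes are capped at TRACE_MARKER_MAX_SIZE bytes.
      */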
6890 static ssize_t
6891 tracing_mark_write(struct file *filp, const char __user *ubuf,
6892                                         size_t cnt, loff_t *fpos)
6893 {
6894         struct trace_array *tr = filp->private_data;
6895         struct ring_buffer_event *event;
6896         enum event_trigger_type tt = ETT_NONE;
6897         struct trace_buffer *buffer;
6898         struct print_entry *entry;
6899         int meta_size;
6900         ssize_t written;
6901         size_t size;
6902         int len;
6903
6904 /* Used in tracing_mark_raw_write() as well */
6905 #define FAULTED_STR "<faulted>"
6906 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6907
6908         if (tracing_disabled)
6909                 return -EINVAL;
6910
6911         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6912                 return -EINVAL;
6913
6914         if ((ssize_t)cnt < 0)
6915                 return -EINVAL;
6916
6917         if (cnt > TRACE_MARKER_MAX_SIZE)
6918                 cnt = TRACE_MARKER_MAX_SIZE;
6919
6920         meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6921  again:
6922         size = cnt + meta_size;
6923
6924         /* If less than "<faulted>", then make sure we can still add that */
6925         if (cnt < FAULTED_SIZE)
6926                 size += FAULTED_SIZE - cnt;
6927
6928         buffer = tr->array_buffer.buffer;
6929         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6930                                             tracing_gen_ctx());
6931         if (unlikely(!event)) {
6932                 /*
6933                  * If the size was greater than what was allowed, then
6934                  * make it smaller and try again.
6935                  */
6936                 if (size > ring_buffer_max_event_size(buffer)) {
6937                         /* if cnt < FAULTED_SIZE, size should never be bigger than max */
6938                         if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
6939                                 return -EBADF;
6940                         cnt = ring_buffer_max_event_size(buffer) - meta_size;
6941                         /* The above should only happen once */
6942                         if (WARN_ON_ONCE(cnt + meta_size == size))
6943                                 return -EBADF;
6944                         goto again;
6945                 }
6946
6947                 /* Ring buffer disabled, return as if not open for write */
6948                 return -EBADF;
6949         }
6950
6951         entry = ring_buffer_event_data(event);
6952         entry->ip = _THIS_IP_;
6953
6954         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6955         if (len) {
6956                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6957                 cnt = FAULTED_SIZE;
6958                 written = -EFAULT;
6959         } else
6960                 written = cnt;
6961
6962         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6963                 /* do not add \n before testing triggers, but add \0 */
6964                 entry->buf[cnt] = '\0';
6965                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6966         }
6967
6968         if (entry->buf[cnt - 1] != '\n') {
6969                 entry->buf[cnt] = '\n';
6970                 entry->buf[cnt + 1] = '\0';
6971         } else
6972                 entry->buf[cnt] = '\0';
6973
6974         if (static_branch_unlikely(&trace_marker_exports_enabled))
6975                 ftrace_exports(event, TRACE_EXPORT_MARKER);
6976         __buffer_unlock_commit(buffer, event);
6977
6978         if (tt)
6979                 event_triggers_post_call(tr->trace_marker_file, tt);
6980
6981         return written;
6982 }
6983
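     /*
      * Write handler for the "trace_marker_raw" tracefs file.  The payload
      * is binary: a leading int tag id followed by raw data, recorded as a
      * TRACE_RAW_DATA event.
      */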
6984 static ssize_t
6985 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6986                                         size_t cnt, loff_t *fpos)
6987 {
6988         struct trace_array *tr = filp->private_data;
6989         struct ring_buffer_event *event;
6990         struct trace_buffer *buffer;
6991         struct raw_data_entry *entry;
6992         ssize_t written;
6993         int size;
6994         int len;
6995
6996 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6997
6998         if (tracing_disabled)
6999                 return -EINVAL;
7000
7001         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7002                 return -EINVAL;
7003
7004         /* The marker must at least have a tag id */
7005         if (cnt < sizeof(unsigned int))
7006                 return -EINVAL;
7007
7008         size = sizeof(*entry) + cnt;
7009         if (cnt < FAULT_SIZE_ID)
7010                 size += FAULT_SIZE_ID - cnt;
7011
7012         buffer = tr->array_buffer.buffer;
7013
7014         if (size > ring_buffer_max_event_size(buffer))
7015                 return -EINVAL;
7016
7017         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7018                                             tracing_gen_ctx());
7019         if (!event)
7020                 /* Ring buffer disabled, return as if not open for write */
7021                 return -EBADF;
7022
7023         entry = ring_buffer_event_data(event);
7024
7025         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7026         if (len) {
7027                 entry->id = -1;
7028                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7029                 written = -EFAULT;
7030         } else
7031                 written = cnt;
7032
7033         __buffer_unlock_commit(buffer, event);
7034
7035         return written;
7036 }
7037
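     /*
      * "trace_clock" tracefs file: reading it lists the available clocks
      * with the current one in brackets; writing a clock name (e.g. "mono")
      * selects it and resets the ring buffers, since timestamps taken with
      * different clocks are not comparable.
      */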
7038 static int tracing_clock_show(struct seq_file *m, void *v)
7039 {
7040         struct trace_array *tr = m->private;
7041         int i;
7042
7043         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7044                 seq_printf(m,
7045                         "%s%s%s%s", i ? " " : "",
7046                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7047                         i == tr->clock_id ? "]" : "");
7048         seq_putc(m, '\n');
7049
7050         return 0;
7051 }
7052
7053 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7054 {
7055         int i;
7056
7057         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7058                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7059                         break;
7060         }
7061         if (i == ARRAY_SIZE(trace_clocks))
7062                 return -EINVAL;
7063
7064         mutex_lock(&trace_types_lock);
7065
7066         tr->clock_id = i;
7067
7068         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7069
7070         /*
7071          * New clock may not be consistent with the previous clock.
7072          * Reset the buffer so that it doesn't have incomparable timestamps.
7073          */
7074         tracing_reset_online_cpus(&tr->array_buffer);
7075
7076 #ifdef CONFIG_TRACER_MAX_TRACE
7077         if (tr->max_buffer.buffer)
7078                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7079         tracing_reset_online_cpus(&tr->max_buffer);
7080 #endif
7081
7082         mutex_unlock(&trace_types_lock);
7083
7084         return 0;
7085 }
7086
7087 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7088                                    size_t cnt, loff_t *fpos)
7089 {
7090         struct seq_file *m = filp->private_data;
7091         struct trace_array *tr = m->private;
7092         char buf[64];
7093         const char *clockstr;
7094         int ret;
7095
7096         if (cnt >= sizeof(buf))
7097                 return -EINVAL;
7098
7099         if (copy_from_user(buf, ubuf, cnt))
7100                 return -EFAULT;
7101
7102         buf[cnt] = 0;
7103
7104         clockstr = strstrip(buf);
7105
7106         ret = tracing_set_clock(tr, clockstr);
7107         if (ret)
7108                 return ret;
7109
7110         *fpos += cnt;
7111
7112         return cnt;
7113 }
7114
7115 static int tracing_clock_open(struct inode *inode, struct file *file)
7116 {
7117         struct trace_array *tr = inode->i_private;
7118         int ret;
7119
7120         ret = tracing_check_open_get_tr(tr);
7121         if (ret)
7122                 return ret;
7123
7124         ret = single_open(file, tracing_clock_show, inode->i_private);
7125         if (ret < 0)
7126                 trace_array_put(tr);
7127
7128         return ret;
7129 }
7130
7131 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7132 {
7133         struct trace_array *tr = m->private;
7134
7135         mutex_lock(&trace_types_lock);
7136
7137         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7138                 seq_puts(m, "delta [absolute]\n");
7139         else
7140                 seq_puts(m, "[delta] absolute\n");
7141
7142         mutex_unlock(&trace_types_lock);
7143
7144         return 0;
7145 }
7146
7147 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7148 {
7149         struct trace_array *tr = inode->i_private;
7150         int ret;
7151
7152         ret = tracing_check_open_get_tr(tr);
7153         if (ret)
7154                 return ret;
7155
7156         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7157         if (ret < 0)
7158                 trace_array_put(tr);
7159
7160         return ret;
7161 }
7162
7163 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7164 {
7165         if (rbe == this_cpu_read(trace_buffered_event))
7166                 return ring_buffer_time_stamp(buffer);
7167
7168         return ring_buffer_event_time_stamp(buffer, rbe);
7169 }
7170
7171 /*
7172  * Set or disable using the per CPU trace_buffered_event when possible.
7173  */
7174 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7175 {
7176         int ret = 0;
7177
7178         mutex_lock(&trace_types_lock);
7179
7180         if (set && tr->no_filter_buffering_ref++)
7181                 goto out;
7182
7183         if (!set) {
7184                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7185                         ret = -EINVAL;
7186                         goto out;
7187                 }
7188
7189                 --tr->no_filter_buffering_ref;
7190         }
7191  out:
7192         mutex_unlock(&trace_types_lock);
7193
7194         return ret;
7195 }
7196
7197 struct ftrace_buffer_info {
7198         struct trace_iterator   iter;
7199         void                    *spare;
7200         unsigned int            spare_cpu;
7201         unsigned int            spare_size;
7202         unsigned int            read;
7203 };
7204
7205 #ifdef CONFIG_TRACER_SNAPSHOT
7206 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7207 {
7208         struct trace_array *tr = inode->i_private;
7209         struct trace_iterator *iter;
7210         struct seq_file *m;
7211         int ret;
7212
7213         ret = tracing_check_open_get_tr(tr);
7214         if (ret)
7215                 return ret;
7216
7217         if (file->f_mode & FMODE_READ) {
7218                 iter = __tracing_open(inode, file, true);
7219                 if (IS_ERR(iter))
7220                         ret = PTR_ERR(iter);
7221         } else {
7222                 /* Writes still need the seq_file to hold the private data */
7223                 ret = -ENOMEM;
7224                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7225                 if (!m)
7226                         goto out;
7227                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7228                 if (!iter) {
7229                         kfree(m);
7230                         goto out;
7231                 }
7232                 ret = 0;
7233
7234                 iter->tr = tr;
7235                 iter->array_buffer = &tr->max_buffer;
7236                 iter->cpu_file = tracing_get_cpu(inode);
7237                 m->private = iter;
7238                 file->private_data = m;
7239         }
7240 out:
7241         if (ret < 0)
7242                 trace_array_put(tr);
7243
7244         return ret;
7245 }
7246
7247 static void tracing_swap_cpu_buffer(void *tr)
7248 {
7249         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7250 }
7251
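     /*
      * Write handler for the "snapshot" tracefs file: "0" frees the snapshot
      * buffer, "1" allocates it (if needed) and swaps it with the live
      * buffer, and any other number clears the snapshot buffer's contents.
      * Swapping a single CPU's buffer is only allowed when the ring buffer
      * supports it (CONFIG_RING_BUFFER_ALLOW_SWAP).
      */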
7252 static ssize_t
7253 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7254                        loff_t *ppos)
7255 {
7256         struct seq_file *m = filp->private_data;
7257         struct trace_iterator *iter = m->private;
7258         struct trace_array *tr = iter->tr;
7259         unsigned long val;
7260         int ret;
7261
7262         ret = tracing_update_buffers(tr);
7263         if (ret < 0)
7264                 return ret;
7265
7266         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7267         if (ret)
7268                 return ret;
7269
7270         mutex_lock(&trace_types_lock);
7271
7272         if (tr->current_trace->use_max_tr) {
7273                 ret = -EBUSY;
7274                 goto out;
7275         }
7276
7277         local_irq_disable();
7278         arch_spin_lock(&tr->max_lock);
7279         if (tr->cond_snapshot)
7280                 ret = -EBUSY;
7281         arch_spin_unlock(&tr->max_lock);
7282         local_irq_enable();
7283         if (ret)
7284                 goto out;
7285
7286         switch (val) {
7287         case 0:
7288                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7289                         ret = -EINVAL;
7290                         break;
7291                 }
7292                 if (tr->allocated_snapshot)
7293                         free_snapshot(tr);
7294                 break;
7295         case 1:
7296 /* Only allow per-cpu swap if the ring buffer supports it */
7297 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7298                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7299                         ret = -EINVAL;
7300                         break;
7301                 }
7302 #endif
7303                 if (tr->allocated_snapshot)
7304                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7305                                         &tr->array_buffer, iter->cpu_file);
7306
7307                 ret = tracing_arm_snapshot_locked(tr);
7308                 if (ret)
7309                         break;
7310
7311                 /* Now, we're going to swap */
7312                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7313                         local_irq_disable();
7314                         update_max_tr(tr, current, smp_processor_id(), NULL);
7315                         local_irq_enable();
7316                 } else {
7317                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7318                                                  (void *)tr, 1);
7319                 }
7320                 tracing_disarm_snapshot(tr);
7321                 break;
7322         default:
7323                 if (tr->allocated_snapshot) {
7324                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7325                                 tracing_reset_online_cpus(&tr->max_buffer);
7326                         else
7327                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7328                 }
7329                 break;
7330         }
7331
7332         if (ret >= 0) {
7333                 *ppos += cnt;
7334                 ret = cnt;
7335         }
7336 out:
7337         mutex_unlock(&trace_types_lock);
7338         return ret;
7339 }
7340
7341 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7342 {
7343         struct seq_file *m = file->private_data;
7344         int ret;
7345
7346         ret = tracing_release(inode, file);
7347
7348         if (file->f_mode & FMODE_READ)
7349                 return ret;
7350
7351         /* If write only, the seq_file is just a stub */
7352         if (m)
7353                 kfree(m->private);
7354         kfree(m);
7355
7356         return 0;
7357 }
7358
7359 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7360 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7361                                     size_t count, loff_t *ppos);
7362 static int tracing_buffers_release(struct inode *inode, struct file *file);
7363 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7364                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7365
7366 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7367 {
7368         struct ftrace_buffer_info *info;
7369         int ret;
7370
7371         /* The following checks for tracefs lockdown */
7372         ret = tracing_buffers_open(inode, filp);
7373         if (ret < 0)
7374                 return ret;
7375
7376         info = filp->private_data;
7377
7378         if (info->iter.trace->use_max_tr) {
7379                 tracing_buffers_release(inode, filp);
7380                 return -EBUSY;
7381         }
7382
7383         info->iter.snapshot = true;
7384         info->iter.array_buffer = &info->iter.tr->max_buffer;
7385
7386         return ret;
7387 }
7388
7389 #endif /* CONFIG_TRACER_SNAPSHOT */
7390
7391
7392 static const struct file_operations tracing_thresh_fops = {
7393         .open           = tracing_open_generic,
7394         .read           = tracing_thresh_read,
7395         .write          = tracing_thresh_write,
7396         .llseek         = generic_file_llseek,
7397 };
7398
7399 #ifdef CONFIG_TRACER_MAX_TRACE
7400 static const struct file_operations tracing_max_lat_fops = {
7401         .open           = tracing_open_generic_tr,
7402         .read           = tracing_max_lat_read,
7403         .write          = tracing_max_lat_write,
7404         .llseek         = generic_file_llseek,
7405         .release        = tracing_release_generic_tr,
7406 };
7407 #endif
7408
7409 static const struct file_operations set_tracer_fops = {
7410         .open           = tracing_open_generic_tr,
7411         .read           = tracing_set_trace_read,
7412         .write          = tracing_set_trace_write,
7413         .llseek         = generic_file_llseek,
7414         .release        = tracing_release_generic_tr,
7415 };
7416
7417 static const struct file_operations tracing_pipe_fops = {
7418         .open           = tracing_open_pipe,
7419         .poll           = tracing_poll_pipe,
7420         .read           = tracing_read_pipe,
7421         .splice_read    = tracing_splice_read_pipe,
7422         .release        = tracing_release_pipe,
7423         .llseek         = no_llseek,
7424 };
7425
7426 static const struct file_operations tracing_entries_fops = {
7427         .open           = tracing_open_generic_tr,
7428         .read           = tracing_entries_read,
7429         .write          = tracing_entries_write,
7430         .llseek         = generic_file_llseek,
7431         .release        = tracing_release_generic_tr,
7432 };
7433
7434 static const struct file_operations tracing_total_entries_fops = {
7435         .open           = tracing_open_generic_tr,
7436         .read           = tracing_total_entries_read,
7437         .llseek         = generic_file_llseek,
7438         .release        = tracing_release_generic_tr,
7439 };
7440
7441 static const struct file_operations tracing_free_buffer_fops = {
7442         .open           = tracing_open_generic_tr,
7443         .write          = tracing_free_buffer_write,
7444         .release        = tracing_free_buffer_release,
7445 };
7446
7447 static const struct file_operations tracing_mark_fops = {
7448         .open           = tracing_mark_open,
7449         .write          = tracing_mark_write,
7450         .release        = tracing_release_generic_tr,
7451 };
7452
7453 static const struct file_operations tracing_mark_raw_fops = {
7454         .open           = tracing_mark_open,
7455         .write          = tracing_mark_raw_write,
7456         .release        = tracing_release_generic_tr,
7457 };
7458
7459 static const struct file_operations trace_clock_fops = {
7460         .open           = tracing_clock_open,
7461         .read           = seq_read,
7462         .llseek         = seq_lseek,
7463         .release        = tracing_single_release_tr,
7464         .write          = tracing_clock_write,
7465 };
7466
7467 static const struct file_operations trace_time_stamp_mode_fops = {
7468         .open           = tracing_time_stamp_mode_open,
7469         .read           = seq_read,
7470         .llseek         = seq_lseek,
7471         .release        = tracing_single_release_tr,
7472 };
7473
7474 #ifdef CONFIG_TRACER_SNAPSHOT
7475 static const struct file_operations snapshot_fops = {
7476         .open           = tracing_snapshot_open,
7477         .read           = seq_read,
7478         .write          = tracing_snapshot_write,
7479         .llseek         = tracing_lseek,
7480         .release        = tracing_snapshot_release,
7481 };
7482
7483 static const struct file_operations snapshot_raw_fops = {
7484         .open           = snapshot_raw_open,
7485         .read           = tracing_buffers_read,
7486         .release        = tracing_buffers_release,
7487         .splice_read    = tracing_buffers_splice_read,
7488         .llseek         = no_llseek,
7489 };
7490
7491 #endif /* CONFIG_TRACER_SNAPSHOT */
7492
7493 /*
7494  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7495  * @filp: The active open file structure
7496  * @ubuf: The user space buffer containing the value to be written
7497  * @cnt: The maximum number of bytes to write
7498  * @ppos: The current "file" position
7499  *
7500  * This function implements the write interface for a struct trace_min_max_param.
7501  * The filp->private_data must point to a trace_min_max_param structure that
7502  * defines where to write the value, the min and the max acceptable values,
7503  * and a lock to protect the write.
7504  */
7505 static ssize_t
7506 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7507 {
7508         struct trace_min_max_param *param = filp->private_data;
7509         u64 val;
7510         int err;
7511
7512         if (!param)
7513                 return -EFAULT;
7514
7515         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7516         if (err)
7517                 return err;
7518
7519         if (param->lock)
7520                 mutex_lock(param->lock);
7521
7522         if (param->min && val < *param->min)
7523                 err = -EINVAL;
7524
7525         if (param->max && val > *param->max)
7526                 err = -EINVAL;
7527
7528         if (!err)
7529                 *param->val = val;
7530
7531         if (param->lock)
7532                 mutex_unlock(param->lock);
7533
7534         if (err)
7535                 return err;
7536
7537         return cnt;
7538 }
7539
7540 /*
7541  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7542  * @filp: The active open file structure
7543  * @ubuf: The userspace provided buffer to read value into
7544  * @cnt: The maximum number of bytes to read
7545  * @ppos: The current "file" position
7546  *
7547  * This function implements the read interface for a struct trace_min_max_param.
7548  * The filp->private_data must point to a trace_min_max_param struct with valid
7549  * data.
7550  */
7551 static ssize_t
7552 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7553 {
7554         struct trace_min_max_param *param = filp->private_data;
7555         char buf[U64_STR_SIZE];
7556         int len;
7557         u64 val;
7558
7559         if (!param)
7560                 return -EFAULT;
7561
7562         val = *param->val;
7563
7564         if (cnt > sizeof(buf))
7565                 cnt = sizeof(buf);
7566
7567         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7568
7569         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7570 }
7571
7572 const struct file_operations trace_min_max_fops = {
7573         .open           = tracing_open_generic,
7574         .read           = trace_min_max_read,
7575         .write          = trace_min_max_write,
7576 };
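     /*
      * Illustrative sketch (not part of this file): a user of
      * trace_min_max_fops typically declares a trace_min_max_param that
      * points at the value and its bounds, then creates a tracefs file with
      * it.  The names below are hypothetical:
      *
      *     static DEFINE_MUTEX(my_lock);
      *     static u64 my_val, my_min, my_max = 100;
      *     static struct trace_min_max_param my_param = {
      *             .lock   = &my_lock,
      *             .val    = &my_val,
      *             .min    = &my_min,
      *             .max    = &my_max,
      *     };
      *     trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
      *                       &my_param, &trace_min_max_fops);
      */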
7577
7578 #define TRACING_LOG_ERRS_MAX    8
7579 #define TRACING_LOG_LOC_MAX     128
7580
7581 #define CMD_PREFIX "  Command: "
7582
7583 struct err_info {
7584         const char      **errs; /* ptr to loc-specific array of err strings */
7585         u8              type;   /* index into errs -> specific err string */
7586         u16             pos;    /* caret position */
7587         u64             ts;
7588 };
7589
7590 struct tracing_log_err {
7591         struct list_head        list;
7592         struct err_info         info;
7593         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7594         char                    *cmd;                     /* what caused err */
7595 };
7596
7597 static DEFINE_MUTEX(tracing_err_log_lock);
7598
7599 static struct tracing_log_err *alloc_tracing_log_err(int len)
7600 {
7601         struct tracing_log_err *err;
7602
7603         err = kzalloc(sizeof(*err), GFP_KERNEL);
7604         if (!err)
7605                 return ERR_PTR(-ENOMEM);
7606
7607         err->cmd = kzalloc(len, GFP_KERNEL);
7608         if (!err->cmd) {
7609                 kfree(err);
7610                 return ERR_PTR(-ENOMEM);
7611         }
7612
7613         return err;
7614 }
7615
7616 static void free_tracing_log_err(struct tracing_log_err *err)
7617 {
7618         kfree(err->cmd);
7619         kfree(err);
7620 }
7621
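     /*
      * Get a tracing_log_err to fill in: allocate a new one while fewer than
      * TRACING_LOG_ERRS_MAX entries exist; once the log is full, remove the
      * oldest entry from the list and reuse it (with a freshly sized cmd
      * buffer) for the new error.
      */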
7622 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7623                                                    int len)
7624 {
7625         struct tracing_log_err *err;
7626         char *cmd;
7627
7628         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7629                 err = alloc_tracing_log_err(len);
7630                 if (PTR_ERR(err) != -ENOMEM)
7631                         tr->n_err_log_entries++;
7632
7633                 return err;
7634         }
7635         cmd = kzalloc(len, GFP_KERNEL);
7636         if (!cmd)
7637                 return ERR_PTR(-ENOMEM);
7638         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7639         kfree(err->cmd);
7640         err->cmd = cmd;
7641         list_del(&err->list);
7642
7643         return err;
7644 }
7645
7646 /**
7647  * err_pos - find the position of a string within a command for error careting
7648  * @cmd: The tracing command that caused the error
7649  * @str: The string to position the caret at within @cmd
7650  *
7651  * Finds the position of the first occurrence of @str within @cmd.  The
7652  * return value can be passed to tracing_log_err() for caret placement
7653  * within @cmd.
7654  *
7655  * Returns the index within @cmd of the first occurrence of @str or 0
7656  * if @str was not found.
7657  */
7658 unsigned int err_pos(char *cmd, const char *str)
7659 {
7660         char *found;
7661
7662         if (WARN_ON(!strlen(cmd)))
7663                 return 0;
7664
7665         found = strstr(cmd, str);
7666         if (found)
7667                 return found - cmd;
7668
7669         return 0;
7670 }
7671
7672 /**
7673  * tracing_log_err - write an error to the tracing error log
7674  * @tr: The associated trace array for the error (NULL for top level array)
7675  * @loc: A string describing where the error occurred
7676  * @cmd: The tracing command that caused the error
7677  * @errs: The array of loc-specific static error strings
7678  * @type: The index into errs[], which produces the specific static err string
7679  * @pos: The position the caret should be placed in the cmd
7680  *
7681  * Writes an error into tracing/error_log of the form:
7682  *
7683  * <loc>: error: <text>
7684  *   Command: <cmd>
7685  *              ^
7686  *
7687  * tracing/error_log is a small log file containing the last
7688  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7689  * unless there has been a tracing error, and the error log can be
7690  * cleared and have its memory freed by writing the empty string in
7691  * truncation mode to it, i.e. echo > tracing/error_log.
7692  *
7693  * NOTE: the @errs array along with the @type param are used to
7694  * produce a static error string - this string is not copied and saved
7695  * when the error is logged - only a pointer to it is saved.  See
7696  * existing callers for examples of how static strings are typically
7697  * defined for use with tracing_log_err().
7698  */
7699 void tracing_log_err(struct trace_array *tr,
7700                      const char *loc, const char *cmd,
7701                      const char **errs, u8 type, u16 pos)
7702 {
7703         struct tracing_log_err *err;
7704         int len = 0;
7705
7706         if (!tr)
7707                 tr = &global_trace;
7708
7709         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7710
7711         mutex_lock(&tracing_err_log_lock);
7712         err = get_tracing_log_err(tr, len);
7713         if (PTR_ERR(err) == -ENOMEM) {
7714                 mutex_unlock(&tracing_err_log_lock);
7715                 return;
7716         }
7717
7718         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7719         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7720
7721         err->info.errs = errs;
7722         err->info.type = type;
7723         err->info.pos = pos;
7724         err->info.ts = local_clock();
7725
7726         list_add_tail(&err->list, &tr->err_log);
7727         mutex_unlock(&tracing_err_log_lock);
7728 }
7729
7730 static void clear_tracing_err_log(struct trace_array *tr)
7731 {
7732         struct tracing_log_err *err, *next;
7733
7734         mutex_lock(&tracing_err_log_lock);
7735         list_for_each_entry_safe(err, next, &tr->err_log, list) {
7736                 list_del(&err->list);
7737                 free_tracing_log_err(err);
7738         }
7739
7740         tr->n_err_log_entries = 0;
7741         mutex_unlock(&tracing_err_log_lock);
7742 }
7743
7744 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7745 {
7746         struct trace_array *tr = m->private;
7747
7748         mutex_lock(&tracing_err_log_lock);
7749
7750         return seq_list_start(&tr->err_log, *pos);
7751 }
7752
7753 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7754 {
7755         struct trace_array *tr = m->private;
7756
7757         return seq_list_next(v, &tr->err_log, pos);
7758 }
7759
7760 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7761 {
7762         mutex_unlock(&tracing_err_log_lock);
7763 }
7764
7765 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7766 {
7767         u16 i;
7768
7769         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7770                 seq_putc(m, ' ');
7771         for (i = 0; i < pos; i++)
7772                 seq_putc(m, ' ');
7773         seq_puts(m, "^\n");
7774 }
7775
7776 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7777 {
7778         struct tracing_log_err *err = v;
7779
7780         if (err) {
7781                 const char *err_text = err->info.errs[err->info.type];
7782                 u64 sec = err->info.ts;
7783                 u32 nsec;
7784
7785                 nsec = do_div(sec, NSEC_PER_SEC);
7786                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7787                            err->loc, err_text);
7788                 seq_printf(m, "%s", err->cmd);
7789                 tracing_err_log_show_pos(m, err->info.pos);
7790         }
7791
7792         return 0;
7793 }
7794
7795 static const struct seq_operations tracing_err_log_seq_ops = {
7796         .start  = tracing_err_log_seq_start,
7797         .next   = tracing_err_log_seq_next,
7798         .stop   = tracing_err_log_seq_stop,
7799         .show   = tracing_err_log_seq_show
7800 };
7801
7802 static int tracing_err_log_open(struct inode *inode, struct file *file)
7803 {
7804         struct trace_array *tr = inode->i_private;
7805         int ret = 0;
7806
7807         ret = tracing_check_open_get_tr(tr);
7808         if (ret)
7809                 return ret;
7810
7811         /* If this file was opened for write, then erase contents */
7812         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7813                 clear_tracing_err_log(tr);
7814
7815         if (file->f_mode & FMODE_READ) {
7816                 ret = seq_open(file, &tracing_err_log_seq_ops);
7817                 if (!ret) {
7818                         struct seq_file *m = file->private_data;
7819                         m->private = tr;
7820                 } else {
7821                         trace_array_put(tr);
7822                 }
7823         }
7824         return ret;
7825 }
7826
7827 static ssize_t tracing_err_log_write(struct file *file,
7828                                      const char __user *buffer,
7829                                      size_t count, loff_t *ppos)
7830 {
7831         return count;
7832 }
7833
7834 static int tracing_err_log_release(struct inode *inode, struct file *file)
7835 {
7836         struct trace_array *tr = inode->i_private;
7837
7838         trace_array_put(tr);
7839
7840         if (file->f_mode & FMODE_READ)
7841                 seq_release(inode, file);
7842
7843         return 0;
7844 }
7845
7846 static const struct file_operations tracing_err_log_fops = {
7847         .open           = tracing_err_log_open,
7848         .write          = tracing_err_log_write,
7849         .read           = seq_read,
7850         .llseek         = tracing_lseek,
7851         .release        = tracing_err_log_release,
7852 };
7853
7854 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7855 {
7856         struct trace_array *tr = inode->i_private;
7857         struct ftrace_buffer_info *info;
7858         int ret;
7859
7860         ret = tracing_check_open_get_tr(tr);
7861         if (ret)
7862                 return ret;
7863
7864         info = kvzalloc(sizeof(*info), GFP_KERNEL);
7865         if (!info) {
7866                 trace_array_put(tr);
7867                 return -ENOMEM;
7868         }
7869
7870         mutex_lock(&trace_types_lock);
7871
7872         info->iter.tr           = tr;
7873         info->iter.cpu_file     = tracing_get_cpu(inode);
7874         info->iter.trace        = tr->current_trace;
7875         info->iter.array_buffer = &tr->array_buffer;
7876         info->spare             = NULL;
7877         /* Force reading ring buffer for first read */
7878         info->read              = (unsigned int)-1;
7879
7880         filp->private_data = info;
7881
7882         tr->trace_ref++;
7883
7884         mutex_unlock(&trace_types_lock);
7885
7886         ret = nonseekable_open(inode, filp);
7887         if (ret < 0)
7888                 trace_array_put(tr);
7889
7890         return ret;
7891 }
7892
7893 static __poll_t
7894 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7895 {
7896         struct ftrace_buffer_info *info = filp->private_data;
7897         struct trace_iterator *iter = &info->iter;
7898
7899         return trace_poll(iter, filp, poll_table);
7900 }
7901
7902 static ssize_t
7903 tracing_buffers_read(struct file *filp, char __user *ubuf,
7904                      size_t count, loff_t *ppos)
7905 {
7906         struct ftrace_buffer_info *info = filp->private_data;
7907         struct trace_iterator *iter = &info->iter;
7908         void *trace_data;
7909         int page_size;
7910         ssize_t ret = 0;
7911         ssize_t size;
7912
7913         if (!count)
7914                 return 0;
7915
7916 #ifdef CONFIG_TRACER_MAX_TRACE
7917         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7918                 return -EBUSY;
7919 #endif
7920
7921         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7922
7923         /* Make sure the spare matches the current sub buffer size */
7924         if (info->spare) {
7925                 if (page_size != info->spare_size) {
7926                         ring_buffer_free_read_page(iter->array_buffer->buffer,
7927                                                    info->spare_cpu, info->spare);
7928                         info->spare = NULL;
7929                 }
7930         }
7931
7932         if (!info->spare) {
7933                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7934                                                           iter->cpu_file);
7935                 if (IS_ERR(info->spare)) {
7936                         ret = PTR_ERR(info->spare);
7937                         info->spare = NULL;
7938                 } else {
7939                         info->spare_cpu = iter->cpu_file;
7940                         info->spare_size = page_size;
7941                 }
7942         }
7943         if (!info->spare)
7944                 return ret;
7945
7946         /* Do we have previous read data to read? */
7947         if (info->read < page_size)
7948                 goto read;
7949
7950  again:
7951         trace_access_lock(iter->cpu_file);
7952         ret = ring_buffer_read_page(iter->array_buffer->buffer,
7953                                     info->spare,
7954                                     count,
7955                                     iter->cpu_file, 0);
7956         trace_access_unlock(iter->cpu_file);
7957
7958         if (ret < 0) {
7959                 if (trace_empty(iter)) {
7960                         if ((filp->f_flags & O_NONBLOCK))
7961                                 return -EAGAIN;
7962
7963                         ret = wait_on_pipe(iter, 0);
7964                         if (ret)
7965                                 return ret;
7966
7967                         goto again;
7968                 }
7969                 return 0;
7970         }
7971
7972         info->read = 0;
7973  read:
7974         size = page_size - info->read;
7975         if (size > count)
7976                 size = count;
7977         trace_data = ring_buffer_read_page_data(info->spare);
7978         ret = copy_to_user(ubuf, trace_data + info->read, size);
7979         if (ret == size)
7980                 return -EFAULT;
7981
7982         size -= ret;
7983
7984         *ppos += size;
7985         info->read += size;
7986
7987         return size;
7988 }
7989
7990 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7991 {
7992         struct ftrace_buffer_info *info = file->private_data;
7993         struct trace_iterator *iter = &info->iter;
7994
7995         iter->closed = true;
7996         /* Make sure the waiters see the new wait_index */
7997         (void)atomic_fetch_inc_release(&iter->wait_index);
7998
7999         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8000
8001         return 0;
8002 }
8003
8004 static int tracing_buffers_release(struct inode *inode, struct file *file)
8005 {
8006         struct ftrace_buffer_info *info = file->private_data;
8007         struct trace_iterator *iter = &info->iter;
8008
8009         mutex_lock(&trace_types_lock);
8010
8011         iter->tr->trace_ref--;
8012
8013         __trace_array_put(iter->tr);
8014
8015         if (info->spare)
8016                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8017                                            info->spare_cpu, info->spare);
8018         kvfree(info);
8019
8020         mutex_unlock(&trace_types_lock);
8021
8022         return 0;
8023 }
8024
8025 struct buffer_ref {
8026         struct trace_buffer     *buffer;
8027         void                    *page;
8028         int                     cpu;
8029         refcount_t              refcount;
8030 };
8031
8032 static void buffer_ref_release(struct buffer_ref *ref)
8033 {
8034         if (!refcount_dec_and_test(&ref->refcount))
8035                 return;
8036         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8037         kfree(ref);
8038 }
8039
8040 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8041                                     struct pipe_buffer *buf)
8042 {
8043         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8044
8045         buffer_ref_release(ref);
8046         buf->private = 0;
8047 }
8048
8049 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8050                                 struct pipe_buffer *buf)
8051 {
8052         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8053
8054         if (refcount_read(&ref->refcount) > INT_MAX/2)
8055                 return false;
8056
8057         refcount_inc(&ref->refcount);
8058         return true;
8059 }
8060
8061 /* Pipe buffer operations for spliced ring buffer pages. */
8062 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8063         .release                = buffer_pipe_buf_release,
8064         .get                    = buffer_pipe_buf_get,
8065 };
8066
8067 /*
8068  * Callback from splice_to_pipe(), used to release any pages left in
8069  * the spd in case we errored out while filling the pipe.
8070  */
8071 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8072 {
8073         struct buffer_ref *ref =
8074                 (struct buffer_ref *)spd->partial[i].private;
8075
8076         buffer_ref_release(ref);
8077         spd->partial[i].private = 0;
8078 }
8079
8080 static ssize_t
8081 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8082                             struct pipe_inode_info *pipe, size_t len,
8083                             unsigned int flags)
8084 {
8085         struct ftrace_buffer_info *info = file->private_data;
8086         struct trace_iterator *iter = &info->iter;
8087         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8088         struct page *pages_def[PIPE_DEF_BUFFERS];
8089         struct splice_pipe_desc spd = {
8090                 .pages          = pages_def,
8091                 .partial        = partial_def,
8092                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8093                 .ops            = &buffer_pipe_buf_ops,
8094                 .spd_release    = buffer_spd_release,
8095         };
8096         struct buffer_ref *ref;
8097         bool woken = false;
8098         int page_size;
8099         int entries, i;
8100         ssize_t ret = 0;
8101
8102 #ifdef CONFIG_TRACER_MAX_TRACE
8103         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8104                 return -EBUSY;
8105 #endif
8106
8107         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8108         if (*ppos & (page_size - 1))
8109                 return -EINVAL;
8110
8111         if (len & (page_size - 1)) {
8112                 if (len < page_size)
8113                         return -EINVAL;
8114                 len &= (~(page_size - 1));
8115         }
8116
8117         if (splice_grow_spd(pipe, &spd))
8118                 return -ENOMEM;
8119
8120  again:
8121         trace_access_lock(iter->cpu_file);
8122         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8123
8124         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8125                 struct page *page;
8126                 int r;
8127
8128                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8129                 if (!ref) {
8130                         ret = -ENOMEM;
8131                         break;
8132                 }
8133
8134                 refcount_set(&ref->refcount, 1);
8135                 ref->buffer = iter->array_buffer->buffer;
8136                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8137                 if (IS_ERR(ref->page)) {
8138                         ret = PTR_ERR(ref->page);
8139                         ref->page = NULL;
8140                         kfree(ref);
8141                         break;
8142                 }
8143                 ref->cpu = iter->cpu_file;
8144
8145                 r = ring_buffer_read_page(ref->buffer, ref->page,
8146                                           len, iter->cpu_file, 1);
8147                 if (r < 0) {
8148                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8149                                                    ref->page);
8150                         kfree(ref);
8151                         break;
8152                 }
8153
8154                 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8155
8156                 spd.pages[i] = page;
8157                 spd.partial[i].len = page_size;
8158                 spd.partial[i].offset = 0;
8159                 spd.partial[i].private = (unsigned long)ref;
8160                 spd.nr_pages++;
8161                 *ppos += page_size;
8162
8163                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8164         }
8165
8166         trace_access_unlock(iter->cpu_file);
8167         spd.nr_pages = i;
8168
8169         /* did we read anything? */
8170         if (!spd.nr_pages) {
8171
8172                 if (ret)
8173                         goto out;
8174
8175                 if (woken)
8176                         goto out;
8177
8178                 ret = -EAGAIN;
8179                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8180                         goto out;
8181
8182                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8183                 if (ret)
8184                         goto out;
8185
8186                 /* No need to wait after waking up when tracing is off */
8187                 if (!tracer_tracing_is_on(iter->tr))
8188                         goto out;
8189
8190                 /* Iterate one more time to collect any new data, then exit */
8191                 woken = true;
8192
8193                 goto again;
8194         }
8195
8196         ret = splice_to_pipe(pipe, &spd);
8197 out:
8198         splice_shrink_spd(&spd);
8199
8200         return ret;
8201 }
8202
8203 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8204 {
8205         struct ftrace_buffer_info *info = file->private_data;
8206         struct trace_iterator *iter = &info->iter;
8207         int err;
8208
8209         if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8210                 if (!(file->f_flags & O_NONBLOCK)) {
8211                         err = ring_buffer_wait(iter->array_buffer->buffer,
8212                                                iter->cpu_file,
8213                                                iter->tr->buffer_percent,
8214                                                NULL, NULL);
8215                         if (err)
8216                                 return err;
8217                 }
8218
8219                 return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8220                                                   iter->cpu_file);
8221         } else if (cmd) {
8222                 return -ENOTTY;
8223         }
8224
8225         /*
8226          * An ioctl call with cmd 0 to the ring buffer file will wake up all
8227          * waiters
8228          */
8229         mutex_lock(&trace_types_lock);
8230
8231         /* Make sure the waiters see the new wait_index */
8232         (void)atomic_fetch_inc_release(&iter->wait_index);
8233
8234         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8235
8236         mutex_unlock(&trace_types_lock);
8237         return 0;
8238 }
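
/*
 * From user space this looks roughly like (sketch; fd is an open
 * per_cpu/cpuN/trace_pipe_raw descriptor used with the ring-buffer
 * memory mapping):
 *
 *	ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);
 *
 * which waits for data (unless the fd is O_NONBLOCK) and then advances the
 * mapped reader page, while
 *
 *	ioctl(fd, 0);
 *
 * simply wakes up any tasks waiting on the buffer.
 */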
8239
8240 #ifdef CONFIG_TRACER_MAX_TRACE
8241 static int get_snapshot_map(struct trace_array *tr)
8242 {
8243         int err = 0;
8244
8245         /*
8246          * Called with mmap_lock held. lockdep would be unhappy if we
8247          * were to take trace_types_lock here. Instead use the specific
8248          * snapshot_trigger_lock.
8249          */
8250         spin_lock(&tr->snapshot_trigger_lock);
8251
8252         if (tr->snapshot || tr->mapped == UINT_MAX)
8253                 err = -EBUSY;
8254         else
8255                 tr->mapped++;
8256
8257         spin_unlock(&tr->snapshot_trigger_lock);
8258
8259         /* Wait for update_max_tr() to observe iter->tr->mapped */
8260         if (tr->mapped == 1)
8261                 synchronize_rcu();
8262
8263         return err;
8264
8265 }
8266 static void put_snapshot_map(struct trace_array *tr)
8267 {
8268         spin_lock(&tr->snapshot_trigger_lock);
8269         if (!WARN_ON(!tr->mapped))
8270                 tr->mapped--;
8271         spin_unlock(&tr->snapshot_trigger_lock);
8272 }
8273 #else
8274 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8275 static inline void put_snapshot_map(struct trace_array *tr) { }
8276 #endif
8277
8278 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8279 {
8280         struct ftrace_buffer_info *info = vma->vm_file->private_data;
8281         struct trace_iterator *iter = &info->iter;
8282
8283         WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8284         put_snapshot_map(iter->tr);
8285 }
8286
8287 static const struct vm_operations_struct tracing_buffers_vmops = {
8288         .close          = tracing_buffers_mmap_close,
8289 };
8290
8291 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8292 {
8293         struct ftrace_buffer_info *info = filp->private_data;
8294         struct trace_iterator *iter = &info->iter;
8295         int ret = 0;
8296
8297         ret = get_snapshot_map(iter->tr);
8298         if (ret)
8299                 return ret;
8300
8301         ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8302         if (ret)
8303                 put_snapshot_map(iter->tr);
8304
8305         vma->vm_ops = &tracing_buffers_vmops;
8306
8307         return ret;
8308 }
8309
8310 static const struct file_operations tracing_buffers_fops = {
8311         .open           = tracing_buffers_open,
8312         .read           = tracing_buffers_read,
8313         .poll           = tracing_buffers_poll,
8314         .release        = tracing_buffers_release,
8315         .flush          = tracing_buffers_flush,
8316         .splice_read    = tracing_buffers_splice_read,
8317         .unlocked_ioctl = tracing_buffers_ioctl,
8318         .llseek         = no_llseek,
8319         .mmap           = tracing_buffers_mmap,
8320 };
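
/*
 * Rough sketch of a user-space consumer of trace_pipe_raw (error handling
 * omitted; the path is illustrative and the buffer should match the
 * sub-buffer size, one system page by default):
 *
 *	int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *		      O_RDONLY);
 *	char page[4096];
 *	ssize_t n;
 *
 *	while ((n = read(fd, page, sizeof(page))) > 0)
 *		process_subbuf(page, n);
 *
 * Each successful read() returns at most one sub-buffer worth of raw
 * ring-buffer data, matching the ring_buffer_read_page() path above;
 * process_subbuf() stands in for whatever parses the binary pages.
 */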
8321
8322 static ssize_t
8323 tracing_stats_read(struct file *filp, char __user *ubuf,
8324                    size_t count, loff_t *ppos)
8325 {
8326         struct inode *inode = file_inode(filp);
8327         struct trace_array *tr = inode->i_private;
8328         struct array_buffer *trace_buf = &tr->array_buffer;
8329         int cpu = tracing_get_cpu(inode);
8330         struct trace_seq *s;
8331         unsigned long cnt;
8332         unsigned long long t;
8333         unsigned long usec_rem;
8334
8335         s = kmalloc(sizeof(*s), GFP_KERNEL);
8336         if (!s)
8337                 return -ENOMEM;
8338
8339         trace_seq_init(s);
8340
8341         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8342         trace_seq_printf(s, "entries: %ld\n", cnt);
8343
8344         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8345         trace_seq_printf(s, "overrun: %ld\n", cnt);
8346
8347         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8348         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8349
8350         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8351         trace_seq_printf(s, "bytes: %ld\n", cnt);
8352
8353         if (trace_clocks[tr->clock_id].in_ns) {
8354                 /* local or global for trace_clock */
8355                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8356                 usec_rem = do_div(t, USEC_PER_SEC);
8357                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8358                                                                 t, usec_rem);
8359
8360                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8361                 usec_rem = do_div(t, USEC_PER_SEC);
8362                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8363         } else {
8364                 /* counter or tsc mode for trace_clock */
8365                 trace_seq_printf(s, "oldest event ts: %llu\n",
8366                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8367
8368                 trace_seq_printf(s, "now ts: %llu\n",
8369                                 ring_buffer_time_stamp(trace_buf->buffer));
8370         }
8371
8372         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8373         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8374
8375         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8376         trace_seq_printf(s, "read events: %ld\n", cnt);
8377
8378         count = simple_read_from_buffer(ubuf, count, ppos,
8379                                         s->buffer, trace_seq_used(s));
8380
8381         kfree(s);
8382
8383         return count;
8384 }
8385
8386 static const struct file_operations tracing_stats_fops = {
8387         .open           = tracing_open_generic_tr,
8388         .read           = tracing_stats_read,
8389         .llseek         = generic_file_llseek,
8390         .release        = tracing_release_generic_tr,
8391 };
8392
8393 #ifdef CONFIG_DYNAMIC_FTRACE
8394
8395 static ssize_t
8396 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8397                   size_t cnt, loff_t *ppos)
8398 {
8399         ssize_t ret;
8400         char *buf;
8401         int r;
8402
8403         /* 256 should be plenty to hold the amount needed */
8404         buf = kmalloc(256, GFP_KERNEL);
8405         if (!buf)
8406                 return -ENOMEM;
8407
8408         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8409                       ftrace_update_tot_cnt,
8410                       ftrace_number_of_pages,
8411                       ftrace_number_of_groups);
8412
8413         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8414         kfree(buf);
8415         return ret;
8416 }
8417
8418 static const struct file_operations tracing_dyn_info_fops = {
8419         .open           = tracing_open_generic,
8420         .read           = tracing_read_dyn_info,
8421         .llseek         = generic_file_llseek,
8422 };
8423 #endif /* CONFIG_DYNAMIC_FTRACE */
8424
8425 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8426 static void
8427 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8428                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8429                 void *data)
8430 {
8431         tracing_snapshot_instance(tr);
8432 }
8433
8434 static void
8435 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8436                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8437                       void *data)
8438 {
8439         struct ftrace_func_mapper *mapper = data;
8440         long *count = NULL;
8441
8442         if (mapper)
8443                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8444
8445         if (count) {
8446
8447                 if (*count <= 0)
8448                         return;
8449
8450                 (*count)--;
8451         }
8452
8453         tracing_snapshot_instance(tr);
8454 }
8455
8456 static int
8457 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8458                       struct ftrace_probe_ops *ops, void *data)
8459 {
8460         struct ftrace_func_mapper *mapper = data;
8461         long *count = NULL;
8462
8463         seq_printf(m, "%ps:", (void *)ip);
8464
8465         seq_puts(m, "snapshot");
8466
8467         if (mapper)
8468                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8469
8470         if (count)
8471                 seq_printf(m, ":count=%ld\n", *count);
8472         else
8473                 seq_puts(m, ":unlimited\n");
8474
8475         return 0;
8476 }
8477
8478 static int
8479 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8480                      unsigned long ip, void *init_data, void **data)
8481 {
8482         struct ftrace_func_mapper *mapper = *data;
8483
8484         if (!mapper) {
8485                 mapper = allocate_ftrace_func_mapper();
8486                 if (!mapper)
8487                         return -ENOMEM;
8488                 *data = mapper;
8489         }
8490
8491         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8492 }
8493
8494 static void
8495 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8496                      unsigned long ip, void *data)
8497 {
8498         struct ftrace_func_mapper *mapper = data;
8499
8500         if (!ip) {
8501                 if (!mapper)
8502                         return;
8503                 free_ftrace_func_mapper(mapper, NULL);
8504                 return;
8505         }
8506
8507         ftrace_func_mapper_remove_ip(mapper, ip);
8508 }
8509
8510 static struct ftrace_probe_ops snapshot_probe_ops = {
8511         .func                   = ftrace_snapshot,
8512         .print                  = ftrace_snapshot_print,
8513 };
8514
8515 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8516         .func                   = ftrace_count_snapshot,
8517         .print                  = ftrace_snapshot_print,
8518         .init                   = ftrace_snapshot_init,
8519         .free                   = ftrace_snapshot_free,
8520 };
8521
8522 static int
8523 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8524                                char *glob, char *cmd, char *param, int enable)
8525 {
8526         struct ftrace_probe_ops *ops;
8527         void *count = (void *)-1;
8528         char *number;
8529         int ret;
8530
8531         if (!tr)
8532                 return -ENODEV;
8533
8534         /* hash funcs only work with set_ftrace_filter */
8535         if (!enable)
8536                 return -EINVAL;
8537
8538         ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8539
8540         if (glob[0] == '!') {
8541                 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8542                 if (!ret)
8543                         tracing_disarm_snapshot(tr);
8544
8545                 return ret;
8546         }
8547
8548         if (!param)
8549                 goto out_reg;
8550
8551         number = strsep(&param, ":");
8552
8553         if (!strlen(number))
8554                 goto out_reg;
8555
8556         /*
8557          * We use the callback data field (which is a pointer)
8558          * as our counter.
8559          */
8560         ret = kstrtoul(number, 0, (unsigned long *)&count);
8561         if (ret)
8562                 return ret;
8563
8564  out_reg:
8565         ret = tracing_arm_snapshot(tr);
8566         if (ret < 0)
8567                 goto out;
8568
8569         ret = register_ftrace_function_probe(glob, tr, ops, count);
8570         if (ret < 0)
8571                 tracing_disarm_snapshot(tr);
8572  out:
8573         return ret < 0 ? ret : 0;
8574 }
8575
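/*
 * The resulting "snapshot" command is used through set_ftrace_filter,
 * for example (the function name here is arbitrary):
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *	echo 'schedule:snapshot:5' > set_ftrace_filter
 *
 * The first form snapshots on every hit of the function, the second only
 * for the first 5 hits (the count parsed above); prefixing the string with
 * '!' unregisters the probe again.
 */
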
8576 static struct ftrace_func_command ftrace_snapshot_cmd = {
8577         .name                   = "snapshot",
8578         .func                   = ftrace_trace_snapshot_callback,
8579 };
8580
8581 static __init int register_snapshot_cmd(void)
8582 {
8583         return register_ftrace_command(&ftrace_snapshot_cmd);
8584 }
8585 #else
8586 static inline __init int register_snapshot_cmd(void) { return 0; }
8587 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8588
8589 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8590 {
8591         if (WARN_ON(!tr->dir))
8592                 return ERR_PTR(-ENODEV);
8593
8594         /* Top directory uses NULL as the parent */
8595         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8596                 return NULL;
8597
8598         /* All sub buffers have a descriptor */
8599         return tr->dir;
8600 }
8601
8602 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8603 {
8604         struct dentry *d_tracer;
8605
8606         if (tr->percpu_dir)
8607                 return tr->percpu_dir;
8608
8609         d_tracer = tracing_get_dentry(tr);
8610         if (IS_ERR(d_tracer))
8611                 return NULL;
8612
8613         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8614
8615         MEM_FAIL(!tr->percpu_dir,
8616                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8617
8618         return tr->percpu_dir;
8619 }
8620
8621 static struct dentry *
8622 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8623                       void *data, long cpu, const struct file_operations *fops)
8624 {
8625         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8626
8627         if (ret) /* See tracing_get_cpu() */
8628                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8629         return ret;
8630 }
8631
8632 static void
8633 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8634 {
8635         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8636         struct dentry *d_cpu;
8637         char cpu_dir[30]; /* 30 characters should be more than enough */
8638
8639         if (!d_percpu)
8640                 return;
8641
8642         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8643         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8644         if (!d_cpu) {
8645                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8646                 return;
8647         }
8648
8649         /* per cpu trace_pipe */
8650         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8651                                 tr, cpu, &tracing_pipe_fops);
8652
8653         /* per cpu trace */
8654         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8655                                 tr, cpu, &tracing_fops);
8656
8657         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8658                                 tr, cpu, &tracing_buffers_fops);
8659
8660         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8661                                 tr, cpu, &tracing_stats_fops);
8662
8663         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8664                                 tr, cpu, &tracing_entries_fops);
8665
8666 #ifdef CONFIG_TRACER_SNAPSHOT
8667         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8668                                 tr, cpu, &snapshot_fops);
8669
8670         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8671                                 tr, cpu, &snapshot_raw_fops);
8672 #endif
8673 }
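
/*
 * For each tracing CPU this creates, under the instance's tracefs
 * directory (the snapshot files only with CONFIG_TRACER_SNAPSHOT):
 *
 *	per_cpu/cpuN/trace_pipe
 *	per_cpu/cpuN/trace
 *	per_cpu/cpuN/trace_pipe_raw
 *	per_cpu/cpuN/stats
 *	per_cpu/cpuN/buffer_size_kb
 *	per_cpu/cpuN/snapshot
 *	per_cpu/cpuN/snapshot_raw
 */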
8674
8675 #ifdef CONFIG_FTRACE_SELFTEST
8676 /* Let selftest have access to static functions in this file */
8677 #include "trace_selftest.c"
8678 #endif
8679
8680 static ssize_t
8681 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8682                         loff_t *ppos)
8683 {
8684         struct trace_option_dentry *topt = filp->private_data;
8685         char *buf;
8686
8687         if (topt->flags->val & topt->opt->bit)
8688                 buf = "1\n";
8689         else
8690                 buf = "0\n";
8691
8692         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8693 }
8694
8695 static ssize_t
8696 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8697                          loff_t *ppos)
8698 {
8699         struct trace_option_dentry *topt = filp->private_data;
8700         unsigned long val;
8701         int ret;
8702
8703         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8704         if (ret)
8705                 return ret;
8706
8707         if (val != 0 && val != 1)
8708                 return -EINVAL;
8709
8710         if (!!(topt->flags->val & topt->opt->bit) != val) {
8711                 mutex_lock(&trace_types_lock);
8712                 ret = __set_tracer_option(topt->tr, topt->flags,
8713                                           topt->opt, !val);
8714                 mutex_unlock(&trace_types_lock);
8715                 if (ret)
8716                         return ret;
8717         }
8718
8719         *ppos += cnt;
8720
8721         return cnt;
8722 }
8723
8724 static int tracing_open_options(struct inode *inode, struct file *filp)
8725 {
8726         struct trace_option_dentry *topt = inode->i_private;
8727         int ret;
8728
8729         ret = tracing_check_open_get_tr(topt->tr);
8730         if (ret)
8731                 return ret;
8732
8733         filp->private_data = inode->i_private;
8734         return 0;
8735 }
8736
8737 static int tracing_release_options(struct inode *inode, struct file *file)
8738 {
8739         struct trace_option_dentry *topt = file->private_data;
8740
8741         trace_array_put(topt->tr);
8742         return 0;
8743 }
8744
8745 static const struct file_operations trace_options_fops = {
8746         .open = tracing_open_options,
8747         .read = trace_options_read,
8748         .write = trace_options_write,
8749         .llseek = generic_file_llseek,
8750         .release = tracing_release_options,
8751 };
8752
8753 /*
8754  * In order to pass in both the trace_array descriptor and the index of
8755  * the flag that the trace option file represents, the trace_array has a
8756  * character array, trace_flags_index[], which holds the index of the bit
8757  * for the flag it represents. index[0] == 0, index[1] == 1, etc.
8758  * The address of the element for a given flag is passed to that option
8759  * file's read/write callbacks.
8760  *
8761  * In order to extract both the index and the trace_array descriptor,
8762  * get_tr_index() uses the following algorithm.
8763  *
8764  *   idx = *ptr;
8765  *
8766  * The value pointed to is the index itself (remember that
8767  * index[1] == 1, so each element holds its own position).
8768  *
8769  * Then, to get the trace_array descriptor, we subtract that index
8770  * from the pointer, which takes us to the start of the index array:
8771  *
8772  *   ptr - idx == &index[0]
8773  *
8774  * Then a simple container_of() from that pointer gets us to the
8775  * trace_array descriptor.
8776  */
8777 static void get_tr_index(void *data, struct trace_array **ptr,
8778                          unsigned int *pindex)
8779 {
8780         *pindex = *(unsigned char *)data;
8781
8782         *ptr = container_of(data - *pindex, struct trace_array,
8783                             trace_flags_index);
8784 }
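
/*
 * Worked example with made-up numbers: if trace_flags_index[] sits at
 * offset 0x100 inside a trace_array located at 0x8000, the option file for
 * flag bit 3 is handed data == 0x8103 and *data == 3.  Then
 * data - 3 == 0x8100 == &trace_flags_index[0], and container_of() on that
 * address recovers the trace_array at 0x8000.
 */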
8785
8786 static ssize_t
8787 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8788                         loff_t *ppos)
8789 {
8790         void *tr_index = filp->private_data;
8791         struct trace_array *tr;
8792         unsigned int index;
8793         char *buf;
8794
8795         get_tr_index(tr_index, &tr, &index);
8796
8797         if (tr->trace_flags & (1 << index))
8798                 buf = "1\n";
8799         else
8800                 buf = "0\n";
8801
8802         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8803 }
8804
8805 static ssize_t
8806 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8807                          loff_t *ppos)
8808 {
8809         void *tr_index = filp->private_data;
8810         struct trace_array *tr;
8811         unsigned int index;
8812         unsigned long val;
8813         int ret;
8814
8815         get_tr_index(tr_index, &tr, &index);
8816
8817         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8818         if (ret)
8819                 return ret;
8820
8821         if (val != 0 && val != 1)
8822                 return -EINVAL;
8823
8824         mutex_lock(&event_mutex);
8825         mutex_lock(&trace_types_lock);
8826         ret = set_tracer_flag(tr, 1 << index, val);
8827         mutex_unlock(&trace_types_lock);
8828         mutex_unlock(&event_mutex);
8829
8830         if (ret < 0)
8831                 return ret;
8832
8833         *ppos += cnt;
8834
8835         return cnt;
8836 }
8837
8838 static const struct file_operations trace_options_core_fops = {
8839         .open = tracing_open_generic,
8840         .read = trace_options_core_read,
8841         .write = trace_options_core_write,
8842         .llseek = generic_file_llseek,
8843 };
8844
8845 struct dentry *trace_create_file(const char *name,
8846                                  umode_t mode,
8847                                  struct dentry *parent,
8848                                  void *data,
8849                                  const struct file_operations *fops)
8850 {
8851         struct dentry *ret;
8852
8853         ret = tracefs_create_file(name, mode, parent, data, fops);
8854         if (!ret)
8855                 pr_warn("Could not create tracefs '%s' entry\n", name);
8856
8857         return ret;
8858 }
8859
8860
8861 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8862 {
8863         struct dentry *d_tracer;
8864
8865         if (tr->options)
8866                 return tr->options;
8867
8868         d_tracer = tracing_get_dentry(tr);
8869         if (IS_ERR(d_tracer))
8870                 return NULL;
8871
8872         tr->options = tracefs_create_dir("options", d_tracer);
8873         if (!tr->options) {
8874                 pr_warn("Could not create tracefs directory 'options'\n");
8875                 return NULL;
8876         }
8877
8878         return tr->options;
8879 }
8880
8881 static void
8882 create_trace_option_file(struct trace_array *tr,
8883                          struct trace_option_dentry *topt,
8884                          struct tracer_flags *flags,
8885                          struct tracer_opt *opt)
8886 {
8887         struct dentry *t_options;
8888
8889         t_options = trace_options_init_dentry(tr);
8890         if (!t_options)
8891                 return;
8892
8893         topt->flags = flags;
8894         topt->opt = opt;
8895         topt->tr = tr;
8896
8897         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8898                                         t_options, topt, &trace_options_fops);
8899
8900 }
8901
8902 static void
8903 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8904 {
8905         struct trace_option_dentry *topts;
8906         struct trace_options *tr_topts;
8907         struct tracer_flags *flags;
8908         struct tracer_opt *opts;
8909         int cnt;
8910         int i;
8911
8912         if (!tracer)
8913                 return;
8914
8915         flags = tracer->flags;
8916
8917         if (!flags || !flags->opts)
8918                 return;
8919
8920         /*
8921          * If this is an instance, only create flags for tracers
8922          * the instance may have.
8923          */
8924         if (!trace_ok_for_array(tracer, tr))
8925                 return;
8926
8927         for (i = 0; i < tr->nr_topts; i++) {
8928                 /* Make sure there are no duplicate flags. */
8929                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8930                         return;
8931         }
8932
8933         opts = flags->opts;
8934
8935         for (cnt = 0; opts[cnt].name; cnt++)
8936                 ;
8937
8938         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8939         if (!topts)
8940                 return;
8941
8942         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8943                             GFP_KERNEL);
8944         if (!tr_topts) {
8945                 kfree(topts);
8946                 return;
8947         }
8948
8949         tr->topts = tr_topts;
8950         tr->topts[tr->nr_topts].tracer = tracer;
8951         tr->topts[tr->nr_topts].topts = topts;
8952         tr->nr_topts++;
8953
8954         for (cnt = 0; opts[cnt].name; cnt++) {
8955                 create_trace_option_file(tr, &topts[cnt], flags,
8956                                          &opts[cnt]);
8957                 MEM_FAIL(topts[cnt].entry == NULL,
8958                           "Failed to create trace option: %s",
8959                           opts[cnt].name);
8960         }
8961 }
8962
8963 static struct dentry *
8964 create_trace_option_core_file(struct trace_array *tr,
8965                               const char *option, long index)
8966 {
8967         struct dentry *t_options;
8968
8969         t_options = trace_options_init_dentry(tr);
8970         if (!t_options)
8971                 return NULL;
8972
8973         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8974                                  (void *)&tr->trace_flags_index[index],
8975                                  &trace_options_core_fops);
8976 }
8977
8978 static void create_trace_options_dir(struct trace_array *tr)
8979 {
8980         struct dentry *t_options;
8981         bool top_level = tr == &global_trace;
8982         int i;
8983
8984         t_options = trace_options_init_dentry(tr);
8985         if (!t_options)
8986                 return;
8987
8988         for (i = 0; trace_options[i]; i++) {
8989                 if (top_level ||
8990                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8991                         create_trace_option_core_file(tr, trace_options[i], i);
8992         }
8993 }
8994
8995 static ssize_t
8996 rb_simple_read(struct file *filp, char __user *ubuf,
8997                size_t cnt, loff_t *ppos)
8998 {
8999         struct trace_array *tr = filp->private_data;
9000         char buf[64];
9001         int r;
9002
9003         r = tracer_tracing_is_on(tr);
9004         r = sprintf(buf, "%d\n", r);
9005
9006         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9007 }
9008
9009 static ssize_t
9010 rb_simple_write(struct file *filp, const char __user *ubuf,
9011                 size_t cnt, loff_t *ppos)
9012 {
9013         struct trace_array *tr = filp->private_data;
9014         struct trace_buffer *buffer = tr->array_buffer.buffer;
9015         unsigned long val;
9016         int ret;
9017
9018         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9019         if (ret)
9020                 return ret;
9021
9022         if (buffer) {
9023                 mutex_lock(&trace_types_lock);
9024                 if (!!val == tracer_tracing_is_on(tr)) {
9025                         val = 0; /* do nothing */
9026                 } else if (val) {
9027                         tracer_tracing_on(tr);
9028                         if (tr->current_trace->start)
9029                                 tr->current_trace->start(tr);
9030                 } else {
9031                         tracer_tracing_off(tr);
9032                         if (tr->current_trace->stop)
9033                                 tr->current_trace->stop(tr);
9034                         /* Wake up any waiters */
9035                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9036                 }
9037                 mutex_unlock(&trace_types_lock);
9038         }
9039
9040         (*ppos)++;
9041
9042         return cnt;
9043 }
9044
9045 static const struct file_operations rb_simple_fops = {
9046         .open           = tracing_open_generic_tr,
9047         .read           = rb_simple_read,
9048         .write          = rb_simple_write,
9049         .release        = tracing_release_generic_tr,
9050         .llseek         = default_llseek,
9051 };
9052
9053 static ssize_t
9054 buffer_percent_read(struct file *filp, char __user *ubuf,
9055                     size_t cnt, loff_t *ppos)
9056 {
9057         struct trace_array *tr = filp->private_data;
9058         char buf[64];
9059         int r;
9060
9061         r = tr->buffer_percent;
9062         r = sprintf(buf, "%d\n", r);
9063
9064         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9065 }
9066
9067 static ssize_t
9068 buffer_percent_write(struct file *filp, const char __user *ubuf,
9069                      size_t cnt, loff_t *ppos)
9070 {
9071         struct trace_array *tr = filp->private_data;
9072         unsigned long val;
9073         int ret;
9074
9075         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9076         if (ret)
9077                 return ret;
9078
9079         if (val > 100)
9080                 return -EINVAL;
9081
9082         tr->buffer_percent = val;
9083
9084         (*ppos)++;
9085
9086         return cnt;
9087 }
9088
9089 static const struct file_operations buffer_percent_fops = {
9090         .open           = tracing_open_generic_tr,
9091         .read           = buffer_percent_read,
9092         .write          = buffer_percent_write,
9093         .release        = tracing_release_generic_tr,
9094         .llseek         = default_llseek,
9095 };
9096
9097 static ssize_t
9098 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9099 {
9100         struct trace_array *tr = filp->private_data;
9101         size_t size;
9102         char buf[64];
9103         int order;
9104         int r;
9105
9106         order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9107         size = (PAGE_SIZE << order) / 1024;
9108
9109         r = sprintf(buf, "%zd\n", size);
9110
9111         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9112 }
9113
9114 static ssize_t
9115 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9116                          size_t cnt, loff_t *ppos)
9117 {
9118         struct trace_array *tr = filp->private_data;
9119         unsigned long val;
9120         int old_order;
9121         int order;
9122         int pages;
9123         int ret;
9124
9125         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9126         if (ret)
9127                 return ret;
9128
9129         val *= 1024; /* value passed in is in KB */
9130
9131         pages = DIV_ROUND_UP(val, PAGE_SIZE);
9132         order = fls(pages - 1);
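	/*
	 * Worked example (assuming 4K pages): writing "8" gives val = 8192,
	 * pages = 2 and order = fls(1) = 1, i.e. a two-page (8K) sub-buffer.
	 * Sizes that are not a power-of-two number of pages are rounded up,
	 * so writing "5" also results in order 1.
	 */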
9133
9134         /* limit between 1 and 128 system pages */
9135         if (order < 0 || order > 7)
9136                 return -EINVAL;
9137
9138         /* Do not allow tracing while changing the order of the ring buffer */
9139         tracing_stop_tr(tr);
9140
9141         old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9142         if (old_order == order)
9143                 goto out;
9144
9145         ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9146         if (ret)
9147                 goto out;
9148
9149 #ifdef CONFIG_TRACER_MAX_TRACE
9150
9151         if (!tr->allocated_snapshot)
9152                 goto out_max;
9153
9154         ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9155         if (ret) {
9156                 /* Put back the old order */
9157                 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9158                 if (WARN_ON_ONCE(cnt)) {
9159                         /*
9160                          * AARGH! We are left with different orders!
9161                          * The max buffer is our "snapshot" buffer.
9162                          * When a tracer needs a snapshot (one of the
9163                          * latency tracers), it swaps the max buffer
9164                          * with the saved snapshot. We succeeded in
9165                          * updating the order of the main buffer, but
9166                          * failed to update the order of the max buffer.
9167                          * And when we tried to reset the main buffer
9168                          * to the original order, we failed there too.
9169                          * This is very unlikely to happen, but if it
9170                          * does, warn and kill all tracing.
9171                          */
9172                         tracing_disabled = 1;
9173                 }
9174                 goto out;
9175         }
9176  out_max:
9177 #endif
9178         (*ppos)++;
9179  out:
9180         if (ret)
9181                 cnt = ret;
9182         tracing_start_tr(tr);
9183         return cnt;
9184 }
9185
9186 static const struct file_operations buffer_subbuf_size_fops = {
9187         .open           = tracing_open_generic_tr,
9188         .read           = buffer_subbuf_size_read,
9189         .write          = buffer_subbuf_size_write,
9190         .release        = tracing_release_generic_tr,
9191         .llseek         = default_llseek,
9192 };
9193
9194 static struct dentry *trace_instance_dir;
9195
9196 static void
9197 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9198
9199 static int
9200 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9201 {
9202         enum ring_buffer_flags rb_flags;
9203
9204         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9205
9206         buf->tr = tr;
9207
9208         buf->buffer = ring_buffer_alloc(size, rb_flags);
9209         if (!buf->buffer)
9210                 return -ENOMEM;
9211
9212         buf->data = alloc_percpu(struct trace_array_cpu);
9213         if (!buf->data) {
9214                 ring_buffer_free(buf->buffer);
9215                 buf->buffer = NULL;
9216                 return -ENOMEM;
9217         }
9218
9219         /* Allocate the first page for all buffers */
9220         set_buffer_entries(&tr->array_buffer,
9221                            ring_buffer_size(tr->array_buffer.buffer, 0));
9222
9223         return 0;
9224 }
9225
9226 static void free_trace_buffer(struct array_buffer *buf)
9227 {
9228         if (buf->buffer) {
9229                 ring_buffer_free(buf->buffer);
9230                 buf->buffer = NULL;
9231                 free_percpu(buf->data);
9232                 buf->data = NULL;
9233         }
9234 }
9235
9236 static int allocate_trace_buffers(struct trace_array *tr, int size)
9237 {
9238         int ret;
9239
9240         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9241         if (ret)
9242                 return ret;
9243
9244 #ifdef CONFIG_TRACER_MAX_TRACE
9245         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9246                                     allocate_snapshot ? size : 1);
9247         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9248                 free_trace_buffer(&tr->array_buffer);
9249                 return -ENOMEM;
9250         }
9251         tr->allocated_snapshot = allocate_snapshot;
9252
9253         allocate_snapshot = false;
9254 #endif
9255
9256         return 0;
9257 }
9258
9259 static void free_trace_buffers(struct trace_array *tr)
9260 {
9261         if (!tr)
9262                 return;
9263
9264         free_trace_buffer(&tr->array_buffer);
9265
9266 #ifdef CONFIG_TRACER_MAX_TRACE
9267         free_trace_buffer(&tr->max_buffer);
9268 #endif
9269 }
9270
9271 static void init_trace_flags_index(struct trace_array *tr)
9272 {
9273         int i;
9274
9275         /* Used by the trace options files */
9276         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9277                 tr->trace_flags_index[i] = i;
9278 }
9279
9280 static void __update_tracer_options(struct trace_array *tr)
9281 {
9282         struct tracer *t;
9283
9284         for (t = trace_types; t; t = t->next)
9285                 add_tracer_options(tr, t);
9286 }
9287
9288 static void update_tracer_options(struct trace_array *tr)
9289 {
9290         mutex_lock(&trace_types_lock);
9291         tracer_options_updated = true;
9292         __update_tracer_options(tr);
9293         mutex_unlock(&trace_types_lock);
9294 }
9295
9296 /* Must have trace_types_lock held */
9297 struct trace_array *trace_array_find(const char *instance)
9298 {
9299         struct trace_array *tr, *found = NULL;
9300
9301         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9302                 if (tr->name && strcmp(tr->name, instance) == 0) {
9303                         found = tr;
9304                         break;
9305                 }
9306         }
9307
9308         return found;
9309 }
9310
9311 struct trace_array *trace_array_find_get(const char *instance)
9312 {
9313         struct trace_array *tr;
9314
9315         mutex_lock(&trace_types_lock);
9316         tr = trace_array_find(instance);
9317         if (tr)
9318                 tr->ref++;
9319         mutex_unlock(&trace_types_lock);
9320
9321         return tr;
9322 }
9323
9324 static int trace_array_create_dir(struct trace_array *tr)
9325 {
9326         int ret;
9327
9328         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9329         if (!tr->dir)
9330                 return -EINVAL;
9331
9332         ret = event_trace_add_tracer(tr->dir, tr);
9333         if (ret) {
9334                 tracefs_remove(tr->dir);
9335                 return ret;
9336         }
9337
9338         init_tracer_tracefs(tr, tr->dir);
9339         __update_tracer_options(tr);
9340
9341         return ret;
9342 }
9343
9344 static struct trace_array *
9345 trace_array_create_systems(const char *name, const char *systems)
9346 {
9347         struct trace_array *tr;
9348         int ret;
9349
9350         ret = -ENOMEM;
9351         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9352         if (!tr)
9353                 return ERR_PTR(ret);
9354
9355         tr->name = kstrdup(name, GFP_KERNEL);
9356         if (!tr->name)
9357                 goto out_free_tr;
9358
9359         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9360                 goto out_free_tr;
9361
9362         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9363                 goto out_free_tr;
9364
9365         if (systems) {
9366                 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9367                 if (!tr->system_names)
9368                         goto out_free_tr;
9369         }
9370
9371         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9372
9373         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9374
9375         raw_spin_lock_init(&tr->start_lock);
9376
9377         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9378 #ifdef CONFIG_TRACER_MAX_TRACE
9379         spin_lock_init(&tr->snapshot_trigger_lock);
9380 #endif
9381         tr->current_trace = &nop_trace;
9382
9383         INIT_LIST_HEAD(&tr->systems);
9384         INIT_LIST_HEAD(&tr->events);
9385         INIT_LIST_HEAD(&tr->hist_vars);
9386         INIT_LIST_HEAD(&tr->err_log);
9387
9388         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9389                 goto out_free_tr;
9390
9391         /* The ring buffer is expanded by default */
9392         trace_set_ring_buffer_expanded(tr);
9393
9394         if (ftrace_allocate_ftrace_ops(tr) < 0)
9395                 goto out_free_tr;
9396
9397         ftrace_init_trace_array(tr);
9398
9399         init_trace_flags_index(tr);
9400
9401         if (trace_instance_dir) {
9402                 ret = trace_array_create_dir(tr);
9403                 if (ret)
9404                         goto out_free_tr;
9405         } else
9406                 __trace_early_add_events(tr);
9407
9408         list_add(&tr->list, &ftrace_trace_arrays);
9409
9410         tr->ref++;
9411
9412         return tr;
9413
9414  out_free_tr:
9415         ftrace_free_ftrace_ops(tr);
9416         free_trace_buffers(tr);
9417         free_cpumask_var(tr->pipe_cpumask);
9418         free_cpumask_var(tr->tracing_cpumask);
9419         kfree_const(tr->system_names);
9420         kfree(tr->name);
9421         kfree(tr);
9422
9423         return ERR_PTR(ret);
9424 }
9425
9426 static struct trace_array *trace_array_create(const char *name)
9427 {
9428         return trace_array_create_systems(name, NULL);
9429 }
9430
9431 static int instance_mkdir(const char *name)
9432 {
9433         struct trace_array *tr;
9434         int ret;
9435
9436         mutex_lock(&event_mutex);
9437         mutex_lock(&trace_types_lock);
9438
9439         ret = -EEXIST;
9440         if (trace_array_find(name))
9441                 goto out_unlock;
9442
9443         tr = trace_array_create(name);
9444
9445         ret = PTR_ERR_OR_ZERO(tr);
9446
9447 out_unlock:
9448         mutex_unlock(&trace_types_lock);
9449         mutex_unlock(&event_mutex);
9450         return ret;
9451 }
9452
9453 /**
9454  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9455  * @name: The name of the trace array to be looked up/created.
9456  * @systems: A list of systems to create event directories for (NULL for all)
9457  *
9458  * Returns a pointer to the trace array with the given name, or NULL
9459  * if it cannot be found or created.
9460  *
9461  * NOTE: This function increments the reference counter associated with the
9462  * trace array returned. This makes sure it cannot be freed while in use.
9463  * Use trace_array_put() once the trace array is no longer needed.
9464  * If the trace_array is to be freed, trace_array_destroy() needs to
9465  * be called after the trace_array_put(), or simply let user space delete
9466  * it from the tracefs instances directory. But until the
9467  * trace_array_put() is called, user space cannot delete it.
9468  *
9469  */
9470 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9471 {
9472         struct trace_array *tr;
9473
9474         mutex_lock(&event_mutex);
9475         mutex_lock(&trace_types_lock);
9476
9477         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9478                 if (tr->name && strcmp(tr->name, name) == 0)
9479                         goto out_unlock;
9480         }
9481
9482         tr = trace_array_create_systems(name, systems);
9483
9484         if (IS_ERR(tr))
9485                 tr = NULL;
9486 out_unlock:
9487         if (tr)
9488                 tr->ref++;
9489
9490         mutex_unlock(&trace_types_lock);
9491         mutex_unlock(&event_mutex);
9492         return tr;
9493 }
9494 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
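
/*
 * A minimal sketch (not an in-tree user) of how a module might use the
 * exported instance API; the instance name "my_instance" and the error
 * value are only illustrative:
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (!tr)
 *		return -ENODEV;
 *	...
 *	trace_array_put(tr);
 *
 * and, if the instance should be removed again, follow the
 * trace_array_put() with trace_array_destroy() (see below).
 */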
9495
9496 static int __remove_instance(struct trace_array *tr)
9497 {
9498         int i;
9499
9500         /* Reference counter for a newly created trace array = 1. */
9501         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9502                 return -EBUSY;
9503
9504         list_del(&tr->list);
9505
9506         /* Disable all the flags that were enabled coming in */
9507         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9508                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9509                         set_tracer_flag(tr, 1 << i, 0);
9510         }
9511
9512         tracing_set_nop(tr);
9513         clear_ftrace_function_probes(tr);
9514         event_trace_del_tracer(tr);
9515         ftrace_clear_pids(tr);
9516         ftrace_destroy_function_files(tr);
9517         tracefs_remove(tr->dir);
9518         free_percpu(tr->last_func_repeats);
9519         free_trace_buffers(tr);
9520         clear_tracing_err_log(tr);
9521
9522         for (i = 0; i < tr->nr_topts; i++) {
9523                 kfree(tr->topts[i].topts);
9524         }
9525         kfree(tr->topts);
9526
9527         free_cpumask_var(tr->pipe_cpumask);
9528         free_cpumask_var(tr->tracing_cpumask);
9529         kfree_const(tr->system_names);
9530         kfree(tr->name);
9531         kfree(tr);
9532
9533         return 0;
9534 }
9535
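/**
 * trace_array_destroy - Destroy a trace array previously created
 * @this_tr: The trace array to destroy
 *
 * Removes @this_tr from the list of trace arrays and frees it, provided
 * nothing else still holds a reference to it.
 *
 * Returns 0 on success, -EINVAL if @this_tr is NULL, -ENODEV if the
 * trace array is not on the list, or -EBUSY if it is still in use.
 */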
9536 int trace_array_destroy(struct trace_array *this_tr)
9537 {
9538         struct trace_array *tr;
9539         int ret;
9540
9541         if (!this_tr)
9542                 return -EINVAL;
9543
9544         mutex_lock(&event_mutex);
9545         mutex_lock(&trace_types_lock);
9546
9547         ret = -ENODEV;
9548
9549         /* Make sure the trace array exists before destroying it. */
9550         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9551                 if (tr == this_tr) {
9552                         ret = __remove_instance(tr);
9553                         break;
9554                 }
9555         }
9556
9557         mutex_unlock(&trace_types_lock);
9558         mutex_unlock(&event_mutex);
9559
9560         return ret;
9561 }
9562 EXPORT_SYMBOL_GPL(trace_array_destroy);
9563
9564 static int instance_rmdir(const char *name)
9565 {
9566         struct trace_array *tr;
9567         int ret;
9568
9569         mutex_lock(&event_mutex);
9570         mutex_lock(&trace_types_lock);
9571
9572         ret = -ENODEV;
9573         tr = trace_array_find(name);
9574         if (tr)
9575                 ret = __remove_instance(tr);
9576
9577         mutex_unlock(&trace_types_lock);
9578         mutex_unlock(&event_mutex);
9579
9580         return ret;
9581 }
9582
9583 static __init void create_trace_instances(struct dentry *d_tracer)
9584 {
9585         struct trace_array *tr;
9586
9587         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9588                                                          instance_mkdir,
9589                                                          instance_rmdir);
9590         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9591                 return;
9592
9593         mutex_lock(&event_mutex);
9594         mutex_lock(&trace_types_lock);
9595
9596         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9597                 if (!tr->name)
9598                         continue;
9599                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9600                              "Failed to create instance directory\n"))
9601                         break;
9602         }
9603
9604         mutex_unlock(&trace_types_lock);
9605         mutex_unlock(&event_mutex);
9606 }
9607
9608 static void
9609 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9610 {
9611         int cpu;
9612
9613         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9614                         tr, &show_traces_fops);
9615
9616         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9617                         tr, &set_tracer_fops);
9618
9619         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9620                           tr, &tracing_cpumask_fops);
9621
9622         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9623                           tr, &tracing_iter_fops);
9624
9625         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9626                           tr, &tracing_fops);
9627
9628         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9629                           tr, &tracing_pipe_fops);
9630
9631         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9632                           tr, &tracing_entries_fops);
9633
9634         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9635                           tr, &tracing_total_entries_fops);
9636
9637         trace_create_file("free_buffer", 0200, d_tracer,
9638                           tr, &tracing_free_buffer_fops);
9639
9640         trace_create_file("trace_marker", 0220, d_tracer,
9641                           tr, &tracing_mark_fops);
9642
9643         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9644
9645         trace_create_file("trace_marker_raw", 0220, d_tracer,
9646                           tr, &tracing_mark_raw_fops);
9647
9648         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9649                           &trace_clock_fops);
9650
9651         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9652                           tr, &rb_simple_fops);
9653
9654         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9655                           &trace_time_stamp_mode_fops);
9656
9657         tr->buffer_percent = 50;
9658
9659         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9660                         tr, &buffer_percent_fops);
9661
9662         trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9663                           tr, &buffer_subbuf_size_fops);
9664
9665         create_trace_options_dir(tr);
9666
9667 #ifdef CONFIG_TRACER_MAX_TRACE
9668         trace_create_maxlat_file(tr, d_tracer);
9669 #endif
9670
9671         if (ftrace_create_function_files(tr, d_tracer))
9672                 MEM_FAIL(1, "Could not allocate function filter files");
9673
9674 #ifdef CONFIG_TRACER_SNAPSHOT
9675         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9676                           tr, &snapshot_fops);
9677 #endif
9678
9679         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9680                           tr, &tracing_err_log_fops);
9681
9682         for_each_tracing_cpu(cpu)
9683                 tracing_init_tracefs_percpu(tr, cpu);
9684
9685         ftrace_init_tracefs(tr, d_tracer);
9686 }
9687
9688 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9689 {
9690         struct vfsmount *mnt;
9691         struct file_system_type *type;
9692
9693         /*
9694          * To maintain backward compatibility for tools that mount
9695          * debugfs to get to the tracing facility, tracefs is automatically
9696          * mounted to the debugfs/tracing directory.
9697          */
9698         type = get_fs_type("tracefs");
9699         if (!type)
9700                 return NULL;
9701         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9702         put_filesystem(type);
9703         if (IS_ERR(mnt))
9704                 return NULL;
9705         mntget(mnt);
9706
9707         return mnt;
9708 }
9709
9710 /**
9711  * tracing_init_dentry - initialize top level trace array
9712  *
9713  * This is called when creating files or directories in the tracing
9714  * directory. It is called via fs_initcall() by any of the boot up code
9715  * and returns zero on success or a negative error code otherwise.
9716  */
9717 int tracing_init_dentry(void)
9718 {
9719         struct trace_array *tr = &global_trace;
9720
9721         if (security_locked_down(LOCKDOWN_TRACEFS)) {
9722                 pr_warn("Tracing disabled due to lockdown\n");
9723                 return -EPERM;
9724         }
9725
9726         /* The top level trace array uses NULL as parent */
9727         if (tr->dir)
9728                 return 0;
9729
9730         if (WARN_ON(!tracefs_initialized()))
9731                 return -ENODEV;
9732
9733         /*
9734          * As there may still be users that expect the tracing
9735          * files to exist in debugfs/tracing, we must automount
9736          * the tracefs file system there, so older tools still
9737          * work with the newer kernel.
9738          */
9739         tr->dir = debugfs_create_automount("tracing", NULL,
9740                                            trace_automount, NULL);
9741
9742         return 0;
9743 }
9744
9745 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9746 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9747
9748 static struct workqueue_struct *eval_map_wq __initdata;
9749 static struct work_struct eval_map_work __initdata;
9750 static struct work_struct tracerfs_init_work __initdata;
9751
9752 static void __init eval_map_work_func(struct work_struct *work)
9753 {
9754         int len;
9755
9756         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9757         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9758 }
9759
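/*
 * Insert the core kernel's eval maps from an unbound workqueue rather
 * than synchronously in the initcall (falling back to doing the work
 * inline if the workqueue cannot be allocated). trace_eval_sync() below
 * destroys the workqueue at late init, which also flushes this work.
 */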
9760 static int __init trace_eval_init(void)
9761 {
9762         INIT_WORK(&eval_map_work, eval_map_work_func);
9763
9764         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9765         if (!eval_map_wq) {
9766                 pr_err("Unable to allocate eval_map_wq\n");
9767                 /* Do work here */
9768                 eval_map_work_func(&eval_map_work);
9769                 return -ENOMEM;
9770         }
9771
9772         queue_work(eval_map_wq, &eval_map_work);
9773         return 0;
9774 }
9775
9776 subsys_initcall(trace_eval_init);
9777
9778 static int __init trace_eval_sync(void)
9779 {
9780         /* Make sure the eval map updates are finished */
9781         if (eval_map_wq)
9782                 destroy_workqueue(eval_map_wq);
9783         return 0;
9784 }
9785
9786 late_initcall_sync(trace_eval_sync);
9787
9788
9789 #ifdef CONFIG_MODULES
9790 static void trace_module_add_evals(struct module *mod)
9791 {
9792         if (!mod->num_trace_evals)
9793                 return;
9794
9795         /*
9796          * Modules with bad taint do not have events created; don't
9797          * bother with their enums (eval maps) either.
9798          */
9799         if (trace_module_has_bad_taint(mod))
9800                 return;
9801
9802         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9803 }
9804
9805 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
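/*
 * On module unload, unlink the module's block of entries from the
 * trace_eval_maps list. Each saved block starts with a head item that
 * records the owning module and ends with a tail item holding the
 * pointer to the next block, so walk the list head to tail until the
 * matching module is found.
 */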
9806 static void trace_module_remove_evals(struct module *mod)
9807 {
9808         union trace_eval_map_item *map;
9809         union trace_eval_map_item **last = &trace_eval_maps;
9810
9811         if (!mod->num_trace_evals)
9812                 return;
9813
9814         mutex_lock(&trace_eval_mutex);
9815
9816         map = trace_eval_maps;
9817
9818         while (map) {
9819                 if (map->head.mod == mod)
9820                         break;
9821                 map = trace_eval_jmp_to_tail(map);
9822                 last = &map->tail.next;
9823                 map = map->tail.next;
9824         }
9825         if (!map)
9826                 goto out;
9827
9828         *last = trace_eval_jmp_to_tail(map)->tail.next;
9829         kfree(map);
9830  out:
9831         mutex_unlock(&trace_eval_mutex);
9832 }
9833 #else
9834 static inline void trace_module_remove_evals(struct module *mod) { }
9835 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9836
9837 static int trace_module_notify(struct notifier_block *self,
9838                                unsigned long val, void *data)
9839 {
9840         struct module *mod = data;
9841
9842         switch (val) {
9843         case MODULE_STATE_COMING:
9844                 trace_module_add_evals(mod);
9845                 break;
9846         case MODULE_STATE_GOING:
9847                 trace_module_remove_evals(mod);
9848                 break;
9849         }
9850
9851         return NOTIFY_OK;
9852 }
9853
9854 static struct notifier_block trace_module_nb = {
9855         .notifier_call = trace_module_notify,
9856         .priority = 0,
9857 };
9858 #endif /* CONFIG_MODULES */
9859
9860 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9861 {
9862
9863         event_trace_init();
9864
9865         init_tracer_tracefs(&global_trace, NULL);
9866         ftrace_init_tracefs_toplevel(&global_trace, NULL);
9867
9868         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9869                         &global_trace, &tracing_thresh_fops);
9870
9871         trace_create_file("README", TRACE_MODE_READ, NULL,
9872                         NULL, &tracing_readme_fops);
9873
9874         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9875                         NULL, &tracing_saved_cmdlines_fops);
9876
9877         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9878                           NULL, &tracing_saved_cmdlines_size_fops);
9879
9880         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9881                         NULL, &tracing_saved_tgids_fops);
9882
9883         trace_create_eval_file(NULL);
9884
9885 #ifdef CONFIG_MODULES
9886         register_module_notifier(&trace_module_nb);
9887 #endif
9888
9889 #ifdef CONFIG_DYNAMIC_FTRACE
9890         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9891                         NULL, &tracing_dyn_info_fops);
9892 #endif
9893
9894         create_trace_instances(NULL);
9895
9896         update_tracer_options(&global_trace);
9897 }
9898
9899 static __init int tracer_init_tracefs(void)
9900 {
9901         int ret;
9902
9903         trace_access_lock_init();
9904
9905         ret = tracing_init_dentry();
9906         if (ret)
9907                 return 0;
9908
9909         if (eval_map_wq) {
9910                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9911                 queue_work(eval_map_wq, &tracerfs_init_work);
9912         } else {
9913                 tracer_init_tracefs_work_func(NULL);
9914         }
9915
9916         rv_init_interface();
9917
9918         return 0;
9919 }
9920
9921 fs_initcall(tracer_init_tracefs);
9922
9923 static int trace_die_panic_handler(struct notifier_block *self,
9924                                 unsigned long ev, void *unused);
9925
9926 static struct notifier_block trace_panic_notifier = {
9927         .notifier_call = trace_die_panic_handler,
9928         .priority = INT_MAX - 1,
9929 };
9930
9931 static struct notifier_block trace_die_notifier = {
9932         .notifier_call = trace_die_panic_handler,
9933         .priority = INT_MAX - 1,
9934 };
9935
9936 /*
9937  * The idea is to execute the following die/panic callback early, in order
9938  * to avoid showing irrelevant information in the trace (like other panic
9939  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
9940  * warnings get disabled (to prevent potential log flooding).
9941  */
9942 static int trace_die_panic_handler(struct notifier_block *self,
9943                                 unsigned long ev, void *unused)
9944 {
9945         if (!ftrace_dump_on_oops_enabled())
9946                 return NOTIFY_DONE;
9947
9948         /* The die notifier requires DIE_OOPS to trigger */
9949         if (self == &trace_die_notifier && ev != DIE_OOPS)
9950                 return NOTIFY_DONE;
9951
9952         ftrace_dump(DUMP_PARAM);
9953
9954         return NOTIFY_DONE;
9955 }
9956
9957 /*
9958  * printk is limited to a max of 1024 characters; we really don't need it
9959  * that big. Nothing should be printing 1000 characters anyway.
9960  */
9961 #define TRACE_MAX_PRINT         1000
9962
9963 /*
9964  * Define here KERN_TRACE so that we have one place to modify
9965  * it if we decide to change what log level the ftrace dump
9966  * should be at.
9967  */
9968 #define KERN_TRACE              KERN_EMERG
9969
9970 void
9971 trace_printk_seq(struct trace_seq *s)
9972 {
9973         /* Probably should print a warning here. */
9974         if (s->seq.len >= TRACE_MAX_PRINT)
9975                 s->seq.len = TRACE_MAX_PRINT;
9976
9977         /*
9978          * More paranoid code. Although the buffer size is set to
9979          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9980          * an extra layer of protection.
9981          */
9982         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9983                 s->seq.len = s->seq.size - 1;
9984
9985         /* Should be zero terminated, but we are paranoid. */
9986         s->buffer[s->seq.len] = 0;
9987
9988         printk(KERN_TRACE "%s", s->buffer);
9989
9990         trace_seq_init(s);
9991 }
9992
9993 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
9994 {
9995         iter->tr = tr;
9996         iter->trace = iter->tr->current_trace;
9997         iter->cpu_file = RING_BUFFER_ALL_CPUS;
9998         iter->array_buffer = &tr->array_buffer;
9999
10000         if (iter->trace && iter->trace->open)
10001                 iter->trace->open(iter);
10002
10003         /* Annotate start of buffers if we had overruns */
10004         if (ring_buffer_overruns(iter->array_buffer->buffer))
10005                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10006
10007         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10008         if (trace_clocks[iter->tr->clock_id].in_ns)
10009                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10010
10011         /* Cannot use kmalloc for iter.temp and iter.fmt */
10012         iter->temp = static_temp_buf;
10013         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10014         iter->fmt = static_fmt_buf;
10015         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10016 }
10017
10018 void trace_init_global_iter(struct trace_iterator *iter)
10019 {
10020         trace_init_iter(iter, &global_trace);
10021 }
10022
10023 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10024 {
10025         /* use static because iter can be a bit big for the stack */
10026         static struct trace_iterator iter;
10027         unsigned int old_userobj;
10028         unsigned long flags;
10029         int cnt = 0, cpu;
10030
10031         /*
10032          * Always turn off tracing when we dump.
10033          * We don't need to show trace output of what happens
10034          * between multiple crashes.
10035          *
10036          * If the user does a sysrq-z, then they can re-enable
10037          * tracing with echo 1 > tracing_on.
10038          */
10039         tracer_tracing_off(tr);
10040
10041         local_irq_save(flags);
10042
10043         /* Simulate the iterator */
10044         trace_init_iter(&iter, tr);
10045
10046         for_each_tracing_cpu(cpu) {
10047                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10048         }
10049
10050         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10051
10052         /* don't look at user memory in panic mode */
10053         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10054
10055         if (dump_mode == DUMP_ORIG)
10056                 iter.cpu_file = raw_smp_processor_id();
10057         else
10058                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10059
10060         if (tr == &global_trace)
10061                 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10062         else
10063                 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10064
10065         /* Did function tracer already get disabled? */
10066         if (ftrace_is_dead()) {
10067                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10068                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10069         }
10070
10071         /*
10072          * We need to stop all tracing on all CPUs to read
10073          * the next buffer. This is a bit expensive, but is
10074          * not done often. We read everything we can and
10075          * then release the locks again.
10076          */
10077
10078         while (!trace_empty(&iter)) {
10079
10080                 if (!cnt)
10081                         printk(KERN_TRACE "---------------------------------\n");
10082
10083                 cnt++;
10084
10085                 trace_iterator_reset(&iter);
10086                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10087
10088                 if (trace_find_next_entry_inc(&iter) != NULL) {
10089                         int ret;
10090
10091                         ret = print_trace_line(&iter);
10092                         if (ret != TRACE_TYPE_NO_CONSUME)
10093                                 trace_consume(&iter);
10094                 }
10095                 touch_nmi_watchdog();
10096
10097                 trace_printk_seq(&iter.seq);
10098         }
10099
10100         if (!cnt)
10101                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10102         else
10103                 printk(KERN_TRACE "---------------------------------\n");
10104
10105         tr->trace_flags |= old_userobj;
10106
10107         for_each_tracing_cpu(cpu) {
10108                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10109         }
10110         local_irq_restore(flags);
10111 }
10112
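/*
 * Dump according to the ftrace_dump_on_oops string: a comma separated
 * list whose first element may be one of the global modes "0", "1"
 * (dump all CPUs) or "2"/"orig_cpu" (dump only the originating CPU),
 * with the remaining elements naming instances, each optionally
 * suffixed with "=2" or "=orig_cpu". A purely illustrative example:
 * ftrace_dump_on_oops="1,my_instance,other_instance=orig_cpu".
 */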
10113 static void ftrace_dump_by_param(void)
10114 {
10115         bool first_param = true;
10116         char dump_param[MAX_TRACER_SIZE];
10117         char *buf, *token, *inst_name;
10118         struct trace_array *tr;
10119
10120         strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10121         buf = dump_param;
10122
10123         while ((token = strsep(&buf, ",")) != NULL) {
10124                 if (first_param) {
10125                         first_param = false;
10126                         if (!strcmp("0", token))
10127                                 continue;
10128                         else if (!strcmp("1", token)) {
10129                                 ftrace_dump_one(&global_trace, DUMP_ALL);
10130                                 continue;
10131                         }
10132                         else if (!strcmp("2", token) ||
10133                           !strcmp("orig_cpu", token)) {
10134                                 ftrace_dump_one(&global_trace, DUMP_ORIG);
10135                                 continue;
10136                         }
10137                 }
10138
10139                 inst_name = strsep(&token, "=");
10140                 tr = trace_array_find(inst_name);
10141                 if (!tr) {
10142                         printk(KERN_TRACE "Instance %s not found\n", inst_name);
10143                         continue;
10144                 }
10145
10146                 if (token && (!strcmp("2", token) ||
10147                           !strcmp("orig_cpu", token)))
10148                         ftrace_dump_one(tr, DUMP_ORIG);
10149                 else
10150                         ftrace_dump_one(tr, DUMP_ALL);
10151         }
10152 }
10153
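/**
 * ftrace_dump - dump the tracing buffers to the console
 * @oops_dump_mode: DUMP_ALL dumps every CPU of the top level buffer,
 *	DUMP_ORIG dumps only the CPU that triggered the dump, and
 *	DUMP_PARAM dumps according to the ftrace_dump_on_oops string
 *	(see ftrace_dump_by_param() above).
 *
 * Only one dump can run at a time; concurrent callers return without
 * dumping.
 */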
10154 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10155 {
10156         static atomic_t dump_running;
10157
10158         /* Only allow one dump user at a time. */
10159         if (atomic_inc_return(&dump_running) != 1) {
10160                 atomic_dec(&dump_running);
10161                 return;
10162         }
10163
10164         switch (oops_dump_mode) {
10165         case DUMP_ALL:
10166                 ftrace_dump_one(&global_trace, DUMP_ALL);
10167                 break;
10168         case DUMP_ORIG:
10169                 ftrace_dump_one(&global_trace, DUMP_ORIG);
10170                 break;
10171         case DUMP_PARAM:
10172                 ftrace_dump_by_param();
10173                 break;
10174         case DUMP_NONE:
10175                 break;
10176         default:
10177                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10178                 ftrace_dump_one(&global_trace, DUMP_ALL);
10179         }
10180
10181         atomic_dec(&dump_running);
10182 }
10183 EXPORT_SYMBOL_GPL(ftrace_dump);
10184
10185 #define WRITE_BUFSIZE  4096
10186
10187 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10188                                 size_t count, loff_t *ppos,
10189                                 int (*createfn)(const char *))
10190 {
10191         char *kbuf, *buf, *tmp;
10192         int ret = 0;
10193         size_t done = 0;
10194         size_t size;
10195
10196         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10197         if (!kbuf)
10198                 return -ENOMEM;
10199
10200         while (done < count) {
10201                 size = count - done;
10202
10203                 if (size >= WRITE_BUFSIZE)
10204                         size = WRITE_BUFSIZE - 1;
10205
10206                 if (copy_from_user(kbuf, buffer + done, size)) {
10207                         ret = -EFAULT;
10208                         goto out;
10209                 }
10210                 kbuf[size] = '\0';
10211                 buf = kbuf;
10212                 do {
10213                         tmp = strchr(buf, '\n');
10214                         if (tmp) {
10215                                 *tmp = '\0';
10216                                 size = tmp - buf + 1;
10217                         } else {
10218                                 size = strlen(buf);
10219                                 if (done + size < count) {
10220                                         if (buf != kbuf)
10221                                                 break;
10222                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10223                                         pr_warn("Line length is too long: Should be less than %d\n",
10224                                                 WRITE_BUFSIZE - 2);
10225                                         ret = -EINVAL;
10226                                         goto out;
10227                                 }
10228                         }
10229                         done += size;
10230
10231                         /* Remove comments */
10232                         tmp = strchr(buf, '#');
10233
10234                         if (tmp)
10235                                 *tmp = '\0';
10236
10237                         ret = createfn(buf);
10238                         if (ret)
10239                                 goto out;
10240                         buf += size;
10241
10242                 } while (done < count);
10243         }
10244         ret = done;
10245
10246 out:
10247         kfree(kbuf);
10248
10249         return ret;
10250 }
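
/*
 * A minimal sketch of a write handler built on trace_parse_run_command();
 * my_create_cmd() and my_probes_write() are illustrative names only, but
 * in-tree users such as the dynamic event interfaces follow this pattern:
 *
 *	static int my_create_cmd(const char *cmd)
 *	{
 *		pr_info("parsed command: %s\n", cmd);
 *		return 0;
 *	}
 *
 *	static ssize_t my_probes_write(struct file *file,
 *				       const char __user *buffer,
 *				       size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       my_create_cmd);
 *	}
 *
 * Each newline terminated command has "#" comments stripped and is passed
 * to the callback; a non-zero return from the callback aborts the write.
 */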
10251
10252 #ifdef CONFIG_TRACER_MAX_TRACE
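/*
 * boot_snapshot_info is a tab terminated list of the instance names that
 * were requested to get a boot time snapshot buffer, so a name matches
 * if it sits at the very start of the string (followed by a tab) or
 * anywhere between two tabs.
 */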
10253 __init static bool tr_needs_alloc_snapshot(const char *name)
10254 {
10255         char *test;
10256         int len = strlen(name);
10257         bool ret;
10258
10259         if (!boot_snapshot_index)
10260                 return false;
10261
10262         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10263             boot_snapshot_info[len] == '\t')
10264                 return true;
10265
10266         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10267         if (!test)
10268                 return false;
10269
10270         sprintf(test, "\t%s\t", name);
10271         ret = strstr(boot_snapshot_info, test) != NULL;
10272         kfree(test);
10273         return ret;
10274 }
10275
10276 __init static void do_allocate_snapshot(const char *name)
10277 {
10278         if (!tr_needs_alloc_snapshot(name))
10279                 return;
10280
10281         /*
10282          * When allocate_snapshot is set, the next call to
10283          * allocate_trace_buffers() (called by trace_array_get_by_name())
10284          * will allocate the snapshot buffer. That will also clear
10285          * this flag.
10286          */
10287         allocate_snapshot = true;
10288 }
10289 #else
10290 static inline void do_allocate_snapshot(const char *name) { }
10291 #endif
10292
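/*
 * Create the trace instances that were requested on the kernel command
 * line. boot_instance_info holds one tab terminated entry per requested
 * instance, each entry being the instance name optionally followed by a
 * comma separated list of events to enable early, e.g. an entry built
 * from something like (illustrative only):
 *
 *	trace_instance=foo,sched:sched_switch
 */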
10293 __init static void enable_instances(void)
10294 {
10295         struct trace_array *tr;
10296         char *curr_str;
10297         char *str;
10298         char *tok;
10299
10300         /* A tab is always appended */
10301         boot_instance_info[boot_instance_index - 1] = '\0';
10302         str = boot_instance_info;
10303
10304         while ((curr_str = strsep(&str, "\t"))) {
10305
10306                 tok = strsep(&curr_str, ",");
10307
10308                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10309                         do_allocate_snapshot(tok);
10310
10311                 tr = trace_array_get_by_name(tok, NULL);
10312                 if (!tr) {
10313                         pr_warn("Failed to create instance buffer %s\n", curr_str);
10314                         continue;
10315                 }
10316                 /* Allow user space to delete it */
10317                 trace_array_put(tr);
10318
10319                 while ((tok = strsep(&curr_str, ","))) {
10320                         early_enable_events(tr, tok, true);
10321                 }
10322         }
10323 }
10324
10325 __init static int tracer_alloc_buffers(void)
10326 {
10327         int ring_buf_size;
10328         int ret = -ENOMEM;
10329
10330
10331         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10332                 pr_warn("Tracing disabled due to lockdown\n");
10333                 return -EPERM;
10334         }
10335
10336         /*
10337          * Make sure we don't accidentally add more trace options
10338          * than we have bits for.
10339          */
10340         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10341
10342         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10343                 goto out;
10344
10345         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10346                 goto out_free_buffer_mask;
10347
10348         /* Only allocate trace_printk buffers if a trace_printk exists */
10349         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10350                 /* Must be called before global_trace.buffer is allocated */
10351                 trace_printk_init_buffers();
10352
10353         /* To save memory, keep the ring buffer size to its minimum */
10354         if (global_trace.ring_buffer_expanded)
10355                 ring_buf_size = trace_buf_size;
10356         else
10357                 ring_buf_size = 1;
10358
10359         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10360         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10361
10362         raw_spin_lock_init(&global_trace.start_lock);
10363
10364         /*
10365          * The prepare callback allocates some memory for the ring buffer. We
10366          * don't free the buffer if the CPU goes down. If we were to free
10367          * the buffer, then the user would lose any trace that was in the
10368          * buffer. The memory will be removed once the "instance" is removed.
10369          */
10370         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10371                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10372                                       NULL);
10373         if (ret < 0)
10374                 goto out_free_cpumask;
10375         /* Used for event triggers */
10376         ret = -ENOMEM;
10377         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10378         if (!temp_buffer)
10379                 goto out_rm_hp_state;
10380
10381         if (trace_create_savedcmd() < 0)
10382                 goto out_free_temp_buffer;
10383
10384         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10385                 goto out_free_savedcmd;
10386
10387         /* TODO: make the number of buffers hot pluggable with CPUs */
10388         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10389                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10390                 goto out_free_pipe_cpumask;
10391         }
10392         if (global_trace.buffer_disabled)
10393                 tracing_off();
10394
10395         if (trace_boot_clock) {
10396                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10397                 if (ret < 0)
10398                         pr_warn("Trace clock %s not defined, going back to default\n",
10399                                 trace_boot_clock);
10400         }
10401
10402         /*
10403          * register_tracer() might reference current_trace, so it
10404          * needs to be set before we register anything. This is
10405          * just a bootstrap of current_trace anyway.
10406          */
10407         global_trace.current_trace = &nop_trace;
10408
10409         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10410 #ifdef CONFIG_TRACER_MAX_TRACE
10411         spin_lock_init(&global_trace.snapshot_trigger_lock);
10412 #endif
10413         ftrace_init_global_array_ops(&global_trace);
10414
10415         init_trace_flags_index(&global_trace);
10416
10417         register_tracer(&nop_trace);
10418
10419         /* Function tracing may start here (via kernel command line) */
10420         init_function_trace();
10421
10422         /* All seems OK, enable tracing */
10423         tracing_disabled = 0;
10424
10425         atomic_notifier_chain_register(&panic_notifier_list,
10426                                        &trace_panic_notifier);
10427
10428         register_die_notifier(&trace_die_notifier);
10429
10430         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10431
10432         INIT_LIST_HEAD(&global_trace.systems);
10433         INIT_LIST_HEAD(&global_trace.events);
10434         INIT_LIST_HEAD(&global_trace.hist_vars);
10435         INIT_LIST_HEAD(&global_trace.err_log);
10436         list_add(&global_trace.list, &ftrace_trace_arrays);
10437
10438         apply_trace_boot_options();
10439
10440         register_snapshot_cmd();
10441
10442         test_can_verify();
10443
10444         return 0;
10445
10446 out_free_pipe_cpumask:
10447         free_cpumask_var(global_trace.pipe_cpumask);
10448 out_free_savedcmd:
10449         trace_free_saved_cmdlines_buffer();
10450 out_free_temp_buffer:
10451         ring_buffer_free(temp_buffer);
10452 out_rm_hp_state:
10453         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10454 out_free_cpumask:
10455         free_cpumask_var(global_trace.tracing_cpumask);
10456 out_free_buffer_mask:
10457         free_cpumask_var(tracing_buffer_mask);
10458 out:
10459         return ret;
10460 }
10461
10462 void __init ftrace_boot_snapshot(void)
10463 {
10464 #ifdef CONFIG_TRACER_MAX_TRACE
10465         struct trace_array *tr;
10466
10467         if (!snapshot_at_boot)
10468                 return;
10469
10470         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10471                 if (!tr->allocated_snapshot)
10472                         continue;
10473
10474                 tracing_snapshot_instance(tr);
10475                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10476         }
10477 #endif
10478 }
10479
10480 void __init early_trace_init(void)
10481 {
10482         if (tracepoint_printk) {
10483                 tracepoint_print_iter =
10484                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10485                 if (MEM_FAIL(!tracepoint_print_iter,
10486                              "Failed to allocate trace iterator\n"))
10487                         tracepoint_printk = 0;
10488                 else
10489                         static_key_enable(&tracepoint_printk_key.key);
10490         }
10491         tracer_alloc_buffers();
10492
10493         init_events();
10494 }
10495
10496 void __init trace_init(void)
10497 {
10498         trace_event_init();
10499
10500         if (boot_instance_index)
10501                 enable_instances();
10502 }
10503
10504 __init static void clear_boot_tracer(void)
10505 {
10506         /*
10507          * The default bootup tracer name points into an init section
10508          * that is about to be freed. This function is called at late
10509          * init; if the boot tracer was never registered, clear the
10510          * pointer out to prevent a later registration from accessing
10511          * the buffer after it has been freed.
10512          */
10513         if (!default_bootup_tracer)
10514                 return;
10515
10516         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10517                default_bootup_tracer);
10518         default_bootup_tracer = NULL;
10519 }
10520
10521 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10522 __init static void tracing_set_default_clock(void)
10523 {
10524         /* sched_clock_stable() is determined in late_initcall */
10525         if (!trace_boot_clock && !sched_clock_stable()) {
10526                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10527                         pr_warn("Can not set tracing clock due to lockdown\n");
10528                         return;
10529                 }
10530
10531                 printk(KERN_WARNING
10532                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10533                        "If you want to keep using the local clock, then add:\n"
10534                        "  \"trace_clock=local\"\n"
10535                        "on the kernel command line\n");
10536                 tracing_set_clock(&global_trace, "global");
10537         }
10538 }
10539 #else
10540 static inline void tracing_set_default_clock(void) { }
10541 #endif
10542
10543 __init static int late_trace_init(void)
10544 {
10545         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10546                 static_key_disable(&tracepoint_printk_key.key);
10547                 tracepoint_printk = 0;
10548         }
10549
10550         tracing_set_default_clock();
10551         clear_boot_tracer();
10552         return 0;
10553 }
10554
10555 late_initcall_sync(late_trace_init);