1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 #include <linux/sort.h>
53 #include <linux/io.h> /* vmap_page_range() */
54 #include <linux/fs_context.h>
55
56 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
57
58 #include "trace.h"
59 #include "trace_output.h"
60
61 #ifdef CONFIG_FTRACE_STARTUP_TEST
62 /*
63  * We need to change this state when a selftest is running.
64  * A selftest will look into the ring buffer to count the
65  * entries inserted during the selftest, although concurrent
66  * insertions into the ring buffer, such as trace_printk(), could occur
67  * at the same time, giving false positive or negative results.
68  */
69 static bool __read_mostly tracing_selftest_running;
70
71 /*
72  * If boot-time tracing including tracers/events via kernel cmdline
73  * is running, we do not want to run SELFTEST.
74  */
75 bool __read_mostly tracing_selftest_disabled;
76
77 void __init disable_tracing_selftest(const char *reason)
78 {
79         if (!tracing_selftest_disabled) {
80                 tracing_selftest_disabled = true;
81                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
82         }
83 }
84 #else
85 #define tracing_selftest_running        0
86 #define tracing_selftest_disabled       0
87 #endif
88
89 /* Pipe tracepoints to printk */
90 static struct trace_iterator *tracepoint_print_iter;
91 int tracepoint_printk;
92 static bool tracepoint_printk_stop_on_boot __initdata;
93 static bool traceoff_after_boot __initdata;
94 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
95
96 /* For tracers that don't implement custom flags */
97 static struct tracer_opt dummy_tracer_opt[] = {
98         { }
99 };
100
101 static int
102 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
103 {
104         return 0;
105 }
106
107 /*
108  * To prevent the comm cache from being overwritten when no
109  * tracing is active, only save the comm when a trace event
110  * occurred.
111  */
112 DEFINE_PER_CPU(bool, trace_taskinfo_save);
113
114 /*
115  * Kill all tracing for good (never come back).
116  * It is initialized to 1 but will turn to zero if the initialization
117  * of the tracer is successful. But that is the only place that sets
118  * this back to zero.
119  */
120 static int tracing_disabled = 1;
121
122 cpumask_var_t __read_mostly     tracing_buffer_mask;
123
124 #define MAX_TRACER_SIZE         100
125 /*
126  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
127  *
128  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
129  * is set, then ftrace_dump is called. This will output the contents
130  * of the ftrace buffers to the console.  This is very useful for
131  * capturing traces that lead to crashes and outputing it to a
132  * serial console.
133  *
134  * It is default off, but you can enable it with either specifying
135  * "ftrace_dump_on_oops" in the kernel command line, or setting
136  * /proc/sys/kernel/ftrace_dump_on_oops
137  * Set 1 if you want to dump buffers of all CPUs
138  * Set 2 if you want to dump the buffer of the CPU that triggered oops
139  * Set instance name if you want to dump the specific trace instance
140  * Multiple instance dump is also supported, and instances are seperated
141  * by commas.
142  */
143 /* Set to string format zero to disable by default */
144 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
145
146 /* When set, tracing will stop when a WARN*() is hit */
147 static int __disable_trace_on_warning;
148
149 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
150                              void *buffer, size_t *lenp, loff_t *ppos);
151 static const struct ctl_table trace_sysctl_table[] = {
152         {
153                 .procname       = "ftrace_dump_on_oops",
154                 .data           = &ftrace_dump_on_oops,
155                 .maxlen         = MAX_TRACER_SIZE,
156                 .mode           = 0644,
157                 .proc_handler   = proc_dostring,
158         },
159         {
160                 .procname       = "traceoff_on_warning",
161                 .data           = &__disable_trace_on_warning,
162                 .maxlen         = sizeof(__disable_trace_on_warning),
163                 .mode           = 0644,
164                 .proc_handler   = proc_dointvec,
165         },
166         {
167                 .procname       = "tracepoint_printk",
168                 .data           = &tracepoint_printk,
169                 .maxlen         = sizeof(tracepoint_printk),
170                 .mode           = 0644,
171                 .proc_handler   = tracepoint_printk_sysctl,
172         },
173 };
174
175 static int __init init_trace_sysctls(void)
176 {
177         register_sysctl_init("kernel", trace_sysctl_table);
178         return 0;
179 }
180 subsys_initcall(init_trace_sysctls);
181
182 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
183 /* Map of enums to their values, for "eval_map" file */
184 struct trace_eval_map_head {
185         struct module                   *mod;
186         unsigned long                   length;
187 };
188
189 union trace_eval_map_item;
190
191 struct trace_eval_map_tail {
192         /*
193          * "end" is first and points to NULL as it must be different
194          * than "mod" or "eval_string"
195          */
196         union trace_eval_map_item       *next;
197         const char                      *end;   /* points to NULL */
198 };
199
200 static DEFINE_MUTEX(trace_eval_mutex);
201
202 /*
203  * The trace_eval_maps are saved in an array with two extra elements,
204  * one at the beginning, and one at the end. The beginning item contains
205  * the count of the saved maps (head.length), and the module they
206  * belong to if not built in (head.mod). The ending item contains a
207  * pointer to the next array of saved eval_map items.
208  */
209 union trace_eval_map_item {
210         struct trace_eval_map           map;
211         struct trace_eval_map_head      head;
212         struct trace_eval_map_tail      tail;
213 };
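/*
 * Illustrative layout of one saved array, sketched from the description
 * above (N is head.length):
 *
 *   [ head: mod, length=N ][ map[0] ][ map[1] ] ... [ map[N-1] ][ tail: next ]
 *
 * where tail.next points to the head item of the next saved array.
 */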
214
215 static union trace_eval_map_item *trace_eval_maps;
216 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
217
218 int tracing_set_tracer(struct trace_array *tr, const char *buf);
219 static void ftrace_trace_userstack(struct trace_array *tr,
220                                    struct trace_buffer *buffer,
221                                    unsigned int trace_ctx);
222
223 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
224 static char *default_bootup_tracer;
225
226 static bool allocate_snapshot;
227 static bool snapshot_at_boot;
228
229 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
230 static int boot_instance_index;
231
232 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
233 static int boot_snapshot_index;
234
235 static int __init set_cmdline_ftrace(char *str)
236 {
237         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
238         default_bootup_tracer = bootup_tracer_buf;
239         /* We are using ftrace early, expand it */
240         trace_set_ring_buffer_expanded(NULL);
241         return 1;
242 }
243 __setup("ftrace=", set_cmdline_ftrace);
244
245 int ftrace_dump_on_oops_enabled(void)
246 {
247         if (!strcmp("0", ftrace_dump_on_oops))
248                 return 0;
249         else
250                 return 1;
251 }
252
253 static int __init set_ftrace_dump_on_oops(char *str)
254 {
255         if (!*str) {
256                 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
257                 return 1;
258         }
259
260         if (*str == ',') {
261                 strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
262                 strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
263                 return 1;
264         }
265
266         if (*str++ == '=') {
267                 strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
268                 return 1;
269         }
270
271         return 0;
272 }
273 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
274
275 static int __init stop_trace_on_warning(char *str)
276 {
277         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
278                 __disable_trace_on_warning = 1;
279         return 1;
280 }
281 __setup("traceoff_on_warning", stop_trace_on_warning);
282
283 static int __init boot_alloc_snapshot(char *str)
284 {
285         char *slot = boot_snapshot_info + boot_snapshot_index;
286         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
287         int ret;
288
289         if (str[0] == '=') {
290                 str++;
291                 if (strlen(str) >= left)
292                         return -1;
293
294                 ret = snprintf(slot, left, "%s\t", str);
295                 boot_snapshot_index += ret;
296         } else {
297                 allocate_snapshot = true;
298                 /* We also need the main ring buffer expanded */
299                 trace_set_ring_buffer_expanded(NULL);
300         }
301         return 1;
302 }
303 __setup("alloc_snapshot", boot_alloc_snapshot);
304
305
306 static int __init boot_snapshot(char *str)
307 {
308         snapshot_at_boot = true;
309         boot_alloc_snapshot(str);
310         return 1;
311 }
312 __setup("ftrace_boot_snapshot", boot_snapshot);
313
314
315 static int __init boot_instance(char *str)
316 {
317         char *slot = boot_instance_info + boot_instance_index;
318         int left = sizeof(boot_instance_info) - boot_instance_index;
319         int ret;
320
321         if (strlen(str) >= left)
322                 return -1;
323
324         ret = snprintf(slot, left, "%s\t", str);
325         boot_instance_index += ret;
326
327         return 1;
328 }
329 __setup("trace_instance=", boot_instance);
330
331
332 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
333
334 static int __init set_trace_boot_options(char *str)
335 {
336         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
337         return 1;
338 }
339 __setup("trace_options=", set_trace_boot_options);
340
341 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
342 static char *trace_boot_clock __initdata;
343
344 static int __init set_trace_boot_clock(char *str)
345 {
346         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
347         trace_boot_clock = trace_boot_clock_buf;
348         return 1;
349 }
350 __setup("trace_clock=", set_trace_boot_clock);
351
352 static int __init set_tracepoint_printk(char *str)
353 {
354         /* Ignore the "tp_printk_stop_on_boot" param */
355         if (*str == '_')
356                 return 0;
357
358         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
359                 tracepoint_printk = 1;
360         return 1;
361 }
362 __setup("tp_printk", set_tracepoint_printk);
363
364 static int __init set_tracepoint_printk_stop(char *str)
365 {
366         tracepoint_printk_stop_on_boot = true;
367         return 1;
368 }
369 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
370
371 static int __init set_traceoff_after_boot(char *str)
372 {
373         traceoff_after_boot = true;
374         return 1;
375 }
376 __setup("traceoff_after_boot", set_traceoff_after_boot);
377
378 unsigned long long ns2usecs(u64 nsec)
379 {
380         nsec += 500;
381         do_div(nsec, 1000);
382         return nsec;
383 }
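/*
 * The +500 before the divide rounds to the nearest microsecond, e.g.
 * ns2usecs(1499) == 1 and ns2usecs(1500) == 2.
 */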
384
385 static void
386 trace_process_export(struct trace_export *export,
387                struct ring_buffer_event *event, int flag)
388 {
389         struct trace_entry *entry;
390         unsigned int size = 0;
391
392         if (export->flags & flag) {
393                 entry = ring_buffer_event_data(event);
394                 size = ring_buffer_event_length(event);
395                 export->write(export, entry, size);
396         }
397 }
398
399 static DEFINE_MUTEX(ftrace_export_lock);
400
401 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
402
403 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
404 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
405 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
406
407 static inline void ftrace_exports_enable(struct trace_export *export)
408 {
409         if (export->flags & TRACE_EXPORT_FUNCTION)
410                 static_branch_inc(&trace_function_exports_enabled);
411
412         if (export->flags & TRACE_EXPORT_EVENT)
413                 static_branch_inc(&trace_event_exports_enabled);
414
415         if (export->flags & TRACE_EXPORT_MARKER)
416                 static_branch_inc(&trace_marker_exports_enabled);
417 }
418
419 static inline void ftrace_exports_disable(struct trace_export *export)
420 {
421         if (export->flags & TRACE_EXPORT_FUNCTION)
422                 static_branch_dec(&trace_function_exports_enabled);
423
424         if (export->flags & TRACE_EXPORT_EVENT)
425                 static_branch_dec(&trace_event_exports_enabled);
426
427         if (export->flags & TRACE_EXPORT_MARKER)
428                 static_branch_dec(&trace_marker_exports_enabled);
429 }
430
431 static void ftrace_exports(struct ring_buffer_event *event, int flag)
432 {
433         struct trace_export *export;
434
435         preempt_disable_notrace();
436
437         export = rcu_dereference_raw_check(ftrace_exports_list);
438         while (export) {
439                 trace_process_export(export, event, flag);
440                 export = rcu_dereference_raw_check(export->next);
441         }
442
443         preempt_enable_notrace();
444 }
445
446 static inline void
447 add_trace_export(struct trace_export **list, struct trace_export *export)
448 {
449         rcu_assign_pointer(export->next, *list);
450         /*
451          * We are inserting export into the list, but another
452          * CPU might be walking that list. We need to make sure
453          * the export->next pointer is valid before another CPU sees
454          * the export pointer included in the list.
455          */
456         rcu_assign_pointer(*list, export);
457 }
458
459 static inline int
460 rm_trace_export(struct trace_export **list, struct trace_export *export)
461 {
462         struct trace_export **p;
463
464         for (p = list; *p != NULL; p = &(*p)->next)
465                 if (*p == export)
466                         break;
467
468         if (*p != export)
469                 return -1;
470
471         rcu_assign_pointer(*p, (*p)->next);
472
473         return 0;
474 }
475
476 static inline void
477 add_ftrace_export(struct trace_export **list, struct trace_export *export)
478 {
479         ftrace_exports_enable(export);
480
481         add_trace_export(list, export);
482 }
483
484 static inline int
485 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
486 {
487         int ret;
488
489         ret = rm_trace_export(list, export);
490         ftrace_exports_disable(export);
491
492         return ret;
493 }
494
495 int register_ftrace_export(struct trace_export *export)
496 {
497         if (WARN_ON_ONCE(!export->write))
498                 return -1;
499
500         mutex_lock(&ftrace_export_lock);
501
502         add_ftrace_export(&ftrace_exports_list, export);
503
504         mutex_unlock(&ftrace_export_lock);
505
506         return 0;
507 }
508 EXPORT_SYMBOL_GPL(register_ftrace_export);
509
510 int unregister_ftrace_export(struct trace_export *export)
511 {
512         int ret;
513
514         mutex_lock(&ftrace_export_lock);
515
516         ret = rm_ftrace_export(&ftrace_exports_list, export);
517
518         mutex_unlock(&ftrace_export_lock);
519
520         return ret;
521 }
522 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
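/*
 * A minimal usage sketch for the export API above (the callback and
 * variable names are illustrative, not part of the kernel; the write
 * prototype follows how trace_process_export() invokes it):
 *
 *   static void my_export_write(struct trace_export *export,
 *                               const void *entry, unsigned int size)
 *   {
 *           ... push the raw trace entry to some out-of-band channel ...
 *   }
 *
 *   static struct trace_export my_export = {
 *           .write = my_export_write,
 *           .flags = TRACE_EXPORT_EVENT,
 *   };
 *
 *   register_ftrace_export(&my_export);
 *   ...
 *   unregister_ftrace_export(&my_export);
 */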
523
524 /* trace_flags holds trace_options default values */
525 #define TRACE_DEFAULT_FLAGS                                             \
526         (FUNCTION_DEFAULT_FLAGS |                                       \
527          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
528          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
529          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
530          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
531          TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK |                \
532          TRACE_ITER_COPY_MARKER)
533
534 /* trace_options that are only supported by global_trace */
535 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
536                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
537
538 /* trace_flags that are default zero for instances */
539 #define ZEROED_TRACE_FLAGS \
540         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK | \
541          TRACE_ITER_COPY_MARKER)
542
543 /*
544  * The global_trace is the descriptor that holds the top-level tracing
545  * buffers for the live tracing.
546  */
547 static struct trace_array global_trace = {
548         .trace_flags = TRACE_DEFAULT_FLAGS,
549 };
550
551 static struct trace_array *printk_trace = &global_trace;
552
553 /* List of trace_arrays interested in the top level trace_marker */
554 static LIST_HEAD(marker_copies);
555
556 static __always_inline bool printk_binsafe(struct trace_array *tr)
557 {
558         /*
559          * The binary format of trace_printk() can cause a crash if used
560          * with a buffer from another boot. Force the use of the
561          * non-binary version of trace_printk() if the trace_printk
562          * buffer is a boot-mapped ring buffer.
563          */
564         return !(tr->flags & TRACE_ARRAY_FL_BOOT);
565 }
566
567 static void update_printk_trace(struct trace_array *tr)
568 {
569         if (printk_trace == tr)
570                 return;
571
572         printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
573         printk_trace = tr;
574         tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
575 }
576
577 /* Returns true if the status of tr changed */
578 static bool update_marker_trace(struct trace_array *tr, int enabled)
579 {
580         lockdep_assert_held(&event_mutex);
581
582         if (enabled) {
583                 if (!list_empty(&tr->marker_list))
584                         return false;
585
586                 list_add_rcu(&tr->marker_list, &marker_copies);
587                 tr->trace_flags |= TRACE_ITER_COPY_MARKER;
588                 return true;
589         }
590
591         if (list_empty(&tr->marker_list))
592                 return false;
593
594         list_del_init(&tr->marker_list);
595         tr->trace_flags &= ~TRACE_ITER_COPY_MARKER;
596         return true;
597 }
598
599 void trace_set_ring_buffer_expanded(struct trace_array *tr)
600 {
601         if (!tr)
602                 tr = &global_trace;
603         tr->ring_buffer_expanded = true;
604 }
605
606 LIST_HEAD(ftrace_trace_arrays);
607
608 int trace_array_get(struct trace_array *this_tr)
609 {
610         struct trace_array *tr;
611
612         guard(mutex)(&trace_types_lock);
613         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
614                 if (tr == this_tr) {
615                         tr->ref++;
616                         return 0;
617                 }
618         }
619
620         return -ENODEV;
621 }
622
623 static void __trace_array_put(struct trace_array *this_tr)
624 {
625         WARN_ON(!this_tr->ref);
626         this_tr->ref--;
627 }
628
629 /**
630  * trace_array_put - Decrement the reference counter for this trace array.
631  * @this_tr : pointer to the trace array
632  *
633  * NOTE: Use this when we no longer need the trace array returned by
634  * trace_array_get_by_name(). This ensures the trace array can be later
635  * destroyed.
636  *
637  */
638 void trace_array_put(struct trace_array *this_tr)
639 {
640         if (!this_tr)
641                 return;
642
643         mutex_lock(&trace_types_lock);
644         __trace_array_put(this_tr);
645         mutex_unlock(&trace_types_lock);
646 }
647 EXPORT_SYMBOL_GPL(trace_array_put);
648
649 int tracing_check_open_get_tr(struct trace_array *tr)
650 {
651         int ret;
652
653         ret = security_locked_down(LOCKDOWN_TRACEFS);
654         if (ret)
655                 return ret;
656
657         if (tracing_disabled)
658                 return -ENODEV;
659
660         if (tr && trace_array_get(tr) < 0)
661                 return -ENODEV;
662
663         return 0;
664 }
665
666 /**
667  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
668  * @filtered_pids: The list of pids to check
669  * @search_pid: The PID to find in @filtered_pids
670  *
671  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
672  */
673 bool
674 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
675 {
676         return trace_pid_list_is_set(filtered_pids, search_pid);
677 }
678
679 /**
680  * trace_ignore_this_task - should a task be ignored for tracing
681  * @filtered_pids: The list of pids to check
682  * @filtered_no_pids: The list of pids not to be traced
683  * @task: The task that should be ignored if not filtered
684  *
685  * Checks if @task should be traced or not from @filtered_pids.
686  * Returns true if @task should *NOT* be traced.
687  * Returns false if @task should be traced.
688  */
689 bool
690 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
691                        struct trace_pid_list *filtered_no_pids,
692                        struct task_struct *task)
693 {
694         /*
695          * If filtered_no_pids is not empty, and the task's pid is listed
696          * in filtered_no_pids, then return true.
697          * Otherwise, if filtered_pids is empty, that means we can
698          * trace all tasks. If it has content, then only trace pids
699          * within filtered_pids.
700          */
701
702         return (filtered_pids &&
703                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
704                 (filtered_no_pids &&
705                  trace_find_filtered_pid(filtered_no_pids, task->pid));
706 }
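/*
 * Worked example of the check above (pids are illustrative): with
 * filtered_pids = {1, 2} and filtered_no_pids = {2}, pid 1 is traced,
 * pid 2 is ignored (listed in filtered_no_pids), and pid 3 is ignored
 * (not listed in filtered_pids).
 */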
707
708 /**
709  * trace_filter_add_remove_task - Add or remove a task from a pid_list
710  * @pid_list: The list to modify
711  * @self: The current task for fork or NULL for exit
712  * @task: The task to add or remove
713  *
714  * If adding a task, if @self is defined, the task is only added if @self
715  * is also included in @pid_list. This happens on fork and tasks should
716  * only be added when the parent is listed. If @self is NULL, then the
717  * @task pid will be removed from the list, which would happen on exit
718  * of a task.
719  */
720 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
721                                   struct task_struct *self,
722                                   struct task_struct *task)
723 {
724         if (!pid_list)
725                 return;
726
727         /* For forks, we only add if the forking task is listed */
728         if (self) {
729                 if (!trace_find_filtered_pid(pid_list, self->pid))
730                         return;
731         }
732
733         /* "self" is set for forks, and NULL for exits */
734         if (self)
735                 trace_pid_list_set(pid_list, task->pid);
736         else
737                 trace_pid_list_clear(pid_list, task->pid);
738 }
739
740 /**
741  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
742  * @pid_list: The pid list to show
743  * @v: The last pid that was shown (+1 of the actual pid, so that zero can be displayed)
744  * @pos: The position of the file
745  *
746  * This is used by the seq_file "next" operation to iterate the pids
747  * listed in a trace_pid_list structure.
748  *
749  * Returns the pid+1 as we want to display pid of zero, but NULL would
750  * stop the iteration.
751  */
752 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
753 {
754         long pid = (unsigned long)v;
755         unsigned int next;
756
757         (*pos)++;
758
759         /* pid already is +1 of the actual previous bit */
760         if (trace_pid_list_next(pid_list, pid, &next) < 0)
761                 return NULL;
762
763         pid = next;
764
765         /* Return pid + 1 to allow zero to be represented */
766         return (void *)(pid + 1);
767 }
768
769 /**
770  * trace_pid_start - Used for seq_file to start reading pid lists
771  * @pid_list: The pid list to show
772  * @pos: The position of the file
773  *
774  * This is used by seq_file "start" operation to start the iteration
775  * of listing pids.
776  *
777  * Returns the pid+1 as we want to display pid of zero, but NULL would
778  * stop the iteration.
779  */
780 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
781 {
782         unsigned long pid;
783         unsigned int first;
784         loff_t l = 0;
785
786         if (trace_pid_list_first(pid_list, &first) < 0)
787                 return NULL;
788
789         pid = first;
790
791         /* Return pid + 1 so that zero can be the exit value */
792         for (pid++; pid && l < *pos;
793              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
794                 ;
795         return (void *)pid;
796 }
797
798 /**
799  * trace_pid_show - show the current pid in seq_file processing
800  * @m: The seq_file structure to write into
801  * @v: A void pointer of the pid (+1) value to display
802  *
803  * Can be directly used by seq_file operations to display the current
804  * pid value.
805  */
806 int trace_pid_show(struct seq_file *m, void *v)
807 {
808         unsigned long pid = (unsigned long)v - 1;
809
810         seq_printf(m, "%lu\n", pid);
811         return 0;
812 }
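/*
 * A minimal sketch of how the three helpers above are meant to back a
 * seq_file (the wrapper names and the stop callback are illustrative;
 * real users typically look up the pid_list themselves in their own
 * start/next wrappers):
 *
 *   static const struct seq_operations example_pid_seq_ops = {
 *           .start = example_start,   calls trace_pid_start()
 *           .next  = example_next,    calls trace_pid_next()
 *           .stop  = example_stop,    releases whatever start() took
 *           .show  = trace_pid_show,
 *   };
 */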
813
814 /* 128 should be much more than enough */
815 #define PID_BUF_SIZE            127
816
817 int trace_pid_write(struct trace_pid_list *filtered_pids,
818                     struct trace_pid_list **new_pid_list,
819                     const char __user *ubuf, size_t cnt)
820 {
821         struct trace_pid_list *pid_list;
822         struct trace_parser parser;
823         unsigned long val;
824         int nr_pids = 0;
825         ssize_t read = 0;
826         ssize_t ret;
827         loff_t pos;
828         pid_t pid;
829
830         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
831                 return -ENOMEM;
832
833         /*
834          * Always recreate a new array. The write is an all or nothing
835          * operation. Always create a new array when adding new pids by
836          * the user. If the operation fails, then the current list is
837          * not modified.
838          */
839         pid_list = trace_pid_list_alloc();
840         if (!pid_list) {
841                 trace_parser_put(&parser);
842                 return -ENOMEM;
843         }
844
845         if (filtered_pids) {
846                 /* copy the current bits to the new max */
847                 ret = trace_pid_list_first(filtered_pids, &pid);
848                 while (!ret) {
849                         trace_pid_list_set(pid_list, pid);
850                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
851                         nr_pids++;
852                 }
853         }
854
855         ret = 0;
856         while (cnt > 0) {
857
858                 pos = 0;
859
860                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
861                 if (ret < 0)
862                         break;
863
864                 read += ret;
865                 ubuf += ret;
866                 cnt -= ret;
867
868                 if (!trace_parser_loaded(&parser))
869                         break;
870
871                 ret = -EINVAL;
872                 if (kstrtoul(parser.buffer, 0, &val))
873                         break;
874
875                 pid = (pid_t)val;
876
877                 if (trace_pid_list_set(pid_list, pid) < 0) {
878                         ret = -1;
879                         break;
880                 }
881                 nr_pids++;
882
883                 trace_parser_clear(&parser);
884                 ret = 0;
885         }
886         trace_parser_put(&parser);
887
888         if (ret < 0) {
889                 trace_pid_list_free(pid_list);
890                 return ret;
891         }
892
893         if (!nr_pids) {
894                 /* Cleared the list of pids */
895                 trace_pid_list_free(pid_list);
896                 pid_list = NULL;
897         }
898
899         *new_pid_list = pid_list;
900
901         return read;
902 }
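/*
 * Example, from user space (the file name is one of the pid-filter
 * files built on this helper, e.g. set_event_pid):
 *
 *   echo 123 456 > /sys/kernel/tracing/set_event_pid
 *
 * parses the whitespace-separated pids 123 and 456 into a freshly
 * allocated list (any pids already in @filtered_pids are copied over
 * first). If nothing ends up in the list, *new_pid_list is set to NULL.
 */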
903
904 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
905 {
906         u64 ts;
907
908         /* Early boot up does not have a buffer yet */
909         if (!buf->buffer)
910                 return trace_clock_local();
911
912         ts = ring_buffer_time_stamp(buf->buffer);
913         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
914
915         return ts;
916 }
917
918 u64 ftrace_now(int cpu)
919 {
920         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
921 }
922
923 /**
924  * tracing_is_enabled - Show if global_trace has been enabled
925  *
926  * Shows if the global trace has been enabled or not. It uses the
927  * mirror flag "buffer_disabled", which is meant for fast paths such as
928  * the irqsoff tracer. But it may be inaccurate due to races. If you
929  * need to know the accurate state, use tracing_is_on() which is a little
930  * slower, but accurate.
931  */
932 int tracing_is_enabled(void)
933 {
934         /*
935          * For quick access (irqsoff uses this in fast path), just
936          * return the mirror variable of the state of the ring buffer.
937          * It's a little racy, but we don't really care.
938          */
939         smp_rmb();
940         return !global_trace.buffer_disabled;
941 }
942
943 /*
944  * trace_buf_size is the size in bytes that is allocated
945  * for a buffer. Note, the number of bytes is always rounded
946  * to page size.
947  *
948  * This number is purposely set to a low number of 16384.
949  * If the dump on oops happens, it will be much appreciated
950  * to not have to wait for all that output. Anyway, this is
951  * configurable at both boot time and run time.
952  */
953 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
954
955 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
956
957 /* trace_types holds a link list of available tracers. */
958 static struct tracer            *trace_types __read_mostly;
959
960 /*
961  * trace_types_lock is used to protect the trace_types list.
962  */
963 DEFINE_MUTEX(trace_types_lock);
964
965 /*
966  * Serialize access to the ring buffer.
967  *
968  * The ring buffer serializes readers, but that is only low-level protection.
969  * The validity of the events (returned by ring_buffer_peek(), etc.)
970  * is not protected by the ring buffer.
971  *
972  * The content of events may become garbage if we allow another process to
973  * consume these events concurrently:
974  *   A) the page of the consumed events may become a normal page
975  *      (not a reader page) in the ring buffer, and this page will be rewritten
976  *      by the event producer.
977  *   B) the page of the consumed events may become a page for splice_read,
978  *      and this page will be returned to the system.
979  *
980  * These primitives allow multiple processes to access different CPU
981  * ring buffers concurrently.
982  *
983  * These primitives don't distinguish read-only and read-consume access.
984  * Multiple read-only accesses are also serialized.
985  */
986
987 #ifdef CONFIG_SMP
988 static DECLARE_RWSEM(all_cpu_access_lock);
989 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
990
991 static inline void trace_access_lock(int cpu)
992 {
993         if (cpu == RING_BUFFER_ALL_CPUS) {
994                 /* gain it for accessing the whole ring buffer. */
995                 down_write(&all_cpu_access_lock);
996         } else {
997                 /* gain it for accessing a cpu ring buffer. */
998
999                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
1000                 down_read(&all_cpu_access_lock);
1001
1002                 /* Secondly block other access to this @cpu ring buffer. */
1003                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
1004         }
1005 }
1006
1007 static inline void trace_access_unlock(int cpu)
1008 {
1009         if (cpu == RING_BUFFER_ALL_CPUS) {
1010                 up_write(&all_cpu_access_lock);
1011         } else {
1012                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
1013                 up_read(&all_cpu_access_lock);
1014         }
1015 }
1016
1017 static inline void trace_access_lock_init(void)
1018 {
1019         int cpu;
1020
1021         for_each_possible_cpu(cpu)
1022                 mutex_init(&per_cpu(cpu_access_lock, cpu));
1023 }
1024
1025 #else
1026
1027 static DEFINE_MUTEX(access_lock);
1028
1029 static inline void trace_access_lock(int cpu)
1030 {
1031         (void)cpu;
1032         mutex_lock(&access_lock);
1033 }
1034
1035 static inline void trace_access_unlock(int cpu)
1036 {
1037         (void)cpu;
1038         mutex_unlock(&access_lock);
1039 }
1040
1041 static inline void trace_access_lock_init(void)
1042 {
1043 }
1044
1045 #endif
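/*
 * Typical usage pattern for the primitives above (a sketch):
 *
 *   trace_access_lock(cpu);        (cpu or RING_BUFFER_ALL_CPUS)
 *   ... read or consume events from that CPU's ring buffer ...
 *   trace_access_unlock(cpu);
 */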
1046
1047 #ifdef CONFIG_STACKTRACE
1048 static void __ftrace_trace_stack(struct trace_array *tr,
1049                                  struct trace_buffer *buffer,
1050                                  unsigned int trace_ctx,
1051                                  int skip, struct pt_regs *regs);
1052 static inline void ftrace_trace_stack(struct trace_array *tr,
1053                                       struct trace_buffer *buffer,
1054                                       unsigned int trace_ctx,
1055                                       int skip, struct pt_regs *regs);
1056
1057 #else
1058 static inline void __ftrace_trace_stack(struct trace_array *tr,
1059                                         struct trace_buffer *buffer,
1060                                         unsigned int trace_ctx,
1061                                         int skip, struct pt_regs *regs)
1062 {
1063 }
1064 static inline void ftrace_trace_stack(struct trace_array *tr,
1065                                       struct trace_buffer *buffer,
1066                                       unsigned long trace_ctx,
1067                                       int skip, struct pt_regs *regs)
1068 {
1069 }
1070
1071 #endif
1072
1073 static __always_inline void
1074 trace_event_setup(struct ring_buffer_event *event,
1075                   int type, unsigned int trace_ctx)
1076 {
1077         struct trace_entry *ent = ring_buffer_event_data(event);
1078
1079         tracing_generic_entry_update(ent, type, trace_ctx);
1080 }
1081
1082 static __always_inline struct ring_buffer_event *
1083 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1084                           int type,
1085                           unsigned long len,
1086                           unsigned int trace_ctx)
1087 {
1088         struct ring_buffer_event *event;
1089
1090         event = ring_buffer_lock_reserve(buffer, len);
1091         if (event != NULL)
1092                 trace_event_setup(event, type, trace_ctx);
1093
1094         return event;
1095 }
1096
1097 void tracer_tracing_on(struct trace_array *tr)
1098 {
1099         if (tr->array_buffer.buffer)
1100                 ring_buffer_record_on(tr->array_buffer.buffer);
1101         /*
1102          * This flag is looked at when buffers haven't been allocated
1103          * yet, or by some tracers (like irqsoff), that just want to
1104          * know if the ring buffer has been disabled, but it can handle
1105          * races where it gets disabled while we still do a record.
1106          * As the check is in the fast path of the tracers, it is more
1107          * important to be fast than accurate.
1108          */
1109         tr->buffer_disabled = 0;
1110         /* Make the flag seen by readers */
1111         smp_wmb();
1112 }
1113
1114 /**
1115  * tracing_on - enable tracing buffers
1116  *
1117  * This function enables tracing buffers that may have been
1118  * disabled with tracing_off.
1119  */
1120 void tracing_on(void)
1121 {
1122         tracer_tracing_on(&global_trace);
1123 }
1124 EXPORT_SYMBOL_GPL(tracing_on);
1125
1126
1127 static __always_inline void
1128 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1129 {
1130         __this_cpu_write(trace_taskinfo_save, true);
1131
1132         /* If this is the temp buffer, we need to commit fully */
1133         if (this_cpu_read(trace_buffered_event) == event) {
1134                 /* Length is in event->array[0] */
1135                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1136                 /* Release the temp buffer */
1137                 this_cpu_dec(trace_buffered_event_cnt);
1138                 /* ring_buffer_unlock_commit() enables preemption */
1139                 preempt_enable_notrace();
1140         } else
1141                 ring_buffer_unlock_commit(buffer);
1142 }
1143
1144 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1145                        const char *str, int size)
1146 {
1147         struct ring_buffer_event *event;
1148         struct trace_buffer *buffer;
1149         struct print_entry *entry;
1150         unsigned int trace_ctx;
1151         int alloc;
1152
1153         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1154                 return 0;
1155
1156         if (unlikely(tracing_selftest_running && tr == &global_trace))
1157                 return 0;
1158
1159         if (unlikely(tracing_disabled))
1160                 return 0;
1161
1162         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1163
1164         trace_ctx = tracing_gen_ctx();
1165         buffer = tr->array_buffer.buffer;
1166         ring_buffer_nest_start(buffer);
1167         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1168                                             trace_ctx);
1169         if (!event) {
1170                 size = 0;
1171                 goto out;
1172         }
1173
1174         entry = ring_buffer_event_data(event);
1175         entry->ip = ip;
1176
1177         memcpy(&entry->buf, str, size);
1178
1179         /* Add a newline if necessary */
1180         if (entry->buf[size - 1] != '\n') {
1181                 entry->buf[size] = '\n';
1182                 entry->buf[size + 1] = '\0';
1183         } else
1184                 entry->buf[size] = '\0';
1185
1186         __buffer_unlock_commit(buffer, event);
1187         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1188  out:
1189         ring_buffer_nest_end(buffer);
1190         return size;
1191 }
1192 EXPORT_SYMBOL_GPL(__trace_array_puts);
1193
1194 /**
1195  * __trace_puts - write a constant string into the trace buffer.
1196  * @ip:    The address of the caller
1197  * @str:   The constant string to write
1198  * @size:  The size of the string.
1199  */
1200 int __trace_puts(unsigned long ip, const char *str, int size)
1201 {
1202         return __trace_array_puts(printk_trace, ip, str, size);
1203 }
1204 EXPORT_SYMBOL_GPL(__trace_puts);
1205
1206 /**
1207  * __trace_bputs - write the pointer to a constant string into trace buffer
1208  * @ip:    The address of the caller
1209  * @str:   The constant string to write to the buffer to
1210  */
1211 int __trace_bputs(unsigned long ip, const char *str)
1212 {
1213         struct trace_array *tr = READ_ONCE(printk_trace);
1214         struct ring_buffer_event *event;
1215         struct trace_buffer *buffer;
1216         struct bputs_entry *entry;
1217         unsigned int trace_ctx;
1218         int size = sizeof(struct bputs_entry);
1219         int ret = 0;
1220
1221         if (!printk_binsafe(tr))
1222                 return __trace_puts(ip, str, strlen(str));
1223
1224         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1225                 return 0;
1226
1227         if (unlikely(tracing_selftest_running || tracing_disabled))
1228                 return 0;
1229
1230         trace_ctx = tracing_gen_ctx();
1231         buffer = tr->array_buffer.buffer;
1232
1233         ring_buffer_nest_start(buffer);
1234         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1235                                             trace_ctx);
1236         if (!event)
1237                 goto out;
1238
1239         entry = ring_buffer_event_data(event);
1240         entry->ip                       = ip;
1241         entry->str                      = str;
1242
1243         __buffer_unlock_commit(buffer, event);
1244         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1245
1246         ret = 1;
1247  out:
1248         ring_buffer_nest_end(buffer);
1249         return ret;
1250 }
1251 EXPORT_SYMBOL_GPL(__trace_bputs);
1252
1253 #ifdef CONFIG_TRACER_SNAPSHOT
1254 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1255                                            void *cond_data)
1256 {
1257         struct tracer *tracer = tr->current_trace;
1258         unsigned long flags;
1259
1260         if (in_nmi()) {
1261                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1262                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1263                 return;
1264         }
1265
1266         if (!tr->allocated_snapshot) {
1267                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1268                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1269                 tracer_tracing_off(tr);
1270                 return;
1271         }
1272
1273         /* Note, snapshot can not be used when the tracer uses it */
1274         if (tracer->use_max_tr) {
1275                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1276                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1277                 return;
1278         }
1279
1280         if (tr->mapped) {
1281                 trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1282                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1283                 return;
1284         }
1285
1286         local_irq_save(flags);
1287         update_max_tr(tr, current, smp_processor_id(), cond_data);
1288         local_irq_restore(flags);
1289 }
1290
1291 void tracing_snapshot_instance(struct trace_array *tr)
1292 {
1293         tracing_snapshot_instance_cond(tr, NULL);
1294 }
1295
1296 /**
1297  * tracing_snapshot - take a snapshot of the current buffer.
1298  *
1299  * This causes a swap between the snapshot buffer and the current live
1300  * tracing buffer. You can use this to take snapshots of the live
1301  * trace when some condition is triggered, but continue to trace.
1302  *
1303  * Note, make sure to allocate the snapshot with either
1304  * a tracing_snapshot_alloc(), or by doing it manually
1305  * with: echo 1 > /sys/kernel/tracing/snapshot
1306  *
1307  * If the snapshot buffer is not allocated, it will stop tracing.
1308  * Basically making a permanent snapshot.
1309  */
1310 void tracing_snapshot(void)
1311 {
1312         struct trace_array *tr = &global_trace;
1313
1314         tracing_snapshot_instance(tr);
1315 }
1316 EXPORT_SYMBOL_GPL(tracing_snapshot);
1317
1318 /**
1319  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1320  * @tr:         The tracing instance to snapshot
1321  * @cond_data:  The data to be tested conditionally, and possibly saved
1322  *
1323  * This is the same as tracing_snapshot() except that the snapshot is
1324  * conditional - the snapshot will only happen if the
1325  * cond_snapshot.update() implementation receiving the cond_data
1326  * returns true, which means that the trace array's cond_snapshot
1327  * update() operation used the cond_data to determine whether the
1328  * snapshot should be taken, and if it was, presumably saved it along
1329  * with the snapshot.
1330  */
1331 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1332 {
1333         tracing_snapshot_instance_cond(tr, cond_data);
1334 }
1335 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1336
1337 /**
1338  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1339  * @tr:         The tracing instance
1340  *
1341  * When the user enables a conditional snapshot using
1342  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1343  * with the snapshot.  This accessor is used to retrieve it.
1344  *
1345  * Should not be called from cond_snapshot.update(), since it takes
1346  * the tr->max_lock lock, which the code calling
1347  * cond_snapshot.update() already holds.
1348  *
1349  * Returns the cond_data associated with the trace array's snapshot.
1350  */
1351 void *tracing_cond_snapshot_data(struct trace_array *tr)
1352 {
1353         void *cond_data = NULL;
1354
1355         local_irq_disable();
1356         arch_spin_lock(&tr->max_lock);
1357
1358         if (tr->cond_snapshot)
1359                 cond_data = tr->cond_snapshot->cond_data;
1360
1361         arch_spin_unlock(&tr->max_lock);
1362         local_irq_enable();
1363
1364         return cond_data;
1365 }
1366 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1367
1368 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1369                                         struct array_buffer *size_buf, int cpu_id);
1370 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1371
1372 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1373 {
1374         int order;
1375         int ret;
1376
1377         if (!tr->allocated_snapshot) {
1378
1379                 /* Make the snapshot buffer have the same order as main buffer */
1380                 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1381                 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1382                 if (ret < 0)
1383                         return ret;
1384
1385                 /* allocate spare buffer */
1386                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1387                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1388                 if (ret < 0)
1389                         return ret;
1390
1391                 tr->allocated_snapshot = true;
1392         }
1393
1394         return 0;
1395 }
1396
1397 static void free_snapshot(struct trace_array *tr)
1398 {
1399         /*
1400          * We don't free the ring buffer; instead, we resize it because
1401          * the max_tr ring buffer has some state (e.g. ring->clock) and
1402          * we want to preserve it.
1403          */
1404         ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1405         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1406         set_buffer_entries(&tr->max_buffer, 1);
1407         tracing_reset_online_cpus(&tr->max_buffer);
1408         tr->allocated_snapshot = false;
1409 }
1410
1411 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1412 {
1413         int ret;
1414
1415         lockdep_assert_held(&trace_types_lock);
1416
1417         spin_lock(&tr->snapshot_trigger_lock);
1418         if (tr->snapshot == UINT_MAX || tr->mapped) {
1419                 spin_unlock(&tr->snapshot_trigger_lock);
1420                 return -EBUSY;
1421         }
1422
1423         tr->snapshot++;
1424         spin_unlock(&tr->snapshot_trigger_lock);
1425
1426         ret = tracing_alloc_snapshot_instance(tr);
1427         if (ret) {
1428                 spin_lock(&tr->snapshot_trigger_lock);
1429                 tr->snapshot--;
1430                 spin_unlock(&tr->snapshot_trigger_lock);
1431         }
1432
1433         return ret;
1434 }
1435
1436 int tracing_arm_snapshot(struct trace_array *tr)
1437 {
1438         int ret;
1439
1440         mutex_lock(&trace_types_lock);
1441         ret = tracing_arm_snapshot_locked(tr);
1442         mutex_unlock(&trace_types_lock);
1443
1444         return ret;
1445 }
1446
1447 void tracing_disarm_snapshot(struct trace_array *tr)
1448 {
1449         spin_lock(&tr->snapshot_trigger_lock);
1450         if (!WARN_ON(!tr->snapshot))
1451                 tr->snapshot--;
1452         spin_unlock(&tr->snapshot_trigger_lock);
1453 }
1454
1455 /**
1456  * tracing_alloc_snapshot - allocate snapshot buffer.
1457  *
1458  * This only allocates the snapshot buffer if it isn't already
1459  * allocated - it doesn't also take a snapshot.
1460  *
1461  * This is meant to be used in cases where the snapshot buffer needs
1462  * to be set up for events that can't sleep but need to be able to
1463  * trigger a snapshot.
1464  */
1465 int tracing_alloc_snapshot(void)
1466 {
1467         struct trace_array *tr = &global_trace;
1468         int ret;
1469
1470         ret = tracing_alloc_snapshot_instance(tr);
1471         WARN_ON(ret < 0);
1472
1473         return ret;
1474 }
1475 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1476
1477 /**
1478  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1479  *
1480  * This is similar to tracing_snapshot(), but it will allocate the
1481  * snapshot buffer if it isn't already allocated. Use this only
1482  * where it is safe to sleep, as the allocation may sleep.
1483  *
1484  * This causes a swap between the snapshot buffer and the current live
1485  * tracing buffer. You can use this to take snapshots of the live
1486  * trace when some condition is triggered, but continue to trace.
1487  */
1488 void tracing_snapshot_alloc(void)
1489 {
1490         int ret;
1491
1492         ret = tracing_alloc_snapshot();
1493         if (ret < 0)
1494                 return;
1495
1496         tracing_snapshot();
1497 }
1498 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1499
1500 /**
1501  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1502  * @tr:         The tracing instance
1503  * @cond_data:  User data to associate with the snapshot
1504  * @update:     Implementation of the cond_snapshot update function
1505  *
1506  * Check whether the conditional snapshot for the given instance has
1507  * already been enabled, or if the current tracer is already using a
1508  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1509  * save the cond_data and update function inside.
1510  *
1511  * Returns 0 if successful, error otherwise.
1512  */
1513 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1514                                  cond_update_fn_t update)
1515 {
1516         struct cond_snapshot *cond_snapshot __free(kfree) =
1517                 kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1518         int ret;
1519
1520         if (!cond_snapshot)
1521                 return -ENOMEM;
1522
1523         cond_snapshot->cond_data = cond_data;
1524         cond_snapshot->update = update;
1525
1526         guard(mutex)(&trace_types_lock);
1527
1528         if (tr->current_trace->use_max_tr)
1529                 return -EBUSY;
1530
1531         /*
1532          * The cond_snapshot can only change to NULL without the
1533          * trace_types_lock. We don't care if we race with it going
1534          * to NULL, but we want to make sure that it's not set to
1535          * something other than NULL when we get here, which we can
1536          * do safely with only holding the trace_types_lock and not
1537          * having to take the max_lock.
1538          */
1539         if (tr->cond_snapshot)
1540                 return -EBUSY;
1541
1542         ret = tracing_arm_snapshot_locked(tr);
1543         if (ret)
1544                 return ret;
1545
1546         local_irq_disable();
1547         arch_spin_lock(&tr->max_lock);
1548         tr->cond_snapshot = no_free_ptr(cond_snapshot);
1549         arch_spin_unlock(&tr->max_lock);
1550         local_irq_enable();
1551
1552         return 0;
1553 }
1554 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
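/*
 * A minimal usage sketch of the conditional snapshot API (the callback,
 * data and variable names are illustrative):
 *
 *   static bool my_update(struct trace_array *tr, void *cond_data)
 *   {
 *           return ...;    decide whether the snapshot should be taken
 *   }
 *
 *   tracing_snapshot_cond_enable(tr, my_data, my_update);
 *   ...
 *   tracing_snapshot_cond(tr, my_data);    snapshots only if my_update()
 *                                          returned true
 *   ...
 *   tracing_snapshot_cond_disable(tr);
 */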
1555
1556 /**
1557  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1558  * @tr:         The tracing instance
1559  *
1560  * Check whether the conditional snapshot for the given instance is
1561  * enabled; if so, free the cond_snapshot associated with it,
1562  * otherwise return -EINVAL.
1563  *
1564  * Returns 0 if successful, error otherwise.
1565  */
1566 int tracing_snapshot_cond_disable(struct trace_array *tr)
1567 {
1568         int ret = 0;
1569
1570         local_irq_disable();
1571         arch_spin_lock(&tr->max_lock);
1572
1573         if (!tr->cond_snapshot)
1574                 ret = -EINVAL;
1575         else {
1576                 kfree(tr->cond_snapshot);
1577                 tr->cond_snapshot = NULL;
1578         }
1579
1580         arch_spin_unlock(&tr->max_lock);
1581         local_irq_enable();
1582
1583         tracing_disarm_snapshot(tr);
1584
1585         return ret;
1586 }
1587 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1588 #else
1589 void tracing_snapshot(void)
1590 {
1591         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1592 }
1593 EXPORT_SYMBOL_GPL(tracing_snapshot);
1594 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1595 {
1596         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1597 }
1598 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1599 int tracing_alloc_snapshot(void)
1600 {
1601         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1602         return -ENODEV;
1603 }
1604 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1605 void tracing_snapshot_alloc(void)
1606 {
1607         /* Give warning */
1608         tracing_snapshot();
1609 }
1610 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1611 void *tracing_cond_snapshot_data(struct trace_array *tr)
1612 {
1613         return NULL;
1614 }
1615 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1616 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1617 {
1618         return -ENODEV;
1619 }
1620 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1621 int tracing_snapshot_cond_disable(struct trace_array *tr)
1622 {
1623         return false;
1624 }
1625 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1626 #define free_snapshot(tr)       do { } while (0)
1627 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1628 #endif /* CONFIG_TRACER_SNAPSHOT */
1629
1630 void tracer_tracing_off(struct trace_array *tr)
1631 {
1632         if (tr->array_buffer.buffer)
1633                 ring_buffer_record_off(tr->array_buffer.buffer);
1634         /*
1635          * This flag is looked at when buffers haven't been allocated
1636          * yet, or by some tracers (like irqsoff), that just want to
1637          * know if the ring buffer has been disabled, but it can handle
1638          * races where it gets disabled while we still do a record.
1639          * As the check is in the fast path of the tracers, it is more
1640          * important to be fast than accurate.
1641          */
1642         tr->buffer_disabled = 1;
1643         /* Make the flag seen by readers */
1644         smp_wmb();
1645 }
1646
1647 /**
1648  * tracer_tracing_disable() - temporarily disable writes to the buffer
1649  * @tr: The trace array to disable its buffer for
1650  *
1651  * Expects tracer_tracing_enable() to re-enable tracing.
1652  * The difference between this and tracer_tracing_off() is that this
1653  * is a counter and can nest, whereas tracer_tracing_off() can
1654  * be called multiple times and a single tracer_tracing_on() will
1655  * re-enable it.
1656  */
1657 void tracer_tracing_disable(struct trace_array *tr)
1658 {
1659         if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1660                 return;
1661
1662         ring_buffer_record_disable(tr->array_buffer.buffer);
1663 }
1664
1665 /**
1666  * tracer_tracing_enable() - counter part of tracer_tracing_disable()
1667  * @tr: The trace array that had tracer_tracing_disable() called on it
1668  *
1669  * This is called after tracer_tracing_disable() has been called on @tr,
1670  * when it's safe to re-enable tracing.
1671  */
1672 void tracer_tracing_enable(struct trace_array *tr)
1673 {
1674         if (WARN_ON_ONCE(!tr->array_buffer.buffer))
1675                 return;
1676
1677         ring_buffer_record_enable(tr->array_buffer.buffer);
1678 }
1679
1680 /**
1681  * tracing_off - turn off tracing buffers
1682  *
1683  * This function stops the tracing buffers from recording data.
1684  * It does not disable any overhead the tracers themselves may
1685  * be causing. This function simply causes all recording to
1686  * the ring buffers to fail.
1687  */
1688 void tracing_off(void)
1689 {
1690         tracer_tracing_off(&global_trace);
1691 }
1692 EXPORT_SYMBOL_GPL(tracing_off);
1693
1694 void disable_trace_on_warning(void)
1695 {
1696         if (__disable_trace_on_warning) {
1697                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1698                         "Disabling tracing due to warning\n");
1699                 tracing_off();
1700         }
1701 }
1702
1703 /**
1704  * tracer_tracing_is_on - show real state of ring buffer enabled
1705  * @tr : the trace array to know if ring buffer is enabled
1706  *
1707  * Shows real state of the ring buffer if it is enabled or not.
1708  */
1709 bool tracer_tracing_is_on(struct trace_array *tr)
1710 {
1711         if (tr->array_buffer.buffer)
1712                 return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1713         return !tr->buffer_disabled;
1714 }
1715
1716 /**
1717  * tracing_is_on - show state of ring buffers enabled
1718  */
1719 int tracing_is_on(void)
1720 {
1721         return tracer_tracing_is_on(&global_trace);
1722 }
1723 EXPORT_SYMBOL_GPL(tracing_is_on);
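
/*
 * Illustrative sketch only: a hypothetical debug helper showing the intended
 * pairing of tracing_is_on() and tracing_off() above; nothing below is part
 * of the tracing core itself.
 */
static __maybe_unused void example_freeze_trace_on_error(int err)
{
	if (err && tracing_is_on()) {
		/* Keep the ring buffer contents around for post-mortem use */
		tracing_off();
		pr_info("tracing stopped after error %d\n", err);
	}
}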
1724
1725 static int __init set_buf_size(char *str)
1726 {
1727         unsigned long buf_size;
1728
1729         if (!str)
1730                 return 0;
1731         buf_size = memparse(str, &str);
1732         /*
1733          * nr_entries cannot be zero and the startup
1734          * tests require some buffer space. Therefore
1735          * ensure we have at least 4096 bytes of buffer.
1736          */
1737         trace_buf_size = max(4096UL, buf_size);
1738         return 1;
1739 }
1740 __setup("trace_buf_size=", set_buf_size);
1741
1742 static int __init set_tracing_thresh(char *str)
1743 {
1744         unsigned long threshold;
1745         int ret;
1746
1747         if (!str)
1748                 return 0;
1749         ret = kstrtoul(str, 0, &threshold);
1750         if (ret < 0)
1751                 return 0;
1752         tracing_thresh = threshold * 1000;
1753         return 1;
1754 }
1755 __setup("tracing_thresh=", set_tracing_thresh);
1756
1757 unsigned long nsecs_to_usecs(unsigned long nsecs)
1758 {
1759         return nsecs / 1000;
1760 }
1761
1762 /*
1763  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1764  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1765  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1766  * of strings in the order that the evals (enum) were defined.
1767  */
1768 #undef C
1769 #define C(a, b) b
1770
1771 /* These must match the bit positions in trace_iterator_flags */
1772 static const char *trace_options[] = {
1773         TRACE_FLAGS
1774         NULL
1775 };
1776
1777 static struct {
1778         u64 (*func)(void);
1779         const char *name;
1780         int in_ns;              /* is this clock in nanoseconds? */
1781 } trace_clocks[] = {
1782         { trace_clock_local,            "local",        1 },
1783         { trace_clock_global,           "global",       1 },
1784         { trace_clock_counter,          "counter",      0 },
1785         { trace_clock_jiffies,          "uptime",       0 },
1786         { trace_clock,                  "perf",         1 },
1787         { ktime_get_mono_fast_ns,       "mono",         1 },
1788         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1789         { ktime_get_boot_fast_ns,       "boot",         1 },
1790         { ktime_get_tai_fast_ns,        "tai",          1 },
1791         ARCH_TRACE_CLOCKS
1792 };
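
/*
 * The clock names above (plus any architecture specific ones) are what the
 * tracefs "trace_clock" file reports and accepts, e.g. (illustrative):
 *
 *	# cat /sys/kernel/tracing/trace_clock
 *	[local] global counter uptime perf mono mono_raw boot tai
 *	# echo mono > /sys/kernel/tracing/trace_clock
 */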
1793
1794 bool trace_clock_in_ns(struct trace_array *tr)
1795 {
1796         if (trace_clocks[tr->clock_id].in_ns)
1797                 return true;
1798
1799         return false;
1800 }
1801
1802 /*
1803  * trace_parser_get_init - allocates the buffer for the trace parser
1804  */
1805 int trace_parser_get_init(struct trace_parser *parser, int size)
1806 {
1807         memset(parser, 0, sizeof(*parser));
1808
1809         parser->buffer = kmalloc(size, GFP_KERNEL);
1810         if (!parser->buffer)
1811                 return 1;
1812
1813         parser->size = size;
1814         return 0;
1815 }
1816
1817 /*
1818  * trace_parser_put - frees the buffer for trace parser
1819  */
1820 void trace_parser_put(struct trace_parser *parser)
1821 {
1822         kfree(parser->buffer);
1823         parser->buffer = NULL;
1824 }
1825
1826 /*
1827  * trace_get_user - reads the user input string separated by space
1828  * (matched by isspace(ch))
1829  *
1830  * For each string found the 'struct trace_parser' is updated,
1831  * and the function returns.
1832  *
1833  * Returns number of bytes read.
1834  *
1835  * See kernel/trace/trace.h for 'struct trace_parser' details.
1836  */
1837 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1838         size_t cnt, loff_t *ppos)
1839 {
1840         char ch;
1841         size_t read = 0;
1842         ssize_t ret;
1843
1844         if (!*ppos)
1845                 trace_parser_clear(parser);
1846
1847         ret = get_user(ch, ubuf++);
1848         if (ret)
1849                 goto out;
1850
1851         read++;
1852         cnt--;
1853
1854         /*
1855          * The parser is not finished with the last write,
1856          * continue reading the user input without skipping spaces.
1857          */
1858         if (!parser->cont) {
1859                 /* skip white space */
1860                 while (cnt && isspace(ch)) {
1861                         ret = get_user(ch, ubuf++);
1862                         if (ret)
1863                                 goto out;
1864                         read++;
1865                         cnt--;
1866                 }
1867
1868                 parser->idx = 0;
1869
1870                 /* only spaces were written */
1871                 if (isspace(ch) || !ch) {
1872                         *ppos += read;
1873                         ret = read;
1874                         goto out;
1875                 }
1876         }
1877
1878         /* read the non-space input */
1879         while (cnt && !isspace(ch) && ch) {
1880                 if (parser->idx < parser->size - 1)
1881                         parser->buffer[parser->idx++] = ch;
1882                 else {
1883                         ret = -EINVAL;
1884                         goto out;
1885                 }
1886                 ret = get_user(ch, ubuf++);
1887                 if (ret)
1888                         goto out;
1889                 read++;
1890                 cnt--;
1891         }
1892
1893         /* We either got finished input or we have to wait for another call. */
1894         if (isspace(ch) || !ch) {
1895                 parser->buffer[parser->idx] = 0;
1896                 parser->cont = false;
1897         } else if (parser->idx < parser->size - 1) {
1898                 parser->cont = true;
1899                 parser->buffer[parser->idx++] = ch;
1900                 /* Make sure the parsed string always terminates with '\0'. */
1901                 parser->buffer[parser->idx] = 0;
1902         } else {
1903                 ret = -EINVAL;
1904                 goto out;
1905         }
1906
1907         *ppos += read;
1908         ret = read;
1909
1910 out:
1911         return ret;
1912 }
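
/*
 * Illustrative sketch only: how a tracefs write handler typically drives the
 * parser above. The handler itself is hypothetical; only
 * trace_parser_get_init(), trace_get_user(), trace_parser_loaded() and
 * trace_parser_put() are real helpers.
 */
static __maybe_unused ssize_t
example_parser_write(struct file *filp, const char __user *ubuf,
		     size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t ret;

	if (trace_parser_get_init(&parser, 128))
		return -ENOMEM;

	ret = trace_get_user(&parser, ubuf, cnt, ppos);
	if (ret >= 0 && trace_parser_loaded(&parser)) {
		/* parser.buffer now holds one NUL-terminated word */
		pr_info("parsed token: %s\n", parser.buffer);
	}

	trace_parser_put(&parser);
	return ret;
}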
1913
1914 /* TODO add a seq_buf_to_buffer() */
1915 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1916 {
1917         int len;
1918
1919         if (trace_seq_used(s) <= s->readpos)
1920                 return -EBUSY;
1921
1922         len = trace_seq_used(s) - s->readpos;
1923         if (cnt > len)
1924                 cnt = len;
1925         memcpy(buf, s->buffer + s->readpos, cnt);
1926
1927         s->readpos += cnt;
1928         return cnt;
1929 }
1930
1931 unsigned long __read_mostly     tracing_thresh;
1932
1933 #ifdef CONFIG_TRACER_MAX_TRACE
1934 static const struct file_operations tracing_max_lat_fops;
1935
1936 #ifdef LATENCY_FS_NOTIFY
1937
1938 static struct workqueue_struct *fsnotify_wq;
1939
1940 static void latency_fsnotify_workfn(struct work_struct *work)
1941 {
1942         struct trace_array *tr = container_of(work, struct trace_array,
1943                                               fsnotify_work);
1944         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1945 }
1946
1947 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1948 {
1949         struct trace_array *tr = container_of(iwork, struct trace_array,
1950                                               fsnotify_irqwork);
1951         queue_work(fsnotify_wq, &tr->fsnotify_work);
1952 }
1953
1954 static void trace_create_maxlat_file(struct trace_array *tr,
1955                                      struct dentry *d_tracer)
1956 {
1957         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1958         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1959         tr->d_max_latency = trace_create_file("tracing_max_latency",
1960                                               TRACE_MODE_WRITE,
1961                                               d_tracer, tr,
1962                                               &tracing_max_lat_fops);
1963 }
1964
1965 __init static int latency_fsnotify_init(void)
1966 {
1967         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1968                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1969         if (!fsnotify_wq) {
1970                 pr_err("Unable to allocate tr_max_lat_wq\n");
1971                 return -ENOMEM;
1972         }
1973         return 0;
1974 }
1975
1976 late_initcall_sync(latency_fsnotify_init);
1977
1978 void latency_fsnotify(struct trace_array *tr)
1979 {
1980         if (!fsnotify_wq)
1981                 return;
1982         /*
1983          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1984          * possible that we are called from __schedule() or do_idle(), which
1985          * could cause a deadlock.
1986          */
1987         irq_work_queue(&tr->fsnotify_irqwork);
1988 }
1989
1990 #else /* !LATENCY_FS_NOTIFY */
1991
1992 #define trace_create_maxlat_file(tr, d_tracer)                          \
1993         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1994                           d_tracer, tr, &tracing_max_lat_fops)
1995
1996 #endif
1997
1998 /*
1999  * Copy the new maximum trace into the separate maximum-trace
2000  * structure. (this way the maximum trace is permanently saved,
2001  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
2002  */
2003 static void
2004 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
2005 {
2006         struct array_buffer *trace_buf = &tr->array_buffer;
2007         struct array_buffer *max_buf = &tr->max_buffer;
2008         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
2009         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
2010
2011         max_buf->cpu = cpu;
2012         max_buf->time_start = data->preempt_timestamp;
2013
2014         max_data->saved_latency = tr->max_latency;
2015         max_data->critical_start = data->critical_start;
2016         max_data->critical_end = data->critical_end;
2017
2018         strscpy(max_data->comm, tsk->comm);
2019         max_data->pid = tsk->pid;
2020         /*
2021          * If tsk == current, then use current_uid(), as that does not use
2022          * RCU. The irq tracer can be called out of RCU scope.
2023          */
2024         if (tsk == current)
2025                 max_data->uid = current_uid();
2026         else
2027                 max_data->uid = task_uid(tsk);
2028
2029         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
2030         max_data->policy = tsk->policy;
2031         max_data->rt_priority = tsk->rt_priority;
2032
2033         /* record this task's comm */
2034         tracing_record_cmdline(tsk);
2035         latency_fsnotify(tr);
2036 }
2037
2038 /**
2039  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
2040  * @tr: tracer
2041  * @tsk: the task with the latency
2042  * @cpu: The cpu that initiated the trace.
2043  * @cond_data: User data associated with a conditional snapshot
2044  *
2045  * Flip the buffers between the @tr and the max_tr and record information
2046  * about which task was the cause of this latency.
2047  */
2048 void
2049 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
2050               void *cond_data)
2051 {
2052         if (tr->stop_count)
2053                 return;
2054
2055         WARN_ON_ONCE(!irqs_disabled());
2056
2057         if (!tr->allocated_snapshot) {
2058                 /* Only the nop tracer should hit this when disabling */
2059                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2060                 return;
2061         }
2062
2063         arch_spin_lock(&tr->max_lock);
2064
2065         /* Inherit the recordable setting from array_buffer */
2066         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
2067                 ring_buffer_record_on(tr->max_buffer.buffer);
2068         else
2069                 ring_buffer_record_off(tr->max_buffer.buffer);
2070
2071 #ifdef CONFIG_TRACER_SNAPSHOT
2072         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
2073                 arch_spin_unlock(&tr->max_lock);
2074                 return;
2075         }
2076 #endif
2077         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
2078
2079         __update_max_tr(tr, tsk, cpu);
2080
2081         arch_spin_unlock(&tr->max_lock);
2082
2083         /* Any waiters on the old snapshot buffer need to wake up */
2084         ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
2085 }
2086
2087 /**
2088  * update_max_tr_single - only copy one trace over, and reset the rest
2089  * @tr: tracer
2090  * @tsk: task with the latency
2091  * @cpu: the cpu of the buffer to copy.
2092  *
2093  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2094  */
2095 void
2096 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2097 {
2098         int ret;
2099
2100         if (tr->stop_count)
2101                 return;
2102
2103         WARN_ON_ONCE(!irqs_disabled());
2104         if (!tr->allocated_snapshot) {
2105                 /* Only the nop tracer should hit this when disabling */
2106                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
2107                 return;
2108         }
2109
2110         arch_spin_lock(&tr->max_lock);
2111
2112         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2113
2114         if (ret == -EBUSY) {
2115                 /*
2116                  * We failed to swap the buffer due to a commit taking
2117                  * place on this CPU. We fail to record, but we reset
2118                  * the max trace buffer (no one writes directly to it)
2119                  * and flag that it failed.
2120                  * Another reason this can fail is that a resize is in progress.
2121                  */
2122                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2123                         "Failed to swap buffers due to commit or resize in progress\n");
2124         }
2125
2126         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2127
2128         __update_max_tr(tr, tsk, cpu);
2129         arch_spin_unlock(&tr->max_lock);
2130 }
2131
2132 #endif /* CONFIG_TRACER_MAX_TRACE */
2133
2134 struct pipe_wait {
2135         struct trace_iterator           *iter;
2136         int                             wait_index;
2137 };
2138
2139 static bool wait_pipe_cond(void *data)
2140 {
2141         struct pipe_wait *pwait = data;
2142         struct trace_iterator *iter = pwait->iter;
2143
2144         if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2145                 return true;
2146
2147         return iter->closed;
2148 }
2149
2150 static int wait_on_pipe(struct trace_iterator *iter, int full)
2151 {
2152         struct pipe_wait pwait;
2153         int ret;
2154
2155         /* Iterators are static, they should be filled or empty */
2156         if (trace_buffer_iter(iter, iter->cpu_file))
2157                 return 0;
2158
2159         pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2160         pwait.iter = iter;
2161
2162         ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2163                                wait_pipe_cond, &pwait);
2164
2165 #ifdef CONFIG_TRACER_MAX_TRACE
2166         /*
2167          * Make sure this is still the snapshot buffer, as if a snapshot were
2168          * to happen, this would now be the main buffer.
2169          */
2170         if (iter->snapshot)
2171                 iter->array_buffer = &iter->tr->max_buffer;
2172 #endif
2173         return ret;
2174 }
2175
2176 #ifdef CONFIG_FTRACE_STARTUP_TEST
2177 static bool selftests_can_run;
2178
2179 struct trace_selftests {
2180         struct list_head                list;
2181         struct tracer                   *type;
2182 };
2183
2184 static LIST_HEAD(postponed_selftests);
2185
2186 static int save_selftest(struct tracer *type)
2187 {
2188         struct trace_selftests *selftest;
2189
2190         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2191         if (!selftest)
2192                 return -ENOMEM;
2193
2194         selftest->type = type;
2195         list_add(&selftest->list, &postponed_selftests);
2196         return 0;
2197 }
2198
2199 static int run_tracer_selftest(struct tracer *type)
2200 {
2201         struct trace_array *tr = &global_trace;
2202         struct tracer *saved_tracer = tr->current_trace;
2203         int ret;
2204
2205         if (!type->selftest || tracing_selftest_disabled)
2206                 return 0;
2207
2208         /*
2209          * If a tracer registers early in boot up (before scheduling is
2210          * initialized and such), then do not run its selftests yet.
2211          * Instead, run it a little later in the boot process.
2212          */
2213         if (!selftests_can_run)
2214                 return save_selftest(type);
2215
2216         if (!tracing_is_on()) {
2217                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2218                         type->name);
2219                 return 0;
2220         }
2221
2222         /*
2223          * Run a selftest on this tracer.
2224          * Here we reset the trace buffer, and set the current
2225          * tracer to be this tracer. The tracer can then run some
2226          * internal tracing to verify that everything is in order.
2227          * If we fail, we do not register this tracer.
2228          */
2229         tracing_reset_online_cpus(&tr->array_buffer);
2230
2231         tr->current_trace = type;
2232
2233 #ifdef CONFIG_TRACER_MAX_TRACE
2234         if (type->use_max_tr) {
2235                 /* If we expanded the buffers, make sure the max is expanded too */
2236                 if (tr->ring_buffer_expanded)
2237                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2238                                            RING_BUFFER_ALL_CPUS);
2239                 tr->allocated_snapshot = true;
2240         }
2241 #endif
2242
2243         /* the test is responsible for initializing and enabling */
2244         pr_info("Testing tracer %s: ", type->name);
2245         ret = type->selftest(type, tr);
2246         /* the test is responsible for resetting too */
2247         tr->current_trace = saved_tracer;
2248         if (ret) {
2249                 printk(KERN_CONT "FAILED!\n");
2250                 /* Add the warning after printing 'FAILED' */
2251                 WARN_ON(1);
2252                 return -1;
2253         }
2254         /* Only reset on passing, to avoid touching corrupted buffers */
2255         tracing_reset_online_cpus(&tr->array_buffer);
2256
2257 #ifdef CONFIG_TRACER_MAX_TRACE
2258         if (type->use_max_tr) {
2259                 tr->allocated_snapshot = false;
2260
2261                 /* Shrink the max buffer again */
2262                 if (tr->ring_buffer_expanded)
2263                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2264                                            RING_BUFFER_ALL_CPUS);
2265         }
2266 #endif
2267
2268         printk(KERN_CONT "PASSED\n");
2269         return 0;
2270 }
2271
2272 static int do_run_tracer_selftest(struct tracer *type)
2273 {
2274         int ret;
2275
2276         /*
2277          * Tests can take a long time, especially if they are run one after the
2278          * other, as does happen during bootup when all the tracers are
2279          * registered. This could cause the soft lockup watchdog to trigger.
2280          */
2281         cond_resched();
2282
2283         tracing_selftest_running = true;
2284         ret = run_tracer_selftest(type);
2285         tracing_selftest_running = false;
2286
2287         return ret;
2288 }
2289
2290 static __init int init_trace_selftests(void)
2291 {
2292         struct trace_selftests *p, *n;
2293         struct tracer *t, **last;
2294         int ret;
2295
2296         selftests_can_run = true;
2297
2298         guard(mutex)(&trace_types_lock);
2299
2300         if (list_empty(&postponed_selftests))
2301                 return 0;
2302
2303         pr_info("Running postponed tracer tests:\n");
2304
2305         tracing_selftest_running = true;
2306         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2307                 /* This loop can take minutes when sanitizers are enabled, so
2308                  * let's make sure we allow RCU processing.
2309                  */
2310                 cond_resched();
2311                 ret = run_tracer_selftest(p->type);
2312                 /* If the test fails, then warn and remove from available_tracers */
2313                 if (ret < 0) {
2314                         WARN(1, "tracer: %s failed selftest, disabling\n",
2315                              p->type->name);
2316                         last = &trace_types;
2317                         for (t = trace_types; t; t = t->next) {
2318                                 if (t == p->type) {
2319                                         *last = t->next;
2320                                         break;
2321                                 }
2322                                 last = &t->next;
2323                         }
2324                 }
2325                 list_del(&p->list);
2326                 kfree(p);
2327         }
2328         tracing_selftest_running = false;
2329
2330         return 0;
2331 }
2332 core_initcall(init_trace_selftests);
2333 #else
2334 static inline int do_run_tracer_selftest(struct tracer *type)
2335 {
2336         return 0;
2337 }
2338 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2339
2340 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2341
2342 static void __init apply_trace_boot_options(void);
2343
2344 /**
2345  * register_tracer - register a tracer with the ftrace system.
2346  * @type: the plugin for the tracer
2347  *
2348  * Register a new plugin tracer.
2349  */
2350 int __init register_tracer(struct tracer *type)
2351 {
2352         struct tracer *t;
2353         int ret = 0;
2354
2355         if (!type->name) {
2356                 pr_info("Tracer must have a name\n");
2357                 return -1;
2358         }
2359
2360         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2361                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2362                 return -1;
2363         }
2364
2365         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2366                 pr_warn("Can not register tracer %s due to lockdown\n",
2367                            type->name);
2368                 return -EPERM;
2369         }
2370
2371         mutex_lock(&trace_types_lock);
2372
2373         for (t = trace_types; t; t = t->next) {
2374                 if (strcmp(type->name, t->name) == 0) {
2375                         /* already found */
2376                         pr_info("Tracer %s already registered\n",
2377                                 type->name);
2378                         ret = -1;
2379                         goto out;
2380                 }
2381         }
2382
2383         if (!type->set_flag)
2384                 type->set_flag = &dummy_set_flag;
2385         if (!type->flags) {
2386                 /* allocate a dummy tracer_flags */
2387                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2388                 if (!type->flags) {
2389                         ret = -ENOMEM;
2390                         goto out;
2391                 }
2392                 type->flags->val = 0;
2393                 type->flags->opts = dummy_tracer_opt;
2394         } else
2395                 if (!type->flags->opts)
2396                         type->flags->opts = dummy_tracer_opt;
2397
2398         /* store the tracer for __set_tracer_option */
2399         type->flags->trace = type;
2400
2401         ret = do_run_tracer_selftest(type);
2402         if (ret < 0)
2403                 goto out;
2404
2405         type->next = trace_types;
2406         trace_types = type;
2407         add_tracer_options(&global_trace, type);
2408
2409  out:
2410         mutex_unlock(&trace_types_lock);
2411
2412         if (ret || !default_bootup_tracer)
2413                 goto out_unlock;
2414
2415         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2416                 goto out_unlock;
2417
2418         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2419         /* Do we want this tracer to start on bootup? */
2420         tracing_set_tracer(&global_trace, type->name);
2421         default_bootup_tracer = NULL;
2422
2423         apply_trace_boot_options();
2424
2425         /* Disable other selftests, since running this tracer will break them. */
2426         disable_tracing_selftest("running a tracer");
2427
2428  out_unlock:
2429         return ret;
2430 }
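
/*
 * Illustrative sketch only: the minimal shape of a plugin that could be
 * handed to register_tracer() above. The name and callbacks are
 * hypothetical; real tracers live in kernel/trace/trace_*.c and register
 * themselves from their own __init code.
 */
static int example_tracer_init(struct trace_array *tr)
{
	/* Enable whatever hooks this tracer needs */
	return 0;
}

static void example_tracer_reset(struct trace_array *tr)
{
	/* Undo what init did */
}

static struct tracer example_tracer __maybe_unused = {
	.name	= "example",
	.init	= example_tracer_init,
	.reset	= example_tracer_reset,
};
/* Registration would happen from an __init function:
 *	register_tracer(&example_tracer);
 */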
2431
2432 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2433 {
2434         struct trace_buffer *buffer = buf->buffer;
2435
2436         if (!buffer)
2437                 return;
2438
2439         ring_buffer_record_disable(buffer);
2440
2441         /* Make sure all commits have finished */
2442         synchronize_rcu();
2443         ring_buffer_reset_cpu(buffer, cpu);
2444
2445         ring_buffer_record_enable(buffer);
2446 }
2447
2448 void tracing_reset_online_cpus(struct array_buffer *buf)
2449 {
2450         struct trace_buffer *buffer = buf->buffer;
2451
2452         if (!buffer)
2453                 return;
2454
2455         ring_buffer_record_disable(buffer);
2456
2457         /* Make sure all commits have finished */
2458         synchronize_rcu();
2459
2460         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2461
2462         ring_buffer_reset_online_cpus(buffer);
2463
2464         ring_buffer_record_enable(buffer);
2465 }
2466
2467 static void tracing_reset_all_cpus(struct array_buffer *buf)
2468 {
2469         struct trace_buffer *buffer = buf->buffer;
2470
2471         if (!buffer)
2472                 return;
2473
2474         ring_buffer_record_disable(buffer);
2475
2476         /* Make sure all commits have finished */
2477         synchronize_rcu();
2478
2479         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2480
2481         ring_buffer_reset(buffer);
2482
2483         ring_buffer_record_enable(buffer);
2484 }
2485
2486 /* Must have trace_types_lock held */
2487 void tracing_reset_all_online_cpus_unlocked(void)
2488 {
2489         struct trace_array *tr;
2490
2491         lockdep_assert_held(&trace_types_lock);
2492
2493         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2494                 if (!tr->clear_trace)
2495                         continue;
2496                 tr->clear_trace = false;
2497                 tracing_reset_online_cpus(&tr->array_buffer);
2498 #ifdef CONFIG_TRACER_MAX_TRACE
2499                 tracing_reset_online_cpus(&tr->max_buffer);
2500 #endif
2501         }
2502 }
2503
2504 void tracing_reset_all_online_cpus(void)
2505 {
2506         mutex_lock(&trace_types_lock);
2507         tracing_reset_all_online_cpus_unlocked();
2508         mutex_unlock(&trace_types_lock);
2509 }
2510
2511 int is_tracing_stopped(void)
2512 {
2513         return global_trace.stop_count;
2514 }
2515
2516 static void tracing_start_tr(struct trace_array *tr)
2517 {
2518         struct trace_buffer *buffer;
2519         unsigned long flags;
2520
2521         if (tracing_disabled)
2522                 return;
2523
2524         raw_spin_lock_irqsave(&tr->start_lock, flags);
2525         if (--tr->stop_count) {
2526                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2527                         /* Someone screwed up their debugging */
2528                         tr->stop_count = 0;
2529                 }
2530                 goto out;
2531         }
2532
2533         /* Prevent the buffers from switching */
2534         arch_spin_lock(&tr->max_lock);
2535
2536         buffer = tr->array_buffer.buffer;
2537         if (buffer)
2538                 ring_buffer_record_enable(buffer);
2539
2540 #ifdef CONFIG_TRACER_MAX_TRACE
2541         buffer = tr->max_buffer.buffer;
2542         if (buffer)
2543                 ring_buffer_record_enable(buffer);
2544 #endif
2545
2546         arch_spin_unlock(&tr->max_lock);
2547
2548  out:
2549         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2550 }
2551
2552 /**
2553  * tracing_start - quick start of the tracer
2554  *
2555  * If tracing is enabled but was stopped by tracing_stop,
2556  * this will start the tracer back up.
2557  */
2558 void tracing_start(void)
2559
2560 {
2561         return tracing_start_tr(&global_trace);
2562 }
2563
2564 static void tracing_stop_tr(struct trace_array *tr)
2565 {
2566         struct trace_buffer *buffer;
2567         unsigned long flags;
2568
2569         raw_spin_lock_irqsave(&tr->start_lock, flags);
2570         if (tr->stop_count++)
2571                 goto out;
2572
2573         /* Prevent the buffers from switching */
2574         arch_spin_lock(&tr->max_lock);
2575
2576         buffer = tr->array_buffer.buffer;
2577         if (buffer)
2578                 ring_buffer_record_disable(buffer);
2579
2580 #ifdef CONFIG_TRACER_MAX_TRACE
2581         buffer = tr->max_buffer.buffer;
2582         if (buffer)
2583                 ring_buffer_record_disable(buffer);
2584 #endif
2585
2586         arch_spin_unlock(&tr->max_lock);
2587
2588  out:
2589         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2590 }
2591
2592 /**
2593  * tracing_stop - quick stop of the tracer
2594  *
2595  * Lightweight way to stop tracing. Use in conjunction with
2596  * tracing_start.
2597  */
2598 void tracing_stop(void)
2599 {
2600         return tracing_stop_tr(&global_trace);
2601 }
2602
2603 /*
2604  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2605  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2606  * simplifies those functions and keeps them in sync.
2607  */
2608 enum print_line_t trace_handle_return(struct trace_seq *s)
2609 {
2610         return trace_seq_has_overflowed(s) ?
2611                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2612 }
2613 EXPORT_SYMBOL_GPL(trace_handle_return);
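
/*
 * Illustrative sketch only: the typical tail of a trace_event output
 * callback, where trace_handle_return() turns a trace_seq overflow into
 * TRACE_TYPE_PARTIAL_LINE. The event itself is hypothetical.
 */
static __maybe_unused enum print_line_t
example_event_print(struct trace_iterator *iter, int flags,
		    struct trace_event *event)
{
	trace_seq_printf(&iter->seq, "example event on cpu %d\n", iter->cpu);

	return trace_handle_return(&iter->seq);
}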
2614
2615 static unsigned short migration_disable_value(void)
2616 {
2617 #if defined(CONFIG_SMP)
2618         return current->migration_disabled;
2619 #else
2620         return 0;
2621 #endif
2622 }
2623
2624 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2625 {
2626         unsigned int trace_flags = irqs_status;
2627         unsigned int pc;
2628
2629         pc = preempt_count();
2630
2631         if (pc & NMI_MASK)
2632                 trace_flags |= TRACE_FLAG_NMI;
2633         if (pc & HARDIRQ_MASK)
2634                 trace_flags |= TRACE_FLAG_HARDIRQ;
2635         if (in_serving_softirq())
2636                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2637         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2638                 trace_flags |= TRACE_FLAG_BH_OFF;
2639
2640         if (tif_need_resched())
2641                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2642         if (test_preempt_need_resched())
2643                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2644         if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2645                 trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
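
	/*
	 * Pack the context into one word: bits 0-3 carry the (capped)
	 * preemption count, bits 4-7 the (capped) migration-disable depth,
	 * and bits 16 and up the TRACE_FLAG_* bits gathered above.
	 */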
2646         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2647                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2648 }
2649
2650 struct ring_buffer_event *
2651 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2652                           int type,
2653                           unsigned long len,
2654                           unsigned int trace_ctx)
2655 {
2656         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2657 }
2658
2659 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2660 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2661 static int trace_buffered_event_ref;
2662
2663 /**
2664  * trace_buffered_event_enable - enable buffering events
2665  *
2666  * When events are being filtered, it is quicker to use a temporary
2667  * buffer to write the event data into if there's a likely chance
2668  * that it will not be committed. Discarding an event from the ring
2669  * buffer is not as fast as committing it, and is much slower than
2670  * copying the data and committing only on a match.
2671  *
2672  * When an event is to be filtered, allocate per cpu buffers to
2673  * write the event data into, and if the event is filtered and discarded
2674  * it is simply dropped, otherwise, the entire data is to be committed
2675  * in one shot.
2676  */
2677 void trace_buffered_event_enable(void)
2678 {
2679         struct ring_buffer_event *event;
2680         struct page *page;
2681         int cpu;
2682
2683         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2684
2685         if (trace_buffered_event_ref++)
2686                 return;
2687
2688         for_each_tracing_cpu(cpu) {
2689                 page = alloc_pages_node(cpu_to_node(cpu),
2690                                         GFP_KERNEL | __GFP_NORETRY, 0);
2691                 /* This is just an optimization and can handle failures */
2692                 if (!page) {
2693                         pr_err("Failed to allocate event buffer\n");
2694                         break;
2695                 }
2696
2697                 event = page_address(page);
2698                 memset(event, 0, sizeof(*event));
2699
2700                 per_cpu(trace_buffered_event, cpu) = event;
2701
2702                 preempt_disable();
2703                 if (cpu == smp_processor_id() &&
2704                     __this_cpu_read(trace_buffered_event) !=
2705                     per_cpu(trace_buffered_event, cpu))
2706                         WARN_ON_ONCE(1);
2707                 preempt_enable();
2708         }
2709 }
2710
2711 static void enable_trace_buffered_event(void *data)
2712 {
2713         /* Probably not needed, but do it anyway */
2714         smp_rmb();
2715         this_cpu_dec(trace_buffered_event_cnt);
2716 }
2717
2718 static void disable_trace_buffered_event(void *data)
2719 {
2720         this_cpu_inc(trace_buffered_event_cnt);
2721 }
2722
2723 /**
2724  * trace_buffered_event_disable - disable buffering events
2725  *
2726  * When a filter is removed, it is faster to not use the buffered
2727  * events, and to commit directly into the ring buffer. Free up
2728  * the temp buffers when there are no more users. This requires
2729  * special synchronization with current events.
2730  */
2731 void trace_buffered_event_disable(void)
2732 {
2733         int cpu;
2734
2735         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2736
2737         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2738                 return;
2739
2740         if (--trace_buffered_event_ref)
2741                 return;
2742
2743         /* For each CPU, set the buffer as used. */
2744         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2745                          NULL, true);
2746
2747         /* Wait for all current users to finish */
2748         synchronize_rcu();
2749
2750         for_each_tracing_cpu(cpu) {
2751                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2752                 per_cpu(trace_buffered_event, cpu) = NULL;
2753         }
2754
2755         /*
2756          * Wait for all CPUs that potentially started checking if they can use
2757          * their event buffer only after the previous synchronize_rcu() call and
2758          * they still read a valid pointer from trace_buffered_event. It must be
2759          * ensured they don't see cleared trace_buffered_event_cnt else they
2760          * could wrongly decide to use the pointed-to buffer which is now freed.
2761          */
2762         synchronize_rcu();
2763
2764         /* For each CPU, relinquish the buffer */
2765         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2766                          true);
2767 }
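
/*
 * Illustrative sketch only: a hypothetical caller pairing the enable/disable
 * helpers above. Both expect event_mutex (already used by the checks above)
 * to be held, which the guard below provides.
 */
static __maybe_unused void example_toggle_buffered_events(bool enable)
{
	guard(mutex)(&event_mutex);

	if (enable)
		trace_buffered_event_enable();
	else
		trace_buffered_event_disable();
}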
2768
2769 static struct trace_buffer *temp_buffer;
2770
2771 struct ring_buffer_event *
2772 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2773                           struct trace_event_file *trace_file,
2774                           int type, unsigned long len,
2775                           unsigned int trace_ctx)
2776 {
2777         struct ring_buffer_event *entry;
2778         struct trace_array *tr = trace_file->tr;
2779         int val;
2780
2781         *current_rb = tr->array_buffer.buffer;
2782
2783         if (!tr->no_filter_buffering_ref &&
2784             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2785                 preempt_disable_notrace();
2786                 /*
2787                  * Filtering is on, so try to use the per cpu buffer first.
2788                  * This buffer will simulate a ring_buffer_event,
2789                  * where the type_len is zero and the array[0] will
2790                  * hold the full length.
2791                  * (see include/linux/ring_buffer.h for details on
2792                  *  how the ring_buffer_event is structured).
2793                  *
2794                  * Using a temp buffer during filtering and copying it
2795                  * on a matched filter is quicker than writing directly
2796                  * into the ring buffer and then discarding it when
2797                  * it doesn't match. That is because the discard
2798                  * requires several atomic operations to get right.
2799                  * Copying on match and doing nothing on a failed match
2800                  * is still quicker than no copy on match, but having
2801                  * to discard out of the ring buffer on a failed match.
2802                  */
2803                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2804                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2805
2806                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2807
2808                         /*
2809                          * Preemption is disabled, but interrupts and NMIs
2810                          * can still come in now. If that happens after
2811                          * the above increment, then it will have to go
2812                          * back to the old method of allocating the event
2813                          * on the ring buffer, and if the filter fails, it
2814                          * will have to call ring_buffer_discard_commit()
2815                          * to remove it.
2816                          *
2817                          * Need to also check the unlikely case that the
2818                          * length is bigger than the temp buffer size.
2819                          * If that happens, then the reserve is pretty much
2820                          * guaranteed to fail, as the ring buffer currently
2821                          * only allows events less than a page. But that may
2822                          * change in the future, so let the ring buffer reserve
2823                          * handle the failure in that case.
2824                          */
2825                         if (val == 1 && likely(len <= max_len)) {
2826                                 trace_event_setup(entry, type, trace_ctx);
2827                                 entry->array[0] = len;
2828                                 /* Return with preemption disabled */
2829                                 return entry;
2830                         }
2831                         this_cpu_dec(trace_buffered_event_cnt);
2832                 }
2833                 /* __trace_buffer_lock_reserve() disables preemption */
2834                 preempt_enable_notrace();
2835         }
2836
2837         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2838                                             trace_ctx);
2839         /*
2840          * If tracing is off, but we have triggers enabled,
2841          * we still need to look at the event data. Use the temp_buffer
2842          * to store the trace event for the trigger to use. It's recursion
2843          * safe and will not be recorded anywhere.
2844          */
2845         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2846                 *current_rb = temp_buffer;
2847                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2848                                                     trace_ctx);
2849         }
2850         return entry;
2851 }
2852 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2853
2854 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2855 static DEFINE_MUTEX(tracepoint_printk_mutex);
2856
2857 static void output_printk(struct trace_event_buffer *fbuffer)
2858 {
2859         struct trace_event_call *event_call;
2860         struct trace_event_file *file;
2861         struct trace_event *event;
2862         unsigned long flags;
2863         struct trace_iterator *iter = tracepoint_print_iter;
2864
2865         /* We should never get here if iter is NULL */
2866         if (WARN_ON_ONCE(!iter))
2867                 return;
2868
2869         event_call = fbuffer->trace_file->event_call;
2870         if (!event_call || !event_call->event.funcs ||
2871             !event_call->event.funcs->trace)
2872                 return;
2873
2874         file = fbuffer->trace_file;
2875         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2876             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2877              !filter_match_preds(file->filter, fbuffer->entry)))
2878                 return;
2879
2880         event = &fbuffer->trace_file->event_call->event;
2881
2882         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2883         trace_seq_init(&iter->seq);
2884         iter->ent = fbuffer->entry;
2885         event_call->event.funcs->trace(iter, 0, event);
2886         trace_seq_putc(&iter->seq, 0);
2887         printk("%s", iter->seq.buffer);
2888
2889         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2890 }
2891
2892 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2893                              void *buffer, size_t *lenp,
2894                              loff_t *ppos)
2895 {
2896         int save_tracepoint_printk;
2897         int ret;
2898
2899         guard(mutex)(&tracepoint_printk_mutex);
2900         save_tracepoint_printk = tracepoint_printk;
2901
2902         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2903
2904         /*
2905          * This will force exiting early, as tracepoint_printk
2906          * is always zero when tracepoint_print_iter is not allocated
2907          */
2908         if (!tracepoint_print_iter)
2909                 tracepoint_printk = 0;
2910
2911         if (save_tracepoint_printk == tracepoint_printk)
2912                 return ret;
2913
2914         if (tracepoint_printk)
2915                 static_key_enable(&tracepoint_printk_key.key);
2916         else
2917                 static_key_disable(&tracepoint_printk_key.key);
2918
2919         return ret;
2920 }
2921
2922 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2923 {
2924         enum event_trigger_type tt = ETT_NONE;
2925         struct trace_event_file *file = fbuffer->trace_file;
2926
2927         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2928                         fbuffer->entry, &tt))
2929                 goto discard;
2930
2931         if (static_key_false(&tracepoint_printk_key.key))
2932                 output_printk(fbuffer);
2933
2934         if (static_branch_unlikely(&trace_event_exports_enabled))
2935                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2936
2937         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2938                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2939
2940 discard:
2941         if (tt)
2942                 event_triggers_post_call(file, tt);
2943
2944 }
2945 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2946
2947 /*
2948  * Skip 3:
2949  *
2950  *   trace_buffer_unlock_commit_regs()
2951  *   trace_event_buffer_commit()
2952  *   trace_event_raw_event_xxx()
2953  */
2954 # define STACK_SKIP 3
2955
2956 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2957                                      struct trace_buffer *buffer,
2958                                      struct ring_buffer_event *event,
2959                                      unsigned int trace_ctx,
2960                                      struct pt_regs *regs)
2961 {
2962         __buffer_unlock_commit(buffer, event);
2963
2964         /*
2965          * If regs is not set, then skip the internal STACK_SKIP functions.
2966          * Note, we can still get here via blktrace, wakeup tracer
2967          * and mmiotrace, but that's ok if they lose a function or
2968          * two. They are not that meaningful.
2969          */
2970         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2971         ftrace_trace_userstack(tr, buffer, trace_ctx);
2972 }
2973
2974 /*
2975  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2976  */
2977 void
2978 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2979                                    struct ring_buffer_event *event)
2980 {
2981         __buffer_unlock_commit(buffer, event);
2982 }
2983
2984 void
2985 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2986                parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2987 {
2988         struct trace_buffer *buffer = tr->array_buffer.buffer;
2989         struct ring_buffer_event *event;
2990         struct ftrace_entry *entry;
2991         int size = sizeof(*entry);
2992
2993         size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2994
2995         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2996                                             trace_ctx);
2997         if (!event)
2998                 return;
2999         entry   = ring_buffer_event_data(event);
3000         entry->ip                       = ip;
3001         entry->parent_ip                = parent_ip;
3002
3003 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
3004         if (fregs) {
3005                 for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
3006                         entry->args[i] = ftrace_regs_get_argument(fregs, i);
3007         }
3008 #endif
3009
3010         if (static_branch_unlikely(&trace_function_exports_enabled))
3011                 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3012         __buffer_unlock_commit(buffer, event);
3013 }
3014
3015 #ifdef CONFIG_STACKTRACE
3016
3017 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3018 #define FTRACE_KSTACK_NESTING   4
3019
3020 #define FTRACE_KSTACK_ENTRIES   (SZ_4K / FTRACE_KSTACK_NESTING)
3021
3022 struct ftrace_stack {
3023         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3024 };
3025
3026
3027 struct ftrace_stacks {
3028         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3029 };
3030
3031 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3032 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3033
3034 static void __ftrace_trace_stack(struct trace_array *tr,
3035                                  struct trace_buffer *buffer,
3036                                  unsigned int trace_ctx,
3037                                  int skip, struct pt_regs *regs)
3038 {
3039         struct ring_buffer_event *event;
3040         unsigned int size, nr_entries;
3041         struct ftrace_stack *fstack;
3042         struct stack_entry *entry;
3043         int stackidx;
3044
3045         /*
3046          * Add one, for this function and the call to stack_trace_save().
3047          * If regs is set, then these functions will not be in the way.
3048          */
3049 #ifndef CONFIG_UNWINDER_ORC
3050         if (!regs)
3051                 skip++;
3052 #endif
3053
3054         preempt_disable_notrace();
3055
3056         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3057
3058         /* This should never happen. If it does, yell once and skip */
3059         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3060                 goto out;
3061
3062         /*
3063          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3064          * interrupt will either see the value pre increment or post
3065          * increment. If the interrupt happens pre increment it will have
3066          * restored the counter when it returns.  We just need a barrier to
3067          * keep gcc from moving things around.
3068          */
3069         barrier();
3070
3071         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3072         size = ARRAY_SIZE(fstack->calls);
3073
3074         if (regs) {
3075                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3076                                                    size, skip);
3077         } else {
3078                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3079         }
3080
3081 #ifdef CONFIG_DYNAMIC_FTRACE
3082         /* Mark entry of stack trace as trampoline code */
3083         if (tr->ops && tr->ops->trampoline) {
3084                 unsigned long tramp_start = tr->ops->trampoline;
3085                 unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
3086                 unsigned long *calls = fstack->calls;
3087
3088                 for (int i = 0; i < nr_entries; i++) {
3089                         if (calls[i] >= tramp_start && calls[i] < tramp_end)
3090                                 calls[i] = FTRACE_TRAMPOLINE_MARKER;
3091                 }
3092         }
3093 #endif
3094
3095         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3096                                     struct_size(entry, caller, nr_entries),
3097                                     trace_ctx);
3098         if (!event)
3099                 goto out;
3100         entry = ring_buffer_event_data(event);
3101
3102         entry->size = nr_entries;
3103         memcpy(&entry->caller, fstack->calls,
3104                flex_array_size(entry, caller, nr_entries));
3105
3106         __buffer_unlock_commit(buffer, event);
3107
3108  out:
3109         /* Again, don't let gcc optimize things here */
3110         barrier();
3111         __this_cpu_dec(ftrace_stack_reserve);
3112         preempt_enable_notrace();
3113
3114 }
3115
3116 static inline void ftrace_trace_stack(struct trace_array *tr,
3117                                       struct trace_buffer *buffer,
3118                                       unsigned int trace_ctx,
3119                                       int skip, struct pt_regs *regs)
3120 {
3121         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3122                 return;
3123
3124         __ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3125 }
3126
3127 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3128                    int skip)
3129 {
3130         struct trace_buffer *buffer = tr->array_buffer.buffer;
3131
3132         if (rcu_is_watching()) {
3133                 __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3134                 return;
3135         }
3136
3137         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3138                 return;
3139
3140         /*
3141          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3142          * but if the above rcu_is_watching() failed, then the NMI
3143          * triggered someplace critical, and ct_irq_enter() should
3144          * not be called from NMI.
3145          */
3146         if (unlikely(in_nmi()))
3147                 return;
3148
3149         ct_irq_enter_irqson();
3150         __ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3151         ct_irq_exit_irqson();
3152 }
3153
3154 /**
3155  * trace_dump_stack - record a stack back trace in the trace buffer
3156  * @skip: Number of functions to skip (helper handlers)
3157  */
3158 void trace_dump_stack(int skip)
3159 {
3160         if (tracing_disabled || tracing_selftest_running)
3161                 return;
3162
3163 #ifndef CONFIG_UNWINDER_ORC
3164         /* Skip 1 to skip this function. */
3165         skip++;
3166 #endif
3167         __ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3168                                 tracing_gen_ctx(), skip, NULL);
3169 }
3170 EXPORT_SYMBOL_GPL(trace_dump_stack);
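
/*
 * Illustrative sketch only: a hypothetical caller that records a backtrace
 * into the trace buffer (rather than the console) when it trips over a
 * suspicious state.
 */
static __maybe_unused void example_note_bad_state(bool bad)
{
	if (bad)
		trace_dump_stack(0);	/* no extra helper frames to skip */
}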
3171
3172 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3173 static DEFINE_PER_CPU(int, user_stack_count);
3174
3175 static void
3176 ftrace_trace_userstack(struct trace_array *tr,
3177                        struct trace_buffer *buffer, unsigned int trace_ctx)
3178 {
3179         struct ring_buffer_event *event;
3180         struct userstack_entry *entry;
3181
3182         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3183                 return;
3184
3185         /*
3186          * NMIs cannot handle page faults, even with fixups.
3187          * Saving the user stack can (and often does) fault.
3188          */
3189         if (unlikely(in_nmi()))
3190                 return;
3191
3192         /*
3193          * prevent recursion, since the user stack tracing may
3194          * trigger other kernel events.
3195          */
3196         preempt_disable();
3197         if (__this_cpu_read(user_stack_count))
3198                 goto out;
3199
3200         __this_cpu_inc(user_stack_count);
3201
3202         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3203                                             sizeof(*entry), trace_ctx);
3204         if (!event)
3205                 goto out_drop_count;
3206         entry   = ring_buffer_event_data(event);
3207
3208         entry->tgid             = current->tgid;
3209         memset(&entry->caller, 0, sizeof(entry->caller));
3210
3211         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3212         __buffer_unlock_commit(buffer, event);
3213
3214  out_drop_count:
3215         __this_cpu_dec(user_stack_count);
3216  out:
3217         preempt_enable();
3218 }
3219 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3220 static void ftrace_trace_userstack(struct trace_array *tr,
3221                                    struct trace_buffer *buffer,
3222                                    unsigned int trace_ctx)
3223 {
3224 }
3225 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3226
3227 #endif /* CONFIG_STACKTRACE */
3228
3229 static inline void
3230 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3231                           unsigned long long delta)
3232 {
3233         entry->bottom_delta_ts = delta & U32_MAX;
3234         entry->top_delta_ts = (delta >> 32);
3235 }
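
/*
 * Editor's note: the 64-bit delta is split across two 32-bit fields above;
 * a consumer would presumably recombine it as:
 *
 *	u64 delta = ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
 */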
3236
3237 void trace_last_func_repeats(struct trace_array *tr,
3238                              struct trace_func_repeats *last_info,
3239                              unsigned int trace_ctx)
3240 {
3241         struct trace_buffer *buffer = tr->array_buffer.buffer;
3242         struct func_repeats_entry *entry;
3243         struct ring_buffer_event *event;
3244         u64 delta;
3245
3246         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3247                                             sizeof(*entry), trace_ctx);
3248         if (!event)
3249                 return;
3250
3251         delta = ring_buffer_event_time_stamp(buffer, event) -
3252                 last_info->ts_last_call;
3253
3254         entry = ring_buffer_event_data(event);
3255         entry->ip = last_info->ip;
3256         entry->parent_ip = last_info->parent_ip;
3257         entry->count = last_info->count;
3258         func_repeats_set_delta_ts(entry, delta);
3259
3260         __buffer_unlock_commit(buffer, event);
3261 }
3262
3263 /* created for use with alloc_percpu */
3264 struct trace_buffer_struct {
3265         int nesting;
3266         char buffer[4][TRACE_BUF_SIZE];
3267 };
3268
3269 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3270
3271 /*
3272  * This allows for lockless recording.  If we're nested too deeply, then
3273  * this returns NULL.
3274  */
3275 static char *get_trace_buf(void)
3276 {
3277         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3278
3279         if (!trace_percpu_buffer || buffer->nesting >= 4)
3280                 return NULL;
3281
3282         buffer->nesting++;
3283
3284         /* Interrupts must see nesting incremented before we use the buffer */
3285         barrier();
3286         return &buffer->buffer[buffer->nesting - 1][0];
3287 }
3288
3289 static void put_trace_buf(void)
3290 {
3291         /* Don't let the decrement of nesting leak before this */
3292         barrier();
3293         this_cpu_dec(trace_percpu_buffer->nesting);
3294 }
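
/*
 * Editor's note: the four per-CPU buffers above cover the contexts that can
 * nest on one CPU (assumed here to be task, softirq, hardirq and NMI), which
 * is why get_trace_buf() refuses a fifth level. The usage pattern, mirrored
 * by trace_vbprintk() below, is simply:
 *
 *	char *buf = get_trace_buf();
 *	if (buf) {
 *		// format into buf (up to TRACE_BUF_SIZE bytes) ...
 *		put_trace_buf();
 *	}
 */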
3295
3296 static int alloc_percpu_trace_buffer(void)
3297 {
3298         struct trace_buffer_struct __percpu *buffers;
3299
3300         if (trace_percpu_buffer)
3301                 return 0;
3302
3303         buffers = alloc_percpu(struct trace_buffer_struct);
3304         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3305                 return -ENOMEM;
3306
3307         trace_percpu_buffer = buffers;
3308         return 0;
3309 }
3310
3311 static int buffers_allocated;
3312
3313 void trace_printk_init_buffers(void)
3314 {
3315         if (buffers_allocated)
3316                 return;
3317
3318         if (alloc_percpu_trace_buffer())
3319                 return;
3320
3321         /* trace_printk() is for debug use only. Don't use it in production. */
3322
3323         pr_warn("\n");
3324         pr_warn("**********************************************************\n");
3325         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3326         pr_warn("**                                                      **\n");
3327         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3328         pr_warn("**                                                      **\n");
3329         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3330         pr_warn("** unsafe for production use.                           **\n");
3331         pr_warn("**                                                      **\n");
3332         pr_warn("** If you see this message and you are not debugging    **\n");
3333         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3334         pr_warn("**                                                      **\n");
3335         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3336         pr_warn("**********************************************************\n");
3337
3338         /* Expand the buffers to set size */
3339         tracing_update_buffers(&global_trace);
3340
3341         buffers_allocated = 1;
3342
3343         /*
3344          * trace_printk_init_buffers() can be called by modules.
3345          * If that happens, then we need to start cmdline recording
3346          * directly here. If the global_trace.array_buffer.buffer is already
3347          * allocated, then this was called by module code.
3348          */
3349         if (global_trace.array_buffer.buffer)
3350                 tracing_start_cmdline_record();
3351 }
3352 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3353
3354 void trace_printk_start_comm(void)
3355 {
3356         /* Start tracing comms if trace printk is set */
3357         if (!buffers_allocated)
3358                 return;
3359         tracing_start_cmdline_record();
3360 }
3361
3362 static void trace_printk_start_stop_comm(int enabled)
3363 {
3364         if (!buffers_allocated)
3365                 return;
3366
3367         if (enabled)
3368                 tracing_start_cmdline_record();
3369         else
3370                 tracing_stop_cmdline_record();
3371 }
3372
3373 /**
3374  * trace_vbprintk - write binary msg to tracing buffer
3375  * @ip:    The address of the caller
3376  * @fmt:   The string format to write to the buffer
3377  * @args:  Arguments for @fmt
3378  */
3379 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3380 {
3381         struct ring_buffer_event *event;
3382         struct trace_buffer *buffer;
3383         struct trace_array *tr = READ_ONCE(printk_trace);
3384         struct bprint_entry *entry;
3385         unsigned int trace_ctx;
3386         char *tbuffer;
3387         int len = 0, size;
3388
3389         if (!printk_binsafe(tr))
3390                 return trace_vprintk(ip, fmt, args);
3391
3392         if (unlikely(tracing_selftest_running || tracing_disabled))
3393                 return 0;
3394
3395         /* Don't pollute graph traces with trace_vprintk internals */
3396         pause_graph_tracing();
3397
3398         trace_ctx = tracing_gen_ctx();
3399         preempt_disable_notrace();
3400
3401         tbuffer = get_trace_buf();
3402         if (!tbuffer) {
3403                 len = 0;
3404                 goto out_nobuffer;
3405         }
3406
3407         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3408
3409         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3410                 goto out_put;
3411
3412         size = sizeof(*entry) + sizeof(u32) * len;
3413         buffer = tr->array_buffer.buffer;
3414         ring_buffer_nest_start(buffer);
3415         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3416                                             trace_ctx);
3417         if (!event)
3418                 goto out;
3419         entry = ring_buffer_event_data(event);
3420         entry->ip                       = ip;
3421         entry->fmt                      = fmt;
3422
3423         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3424         __buffer_unlock_commit(buffer, event);
3425         ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3426
3427 out:
3428         ring_buffer_nest_end(buffer);
3429 out_put:
3430         put_trace_buf();
3431
3432 out_nobuffer:
3433         preempt_enable_notrace();
3434         unpause_graph_tracing();
3435
3436         return len;
3437 }
3438 EXPORT_SYMBOL_GPL(trace_vbprintk);
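
/*
 * Editor's note: a minimal sketch of how a varargs wrapper would feed
 * trace_vbprintk(); the function name is hypothetical (the real wrappers
 * live in the trace_printk machinery):
 *
 *	static int example_bprintk(unsigned long ip, const char *fmt, ...)
 *	{
 *		va_list ap;
 *		int ret;
 *
 *		va_start(ap, fmt);
 *		ret = trace_vbprintk(ip, fmt, ap);
 *		va_end(ap);
 *		return ret;
 *	}
 */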
3439
3440 static __printf(3, 0)
3441 int __trace_array_vprintk(struct trace_buffer *buffer,
3442                           unsigned long ip, const char *fmt, va_list args)
3443 {
3444         struct ring_buffer_event *event;
3445         int len = 0, size;
3446         struct print_entry *entry;
3447         unsigned int trace_ctx;
3448         char *tbuffer;
3449
3450         if (tracing_disabled)
3451                 return 0;
3452
3453         /* Don't pollute graph traces with trace_vprintk internals */
3454         pause_graph_tracing();
3455
3456         trace_ctx = tracing_gen_ctx();
3457         preempt_disable_notrace();
3458
3459
3460         tbuffer = get_trace_buf();
3461         if (!tbuffer) {
3462                 len = 0;
3463                 goto out_nobuffer;
3464         }
3465
3466         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3467
3468         size = sizeof(*entry) + len + 1;
3469         ring_buffer_nest_start(buffer);
3470         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3471                                             trace_ctx);
3472         if (!event)
3473                 goto out;
3474         entry = ring_buffer_event_data(event);
3475         entry->ip = ip;
3476
3477         memcpy(&entry->buf, tbuffer, len + 1);
3478         __buffer_unlock_commit(buffer, event);
3479         ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3480
3481 out:
3482         ring_buffer_nest_end(buffer);
3483         put_trace_buf();
3484
3485 out_nobuffer:
3486         preempt_enable_notrace();
3487         unpause_graph_tracing();
3488
3489         return len;
3490 }
3491
3492 int trace_array_vprintk(struct trace_array *tr,
3493                         unsigned long ip, const char *fmt, va_list args)
3494 {
3495         if (tracing_selftest_running && tr == &global_trace)
3496                 return 0;
3497
3498         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3499 }
3500
3501 /**
3502  * trace_array_printk - Print a message to a specific instance
3503  * @tr: The instance trace_array descriptor
3504  * @ip: The instruction pointer that this is called from.
3505  * @fmt: The format to print (printf format)
3506  *
3507  * If a subsystem sets up its own instance, it has the right to
3508  * printk strings into its tracing instance buffer using this
3509  * function. Note, this function will not write into the top level
3510  * buffer (use trace_printk() for that), as the top level buffer
3511  * should only contain events that can be individually disabled.
3512  * trace_printk() is only used for debugging a kernel, and should
3513  * never be incorporated into normal use.
3514  *
3515  * trace_array_printk() can be used, as it will not add noise to the
3516  * top level tracing buffer.
3517  *
3518  * Note, trace_array_init_printk() must be called on @tr before this
3519  * can be used.
3520  */
3521 int trace_array_printk(struct trace_array *tr,
3522                        unsigned long ip, const char *fmt, ...)
3523 {
3524         int ret;
3525         va_list ap;
3526
3527         if (!tr)
3528                 return -ENOENT;
3529
3530         /* This is only allowed for created instances */
3531         if (tr == &global_trace)
3532                 return 0;
3533
3534         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3535                 return 0;
3536
3537         va_start(ap, fmt);
3538         ret = trace_array_vprintk(tr, ip, fmt, ap);
3539         va_end(ap);
3540         return ret;
3541 }
3542 EXPORT_SYMBOL_GPL(trace_array_printk);
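
/*
 * Editor's note: a minimal usage sketch for a subsystem-owned instance
 * (the instance pointer and message are hypothetical; error handling is
 * trimmed). Note the required trace_array_init_printk() call:
 *
 *	struct trace_array *tr = my_subsys_instance;	// a created instance
 *
 *	if (!trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "queue depth=%d\n", depth);
 */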
3543
3544 /**
3545  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3546  * @tr: The trace array to initialize the buffers for
3547  *
3548  * As trace_array_printk() only writes into instances, calls to it are OK
3549  * to have in the kernel (unlike trace_printk()). This needs to be called
3550  * before trace_array_printk() can be used on a trace_array.
3551  */
3552 int trace_array_init_printk(struct trace_array *tr)
3553 {
3554         if (!tr)
3555                 return -ENOENT;
3556
3557         /* This is only allowed for created instances */
3558         if (tr == &global_trace)
3559                 return -EINVAL;
3560
3561         return alloc_percpu_trace_buffer();
3562 }
3563 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3564
3565 int trace_array_printk_buf(struct trace_buffer *buffer,
3566                            unsigned long ip, const char *fmt, ...)
3567 {
3568         int ret;
3569         va_list ap;
3570
3571         if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3572                 return 0;
3573
3574         va_start(ap, fmt);
3575         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3576         va_end(ap);
3577         return ret;
3578 }
3579
3580 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3581 {
3582         return trace_array_vprintk(printk_trace, ip, fmt, args);
3583 }
3584 EXPORT_SYMBOL_GPL(trace_vprintk);
3585
3586 static void trace_iterator_increment(struct trace_iterator *iter)
3587 {
3588         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3589
3590         iter->idx++;
3591         if (buf_iter)
3592                 ring_buffer_iter_advance(buf_iter);
3593 }
3594
3595 static struct trace_entry *
3596 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3597                 unsigned long *lost_events)
3598 {
3599         struct ring_buffer_event *event;
3600         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3601
3602         if (buf_iter) {
3603                 event = ring_buffer_iter_peek(buf_iter, ts);
3604                 if (lost_events)
3605                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3606                                 (unsigned long)-1 : 0;
3607         } else {
3608                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3609                                          lost_events);
3610         }
3611
3612         if (event) {
3613                 iter->ent_size = ring_buffer_event_length(event);
3614                 return ring_buffer_event_data(event);
3615         }
3616         iter->ent_size = 0;
3617         return NULL;
3618 }
3619
3620 static struct trace_entry *
3621 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3622                   unsigned long *missing_events, u64 *ent_ts)
3623 {
3624         struct trace_buffer *buffer = iter->array_buffer->buffer;
3625         struct trace_entry *ent, *next = NULL;
3626         unsigned long lost_events = 0, next_lost = 0;
3627         int cpu_file = iter->cpu_file;
3628         u64 next_ts = 0, ts;
3629         int next_cpu = -1;
3630         int next_size = 0;
3631         int cpu;
3632
3633         /*
3634          * If we are in a per_cpu trace file, don't bother iterating over
3635          * all CPUs; just peek at that CPU directly.
3636          */
3637         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3638                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3639                         return NULL;
3640                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3641                 if (ent_cpu)
3642                         *ent_cpu = cpu_file;
3643
3644                 return ent;
3645         }
3646
3647         for_each_tracing_cpu(cpu) {
3648
3649                 if (ring_buffer_empty_cpu(buffer, cpu))
3650                         continue;
3651
3652                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3653
3654                 /*
3655                  * Pick the entry with the smallest timestamp:
3656                  */
3657                 if (ent && (!next || ts < next_ts)) {
3658                         next = ent;
3659                         next_cpu = cpu;
3660                         next_ts = ts;
3661                         next_lost = lost_events;
3662                         next_size = iter->ent_size;
3663                 }
3664         }
3665
3666         iter->ent_size = next_size;
3667
3668         if (ent_cpu)
3669                 *ent_cpu = next_cpu;
3670
3671         if (ent_ts)
3672                 *ent_ts = next_ts;
3673
3674         if (missing_events)
3675                 *missing_events = next_lost;
3676
3677         return next;
3678 }
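
/*
 * Editor's note: the loop above is effectively a k-way merge across the
 * per-CPU buffers by timestamp. For example, with CPU0 holding events at
 * ts 5 and 9 and CPU1 holding one at ts 7, successive calls return the
 * entries in the order 5, 7, 9.
 */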
3679
3680 #define STATIC_FMT_BUF_SIZE     128
3681 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3682
3683 char *trace_iter_expand_format(struct trace_iterator *iter)
3684 {
3685         char *tmp;
3686
3687         /*
3688          * iter->tr is NULL when used with tp_printk, which makes
3689          * this get called where it is not safe to call krealloc().
3690          */
3691         if (!iter->tr || iter->fmt == static_fmt_buf)
3692                 return NULL;
3693
3694         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3695                        GFP_KERNEL);
3696         if (tmp) {
3697                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3698                 iter->fmt = tmp;
3699         }
3700
3701         return tmp;
3702 }
3703
3704 /* Returns true if the string is safe to dereference from an event */
3705 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3706 {
3707         unsigned long addr = (unsigned long)str;
3708         struct trace_event *trace_event;
3709         struct trace_event_call *event;
3710
3711         /* OK if part of the event data */
3712         if ((addr >= (unsigned long)iter->ent) &&
3713             (addr < (unsigned long)iter->ent + iter->ent_size))
3714                 return true;
3715
3716         /* OK if part of the temp seq buffer */
3717         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3718             (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3719                 return true;
3720
3721         /* Core rodata can not be freed */
3722         if (is_kernel_rodata(addr))
3723                 return true;
3724
3725         if (trace_is_tracepoint_string(str))
3726                 return true;
3727
3728         /*
3729          * Now this could be a module event, referencing core module
3730          * data, which is OK.
3731          */
3732         if (!iter->ent)
3733                 return false;
3734
3735         trace_event = ftrace_find_event(iter->ent->type);
3736         if (!trace_event)
3737                 return false;
3738
3739         event = container_of(trace_event, struct trace_event_call, event);
3740         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3741                 return false;
3742
3743         /* Would rather have rodata, but this will suffice */
3744         if (within_module_core(addr, event->module))
3745                 return true;
3746
3747         return false;
3748 }
3749
3750 /**
3751  * ignore_event - Check dereferenced fields while writing to the seq buffer
3752  * @iter: The iterator that holds the seq buffer and the event being printed
3753  *
3754  * At boot up, test_event_printk() will flag any event that dereferences
3755  * a string with "%s" that does not exist in the ring buffer. It may still
3756  * be valid, as the string may point to a static string in the kernel
3757  * rodata that never gets freed. But if the string pointer is pointing
3758  * to something that was allocated, there's a chance that it can be freed
3759  * by the time the user reads the trace. This would cause a bad memory
3760  * access by the kernel and possibly crash the system.
3761  *
3762  * This function will check if the event has any fields flagged as needing
3763  * to be checked at runtime and perform those checks.
3764  *
3765  * If it is found that a field is unsafe, it will write into the @iter->seq
3766  * a message stating what was found to be unsafe.
3767  *
3768  * @return: true if the event is unsafe and should be ignored,
3769  *          false otherwise.
3770  */
3771 bool ignore_event(struct trace_iterator *iter)
3772 {
3773         struct ftrace_event_field *field;
3774         struct trace_event *trace_event;
3775         struct trace_event_call *event;
3776         struct list_head *head;
3777         struct trace_seq *seq;
3778         const void *ptr;
3779
3780         trace_event = ftrace_find_event(iter->ent->type);
3781
3782         seq = &iter->seq;
3783
3784         if (!trace_event) {
3785                 trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3786                 return true;
3787         }
3788
3789         event = container_of(trace_event, struct trace_event_call, event);
3790         if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3791                 return false;
3792
3793         head = trace_get_fields(event);
3794         if (!head) {
3795                 trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3796                                  trace_event_name(event));
3797                 return true;
3798         }
3799
3800         /* Offsets are from the iter->ent that points to the raw event */
3801         ptr = iter->ent;
3802
3803         list_for_each_entry(field, head, link) {
3804                 const char *str;
3805                 bool good;
3806
3807                 if (!field->needs_test)
3808                         continue;
3809
3810                 str = *(const char **)(ptr + field->offset);
3811
3812                 good = trace_safe_str(iter, str);
3813
3814                 /*
3815                  * If you hit this warning, it is likely that the
3816                  * trace event in question used %s on a string that
3817                  * was saved at the time of the event, but may not be
3818                  * around when the trace is read. Use __string(),
3819                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3820                  * instead. See samples/trace_events/trace-events-sample.h
3821                  * for reference.
3822                  */
3823                 if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3824                               trace_event_name(event), field->name)) {
3825                         trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3826                                          trace_event_name(event), field->name);
3827                         return true;
3828                 }
3829         }
3830         return false;
3831 }
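
/*
 * Editor's note: an illustrative TRACE_EVENT() fragment showing the safe
 * string pattern recommended by the warning above (the event and field
 * names are hypothetical; also note that older kernels pass the source
 * string as a second argument to __assign_str()):
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */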
3832
3833 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3834 {
3835         const char *p, *new_fmt;
3836         char *q;
3837
3838         if (WARN_ON_ONCE(!fmt))
3839                 return fmt;
3840
3841         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3842                 return fmt;
3843
3844         p = fmt;
3845         new_fmt = q = iter->fmt;
3846         while (*p) {
3847                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3848                         if (!trace_iter_expand_format(iter))
3849                                 return fmt;
3850
3851                         q += iter->fmt - new_fmt;
3852                         new_fmt = iter->fmt;
3853                 }
3854
3855                 *q++ = *p++;
3856
3857                 /* Replace %p with %px */
3858                 if (p[-1] == '%') {
3859                         if (p[0] == '%') {
3860                                 *q++ = *p++;
3861                         } else if (p[0] == 'p' && !isalnum(p[1])) {
3862                                 *q++ = *p++;
3863                                 *q++ = 'x';
3864                         }
3865                 }
3866         }
3867         *q = '\0';
3868
3869         return new_fmt;
3870 }
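
/*
 * Editor's note: with hash-ptr disabled, the rewrite above turns e.g.
 * "addr=%p flags=%%p" into "addr=%px flags=%%p": a lone %p gains an 'x'
 * so the real address is printed, a literal "%%p" is left alone, and
 * pointer extensions such as %pS are untouched (the isalnum() check).
 */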
3871
3872 #define STATIC_TEMP_BUF_SIZE    128
3873 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3874
3875 /* Find the next real entry, without updating the iterator itself */
3876 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3877                                           int *ent_cpu, u64 *ent_ts)
3878 {
3879         /* __find_next_entry will reset ent_size */
3880         int ent_size = iter->ent_size;
3881         struct trace_entry *entry;
3882
3883         /*
3884          * If called from ftrace_dump(), then the iter->temp buffer
3885          * will be the static_temp_buf and not created from kmalloc.
3886          * If the entry size is greater than the buffer, we cannot
3887          * save it. Just return NULL in that case. This is only
3888          * used to add markers when two consecutive events' time
3889          * stamps have a large delta. See trace_print_lat_context().
3890          */
3891         if (iter->temp == static_temp_buf &&
3892             STATIC_TEMP_BUF_SIZE < ent_size)
3893                 return NULL;
3894
3895         /*
3896          * The __find_next_entry() may call peek_next_entry(), which may
3897          * call ring_buffer_peek(), and that may make the contents of
3898          * iter->ent undefined. We need to copy iter->ent now.
3899          */
3900         if (iter->ent && iter->ent != iter->temp) {
3901                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3902                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3903                         void *temp;
3904                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
3905                         if (!temp)
3906                                 return NULL;
3907                         kfree(iter->temp);
3908                         iter->temp = temp;
3909                         iter->temp_size = iter->ent_size;
3910                 }
3911                 memcpy(iter->temp, iter->ent, iter->ent_size);
3912                 iter->ent = iter->temp;
3913         }
3914         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3915         /* Put back the original ent_size */
3916         iter->ent_size = ent_size;
3917
3918         return entry;
3919 }
3920
3921 /* Find the next real entry, and increment the iterator to the next entry */
3922 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3923 {
3924         iter->ent = __find_next_entry(iter, &iter->cpu,
3925                                       &iter->lost_events, &iter->ts);
3926
3927         if (iter->ent)
3928                 trace_iterator_increment(iter);
3929
3930         return iter->ent ? iter : NULL;
3931 }
3932
3933 static void trace_consume(struct trace_iterator *iter)
3934 {
3935         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3936                             &iter->lost_events);
3937 }
3938
3939 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3940 {
3941         struct trace_iterator *iter = m->private;
3942         int i = (int)*pos;
3943         void *ent;
3944
3945         WARN_ON_ONCE(iter->leftover);
3946
3947         (*pos)++;
3948
3949         /* can't go backwards */
3950         if (iter->idx > i)
3951                 return NULL;
3952
3953         if (iter->idx < 0)
3954                 ent = trace_find_next_entry_inc(iter);
3955         else
3956                 ent = iter;
3957
3958         while (ent && iter->idx < i)
3959                 ent = trace_find_next_entry_inc(iter);
3960
3961         iter->pos = *pos;
3962
3963         return ent;
3964 }
3965
3966 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3967 {
3968         struct ring_buffer_iter *buf_iter;
3969         unsigned long entries = 0;
3970         u64 ts;
3971
3972         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3973
3974         buf_iter = trace_buffer_iter(iter, cpu);
3975         if (!buf_iter)
3976                 return;
3977
3978         ring_buffer_iter_reset(buf_iter);
3979
3980         /*
3981          * With the max latency tracers, we could have the case that
3982          * a reset never took place on a CPU. This is evident from
3983          * the timestamp being before the start of the buffer.
3984          */
3985         while (ring_buffer_iter_peek(buf_iter, &ts)) {
3986                 if (ts >= iter->array_buffer->time_start)
3987                         break;
3988                 entries++;
3989                 ring_buffer_iter_advance(buf_iter);
3990                 /* This could be a big loop */
3991                 cond_resched();
3992         }
3993
3994         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3995 }
3996
3997 /*
3998  * The current tracer is copied into the iterator to avoid
3999  * holding a global lock all around.
4000  */
4001 static void *s_start(struct seq_file *m, loff_t *pos)
4002 {
4003         struct trace_iterator *iter = m->private;
4004         struct trace_array *tr = iter->tr;
4005         int cpu_file = iter->cpu_file;
4006         void *p = NULL;
4007         loff_t l = 0;
4008         int cpu;
4009
4010         mutex_lock(&trace_types_lock);
4011         if (unlikely(tr->current_trace != iter->trace)) {
4012                 /* Close iter->trace before switching to the new current tracer */
4013                 if (iter->trace->close)
4014                         iter->trace->close(iter);
4015                 iter->trace = tr->current_trace;
4016                 /* Reopen the new current tracer */
4017                 if (iter->trace->open)
4018                         iter->trace->open(iter);
4019         }
4020         mutex_unlock(&trace_types_lock);
4021
4022 #ifdef CONFIG_TRACER_MAX_TRACE
4023         if (iter->snapshot && iter->trace->use_max_tr)
4024                 return ERR_PTR(-EBUSY);
4025 #endif
4026
4027         if (*pos != iter->pos) {
4028                 iter->ent = NULL;
4029                 iter->cpu = 0;
4030                 iter->idx = -1;
4031
4032                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4033                         for_each_tracing_cpu(cpu)
4034                                 tracing_iter_reset(iter, cpu);
4035                 } else
4036                         tracing_iter_reset(iter, cpu_file);
4037
4038                 iter->leftover = 0;
4039                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4040                         ;
4041
4042         } else {
4043                 /*
4044                  * If we overflowed the seq_file before, then we want
4045                  * to just reuse the trace_seq buffer again.
4046                  */
4047                 if (iter->leftover)
4048                         p = iter;
4049                 else {
4050                         l = *pos - 1;
4051                         p = s_next(m, p, &l);
4052                 }
4053         }
4054
4055         trace_event_read_lock();
4056         trace_access_lock(cpu_file);
4057         return p;
4058 }
4059
4060 static void s_stop(struct seq_file *m, void *p)
4061 {
4062         struct trace_iterator *iter = m->private;
4063
4064 #ifdef CONFIG_TRACER_MAX_TRACE
4065         if (iter->snapshot && iter->trace->use_max_tr)
4066                 return;
4067 #endif
4068
4069         trace_access_unlock(iter->cpu_file);
4070         trace_event_read_unlock();
4071 }
4072
4073 static void
4074 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4075                       unsigned long *entries, int cpu)
4076 {
4077         unsigned long count;
4078
4079         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4080         /*
4081          * If this buffer has skipped entries, then we hold all
4082          * entries for the trace and we need to ignore the
4083          * ones before the time stamp.
4084          */
4085         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4086                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4087                 /* total is the same as the entries */
4088                 *total = count;
4089         } else
4090                 *total = count +
4091                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4092         *entries = count;
4093 }
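
/*
 * Editor's note: a small worked example of the accounting above: if a CPU
 * buffer currently holds 1000 entries and 200 older entries were overwritten
 * (overrun), then *entries = 1000 and *total = 1200. When entries were
 * deliberately skipped (see tracing_iter_reset()), total equals entries.
 */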
4094
4095 static void
4096 get_total_entries(struct array_buffer *buf,
4097                   unsigned long *total, unsigned long *entries)
4098 {
4099         unsigned long t, e;
4100         int cpu;
4101
4102         *total = 0;
4103         *entries = 0;
4104
4105         for_each_tracing_cpu(cpu) {
4106                 get_total_entries_cpu(buf, &t, &e, cpu);
4107                 *total += t;
4108                 *entries += e;
4109         }
4110 }
4111
4112 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4113 {
4114         unsigned long total, entries;
4115
4116         if (!tr)
4117                 tr = &global_trace;
4118
4119         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4120
4121         return entries;
4122 }
4123
4124 unsigned long trace_total_entries(struct trace_array *tr)
4125 {
4126         unsigned long total, entries;
4127
4128         if (!tr)
4129                 tr = &global_trace;
4130
4131         get_total_entries(&tr->array_buffer, &total, &entries);
4132
4133         return entries;
4134 }
4135
4136 static void print_lat_help_header(struct seq_file *m)
4137 {
4138         seq_puts(m, "#                    _------=> CPU#            \n"
4139                     "#                   / _-----=> irqs-off/BH-disabled\n"
4140                     "#                  | / _----=> need-resched    \n"
4141                     "#                  || / _---=> hardirq/softirq \n"
4142                     "#                  ||| / _--=> preempt-depth   \n"
4143                     "#                  |||| / _-=> migrate-disable \n"
4144                     "#                  ||||| /     delay           \n"
4145                     "#  cmd     pid     |||||| time  |   caller     \n"
4146                     "#     \\   /        ||||||  \\    |    /       \n");
4147 }
4148
4149 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4150 {
4151         unsigned long total;
4152         unsigned long entries;
4153
4154         get_total_entries(buf, &total, &entries);
4155         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4156                    entries, total, num_online_cpus());
4157         seq_puts(m, "#\n");
4158 }
4159
4160 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4161                                    unsigned int flags)
4162 {
4163         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4164
4165         print_event_info(buf, m);
4166
4167         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4168         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4169 }
4170
4171 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4172                                        unsigned int flags)
4173 {
4174         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4175         static const char space[] = "            ";
4176         int prec = tgid ? 12 : 2;
4177
4178         print_event_info(buf, m);
4179
4180         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4181         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4182         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4183         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4184         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4185         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4186         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4187         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4188 }
4189
4190 void
4191 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4192 {
4193         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4194         struct array_buffer *buf = iter->array_buffer;
4195         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4196         struct tracer *type = iter->trace;
4197         unsigned long entries;
4198         unsigned long total;
4199         const char *name = type->name;
4200
4201         get_total_entries(buf, &total, &entries);
4202
4203         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4204                    name, init_utsname()->release);
4205         seq_puts(m, "# -----------------------------------"
4206                  "---------------------------------\n");
4207         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4208                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4209                    nsecs_to_usecs(data->saved_latency),
4210                    entries,
4211                    total,
4212                    buf->cpu,
4213                    preempt_model_str(),
4214                    /* These are reserved for later use */
4215                    0, 0, 0, 0);
4216 #ifdef CONFIG_SMP
4217         seq_printf(m, " #P:%d)\n", num_online_cpus());
4218 #else
4219         seq_puts(m, ")\n");
4220 #endif
4221         seq_puts(m, "#    -----------------\n");
4222         seq_printf(m, "#    | task: %.16s-%d "
4223                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4224                    data->comm, data->pid,
4225                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4226                    data->policy, data->rt_priority);
4227         seq_puts(m, "#    -----------------\n");
4228
4229         if (data->critical_start) {
4230                 seq_puts(m, "#  => started at: ");
4231                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4232                 trace_print_seq(m, &iter->seq);
4233                 seq_puts(m, "\n#  => ended at:   ");
4234                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4235                 trace_print_seq(m, &iter->seq);
4236                 seq_puts(m, "\n#\n");
4237         }
4238
4239         seq_puts(m, "#\n");
4240 }
4241
4242 static void test_cpu_buff_start(struct trace_iterator *iter)
4243 {
4244         struct trace_seq *s = &iter->seq;
4245         struct trace_array *tr = iter->tr;
4246
4247         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4248                 return;
4249
4250         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4251                 return;
4252
4253         if (cpumask_available(iter->started) &&
4254             cpumask_test_cpu(iter->cpu, iter->started))
4255                 return;
4256
4257         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4258                 return;
4259
4260         if (cpumask_available(iter->started))
4261                 cpumask_set_cpu(iter->cpu, iter->started);
4262
4263         /* Don't print started cpu buffer for the first entry of the trace */
4264         if (iter->idx > 1)
4265                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4266                                 iter->cpu);
4267 }
4268
4269 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4270 {
4271         struct trace_array *tr = iter->tr;
4272         struct trace_seq *s = &iter->seq;
4273         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4274         struct trace_entry *entry;
4275         struct trace_event *event;
4276
4277         entry = iter->ent;
4278
4279         test_cpu_buff_start(iter);
4280
4281         event = ftrace_find_event(entry->type);
4282
4283         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4284                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4285                         trace_print_lat_context(iter);
4286                 else
4287                         trace_print_context(iter);
4288         }
4289
4290         if (trace_seq_has_overflowed(s))
4291                 return TRACE_TYPE_PARTIAL_LINE;
4292
4293         if (event) {
4294                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4295                         return print_event_fields(iter, event);
4296                 /*
4297                  * For TRACE_EVENT() events, the print_fmt is not
4298                  * safe to use if the array has delta offsets.
4299                  * Force printing via the fields.
4300                  */
4301                 if ((tr->text_delta) &&
4302                     event->type > __TRACE_LAST_TYPE)
4303                         return print_event_fields(iter, event);
4304
4305                 return event->funcs->trace(iter, sym_flags, event);
4306         }
4307
4308         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4309
4310         return trace_handle_return(s);
4311 }
4312
4313 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4314 {
4315         struct trace_array *tr = iter->tr;
4316         struct trace_seq *s = &iter->seq;
4317         struct trace_entry *entry;
4318         struct trace_event *event;
4319
4320         entry = iter->ent;
4321
4322         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4323                 trace_seq_printf(s, "%d %d %llu ",
4324                                  entry->pid, iter->cpu, iter->ts);
4325
4326         if (trace_seq_has_overflowed(s))
4327                 return TRACE_TYPE_PARTIAL_LINE;
4328
4329         event = ftrace_find_event(entry->type);
4330         if (event)
4331                 return event->funcs->raw(iter, 0, event);
4332
4333         trace_seq_printf(s, "%d ?\n", entry->type);
4334
4335         return trace_handle_return(s);
4336 }
4337
4338 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4339 {
4340         struct trace_array *tr = iter->tr;
4341         struct trace_seq *s = &iter->seq;
4342         unsigned char newline = '\n';
4343         struct trace_entry *entry;
4344         struct trace_event *event;
4345
4346         entry = iter->ent;
4347
4348         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4349                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4350                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4351                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4352                 if (trace_seq_has_overflowed(s))
4353                         return TRACE_TYPE_PARTIAL_LINE;
4354         }
4355
4356         event = ftrace_find_event(entry->type);
4357         if (event) {
4358                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4359                 if (ret != TRACE_TYPE_HANDLED)
4360                         return ret;
4361         }
4362
4363         SEQ_PUT_FIELD(s, newline);
4364
4365         return trace_handle_return(s);
4366 }
4367
4368 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4369 {
4370         struct trace_array *tr = iter->tr;
4371         struct trace_seq *s = &iter->seq;
4372         struct trace_entry *entry;
4373         struct trace_event *event;
4374
4375         entry = iter->ent;
4376
4377         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4378                 SEQ_PUT_FIELD(s, entry->pid);
4379                 SEQ_PUT_FIELD(s, iter->cpu);
4380                 SEQ_PUT_FIELD(s, iter->ts);
4381                 if (trace_seq_has_overflowed(s))
4382                         return TRACE_TYPE_PARTIAL_LINE;
4383         }
4384
4385         event = ftrace_find_event(entry->type);
4386         return event ? event->funcs->binary(iter, 0, event) :
4387                 TRACE_TYPE_HANDLED;
4388 }
4389
4390 int trace_empty(struct trace_iterator *iter)
4391 {
4392         struct ring_buffer_iter *buf_iter;
4393         int cpu;
4394
4395         /* If we are looking at one CPU buffer, only check that one */
4396         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4397                 cpu = iter->cpu_file;
4398                 buf_iter = trace_buffer_iter(iter, cpu);
4399                 if (buf_iter) {
4400                         if (!ring_buffer_iter_empty(buf_iter))
4401                                 return 0;
4402                 } else {
4403                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4404                                 return 0;
4405                 }
4406                 return 1;
4407         }
4408
4409         for_each_tracing_cpu(cpu) {
4410                 buf_iter = trace_buffer_iter(iter, cpu);
4411                 if (buf_iter) {
4412                         if (!ring_buffer_iter_empty(buf_iter))
4413                                 return 0;
4414                 } else {
4415                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4416                                 return 0;
4417                 }
4418         }
4419
4420         return 1;
4421 }
4422
4423 /*  Called with trace_event_read_lock() held. */
4424 enum print_line_t print_trace_line(struct trace_iterator *iter)
4425 {
4426         struct trace_array *tr = iter->tr;
4427         unsigned long trace_flags = tr->trace_flags;
4428         enum print_line_t ret;
4429
4430         if (iter->lost_events) {
4431                 if (iter->lost_events == (unsigned long)-1)
4432                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4433                                          iter->cpu);
4434                 else
4435                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4436                                          iter->cpu, iter->lost_events);
4437                 if (trace_seq_has_overflowed(&iter->seq))
4438                         return TRACE_TYPE_PARTIAL_LINE;
4439         }
4440
4441         if (iter->trace && iter->trace->print_line) {
4442                 ret = iter->trace->print_line(iter);
4443                 if (ret != TRACE_TYPE_UNHANDLED)
4444                         return ret;
4445         }
4446
4447         if (iter->ent->type == TRACE_BPUTS &&
4448                         trace_flags & TRACE_ITER_PRINTK &&
4449                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4450                 return trace_print_bputs_msg_only(iter);
4451
4452         if (iter->ent->type == TRACE_BPRINT &&
4453                         trace_flags & TRACE_ITER_PRINTK &&
4454                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4455                 return trace_print_bprintk_msg_only(iter);
4456
4457         if (iter->ent->type == TRACE_PRINT &&
4458                         trace_flags & TRACE_ITER_PRINTK &&
4459                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4460                 return trace_print_printk_msg_only(iter);
4461
4462         if (trace_flags & TRACE_ITER_BIN)
4463                 return print_bin_fmt(iter);
4464
4465         if (trace_flags & TRACE_ITER_HEX)
4466                 return print_hex_fmt(iter);
4467
4468         if (trace_flags & TRACE_ITER_RAW)
4469                 return print_raw_fmt(iter);
4470
4471         return print_trace_fmt(iter);
4472 }
4473
4474 void trace_latency_header(struct seq_file *m)
4475 {
4476         struct trace_iterator *iter = m->private;
4477         struct trace_array *tr = iter->tr;
4478
4479         /* print nothing if the buffers are empty */
4480         if (trace_empty(iter))
4481                 return;
4482
4483         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4484                 print_trace_header(m, iter);
4485
4486         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4487                 print_lat_help_header(m);
4488 }
4489
4490 void trace_default_header(struct seq_file *m)
4491 {
4492         struct trace_iterator *iter = m->private;
4493         struct trace_array *tr = iter->tr;
4494         unsigned long trace_flags = tr->trace_flags;
4495
4496         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4497                 return;
4498
4499         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4500                 /* print nothing if the buffers are empty */
4501                 if (trace_empty(iter))
4502                         return;
4503                 print_trace_header(m, iter);
4504                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4505                         print_lat_help_header(m);
4506         } else {
4507                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4508                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4509                                 print_func_help_header_irq(iter->array_buffer,
4510                                                            m, trace_flags);
4511                         else
4512                                 print_func_help_header(iter->array_buffer, m,
4513                                                        trace_flags);
4514                 }
4515         }
4516 }
4517
4518 static void test_ftrace_alive(struct seq_file *m)
4519 {
4520         if (!ftrace_is_dead())
4521                 return;
4522         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4523                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4524 }
4525
4526 #ifdef CONFIG_TRACER_MAX_TRACE
4527 static void show_snapshot_main_help(struct seq_file *m)
4528 {
4529         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4530                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4531                     "#                      Takes a snapshot of the main buffer.\n"
4532                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4533                     "#                      (Doesn't have to be '2'; works with any number that\n"
4534                     "#                       is not a '0' or '1')\n");
4535 }
4536
4537 static void show_snapshot_percpu_help(struct seq_file *m)
4538 {
4539         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4540 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4541         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4542                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4543 #else
4544         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4545                     "#                     Must use main snapshot file to allocate.\n");
4546 #endif
4547         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4548                     "#                      (Doesn't have to be '2'; works with any number that\n"
4549                     "#                       is not a '0' or '1')\n");
4550 }
4551
4552 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4553 {
4554         if (iter->tr->allocated_snapshot)
4555                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4556         else
4557                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4558
4559         seq_puts(m, "# Snapshot commands:\n");
4560         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4561                 show_snapshot_main_help(m);
4562         else
4563                 show_snapshot_percpu_help(m);
4564 }
4565 #else
4566 /* Should never be called */
4567 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4568 #endif
4569
4570 static int s_show(struct seq_file *m, void *v)
4571 {
4572         struct trace_iterator *iter = v;
4573         int ret;
4574
4575         if (iter->ent == NULL) {
4576                 if (iter->tr) {
4577                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4578                         seq_puts(m, "#\n");
4579                         test_ftrace_alive(m);
4580                 }
4581                 if (iter->snapshot && trace_empty(iter))
4582                         print_snapshot_help(m, iter);
4583                 else if (iter->trace && iter->trace->print_header)
4584                         iter->trace->print_header(m);
4585                 else
4586                         trace_default_header(m);
4587
4588         } else if (iter->leftover) {
4589                 /*
4590                  * If we filled the seq_file buffer earlier, we
4591                  * want to just show it now.
4592                  */
4593                 ret = trace_print_seq(m, &iter->seq);
4594
4595                 /* ret should this time be zero, but you never know */
4596                 iter->leftover = ret;
4597
4598         } else {
4599                 ret = print_trace_line(iter);
4600                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4601                         iter->seq.full = 0;
4602                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4603                 }
4604                 ret = trace_print_seq(m, &iter->seq);
4605                 /*
4606                  * If we overflow the seq_file buffer, then it will
4607                  * ask us for this data again at the next s_start().
4608                  * Use that instead.
4609                  *  ret is 0 if seq_file write succeeded.
4610                  *        -1 otherwise.
4611                  */
4612                 iter->leftover = ret;
4613         }
4614
4615         return 0;
4616 }
4617
4618 /*
4619  * Should be used after trace_array_get(); trace_types_lock
4620  * ensures that i_cdev was already initialized.
4621  */
4622 static inline int tracing_get_cpu(struct inode *inode)
4623 {
4624         if (inode->i_cdev) /* See trace_create_cpu_file() */
4625                 return (long)inode->i_cdev - 1;
4626         return RING_BUFFER_ALL_CPUS;
4627 }
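
/*
 * Editor's note: the counterpart, trace_create_cpu_file() (not shown in
 * this section), presumably stores the CPU number biased by one, e.g.
 *
 *	inode->i_cdev = (void *)(long)(cpu + 1);
 *
 * so that a NULL i_cdev still maps to RING_BUFFER_ALL_CPUS here.
 */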
4628
4629 static const struct seq_operations tracer_seq_ops = {
4630         .start          = s_start,
4631         .next           = s_next,
4632         .stop           = s_stop,
4633         .show           = s_show,
4634 };
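
/*
 * Editor's note: for readers unfamiliar with seq_file, a read of the trace
 * file drives these callbacks roughly as:
 *
 *	p = s_start(m, &pos);
 *	while (p) {
 *		s_show(m, p);
 *		p = s_next(m, p, &pos);
 *	}
 *	s_stop(m, p);
 *
 * which is why s_start() re-syncs the iterator when *pos has moved and why
 * s_show() stashes overflow in iter->leftover for the next cycle.
 */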
4635
4636 /*
4637  * Note, as iter itself can be allocated and freed in different
4638  * ways, this function is only used to free its content, and not
4639  * the iterator itself. The only requirement for all the allocations
4640  * is that they must zero all fields (kzalloc), as freeing works with
4641  * either allocated content or NULL.
4642  */
4643 static void free_trace_iter_content(struct trace_iterator *iter)
4644 {
4645         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4646         if (iter->fmt != static_fmt_buf)
4647                 kfree(iter->fmt);
4648
4649         kfree(iter->temp);
4650         kfree(iter->buffer_iter);
4651         mutex_destroy(&iter->mutex);
4652         free_cpumask_var(iter->started);
4653 }
4654
4655 static struct trace_iterator *
4656 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4657 {
4658         struct trace_array *tr = inode->i_private;
4659         struct trace_iterator *iter;
4660         int cpu;
4661
4662         if (tracing_disabled)
4663                 return ERR_PTR(-ENODEV);
4664
4665         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4666         if (!iter)
4667                 return ERR_PTR(-ENOMEM);
4668
4669         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4670                                     GFP_KERNEL);
4671         if (!iter->buffer_iter)
4672                 goto release;
4673
4674         /*
4675          * trace_find_next_entry() may need to save off iter->ent.
4676          * It will place it into the iter->temp buffer. As most
4677          * events are less than 128 bytes, allocate a buffer of that size.
4678          * If one is greater, then trace_find_next_entry() will
4679          * allocate a new buffer to adjust for the bigger iter->ent.
4680          * It's not critical if it fails to get allocated here.
4681          */
4682         iter->temp = kmalloc(128, GFP_KERNEL);
4683         if (iter->temp)
4684                 iter->temp_size = 128;
4685
4686         /*
4687          * trace_event_printf() may need to modify given format
4688          * string to replace %p with %px so that it shows real address
4689          * instead of hash value. However, that is only for the event
4690          * tracing; other tracers may not need it. Defer the allocation
4691          * until it is needed.
4692          */
4693         iter->fmt = NULL;
4694         iter->fmt_size = 0;
4695
4696         mutex_lock(&trace_types_lock);
4697         iter->trace = tr->current_trace;
4698
4699         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4700                 goto fail;
4701
4702         iter->tr = tr;
4703
4704 #ifdef CONFIG_TRACER_MAX_TRACE
4705         /* Currently only the top directory has a snapshot */
4706         if (tr->current_trace->print_max || snapshot)
4707                 iter->array_buffer = &tr->max_buffer;
4708         else
4709 #endif
4710                 iter->array_buffer = &tr->array_buffer;
4711         iter->snapshot = snapshot;
4712         iter->pos = -1;
4713         iter->cpu_file = tracing_get_cpu(inode);
4714         mutex_init(&iter->mutex);
4715
4716         /* Notify the tracer early; before we stop tracing. */
4717         if (iter->trace->open)
4718                 iter->trace->open(iter);
4719
4720         /* Annotate start of buffers if we had overruns */
4721         if (ring_buffer_overruns(iter->array_buffer->buffer))
4722                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4723
4724         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4725         if (trace_clocks[tr->clock_id].in_ns)
4726                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4727
4728         /*
4729          * If pause-on-trace is enabled, then stop the trace while
4730          * dumping, unless this is the "snapshot" file
4731          */
4732         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4733                 tracing_stop_tr(tr);
4734
4735         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4736                 for_each_tracing_cpu(cpu) {
4737                         iter->buffer_iter[cpu] =
4738                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4739                                                          cpu, GFP_KERNEL);
4740                 }
4741                 ring_buffer_read_prepare_sync();
4742                 for_each_tracing_cpu(cpu) {
4743                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4744                         tracing_iter_reset(iter, cpu);
4745                 }
4746         } else {
4747                 cpu = iter->cpu_file;
4748                 iter->buffer_iter[cpu] =
4749                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4750                                                  cpu, GFP_KERNEL);
4751                 ring_buffer_read_prepare_sync();
4752                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4753                 tracing_iter_reset(iter, cpu);
4754         }
4755
4756         mutex_unlock(&trace_types_lock);
4757
4758         return iter;
4759
4760  fail:
4761         mutex_unlock(&trace_types_lock);
4762         free_trace_iter_content(iter);
4763 release:
4764         seq_release_private(inode, file);
4765         return ERR_PTR(-ENOMEM);
4766 }
4767
4768 int tracing_open_generic(struct inode *inode, struct file *filp)
4769 {
4770         int ret;
4771
4772         ret = tracing_check_open_get_tr(NULL);
4773         if (ret)
4774                 return ret;
4775
4776         filp->private_data = inode->i_private;
4777         return 0;
4778 }
4779
4780 bool tracing_is_disabled(void)
4781 {
4782         return tracing_disabled;
4783 }
4784
4785 /*
4786  * Open and update trace_array ref count.
4787  * Must have the current trace_array passed to it.
4788  */
4789 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4790 {
4791         struct trace_array *tr = inode->i_private;
4792         int ret;
4793
4794         ret = tracing_check_open_get_tr(tr);
4795         if (ret)
4796                 return ret;
4797
4798         filp->private_data = inode->i_private;
4799
4800         return 0;
4801 }
4802
4803 /*
4804  * The private pointer of the inode is the trace_event_file.
4805  * Update the tr ref count associated to it.
4806  */
4807 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4808 {
4809         struct trace_event_file *file = inode->i_private;
4810         int ret;
4811
4812         ret = tracing_check_open_get_tr(file->tr);
4813         if (ret)
4814                 return ret;
4815
4816         mutex_lock(&event_mutex);
4817
4818         /* Fail if the file is marked for removal */
4819         if (file->flags & EVENT_FILE_FL_FREED) {
4820                 trace_array_put(file->tr);
4821                 ret = -ENODEV;
4822         } else {
4823                 event_file_get(file);
4824         }
4825
4826         mutex_unlock(&event_mutex);
4827         if (ret)
4828                 return ret;
4829
4830         filp->private_data = inode->i_private;
4831
4832         return 0;
4833 }
4834
4835 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4836 {
4837         struct trace_event_file *file = inode->i_private;
4838
4839         trace_array_put(file->tr);
4840         event_file_put(file);
4841
4842         return 0;
4843 }
4844
4845 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4846 {
4847         tracing_release_file_tr(inode, filp);
4848         return single_release(inode, filp);
4849 }
4850
4851 static int tracing_mark_open(struct inode *inode, struct file *filp)
4852 {
4853         stream_open(inode, filp);
4854         return tracing_open_generic_tr(inode, filp);
4855 }
4856
4857 static int tracing_release(struct inode *inode, struct file *file)
4858 {
4859         struct trace_array *tr = inode->i_private;
4860         struct seq_file *m = file->private_data;
4861         struct trace_iterator *iter;
4862         int cpu;
4863
4864         if (!(file->f_mode & FMODE_READ)) {
4865                 trace_array_put(tr);
4866                 return 0;
4867         }
4868
4869         /* Writes do not use seq_file */
4870         iter = m->private;
4871         mutex_lock(&trace_types_lock);
4872
4873         for_each_tracing_cpu(cpu) {
4874                 if (iter->buffer_iter[cpu])
4875                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
4876         }
4877
4878         if (iter->trace && iter->trace->close)
4879                 iter->trace->close(iter);
4880
4881         if (!iter->snapshot && tr->stop_count)
4882                 /* reenable tracing if it was previously enabled */
4883                 tracing_start_tr(tr);
4884
4885         __trace_array_put(tr);
4886
4887         mutex_unlock(&trace_types_lock);
4888
4889         free_trace_iter_content(iter);
4890         seq_release_private(inode, file);
4891
4892         return 0;
4893 }
4894
4895 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4896 {
4897         struct trace_array *tr = inode->i_private;
4898
4899         trace_array_put(tr);
4900         return 0;
4901 }
4902
4903 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4904 {
4905         struct trace_array *tr = inode->i_private;
4906
4907         trace_array_put(tr);
4908
4909         return single_release(inode, file);
4910 }
4911
4912 static int tracing_open(struct inode *inode, struct file *file)
4913 {
4914         struct trace_array *tr = inode->i_private;
4915         struct trace_iterator *iter;
4916         int ret;
4917
4918         ret = tracing_check_open_get_tr(tr);
4919         if (ret)
4920                 return ret;
4921
4922         /* If this file was open for write, then erase contents */
4923         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4924                 int cpu = tracing_get_cpu(inode);
4925                 struct array_buffer *trace_buf = &tr->array_buffer;
4926
4927 #ifdef CONFIG_TRACER_MAX_TRACE
4928                 if (tr->current_trace->print_max)
4929                         trace_buf = &tr->max_buffer;
4930 #endif
4931
4932                 if (cpu == RING_BUFFER_ALL_CPUS)
4933                         tracing_reset_online_cpus(trace_buf);
4934                 else
4935                         tracing_reset_cpu(trace_buf, cpu);
4936         }
4937
4938         if (file->f_mode & FMODE_READ) {
4939                 iter = __tracing_open(inode, file, false);
4940                 if (IS_ERR(iter))
4941                         ret = PTR_ERR(iter);
4942                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4943                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
4944         }
4945
4946         if (ret < 0)
4947                 trace_array_put(tr);
4948
4949         return ret;
4950 }
4951
4952 /*
4953  * Some tracers are not suitable for instance buffers.
4954  * A tracer is always available for the global array (toplevel)
4955  * or if it explicitly states that it is.
4956  */
4957 static bool
4958 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4959 {
4960 #ifdef CONFIG_TRACER_SNAPSHOT
4961         /* arrays with mapped buffer range do not have snapshots */
4962         if (tr->range_addr_start && t->use_max_tr)
4963                 return false;
4964 #endif
4965         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4966 }
4967
4968 /* Find the next tracer that this trace array may use */
4969 static struct tracer *
4970 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4971 {
4972         while (t && !trace_ok_for_array(t, tr))
4973                 t = t->next;
4974
4975         return t;
4976 }
4977
4978 static void *
4979 t_next(struct seq_file *m, void *v, loff_t *pos)
4980 {
4981         struct trace_array *tr = m->private;
4982         struct tracer *t = v;
4983
4984         (*pos)++;
4985
4986         if (t)
4987                 t = get_tracer_for_array(tr, t->next);
4988
4989         return t;
4990 }
4991
4992 static void *t_start(struct seq_file *m, loff_t *pos)
4993 {
4994         struct trace_array *tr = m->private;
4995         struct tracer *t;
4996         loff_t l = 0;
4997
4998         mutex_lock(&trace_types_lock);
4999
5000         t = get_tracer_for_array(tr, trace_types);
5001         for (; t && l < *pos; t = t_next(m, t, &l))
5002                 ;
5003
5004         return t;
5005 }
5006
5007 static void t_stop(struct seq_file *m, void *p)
5008 {
5009         mutex_unlock(&trace_types_lock);
5010 }
5011
5012 static int t_show(struct seq_file *m, void *v)
5013 {
5014         struct tracer *t = v;
5015
5016         if (!t)
5017                 return 0;
5018
5019         seq_puts(m, t->name);
5020         if (t->next)
5021                 seq_putc(m, ' ');
5022         else
5023                 seq_putc(m, '\n');
5024
5025         return 0;
5026 }
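
/*
 * Reading "available_tracers" therefore yields a single space-separated
 * line; an illustrative example, assuming these tracers are configured:
 *
 *   # cat available_tracers
 *   function_graph function nop
 */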
5027
5028 static const struct seq_operations show_traces_seq_ops = {
5029         .start          = t_start,
5030         .next           = t_next,
5031         .stop           = t_stop,
5032         .show           = t_show,
5033 };
5034
5035 static int show_traces_open(struct inode *inode, struct file *file)
5036 {
5037         struct trace_array *tr = inode->i_private;
5038         struct seq_file *m;
5039         int ret;
5040
5041         ret = tracing_check_open_get_tr(tr);
5042         if (ret)
5043                 return ret;
5044
5045         ret = seq_open(file, &show_traces_seq_ops);
5046         if (ret) {
5047                 trace_array_put(tr);
5048                 return ret;
5049         }
5050
5051         m = file->private_data;
5052         m->private = tr;
5053
5054         return 0;
5055 }
5056
5057 static int tracing_seq_release(struct inode *inode, struct file *file)
5058 {
5059         struct trace_array *tr = inode->i_private;
5060
5061         trace_array_put(tr);
5062         return seq_release(inode, file);
5063 }
5064
5065 static ssize_t
5066 tracing_write_stub(struct file *filp, const char __user *ubuf,
5067                    size_t count, loff_t *ppos)
5068 {
5069         return count;
5070 }
5071
5072 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5073 {
5074         int ret;
5075
5076         if (file->f_mode & FMODE_READ)
5077                 ret = seq_lseek(file, offset, whence);
5078         else
5079                 file->f_pos = ret = 0;
5080
5081         return ret;
5082 }
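
/*
 * Write-only opens have no seq_file iterator to seek within, so their
 * file position is simply pinned to zero; only readers get real lseek()
 * semantics via seq_lseek() above.
 */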
5083
5084 static const struct file_operations tracing_fops = {
5085         .open           = tracing_open,
5086         .read           = seq_read,
5087         .read_iter      = seq_read_iter,
5088         .splice_read    = copy_splice_read,
5089         .write          = tracing_write_stub,
5090         .llseek         = tracing_lseek,
5091         .release        = tracing_release,
5092 };
5093
5094 static const struct file_operations show_traces_fops = {
5095         .open           = show_traces_open,
5096         .read           = seq_read,
5097         .llseek         = seq_lseek,
5098         .release        = tracing_seq_release,
5099 };
5100
5101 static ssize_t
5102 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5103                      size_t count, loff_t *ppos)
5104 {
5105         struct trace_array *tr = file_inode(filp)->i_private;
5106         char *mask_str;
5107         int len;
5108
5109         len = snprintf(NULL, 0, "%*pb\n",
5110                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5111         mask_str = kmalloc(len, GFP_KERNEL);
5112         if (!mask_str)
5113                 return -ENOMEM;
5114
5115         len = snprintf(mask_str, len, "%*pb\n",
5116                        cpumask_pr_args(tr->tracing_cpumask));
5117         if (len >= count) {
5118                 count = -EINVAL;
5119                 goto out_err;
5120         }
5121         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5122
5123 out_err:
5124         kfree(mask_str);
5125
5126         return count;
5127 }
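
/*
 * "%*pb" prints the cpumask as a hex bitmask, so reading the file looks
 * roughly like this on a 4-CPU system with every CPU traced (illustrative
 * output only):
 *
 *   # cat tracing_cpumask
 *   f
 */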
5128
5129 int tracing_set_cpumask(struct trace_array *tr,
5130                         cpumask_var_t tracing_cpumask_new)
5131 {
5132         int cpu;
5133
5134         if (!tr)
5135                 return -EINVAL;
5136
5137         local_irq_disable();
5138         arch_spin_lock(&tr->max_lock);
5139         for_each_tracing_cpu(cpu) {
5140                 /*
5141                  * Increase/decrease the disabled counter if we are
5142                  * about to flip a bit in the cpumask:
5143                  */
5144                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5145                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5146                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5147 #ifdef CONFIG_TRACER_MAX_TRACE
5148                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5149 #endif
5150                 }
5151                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5152                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5153                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5154 #ifdef CONFIG_TRACER_MAX_TRACE
5155                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5156 #endif
5157                 }
5158         }
5159         arch_spin_unlock(&tr->max_lock);
5160         local_irq_enable();
5161
5162         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5163
5164         return 0;
5165 }
5166
5167 static ssize_t
5168 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5169                       size_t count, loff_t *ppos)
5170 {
5171         struct trace_array *tr = file_inode(filp)->i_private;
5172         cpumask_var_t tracing_cpumask_new;
5173         int err;
5174
5175         if (count == 0 || count > KMALLOC_MAX_SIZE)
5176                 return -EINVAL;
5177
5178         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5179                 return -ENOMEM;
5180
5181         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5182         if (err)
5183                 goto err_free;
5184
5185         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5186         if (err)
5187                 goto err_free;
5188
5189         free_cpumask_var(tracing_cpumask_new);
5190
5191         return count;
5192
5193 err_free:
5194         free_cpumask_var(tracing_cpumask_new);
5195
5196         return err;
5197 }
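
/*
 * Illustrative usage sketch (user space, not kernel code): restricting
 * tracing to CPUs 0 and 1 by writing the same hex bitmask format that the
 * read side prints:
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 */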
5198
5199 static const struct file_operations tracing_cpumask_fops = {
5200         .open           = tracing_open_generic_tr,
5201         .read           = tracing_cpumask_read,
5202         .write          = tracing_cpumask_write,
5203         .release        = tracing_release_generic_tr,
5204         .llseek         = generic_file_llseek,
5205 };
5206
5207 static int tracing_trace_options_show(struct seq_file *m, void *v)
5208 {
5209         struct tracer_opt *trace_opts;
5210         struct trace_array *tr = m->private;
5211         u32 tracer_flags;
5212         int i;
5213
5214         guard(mutex)(&trace_types_lock);
5215
5216         tracer_flags = tr->current_trace->flags->val;
5217         trace_opts = tr->current_trace->flags->opts;
5218
5219         for (i = 0; trace_options[i]; i++) {
5220                 if (tr->trace_flags & (1 << i))
5221                         seq_printf(m, "%s\n", trace_options[i]);
5222                 else
5223                         seq_printf(m, "no%s\n", trace_options[i]);
5224         }
5225
5226         for (i = 0; trace_opts[i].name; i++) {
5227                 if (tracer_flags & trace_opts[i].bit)
5228                         seq_printf(m, "%s\n", trace_opts[i].name);
5229                 else
5230                         seq_printf(m, "no%s\n", trace_opts[i].name);
5231         }
5232
5233         return 0;
5234 }
5235
5236 static int __set_tracer_option(struct trace_array *tr,
5237                                struct tracer_flags *tracer_flags,
5238                                struct tracer_opt *opts, int neg)
5239 {
5240         struct tracer *trace = tracer_flags->trace;
5241         int ret;
5242
5243         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5244         if (ret)
5245                 return ret;
5246
5247         if (neg)
5248                 tracer_flags->val &= ~opts->bit;
5249         else
5250                 tracer_flags->val |= opts->bit;
5251         return 0;
5252 }
5253
5254 /* Try to assign a tracer specific option */
5255 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5256 {
5257         struct tracer *trace = tr->current_trace;
5258         struct tracer_flags *tracer_flags = trace->flags;
5259         struct tracer_opt *opts = NULL;
5260         int i;
5261
5262         for (i = 0; tracer_flags->opts[i].name; i++) {
5263                 opts = &tracer_flags->opts[i];
5264
5265                 if (strcmp(cmp, opts->name) == 0)
5266                         return __set_tracer_option(tr, trace->flags, opts, neg);
5267         }
5268
5269         return -EINVAL;
5270 }
5271
5272 /* Some tracers require overwrite to stay enabled */
5273 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5274 {
5275         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5276                 return -1;
5277
5278         return 0;
5279 }
5280
5281 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5282 {
5283         if ((mask == TRACE_ITER_RECORD_TGID) ||
5284             (mask == TRACE_ITER_RECORD_CMD) ||
5285             (mask == TRACE_ITER_TRACE_PRINTK) ||
5286             (mask == TRACE_ITER_COPY_MARKER))
5287                 lockdep_assert_held(&event_mutex);
5288
5289         /* do nothing if flag is already set */
5290         if (!!(tr->trace_flags & mask) == !!enabled)
5291                 return 0;
5292
5293         /* Give the tracer a chance to approve the change */
5294         if (tr->current_trace->flag_changed)
5295                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5296                         return -EINVAL;
5297
5298         if (mask == TRACE_ITER_TRACE_PRINTK) {
5299                 if (enabled) {
5300                         update_printk_trace(tr);
5301                 } else {
5302                         /*
5303                          * The global_trace cannot clear this.
5304                          * Its flag only gets cleared if another instance sets it.
5305                          */
5306                         if (printk_trace == &global_trace)
5307                                 return -EINVAL;
5308                         /*
5309                          * An instance must always have it set.
5310                          * By default, that's the global_trace instance.
5311                          */
5312                         if (printk_trace == tr)
5313                                 update_printk_trace(&global_trace);
5314                 }
5315         }
5316
5317         if (mask == TRACE_ITER_COPY_MARKER)
5318                 update_marker_trace(tr, enabled);
5319
5320         if (enabled)
5321                 tr->trace_flags |= mask;
5322         else
5323                 tr->trace_flags &= ~mask;
5324
5325         if (mask == TRACE_ITER_RECORD_CMD)
5326                 trace_event_enable_cmd_record(enabled);
5327
5328         if (mask == TRACE_ITER_RECORD_TGID) {
5329
5330                 if (trace_alloc_tgid_map() < 0) {
5331                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5332                         return -ENOMEM;
5333                 }
5334
5335                 trace_event_enable_tgid_record(enabled);
5336         }
5337
5338         if (mask == TRACE_ITER_EVENT_FORK)
5339                 trace_event_follow_fork(tr, enabled);
5340
5341         if (mask == TRACE_ITER_FUNC_FORK)
5342                 ftrace_pid_follow_fork(tr, enabled);
5343
5344         if (mask == TRACE_ITER_OVERWRITE) {
5345                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5346 #ifdef CONFIG_TRACER_MAX_TRACE
5347                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5348 #endif
5349         }
5350
5351         if (mask == TRACE_ITER_PRINTK) {
5352                 trace_printk_start_stop_comm(enabled);
5353                 trace_printk_control(enabled);
5354         }
5355
5356         return 0;
5357 }
5358
5359 int trace_set_options(struct trace_array *tr, char *option)
5360 {
5361         char *cmp;
5362         int neg = 0;
5363         int ret;
5364         size_t orig_len = strlen(option);
5365         int len;
5366
5367         cmp = strstrip(option);
5368
5369         len = str_has_prefix(cmp, "no");
5370         if (len)
5371                 neg = 1;
5372
5373         cmp += len;
5374
5375         mutex_lock(&event_mutex);
5376         mutex_lock(&trace_types_lock);
5377
5378         ret = match_string(trace_options, -1, cmp);
5379         /* If no option could be set, test the specific tracer options */
5380         if (ret < 0)
5381                 ret = set_tracer_option(tr, cmp, neg);
5382         else
5383                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5384
5385         mutex_unlock(&trace_types_lock);
5386         mutex_unlock(&event_mutex);
5387
5388         /*
5389          * If the first trailing whitespace is replaced with '\0' by strstrip,
5390          * turn it back into a space.
5391          */
5392         if (orig_len > strlen(option))
5393                 option[strlen(option)] = ' ';
5394
5395         return ret;
5396 }
5397
5398 static void __init apply_trace_boot_options(void)
5399 {
5400         char *buf = trace_boot_options_buf;
5401         char *option;
5402
5403         while (true) {
5404                 option = strsep(&buf, ",");
5405
5406                 if (!option)
5407                         break;
5408
5409                 if (*option)
5410                         trace_set_options(&global_trace, option);
5411
5412                 /* Put back the comma to allow this to be called again */
5413                 if (buf)
5414                         *(buf - 1) = ',';
5415         }
5416 }
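
/*
 * trace_boot_options_buf is presumably filled from a "trace_options="
 * kernel command-line parameter earlier in this file, so a boot line such
 * as the following (illustrative example) would be applied here one
 * comma-separated option at a time:
 *
 *   trace_options=sym-offset,noprint-parent
 */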
5417
5418 static ssize_t
5419 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5420                         size_t cnt, loff_t *ppos)
5421 {
5422         struct seq_file *m = filp->private_data;
5423         struct trace_array *tr = m->private;
5424         char buf[64];
5425         int ret;
5426
5427         if (cnt >= sizeof(buf))
5428                 return -EINVAL;
5429
5430         if (copy_from_user(buf, ubuf, cnt))
5431                 return -EFAULT;
5432
5433         buf[cnt] = 0;
5434
5435         ret = trace_set_options(tr, buf);
5436         if (ret < 0)
5437                 return ret;
5438
5439         *ppos += cnt;
5440
5441         return cnt;
5442 }
5443
5444 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5445 {
5446         struct trace_array *tr = inode->i_private;
5447         int ret;
5448
5449         ret = tracing_check_open_get_tr(tr);
5450         if (ret)
5451                 return ret;
5452
5453         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5454         if (ret < 0)
5455                 trace_array_put(tr);
5456
5457         return ret;
5458 }
5459
5460 static const struct file_operations tracing_iter_fops = {
5461         .open           = tracing_trace_options_open,
5462         .read           = seq_read,
5463         .llseek         = seq_lseek,
5464         .release        = tracing_single_release_tr,
5465         .write          = tracing_trace_options_write,
5466 };
5467
5468 static const char readme_msg[] =
5469         "tracing mini-HOWTO:\n\n"
5470         "By default tracefs removes all OTH file permission bits.\n"
5471         "When mounting tracefs an optional group id can be specified\n"
5472         "which adds the group to every directory and file in tracefs:\n\n"
5473         "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5474         "# echo 0 > tracing_on : quick way to disable tracing\n"
5475         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5476         " Important files:\n"
5477         "  trace\t\t\t- The static contents of the buffer\n"
5478         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5479         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5480         "  current_tracer\t- function and latency tracers\n"
5481         "  available_tracers\t- list of configured tracers for current_tracer\n"
5482         "  error_log\t- error log for failed commands (that support it)\n"
5483         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5484         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5485         "  trace_clock\t\t- change the clock used to order events\n"
5486         "       local:   Per cpu clock but may not be synced across CPUs\n"
5487         "      global:   Synced across CPUs but slows tracing down.\n"
5488         "     counter:   Not a clock, but just an increment\n"
5489         "      uptime:   Jiffy counter from time of boot\n"
5490         "        perf:   Same clock that perf events use\n"
5491 #ifdef CONFIG_X86_64
5492         "     x86-tsc:   TSC cycle counter\n"
5493 #endif
5494         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5495         "       delta:   Delta difference against a buffer-wide timestamp\n"
5496         "    absolute:   Absolute (standalone) timestamp\n"
5497         "\n  trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
5498         "\n  trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
5499         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5500         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5501         "\t\t\t  Remove sub-buffer with rmdir\n"
5502         "  trace_options\t\t- Set format or modify how tracing happens\n"
5503         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5504         "\t\t\t  option name\n"
5505         "  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5506 #ifdef CONFIG_DYNAMIC_FTRACE
5507         "\n  available_filter_functions - list of functions that can be filtered on\n"
5508         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5509         "\t\t\t  functions\n"
5510         "\t     accepts: func_full_name or glob-matching-pattern\n"
5511         "\t     modules: Can select a group via module\n"
5512         "\t      Format: :mod:<module-name>\n"
5513         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5514         "\t    triggers: a command to perform when function is hit\n"
5515         "\t      Format: <function>:<trigger>[:count]\n"
5516         "\t     trigger: traceon, traceoff\n"
5517         "\t\t      enable_event:<system>:<event>\n"
5518         "\t\t      disable_event:<system>:<event>\n"
5519 #ifdef CONFIG_STACKTRACE
5520         "\t\t      stacktrace\n"
5521 #endif
5522 #ifdef CONFIG_TRACER_SNAPSHOT
5523         "\t\t      snapshot\n"
5524 #endif
5525         "\t\t      dump\n"
5526         "\t\t      cpudump\n"
5527         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5528         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5529         "\t     The first one will disable tracing every time do_fault is hit\n"
5530         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5531         "\t       The first time do_trap is hit and it disables tracing, the\n"
5532         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5533         "\t       the counter will not decrement. It only decrements when the\n"
5534         "\t       trigger did work\n"
5535         "\t     To remove trigger without count:\n"
5536         "\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5537         "\t     To remove trigger with a count:\n"
5538         "\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5539         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5540         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5541         "\t    modules: Can select a group via module command :mod:\n"
5542         "\t    Does not accept triggers\n"
5543 #endif /* CONFIG_DYNAMIC_FTRACE */
5544 #ifdef CONFIG_FUNCTION_TRACER
5545         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5546         "\t\t    (function)\n"
5547         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5548         "\t\t    (function)\n"
5549 #endif
5550 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5551         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5552         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5553         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5554 #endif
5555 #ifdef CONFIG_TRACER_SNAPSHOT
5556         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5557         "\t\t\t  snapshot buffer. Read the contents for more\n"
5558         "\t\t\t  information\n"
5559 #endif
5560 #ifdef CONFIG_STACK_TRACER
5561         "  stack_trace\t\t- Shows the max stack trace when active\n"
5562         "  stack_max_size\t- Shows current max stack size that was traced\n"
5563         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5564         "\t\t\t  new trace)\n"
5565 #ifdef CONFIG_DYNAMIC_FTRACE
5566         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5567         "\t\t\t  traces\n"
5568 #endif
5569 #endif /* CONFIG_STACK_TRACER */
5570 #ifdef CONFIG_DYNAMIC_EVENTS
5571         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5572         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5573 #endif
5574 #ifdef CONFIG_KPROBE_EVENTS
5575         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5576         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5577 #endif
5578 #ifdef CONFIG_UPROBE_EVENTS
5579         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5580         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5581 #endif
5582 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5583     defined(CONFIG_FPROBE_EVENTS)
5584         "\t  accepts: event-definitions (one definition per line)\n"
5585 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5586         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5587         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5588 #endif
5589 #ifdef CONFIG_FPROBE_EVENTS
5590         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5591         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5592 #endif
5593 #ifdef CONFIG_HIST_TRIGGERS
5594         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5595 #endif
5596         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5597         "\t           -:[<group>/][<event>]\n"
5598 #ifdef CONFIG_KPROBE_EVENTS
5599         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5600   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5601 #endif
5602 #ifdef CONFIG_UPROBE_EVENTS
5603   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5604 #endif
5605         "\t     args: <name>=fetcharg[:type]\n"
5606         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5607 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5608         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5609 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5610         "\t           <argname>[->field[->field|.field...]],\n"
5611 #endif
5612 #else
5613         "\t           $stack<index>, $stack, $retval, $comm,\n"
5614 #endif
5615         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5616         "\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5617         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5618         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5619         "\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5620 #ifdef CONFIG_HIST_TRIGGERS
5621         "\t    field: <stype> <name>;\n"
5622         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5623         "\t           [unsigned] char/int/long\n"
5624 #endif
5625         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5626         "\t            of the <attached-group>/<attached-event>.\n"
5627 #endif
5628         "  set_event\t\t- Enables events by name written into it\n"
5629         "\t\t\t  Can enable module events via: :mod:<module>\n"
5630         "  events/\t\t- Directory containing all trace event subsystems:\n"
5631         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5632         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5633         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5634         "\t\t\t  events\n"
5635         "      filter\t\t- If set, only events passing filter are traced\n"
5636         "  events/<system>/<event>/\t- Directory containing control files for\n"
5637         "\t\t\t  <event>:\n"
5638         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5639         "      filter\t\t- If set, only events passing filter are traced\n"
5640         "      trigger\t\t- If set, a command to perform when event is hit\n"
5641         "\t    Format: <trigger>[:count][if <filter>]\n"
5642         "\t   trigger: traceon, traceoff\n"
5643         "\t            enable_event:<system>:<event>\n"
5644         "\t            disable_event:<system>:<event>\n"
5645 #ifdef CONFIG_HIST_TRIGGERS
5646         "\t            enable_hist:<system>:<event>\n"
5647         "\t            disable_hist:<system>:<event>\n"
5648 #endif
5649 #ifdef CONFIG_STACKTRACE
5650         "\t\t    stacktrace\n"
5651 #endif
5652 #ifdef CONFIG_TRACER_SNAPSHOT
5653         "\t\t    snapshot\n"
5654 #endif
5655 #ifdef CONFIG_HIST_TRIGGERS
5656         "\t\t    hist (see below)\n"
5657 #endif
5658         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5659         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5660         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5661         "\t                  events/block/block_unplug/trigger\n"
5662         "\t   The first disables tracing every time block_unplug is hit.\n"
5663         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5664         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5665         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5666         "\t   Like function triggers, the counter is only decremented if it\n"
5667         "\t    enabled or disabled tracing.\n"
5668         "\t   To remove a trigger without a count:\n"
5669         "\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5670         "\t   To remove a trigger with a count:\n"
5671         "\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5672         "\t   Filters can be ignored when removing a trigger.\n"
5673 #ifdef CONFIG_HIST_TRIGGERS
5674         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5675         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5676         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5677         "\t            [:values=<field1[,field2,...]>]\n"
5678         "\t            [:sort=<field1[,field2,...]>]\n"
5679         "\t            [:size=#entries]\n"
5680         "\t            [:pause][:continue][:clear]\n"
5681         "\t            [:name=histname1]\n"
5682         "\t            [:nohitcount]\n"
5683         "\t            [:<handler>.<action>]\n"
5684         "\t            [if <filter>]\n\n"
5685         "\t    Note, special fields can be used as well:\n"
5686         "\t            common_timestamp - to record current timestamp\n"
5687         "\t            common_cpu - to record the CPU the event happened on\n"
5688         "\n"
5689         "\t    A hist trigger variable can be:\n"
5690         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5691         "\t        - a reference to another variable e.g. y=$x,\n"
5692         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5693         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5694         "\n"
5695         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5696         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5697         "\t    variable reference, field or numeric literal.\n"
5698         "\n"
5699         "\t    When a matching event is hit, an entry is added to a hash\n"
5700         "\t    table using the key(s) and value(s) named, and the value of a\n"
5701         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5702         "\t    correspond to fields in the event's format description.  Keys\n"
5703         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5704         "\t    Compound keys consisting of up to two fields can be specified\n"
5705         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5706         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5707         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5708         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5709         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5710         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5711         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5712         "\t    its histogram data will be shared with other triggers of the\n"
5713         "\t    same name, and trigger hits will update this common data.\n\n"
5714         "\t    Reading the 'hist' file for the event will dump the hash\n"
5715         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5716         "\t    triggers attached to an event, there will be a table for each\n"
5717         "\t    trigger in the output.  The table displayed for a named\n"
5718         "\t    trigger will be the same as any other instance having the\n"
5719         "\t    same name.  The default format used to display a given field\n"
5720         "\t    can be modified by appending any of the following modifiers\n"
5721         "\t    to the field name, as applicable:\n\n"
5722         "\t            .hex        display a number as a hex value\n"
5723         "\t            .sym        display an address as a symbol\n"
5724         "\t            .sym-offset display an address as a symbol and offset\n"
5725         "\t            .execname   display a common_pid as a program name\n"
5726         "\t            .syscall    display a syscall id as a syscall name\n"
5727         "\t            .log2       display log2 value rather than raw number\n"
5728         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5729         "\t            .usecs      display a common_timestamp in microseconds\n"
5730         "\t            .percent    display a number as a percentage value\n"
5731         "\t            .graph      display a bar-graph of a value\n\n"
5732         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5733         "\t    trigger or to start a hist trigger but not log any events\n"
5734         "\t    until told to do so.  'continue' can be used to start or\n"
5735         "\t    restart a paused hist trigger.\n\n"
5736         "\t    The 'clear' parameter will clear the contents of a running\n"
5737         "\t    hist trigger and leave its current paused/active state\n"
5738         "\t    unchanged.\n\n"
5739         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5740         "\t    raw hitcount in the histogram.\n\n"
5741         "\t    The enable_hist and disable_hist triggers can be used to\n"
5742         "\t    have one event conditionally start and stop another event's\n"
5743         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5744         "\t    the enable_event and disable_event triggers.\n\n"
5745         "\t    Hist trigger handlers and actions are executed whenever a\n"
5746         "\t    histogram entry is added or updated.  They take the form:\n\n"
5747         "\t        <handler>.<action>\n\n"
5748         "\t    The available handlers are:\n\n"
5749         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5750         "\t        onmax(var)               - invoke if var exceeds current max\n"
5751         "\t        onchange(var)            - invoke action if var changes\n\n"
5752         "\t    The available actions are:\n\n"
5753         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5754         "\t        save(field,...)                      - save current event fields\n"
5755 #ifdef CONFIG_TRACER_SNAPSHOT
5756         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5757 #endif
5758 #ifdef CONFIG_SYNTH_EVENTS
5759         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5760         "\t  Write into this file to define/undefine new synthetic events.\n"
5761         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5762 #endif
5763 #endif
5764 ;
5765
5766 static ssize_t
5767 tracing_readme_read(struct file *filp, char __user *ubuf,
5768                        size_t cnt, loff_t *ppos)
5769 {
5770         return simple_read_from_buffer(ubuf, cnt, ppos,
5771                                         readme_msg, strlen(readme_msg));
5772 }
5773
5774 static const struct file_operations tracing_readme_fops = {
5775         .open           = tracing_open_generic,
5776         .read           = tracing_readme_read,
5777         .llseek         = generic_file_llseek,
5778 };
5779
5780 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5781 static union trace_eval_map_item *
5782 update_eval_map(union trace_eval_map_item *ptr)
5783 {
5784         if (!ptr->map.eval_string) {
5785                 if (ptr->tail.next) {
5786                         ptr = ptr->tail.next;
5787                         /* Set ptr to the next real item (skip head) */
5788                         ptr++;
5789                 } else
5790                         return NULL;
5791         }
5792         return ptr;
5793 }
5794
5795 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5796 {
5797         union trace_eval_map_item *ptr = v;
5798
5799         /*
5800          * Paranoid! If ptr points to end, we don't want to increment past it.
5801          * This really should never happen.
5802          */
5803         (*pos)++;
5804         ptr = update_eval_map(ptr);
5805         if (WARN_ON_ONCE(!ptr))
5806                 return NULL;
5807
5808         ptr++;
5809         ptr = update_eval_map(ptr);
5810
5811         return ptr;
5812 }
5813
5814 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5815 {
5816         union trace_eval_map_item *v;
5817         loff_t l = 0;
5818
5819         mutex_lock(&trace_eval_mutex);
5820
5821         v = trace_eval_maps;
5822         if (v)
5823                 v++;
5824
5825         while (v && l < *pos) {
5826                 v = eval_map_next(m, v, &l);
5827         }
5828
5829         return v;
5830 }
5831
5832 static void eval_map_stop(struct seq_file *m, void *v)
5833 {
5834         mutex_unlock(&trace_eval_mutex);
5835 }
5836
5837 static int eval_map_show(struct seq_file *m, void *v)
5838 {
5839         union trace_eval_map_item *ptr = v;
5840
5841         seq_printf(m, "%s %ld (%s)\n",
5842                    ptr->map.eval_string, ptr->map.eval_value,
5843                    ptr->map.system);
5844
5845         return 0;
5846 }
5847
5848 static const struct seq_operations tracing_eval_map_seq_ops = {
5849         .start          = eval_map_start,
5850         .next           = eval_map_next,
5851         .stop           = eval_map_stop,
5852         .show           = eval_map_show,
5853 };
5854
5855 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5856 {
5857         int ret;
5858
5859         ret = tracing_check_open_get_tr(NULL);
5860         if (ret)
5861                 return ret;
5862
5863         return seq_open(filp, &tracing_eval_map_seq_ops);
5864 }
5865
5866 static const struct file_operations tracing_eval_map_fops = {
5867         .open           = tracing_eval_map_open,
5868         .read           = seq_read,
5869         .llseek         = seq_lseek,
5870         .release        = seq_release,
5871 };
5872
5873 static inline union trace_eval_map_item *
5874 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5875 {
5876         /* Return tail of array given the head */
5877         return ptr + ptr->head.length + 1;
5878 }
5879
5880 static void
5881 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5882                            int len)
5883 {
5884         struct trace_eval_map **stop;
5885         struct trace_eval_map **map;
5886         union trace_eval_map_item *map_array;
5887         union trace_eval_map_item *ptr;
5888
5889         stop = start + len;
5890
5891         /*
5892          * The trace_eval_maps contains the map plus a head and tail item,
5893          * where the head holds the module and length of array, and the
5894          * tail holds a pointer to the next list.
5895          */
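	/*
	 * Illustrative layout, assuming len == 2 (head and tail share the
	 * union with the plain map entries):
	 *
	 *   map_array[0]  head { .mod = mod, .length = 2 }
	 *   map_array[1]  copy of *start[0]
	 *   map_array[2]  copy of *start[1]
	 *   map_array[3]  tail, zeroed below  { .next = NULL }
	 */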
5896         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5897         if (!map_array) {
5898                 pr_warn("Unable to allocate trace eval mapping\n");
5899                 return;
5900         }
5901
5902         guard(mutex)(&trace_eval_mutex);
5903
5904         if (!trace_eval_maps)
5905                 trace_eval_maps = map_array;
5906         else {
5907                 ptr = trace_eval_maps;
5908                 for (;;) {
5909                         ptr = trace_eval_jmp_to_tail(ptr);
5910                         if (!ptr->tail.next)
5911                                 break;
5912                         ptr = ptr->tail.next;
5913
5914                 }
5915                 ptr->tail.next = map_array;
5916         }
5917         map_array->head.mod = mod;
5918         map_array->head.length = len;
5919         map_array++;
5920
5921         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5922                 map_array->map = **map;
5923                 map_array++;
5924         }
5925         memset(map_array, 0, sizeof(*map_array));
5926 }
5927
5928 static void trace_create_eval_file(struct dentry *d_tracer)
5929 {
5930         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5931                           NULL, &tracing_eval_map_fops);
5932 }
5933
5934 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5935 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5936 static inline void trace_insert_eval_map_file(struct module *mod,
5937                               struct trace_eval_map **start, int len) { }
5938 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5939
5940 static void trace_insert_eval_map(struct module *mod,
5941                                   struct trace_eval_map **start, int len)
5942 {
5943         struct trace_eval_map **map;
5944
5945         if (len <= 0)
5946                 return;
5947
5948         map = start;
5949
5950         trace_event_eval_update(map, len);
5951
5952         trace_insert_eval_map_file(mod, start, len);
5953 }
5954
5955 static ssize_t
5956 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5957                        size_t cnt, loff_t *ppos)
5958 {
5959         struct trace_array *tr = filp->private_data;
5960         char buf[MAX_TRACER_SIZE+2];
5961         int r;
5962
5963         mutex_lock(&trace_types_lock);
5964         r = sprintf(buf, "%s\n", tr->current_trace->name);
5965         mutex_unlock(&trace_types_lock);
5966
5967         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5968 }
5969
5970 int tracer_init(struct tracer *t, struct trace_array *tr)
5971 {
5972         tracing_reset_online_cpus(&tr->array_buffer);
5973         return t->init(tr);
5974 }
5975
5976 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5977 {
5978         int cpu;
5979
5980         for_each_tracing_cpu(cpu)
5981                 per_cpu_ptr(buf->data, cpu)->entries = val;
5982 }
5983
5984 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5985 {
5986         if (cpu == RING_BUFFER_ALL_CPUS) {
5987                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5988         } else {
5989                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5990         }
5991 }
5992
5993 #ifdef CONFIG_TRACER_MAX_TRACE
5994 /* resize @tr's buffer to the size of @size_tr's entries */
5995 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5996                                         struct array_buffer *size_buf, int cpu_id)
5997 {
5998         int cpu, ret = 0;
5999
6000         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6001                 for_each_tracing_cpu(cpu) {
6002                         ret = ring_buffer_resize(trace_buf->buffer,
6003                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6004                         if (ret < 0)
6005                                 break;
6006                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6007                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6008                 }
6009         } else {
6010                 ret = ring_buffer_resize(trace_buf->buffer,
6011                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6012                 if (ret == 0)
6013                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6014                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6015         }
6016
6017         return ret;
6018 }
6019 #endif /* CONFIG_TRACER_MAX_TRACE */
6020
6021 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6022                                         unsigned long size, int cpu)
6023 {
6024         int ret;
6025
6026         /*
6027          * If kernel or user changes the size of the ring buffer
6028          * we use the size that was given, and we can forget about
6029          * expanding it later.
6030          */
6031         trace_set_ring_buffer_expanded(tr);
6032
6033         /* May be called before buffers are initialized */
6034         if (!tr->array_buffer.buffer)
6035                 return 0;
6036
6037         /* Do not allow tracing while resizing ring buffer */
6038         tracing_stop_tr(tr);
6039
6040         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6041         if (ret < 0)
6042                 goto out_start;
6043
6044 #ifdef CONFIG_TRACER_MAX_TRACE
6045         if (!tr->allocated_snapshot)
6046                 goto out;
6047
6048         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6049         if (ret < 0) {
6050                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6051                                                      &tr->array_buffer, cpu);
6052                 if (r < 0) {
6053                         /*
6054                          * AARGH! We are left with different
6055                          * size max buffer!!!!
6056                          * The max buffer is our "snapshot" buffer.
6057                          * When a tracer needs a snapshot (one of the
6058                          * latency tracers), it swaps the max buffer
6059                          * with the saved snap shot. We succeeded to
6060                          * update the size of the main buffer, but failed to
6061                          * update the size of the max buffer. But when we tried
6062                          * to reset the main buffer to the original size, we
6063                          * failed there too. This is very unlikely to
6064                          * happen, but if it does, warn and kill all
6065                          * tracing.
6066                          */
6067                         WARN_ON(1);
6068                         tracing_disabled = 1;
6069                 }
6070                 goto out_start;
6071         }
6072
6073         update_buffer_entries(&tr->max_buffer, cpu);
6074
6075  out:
6076 #endif /* CONFIG_TRACER_MAX_TRACE */
6077
6078         update_buffer_entries(&tr->array_buffer, cpu);
6079  out_start:
6080         tracing_start_tr(tr);
6081         return ret;
6082 }
6083
6084 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6085                                   unsigned long size, int cpu_id)
6086 {
6087         guard(mutex)(&trace_types_lock);
6088
6089         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6090                 /* make sure, this cpu is enabled in the mask */
6091                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
6092                         return -EINVAL;
6093         }
6094
6095         return __tracing_resize_ring_buffer(tr, size, cpu_id);
6096 }
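
/*
 * Illustrative usage sketch (assuming the usual tracefs files, whose write
 * handlers convert kilobytes to a byte count before calling the resize
 * helpers above):
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # echo 1408 > /sys/kernel/tracing/per_cpu/cpu2/buffer_size_kb
 */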
6097
6098 struct trace_mod_entry {
6099         unsigned long   mod_addr;
6100         char            mod_name[MODULE_NAME_LEN];
6101 };
6102
6103 struct trace_scratch {
6104         unsigned int            clock_id;
6105         unsigned long           text_addr;
6106         unsigned long           nr_entries;
6107         struct trace_mod_entry  entries[];
6108 };
6109
6110 static DEFINE_MUTEX(scratch_mutex);
6111
6112 static int cmp_mod_entry(const void *key, const void *pivot)
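/*
 * bsearch() comparator used by trace_adjust_address(). Each entry records
 * the text base of one module from the previous boot; the range covered by
 * entries[i] ends where entries[i + 1] begins, which is why the key is
 * matched against a pair of adjacent entries.
 */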
6113 {
6114         unsigned long addr = (unsigned long)key;
6115         const struct trace_mod_entry *ent = pivot;
6116
6117         if (addr >= ent[0].mod_addr && addr < ent[1].mod_addr)
6118                 return 0;
6119         else
6120                 return addr - ent->mod_addr;
6121 }
6122
6123 /**
6124  * trace_adjust_address() - Adjust a previous boot address to the current address.
6125  * @tr: Persistent ring buffer's trace_array.
6126  * @addr: Address recorded in @tr which is to be adjusted.
6127  */
6128 unsigned long trace_adjust_address(struct trace_array *tr, unsigned long addr)
6129 {
6130         struct trace_module_delta *module_delta;
6131         struct trace_scratch *tscratch;
6132         struct trace_mod_entry *entry;
6133         unsigned long raddr;
6134         int idx = 0, nr_entries;
6135
6136         /* If we don't have last boot delta, return the address */
6137         if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6138                 return addr;
6139
6140         /* tr->module_delta must be protected by rcu. */
6141         guard(rcu)();
6142         tscratch = tr->scratch;
6143         /* if there is no tscratch, module_delta must be NULL. */
6144         module_delta = READ_ONCE(tr->module_delta);
6145         if (!module_delta || !tscratch->nr_entries ||
6146             tscratch->entries[0].mod_addr > addr) {
6147                 raddr = addr + tr->text_delta;
6148                 return __is_kernel(raddr) || is_kernel_core_data(raddr) ||
6149                         is_kernel_rodata(raddr) ? raddr : addr;
6150         }
6151
6152         /* Note that entries must be sorted. */
6153         nr_entries = tscratch->nr_entries;
6154         if (nr_entries == 1 ||
6155             tscratch->entries[nr_entries - 1].mod_addr < addr)
6156                 idx = nr_entries - 1;
6157         else {
6158                 entry = __inline_bsearch((void *)addr,
6159                                 tscratch->entries,
6160                                 nr_entries - 1,
6161                                 sizeof(tscratch->entries[0]),
6162                                 cmp_mod_entry);
6163                 if (entry)
6164                         idx = entry - tscratch->entries;
6165         }
6166
6167         return addr + module_delta->delta[idx];
6168 }
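
/*
 * Illustrative example for trace_adjust_address() (addresses made up): if a
 * module's text was at 0xffffffffc0a00000 on the previous boot and is at
 * 0xffffffffc0b00000 now, its delta is 0x100000, so a recorded address of
 * 0xffffffffc0a01234 is reported as 0xffffffffc0b01234. Addresses below the
 * first module entry are treated as core kernel text and adjusted by
 * tr->text_delta instead (and returned unchanged if the result does not
 * look like a kernel address).
 */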
6169
6170 #ifdef CONFIG_MODULES
6171 static int save_mod(struct module *mod, void *data)
6172 {
6173         struct trace_array *tr = data;
6174         struct trace_scratch *tscratch;
6175         struct trace_mod_entry *entry;
6176         unsigned int size;
6177
6178         tscratch = tr->scratch;
6179         if (!tscratch)
6180                 return -1;
6181         size = tr->scratch_size;
6182
6183         if (struct_size(tscratch, entries, tscratch->nr_entries + 1) > size)
6184                 return -1;
6185
6186         entry = &tscratch->entries[tscratch->nr_entries];
6187
6188         tscratch->nr_entries++;
6189
6190         entry->mod_addr = (unsigned long)mod->mem[MOD_TEXT].base;
6191         strscpy(entry->mod_name, mod->name);
6192
6193         return 0;
6194 }
6195 #else
6196 static int save_mod(struct module *mod, void *data)
6197 {
6198         return 0;
6199 }
6200 #endif
6201
6202 static void update_last_data(struct trace_array *tr)
6203 {
6204         struct trace_module_delta *module_delta;
6205         struct trace_scratch *tscratch;
6206
6207         if (!(tr->flags & TRACE_ARRAY_FL_BOOT))
6208                 return;
6209
6210         if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6211                 return;
6212
6213         /* Only clear and update the buffer if it has previous boot data. */
6214         tr->flags &= ~TRACE_ARRAY_FL_LAST_BOOT;
6215
6216         /* Reset the module list and reload them */
6217         if (tr->scratch) {
6218                 struct trace_scratch *tscratch = tr->scratch;
6219
6220                 tscratch->clock_id = tr->clock_id;
6221                 memset(tscratch->entries, 0,
6222                        flex_array_size(tscratch, entries, tscratch->nr_entries));
6223                 tscratch->nr_entries = 0;
6224
6225                 guard(mutex)(&scratch_mutex);
6226                 module_for_each_mod(save_mod, tr);
6227         }
6228
6229         /*
6230          * Need to clear all CPU buffers, as events from the previous
6231          * boot must not be mixed with events from this boot, since that
6232          * would produce a confusing trace. This includes the buffers of
6233          * CPUs that may currently be offline.
6234          */
6235         tracing_reset_all_cpus(&tr->array_buffer);
6236
6237         /* Using current data now */
6238         tr->text_delta = 0;
6239
6240         if (!tr->scratch)
6241                 return;
6242
6243         tscratch = tr->scratch;
6244         module_delta = READ_ONCE(tr->module_delta);
6245         WRITE_ONCE(tr->module_delta, NULL);
6246         kfree_rcu(module_delta, rcu);
6247
6248         /* Set the persistent ring buffer meta data to this address */
6249         tscratch->text_addr = (unsigned long)_text;
6250 }
6251
6252 /**
6253  * tracing_update_buffers - used by tracing facility to expand ring buffers
6254  * @tr: The tracing instance
6255  *
6256  * To save memory on systems where tracing is compiled in but never
6257  * used, the ring buffers are set to a minimum size. But once a user
6258  * starts to use the tracing facility, they need to grow to their
6259  * default size.
6260  *
6261  * This function is to be called when a tracer is about to be used.
6262  */
6263 int tracing_update_buffers(struct trace_array *tr)
6264 {
6265         int ret = 0;
6266
6267         mutex_lock(&trace_types_lock);
6268
6269         update_last_data(tr);
6270
6271         if (!tr->ring_buffer_expanded)
6272                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6273                                                 RING_BUFFER_ALL_CPUS);
6274         mutex_unlock(&trace_types_lock);
6275
6276         return ret;
6277 }
6278
6279 struct trace_option_dentry;
6280
6281 static void
6282 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6283
6284 /*
6285  * Used to clear out the tracer before deletion of an instance.
6286  * Must have trace_types_lock held.
6287  */
6288 static void tracing_set_nop(struct trace_array *tr)
6289 {
6290         if (tr->current_trace == &nop_trace)
6291                 return;
6292
6293         tr->current_trace->enabled--;
6294
6295         if (tr->current_trace->reset)
6296                 tr->current_trace->reset(tr);
6297
6298         tr->current_trace = &nop_trace;
6299 }
6300
6301 static bool tracer_options_updated;
6302
6303 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6304 {
6305         /* Only enable if the directory has been created already. */
6306         if (!tr->dir)
6307                 return;
6308
6309         /* Only create trace option files after update_tracer_options() finishes */
6310         if (!tracer_options_updated)
6311                 return;
6312
6313         create_trace_option_files(tr, t);
6314 }
6315
6316 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6317 {
6318         struct tracer *t;
6319 #ifdef CONFIG_TRACER_MAX_TRACE
6320         bool had_max_tr;
6321 #endif
6322         int ret;
6323
6324         guard(mutex)(&trace_types_lock);
6325
6326         update_last_data(tr);
6327
6328         if (!tr->ring_buffer_expanded) {
6329                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6330                                                 RING_BUFFER_ALL_CPUS);
6331                 if (ret < 0)
6332                         return ret;
6333                 ret = 0;
6334         }
6335
6336         for (t = trace_types; t; t = t->next) {
6337                 if (strcmp(t->name, buf) == 0)
6338                         break;
6339         }
6340         if (!t)
6341                 return -EINVAL;
6342
6343         if (t == tr->current_trace)
6344                 return 0;
6345
6346 #ifdef CONFIG_TRACER_SNAPSHOT
6347         if (t->use_max_tr) {
6348                 local_irq_disable();
6349                 arch_spin_lock(&tr->max_lock);
6350                 ret = tr->cond_snapshot ? -EBUSY : 0;
6351                 arch_spin_unlock(&tr->max_lock);
6352                 local_irq_enable();
6353                 if (ret)
6354                         return ret;
6355         }
6356 #endif
6357         /* Some tracers won't work on kernel command line */
6358         if (system_state < SYSTEM_RUNNING && t->noboot) {
6359                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6360                         t->name);
6361                 return -EINVAL;
6362         }
6363
6364         /* Some tracers are only allowed for the top level buffer */
6365         if (!trace_ok_for_array(t, tr))
6366                 return -EINVAL;
6367
6368         /* If trace pipe files are being read, we can't change the tracer */
6369         if (tr->trace_ref)
6370                 return -EBUSY;
6371
6372         trace_branch_disable();
6373
6374         tr->current_trace->enabled--;
6375
6376         if (tr->current_trace->reset)
6377                 tr->current_trace->reset(tr);
6378
6379 #ifdef CONFIG_TRACER_MAX_TRACE
6380         had_max_tr = tr->current_trace->use_max_tr;
6381
6382         /* Current trace needs to be nop_trace before synchronize_rcu */
6383         tr->current_trace = &nop_trace;
6384
6385         if (had_max_tr && !t->use_max_tr) {
6386                 /*
6387                  * We need to make sure that update_max_tr() sees that
6388                  * current_trace changed to nop_trace, to keep it from
6389                  * swapping the buffers after we resize it.
6390                  * update_max_tr() is called with interrupts disabled,
6391                  * so a synchronize_rcu() is sufficient.
6392                  */
6393                 synchronize_rcu();
6394                 free_snapshot(tr);
6395                 tracing_disarm_snapshot(tr);
6396         }
6397
6398         if (!had_max_tr && t->use_max_tr) {
6399                 ret = tracing_arm_snapshot_locked(tr);
6400                 if (ret)
6401                         return ret;
6402         }
6403 #else
6404         tr->current_trace = &nop_trace;
6405 #endif
6406
6407         if (t->init) {
6408                 ret = tracer_init(t, tr);
6409                 if (ret) {
6410 #ifdef CONFIG_TRACER_MAX_TRACE
6411                         if (t->use_max_tr)
6412                                 tracing_disarm_snapshot(tr);
6413 #endif
6414                         return ret;
6415                 }
6416         }
6417
6418         tr->current_trace = t;
6419         tr->current_trace->enabled++;
6420         trace_branch_enable(tr);
6421
6422         return 0;
6423 }
6424
6425 static ssize_t
6426 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6427                         size_t cnt, loff_t *ppos)
6428 {
6429         struct trace_array *tr = filp->private_data;
6430         char buf[MAX_TRACER_SIZE+1];
6431         char *name;
6432         size_t ret;
6433         int err;
6434
6435         ret = cnt;
6436
6437         if (cnt > MAX_TRACER_SIZE)
6438                 cnt = MAX_TRACER_SIZE;
6439
6440         if (copy_from_user(buf, ubuf, cnt))
6441                 return -EFAULT;
6442
6443         buf[cnt] = 0;
6444
6445         name = strim(buf);
6446
6447         err = tracing_set_tracer(tr, name);
6448         if (err)
6449                 return err;
6450
6451         *ppos += ret;
6452
6453         return ret;
6454 }
6455
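/*
 * Helpers for files that store a value in nanoseconds (tracing_thresh,
 * tracing_max_latency) but expose it in microseconds: reads convert
 * ns -> us and writes convert us -> ns.
 *
 * Illustrative usage, assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   # only record latencies greater than 100 usecs
 *   echo 100 > /sys/kernel/tracing/tracing_thresh
 */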
6456 static ssize_t
6457 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6458                    size_t cnt, loff_t *ppos)
6459 {
6460         char buf[64];
6461         int r;
6462
6463         r = snprintf(buf, sizeof(buf), "%ld\n",
6464                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6465         if (r > sizeof(buf))
6466                 r = sizeof(buf);
6467         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6468 }
6469
6470 static ssize_t
6471 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6472                     size_t cnt, loff_t *ppos)
6473 {
6474         unsigned long val;
6475         int ret;
6476
6477         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6478         if (ret)
6479                 return ret;
6480
6481         *ptr = val * 1000;
6482
6483         return cnt;
6484 }
6485
6486 static ssize_t
6487 tracing_thresh_read(struct file *filp, char __user *ubuf,
6488                     size_t cnt, loff_t *ppos)
6489 {
6490         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6491 }
6492
6493 static ssize_t
6494 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6495                      size_t cnt, loff_t *ppos)
6496 {
6497         struct trace_array *tr = filp->private_data;
6498         int ret;
6499
6500         guard(mutex)(&trace_types_lock);
6501         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6502         if (ret < 0)
6503                 return ret;
6504
6505         if (tr->current_trace->update_thresh) {
6506                 ret = tr->current_trace->update_thresh(tr);
6507                 if (ret < 0)
6508                         return ret;
6509         }
6510
6511         return cnt;
6512 }
6513
6514 #ifdef CONFIG_TRACER_MAX_TRACE
6515
6516 static ssize_t
6517 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6518                      size_t cnt, loff_t *ppos)
6519 {
6520         struct trace_array *tr = filp->private_data;
6521
6522         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6523 }
6524
6525 static ssize_t
6526 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6527                       size_t cnt, loff_t *ppos)
6528 {
6529         struct trace_array *tr = filp->private_data;
6530
6531         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6532 }
6533
6534 #endif
6535
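/*
 * Only one consumer may read a trace_pipe per CPU at a time. pipe_cpumask
 * tracks which per-CPU pipes are open: opening the "all CPUs" pipe requires
 * that no bit is set, and opening a per-CPU pipe requires that its bit is
 * clear (the "all CPUs" open sets every bit).
 */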
6536 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6537 {
6538         if (cpu == RING_BUFFER_ALL_CPUS) {
6539                 if (cpumask_empty(tr->pipe_cpumask)) {
6540                         cpumask_setall(tr->pipe_cpumask);
6541                         return 0;
6542                 }
6543         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6544                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6545                 return 0;
6546         }
6547         return -EBUSY;
6548 }
6549
6550 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6551 {
6552         if (cpu == RING_BUFFER_ALL_CPUS) {
6553                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6554                 cpumask_clear(tr->pipe_cpumask);
6555         } else {
6556                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6557                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6558         }
6559 }
6560
6561 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6562 {
6563         struct trace_array *tr = inode->i_private;
6564         struct trace_iterator *iter;
6565         int cpu;
6566         int ret;
6567
6568         ret = tracing_check_open_get_tr(tr);
6569         if (ret)
6570                 return ret;
6571
6572         mutex_lock(&trace_types_lock);
6573         cpu = tracing_get_cpu(inode);
6574         ret = open_pipe_on_cpu(tr, cpu);
6575         if (ret)
6576                 goto fail_pipe_on_cpu;
6577
6578         /* create a buffer to store the information to pass to userspace */
6579         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6580         if (!iter) {
6581                 ret = -ENOMEM;
6582                 goto fail_alloc_iter;
6583         }
6584
6585         trace_seq_init(&iter->seq);
6586         iter->trace = tr->current_trace;
6587
6588         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6589                 ret = -ENOMEM;
6590                 goto fail;
6591         }
6592
6593         /* trace pipe does not show start of buffer */
6594         cpumask_setall(iter->started);
6595
6596         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6597                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6598
6599         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6600         if (trace_clocks[tr->clock_id].in_ns)
6601                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6602
6603         iter->tr = tr;
6604         iter->array_buffer = &tr->array_buffer;
6605         iter->cpu_file = cpu;
6606         mutex_init(&iter->mutex);
6607         filp->private_data = iter;
6608
6609         if (iter->trace->pipe_open)
6610                 iter->trace->pipe_open(iter);
6611
6612         nonseekable_open(inode, filp);
6613
6614         tr->trace_ref++;
6615
6616         mutex_unlock(&trace_types_lock);
6617         return ret;
6618
6619 fail:
6620         kfree(iter);
6621 fail_alloc_iter:
6622         close_pipe_on_cpu(tr, cpu);
6623 fail_pipe_on_cpu:
6624         __trace_array_put(tr);
6625         mutex_unlock(&trace_types_lock);
6626         return ret;
6627 }
6628
6629 static int tracing_release_pipe(struct inode *inode, struct file *file)
6630 {
6631         struct trace_iterator *iter = file->private_data;
6632         struct trace_array *tr = inode->i_private;
6633
6634         mutex_lock(&trace_types_lock);
6635
6636         tr->trace_ref--;
6637
6638         if (iter->trace->pipe_close)
6639                 iter->trace->pipe_close(iter);
6640         close_pipe_on_cpu(tr, iter->cpu_file);
6641         mutex_unlock(&trace_types_lock);
6642
6643         free_trace_iter_content(iter);
6644         kfree(iter);
6645
6646         trace_array_put(tr);
6647
6648         return 0;
6649 }
6650
6651 static __poll_t
6652 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6653 {
6654         struct trace_array *tr = iter->tr;
6655
6656         /* Iterators are static; they should be either filled or empty */
6657         if (trace_buffer_iter(iter, iter->cpu_file))
6658                 return EPOLLIN | EPOLLRDNORM;
6659
6660         if (tr->trace_flags & TRACE_ITER_BLOCK)
6661                 /*
6662                  * Always select as readable when in blocking mode
6663                  */
6664                 return EPOLLIN | EPOLLRDNORM;
6665         else
6666                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6667                                              filp, poll_table, iter->tr->buffer_percent);
6668 }
6669
6670 static __poll_t
6671 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6672 {
6673         struct trace_iterator *iter = filp->private_data;
6674
6675         return trace_poll(iter, filp, poll_table);
6676 }
6677
6678 /* Must be called with iter->mutex held. */
6679 static int tracing_wait_pipe(struct file *filp)
6680 {
6681         struct trace_iterator *iter = filp->private_data;
6682         int ret;
6683
6684         while (trace_empty(iter)) {
6685
6686                 if (filp->f_flags & O_NONBLOCK)
6687                         return -EAGAIN;
6689
6690                 /*
6691                  * We block until we have read something and tracing is
6692                  * disabled; i.e. we keep blocking while tracing is disabled
6693                  * if nothing has been read yet. This allows a user to cat
6694                  * this file and then enable tracing. But after something has
6695                  * been read, we give an EOF when tracing is again disabled.
6696                  *
6697                  * iter->pos will be 0 if we haven't read anything.
6698                  */
6699                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6700                         break;
6701
6702                 mutex_unlock(&iter->mutex);
6703
6704                 ret = wait_on_pipe(iter, 0);
6705
6706                 mutex_lock(&iter->mutex);
6707
6708                 if (ret)
6709                         return ret;
6710         }
6711
6712         return 1;
6713 }
6714
6715 static bool update_last_data_if_empty(struct trace_array *tr)
6716 {
6717         if (!(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
6718                 return false;
6719
6720         if (!ring_buffer_empty(tr->array_buffer.buffer))
6721                 return false;
6722
6723         /*
6724          * If the buffer contains the last boot data and all per-cpu
6725          * buffers are empty, reset it from the kernel side.
6726          */
6727         update_last_data(tr);
6728         return true;
6729 }
6730
6731 /*
6732  * Consumer reader.
6733  */
6734 static ssize_t
6735 tracing_read_pipe(struct file *filp, char __user *ubuf,
6736                   size_t cnt, loff_t *ppos)
6737 {
6738         struct trace_iterator *iter = filp->private_data;
6739         ssize_t sret;
6740
6741         /*
6742          * Avoid more than one consumer on a single file descriptor.
6743          * This is just a matter of trace coherency; the ring buffer itself
6744          * is protected.
6745          */
6746         guard(mutex)(&iter->mutex);
6747
6748         /* return any leftover data */
6749         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6750         if (sret != -EBUSY)
6751                 return sret;
6752
6753         trace_seq_init(&iter->seq);
6754
6755         if (iter->trace->read) {
6756                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6757                 if (sret)
6758                         return sret;
6759         }
6760
6761 waitagain:
6762         if (update_last_data_if_empty(iter->tr))
6763                 return 0;
6764
6765         sret = tracing_wait_pipe(filp);
6766         if (sret <= 0)
6767                 return sret;
6768
6769         /* stop when tracing is finished */
6770         if (trace_empty(iter))
6771                 return 0;
6772
6773         if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6774                 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6775
6776         /* reset all but tr, trace, and overruns */
6777         trace_iterator_reset(iter);
6778         cpumask_clear(iter->started);
6779         trace_seq_init(&iter->seq);
6780
6781         trace_event_read_lock();
6782         trace_access_lock(iter->cpu_file);
6783         while (trace_find_next_entry_inc(iter) != NULL) {
6784                 enum print_line_t ret;
6785                 int save_len = iter->seq.seq.len;
6786
6787                 ret = print_trace_line(iter);
6788                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6789                         /*
6790                          * If one print_trace_line() fills the entire trace_seq in one
6791                          * shot, trace_seq_to_user() will return -EBUSY because
6792                          * save_len == 0. In this case we need to consume it, otherwise
6793                          * the loop will peek this event again, causing an infinite loop.
6794                          */
6795                         if (save_len == 0) {
6796                                 iter->seq.full = 0;
6797                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6798                                 trace_consume(iter);
6799                                 break;
6800                         }
6801
6802                         /* In other cases, don't print partial lines */
6803                         iter->seq.seq.len = save_len;
6804                         break;
6805                 }
6806                 if (ret != TRACE_TYPE_NO_CONSUME)
6807                         trace_consume(iter);
6808
6809                 if (trace_seq_used(&iter->seq) >= cnt)
6810                         break;
6811
6812                 /*
6813                  * Setting the full flag means we reached the trace_seq buffer
6814                  * size and should have left via the partial-output condition
6815                  * above. If not, one of the trace_seq_* functions was misused.
6816                  */
6817                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6818                           iter->ent->type);
6819         }
6820         trace_access_unlock(iter->cpu_file);
6821         trace_event_read_unlock();
6822
6823         /* Now copy what we have to the user */
6824         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6825         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6826                 trace_seq_init(&iter->seq);
6827
6828         /*
6829          * If there was nothing to send to user, in spite of consuming trace
6830          * entries, go back to wait for more entries.
6831          */
6832         if (sret == -EBUSY)
6833                 goto waitagain;
6834
6835         return sret;
6836 }
6837
6838 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6839                                      unsigned int idx)
6840 {
6841         __free_page(spd->pages[idx]);
6842 }
6843
6844 static size_t
6845 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6846 {
6847         size_t count;
6848         int save_len;
6849         int ret;
6850
6851         /* Seq buffer is page-sized, exactly what we need. */
6852         for (;;) {
6853                 save_len = iter->seq.seq.len;
6854                 ret = print_trace_line(iter);
6855
6856                 if (trace_seq_has_overflowed(&iter->seq)) {
6857                         iter->seq.seq.len = save_len;
6858                         break;
6859                 }
6860
6861                 /*
6862                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE
6863                  * should only be returned if iter->seq overflowed. But check
6864                  * it anyway to be safe.
6865                  */
6866                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6867                         iter->seq.seq.len = save_len;
6868                         break;
6869                 }
6870
6871                 count = trace_seq_used(&iter->seq) - save_len;
6872                 if (rem < count) {
6873                         rem = 0;
6874                         iter->seq.seq.len = save_len;
6875                         break;
6876                 }
6877
6878                 if (ret != TRACE_TYPE_NO_CONSUME)
6879                         trace_consume(iter);
6880                 rem -= count;
6881                 if (!trace_find_next_entry_inc(iter))   {
6882                         rem = 0;
6883                         iter->ent = NULL;
6884                         break;
6885                 }
6886         }
6887
6888         return rem;
6889 }
6890
6891 static ssize_t tracing_splice_read_pipe(struct file *filp,
6892                                         loff_t *ppos,
6893                                         struct pipe_inode_info *pipe,
6894                                         size_t len,
6895                                         unsigned int flags)
6896 {
6897         struct page *pages_def[PIPE_DEF_BUFFERS];
6898         struct partial_page partial_def[PIPE_DEF_BUFFERS];
6899         struct trace_iterator *iter = filp->private_data;
6900         struct splice_pipe_desc spd = {
6901                 .pages          = pages_def,
6902                 .partial        = partial_def,
6903                 .nr_pages       = 0, /* This gets updated below. */
6904                 .nr_pages_max   = PIPE_DEF_BUFFERS,
6905                 .ops            = &default_pipe_buf_ops,
6906                 .spd_release    = tracing_spd_release_pipe,
6907         };
6908         ssize_t ret;
6909         size_t rem;
6910         unsigned int i;
6911
6912         if (splice_grow_spd(pipe, &spd))
6913                 return -ENOMEM;
6914
6915         mutex_lock(&iter->mutex);
6916
6917         if (iter->trace->splice_read) {
6918                 ret = iter->trace->splice_read(iter, filp,
6919                                                ppos, pipe, len, flags);
6920                 if (ret)
6921                         goto out_err;
6922         }
6923
6924         ret = tracing_wait_pipe(filp);
6925         if (ret <= 0)
6926                 goto out_err;
6927
6928         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6929                 ret = -EFAULT;
6930                 goto out_err;
6931         }
6932
6933         trace_event_read_lock();
6934         trace_access_lock(iter->cpu_file);
6935
6936         /* Fill as many pages as possible. */
6937         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6938                 spd.pages[i] = alloc_page(GFP_KERNEL);
6939                 if (!spd.pages[i])
6940                         break;
6941
6942                 rem = tracing_fill_pipe_page(rem, iter);
6943
6944                 /* Copy the data into the page, so we can start over. */
6945                 ret = trace_seq_to_buffer(&iter->seq,
6946                                           page_address(spd.pages[i]),
6947                                           min((size_t)trace_seq_used(&iter->seq),
6948                                                   (size_t)PAGE_SIZE));
6949                 if (ret < 0) {
6950                         __free_page(spd.pages[i]);
6951                         break;
6952                 }
6953                 spd.partial[i].offset = 0;
6954                 spd.partial[i].len = ret;
6955
6956                 trace_seq_init(&iter->seq);
6957         }
6958
6959         trace_access_unlock(iter->cpu_file);
6960         trace_event_read_unlock();
6961         mutex_unlock(&iter->mutex);
6962
6963         spd.nr_pages = i;
6964
6965         if (i)
6966                 ret = splice_to_pipe(pipe, &spd);
6967         else
6968                 ret = 0;
6969 out:
6970         splice_shrink_spd(&spd);
6971         return ret;
6972
6973 out_err:
6974         mutex_unlock(&iter->mutex);
6975         goto out;
6976 }
6977
6978 static ssize_t
6979 tracing_entries_read(struct file *filp, char __user *ubuf,
6980                      size_t cnt, loff_t *ppos)
6981 {
6982         struct inode *inode = file_inode(filp);
6983         struct trace_array *tr = inode->i_private;
6984         int cpu = tracing_get_cpu(inode);
6985         char buf[64];
6986         int r = 0;
6987         ssize_t ret;
6988
6989         mutex_lock(&trace_types_lock);
6990
6991         if (cpu == RING_BUFFER_ALL_CPUS) {
6992                 int cpu, buf_size_same;
6993                 unsigned long size;
6994
6995                 size = 0;
6996                 buf_size_same = 1;
6997                 /* check if all cpu sizes are the same */
6998                 for_each_tracing_cpu(cpu) {
6999                         /* fill in the size from first enabled cpu */
7000                         if (size == 0)
7001                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7002                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7003                                 buf_size_same = 0;
7004                                 break;
7005                         }
7006                 }
7007
7008                 if (buf_size_same) {
7009                         if (!tr->ring_buffer_expanded)
7010                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7011                                             size >> 10,
7012                                             trace_buf_size >> 10);
7013                         else
7014                                 r = sprintf(buf, "%lu\n", size >> 10);
7015                 } else
7016                         r = sprintf(buf, "X\n");
7017         } else
7018                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7019
7020         mutex_unlock(&trace_types_lock);
7021
7022         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7023         return ret;
7024 }
7025
7026 static ssize_t
7027 tracing_entries_write(struct file *filp, const char __user *ubuf,
7028                       size_t cnt, loff_t *ppos)
7029 {
7030         struct inode *inode = file_inode(filp);
7031         struct trace_array *tr = inode->i_private;
7032         unsigned long val;
7033         int ret;
7034
7035         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7036         if (ret)
7037                 return ret;
7038
7039         /* must have at least 1 entry */
7040         if (!val)
7041                 return -EINVAL;
7042
7043         /* value is in KB */
7044         val <<= 10;
7045         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7046         if (ret < 0)
7047                 return ret;
7048
7049         *ppos += cnt;
7050
7051         return cnt;
7052 }
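
/*
 * Illustrative usage of the files backed by tracing_entries_fops, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *   # resize every per-CPU buffer to 4 MB
 *   echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *
 *   # resize only CPU 1's buffer
 *   echo 4096 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 *
 * The value is per CPU and in KB, matching the "val <<= 10" above.
 */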
7053
7054 static ssize_t
7055 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7056                                 size_t cnt, loff_t *ppos)
7057 {
7058         struct trace_array *tr = filp->private_data;
7059         char buf[64];
7060         int r, cpu;
7061         unsigned long size = 0, expanded_size = 0;
7062
7063         mutex_lock(&trace_types_lock);
7064         for_each_tracing_cpu(cpu) {
7065                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7066                 if (!tr->ring_buffer_expanded)
7067                         expanded_size += trace_buf_size >> 10;
7068         }
7069         if (tr->ring_buffer_expanded)
7070                 r = sprintf(buf, "%lu\n", size);
7071         else
7072                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7073         mutex_unlock(&trace_types_lock);
7074
7075         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7076 }
7077
7078 #define LAST_BOOT_HEADER ((void *)1)
7079
7080 static void *l_next(struct seq_file *m, void *v, loff_t *pos)
7081 {
7082         struct trace_array *tr = m->private;
7083         struct trace_scratch *tscratch = tr->scratch;
7084         unsigned int index = *pos;
7085
7086         (*pos)++;
7087
7088         if (*pos == 1)
7089                 return LAST_BOOT_HEADER;
7090
7091         /* Only show offsets of the last boot data */
7092         if (!tscratch || !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7093                 return NULL;
7094
7095         /* *pos 0 is for the header, 1 is for the first module */
7096         index--;
7097
7098         if (index >= tscratch->nr_entries)
7099                 return NULL;
7100
7101         return &tscratch->entries[index];
7102 }
7103
7104 static void *l_start(struct seq_file *m, loff_t *pos)
7105 {
7106         mutex_lock(&scratch_mutex);
7107
7108         return l_next(m, NULL, pos);
7109 }
7110
7111 static void l_stop(struct seq_file *m, void *p)
7112 {
7113         mutex_unlock(&scratch_mutex);
7114 }
7115
7116 static void show_last_boot_header(struct seq_file *m, struct trace_array *tr)
7117 {
7118         struct trace_scratch *tscratch = tr->scratch;
7119
7120         /*
7121          * Do not leak KASLR address. This only shows the KASLR address of
7122          * the last boot. When the ring buffer is started, the LAST_BOOT
7123          * flag gets cleared, and this should only report "current".
7124          * Otherwise it shows the KASLR address from the previous boot which
7125          * should not be the same as the current boot.
7126          */
7127         if (tscratch && (tr->flags & TRACE_ARRAY_FL_LAST_BOOT))
7128                 seq_printf(m, "%lx\t[kernel]\n", tscratch->text_addr);
7129         else
7130                 seq_puts(m, "# Current\n");
7131 }
7132
7133 static int l_show(struct seq_file *m, void *v)
7134 {
7135         struct trace_array *tr = m->private;
7136         struct trace_mod_entry *entry = v;
7137
7138         if (v == LAST_BOOT_HEADER) {
7139                 show_last_boot_header(m, tr);
7140                 return 0;
7141         }
7142
7143         seq_printf(m, "%lx\t%s\n", entry->mod_addr, entry->mod_name);
7144         return 0;
7145 }
7146
7147 static const struct seq_operations last_boot_seq_ops = {
7148         .start          = l_start,
7149         .next           = l_next,
7150         .stop           = l_stop,
7151         .show           = l_show,
7152 };
7153
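/*
 * Illustrative output of the last-boot info file when previous boot data is
 * present (addresses are made up):
 *
 *   ffffffff9a000000    [kernel]
 *   ffffffffc0a00000    ext4
 *   ffffffffc0c00000    xfs
 *
 * Once the buffer has been reused for the current boot, only "# Current"
 * is printed.
 */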
7154 static int tracing_last_boot_open(struct inode *inode, struct file *file)
7155 {
7156         struct trace_array *tr = inode->i_private;
7157         struct seq_file *m;
7158         int ret;
7159
7160         ret = tracing_check_open_get_tr(tr);
7161         if (ret)
7162                 return ret;
7163
7164         ret = seq_open(file, &last_boot_seq_ops);
7165         if (ret) {
7166                 trace_array_put(tr);
7167                 return ret;
7168         }
7169
7170         m = file->private_data;
7171         m->private = tr;
7172
7173         return 0;
7174 }
7175
7176 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7177 {
7178         struct trace_array *tr = inode->i_private;
7179         int cpu = tracing_get_cpu(inode);
7180         int ret;
7181
7182         ret = tracing_check_open_get_tr(tr);
7183         if (ret)
7184                 return ret;
7185
7186         ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7187         if (ret < 0)
7188                 __trace_array_put(tr);
7189         return ret;
7190 }
7191
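/*
 * Write handler for the "free_buffer" file: any write is accepted, and the
 * real work happens on release below, which shrinks the ring buffer to zero
 * (stopping tracing first if TRACE_ITER_STOP_ON_FREE is set). E.g.
 * "echo > free_buffer" from a shell.
 */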
7192 static ssize_t
7193 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7194                           size_t cnt, loff_t *ppos)
7195 {
7196         /*
7197          * There is no need to read what the user has written; this function
7198          * is just to make sure that there is no error when "echo" is used.
7199          */
7200
7201         *ppos += cnt;
7202
7203         return cnt;
7204 }
7205
7206 static int
7207 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7208 {
7209         struct trace_array *tr = inode->i_private;
7210
7211         /* disable tracing ? */
7212         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7213                 tracer_tracing_off(tr);
7214         /* resize the ring buffer to 0 */
7215         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7216
7217         trace_array_put(tr);
7218
7219         return 0;
7220 }
7221
7222 #define TRACE_MARKER_MAX_SIZE           4096
7223
7224 static ssize_t write_marker_to_buffer(struct trace_array *tr, const char __user *ubuf,
7225                                       size_t cnt, unsigned long ip)
7226 {
7227         struct ring_buffer_event *event;
7228         enum event_trigger_type tt = ETT_NONE;
7229         struct trace_buffer *buffer;
7230         struct print_entry *entry;
7231         int meta_size;
7232         ssize_t written;
7233         size_t size;
7234         int len;
7235
7236 /* Used in tracing_mark_raw_write() as well */
7237 #define FAULTED_STR "<faulted>"
7238 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7239
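        /*
         * Worked example (illustrative): for a 5 byte write "hello",
         * size is sizeof(*entry) + 5 + 2 -- room for the text plus a
         * possible '\n' and the terminating '\0'. A write shorter than
         * FAULTED_SIZE still reserves enough room for the "<faulted>"
         * string in case the user copy faults.
         */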
7240         meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7241  again:
7242         size = cnt + meta_size;
7243
7244         /* If less than "<faulted>", then make sure we can still add that */
7245         if (cnt < FAULTED_SIZE)
7246                 size += FAULTED_SIZE - cnt;
7247
7248         buffer = tr->array_buffer.buffer;
7249         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7250                                             tracing_gen_ctx());
7251         if (unlikely(!event)) {
7252                 /*
7253                  * If the size was greater than what was allowed, then
7254                  * make it smaller and try again.
7255                  */
7256                 if (size > ring_buffer_max_event_size(buffer)) {
7257                         /* a cnt less than FAULTED_SIZE should never produce a size bigger than max */
7258                         if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7259                                 return -EBADF;
7260                         cnt = ring_buffer_max_event_size(buffer) - meta_size;
7261                         /* The above should only happen once */
7262                         if (WARN_ON_ONCE(cnt + meta_size == size))
7263                                 return -EBADF;
7264                         goto again;
7265                 }
7266
7267                 /* Ring buffer disabled, return as if not open for write */
7268                 return -EBADF;
7269         }
7270
7271         entry = ring_buffer_event_data(event);
7272         entry->ip = ip;
7273
7274         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7275         if (len) {
7276                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7277                 cnt = FAULTED_SIZE;
7278                 written = -EFAULT;
7279         } else
7280                 written = cnt;
7281
7282         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7283                 /* do not add \n before testing triggers, but add \0 */
7284                 entry->buf[cnt] = '\0';
7285                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7286         }
7287
7288         if (entry->buf[cnt - 1] != '\n') {
7289                 entry->buf[cnt] = '\n';
7290                 entry->buf[cnt + 1] = '\0';
7291         } else
7292                 entry->buf[cnt] = '\0';
7293
7294         if (static_branch_unlikely(&trace_marker_exports_enabled))
7295                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7296         __buffer_unlock_commit(buffer, event);
7297
7298         if (tt)
7299                 event_triggers_post_call(tr->trace_marker_file, tt);
7300
7301         return written;
7302 }
7303
7304 static ssize_t
7305 tracing_mark_write(struct file *filp, const char __user *ubuf,
7306                                         size_t cnt, loff_t *fpos)
7307 {
7308         struct trace_array *tr = filp->private_data;
7309         ssize_t written = -ENODEV;
7310         unsigned long ip;
7311
7312         if (tracing_disabled)
7313                 return -EINVAL;
7314
7315         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7316                 return -EINVAL;
7317
7318         if ((ssize_t)cnt < 0)
7319                 return -EINVAL;
7320
7321         if (cnt > TRACE_MARKER_MAX_SIZE)
7322                 cnt = TRACE_MARKER_MAX_SIZE;
7323
7324         /* The selftests expect this function to be the IP address */
7325         ip = _THIS_IP_;
7326
7327         /* The global trace_marker can go to multiple instances */
7328         if (tr == &global_trace) {
7329                 guard(rcu)();
7330                 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7331                         written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7332                         if (written < 0)
7333                                 break;
7334                 }
7335         } else {
7336                 written = write_marker_to_buffer(tr, ubuf, cnt, ip);
7337         }
7338
7339         return written;
7340 }
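
/*
 * Illustrative usage of the "trace_marker" file, assuming the usual tracefs
 * mount point:
 *
 *   echo "hit the slow path" > /sys/kernel/tracing/trace_marker
 *
 * Long-running tools typically keep the fd open and write() to it directly.
 * Writes longer than TRACE_MARKER_MAX_SIZE are truncated.
 */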
7341
7342 static ssize_t write_raw_marker_to_buffer(struct trace_array *tr,
7343                                           const char __user *ubuf, size_t cnt)
7344 {
7345         struct ring_buffer_event *event;
7346         struct trace_buffer *buffer;
7347         struct raw_data_entry *entry;
7348         ssize_t written;
7349         int size;
7350         int len;
7351
7352 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7353
7354         size = sizeof(*entry) + cnt;
7355         if (cnt < FAULT_SIZE_ID)
7356                 size += FAULT_SIZE_ID - cnt;
7357
7358         buffer = tr->array_buffer.buffer;
7359
7360         if (size > ring_buffer_max_event_size(buffer))
7361                 return -EINVAL;
7362
7363         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7364                                             tracing_gen_ctx());
7365         if (!event)
7366                 /* Ring buffer disabled, return as if not open for write */
7367                 return -EBADF;
7368
7369         entry = ring_buffer_event_data(event);
7370
7371         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7372         if (len) {
7373                 entry->id = -1;
7374                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7375                 written = -EFAULT;
7376         } else
7377                 written = cnt;
7378
7379         __buffer_unlock_commit(buffer, event);
7380
7381         return written;
7382 }
7383
7384 static ssize_t
7385 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7386                                         size_t cnt, loff_t *fpos)
7387 {
7388         struct trace_array *tr = filp->private_data;
7389         ssize_t written = -ENODEV;
7392
7393         if (tracing_disabled)
7394                 return -EINVAL;
7395
7396         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7397                 return -EINVAL;
7398
7399         /* The marker must at least have a tag id */
7400         if (cnt < sizeof(unsigned int))
7401                 return -EINVAL;
7402
7403         /* The global trace_marker_raw can go to multiple instances */
7404         if (tr == &global_trace) {
7405                 guard(rcu)();
7406                 list_for_each_entry_rcu(tr, &marker_copies, marker_list) {
7407                         written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7408                         if (written < 0)
7409                                 break;
7410                 }
7411         } else {
7412                 written = write_raw_marker_to_buffer(tr, ubuf, cnt);
7413         }
7414
7415         return written;
7416 }
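
/*
 * Sketch of a "trace_marker_raw" write from userspace (illustrative; the
 * payload layout past the leading id is up to the tooling that decodes it):
 *
 *   struct { unsigned int id; char data[8]; } rec = { 42, "payload" };
 *   int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *   write(fd, &rec, sizeof(rec));
 *
 * The first sizeof(unsigned int) bytes are interpreted as the tag id.
 */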
7417
7418 static int tracing_clock_show(struct seq_file *m, void *v)
7419 {
7420         struct trace_array *tr = m->private;
7421         int i;
7422
7423         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7424                 seq_printf(m,
7425                         "%s%s%s%s", i ? " " : "",
7426                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7427                         i == tr->clock_id ? "]" : "");
7428         seq_putc(m, '\n');
7429
7430         return 0;
7431 }
7432
7433 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7434 {
7435         int i;
7436
7437         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7438                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7439                         break;
7440         }
7441         if (i == ARRAY_SIZE(trace_clocks))
7442                 return -EINVAL;
7443
7444         mutex_lock(&trace_types_lock);
7445
7446         tr->clock_id = i;
7447
7448         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7449
7450         /*
7451          * New clock may not be consistent with the previous clock.
7452          * Reset the buffer so that it doesn't have incomparable timestamps.
7453          */
7454         tracing_reset_online_cpus(&tr->array_buffer);
7455
7456 #ifdef CONFIG_TRACER_MAX_TRACE
7457         if (tr->max_buffer.buffer)
7458                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7459         tracing_reset_online_cpus(&tr->max_buffer);
7460 #endif
7461
7462         if (tr->scratch && !(tr->flags & TRACE_ARRAY_FL_LAST_BOOT)) {
7463                 struct trace_scratch *tscratch = tr->scratch;
7464
7465                 tscratch->clock_id = i;
7466         }
7467
7468         mutex_unlock(&trace_types_lock);
7469
7470         return 0;
7471 }
7472
7473 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7474                                    size_t cnt, loff_t *fpos)
7475 {
7476         struct seq_file *m = filp->private_data;
7477         struct trace_array *tr = m->private;
7478         char buf[64];
7479         const char *clockstr;
7480         int ret;
7481
7482         if (cnt >= sizeof(buf))
7483                 return -EINVAL;
7484
7485         if (copy_from_user(buf, ubuf, cnt))
7486                 return -EFAULT;
7487
7488         buf[cnt] = 0;
7489
7490         clockstr = strstrip(buf);
7491
7492         ret = tracing_set_clock(tr, clockstr);
7493         if (ret)
7494                 return ret;
7495
7496         *fpos += cnt;
7497
7498         return cnt;
7499 }
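
/*
 * Illustrative usage of the "trace_clock" file:
 *
 *   cat /sys/kernel/tracing/trace_clock      # bracketed name is the active clock
 *   echo mono > /sys/kernel/tracing/trace_clock
 *
 * Note that switching clocks resets the ring buffers, since timestamps
 * taken with different clocks are not comparable.
 */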
7500
7501 static int tracing_clock_open(struct inode *inode, struct file *file)
7502 {
7503         struct trace_array *tr = inode->i_private;
7504         int ret;
7505
7506         ret = tracing_check_open_get_tr(tr);
7507         if (ret)
7508                 return ret;
7509
7510         ret = single_open(file, tracing_clock_show, inode->i_private);
7511         if (ret < 0)
7512                 trace_array_put(tr);
7513
7514         return ret;
7515 }
7516
7517 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7518 {
7519         struct trace_array *tr = m->private;
7520
7521         mutex_lock(&trace_types_lock);
7522
7523         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7524                 seq_puts(m, "delta [absolute]\n");
7525         else
7526                 seq_puts(m, "[delta] absolute\n");
7527
7528         mutex_unlock(&trace_types_lock);
7529
7530         return 0;
7531 }
7532
7533 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7534 {
7535         struct trace_array *tr = inode->i_private;
7536         int ret;
7537
7538         ret = tracing_check_open_get_tr(tr);
7539         if (ret)
7540                 return ret;
7541
7542         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7543         if (ret < 0)
7544                 trace_array_put(tr);
7545
7546         return ret;
7547 }
7548
7549 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7550 {
7551         if (rbe == this_cpu_read(trace_buffered_event))
7552                 return ring_buffer_time_stamp(buffer);
7553
7554         return ring_buffer_event_time_stamp(buffer, rbe);
7555 }
7556
7557 /*
7558  * Set or disable using the per CPU trace_buffered_event when possible.
7559  */
7560 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7561 {
7562         guard(mutex)(&trace_types_lock);
7563
7564         if (set && tr->no_filter_buffering_ref++)
7565                 return 0;
7566
7567         if (!set) {
7568                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7569                         return -EINVAL;
7570
7571                 --tr->no_filter_buffering_ref;
7572         }
7573
7574         return 0;
7575 }
7576
7577 struct ftrace_buffer_info {
7578         struct trace_iterator   iter;
7579         void                    *spare;
7580         unsigned int            spare_cpu;
7581         unsigned int            spare_size;
7582         unsigned int            read;
7583 };
7584
7585 #ifdef CONFIG_TRACER_SNAPSHOT
7586 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7587 {
7588         struct trace_array *tr = inode->i_private;
7589         struct trace_iterator *iter;
7590         struct seq_file *m;
7591         int ret;
7592
7593         ret = tracing_check_open_get_tr(tr);
7594         if (ret)
7595                 return ret;
7596
7597         if (file->f_mode & FMODE_READ) {
7598                 iter = __tracing_open(inode, file, true);
7599                 if (IS_ERR(iter))
7600                         ret = PTR_ERR(iter);
7601         } else {
7602                 /* Writes still need the seq_file to hold the private data */
7603                 ret = -ENOMEM;
7604                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7605                 if (!m)
7606                         goto out;
7607                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7608                 if (!iter) {
7609                         kfree(m);
7610                         goto out;
7611                 }
7612                 ret = 0;
7613
7614                 iter->tr = tr;
7615                 iter->array_buffer = &tr->max_buffer;
7616                 iter->cpu_file = tracing_get_cpu(inode);
7617                 m->private = iter;
7618                 file->private_data = m;
7619         }
7620 out:
7621         if (ret < 0)
7622                 trace_array_put(tr);
7623
7624         return ret;
7625 }
7626
7627 static void tracing_swap_cpu_buffer(void *tr)
7628 {
7629         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7630 }
7631
7632 static ssize_t
7633 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7634                        loff_t *ppos)
7635 {
7636         struct seq_file *m = filp->private_data;
7637         struct trace_iterator *iter = m->private;
7638         struct trace_array *tr = iter->tr;
7639         unsigned long val;
7640         int ret;
7641
7642         ret = tracing_update_buffers(tr);
7643         if (ret < 0)
7644                 return ret;
7645
7646         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7647         if (ret)
7648                 return ret;
7649
7650         guard(mutex)(&trace_types_lock);
7651
7652         if (tr->current_trace->use_max_tr)
7653                 return -EBUSY;
7654
7655         local_irq_disable();
7656         arch_spin_lock(&tr->max_lock);
7657         if (tr->cond_snapshot)
7658                 ret = -EBUSY;
7659         arch_spin_unlock(&tr->max_lock);
7660         local_irq_enable();
7661         if (ret)
7662                 return ret;
7663
7664         switch (val) {
7665         case 0:
7666                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7667                         return -EINVAL;
7668                 if (tr->allocated_snapshot)
7669                         free_snapshot(tr);
7670                 break;
7671         case 1:
7672 /* Only allow per-cpu swap if the ring buffer supports it */
7673 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7674                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7675                         return -EINVAL;
7676 #endif
7677                 if (tr->allocated_snapshot)
7678                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7679                                         &tr->array_buffer, iter->cpu_file);
7680
7681                 ret = tracing_arm_snapshot_locked(tr);
7682                 if (ret)
7683                         return ret;
7684
7685                 /* Now, we're going to swap */
7686                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7687                         local_irq_disable();
7688                         update_max_tr(tr, current, smp_processor_id(), NULL);
7689                         local_irq_enable();
7690                 } else {
7691                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7692                                                  (void *)tr, 1);
7693                 }
7694                 tracing_disarm_snapshot(tr);
7695                 break;
7696         default:
7697                 if (tr->allocated_snapshot) {
7698                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7699                                 tracing_reset_online_cpus(&tr->max_buffer);
7700                         else
7701                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7702                 }
7703                 break;
7704         }
7705
7706         if (ret >= 0) {
7707                 *ppos += cnt;
7708                 ret = cnt;
7709         }
7710
7711         return ret;
7712 }
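
/*
 * Semantics of writes to the "snapshot" file, as handled above
 * (illustrative):
 *
 *   echo 0 > snapshot    # free the snapshot buffer
 *   echo 1 > snapshot    # allocate it (if needed) and take a snapshot
 *   echo 2 > snapshot    # clear the snapshot contents, keep the buffer
 *
 * Any value other than 0 or 1 only clears an already allocated snapshot.
 */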
7713
7714 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7715 {
7716         struct seq_file *m = file->private_data;
7717         int ret;
7718
7719         ret = tracing_release(inode, file);
7720
7721         if (file->f_mode & FMODE_READ)
7722                 return ret;
7723
7724         /* If write only, the seq_file is just a stub */
7725         if (m)
7726                 kfree(m->private);
7727         kfree(m);
7728
7729         return 0;
7730 }
7731
7732 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7733 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7734                                     size_t count, loff_t *ppos);
7735 static int tracing_buffers_release(struct inode *inode, struct file *file);
7736 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7737                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7738
7739 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7740 {
7741         struct ftrace_buffer_info *info;
7742         int ret;
7743
7744         /* The following checks for tracefs lockdown */
7745         ret = tracing_buffers_open(inode, filp);
7746         if (ret < 0)
7747                 return ret;
7748
7749         info = filp->private_data;
7750
7751         if (info->iter.trace->use_max_tr) {
7752                 tracing_buffers_release(inode, filp);
7753                 return -EBUSY;
7754         }
7755
7756         info->iter.snapshot = true;
7757         info->iter.array_buffer = &info->iter.tr->max_buffer;
7758
7759         return ret;
7760 }
7761
7762 #endif /* CONFIG_TRACER_SNAPSHOT */
7763
7764
7765 static const struct file_operations tracing_thresh_fops = {
7766         .open           = tracing_open_generic,
7767         .read           = tracing_thresh_read,
7768         .write          = tracing_thresh_write,
7769         .llseek         = generic_file_llseek,
7770 };
7771
7772 #ifdef CONFIG_TRACER_MAX_TRACE
7773 static const struct file_operations tracing_max_lat_fops = {
7774         .open           = tracing_open_generic_tr,
7775         .read           = tracing_max_lat_read,
7776         .write          = tracing_max_lat_write,
7777         .llseek         = generic_file_llseek,
7778         .release        = tracing_release_generic_tr,
7779 };
7780 #endif
7781
7782 static const struct file_operations set_tracer_fops = {
7783         .open           = tracing_open_generic_tr,
7784         .read           = tracing_set_trace_read,
7785         .write          = tracing_set_trace_write,
7786         .llseek         = generic_file_llseek,
7787         .release        = tracing_release_generic_tr,
7788 };
7789
7790 static const struct file_operations tracing_pipe_fops = {
7791         .open           = tracing_open_pipe,
7792         .poll           = tracing_poll_pipe,
7793         .read           = tracing_read_pipe,
7794         .splice_read    = tracing_splice_read_pipe,
7795         .release        = tracing_release_pipe,
7796 };
7797
7798 static const struct file_operations tracing_entries_fops = {
7799         .open           = tracing_open_generic_tr,
7800         .read           = tracing_entries_read,
7801         .write          = tracing_entries_write,
7802         .llseek         = generic_file_llseek,
7803         .release        = tracing_release_generic_tr,
7804 };
7805
7806 static const struct file_operations tracing_buffer_meta_fops = {
7807         .open           = tracing_buffer_meta_open,
7808         .read           = seq_read,
7809         .llseek         = seq_lseek,
7810         .release        = tracing_seq_release,
7811 };
7812
7813 static const struct file_operations tracing_total_entries_fops = {
7814         .open           = tracing_open_generic_tr,
7815         .read           = tracing_total_entries_read,
7816         .llseek         = generic_file_llseek,
7817         .release        = tracing_release_generic_tr,
7818 };
7819
7820 static const struct file_operations tracing_free_buffer_fops = {
7821         .open           = tracing_open_generic_tr,
7822         .write          = tracing_free_buffer_write,
7823         .release        = tracing_free_buffer_release,
7824 };
7825
7826 static const struct file_operations tracing_mark_fops = {
7827         .open           = tracing_mark_open,
7828         .write          = tracing_mark_write,
7829         .release        = tracing_release_generic_tr,
7830 };
7831
7832 static const struct file_operations tracing_mark_raw_fops = {
7833         .open           = tracing_mark_open,
7834         .write          = tracing_mark_raw_write,
7835         .release        = tracing_release_generic_tr,
7836 };
7837
7838 static const struct file_operations trace_clock_fops = {
7839         .open           = tracing_clock_open,
7840         .read           = seq_read,
7841         .llseek         = seq_lseek,
7842         .release        = tracing_single_release_tr,
7843         .write          = tracing_clock_write,
7844 };
7845
7846 static const struct file_operations trace_time_stamp_mode_fops = {
7847         .open           = tracing_time_stamp_mode_open,
7848         .read           = seq_read,
7849         .llseek         = seq_lseek,
7850         .release        = tracing_single_release_tr,
7851 };
7852
7853 static const struct file_operations last_boot_fops = {
7854         .open           = tracing_last_boot_open,
7855         .read           = seq_read,
7856         .llseek         = seq_lseek,
7857         .release        = tracing_seq_release,
7858 };
7859
7860 #ifdef CONFIG_TRACER_SNAPSHOT
7861 static const struct file_operations snapshot_fops = {
7862         .open           = tracing_snapshot_open,
7863         .read           = seq_read,
7864         .write          = tracing_snapshot_write,
7865         .llseek         = tracing_lseek,
7866         .release        = tracing_snapshot_release,
7867 };
7868
7869 static const struct file_operations snapshot_raw_fops = {
7870         .open           = snapshot_raw_open,
7871         .read           = tracing_buffers_read,
7872         .release        = tracing_buffers_release,
7873         .splice_read    = tracing_buffers_splice_read,
7874 };
7875
7876 #endif /* CONFIG_TRACER_SNAPSHOT */
7877
7878 /*
7879  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7880  * @filp: The active open file structure
7881  * @ubuf: The user space provided buffer containing the value to write
7882  * @cnt: The number of bytes to write
7883  * @ppos: The current "file" position
7884  *
7885  * This function implements the write interface for a struct trace_min_max_param.
7886  * The filp->private_data must point to a trace_min_max_param structure that
7887  * defines where to write the value, the min and the max acceptable values,
7888  * and a lock to protect the write.
7889  */
7890 static ssize_t
7891 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7892 {
7893         struct trace_min_max_param *param = filp->private_data;
7894         u64 val;
7895         int err;
7896
7897         if (!param)
7898                 return -EFAULT;
7899
7900         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7901         if (err)
7902                 return err;
7903
7904         if (param->lock)
7905                 mutex_lock(param->lock);
7906
7907         if (param->min && val < *param->min)
7908                 err = -EINVAL;
7909
7910         if (param->max && val > *param->max)
7911                 err = -EINVAL;
7912
7913         if (!err)
7914                 *param->val = val;
7915
7916         if (param->lock)
7917                 mutex_unlock(param->lock);
7918
7919         if (err)
7920                 return err;
7921
7922         return cnt;
7923 }
7924
7925 /*
7926  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7927  * @filp: The active open file structure
7928  * @ubuf: The userspace provided buffer to read value into
7929  * @cnt: The maximum number of bytes to read
7930  * @ppos: The current "file" position
7931  *
7932  * This function implements the read interface for a struct trace_min_max_param.
7933  * The filp->private_data must point to a trace_min_max_param struct with valid
7934  * data.
7935  */
7936 static ssize_t
7937 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7938 {
7939         struct trace_min_max_param *param = filp->private_data;
7940         char buf[U64_STR_SIZE];
7941         int len;
7942         u64 val;
7943
7944         if (!param)
7945                 return -EFAULT;
7946
7947         val = *param->val;
7948
7949         if (cnt > sizeof(buf))
7950                 cnt = sizeof(buf);
7951
7952         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7953
7954         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7955 }
7956
7957 const struct file_operations trace_min_max_fops = {
7958         .open           = tracing_open_generic,
7959         .read           = trace_min_max_read,
7960         .write          = trace_min_max_write,
7961 };
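
/*
 * A minimal sketch of how a parameter might be wired up to the fops
 * above (all "my_*" names are hypothetical, not taken from this file;
 * see struct trace_min_max_param in trace.h):
 *
 *	static u64 my_val = 50;
 *	static u64 my_min = 1;
 *	static u64 my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_param", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */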
7962
7963 #define TRACING_LOG_ERRS_MAX    8
7964 #define TRACING_LOG_LOC_MAX     128
7965
7966 #define CMD_PREFIX "  Command: "
7967
7968 struct err_info {
7969         const char      **errs; /* ptr to loc-specific array of err strings */
7970         u8              type;   /* index into errs -> specific err string */
7971         u16             pos;    /* caret position */
7972         u64             ts;
7973 };
7974
7975 struct tracing_log_err {
7976         struct list_head        list;
7977         struct err_info         info;
7978         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7979         char                    *cmd;                     /* what caused err */
7980 };
7981
7982 static DEFINE_MUTEX(tracing_err_log_lock);
7983
7984 static struct tracing_log_err *alloc_tracing_log_err(int len)
7985 {
7986         struct tracing_log_err *err;
7987
7988         err = kzalloc(sizeof(*err), GFP_KERNEL);
7989         if (!err)
7990                 return ERR_PTR(-ENOMEM);
7991
7992         err->cmd = kzalloc(len, GFP_KERNEL);
7993         if (!err->cmd) {
7994                 kfree(err);
7995                 return ERR_PTR(-ENOMEM);
7996         }
7997
7998         return err;
7999 }
8000
8001 static void free_tracing_log_err(struct tracing_log_err *err)
8002 {
8003         kfree(err->cmd);
8004         kfree(err);
8005 }
8006
8007 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8008                                                    int len)
8009 {
8010         struct tracing_log_err *err;
8011         char *cmd;
8012
8013         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8014                 err = alloc_tracing_log_err(len);
8015                 if (PTR_ERR(err) != -ENOMEM)
8016                         tr->n_err_log_entries++;
8017
8018                 return err;
8019         }
8020         cmd = kzalloc(len, GFP_KERNEL);
8021         if (!cmd)
8022                 return ERR_PTR(-ENOMEM);
8023         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8024         kfree(err->cmd);
8025         err->cmd = cmd;
8026         list_del(&err->list);
8027
8028         return err;
8029 }
8030
8031 /**
8032  * err_pos - find the position of a string within a command for error careting
8033  * @cmd: The tracing command that caused the error
8034  * @str: The string to position the caret at within @cmd
8035  *
8036  * Finds the position of the first occurrence of @str within @cmd.  The
8037  * return value can be passed to tracing_log_err() for caret placement
8038  * within @cmd.
8039  *
8040  * Returns the index within @cmd of the first occurrence of @str or 0
8041  * if @str was not found.
8042  */
8043 unsigned int err_pos(char *cmd, const char *str)
8044 {
8045         char *found;
8046
8047         if (WARN_ON(!strlen(cmd)))
8048                 return 0;
8049
8050         found = strstr(cmd, str);
8051         if (found)
8052                 return found - cmd;
8053
8054         return 0;
8055 }
8056
8057 /**
8058  * tracing_log_err - write an error to the tracing error log
8059  * @tr: The associated trace array for the error (NULL for top level array)
8060  * @loc: A string describing where the error occurred
8061  * @cmd: The tracing command that caused the error
8062  * @errs: The array of loc-specific static error strings
8063  * @type: The index into errs[], which produces the specific static err string
8064  * @pos: The position the caret should be placed in the cmd
8065  *
8066  * Writes an error into tracing/error_log of the form:
8067  *
8068  * <loc>: error: <text>
8069  *   Command: <cmd>
8070  *              ^
8071  *
8072  * tracing/error_log is a small log file containing the last
8073  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8074  * unless there has been a tracing error, and the error log can be
8075  * cleared, and its memory freed, by writing the empty string to it in
8076  * truncation mode, i.e. echo > tracing/error_log.
8077  *
8078  * NOTE: the @errs array along with the @type param are used to
8079  * produce a static error string - this string is not copied and saved
8080  * when the error is logged - only a pointer to it is saved.  See
8081  * existing callers for examples of how static strings are typically
8082  * defined for use with tracing_log_err().
8083  */
8084 void tracing_log_err(struct trace_array *tr,
8085                      const char *loc, const char *cmd,
8086                      const char **errs, u8 type, u16 pos)
8087 {
8088         struct tracing_log_err *err;
8089         int len = 0;
8090
8091         if (!tr)
8092                 tr = &global_trace;
8093
8094         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8095
8096         guard(mutex)(&tracing_err_log_lock);
8097
8098         err = get_tracing_log_err(tr, len);
8099         if (PTR_ERR(err) == -ENOMEM)
8100                 return;
8101
8102         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8103         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8104
8105         err->info.errs = errs;
8106         err->info.type = type;
8107         err->info.pos = pos;
8108         err->info.ts = local_clock();
8109
8110         list_add_tail(&err->list, &tr->err_log);
8111 }
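
/*
 * A sketch of the typical caller pattern (the names below are
 * hypothetical; see the existing callers mentioned above for real
 * examples):
 *
 *	static const char *my_errs[] = {
 *		"Unknown keyword",
 *		"Missing argument",
 *	};
 *
 *	tracing_log_err(tr, "my_subsys", cmd, my_errs, 0,
 *			err_pos(cmd, "badkey"));
 *
 * which logs error string 0 ("Unknown keyword") with the caret placed
 * under the first occurrence of "badkey" in the failing command cmd.
 */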
8112
8113 static void clear_tracing_err_log(struct trace_array *tr)
8114 {
8115         struct tracing_log_err *err, *next;
8116
8117         mutex_lock(&tracing_err_log_lock);
8118         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8119                 list_del(&err->list);
8120                 free_tracing_log_err(err);
8121         }
8122
8123         tr->n_err_log_entries = 0;
8124         mutex_unlock(&tracing_err_log_lock);
8125 }
8126
8127 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8128 {
8129         struct trace_array *tr = m->private;
8130
8131         mutex_lock(&tracing_err_log_lock);
8132
8133         return seq_list_start(&tr->err_log, *pos);
8134 }
8135
8136 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8137 {
8138         struct trace_array *tr = m->private;
8139
8140         return seq_list_next(v, &tr->err_log, pos);
8141 }
8142
8143 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8144 {
8145         mutex_unlock(&tracing_err_log_lock);
8146 }
8147
8148 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8149 {
8150         u16 i;
8151
8152         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8153                 seq_putc(m, ' ');
8154         for (i = 0; i < pos; i++)
8155                 seq_putc(m, ' ');
8156         seq_puts(m, "^\n");
8157 }
8158
8159 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8160 {
8161         struct tracing_log_err *err = v;
8162
8163         if (err) {
8164                 const char *err_text = err->info.errs[err->info.type];
8165                 u64 sec = err->info.ts;
8166                 u32 nsec;
8167
8168                 nsec = do_div(sec, NSEC_PER_SEC);
8169                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8170                            err->loc, err_text);
8171                 seq_printf(m, "%s", err->cmd);
8172                 tracing_err_log_show_pos(m, err->info.pos);
8173         }
8174
8175         return 0;
8176 }
8177
8178 static const struct seq_operations tracing_err_log_seq_ops = {
8179         .start  = tracing_err_log_seq_start,
8180         .next   = tracing_err_log_seq_next,
8181         .stop   = tracing_err_log_seq_stop,
8182         .show   = tracing_err_log_seq_show
8183 };
8184
8185 static int tracing_err_log_open(struct inode *inode, struct file *file)
8186 {
8187         struct trace_array *tr = inode->i_private;
8188         int ret = 0;
8189
8190         ret = tracing_check_open_get_tr(tr);
8191         if (ret)
8192                 return ret;
8193
8194         /* If this file was opened for write, then erase contents */
8195         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8196                 clear_tracing_err_log(tr);
8197
8198         if (file->f_mode & FMODE_READ) {
8199                 ret = seq_open(file, &tracing_err_log_seq_ops);
8200                 if (!ret) {
8201                         struct seq_file *m = file->private_data;
8202                         m->private = tr;
8203                 } else {
8204                         trace_array_put(tr);
8205                 }
8206         }
8207         return ret;
8208 }
8209
8210 static ssize_t tracing_err_log_write(struct file *file,
8211                                      const char __user *buffer,
8212                                      size_t count, loff_t *ppos)
8213 {
8214         return count;
8215 }
8216
8217 static int tracing_err_log_release(struct inode *inode, struct file *file)
8218 {
8219         struct trace_array *tr = inode->i_private;
8220
8221         trace_array_put(tr);
8222
8223         if (file->f_mode & FMODE_READ)
8224                 seq_release(inode, file);
8225
8226         return 0;
8227 }
8228
8229 static const struct file_operations tracing_err_log_fops = {
8230         .open           = tracing_err_log_open,
8231         .write          = tracing_err_log_write,
8232         .read           = seq_read,
8233         .llseek         = tracing_lseek,
8234         .release        = tracing_err_log_release,
8235 };
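
/*
 * Sketch of how the error log is consumed from user space, assuming
 * tracefs is mounted at /sys/kernel/tracing:
 *
 *	cat /sys/kernel/tracing/error_log	(show the last errors)
 *	echo > /sys/kernel/tracing/error_log	(clear the log, free its memory)
 *
 * Clearing works because the shell redirection opens the file with
 * O_TRUNC, which triggers clear_tracing_err_log() in the open path above.
 */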
8236
8237 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8238 {
8239         struct trace_array *tr = inode->i_private;
8240         struct ftrace_buffer_info *info;
8241         int ret;
8242
8243         ret = tracing_check_open_get_tr(tr);
8244         if (ret)
8245                 return ret;
8246
8247         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8248         if (!info) {
8249                 trace_array_put(tr);
8250                 return -ENOMEM;
8251         }
8252
8253         mutex_lock(&trace_types_lock);
8254
8255         info->iter.tr           = tr;
8256         info->iter.cpu_file     = tracing_get_cpu(inode);
8257         info->iter.trace        = tr->current_trace;
8258         info->iter.array_buffer = &tr->array_buffer;
8259         info->spare             = NULL;
8260         /* Force reading ring buffer for first read */
8261         info->read              = (unsigned int)-1;
8262
8263         filp->private_data = info;
8264
8265         tr->trace_ref++;
8266
8267         mutex_unlock(&trace_types_lock);
8268
8269         ret = nonseekable_open(inode, filp);
8270         if (ret < 0)
8271                 trace_array_put(tr);
8272
8273         return ret;
8274 }
8275
8276 static __poll_t
8277 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8278 {
8279         struct ftrace_buffer_info *info = filp->private_data;
8280         struct trace_iterator *iter = &info->iter;
8281
8282         return trace_poll(iter, filp, poll_table);
8283 }
8284
8285 static ssize_t
8286 tracing_buffers_read(struct file *filp, char __user *ubuf,
8287                      size_t count, loff_t *ppos)
8288 {
8289         struct ftrace_buffer_info *info = filp->private_data;
8290         struct trace_iterator *iter = &info->iter;
8291         void *trace_data;
8292         int page_size;
8293         ssize_t ret = 0;
8294         ssize_t size;
8295
8296         if (!count)
8297                 return 0;
8298
8299 #ifdef CONFIG_TRACER_MAX_TRACE
8300         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8301                 return -EBUSY;
8302 #endif
8303
8304         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8305
8306         /* Make sure the spare matches the current sub buffer size */
8307         if (info->spare) {
8308                 if (page_size != info->spare_size) {
8309                         ring_buffer_free_read_page(iter->array_buffer->buffer,
8310                                                    info->spare_cpu, info->spare);
8311                         info->spare = NULL;
8312                 }
8313         }
8314
8315         if (!info->spare) {
8316                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8317                                                           iter->cpu_file);
8318                 if (IS_ERR(info->spare)) {
8319                         ret = PTR_ERR(info->spare);
8320                         info->spare = NULL;
8321                 } else {
8322                         info->spare_cpu = iter->cpu_file;
8323                         info->spare_size = page_size;
8324                 }
8325         }
8326         if (!info->spare)
8327                 return ret;
8328
8329         /* Do we have previous read data to read? */
8330         if (info->read < page_size)
8331                 goto read;
8332
8333  again:
8334         trace_access_lock(iter->cpu_file);
8335         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8336                                     info->spare,
8337                                     count,
8338                                     iter->cpu_file, 0);
8339         trace_access_unlock(iter->cpu_file);
8340
8341         if (ret < 0) {
8342                 if (trace_empty(iter) && !iter->closed) {
8343                         if (update_last_data_if_empty(iter->tr))
8344                                 return 0;
8345
8346                         if ((filp->f_flags & O_NONBLOCK))
8347                                 return -EAGAIN;
8348
8349                         ret = wait_on_pipe(iter, 0);
8350                         if (ret)
8351                                 return ret;
8352
8353                         goto again;
8354                 }
8355                 return 0;
8356         }
8357
8358         info->read = 0;
8359  read:
8360         size = page_size - info->read;
8361         if (size > count)
8362                 size = count;
8363         trace_data = ring_buffer_read_page_data(info->spare);
8364         ret = copy_to_user(ubuf, trace_data + info->read, size);
8365         if (ret == size)
8366                 return -EFAULT;
8367
8368         size -= ret;
8369
8370         *ppos += size;
8371         info->read += size;
8372
8373         return size;
8374 }
8375
8376 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8377 {
8378         struct ftrace_buffer_info *info = file->private_data;
8379         struct trace_iterator *iter = &info->iter;
8380
8381         iter->closed = true;
8382         /* Make sure the waiters see the new wait_index */
8383         (void)atomic_fetch_inc_release(&iter->wait_index);
8384
8385         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8386
8387         return 0;
8388 }
8389
8390 static int tracing_buffers_release(struct inode *inode, struct file *file)
8391 {
8392         struct ftrace_buffer_info *info = file->private_data;
8393         struct trace_iterator *iter = &info->iter;
8394
8395         mutex_lock(&trace_types_lock);
8396
8397         iter->tr->trace_ref--;
8398
8399         __trace_array_put(iter->tr);
8400
8401         if (info->spare)
8402                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8403                                            info->spare_cpu, info->spare);
8404         kvfree(info);
8405
8406         mutex_unlock(&trace_types_lock);
8407
8408         return 0;
8409 }
8410
8411 struct buffer_ref {
8412         struct trace_buffer     *buffer;
8413         void                    *page;
8414         int                     cpu;
8415         refcount_t              refcount;
8416 };
8417
8418 static void buffer_ref_release(struct buffer_ref *ref)
8419 {
8420         if (!refcount_dec_and_test(&ref->refcount))
8421                 return;
8422         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8423         kfree(ref);
8424 }
8425
8426 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8427                                     struct pipe_buffer *buf)
8428 {
8429         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8430
8431         buffer_ref_release(ref);
8432         buf->private = 0;
8433 }
8434
8435 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8436                                 struct pipe_buffer *buf)
8437 {
8438         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8439
8440         if (refcount_read(&ref->refcount) > INT_MAX/2)
8441                 return false;
8442
8443         refcount_inc(&ref->refcount);
8444         return true;
8445 }
8446
8447 /* Pipe buffer operations for a buffer. */
8448 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8449         .release                = buffer_pipe_buf_release,
8450         .get                    = buffer_pipe_buf_get,
8451 };
8452
8453 /*
8454  * Callback from splice_to_pipe(), used to release the pages still
8455  * referenced by the spd if we errored out while filling the pipe.
8456  */
8457 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8458 {
8459         struct buffer_ref *ref =
8460                 (struct buffer_ref *)spd->partial[i].private;
8461
8462         buffer_ref_release(ref);
8463         spd->partial[i].private = 0;
8464 }
8465
8466 static ssize_t
8467 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8468                             struct pipe_inode_info *pipe, size_t len,
8469                             unsigned int flags)
8470 {
8471         struct ftrace_buffer_info *info = file->private_data;
8472         struct trace_iterator *iter = &info->iter;
8473         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8474         struct page *pages_def[PIPE_DEF_BUFFERS];
8475         struct splice_pipe_desc spd = {
8476                 .pages          = pages_def,
8477                 .partial        = partial_def,
8478                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8479                 .ops            = &buffer_pipe_buf_ops,
8480                 .spd_release    = buffer_spd_release,
8481         };
8482         struct buffer_ref *ref;
8483         bool woken = false;
8484         int page_size;
8485         int entries, i;
8486         ssize_t ret = 0;
8487
8488 #ifdef CONFIG_TRACER_MAX_TRACE
8489         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8490                 return -EBUSY;
8491 #endif
8492
8493         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8494         if (*ppos & (page_size - 1))
8495                 return -EINVAL;
8496
8497         if (len & (page_size - 1)) {
8498                 if (len < page_size)
8499                         return -EINVAL;
8500                 len &= (~(page_size - 1));
8501         }
8502
8503         if (splice_grow_spd(pipe, &spd))
8504                 return -ENOMEM;
8505
8506  again:
8507         trace_access_lock(iter->cpu_file);
8508         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8509
8510         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8511                 struct page *page;
8512                 int r;
8513
8514                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8515                 if (!ref) {
8516                         ret = -ENOMEM;
8517                         break;
8518                 }
8519
8520                 refcount_set(&ref->refcount, 1);
8521                 ref->buffer = iter->array_buffer->buffer;
8522                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8523                 if (IS_ERR(ref->page)) {
8524                         ret = PTR_ERR(ref->page);
8525                         ref->page = NULL;
8526                         kfree(ref);
8527                         break;
8528                 }
8529                 ref->cpu = iter->cpu_file;
8530
8531                 r = ring_buffer_read_page(ref->buffer, ref->page,
8532                                           len, iter->cpu_file, 1);
8533                 if (r < 0) {
8534                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8535                                                    ref->page);
8536                         kfree(ref);
8537                         break;
8538                 }
8539
8540                 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8541
8542                 spd.pages[i] = page;
8543                 spd.partial[i].len = page_size;
8544                 spd.partial[i].offset = 0;
8545                 spd.partial[i].private = (unsigned long)ref;
8546                 spd.nr_pages++;
8547                 *ppos += page_size;
8548
8549                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8550         }
8551
8552         trace_access_unlock(iter->cpu_file);
8553         spd.nr_pages = i;
8554
8555         /* did we read anything? */
8556         if (!spd.nr_pages) {
8557
8558                 if (ret)
8559                         goto out;
8560
8561                 if (woken)
8562                         goto out;
8563
8564                 ret = -EAGAIN;
8565                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8566                         goto out;
8567
8568                 ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8569                 if (ret)
8570                         goto out;
8571
8572                 /* No need to wait after waking up when tracing is off */
8573                 if (!tracer_tracing_is_on(iter->tr))
8574                         goto out;
8575
8576                 /* Iterate one more time to collect any new data then exit */
8577                 woken = true;
8578
8579                 goto again;
8580         }
8581
8582         ret = splice_to_pipe(pipe, &spd);
8583 out:
8584         splice_shrink_spd(&spd);
8585
8586         return ret;
8587 }
8588
8589 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8590 {
8591         struct ftrace_buffer_info *info = file->private_data;
8592         struct trace_iterator *iter = &info->iter;
8593         int err;
8594
8595         if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8596                 if (!(file->f_flags & O_NONBLOCK)) {
8597                         err = ring_buffer_wait(iter->array_buffer->buffer,
8598                                                iter->cpu_file,
8599                                                iter->tr->buffer_percent,
8600                                                NULL, NULL);
8601                         if (err)
8602                                 return err;
8603                 }
8604
8605                 return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8606                                                   iter->cpu_file);
8607         } else if (cmd) {
8608                 return -ENOTTY;
8609         }
8610
8611         /*
8612          * An ioctl call with cmd 0 to the ring buffer file will wake up all
8613          * waiters
8614          */
8615         mutex_lock(&trace_types_lock);
8616
8617         /* Make sure the waiters see the new wait_index */
8618         (void)atomic_fetch_inc_release(&iter->wait_index);
8619
8620         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8621
8622         mutex_unlock(&trace_types_lock);
8623         return 0;
8624 }
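
/*
 * A user-space sketch of the wake-up path handled above: an ioctl with
 * cmd == 0 on a trace_pipe_raw file descriptor wakes up every waiter
 * blocked on that buffer (path relative to the tracefs instance
 * directory, error handling omitted):
 *
 *	int fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *
 *	ioctl(fd, 0);		(wake up all waiters on this per-cpu buffer)
 */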
8625
8626 #ifdef CONFIG_TRACER_MAX_TRACE
8627 static int get_snapshot_map(struct trace_array *tr)
8628 {
8629         int err = 0;
8630
8631         /*
8632	 * Called with mmap_lock held. lockdep would be unhappy if we were to
8633	 * take trace_types_lock here. Instead use the specific
8634	 * snapshot_trigger_lock.
8635          */
8636         spin_lock(&tr->snapshot_trigger_lock);
8637
8638         if (tr->snapshot || tr->mapped == UINT_MAX)
8639                 err = -EBUSY;
8640         else
8641                 tr->mapped++;
8642
8643         spin_unlock(&tr->snapshot_trigger_lock);
8644
8645         /* Wait for update_max_tr() to observe iter->tr->mapped */
8646         if (tr->mapped == 1)
8647                 synchronize_rcu();
8648
8649         return err;
8650 }
8651
8652 static void put_snapshot_map(struct trace_array *tr)
8653 {
8654         spin_lock(&tr->snapshot_trigger_lock);
8655         if (!WARN_ON(!tr->mapped))
8656                 tr->mapped--;
8657         spin_unlock(&tr->snapshot_trigger_lock);
8658 }
8659 #else
8660 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8661 static inline void put_snapshot_map(struct trace_array *tr) { }
8662 #endif
8663
8664 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8665 {
8666         struct ftrace_buffer_info *info = vma->vm_file->private_data;
8667         struct trace_iterator *iter = &info->iter;
8668
8669         WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8670         put_snapshot_map(iter->tr);
8671 }
8672
8673 static const struct vm_operations_struct tracing_buffers_vmops = {
8674         .close          = tracing_buffers_mmap_close,
8675 };
8676
8677 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8678 {
8679         struct ftrace_buffer_info *info = filp->private_data;
8680         struct trace_iterator *iter = &info->iter;
8681         int ret = 0;
8682
8683         /* A memmap'ed buffer is not supported for user space mmap */
8684         if (iter->tr->flags & TRACE_ARRAY_FL_MEMMAP)
8685                 return -ENODEV;
8686
8687         ret = get_snapshot_map(iter->tr);
8688         if (ret)
8689                 return ret;
8690
8691         ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8692         if (ret)
8693                 put_snapshot_map(iter->tr);
8694
8695         vma->vm_ops = &tracing_buffers_vmops;
8696
8697         return ret;
8698 }
8699
8700 static const struct file_operations tracing_buffers_fops = {
8701         .open           = tracing_buffers_open,
8702         .read           = tracing_buffers_read,
8703         .poll           = tracing_buffers_poll,
8704         .release        = tracing_buffers_release,
8705         .flush          = tracing_buffers_flush,
8706         .splice_read    = tracing_buffers_splice_read,
8707         .unlocked_ioctl = tracing_buffers_ioctl,
8708         .mmap           = tracing_buffers_mmap,
8709 };
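
/*
 * Rough user-space usage of the per_cpu/cpuN/trace_pipe_raw file backed
 * by these operations: read() hands back raw, binary ring-buffer
 * sub-buffer data (see tracing_buffers_read() above), which tools such
 * as trace-cmd usually consume via splice() instead.  A minimal sketch,
 * error handling omitted:
 *
 *	char buf[4096];
 *	int fd = open("per_cpu/cpu0/trace_pipe_raw", O_RDONLY);
 *	ssize_t r = read(fd, buf, sizeof(buf));
 */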
8710
8711 static ssize_t
8712 tracing_stats_read(struct file *filp, char __user *ubuf,
8713                    size_t count, loff_t *ppos)
8714 {
8715         struct inode *inode = file_inode(filp);
8716         struct trace_array *tr = inode->i_private;
8717         struct array_buffer *trace_buf = &tr->array_buffer;
8718         int cpu = tracing_get_cpu(inode);
8719         struct trace_seq *s;
8720         unsigned long cnt;
8721         unsigned long long t;
8722         unsigned long usec_rem;
8723
8724         s = kmalloc(sizeof(*s), GFP_KERNEL);
8725         if (!s)
8726                 return -ENOMEM;
8727
8728         trace_seq_init(s);
8729
8730         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8731         trace_seq_printf(s, "entries: %ld\n", cnt);
8732
8733         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8734         trace_seq_printf(s, "overrun: %ld\n", cnt);
8735
8736         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8737         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8738
8739         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8740         trace_seq_printf(s, "bytes: %ld\n", cnt);
8741
8742         if (trace_clocks[tr->clock_id].in_ns) {
8743                 /* local or global for trace_clock */
8744                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8745                 usec_rem = do_div(t, USEC_PER_SEC);
8746                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8747                                                                 t, usec_rem);
8748
8749                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8750                 usec_rem = do_div(t, USEC_PER_SEC);
8751                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8752         } else {
8753                 /* counter or tsc mode for trace_clock */
8754                 trace_seq_printf(s, "oldest event ts: %llu\n",
8755                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8756
8757                 trace_seq_printf(s, "now ts: %llu\n",
8758                                 ring_buffer_time_stamp(trace_buf->buffer));
8759         }
8760
8761         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8762         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8763
8764         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8765         trace_seq_printf(s, "read events: %ld\n", cnt);
8766
8767         count = simple_read_from_buffer(ubuf, count, ppos,
8768                                         s->buffer, trace_seq_used(s));
8769
8770         kfree(s);
8771
8772         return count;
8773 }
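
/*
 * The resulting per-cpu "stats" file looks roughly like the following
 * (values are illustrative only):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 65536
 *	oldest event ts:  5432.000000
 *	now ts:  5433.123456
 *	dropped events: 0
 *	read events: 100
 */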
8774
8775 static const struct file_operations tracing_stats_fops = {
8776         .open           = tracing_open_generic_tr,
8777         .read           = tracing_stats_read,
8778         .llseek         = generic_file_llseek,
8779         .release        = tracing_release_generic_tr,
8780 };
8781
8782 #ifdef CONFIG_DYNAMIC_FTRACE
8783
8784 static ssize_t
8785 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8786                   size_t cnt, loff_t *ppos)
8787 {
8788         ssize_t ret;
8789         char *buf;
8790         int r;
8791
8792         /* 512 should be plenty to hold the amount needed */
8793 #define DYN_INFO_BUF_SIZE       512
8794
8795         buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8796         if (!buf)
8797                 return -ENOMEM;
8798
8799         r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8800                       "%ld pages:%ld groups: %ld\n"
8801                       "ftrace boot update time = %llu (ns)\n"
8802                       "ftrace module total update time = %llu (ns)\n",
8803                       ftrace_update_tot_cnt,
8804                       ftrace_number_of_pages,
8805                       ftrace_number_of_groups,
8806                       ftrace_update_time,
8807                       ftrace_total_mod_time);
8808
8809         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8810         kfree(buf);
8811         return ret;
8812 }
8813
8814 static const struct file_operations tracing_dyn_info_fops = {
8815         .open           = tracing_open_generic,
8816         .read           = tracing_read_dyn_info,
8817         .llseek         = generic_file_llseek,
8818 };
8819 #endif /* CONFIG_DYNAMIC_FTRACE */
8820
8821 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8822 static void
8823 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8824                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8825                 void *data)
8826 {
8827         tracing_snapshot_instance(tr);
8828 }
8829
8830 static void
8831 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8832                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8833                       void *data)
8834 {
8835         struct ftrace_func_mapper *mapper = data;
8836         long *count = NULL;
8837
8838         if (mapper)
8839                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8840
8841         if (count) {
8842
8843                 if (*count <= 0)
8844                         return;
8845
8846                 (*count)--;
8847         }
8848
8849         tracing_snapshot_instance(tr);
8850 }
8851
8852 static int
8853 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8854                       struct ftrace_probe_ops *ops, void *data)
8855 {
8856         struct ftrace_func_mapper *mapper = data;
8857         long *count = NULL;
8858
8859         seq_printf(m, "%ps:", (void *)ip);
8860
8861         seq_puts(m, "snapshot");
8862
8863         if (mapper)
8864                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8865
8866         if (count)
8867                 seq_printf(m, ":count=%ld\n", *count);
8868         else
8869                 seq_puts(m, ":unlimited\n");
8870
8871         return 0;
8872 }
8873
8874 static int
8875 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8876                      unsigned long ip, void *init_data, void **data)
8877 {
8878         struct ftrace_func_mapper *mapper = *data;
8879
8880         if (!mapper) {
8881                 mapper = allocate_ftrace_func_mapper();
8882                 if (!mapper)
8883                         return -ENOMEM;
8884                 *data = mapper;
8885         }
8886
8887         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8888 }
8889
8890 static void
8891 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8892                      unsigned long ip, void *data)
8893 {
8894         struct ftrace_func_mapper *mapper = data;
8895
8896         if (!ip) {
8897                 if (!mapper)
8898                         return;
8899                 free_ftrace_func_mapper(mapper, NULL);
8900                 return;
8901         }
8902
8903         ftrace_func_mapper_remove_ip(mapper, ip);
8904 }
8905
8906 static struct ftrace_probe_ops snapshot_probe_ops = {
8907         .func                   = ftrace_snapshot,
8908         .print                  = ftrace_snapshot_print,
8909 };
8910
8911 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8912         .func                   = ftrace_count_snapshot,
8913         .print                  = ftrace_snapshot_print,
8914         .init                   = ftrace_snapshot_init,
8915         .free                   = ftrace_snapshot_free,
8916 };
8917
8918 static int
8919 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8920                                char *glob, char *cmd, char *param, int enable)
8921 {
8922         struct ftrace_probe_ops *ops;
8923         void *count = (void *)-1;
8924         char *number;
8925         int ret;
8926
8927         if (!tr)
8928                 return -ENODEV;
8929
8930         /* hash funcs only work with set_ftrace_filter */
8931         if (!enable)
8932                 return -EINVAL;
8933
8934 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8935
8936         if (glob[0] == '!') {
8937                 ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8938                 if (!ret)
8939                         tracing_disarm_snapshot(tr);
8940
8941                 return ret;
8942         }
8943
8944         if (!param)
8945                 goto out_reg;
8946
8947         number = strsep(&param, ":");
8948
8949         if (!strlen(number))
8950                 goto out_reg;
8951
8952         /*
8953          * We use the callback data field (which is a pointer)
8954          * as our counter.
8955          */
8956         ret = kstrtoul(number, 0, (unsigned long *)&count);
8957         if (ret)
8958                 return ret;
8959
8960  out_reg:
8961         ret = tracing_arm_snapshot(tr);
8962         if (ret < 0)
8963                 goto out;
8964
8965         ret = register_ftrace_function_probe(glob, tr, ops, count);
8966         if (ret < 0)
8967                 tracing_disarm_snapshot(tr);
8968  out:
8969         return ret < 0 ? ret : 0;
8970 }
8971
8972 static struct ftrace_func_command ftrace_snapshot_cmd = {
8973         .name                   = "snapshot",
8974         .func                   = ftrace_trace_snapshot_callback,
8975 };
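
/*
 * Usage sketch for the "snapshot" command registered below, written to
 * set_ftrace_filter (the function name is only an example):
 *
 *	echo 'do_sys_open:snapshot' > set_ftrace_filter	   (snapshot on every hit)
 *	echo 'do_sys_open:snapshot:5' > set_ftrace_filter  (only the first 5 hits)
 *	echo '!do_sys_open:snapshot' > set_ftrace_filter   (remove the trigger)
 */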
8976
8977 static __init int register_snapshot_cmd(void)
8978 {
8979         return register_ftrace_command(&ftrace_snapshot_cmd);
8980 }
8981 #else
8982 static inline __init int register_snapshot_cmd(void) { return 0; }
8983 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8984
8985 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8986 {
8987         if (WARN_ON(!tr->dir))
8988                 return ERR_PTR(-ENODEV);
8989
8990         /* Top directory uses NULL as the parent */
8991         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8992                 return NULL;
8993
8994         /* All sub buffers have a descriptor */
8995         return tr->dir;
8996 }
8997
8998 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8999 {
9000         struct dentry *d_tracer;
9001
9002         if (tr->percpu_dir)
9003                 return tr->percpu_dir;
9004
9005         d_tracer = tracing_get_dentry(tr);
9006         if (IS_ERR(d_tracer))
9007                 return NULL;
9008
9009         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
9010
9011         MEM_FAIL(!tr->percpu_dir,
9012                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
9013
9014         return tr->percpu_dir;
9015 }
9016
9017 static struct dentry *
9018 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
9019                       void *data, long cpu, const struct file_operations *fops)
9020 {
9021         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
9022
9023         if (ret) /* See tracing_get_cpu() */
9024                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
9025         return ret;
9026 }
9027
9028 static void
9029 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
9030 {
9031         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
9032         struct dentry *d_cpu;
9033         char cpu_dir[30]; /* 30 characters should be more than enough */
9034
9035         if (!d_percpu)
9036                 return;
9037
9038         snprintf(cpu_dir, 30, "cpu%ld", cpu);
9039         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
9040         if (!d_cpu) {
9041                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
9042                 return;
9043         }
9044
9045         /* per cpu trace_pipe */
9046         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
9047                                 tr, cpu, &tracing_pipe_fops);
9048
9049         /* per cpu trace */
9050         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
9051                                 tr, cpu, &tracing_fops);
9052
9053         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
9054                                 tr, cpu, &tracing_buffers_fops);
9055
9056         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
9057                                 tr, cpu, &tracing_stats_fops);
9058
9059         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
9060                                 tr, cpu, &tracing_entries_fops);
9061
9062         if (tr->range_addr_start)
9063                 trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
9064                                       tr, cpu, &tracing_buffer_meta_fops);
9065 #ifdef CONFIG_TRACER_SNAPSHOT
9066         if (!tr->range_addr_start) {
9067                 trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9068                                       tr, cpu, &snapshot_fops);
9069
9070                 trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9071                                       tr, cpu, &snapshot_raw_fops);
9072         }
9073 #endif
9074 }
9075
9076 #ifdef CONFIG_FTRACE_SELFTEST
9077 /* Let selftest have access to static functions in this file */
9078 #include "trace_selftest.c"
9079 #endif
9080
9081 static ssize_t
9082 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9083                         loff_t *ppos)
9084 {
9085         struct trace_option_dentry *topt = filp->private_data;
9086         char *buf;
9087
9088         if (topt->flags->val & topt->opt->bit)
9089                 buf = "1\n";
9090         else
9091                 buf = "0\n";
9092
9093         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9094 }
9095
9096 static ssize_t
9097 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9098                          loff_t *ppos)
9099 {
9100         struct trace_option_dentry *topt = filp->private_data;
9101         unsigned long val;
9102         int ret;
9103
9104         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9105         if (ret)
9106                 return ret;
9107
9108         if (val != 0 && val != 1)
9109                 return -EINVAL;
9110
9111         if (!!(topt->flags->val & topt->opt->bit) != val) {
9112                 mutex_lock(&trace_types_lock);
9113                 ret = __set_tracer_option(topt->tr, topt->flags,
9114                                           topt->opt, !val);
9115                 mutex_unlock(&trace_types_lock);
9116                 if (ret)
9117                         return ret;
9118         }
9119
9120         *ppos += cnt;
9121
9122         return cnt;
9123 }
9124
9125 static int tracing_open_options(struct inode *inode, struct file *filp)
9126 {
9127         struct trace_option_dentry *topt = inode->i_private;
9128         int ret;
9129
9130         ret = tracing_check_open_get_tr(topt->tr);
9131         if (ret)
9132                 return ret;
9133
9134         filp->private_data = inode->i_private;
9135         return 0;
9136 }
9137
9138 static int tracing_release_options(struct inode *inode, struct file *file)
9139 {
9140         struct trace_option_dentry *topt = file->private_data;
9141
9142         trace_array_put(topt->tr);
9143         return 0;
9144 }
9145
9146 static const struct file_operations trace_options_fops = {
9147         .open = tracing_open_options,
9148         .read = trace_options_read,
9149         .write = trace_options_write,
9150         .llseek = generic_file_llseek,
9151         .release = tracing_release_options,
9152 };
9153
9154 /*
9155  * In order to pass in both the trace_array descriptor and the index of
9156  * the flag that the trace option file represents, the trace_array
9157  * has a character array of trace_flags_index[], which holds the index
9158  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9159  * The address of this character array is passed to the flag option file
9160  * read/write callbacks.
9161  *
9162  * In order to extract both the index and the trace_array descriptor,
9163  * get_tr_index() uses the following algorithm.
9164  *
9165  *   idx = *ptr;
9166  *
9167  * Because the pointer points at an element of trace_flags_index[] whose
9168  * value equals its own index (remember index[1] == 1), dereferencing it
9169  * yields the index directly.
9170  * Subtracting that index from the pointer then gets us back to the
9171  * start of the array:
9172  *
9173  *   ptr - idx == &index[0]
9174  *
9175  * Then a simple container_of() from that pointer gets us to the
9176  * trace_array descriptor.
9177  */
9178 static void get_tr_index(void *data, struct trace_array **ptr,
9179                          unsigned int *pindex)
9180 {
9181         *pindex = *(unsigned char *)data;
9182
9183         *ptr = container_of(data - *pindex, struct trace_array,
9184                             trace_flags_index);
9185 }
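
/*
 * Worked example of the lookup above: if "data" points at
 * &tr->trace_flags_index[3], then *data == 3, data - 3 ==
 * &tr->trace_flags_index[0], and container_of() on that address
 * recovers the enclosing trace_array.
 */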
9186
9187 static ssize_t
9188 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9189                         loff_t *ppos)
9190 {
9191         void *tr_index = filp->private_data;
9192         struct trace_array *tr;
9193         unsigned int index;
9194         char *buf;
9195
9196         get_tr_index(tr_index, &tr, &index);
9197
9198         if (tr->trace_flags & (1 << index))
9199                 buf = "1\n";
9200         else
9201                 buf = "0\n";
9202
9203         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9204 }
9205
9206 static ssize_t
9207 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9208                          loff_t *ppos)
9209 {
9210         void *tr_index = filp->private_data;
9211         struct trace_array *tr;
9212         unsigned int index;
9213         unsigned long val;
9214         int ret;
9215
9216         get_tr_index(tr_index, &tr, &index);
9217
9218         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9219         if (ret)
9220                 return ret;
9221
9222         if (val != 0 && val != 1)
9223                 return -EINVAL;
9224
9225         mutex_lock(&event_mutex);
9226         mutex_lock(&trace_types_lock);
9227         ret = set_tracer_flag(tr, 1 << index, val);
9228         mutex_unlock(&trace_types_lock);
9229         mutex_unlock(&event_mutex);
9230
9231         if (ret < 0)
9232                 return ret;
9233
9234         *ppos += cnt;
9235
9236         return cnt;
9237 }
9238
9239 static const struct file_operations trace_options_core_fops = {
9240         .open = tracing_open_generic,
9241         .read = trace_options_core_read,
9242         .write = trace_options_core_write,
9243         .llseek = generic_file_llseek,
9244 };
9245
9246 struct dentry *trace_create_file(const char *name,
9247                                  umode_t mode,
9248                                  struct dentry *parent,
9249                                  void *data,
9250                                  const struct file_operations *fops)
9251 {
9252         struct dentry *ret;
9253
9254         ret = tracefs_create_file(name, mode, parent, data, fops);
9255         if (!ret)
9256                 pr_warn("Could not create tracefs '%s' entry\n", name);
9257
9258         return ret;
9259 }
9260
9262 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9263 {
9264         struct dentry *d_tracer;
9265
9266         if (tr->options)
9267                 return tr->options;
9268
9269         d_tracer = tracing_get_dentry(tr);
9270         if (IS_ERR(d_tracer))
9271                 return NULL;
9272
9273         tr->options = tracefs_create_dir("options", d_tracer);
9274         if (!tr->options) {
9275                 pr_warn("Could not create tracefs directory 'options'\n");
9276                 return NULL;
9277         }
9278
9279         return tr->options;
9280 }
9281
9282 static void
9283 create_trace_option_file(struct trace_array *tr,
9284                          struct trace_option_dentry *topt,
9285                          struct tracer_flags *flags,
9286                          struct tracer_opt *opt)
9287 {
9288         struct dentry *t_options;
9289
9290         t_options = trace_options_init_dentry(tr);
9291         if (!t_options)
9292                 return;
9293
9294         topt->flags = flags;
9295         topt->opt = opt;
9296         topt->tr = tr;
9297
9298         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9299                                         t_options, topt, &trace_options_fops);
9300
9301 }
9302
9303 static void
9304 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9305 {
9306         struct trace_option_dentry *topts;
9307         struct trace_options *tr_topts;
9308         struct tracer_flags *flags;
9309         struct tracer_opt *opts;
9310         int cnt;
9311         int i;
9312
9313         if (!tracer)
9314                 return;
9315
9316         flags = tracer->flags;
9317
9318         if (!flags || !flags->opts)
9319                 return;
9320
9321         /*
9322          * If this is an instance, only create flags for tracers
9323          * the instance may have.
9324          */
9325         if (!trace_ok_for_array(tracer, tr))
9326                 return;
9327
9328         for (i = 0; i < tr->nr_topts; i++) {
9329 		/* Make sure there are no duplicate flags. */
9330                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9331                         return;
9332         }
9333
9334         opts = flags->opts;
9335
9336         for (cnt = 0; opts[cnt].name; cnt++)
9337                 ;
9338
9339         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9340         if (!topts)
9341                 return;
9342
9343         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9344                             GFP_KERNEL);
9345         if (!tr_topts) {
9346                 kfree(topts);
9347                 return;
9348         }
9349
9350         tr->topts = tr_topts;
9351         tr->topts[tr->nr_topts].tracer = tracer;
9352         tr->topts[tr->nr_topts].topts = topts;
9353         tr->nr_topts++;
9354
9355         for (cnt = 0; opts[cnt].name; cnt++) {
9356                 create_trace_option_file(tr, &topts[cnt], flags,
9357                                          &opts[cnt]);
9358                 MEM_FAIL(topts[cnt].entry == NULL,
9359                           "Failed to create trace option: %s",
9360                           opts[cnt].name);
9361         }
9362 }
9363
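/*
 * Create a file in the "options" directory for one of the core trace
 * flags.  The file's private data points into tr->trace_flags_index so
 * trace_options_core_fops can recover which flag bit it controls.
 */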
9364 static struct dentry *
9365 create_trace_option_core_file(struct trace_array *tr,
9366                               const char *option, long index)
9367 {
9368         struct dentry *t_options;
9369
9370         t_options = trace_options_init_dentry(tr);
9371         if (!t_options)
9372                 return NULL;
9373
9374         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9375                                  (void *)&tr->trace_flags_index[index],
9376                                  &trace_options_core_fops);
9377 }
9378
9379 static void create_trace_options_dir(struct trace_array *tr)
9380 {
9381         struct dentry *t_options;
9382         bool top_level = tr == &global_trace;
9383         int i;
9384
9385         t_options = trace_options_init_dentry(tr);
9386         if (!t_options)
9387                 return;
9388
9389         for (i = 0; trace_options[i]; i++) {
9390                 if (top_level ||
9391                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9392                         create_trace_option_core_file(tr, trace_options[i], i);
9393         }
9394 }
9395
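/*
 * Handlers backing the "tracing_on" file.  Reading reports whether the
 * ring buffer is currently recording; writing toggles recording and
 * invokes the current tracer's start()/stop() callbacks, e.g.:
 *
 *   echo 0 > /sys/kernel/tracing/tracing_on   # pause recording
 *   echo 1 > /sys/kernel/tracing/tracing_on   # resume recording
 */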
9396 static ssize_t
9397 rb_simple_read(struct file *filp, char __user *ubuf,
9398                size_t cnt, loff_t *ppos)
9399 {
9400         struct trace_array *tr = filp->private_data;
9401         char buf[64];
9402         int r;
9403
9404         r = tracer_tracing_is_on(tr);
9405         r = sprintf(buf, "%d\n", r);
9406
9407         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9408 }
9409
9410 static ssize_t
9411 rb_simple_write(struct file *filp, const char __user *ubuf,
9412                 size_t cnt, loff_t *ppos)
9413 {
9414         struct trace_array *tr = filp->private_data;
9415         struct trace_buffer *buffer = tr->array_buffer.buffer;
9416         unsigned long val;
9417         int ret;
9418
9419         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9420         if (ret)
9421                 return ret;
9422
9423         if (buffer) {
9424                 mutex_lock(&trace_types_lock);
9425                 if (!!val == tracer_tracing_is_on(tr)) {
9426                         val = 0; /* do nothing */
9427                 } else if (val) {
9428                         tracer_tracing_on(tr);
9429                         if (tr->current_trace->start)
9430                                 tr->current_trace->start(tr);
9431                 } else {
9432                         tracer_tracing_off(tr);
9433                         if (tr->current_trace->stop)
9434                                 tr->current_trace->stop(tr);
9435                         /* Wake up any waiters */
9436                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9437                 }
9438                 mutex_unlock(&trace_types_lock);
9439         }
9440
9441         (*ppos)++;
9442
9443         return cnt;
9444 }
9445
9446 static const struct file_operations rb_simple_fops = {
9447         .open           = tracing_open_generic_tr,
9448         .read           = rb_simple_read,
9449         .write          = rb_simple_write,
9450         .release        = tracing_release_generic_tr,
9451         .llseek         = default_llseek,
9452 };
9453
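/*
 * Handlers backing the "buffer_percent" file.  The value (0-100) sets
 * how full the ring buffer must be before a waiting reader is woken;
 * anything above 100 is rejected with -EINVAL.
 */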
9454 static ssize_t
9455 buffer_percent_read(struct file *filp, char __user *ubuf,
9456                     size_t cnt, loff_t *ppos)
9457 {
9458         struct trace_array *tr = filp->private_data;
9459         char buf[64];
9460         int r;
9461
9462         r = tr->buffer_percent;
9463         r = sprintf(buf, "%d\n", r);
9464
9465         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9466 }
9467
9468 static ssize_t
9469 buffer_percent_write(struct file *filp, const char __user *ubuf,
9470                      size_t cnt, loff_t *ppos)
9471 {
9472         struct trace_array *tr = filp->private_data;
9473         unsigned long val;
9474         int ret;
9475
9476         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9477         if (ret)
9478                 return ret;
9479
9480         if (val > 100)
9481                 return -EINVAL;
9482
9483         tr->buffer_percent = val;
9484
9485         (*ppos)++;
9486
9487         return cnt;
9488 }
9489
9490 static const struct file_operations buffer_percent_fops = {
9491         .open           = tracing_open_generic_tr,
9492         .read           = buffer_percent_read,
9493         .write          = buffer_percent_write,
9494         .release        = tracing_release_generic_tr,
9495         .llseek         = default_llseek,
9496 };
9497
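/*
 * Handlers backing the "buffer_subbuf_size_kb" file.  Reads report the
 * current sub-buffer size in KB.  Writes round the requested size up to
 * a power-of-two number of system pages (order 0-7, i.e. 1-128 pages)
 * and apply the new order to the main buffer and, if allocated, to the
 * snapshot buffer, rolling back the change on failure.
 */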
9498 static ssize_t
9499 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9500 {
9501         struct trace_array *tr = filp->private_data;
9502         size_t size;
9503         char buf[64];
9504         int order;
9505         int r;
9506
9507         order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9508         size = (PAGE_SIZE << order) / 1024;
9509
9510         r = sprintf(buf, "%zd\n", size);
9511
9512         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9513 }
9514
9515 static ssize_t
9516 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9517                          size_t cnt, loff_t *ppos)
9518 {
9519         struct trace_array *tr = filp->private_data;
9520         unsigned long val;
9521         int old_order;
9522         int order;
9523         int pages;
9524         int ret;
9525
9526         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9527         if (ret)
9528                 return ret;
9529
9530         val *= 1024; /* value passed in is in KB */
9531
9532         pages = DIV_ROUND_UP(val, PAGE_SIZE);
9533         order = fls(pages - 1);
9534
9535         /* limit between 1 and 128 system pages */
9536         if (order < 0 || order > 7)
9537                 return -EINVAL;
9538
9539         /* Do not allow tracing while changing the order of the ring buffer */
9540         tracing_stop_tr(tr);
9541
9542         old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9543         if (old_order == order)
9544                 goto out;
9545
9546         ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9547         if (ret)
9548                 goto out;
9549
9550 #ifdef CONFIG_TRACER_MAX_TRACE
9551
9552         if (!tr->allocated_snapshot)
9553                 goto out_max;
9554
9555         ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9556         if (ret) {
9557                 /* Put back the old order */
9558                 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9559                 if (WARN_ON_ONCE(cnt)) {
9560                         /*
9561                          * AARGH! We are left with different orders!
9562                          * The max buffer is our "snapshot" buffer.
9563                          * When a tracer needs a snapshot (one of the
9564                          * latency tracers), it swaps the max buffer
9565                          * with the saved snapshot. We succeeded in
9566                          * updating the order of the main buffer, but failed
9567                          * to update the order of the max buffer. But when
9568                          * we tried to reset the main buffer to the original
9569                          * order, we failed there too. This is very unlikely to
9570                          * happen, but if it does, warn and kill all
9571                          * tracing.
9572                          */
9573                         tracing_disabled = 1;
9574                 }
9575                 goto out;
9576         }
9577  out_max:
9578 #endif
9579         (*ppos)++;
9580  out:
9581         if (ret)
9582                 cnt = ret;
9583         tracing_start_tr(tr);
9584         return cnt;
9585 }
9586
9587 static const struct file_operations buffer_subbuf_size_fops = {
9588         .open           = tracing_open_generic_tr,
9589         .read           = buffer_subbuf_size_read,
9590         .write          = buffer_subbuf_size_write,
9591         .release        = tracing_release_generic_tr,
9592         .llseek         = default_llseek,
9593 };
9594
9595 static struct dentry *trace_instance_dir;
9596
9597 static void
9598 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9599
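/*
 * For a module listed in the persistent-boot scratch area, record the
 * offset between where its text is loaded in this boot and where it was
 * loaded when the buffer was written, so that addresses from the
 * previous boot can still be resolved.  A module that is going away
 * gets a delta of zero.
 */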
9600 #ifdef CONFIG_MODULES
9601 static int make_mod_delta(struct module *mod, void *data)
9602 {
9603         struct trace_module_delta *module_delta;
9604         struct trace_scratch *tscratch;
9605         struct trace_mod_entry *entry;
9606         struct trace_array *tr = data;
9607         int i;
9608
9609         tscratch = tr->scratch;
9610         module_delta = READ_ONCE(tr->module_delta);
9611         for (i = 0; i < tscratch->nr_entries; i++) {
9612                 entry = &tscratch->entries[i];
9613                 if (strcmp(mod->name, entry->mod_name))
9614                         continue;
9615                 if (mod->state == MODULE_STATE_GOING)
9616                         module_delta->delta[i] = 0;
9617                 else
9618                         module_delta->delta[i] = (unsigned long)mod->mem[MOD_TEXT].base
9619                                                  - entry->mod_addr;
9620                 break;
9621         }
9622         return 0;
9623 }
9624 #else
9625 static int make_mod_delta(struct module *mod, void *data)
9626 {
9627         return 0;
9628 }
9629 #endif
9630
9631 static int mod_addr_comp(const void *a, const void *b, const void *data)
9632 {
9633         const struct trace_mod_entry *e1 = a;
9634         const struct trace_mod_entry *e2 = b;
9635
9636         return e1->mod_addr > e2->mod_addr ? 1 : -1;
9637 }
9638
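/*
 * Adopt the scratch area of a boot-mapped (persistent) ring buffer:
 * compute the kernel text delta, sanity check the saved module table,
 * sort it by load address, allocate the per-module delta array, and
 * restore the trace clock used by the previous boot.  Any inconsistency
 * wipes the scratch area so stale data is never trusted.
 */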
9639 static void setup_trace_scratch(struct trace_array *tr,
9640                                 struct trace_scratch *tscratch, unsigned int size)
9641 {
9642         struct trace_module_delta *module_delta;
9643         struct trace_mod_entry *entry;
9644         int i, nr_entries;
9645
9646         if (!tscratch)
9647                 return;
9648
9649         tr->scratch = tscratch;
9650         tr->scratch_size = size;
9651
9652         if (tscratch->text_addr)
9653                 tr->text_delta = (unsigned long)_text - tscratch->text_addr;
9654
9655         if (struct_size(tscratch, entries, tscratch->nr_entries) > size)
9656                 goto reset;
9657
9658         /* Check if each module name is a valid string */
9659         for (i = 0; i < tscratch->nr_entries; i++) {
9660                 int n;
9661
9662                 entry = &tscratch->entries[i];
9663
9664                 for (n = 0; n < MODULE_NAME_LEN; n++) {
9665                         if (entry->mod_name[n] == '\0')
9666                                 break;
9667                         if (!isprint(entry->mod_name[n]))
9668                                 goto reset;
9669                 }
9670                 if (n == MODULE_NAME_LEN)
9671                         goto reset;
9672         }
9673
9674         /* Sort the entries so that we can find the appropriate module from an address. */
9675         nr_entries = tscratch->nr_entries;
9676         sort_r(tscratch->entries, nr_entries, sizeof(struct trace_mod_entry),
9677                mod_addr_comp, NULL, NULL);
9678
9679         if (IS_ENABLED(CONFIG_MODULES)) {
9680                 module_delta = kzalloc(struct_size(module_delta, delta, nr_entries), GFP_KERNEL);
9681                 if (!module_delta) {
9682                         pr_info("module_delta allocation failed. Not able to decode module address.");
9683                         goto reset;
9684                 }
9685                 init_rcu_head(&module_delta->rcu);
9686         } else
9687                 module_delta = NULL;
9688         WRITE_ONCE(tr->module_delta, module_delta);
9689
9690         /* Scan loaded modules to compute the text delta for each module. */
9691         module_for_each_mod(make_mod_delta, tr);
9692
9693         /* Set trace_clock to the same clock as the previous boot. */
9694         if (tscratch->clock_id != tr->clock_id) {
9695                 if (tscratch->clock_id >= ARRAY_SIZE(trace_clocks) ||
9696                     tracing_set_clock(tr, trace_clocks[tscratch->clock_id].name) < 0) {
9697                         pr_info("the previous trace_clock info is not valid.");
9698                         goto reset;
9699                 }
9700         }
9701         return;
9702  reset:
9703         /* Invalid trace modules */
9704         memset(tscratch, 0, size);
9705 }
9706
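/*
 * Allocate the ring buffer and per-CPU data for one array_buffer.  A
 * boot-mapped instance gets its buffer placed in the reserved physical
 * range, with a scratch area sized for 128 module entries; otherwise a
 * normal ring buffer of the requested size is allocated.
 */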
9707 static int
9708 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9709 {
9710         enum ring_buffer_flags rb_flags;
9711         struct trace_scratch *tscratch;
9712         unsigned int scratch_size = 0;
9713
9714         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9715
9716         buf->tr = tr;
9717
9718         if (tr->range_addr_start && tr->range_addr_size) {
9719                 /* Add scratch buffer to handle 128 modules */
9720                 buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9721                                                       tr->range_addr_start,
9722                                                       tr->range_addr_size,
9723                                                       struct_size(tscratch, entries, 128));
9724
9725                 tscratch = ring_buffer_meta_scratch(buf->buffer, &scratch_size);
9726                 setup_trace_scratch(tr, tscratch, scratch_size);
9727
9728                 /*
9729                  * This is basically the same as a mapped buffer,
9730                  * with the same restrictions.
9731                  */
9732                 tr->mapped++;
9733         } else {
9734                 buf->buffer = ring_buffer_alloc(size, rb_flags);
9735         }
9736         if (!buf->buffer)
9737                 return -ENOMEM;
9738
9739         buf->data = alloc_percpu(struct trace_array_cpu);
9740         if (!buf->data) {
9741                 ring_buffer_free(buf->buffer);
9742                 buf->buffer = NULL;
9743                 return -ENOMEM;
9744         }
9745
9746         /* Allocate the first page for all buffers */
9747         set_buffer_entries(&tr->array_buffer,
9748                            ring_buffer_size(tr->array_buffer.buffer, 0));
9749
9750         return 0;
9751 }
9752
9753 static void free_trace_buffer(struct array_buffer *buf)
9754 {
9755         if (buf->buffer) {
9756                 ring_buffer_free(buf->buffer);
9757                 buf->buffer = NULL;
9758                 free_percpu(buf->data);
9759                 buf->data = NULL;
9760         }
9761 }
9762
9763 static int allocate_trace_buffers(struct trace_array *tr, int size)
9764 {
9765         int ret;
9766
9767         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9768         if (ret)
9769                 return ret;
9770
9771 #ifdef CONFIG_TRACER_MAX_TRACE
9772         /* Fixed (boot-mapped) buffer trace arrays do not have snapshot buffers */
9773         if (tr->range_addr_start)
9774                 return 0;
9775
9776         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9777                                     allocate_snapshot ? size : 1);
9778         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9779                 free_trace_buffer(&tr->array_buffer);
9780                 return -ENOMEM;
9781         }
9782         tr->allocated_snapshot = allocate_snapshot;
9783
9784         allocate_snapshot = false;
9785 #endif
9786
9787         return 0;
9788 }
9789
9790 static void free_trace_buffers(struct trace_array *tr)
9791 {
9792         if (!tr)
9793                 return;
9794
9795         free_trace_buffer(&tr->array_buffer);
9796         kfree(tr->module_delta);
9797
9798 #ifdef CONFIG_TRACER_MAX_TRACE
9799         free_trace_buffer(&tr->max_buffer);
9800 #endif
9801 }
9802
9803 static void init_trace_flags_index(struct trace_array *tr)
9804 {
9805         int i;
9806
9807         /* Used by the trace options files */
9808         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9809                 tr->trace_flags_index[i] = i;
9810 }
9811
9812 static void __update_tracer_options(struct trace_array *tr)
9813 {
9814         struct tracer *t;
9815
9816         for (t = trace_types; t; t = t->next)
9817                 add_tracer_options(tr, t);
9818 }
9819
9820 static void update_tracer_options(struct trace_array *tr)
9821 {
9822         mutex_lock(&trace_types_lock);
9823         tracer_options_updated = true;
9824         __update_tracer_options(tr);
9825         mutex_unlock(&trace_types_lock);
9826 }
9827
9828 /* Must have trace_types_lock held */
9829 struct trace_array *trace_array_find(const char *instance)
9830 {
9831         struct trace_array *tr, *found = NULL;
9832
9833         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9834                 if (tr->name && strcmp(tr->name, instance) == 0) {
9835                         found = tr;
9836                         break;
9837                 }
9838         }
9839
9840         return found;
9841 }
9842
9843 struct trace_array *trace_array_find_get(const char *instance)
9844 {
9845         struct trace_array *tr;
9846
9847         mutex_lock(&trace_types_lock);
9848         tr = trace_array_find(instance);
9849         if (tr)
9850                 tr->ref++;
9851         mutex_unlock(&trace_types_lock);
9852
9853         return tr;
9854 }
9855
9856 static int trace_array_create_dir(struct trace_array *tr)
9857 {
9858         int ret;
9859
9860         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9861         if (!tr->dir)
9862                 return -EINVAL;
9863
9864         ret = event_trace_add_tracer(tr->dir, tr);
9865         if (ret) {
9866                 tracefs_remove(tr->dir);
9867                 return ret;
9868         }
9869
9870         init_tracer_tracefs(tr, tr->dir);
9871         __update_tracer_options(tr);
9872
9873         return ret;
9874 }
9875
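/*
 * Allocate and initialize a new trace instance: name, cpumasks, trace
 * flags, ring buffers and ftrace ops.  If the "instances" directory
 * already exists, the tracefs files are created right away; otherwise
 * the events are added early and the directory is filled in later by
 * create_trace_instances().
 */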
9876 static struct trace_array *
9877 trace_array_create_systems(const char *name, const char *systems,
9878                            unsigned long range_addr_start,
9879                            unsigned long range_addr_size)
9880 {
9881         struct trace_array *tr;
9882         int ret;
9883
9884         ret = -ENOMEM;
9885         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9886         if (!tr)
9887                 return ERR_PTR(ret);
9888
9889         tr->name = kstrdup(name, GFP_KERNEL);
9890         if (!tr->name)
9891                 goto out_free_tr;
9892
9893         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9894                 goto out_free_tr;
9895
9896         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9897                 goto out_free_tr;
9898
9899         if (systems) {
9900                 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9901                 if (!tr->system_names)
9902                         goto out_free_tr;
9903         }
9904
9905         /* Only for boot up memory mapped ring buffers */
9906         tr->range_addr_start = range_addr_start;
9907         tr->range_addr_size = range_addr_size;
9908
9909         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9910
9911         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9912
9913         raw_spin_lock_init(&tr->start_lock);
9914
9915         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9916 #ifdef CONFIG_TRACER_MAX_TRACE
9917         spin_lock_init(&tr->snapshot_trigger_lock);
9918 #endif
9919         tr->current_trace = &nop_trace;
9920
9921         INIT_LIST_HEAD(&tr->systems);
9922         INIT_LIST_HEAD(&tr->events);
9923         INIT_LIST_HEAD(&tr->hist_vars);
9924         INIT_LIST_HEAD(&tr->err_log);
9925         INIT_LIST_HEAD(&tr->marker_list);
9926
9927 #ifdef CONFIG_MODULES
9928         INIT_LIST_HEAD(&tr->mod_events);
9929 #endif
9930
9931         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9932                 goto out_free_tr;
9933
9934         /* The ring buffer is expanded by default */
9935         trace_set_ring_buffer_expanded(tr);
9936
9937         if (ftrace_allocate_ftrace_ops(tr) < 0)
9938                 goto out_free_tr;
9939
9940         ftrace_init_trace_array(tr);
9941
9942         init_trace_flags_index(tr);
9943
9944         if (trace_instance_dir) {
9945                 ret = trace_array_create_dir(tr);
9946                 if (ret)
9947                         goto out_free_tr;
9948         } else
9949                 __trace_early_add_events(tr);
9950
9951         list_add(&tr->list, &ftrace_trace_arrays);
9952
9953         tr->ref++;
9954
9955         return tr;
9956
9957  out_free_tr:
9958         ftrace_free_ftrace_ops(tr);
9959         free_trace_buffers(tr);
9960         free_cpumask_var(tr->pipe_cpumask);
9961         free_cpumask_var(tr->tracing_cpumask);
9962         kfree_const(tr->system_names);
9963         kfree(tr->range_name);
9964         kfree(tr->name);
9965         kfree(tr);
9966
9967         return ERR_PTR(ret);
9968 }
9969
9970 static struct trace_array *trace_array_create(const char *name)
9971 {
9972         return trace_array_create_systems(name, NULL, 0, 0);
9973 }
9974
9975 static int instance_mkdir(const char *name)
9976 {
9977         struct trace_array *tr;
9978         int ret;
9979
9980         guard(mutex)(&event_mutex);
9981         guard(mutex)(&trace_types_lock);
9982
9983         ret = -EEXIST;
9984         if (trace_array_find(name))
9985                 return -EEXIST;
9986
9987         tr = trace_array_create(name);
9988
9989         ret = PTR_ERR_OR_ZERO(tr);
9990
9991         return ret;
9992 }
9993
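/*
 * Map a physical memory range (such as one reserved on the command
 * line) into vmalloc space so it can back a boot-mapped ring buffer.
 * Returns the new virtual address, or 0 on failure or when there is
 * no MMU.
 */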
9994 #ifdef CONFIG_MMU
9995 static u64 map_pages(unsigned long start, unsigned long size)
9996 {
9997         unsigned long vmap_start, vmap_end;
9998         struct vm_struct *area;
9999         int ret;
10000
10001         area = get_vm_area(size, VM_IOREMAP);
10002         if (!area)
10003                 return 0;
10004
10005         vmap_start = (unsigned long) area->addr;
10006         vmap_end = vmap_start + size;
10007
10008         ret = vmap_page_range(vmap_start, vmap_end,
10009                               start, pgprot_nx(PAGE_KERNEL));
10010         if (ret < 0) {
10011                 free_vm_area(area);
10012                 return 0;
10013         }
10014
10015         return (u64)vmap_start;
10016 }
10017 #else
10018 static inline u64 map_pages(unsigned long start, unsigned long size)
10019 {
10020         return 0;
10021 }
10022 #endif
10023
10024 /**
10025  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
10026  * @name: The name of the trace array to be looked up/created.
10027  * @systems: A list of systems to create event directories for (NULL for all)
10028  *
10029  * Returns a pointer to the trace array with the given name, or NULL
10030  * if it cannot be created.
10031  *
10032  * NOTE: This function increments the reference counter associated with the
10033  * trace array returned. This makes sure it cannot be freed while in use.
10034  * Use trace_array_put() once the trace array is no longer needed.
10035  * If the trace_array is to be freed, trace_array_destroy() needs to
10036  * be called after the trace_array_put(), or simply let user space delete
10037  * it from the tracefs instances directory. But until the
10038  * trace_array_put() is called, user space cannot delete it.
10039  *
10040  */
10041 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
10042 {
10043         struct trace_array *tr;
10044
10045         guard(mutex)(&event_mutex);
10046         guard(mutex)(&trace_types_lock);
10047
10048         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10049                 if (tr->name && strcmp(tr->name, name) == 0) {
10050                         tr->ref++;
10051                         return tr;
10052                 }
10053         }
10054
10055         tr = trace_array_create_systems(name, systems, 0, 0);
10056
10057         if (IS_ERR(tr))
10058                 tr = NULL;
10059         else
10060                 tr->ref++;
10061
10062         return tr;
10063 }
10064 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
10065
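/*
 * Tear down a trace instance.  Fails with -EBUSY while the instance is
 * still referenced or has active trace users.  Callers must hold
 * event_mutex and trace_types_lock.
 */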
10066 static int __remove_instance(struct trace_array *tr)
10067 {
10068         int i;
10069
10070         /* Reference counter for a newly created trace array = 1. */
10071         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
10072                 return -EBUSY;
10073
10074         list_del(&tr->list);
10075
10076         /* Disable all the flags that were enabled coming in */
10077         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
10078                 if ((1 << i) & ZEROED_TRACE_FLAGS)
10079                         set_tracer_flag(tr, 1 << i, 0);
10080         }
10081
10082         if (printk_trace == tr)
10083                 update_printk_trace(&global_trace);
10084
10085         if (update_marker_trace(tr, 0))
10086                 synchronize_rcu();
10087
10088         tracing_set_nop(tr);
10089         clear_ftrace_function_probes(tr);
10090         event_trace_del_tracer(tr);
10091         ftrace_clear_pids(tr);
10092         ftrace_destroy_function_files(tr);
10093         tracefs_remove(tr->dir);
10094         free_percpu(tr->last_func_repeats);
10095         free_trace_buffers(tr);
10096         clear_tracing_err_log(tr);
10097
10098         if (tr->range_name) {
10099                 reserve_mem_release_by_name(tr->range_name);
10100                 kfree(tr->range_name);
10101         }
10102
10103         for (i = 0; i < tr->nr_topts; i++) {
10104                 kfree(tr->topts[i].topts);
10105         }
10106         kfree(tr->topts);
10107
10108         free_cpumask_var(tr->pipe_cpumask);
10109         free_cpumask_var(tr->tracing_cpumask);
10110         kfree_const(tr->system_names);
10111         kfree(tr->name);
10112         kfree(tr);
10113
10114         return 0;
10115 }
10116
10117 int trace_array_destroy(struct trace_array *this_tr)
10118 {
10119         struct trace_array *tr;
10120
10121         if (!this_tr)
10122                 return -EINVAL;
10123
10124         guard(mutex)(&event_mutex);
10125         guard(mutex)(&trace_types_lock);
10126
10128         /* Make sure the trace array exists before destroying it. */
10129         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10130                 if (tr == this_tr)
10131                         return __remove_instance(tr);
10132         }
10133
10134         return -ENODEV;
10135 }
10136 EXPORT_SYMBOL_GPL(trace_array_destroy);
10137
10138 static int instance_rmdir(const char *name)
10139 {
10140         struct trace_array *tr;
10141
10142         guard(mutex)(&event_mutex);
10143         guard(mutex)(&trace_types_lock);
10144
10145         tr = trace_array_find(name);
10146         if (!tr)
10147                 return -ENODEV;
10148
10149         return __remove_instance(tr);
10150 }
10151
10152 static __init void create_trace_instances(struct dentry *d_tracer)
10153 {
10154         struct trace_array *tr;
10155
10156         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
10157                                                          instance_mkdir,
10158                                                          instance_rmdir);
10159         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
10160                 return;
10161
10162         guard(mutex)(&event_mutex);
10163         guard(mutex)(&trace_types_lock);
10164
10165         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10166                 if (!tr->name)
10167                         continue;
10168                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
10169                              "Failed to create instance directory\n"))
10170                         return;
10171         }
10172 }
10173
10174 static void
10175 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
10176 {
10177         int cpu;
10178
10179         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
10180                         tr, &show_traces_fops);
10181
10182         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
10183                         tr, &set_tracer_fops);
10184
10185         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
10186                           tr, &tracing_cpumask_fops);
10187
10188         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
10189                           tr, &tracing_iter_fops);
10190
10191         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
10192                           tr, &tracing_fops);
10193
10194         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
10195                           tr, &tracing_pipe_fops);
10196
10197         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
10198                           tr, &tracing_entries_fops);
10199
10200         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
10201                           tr, &tracing_total_entries_fops);
10202
10203         trace_create_file("free_buffer", 0200, d_tracer,
10204                           tr, &tracing_free_buffer_fops);
10205
10206         trace_create_file("trace_marker", 0220, d_tracer,
10207                           tr, &tracing_mark_fops);
10208
10209         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
10210
10211         trace_create_file("trace_marker_raw", 0220, d_tracer,
10212                           tr, &tracing_mark_raw_fops);
10213
10214         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
10215                           &trace_clock_fops);
10216
10217         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
10218                           tr, &rb_simple_fops);
10219
10220         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
10221                           &trace_time_stamp_mode_fops);
10222
10223         tr->buffer_percent = 50;
10224
10225         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
10226                         tr, &buffer_percent_fops);
10227
10228         trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
10229                           tr, &buffer_subbuf_size_fops);
10230
10231         create_trace_options_dir(tr);
10232
10233 #ifdef CONFIG_TRACER_MAX_TRACE
10234         trace_create_maxlat_file(tr, d_tracer);
10235 #endif
10236
10237         if (ftrace_create_function_files(tr, d_tracer))
10238                 MEM_FAIL(1, "Could not allocate function filter files");
10239
10240         if (tr->range_addr_start) {
10241                 trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
10242                                   tr, &last_boot_fops);
10243 #ifdef CONFIG_TRACER_SNAPSHOT
10244         } else {
10245                 trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10246                                   tr, &snapshot_fops);
10247 #endif
10248         }
10249
10250         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10251                           tr, &tracing_err_log_fops);
10252
10253         for_each_tracing_cpu(cpu)
10254                 tracing_init_tracefs_percpu(tr, cpu);
10255
10256         ftrace_init_tracefs(tr, d_tracer);
10257 }
10258
10259 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10260 {
10261         struct vfsmount *mnt;
10262         struct file_system_type *type;
10263         struct fs_context *fc;
10264         int ret;
10265
10266         /*
10267          * To maintain backward compatibility for tools that mount
10268          * debugfs to get to the tracing facility, tracefs is automatically
10269          * mounted to the debugfs/tracing directory.
10270          */
10271         type = get_fs_type("tracefs");
10272         if (!type)
10273                 return NULL;
10274
10275         fc = fs_context_for_submount(type, mntpt);
10276         put_filesystem(type);
10277         if (IS_ERR(fc))
10278                 return ERR_CAST(fc);
10279
10280         ret = vfs_parse_fs_string(fc, "source",
10281                                   "tracefs", strlen("tracefs"));
10282         if (!ret)
10283                 mnt = fc_mount(fc);
10284         else
10285                 mnt = ERR_PTR(ret);
10286
10287         put_fs_context(fc);
10288         return mnt;
10289 }
10290
10291 /**
10292  * tracing_init_dentry - initialize top level trace array
10293  *
10294  * This is called when creating files or directories in the tracing
10295  * directory. It is called via fs_initcall() by any of the boot up code
10296  * and expects to return the dentry of the top level tracing directory.
10297  */
10298 int tracing_init_dentry(void)
10299 {
10300         struct trace_array *tr = &global_trace;
10301
10302         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10303                 pr_warn("Tracing disabled due to lockdown\n");
10304                 return -EPERM;
10305         }
10306
10307         /* The top level trace array uses NULL as parent */
10308         if (tr->dir)
10309                 return 0;
10310
10311         if (WARN_ON(!tracefs_initialized()))
10312                 return -ENODEV;
10313
10314         /*
10315          * As there may still be users that expect the tracing
10316          * files to exist in debugfs/tracing, we must automount
10317          * the tracefs file system there, so older tools still
10318          * work with the newer kernel.
10319          */
10320         tr->dir = debugfs_create_automount("tracing", NULL,
10321                                            trace_automount, NULL);
10322
10323         return 0;
10324 }
10325
10326 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10327 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10328
10329 static struct workqueue_struct *eval_map_wq __initdata;
10330 static struct work_struct eval_map_work __initdata;
10331 static struct work_struct tracerfs_init_work __initdata;
10332
10333 static void __init eval_map_work_func(struct work_struct *work)
10334 {
10335         int len;
10336
10337         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10338         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10339 }
10340
10341 static int __init trace_eval_init(void)
10342 {
10343         INIT_WORK(&eval_map_work, eval_map_work_func);
10344
10345         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10346         if (!eval_map_wq) {
10347                 pr_err("Unable to allocate eval_map_wq\n");
10348                 /* Do work here */
10349                 eval_map_work_func(&eval_map_work);
10350                 return -ENOMEM;
10351         }
10352
10353         queue_work(eval_map_wq, &eval_map_work);
10354         return 0;
10355 }
10356
10357 subsys_initcall(trace_eval_init);
10358
10359 static int __init trace_eval_sync(void)
10360 {
10361         /* Make sure the eval map updates are finished */
10362         if (eval_map_wq)
10363                 destroy_workqueue(eval_map_wq);
10364         return 0;
10365 }
10366
10367 late_initcall_sync(trace_eval_sync);
10368
10369
10370 #ifdef CONFIG_MODULES
10371
10372 bool module_exists(const char *module)
10373 {
10374         /* All modules have the symbol __this_module */
10375         static const char this_mod[] = "__this_module";
10376         char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
10377         unsigned long val;
10378         int n;
10379
10380         n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
10381
10382         if (n > sizeof(modname) - 1)
10383                 return false;
10384
10385         val = module_kallsyms_lookup_name(modname);
10386         return val != 0;
10387 }
10388
10389 static void trace_module_add_evals(struct module *mod)
10390 {
10391         if (!mod->num_trace_evals)
10392                 return;
10393
10394         /*
10395          * Modules with bad taint do not have events created;
10396          * do not bother with enums either.
10397          */
10398         if (trace_module_has_bad_taint(mod))
10399                 return;
10400
10401         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10402 }
10403
10404 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10405 static void trace_module_remove_evals(struct module *mod)
10406 {
10407         union trace_eval_map_item *map;
10408         union trace_eval_map_item **last = &trace_eval_maps;
10409
10410         if (!mod->num_trace_evals)
10411                 return;
10412
10413         guard(mutex)(&trace_eval_mutex);
10414
10415         map = trace_eval_maps;
10416
10417         while (map) {
10418                 if (map->head.mod == mod)
10419                         break;
10420                 map = trace_eval_jmp_to_tail(map);
10421                 last = &map->tail.next;
10422                 map = map->tail.next;
10423         }
10424         if (!map)
10425                 return;
10426
10427         *last = trace_eval_jmp_to_tail(map)->tail.next;
10428         kfree(map);
10429 }
10430 #else
10431 static inline void trace_module_remove_evals(struct module *mod) { }
10432 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10433
10434 static void trace_module_record(struct module *mod, bool add)
10435 {
10436         struct trace_array *tr;
10437         unsigned long flags;
10438
10439         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10440                 flags = tr->flags & (TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT);
10441                 /* Update any persistent trace array that has already been started */
10442                 if (flags == TRACE_ARRAY_FL_BOOT && add) {
10443                         guard(mutex)(&scratch_mutex);
10444                         save_mod(mod, tr);
10445                 } else if (flags & TRACE_ARRAY_FL_LAST_BOOT) {
10446                         /* Update delta if the module loaded in previous boot */
10447                         make_mod_delta(mod, tr);
10448                 }
10449         }
10450 }
10451
10452 static int trace_module_notify(struct notifier_block *self,
10453                                unsigned long val, void *data)
10454 {
10455         struct module *mod = data;
10456
10457         switch (val) {
10458         case MODULE_STATE_COMING:
10459                 trace_module_add_evals(mod);
10460                 trace_module_record(mod, true);
10461                 break;
10462         case MODULE_STATE_GOING:
10463                 trace_module_remove_evals(mod);
10464                 trace_module_record(mod, false);
10465                 break;
10466         }
10467
10468         return NOTIFY_OK;
10469 }
10470
10471 static struct notifier_block trace_module_nb = {
10472         .notifier_call = trace_module_notify,
10473         .priority = 0,
10474 };
10475 #endif /* CONFIG_MODULES */
10476
10477 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10478 {
10479
10480         event_trace_init();
10481
10482         init_tracer_tracefs(&global_trace, NULL);
10483         ftrace_init_tracefs_toplevel(&global_trace, NULL);
10484
10485         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10486                         &global_trace, &tracing_thresh_fops);
10487
10488         trace_create_file("README", TRACE_MODE_READ, NULL,
10489                         NULL, &tracing_readme_fops);
10490
10491         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10492                         NULL, &tracing_saved_cmdlines_fops);
10493
10494         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10495                           NULL, &tracing_saved_cmdlines_size_fops);
10496
10497         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10498                         NULL, &tracing_saved_tgids_fops);
10499
10500         trace_create_eval_file(NULL);
10501
10502 #ifdef CONFIG_MODULES
10503         register_module_notifier(&trace_module_nb);
10504 #endif
10505
10506 #ifdef CONFIG_DYNAMIC_FTRACE
10507         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10508                         NULL, &tracing_dyn_info_fops);
10509 #endif
10510
10511         create_trace_instances(NULL);
10512
10513         update_tracer_options(&global_trace);
10514 }
10515
10516 static __init int tracer_init_tracefs(void)
10517 {
10518         int ret;
10519
10520         trace_access_lock_init();
10521
10522         ret = tracing_init_dentry();
10523         if (ret)
10524                 return 0;
10525
10526         if (eval_map_wq) {
10527                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10528                 queue_work(eval_map_wq, &tracerfs_init_work);
10529         } else {
10530                 tracer_init_tracefs_work_func(NULL);
10531         }
10532
10533         rv_init_interface();
10534
10535         return 0;
10536 }
10537
10538 fs_initcall(tracer_init_tracefs);
10539
10540 static int trace_die_panic_handler(struct notifier_block *self,
10541                                 unsigned long ev, void *unused);
10542
10543 static struct notifier_block trace_panic_notifier = {
10544         .notifier_call = trace_die_panic_handler,
10545         .priority = INT_MAX - 1,
10546 };
10547
10548 static struct notifier_block trace_die_notifier = {
10549         .notifier_call = trace_die_panic_handler,
10550         .priority = INT_MAX - 1,
10551 };
10552
10553 /*
10554  * The idea is to execute the following die/panic callback early, in order
10555  * to avoid showing irrelevant information in the trace (like other panic
10556  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10557  * warnings get disabled (to prevent potential log flooding).
10558  */
10559 static int trace_die_panic_handler(struct notifier_block *self,
10560                                 unsigned long ev, void *unused)
10561 {
10562         if (!ftrace_dump_on_oops_enabled())
10563                 return NOTIFY_DONE;
10564
10565         /* The die notifier requires DIE_OOPS to trigger */
10566         if (self == &trace_die_notifier && ev != DIE_OOPS)
10567                 return NOTIFY_DONE;
10568
10569         ftrace_dump(DUMP_PARAM);
10570
10571         return NOTIFY_DONE;
10572 }
10573
10574 /*
10575  * printk is set to a max of 1024; we really don't need it that big.
10576  * Nothing should be printing 1000 characters anyway.
10577  */
10578 #define TRACE_MAX_PRINT         1000
10579
10580 /*
10581  * Define here KERN_TRACE so that we have one place to modify
10582  * it if we decide to change what log level the ftrace dump
10583  * should be at.
10584  */
10585 #define KERN_TRACE              KERN_EMERG
10586
10587 void
10588 trace_printk_seq(struct trace_seq *s)
10589 {
10590         /* Probably should print a warning here. */
10591         if (s->seq.len >= TRACE_MAX_PRINT)
10592                 s->seq.len = TRACE_MAX_PRINT;
10593
10594         /*
10595          * More paranoid code. Although the buffer size is set to
10596          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10597          * an extra layer of protection.
10598          */
10599         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10600                 s->seq.len = s->seq.size - 1;
10601
10602         /* Should be NUL terminated, but we are paranoid. */
10603         s->buffer[s->seq.len] = 0;
10604
10605         printk(KERN_TRACE "%s", s->buffer);
10606
10607         trace_seq_init(s);
10608 }
10609
10610 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10611 {
10612         iter->tr = tr;
10613         iter->trace = iter->tr->current_trace;
10614         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10615         iter->array_buffer = &tr->array_buffer;
10616
10617         if (iter->trace && iter->trace->open)
10618                 iter->trace->open(iter);
10619
10620         /* Annotate start of buffers if we had overruns */
10621         if (ring_buffer_overruns(iter->array_buffer->buffer))
10622                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10623
10624         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10625         if (trace_clocks[iter->tr->clock_id].in_ns)
10626                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10627
10628         /* Cannot use kmalloc for iter.temp and iter.fmt */
10629         iter->temp = static_temp_buf;
10630         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10631         iter->fmt = static_fmt_buf;
10632         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10633 }
10634
10635 void trace_init_global_iter(struct trace_iterator *iter)
10636 {
10637         trace_init_iter(iter, &global_trace);
10638 }
10639
10640 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10641 {
10642         /* use static because iter can be a bit big for the stack */
10643         static struct trace_iterator iter;
10644         unsigned int old_userobj;
10645         unsigned long flags;
10646         int cnt = 0;
10647
10648         /*
10649          * Always turn off tracing when we dump.
10650          * We don't need to show trace output of what happens
10651          * between multiple crashes.
10652          *
10653          * If the user does a sysrq-z, then they can re-enable
10654          * tracing with echo 1 > tracing_on.
10655          */
10656         tracer_tracing_off(tr);
10657
10658         local_irq_save(flags);
10659
10660         /* Simulate the iterator */
10661         trace_init_iter(&iter, tr);
10662
10663         /* While dumping, do not allow the buffer to be enabled */
10664         tracer_tracing_disable(tr);
10665
10666         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10667
10668         /* don't look at user memory in panic mode */
10669         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10670
10671         if (dump_mode == DUMP_ORIG)
10672                 iter.cpu_file = raw_smp_processor_id();
10673         else
10674                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10675
10676         if (tr == &global_trace)
10677                 printk(KERN_TRACE "Dumping ftrace buffer:\n");
10678         else
10679                 printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10680
10681         /* Did function tracer already get disabled? */
10682         if (ftrace_is_dead()) {
10683                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10684                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10685         }
10686
10687         /*
10688          * We need to stop all tracing on all CPUs to read
10689          * the next buffer. This is a bit expensive, but is
10690          * not done often. We read everything we can,
10691          * and then release the locks again.
10692          */
10693
10694         while (!trace_empty(&iter)) {
10695
10696                 if (!cnt)
10697                         printk(KERN_TRACE "---------------------------------\n");
10698
10699                 cnt++;
10700
10701                 trace_iterator_reset(&iter);
10702                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10703
10704                 if (trace_find_next_entry_inc(&iter) != NULL) {
10705                         int ret;
10706
10707                         ret = print_trace_line(&iter);
10708                         if (ret != TRACE_TYPE_NO_CONSUME)
10709                                 trace_consume(&iter);
10710                 }
10711                 touch_nmi_watchdog();
10712
10713                 trace_printk_seq(&iter.seq);
10714         }
10715
10716         if (!cnt)
10717                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10718         else
10719                 printk(KERN_TRACE "---------------------------------\n");
10720
10721         tr->trace_flags |= old_userobj;
10722
10723         tracer_tracing_enable(tr);
10724         local_irq_restore(flags);
10725 }
10726
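/*
 * Dump buffers as directed by the ftrace_dump_on_oops parameter, a
 * comma separated list.  A leading "0" skips the global buffer, "1"
 * dumps all CPUs of it, and "2"/"orig_cpu" dumps only the originating
 * CPU.  The remaining tokens name instances, optionally suffixed with
 * "=2" or "=orig_cpu", e.g. ftrace_dump_on_oops=1,foo,bar=orig_cpu
 * (the instance names here are only illustrative).
 */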
10727 static void ftrace_dump_by_param(void)
10728 {
10729         bool first_param = true;
10730         char dump_param[MAX_TRACER_SIZE];
10731         char *buf, *token, *inst_name;
10732         struct trace_array *tr;
10733
10734         strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10735         buf = dump_param;
10736
10737         while ((token = strsep(&buf, ",")) != NULL) {
10738                 if (first_param) {
10739                         first_param = false;
10740                         if (!strcmp("0", token))
10741                                 continue;
10742                         else if (!strcmp("1", token)) {
10743                                 ftrace_dump_one(&global_trace, DUMP_ALL);
10744                                 continue;
10745                         }
10746                         else if (!strcmp("2", token) ||
10747                           !strcmp("orig_cpu", token)) {
10748                                 ftrace_dump_one(&global_trace, DUMP_ORIG);
10749                                 continue;
10750                         }
10751                 }
10752
10753                 inst_name = strsep(&token, "=");
10754                 tr = trace_array_find(inst_name);
10755                 if (!tr) {
10756                         printk(KERN_TRACE "Instance %s not found\n", inst_name);
10757                         continue;
10758                 }
10759
10760                 if (token && (!strcmp("2", token) ||
10761                           !strcmp("orig_cpu", token)))
10762                         ftrace_dump_one(tr, DUMP_ORIG);
10763                 else
10764                         ftrace_dump_one(tr, DUMP_ALL);
10765         }
10766 }
10767
10768 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10769 {
10770         static atomic_t dump_running;
10771
10772         /* Only allow one dump user at a time. */
10773         if (atomic_inc_return(&dump_running) != 1) {
10774                 atomic_dec(&dump_running);
10775                 return;
10776         }
10777
10778         switch (oops_dump_mode) {
10779         case DUMP_ALL:
10780                 ftrace_dump_one(&global_trace, DUMP_ALL);
10781                 break;
10782         case DUMP_ORIG:
10783                 ftrace_dump_one(&global_trace, DUMP_ORIG);
10784                 break;
10785         case DUMP_PARAM:
10786                 ftrace_dump_by_param();
10787                 break;
10788         case DUMP_NONE:
10789                 break;
10790         default:
10791                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10792                 ftrace_dump_one(&global_trace, DUMP_ALL);
10793         }
10794
10795         atomic_dec(&dump_running);
10796 }
10797 EXPORT_SYMBOL_GPL(ftrace_dump);
10798
10799 #define WRITE_BUFSIZE  4096
10800
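/*
 * Copy user input in WRITE_BUFSIZE chunks, split it into newline
 * terminated commands, strip '#' comments, and hand each command to
 * the createfn() callback.  Returns the number of bytes consumed or a
 * negative error.
 */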
10801 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10802                                 size_t count, loff_t *ppos,
10803                                 int (*createfn)(const char *))
10804 {
10805         char *kbuf, *buf, *tmp;
10806         int ret = 0;
10807         size_t done = 0;
10808         size_t size;
10809
10810         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10811         if (!kbuf)
10812                 return -ENOMEM;
10813
10814         while (done < count) {
10815                 size = count - done;
10816
10817                 if (size >= WRITE_BUFSIZE)
10818                         size = WRITE_BUFSIZE - 1;
10819
10820                 if (copy_from_user(kbuf, buffer + done, size)) {
10821                         ret = -EFAULT;
10822                         goto out;
10823                 }
10824                 kbuf[size] = '\0';
10825                 buf = kbuf;
10826                 do {
10827                         tmp = strchr(buf, '\n');
10828                         if (tmp) {
10829                                 *tmp = '\0';
10830                                 size = tmp - buf + 1;
10831                         } else {
10832                                 size = strlen(buf);
10833                                 if (done + size < count) {
10834                                         if (buf != kbuf)
10835                                                 break;
10836                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10837                                         pr_warn("Line length is too long: Should be less than %d\n",
10838                                                 WRITE_BUFSIZE - 2);
10839                                         ret = -EINVAL;
10840                                         goto out;
10841                                 }
10842                         }
10843                         done += size;
10844
10845                         /* Remove comments */
10846                         tmp = strchr(buf, '#');
10847
10848                         if (tmp)
10849                                 *tmp = '\0';
10850
10851                         ret = createfn(buf);
10852                         if (ret)
10853                                 goto out;
10854                         buf += size;
10855
10856                 } while (done < count);
10857         }
10858         ret = done;
10859
10860 out:
10861         kfree(kbuf);
10862
10863         return ret;
10864 }
10865
10866 #ifdef CONFIG_TRACER_MAX_TRACE
10867 __init static bool tr_needs_alloc_snapshot(const char *name)
10868 {
10869         char *test;
10870         int len = strlen(name);
10871         bool ret;
10872
10873         if (!boot_snapshot_index)
10874                 return false;
10875
10876         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10877             boot_snapshot_info[len] == '\t')
10878                 return true;
10879
10880         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10881         if (!test)
10882                 return false;
10883
10884         sprintf(test, "\t%s\t", name);
10885         ret = strstr(boot_snapshot_info, test) == NULL;
10886         kfree(test);
10887         return ret;
10888 }
10889
10890 __init static void do_allocate_snapshot(const char *name)
10891 {
10892         if (!tr_needs_alloc_snapshot(name))
10893                 return;
10894
10895         /*
10896          * When allocate_snapshot is set, the next call to
10897          * allocate_trace_buffers() (called by trace_array_get_by_name())
10898          * will allocate the snapshot buffer. That will also clear
10899          * this flag.
10900          */
10901         allocate_snapshot = true;
10902 }
10903 #else
10904 static inline void do_allocate_snapshot(const char *name) { }
10905 #endif
10906
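/*
 * Create the trace instances requested with the trace_instance= boot
 * parameter.  Each tab separated entry starts with the instance name,
 * optionally followed by ^flags ("traceoff", "printk"/"traceprintk")
 * and an @ specification giving either a start:size physical range or
 * the name of a reserve_mem region, e.g. (illustrative only):
 *
 *   trace_instance=boot_map^traceoff@0x10000000:8M
 */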
10907 __init static void enable_instances(void)
10908 {
10909         struct trace_array *tr;
10910         bool memmap_area = false;
10911         char *curr_str;
10912         char *name;
10913         char *str;
10914         char *tok;
10915
10916         /* A tab is always appended */
10917         boot_instance_info[boot_instance_index - 1] = '\0';
10918         str = boot_instance_info;
10919
10920         while ((curr_str = strsep(&str, "\t"))) {
10921                 phys_addr_t start = 0;
10922                 phys_addr_t size = 0;
10923                 unsigned long addr = 0;
10924                 bool traceprintk = false;
10925                 bool traceoff = false;
10926                 char *flag_delim;
10927                 char *addr_delim;
10928                 char *rname __free(kfree) = NULL;
10929
10930                 tok = strsep(&curr_str, ",");
10931
10932                 flag_delim = strchr(tok, '^');
10933                 addr_delim = strchr(tok, '@');
10934
10935                 if (addr_delim)
10936                         *addr_delim++ = '\0';
10937
10938                 if (flag_delim)
10939                         *flag_delim++ = '\0';
10940
10941                 name = tok;
10942
10943                 if (flag_delim) {
10944                         char *flag;
10945
10946                         while ((flag = strsep(&flag_delim, "^"))) {
10947                                 if (strcmp(flag, "traceoff") == 0) {
10948                                         traceoff = true;
10949                                 } else if ((strcmp(flag, "printk") == 0) ||
10950                                            (strcmp(flag, "traceprintk") == 0) ||
10951                                            (strcmp(flag, "trace_printk") == 0)) {
10952                                         traceprintk = true;
10953                                 } else {
10954                                         pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10955                                                 flag, name);
10956                                 }
10957                         }
10958                 }
10959
10960                 tok = addr_delim;
10961                 if (tok && isdigit(*tok)) {
10962                         start = memparse(tok, &tok);
10963                         if (!start) {
10964                                 pr_warn("Tracing: Invalid boot instance address for %s\n",
10965                                         name);
10966                                 continue;
10967                         }
10968                         if (*tok != ':') {
10969                                 pr_warn("Tracing: No size specified for instance %s\n", name);
10970                                 continue;
10971                         }
10972                         tok++;
10973                         size = memparse(tok, &tok);
10974                         if (!size) {
10975                                 pr_warn("Tracing: Invalid boot instance size for %s\n",
10976                                         name);
10977                                 continue;
10978                         }
10979                         memmap_area = true;
10980                 } else if (tok) {
10981                         if (!reserve_mem_find_by_name(tok, &start, &size)) {
10982                                 start = 0;
10983                                 pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10984                                 continue;
10985                         }
10986                         rname = kstrdup(tok, GFP_KERNEL);
10987                 }
10988
10989                 if (start) {
10990                         /* Start and size must be page aligned */
10991                         if (start & ~PAGE_MASK) {
10992                                 pr_warn("Tracing: mapping start addr %pa is not page aligned\n", &start);
10993                                 continue;
10994                         }
10995                         if (size & ~PAGE_MASK) {
10996                                 pr_warn("Tracing: mapping size %pa is not page aligned\n", &size);
10997                                 continue;
10998                         }
10999
11000                         if (memmap_area)
11001                                 addr = map_pages(start, size);
11002                         else
11003                                 addr = (unsigned long)phys_to_virt(start);
11004                         if (addr) {
11005                                 pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
11006                                         name, &start, (unsigned long)size);
11007                         } else {
11008                                 pr_warn("Tracing: Failed to map boot instance %s\n", name);
11009                                 continue;
11010                         }
11011                 } else {
11012                         /* Only non-mapped buffers have snapshot buffers */
11013                         if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
11014                                 do_allocate_snapshot(name);
11015                 }
11016
11017                 tr = trace_array_create_systems(name, NULL, addr, size);
11018                 if (IS_ERR(tr)) {
11019                         pr_warn("Tracing: Failed to create instance buffer %s\n", name);
11020                         continue;
11021                 }
11022
11023                 if (traceoff)
11024                         tracer_tracing_off(tr);
11025
11026                 if (traceprintk)
11027                         update_printk_trace(tr);
11028
11029                 /*
11030                  * memmap'd buffers cannot be freed.
11031                  */
11032                 if (memmap_area) {
11033                         tr->flags |= TRACE_ARRAY_FL_MEMMAP;
11034                         tr->ref++;
11035                 }
11036
11037                 if (start) {
11038                         tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
11039                         tr->range_name = no_free_ptr(rname);
11040                 }
11041
11042                 while ((tok = strsep(&curr_str, ","))) {
11043                         early_enable_events(tr, tok, true);
11044                 }
11045         }
11046 }
11047
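/*
 * Early set up of the global trace array: allocate the CPU masks and the
 * global ring buffer, register the CPU hotplug "prepare" callback, create
 * the temporary buffer used by event triggers, and install the nop tracer
 * as the bootstrap current_trace. Called from early_trace_init().
 */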
11048 __init static int tracer_alloc_buffers(void)
11049 {
11050         int ring_buf_size;
11051         int ret = -ENOMEM;
11052
11053
11054         if (security_locked_down(LOCKDOWN_TRACEFS)) {
11055                 pr_warn("Tracing disabled due to lockdown\n");
11056                 return -EPERM;
11057         }
11058
11059         /*
11060          * Make sure we don't accidentally add more trace options
11061          * than we have bits for.
11062          */
11063         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
11064
11065         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
11066                 goto out;
11067
11068         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
11069                 goto out_free_buffer_mask;
11070
11071         /* Only allocate trace_printk buffers if a trace_printk exists */
11072         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
11073                 /* Must be called before global_trace.buffer is allocated */
11074                 trace_printk_init_buffers();
11075
11076         /* To save memory, keep the ring buffer size to its minimum */
11077         if (global_trace.ring_buffer_expanded)
11078                 ring_buf_size = trace_buf_size;
11079         else
11080                 ring_buf_size = 1;
11081
11082         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
11083         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
11084
11085         raw_spin_lock_init(&global_trace.start_lock);
11086
11087         /*
11088          * The prepare callback allocates some memory for the ring buffer. We
11089          * don't free the buffer if the CPU goes down. If we were to free
11090          * the buffer, then the user would lose any trace that was in the
11091          * buffer. The memory will be removed once the "instance" is removed.
11092          */
11093         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
11094                                       "trace/RB:prepare", trace_rb_cpu_prepare,
11095                                       NULL);
11096         if (ret < 0)
11097                 goto out_free_cpumask;
11098         /* Used for event triggers */
11099         ret = -ENOMEM;
11100         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
11101         if (!temp_buffer)
11102                 goto out_rm_hp_state;
11103
11104         if (trace_create_savedcmd() < 0)
11105                 goto out_free_temp_buffer;
11106
11107         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
11108                 goto out_free_savedcmd;
11109
11110         /* TODO: make the number of buffers hot pluggable with CPUS */
11111         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
11112                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
11113                 goto out_free_pipe_cpumask;
11114         }
11115         if (global_trace.buffer_disabled)
11116                 tracing_off();
11117
11118         if (trace_boot_clock) {
11119                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
11120                 if (ret < 0)
11121                         pr_warn("Trace clock %s not defined, going back to default\n",
11122                                 trace_boot_clock);
11123         }
11124
11125         /*
11126          * register_tracer() might reference current_trace, so it
11127          * needs to be set before we register anything. This is
11128          * just a bootstrap of current_trace anyway.
11129          */
11130         global_trace.current_trace = &nop_trace;
11131
11132         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
11133 #ifdef CONFIG_TRACER_MAX_TRACE
11134         spin_lock_init(&global_trace.snapshot_trigger_lock);
11135 #endif
11136         ftrace_init_global_array_ops(&global_trace);
11137
11138 #ifdef CONFIG_MODULES
11139         INIT_LIST_HEAD(&global_trace.mod_events);
11140 #endif
11141
11142         init_trace_flags_index(&global_trace);
11143
11144         register_tracer(&nop_trace);
11145
11146         /* Function tracing may start here (via kernel command line) */
11147         init_function_trace();
11148
11149         /* All seems OK, enable tracing */
11150         tracing_disabled = 0;
11151
11152         atomic_notifier_chain_register(&panic_notifier_list,
11153                                        &trace_panic_notifier);
11154
11155         register_die_notifier(&trace_die_notifier);
11156
11157         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
11158
11159         INIT_LIST_HEAD(&global_trace.systems);
11160         INIT_LIST_HEAD(&global_trace.events);
11161         INIT_LIST_HEAD(&global_trace.hist_vars);
11162         INIT_LIST_HEAD(&global_trace.err_log);
11163         list_add(&global_trace.marker_list, &marker_copies);
11164         list_add(&global_trace.list, &ftrace_trace_arrays);
11165
11166         apply_trace_boot_options();
11167
11168         register_snapshot_cmd();
11169
11170         return 0;
11171
11172 out_free_pipe_cpumask:
11173         free_cpumask_var(global_trace.pipe_cpumask);
11174 out_free_savedcmd:
11175         trace_free_saved_cmdlines_buffer();
11176 out_free_temp_buffer:
11177         ring_buffer_free(temp_buffer);
11178 out_rm_hp_state:
11179         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
11180 out_free_cpumask:
11181         free_cpumask_var(global_trace.tracing_cpumask);
11182 out_free_buffer_mask:
11183         free_cpumask_var(tracing_buffer_mask);
11184 out:
11185         return ret;
11186 }
11187
11188 #ifdef CONFIG_FUNCTION_TRACER
11189 /* Used to set module cached ftrace filtering at boot up */
11190 __init struct trace_array *trace_get_global_array(void)
11191 {
11192         return &global_trace;
11193 }
11194 #endif
11195
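/*
 * Take a boot snapshot of every instance that has a snapshot buffer
 * allocated. snapshot_at_boot is set from the kernel command line (the
 * ftrace_boot_snapshot option), so this is a no-op unless that was
 * requested.
 */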
11196 void __init ftrace_boot_snapshot(void)
11197 {
11198 #ifdef CONFIG_TRACER_MAX_TRACE
11199         struct trace_array *tr;
11200
11201         if (!snapshot_at_boot)
11202                 return;
11203
11204         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
11205                 if (!tr->allocated_snapshot)
11206                         continue;
11207
11208                 tracing_snapshot_instance(tr);
11209                 trace_array_puts(tr, "** Boot snapshot taken **\n");
11210         }
11211 #endif
11212 }
11213
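/*
 * Tracing is brought up in three stages: early_trace_init() and
 * trace_init() are called from start_kernel(), while late_trace_init()
 * below runs as a late initcall once the rest of the kernel is up.
 */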
11214 void __init early_trace_init(void)
11215 {
11216         if (tracepoint_printk) {
11217                 tracepoint_print_iter =
11218                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
11219                 if (MEM_FAIL(!tracepoint_print_iter,
11220                              "Failed to allocate trace iterator\n"))
11221                         tracepoint_printk = 0;
11222                 else
11223                         static_key_enable(&tracepoint_printk_key.key);
11224         }
11225         tracer_alloc_buffers();
11226
11227         init_events();
11228 }
11229
11230 void __init trace_init(void)
11231 {
11232         trace_event_init();
11233
11234         if (boot_instance_index)
11235                 enable_instances();
11236 }
11237
11238 __init static void clear_boot_tracer(void)
11239 {
11240         /*
11241          * The default bootup tracer name points into an init section,
11242          * which is discarded when init memory is freed. This function
11243          * runs from a late initcall: if the boot tracer was never
11244          * registered, clear it out here to prevent a later registration
11245          * from accessing the buffer that is about to be freed.
11246          */
11247         if (!default_bootup_tracer)
11248                 return;
11249
11250         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
11251                default_bootup_tracer);
11252         default_bootup_tracer = NULL;
11253 }
11254
11255 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
11256 __init static void tracing_set_default_clock(void)
11257 {
11258         /* sched_clock_stable() is determined in late_initcall */
11259         if (!trace_boot_clock && !sched_clock_stable()) {
11260                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
11261                         pr_warn("Can not set tracing clock due to lockdown\n");
11262                         return;
11263                 }
11264
11265                 printk(KERN_WARNING
11266                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
11267                        "If you want to keep using the local clock, then add:\n"
11268                        "  \"trace_clock=local\"\n"
11269                        "on the kernel command line\n");
11270                 tracing_set_clock(&global_trace, "global");
11271         }
11272 }
11273 #else
11274 static inline void tracing_set_default_clock(void) { }
11275 #endif
11276
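/*
 * Last boot-time fixups: honor the command line options that turn off
 * tracepoint printk output or tracing itself once boot has finished,
 * pick the default trace clock, and drop a boot tracer name that never
 * matched a registered tracer.
 */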
11277 __init static int late_trace_init(void)
11278 {
11279         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
11280                 static_key_disable(&tracepoint_printk_key.key);
11281                 tracepoint_printk = 0;
11282         }
11283
11284         if (traceoff_after_boot)
11285                 tracing_off();
11286
11287         tracing_set_default_clock();
11288         clear_boot_tracer();
11289         return 0;
11290 }
11291
11292 late_initcall_sync(late_trace_init);