tracing: Update subbuffer with kilobytes not page order
kernel/trace/trace.c (linux-2.6-block.git)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53
54 #include "trace.h"
55 #include "trace_output.h"
56
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring-buffer to count the
61  * entries inserted during the selftest, although concurrent
62  * insertions into the ring-buffer, such as trace_printk(), could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72
73 void __init disable_tracing_selftest(const char *reason)
74 {
75         if (!tracing_selftest_disabled) {
76                 tracing_selftest_disabled = true;
77                 pr_info("Ftrace startup test is disabled due to %s\n", reason);
78         }
79 }
80 #else
81 #define tracing_selftest_running        0
82 #define tracing_selftest_disabled       0
83 #endif
84
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93         { }
94 };
95
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99         return 0;
100 }
101
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
108
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 and is set back to zero only if the
112  * initialization of the tracer succeeds; that is the only place
113  * that clears it.
114  */
115 static int tracing_disabled = 1;
116
117 cpumask_var_t __read_mostly     tracing_buffer_mask;
118
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set it to 1 to dump the buffers of all CPUs.
132  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
133  */
134
135 enum ftrace_dump_mode ftrace_dump_on_oops;
136
137 /* When set, tracing will stop when a WARN*() is hit */
138 int __disable_trace_on_warning;
139
140 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
141 /* Map of enums to their values, for "eval_map" file */
142 struct trace_eval_map_head {
143         struct module                   *mod;
144         unsigned long                   length;
145 };
146
147 union trace_eval_map_item;
148
149 struct trace_eval_map_tail {
150         /*
151          * "end" is first and points to NULL as it must be different
152          * than "mod" or "eval_string"
153          */
154         union trace_eval_map_item       *next;
155         const char                      *end;   /* points to NULL */
156 };
157
158 static DEFINE_MUTEX(trace_eval_mutex);
159
160 /*
161  * The trace_eval_maps are saved in an array with two extra elements,
162  * one at the beginning, and one at the end. The beginning item contains
163  * the count of the saved maps (head.length), and the module they
164  * belong to if not built in (head.mod). The ending item contains a
165  * pointer to the next array of saved eval_map items.
166  */
167 union trace_eval_map_item {
168         struct trace_eval_map           map;
169         struct trace_eval_map_head      head;
170         struct trace_eval_map_tail      tail;
171 };
172
173 static union trace_eval_map_item *trace_eval_maps;
174 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
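/*
 * Layout sketch of one saved eval_map array, based on the comment above
 * (the number of maps and the follow-on array are illustrative only):
 *
 *   trace_eval_maps --> [ head | map 0 | map 1 | ... | map N-1 | tail ]
 *                         head.length = N
 *                         head.mod    = owning module, or NULL if built in
 *                         tail.next  --> next saved array, or NULL
 */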
175
176 int tracing_set_tracer(struct trace_array *tr, const char *buf);
177 static void ftrace_trace_userstack(struct trace_array *tr,
178                                    struct trace_buffer *buffer,
179                                    unsigned int trace_ctx);
180
181 #define MAX_TRACER_SIZE         100
182 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
183 static char *default_bootup_tracer;
184
185 static bool allocate_snapshot;
186 static bool snapshot_at_boot;
187
188 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
189 static int boot_instance_index;
190
191 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
192 static int boot_snapshot_index;
193
194 static int __init set_cmdline_ftrace(char *str)
195 {
196         strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
197         default_bootup_tracer = bootup_tracer_buf;
198         /* We are using ftrace early, expand it */
199         trace_set_ring_buffer_expanded(NULL);
200         return 1;
201 }
202 __setup("ftrace=", set_cmdline_ftrace);
203
204 static int __init set_ftrace_dump_on_oops(char *str)
205 {
206         if (*str++ != '=' || !*str || !strcmp("1", str)) {
207                 ftrace_dump_on_oops = DUMP_ALL;
208                 return 1;
209         }
210
211         if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
212                 ftrace_dump_on_oops = DUMP_ORIG;
213                 return 1;
214         }
215
216         return 0;
217 }
218 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
219
220 static int __init stop_trace_on_warning(char *str)
221 {
222         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
223                 __disable_trace_on_warning = 1;
224         return 1;
225 }
226 __setup("traceoff_on_warning", stop_trace_on_warning);
227
228 static int __init boot_alloc_snapshot(char *str)
229 {
230         char *slot = boot_snapshot_info + boot_snapshot_index;
231         int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
232         int ret;
233
234         if (str[0] == '=') {
235                 str++;
236                 if (strlen(str) >= left)
237                         return -1;
238
239                 ret = snprintf(slot, left, "%s\t", str);
240                 boot_snapshot_index += ret;
241         } else {
242                 allocate_snapshot = true;
243                 /* We also need the main ring buffer expanded */
244                 trace_set_ring_buffer_expanded(NULL);
245         }
246         return 1;
247 }
248 __setup("alloc_snapshot", boot_alloc_snapshot);
249
250
251 static int __init boot_snapshot(char *str)
252 {
253         snapshot_at_boot = true;
254         boot_alloc_snapshot(str);
255         return 1;
256 }
257 __setup("ftrace_boot_snapshot", boot_snapshot);
258
259
260 static int __init boot_instance(char *str)
261 {
262         char *slot = boot_instance_info + boot_instance_index;
263         int left = sizeof(boot_instance_info) - boot_instance_index;
264         int ret;
265
266         if (strlen(str) >= left)
267                 return -1;
268
269         ret = snprintf(slot, left, "%s\t", str);
270         boot_instance_index += ret;
271
272         return 1;
273 }
274 __setup("trace_instance=", boot_instance);
275
276
277 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
278
279 static int __init set_trace_boot_options(char *str)
280 {
281         strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
282         return 1;
283 }
284 __setup("trace_options=", set_trace_boot_options);
285
286 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
287 static char *trace_boot_clock __initdata;
288
289 static int __init set_trace_boot_clock(char *str)
290 {
291         strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
292         trace_boot_clock = trace_boot_clock_buf;
293         return 1;
294 }
295 __setup("trace_clock=", set_trace_boot_clock);
296
297 static int __init set_tracepoint_printk(char *str)
298 {
299         /* Ignore the "tp_printk_stop_on_boot" param */
300         if (*str == '_')
301                 return 0;
302
303         if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
304                 tracepoint_printk = 1;
305         return 1;
306 }
307 __setup("tp_printk", set_tracepoint_printk);
308
309 static int __init set_tracepoint_printk_stop(char *str)
310 {
311         tracepoint_printk_stop_on_boot = true;
312         return 1;
313 }
314 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
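/*
 * Illustrative kernel command line combining the boot parameters handled
 * above (the chosen tracer, options and clock are examples only):
 *
 *   ftrace=function trace_options=sym-addr trace_clock=global
 *   traceoff_on_warning ftrace_dump_on_oops=orig_cpu tp_printk
 */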
315
316 unsigned long long ns2usecs(u64 nsec)
317 {
318         nsec += 500;
319         do_div(nsec, 1000);
320         return nsec;
321 }
322
323 static void
324 trace_process_export(struct trace_export *export,
325                struct ring_buffer_event *event, int flag)
326 {
327         struct trace_entry *entry;
328         unsigned int size = 0;
329
330         if (export->flags & flag) {
331                 entry = ring_buffer_event_data(event);
332                 size = ring_buffer_event_length(event);
333                 export->write(export, entry, size);
334         }
335 }
336
337 static DEFINE_MUTEX(ftrace_export_lock);
338
339 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
340
341 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
342 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
344
345 static inline void ftrace_exports_enable(struct trace_export *export)
346 {
347         if (export->flags & TRACE_EXPORT_FUNCTION)
348                 static_branch_inc(&trace_function_exports_enabled);
349
350         if (export->flags & TRACE_EXPORT_EVENT)
351                 static_branch_inc(&trace_event_exports_enabled);
352
353         if (export->flags & TRACE_EXPORT_MARKER)
354                 static_branch_inc(&trace_marker_exports_enabled);
355 }
356
357 static inline void ftrace_exports_disable(struct trace_export *export)
358 {
359         if (export->flags & TRACE_EXPORT_FUNCTION)
360                 static_branch_dec(&trace_function_exports_enabled);
361
362         if (export->flags & TRACE_EXPORT_EVENT)
363                 static_branch_dec(&trace_event_exports_enabled);
364
365         if (export->flags & TRACE_EXPORT_MARKER)
366                 static_branch_dec(&trace_marker_exports_enabled);
367 }
368
369 static void ftrace_exports(struct ring_buffer_event *event, int flag)
370 {
371         struct trace_export *export;
372
373         preempt_disable_notrace();
374
375         export = rcu_dereference_raw_check(ftrace_exports_list);
376         while (export) {
377                 trace_process_export(export, event, flag);
378                 export = rcu_dereference_raw_check(export->next);
379         }
380
381         preempt_enable_notrace();
382 }
383
384 static inline void
385 add_trace_export(struct trace_export **list, struct trace_export *export)
386 {
387         rcu_assign_pointer(export->next, *list);
388         /*
389          * We are adding export to the list, but another
390          * CPU might be walking that list. We need to make sure
391          * the export->next pointer is valid before another CPU sees
392          * the export pointer itself inserted into the list.
393          */
394         rcu_assign_pointer(*list, export);
395 }
396
397 static inline int
398 rm_trace_export(struct trace_export **list, struct trace_export *export)
399 {
400         struct trace_export **p;
401
402         for (p = list; *p != NULL; p = &(*p)->next)
403                 if (*p == export)
404                         break;
405
406         if (*p != export)
407                 return -1;
408
409         rcu_assign_pointer(*p, (*p)->next);
410
411         return 0;
412 }
413
414 static inline void
415 add_ftrace_export(struct trace_export **list, struct trace_export *export)
416 {
417         ftrace_exports_enable(export);
418
419         add_trace_export(list, export);
420 }
421
422 static inline int
423 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
424 {
425         int ret;
426
427         ret = rm_trace_export(list, export);
428         ftrace_exports_disable(export);
429
430         return ret;
431 }
432
433 int register_ftrace_export(struct trace_export *export)
434 {
435         if (WARN_ON_ONCE(!export->write))
436                 return -1;
437
438         mutex_lock(&ftrace_export_lock);
439
440         add_ftrace_export(&ftrace_exports_list, export);
441
442         mutex_unlock(&ftrace_export_lock);
443
444         return 0;
445 }
446 EXPORT_SYMBOL_GPL(register_ftrace_export);
447
448 int unregister_ftrace_export(struct trace_export *export)
449 {
450         int ret;
451
452         mutex_lock(&ftrace_export_lock);
453
454         ret = rm_ftrace_export(&ftrace_exports_list, export);
455
456         mutex_unlock(&ftrace_export_lock);
457
458         return ret;
459 }
460 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
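/*
 * Minimal sketch of a client of the export API above; the callback body
 * and all names are illustrative, and the real prototype of ->write()
 * lives in <linux/trace.h>.
 */
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* Forward the raw trace entry to some other transport. */
	pr_debug("exporting %u bytes of trace data\n", size);
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT,
};

/*
 * Pairing (illustrative):
 *	register_ftrace_export(&example_export);
 *	...
 *	unregister_ftrace_export(&example_export);
 */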
461
462 /* trace_flags holds trace_options default values */
463 #define TRACE_DEFAULT_FLAGS                                             \
464         (FUNCTION_DEFAULT_FLAGS |                                       \
465          TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |                  \
466          TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |                \
467          TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |                 \
468          TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |                     \
469          TRACE_ITER_HASH_PTR)
470
471 /* trace_options that are only supported by global_trace */
472 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |                      \
473                TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
474
475 /* trace_flags that are default zero for instances */
476 #define ZEROED_TRACE_FLAGS \
477         (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
478
479 /*
480  * The global_trace is the descriptor that holds the top-level tracing
481  * buffers for the live tracing.
482  */
483 static struct trace_array global_trace = {
484         .trace_flags = TRACE_DEFAULT_FLAGS,
485 };
486
487 void trace_set_ring_buffer_expanded(struct trace_array *tr)
488 {
489         if (!tr)
490                 tr = &global_trace;
491         tr->ring_buffer_expanded = true;
492 }
493
494 LIST_HEAD(ftrace_trace_arrays);
495
496 int trace_array_get(struct trace_array *this_tr)
497 {
498         struct trace_array *tr;
499         int ret = -ENODEV;
500
501         mutex_lock(&trace_types_lock);
502         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
503                 if (tr == this_tr) {
504                         tr->ref++;
505                         ret = 0;
506                         break;
507                 }
508         }
509         mutex_unlock(&trace_types_lock);
510
511         return ret;
512 }
513
514 static void __trace_array_put(struct trace_array *this_tr)
515 {
516         WARN_ON(!this_tr->ref);
517         this_tr->ref--;
518 }
519
520 /**
521  * trace_array_put - Decrement the reference counter for this trace array.
522  * @this_tr : pointer to the trace array
523  *
524  * NOTE: Use this when we no longer need the trace array returned by
525  * trace_array_get_by_name(). This ensures the trace array can be later
526  * destroyed.
527  *
528  */
529 void trace_array_put(struct trace_array *this_tr)
530 {
531         if (!this_tr)
532                 return;
533
534         mutex_lock(&trace_types_lock);
535         __trace_array_put(this_tr);
536         mutex_unlock(&trace_types_lock);
537 }
538 EXPORT_SYMBOL_GPL(trace_array_put);
539
540 int tracing_check_open_get_tr(struct trace_array *tr)
541 {
542         int ret;
543
544         ret = security_locked_down(LOCKDOWN_TRACEFS);
545         if (ret)
546                 return ret;
547
548         if (tracing_disabled)
549                 return -ENODEV;
550
551         if (tr && trace_array_get(tr) < 0)
552                 return -ENODEV;
553
554         return 0;
555 }
556
557 int call_filter_check_discard(struct trace_event_call *call, void *rec,
558                               struct trace_buffer *buffer,
559                               struct ring_buffer_event *event)
560 {
561         if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
562             !filter_match_preds(call->filter, rec)) {
563                 __trace_event_discard_commit(buffer, event);
564                 return 1;
565         }
566
567         return 0;
568 }
569
570 /**
571  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
572  * @filtered_pids: The list of pids to check
573  * @search_pid: The PID to find in @filtered_pids
574  *
575  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
576  */
577 bool
578 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
579 {
580         return trace_pid_list_is_set(filtered_pids, search_pid);
581 }
582
583 /**
584  * trace_ignore_this_task - should a task be ignored for tracing
585  * @filtered_pids: The list of pids to check
586  * @filtered_no_pids: The list of pids not to be traced
587  * @task: The task that should be ignored if not filtered
588  *
589  * Checks if @task should be traced or not from @filtered_pids.
590  * Returns true if @task should *NOT* be traced.
591  * Returns false if @task should be traced.
592  */
593 bool
594 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
595                        struct trace_pid_list *filtered_no_pids,
596                        struct task_struct *task)
597 {
598         /*
599          * If filtered_no_pids is not empty, and the task's pid is listed
600          * in filtered_no_pids, then return true.
601          * Otherwise, if filtered_pids is empty, that means we can
602          * trace all tasks. If it has content, then only trace pids
603          * within filtered_pids.
604          */
605
606         return (filtered_pids &&
607                 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
608                 (filtered_no_pids &&
609                  trace_find_filtered_pid(filtered_no_pids, task->pid));
610 }
611
612 /**
613  * trace_filter_add_remove_task - Add or remove a task from a pid_list
614  * @pid_list: The list to modify
615  * @self: The current task for fork or NULL for exit
616  * @task: The task to add or remove
617  *
618  * When adding a task, if @self is defined, the task is only added if @self
619  * is also included in @pid_list. This happens on fork, and tasks should
620  * only be added when the parent is listed. If @self is NULL, then the
621  * @task pid will be removed from the list, which would happen on exit
622  * of a task.
623  */
624 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
625                                   struct task_struct *self,
626                                   struct task_struct *task)
627 {
628         if (!pid_list)
629                 return;
630
631         /* For forks, we only add if the forking task is listed */
632         if (self) {
633                 if (!trace_find_filtered_pid(pid_list, self->pid))
634                         return;
635         }
636
637         /* "self" is set for forks, and NULL for exits */
638         if (self)
639                 trace_pid_list_set(pid_list, task->pid);
640         else
641                 trace_pid_list_clear(pid_list, task->pid);
642 }
643
644 /**
645  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
646  * @pid_list: The pid list to show
647  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
648  * @pos: The position of the file
649  *
650  * This is used by the seq_file "next" operation to iterate the pids
651  * listed in a trace_pid_list structure.
652  *
653  * Returns the pid+1 as we want to display pid of zero, but NULL would
654  * stop the iteration.
655  */
656 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
657 {
658         long pid = (unsigned long)v;
659         unsigned int next;
660
661         (*pos)++;
662
663         /* pid already is +1 of the actual previous bit */
664         if (trace_pid_list_next(pid_list, pid, &next) < 0)
665                 return NULL;
666
667         pid = next;
668
669         /* Return pid + 1 to allow zero to be represented */
670         return (void *)(pid + 1);
671 }
672
673 /**
674  * trace_pid_start - Used for seq_file to start reading pid lists
675  * @pid_list: The pid list to show
676  * @pos: The position of the file
677  *
678  * This is used by seq_file "start" operation to start the iteration
679  * of listing pids.
680  *
681  * Returns the pid+1 as we want to display pid of zero, but NULL would
682  * stop the iteration.
683  */
684 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
685 {
686         unsigned long pid;
687         unsigned int first;
688         loff_t l = 0;
689
690         if (trace_pid_list_first(pid_list, &first) < 0)
691                 return NULL;
692
693         pid = first;
694
695         /* Return pid + 1 so that zero can be the exit value */
696         for (pid++; pid && l < *pos;
697              pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
698                 ;
699         return (void *)pid;
700 }
701
702 /**
703  * trace_pid_show - show the current pid in seq_file processing
704  * @m: The seq_file structure to write into
705  * @v: A void pointer of the pid (+1) value to display
706  *
707  * Can be directly used by seq_file operations to display the current
708  * pid value.
709  */
710 int trace_pid_show(struct seq_file *m, void *v)
711 {
712         unsigned long pid = (unsigned long)v - 1;
713
714         seq_printf(m, "%lu\n", pid);
715         return 0;
716 }
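/*
 * Sketch of how the three helpers above are meant to plug into a seq_file;
 * the wrapper names and the way the pid_list is fetched from m->private
 * are illustrative.
 */
static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;	/* assumed location */

	return trace_pid_start(pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_pid_list *pid_list = m->private;	/* assumed location */

	return trace_pid_next(pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};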
717
718 /* 128 should be much more than enough */
719 #define PID_BUF_SIZE            127
720
721 int trace_pid_write(struct trace_pid_list *filtered_pids,
722                     struct trace_pid_list **new_pid_list,
723                     const char __user *ubuf, size_t cnt)
724 {
725         struct trace_pid_list *pid_list;
726         struct trace_parser parser;
727         unsigned long val;
728         int nr_pids = 0;
729         ssize_t read = 0;
730         ssize_t ret;
731         loff_t pos;
732         pid_t pid;
733
734         if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
735                 return -ENOMEM;
736
737         /*
738          * Always create a new list. The write is an all-or-nothing
739          * operation: a fresh list is always built when the user adds
740          * new pids, and if the operation fails the current list is
741          * left unmodified.
742          */
743         pid_list = trace_pid_list_alloc();
744         if (!pid_list) {
745                 trace_parser_put(&parser);
746                 return -ENOMEM;
747         }
748
749         if (filtered_pids) {
750                 /* copy the current bits to the new max */
751                 ret = trace_pid_list_first(filtered_pids, &pid);
752                 while (!ret) {
753                         trace_pid_list_set(pid_list, pid);
754                         ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
755                         nr_pids++;
756                 }
757         }
758
759         ret = 0;
760         while (cnt > 0) {
761
762                 pos = 0;
763
764                 ret = trace_get_user(&parser, ubuf, cnt, &pos);
765                 if (ret < 0)
766                         break;
767
768                 read += ret;
769                 ubuf += ret;
770                 cnt -= ret;
771
772                 if (!trace_parser_loaded(&parser))
773                         break;
774
775                 ret = -EINVAL;
776                 if (kstrtoul(parser.buffer, 0, &val))
777                         break;
778
779                 pid = (pid_t)val;
780
781                 if (trace_pid_list_set(pid_list, pid) < 0) {
782                         ret = -1;
783                         break;
784                 }
785                 nr_pids++;
786
787                 trace_parser_clear(&parser);
788                 ret = 0;
789         }
790         trace_parser_put(&parser);
791
792         if (ret < 0) {
793                 trace_pid_list_free(pid_list);
794                 return ret;
795         }
796
797         if (!nr_pids) {
798                 /* Cleared the list of pids */
799                 trace_pid_list_free(pid_list);
800                 pid_list = NULL;
801         }
802
803         *new_pid_list = pid_list;
804
805         return read;
806 }
807
808 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
809 {
810         u64 ts;
811
812         /* Early boot up does not have a buffer yet */
813         if (!buf->buffer)
814                 return trace_clock_local();
815
816         ts = ring_buffer_time_stamp(buf->buffer);
817         ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
818
819         return ts;
820 }
821
822 u64 ftrace_now(int cpu)
823 {
824         return buffer_ftrace_now(&global_trace.array_buffer, cpu);
825 }
826
827 /**
828  * tracing_is_enabled - Show if global_trace has been enabled
829  *
830  * Shows if the global trace has been enabled or not. It uses the
831  * mirror flag "buffer_disabled" to be used in fast paths such as for
832  * mirror flag "buffer_disabled" so it can be checked in fast paths such as by
833  * need to know the accurate state, use tracing_is_on() which is a little
834  * slower, but accurate.
835  */
836 int tracing_is_enabled(void)
837 {
838         /*
839          * For quick access (irqsoff uses this in fast path), just
840          * return the mirror variable of the state of the ring buffer.
841          * It's a little racy, but we don't really care.
842          */
843         smp_rmb();
844         return !global_trace.buffer_disabled;
845 }
846
847 /*
848  * trace_buf_size is the size in bytes that is allocated
849  * for a buffer. Note, the number of bytes is always rounded
850  * to page size.
851  *
852  * This number is purposely set to a low value of 16384 entries:
853  * if a dump on oops happens, nobody wants to wait for an enormous
854  * amount of output. It is configurable at both boot time and run
855  * time anyway.
856  */
857 #define TRACE_BUF_SIZE_DEFAULT  1441792UL /* 16384 * 88 (sizeof(entry)) */
858
859 static unsigned long            trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
860
861 /* trace_types holds a link list of available tracers. */
862 static struct tracer            *trace_types __read_mostly;
863
864 /*
865  * trace_types_lock is used to protect the trace_types list.
866  */
867 DEFINE_MUTEX(trace_types_lock);
868
869 /*
870  * Serialize access to the ring buffer.
871  *
872  * The ring buffer serializes readers, but that is only low level protection.
873  * The validity of the events (returned by ring_buffer_peek() etc.)
874  * is not protected by the ring buffer.
875  *
876  * The content of events may become garbage if we allow another process to
877  * consume these events concurrently:
878  *   A) the page of the consumed events may become a normal page
879  *      (not a reader page) in the ring buffer, and this page will be
880  *      rewritten by the events producer.
881  *   B) the page of the consumed events may become a page for splice_read,
882  *      and this page will be returned to the system.
883  *
884  * These primitives allow multiple processes to access different CPU ring
885  * buffers concurrently.
886  *
887  * These primitives don't distinguish read-only and read-consume access.
888  * Multiple read-only accesses are also serialized.
889  */
890
891 #ifdef CONFIG_SMP
892 static DECLARE_RWSEM(all_cpu_access_lock);
893 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
894
895 static inline void trace_access_lock(int cpu)
896 {
897         if (cpu == RING_BUFFER_ALL_CPUS) {
898                 /* gain it for accessing the whole ring buffer. */
899                 down_write(&all_cpu_access_lock);
900         } else {
901                 /* gain it for accessing a cpu ring buffer. */
902
903                 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
904                 down_read(&all_cpu_access_lock);
905
906                 /* Secondly block other access to this @cpu ring buffer. */
907                 mutex_lock(&per_cpu(cpu_access_lock, cpu));
908         }
909 }
910
911 static inline void trace_access_unlock(int cpu)
912 {
913         if (cpu == RING_BUFFER_ALL_CPUS) {
914                 up_write(&all_cpu_access_lock);
915         } else {
916                 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
917                 up_read(&all_cpu_access_lock);
918         }
919 }
920
921 static inline void trace_access_lock_init(void)
922 {
923         int cpu;
924
925         for_each_possible_cpu(cpu)
926                 mutex_init(&per_cpu(cpu_access_lock, cpu));
927 }
928
929 #else
930
931 static DEFINE_MUTEX(access_lock);
932
933 static inline void trace_access_lock(int cpu)
934 {
935         (void)cpu;
936         mutex_lock(&access_lock);
937 }
938
939 static inline void trace_access_unlock(int cpu)
940 {
941         (void)cpu;
942         mutex_unlock(&access_lock);
943 }
944
945 static inline void trace_access_lock_init(void)
946 {
947 }
948
949 #endif
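/*
 * Intended calling pattern for the access primitives above (the reader
 * body is illustrative):
 *
 *	trace_access_lock(cpu);
 *	... peek at or consume events from the @cpu ring buffer ...
 *	trace_access_unlock(cpu);
 *
 * Passing RING_BUFFER_ALL_CPUS locks out all per-cpu readers at once.
 */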
950
951 #ifdef CONFIG_STACKTRACE
952 static void __ftrace_trace_stack(struct trace_buffer *buffer,
953                                  unsigned int trace_ctx,
954                                  int skip, struct pt_regs *regs);
955 static inline void ftrace_trace_stack(struct trace_array *tr,
956                                       struct trace_buffer *buffer,
957                                       unsigned int trace_ctx,
958                                       int skip, struct pt_regs *regs);
959
960 #else
961 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
962                                         unsigned int trace_ctx,
963                                         int skip, struct pt_regs *regs)
964 {
965 }
966 static inline void ftrace_trace_stack(struct trace_array *tr,
967                                       struct trace_buffer *buffer,
968                                       unsigned long trace_ctx,
969                                       int skip, struct pt_regs *regs)
970 {
971 }
972
973 #endif
974
975 static __always_inline void
976 trace_event_setup(struct ring_buffer_event *event,
977                   int type, unsigned int trace_ctx)
978 {
979         struct trace_entry *ent = ring_buffer_event_data(event);
980
981         tracing_generic_entry_update(ent, type, trace_ctx);
982 }
983
984 static __always_inline struct ring_buffer_event *
985 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
986                           int type,
987                           unsigned long len,
988                           unsigned int trace_ctx)
989 {
990         struct ring_buffer_event *event;
991
992         event = ring_buffer_lock_reserve(buffer, len);
993         if (event != NULL)
994                 trace_event_setup(event, type, trace_ctx);
995
996         return event;
997 }
998
999 void tracer_tracing_on(struct trace_array *tr)
1000 {
1001         if (tr->array_buffer.buffer)
1002                 ring_buffer_record_on(tr->array_buffer.buffer);
1003         /*
1004          * This flag is looked at when buffers haven't been allocated
1005          * yet, or by some tracers (like irqsoff) that just want to
1006          * know if the ring buffer has been disabled, but can handle
1007          * races where it gets disabled while we still do a record.
1008          * As the check is in the fast path of the tracers, it is more
1009          * important to be fast than accurate.
1010          */
1011         tr->buffer_disabled = 0;
1012         /* Make the flag seen by readers */
1013         smp_wmb();
1014 }
1015
1016 /**
1017  * tracing_on - enable tracing buffers
1018  *
1019  * This function enables tracing buffers that may have been
1020  * disabled with tracing_off.
1021  */
1022 void tracing_on(void)
1023 {
1024         tracer_tracing_on(&global_trace);
1025 }
1026 EXPORT_SYMBOL_GPL(tracing_on);
1027
1028
1029 static __always_inline void
1030 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1031 {
1032         __this_cpu_write(trace_taskinfo_save, true);
1033
1034         /* If this is the temp buffer, we need to commit fully */
1035         if (this_cpu_read(trace_buffered_event) == event) {
1036                 /* Length is in event->array[0] */
1037                 ring_buffer_write(buffer, event->array[0], &event->array[1]);
1038                 /* Release the temp buffer */
1039                 this_cpu_dec(trace_buffered_event_cnt);
1040                 /* ring_buffer_unlock_commit() enables preemption */
1041                 preempt_enable_notrace();
1042         } else
1043                 ring_buffer_unlock_commit(buffer);
1044 }
1045
1046 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1047                        const char *str, int size)
1048 {
1049         struct ring_buffer_event *event;
1050         struct trace_buffer *buffer;
1051         struct print_entry *entry;
1052         unsigned int trace_ctx;
1053         int alloc;
1054
1055         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1056                 return 0;
1057
1058         if (unlikely(tracing_selftest_running && tr == &global_trace))
1059                 return 0;
1060
1061         if (unlikely(tracing_disabled))
1062                 return 0;
1063
1064         alloc = sizeof(*entry) + size + 2; /* possible \n added */
1065
1066         trace_ctx = tracing_gen_ctx();
1067         buffer = tr->array_buffer.buffer;
1068         ring_buffer_nest_start(buffer);
1069         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1070                                             trace_ctx);
1071         if (!event) {
1072                 size = 0;
1073                 goto out;
1074         }
1075
1076         entry = ring_buffer_event_data(event);
1077         entry->ip = ip;
1078
1079         memcpy(&entry->buf, str, size);
1080
1081         /* Add a newline if necessary */
1082         if (entry->buf[size - 1] != '\n') {
1083                 entry->buf[size] = '\n';
1084                 entry->buf[size + 1] = '\0';
1085         } else
1086                 entry->buf[size] = '\0';
1087
1088         __buffer_unlock_commit(buffer, event);
1089         ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1090  out:
1091         ring_buffer_nest_end(buffer);
1092         return size;
1093 }
1094 EXPORT_SYMBOL_GPL(__trace_array_puts);
1095
1096 /**
1097  * __trace_puts - write a constant string into the trace buffer.
1098  * @ip:    The address of the caller
1099  * @str:   The constant string to write
1100  * @size:  The size of the string.
1101  */
1102 int __trace_puts(unsigned long ip, const char *str, int size)
1103 {
1104         return __trace_array_puts(&global_trace, ip, str, size);
1105 }
1106 EXPORT_SYMBOL_GPL(__trace_puts);
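/*
 * Callers normally reach __trace_puts() and __trace_bputs() below through
 * the trace_puts() macro, which supplies the caller address and, for
 * non-constant strings, the length, e.g. (illustrative):
 *
 *	trace_puts("reached the slow path\n");
 */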
1107
1108 /**
1109  * __trace_bputs - write the pointer to a constant string into trace buffer
1110  * @ip:    The address of the caller
1111  * @str:   The constant string to write to the buffer to
1112  */
1113 int __trace_bputs(unsigned long ip, const char *str)
1114 {
1115         struct ring_buffer_event *event;
1116         struct trace_buffer *buffer;
1117         struct bputs_entry *entry;
1118         unsigned int trace_ctx;
1119         int size = sizeof(struct bputs_entry);
1120         int ret = 0;
1121
1122         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1123                 return 0;
1124
1125         if (unlikely(tracing_selftest_running || tracing_disabled))
1126                 return 0;
1127
1128         trace_ctx = tracing_gen_ctx();
1129         buffer = global_trace.array_buffer.buffer;
1130
1131         ring_buffer_nest_start(buffer);
1132         event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1133                                             trace_ctx);
1134         if (!event)
1135                 goto out;
1136
1137         entry = ring_buffer_event_data(event);
1138         entry->ip                       = ip;
1139         entry->str                      = str;
1140
1141         __buffer_unlock_commit(buffer, event);
1142         ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1143
1144         ret = 1;
1145  out:
1146         ring_buffer_nest_end(buffer);
1147         return ret;
1148 }
1149 EXPORT_SYMBOL_GPL(__trace_bputs);
1150
1151 #ifdef CONFIG_TRACER_SNAPSHOT
1152 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1153                                            void *cond_data)
1154 {
1155         struct tracer *tracer = tr->current_trace;
1156         unsigned long flags;
1157
1158         if (in_nmi()) {
1159                 trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1160                 trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1161                 return;
1162         }
1163
1164         if (!tr->allocated_snapshot) {
1165                 trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1166                 trace_array_puts(tr, "*** stopping trace here!   ***\n");
1167                 tracer_tracing_off(tr);
1168                 return;
1169         }
1170
1171         /* Note, snapshot can not be used when the tracer uses it */
1172         if (tracer->use_max_tr) {
1173                 trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1174                 trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1175                 return;
1176         }
1177
1178         local_irq_save(flags);
1179         update_max_tr(tr, current, smp_processor_id(), cond_data);
1180         local_irq_restore(flags);
1181 }
1182
1183 void tracing_snapshot_instance(struct trace_array *tr)
1184 {
1185         tracing_snapshot_instance_cond(tr, NULL);
1186 }
1187
1188 /**
1189  * tracing_snapshot - take a snapshot of the current buffer.
1190  *
1191  * This causes a swap between the snapshot buffer and the current live
1192  * tracing buffer. You can use this to take snapshots of the live
1193  * trace when some condition is triggered, but continue to trace.
1194  *
1195  * Note, make sure to allocate the snapshot with either
1196  * a tracing_snapshot_alloc(), or by doing it manually
1197  * with: echo 1 > /sys/kernel/tracing/snapshot
1198  *
1199  * If the snapshot buffer is not allocated, it will stop tracing.
1200  * Basically making a permanent snapshot.
1201  */
1202 void tracing_snapshot(void)
1203 {
1204         struct trace_array *tr = &global_trace;
1205
1206         tracing_snapshot_instance(tr);
1207 }
1208 EXPORT_SYMBOL_GPL(tracing_snapshot);
1209
1210 /**
1211  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1212  * @tr:         The tracing instance to snapshot
1213  * @cond_data:  The data to be tested conditionally, and possibly saved
1214  *
1215  * This is the same as tracing_snapshot() except that the snapshot is
1216  * conditional - the snapshot will only happen if the
1217  * cond_snapshot.update() implementation receiving the cond_data
1218  * returns true, which means that the trace array's cond_snapshot
1219  * update() operation used the cond_data to determine whether the
1220  * snapshot should be taken, and if it was, presumably saved it along
1221  * with the snapshot.
1222  */
1223 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1224 {
1225         tracing_snapshot_instance_cond(tr, cond_data);
1226 }
1227 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1228
1229 /**
1230  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1231  * @tr:         The tracing instance
1232  *
1233  * When the user enables a conditional snapshot using
1234  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1235  * with the snapshot.  This accessor is used to retrieve it.
1236  *
1237  * Should not be called from cond_snapshot.update(), since it takes
1238  * the tr->max_lock lock, which the code calling
1239  * cond_snapshot.update() has already done.
1240  *
1241  * Returns the cond_data associated with the trace array's snapshot.
1242  */
1243 void *tracing_cond_snapshot_data(struct trace_array *tr)
1244 {
1245         void *cond_data = NULL;
1246
1247         local_irq_disable();
1248         arch_spin_lock(&tr->max_lock);
1249
1250         if (tr->cond_snapshot)
1251                 cond_data = tr->cond_snapshot->cond_data;
1252
1253         arch_spin_unlock(&tr->max_lock);
1254         local_irq_enable();
1255
1256         return cond_data;
1257 }
1258 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1259
1260 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1261                                         struct array_buffer *size_buf, int cpu_id);
1262 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1263
1264 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1265 {
1266         int order;
1267         int ret;
1268
1269         if (!tr->allocated_snapshot) {
1270
1271                 /* Make the snapshot buffer have the same order as main buffer */
1272                 order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1273                 ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1274                 if (ret < 0)
1275                         return ret;
1276
1277                 /* allocate spare buffer */
1278                 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1279                                    &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1280                 if (ret < 0)
1281                         return ret;
1282
1283                 tr->allocated_snapshot = true;
1284         }
1285
1286         return 0;
1287 }
1288
1289 static void free_snapshot(struct trace_array *tr)
1290 {
1291         /*
1292          * We don't free the ring buffer; instead, we resize it because
1293          * the max_tr ring buffer has some state (e.g. ring->clock) and
1294          * we want to preserve it.
1295          */
1296         ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1297         ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1298         set_buffer_entries(&tr->max_buffer, 1);
1299         tracing_reset_online_cpus(&tr->max_buffer);
1300         tr->allocated_snapshot = false;
1301 }
1302
1303 /**
1304  * tracing_alloc_snapshot - allocate snapshot buffer.
1305  *
1306  * This only allocates the snapshot buffer if it isn't already
1307  * allocated - it doesn't also take a snapshot.
1308  *
1309  * This is meant to be used in cases where the snapshot buffer needs
1310  * to be set up for events that can't sleep but need to be able to
1311  * trigger a snapshot.
1312  */
1313 int tracing_alloc_snapshot(void)
1314 {
1315         struct trace_array *tr = &global_trace;
1316         int ret;
1317
1318         ret = tracing_alloc_snapshot_instance(tr);
1319         WARN_ON(ret < 0);
1320
1321         return ret;
1322 }
1323 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1324
1325 /**
1326  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1327  *
1328  * This is similar to tracing_snapshot(), but it will allocate the
1329  * snapshot buffer if it isn't already allocated. Use this only
1330  * where it is safe to sleep, as the allocation may sleep.
1331  *
1332  * This causes a swap between the snapshot buffer and the current live
1333  * tracing buffer. You can use this to take snapshots of the live
1334  * trace when some condition is triggered, but continue to trace.
1335  */
1336 void tracing_snapshot_alloc(void)
1337 {
1338         int ret;
1339
1340         ret = tracing_alloc_snapshot();
1341         if (ret < 0)
1342                 return;
1343
1344         tracing_snapshot();
1345 }
1346 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
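/*
 * Minimal sketch of the calling pattern the kernel-doc above describes:
 * allocate the spare buffer once from a context that may sleep, then take
 * snapshots when something interesting happens. The helper names and the
 * notion of an "interesting event" are illustrative.
 */
static int example_snapshot_setup(void)
{
	/* May sleep; usually done once at init time. */
	return tracing_alloc_snapshot();
}

static void example_on_interesting_event(void)
{
	/* Swap the live buffer with the spare one and keep tracing. */
	tracing_snapshot();
}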
1347
1348 /**
1349  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1350  * @tr:         The tracing instance
1351  * @cond_data:  User data to associate with the snapshot
1352  * @update:     Implementation of the cond_snapshot update function
1353  *
1354  * Check whether the conditional snapshot for the given instance has
1355  * already been enabled, or if the current tracer is already using a
1356  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1357  * save the cond_data and update function inside.
1358  *
1359  * Returns 0 if successful, error otherwise.
1360  */
1361 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1362                                  cond_update_fn_t update)
1363 {
1364         struct cond_snapshot *cond_snapshot;
1365         int ret = 0;
1366
1367         cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1368         if (!cond_snapshot)
1369                 return -ENOMEM;
1370
1371         cond_snapshot->cond_data = cond_data;
1372         cond_snapshot->update = update;
1373
1374         mutex_lock(&trace_types_lock);
1375
1376         ret = tracing_alloc_snapshot_instance(tr);
1377         if (ret)
1378                 goto fail_unlock;
1379
1380         if (tr->current_trace->use_max_tr) {
1381                 ret = -EBUSY;
1382                 goto fail_unlock;
1383         }
1384
1385         /*
1386          * The cond_snapshot can only change to NULL without the
1387          * trace_types_lock. We don't care if we race with it going
1388          * to NULL, but we want to make sure that it's not set to
1389          * something other than NULL when we get here, which we can
1390          * do safely with only holding the trace_types_lock and not
1391          * having to take the max_lock.
1392          */
1393         if (tr->cond_snapshot) {
1394                 ret = -EBUSY;
1395                 goto fail_unlock;
1396         }
1397
1398         local_irq_disable();
1399         arch_spin_lock(&tr->max_lock);
1400         tr->cond_snapshot = cond_snapshot;
1401         arch_spin_unlock(&tr->max_lock);
1402         local_irq_enable();
1403
1404         mutex_unlock(&trace_types_lock);
1405
1406         return ret;
1407
1408  fail_unlock:
1409         mutex_unlock(&trace_types_lock);
1410         kfree(cond_snapshot);
1411         return ret;
1412 }
1413 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1414
1415 /**
1416  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1417  * @tr:         The tracing instance
1418  *
1419  * Check whether the conditional snapshot for the given instance is
1420  * enabled; if so, free the cond_snapshot associated with it,
1421  * otherwise return -EINVAL.
1422  *
1423  * Returns 0 if successful, error otherwise.
1424  */
1425 int tracing_snapshot_cond_disable(struct trace_array *tr)
1426 {
1427         int ret = 0;
1428
1429         local_irq_disable();
1430         arch_spin_lock(&tr->max_lock);
1431
1432         if (!tr->cond_snapshot)
1433                 ret = -EINVAL;
1434         else {
1435                 kfree(tr->cond_snapshot);
1436                 tr->cond_snapshot = NULL;
1437         }
1438
1439         arch_spin_unlock(&tr->max_lock);
1440         local_irq_enable();
1441
1442         return ret;
1443 }
1444 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
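/*
 * Sketch of how the conditional snapshot API above is meant to be used;
 * the condition structure, its field and the helper name are illustrative.
 */
struct example_cond {
	u64 threshold;
};

static bool example_cond_update(struct trace_array *tr, void *cond_data)
{
	struct example_cond *cond = cond_data;

	/* Gate the buffer swap on caller-supplied state. */
	return cond->threshold != 0;
}

/*
 * Typical flow (illustrative):
 *	tracing_snapshot_cond_enable(tr, &cond, example_cond_update);
 *	...
 *	tracing_snapshot_cond(tr, data);   // example_cond_update(tr, data) gates the swap
 *	...
 *	tracing_snapshot_cond_disable(tr);
 */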
1445 #else
1446 void tracing_snapshot(void)
1447 {
1448         WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1449 }
1450 EXPORT_SYMBOL_GPL(tracing_snapshot);
1451 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1452 {
1453         WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1454 }
1455 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1456 int tracing_alloc_snapshot(void)
1457 {
1458         WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1459         return -ENODEV;
1460 }
1461 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1462 void tracing_snapshot_alloc(void)
1463 {
1464         /* Give warning */
1465         tracing_snapshot();
1466 }
1467 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1468 void *tracing_cond_snapshot_data(struct trace_array *tr)
1469 {
1470         return NULL;
1471 }
1472 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1473 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1474 {
1475         return -ENODEV;
1476 }
1477 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1478 int tracing_snapshot_cond_disable(struct trace_array *tr)
1479 {
1480         return false;
1481 }
1482 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1483 #define free_snapshot(tr)       do { } while (0)
1484 #endif /* CONFIG_TRACER_SNAPSHOT */
1485
1486 void tracer_tracing_off(struct trace_array *tr)
1487 {
1488         if (tr->array_buffer.buffer)
1489                 ring_buffer_record_off(tr->array_buffer.buffer);
1490         /*
1491          * This flag is looked at when buffers haven't been allocated
1492          * yet, or by some tracers (like irqsoff) that just want to
1493          * know if the ring buffer has been disabled, but can handle
1494          * races where it gets disabled while we still do a record.
1495          * As the check is in the fast path of the tracers, it is more
1496          * important to be fast than accurate.
1497          */
1498         tr->buffer_disabled = 1;
1499         /* Make the flag seen by readers */
1500         smp_wmb();
1501 }
1502
1503 /**
1504  * tracing_off - turn off tracing buffers
1505  *
1506  * This function stops the tracing buffers from recording data.
1507  * It does not disable any overhead the tracers themselves may
1508  * be causing. This function simply causes all recording to
1509  * the ring buffers to fail.
1510  */
1511 void tracing_off(void)
1512 {
1513         tracer_tracing_off(&global_trace);
1514 }
1515 EXPORT_SYMBOL_GPL(tracing_off);
1516
1517 void disable_trace_on_warning(void)
1518 {
1519         if (__disable_trace_on_warning) {
1520                 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1521                         "Disabling tracing due to warning\n");
1522                 tracing_off();
1523         }
1524 }
1525
1526 /**
1527  * tracer_tracing_is_on - show real state of ring buffer enabled
1528  * @tr : the trace array to know if ring buffer is enabled
1529  *
1530  * Shows real state of the ring buffer if it is enabled or not.
1531  */
1532 bool tracer_tracing_is_on(struct trace_array *tr)
1533 {
1534         if (tr->array_buffer.buffer)
1535                 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1536         return !tr->buffer_disabled;
1537 }
1538
1539 /**
1540  * tracing_is_on - show state of ring buffers enabled
1541  */
1542 int tracing_is_on(void)
1543 {
1544         return tracer_tracing_is_on(&global_trace);
1545 }
1546 EXPORT_SYMBOL_GPL(tracing_is_on);
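/*
 * Sketch of the classic debugging pattern built on tracing_off(): freeze
 * the ring buffer the moment a (hypothetical) bad state is detected, so
 * the trace ends right at the point of interest.
 */
static void example_check_state(bool corrupted)
{
	if (corrupted && tracing_is_on()) {
		trace_printk("bad state detected, freezing trace\n");
		tracing_off();
	}
}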
1547
1548 static int __init set_buf_size(char *str)
1549 {
1550         unsigned long buf_size;
1551
1552         if (!str)
1553                 return 0;
1554         buf_size = memparse(str, &str);
1555         /*
1556          * nr_entries can not be zero and the startup
1557          * tests require some buffer space. Therefore
1558          * ensure we have at least 4096 bytes of buffer.
1559          */
1560         trace_buf_size = max(4096UL, buf_size);
1561         return 1;
1562 }
1563 __setup("trace_buf_size=", set_buf_size);
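/*
 * Example (illustrative): booting with "trace_buf_size=1M" sizes each
 * per-CPU buffer at one megabyte; memparse() accepts the usual K/M/G
 * suffixes.
 */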
1564
1565 static int __init set_tracing_thresh(char *str)
1566 {
1567         unsigned long threshold;
1568         int ret;
1569
1570         if (!str)
1571                 return 0;
1572         ret = kstrtoul(str, 0, &threshold);
1573         if (ret < 0)
1574                 return 0;
1575         tracing_thresh = threshold * 1000;
1576         return 1;
1577 }
1578 __setup("tracing_thresh=", set_tracing_thresh);
1579
1580 unsigned long nsecs_to_usecs(unsigned long nsecs)
1581 {
1582         return nsecs / 1000;
1583 }
1584
1585 /*
1586  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1587  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1588  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1589  * of strings in the order that the evals (enum) were defined.
1590  */
1591 #undef C
1592 #define C(a, b) b
1593
1594 /* These must match the bit positions in trace_iterator_flags */
1595 static const char *trace_options[] = {
1596         TRACE_FLAGS
1597         NULL
1598 };
1599
1600 static struct {
1601         u64 (*func)(void);
1602         const char *name;
1603         int in_ns;              /* is this clock in nanoseconds? */
1604 } trace_clocks[] = {
1605         { trace_clock_local,            "local",        1 },
1606         { trace_clock_global,           "global",       1 },
1607         { trace_clock_counter,          "counter",      0 },
1608         { trace_clock_jiffies,          "uptime",       0 },
1609         { trace_clock,                  "perf",         1 },
1610         { ktime_get_mono_fast_ns,       "mono",         1 },
1611         { ktime_get_raw_fast_ns,        "mono_raw",     1 },
1612         { ktime_get_boot_fast_ns,       "boot",         1 },
1613         { ktime_get_tai_fast_ns,        "tai",          1 },
1614         ARCH_TRACE_CLOCKS
1615 };
1616
1617 bool trace_clock_in_ns(struct trace_array *tr)
1618 {
1619         if (trace_clocks[tr->clock_id].in_ns)
1620                 return true;
1621
1622         return false;
1623 }
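/*
 * The clock names in the table above are what user space selects through
 * tracefs, e.g. (illustrative):
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 */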
1624
1625 /*
1626  * trace_parser_get_init - gets the buffer for trace parser
1627  */
1628 int trace_parser_get_init(struct trace_parser *parser, int size)
1629 {
1630         memset(parser, 0, sizeof(*parser));
1631
1632         parser->buffer = kmalloc(size, GFP_KERNEL);
1633         if (!parser->buffer)
1634                 return 1;
1635
1636         parser->size = size;
1637         return 0;
1638 }
1639
1640 /*
1641  * trace_parser_put - frees the buffer for trace parser
1642  */
1643 void trace_parser_put(struct trace_parser *parser)
1644 {
1645         kfree(parser->buffer);
1646         parser->buffer = NULL;
1647 }
1648
1649 /*
1650  * trace_get_user - reads a user input string delimited by whitespace
1651  * (matched by isspace(ch))
1652  *
1653  * For each string found the 'struct trace_parser' is updated,
1654  * and the function returns.
1655  *
1656  * Returns number of bytes read.
1657  *
1658  * See kernel/trace/trace.h for 'struct trace_parser' details.
1659  */
1660 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1661         size_t cnt, loff_t *ppos)
1662 {
1663         char ch;
1664         size_t read = 0;
1665         ssize_t ret;
1666
1667         if (!*ppos)
1668                 trace_parser_clear(parser);
1669
1670         ret = get_user(ch, ubuf++);
1671         if (ret)
1672                 goto out;
1673
1674         read++;
1675         cnt--;
1676
1677         /*
1678          * If the parser did not finish with the last write,
1679          * continue reading the user input without skipping spaces.
1680          */
1681         if (!parser->cont) {
1682                 /* skip white space */
1683                 while (cnt && isspace(ch)) {
1684                         ret = get_user(ch, ubuf++);
1685                         if (ret)
1686                                 goto out;
1687                         read++;
1688                         cnt--;
1689                 }
1690
1691                 parser->idx = 0;
1692
1693                 /* only spaces were written */
1694                 if (isspace(ch) || !ch) {
1695                         *ppos += read;
1696                         ret = read;
1697                         goto out;
1698                 }
1699         }
1700
1701         /* read the non-space input */
1702         while (cnt && !isspace(ch) && ch) {
1703                 if (parser->idx < parser->size - 1)
1704                         parser->buffer[parser->idx++] = ch;
1705                 else {
1706                         ret = -EINVAL;
1707                         goto out;
1708                 }
1709                 ret = get_user(ch, ubuf++);
1710                 if (ret)
1711                         goto out;
1712                 read++;
1713                 cnt--;
1714         }
1715
1716         /* We either got finished input or we have to wait for another call. */
1717         if (isspace(ch) || !ch) {
1718                 parser->buffer[parser->idx] = 0;
1719                 parser->cont = false;
1720         } else if (parser->idx < parser->size - 1) {
1721                 parser->cont = true;
1722                 parser->buffer[parser->idx++] = ch;
1723                 /* Make sure the parsed string always terminates with '\0'. */
1724                 parser->buffer[parser->idx] = 0;
1725         } else {
1726                 ret = -EINVAL;
1727                 goto out;
1728         }
1729
1730         *ppos += read;
1731         ret = read;
1732
1733 out:
1734         return ret;
1735 }
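
/*
 * Illustrative sketch (not built) of how a tracefs write() handler
 * typically drives the parser above, loosely modelled on existing users
 * such as the ftrace filter files. sketch_handle_word() is a made-up
 * placeholder for whatever consumes the parsed word.
 */
#if 0
static ssize_t sketch_write(struct file *filp, const char __user *ubuf,
			    size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read, ret;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	read = trace_get_user(&parser, ubuf, cnt, ppos);
	if (read >= 0 && trace_parser_loaded(&parser) &&
	    !trace_parser_cont(&parser)) {
		/* parser.buffer now holds one NUL-terminated word */
		ret = sketch_handle_word(parser.buffer);
		if (ret < 0)
			goto out;
	}
	ret = read;
 out:
	trace_parser_put(&parser);
	return ret;
}
#endif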
1736
1737 /* TODO add a seq_buf_to_buffer() */
1738 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1739 {
1740         int len;
1741
1742         if (trace_seq_used(s) <= s->readpos)
1743                 return -EBUSY;
1744
1745         len = trace_seq_used(s) - s->readpos;
1746         if (cnt > len)
1747                 cnt = len;
1748         memcpy(buf, s->buffer + s->readpos, cnt);
1749
1750         s->readpos += cnt;
1751         return cnt;
1752 }
1753
1754 unsigned long __read_mostly     tracing_thresh;
1755
1756 #ifdef CONFIG_TRACER_MAX_TRACE
1757 static const struct file_operations tracing_max_lat_fops;
1758
1759 #ifdef LATENCY_FS_NOTIFY
1760
1761 static struct workqueue_struct *fsnotify_wq;
1762
1763 static void latency_fsnotify_workfn(struct work_struct *work)
1764 {
1765         struct trace_array *tr = container_of(work, struct trace_array,
1766                                               fsnotify_work);
1767         fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1768 }
1769
1770 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1771 {
1772         struct trace_array *tr = container_of(iwork, struct trace_array,
1773                                               fsnotify_irqwork);
1774         queue_work(fsnotify_wq, &tr->fsnotify_work);
1775 }
1776
1777 static void trace_create_maxlat_file(struct trace_array *tr,
1778                                      struct dentry *d_tracer)
1779 {
1780         INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1781         init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1782         tr->d_max_latency = trace_create_file("tracing_max_latency",
1783                                               TRACE_MODE_WRITE,
1784                                               d_tracer, tr,
1785                                               &tracing_max_lat_fops);
1786 }
1787
1788 __init static int latency_fsnotify_init(void)
1789 {
1790         fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1791                                       WQ_UNBOUND | WQ_HIGHPRI, 0);
1792         if (!fsnotify_wq) {
1793                 pr_err("Unable to allocate tr_max_lat_wq\n");
1794                 return -ENOMEM;
1795         }
1796         return 0;
1797 }
1798
1799 late_initcall_sync(latency_fsnotify_init);
1800
1801 void latency_fsnotify(struct trace_array *tr)
1802 {
1803         if (!fsnotify_wq)
1804                 return;
1805         /*
1806          * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1807          * possible that we are called from __schedule() or do_idle(), which
1808          * could cause a deadlock.
1809          */
1810         irq_work_queue(&tr->fsnotify_irqwork);
1811 }
1812
1813 #else /* !LATENCY_FS_NOTIFY */
1814
1815 #define trace_create_maxlat_file(tr, d_tracer)                          \
1816         trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,      \
1817                           d_tracer, tr, &tracing_max_lat_fops)
1818
1819 #endif
1820
1821 /*
1822  * Copy the new maximum trace into the separate maximum-trace
1823  * structure. (this way the maximum trace is permanently saved,
1824  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1825  */
1826 static void
1827 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1828 {
1829         struct array_buffer *trace_buf = &tr->array_buffer;
1830         struct array_buffer *max_buf = &tr->max_buffer;
1831         struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1832         struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1833
1834         max_buf->cpu = cpu;
1835         max_buf->time_start = data->preempt_timestamp;
1836
1837         max_data->saved_latency = tr->max_latency;
1838         max_data->critical_start = data->critical_start;
1839         max_data->critical_end = data->critical_end;
1840
1841         strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1842         max_data->pid = tsk->pid;
1843         /*
1844          * If tsk == current, then use current_uid(), as that does not use
1845          * RCU. The irq tracer can be called out of RCU scope.
1846          */
1847         if (tsk == current)
1848                 max_data->uid = current_uid();
1849         else
1850                 max_data->uid = task_uid(tsk);
1851
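	/* static_prio is MAX_RT_PRIO + 20 + nice, so this recovers the nice value */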
1852         max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1853         max_data->policy = tsk->policy;
1854         max_data->rt_priority = tsk->rt_priority;
1855
1856         /* record this tasks comm */
1857         tracing_record_cmdline(tsk);
1858         latency_fsnotify(tr);
1859 }
1860
1861 /**
1862  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1863  * @tr: tracer
1864  * @tsk: the task with the latency
1865  * @cpu: The cpu that initiated the trace.
1866  * @cond_data: User data associated with a conditional snapshot
1867  *
1868  * Flip the buffers between the @tr and the max_tr and record information
1869  * about which task was the cause of this latency.
1870  */
1871 void
1872 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1873               void *cond_data)
1874 {
1875         if (tr->stop_count)
1876                 return;
1877
1878         WARN_ON_ONCE(!irqs_disabled());
1879
1880         if (!tr->allocated_snapshot) {
1881                 /* Only the nop tracer should hit this when disabling */
1882                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1883                 return;
1884         }
1885
1886         arch_spin_lock(&tr->max_lock);
1887
1888         /* Inherit the recordable setting from array_buffer */
1889         if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1890                 ring_buffer_record_on(tr->max_buffer.buffer);
1891         else
1892                 ring_buffer_record_off(tr->max_buffer.buffer);
1893
1894 #ifdef CONFIG_TRACER_SNAPSHOT
1895         if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1896                 arch_spin_unlock(&tr->max_lock);
1897                 return;
1898         }
1899 #endif
1900         swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1901
1902         __update_max_tr(tr, tsk, cpu);
1903
1904         arch_spin_unlock(&tr->max_lock);
1905 }
1906
1907 /**
1908  * update_max_tr_single - only copy one trace over, and reset the rest
1909  * @tr: tracer
1910  * @tsk: task with the latency
1911  * @cpu: the cpu of the buffer to copy.
1912  *
1913  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1914  */
1915 void
1916 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1917 {
1918         int ret;
1919
1920         if (tr->stop_count)
1921                 return;
1922
1923         WARN_ON_ONCE(!irqs_disabled());
1924         if (!tr->allocated_snapshot) {
1925                 /* Only the nop tracer should hit this when disabling */
1926                 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1927                 return;
1928         }
1929
1930         arch_spin_lock(&tr->max_lock);
1931
1932         ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1933
1934         if (ret == -EBUSY) {
1935                 /*
1936                  * We failed to swap the buffer due to a commit taking
1937                  * place on this CPU. We fail to record, but we reset
1938                  * the max trace buffer (no one writes directly to it)
1939                  * and flag that it failed.
1940                  * The swap can also fail while a resize is in progress.
1941                  */
1942                 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1943                         "Failed to swap buffers due to commit or resize in progress\n");
1944         }
1945
1946         WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1947
1948         __update_max_tr(tr, tsk, cpu);
1949         arch_spin_unlock(&tr->max_lock);
1950 }
1951
1952 #endif /* CONFIG_TRACER_MAX_TRACE */
1953
1954 static int wait_on_pipe(struct trace_iterator *iter, int full)
1955 {
1956         /* Iterators are static, they should be filled or empty */
1957         if (trace_buffer_iter(iter, iter->cpu_file))
1958                 return 0;
1959
1960         return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1961                                 full);
1962 }
1963
1964 #ifdef CONFIG_FTRACE_STARTUP_TEST
1965 static bool selftests_can_run;
1966
1967 struct trace_selftests {
1968         struct list_head                list;
1969         struct tracer                   *type;
1970 };
1971
1972 static LIST_HEAD(postponed_selftests);
1973
1974 static int save_selftest(struct tracer *type)
1975 {
1976         struct trace_selftests *selftest;
1977
1978         selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1979         if (!selftest)
1980                 return -ENOMEM;
1981
1982         selftest->type = type;
1983         list_add(&selftest->list, &postponed_selftests);
1984         return 0;
1985 }
1986
1987 static int run_tracer_selftest(struct tracer *type)
1988 {
1989         struct trace_array *tr = &global_trace;
1990         struct tracer *saved_tracer = tr->current_trace;
1991         int ret;
1992
1993         if (!type->selftest || tracing_selftest_disabled)
1994                 return 0;
1995
1996         /*
1997          * If a tracer registers early in boot up (before scheduling is
1998          * initialized and such), then do not run its selftests yet.
1999          * Instead, run it a little later in the boot process.
2000          */
2001         if (!selftests_can_run)
2002                 return save_selftest(type);
2003
2004         if (!tracing_is_on()) {
2005                 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2006                         type->name);
2007                 return 0;
2008         }
2009
2010         /*
2011          * Run a selftest on this tracer.
2012          * Here we reset the trace buffer, and set the current
2013          * tracer to be this tracer. The tracer can then run some
2014          * internal tracing to verify that everything is in order.
2015          * If we fail, we do not register this tracer.
2016          */
2017         tracing_reset_online_cpus(&tr->array_buffer);
2018
2019         tr->current_trace = type;
2020
2021 #ifdef CONFIG_TRACER_MAX_TRACE
2022         if (type->use_max_tr) {
2023                 /* If we expanded the buffers, make sure the max is expanded too */
2024                 if (tr->ring_buffer_expanded)
2025                         ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2026                                            RING_BUFFER_ALL_CPUS);
2027                 tr->allocated_snapshot = true;
2028         }
2029 #endif
2030
2031         /* the test is responsible for initializing and enabling */
2032         pr_info("Testing tracer %s: ", type->name);
2033         ret = type->selftest(type, tr);
2034         /* the test is responsible for resetting too */
2035         tr->current_trace = saved_tracer;
2036         if (ret) {
2037                 printk(KERN_CONT "FAILED!\n");
2038                 /* Add the warning after printing 'FAILED' */
2039                 WARN_ON(1);
2040                 return -1;
2041         }
2042         /* Only reset on passing, to avoid touching corrupted buffers */
2043         tracing_reset_online_cpus(&tr->array_buffer);
2044
2045 #ifdef CONFIG_TRACER_MAX_TRACE
2046         if (type->use_max_tr) {
2047                 tr->allocated_snapshot = false;
2048
2049                 /* Shrink the max buffer again */
2050                 if (tr->ring_buffer_expanded)
2051                         ring_buffer_resize(tr->max_buffer.buffer, 1,
2052                                            RING_BUFFER_ALL_CPUS);
2053         }
2054 #endif
2055
2056         printk(KERN_CONT "PASSED\n");
2057         return 0;
2058 }
2059
2060 static int do_run_tracer_selftest(struct tracer *type)
2061 {
2062         int ret;
2063
2064         /*
2065          * Tests can take a long time, especially if they are run one after the
2066          * other, as does happen during bootup when all the tracers are
2067          * registered. This could cause the soft lockup watchdog to trigger.
2068          */
2069         cond_resched();
2070
2071         tracing_selftest_running = true;
2072         ret = run_tracer_selftest(type);
2073         tracing_selftest_running = false;
2074
2075         return ret;
2076 }
2077
2078 static __init int init_trace_selftests(void)
2079 {
2080         struct trace_selftests *p, *n;
2081         struct tracer *t, **last;
2082         int ret;
2083
2084         selftests_can_run = true;
2085
2086         mutex_lock(&trace_types_lock);
2087
2088         if (list_empty(&postponed_selftests))
2089                 goto out;
2090
2091         pr_info("Running postponed tracer tests:\n");
2092
2093         tracing_selftest_running = true;
2094         list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2095                 /* This loop can take minutes when sanitizers are enabled, so
2096                  * let's make sure we allow RCU processing.
2097                  */
2098                 cond_resched();
2099                 ret = run_tracer_selftest(p->type);
2100                 /* If the test fails, then warn and remove from available_tracers */
2101                 if (ret < 0) {
2102                         WARN(1, "tracer: %s failed selftest, disabling\n",
2103                              p->type->name);
2104                         last = &trace_types;
2105                         for (t = trace_types; t; t = t->next) {
2106                                 if (t == p->type) {
2107                                         *last = t->next;
2108                                         break;
2109                                 }
2110                                 last = &t->next;
2111                         }
2112                 }
2113                 list_del(&p->list);
2114                 kfree(p);
2115         }
2116         tracing_selftest_running = false;
2117
2118  out:
2119         mutex_unlock(&trace_types_lock);
2120
2121         return 0;
2122 }
2123 core_initcall(init_trace_selftests);
2124 #else
2125 static inline int run_tracer_selftest(struct tracer *type)
2126 {
2127         return 0;
2128 }
2129 static inline int do_run_tracer_selftest(struct tracer *type)
2130 {
2131         return 0;
2132 }
2133 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2134
2135 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2136
2137 static void __init apply_trace_boot_options(void);
2138
2139 /**
2140  * register_tracer - register a tracer with the ftrace system.
2141  * @type: the plugin for the tracer
2142  *
2143  * Register a new plugin tracer.
2144  */
2145 int __init register_tracer(struct tracer *type)
2146 {
2147         struct tracer *t;
2148         int ret = 0;
2149
2150         if (!type->name) {
2151                 pr_info("Tracer must have a name\n");
2152                 return -1;
2153         }
2154
2155         if (strlen(type->name) >= MAX_TRACER_SIZE) {
2156                 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2157                 return -1;
2158         }
2159
2160         if (security_locked_down(LOCKDOWN_TRACEFS)) {
2161                 pr_warn("Can not register tracer %s due to lockdown\n",
2162                            type->name);
2163                 return -EPERM;
2164         }
2165
2166         mutex_lock(&trace_types_lock);
2167
2168         for (t = trace_types; t; t = t->next) {
2169                 if (strcmp(type->name, t->name) == 0) {
2170                         /* already found */
2171                         pr_info("Tracer %s already registered\n",
2172                                 type->name);
2173                         ret = -1;
2174                         goto out;
2175                 }
2176         }
2177
2178         if (!type->set_flag)
2179                 type->set_flag = &dummy_set_flag;
2180         if (!type->flags) {
2181                 /* allocate a dummy tracer_flags */
2182                 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2183                 if (!type->flags) {
2184                         ret = -ENOMEM;
2185                         goto out;
2186                 }
2187                 type->flags->val = 0;
2188                 type->flags->opts = dummy_tracer_opt;
2189         } else
2190                 if (!type->flags->opts)
2191                         type->flags->opts = dummy_tracer_opt;
2192
2193         /* store the tracer for __set_tracer_option */
2194         type->flags->trace = type;
2195
2196         ret = do_run_tracer_selftest(type);
2197         if (ret < 0)
2198                 goto out;
2199
2200         type->next = trace_types;
2201         trace_types = type;
2202         add_tracer_options(&global_trace, type);
2203
2204  out:
2205         mutex_unlock(&trace_types_lock);
2206
2207         if (ret || !default_bootup_tracer)
2208                 goto out_unlock;
2209
2210         if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2211                 goto out_unlock;
2212
2213         printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2214         /* Do we want this tracer to start on bootup? */
2215         tracing_set_tracer(&global_trace, type->name);
2216         default_bootup_tracer = NULL;
2217
2218         apply_trace_boot_options();
2219
2220         /* Disable other selftests; running this tracer now would break them. */
2221         disable_tracing_selftest("running a tracer");
2222
2223  out_unlock:
2224         return ret;
2225 }
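
/*
 * Illustrative sketch (not built) of a minimal tracer registration. The
 * tracer, its callbacks and the initcall below are made up; only .name is
 * strictly required by register_tracer(), which fills in dummy flags and
 * set_flag when they are absent.
 */
#if 0
static int sketch_tracer_init(struct trace_array *tr)
{
	return 0;
}

static void sketch_tracer_reset(struct trace_array *tr)
{
}

static struct tracer sketch_tracer __read_mostly = {
	.name	= "sketch",
	.init	= sketch_tracer_init,
	.reset	= sketch_tracer_reset,
};

static int __init init_sketch_tracer(void)
{
	return register_tracer(&sketch_tracer);
}
core_initcall(init_sketch_tracer);
#endif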
2226
2227 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2228 {
2229         struct trace_buffer *buffer = buf->buffer;
2230
2231         if (!buffer)
2232                 return;
2233
2234         ring_buffer_record_disable(buffer);
2235
2236         /* Make sure all commits have finished */
2237         synchronize_rcu();
2238         ring_buffer_reset_cpu(buffer, cpu);
2239
2240         ring_buffer_record_enable(buffer);
2241 }
2242
2243 void tracing_reset_online_cpus(struct array_buffer *buf)
2244 {
2245         struct trace_buffer *buffer = buf->buffer;
2246
2247         if (!buffer)
2248                 return;
2249
2250         ring_buffer_record_disable(buffer);
2251
2252         /* Make sure all commits have finished */
2253         synchronize_rcu();
2254
2255         buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2256
2257         ring_buffer_reset_online_cpus(buffer);
2258
2259         ring_buffer_record_enable(buffer);
2260 }
2261
2262 /* Must have trace_types_lock held */
2263 void tracing_reset_all_online_cpus_unlocked(void)
2264 {
2265         struct trace_array *tr;
2266
2267         lockdep_assert_held(&trace_types_lock);
2268
2269         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2270                 if (!tr->clear_trace)
2271                         continue;
2272                 tr->clear_trace = false;
2273                 tracing_reset_online_cpus(&tr->array_buffer);
2274 #ifdef CONFIG_TRACER_MAX_TRACE
2275                 tracing_reset_online_cpus(&tr->max_buffer);
2276 #endif
2277         }
2278 }
2279
2280 void tracing_reset_all_online_cpus(void)
2281 {
2282         mutex_lock(&trace_types_lock);
2283         tracing_reset_all_online_cpus_unlocked();
2284         mutex_unlock(&trace_types_lock);
2285 }
2286
2287 /*
2288  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2289  * is the tgid last observed corresponding to pid=i.
2290  */
2291 static int *tgid_map;
2292
2293 /* The maximum valid index into tgid_map. */
2294 static size_t tgid_map_max;
2295
2296 #define SAVED_CMDLINES_DEFAULT 128
2297 #define NO_CMDLINE_MAP UINT_MAX
2298 /*
2299  * Preemption must be disabled before acquiring trace_cmdline_lock.
2300  * The various trace_arrays' max_lock must be acquired in a context
2301  * where interrupts are disabled.
2302  */
2303 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2304 struct saved_cmdlines_buffer {
2305         unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2306         unsigned *map_cmdline_to_pid;
2307         unsigned cmdline_num;
2308         int cmdline_idx;
2309         char *saved_cmdlines;
2310 };
2311 static struct saved_cmdlines_buffer *savedcmd;
2312
2313 static inline char *get_saved_cmdlines(int idx)
2314 {
2315         return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2316 }
2317
2318 static inline void set_cmdline(int idx, const char *cmdline)
2319 {
2320         strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2321 }
2322
2323 static int allocate_cmdlines_buffer(unsigned int val,
2324                                     struct saved_cmdlines_buffer *s)
2325 {
2326         s->map_cmdline_to_pid = kmalloc_array(val,
2327                                               sizeof(*s->map_cmdline_to_pid),
2328                                               GFP_KERNEL);
2329         if (!s->map_cmdline_to_pid)
2330                 return -ENOMEM;
2331
2332         s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2333         if (!s->saved_cmdlines) {
2334                 kfree(s->map_cmdline_to_pid);
2335                 return -ENOMEM;
2336         }
2337
2338         s->cmdline_idx = 0;
2339         s->cmdline_num = val;
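	/*
	 * memset() can initialize these maps to NO_CMDLINE_MAP only because
	 * NO_CMDLINE_MAP is UINT_MAX: every byte of the fill pattern is 0xff.
	 */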
2340         memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2341                sizeof(s->map_pid_to_cmdline));
2342         memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2343                val * sizeof(*s->map_cmdline_to_pid));
2344
2345         return 0;
2346 }
2347
2348 static int trace_create_savedcmd(void)
2349 {
2350         int ret;
2351
2352         savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2353         if (!savedcmd)
2354                 return -ENOMEM;
2355
2356         ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2357         if (ret < 0) {
2358                 kfree(savedcmd);
2359                 savedcmd = NULL;
2360                 return -ENOMEM;
2361         }
2362
2363         return 0;
2364 }
2365
2366 int is_tracing_stopped(void)
2367 {
2368         return global_trace.stop_count;
2369 }
2370
2371 static void tracing_start_tr(struct trace_array *tr)
2372 {
2373         struct trace_buffer *buffer;
2374         unsigned long flags;
2375
2376         if (tracing_disabled)
2377                 return;
2378
2379         raw_spin_lock_irqsave(&tr->start_lock, flags);
2380         if (--tr->stop_count) {
2381                 if (WARN_ON_ONCE(tr->stop_count < 0)) {
2382                         /* Someone screwed up their debugging */
2383                         tr->stop_count = 0;
2384                 }
2385                 goto out;
2386         }
2387
2388         /* Prevent the buffers from switching */
2389         arch_spin_lock(&tr->max_lock);
2390
2391         buffer = tr->array_buffer.buffer;
2392         if (buffer)
2393                 ring_buffer_record_enable(buffer);
2394
2395 #ifdef CONFIG_TRACER_MAX_TRACE
2396         buffer = tr->max_buffer.buffer;
2397         if (buffer)
2398                 ring_buffer_record_enable(buffer);
2399 #endif
2400
2401         arch_spin_unlock(&tr->max_lock);
2402
2403  out:
2404         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2405 }
2406
2407 /**
2408  * tracing_start - quick start of the tracer
2409  *
2410  * If tracing is enabled but was stopped by tracing_stop,
2411  * this will start the tracer back up.
2412  */
2413 void tracing_start(void)
2415 {
2416         return tracing_start_tr(&global_trace);
2417 }
2418
2419 static void tracing_stop_tr(struct trace_array *tr)
2420 {
2421         struct trace_buffer *buffer;
2422         unsigned long flags;
2423
2424         raw_spin_lock_irqsave(&tr->start_lock, flags);
2425         if (tr->stop_count++)
2426                 goto out;
2427
2428         /* Prevent the buffers from switching */
2429         arch_spin_lock(&tr->max_lock);
2430
2431         buffer = tr->array_buffer.buffer;
2432         if (buffer)
2433                 ring_buffer_record_disable(buffer);
2434
2435 #ifdef CONFIG_TRACER_MAX_TRACE
2436         buffer = tr->max_buffer.buffer;
2437         if (buffer)
2438                 ring_buffer_record_disable(buffer);
2439 #endif
2440
2441         arch_spin_unlock(&tr->max_lock);
2442
2443  out:
2444         raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2445 }
2446
2447 /**
2448  * tracing_stop - quick stop of the tracer
2449  *
2450  * Light weight way to stop tracing. Use in conjunction with
2451  * tracing_start.
2452  */
2453 void tracing_stop(void)
2454 {
2455         return tracing_stop_tr(&global_trace);
2456 }
2457
2458 static int trace_save_cmdline(struct task_struct *tsk)
2459 {
2460         unsigned tpid, idx;
2461
2462         /* treat recording of idle task as a success */
2463         if (!tsk->pid)
2464                 return 1;
2465
2466         tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2467
2468         /*
2469          * It's not the end of the world if we don't get
2470          * the lock, but we also don't want to spin
2471          * nor do we want to disable interrupts,
2472          * so if we miss here, then better luck next time.
2473          *
2474          * This is called from the scheduler and wakeup paths, so interrupts
2475          * should already be disabled and the run queue lock held.
2476          */
2477         lockdep_assert_preemption_disabled();
2478         if (!arch_spin_trylock(&trace_cmdline_lock))
2479                 return 0;
2480
2481         idx = savedcmd->map_pid_to_cmdline[tpid];
2482         if (idx == NO_CMDLINE_MAP) {
2483                 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2484
2485                 savedcmd->map_pid_to_cmdline[tpid] = idx;
2486                 savedcmd->cmdline_idx = idx;
2487         }
2488
2489         savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2490         set_cmdline(idx, tsk->comm);
2491
2492         arch_spin_unlock(&trace_cmdline_lock);
2493
2494         return 1;
2495 }
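
/*
 * Note that map_pid_to_cmdline[] is effectively a power-of-two hash: pids
 * that share their low 15 bits share a slot (with the usual PID_MAX_DEFAULT
 * of 0x8000, pid 3 and pid 32771 both land in slot 3). map_cmdline_to_pid[]
 * records which pid a saved comm actually belongs to, which is how
 * __trace_find_cmdline() below detects that a slot has been reused.
 */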
2496
2497 static void __trace_find_cmdline(int pid, char comm[])
2498 {
2499         unsigned map;
2500         int tpid;
2501
2502         if (!pid) {
2503                 strcpy(comm, "<idle>");
2504                 return;
2505         }
2506
2507         if (WARN_ON_ONCE(pid < 0)) {
2508                 strcpy(comm, "<XXX>");
2509                 return;
2510         }
2511
2512         tpid = pid & (PID_MAX_DEFAULT - 1);
2513         map = savedcmd->map_pid_to_cmdline[tpid];
2514         if (map != NO_CMDLINE_MAP) {
2515                 tpid = savedcmd->map_cmdline_to_pid[map];
2516                 if (tpid == pid) {
2517                         strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2518                         return;
2519                 }
2520         }
2521         strcpy(comm, "<...>");
2522 }
2523
2524 void trace_find_cmdline(int pid, char comm[])
2525 {
2526         preempt_disable();
2527         arch_spin_lock(&trace_cmdline_lock);
2528
2529         __trace_find_cmdline(pid, comm);
2530
2531         arch_spin_unlock(&trace_cmdline_lock);
2532         preempt_enable();
2533 }
2534
2535 static int *trace_find_tgid_ptr(int pid)
2536 {
2537         /*
2538          * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2539          * if we observe a non-NULL tgid_map then we also observe the correct
2540          * tgid_map_max.
2541          */
2542         int *map = smp_load_acquire(&tgid_map);
2543
2544         if (unlikely(!map || pid > tgid_map_max))
2545                 return NULL;
2546
2547         return &map[pid];
2548 }
2549
2550 int trace_find_tgid(int pid)
2551 {
2552         int *ptr = trace_find_tgid_ptr(pid);
2553
2554         return ptr ? *ptr : 0;
2555 }
2556
2557 static int trace_save_tgid(struct task_struct *tsk)
2558 {
2559         int *ptr;
2560
2561         /* treat recording of idle task as a success */
2562         if (!tsk->pid)
2563                 return 1;
2564
2565         ptr = trace_find_tgid_ptr(tsk->pid);
2566         if (!ptr)
2567                 return 0;
2568
2569         *ptr = tsk->tgid;
2570         return 1;
2571 }
2572
2573 static bool tracing_record_taskinfo_skip(int flags)
2574 {
2575         if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2576                 return true;
2577         if (!__this_cpu_read(trace_taskinfo_save))
2578                 return true;
2579         return false;
2580 }
2581
2582 /**
2583  * tracing_record_taskinfo - record the task info of a task
2584  *
2585  * @task:  task to record
2586  * @flags: TRACE_RECORD_CMDLINE for recording comm
2587  *         TRACE_RECORD_TGID for recording tgid
2588  */
2589 void tracing_record_taskinfo(struct task_struct *task, int flags)
2590 {
2591         bool done;
2592
2593         if (tracing_record_taskinfo_skip(flags))
2594                 return;
2595
2596         /*
2597          * Record as much task information as possible. If some fail, continue
2598          * to try to record the others.
2599          */
2600         done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2601         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2602
2603         /* If recording any information failed, try again soon. */
2604         if (!done)
2605                 return;
2606
2607         __this_cpu_write(trace_taskinfo_save, false);
2608 }
2609
2610 /**
2611  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2612  *
2613  * @prev: previous task during sched_switch
2614  * @next: next task during sched_switch
2615  * @flags: TRACE_RECORD_CMDLINE for recording comm
2616  *         TRACE_RECORD_TGID for recording tgid
2617  */
2618 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2619                                           struct task_struct *next, int flags)
2620 {
2621         bool done;
2622
2623         if (tracing_record_taskinfo_skip(flags))
2624                 return;
2625
2626         /*
2627          * Record as much task information as possible. If some fail, continue
2628          * to try to record the others.
2629          */
2630         done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2631         done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2632         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2633         done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2634
2635         /* If recording any information failed, try again soon. */
2636         if (!done)
2637                 return;
2638
2639         __this_cpu_write(trace_taskinfo_save, false);
2640 }
2641
2642 /* Helpers to record a specific task information */
2643 void tracing_record_cmdline(struct task_struct *task)
2644 {
2645         tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2646 }
2647
2648 void tracing_record_tgid(struct task_struct *task)
2649 {
2650         tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2651 }
2652
2653 /*
2654  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2655  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2656  * simplifies those functions and keeps them in sync.
2657  */
2658 enum print_line_t trace_handle_return(struct trace_seq *s)
2659 {
2660         return trace_seq_has_overflowed(s) ?
2661                 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2662 }
2663 EXPORT_SYMBOL_GPL(trace_handle_return);
2664
2665 static unsigned short migration_disable_value(void)
2666 {
2667 #if defined(CONFIG_SMP)
2668         return current->migration_disabled;
2669 #else
2670         return 0;
2671 #endif
2672 }
2673
2674 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2675 {
2676         unsigned int trace_flags = irqs_status;
2677         unsigned int pc;
2678
2679         pc = preempt_count();
2680
2681         if (pc & NMI_MASK)
2682                 trace_flags |= TRACE_FLAG_NMI;
2683         if (pc & HARDIRQ_MASK)
2684                 trace_flags |= TRACE_FLAG_HARDIRQ;
2685         if (in_serving_softirq())
2686                 trace_flags |= TRACE_FLAG_SOFTIRQ;
2687         if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2688                 trace_flags |= TRACE_FLAG_BH_OFF;
2689
2690         if (tif_need_resched())
2691                 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2692         if (test_preempt_need_resched())
2693                 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2694         return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2695                 (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2696 }
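
/*
 * Illustrative sketch (not built) of how the value packed above can be
 * unpacked, assuming only the layout produced by this function: bits 0-3
 * hold the clamped preempt count, bits 4-7 the clamped migration-disable
 * depth, and bits 16 and up the TRACE_FLAG_* bits. The helper name is
 * made up.
 */
#if 0
static void sketch_decode_trace_ctx(unsigned int trace_ctx)
{
	unsigned int preempt_cnt = trace_ctx & 0xf;
	unsigned int migrate_dis = (trace_ctx >> 4) & 0xf;
	unsigned int flags       = trace_ctx >> 16;

	pr_debug("preempt=%u migrate-disable=%u flags=%#x\n",
		 preempt_cnt, migrate_dis, flags);
}
#endif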
2697
2698 struct ring_buffer_event *
2699 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2700                           int type,
2701                           unsigned long len,
2702                           unsigned int trace_ctx)
2703 {
2704         return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2705 }
2706
2707 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2708 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2709 static int trace_buffered_event_ref;
2710
2711 /**
2712  * trace_buffered_event_enable - enable buffering events
2713  *
2714  * When events are being filtered, it is quicker to write the event data
2715  * into a temporary buffer when there is a good chance that the event
2716  * will not be committed. Discarding an event from the ring buffer is
2717  * slower than committing it, and much slower than copying the data and
2718  * then committing.
2719  *
2720  * When events are being filtered, allocate per-cpu buffers to write the
2721  * event data into. If the event is filtered it is simply dropped;
2722  * otherwise, the entire event data is committed to the ring buffer in
2723  * one shot.
2724  */
2725 void trace_buffered_event_enable(void)
2726 {
2727         struct ring_buffer_event *event;
2728         struct page *page;
2729         int cpu;
2730
2731         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2732
2733         if (trace_buffered_event_ref++)
2734                 return;
2735
2736         for_each_tracing_cpu(cpu) {
2737                 page = alloc_pages_node(cpu_to_node(cpu),
2738                                         GFP_KERNEL | __GFP_NORETRY, 0);
2739                 /* This is just an optimization and can handle failures */
2740                 if (!page) {
2741                         pr_err("Failed to allocate event buffer\n");
2742                         break;
2743                 }
2744
2745                 event = page_address(page);
2746                 memset(event, 0, sizeof(*event));
2747
2748                 per_cpu(trace_buffered_event, cpu) = event;
2749
2750                 preempt_disable();
2751                 if (cpu == smp_processor_id() &&
2752                     __this_cpu_read(trace_buffered_event) !=
2753                     per_cpu(trace_buffered_event, cpu))
2754                         WARN_ON_ONCE(1);
2755                 preempt_enable();
2756         }
2757 }
2758
2759 static void enable_trace_buffered_event(void *data)
2760 {
2761         /* Probably not needed, but do it anyway */
2762         smp_rmb();
2763         this_cpu_dec(trace_buffered_event_cnt);
2764 }
2765
2766 static void disable_trace_buffered_event(void *data)
2767 {
2768         this_cpu_inc(trace_buffered_event_cnt);
2769 }
2770
2771 /**
2772  * trace_buffered_event_disable - disable buffering events
2773  *
2774  * When a filter is removed, it is faster to not use the buffered
2775  * events, and to commit directly into the ring buffer. Free up
2776  * the temp buffers when there are no more users. This requires
2777  * special synchronization with current events.
2778  */
2779 void trace_buffered_event_disable(void)
2780 {
2781         int cpu;
2782
2783         WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2784
2785         if (WARN_ON_ONCE(!trace_buffered_event_ref))
2786                 return;
2787
2788         if (--trace_buffered_event_ref)
2789                 return;
2790
2791         /* For each CPU, set the buffer as used. */
2792         on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2793                          NULL, true);
2794
2795         /* Wait for all current users to finish */
2796         synchronize_rcu();
2797
2798         for_each_tracing_cpu(cpu) {
2799                 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2800                 per_cpu(trace_buffered_event, cpu) = NULL;
2801         }
2802
2803         /*
2804          * Wait for all CPUs that potentially started checking if they can use
2805          * their event buffer only after the previous synchronize_rcu() call,
2806          * while still reading a valid pointer from trace_buffered_event. They
2807          * must not see trace_buffered_event_cnt cleared yet, else they could
2808          * wrongly decide to use the pointed-to buffer which is now freed.
2809          */
2810         synchronize_rcu();
2811
2812         /* For each CPU, relinquish the buffer */
2813         on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2814                          true);
2815 }
2816
2817 static struct trace_buffer *temp_buffer;
2818
2819 struct ring_buffer_event *
2820 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2821                           struct trace_event_file *trace_file,
2822                           int type, unsigned long len,
2823                           unsigned int trace_ctx)
2824 {
2825         struct ring_buffer_event *entry;
2826         struct trace_array *tr = trace_file->tr;
2827         int val;
2828
2829         *current_rb = tr->array_buffer.buffer;
2830
2831         if (!tr->no_filter_buffering_ref &&
2832             (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2833                 preempt_disable_notrace();
2834                 /*
2835                  * Filtering is on, so try to use the per cpu buffer first.
2836                  * This buffer will simulate a ring_buffer_event,
2837                  * where the type_len is zero and the array[0] will
2838                  * hold the full length.
2839                  * (see include/linux/ring_buffer.h for details on
2840                  *  how the ring_buffer_event is structured).
2841                  *
2842                  * Using a temp buffer during filtering and copying it
2843                  * on a matched filter is quicker than writing directly
2844                  * into the ring buffer and then discarding it when
2845                  * it doesn't match. That is because the discard
2846                  * requires several atomic operations to get right.
2847                  * Copying on a match and doing nothing on a failed match
2848                  * is still quicker than writing directly and then having
2849                  * to discard from the ring buffer on a failed match.
2850                  */
2851                 if ((entry = __this_cpu_read(trace_buffered_event))) {
2852                         int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2853
2854                         val = this_cpu_inc_return(trace_buffered_event_cnt);
2855
2856                         /*
2857                          * Preemption is disabled, but interrupts and NMIs
2858                          * can still come in now. If that happens after
2859                          * the above increment, then it will have to go
2860                          * back to the old method of allocating the event
2861                          * on the ring buffer, and if the filter fails, it
2862                          * will have to call ring_buffer_discard_commit()
2863                          * to remove it.
2864                          *
2865                          * Need to also check the unlikely case that the
2866                          * length is bigger than the temp buffer size.
2867                          * If that happens, then the reserve is pretty much
2868                          * guaranteed to fail, as the ring buffer currently
2869                          * only allows events less than a page. But that may
2870                          * change in the future, so let the ring buffer reserve
2871                          * handle the failure in that case.
2872                          */
2873                         if (val == 1 && likely(len <= max_len)) {
2874                                 trace_event_setup(entry, type, trace_ctx);
2875                                 entry->array[0] = len;
2876                                 /* Return with preemption disabled */
2877                                 return entry;
2878                         }
2879                         this_cpu_dec(trace_buffered_event_cnt);
2880                 }
2881                 /* __trace_buffer_lock_reserve() disables preemption */
2882                 preempt_enable_notrace();
2883         }
2884
2885         entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2886                                             trace_ctx);
2887         /*
2888          * If tracing is off, but we have triggers enabled
2889          * we still need to look at the event data. Use the temp_buffer
2890          * to store the trace event for the trigger to use. It's recursion
2891          * safe and will not be recorded anywhere.
2892          */
2893         if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2894                 *current_rb = temp_buffer;
2895                 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2896                                                     trace_ctx);
2897         }
2898         return entry;
2899 }
2900 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2901
2902 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2903 static DEFINE_MUTEX(tracepoint_printk_mutex);
2904
2905 static void output_printk(struct trace_event_buffer *fbuffer)
2906 {
2907         struct trace_event_call *event_call;
2908         struct trace_event_file *file;
2909         struct trace_event *event;
2910         unsigned long flags;
2911         struct trace_iterator *iter = tracepoint_print_iter;
2912
2913         /* We should never get here if iter is NULL */
2914         if (WARN_ON_ONCE(!iter))
2915                 return;
2916
2917         event_call = fbuffer->trace_file->event_call;
2918         if (!event_call || !event_call->event.funcs ||
2919             !event_call->event.funcs->trace)
2920                 return;
2921
2922         file = fbuffer->trace_file;
2923         if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2924             (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2925              !filter_match_preds(file->filter, fbuffer->entry)))
2926                 return;
2927
2928         event = &fbuffer->trace_file->event_call->event;
2929
2930         raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2931         trace_seq_init(&iter->seq);
2932         iter->ent = fbuffer->entry;
2933         event_call->event.funcs->trace(iter, 0, event);
2934         trace_seq_putc(&iter->seq, 0);
2935         printk("%s", iter->seq.buffer);
2936
2937         raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2938 }
2939
2940 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2941                              void *buffer, size_t *lenp,
2942                              loff_t *ppos)
2943 {
2944         int save_tracepoint_printk;
2945         int ret;
2946
2947         mutex_lock(&tracepoint_printk_mutex);
2948         save_tracepoint_printk = tracepoint_printk;
2949
2950         ret = proc_dointvec(table, write, buffer, lenp, ppos);
2951
2952         /*
2953          * This will force exiting early, as tracepoint_printk
2954          * is always zero when tracepoint_print_iter is not allocated.
2955          */
2956         if (!tracepoint_print_iter)
2957                 tracepoint_printk = 0;
2958
2959         if (save_tracepoint_printk == tracepoint_printk)
2960                 goto out;
2961
2962         if (tracepoint_printk)
2963                 static_key_enable(&tracepoint_printk_key.key);
2964         else
2965                 static_key_disable(&tracepoint_printk_key.key);
2966
2967  out:
2968         mutex_unlock(&tracepoint_printk_mutex);
2969
2970         return ret;
2971 }
2972
2973 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2974 {
2975         enum event_trigger_type tt = ETT_NONE;
2976         struct trace_event_file *file = fbuffer->trace_file;
2977
2978         if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2979                         fbuffer->entry, &tt))
2980                 goto discard;
2981
2982         if (static_key_false(&tracepoint_printk_key.key))
2983                 output_printk(fbuffer);
2984
2985         if (static_branch_unlikely(&trace_event_exports_enabled))
2986                 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2987
2988         trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2989                         fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2990
2991 discard:
2992         if (tt)
2993                 event_triggers_post_call(file, tt);
2994
2995 }
2996 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2997
2998 /*
2999  * Skip 3:
3000  *
3001  *   trace_buffer_unlock_commit_regs()
3002  *   trace_event_buffer_commit()
3003  *   trace_event_raw_event_xxx()
3004  */
3005 # define STACK_SKIP 3
3006
3007 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3008                                      struct trace_buffer *buffer,
3009                                      struct ring_buffer_event *event,
3010                                      unsigned int trace_ctx,
3011                                      struct pt_regs *regs)
3012 {
3013         __buffer_unlock_commit(buffer, event);
3014
3015         /*
3016          * If regs is not set, then skip the necessary functions.
3017          * Note, we can still get here via blktrace, wakeup tracer
3018          * and mmiotrace, but that's ok if they lose a function or
3019          * two. They are not that meaningful.
3020          */
3021         ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3022         ftrace_trace_userstack(tr, buffer, trace_ctx);
3023 }
3024
3025 /*
3026  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3027  */
3028 void
3029 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3030                                    struct ring_buffer_event *event)
3031 {
3032         __buffer_unlock_commit(buffer, event);
3033 }
3034
3035 void
3036 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3037                parent_ip, unsigned int trace_ctx)
3038 {
3039         struct trace_event_call *call = &event_function;
3040         struct trace_buffer *buffer = tr->array_buffer.buffer;
3041         struct ring_buffer_event *event;
3042         struct ftrace_entry *entry;
3043
3044         event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3045                                             trace_ctx);
3046         if (!event)
3047                 return;
3048         entry   = ring_buffer_event_data(event);
3049         entry->ip                       = ip;
3050         entry->parent_ip                = parent_ip;
3051
3052         if (!call_filter_check_discard(call, entry, buffer, event)) {
3053                 if (static_branch_unlikely(&trace_function_exports_enabled))
3054                         ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3055                 __buffer_unlock_commit(buffer, event);
3056         }
3057 }
3058
3059 #ifdef CONFIG_STACKTRACE
3060
3061 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3062 #define FTRACE_KSTACK_NESTING   4
3063
3064 #define FTRACE_KSTACK_ENTRIES   (PAGE_SIZE / FTRACE_KSTACK_NESTING)
3065
3066 struct ftrace_stack {
3067         unsigned long           calls[FTRACE_KSTACK_ENTRIES];
3068 };
3069
3070
3071 struct ftrace_stacks {
3072         struct ftrace_stack     stacks[FTRACE_KSTACK_NESTING];
3073 };
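
/*
 * With 4 KiB pages and 64-bit longs this works out to 1024 saved return
 * addresses per nesting context, i.e. 8 KiB per context and 32 KiB of
 * per-CPU storage in ftrace_stacks.
 */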
3074
3075 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3076 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3077
3078 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3079                                  unsigned int trace_ctx,
3080                                  int skip, struct pt_regs *regs)
3081 {
3082         struct trace_event_call *call = &event_kernel_stack;
3083         struct ring_buffer_event *event;
3084         unsigned int size, nr_entries;
3085         struct ftrace_stack *fstack;
3086         struct stack_entry *entry;
3087         int stackidx;
3088
3089         /*
3090          * Add one, for this function and the call to stack_trace_save().
3091          * If regs is set, then these functions will not be in the way.
3092          */
3093 #ifndef CONFIG_UNWINDER_ORC
3094         if (!regs)
3095                 skip++;
3096 #endif
3097
3098         preempt_disable_notrace();
3099
3100         stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3101
3102         /* This should never happen. If it does, yell once and skip */
3103         if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3104                 goto out;
3105
3106         /*
3107          * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3108          * interrupt will either see the value before or after the
3109          * increment. If the interrupt happens before the increment, it
3110          * will have restored the counter when it returns. We just need a
3111          * barrier to keep gcc from moving things around.
3112          */
3113         barrier();
3114
3115         fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3116         size = ARRAY_SIZE(fstack->calls);
3117
3118         if (regs) {
3119                 nr_entries = stack_trace_save_regs(regs, fstack->calls,
3120                                                    size, skip);
3121         } else {
3122                 nr_entries = stack_trace_save(fstack->calls, size, skip);
3123         }
3124
3125         event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3126                                     struct_size(entry, caller, nr_entries),
3127                                     trace_ctx);
3128         if (!event)
3129                 goto out;
3130         entry = ring_buffer_event_data(event);
3131
3132         entry->size = nr_entries;
3133         memcpy(&entry->caller, fstack->calls,
3134                flex_array_size(entry, caller, nr_entries));
3135
3136         if (!call_filter_check_discard(call, entry, buffer, event))
3137                 __buffer_unlock_commit(buffer, event);
3138
3139  out:
3140         /* Again, don't let gcc optimize things here */
3141         barrier();
3142         __this_cpu_dec(ftrace_stack_reserve);
3143         preempt_enable_notrace();
3144
3145 }
3146
3147 static inline void ftrace_trace_stack(struct trace_array *tr,
3148                                       struct trace_buffer *buffer,
3149                                       unsigned int trace_ctx,
3150                                       int skip, struct pt_regs *regs)
3151 {
3152         if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3153                 return;
3154
3155         __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3156 }
3157
3158 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3159                    int skip)
3160 {
3161         struct trace_buffer *buffer = tr->array_buffer.buffer;
3162
3163         if (rcu_is_watching()) {
3164                 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3165                 return;
3166         }
3167
3168         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3169                 return;
3170
3171         /*
3172          * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3173          * but if the above rcu_is_watching() failed, then the NMI
3174          * triggered someplace critical, and ct_irq_enter() should
3175          * not be called from NMI.
3176          */
3177         if (unlikely(in_nmi()))
3178                 return;
3179
3180         ct_irq_enter_irqson();
3181         __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3182         ct_irq_exit_irqson();
3183 }
3184
3185 /**
3186  * trace_dump_stack - record a stack back trace in the trace buffer
3187  * @skip: Number of functions to skip (helper handlers)
3188  */
3189 void trace_dump_stack(int skip)
3190 {
3191         if (tracing_disabled || tracing_selftest_running)
3192                 return;
3193
3194 #ifndef CONFIG_UNWINDER_ORC
3195         /* Skip 1 to skip this function. */
3196         skip++;
3197 #endif
3198         __ftrace_trace_stack(global_trace.array_buffer.buffer,
3199                              tracing_gen_ctx(), skip, NULL);
3200 }
3201 EXPORT_SYMBOL_GPL(trace_dump_stack);
3202
3203 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3204 static DEFINE_PER_CPU(int, user_stack_count);
3205
3206 static void
3207 ftrace_trace_userstack(struct trace_array *tr,
3208                        struct trace_buffer *buffer, unsigned int trace_ctx)
3209 {
3210         struct trace_event_call *call = &event_user_stack;
3211         struct ring_buffer_event *event;
3212         struct userstack_entry *entry;
3213
3214         if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3215                 return;
3216
3217         /*
3218          * NMIs cannot handle page faults, even with fixups.
3219          * Saving the user stack can (and often does) fault.
3220          */
3221         if (unlikely(in_nmi()))
3222                 return;
3223
3224         /*
3225          * Prevent recursion, since user stack tracing may
3226          * trigger other kernel events.
3227          */
3228         preempt_disable();
3229         if (__this_cpu_read(user_stack_count))
3230                 goto out;
3231
3232         __this_cpu_inc(user_stack_count);
3233
3234         event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3235                                             sizeof(*entry), trace_ctx);
3236         if (!event)
3237                 goto out_drop_count;
3238         entry   = ring_buffer_event_data(event);
3239
3240         entry->tgid             = current->tgid;
3241         memset(&entry->caller, 0, sizeof(entry->caller));
3242
3243         stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3244         if (!call_filter_check_discard(call, entry, buffer, event))
3245                 __buffer_unlock_commit(buffer, event);
3246
3247  out_drop_count:
3248         __this_cpu_dec(user_stack_count);
3249  out:
3250         preempt_enable();
3251 }
3252 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3253 static void ftrace_trace_userstack(struct trace_array *tr,
3254                                    struct trace_buffer *buffer,
3255                                    unsigned int trace_ctx)
3256 {
3257 }
3258 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3259
3260 #endif /* CONFIG_STACKTRACE */
3261
3262 static inline void
3263 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3264                           unsigned long long delta)
3265 {
3266         entry->bottom_delta_ts = delta & U32_MAX;
3267         entry->top_delta_ts = (delta >> 32);
3268 }
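
/*
 * Illustrative sketch (hypothetical helper, for exposition only, not
 * used in this file): the inverse of func_repeats_set_delta_ts(),
 * showing how the two halves recombine into the original 64-bit delta
 * when the event is read back.
 */
static inline u64 func_repeats_get_delta_ts(struct func_repeats_entry *entry)
{
	return ((u64)entry->top_delta_ts << 32) | entry->bottom_delta_ts;
}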
3269
3270 void trace_last_func_repeats(struct trace_array *tr,
3271                              struct trace_func_repeats *last_info,
3272                              unsigned int trace_ctx)
3273 {
3274         struct trace_buffer *buffer = tr->array_buffer.buffer;
3275         struct func_repeats_entry *entry;
3276         struct ring_buffer_event *event;
3277         u64 delta;
3278
3279         event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3280                                             sizeof(*entry), trace_ctx);
3281         if (!event)
3282                 return;
3283
3284         delta = ring_buffer_event_time_stamp(buffer, event) -
3285                 last_info->ts_last_call;
3286
3287         entry = ring_buffer_event_data(event);
3288         entry->ip = last_info->ip;
3289         entry->parent_ip = last_info->parent_ip;
3290         entry->count = last_info->count;
3291         func_repeats_set_delta_ts(entry, delta);
3292
3293         __buffer_unlock_commit(buffer, event);
3294 }
3295
3296 /* created for use with alloc_percpu */
3297 struct trace_buffer_struct {
3298         int nesting;
3299         char buffer[4][TRACE_BUF_SIZE];
3300 };
3301
3302 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3303
3304 /*
3305  * This allows for lockless recording.  If we're nested too deeply, then
3306  * this returns NULL.
3307  */
3308 static char *get_trace_buf(void)
3309 {
3310         struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3311
3312         if (!trace_percpu_buffer || buffer->nesting >= 4)
3313                 return NULL;
3314
3315         buffer->nesting++;
3316
3317         /* Interrupts must see nesting incremented before we use the buffer */
3318         barrier();
3319         return &buffer->buffer[buffer->nesting - 1][0];
3320 }
3321
3322 static void put_trace_buf(void)
3323 {
3324         /* Don't let the decrement of nesting leak before this */
3325         barrier();
3326         this_cpu_dec(trace_percpu_buffer->nesting);
3327 }
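
/*
 * Illustrative note (sketch only): users of these per-CPU printk
 * buffers follow the same pattern that trace_vbprintk() below uses,
 * with preemption disabled across the get/put so the nesting counter
 * stays consistent on this CPU:
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		... format at most TRACE_BUF_SIZE bytes into tbuffer ...
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 *
 * The four nesting levels roughly correspond to normal, softirq, irq
 * and NMI context interrupting one another on the same CPU.
 */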
3328
3329 static int alloc_percpu_trace_buffer(void)
3330 {
3331         struct trace_buffer_struct __percpu *buffers;
3332
3333         if (trace_percpu_buffer)
3334                 return 0;
3335
3336         buffers = alloc_percpu(struct trace_buffer_struct);
3337         if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3338                 return -ENOMEM;
3339
3340         trace_percpu_buffer = buffers;
3341         return 0;
3342 }
3343
3344 static int buffers_allocated;
3345
3346 void trace_printk_init_buffers(void)
3347 {
3348         if (buffers_allocated)
3349                 return;
3350
3351         if (alloc_percpu_trace_buffer())
3352                 return;
3353
3354         /* trace_printk() is for debug use only. Don't use it in production. */
3355
3356         pr_warn("\n");
3357         pr_warn("**********************************************************\n");
3358         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3359         pr_warn("**                                                      **\n");
3360         pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3361         pr_warn("**                                                      **\n");
3362         pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3363         pr_warn("** unsafe for production use.                           **\n");
3364         pr_warn("**                                                      **\n");
3365         pr_warn("** If you see this message and you are not debugging    **\n");
3366         pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3367         pr_warn("**                                                      **\n");
3368         pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3369         pr_warn("**********************************************************\n");
3370
3371         /* Expand the buffers to set size */
3372         tracing_update_buffers(&global_trace);
3373
3374         buffers_allocated = 1;
3375
3376         /*
3377          * trace_printk_init_buffers() can be called by modules.
3378          * If that happens, then we need to start cmdline recording
3379          * directly here. If the global_trace.buffer is already
3380          * allocated here, then this was called by module code.
3381          */
3382         if (global_trace.array_buffer.buffer)
3383                 tracing_start_cmdline_record();
3384 }
3385 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3386
3387 void trace_printk_start_comm(void)
3388 {
3389         /* Start tracing comms if trace printk is set */
3390         if (!buffers_allocated)
3391                 return;
3392         tracing_start_cmdline_record();
3393 }
3394
3395 static void trace_printk_start_stop_comm(int enabled)
3396 {
3397         if (!buffers_allocated)
3398                 return;
3399
3400         if (enabled)
3401                 tracing_start_cmdline_record();
3402         else
3403                 tracing_stop_cmdline_record();
3404 }
3405
3406 /**
3407  * trace_vbprintk - write binary msg to tracing buffer
3408  * @ip:    The address of the caller
3409  * @fmt:   The string format to write to the buffer
3410  * @args:  Arguments for @fmt
3411  */
3412 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3413 {
3414         struct trace_event_call *call = &event_bprint;
3415         struct ring_buffer_event *event;
3416         struct trace_buffer *buffer;
3417         struct trace_array *tr = &global_trace;
3418         struct bprint_entry *entry;
3419         unsigned int trace_ctx;
3420         char *tbuffer;
3421         int len = 0, size;
3422
3423         if (unlikely(tracing_selftest_running || tracing_disabled))
3424                 return 0;
3425
3426         /* Don't pollute graph traces with trace_vprintk internals */
3427         pause_graph_tracing();
3428
3429         trace_ctx = tracing_gen_ctx();
3430         preempt_disable_notrace();
3431
3432         tbuffer = get_trace_buf();
3433         if (!tbuffer) {
3434                 len = 0;
3435                 goto out_nobuffer;
3436         }
3437
3438         len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3439
3440         if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3441                 goto out_put;
3442
3443         size = sizeof(*entry) + sizeof(u32) * len;
3444         buffer = tr->array_buffer.buffer;
3445         ring_buffer_nest_start(buffer);
3446         event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3447                                             trace_ctx);
3448         if (!event)
3449                 goto out;
3450         entry = ring_buffer_event_data(event);
3451         entry->ip                       = ip;
3452         entry->fmt                      = fmt;
3453
3454         memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3455         if (!call_filter_check_discard(call, entry, buffer, event)) {
3456                 __buffer_unlock_commit(buffer, event);
3457                 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3458         }
3459
3460 out:
3461         ring_buffer_nest_end(buffer);
3462 out_put:
3463         put_trace_buf();
3464
3465 out_nobuffer:
3466         preempt_enable_notrace();
3467         unpause_graph_tracing();
3468
3469         return len;
3470 }
3471 EXPORT_SYMBOL_GPL(trace_vbprintk);
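
/*
 * Illustrative note (sketch, not part of this file): this is the
 * backend that a trace_printk() call with a constant format and
 * arguments typically ends up in.  A line such as
 *
 *	trace_printk("read %d bytes from %s\n", ret, name);
 *
 * stores only the binary arguments plus a pointer to the constant
 * format string in the ring buffer; the text is rendered later, when
 * the trace is read, which keeps the recording fast path cheap.
 */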
3472
3473 __printf(3, 0)
3474 static int
3475 __trace_array_vprintk(struct trace_buffer *buffer,
3476                       unsigned long ip, const char *fmt, va_list args)
3477 {
3478         struct trace_event_call *call = &event_print;
3479         struct ring_buffer_event *event;
3480         int len = 0, size;
3481         struct print_entry *entry;
3482         unsigned int trace_ctx;
3483         char *tbuffer;
3484
3485         if (tracing_disabled)
3486                 return 0;
3487
3488         /* Don't pollute graph traces with trace_vprintk internals */
3489         pause_graph_tracing();
3490
3491         trace_ctx = tracing_gen_ctx();
3492         preempt_disable_notrace();
3493
3494
3495         tbuffer = get_trace_buf();
3496         if (!tbuffer) {
3497                 len = 0;
3498                 goto out_nobuffer;
3499         }
3500
3501         len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3502
3503         size = sizeof(*entry) + len + 1;
3504         ring_buffer_nest_start(buffer);
3505         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3506                                             trace_ctx);
3507         if (!event)
3508                 goto out;
3509         entry = ring_buffer_event_data(event);
3510         entry->ip = ip;
3511
3512         memcpy(&entry->buf, tbuffer, len + 1);
3513         if (!call_filter_check_discard(call, entry, buffer, event)) {
3514                 __buffer_unlock_commit(buffer, event);
3515                 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3516         }
3517
3518 out:
3519         ring_buffer_nest_end(buffer);
3520         put_trace_buf();
3521
3522 out_nobuffer:
3523         preempt_enable_notrace();
3524         unpause_graph_tracing();
3525
3526         return len;
3527 }
3528
3529 __printf(3, 0)
3530 int trace_array_vprintk(struct trace_array *tr,
3531                         unsigned long ip, const char *fmt, va_list args)
3532 {
3533         if (tracing_selftest_running && tr == &global_trace)
3534                 return 0;
3535
3536         return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3537 }
3538
3539 /**
3540  * trace_array_printk - Print a message to a specific instance
3541  * @tr: The instance trace_array descriptor
3542  * @ip: The instruction pointer that this is called from.
3543  * @fmt: The format to print (printf format)
3544  *
3545  * If a subsystem sets up its own instance, it has the right to
3546  * printk strings into its tracing instance buffer using this
3547  * function. Note, this function will not write into the top level
3548  * buffer (use trace_printk() for that), as the top level buffer
3549  * should only contain events that can be individually disabled.
3550  * trace_printk() is only used for debugging a kernel, and should
3551  * never be incorporated into normal use.
3552  *
3553  * trace_array_printk() can be used, as it will not add noise to the
3554  * top level tracing buffer.
3555  *
3556  * Note, trace_array_init_printk() must be called on @tr before this
3557  * can be used.
3558  */
3559 __printf(3, 0)
3560 int trace_array_printk(struct trace_array *tr,
3561                        unsigned long ip, const char *fmt, ...)
3562 {
3563         int ret;
3564         va_list ap;
3565
3566         if (!tr)
3567                 return -ENOENT;
3568
3569         /* This is only allowed for created instances */
3570         if (tr == &global_trace)
3571                 return 0;
3572
3573         if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3574                 return 0;
3575
3576         va_start(ap, fmt);
3577         ret = trace_array_vprintk(tr, ip, fmt, ap);
3578         va_end(ap);
3579         return ret;
3580 }
3581 EXPORT_SYMBOL_GPL(trace_array_printk);
3582
3583 /**
3584  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3585  * @tr: The trace array to initialize the buffers for
3586  *
3587  * As trace_array_printk() only writes into instances, it is OK to
3588  * have it in the kernel (unlike trace_printk()). This needs to be
3589  * called before trace_array_printk() can be used on a trace_array.
3590  */
3591 int trace_array_init_printk(struct trace_array *tr)
3592 {
3593         if (!tr)
3594                 return -ENOENT;
3595
3596         /* This is only allowed for created instances */
3597         if (tr == &global_trace)
3598                 return -EINVAL;
3599
3600         return alloc_percpu_trace_buffer();
3601 }
3602 EXPORT_SYMBOL_GPL(trace_array_init_printk);
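
/*
 * Usage sketch (hypothetical caller, not part of this file): a
 * subsystem with its own instance would pair the two exported
 * functions above roughly like this.  Obtaining the instance itself
 * (for example via trace_array_get_by_name()) is left out here, as
 * that API is documented elsewhere.
 *
 *	int my_subsys_trace_setup(struct trace_array *tr)
 *	{
 *		int ret;
 *
 *		ret = trace_array_init_printk(tr);
 *		if (ret)
 *			return ret;
 *
 *		trace_array_printk(tr, _THIS_IP_, "%s: instance ready\n",
 *				   __func__);
 *		return 0;
 *	}
 *
 * Messages written this way land only in the instance buffer of @tr,
 * never in the top level trace buffer.
 */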
3603
3604 __printf(3, 4)
3605 int trace_array_printk_buf(struct trace_buffer *buffer,
3606                            unsigned long ip, const char *fmt, ...)
3607 {
3608         int ret;
3609         va_list ap;
3610
3611         if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3612                 return 0;
3613
3614         va_start(ap, fmt);
3615         ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3616         va_end(ap);
3617         return ret;
3618 }
3619
3620 __printf(2, 0)
3621 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3622 {
3623         return trace_array_vprintk(&global_trace, ip, fmt, args);
3624 }
3625 EXPORT_SYMBOL_GPL(trace_vprintk);
3626
3627 static void trace_iterator_increment(struct trace_iterator *iter)
3628 {
3629         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3630
3631         iter->idx++;
3632         if (buf_iter)
3633                 ring_buffer_iter_advance(buf_iter);
3634 }
3635
3636 static struct trace_entry *
3637 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3638                 unsigned long *lost_events)
3639 {
3640         struct ring_buffer_event *event;
3641         struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3642
3643         if (buf_iter) {
3644                 event = ring_buffer_iter_peek(buf_iter, ts);
3645                 if (lost_events)
3646                         *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3647                                 (unsigned long)-1 : 0;
3648         } else {
3649                 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3650                                          lost_events);
3651         }
3652
3653         if (event) {
3654                 iter->ent_size = ring_buffer_event_length(event);
3655                 return ring_buffer_event_data(event);
3656         }
3657         iter->ent_size = 0;
3658         return NULL;
3659 }
3660
3661 static struct trace_entry *
3662 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3663                   unsigned long *missing_events, u64 *ent_ts)
3664 {
3665         struct trace_buffer *buffer = iter->array_buffer->buffer;
3666         struct trace_entry *ent, *next = NULL;
3667         unsigned long lost_events = 0, next_lost = 0;
3668         int cpu_file = iter->cpu_file;
3669         u64 next_ts = 0, ts;
3670         int next_cpu = -1;
3671         int next_size = 0;
3672         int cpu;
3673
3674         /*
3675          * If we are in a per_cpu trace file, don't bother iterating over
3676          * all CPUs; just peek at that CPU directly.
3677          */
3678         if (cpu_file > RING_BUFFER_ALL_CPUS) {
3679                 if (ring_buffer_empty_cpu(buffer, cpu_file))
3680                         return NULL;
3681                 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3682                 if (ent_cpu)
3683                         *ent_cpu = cpu_file;
3684
3685                 return ent;
3686         }
3687
3688         for_each_tracing_cpu(cpu) {
3689
3690                 if (ring_buffer_empty_cpu(buffer, cpu))
3691                         continue;
3692
3693                 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3694
3695                 /*
3696                  * Pick the entry with the smallest timestamp:
3697                  */
3698                 if (ent && (!next || ts < next_ts)) {
3699                         next = ent;
3700                         next_cpu = cpu;
3701                         next_ts = ts;
3702                         next_lost = lost_events;
3703                         next_size = iter->ent_size;
3704                 }
3705         }
3706
3707         iter->ent_size = next_size;
3708
3709         if (ent_cpu)
3710                 *ent_cpu = next_cpu;
3711
3712         if (ent_ts)
3713                 *ent_ts = next_ts;
3714
3715         if (missing_events)
3716                 *missing_events = next_lost;
3717
3718         return next;
3719 }
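
/*
 * Worked example (illustrative numbers): with three non-empty per-CPU
 * buffers whose next events carry the timestamps
 *
 *	cpu0: ts = 1000300
 *	cpu1: ts = 1000150
 *	cpu2: ts = 1000900
 *
 * the loop above selects the cpu1 entry and reports next_cpu = 1 and
 * next_ts = 1000150.  Repeating this choice on every call is what
 * merges the independent per-CPU buffers into a single time-ordered
 * stream for the iterator.
 */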
3720
3721 #define STATIC_FMT_BUF_SIZE     128
3722 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3723
3724 char *trace_iter_expand_format(struct trace_iterator *iter)
3725 {
3726         char *tmp;
3727
3728         /*
3729          * iter->tr is NULL when used with tp_printk, which means this
3730          * can get called in a context where krealloc() is not safe.
3731          */
3732         if (!iter->tr || iter->fmt == static_fmt_buf)
3733                 return NULL;
3734
3735         tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3736                        GFP_KERNEL);
3737         if (tmp) {
3738                 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3739                 iter->fmt = tmp;
3740         }
3741
3742         return tmp;
3743 }
3744
3745 /* Returns true if the string is safe to dereference from an event */
3746 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3747                            bool star, int len)
3748 {
3749         unsigned long addr = (unsigned long)str;
3750         struct trace_event *trace_event;
3751         struct trace_event_call *event;
3752
3753         /* Ignore strings with no length */
3754         if (star && !len)
3755                 return true;
3756
3757         /* OK if part of the event data */
3758         if ((addr >= (unsigned long)iter->ent) &&
3759             (addr < (unsigned long)iter->ent + iter->ent_size))
3760                 return true;
3761
3762         /* OK if part of the temp seq buffer */
3763         if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3764             (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3765                 return true;
3766
3767         /* Core rodata can not be freed */
3768         if (is_kernel_rodata(addr))
3769                 return true;
3770
3771         if (trace_is_tracepoint_string(str))
3772                 return true;
3773
3774         /*
3775          * Now this could be a module event, referencing core module
3776          * data, which is OK.
3777          */
3778         if (!iter->ent)
3779                 return false;
3780
3781         trace_event = ftrace_find_event(iter->ent->type);
3782         if (!trace_event)
3783                 return false;
3784
3785         event = container_of(trace_event, struct trace_event_call, event);
3786         if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3787                 return false;
3788
3789         /* Would rather have rodata, but this will suffice */
3790         if (within_module_core(addr, event->module))
3791                 return true;
3792
3793         return false;
3794 }
3795
3796 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3797
3798 static int test_can_verify_check(const char *fmt, ...)
3799 {
3800         char buf[16];
3801         va_list ap;
3802         int ret;
3803
3804         /*
3805          * The verifier depends on vsnprintf() modifying the va_list that
3806          * is passed to it by reference. Some architectures (like x86_32)
3807          * pass the va_list by value, which means that vsnprintf() does
3808          * not modify the caller's copy of the va_list, and the verifier
3809          * would then need to be able to understand every value that
3810          * vsnprintf can consume. If the va_list is passed by value, then
3811          * the verifier is disabled.
3812          */
3813         va_start(ap, fmt);
3814         vsnprintf(buf, 16, "%d", ap);
3815         ret = va_arg(ap, int);
3816         va_end(ap);
3817
3818         return ret;
3819 }
3820
3821 static void test_can_verify(void)
3822 {
3823         if (!test_can_verify_check("%d %d", 0, 1)) {
3824                 pr_info("trace event string verifier disabled\n");
3825                 static_branch_inc(&trace_no_verify);
3826         }
3827 }
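
/*
 * Worked example of the probe above, i.e. the two possible outcomes
 * of test_can_verify_check("%d %d", 0, 1):
 *
 * - va_list effectively passed by reference: vsnprintf() consumes the
 *   first integer (0), so the following va_arg() returns 1 and the
 *   verifier stays enabled.
 *
 * - va_list passed by value (e.g. x86_32): vsnprintf() advances only
 *   a copy, the caller's va_list still points at the first argument,
 *   va_arg() returns 0 and trace_no_verify is switched on.
 */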
3828
3829 /**
3830  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3831  * @iter: The iterator that holds the seq buffer and the event being printed
3832  * @fmt: The format used to print the event
3833  * @ap: The va_list holding the data to print from @fmt.
3834  *
3835  * This writes the data into the @iter->seq buffer using the data from
3836  * @fmt and @ap. If the format has a %s, then the source of the string
3837  * is examined to make sure it is safe to print, otherwise it will
3838  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3839  * pointer.
3840  */
3841 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3842                          va_list ap)
3843 {
3844         const char *p = fmt;
3845         const char *str;
3846         int i, j;
3847
3848         if (WARN_ON_ONCE(!fmt))
3849                 return;
3850
3851         if (static_branch_unlikely(&trace_no_verify))
3852                 goto print;
3853
3854         /* Don't bother checking when doing a ftrace_dump() */
3855         if (iter->fmt == static_fmt_buf)
3856                 goto print;
3857
3858         while (*p) {
3859                 bool star = false;
3860                 int len = 0;
3861
3862                 j = 0;
3863
3864                 /* We only care about %s and variants */
3865                 for (i = 0; p[i]; i++) {
3866                         if (i + 1 >= iter->fmt_size) {
3867                                 /*
3868                                  * If we can't expand the copy buffer,
3869                                  * just print it.
3870                                  */
3871                                 if (!trace_iter_expand_format(iter))
3872                                         goto print;
3873                         }
3874
3875                         if (p[i] == '\\' && p[i+1]) {
3876                                 i++;
3877                                 continue;
3878                         }
3879                         if (p[i] == '%') {
3880                                 /* Need to test cases like %08.*s */
3881                                 for (j = 1; p[i+j]; j++) {
3882                                         if (isdigit(p[i+j]) ||
3883                                             p[i+j] == '.')
3884                                                 continue;
3885                                         if (p[i+j] == '*') {
3886                                                 star = true;
3887                                                 continue;
3888                                         }
3889                                         break;
3890                                 }
3891                                 if (p[i+j] == 's')
3892                                         break;
3893                                 star = false;
3894                         }
3895                         j = 0;
3896                 }
3897                 /* If no %s found then just print normally */
3898                 if (!p[i])
3899                         break;
3900
3901                 /* Copy up to the %s, and print that */
3902                 strncpy(iter->fmt, p, i);
3903                 iter->fmt[i] = '\0';
3904                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3905
3906                 /*
3907                  * If iter->seq is full, the above call no longer guarantees
3908                  * that ap is in sync with fmt processing, and further calls
3909                  * to va_arg() can return wrong positional arguments.
3910                  *
3911                  * Ensure that ap is no longer used in this case.
3912                  */
3913                 if (iter->seq.full) {
3914                         p = "";
3915                         break;
3916                 }
3917
3918                 if (star)
3919                         len = va_arg(ap, int);
3920
3921                 /* The ap now points to the string data of the %s */
3922                 str = va_arg(ap, const char *);
3923
3924                 /*
3925                  * If you hit this warning, it is likely that the
3926                  * trace event in question used %s on a string that
3927                  * was saved at the time of the event, but may not be
3928                  * around when the trace is read. Use __string(),
3929                  * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3930                  * instead. See samples/trace_events/trace-events-sample.h
3931                  * for reference.
3932                  */
3933                 if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3934                               "fmt: '%s' current_buffer: '%s'",
3935                               fmt, seq_buf_str(&iter->seq.seq))) {
3936                         int ret;
3937
3938                         /* Try to safely read the string */
3939                         if (star) {
3940                                 if (len + 1 > iter->fmt_size)
3941                                         len = iter->fmt_size - 1;
3942                                 if (len < 0)
3943                                         len = 0;
3944                                 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3945                                 iter->fmt[len] = 0;
3946                                 star = false;
3947                         } else {
3948                                 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3949                                                                   iter->fmt_size);
3950                         }
3951                         if (ret < 0)
3952                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
3953                         else
3954                                 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3955                                                  str, iter->fmt);
3956                         str = "[UNSAFE-MEMORY]";
3957                         strcpy(iter->fmt, "%s");
3958                 } else {
3959                         strncpy(iter->fmt, p + i, j + 1);
3960                         iter->fmt[j+1] = '\0';
3961                 }
3962                 if (star)
3963                         trace_seq_printf(&iter->seq, iter->fmt, len, str);
3964                 else
3965                         trace_seq_printf(&iter->seq, iter->fmt, str);
3966
3967                 p += i + j + 1;
3968         }
3969  print:
3970         if (*p)
3971                 trace_seq_vprintf(&iter->seq, p, ap);
3972 }
3973
3974 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3975 {
3976         const char *p, *new_fmt;
3977         char *q;
3978
3979         if (WARN_ON_ONCE(!fmt))
3980                 return fmt;
3981
3982         if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3983                 return fmt;
3984
3985         p = fmt;
3986         new_fmt = q = iter->fmt;
3987         while (*p) {
3988                 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3989                         if (!trace_iter_expand_format(iter))
3990                                 return fmt;
3991
3992                         q += iter->fmt - new_fmt;
3993                         new_fmt = iter->fmt;
3994                 }
3995
3996                 *q++ = *p++;
3997
3998                 /* Replace %p with %px */
3999                 if (p[-1] == '%') {
4000                         if (p[0] == '%') {
4001                                 *q++ = *p++;
4002                         } else if (p[0] == 'p' && !isalnum(p[1])) {
4003                                 *q++ = *p++;
4004                                 *q++ = 'x';
4005                         }
4006                 }
4007         }
4008         *q = '\0';
4009
4010         return new_fmt;
4011 }
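
/*
 * Worked example of the rewrite above (illustrative format string
 * only): with hashed pointer printing disabled, a format such as
 *
 *	"req=%p handler=%pS done=%d%%"
 * becomes
 *	"req=%px handler=%pS done=%d%%"
 *
 * Only a bare %p gets the 'x' appended; extended specifiers such as
 * %pS are left alone because the character after the 'p' is
 * alphanumeric, and the "%%" escape is skipped so literal percent
 * signs survive unchanged.
 */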
4012
4013 #define STATIC_TEMP_BUF_SIZE    128
4014 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4015
4016 /* Find the next real entry, without updating the iterator itself */
4017 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4018                                           int *ent_cpu, u64 *ent_ts)
4019 {
4020         /* __find_next_entry will reset ent_size */
4021         int ent_size = iter->ent_size;
4022         struct trace_entry *entry;
4023
4024         /*
4025          * If called from ftrace_dump(), then the iter->temp buffer
4026          * will be the static_temp_buf and not created from kmalloc.
4027          * If the entry size is greater than the buffer, we cannot
4028          * save it. Just return NULL in that case. This is only
4029          * used to add markers when two consecutive events' time
4030          * stamps have a large delta. See trace_print_lat_context().
4031          */
4032         if (iter->temp == static_temp_buf &&
4033             STATIC_TEMP_BUF_SIZE < ent_size)
4034                 return NULL;
4035
4036         /*
4037          * The __find_next_entry() may call peek_next_entry(), which may
4038          * call ring_buffer_peek() that may make the contents of iter->ent
4039          * undefined. Need to copy iter->ent now.
4040          */
4041         if (iter->ent && iter->ent != iter->temp) {
4042                 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4043                     !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4044                         void *temp;
4045                         temp = kmalloc(iter->ent_size, GFP_KERNEL);
4046                         if (!temp)
4047                                 return NULL;
4048                         kfree(iter->temp);
4049                         iter->temp = temp;
4050                         iter->temp_size = iter->ent_size;
4051                 }
4052                 memcpy(iter->temp, iter->ent, iter->ent_size);
4053                 iter->ent = iter->temp;
4054         }
4055         entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4056         /* Put back the original ent_size */
4057         iter->ent_size = ent_size;
4058
4059         return entry;
4060 }
4061
4062 /* Find the next real entry, and increment the iterator to the next entry */
4063 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4064 {
4065         iter->ent = __find_next_entry(iter, &iter->cpu,
4066                                       &iter->lost_events, &iter->ts);
4067
4068         if (iter->ent)
4069                 trace_iterator_increment(iter);
4070
4071         return iter->ent ? iter : NULL;
4072 }
4073
4074 static void trace_consume(struct trace_iterator *iter)
4075 {
4076         ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4077                             &iter->lost_events);
4078 }
4079
4080 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4081 {
4082         struct trace_iterator *iter = m->private;
4083         int i = (int)*pos;
4084         void *ent;
4085
4086         WARN_ON_ONCE(iter->leftover);
4087
4088         (*pos)++;
4089
4090         /* can't go backwards */
4091         if (iter->idx > i)
4092                 return NULL;
4093
4094         if (iter->idx < 0)
4095                 ent = trace_find_next_entry_inc(iter);
4096         else
4097                 ent = iter;
4098
4099         while (ent && iter->idx < i)
4100                 ent = trace_find_next_entry_inc(iter);
4101
4102         iter->pos = *pos;
4103
4104         return ent;
4105 }
4106
4107 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4108 {
4109         struct ring_buffer_iter *buf_iter;
4110         unsigned long entries = 0;
4111         u64 ts;
4112
4113         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4114
4115         buf_iter = trace_buffer_iter(iter, cpu);
4116         if (!buf_iter)
4117                 return;
4118
4119         ring_buffer_iter_reset(buf_iter);
4120
4121         /*
4122          * With the max latency tracers, it is possible that a reset
4123          * never took place on a cpu. This is evident when the
4124          * timestamp is before the start of the buffer.
4125          */
4126         while (ring_buffer_iter_peek(buf_iter, &ts)) {
4127                 if (ts >= iter->array_buffer->time_start)
4128                         break;
4129                 entries++;
4130                 ring_buffer_iter_advance(buf_iter);
4131         }
4132
4133         per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4134 }
4135
4136 /*
4137  * The current tracer is copied to avoid taking a global
4138  * lock all around.
4139  */
4140 static void *s_start(struct seq_file *m, loff_t *pos)
4141 {
4142         struct trace_iterator *iter = m->private;
4143         struct trace_array *tr = iter->tr;
4144         int cpu_file = iter->cpu_file;
4145         void *p = NULL;
4146         loff_t l = 0;
4147         int cpu;
4148
4149         mutex_lock(&trace_types_lock);
4150         if (unlikely(tr->current_trace != iter->trace)) {
4151                 /* Close iter->trace before switching to the new current tracer */
4152                 if (iter->trace->close)
4153                         iter->trace->close(iter);
4154                 iter->trace = tr->current_trace;
4155                 /* Reopen the new current tracer */
4156                 if (iter->trace->open)
4157                         iter->trace->open(iter);
4158         }
4159         mutex_unlock(&trace_types_lock);
4160
4161 #ifdef CONFIG_TRACER_MAX_TRACE
4162         if (iter->snapshot && iter->trace->use_max_tr)
4163                 return ERR_PTR(-EBUSY);
4164 #endif
4165
4166         if (*pos != iter->pos) {
4167                 iter->ent = NULL;
4168                 iter->cpu = 0;
4169                 iter->idx = -1;
4170
4171                 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4172                         for_each_tracing_cpu(cpu)
4173                                 tracing_iter_reset(iter, cpu);
4174                 } else
4175                         tracing_iter_reset(iter, cpu_file);
4176
4177                 iter->leftover = 0;
4178                 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4179                         ;
4180
4181         } else {
4182                 /*
4183                  * If we overflowed the seq_file before, then we want
4184                  * to just reuse the trace_seq buffer again.
4185                  */
4186                 if (iter->leftover)
4187                         p = iter;
4188                 else {
4189                         l = *pos - 1;
4190                         p = s_next(m, p, &l);
4191                 }
4192         }
4193
4194         trace_event_read_lock();
4195         trace_access_lock(cpu_file);
4196         return p;
4197 }
4198
4199 static void s_stop(struct seq_file *m, void *p)
4200 {
4201         struct trace_iterator *iter = m->private;
4202
4203 #ifdef CONFIG_TRACER_MAX_TRACE
4204         if (iter->snapshot && iter->trace->use_max_tr)
4205                 return;
4206 #endif
4207
4208         trace_access_unlock(iter->cpu_file);
4209         trace_event_read_unlock();
4210 }
4211
4212 static void
4213 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4214                       unsigned long *entries, int cpu)
4215 {
4216         unsigned long count;
4217
4218         count = ring_buffer_entries_cpu(buf->buffer, cpu);
4219         /*
4220          * If this buffer has skipped entries, then we hold all
4221          * entries for the trace and we need to ignore the
4222          * ones before the time stamp.
4223          */
4224         if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4225                 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4226                 /* total is the same as the entries */
4227                 *total = count;
4228         } else
4229                 *total = count +
4230                         ring_buffer_overrun_cpu(buf->buffer, cpu);
4231         *entries = count;
4232 }
4233
4234 static void
4235 get_total_entries(struct array_buffer *buf,
4236                   unsigned long *total, unsigned long *entries)
4237 {
4238         unsigned long t, e;
4239         int cpu;
4240
4241         *total = 0;
4242         *entries = 0;
4243
4244         for_each_tracing_cpu(cpu) {
4245                 get_total_entries_cpu(buf, &t, &e, cpu);
4246                 *total += t;
4247                 *entries += e;
4248         }
4249 }
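
/*
 * Worked example (illustrative numbers): if a CPU buffer currently
 * holds 1000 readable entries and 250 older events were overwritten
 * (the ring buffer overrun count), then for that CPU
 *
 *	entries = 1000
 *	total   = 1000 + 250 = 1250
 *
 * unless the latency tracer recorded skipped_entries, in which case
 * the skipped events are subtracted and total is reported equal to
 * entries, exactly as get_total_entries_cpu() does above.
 */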
4250
4251 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4252 {
4253         unsigned long total, entries;
4254
4255         if (!tr)
4256                 tr = &global_trace;
4257
4258         get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4259
4260         return entries;
4261 }
4262
4263 unsigned long trace_total_entries(struct trace_array *tr)
4264 {
4265         unsigned long total, entries;
4266
4267         if (!tr)
4268                 tr = &global_trace;
4269
4270         get_total_entries(&tr->array_buffer, &total, &entries);
4271
4272         return entries;
4273 }
4274
4275 static void print_lat_help_header(struct seq_file *m)
4276 {
4277         seq_puts(m, "#                    _------=> CPU#            \n"
4278                     "#                   / _-----=> irqs-off/BH-disabled\n"
4279                     "#                  | / _----=> need-resched    \n"
4280                     "#                  || / _---=> hardirq/softirq \n"
4281                     "#                  ||| / _--=> preempt-depth   \n"
4282                     "#                  |||| / _-=> migrate-disable \n"
4283                     "#                  ||||| /     delay           \n"
4284                     "#  cmd     pid     |||||| time  |   caller     \n"
4285                     "#     \\   /        ||||||  \\    |    /       \n");
4286 }
4287
4288 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4289 {
4290         unsigned long total;
4291         unsigned long entries;
4292
4293         get_total_entries(buf, &total, &entries);
4294         seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4295                    entries, total, num_online_cpus());
4296         seq_puts(m, "#\n");
4297 }
4298
4299 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4300                                    unsigned int flags)
4301 {
4302         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4303
4304         print_event_info(buf, m);
4305
4306         seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4307         seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4308 }
4309
4310 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4311                                        unsigned int flags)
4312 {
4313         bool tgid = flags & TRACE_ITER_RECORD_TGID;
4314         static const char space[] = "            ";
4315         int prec = tgid ? 12 : 2;
4316
4317         print_event_info(buf, m);
4318
4319         seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4320         seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4321         seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4322         seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4323         seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4324         seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4325         seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4326         seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4327 }
4328
4329 void
4330 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4331 {
4332         unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4333         struct array_buffer *buf = iter->array_buffer;
4334         struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4335         struct tracer *type = iter->trace;
4336         unsigned long entries;
4337         unsigned long total;
4338         const char *name = type->name;
4339
4340         get_total_entries(buf, &total, &entries);
4341
4342         seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4343                    name, UTS_RELEASE);
4344         seq_puts(m, "# -----------------------------------"
4345                  "---------------------------------\n");
4346         seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4347                    " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4348                    nsecs_to_usecs(data->saved_latency),
4349                    entries,
4350                    total,
4351                    buf->cpu,
4352                    preempt_model_none()      ? "server" :
4353                    preempt_model_voluntary() ? "desktop" :
4354                    preempt_model_full()      ? "preempt" :
4355                    preempt_model_rt()        ? "preempt_rt" :
4356                    "unknown",
4357                    /* These are reserved for later use */
4358                    0, 0, 0, 0);
4359 #ifdef CONFIG_SMP
4360         seq_printf(m, " #P:%d)\n", num_online_cpus());
4361 #else
4362         seq_puts(m, ")\n");
4363 #endif
4364         seq_puts(m, "#    -----------------\n");
4365         seq_printf(m, "#    | task: %.16s-%d "
4366                    "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4367                    data->comm, data->pid,
4368                    from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4369                    data->policy, data->rt_priority);
4370         seq_puts(m, "#    -----------------\n");
4371
4372         if (data->critical_start) {
4373                 seq_puts(m, "#  => started at: ");
4374                 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4375                 trace_print_seq(m, &iter->seq);
4376                 seq_puts(m, "\n#  => ended at:   ");
4377                 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4378                 trace_print_seq(m, &iter->seq);
4379                 seq_puts(m, "\n#\n");
4380         }
4381
4382         seq_puts(m, "#\n");
4383 }
4384
4385 static void test_cpu_buff_start(struct trace_iterator *iter)
4386 {
4387         struct trace_seq *s = &iter->seq;
4388         struct trace_array *tr = iter->tr;
4389
4390         if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4391                 return;
4392
4393         if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4394                 return;
4395
4396         if (cpumask_available(iter->started) &&
4397             cpumask_test_cpu(iter->cpu, iter->started))
4398                 return;
4399
4400         if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4401                 return;
4402
4403         if (cpumask_available(iter->started))
4404                 cpumask_set_cpu(iter->cpu, iter->started);
4405
4406         /* Don't print the "CPU buffer started" annotation for the first entry of the trace */
4407         if (iter->idx > 1)
4408                 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4409                                 iter->cpu);
4410 }
4411
4412 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4413 {
4414         struct trace_array *tr = iter->tr;
4415         struct trace_seq *s = &iter->seq;
4416         unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4417         struct trace_entry *entry;
4418         struct trace_event *event;
4419
4420         entry = iter->ent;
4421
4422         test_cpu_buff_start(iter);
4423
4424         event = ftrace_find_event(entry->type);
4425
4426         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4427                 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4428                         trace_print_lat_context(iter);
4429                 else
4430                         trace_print_context(iter);
4431         }
4432
4433         if (trace_seq_has_overflowed(s))
4434                 return TRACE_TYPE_PARTIAL_LINE;
4435
4436         if (event) {
4437                 if (tr->trace_flags & TRACE_ITER_FIELDS)
4438                         return print_event_fields(iter, event);
4439                 return event->funcs->trace(iter, sym_flags, event);
4440         }
4441
4442         trace_seq_printf(s, "Unknown type %d\n", entry->type);
4443
4444         return trace_handle_return(s);
4445 }
4446
4447 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4448 {
4449         struct trace_array *tr = iter->tr;
4450         struct trace_seq *s = &iter->seq;
4451         struct trace_entry *entry;
4452         struct trace_event *event;
4453
4454         entry = iter->ent;
4455
4456         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4457                 trace_seq_printf(s, "%d %d %llu ",
4458                                  entry->pid, iter->cpu, iter->ts);
4459
4460         if (trace_seq_has_overflowed(s))
4461                 return TRACE_TYPE_PARTIAL_LINE;
4462
4463         event = ftrace_find_event(entry->type);
4464         if (event)
4465                 return event->funcs->raw(iter, 0, event);
4466
4467         trace_seq_printf(s, "%d ?\n", entry->type);
4468
4469         return trace_handle_return(s);
4470 }
4471
4472 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4473 {
4474         struct trace_array *tr = iter->tr;
4475         struct trace_seq *s = &iter->seq;
4476         unsigned char newline = '\n';
4477         struct trace_entry *entry;
4478         struct trace_event *event;
4479
4480         entry = iter->ent;
4481
4482         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4483                 SEQ_PUT_HEX_FIELD(s, entry->pid);
4484                 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4485                 SEQ_PUT_HEX_FIELD(s, iter->ts);
4486                 if (trace_seq_has_overflowed(s))
4487                         return TRACE_TYPE_PARTIAL_LINE;
4488         }
4489
4490         event = ftrace_find_event(entry->type);
4491         if (event) {
4492                 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4493                 if (ret != TRACE_TYPE_HANDLED)
4494                         return ret;
4495         }
4496
4497         SEQ_PUT_FIELD(s, newline);
4498
4499         return trace_handle_return(s);
4500 }
4501
4502 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4503 {
4504         struct trace_array *tr = iter->tr;
4505         struct trace_seq *s = &iter->seq;
4506         struct trace_entry *entry;
4507         struct trace_event *event;
4508
4509         entry = iter->ent;
4510
4511         if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4512                 SEQ_PUT_FIELD(s, entry->pid);
4513                 SEQ_PUT_FIELD(s, iter->cpu);
4514                 SEQ_PUT_FIELD(s, iter->ts);
4515                 if (trace_seq_has_overflowed(s))
4516                         return TRACE_TYPE_PARTIAL_LINE;
4517         }
4518
4519         event = ftrace_find_event(entry->type);
4520         return event ? event->funcs->binary(iter, 0, event) :
4521                 TRACE_TYPE_HANDLED;
4522 }
4523
4524 int trace_empty(struct trace_iterator *iter)
4525 {
4526         struct ring_buffer_iter *buf_iter;
4527         int cpu;
4528
4529         /* If we are looking at one CPU buffer, only check that one */
4530         if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4531                 cpu = iter->cpu_file;
4532                 buf_iter = trace_buffer_iter(iter, cpu);
4533                 if (buf_iter) {
4534                         if (!ring_buffer_iter_empty(buf_iter))
4535                                 return 0;
4536                 } else {
4537                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4538                                 return 0;
4539                 }
4540                 return 1;
4541         }
4542
4543         for_each_tracing_cpu(cpu) {
4544                 buf_iter = trace_buffer_iter(iter, cpu);
4545                 if (buf_iter) {
4546                         if (!ring_buffer_iter_empty(buf_iter))
4547                                 return 0;
4548                 } else {
4549                         if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4550                                 return 0;
4551                 }
4552         }
4553
4554         return 1;
4555 }
4556
4557 /*  Called with trace_event_read_lock() held. */
4558 enum print_line_t print_trace_line(struct trace_iterator *iter)
4559 {
4560         struct trace_array *tr = iter->tr;
4561         unsigned long trace_flags = tr->trace_flags;
4562         enum print_line_t ret;
4563
4564         if (iter->lost_events) {
4565                 if (iter->lost_events == (unsigned long)-1)
4566                         trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4567                                          iter->cpu);
4568                 else
4569                         trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4570                                          iter->cpu, iter->lost_events);
4571                 if (trace_seq_has_overflowed(&iter->seq))
4572                         return TRACE_TYPE_PARTIAL_LINE;
4573         }
4574
4575         if (iter->trace && iter->trace->print_line) {
4576                 ret = iter->trace->print_line(iter);
4577                 if (ret != TRACE_TYPE_UNHANDLED)
4578                         return ret;
4579         }
4580
4581         if (iter->ent->type == TRACE_BPUTS &&
4582                         trace_flags & TRACE_ITER_PRINTK &&
4583                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4584                 return trace_print_bputs_msg_only(iter);
4585
4586         if (iter->ent->type == TRACE_BPRINT &&
4587                         trace_flags & TRACE_ITER_PRINTK &&
4588                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4589                 return trace_print_bprintk_msg_only(iter);
4590
4591         if (iter->ent->type == TRACE_PRINT &&
4592                         trace_flags & TRACE_ITER_PRINTK &&
4593                         trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4594                 return trace_print_printk_msg_only(iter);
4595
4596         if (trace_flags & TRACE_ITER_BIN)
4597                 return print_bin_fmt(iter);
4598
4599         if (trace_flags & TRACE_ITER_HEX)
4600                 return print_hex_fmt(iter);
4601
4602         if (trace_flags & TRACE_ITER_RAW)
4603                 return print_raw_fmt(iter);
4604
4605         return print_trace_fmt(iter);
4606 }
4607
4608 void trace_latency_header(struct seq_file *m)
4609 {
4610         struct trace_iterator *iter = m->private;
4611         struct trace_array *tr = iter->tr;
4612
4613         /* print nothing if the buffers are empty */
4614         if (trace_empty(iter))
4615                 return;
4616
4617         if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4618                 print_trace_header(m, iter);
4619
4620         if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4621                 print_lat_help_header(m);
4622 }
4623
4624 void trace_default_header(struct seq_file *m)
4625 {
4626         struct trace_iterator *iter = m->private;
4627         struct trace_array *tr = iter->tr;
4628         unsigned long trace_flags = tr->trace_flags;
4629
4630         if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4631                 return;
4632
4633         if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4634                 /* print nothing if the buffers are empty */
4635                 if (trace_empty(iter))
4636                         return;
4637                 print_trace_header(m, iter);
4638                 if (!(trace_flags & TRACE_ITER_VERBOSE))
4639                         print_lat_help_header(m);
4640         } else {
4641                 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4642                         if (trace_flags & TRACE_ITER_IRQ_INFO)
4643                                 print_func_help_header_irq(iter->array_buffer,
4644                                                            m, trace_flags);
4645                         else
4646                                 print_func_help_header(iter->array_buffer, m,
4647                                                        trace_flags);
4648                 }
4649         }
4650 }
4651
4652 static void test_ftrace_alive(struct seq_file *m)
4653 {
4654         if (!ftrace_is_dead())
4655                 return;
4656         seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4657                     "#          MAY BE MISSING FUNCTION EVENTS\n");
4658 }
4659
4660 #ifdef CONFIG_TRACER_MAX_TRACE
4661 static void show_snapshot_main_help(struct seq_file *m)
4662 {
4663         seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4664                     "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4665                     "#                      Takes a snapshot of the main buffer.\n"
4666                     "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4667                     "#                      (Doesn't have to be '2' works with any number that\n"
4668                     "#                       is not a '0' or '1')\n");
4669 }
4670
4671 static void show_snapshot_percpu_help(struct seq_file *m)
4672 {
4673         seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4674 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4675         seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4676                     "#                      Takes a snapshot of the main buffer for this cpu.\n");
4677 #else
4678         seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4679                     "#                     Must use main snapshot file to allocate.\n");
4680 #endif
4681         seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4682                     "#                      (Doesn't have to be '2' works with any number that\n"
4683                     "#                       is not a '0' or '1')\n");
4684 }
4685
4686 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4687 {
4688         if (iter->tr->allocated_snapshot)
4689                 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4690         else
4691                 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4692
4693         seq_puts(m, "# Snapshot commands:\n");
4694         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4695                 show_snapshot_main_help(m);
4696         else
4697                 show_snapshot_percpu_help(m);
4698 }
4699 #else
4700 /* Should never be called */
4701 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4702 #endif
4703
4704 static int s_show(struct seq_file *m, void *v)
4705 {
4706         struct trace_iterator *iter = v;
4707         int ret;
4708
4709         if (iter->ent == NULL) {
4710                 if (iter->tr) {
4711                         seq_printf(m, "# tracer: %s\n", iter->trace->name);
4712                         seq_puts(m, "#\n");
4713                         test_ftrace_alive(m);
4714                 }
4715                 if (iter->snapshot && trace_empty(iter))
4716                         print_snapshot_help(m, iter);
4717                 else if (iter->trace && iter->trace->print_header)
4718                         iter->trace->print_header(m);
4719                 else
4720                         trace_default_header(m);
4721
4722         } else if (iter->leftover) {
4723                 /*
4724                  * If we filled the seq_file buffer earlier, we
4725                  * want to just show it now.
4726                  */
4727                 ret = trace_print_seq(m, &iter->seq);
4728
4729                 /* ret should this time be zero, but you never know */
4730                 iter->leftover = ret;
4731
4732         } else {
4733                 ret = print_trace_line(iter);
4734                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
4735                         iter->seq.full = 0;
4736                         trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4737                 }
4738                 ret = trace_print_seq(m, &iter->seq);
4739                 /*
4740                  * If we overflow the seq_file buffer, then it will
4741                  * ask us for this data again at start up.
4742                  * Use that instead.
4743                  *  ret is 0 if seq_file write succeeded.
4744                  *        -1 otherwise.
4745                  */
4746                 iter->leftover = ret;
4747         }
4748
4749         return 0;
4750 }
4751
4752 /*
4753  * Should be used after trace_array_get(), trace_types_lock
4754  * ensures that i_cdev was already initialized.
4755  */
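/* Per-CPU trace files store (cpu + 1) in i_cdev; a NULL i_cdev means all CPUs. */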
4756 static inline int tracing_get_cpu(struct inode *inode)
4757 {
4758         if (inode->i_cdev) /* See trace_create_cpu_file() */
4759                 return (long)inode->i_cdev - 1;
4760         return RING_BUFFER_ALL_CPUS;
4761 }
4762
4763 static const struct seq_operations tracer_seq_ops = {
4764         .start          = s_start,
4765         .next           = s_next,
4766         .stop           = s_stop,
4767         .show           = s_show,
4768 };
4769
4770 /*
4771  * Note, as iter itself can be allocated and freed in different
4772  * ways, this function is only used to free its content, and not
4773  * the iterator itself. The only requirement to all the allocations
4774  * is that it must zero all fields (kzalloc), as freeing works with
4775  * ethier allocated content or NULL.
4776  */
4777 static void free_trace_iter_content(struct trace_iterator *iter)
4778 {
4779         /* The fmt is either NULL, allocated or points to static_fmt_buf */
4780         if (iter->fmt != static_fmt_buf)
4781                 kfree(iter->fmt);
4782
4783         kfree(iter->temp);
4784         kfree(iter->buffer_iter);
4785         mutex_destroy(&iter->mutex);
4786         free_cpumask_var(iter->started);
4787 }
4788
4789 static struct trace_iterator *
4790 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4791 {
4792         struct trace_array *tr = inode->i_private;
4793         struct trace_iterator *iter;
4794         int cpu;
4795
4796         if (tracing_disabled)
4797                 return ERR_PTR(-ENODEV);
4798
4799         iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4800         if (!iter)
4801                 return ERR_PTR(-ENOMEM);
4802
4803         iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4804                                     GFP_KERNEL);
4805         if (!iter->buffer_iter)
4806                 goto release;
4807
4808         /*
4809          * trace_find_next_entry() may need to save off iter->ent.
4810          * It will place it into the iter->temp buffer. As most
4811          * events are less than 128 bytes, allocate a buffer of that size.
4812          * If one is greater, then trace_find_next_entry() will
4813          * allocate a new buffer to adjust for the bigger iter->ent.
4814          * It's not critical if it fails to get allocated here.
4815          */
4816         iter->temp = kmalloc(128, GFP_KERNEL);
4817         if (iter->temp)
4818                 iter->temp_size = 128;
4819
4820         /*
4821          * trace_event_printf() may need to modify given format
4822          * string to replace %p with %px so that it shows real address
4823          * instead of hash value. However, that is only for the event
4824          * instead of a hashed value. However, that is only needed for event
4825          * tracing; other tracers may not need it. Defer the allocation
4826          */
4827         iter->fmt = NULL;
4828         iter->fmt_size = 0;
4829
4830         mutex_lock(&trace_types_lock);
4831         iter->trace = tr->current_trace;
4832
4833         if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4834                 goto fail;
4835
4836         iter->tr = tr;
4837
4838 #ifdef CONFIG_TRACER_MAX_TRACE
4839         /* Currently only the top directory has a snapshot */
4840         if (tr->current_trace->print_max || snapshot)
4841                 iter->array_buffer = &tr->max_buffer;
4842         else
4843 #endif
4844                 iter->array_buffer = &tr->array_buffer;
4845         iter->snapshot = snapshot;
4846         iter->pos = -1;
4847         iter->cpu_file = tracing_get_cpu(inode);
4848         mutex_init(&iter->mutex);
4849
4850         /* Notify the tracer early; before we stop tracing. */
4851         if (iter->trace->open)
4852                 iter->trace->open(iter);
4853
4854         /* Annotate start of buffers if we had overruns */
4855         if (ring_buffer_overruns(iter->array_buffer->buffer))
4856                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4857
4858         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4859         if (trace_clocks[tr->clock_id].in_ns)
4860                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4861
4862         /*
4863          * If pause-on-trace is enabled, then stop the trace while
4864          * dumping, unless this is the "snapshot" file
4865          */
4866         if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4867                 tracing_stop_tr(tr);
4868
4869         if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4870                 for_each_tracing_cpu(cpu) {
4871                         iter->buffer_iter[cpu] =
4872                                 ring_buffer_read_prepare(iter->array_buffer->buffer,
4873                                                          cpu, GFP_KERNEL);
4874                 }
4875                 ring_buffer_read_prepare_sync();
4876                 for_each_tracing_cpu(cpu) {
4877                         ring_buffer_read_start(iter->buffer_iter[cpu]);
4878                         tracing_iter_reset(iter, cpu);
4879                 }
4880         } else {
4881                 cpu = iter->cpu_file;
4882                 iter->buffer_iter[cpu] =
4883                         ring_buffer_read_prepare(iter->array_buffer->buffer,
4884                                                  cpu, GFP_KERNEL);
4885                 ring_buffer_read_prepare_sync();
4886                 ring_buffer_read_start(iter->buffer_iter[cpu]);
4887                 tracing_iter_reset(iter, cpu);
4888         }
4889
4890         mutex_unlock(&trace_types_lock);
4891
4892         return iter;
4893
4894  fail:
4895         mutex_unlock(&trace_types_lock);
4896         free_trace_iter_content(iter);
4897 release:
4898         seq_release_private(inode, file);
4899         return ERR_PTR(-ENOMEM);
4900 }
4901
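/*
 * Open for files that only need to know that tracing is available;
 * passing NULL means no trace_array reference is taken.
 */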
4902 int tracing_open_generic(struct inode *inode, struct file *filp)
4903 {
4904         int ret;
4905
4906         ret = tracing_check_open_get_tr(NULL);
4907         if (ret)
4908                 return ret;
4909
4910         filp->private_data = inode->i_private;
4911         return 0;
4912 }
4913
4914 bool tracing_is_disabled(void)
4915 {
4916         return tracing_disabled;
4917 }
4918
4919 /*
4920  * Open and update trace_array ref count.
4921  * Must have the current trace_array passed to it.
4922  */
4923 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4924 {
4925         struct trace_array *tr = inode->i_private;
4926         int ret;
4927
4928         ret = tracing_check_open_get_tr(tr);
4929         if (ret)
4930                 return ret;
4931
4932         filp->private_data = inode->i_private;
4933
4934         return 0;
4935 }
4936
4937 /*
4938  * The private pointer of the inode is the trace_event_file.
4939  * Update the tr ref count associated to it.
4940  */
4941 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4942 {
4943         struct trace_event_file *file = inode->i_private;
4944         int ret;
4945
4946         ret = tracing_check_open_get_tr(file->tr);
4947         if (ret)
4948                 return ret;
4949
4950         mutex_lock(&event_mutex);
4951
4952         /* Fail if the file is marked for removal */
4953         if (file->flags & EVENT_FILE_FL_FREED) {
4954                 trace_array_put(file->tr);
4955                 ret = -ENODEV;
4956         } else {
4957                 event_file_get(file);
4958         }
4959
4960         mutex_unlock(&event_mutex);
4961         if (ret)
4962                 return ret;
4963
4964         filp->private_data = inode->i_private;
4965
4966         return 0;
4967 }
4968
4969 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4970 {
4971         struct trace_event_file *file = inode->i_private;
4972
4973         trace_array_put(file->tr);
4974         event_file_put(file);
4975
4976         return 0;
4977 }
4978
4979 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4980 {
4981         tracing_release_file_tr(inode, filp);
4982         return single_release(inode, filp);
4983 }
4984
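/*
 * trace_marker style files are write-only streams: stream_open() marks them
 * non-seekable so writes do not depend on the file position.
 *
 * A minimal user-space sketch (assuming tracefs is mounted at
 * /sys/kernel/tracing):
 *
 *	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
 *	write(fd, "hello from user space\n", 22);
 *	close(fd);
 */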
4985 static int tracing_mark_open(struct inode *inode, struct file *filp)
4986 {
4987         stream_open(inode, filp);
4988         return tracing_open_generic_tr(inode, filp);
4989 }
4990
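/* Release an iterator created by __tracing_open() for the "trace" file. */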
4991 static int tracing_release(struct inode *inode, struct file *file)
4992 {
4993         struct trace_array *tr = inode->i_private;
4994         struct seq_file *m = file->private_data;
4995         struct trace_iterator *iter;
4996         int cpu;
4997
4998         if (!(file->f_mode & FMODE_READ)) {
4999                 trace_array_put(tr);
5000                 return 0;
5001         }
5002
5003         /* Writes do not use seq_file */
5004         iter = m->private;
5005         mutex_lock(&trace_types_lock);
5006
5007         for_each_tracing_cpu(cpu) {
5008                 if (iter->buffer_iter[cpu])
5009                         ring_buffer_read_finish(iter->buffer_iter[cpu]);
5010         }
5011
5012         if (iter->trace && iter->trace->close)
5013                 iter->trace->close(iter);
5014
5015         if (!iter->snapshot && tr->stop_count)
5016                 /* reenable tracing if it was previously enabled */
5017                 tracing_start_tr(tr);
5018
5019         __trace_array_put(tr);
5020
5021         mutex_unlock(&trace_types_lock);
5022
5023         free_trace_iter_content(iter);
5024         seq_release_private(inode, file);
5025
5026         return 0;
5027 }
5028
5029 int tracing_release_generic_tr(struct inode *inode, struct file *file)
5030 {
5031         struct trace_array *tr = inode->i_private;
5032
5033         trace_array_put(tr);
5034         return 0;
5035 }
5036
5037 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5038 {
5039         struct trace_array *tr = inode->i_private;
5040
5041         trace_array_put(tr);
5042
5043         return single_release(inode, file);
5044 }
5045
5046 static int tracing_open(struct inode *inode, struct file *file)
5047 {
5048         struct trace_array *tr = inode->i_private;
5049         struct trace_iterator *iter;
5050         int ret;
5051
5052         ret = tracing_check_open_get_tr(tr);
5053         if (ret)
5054                 return ret;
5055
5056         /* If this file was open for write, then erase contents */
5057         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5058                 int cpu = tracing_get_cpu(inode);
5059                 struct array_buffer *trace_buf = &tr->array_buffer;
5060
5061 #ifdef CONFIG_TRACER_MAX_TRACE
5062                 if (tr->current_trace->print_max)
5063                         trace_buf = &tr->max_buffer;
5064 #endif
5065
5066                 if (cpu == RING_BUFFER_ALL_CPUS)
5067                         tracing_reset_online_cpus(trace_buf);
5068                 else
5069                         tracing_reset_cpu(trace_buf, cpu);
5070         }
5071
5072         if (file->f_mode & FMODE_READ) {
5073                 iter = __tracing_open(inode, file, false);
5074                 if (IS_ERR(iter))
5075                         ret = PTR_ERR(iter);
5076                 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5077                         iter->iter_flags |= TRACE_FILE_LAT_FMT;
5078         }
5079
5080         if (ret < 0)
5081                 trace_array_put(tr);
5082
5083         return ret;
5084 }
5085
5086 /*
5087  * Some tracers are not suitable for instance buffers.
5088  * A tracer is always available for the global array (toplevel)
5089  * or if it explicitly states that it is.
5090  */
5091 static bool
5092 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5093 {
5094         return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5095 }
5096
5097 /* Find the next tracer that this trace array may use */
5098 static struct tracer *
5099 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5100 {
5101         while (t && !trace_ok_for_array(t, tr))
5102                 t = t->next;
5103
5104         return t;
5105 }
5106
5107 static void *
5108 t_next(struct seq_file *m, void *v, loff_t *pos)
5109 {
5110         struct trace_array *tr = m->private;
5111         struct tracer *t = v;
5112
5113         (*pos)++;
5114
5115         if (t)
5116                 t = get_tracer_for_array(tr, t->next);
5117
5118         return t;
5119 }
5120
5121 static void *t_start(struct seq_file *m, loff_t *pos)
5122 {
5123         struct trace_array *tr = m->private;
5124         struct tracer *t;
5125         loff_t l = 0;
5126
5127         mutex_lock(&trace_types_lock);
5128
5129         t = get_tracer_for_array(tr, trace_types);
5130         for (; t && l < *pos; t = t_next(m, t, &l))
5131                 ;
5132
5133         return t;
5134 }
5135
5136 static void t_stop(struct seq_file *m, void *p)
5137 {
5138         mutex_unlock(&trace_types_lock);
5139 }
5140
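/* Print the tracer names on a single line, separated by spaces (available_tracers). */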
5141 static int t_show(struct seq_file *m, void *v)
5142 {
5143         struct tracer *t = v;
5144
5145         if (!t)
5146                 return 0;
5147
5148         seq_puts(m, t->name);
5149         if (t->next)
5150                 seq_putc(m, ' ');
5151         else
5152                 seq_putc(m, '\n');
5153
5154         return 0;
5155 }
5156
5157 static const struct seq_operations show_traces_seq_ops = {
5158         .start          = t_start,
5159         .next           = t_next,
5160         .stop           = t_stop,
5161         .show           = t_show,
5162 };
5163
5164 static int show_traces_open(struct inode *inode, struct file *file)
5165 {
5166         struct trace_array *tr = inode->i_private;
5167         struct seq_file *m;
5168         int ret;
5169
5170         ret = tracing_check_open_get_tr(tr);
5171         if (ret)
5172                 return ret;
5173
5174         ret = seq_open(file, &show_traces_seq_ops);
5175         if (ret) {
5176                 trace_array_put(tr);
5177                 return ret;
5178         }
5179
5180         m = file->private_data;
5181         m->private = tr;
5182
5183         return 0;
5184 }
5185
5186 static int show_traces_release(struct inode *inode, struct file *file)
5187 {
5188         struct trace_array *tr = inode->i_private;
5189
5190         trace_array_put(tr);
5191         return seq_release(inode, file);
5192 }
5193
5194 static ssize_t
5195 tracing_write_stub(struct file *filp, const char __user *ubuf,
5196                    size_t count, loff_t *ppos)
5197 {
5198         return count;
5199 }
5200
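/*
 * Only reads go through seq_file and can seek; write-only opens simply
 * keep the file position at zero.
 */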
5201 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5202 {
5203         int ret;
5204
5205         if (file->f_mode & FMODE_READ)
5206                 ret = seq_lseek(file, offset, whence);
5207         else
5208                 file->f_pos = ret = 0;
5209
5210         return ret;
5211 }
5212
5213 static const struct file_operations tracing_fops = {
5214         .open           = tracing_open,
5215         .read           = seq_read,
5216         .read_iter      = seq_read_iter,
5217         .splice_read    = copy_splice_read,
5218         .write          = tracing_write_stub,
5219         .llseek         = tracing_lseek,
5220         .release        = tracing_release,
5221 };
5222
5223 static const struct file_operations show_traces_fops = {
5224         .open           = show_traces_open,
5225         .read           = seq_read,
5226         .llseek         = seq_lseek,
5227         .release        = show_traces_release,
5228 };
5229
5230 static ssize_t
5231 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5232                      size_t count, loff_t *ppos)
5233 {
5234         struct trace_array *tr = file_inode(filp)->i_private;
5235         char *mask_str;
5236         int len;
5237
5238         len = snprintf(NULL, 0, "%*pb\n",
5239                        cpumask_pr_args(tr->tracing_cpumask)) + 1;
5240         mask_str = kmalloc(len, GFP_KERNEL);
5241         if (!mask_str)
5242                 return -ENOMEM;
5243
5244         len = snprintf(mask_str, len, "%*pb\n",
5245                        cpumask_pr_args(tr->tracing_cpumask));
5246         if (len >= count) {
5247                 count = -EINVAL;
5248                 goto out_err;
5249         }
5250         count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5251
5252 out_err:
5253         kfree(mask_str);
5254
5255         return count;
5256 }
5257
5258 int tracing_set_cpumask(struct trace_array *tr,
5259                         cpumask_var_t tracing_cpumask_new)
5260 {
5261         int cpu;
5262
5263         if (!tr)
5264                 return -EINVAL;
5265
5266         local_irq_disable();
5267         arch_spin_lock(&tr->max_lock);
5268         for_each_tracing_cpu(cpu) {
5269                 /*
5270                  * Increase/decrease the disabled counter if we are
5271                  * about to flip a bit in the cpumask:
5272                  */
5273                 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5274                                 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5275                         atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5276                         ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5277 #ifdef CONFIG_TRACER_MAX_TRACE
5278                         ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5279 #endif
5280                 }
5281                 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5282                                 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5283                         atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5284                         ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5285 #ifdef CONFIG_TRACER_MAX_TRACE
5286                         ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5287 #endif
5288                 }
5289         }
5290         arch_spin_unlock(&tr->max_lock);
5291         local_irq_enable();
5292
5293         cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5294
5295         return 0;
5296 }
5297
5298 static ssize_t
5299 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5300                       size_t count, loff_t *ppos)
5301 {
5302         struct trace_array *tr = file_inode(filp)->i_private;
5303         cpumask_var_t tracing_cpumask_new;
5304         int err;
5305
5306         if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5307                 return -ENOMEM;
5308
5309         err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5310         if (err)
5311                 goto err_free;
5312
5313         err = tracing_set_cpumask(tr, tracing_cpumask_new);
5314         if (err)
5315                 goto err_free;
5316
5317         free_cpumask_var(tracing_cpumask_new);
5318
5319         return count;
5320
5321 err_free:
5322         free_cpumask_var(tracing_cpumask_new);
5323
5324         return err;
5325 }
5326
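/*
 * The mask is parsed by cpumask_parse_user(), i.e. written as a hex bitmap.
 * For example (assuming tracefs is mounted at /sys/kernel/tracing):
 *
 *	echo 3 > /sys/kernel/tracing/tracing_cpumask	# trace CPUs 0-1 only
 */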
5327 static const struct file_operations tracing_cpumask_fops = {
5328         .open           = tracing_open_generic_tr,
5329         .read           = tracing_cpumask_read,
5330         .write          = tracing_cpumask_write,
5331         .release        = tracing_release_generic_tr,
5332         .llseek         = generic_file_llseek,
5333 };
5334
5335 static int tracing_trace_options_show(struct seq_file *m, void *v)
5336 {
5337         struct tracer_opt *trace_opts;
5338         struct trace_array *tr = m->private;
5339         u32 tracer_flags;
5340         int i;
5341
5342         mutex_lock(&trace_types_lock);
5343         tracer_flags = tr->current_trace->flags->val;
5344         trace_opts = tr->current_trace->flags->opts;
5345
5346         for (i = 0; trace_options[i]; i++) {
5347                 if (tr->trace_flags & (1 << i))
5348                         seq_printf(m, "%s\n", trace_options[i]);
5349                 else
5350                         seq_printf(m, "no%s\n", trace_options[i]);
5351         }
5352
5353         for (i = 0; trace_opts[i].name; i++) {
5354                 if (tracer_flags & trace_opts[i].bit)
5355                         seq_printf(m, "%s\n", trace_opts[i].name);
5356                 else
5357                         seq_printf(m, "no%s\n", trace_opts[i].name);
5358         }
5359         mutex_unlock(&trace_types_lock);
5360
5361         return 0;
5362 }
5363
5364 static int __set_tracer_option(struct trace_array *tr,
5365                                struct tracer_flags *tracer_flags,
5366                                struct tracer_opt *opts, int neg)
5367 {
5368         struct tracer *trace = tracer_flags->trace;
5369         int ret;
5370
5371         ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5372         if (ret)
5373                 return ret;
5374
5375         if (neg)
5376                 tracer_flags->val &= ~opts->bit;
5377         else
5378                 tracer_flags->val |= opts->bit;
5379         return 0;
5380 }
5381
5382 /* Try to assign a tracer specific option */
5383 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5384 {
5385         struct tracer *trace = tr->current_trace;
5386         struct tracer_flags *tracer_flags = trace->flags;
5387         struct tracer_opt *opts = NULL;
5388         int i;
5389
5390         for (i = 0; tracer_flags->opts[i].name; i++) {
5391                 opts = &tracer_flags->opts[i];
5392
5393                 if (strcmp(cmp, opts->name) == 0)
5394                         return __set_tracer_option(tr, trace->flags, opts, neg);
5395         }
5396
5397         return -EINVAL;
5398 }
5399
5400 /* Some tracers require overwrite to stay enabled */
5401 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5402 {
5403         if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5404                 return -1;
5405
5406         return 0;
5407 }
5408
5409 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5410 {
5411         int *map;
5412
5413         if ((mask == TRACE_ITER_RECORD_TGID) ||
5414             (mask == TRACE_ITER_RECORD_CMD))
5415                 lockdep_assert_held(&event_mutex);
5416
5417         /* do nothing if flag is already set */
5418         if (!!(tr->trace_flags & mask) == !!enabled)
5419                 return 0;
5420
5421         /* Give the tracer a chance to approve the change */
5422         if (tr->current_trace->flag_changed)
5423                 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5424                         return -EINVAL;
5425
5426         if (enabled)
5427                 tr->trace_flags |= mask;
5428         else
5429                 tr->trace_flags &= ~mask;
5430
5431         if (mask == TRACE_ITER_RECORD_CMD)
5432                 trace_event_enable_cmd_record(enabled);
5433
5434         if (mask == TRACE_ITER_RECORD_TGID) {
5435                 if (!tgid_map) {
5436                         tgid_map_max = pid_max;
5437                         map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5438                                        GFP_KERNEL);
5439
5440                         /*
5441                          * Pairs with smp_load_acquire() in
5442                          * trace_find_tgid_ptr() to ensure that if it observes
5443                          * the tgid_map we just allocated then it also observes
5444                          * the corresponding tgid_map_max value.
5445                          */
5446                         smp_store_release(&tgid_map, map);
5447                 }
5448                 if (!tgid_map) {
5449                         tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5450                         return -ENOMEM;
5451                 }
5452
5453                 trace_event_enable_tgid_record(enabled);
5454         }
5455
5456         if (mask == TRACE_ITER_EVENT_FORK)
5457                 trace_event_follow_fork(tr, enabled);
5458
5459         if (mask == TRACE_ITER_FUNC_FORK)
5460                 ftrace_pid_follow_fork(tr, enabled);
5461
5462         if (mask == TRACE_ITER_OVERWRITE) {
5463                 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5464 #ifdef CONFIG_TRACER_MAX_TRACE
5465                 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5466 #endif
5467         }
5468
5469         if (mask == TRACE_ITER_PRINTK) {
5470                 trace_printk_start_stop_comm(enabled);
5471                 trace_printk_control(enabled);
5472         }
5473
5474         return 0;
5475 }
5476
5477 int trace_set_options(struct trace_array *tr, char *option)
5478 {
5479         char *cmp;
5480         int neg = 0;
5481         int ret;
5482         size_t orig_len = strlen(option);
5483         int len;
5484
5485         cmp = strstrip(option);
5486
5487         len = str_has_prefix(cmp, "no");
5488         if (len)
5489                 neg = 1;
5490
5491         cmp += len;
5492
5493         mutex_lock(&event_mutex);
5494         mutex_lock(&trace_types_lock);
5495
5496         ret = match_string(trace_options, -1, cmp);
5497         /* If no option could be set, test the specific tracer options */
5498         if (ret < 0)
5499                 ret = set_tracer_option(tr, cmp, neg);
5500         else
5501                 ret = set_tracer_flag(tr, 1 << ret, !neg);
5502
5503         mutex_unlock(&trace_types_lock);
5504         mutex_unlock(&event_mutex);
5505
5506         /*
5507          * If the first trailing whitespace is replaced with '\0' by strstrip,
5508          * turn it back into a space.
5509          */
5510         if (orig_len > strlen(option))
5511                 option[strlen(option)] = ' ';
5512
5513         return ret;
5514 }
5515
5516 static void __init apply_trace_boot_options(void)
5517 {
5518         char *buf = trace_boot_options_buf;
5519         char *option;
5520
5521         while (true) {
5522                 option = strsep(&buf, ",");
5523
5524                 if (!option)
5525                         break;
5526
5527                 if (*option)
5528                         trace_set_options(&global_trace, option);
5529
5530                 /* Put back the comma to allow this to be called again */
5531                 if (buf)
5532                         *(buf - 1) = ',';
5533         }
5534 }
5535
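/*
 * Handler for writes to the trace_options file. For example (option names
 * depend on the trace_options[] table and the current tracer):
 *
 *	echo noprint-parent > trace_options
 *	echo sym-offset > trace_options
 *
 * A "no" prefix clears the option, as handled by trace_set_options().
 */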
5536 static ssize_t
5537 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5538                         size_t cnt, loff_t *ppos)
5539 {
5540         struct seq_file *m = filp->private_data;
5541         struct trace_array *tr = m->private;
5542         char buf[64];
5543         int ret;
5544
5545         if (cnt >= sizeof(buf))
5546                 return -EINVAL;
5547
5548         if (copy_from_user(buf, ubuf, cnt))
5549                 return -EFAULT;
5550
5551         buf[cnt] = 0;
5552
5553         ret = trace_set_options(tr, buf);
5554         if (ret < 0)
5555                 return ret;
5556
5557         *ppos += cnt;
5558
5559         return cnt;
5560 }
5561
5562 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5563 {
5564         struct trace_array *tr = inode->i_private;
5565         int ret;
5566
5567         ret = tracing_check_open_get_tr(tr);
5568         if (ret)
5569                 return ret;
5570
5571         ret = single_open(file, tracing_trace_options_show, inode->i_private);
5572         if (ret < 0)
5573                 trace_array_put(tr);
5574
5575         return ret;
5576 }
5577
5578 static const struct file_operations tracing_iter_fops = {
5579         .open           = tracing_trace_options_open,
5580         .read           = seq_read,
5581         .llseek         = seq_lseek,
5582         .release        = tracing_single_release_tr,
5583         .write          = tracing_trace_options_write,
5584 };
5585
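/* Help text returned read-only by tracing_readme_read() below. */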
5586 static const char readme_msg[] =
5587         "tracing mini-HOWTO:\n\n"
5588         "# echo 0 > tracing_on : quick way to disable tracing\n"
5589         "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5590         " Important files:\n"
5591         "  trace\t\t\t- The static contents of the buffer\n"
5592         "\t\t\t  To clear the buffer write into this file: echo > trace\n"
5593         "  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5594         "  current_tracer\t- function and latency tracers\n"
5595         "  available_tracers\t- list of configured tracers for current_tracer\n"
5596         "  error_log\t- error log for failed commands (that support it)\n"
5597         "  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5598         "  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5599         "  trace_clock\t\t- change the clock used to order events\n"
5600         "       local:   Per cpu clock but may not be synced across CPUs\n"
5601         "      global:   Synced across CPUs but slows tracing down.\n"
5602         "     counter:   Not a clock, but just an increment\n"
5603         "      uptime:   Jiffy counter from time of boot\n"
5604         "        perf:   Same clock that perf events use\n"
5605 #ifdef CONFIG_X86_64
5606         "     x86-tsc:   TSC cycle counter\n"
5607 #endif
5608         "\n  timestamp_mode\t- view the mode used to timestamp events\n"
5609         "       delta:   Delta difference against a buffer-wide timestamp\n"
5610         "    absolute:   Absolute (standalone) timestamp\n"
5611         "\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5612         "\n  trace_marker_raw\t\t- Writes into this file write binary data into the kernel buffer\n"
5613         "  tracing_cpumask\t- Limit which CPUs to trace\n"
5614         "  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5615         "\t\t\t  Remove sub-buffer with rmdir\n"
5616         "  trace_options\t\t- Set format or modify how tracing happens\n"
5617         "\t\t\t  Disable an option by prefixing 'no' to the\n"
5618         "\t\t\t  option name\n"
5619         "  saved_cmdlines_size\t- echo the number of comm-pid entries to store in here\n"
5620 #ifdef CONFIG_DYNAMIC_FTRACE
5621         "\n  available_filter_functions - list of functions that can be filtered on\n"
5622         "  set_ftrace_filter\t- echo function name in here to only trace these\n"
5623         "\t\t\t  functions\n"
5624         "\t     accepts: func_full_name or glob-matching-pattern\n"
5625         "\t     modules: Can select a group via module\n"
5626         "\t      Format: :mod:<module-name>\n"
5627         "\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5628         "\t    triggers: a command to perform when function is hit\n"
5629         "\t      Format: <function>:<trigger>[:count]\n"
5630         "\t     trigger: traceon, traceoff\n"
5631         "\t\t      enable_event:<system>:<event>\n"
5632         "\t\t      disable_event:<system>:<event>\n"
5633 #ifdef CONFIG_STACKTRACE
5634         "\t\t      stacktrace\n"
5635 #endif
5636 #ifdef CONFIG_TRACER_SNAPSHOT
5637         "\t\t      snapshot\n"
5638 #endif
5639         "\t\t      dump\n"
5640         "\t\t      cpudump\n"
5641         "\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5642         "\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5643         "\t     The first one will disable tracing every time do_fault is hit\n"
5644         "\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5645         "\t       The first time do_trap is hit and it disables tracing, the\n"
5646         "\t       counter will decrement to 2. If tracing is already disabled,\n"
5647         "\t       the counter will not decrement. It only decrements when the\n"
5648         "\t       trigger did work\n"
5649         "\t     To remove trigger without count:\n"
5650         "\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5651         "\t     To remove trigger with a count:\n"
5652         "\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5653         "  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5654         "\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5655         "\t    modules: Can select a group via module command :mod:\n"
5656         "\t    Does not accept triggers\n"
5657 #endif /* CONFIG_DYNAMIC_FTRACE */
5658 #ifdef CONFIG_FUNCTION_TRACER
5659         "  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5660         "\t\t    (function)\n"
5661         "  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5662         "\t\t    (function)\n"
5663 #endif
5664 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5665         "  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5666         "  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5667         "  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5668 #endif
5669 #ifdef CONFIG_TRACER_SNAPSHOT
5670         "\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5671         "\t\t\t  snapshot buffer. Read the contents for more\n"
5672         "\t\t\t  information\n"
5673 #endif
5674 #ifdef CONFIG_STACK_TRACER
5675         "  stack_trace\t\t- Shows the max stack trace when active\n"
5676         "  stack_max_size\t- Shows current max stack size that was traced\n"
5677         "\t\t\t  Write into this file to reset the max size (trigger a\n"
5678         "\t\t\t  new trace)\n"
5679 #ifdef CONFIG_DYNAMIC_FTRACE
5680         "  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5681         "\t\t\t  traces\n"
5682 #endif
5683 #endif /* CONFIG_STACK_TRACER */
5684 #ifdef CONFIG_DYNAMIC_EVENTS
5685         "  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5686         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5687 #endif
5688 #ifdef CONFIG_KPROBE_EVENTS
5689         "  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5690         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5691 #endif
5692 #ifdef CONFIG_UPROBE_EVENTS
5693         "  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5694         "\t\t\t  Write into this file to define/undefine new trace events.\n"
5695 #endif
5696 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5697     defined(CONFIG_FPROBE_EVENTS)
5698         "\t  accepts: event-definitions (one definition per line)\n"
5699 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5700         "\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5701         "\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5702 #endif
5703 #ifdef CONFIG_FPROBE_EVENTS
5704         "\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5705         "\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5706 #endif
5707 #ifdef CONFIG_HIST_TRIGGERS
5708         "\t           s:[synthetic/]<event> <field> [<field>]\n"
5709 #endif
5710         "\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5711         "\t           -:[<group>/][<event>]\n"
5712 #ifdef CONFIG_KPROBE_EVENTS
5713         "\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5714   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5715 #endif
5716 #ifdef CONFIG_UPROBE_EVENTS
5717   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5718 #endif
5719         "\t     args: <name>=fetcharg[:type]\n"
5720         "\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5721 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5722 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5723         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5724         "\t           <argname>[->field[->field|.field...]],\n"
5725 #else
5726         "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5727 #endif
5728 #else
5729         "\t           $stack<index>, $stack, $retval, $comm,\n"
5730 #endif
5731         "\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5732         "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5733         "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5734         "\t           symstr, <type>\\[<array-size>\\]\n"
5735 #ifdef CONFIG_HIST_TRIGGERS
5736         "\t    field: <stype> <name>;\n"
5737         "\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5738         "\t           [unsigned] char/int/long\n"
5739 #endif
5740         "\t    efield: For event probes ('e' types), the field is one of the fields\n"
5741         "\t            of the <attached-group>/<attached-event>.\n"
5742 #endif
5743         "  events/\t\t- Directory containing all trace event subsystems:\n"
5744         "      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5745         "  events/<system>/\t- Directory containing all trace events for <system>:\n"
5746         "      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5747         "\t\t\t  events\n"
5748         "      filter\t\t- If set, only events passing filter are traced\n"
5749         "  events/<system>/<event>/\t- Directory containing control files for\n"
5750         "\t\t\t  <event>:\n"
5751         "      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5752         "      filter\t\t- If set, only events passing filter are traced\n"
5753         "      trigger\t\t- If set, a command to perform when event is hit\n"
5754         "\t    Format: <trigger>[:count][if <filter>]\n"
5755         "\t   trigger: traceon, traceoff\n"
5756         "\t            enable_event:<system>:<event>\n"
5757         "\t            disable_event:<system>:<event>\n"
5758 #ifdef CONFIG_HIST_TRIGGERS
5759         "\t            enable_hist:<system>:<event>\n"
5760         "\t            disable_hist:<system>:<event>\n"
5761 #endif
5762 #ifdef CONFIG_STACKTRACE
5763         "\t\t    stacktrace\n"
5764 #endif
5765 #ifdef CONFIG_TRACER_SNAPSHOT
5766         "\t\t    snapshot\n"
5767 #endif
5768 #ifdef CONFIG_HIST_TRIGGERS
5769         "\t\t    hist (see below)\n"
5770 #endif
5771         "\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5772         "\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5773         "\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5774         "\t                  events/block/block_unplug/trigger\n"
5775         "\t   The first disables tracing every time block_unplug is hit.\n"
5776         "\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5777         "\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5778         "\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5779         "\t   Like function triggers, the counter is only decremented if it\n"
5780         "\t    enabled or disabled tracing.\n"
5781         "\t   To remove a trigger without a count:\n"
5782         "\t     echo '!<trigger> > <system>/<event>/trigger\n"
5783         "\t   To remove a trigger with a count:\n"
5784         "\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5785         "\t   Filters can be ignored when removing a trigger.\n"
5786 #ifdef CONFIG_HIST_TRIGGERS
5787         "      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5788         "\t    Format: hist:keys=<field1[,field2,...]>\n"
5789         "\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5790         "\t            [:values=<field1[,field2,...]>]\n"
5791         "\t            [:sort=<field1[,field2,...]>]\n"
5792         "\t            [:size=#entries]\n"
5793         "\t            [:pause][:continue][:clear]\n"
5794         "\t            [:name=histname1]\n"
5795         "\t            [:nohitcount]\n"
5796         "\t            [:<handler>.<action>]\n"
5797         "\t            [if <filter>]\n\n"
5798         "\t    Note, special fields can be used as well:\n"
5799         "\t            common_timestamp - to record current timestamp\n"
5800         "\t            common_cpu - to record the CPU the event happened on\n"
5801         "\n"
5802         "\t    A hist trigger variable can be:\n"
5803         "\t        - a reference to a field e.g. x=current_timestamp,\n"
5804         "\t        - a reference to another variable e.g. y=$x,\n"
5805         "\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5806         "\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5807         "\n"
5808         "\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5809         "\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5810         "\t    variable reference, field or numeric literal.\n"
5811         "\n"
5812         "\t    When a matching event is hit, an entry is added to a hash\n"
5813         "\t    table using the key(s) and value(s) named, and the value of a\n"
5814         "\t    sum called 'hitcount' is incremented.  Keys and values\n"
5815         "\t    correspond to fields in the event's format description.  Keys\n"
5816         "\t    can be any field, or the special string 'common_stacktrace'.\n"
5817         "\t    Compound keys consisting of up to two fields can be specified\n"
5818         "\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5819         "\t    fields.  Sort keys consisting of up to two fields can be\n"
5820         "\t    specified using the 'sort' keyword.  The sort direction can\n"
5821         "\t    be modified by appending '.descending' or '.ascending' to a\n"
5822         "\t    sort field.  The 'size' parameter can be used to specify more\n"
5823         "\t    or fewer than the default 2048 entries for the hashtable size.\n"
5824         "\t    If a hist trigger is given a name using the 'name' parameter,\n"
5825         "\t    its histogram data will be shared with other triggers of the\n"
5826         "\t    same name, and trigger hits will update this common data.\n\n"
5827         "\t    Reading the 'hist' file for the event will dump the hash\n"
5828         "\t    table in its entirety to stdout.  If there are multiple hist\n"
5829         "\t    triggers attached to an event, there will be a table for each\n"
5830         "\t    trigger in the output.  The table displayed for a named\n"
5831         "\t    trigger will be the same as any other instance having the\n"
5832         "\t    same name.  The default format used to display a given field\n"
5833         "\t    can be modified by appending any of the following modifiers\n"
5834         "\t    to the field name, as applicable:\n\n"
5835         "\t            .hex        display a number as a hex value\n"
5836         "\t            .sym        display an address as a symbol\n"
5837         "\t            .sym-offset display an address as a symbol and offset\n"
5838         "\t            .execname   display a common_pid as a program name\n"
5839         "\t            .syscall    display a syscall id as a syscall name\n"
5840         "\t            .log2       display log2 value rather than raw number\n"
5841         "\t            .buckets=size  display values in groups of size rather than raw number\n"
5842         "\t            .usecs      display a common_timestamp in microseconds\n"
5843         "\t            .percent    display a number as a percentage value\n"
5844         "\t            .graph      display a bar-graph of a value\n\n"
5845         "\t    The 'pause' parameter can be used to pause an existing hist\n"
5846         "\t    trigger or to start a hist trigger but not log any events\n"
5847         "\t    until told to do so.  'continue' can be used to start or\n"
5848         "\t    restart a paused hist trigger.\n\n"
5849         "\t    The 'clear' parameter will clear the contents of a running\n"
5850         "\t    hist trigger and leave its current paused/active state\n"
5851         "\t    unchanged.\n\n"
5852         "\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5853         "\t    raw hitcount in the histogram.\n\n"
5854         "\t    The enable_hist and disable_hist triggers can be used to\n"
5855         "\t    have one event conditionally start and stop another event's\n"
5856         "\t    already-attached hist trigger.  The syntax is analogous to\n"
5857         "\t    the enable_event and disable_event triggers.\n\n"
5858         "\t    Hist trigger handlers and actions are executed whenever a\n"
5859         "\t    histogram entry is added or updated.  They take the form:\n\n"
5860         "\t        <handler>.<action>\n\n"
5861         "\t    The available handlers are:\n\n"
5862         "\t        onmatch(matching.event)  - invoke on addition or update\n"
5863         "\t        onmax(var)               - invoke if var exceeds current max\n"
5864         "\t        onchange(var)            - invoke action if var changes\n\n"
5865         "\t    The available actions are:\n\n"
5866         "\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5867         "\t        save(field,...)                      - save current event fields\n"
5868 #ifdef CONFIG_TRACER_SNAPSHOT
5869         "\t        snapshot()                           - snapshot the trace buffer\n\n"
5870 #endif
5871 #ifdef CONFIG_SYNTH_EVENTS
5872         "  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5873         "\t  Write into this file to define/undefine new synthetic events.\n"
5874         "\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5875 #endif
5876 #endif
5877 ;
5878
5879 static ssize_t
5880 tracing_readme_read(struct file *filp, char __user *ubuf,
5881                        size_t cnt, loff_t *ppos)
5882 {
5883         return simple_read_from_buffer(ubuf, cnt, ppos,
5884                                         readme_msg, strlen(readme_msg));
5885 }
5886
5887 static const struct file_operations tracing_readme_fops = {
5888         .open           = tracing_open_generic,
5889         .read           = tracing_readme_read,
5890         .llseek         = generic_file_llseek,
5891 };
5892
5893 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5894 {
5895         int pid = ++(*pos);
5896
5897         return trace_find_tgid_ptr(pid);
5898 }
5899
5900 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5901 {
5902         int pid = *pos;
5903
5904         return trace_find_tgid_ptr(pid);
5905 }
5906
5907 static void saved_tgids_stop(struct seq_file *m, void *v)
5908 {
5909 }
5910
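/* tgid_map is indexed by pid, so the pid is the entry's offset into the map. */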
5911 static int saved_tgids_show(struct seq_file *m, void *v)
5912 {
5913         int *entry = (int *)v;
5914         int pid = entry - tgid_map;
5915         int tgid = *entry;
5916
5917         if (tgid == 0)
5918                 return SEQ_SKIP;
5919
5920         seq_printf(m, "%d %d\n", pid, tgid);
5921         return 0;
5922 }
5923
5924 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5925         .start          = saved_tgids_start,
5926         .stop           = saved_tgids_stop,
5927         .next           = saved_tgids_next,
5928         .show           = saved_tgids_show,
5929 };
5930
5931 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5932 {
5933         int ret;
5934
5935         ret = tracing_check_open_get_tr(NULL);
5936         if (ret)
5937                 return ret;
5938
5939         return seq_open(filp, &tracing_saved_tgids_seq_ops);
5940 }
5941
5942
5943 static const struct file_operations tracing_saved_tgids_fops = {
5944         .open           = tracing_saved_tgids_open,
5945         .read           = seq_read,
5946         .llseek         = seq_lseek,
5947         .release        = seq_release,
5948 };
5949
5950 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5951 {
5952         unsigned int *ptr = v;
5953
5954         if (*pos || m->count)
5955                 ptr++;
5956
5957         (*pos)++;
5958
5959         for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5960              ptr++) {
5961                 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5962                         continue;
5963
5964                 return ptr;
5965         }
5966
5967         return NULL;
5968 }
5969
5970 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5971 {
5972         void *v;
5973         loff_t l = 0;
5974
5975         preempt_disable();
5976         arch_spin_lock(&trace_cmdline_lock);
5977
5978         v = &savedcmd->map_cmdline_to_pid[0];
5979         while (l <= *pos) {
5980                 v = saved_cmdlines_next(m, v, &l);
5981                 if (!v)
5982                         return NULL;
5983         }
5984
5985         return v;
5986 }
5987
5988 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5989 {
5990         arch_spin_unlock(&trace_cmdline_lock);
5991         preempt_enable();
5992 }
5993
5994 static int saved_cmdlines_show(struct seq_file *m, void *v)
5995 {
5996         char buf[TASK_COMM_LEN];
5997         unsigned int *pid = v;
5998
5999         __trace_find_cmdline(*pid, buf);
6000         seq_printf(m, "%d %s\n", *pid, buf);
6001         return 0;
6002 }
6003
6004 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6005         .start          = saved_cmdlines_start,
6006         .next           = saved_cmdlines_next,
6007         .stop           = saved_cmdlines_stop,
6008         .show           = saved_cmdlines_show,
6009 };
6010
6011 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6012 {
6013         int ret;
6014
6015         ret = tracing_check_open_get_tr(NULL);
6016         if (ret)
6017                 return ret;
6018
6019         return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6020 }
6021
6022 static const struct file_operations tracing_saved_cmdlines_fops = {
6023         .open           = tracing_saved_cmdlines_open,
6024         .read           = seq_read,
6025         .llseek         = seq_lseek,
6026         .release        = seq_release,
6027 };
6028
6029 static ssize_t
6030 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6031                                  size_t cnt, loff_t *ppos)
6032 {
6033         char buf[64];
6034         int r;
6035
6036         preempt_disable();
6037         arch_spin_lock(&trace_cmdline_lock);
6038         r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6039         arch_spin_unlock(&trace_cmdline_lock);
6040         preempt_enable();
6041
6042         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6043 }
6044
6045 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
6046 {
6047         kfree(s->saved_cmdlines);
6048         kfree(s->map_cmdline_to_pid);
6049         kfree(s);
6050 }
6051
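/*
 * Allocate a new buffer of the requested size and swap it in under
 * trace_cmdline_lock; the old buffer is freed after the lock is dropped.
 */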
6052 static int tracing_resize_saved_cmdlines(unsigned int val)
6053 {
6054         struct saved_cmdlines_buffer *s, *savedcmd_temp;
6055
6056         s = kmalloc(sizeof(*s), GFP_KERNEL);
6057         if (!s)
6058                 return -ENOMEM;
6059
6060         if (allocate_cmdlines_buffer(val, s) < 0) {
6061                 kfree(s);
6062                 return -ENOMEM;
6063         }
6064
6065         preempt_disable();
6066         arch_spin_lock(&trace_cmdline_lock);
6067         savedcmd_temp = savedcmd;
6068         savedcmd = s;
6069         arch_spin_unlock(&trace_cmdline_lock);
6070         preempt_enable();
6071         free_saved_cmdlines_buffer(savedcmd_temp);
6072
6073         return 0;
6074 }
6075
6076 static ssize_t
6077 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6078                                   size_t cnt, loff_t *ppos)
6079 {
6080         unsigned long val;
6081         int ret;
6082
6083         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6084         if (ret)
6085                 return ret;
6086
 6087         /* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6088         if (!val || val > PID_MAX_DEFAULT)
6089                 return -EINVAL;
6090
6091         ret = tracing_resize_saved_cmdlines((unsigned int)val);
6092         if (ret < 0)
6093                 return ret;
6094
6095         *ppos += cnt;
6096
6097         return cnt;
6098 }
6099
6100 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6101         .open           = tracing_open_generic,
6102         .read           = tracing_saved_cmdlines_size_read,
6103         .write          = tracing_saved_cmdlines_size_write,
6104 };
6105
6106 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6107 static union trace_eval_map_item *
6108 update_eval_map(union trace_eval_map_item *ptr)
6109 {
6110         if (!ptr->map.eval_string) {
6111                 if (ptr->tail.next) {
6112                         ptr = ptr->tail.next;
6113                         /* Set ptr to the next real item (skip head) */
6114                         ptr++;
6115                 } else
6116                         return NULL;
6117         }
6118         return ptr;
6119 }
6120
6121 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6122 {
6123         union trace_eval_map_item *ptr = v;
6124
6125         /*
6126          * Paranoid! If ptr points to end, we don't want to increment past it.
6127          * This really should never happen.
6128          */
6129         (*pos)++;
6130         ptr = update_eval_map(ptr);
6131         if (WARN_ON_ONCE(!ptr))
6132                 return NULL;
6133
6134         ptr++;
6135         ptr = update_eval_map(ptr);
6136
6137         return ptr;
6138 }
6139
6140 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6141 {
6142         union trace_eval_map_item *v;
6143         loff_t l = 0;
6144
6145         mutex_lock(&trace_eval_mutex);
6146
6147         v = trace_eval_maps;
6148         if (v)
6149                 v++;
6150
6151         while (v && l < *pos) {
6152                 v = eval_map_next(m, v, &l);
6153         }
6154
6155         return v;
6156 }
6157
6158 static void eval_map_stop(struct seq_file *m, void *v)
6159 {
6160         mutex_unlock(&trace_eval_mutex);
6161 }
6162
6163 static int eval_map_show(struct seq_file *m, void *v)
6164 {
6165         union trace_eval_map_item *ptr = v;
6166
6167         seq_printf(m, "%s %ld (%s)\n",
6168                    ptr->map.eval_string, ptr->map.eval_value,
6169                    ptr->map.system);
6170
6171         return 0;
6172 }
6173
6174 static const struct seq_operations tracing_eval_map_seq_ops = {
6175         .start          = eval_map_start,
6176         .next           = eval_map_next,
6177         .stop           = eval_map_stop,
6178         .show           = eval_map_show,
6179 };
6180
6181 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6182 {
6183         int ret;
6184
6185         ret = tracing_check_open_get_tr(NULL);
6186         if (ret)
6187                 return ret;
6188
6189         return seq_open(filp, &tracing_eval_map_seq_ops);
6190 }
6191
6192 static const struct file_operations tracing_eval_map_fops = {
6193         .open           = tracing_eval_map_open,
6194         .read           = seq_read,
6195         .llseek         = seq_lseek,
6196         .release        = seq_release,
6197 };
6198
6199 static inline union trace_eval_map_item *
6200 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6201 {
6202         /* Return tail of array given the head */
6203         return ptr + ptr->head.length + 1;
6204 }
6205
6206 static void
6207 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6208                            int len)
6209 {
6210         struct trace_eval_map **stop;
6211         struct trace_eval_map **map;
6212         union trace_eval_map_item *map_array;
6213         union trace_eval_map_item *ptr;
6214
6215         stop = start + len;
6216
6217         /*
6218          * Each array in the trace_eval_maps list holds one module's maps plus
6219          * a head and a tail item: the head holds the module and the length of
6220          * the array, and the tail holds a pointer to the next array in the list.
6221          */
6222         map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6223         if (!map_array) {
6224                 pr_warn("Unable to allocate trace eval mapping\n");
6225                 return;
6226         }
6227
6228         mutex_lock(&trace_eval_mutex);
6229
6230         if (!trace_eval_maps)
6231                 trace_eval_maps = map_array;
6232         else {
6233                 ptr = trace_eval_maps;
6234                 for (;;) {
6235                         ptr = trace_eval_jmp_to_tail(ptr);
6236                         if (!ptr->tail.next)
6237                                 break;
6238                         ptr = ptr->tail.next;
6240                 }
6241                 ptr->tail.next = map_array;
6242         }
6243         map_array->head.mod = mod;
6244         map_array->head.length = len;
6245         map_array++;
6246
6247         for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6248                 map_array->map = **map;
6249                 map_array++;
6250         }
6251         memset(map_array, 0, sizeof(*map_array));
6252
6253         mutex_unlock(&trace_eval_mutex);
6254 }
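
/*
 * A sketch (not built) of how the arrays linked above are laid out and
 * walked: each per-module array is [head][map 0] ... [map len - 1][tail],
 * and tail.next points at the next module's array, which is the same step
 * that trace_eval_jmp_to_tail() performs.
 */
#if 0
static void eval_map_walk_sketch(void)
{
	/* The real walkers hold trace_eval_mutex while doing this. */
	union trace_eval_map_item *ptr = trace_eval_maps;

	while (ptr) {
		int i;

		for (i = 0; i < ptr->head.length; i++)
			pr_info("%s %ld (%s)\n", ptr[i + 1].map.eval_string,
				ptr[i + 1].map.eval_value,
				ptr[i + 1].map.system);

		/* The tail item links to the next module's array. */
		ptr = ptr[ptr->head.length + 1].tail.next;
	}
}
#endif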
6255
6256 static void trace_create_eval_file(struct dentry *d_tracer)
6257 {
6258         trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6259                           NULL, &tracing_eval_map_fops);
6260 }
6261
6262 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6263 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6264 static inline void trace_insert_eval_map_file(struct module *mod,
6265                               struct trace_eval_map **start, int len) { }
6266 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6267
6268 static void trace_insert_eval_map(struct module *mod,
6269                                   struct trace_eval_map **start, int len)
6270 {
6271         struct trace_eval_map **map;
6272
6273         if (len <= 0)
6274                 return;
6275
6276         map = start;
6277
6278         trace_event_eval_update(map, len);
6279
6280         trace_insert_eval_map_file(mod, start, len);
6281 }
6282
6283 static ssize_t
6284 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6285                        size_t cnt, loff_t *ppos)
6286 {
6287         struct trace_array *tr = filp->private_data;
6288         char buf[MAX_TRACER_SIZE+2];
6289         int r;
6290
6291         mutex_lock(&trace_types_lock);
6292         r = sprintf(buf, "%s\n", tr->current_trace->name);
6293         mutex_unlock(&trace_types_lock);
6294
6295         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6296 }
6297
6298 int tracer_init(struct tracer *t, struct trace_array *tr)
6299 {
6300         tracing_reset_online_cpus(&tr->array_buffer);
6301         return t->init(tr);
6302 }
6303
6304 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6305 {
6306         int cpu;
6307
6308         for_each_tracing_cpu(cpu)
6309                 per_cpu_ptr(buf->data, cpu)->entries = val;
6310 }
6311
6312 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6313 {
6314         if (cpu == RING_BUFFER_ALL_CPUS) {
6315                 set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6316         } else {
6317                 per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6318         }
6319 }
6320
6321 #ifdef CONFIG_TRACER_MAX_TRACE
6322 /* resize @trace_buf's buffer to the size of @size_buf's entries */
6323 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6324                                         struct array_buffer *size_buf, int cpu_id)
6325 {
6326         int cpu, ret = 0;
6327
6328         if (cpu_id == RING_BUFFER_ALL_CPUS) {
6329                 for_each_tracing_cpu(cpu) {
6330                         ret = ring_buffer_resize(trace_buf->buffer,
6331                                  per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6332                         if (ret < 0)
6333                                 break;
6334                         per_cpu_ptr(trace_buf->data, cpu)->entries =
6335                                 per_cpu_ptr(size_buf->data, cpu)->entries;
6336                 }
6337         } else {
6338                 ret = ring_buffer_resize(trace_buf->buffer,
6339                                  per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6340                 if (ret == 0)
6341                         per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6342                                 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6343         }
6344
6345         return ret;
6346 }
6347 #endif /* CONFIG_TRACER_MAX_TRACE */
6348
6349 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6350                                         unsigned long size, int cpu)
6351 {
6352         int ret;
6353
6354         /*
6355          * If kernel or user changes the size of the ring buffer
6356          * we use the size that was given, and we can forget about
6357          * expanding it later.
6358          */
6359         trace_set_ring_buffer_expanded(tr);
6360
6361         /* May be called before buffers are initialized */
6362         if (!tr->array_buffer.buffer)
6363                 return 0;
6364
6365         /* Do not allow tracing while resizing ring buffer */
6366         tracing_stop_tr(tr);
6367
6368         ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6369         if (ret < 0)
6370                 goto out_start;
6371
6372 #ifdef CONFIG_TRACER_MAX_TRACE
6373         if (!tr->allocated_snapshot)
6374                 goto out;
6375
6376         ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6377         if (ret < 0) {
6378                 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6379                                                      &tr->array_buffer, cpu);
6380                 if (r < 0) {
6381                         /*
6382                          * AARGH! We are left with a max buffer of a
6383                          * different size!
6384                          * The max buffer is our "snapshot" buffer.
6385                          * When a tracer needs a snapshot (one of the
6386                          * latency tracers), it swaps the max buffer
6387                          * with the saved snapshot. We succeeded in
6388                          * updating the size of the main buffer, but
6389                          * failed to update the size of the max buffer.
6390                          * Then, when we tried to reset the main buffer
6391                          * to its original size, we failed there too.
6392                          * This is very unlikely to happen, but if it
6393                          * does, warn and kill all tracing.
6394                          */
6395                         WARN_ON(1);
6396                         tracing_disabled = 1;
6397                 }
6398                 goto out_start;
6399         }
6400
6401         update_buffer_entries(&tr->max_buffer, cpu);
6402
6403  out:
6404 #endif /* CONFIG_TRACER_MAX_TRACE */
6405
6406         update_buffer_entries(&tr->array_buffer, cpu);
6407  out_start:
6408         tracing_start_tr(tr);
6409         return ret;
6410 }
6411
6412 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6413                                   unsigned long size, int cpu_id)
6414 {
6415         int ret;
6416
6417         mutex_lock(&trace_types_lock);
6418
6419         if (cpu_id != RING_BUFFER_ALL_CPUS) {
6420                 /* make sure this CPU is enabled in the mask */
6421                 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6422                         ret = -EINVAL;
6423                         goto out;
6424                 }
6425         }
6426
6427         ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6428         if (ret < 0)
6429                 ret = -ENOMEM;
6430
6431 out:
6432         mutex_unlock(&trace_types_lock);
6433
6434         return ret;
6435 }
6436
6438 /**
6439  * tracing_update_buffers - used by tracing facility to expand ring buffers
6440  * @tr: The tracing instance
6441  *
6442  * To save memory when tracing is never used on a system that has it
6443  * configured in, the ring buffers are set to a minimum size. Once a
6444  * user starts to use the tracing facility, they need to grow to their
6445  * default size.
6446  *
6447  * This function is to be called when a tracer is about to be used.
6448  */
6449 int tracing_update_buffers(struct trace_array *tr)
6450 {
6451         int ret = 0;
6452
6453         mutex_lock(&trace_types_lock);
6454         if (!tr->ring_buffer_expanded)
6455                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6456                                                 RING_BUFFER_ALL_CPUS);
6457         mutex_unlock(&trace_types_lock);
6458
6459         return ret;
6460 }
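
/*
 * A sketch (not built) of the expected calling pattern described above:
 * a hypothetical enable path expands the boot-time minimal buffers before
 * anything is traced into them.
 */
#if 0
static int example_enable_path(struct trace_array *tr)
{
	int ret;

	/* No-op once the buffers have already been expanded. */
	ret = tracing_update_buffers(tr);
	if (ret < 0)
		return ret;

	/* ...now safe to enable events or a tracer on @tr... */
	return 0;
}
#endif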
6461
6462 struct trace_option_dentry;
6463
6464 static void
6465 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6466
6467 /*
6468  * Used to clear out the tracer before deletion of an instance.
6469  * Must have trace_types_lock held.
6470  */
6471 static void tracing_set_nop(struct trace_array *tr)
6472 {
6473         if (tr->current_trace == &nop_trace)
6474                 return;
6475
6476         tr->current_trace->enabled--;
6477
6478         if (tr->current_trace->reset)
6479                 tr->current_trace->reset(tr);
6480
6481         tr->current_trace = &nop_trace;
6482 }
6483
6484 static bool tracer_options_updated;
6485
6486 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6487 {
6488         /* Only enable if the directory has been created already. */
6489         if (!tr->dir)
6490                 return;
6491
6492         /* Only create trace option files after update_tracer_options() finishes */
6493         if (!tracer_options_updated)
6494                 return;
6495
6496         create_trace_option_files(tr, t);
6497 }
6498
6499 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6500 {
6501         struct tracer *t;
6502 #ifdef CONFIG_TRACER_MAX_TRACE
6503         bool had_max_tr;
6504 #endif
6505         int ret = 0;
6506
6507         mutex_lock(&trace_types_lock);
6508
6509         if (!tr->ring_buffer_expanded) {
6510                 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6511                                                 RING_BUFFER_ALL_CPUS);
6512                 if (ret < 0)
6513                         goto out;
6514                 ret = 0;
6515         }
6516
6517         for (t = trace_types; t; t = t->next) {
6518                 if (strcmp(t->name, buf) == 0)
6519                         break;
6520         }
6521         if (!t) {
6522                 ret = -EINVAL;
6523                 goto out;
6524         }
6525         if (t == tr->current_trace)
6526                 goto out;
6527
6528 #ifdef CONFIG_TRACER_SNAPSHOT
6529         if (t->use_max_tr) {
6530                 local_irq_disable();
6531                 arch_spin_lock(&tr->max_lock);
6532                 if (tr->cond_snapshot)
6533                         ret = -EBUSY;
6534                 arch_spin_unlock(&tr->max_lock);
6535                 local_irq_enable();
6536                 if (ret)
6537                         goto out;
6538         }
6539 #endif
6540         /* Some tracers won't work on kernel command line */
6541         if (system_state < SYSTEM_RUNNING && t->noboot) {
6542                 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6543                         t->name);
6544                 goto out;
6545         }
6546
6547         /* Some tracers are only allowed for the top level buffer */
6548         if (!trace_ok_for_array(t, tr)) {
6549                 ret = -EINVAL;
6550                 goto out;
6551         }
6552
6553         /* If trace pipe files are being read, we can't change the tracer */
6554         if (tr->trace_ref) {
6555                 ret = -EBUSY;
6556                 goto out;
6557         }
6558
6559         trace_branch_disable();
6560
6561         tr->current_trace->enabled--;
6562
6563         if (tr->current_trace->reset)
6564                 tr->current_trace->reset(tr);
6565
6566 #ifdef CONFIG_TRACER_MAX_TRACE
6567         had_max_tr = tr->current_trace->use_max_tr;
6568
6569         /* Current trace needs to be nop_trace before synchronize_rcu */
6570         tr->current_trace = &nop_trace;
6571
6572         if (had_max_tr && !t->use_max_tr) {
6573                 /*
6574                  * We need to make sure that update_max_tr() sees that
6575                  * current_trace changed to nop_trace, to keep it from
6576                  * swapping the buffers after we resize them.
6577                  * update_max_tr() is called with interrupts disabled,
6578                  * so synchronize_rcu() is sufficient.
6579                  */
6580                 synchronize_rcu();
6581                 free_snapshot(tr);
6582         }
6583
6584         if (t->use_max_tr && !tr->allocated_snapshot) {
6585                 ret = tracing_alloc_snapshot_instance(tr);
6586                 if (ret < 0)
6587                         goto out;
6588         }
6589 #else
6590         tr->current_trace = &nop_trace;
6591 #endif
6592
6593         if (t->init) {
6594                 ret = tracer_init(t, tr);
6595                 if (ret)
6596                         goto out;
6597         }
6598
6599         tr->current_trace = t;
6600         tr->current_trace->enabled++;
6601         trace_branch_enable(tr);
6602  out:
6603         mutex_unlock(&trace_types_lock);
6604
6605         return ret;
6606 }
6607
6608 static ssize_t
6609 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6610                         size_t cnt, loff_t *ppos)
6611 {
6612         struct trace_array *tr = filp->private_data;
6613         char buf[MAX_TRACER_SIZE+1];
6614         char *name;
6615         size_t ret;
6616         int err;
6617
6618         ret = cnt;
6619
6620         if (cnt > MAX_TRACER_SIZE)
6621                 cnt = MAX_TRACER_SIZE;
6622
6623         if (copy_from_user(buf, ubuf, cnt))
6624                 return -EFAULT;
6625
6626         buf[cnt] = 0;
6627
6628         name = strim(buf);
6629
6630         err = tracing_set_tracer(tr, name);
6631         if (err)
6632                 return err;
6633
6634         *ppos += ret;
6635
6636         return ret;
6637 }
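
/*
 * A user-space sketch (not built here) of selecting a tracer through the
 * write handler above, assuming tracefs is mounted at /sys/kernel/tracing.
 * Surrounding whitespace (such as the newline echo adds) is stripped by
 * strim() before the name lookup.
 */
#if 0
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/tracing/current_tracer", "w");

	if (!f)
		return 1;
	/* An unknown (or not configured) tracer name fails with EINVAL. */
	fprintf(f, "function\n");
	return fclose(f);
}
#endif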
6638
6639 static ssize_t
6640 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6641                    size_t cnt, loff_t *ppos)
6642 {
6643         char buf[64];
6644         int r;
6645
6646         r = snprintf(buf, sizeof(buf), "%ld\n",
6647                      *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6648         if (r > sizeof(buf))
6649                 r = sizeof(buf);
6650         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6651 }
6652
6653 static ssize_t
6654 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6655                     size_t cnt, loff_t *ppos)
6656 {
6657         unsigned long val;
6658         int ret;
6659
6660         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6661         if (ret)
6662                 return ret;
6663
6664         *ptr = val * 1000;
6665
6666         return cnt;
6667 }
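
/*
 * A user-space sketch (not built here): the files backed by these helpers,
 * such as tracing_thresh below, take and report values in microseconds,
 * while the kernel stores nanoseconds (val * 1000). The path assumes
 * tracefs is mounted at /sys/kernel/tracing.
 */
#if 0
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/kernel/tracing/tracing_thresh", "w");

	if (!f)
		return 1;
	/* 500 here means 500 usecs, stored internally as 500000 ns. */
	fprintf(f, "500\n");
	return fclose(f);
}
#endif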
6668
6669 static ssize_t
6670 tracing_thresh_read(struct file *filp, char __user *ubuf,
6671                     size_t cnt, loff_t *ppos)
6672 {
6673         return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6674 }
6675
6676 static ssize_t
6677 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6678                      size_t cnt, loff_t *ppos)
6679 {
6680         struct trace_array *tr = filp->private_data;
6681         int ret;
6682
6683         mutex_lock(&trace_types_lock);
6684         ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6685         if (ret < 0)
6686                 goto out;
6687
6688         if (tr->current_trace->update_thresh) {
6689                 ret = tr->current_trace->update_thresh(tr);
6690                 if (ret < 0)
6691                         goto out;
6692         }
6693
6694         ret = cnt;
6695 out:
6696         mutex_unlock(&trace_types_lock);
6697
6698         return ret;
6699 }
6700
6701 #ifdef CONFIG_TRACER_MAX_TRACE
6702
6703 static ssize_t
6704 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6705                      size_t cnt, loff_t *ppos)
6706 {
6707         struct trace_array *tr = filp->private_data;
6708
6709         return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6710 }
6711
6712 static ssize_t
6713 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6714                       size_t cnt, loff_t *ppos)
6715 {
6716         struct trace_array *tr = filp->private_data;
6717
6718         return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6719 }
6720
6721 #endif
6722
6723 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6724 {
6725         if (cpu == RING_BUFFER_ALL_CPUS) {
6726                 if (cpumask_empty(tr->pipe_cpumask)) {
6727                         cpumask_setall(tr->pipe_cpumask);
6728                         return 0;
6729                 }
6730         } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6731                 cpumask_set_cpu(cpu, tr->pipe_cpumask);
6732                 return 0;
6733         }
6734         return -EBUSY;
6735 }
6736
6737 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6738 {
6739         if (cpu == RING_BUFFER_ALL_CPUS) {
6740                 WARN_ON(!cpumask_full(tr->pipe_cpumask));
6741                 cpumask_clear(tr->pipe_cpumask);
6742         } else {
6743                 WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6744                 cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6745         }
6746 }
6747
6748 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6749 {
6750         struct trace_array *tr = inode->i_private;
6751         struct trace_iterator *iter;
6752         int cpu;
6753         int ret;
6754
6755         ret = tracing_check_open_get_tr(tr);
6756         if (ret)
6757                 return ret;
6758
6759         mutex_lock(&trace_types_lock);
6760         cpu = tracing_get_cpu(inode);
6761         ret = open_pipe_on_cpu(tr, cpu);
6762         if (ret)
6763                 goto fail_pipe_on_cpu;
6764
6765         /* create a buffer to store the information to pass to userspace */
6766         iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6767         if (!iter) {
6768                 ret = -ENOMEM;
6769                 goto fail_alloc_iter;
6770         }
6771
6772         trace_seq_init(&iter->seq);
6773         iter->trace = tr->current_trace;
6774
6775         if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6776                 ret = -ENOMEM;
6777                 goto fail;
6778         }
6779
6780         /* trace pipe does not show start of buffer */
6781         cpumask_setall(iter->started);
6782
6783         if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6784                 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6785
6786         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6787         if (trace_clocks[tr->clock_id].in_ns)
6788                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6789
6790         iter->tr = tr;
6791         iter->array_buffer = &tr->array_buffer;
6792         iter->cpu_file = cpu;
6793         mutex_init(&iter->mutex);
6794         filp->private_data = iter;
6795
6796         if (iter->trace->pipe_open)
6797                 iter->trace->pipe_open(iter);
6798
6799         nonseekable_open(inode, filp);
6800
6801         tr->trace_ref++;
6802
6803         mutex_unlock(&trace_types_lock);
6804         return ret;
6805
6806 fail:
6807         kfree(iter);
6808 fail_alloc_iter:
6809         close_pipe_on_cpu(tr, cpu);
6810 fail_pipe_on_cpu:
6811         __trace_array_put(tr);
6812         mutex_unlock(&trace_types_lock);
6813         return ret;
6814 }
6815
6816 static int tracing_release_pipe(struct inode *inode, struct file *file)
6817 {
6818         struct trace_iterator *iter = file->private_data;
6819         struct trace_array *tr = inode->i_private;
6820
6821         mutex_lock(&trace_types_lock);
6822
6823         tr->trace_ref--;
6824
6825         if (iter->trace->pipe_close)
6826                 iter->trace->pipe_close(iter);
6827         close_pipe_on_cpu(tr, iter->cpu_file);
6828         mutex_unlock(&trace_types_lock);
6829
6830         free_trace_iter_content(iter);
6831         kfree(iter);
6832
6833         trace_array_put(tr);
6834
6835         return 0;
6836 }
6837
6838 static __poll_t
6839 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6840 {
6841         struct trace_array *tr = iter->tr;
6842
6843         /* Iterators are static; they should either be filled or empty */
6844         if (trace_buffer_iter(iter, iter->cpu_file))
6845                 return EPOLLIN | EPOLLRDNORM;
6846
6847         if (tr->trace_flags & TRACE_ITER_BLOCK)
6848                 /*
6849                  * Always select as readable when in blocking mode
6850                  */
6851                 return EPOLLIN | EPOLLRDNORM;
6852         else
6853                 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6854                                              filp, poll_table, iter->tr->buffer_percent);
6855 }
6856
6857 static __poll_t
6858 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6859 {
6860         struct trace_iterator *iter = filp->private_data;
6861
6862         return trace_poll(iter, filp, poll_table);
6863 }
6864
6865 /* Must be called with iter->mutex held. */
6866 static int tracing_wait_pipe(struct file *filp)
6867 {
6868         struct trace_iterator *iter = filp->private_data;
6869         int ret;
6870
6871         while (trace_empty(iter)) {
6872
6873                 if (filp->f_flags & O_NONBLOCK)
6874                         return -EAGAIN;
6876
6877                 /*
6878                  * We block while the buffer is empty. Tracing being
6879                  * disabled does not end the wait by itself if we have
6880                  * never read anything: this allows a user to cat this
6881                  * file and then enable tracing. But once we have read
6882                  * something, we give an EOF when tracing is disabled again.
6883                  *
6884                  * iter->pos will be 0 if we haven't read anything.
6885                  */
6886                 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6887                         break;
6888
6889                 mutex_unlock(&iter->mutex);
6890
6891                 ret = wait_on_pipe(iter, 0);
6892
6893                 mutex_lock(&iter->mutex);
6894
6895                 if (ret)
6896                         return ret;
6897         }
6898
6899         return 1;
6900 }
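
/*
 * A user-space sketch (not built here) of the two read modes described
 * above: an O_NONBLOCK reader fails with EAGAIN while the buffer is empty,
 * while a blocking reader sleeps in tracing_wait_pipe() until data arrives.
 * Assumes tracefs is mounted at /sys/kernel/tracing.
 */
#if 0
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	int fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY | O_NONBLOCK);
	ssize_t n;

	if (fd < 0)
		return 1;
	n = read(fd, buf, sizeof(buf));
	if (n < 0 && errno == EAGAIN)
		printf("buffer empty, would block without O_NONBLOCK\n");
	else if (n > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
#endif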
6901
6902 /*
6903  * Consumer reader.
6904  */
6905 static ssize_t
6906 tracing_read_pipe(struct file *filp, char __user *ubuf,
6907                   size_t cnt, loff_t *ppos)
6908 {
6909         struct trace_iterator *iter = filp->private_data;
6910         ssize_t sret;
6911
6912         /*
6913          * Avoid more than one consumer on a single file descriptor.
6914          * This is just a matter of trace coherency: the ring buffer
6915          * itself is protected.
6916          */
6917         mutex_lock(&iter->mutex);
6918
6919         /* return any leftover data */
6920         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6921         if (sret != -EBUSY)
6922                 goto out;
6923
6924         trace_seq_init(&iter->seq);
6925
6926         if (iter->trace->read) {
6927                 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6928                 if (sret)
6929                         goto out;
6930         }
6931
6932 waitagain:
6933         sret = tracing_wait_pipe(filp);
6934         if (sret <= 0)
6935                 goto out;
6936
6937         /* stop when tracing is finished */
6938         if (trace_empty(iter)) {
6939                 sret = 0;
6940                 goto out;
6941         }
6942
6943         if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6944                 cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6945
6946         /* reset all but tr, trace, and overruns */
6947         trace_iterator_reset(iter);
6948         cpumask_clear(iter->started);
6949         trace_seq_init(&iter->seq);
6950
6951         trace_event_read_lock();
6952         trace_access_lock(iter->cpu_file);
6953         while (trace_find_next_entry_inc(iter) != NULL) {
6954                 enum print_line_t ret;
6955                 int save_len = iter->seq.seq.len;
6956
6957                 ret = print_trace_line(iter);
6958                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6959                         /*
6960                          * If one print_trace_line() fills the entire trace_seq in
6961                          * one shot, trace_seq_to_user() will return -EBUSY because
6962                          * save_len == 0. In that case we need to consume it,
6963                          * otherwise the loop will peek at this event again next
6964                          * time, resulting in an infinite loop.
6965                          */
6965                         if (save_len == 0) {
6966                                 iter->seq.full = 0;
6967                                 trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6968                                 trace_consume(iter);
6969                                 break;
6970                         }
6971
6972                         /* In other cases, don't print partial lines */
6973                         iter->seq.seq.len = save_len;
6974                         break;
6975                 }
6976                 if (ret != TRACE_TYPE_NO_CONSUME)
6977                         trace_consume(iter);
6978
6979                 if (trace_seq_used(&iter->seq) >= cnt)
6980                         break;
6981
6982                 /*
6983                  * The full flag being set means we reached the trace_seq buffer
6984                  * size and should have left via the partial output condition
6985                  * above. One of the trace_seq_* functions is not used properly.
6986                  */
6987                 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6988                           iter->ent->type);
6989         }
6990         trace_access_unlock(iter->cpu_file);
6991         trace_event_read_unlock();
6992
6993         /* Now copy what we have to the user */
6994         sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6995         if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6996                 trace_seq_init(&iter->seq);
6997
6998         /*
6999          * If there was nothing to send to user, in spite of consuming trace
7000          * entries, go back to wait for more entries.
7001          */
7002         if (sret == -EBUSY)
7003                 goto waitagain;
7004
7005 out:
7006         mutex_unlock(&iter->mutex);
7007
7008         return sret;
7009 }
7010
7011 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7012                                      unsigned int idx)
7013 {
7014         __free_page(spd->pages[idx]);
7015 }
7016
7017 static size_t
7018 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7019 {
7020         size_t count;
7021         int save_len;
7022         int ret;
7023
7024         /* Seq buffer is page-sized, exactly what we need. */
7025         for (;;) {
7026                 save_len = iter->seq.seq.len;
7027                 ret = print_trace_line(iter);
7028
7029                 if (trace_seq_has_overflowed(&iter->seq)) {
7030                         iter->seq.seq.len = save_len;
7031                         break;
7032                 }
7033
7034                 /*
7035                  * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
7036                  * only be returned if the iter->seq overflowed. But check it
7037                  * anyway to be safe.
7038                  */
7039                 if (ret == TRACE_TYPE_PARTIAL_LINE) {
7040                         iter->seq.seq.len = save_len;
7041                         break;
7042                 }
7043
7044                 count = trace_seq_used(&iter->seq) - save_len;
7045                 if (rem < count) {
7046                         rem = 0;
7047                         iter->seq.seq.len = save_len;
7048                         break;
7049                 }
7050
7051                 if (ret != TRACE_TYPE_NO_CONSUME)
7052                         trace_consume(iter);
7053                 rem -= count;
7054                 if (!trace_find_next_entry_inc(iter)) {
7055                         rem = 0;
7056                         iter->ent = NULL;
7057                         break;
7058                 }
7059         }
7060
7061         return rem;
7062 }
7063
7064 static ssize_t tracing_splice_read_pipe(struct file *filp,
7065                                         loff_t *ppos,
7066                                         struct pipe_inode_info *pipe,
7067                                         size_t len,
7068                                         unsigned int flags)
7069 {
7070         struct page *pages_def[PIPE_DEF_BUFFERS];
7071         struct partial_page partial_def[PIPE_DEF_BUFFERS];
7072         struct trace_iterator *iter = filp->private_data;
7073         struct splice_pipe_desc spd = {
7074                 .pages          = pages_def,
7075                 .partial        = partial_def,
7076                 .nr_pages       = 0, /* This gets updated below. */
7077                 .nr_pages_max   = PIPE_DEF_BUFFERS,
7078                 .ops            = &default_pipe_buf_ops,
7079                 .spd_release    = tracing_spd_release_pipe,
7080         };
7081         ssize_t ret;
7082         size_t rem;
7083         unsigned int i;
7084
7085         if (splice_grow_spd(pipe, &spd))
7086                 return -ENOMEM;
7087
7088         mutex_lock(&iter->mutex);
7089
7090         if (iter->trace->splice_read) {
7091                 ret = iter->trace->splice_read(iter, filp,
7092                                                ppos, pipe, len, flags);
7093                 if (ret)
7094                         goto out_err;
7095         }
7096
7097         ret = tracing_wait_pipe(filp);
7098         if (ret <= 0)
7099                 goto out_err;
7100
7101         if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7102                 ret = -EFAULT;
7103                 goto out_err;
7104         }
7105
7106         trace_event_read_lock();
7107         trace_access_lock(iter->cpu_file);
7108
7109         /* Fill as many pages as possible. */
7110         for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7111                 spd.pages[i] = alloc_page(GFP_KERNEL);
7112                 if (!spd.pages[i])
7113                         break;
7114
7115                 rem = tracing_fill_pipe_page(rem, iter);
7116
7117                 /* Copy the data into the page, so we can start over. */
7118                 ret = trace_seq_to_buffer(&iter->seq,
7119                                           page_address(spd.pages[i]),
7120                                           trace_seq_used(&iter->seq));
7121                 if (ret < 0) {
7122                         __free_page(spd.pages[i]);
7123                         break;
7124                 }
7125                 spd.partial[i].offset = 0;
7126                 spd.partial[i].len = trace_seq_used(&iter->seq);
7127
7128                 trace_seq_init(&iter->seq);
7129         }
7130
7131         trace_access_unlock(iter->cpu_file);
7132         trace_event_read_unlock();
7133         mutex_unlock(&iter->mutex);
7134
7135         spd.nr_pages = i;
7136
7137         if (i)
7138                 ret = splice_to_pipe(pipe, &spd);
7139         else
7140                 ret = 0;
7141 out:
7142         splice_shrink_spd(&spd);
7143         return ret;
7144
7145 out_err:
7146         mutex_unlock(&iter->mutex);
7147         goto out;
7148 }
7149
7150 static ssize_t
7151 tracing_entries_read(struct file *filp, char __user *ubuf,
7152                      size_t cnt, loff_t *ppos)
7153 {
7154         struct inode *inode = file_inode(filp);
7155         struct trace_array *tr = inode->i_private;
7156         int cpu = tracing_get_cpu(inode);
7157         char buf[64];
7158         int r = 0;
7159         ssize_t ret;
7160
7161         mutex_lock(&trace_types_lock);
7162
7163         if (cpu == RING_BUFFER_ALL_CPUS) {
7164                 int cpu, buf_size_same;
7165                 unsigned long size;
7166
7167                 size = 0;
7168                 buf_size_same = 1;
7169                 /* check if all cpu sizes are same */
7170                 for_each_tracing_cpu(cpu) {
7171                         /* fill in the size from first enabled cpu */
7172                         if (size == 0)
7173                                 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7174                         if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7175                                 buf_size_same = 0;
7176                                 break;
7177                         }
7178                 }
7179
7180                 if (buf_size_same) {
7181                         if (!tr->ring_buffer_expanded)
7182                                 r = sprintf(buf, "%lu (expanded: %lu)\n",
7183                                             size >> 10,
7184                                             trace_buf_size >> 10);
7185                         else
7186                                 r = sprintf(buf, "%lu\n", size >> 10);
7187                 } else
7188                         r = sprintf(buf, "X\n");
7189         } else
7190                 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7191
7192         mutex_unlock(&trace_types_lock);
7193
7194         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7195         return ret;
7196 }
7197
7198 static ssize_t
7199 tracing_entries_write(struct file *filp, const char __user *ubuf,
7200                       size_t cnt, loff_t *ppos)
7201 {
7202         struct inode *inode = file_inode(filp);
7203         struct trace_array *tr = inode->i_private;
7204         unsigned long val;
7205         int ret;
7206
7207         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7208         if (ret)
7209                 return ret;
7210
7211         /* must have at least 1 entry */
7212         if (!val)
7213                 return -EINVAL;
7214
7215         /* value is in KB */
7216         val <<= 10;
7217         ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7218         if (ret < 0)
7219                 return ret;
7220
7221         *ppos += cnt;
7222
7223         return cnt;
7224 }
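
/*
 * A user-space sketch (not built here): the value written above is a
 * per-CPU size in kilobytes (val <<= 10 converts it to bytes), so writing
 * "4096" asks for 4 MB per CPU. Writing the top-level buffer_size_kb
 * resizes all CPUs; per_cpu/cpuN/buffer_size_kb resizes one. Assumes
 * tracefs is mounted at /sys/kernel/tracing.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);

	if (fd < 0)
		return 1;
	if (dprintf(fd, "4096\n") < 0)
		perror("buffer_size_kb");
	close(fd);
	return 0;
}
#endif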
7225
7226 static ssize_t
7227 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7228                                 size_t cnt, loff_t *ppos)
7229 {
7230         struct trace_array *tr = filp->private_data;
7231         char buf[64];
7232         int r, cpu;
7233         unsigned long size = 0, expanded_size = 0;
7234
7235         mutex_lock(&trace_types_lock);
7236         for_each_tracing_cpu(cpu) {
7237                 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7238                 if (!tr->ring_buffer_expanded)
7239                         expanded_size += trace_buf_size >> 10;
7240         }
7241         if (tr->ring_buffer_expanded)
7242                 r = sprintf(buf, "%lu\n", size);
7243         else
7244                 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7245         mutex_unlock(&trace_types_lock);
7246
7247         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7248 }
7249
7250 static ssize_t
7251 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7252                           size_t cnt, loff_t *ppos)
7253 {
7254         /*
7255          * There is no need to read what the user has written; this function
7256          * exists just to make sure that there is no error when "echo" is used.
7257          */
7258
7259         *ppos += cnt;
7260
7261         return cnt;
7262 }
7263
7264 static int
7265 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7266 {
7267         struct trace_array *tr = inode->i_private;
7268
7269         /* disable tracing ? */
7270         if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7271                 tracer_tracing_off(tr);
7272         /* resize the ring buffer to 0 */
7273         tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7274
7275         trace_array_put(tr);
7276
7277         return 0;
7278 }
7279
7280 static ssize_t
7281 tracing_mark_write(struct file *filp, const char __user *ubuf,
7282                                         size_t cnt, loff_t *fpos)
7283 {
7284         struct trace_array *tr = filp->private_data;
7285         struct ring_buffer_event *event;
7286         enum event_trigger_type tt = ETT_NONE;
7287         struct trace_buffer *buffer;
7288         struct print_entry *entry;
7289         int meta_size;
7290         ssize_t written;
7291         size_t size;
7292         int len;
7293
7294 /* Used in tracing_mark_raw_write() as well */
7295 #define FAULTED_STR "<faulted>"
7296 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7297
7298         if (tracing_disabled)
7299                 return -EINVAL;
7300
7301         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7302                 return -EINVAL;
7303
7304         if ((ssize_t)cnt < 0)
7305                 return -EINVAL;
7306
7307         meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7308  again:
7309         size = cnt + meta_size;
7310
7311         /* If less than "<faulted>", then make sure we can still add that */
7312         if (cnt < FAULTED_SIZE)
7313                 size += FAULTED_SIZE - cnt;
7314
7315         if (size > TRACE_SEQ_BUFFER_SIZE) {
7316                 cnt -= size - TRACE_SEQ_BUFFER_SIZE;
7317                 goto again;
7318         }
7319
7320         buffer = tr->array_buffer.buffer;
7321         event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7322                                             tracing_gen_ctx());
7323         if (unlikely(!event)) {
7324                 /*
7325                  * If the size was greater than what was allowed, then
7326                  * make it smaller and try again.
7327                  */
7328                 if (size > ring_buffer_max_event_size(buffer)) {
7329                         /* cnt < FAULTED_SIZE should never make size bigger than max */
7330                         if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7331                                 return -EBADF;
7332                         cnt = ring_buffer_max_event_size(buffer) - meta_size;
7333                         /* The above should only happen once */
7334                         if (WARN_ON_ONCE(cnt + meta_size == size))
7335                                 return -EBADF;
7336                         goto again;
7337                 }
7338
7339                 /* Ring buffer disabled, return as if not open for write */
7340                 return -EBADF;
7341         }
7342
7343         entry = ring_buffer_event_data(event);
7344         entry->ip = _THIS_IP_;
7345
7346         len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7347         if (len) {
7348                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7349                 cnt = FAULTED_SIZE;
7350                 written = -EFAULT;
7351         } else
7352                 written = cnt;
7353
7354         if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7355                 /* do not add \n before testing triggers, but add \0 */
7356                 entry->buf[cnt] = '\0';
7357                 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7358         }
7359
7360         if (entry->buf[cnt - 1] != '\n') {
7361                 entry->buf[cnt] = '\n';
7362                 entry->buf[cnt + 1] = '\0';
7363         } else
7364                 entry->buf[cnt] = '\0';
7365
7366         if (static_branch_unlikely(&trace_marker_exports_enabled))
7367                 ftrace_exports(event, TRACE_EXPORT_MARKER);
7368         __buffer_unlock_commit(buffer, event);
7369
7370         if (tt)
7371                 event_triggers_post_call(tr->trace_marker_file, tt);
7372
7373         return written;
7374 }
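
/*
 * A user-space sketch (not built here) of injecting a message through the
 * write handler above: the text is recorded as a TRACE_PRINT event, a
 * trailing newline is added if the caller did not supply one, and a copy
 * that faults is logged as "<faulted>". Assumes tracefs is mounted at
 * /sys/kernel/tracing.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *msg = "hello from user space";
	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, msg, strlen(msg)) < 0)
		perror("trace_marker");
	close(fd);
	return 0;
}
#endif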
7375
7376 static ssize_t
7377 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7378                                         size_t cnt, loff_t *fpos)
7379 {
7380         struct trace_array *tr = filp->private_data;
7381         struct ring_buffer_event *event;
7382         struct trace_buffer *buffer;
7383         struct raw_data_entry *entry;
7384         ssize_t written;
7385         int size;
7386         int len;
7387
7388 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7389
7390         if (tracing_disabled)
7391                 return -EINVAL;
7392
7393         if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7394                 return -EINVAL;
7395
7396         /* The marker must at least have a tag id */
7397         if (cnt < sizeof(unsigned int))
7398                 return -EINVAL;
7399
7400         size = sizeof(*entry) + cnt;
7401         if (cnt < FAULT_SIZE_ID)
7402                 size += FAULT_SIZE_ID - cnt;
7403
7404         buffer = tr->array_buffer.buffer;
7405
7406         if (size > ring_buffer_max_event_size(buffer))
7407                 return -EINVAL;
7408
7409         event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7410                                             tracing_gen_ctx());
7411         if (!event)
7412                 /* Ring buffer disabled, return as if not open for write */
7413                 return -EBADF;
7414
7415         entry = ring_buffer_event_data(event);
7416
7417         len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7418         if (len) {
7419                 entry->id = -1;
7420                 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7421                 written = -EFAULT;
7422         } else
7423                 written = cnt;
7424
7425         __buffer_unlock_commit(buffer, event);
7426
7427         return written;
7428 }
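
/*
 * A user-space sketch (not built here) for the raw marker above: the
 * payload must start with an integer tag id, followed by arbitrary binary
 * data, and the whole write must fit in one ring-buffer event. Assumes
 * tracefs is mounted at /sys/kernel/tracing.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

struct raw_marker {
	unsigned int id;		/* tag stored in the TRACE_RAW_DATA entry */
	unsigned int payload[2];	/* arbitrary data after the id */
};

int main(void)
{
	struct raw_marker m = { .id = 42, .payload = { 1, 2 } };
	int fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);

	if (fd < 0)
		return 1;
	if (write(fd, &m, sizeof(m)) < 0)
		perror("trace_marker_raw");
	close(fd);
	return 0;
}
#endif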
7429
7430 static int tracing_clock_show(struct seq_file *m, void *v)
7431 {
7432         struct trace_array *tr = m->private;
7433         int i;
7434
7435         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7436                 seq_printf(m,
7437                         "%s%s%s%s", i ? " " : "",
7438                         i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7439                         i == tr->clock_id ? "]" : "");
7440         seq_putc(m, '\n');
7441
7442         return 0;
7443 }
7444
7445 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7446 {
7447         int i;
7448
7449         for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7450                 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7451                         break;
7452         }
7453         if (i == ARRAY_SIZE(trace_clocks))
7454                 return -EINVAL;
7455
7456         mutex_lock(&trace_types_lock);
7457
7458         tr->clock_id = i;
7459
7460         ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7461
7462         /*
7463          * New clock may not be consistent with the previous clock.
7464          * Reset the buffer so that it doesn't have incomparable timestamps.
7465          */
7466         tracing_reset_online_cpus(&tr->array_buffer);
7467
7468 #ifdef CONFIG_TRACER_MAX_TRACE
7469         if (tr->max_buffer.buffer)
7470                 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7471         tracing_reset_online_cpus(&tr->max_buffer);
7472 #endif
7473
7474         mutex_unlock(&trace_types_lock);
7475
7476         return 0;
7477 }
7478
7479 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7480                                    size_t cnt, loff_t *fpos)
7481 {
7482         struct seq_file *m = filp->private_data;
7483         struct trace_array *tr = m->private;
7484         char buf[64];
7485         const char *clockstr;
7486         int ret;
7487
7488         if (cnt >= sizeof(buf))
7489                 return -EINVAL;
7490
7491         if (copy_from_user(buf, ubuf, cnt))
7492                 return -EFAULT;
7493
7494         buf[cnt] = 0;
7495
7496         clockstr = strstrip(buf);
7497
7498         ret = tracing_set_clock(tr, clockstr);
7499         if (ret)
7500                 return ret;
7501
7502         *fpos += cnt;
7503
7504         return cnt;
7505 }
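
/*
 * A user-space sketch (not built here) of the clock interface: reading
 * trace_clock lists the available clocks with the current one in brackets,
 * and writing a name from that list selects it (and resets the buffers, as
 * noted above). Assumes tracefs is mounted at /sys/kernel/tracing.
 */
#if 0
#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/sys/kernel/tracing/trace_clock", "r");

	if (f && fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* e.g. "[local] global counter ... mono ..." */
	if (f)
		fclose(f);

	f = fopen("/sys/kernel/tracing/trace_clock", "w");
	if (!f)
		return 1;
	fprintf(f, "mono\n");		/* switch to the monotonic clock */
	return fclose(f);
}
#endif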
7506
7507 static int tracing_clock_open(struct inode *inode, struct file *file)
7508 {
7509         struct trace_array *tr = inode->i_private;
7510         int ret;
7511
7512         ret = tracing_check_open_get_tr(tr);
7513         if (ret)
7514                 return ret;
7515
7516         ret = single_open(file, tracing_clock_show, inode->i_private);
7517         if (ret < 0)
7518                 trace_array_put(tr);
7519
7520         return ret;
7521 }
7522
7523 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7524 {
7525         struct trace_array *tr = m->private;
7526
7527         mutex_lock(&trace_types_lock);
7528
7529         if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7530                 seq_puts(m, "delta [absolute]\n");
7531         else
7532                 seq_puts(m, "[delta] absolute\n");
7533
7534         mutex_unlock(&trace_types_lock);
7535
7536         return 0;
7537 }
7538
7539 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7540 {
7541         struct trace_array *tr = inode->i_private;
7542         int ret;
7543
7544         ret = tracing_check_open_get_tr(tr);
7545         if (ret)
7546                 return ret;
7547
7548         ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7549         if (ret < 0)
7550                 trace_array_put(tr);
7551
7552         return ret;
7553 }
7554
7555 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7556 {
7557         if (rbe == this_cpu_read(trace_buffered_event))
7558                 return ring_buffer_time_stamp(buffer);
7559
7560         return ring_buffer_event_time_stamp(buffer, rbe);
7561 }
7562
7563 /*
7564  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7565  */
7566 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7567 {
7568         int ret = 0;
7569
7570         mutex_lock(&trace_types_lock);
7571
7572         if (set && tr->no_filter_buffering_ref++)
7573                 goto out;
7574
7575         if (!set) {
7576                 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7577                         ret = -EINVAL;
7578                         goto out;
7579                 }
7580
7581                 --tr->no_filter_buffering_ref;
7582         }
7583  out:
7584         mutex_unlock(&trace_types_lock);
7585
7586         return ret;
7587 }
7588
7589 struct ftrace_buffer_info {
7590         struct trace_iterator   iter;
7591         void                    *spare;
7592         unsigned int            spare_cpu;
7593         unsigned int            spare_size;
7594         unsigned int            read;
7595 };
7596
7597 #ifdef CONFIG_TRACER_SNAPSHOT
7598 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7599 {
7600         struct trace_array *tr = inode->i_private;
7601         struct trace_iterator *iter;
7602         struct seq_file *m;
7603         int ret;
7604
7605         ret = tracing_check_open_get_tr(tr);
7606         if (ret)
7607                 return ret;
7608
7609         if (file->f_mode & FMODE_READ) {
7610                 iter = __tracing_open(inode, file, true);
7611                 if (IS_ERR(iter))
7612                         ret = PTR_ERR(iter);
7613         } else {
7614                 /* Writes still need the seq_file to hold the private data */
7615                 ret = -ENOMEM;
7616                 m = kzalloc(sizeof(*m), GFP_KERNEL);
7617                 if (!m)
7618                         goto out;
7619                 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7620                 if (!iter) {
7621                         kfree(m);
7622                         goto out;
7623                 }
7624                 ret = 0;
7625
7626                 iter->tr = tr;
7627                 iter->array_buffer = &tr->max_buffer;
7628                 iter->cpu_file = tracing_get_cpu(inode);
7629                 m->private = iter;
7630                 file->private_data = m;
7631         }
7632 out:
7633         if (ret < 0)
7634                 trace_array_put(tr);
7635
7636         return ret;
7637 }
7638
7639 static void tracing_swap_cpu_buffer(void *tr)
7640 {
7641         update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7642 }
7643
7644 static ssize_t
7645 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7646                        loff_t *ppos)
7647 {
7648         struct seq_file *m = filp->private_data;
7649         struct trace_iterator *iter = m->private;
7650         struct trace_array *tr = iter->tr;
7651         unsigned long val;
7652         int ret;
7653
7654         ret = tracing_update_buffers(tr);
7655         if (ret < 0)
7656                 return ret;
7657
7658         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7659         if (ret)
7660                 return ret;
7661
7662         mutex_lock(&trace_types_lock);
7663
7664         if (tr->current_trace->use_max_tr) {
7665                 ret = -EBUSY;
7666                 goto out;
7667         }
7668
7669         local_irq_disable();
7670         arch_spin_lock(&tr->max_lock);
7671         if (tr->cond_snapshot)
7672                 ret = -EBUSY;
7673         arch_spin_unlock(&tr->max_lock);
7674         local_irq_enable();
7675         if (ret)
7676                 goto out;
7677
7678         switch (val) {
7679         case 0:
7680                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7681                         ret = -EINVAL;
7682                         break;
7683                 }
7684                 if (tr->allocated_snapshot)
7685                         free_snapshot(tr);
7686                 break;
7687         case 1:
7688 /* Only allow per-cpu swap if the ring buffer supports it */
7689 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7690                 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7691                         ret = -EINVAL;
7692                         break;
7693                 }
7694 #endif
7695                 if (tr->allocated_snapshot)
7696                         ret = resize_buffer_duplicate_size(&tr->max_buffer,
7697                                         &tr->array_buffer, iter->cpu_file);
7698                 else
7699                         ret = tracing_alloc_snapshot_instance(tr);
7700                 if (ret < 0)
7701                         break;
7702                 /* Now, we're going to swap */
7703                 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7704                         local_irq_disable();
7705                         update_max_tr(tr, current, smp_processor_id(), NULL);
7706                         local_irq_enable();
7707                 } else {
7708                         smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7709                                                  (void *)tr, 1);
7710                 }
7711                 break;
7712         default:
7713                 if (tr->allocated_snapshot) {
7714                         if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7715                                 tracing_reset_online_cpus(&tr->max_buffer);
7716                         else
7717                                 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7718                 }
7719                 break;
7720         }
7721
7722         if (ret >= 0) {
7723                 *ppos += cnt;
7724                 ret = cnt;
7725         }
7726 out:
7727         mutex_unlock(&trace_types_lock);
7728         return ret;
7729 }
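
/*
 * A user-space sketch (not built here) of the values handled by the switch
 * above: writing "1" allocates the snapshot buffer if needed and takes a
 * snapshot, "0" frees it again, and any other number clears the snapshot
 * contents. Assumes tracefs is mounted at /sys/kernel/tracing and
 * CONFIG_TRACER_SNAPSHOT is enabled.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd = open("/sys/kernel/tracing/snapshot", O_RDWR);

	if (fd < 0)
		return 1;
	if (write(fd, "1", 1) < 0)	/* take a snapshot now */
		perror("snapshot");
	lseek(fd, 0, SEEK_SET);
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);	/* dump the frozen buffer */
	close(fd);
	return 0;
}
#endif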
7730
7731 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7732 {
7733         struct seq_file *m = file->private_data;
7734         int ret;
7735
7736         ret = tracing_release(inode, file);
7737
7738         if (file->f_mode & FMODE_READ)
7739                 return ret;
7740
7741         /* If write only, the seq_file is just a stub */
7742         if (m)
7743                 kfree(m->private);
7744         kfree(m);
7745
7746         return 0;
7747 }
7748
7749 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7750 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7751                                     size_t count, loff_t *ppos);
7752 static int tracing_buffers_release(struct inode *inode, struct file *file);
7753 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7754                    struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7755
7756 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7757 {
7758         struct ftrace_buffer_info *info;
7759         int ret;
7760
7761         /* The following checks for tracefs lockdown */
7762         ret = tracing_buffers_open(inode, filp);
7763         if (ret < 0)
7764                 return ret;
7765
7766         info = filp->private_data;
7767
7768         if (info->iter.trace->use_max_tr) {
7769                 tracing_buffers_release(inode, filp);
7770                 return -EBUSY;
7771         }
7772
7773         info->iter.snapshot = true;
7774         info->iter.array_buffer = &info->iter.tr->max_buffer;
7775
7776         return ret;
7777 }
7778
7779 #endif /* CONFIG_TRACER_SNAPSHOT */
7780
7781
7782 static const struct file_operations tracing_thresh_fops = {
7783         .open           = tracing_open_generic,
7784         .read           = tracing_thresh_read,
7785         .write          = tracing_thresh_write,
7786         .llseek         = generic_file_llseek,
7787 };
7788
7789 #ifdef CONFIG_TRACER_MAX_TRACE
7790 static const struct file_operations tracing_max_lat_fops = {
7791         .open           = tracing_open_generic_tr,
7792         .read           = tracing_max_lat_read,
7793         .write          = tracing_max_lat_write,
7794         .llseek         = generic_file_llseek,
7795         .release        = tracing_release_generic_tr,
7796 };
7797 #endif
7798
7799 static const struct file_operations set_tracer_fops = {
7800         .open           = tracing_open_generic_tr,
7801         .read           = tracing_set_trace_read,
7802         .write          = tracing_set_trace_write,
7803         .llseek         = generic_file_llseek,
7804         .release        = tracing_release_generic_tr,
7805 };
7806
7807 static const struct file_operations tracing_pipe_fops = {
7808         .open           = tracing_open_pipe,
7809         .poll           = tracing_poll_pipe,
7810         .read           = tracing_read_pipe,
7811         .splice_read    = tracing_splice_read_pipe,
7812         .release        = tracing_release_pipe,
7813         .llseek         = no_llseek,
7814 };
7815
7816 static const struct file_operations tracing_entries_fops = {
7817         .open           = tracing_open_generic_tr,
7818         .read           = tracing_entries_read,
7819         .write          = tracing_entries_write,
7820         .llseek         = generic_file_llseek,
7821         .release        = tracing_release_generic_tr,
7822 };
7823
7824 static const struct file_operations tracing_total_entries_fops = {
7825         .open           = tracing_open_generic_tr,
7826         .read           = tracing_total_entries_read,
7827         .llseek         = generic_file_llseek,
7828         .release        = tracing_release_generic_tr,
7829 };
7830
7831 static const struct file_operations tracing_free_buffer_fops = {
7832         .open           = tracing_open_generic_tr,
7833         .write          = tracing_free_buffer_write,
7834         .release        = tracing_free_buffer_release,
7835 };
7836
7837 static const struct file_operations tracing_mark_fops = {
7838         .open           = tracing_mark_open,
7839         .write          = tracing_mark_write,
7840         .release        = tracing_release_generic_tr,
7841 };
7842
7843 static const struct file_operations tracing_mark_raw_fops = {
7844         .open           = tracing_mark_open,
7845         .write          = tracing_mark_raw_write,
7846         .release        = tracing_release_generic_tr,
7847 };
7848
7849 static const struct file_operations trace_clock_fops = {
7850         .open           = tracing_clock_open,
7851         .read           = seq_read,
7852         .llseek         = seq_lseek,
7853         .release        = tracing_single_release_tr,
7854         .write          = tracing_clock_write,
7855 };
7856
7857 static const struct file_operations trace_time_stamp_mode_fops = {
7858         .open           = tracing_time_stamp_mode_open,
7859         .read           = seq_read,
7860         .llseek         = seq_lseek,
7861         .release        = tracing_single_release_tr,
7862 };
7863
7864 #ifdef CONFIG_TRACER_SNAPSHOT
7865 static const struct file_operations snapshot_fops = {
7866         .open           = tracing_snapshot_open,
7867         .read           = seq_read,
7868         .write          = tracing_snapshot_write,
7869         .llseek         = tracing_lseek,
7870         .release        = tracing_snapshot_release,
7871 };
7872
7873 static const struct file_operations snapshot_raw_fops = {
7874         .open           = snapshot_raw_open,
7875         .read           = tracing_buffers_read,
7876         .release        = tracing_buffers_release,
7877         .splice_read    = tracing_buffers_splice_read,
7878         .llseek         = no_llseek,
7879 };
7880
7881 #endif /* CONFIG_TRACER_SNAPSHOT */
7882
7883 /*
7884  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7885  * @filp: The active open file structure
7886  * @ubuf: The userspace provided buffer holding the value to write
7887  * @cnt: The number of bytes to read from @ubuf
7888  * @ppos: The current "file" position
7889  *
7890  * This function implements the write interface for a struct trace_min_max_param.
7891  * The filp->private_data must point to a trace_min_max_param structure that
7892  * defines where to write the value, the min and the max acceptable values,
7893  * and a lock to protect the write.
7894  */
7895 static ssize_t
7896 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7897 {
7898         struct trace_min_max_param *param = filp->private_data;
7899         u64 val;
7900         int err;
7901
7902         if (!param)
7903                 return -EFAULT;
7904
7905         err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7906         if (err)
7907                 return err;
7908
7909         if (param->lock)
7910                 mutex_lock(param->lock);
7911
7912         if (param->min && val < *param->min)
7913                 err = -EINVAL;
7914
7915         if (param->max && val > *param->max)
7916                 err = -EINVAL;
7917
7918         if (!err)
7919                 *param->val = val;
7920
7921         if (param->lock)
7922                 mutex_unlock(param->lock);
7923
7924         if (err)
7925                 return err;
7926
7927         return cnt;
7928 }
7929
7930 /*
7931  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7932  * @filp: The active open file structure
7933  * @ubuf: The userspace provided buffer to read value into
7934  * @cnt: The maximum number of bytes to read
7935  * @ppos: The current "file" position
7936  *
7937  * This function implements the read interface for a struct trace_min_max_param.
7938  * The filp->private_data must point to a trace_min_max_param struct with valid
7939  * data.
7940  */
7941 static ssize_t
7942 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7943 {
7944         struct trace_min_max_param *param = filp->private_data;
7945         char buf[U64_STR_SIZE];
7946         int len;
7947         u64 val;
7948
7949         if (!param)
7950                 return -EFAULT;
7951
7952         val = *param->val;
7953
7954         if (cnt > sizeof(buf))
7955                 cnt = sizeof(buf);
7956
7957         len = snprintf(buf, sizeof(buf), "%llu\n", val);
7958
7959         return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7960 }
7961
7962 const struct file_operations trace_min_max_fops = {
7963         .open           = tracing_open_generic,
7964         .read           = trace_min_max_read,
7965         .write          = trace_min_max_write,
7966 };
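
/*
 * A minimal usage sketch (hypothetical names): a caller supplies a
 * trace_min_max_param describing the value and its bounds, and passes
 * its address as the data for a tracefs file created with these fops:
 *
 *	static u64 my_val, my_min = 1, my_max = 100;
 *	static DEFINE_MUTEX(my_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 *	trace_create_file("my_value", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 *
 * Reads then return the current value, and writes outside
 * [my_min, my_max] are rejected with -EINVAL.
 */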
7967
7968 #define TRACING_LOG_ERRS_MAX    8
7969 #define TRACING_LOG_LOC_MAX     128
7970
7971 #define CMD_PREFIX "  Command: "
7972
7973 struct err_info {
7974         const char      **errs; /* ptr to loc-specific array of err strings */
7975         u8              type;   /* index into errs -> specific err string */
7976         u16             pos;    /* caret position */
7977         u64             ts;
7978 };
7979
7980 struct tracing_log_err {
7981         struct list_head        list;
7982         struct err_info         info;
7983         char                    loc[TRACING_LOG_LOC_MAX]; /* err location */
7984         char                    *cmd;                     /* what caused err */
7985 };
7986
7987 static DEFINE_MUTEX(tracing_err_log_lock);
7988
7989 static struct tracing_log_err *alloc_tracing_log_err(int len)
7990 {
7991         struct tracing_log_err *err;
7992
7993         err = kzalloc(sizeof(*err), GFP_KERNEL);
7994         if (!err)
7995                 return ERR_PTR(-ENOMEM);
7996
7997         err->cmd = kzalloc(len, GFP_KERNEL);
7998         if (!err->cmd) {
7999                 kfree(err);
8000                 return ERR_PTR(-ENOMEM);
8001         }
8002
8003         return err;
8004 }
8005
8006 static void free_tracing_log_err(struct tracing_log_err *err)
8007 {
8008         kfree(err->cmd);
8009         kfree(err);
8010 }
8011
8012 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8013                                                    int len)
8014 {
8015         struct tracing_log_err *err;
8016         char *cmd;
8017
8018         if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8019                 err = alloc_tracing_log_err(len);
8020                 if (PTR_ERR(err) != -ENOMEM)
8021                         tr->n_err_log_entries++;
8022
8023                 return err;
8024         }
8025         cmd = kzalloc(len, GFP_KERNEL);
8026         if (!cmd)
8027                 return ERR_PTR(-ENOMEM);
8028         err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8029         kfree(err->cmd);
8030         err->cmd = cmd;
8031         list_del(&err->list);
8032
8033         return err;
8034 }
8035
8036 /**
8037  * err_pos - find the position of a string within a command for error careting
8038  * @cmd: The tracing command that caused the error
8039  * @str: The string to position the caret at within @cmd
8040  *
8041  * Finds the position of the first occurrence of @str within @cmd.  The
8042  * return value can be passed to tracing_log_err() for caret placement
8043  * within @cmd.
8044  *
8045  * Returns the index within @cmd of the first occurrence of @str or 0
8046  * if @str was not found.
8047  */
8048 unsigned int err_pos(char *cmd, const char *str)
8049 {
8050         char *found;
8051
8052         if (WARN_ON(!strlen(cmd)))
8053                 return 0;
8054
8055         found = strstr(cmd, str);
8056         if (found)
8057                 return found - cmd;
8058
8059         return 0;
8060 }
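
/*
 * For example (illustrative only): with cmd = "hist:keys=pid:bogus=1",
 * err_pos(cmd, "bogus=1") returns 14, which a caller can pass as @pos
 * to tracing_log_err() so that the caret lands under "bogus=1".
 */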
8061
8062 /**
8063  * tracing_log_err - write an error to the tracing error log
8064  * @tr: The associated trace array for the error (NULL for top level array)
8065  * @loc: A string describing where the error occurred
8066  * @cmd: The tracing command that caused the error
8067  * @errs: The array of loc-specific static error strings
8068  * @type: The index into errs[], which produces the specific static err string
8069  * @pos: The position the caret should be placed in the cmd
8070  *
8071  * Writes an error into tracing/error_log of the form:
8072  *
8073  * <loc>: error: <text>
8074  *   Command: <cmd>
8075  *              ^
8076  *
8077  * tracing/error_log is a small log file containing the last
8078  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8079  * unless there has been a tracing error, and the error log can be
8080  * cleared, and its memory freed, by truncating it with an empty write,
8081  * i.e. echo > tracing/error_log.
8082  *
8083  * NOTE: the @errs array along with the @type param are used to
8084  * produce a static error string - this string is not copied and saved
8085  * when the error is logged - only a pointer to it is saved.  See
8086  * existing callers for examples of how static strings are typically
8087  * defined for use with tracing_log_err().
8088  */
8089 void tracing_log_err(struct trace_array *tr,
8090                      const char *loc, const char *cmd,
8091                      const char **errs, u8 type, u16 pos)
8092 {
8093         struct tracing_log_err *err;
8094         int len = 0;
8095
8096         if (!tr)
8097                 tr = &global_trace;
8098
8099         len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8100
8101         mutex_lock(&tracing_err_log_lock);
8102         err = get_tracing_log_err(tr, len);
8103         if (PTR_ERR(err) == -ENOMEM) {
8104                 mutex_unlock(&tracing_err_log_lock);
8105                 return;
8106         }
8107
8108         snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8109         snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8110
8111         err->info.errs = errs;
8112         err->info.type = type;
8113         err->info.pos = pos;
8114         err->info.ts = local_clock();
8115
8116         list_add_tail(&err->list, &tr->err_log);
8117         mutex_unlock(&tracing_err_log_lock);
8118 }
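
/*
 * A typical caller (sketch, hypothetical names) keeps a static array of
 * error strings indexed by an enum, so that only a pointer into that
 * array needs to be recorded with the logged error:
 *
 *	static const char *my_errs[] = {
 *		"Bogus keyword",
 *		"Value out of range",
 *	};
 *	enum { ERR_BOGUS, ERR_RANGE };
 *
 *	tracing_log_err(tr, "my_subsys", cmd, my_errs,
 *			ERR_BOGUS, err_pos(cmd, "bogus"));
 */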
8119
8120 static void clear_tracing_err_log(struct trace_array *tr)
8121 {
8122         struct tracing_log_err *err, *next;
8123
8124         mutex_lock(&tracing_err_log_lock);
8125         list_for_each_entry_safe(err, next, &tr->err_log, list) {
8126                 list_del(&err->list);
8127                 free_tracing_log_err(err);
8128         }
8129
8130         tr->n_err_log_entries = 0;
8131         mutex_unlock(&tracing_err_log_lock);
8132 }
8133
8134 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8135 {
8136         struct trace_array *tr = m->private;
8137
8138         mutex_lock(&tracing_err_log_lock);
8139
8140         return seq_list_start(&tr->err_log, *pos);
8141 }
8142
8143 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8144 {
8145         struct trace_array *tr = m->private;
8146
8147         return seq_list_next(v, &tr->err_log, pos);
8148 }
8149
8150 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8151 {
8152         mutex_unlock(&tracing_err_log_lock);
8153 }
8154
8155 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8156 {
8157         u16 i;
8158
8159         for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8160                 seq_putc(m, ' ');
8161         for (i = 0; i < pos; i++)
8162                 seq_putc(m, ' ');
8163         seq_puts(m, "^\n");
8164 }
8165
8166 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8167 {
8168         struct tracing_log_err *err = v;
8169
8170         if (err) {
8171                 const char *err_text = err->info.errs[err->info.type];
8172                 u64 sec = err->info.ts;
8173                 u32 nsec;
8174
8175                 nsec = do_div(sec, NSEC_PER_SEC);
8176                 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8177                            err->loc, err_text);
8178                 seq_printf(m, "%s", err->cmd);
8179                 tracing_err_log_show_pos(m, err->info.pos);
8180         }
8181
8182         return 0;
8183 }
8184
8185 static const struct seq_operations tracing_err_log_seq_ops = {
8186         .start  = tracing_err_log_seq_start,
8187         .next   = tracing_err_log_seq_next,
8188         .stop   = tracing_err_log_seq_stop,
8189         .show   = tracing_err_log_seq_show
8190 };
8191
8192 static int tracing_err_log_open(struct inode *inode, struct file *file)
8193 {
8194         struct trace_array *tr = inode->i_private;
8195         int ret = 0;
8196
8197         ret = tracing_check_open_get_tr(tr);
8198         if (ret)
8199                 return ret;
8200
8201         /* If this file was opened for write, then erase contents */
8202         if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8203                 clear_tracing_err_log(tr);
8204
8205         if (file->f_mode & FMODE_READ) {
8206                 ret = seq_open(file, &tracing_err_log_seq_ops);
8207                 if (!ret) {
8208                         struct seq_file *m = file->private_data;
8209                         m->private = tr;
8210                 } else {
8211                         trace_array_put(tr);
8212                 }
8213         }
8214         return ret;
8215 }
8216
8217 static ssize_t tracing_err_log_write(struct file *file,
8218                                      const char __user *buffer,
8219                                      size_t count, loff_t *ppos)
8220 {
8221         return count;
8222 }
8223
8224 static int tracing_err_log_release(struct inode *inode, struct file *file)
8225 {
8226         struct trace_array *tr = inode->i_private;
8227
8228         trace_array_put(tr);
8229
8230         if (file->f_mode & FMODE_READ)
8231                 seq_release(inode, file);
8232
8233         return 0;
8234 }
8235
8236 static const struct file_operations tracing_err_log_fops = {
8237         .open           = tracing_err_log_open,
8238         .write          = tracing_err_log_write,
8239         .read           = seq_read,
8240         .llseek         = tracing_lseek,
8241         .release        = tracing_err_log_release,
8242 };
8243
8244 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8245 {
8246         struct trace_array *tr = inode->i_private;
8247         struct ftrace_buffer_info *info;
8248         int ret;
8249
8250         ret = tracing_check_open_get_tr(tr);
8251         if (ret)
8252                 return ret;
8253
8254         info = kvzalloc(sizeof(*info), GFP_KERNEL);
8255         if (!info) {
8256                 trace_array_put(tr);
8257                 return -ENOMEM;
8258         }
8259
8260         mutex_lock(&trace_types_lock);
8261
8262         info->iter.tr           = tr;
8263         info->iter.cpu_file     = tracing_get_cpu(inode);
8264         info->iter.trace        = tr->current_trace;
8265         info->iter.array_buffer = &tr->array_buffer;
8266         info->spare             = NULL;
8267         /* Force reading ring buffer for first read */
8268         info->read              = (unsigned int)-1;
8269
8270         filp->private_data = info;
8271
8272         tr->trace_ref++;
8273
8274         mutex_unlock(&trace_types_lock);
8275
8276         ret = nonseekable_open(inode, filp);
8277         if (ret < 0)
8278                 trace_array_put(tr);
8279
8280         return ret;
8281 }
8282
8283 static __poll_t
8284 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8285 {
8286         struct ftrace_buffer_info *info = filp->private_data;
8287         struct trace_iterator *iter = &info->iter;
8288
8289         return trace_poll(iter, filp, poll_table);
8290 }
8291
8292 static ssize_t
8293 tracing_buffers_read(struct file *filp, char __user *ubuf,
8294                      size_t count, loff_t *ppos)
8295 {
8296         struct ftrace_buffer_info *info = filp->private_data;
8297         struct trace_iterator *iter = &info->iter;
8298         void *trace_data;
8299         int page_size;
8300         ssize_t ret = 0;
8301         ssize_t size;
8302
8303         if (!count)
8304                 return 0;
8305
8306 #ifdef CONFIG_TRACER_MAX_TRACE
8307         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8308                 return -EBUSY;
8309 #endif
8310
8311         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8312
8313         /* Make sure the spare matches the current sub buffer size */
8314         if (info->spare) {
8315                 if (page_size != info->spare_size) {
8316                         ring_buffer_free_read_page(iter->array_buffer->buffer,
8317                                                    info->spare_cpu, info->spare);
8318                         info->spare = NULL;
8319                 }
8320         }
8321
8322         if (!info->spare) {
8323                 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8324                                                           iter->cpu_file);
8325                 if (IS_ERR(info->spare)) {
8326                         ret = PTR_ERR(info->spare);
8327                         info->spare = NULL;
8328                 } else {
8329                         info->spare_cpu = iter->cpu_file;
8330                         info->spare_size = page_size;
8331                 }
8332         }
8333         if (!info->spare)
8334                 return ret;
8335
8336         /* Do we have previous read data to read? */
8337         if (info->read < page_size)
8338                 goto read;
8339
8340  again:
8341         trace_access_lock(iter->cpu_file);
8342         ret = ring_buffer_read_page(iter->array_buffer->buffer,
8343                                     info->spare,
8344                                     count,
8345                                     iter->cpu_file, 0);
8346         trace_access_unlock(iter->cpu_file);
8347
8348         if (ret < 0) {
8349                 if (trace_empty(iter)) {
8350                         if ((filp->f_flags & O_NONBLOCK))
8351                                 return -EAGAIN;
8352
8353                         ret = wait_on_pipe(iter, 0);
8354                         if (ret)
8355                                 return ret;
8356
8357                         goto again;
8358                 }
8359                 return 0;
8360         }
8361
8362         info->read = 0;
8363  read:
8364         size = page_size - info->read;
8365         if (size > count)
8366                 size = count;
8367         trace_data = ring_buffer_read_page_data(info->spare);
8368         ret = copy_to_user(ubuf, trace_data + info->read, size);
8369         if (ret == size)
8370                 return -EFAULT;
8371
8372         size -= ret;
8373
8374         *ppos += size;
8375         info->read += size;
8376
8377         return size;
8378 }
8379
8380 static int tracing_buffers_release(struct inode *inode, struct file *file)
8381 {
8382         struct ftrace_buffer_info *info = file->private_data;
8383         struct trace_iterator *iter = &info->iter;
8384
8385         mutex_lock(&trace_types_lock);
8386
8387         iter->tr->trace_ref--;
8388
8389         __trace_array_put(iter->tr);
8390
8391         iter->wait_index++;
8392         /* Make sure the waiters see the new wait_index */
8393         smp_wmb();
8394
8395         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8396
8397         if (info->spare)
8398                 ring_buffer_free_read_page(iter->array_buffer->buffer,
8399                                            info->spare_cpu, info->spare);
8400         kvfree(info);
8401
8402         mutex_unlock(&trace_types_lock);
8403
8404         return 0;
8405 }
8406
8407 struct buffer_ref {
8408         struct trace_buffer     *buffer;
8409         void                    *page;
8410         int                     cpu;
8411         refcount_t              refcount;
8412 };
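
/*
 * Every page handed to the pipe by tracing_buffers_splice_read() below is
 * wrapped in a buffer_ref.  The refcount keeps the read page alive for as
 * long as any pipe buffer (or the splice descriptor) still points at it;
 * the final put in buffer_ref_release() returns the page to the ring buffer.
 */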
8413
8414 static void buffer_ref_release(struct buffer_ref *ref)
8415 {
8416         if (!refcount_dec_and_test(&ref->refcount))
8417                 return;
8418         ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8419         kfree(ref);
8420 }
8421
8422 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8423                                     struct pipe_buffer *buf)
8424 {
8425         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8426
8427         buffer_ref_release(ref);
8428         buf->private = 0;
8429 }
8430
8431 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8432                                 struct pipe_buffer *buf)
8433 {
8434         struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8435
8436         if (refcount_read(&ref->refcount) > INT_MAX/2)
8437                 return false;
8438
8439         refcount_inc(&ref->refcount);
8440         return true;
8441 }
8442
8443 /* Pipe buffer operations for a buffer. */
8444 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8445         .release                = buffer_pipe_buf_release,
8446         .get                    = buffer_pipe_buf_get,
8447 };
8448
8449 /*
8450  * Callback from splice_to_pipe(), if we need to release some pages
8451  * at the end of the spd in case we error'ed out in filling the pipe.
8452  * at the end of the spd in case we errored out while filling the pipe.
8453 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8454 {
8455         struct buffer_ref *ref =
8456                 (struct buffer_ref *)spd->partial[i].private;
8457
8458         buffer_ref_release(ref);
8459         spd->partial[i].private = 0;
8460 }
8461
8462 static ssize_t
8463 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8464                             struct pipe_inode_info *pipe, size_t len,
8465                             unsigned int flags)
8466 {
8467         struct ftrace_buffer_info *info = file->private_data;
8468         struct trace_iterator *iter = &info->iter;
8469         struct partial_page partial_def[PIPE_DEF_BUFFERS];
8470         struct page *pages_def[PIPE_DEF_BUFFERS];
8471         struct splice_pipe_desc spd = {
8472                 .pages          = pages_def,
8473                 .partial        = partial_def,
8474                 .nr_pages_max   = PIPE_DEF_BUFFERS,
8475                 .ops            = &buffer_pipe_buf_ops,
8476                 .spd_release    = buffer_spd_release,
8477         };
8478         struct buffer_ref *ref;
8479         int page_size;
8480         int entries, i;
8481         ssize_t ret = 0;
8482
8483 #ifdef CONFIG_TRACER_MAX_TRACE
8484         if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8485                 return -EBUSY;
8486 #endif
8487
8488         page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8489         if (*ppos & (page_size - 1))
8490                 return -EINVAL;
8491
8492         if (len & (page_size - 1)) {
8493                 if (len < page_size)
8494                         return -EINVAL;
8495                 len &= (~(page_size - 1));
8496         }
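        /*
         * Example, assuming an 8K sub-buffer size: a 20K request is
         * trimmed down to 16K, while a request smaller than one
         * sub-buffer fails above with -EINVAL, since splice hands out
         * whole sub-buffers only.
         */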
8497
8498         if (splice_grow_spd(pipe, &spd))
8499                 return -ENOMEM;
8500
8501  again:
8502         trace_access_lock(iter->cpu_file);
8503         entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8504
8505         for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8506                 struct page *page;
8507                 int r;
8508
8509                 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8510                 if (!ref) {
8511                         ret = -ENOMEM;
8512                         break;
8513                 }
8514
8515                 refcount_set(&ref->refcount, 1);
8516                 ref->buffer = iter->array_buffer->buffer;
8517                 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8518                 if (IS_ERR(ref->page)) {
8519                         ret = PTR_ERR(ref->page);
8520                         ref->page = NULL;
8521                         kfree(ref);
8522                         break;
8523                 }
8524                 ref->cpu = iter->cpu_file;
8525
8526                 r = ring_buffer_read_page(ref->buffer, ref->page,
8527                                           len, iter->cpu_file, 1);
8528                 if (r < 0) {
8529                         ring_buffer_free_read_page(ref->buffer, ref->cpu,
8530                                                    ref->page);
8531                         kfree(ref);
8532                         break;
8533                 }
8534
8535                 page = virt_to_page(ring_buffer_read_page_data(ref->page));
8536
8537                 spd.pages[i] = page;
8538                 spd.partial[i].len = page_size;
8539                 spd.partial[i].offset = 0;
8540                 spd.partial[i].private = (unsigned long)ref;
8541                 spd.nr_pages++;
8542                 *ppos += page_size;
8543
8544                 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8545         }
8546
8547         trace_access_unlock(iter->cpu_file);
8548         spd.nr_pages = i;
8549
8550         /* did we read anything? */
8551         if (!spd.nr_pages) {
8552                 long wait_index;
8553
8554                 if (ret)
8555                         goto out;
8556
8557                 ret = -EAGAIN;
8558                 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8559                         goto out;
8560
8561                 wait_index = READ_ONCE(iter->wait_index);
8562
8563                 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8564                 if (ret)
8565                         goto out;
8566
8567                 /* No need to wait after waking up when tracing is off */
8568                 if (!tracer_tracing_is_on(iter->tr))
8569                         goto out;
8570
8571                 /* Make sure we see the new wait_index */
8572                 smp_rmb();
8573                 if (wait_index != iter->wait_index)
8574                         goto out;
8575
8576                 goto again;
8577         }
8578
8579         ret = splice_to_pipe(pipe, &spd);
8580 out:
8581         splice_shrink_spd(&spd);
8582
8583         return ret;
8584 }
8585
8586 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8587 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8588 {
8589         struct ftrace_buffer_info *info = file->private_data;
8590         struct trace_iterator *iter = &info->iter;
8591
8592         if (cmd)
8593                 return -ENOIOCTLCMD;
8594
8595         mutex_lock(&trace_types_lock);
8596
8597         iter->wait_index++;
8598         /* Make sure the waiters see the new wait_index */
8599         smp_wmb();
8600
8601         ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8602
8603         mutex_unlock(&trace_types_lock);
8604         return 0;
8605 }
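
/*
 * A user-space reader blocked in a read or splice of trace_pipe_raw can be
 * nudged out by another thread issuing a zero-command ioctl on the same
 * file descriptor, e.g. (sketch):
 *
 *	ioctl(trace_pipe_raw_fd, 0);
 *
 * Bumping wait_index before the wake-up lets the sleeping side (see
 * tracing_buffers_splice_read()) detect that it should not go back to sleep.
 */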
8606
8607 static const struct file_operations tracing_buffers_fops = {
8608         .open           = tracing_buffers_open,
8609         .read           = tracing_buffers_read,
8610         .poll           = tracing_buffers_poll,
8611         .release        = tracing_buffers_release,
8612         .splice_read    = tracing_buffers_splice_read,
8613         .unlocked_ioctl = tracing_buffers_ioctl,
8614         .llseek         = no_llseek,
8615 };
8616
8617 static ssize_t
8618 tracing_stats_read(struct file *filp, char __user *ubuf,
8619                    size_t count, loff_t *ppos)
8620 {
8621         struct inode *inode = file_inode(filp);
8622         struct trace_array *tr = inode->i_private;
8623         struct array_buffer *trace_buf = &tr->array_buffer;
8624         int cpu = tracing_get_cpu(inode);
8625         struct trace_seq *s;
8626         unsigned long cnt;
8627         unsigned long long t;
8628         unsigned long usec_rem;
8629
8630         s = kmalloc(sizeof(*s), GFP_KERNEL);
8631         if (!s)
8632                 return -ENOMEM;
8633
8634         trace_seq_init(s);
8635
8636         cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8637         trace_seq_printf(s, "entries: %ld\n", cnt);
8638
8639         cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8640         trace_seq_printf(s, "overrun: %ld\n", cnt);
8641
8642         cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8643         trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8644
8645         cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8646         trace_seq_printf(s, "bytes: %ld\n", cnt);
8647
8648         if (trace_clocks[tr->clock_id].in_ns) {
8649                 /* local or global for trace_clock */
8650                 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8651                 usec_rem = do_div(t, USEC_PER_SEC);
8652                 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8653                                                                 t, usec_rem);
8654
8655                 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8656                 usec_rem = do_div(t, USEC_PER_SEC);
8657                 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8658         } else {
8659                 /* counter or tsc mode for trace_clock */
8660                 trace_seq_printf(s, "oldest event ts: %llu\n",
8661                                 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8662
8663                 trace_seq_printf(s, "now ts: %llu\n",
8664                                 ring_buffer_time_stamp(trace_buf->buffer));
8665         }
8666
8667         cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8668         trace_seq_printf(s, "dropped events: %ld\n", cnt);
8669
8670         cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8671         trace_seq_printf(s, "read events: %ld\n", cnt);
8672
8673         count = simple_read_from_buffer(ubuf, count, ppos,
8674                                         s->buffer, trace_seq_used(s));
8675
8676         kfree(s);
8677
8678         return count;
8679 }
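
/*
 * The resulting per-CPU "stats" file looks like this (example values):
 *
 *	entries: 1024
 *	overrun: 0
 *	commit overrun: 0
 *	bytes: 53248
 *	oldest event ts:  5234.123456
 *	now ts:  5236.654321
 *	dropped events: 0
 *	read events: 128
 */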
8680
8681 static const struct file_operations tracing_stats_fops = {
8682         .open           = tracing_open_generic_tr,
8683         .read           = tracing_stats_read,
8684         .llseek         = generic_file_llseek,
8685         .release        = tracing_release_generic_tr,
8686 };
8687
8688 #ifdef CONFIG_DYNAMIC_FTRACE
8689
8690 static ssize_t
8691 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8692                   size_t cnt, loff_t *ppos)
8693 {
8694         ssize_t ret;
8695         char *buf;
8696         int r;
8697
8698         /* 256 should be plenty to hold the amount needed */
8699         buf = kmalloc(256, GFP_KERNEL);
8700         if (!buf)
8701                 return -ENOMEM;
8702
8703         r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8704                       ftrace_update_tot_cnt,
8705                       ftrace_number_of_pages,
8706                       ftrace_number_of_groups);
8707
8708         ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8709         kfree(buf);
8710         return ret;
8711 }
8712
8713 static const struct file_operations tracing_dyn_info_fops = {
8714         .open           = tracing_open_generic,
8715         .read           = tracing_read_dyn_info,
8716         .llseek         = generic_file_llseek,
8717 };
8718 #endif /* CONFIG_DYNAMIC_FTRACE */
8719
8720 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8721 static void
8722 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8723                 struct trace_array *tr, struct ftrace_probe_ops *ops,
8724                 void *data)
8725 {
8726         tracing_snapshot_instance(tr);
8727 }
8728
8729 static void
8730 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8731                       struct trace_array *tr, struct ftrace_probe_ops *ops,
8732                       void *data)
8733 {
8734         struct ftrace_func_mapper *mapper = data;
8735         long *count = NULL;
8736
8737         if (mapper)
8738                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8739
8740         if (count) {
8741
8742                 if (*count <= 0)
8743                         return;
8744
8745                 (*count)--;
8746         }
8747
8748         tracing_snapshot_instance(tr);
8749 }
8750
8751 static int
8752 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8753                       struct ftrace_probe_ops *ops, void *data)
8754 {
8755         struct ftrace_func_mapper *mapper = data;
8756         long *count = NULL;
8757
8758         seq_printf(m, "%ps:", (void *)ip);
8759
8760         seq_puts(m, "snapshot");
8761
8762         if (mapper)
8763                 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8764
8765         if (count)
8766                 seq_printf(m, ":count=%ld\n", *count);
8767         else
8768                 seq_puts(m, ":unlimited\n");
8769
8770         return 0;
8771 }
8772
8773 static int
8774 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8775                      unsigned long ip, void *init_data, void **data)
8776 {
8777         struct ftrace_func_mapper *mapper = *data;
8778
8779         if (!mapper) {
8780                 mapper = allocate_ftrace_func_mapper();
8781                 if (!mapper)
8782                         return -ENOMEM;
8783                 *data = mapper;
8784         }
8785
8786         return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8787 }
8788
8789 static void
8790 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8791                      unsigned long ip, void *data)
8792 {
8793         struct ftrace_func_mapper *mapper = data;
8794
8795         if (!ip) {
8796                 if (!mapper)
8797                         return;
8798                 free_ftrace_func_mapper(mapper, NULL);
8799                 return;
8800         }
8801
8802         ftrace_func_mapper_remove_ip(mapper, ip);
8803 }
8804
8805 static struct ftrace_probe_ops snapshot_probe_ops = {
8806         .func                   = ftrace_snapshot,
8807         .print                  = ftrace_snapshot_print,
8808 };
8809
8810 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8811         .func                   = ftrace_count_snapshot,
8812         .print                  = ftrace_snapshot_print,
8813         .init                   = ftrace_snapshot_init,
8814         .free                   = ftrace_snapshot_free,
8815 };
8816
8817 static int
8818 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8819                                char *glob, char *cmd, char *param, int enable)
8820 {
8821         struct ftrace_probe_ops *ops;
8822         void *count = (void *)-1;
8823         char *number;
8824         int ret;
8825
8826         if (!tr)
8827                 return -ENODEV;
8828
8829         /* hash funcs only work with set_ftrace_filter */
8830         if (!enable)
8831                 return -EINVAL;
8832
8833         ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8834
8835         if (glob[0] == '!')
8836                 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8837
8838         if (!param)
8839                 goto out_reg;
8840
8841         number = strsep(&param, ":");
8842
8843         if (!strlen(number))
8844                 goto out_reg;
8845
8846         /*
8847          * We use the callback data field (which is a pointer)
8848          * as our counter.
8849          */
8850         ret = kstrtoul(number, 0, (unsigned long *)&count);
8851         if (ret)
8852                 return ret;
8853
8854  out_reg:
8855         ret = tracing_alloc_snapshot_instance(tr);
8856         if (ret < 0)
8857                 goto out;
8858
8859         ret = register_ftrace_function_probe(glob, tr, ops, count);
8860
8861  out:
8862         return ret < 0 ? ret : 0;
8863 }
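
/*
 * Usage sketch from user space: the "snapshot" command is attached to
 * functions via set_ftrace_filter, optionally with a count limiting how
 * many snapshots get taken:
 *
 *	echo 'schedule:snapshot' > set_ftrace_filter
 *	echo 'schedule:snapshot:3' > set_ftrace_filter
 *	echo '!schedule:snapshot' > set_ftrace_filter
 *
 * The first form snapshots on every hit of schedule(), the second only
 * on the first three hits, and the '!' form removes the probe again.
 */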
8864
8865 static struct ftrace_func_command ftrace_snapshot_cmd = {
8866         .name                   = "snapshot",
8867         .func                   = ftrace_trace_snapshot_callback,
8868 };
8869
8870 static __init int register_snapshot_cmd(void)
8871 {
8872         return register_ftrace_command(&ftrace_snapshot_cmd);
8873 }
8874 #else
8875 static inline __init int register_snapshot_cmd(void) { return 0; }
8876 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8877
8878 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8879 {
8880         if (WARN_ON(!tr->dir))
8881                 return ERR_PTR(-ENODEV);
8882
8883         /* Top directory uses NULL as the parent */
8884         if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8885                 return NULL;
8886
8887         /* All sub buffers have a descriptor */
8888         return tr->dir;
8889 }
8890
8891 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8892 {
8893         struct dentry *d_tracer;
8894
8895         if (tr->percpu_dir)
8896                 return tr->percpu_dir;
8897
8898         d_tracer = tracing_get_dentry(tr);
8899         if (IS_ERR(d_tracer))
8900                 return NULL;
8901
8902         tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8903
8904         MEM_FAIL(!tr->percpu_dir,
8905                   "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8906
8907         return tr->percpu_dir;
8908 }
8909
8910 static struct dentry *
8911 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8912                       void *data, long cpu, const struct file_operations *fops)
8913 {
8914         struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8915
8916         if (ret) /* See tracing_get_cpu() */
8917                 d_inode(ret)->i_cdev = (void *)(cpu + 1);
8918         return ret;
8919 }
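
/*
 * The CPU number is stashed in the inode's i_cdev as (cpu + 1) so that a
 * NULL i_cdev can still mean "all CPUs".  The lookup side, tracing_get_cpu(),
 * is expected to undo this, roughly:
 *
 *	if (inode->i_cdev)
 *		return (long)inode->i_cdev - 1;
 *	return RING_BUFFER_ALL_CPUS;
 */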
8920
8921 static void
8922 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8923 {
8924         struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8925         struct dentry *d_cpu;
8926         char cpu_dir[30]; /* 30 characters should be more than enough */
8927
8928         if (!d_percpu)
8929                 return;
8930
8931         snprintf(cpu_dir, 30, "cpu%ld", cpu);
8932         d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8933         if (!d_cpu) {
8934                 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8935                 return;
8936         }
8937
8938         /* per cpu trace_pipe */
8939         trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8940                                 tr, cpu, &tracing_pipe_fops);
8941
8942         /* per cpu trace */
8943         trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8944                                 tr, cpu, &tracing_fops);
8945
8946         trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8947                                 tr, cpu, &tracing_buffers_fops);
8948
8949         trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8950                                 tr, cpu, &tracing_stats_fops);
8951
8952         trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8953                                 tr, cpu, &tracing_entries_fops);
8954
8955 #ifdef CONFIG_TRACER_SNAPSHOT
8956         trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8957                                 tr, cpu, &snapshot_fops);
8958
8959         trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8960                                 tr, cpu, &snapshot_raw_fops);
8961 #endif
8962 }
8963
8964 #ifdef CONFIG_FTRACE_SELFTEST
8965 /* Let selftest have access to static functions in this file */
8966 #include "trace_selftest.c"
8967 #endif
8968
8969 static ssize_t
8970 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8971                         loff_t *ppos)
8972 {
8973         struct trace_option_dentry *topt = filp->private_data;
8974         char *buf;
8975
8976         if (topt->flags->val & topt->opt->bit)
8977                 buf = "1\n";
8978         else
8979                 buf = "0\n";
8980
8981         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8982 }
8983
8984 static ssize_t
8985 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8986                          loff_t *ppos)
8987 {
8988         struct trace_option_dentry *topt = filp->private_data;
8989         unsigned long val;
8990         int ret;
8991
8992         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8993         if (ret)
8994                 return ret;
8995
8996         if (val != 0 && val != 1)
8997                 return -EINVAL;
8998
8999         if (!!(topt->flags->val & topt->opt->bit) != val) {
9000                 mutex_lock(&trace_types_lock);
9001                 ret = __set_tracer_option(topt->tr, topt->flags,
9002                                           topt->opt, !val);
9003                 mutex_unlock(&trace_types_lock);
9004                 if (ret)
9005                         return ret;
9006         }
9007
9008         *ppos += cnt;
9009
9010         return cnt;
9011 }
9012
9013 static int tracing_open_options(struct inode *inode, struct file *filp)
9014 {
9015         struct trace_option_dentry *topt = inode->i_private;
9016         int ret;
9017
9018         ret = tracing_check_open_get_tr(topt->tr);
9019         if (ret)
9020                 return ret;
9021
9022         filp->private_data = inode->i_private;
9023         return 0;
9024 }
9025
9026 static int tracing_release_options(struct inode *inode, struct file *file)
9027 {
9028         struct trace_option_dentry *topt = file->private_data;
9029
9030         trace_array_put(topt->tr);
9031         return 0;
9032 }
9033
9034 static const struct file_operations trace_options_fops = {
9035         .open = tracing_open_options,
9036         .read = trace_options_read,
9037         .write = trace_options_write,
9038         .llseek = generic_file_llseek,
9039         .release = tracing_release_options,
9040 };
9041
9042 /*
9043  * In order to pass in both the trace_array descriptor as well as the index
9044  * to the flag that the trace option file represents, the trace_array
9045  * has a character array of trace_flags_index[], which holds the index
9046  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9047  * The address of this character array is passed to the flag option file
9048  * read/write callbacks.
9049  *
9050  * In order to extract both the index and the trace_array descriptor,
9051  * get_tr_index() uses the following algorithm.
9052  *
9053  *   idx = *ptr;
9054  *
9055  * The pointer passed in is the address of one element of that index
9056  * array, and the value stored there is its own position (index[1] == 1).
9057  *
9058  * Then, to get the trace_array descriptor, subtracting that index from
9059  * the pointer gets us back to the start of the array:
9060  *
9061  *   ptr - idx == &index[0]
9062  *
9063  * Then a simple container_of() from that pointer gets us to the
9064  * trace_array descriptor.
9065  */
9066 static void get_tr_index(void *data, struct trace_array **ptr,
9067                          unsigned int *pindex)
9068 {
9069         *pindex = *(unsigned char *)data;
9070
9071         *ptr = container_of(data - *pindex, struct trace_array,
9072                             trace_flags_index);
9073 }
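
/*
 * Concretely (illustrative walk-through): if data points at
 * tr->trace_flags_index[3], then *data == 3, data - 3 is
 * &tr->trace_flags_index[0], and container_of() on that address with
 * member trace_flags_index recovers the enclosing trace_array.
 */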
9074
9075 static ssize_t
9076 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9077                         loff_t *ppos)
9078 {
9079         void *tr_index = filp->private_data;
9080         struct trace_array *tr;
9081         unsigned int index;
9082         char *buf;
9083
9084         get_tr_index(tr_index, &tr, &index);
9085
9086         if (tr->trace_flags & (1 << index))
9087                 buf = "1\n";
9088         else
9089                 buf = "0\n";
9090
9091         return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9092 }
9093
9094 static ssize_t
9095 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9096                          loff_t *ppos)
9097 {
9098         void *tr_index = filp->private_data;
9099         struct trace_array *tr;
9100         unsigned int index;
9101         unsigned long val;
9102         int ret;
9103
9104         get_tr_index(tr_index, &tr, &index);
9105
9106         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9107         if (ret)
9108                 return ret;
9109
9110         if (val != 0 && val != 1)
9111                 return -EINVAL;
9112
9113         mutex_lock(&event_mutex);
9114         mutex_lock(&trace_types_lock);
9115         ret = set_tracer_flag(tr, 1 << index, val);
9116         mutex_unlock(&trace_types_lock);
9117         mutex_unlock(&event_mutex);
9118
9119         if (ret < 0)
9120                 return ret;
9121
9122         *ppos += cnt;
9123
9124         return cnt;
9125 }
9126
9127 static const struct file_operations trace_options_core_fops = {
9128         .open = tracing_open_generic,
9129         .read = trace_options_core_read,
9130         .write = trace_options_core_write,
9131         .llseek = generic_file_llseek,
9132 };
9133
9134 struct dentry *trace_create_file(const char *name,
9135                                  umode_t mode,
9136                                  struct dentry *parent,
9137                                  void *data,
9138                                  const struct file_operations *fops)
9139 {
9140         struct dentry *ret;
9141
9142         ret = tracefs_create_file(name, mode, parent, data, fops);
9143         if (!ret)
9144                 pr_warn("Could not create tracefs '%s' entry\n", name);
9145
9146         return ret;
9147 }
9148
9149
9150 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9151 {
9152         struct dentry *d_tracer;
9153
9154         if (tr->options)
9155                 return tr->options;
9156
9157         d_tracer = tracing_get_dentry(tr);
9158         if (IS_ERR(d_tracer))
9159                 return NULL;
9160
9161         tr->options = tracefs_create_dir("options", d_tracer);
9162         if (!tr->options) {
9163                 pr_warn("Could not create tracefs directory 'options'\n");
9164                 return NULL;
9165         }
9166
9167         return tr->options;
9168 }
9169
9170 static void
9171 create_trace_option_file(struct trace_array *tr,
9172                          struct trace_option_dentry *topt,
9173                          struct tracer_flags *flags,
9174                          struct tracer_opt *opt)
9175 {
9176         struct dentry *t_options;
9177
9178         t_options = trace_options_init_dentry(tr);
9179         if (!t_options)
9180                 return;
9181
9182         topt->flags = flags;
9183         topt->opt = opt;
9184         topt->tr = tr;
9185
9186         topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9187                                         t_options, topt, &trace_options_fops);
9188
9189 }
9190
9191 static void
9192 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9193 {
9194         struct trace_option_dentry *topts;
9195         struct trace_options *tr_topts;
9196         struct tracer_flags *flags;
9197         struct tracer_opt *opts;
9198         int cnt;
9199         int i;
9200
9201         if (!tracer)
9202                 return;
9203
9204         flags = tracer->flags;
9205
9206         if (!flags || !flags->opts)
9207                 return;
9208
9209         /*
9210          * If this is an instance, only create flags for tracers
9211          * the instance may have.
9212          */
9213         if (!trace_ok_for_array(tracer, tr))
9214                 return;
9215
9216         for (i = 0; i < tr->nr_topts; i++) {
9217                 /* Make sure there are no duplicate flags. */
9218                 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9219                         return;
9220         }
9221
9222         opts = flags->opts;
9223
9224         for (cnt = 0; opts[cnt].name; cnt++)
9225                 ;
9226
9227         topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9228         if (!topts)
9229                 return;
9230
9231         tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9232                             GFP_KERNEL);
9233         if (!tr_topts) {
9234                 kfree(topts);
9235                 return;
9236         }
9237
9238         tr->topts = tr_topts;
9239         tr->topts[tr->nr_topts].tracer = tracer;
9240         tr->topts[tr->nr_topts].topts = topts;
9241         tr->nr_topts++;
9242
9243         for (cnt = 0; opts[cnt].name; cnt++) {
9244                 create_trace_option_file(tr, &topts[cnt], flags,
9245                                          &opts[cnt]);
9246                 MEM_FAIL(topts[cnt].entry == NULL,
9247                           "Failed to create trace option: %s",
9248                           opts[cnt].name);
9249         }
9250 }
9251
9252 static struct dentry *
9253 create_trace_option_core_file(struct trace_array *tr,
9254                               const char *option, long index)
9255 {
9256         struct dentry *t_options;
9257
9258         t_options = trace_options_init_dentry(tr);
9259         if (!t_options)
9260                 return NULL;
9261
9262         return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9263                                  (void *)&tr->trace_flags_index[index],
9264                                  &trace_options_core_fops);
9265 }
9266
9267 static void create_trace_options_dir(struct trace_array *tr)
9268 {
9269         struct dentry *t_options;
9270         bool top_level = tr == &global_trace;
9271         int i;
9272
9273         t_options = trace_options_init_dentry(tr);
9274         if (!t_options)
9275                 return;
9276
9277         for (i = 0; trace_options[i]; i++) {
9278                 if (top_level ||
9279                     !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9280                         create_trace_option_core_file(tr, trace_options[i], i);
9281         }
9282 }
9283
9284 static ssize_t
9285 rb_simple_read(struct file *filp, char __user *ubuf,
9286                size_t cnt, loff_t *ppos)
9287 {
9288         struct trace_array *tr = filp->private_data;
9289         char buf[64];
9290         int r;
9291
9292         r = tracer_tracing_is_on(tr);
9293         r = sprintf(buf, "%d\n", r);
9294
9295         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9296 }
9297
9298 static ssize_t
9299 rb_simple_write(struct file *filp, const char __user *ubuf,
9300                 size_t cnt, loff_t *ppos)
9301 {
9302         struct trace_array *tr = filp->private_data;
9303         struct trace_buffer *buffer = tr->array_buffer.buffer;
9304         unsigned long val;
9305         int ret;
9306
9307         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9308         if (ret)
9309                 return ret;
9310
9311         if (buffer) {
9312                 mutex_lock(&trace_types_lock);
9313                 if (!!val == tracer_tracing_is_on(tr)) {
9314                         val = 0; /* do nothing */
9315                 } else if (val) {
9316                         tracer_tracing_on(tr);
9317                         if (tr->current_trace->start)
9318                                 tr->current_trace->start(tr);
9319                 } else {
9320                         tracer_tracing_off(tr);
9321                         if (tr->current_trace->stop)
9322                                 tr->current_trace->stop(tr);
9323                         /* Wake up any waiters */
9324                         ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9325                 }
9326                 mutex_unlock(&trace_types_lock);
9327         }
9328
9329         (*ppos)++;
9330
9331         return cnt;
9332 }
9333
9334 static const struct file_operations rb_simple_fops = {
9335         .open           = tracing_open_generic_tr,
9336         .read           = rb_simple_read,
9337         .write          = rb_simple_write,
9338         .release        = tracing_release_generic_tr,
9339         .llseek         = default_llseek,
9340 };
9341
9342 static ssize_t
9343 buffer_percent_read(struct file *filp, char __user *ubuf,
9344                     size_t cnt, loff_t *ppos)
9345 {
9346         struct trace_array *tr = filp->private_data;
9347         char buf[64];
9348         int r;
9349
9350         r = tr->buffer_percent;
9351         r = sprintf(buf, "%d\n", r);
9352
9353         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9354 }
9355
9356 static ssize_t
9357 buffer_percent_write(struct file *filp, const char __user *ubuf,
9358                      size_t cnt, loff_t *ppos)
9359 {
9360         struct trace_array *tr = filp->private_data;
9361         unsigned long val;
9362         int ret;
9363
9364         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9365         if (ret)
9366                 return ret;
9367
9368         if (val > 100)
9369                 return -EINVAL;
9370
9371         tr->buffer_percent = val;
9372
9373         (*ppos)++;
9374
9375         return cnt;
9376 }
9377
9378 static const struct file_operations buffer_percent_fops = {
9379         .open           = tracing_open_generic_tr,
9380         .read           = buffer_percent_read,
9381         .write          = buffer_percent_write,
9382         .release        = tracing_release_generic_tr,
9383         .llseek         = default_llseek,
9384 };
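
/*
 * buffer_percent is the watermark used by blocked readers of
 * trace_pipe_raw (see the wait_on_pipe() call in
 * tracing_buffers_splice_read()): 0 wakes them as soon as any data is
 * present, 50 once the buffer is roughly half full, and 100 only when
 * the buffer is full.
 */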
9385
9386 static ssize_t
9387 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9388 {
9389         struct trace_array *tr = filp->private_data;
9390         size_t size;
9391         char buf[64];
9392         int order;
9393         int r;
9394
9395         order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9396         size = (PAGE_SIZE << order) / 1024;
9397
9398         r = sprintf(buf, "%zd\n", size);
9399
9400         return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9401 }
9402
9403 static ssize_t
9404 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9405                          size_t cnt, loff_t *ppos)
9406 {
9407         struct trace_array *tr = filp->private_data;
9408         unsigned long val;
9409         int old_order;
9410         int order;
9411         int pages;
9412         int ret;
9413
9414         ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9415         if (ret)
9416                 return ret;
9417
9418         val *= 1024; /* value passed in is in KB */
9419
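              /*
               * Round the requested size up to the next power-of-two number
               * of pages and convert that to a page order. For example, with
               * 4K pages a write of 8 gives pages = 2 and order = 1 (8K),
               * while a write of 9 rounds up to pages = 3 and order = 2 (16K).
               */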
9420         pages = DIV_ROUND_UP(val, PAGE_SIZE);
9421         order = fls(pages - 1);
9422
9423         /* limit between 1 and 128 system pages */
9424         if (order < 0 || order > 7)
9425                 return -EINVAL;
9426
9427         /* Do not allow tracing while changing the order of the ring buffer */
9428         tracing_stop_tr(tr);
9429
9430         old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9431         if (old_order == order)
9432                 goto out;
9433
9434         ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9435         if (ret)
9436                 goto out;
9437
9438 #ifdef CONFIG_TRACER_MAX_TRACE
9439
9440         if (!tr->allocated_snapshot)
9441                 goto out_max;
9442
9443         ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9444         if (ret) {
9445                 /* Put back the old order */
9446                 cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9447                 if (WARN_ON_ONCE(cnt)) {
9448                         /*
9449                          * AARGH! We are left with different orders!
9450                          * The max buffer is our "snapshot" buffer.
9451                          * When a tracer needs a snapshot (one of the
9452                          * latency tracers), it swaps the max buffer
9453                          * with the saved snapshot. We succeeded in updating
9454                          * the order of the main buffer, but failed to update
9455                          * the order of the max buffer. But when we tried
9456                          * to reset the main buffer to its original order, we
9457                          * failed there too. This is very unlikely to
9458                          * happen, but if it does, warn and kill all
9459                          * tracing.
9460                          */
9461                         tracing_disabled = 1;
9462                 }
9463                 goto out;
9464         }
9465  out_max:
9466 #endif
9467         (*ppos)++;
9468  out:
9469         if (ret)
9470                 cnt = ret;
9471         tracing_start_tr(tr);
9472         return cnt;
9473 }
9474
9475 static const struct file_operations buffer_subbuf_size_fops = {
9476         .open           = tracing_open_generic_tr,
9477         .read           = buffer_subbuf_size_read,
9478         .write          = buffer_subbuf_size_write,
9479         .release        = tracing_release_generic_tr,
9480         .llseek         = default_llseek,
9481 };
9482
9483 static struct dentry *trace_instance_dir;
9484
9485 static void
9486 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9487
9488 static int
9489 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9490 {
9491         enum ring_buffer_flags rb_flags;
9492
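              /*
               * Honor the "overwrite" trace option: in overwrite mode the
               * ring buffer discards the oldest events when it is full rather
               * than dropping new ones.
               */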
9493         rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9494
9495         buf->tr = tr;
9496
9497         buf->buffer = ring_buffer_alloc(size, rb_flags);
9498         if (!buf->buffer)
9499                 return -ENOMEM;
9500
9501         buf->data = alloc_percpu(struct trace_array_cpu);
9502         if (!buf->data) {
9503                 ring_buffer_free(buf->buffer);
9504                 buf->buffer = NULL;
9505                 return -ENOMEM;
9506         }
9507
9508         /* Allocate the first page for all buffers */
9509         set_buffer_entries(&tr->array_buffer,
9510                            ring_buffer_size(tr->array_buffer.buffer, 0));
9511
9512         return 0;
9513 }
9514
9515 static void free_trace_buffer(struct array_buffer *buf)
9516 {
9517         if (buf->buffer) {
9518                 ring_buffer_free(buf->buffer);
9519                 buf->buffer = NULL;
9520                 free_percpu(buf->data);
9521                 buf->data = NULL;
9522         }
9523 }
9524
9525 static int allocate_trace_buffers(struct trace_array *tr, int size)
9526 {
9527         int ret;
9528
9529         ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9530         if (ret)
9531                 return ret;
9532
9533 #ifdef CONFIG_TRACER_MAX_TRACE
9534         ret = allocate_trace_buffer(tr, &tr->max_buffer,
9535                                     allocate_snapshot ? size : 1);
9536         if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9537                 free_trace_buffer(&tr->array_buffer);
9538                 return -ENOMEM;
9539         }
9540         tr->allocated_snapshot = allocate_snapshot;
9541
9542         allocate_snapshot = false;
9543 #endif
9544
9545         return 0;
9546 }
9547
9548 static void free_trace_buffers(struct trace_array *tr)
9549 {
9550         if (!tr)
9551                 return;
9552
9553         free_trace_buffer(&tr->array_buffer);
9554
9555 #ifdef CONFIG_TRACER_MAX_TRACE
9556         free_trace_buffer(&tr->max_buffer);
9557 #endif
9558 }
9559
9560 static void init_trace_flags_index(struct trace_array *tr)
9561 {
9562         int i;
9563
9564         /* Used by the trace options files */
9565         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9566                 tr->trace_flags_index[i] = i;
9567 }
9568
9569 static void __update_tracer_options(struct trace_array *tr)
9570 {
9571         struct tracer *t;
9572
9573         for (t = trace_types; t; t = t->next)
9574                 add_tracer_options(tr, t);
9575 }
9576
9577 static void update_tracer_options(struct trace_array *tr)
9578 {
9579         mutex_lock(&trace_types_lock);
9580         tracer_options_updated = true;
9581         __update_tracer_options(tr);
9582         mutex_unlock(&trace_types_lock);
9583 }
9584
9585 /* Must have trace_types_lock held */
9586 struct trace_array *trace_array_find(const char *instance)
9587 {
9588         struct trace_array *tr, *found = NULL;
9589
9590         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9591                 if (tr->name && strcmp(tr->name, instance) == 0) {
9592                         found = tr;
9593                         break;
9594                 }
9595         }
9596
9597         return found;
9598 }
9599
9600 struct trace_array *trace_array_find_get(const char *instance)
9601 {
9602         struct trace_array *tr;
9603
9604         mutex_lock(&trace_types_lock);
9605         tr = trace_array_find(instance);
9606         if (tr)
9607                 tr->ref++;
9608         mutex_unlock(&trace_types_lock);
9609
9610         return tr;
9611 }
9612
9613 static int trace_array_create_dir(struct trace_array *tr)
9614 {
9615         int ret;
9616
9617         tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9618         if (!tr->dir)
9619                 return -EINVAL;
9620
9621         ret = event_trace_add_tracer(tr->dir, tr);
9622         if (ret) {
9623                 tracefs_remove(tr->dir);
9624                 return ret;
9625         }
9626
9627         init_tracer_tracefs(tr, tr->dir);
9628         __update_tracer_options(tr);
9629
9630         return ret;
9631 }
9632
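      /*
       * Allocate and initialize a new trace instance. @systems, when not
       * NULL, limits which event systems get directories created for this
       * instance (NULL means all systems).
       */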
9633 static struct trace_array *
9634 trace_array_create_systems(const char *name, const char *systems)
9635 {
9636         struct trace_array *tr;
9637         int ret;
9638
9639         ret = -ENOMEM;
9640         tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9641         if (!tr)
9642                 return ERR_PTR(ret);
9643
9644         tr->name = kstrdup(name, GFP_KERNEL);
9645         if (!tr->name)
9646                 goto out_free_tr;
9647
9648         if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9649                 goto out_free_tr;
9650
9651         if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9652                 goto out_free_tr;
9653
9654         if (systems) {
9655                 tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9656                 if (!tr->system_names)
9657                         goto out_free_tr;
9658         }
9659
9660         tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9661
9662         cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9663
9664         raw_spin_lock_init(&tr->start_lock);
9665
9666         tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9667
9668         tr->current_trace = &nop_trace;
9669
9670         INIT_LIST_HEAD(&tr->systems);
9671         INIT_LIST_HEAD(&tr->events);
9672         INIT_LIST_HEAD(&tr->hist_vars);
9673         INIT_LIST_HEAD(&tr->err_log);
9674
9675         if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9676                 goto out_free_tr;
9677
9678         /* The ring buffer is expanded by default */
9679         trace_set_ring_buffer_expanded(tr);
9680
9681         if (ftrace_allocate_ftrace_ops(tr) < 0)
9682                 goto out_free_tr;
9683
9684         ftrace_init_trace_array(tr);
9685
9686         init_trace_flags_index(tr);
9687
9688         if (trace_instance_dir) {
9689                 ret = trace_array_create_dir(tr);
9690                 if (ret)
9691                         goto out_free_tr;
9692         } else
9693                 __trace_early_add_events(tr);
9694
9695         list_add(&tr->list, &ftrace_trace_arrays);
9696
9697         tr->ref++;
9698
9699         return tr;
9700
9701  out_free_tr:
9702         ftrace_free_ftrace_ops(tr);
9703         free_trace_buffers(tr);
9704         free_cpumask_var(tr->pipe_cpumask);
9705         free_cpumask_var(tr->tracing_cpumask);
9706         kfree_const(tr->system_names);
9707         kfree(tr->name);
9708         kfree(tr);
9709
9710         return ERR_PTR(ret);
9711 }
9712
9713 static struct trace_array *trace_array_create(const char *name)
9714 {
9715         return trace_array_create_systems(name, NULL);
9716 }
9717
9718 static int instance_mkdir(const char *name)
9719 {
9720         struct trace_array *tr;
9721         int ret;
9722
9723         mutex_lock(&event_mutex);
9724         mutex_lock(&trace_types_lock);
9725
9726         ret = -EEXIST;
9727         if (trace_array_find(name))
9728                 goto out_unlock;
9729
9730         tr = trace_array_create(name);
9731
9732         ret = PTR_ERR_OR_ZERO(tr);
9733
9734 out_unlock:
9735         mutex_unlock(&trace_types_lock);
9736         mutex_unlock(&event_mutex);
9737         return ret;
9738 }
9739
9740 /**
9741  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9742  * @name: The name of the trace array to be looked up/created.
9743  * @systems: A list of systems to create event directories for (NULL for all)
9744  *
9745  * Returns a pointer to the trace array with the given name, or
9746  * NULL if it cannot be created.
9747  *
9748  * NOTE: This function increments the reference counter associated with the
9749  * trace array returned. This makes sure it cannot be freed while in use.
9750  * Use trace_array_put() once the trace array is no longer needed.
9751  * If the trace_array is to be freed, trace_array_destroy() needs to
9752  * be called after the trace_array_put(), or simply let user space delete
9753  * it from the tracefs instances directory. But until the
9754  * trace_array_put() is called, user space can not delete it.
9755  *
9756  */
9757 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9758 {
9759         struct trace_array *tr;
9760
9761         mutex_lock(&event_mutex);
9762         mutex_lock(&trace_types_lock);
9763
9764         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9765                 if (tr->name && strcmp(tr->name, name) == 0)
9766                         goto out_unlock;
9767         }
9768
9769         tr = trace_array_create_systems(name, systems);
9770
9771         if (IS_ERR(tr))
9772                 tr = NULL;
9773 out_unlock:
9774         if (tr)
9775                 tr->ref++;
9776
9777         mutex_unlock(&trace_types_lock);
9778         mutex_unlock(&event_mutex);
9779         return tr;
9780 }
9781 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9782
9783 static int __remove_instance(struct trace_array *tr)
9784 {
9785         int i;
9786
9787         /* Reference counter for a newly created trace array = 1. */
9788         if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9789                 return -EBUSY;
9790
9791         list_del(&tr->list);
9792
9793         /* Disable all the flags that were enabled coming in */
9794         for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9795                 if ((1 << i) & ZEROED_TRACE_FLAGS)
9796                         set_tracer_flag(tr, 1 << i, 0);
9797         }
9798
9799         tracing_set_nop(tr);
9800         clear_ftrace_function_probes(tr);
9801         event_trace_del_tracer(tr);
9802         ftrace_clear_pids(tr);
9803         ftrace_destroy_function_files(tr);
9804         tracefs_remove(tr->dir);
9805         free_percpu(tr->last_func_repeats);
9806         free_trace_buffers(tr);
9807         clear_tracing_err_log(tr);
9808
9809         for (i = 0; i < tr->nr_topts; i++) {
9810                 kfree(tr->topts[i].topts);
9811         }
9812         kfree(tr->topts);
9813
9814         free_cpumask_var(tr->pipe_cpumask);
9815         free_cpumask_var(tr->tracing_cpumask);
9816         kfree_const(tr->system_names);
9817         kfree(tr->name);
9818         kfree(tr);
9819
9820         return 0;
9821 }
9822
9823 int trace_array_destroy(struct trace_array *this_tr)
9824 {
9825         struct trace_array *tr;
9826         int ret;
9827
9828         if (!this_tr)
9829                 return -EINVAL;
9830
9831         mutex_lock(&event_mutex);
9832         mutex_lock(&trace_types_lock);
9833
9834         ret = -ENODEV;
9835
9836         /* Make sure the trace array exists before destroying it. */
9837         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9838                 if (tr == this_tr) {
9839                         ret = __remove_instance(tr);
9840                         break;
9841                 }
9842         }
9843
9844         mutex_unlock(&trace_types_lock);
9845         mutex_unlock(&event_mutex);
9846
9847         return ret;
9848 }
9849 EXPORT_SYMBOL_GPL(trace_array_destroy);
9850
9851 static int instance_rmdir(const char *name)
9852 {
9853         struct trace_array *tr;
9854         int ret;
9855
9856         mutex_lock(&event_mutex);
9857         mutex_lock(&trace_types_lock);
9858
9859         ret = -ENODEV;
9860         tr = trace_array_find(name);
9861         if (tr)
9862                 ret = __remove_instance(tr);
9863
9864         mutex_unlock(&trace_types_lock);
9865         mutex_unlock(&event_mutex);
9866
9867         return ret;
9868 }
9869
9870 static __init void create_trace_instances(struct dentry *d_tracer)
9871 {
9872         struct trace_array *tr;
9873
9874         trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9875                                                          instance_mkdir,
9876                                                          instance_rmdir);
9877         if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9878                 return;
9879
9880         mutex_lock(&event_mutex);
9881         mutex_lock(&trace_types_lock);
9882
9883         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9884                 if (!tr->name)
9885                         continue;
9886                 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9887                              "Failed to create instance directory\n"))
9888                         break;
9889         }
9890
9891         mutex_unlock(&trace_types_lock);
9892         mutex_unlock(&event_mutex);
9893 }
9894
9895 static void
9896 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9897 {
9898         int cpu;
9899
9900         trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9901                         tr, &show_traces_fops);
9902
9903         trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9904                         tr, &set_tracer_fops);
9905
9906         trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9907                           tr, &tracing_cpumask_fops);
9908
9909         trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9910                           tr, &tracing_iter_fops);
9911
9912         trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9913                           tr, &tracing_fops);
9914
9915         trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9916                           tr, &tracing_pipe_fops);
9917
9918         trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9919                           tr, &tracing_entries_fops);
9920
9921         trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9922                           tr, &tracing_total_entries_fops);
9923
9924         trace_create_file("free_buffer", 0200, d_tracer,
9925                           tr, &tracing_free_buffer_fops);
9926
9927         trace_create_file("trace_marker", 0220, d_tracer,
9928                           tr, &tracing_mark_fops);
9929
9930         tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9931
9932         trace_create_file("trace_marker_raw", 0220, d_tracer,
9933                           tr, &tracing_mark_raw_fops);
9934
9935         trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9936                           &trace_clock_fops);
9937
9938         trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9939                           tr, &rb_simple_fops);
9940
9941         trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9942                           &trace_time_stamp_mode_fops);
9943
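              /*
               * Default to waking up readers of this instance when the ring
               * buffer is half full: 0 means wake on any data, 100 means wait
               * until the buffer is full.
               */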
9944         tr->buffer_percent = 50;
9945
9946         trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9947                         tr, &buffer_percent_fops);
9948
9949         trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9950                           tr, &buffer_subbuf_size_fops);
9951
9952         create_trace_options_dir(tr);
9953
9954 #ifdef CONFIG_TRACER_MAX_TRACE
9955         trace_create_maxlat_file(tr, d_tracer);
9956 #endif
9957
9958         if (ftrace_create_function_files(tr, d_tracer))
9959                 MEM_FAIL(1, "Could not allocate function filter files");
9960
9961 #ifdef CONFIG_TRACER_SNAPSHOT
9962         trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9963                           tr, &snapshot_fops);
9964 #endif
9965
9966         trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9967                           tr, &tracing_err_log_fops);
9968
9969         for_each_tracing_cpu(cpu)
9970                 tracing_init_tracefs_percpu(tr, cpu);
9971
9972         ftrace_init_tracefs(tr, d_tracer);
9973 }
9974
9975 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9976 {
9977         struct vfsmount *mnt;
9978         struct file_system_type *type;
9979
9980         /*
9981          * To maintain backward compatibility for tools that mount
9982          * debugfs to get to the tracing facility, tracefs is automatically
9983          * mounted to the debugfs/tracing directory.
9984          */
9985         type = get_fs_type("tracefs");
9986         if (!type)
9987                 return NULL;
9988         mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9989         put_filesystem(type);
9990         if (IS_ERR(mnt))
9991                 return NULL;
9992         mntget(mnt);
9993
9994         return mnt;
9995 }
9996
9997 /**
9998  * tracing_init_dentry - initialize top level trace array
9999  *
10000  * This is called when creating files or directories in the tracing
10001  * directory. It is called via fs_initcall() by any of the boot up code
10002  * and expects to return the dentry of the top level tracing directory.
10003  */
10004 int tracing_init_dentry(void)
10005 {
10006         struct trace_array *tr = &global_trace;
10007
10008         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10009                 pr_warn("Tracing disabled due to lockdown\n");
10010                 return -EPERM;
10011         }
10012
10013         /* The top level trace array uses NULL as parent */
10014         if (tr->dir)
10015                 return 0;
10016
10017         if (WARN_ON(!tracefs_initialized()))
10018                 return -ENODEV;
10019
10020         /*
10021          * As there may still be users that expect the tracing
10022          * files to exist in debugfs/tracing, we must automount
10023          * the tracefs file system there, so older tools still
10024          * work with the newer kernel.
10025          */
10026         tr->dir = debugfs_create_automount("tracing", NULL,
10027                                            trace_automount, NULL);
10028
10029         return 0;
10030 }
10031
10032 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10033 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10034
10035 static struct workqueue_struct *eval_map_wq __initdata;
10036 static struct work_struct eval_map_work __initdata;
10037 static struct work_struct tracerfs_init_work __initdata;
10038
10039 static void __init eval_map_work_func(struct work_struct *work)
10040 {
10041         int len;
10042
10043         len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10044         trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10045 }
10046
10047 static int __init trace_eval_init(void)
10048 {
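              /*
               * Inserting the core kernel eval maps can take a while, so hand
               * the work off to a workqueue rather than doing it synchronously
               * at boot; trace_eval_sync() below waits for it to complete.
               */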
10049         INIT_WORK(&eval_map_work, eval_map_work_func);
10050
10051         eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10052         if (!eval_map_wq) {
10053                 pr_err("Unable to allocate eval_map_wq\n");
10054                 /* Do work here */
10055                 eval_map_work_func(&eval_map_work);
10056                 return -ENOMEM;
10057         }
10058
10059         queue_work(eval_map_wq, &eval_map_work);
10060         return 0;
10061 }
10062
10063 subsys_initcall(trace_eval_init);
10064
10065 static int __init trace_eval_sync(void)
10066 {
10067         /* Make sure the eval map updates are finished */
10068         if (eval_map_wq)
10069                 destroy_workqueue(eval_map_wq);
10070         return 0;
10071 }
10072
10073 late_initcall_sync(trace_eval_sync);
10074
10075
10076 #ifdef CONFIG_MODULES
10077 static void trace_module_add_evals(struct module *mod)
10078 {
10079         if (!mod->num_trace_evals)
10080                 return;
10081
10082         /*
10083          * Modules with bad taint do not have events created, so do
10084          * not bother with enums either.
10085          */
10086         if (trace_module_has_bad_taint(mod))
10087                 return;
10088
10089         trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10090 }
10091
10092 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10093 static void trace_module_remove_evals(struct module *mod)
10094 {
10095         union trace_eval_map_item *map;
10096         union trace_eval_map_item **last = &trace_eval_maps;
10097
10098         if (!mod->num_trace_evals)
10099                 return;
10100
10101         mutex_lock(&trace_eval_mutex);
10102
10103         map = trace_eval_maps;
10104
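              /*
               * Each module's eval maps are stored as a block in the
               * trace_eval_maps list, headed by an item that records the
               * owning module. Walk the blocks looking for @mod, keeping
               * @last pointed at the link that references it so the block
               * can be unlinked below.
               */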
10105         while (map) {
10106                 if (map->head.mod == mod)
10107                         break;
10108                 map = trace_eval_jmp_to_tail(map);
10109                 last = &map->tail.next;
10110                 map = map->tail.next;
10111         }
10112         if (!map)
10113                 goto out;
10114
10115         *last = trace_eval_jmp_to_tail(map)->tail.next;
10116         kfree(map);
10117  out:
10118         mutex_unlock(&trace_eval_mutex);
10119 }
10120 #else
10121 static inline void trace_module_remove_evals(struct module *mod) { }
10122 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10123
10124 static int trace_module_notify(struct notifier_block *self,
10125                                unsigned long val, void *data)
10126 {
10127         struct module *mod = data;
10128
10129         switch (val) {
10130         case MODULE_STATE_COMING:
10131                 trace_module_add_evals(mod);
10132                 break;
10133         case MODULE_STATE_GOING:
10134                 trace_module_remove_evals(mod);
10135                 break;
10136         }
10137
10138         return NOTIFY_OK;
10139 }
10140
10141 static struct notifier_block trace_module_nb = {
10142         .notifier_call = trace_module_notify,
10143         .priority = 0,
10144 };
10145 #endif /* CONFIG_MODULES */
10146
10147 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10148 {
10149
10150         event_trace_init();
10151
10152         init_tracer_tracefs(&global_trace, NULL);
10153         ftrace_init_tracefs_toplevel(&global_trace, NULL);
10154
10155         trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10156                         &global_trace, &tracing_thresh_fops);
10157
10158         trace_create_file("README", TRACE_MODE_READ, NULL,
10159                         NULL, &tracing_readme_fops);
10160
10161         trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10162                         NULL, &tracing_saved_cmdlines_fops);
10163
10164         trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10165                           NULL, &tracing_saved_cmdlines_size_fops);
10166
10167         trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10168                         NULL, &tracing_saved_tgids_fops);
10169
10170         trace_create_eval_file(NULL);
10171
10172 #ifdef CONFIG_MODULES
10173         register_module_notifier(&trace_module_nb);
10174 #endif
10175
10176 #ifdef CONFIG_DYNAMIC_FTRACE
10177         trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10178                         NULL, &tracing_dyn_info_fops);
10179 #endif
10180
10181         create_trace_instances(NULL);
10182
10183         update_tracer_options(&global_trace);
10184 }
10185
10186 static __init int tracer_init_tracefs(void)
10187 {
10188         int ret;
10189
10190         trace_access_lock_init();
10191
10192         ret = tracing_init_dentry();
10193         if (ret)
10194                 return 0;
10195
10196         if (eval_map_wq) {
10197                 INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10198                 queue_work(eval_map_wq, &tracerfs_init_work);
10199         } else {
10200                 tracer_init_tracefs_work_func(NULL);
10201         }
10202
10203         rv_init_interface();
10204
10205         return 0;
10206 }
10207
10208 fs_initcall(tracer_init_tracefs);
10209
10210 static int trace_die_panic_handler(struct notifier_block *self,
10211                                 unsigned long ev, void *unused);
10212
10213 static struct notifier_block trace_panic_notifier = {
10214         .notifier_call = trace_die_panic_handler,
10215         .priority = INT_MAX - 1,
10216 };
10217
10218 static struct notifier_block trace_die_notifier = {
10219         .notifier_call = trace_die_panic_handler,
10220         .priority = INT_MAX - 1,
10221 };
10222
10223 /*
10224  * The idea is to execute the following die/panic callback early, in order
10225  * to avoid showing irrelevant information in the trace (like other panic
10226  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10227  * warnings get disabled (to prevent potential log flooding).
10228  */
10229 static int trace_die_panic_handler(struct notifier_block *self,
10230                                 unsigned long ev, void *unused)
10231 {
10232         if (!ftrace_dump_on_oops)
10233                 return NOTIFY_DONE;
10234
10235         /* The die notifier requires DIE_OOPS to trigger */
10236         if (self == &trace_die_notifier && ev != DIE_OOPS)
10237                 return NOTIFY_DONE;
10238
10239         ftrace_dump(ftrace_dump_on_oops);
10240
10241         return NOTIFY_DONE;
10242 }
10243
10244 /*
10245  * printk is set to a max of 1024, and we really don't need it that big.
10246  * Nothing should be printing 1000 characters anyway.
10247  */
10248 #define TRACE_MAX_PRINT         1000
10249
10250 /*
10251  * Define here KERN_TRACE so that we have one place to modify
10252  * it if we decide to change what log level the ftrace dump
10253  * should be at.
10254  */
10255 #define KERN_TRACE              KERN_EMERG
10256
10257 void
10258 trace_printk_seq(struct trace_seq *s)
10259 {
10260         /* Probably should print a warning here. */
10261         if (s->seq.len >= TRACE_MAX_PRINT)
10262                 s->seq.len = TRACE_MAX_PRINT;
10263
10264         /*
10265          * More paranoid code. Although the buffer size is set to
10266          * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10267          * an extra layer of protection.
10268          */
10269         if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10270                 s->seq.len = s->seq.size - 1;
10271
10272         /* Should be zero terminated, but we are paranoid. */
10273         s->buffer[s->seq.len] = 0;
10274
10275         printk(KERN_TRACE "%s", s->buffer);
10276
10277         trace_seq_init(s);
10278 }
10279
10280 void trace_init_global_iter(struct trace_iterator *iter)
10281 {
10282         iter->tr = &global_trace;
10283         iter->trace = iter->tr->current_trace;
10284         iter->cpu_file = RING_BUFFER_ALL_CPUS;
10285         iter->array_buffer = &global_trace.array_buffer;
10286
10287         if (iter->trace && iter->trace->open)
10288                 iter->trace->open(iter);
10289
10290         /* Annotate start of buffers if we had overruns */
10291         if (ring_buffer_overruns(iter->array_buffer->buffer))
10292                 iter->iter_flags |= TRACE_FILE_ANNOTATE;
10293
10294         /* Output in nanoseconds only if we are using a clock in nanoseconds. */
10295         if (trace_clocks[iter->tr->clock_id].in_ns)
10296                 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10297
10298         /* Can not use kmalloc for iter.temp and iter.fmt */
10299         iter->temp = static_temp_buf;
10300         iter->temp_size = STATIC_TEMP_BUF_SIZE;
10301         iter->fmt = static_fmt_buf;
10302         iter->fmt_size = STATIC_FMT_BUF_SIZE;
10303 }
10304
10305 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10306 {
10307         /* use static because iter can be a bit big for the stack */
10308         static struct trace_iterator iter;
10309         static atomic_t dump_running;
10310         struct trace_array *tr = &global_trace;
10311         unsigned int old_userobj;
10312         unsigned long flags;
10313         int cnt = 0, cpu;
10314
10315         /* Only allow one dump user at a time. */
10316         if (atomic_inc_return(&dump_running) != 1) {
10317                 atomic_dec(&dump_running);
10318                 return;
10319         }
10320
10321         /*
10322          * Always turn off tracing when we dump.
10323          * We don't need to show trace output of what happens
10324          * between multiple crashes.
10325          *
10326          * If the user does a sysrq-z, then they can re-enable
10327          * tracing with echo 1 > tracing_on.
10328          */
10329         tracing_off();
10330
10331         local_irq_save(flags);
10332
10333         /* Simulate the iterator */
10334         trace_init_global_iter(&iter);
10335
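              /* Keep new events from being recorded while the buffers are read */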
10336         for_each_tracing_cpu(cpu) {
10337                 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10338         }
10339
10340         old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10341
10342         /* don't look at user memory in panic mode */
10343         tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10344
10345         switch (oops_dump_mode) {
10346         case DUMP_ALL:
10347                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10348                 break;
10349         case DUMP_ORIG:
10350                 iter.cpu_file = raw_smp_processor_id();
10351                 break;
10352         case DUMP_NONE:
10353                 goto out_enable;
10354         default:
10355                 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10356                 iter.cpu_file = RING_BUFFER_ALL_CPUS;
10357         }
10358
10359         printk(KERN_TRACE "Dumping ftrace buffer:\n");
10360
10361         /* Did function tracer already get disabled? */
10362         if (ftrace_is_dead()) {
10363                 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10364                 printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10365         }
10366
10367         /*
10368          * We need to stop all tracing on all CPUs to read
10369          * the next buffer. This is a bit expensive, but is
10370          * not done often. We read all that we can,
10371          * and then release the locks again.
10372          */
10373
10374         while (!trace_empty(&iter)) {
10375
10376                 if (!cnt)
10377                         printk(KERN_TRACE "---------------------------------\n");
10378
10379                 cnt++;
10380
10381                 trace_iterator_reset(&iter);
10382                 iter.iter_flags |= TRACE_FILE_LAT_FMT;
10383
10384                 if (trace_find_next_entry_inc(&iter) != NULL) {
10385                         int ret;
10386
10387                         ret = print_trace_line(&iter);
10388                         if (ret != TRACE_TYPE_NO_CONSUME)
10389                                 trace_consume(&iter);
10390                 }
10391                 touch_nmi_watchdog();
10392
10393                 trace_printk_seq(&iter.seq);
10394         }
10395
10396         if (!cnt)
10397                 printk(KERN_TRACE "   (ftrace buffer empty)\n");
10398         else
10399                 printk(KERN_TRACE "---------------------------------\n");
10400
10401  out_enable:
10402         tr->trace_flags |= old_userobj;
10403
10404         for_each_tracing_cpu(cpu) {
10405                 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10406         }
10407         atomic_dec(&dump_running);
10408         local_irq_restore(flags);
10409 }
10410 EXPORT_SYMBOL_GPL(ftrace_dump);
10411
10412 #define WRITE_BUFSIZE  4096
10413
10414 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10415                                 size_t count, loff_t *ppos,
10416                                 int (*createfn)(const char *))
10417 {
10418         char *kbuf, *buf, *tmp;
10419         int ret = 0;
10420         size_t done = 0;
10421         size_t size;
10422
10423         kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10424         if (!kbuf)
10425                 return -ENOMEM;
10426
10427         while (done < count) {
10428                 size = count - done;
10429
10430                 if (size >= WRITE_BUFSIZE)
10431                         size = WRITE_BUFSIZE - 1;
10432
10433                 if (copy_from_user(kbuf, buffer + done, size)) {
10434                         ret = -EFAULT;
10435                         goto out;
10436                 }
10437                 kbuf[size] = '\0';
10438                 buf = kbuf;
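                      /*
                       * Split the chunk into newline-terminated commands,
                       * strip '#' comments, and hand each complete command to
                       * createfn(). A command cut off at the end of the chunk
                       * is re-read on the next pass.
                       */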
10439                 do {
10440                         tmp = strchr(buf, '\n');
10441                         if (tmp) {
10442                                 *tmp = '\0';
10443                                 size = tmp - buf + 1;
10444                         } else {
10445                                 size = strlen(buf);
10446                                 if (done + size < count) {
10447                                         if (buf != kbuf)
10448                                                 break;
10449                                         /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10450                                         pr_warn("Line length is too long: Should be less than %d\n",
10451                                                 WRITE_BUFSIZE - 2);
10452                                         ret = -EINVAL;
10453                                         goto out;
10454                                 }
10455                         }
10456                         done += size;
10457
10458                         /* Remove comments */
10459                         tmp = strchr(buf, '#');
10460
10461                         if (tmp)
10462                                 *tmp = '\0';
10463
10464                         ret = createfn(buf);
10465                         if (ret)
10466                                 goto out;
10467                         buf += size;
10468
10469                 } while (done < count);
10470         }
10471         ret = done;
10472
10473 out:
10474         kfree(kbuf);
10475
10476         return ret;
10477 }
10478
10479 #ifdef CONFIG_TRACER_MAX_TRACE
10480 __init static bool tr_needs_alloc_snapshot(const char *name)
10481 {
10482         char *test;
10483         int len = strlen(name);
10484         bool ret;
10485
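              /*
               * boot_snapshot_info is a tab-terminated list of instance names
               * ("foo\tbar\t") that requested a snapshot buffer on the kernel
               * command line; check whether @name appears in it.
               */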
10486         if (!boot_snapshot_index)
10487                 return false;
10488
10489         if (strncmp(name, boot_snapshot_info, len) == 0 &&
10490             boot_snapshot_info[len] == '\t')
10491                 return true;
10492
10493         test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10494         if (!test)
10495                 return false;
10496
10497         sprintf(test, "\t%s\t", name);
10498         ret = strstr(boot_snapshot_info, test) != NULL;
10499         kfree(test);
10500         return ret;
10501 }
10502
10503 __init static void do_allocate_snapshot(const char *name)
10504 {
10505         if (!tr_needs_alloc_snapshot(name))
10506                 return;
10507
10508         /*
10509          * When allocate_snapshot is set, the next call to
10510          * allocate_trace_buffers() (called by trace_array_get_by_name())
10511          * will allocate the snapshot buffer. That will also clear
10512          * this flag.
10513          */
10514         allocate_snapshot = true;
10515 }
10516 #else
10517 static inline void do_allocate_snapshot(const char *name) { }
10518 #endif
10519
10520 __init static void enable_instances(void)
10521 {
10522         struct trace_array *tr;
10523         char *curr_str;
10524         char *str;
10525         char *tok;
10526
10527         /* A tab is always appended */
10528         boot_instance_info[boot_instance_index - 1] = '\0';
10529         str = boot_instance_info;
10530
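              /*
               * Each tab-separated entry has the form "name[,event,...]":
               * create (or look up) the instance and then enable any events
               * listed after the name.
               */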
10531         while ((curr_str = strsep(&str, "\t"))) {
10532
10533                 tok = strsep(&curr_str, ",");
10534
10535                 if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10536                         do_allocate_snapshot(tok);
10537
10538                 tr = trace_array_get_by_name(tok, NULL);
10539                 if (!tr) {
10540                         pr_warn("Failed to create instance buffer %s\n", tok);
10541                         continue;
10542                 }
10543                 /* Allow user space to delete it */
10544                 trace_array_put(tr);
10545
10546                 while ((tok = strsep(&curr_str, ","))) {
10547                         early_enable_events(tr, tok, true);
10548                 }
10549         }
10550 }
10551
10552 __init static int tracer_alloc_buffers(void)
10553 {
10554         int ring_buf_size;
10555         int ret = -ENOMEM;
10556
10557
10558         if (security_locked_down(LOCKDOWN_TRACEFS)) {
10559                 pr_warn("Tracing disabled due to lockdown\n");
10560                 return -EPERM;
10561         }
10562
10563         /*
10564          * Make sure we don't accidentally add more trace options
10565          * than we have bits for.
10566          */
10567         BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10568
10569         if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10570                 goto out;
10571
10572         if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10573                 goto out_free_buffer_mask;
10574
10575         /* Only allocate trace_printk buffers if a trace_printk exists */
10576         if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10577                 /* Must be called before global_trace.buffer is allocated */
10578                 trace_printk_init_buffers();
10579
10580         /* To save memory, keep the ring buffer size at its minimum */
10581         if (global_trace.ring_buffer_expanded)
10582                 ring_buf_size = trace_buf_size;
10583         else
10584                 ring_buf_size = 1;
10585
10586         cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10587         cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10588
10589         raw_spin_lock_init(&global_trace.start_lock);
10590
10591         /*
10592          * The prepare callback allocates some memory for the ring buffer. We
10593          * don't free the buffer if the CPU goes down. If we were to free
10594          * the buffer, then the user would lose any trace that was in the
10595          * buffer. The memory will be removed once the "instance" is removed.
10596          */
10597         ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10598                                       "trace/RB:prepare", trace_rb_cpu_prepare,
10599                                       NULL);
10600         if (ret < 0)
10601                 goto out_free_cpumask;
10602         /* Used for event triggers */
10603         ret = -ENOMEM;
10604         temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10605         if (!temp_buffer)
10606                 goto out_rm_hp_state;
10607
10608         if (trace_create_savedcmd() < 0)
10609                 goto out_free_temp_buffer;
10610
10611         if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10612                 goto out_free_savedcmd;
10613
10614         /* TODO: make the number of buffers hot pluggable with CPUS */
10615         if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10616                 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10617                 goto out_free_pipe_cpumask;
10618         }
10619         if (global_trace.buffer_disabled)
10620                 tracing_off();
10621
10622         if (trace_boot_clock) {
10623                 ret = tracing_set_clock(&global_trace, trace_boot_clock);
10624                 if (ret < 0)
10625                         pr_warn("Trace clock %s not defined, going back to default\n",
10626                                 trace_boot_clock);
10627         }
10628
10629         /*
10630          * register_tracer() might reference current_trace, so it
10631          * needs to be set before we register anything. This is
10632          * just a bootstrap of current_trace anyway.
10633          */
10634         global_trace.current_trace = &nop_trace;
10635
10636         global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10637
10638         ftrace_init_global_array_ops(&global_trace);
10639
10640         init_trace_flags_index(&global_trace);
10641
10642         register_tracer(&nop_trace);
10643
10644         /* Function tracing may start here (via kernel command line) */
10645         init_function_trace();
10646
10647         /* All seems OK, enable tracing */
10648         tracing_disabled = 0;
10649
10650         atomic_notifier_chain_register(&panic_notifier_list,
10651                                        &trace_panic_notifier);
10652
10653         register_die_notifier(&trace_die_notifier);
10654
10655         global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10656
10657         INIT_LIST_HEAD(&global_trace.systems);
10658         INIT_LIST_HEAD(&global_trace.events);
10659         INIT_LIST_HEAD(&global_trace.hist_vars);
10660         INIT_LIST_HEAD(&global_trace.err_log);
10661         list_add(&global_trace.list, &ftrace_trace_arrays);
10662
10663         apply_trace_boot_options();
10664
10665         register_snapshot_cmd();
10666
10667         test_can_verify();
10668
10669         return 0;
10670
10671 out_free_pipe_cpumask:
10672         free_cpumask_var(global_trace.pipe_cpumask);
10673 out_free_savedcmd:
10674         free_saved_cmdlines_buffer(savedcmd);
10675 out_free_temp_buffer:
10676         ring_buffer_free(temp_buffer);
10677 out_rm_hp_state:
10678         cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10679 out_free_cpumask:
10680         free_cpumask_var(global_trace.tracing_cpumask);
10681 out_free_buffer_mask:
10682         free_cpumask_var(tracing_buffer_mask);
10683 out:
10684         return ret;
10685 }
10686
10687 void __init ftrace_boot_snapshot(void)
10688 {
10689 #ifdef CONFIG_TRACER_MAX_TRACE
10690         struct trace_array *tr;
10691
10692         if (!snapshot_at_boot)
10693                 return;
10694
10695         list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10696                 if (!tr->allocated_snapshot)
10697                         continue;
10698
10699                 tracing_snapshot_instance(tr);
10700                 trace_array_puts(tr, "** Boot snapshot taken **\n");
10701         }
10702 #endif
10703 }
10704
10705 void __init early_trace_init(void)
10706 {
10707         if (tracepoint_printk) {
10708                 tracepoint_print_iter =
10709                         kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10710                 if (MEM_FAIL(!tracepoint_print_iter,
10711                              "Failed to allocate trace iterator\n"))
10712                         tracepoint_printk = 0;
10713                 else
10714                         static_key_enable(&tracepoint_printk_key.key);
10715         }
10716         tracer_alloc_buffers();
10717
10718         init_events();
10719 }
10720
10721 void __init trace_init(void)
10722 {
10723         trace_event_init();
10724
10725         if (boot_instance_index)
10726                 enable_instances();
10727 }
10728
10729 __init static void clear_boot_tracer(void)
10730 {
10731         /*
10732          * The default bootup tracer name is stored in an init section.
10733          * This function is called at late init. If we did not
10734          * find the boot tracer, then clear it out, to prevent
10735          * later registration from accessing the buffer that is
10736          * about to be freed.
10737          */
10738         if (!default_bootup_tracer)
10739                 return;
10740
10741         printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10742                default_bootup_tracer);
10743         default_bootup_tracer = NULL;
10744 }
10745
10746 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10747 __init static void tracing_set_default_clock(void)
10748 {
10749         /* sched_clock_stable() is determined in late_initcall */
10750         if (!trace_boot_clock && !sched_clock_stable()) {
10751                 if (security_locked_down(LOCKDOWN_TRACEFS)) {
10752                         pr_warn("Can not set tracing clock due to lockdown\n");
10753                         return;
10754                 }
10755
10756                 printk(KERN_WARNING
10757                        "Unstable clock detected, switching default tracing clock to \"global\"\n"
10758                        "If you want to keep using the local clock, then add:\n"
10759                        "  \"trace_clock=local\"\n"
10760                        "on the kernel command line\n");
10761                 tracing_set_clock(&global_trace, "global");
10762         }
10763 }
10764 #else
10765 static inline void tracing_set_default_clock(void) { }
10766 #endif
10767
10768 __init static int late_trace_init(void)
10769 {
10770         if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10771                 static_key_disable(&tracepoint_printk_key.key);
10772                 tracepoint_printk = 0;
10773         }
10774
10775         tracing_set_default_clock();
10776         clear_boot_tracer();
10777         return 0;
10778 }
10779
10780 late_initcall_sync(late_trace_init);