// SPDX-License-Identifier: GPL-2.0
/*
 * ring buffer based function tracer
 *
 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
 *
 * Originally taken from the RT patch by:
 *    Arnaldo Carvalho de Melo <acme@redhat.com>
 *
 * Based on code from the latency_tracer, that is:
 *  Copyright (C) 2004-2006 Ingo Molnar
 *  Copyright (C) 2004 Nadia Yvette Chambers
 */
#include <linux/ring_buffer.h>
#include <generated/utsrelease.h>
#include <linux/stacktrace.h>
#include <linux/writeback.h>
#include <linux/kallsyms.h>
#include <linux/security.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/irqflags.h>
#include <linux/debugfs.h>
#include <linux/tracefs.h>
#include <linux/pagemap.h>
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/splice.h>
#include <linux/kdebug.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/poll.h>
#include <linux/nmi.h>
#include <linux/fs.h>
#include <linux/trace.h>
#include <linux/sched/clock.h>
#include <linux/sched/rt.h>
#include <linux/fsnotify.h>
#include <linux/irq_work.h>
#include <linux/workqueue.h>

#include "trace.h"
#include "trace_output.h"
/*
 * On boot up, the ring buffer is set to the minimum size, so that
 * we do not waste memory on systems that are not using tracing.
 */
bool ring_buffer_expanded;
/*
 * We need to change this state when a selftest is running.
 * A selftest will lurk into the ring-buffer to count the
 * entries inserted during the selftest although some concurrent
 * insertions into the ring-buffer such as trace_printk could occur
 * at the same time, giving false positive or negative results.
 */
static bool __read_mostly tracing_selftest_running;
/*
 * If boot-time tracing including tracers/events via kernel cmdline
 * is running, we do not want to run SELFTEST.
 */
bool __read_mostly tracing_selftest_disabled;
#ifdef CONFIG_FTRACE_STARTUP_TEST
void __init disable_tracing_selftest(const char *reason)
{
	if (!tracing_selftest_disabled) {
		tracing_selftest_disabled = true;
		pr_info("Ftrace startup test is disabled due to %s\n", reason);
	}
}
#endif
/* Pipe tracepoints to printk */
struct trace_iterator *tracepoint_print_iter;
int tracepoint_printk;
static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
/* For tracers that don't implement custom flags */
static struct tracer_opt dummy_tracer_opt[] = {
	{ }
};

static int
dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
{
	return 0;
}
/*
 * To prevent the comm cache from being overwritten when no
 * tracing is active, only save the comm when a trace event
 * occurred.
 */
static DEFINE_PER_CPU(bool, trace_taskinfo_save);
/*
 * Kill all tracing for good (never come back).
 * It is initialized to 1 but will turn to zero if the initialization
 * of the tracer is successful. But that is the only place that sets
 * this back to zero.
 */
static int tracing_disabled = 1;
cpumask_var_t __read_mostly	tracing_buffer_mask;
/*
 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
 *
 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
 * is set, then ftrace_dump is called. This will output the contents
 * of the ftrace buffers to the console. This is very useful for
 * capturing traces that lead to crashes and outputting them to a
 * serial console.
 *
 * It is off by default, but you can enable it with either specifying
 * "ftrace_dump_on_oops" in the kernel command line, or setting
 * /proc/sys/kernel/ftrace_dump_on_oops
 * Set 1 if you want to dump buffers of all CPUs
 * Set 2 if you want to dump the buffer of the CPU that triggered oops
 */

enum ftrace_dump_mode ftrace_dump_on_oops;
/* When set, tracing will stop when a WARN*() is hit */
int __disable_trace_on_warning;
#ifdef CONFIG_TRACE_EVAL_MAP_FILE
/* Map of enums to their values, for "eval_map" file */
struct trace_eval_map_head {
	struct module			*mod;
	unsigned long			length;
};

union trace_eval_map_item;

struct trace_eval_map_tail {
	/*
	 * "end" is first and points to NULL as it must be different
	 * than "mod" or "eval_string"
	 */
	union trace_eval_map_item	*next;
	const char			*end;	/* points to NULL */
};

static DEFINE_MUTEX(trace_eval_mutex);

/*
 * The trace_eval_maps are saved in an array with two extra elements,
 * one at the beginning, and one at the end. The beginning item contains
 * the count of the saved maps (head.length), and the module they
 * belong to if not built in (head.mod). The ending item contains a
 * pointer to the next array of saved eval_map items.
 */
union trace_eval_map_item {
	struct trace_eval_map		map;
	struct trace_eval_map_head	head;
	struct trace_eval_map_tail	tail;
};

static union trace_eval_map_item *trace_eval_maps;
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */
int tracing_set_tracer(struct trace_array *tr, const char *buf);
static void ftrace_trace_userstack(struct trace_array *tr,
				   struct trace_buffer *buffer,
				   unsigned int trace_ctx);

#define MAX_TRACER_SIZE		100
static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
static char *default_bootup_tracer;

static bool allocate_snapshot;
static int __init set_cmdline_ftrace(char *str)
{
	strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
	default_bootup_tracer = bootup_tracer_buf;
	/* We are using ftrace early, expand it */
	ring_buffer_expanded = true;
	return 1;
}
__setup("ftrace=", set_cmdline_ftrace);
static int __init set_ftrace_dump_on_oops(char *str)
{
	if (*str++ != '=' || !*str) {
		ftrace_dump_on_oops = DUMP_ALL;
		return 1;
	}

	if (!strcmp("orig_cpu", str)) {
		ftrace_dump_on_oops = DUMP_ORIG;
		return 1;
	}

	return 0;
}
__setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
static int __init stop_trace_on_warning(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		__disable_trace_on_warning = 1;
	return 1;
}
__setup("traceoff_on_warning", stop_trace_on_warning);
static int __init boot_alloc_snapshot(char *str)
{
	allocate_snapshot = true;
	/* We also need the main ring buffer expanded */
	ring_buffer_expanded = true;
	return 1;
}
__setup("alloc_snapshot", boot_alloc_snapshot);
static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;

static int __init set_trace_boot_options(char *str)
{
	strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
	return 1;
}
__setup("trace_options=", set_trace_boot_options);
static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
static char *trace_boot_clock __initdata;

static int __init set_trace_boot_clock(char *str)
{
	strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
	trace_boot_clock = trace_boot_clock_buf;
	return 1;
}
__setup("trace_clock=", set_trace_boot_clock);
static int __init set_tracepoint_printk(char *str)
{
	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
		tracepoint_printk = 1;
	return 1;
}
__setup("tp_printk", set_tracepoint_printk);
unsigned long long ns2usecs(u64 nsec)
{
	nsec += 500;
	do_div(nsec, 1000);
	return nsec;
}
static void
trace_process_export(struct trace_export *export,
		     struct ring_buffer_event *event, int flag)
{
	struct trace_entry *entry;
	unsigned int size = 0;

	if (export->flags & flag) {
		entry = ring_buffer_event_data(event);
		size = ring_buffer_event_length(event);
		export->write(export, entry, size);
	}
}
280 static DEFINE_MUTEX(ftrace_export_lock);
282 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
284 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
285 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
286 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
288 static inline void ftrace_exports_enable(struct trace_export *export)
290 if (export->flags & TRACE_EXPORT_FUNCTION)
291 static_branch_inc(&trace_function_exports_enabled);
293 if (export->flags & TRACE_EXPORT_EVENT)
294 static_branch_inc(&trace_event_exports_enabled);
296 if (export->flags & TRACE_EXPORT_MARKER)
297 static_branch_inc(&trace_marker_exports_enabled);
300 static inline void ftrace_exports_disable(struct trace_export *export)
302 if (export->flags & TRACE_EXPORT_FUNCTION)
303 static_branch_dec(&trace_function_exports_enabled);
305 if (export->flags & TRACE_EXPORT_EVENT)
306 static_branch_dec(&trace_event_exports_enabled);
308 if (export->flags & TRACE_EXPORT_MARKER)
309 static_branch_dec(&trace_marker_exports_enabled);
312 static void ftrace_exports(struct ring_buffer_event *event, int flag)
314 struct trace_export *export;
316 preempt_disable_notrace();
318 export = rcu_dereference_raw_check(ftrace_exports_list);
320 trace_process_export(export, event, flag);
321 export = rcu_dereference_raw_check(export->next);
324 preempt_enable_notrace();
328 add_trace_export(struct trace_export **list, struct trace_export *export)
330 rcu_assign_pointer(export->next, *list);
332 * We are entering export into the list but another
333 * CPU might be walking that list. We need to make sure
334 * the export->next pointer is valid before another CPU sees
335 * the export pointer included into the list.
337 rcu_assign_pointer(*list, export);
341 rm_trace_export(struct trace_export **list, struct trace_export *export)
343 struct trace_export **p;
345 for (p = list; *p != NULL; p = &(*p)->next)
352 rcu_assign_pointer(*p, (*p)->next);
358 add_ftrace_export(struct trace_export **list, struct trace_export *export)
360 ftrace_exports_enable(export);
362 add_trace_export(list, export);
366 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
370 ret = rm_trace_export(list, export);
371 ftrace_exports_disable(export);
376 int register_ftrace_export(struct trace_export *export)
378 if (WARN_ON_ONCE(!export->write))
381 mutex_lock(&ftrace_export_lock);
383 add_ftrace_export(&ftrace_exports_list, export);
385 mutex_unlock(&ftrace_export_lock);
389 EXPORT_SYMBOL_GPL(register_ftrace_export);
391 int unregister_ftrace_export(struct trace_export *export)
395 mutex_lock(&ftrace_export_lock);
397 ret = rm_ftrace_export(&ftrace_exports_list, export);
399 mutex_unlock(&ftrace_export_lock);
403 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
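
/*
 * Illustrative sketch (not from the original file): a minimal trace_export
 * that dumps exported records via printk. The example_* names and the
 * 64-byte cap are made up for the example; struct trace_export, the
 * TRACE_EXPORT_* flags and register/unregister_ftrace_export() come from
 * <linux/trace.h> and the code above. Kept under #if 0 so it is not built.
 */
#if 0
static void example_export_write(struct trace_export *export,
				 const void *entry, unsigned int size)
{
	/* "entry" is the raw trace entry, "size" its length in bytes */
	print_hex_dump_bytes("trace: ", DUMP_PREFIX_OFFSET, entry,
			     min_t(unsigned int, size, 64));
}

static struct trace_export example_export = {
	.write	= example_export_write,
	.flags	= TRACE_EXPORT_EVENT | TRACE_EXPORT_MARKER,
};

/*
 * register_ftrace_export(&example_export) starts feeding events to the
 * callback; unregister_ftrace_export(&example_export) stops it again.
 */
#endif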
405 /* trace_flags holds trace_options default values */
406 #define TRACE_DEFAULT_FLAGS \
407 (FUNCTION_DEFAULT_FLAGS | \
408 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
409 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
410 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
411 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS | \
414 /* trace_options that are only supported by global_trace */
415 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
416 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
418 /* trace_flags that are default zero for instances */
419 #define ZEROED_TRACE_FLAGS \
420 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
423 * The global_trace is the descriptor that holds the top-level tracing
424 * buffers for the live tracing.
426 static struct trace_array global_trace = {
427 .trace_flags = TRACE_DEFAULT_FLAGS,
430 LIST_HEAD(ftrace_trace_arrays);
432 int trace_array_get(struct trace_array *this_tr)
434 struct trace_array *tr;
437 mutex_lock(&trace_types_lock);
438 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
445 mutex_unlock(&trace_types_lock);
450 static void __trace_array_put(struct trace_array *this_tr)
452 WARN_ON(!this_tr->ref);
457 * trace_array_put - Decrement the reference counter for this trace array.
458 * @this_tr : pointer to the trace array
460 * NOTE: Use this when we no longer need the trace array returned by
461 * trace_array_get_by_name(). This ensures the trace array can be later
465 void trace_array_put(struct trace_array *this_tr)
470 mutex_lock(&trace_types_lock);
471 __trace_array_put(this_tr);
472 mutex_unlock(&trace_types_lock);
474 EXPORT_SYMBOL_GPL(trace_array_put);
476 int tracing_check_open_get_tr(struct trace_array *tr)
480 ret = security_locked_down(LOCKDOWN_TRACEFS);
484 if (tracing_disabled)
487 if (tr && trace_array_get(tr) < 0)
493 int call_filter_check_discard(struct trace_event_call *call, void *rec,
494 struct trace_buffer *buffer,
495 struct ring_buffer_event *event)
497 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
498 !filter_match_preds(call->filter, rec)) {
499 __trace_event_discard_commit(buffer, event);
506 void trace_free_pid_list(struct trace_pid_list *pid_list)
508 vfree(pid_list->pids);
513 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
514 * @filtered_pids: The list of pids to check
515 * @search_pid: The PID to find in @filtered_pids
517 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
520 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
523 * If pid_max changed after filtered_pids was created, we
524 * by default ignore all pids greater than the previous pid_max.
526 if (search_pid >= filtered_pids->pid_max)
529 return test_bit(search_pid, filtered_pids->pids);
533 * trace_ignore_this_task - should a task be ignored for tracing
534 * @filtered_pids: The list of pids to check
535 * @filtered_no_pids: The list of pids not to be traced
536 * @task: The task that should be ignored if not filtered
538 * Checks if @task should be traced or not from @filtered_pids.
539 * Returns true if @task should *NOT* be traced.
540 * Returns false if @task should be traced.
543 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
544 struct trace_pid_list *filtered_no_pids,
545 struct task_struct *task)
548 * If filtered_no_pids is not empty, and the task's pid is listed
549 * in filtered_no_pids, then return true.
550 * Otherwise, if filtered_pids is empty, that means we can
551 * trace all tasks. If it has content, then only trace pids
552 * within filtered_pids.
555 return (filtered_pids &&
556 !trace_find_filtered_pid(filtered_pids, task->pid)) ||
558 trace_find_filtered_pid(filtered_no_pids, task->pid));
/**
 * trace_filter_add_remove_task - Add or remove a task from a pid_list
 * @pid_list: The list to modify
 * @self: The current task for fork or NULL for exit
 * @task: The task to add or remove
 *
 * If adding a task, if @self is defined, the task is only added if @self
 * is also included in @pid_list. This happens on fork and tasks should
 * only be added when the parent is listed. If @self is NULL, then the
 * @task pid will be removed from the list, which would happen on exit
 * of a task.
 */
573 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
574 struct task_struct *self,
575 struct task_struct *task)
580 /* For forks, we only add if the forking task is listed */
582 if (!trace_find_filtered_pid(pid_list, self->pid))
586 /* Sorry, but we don't support pid_max changing after setting */
587 if (task->pid >= pid_list->pid_max)
590 /* "self" is set for forks, and NULL for exits */
592 set_bit(task->pid, pid_list->pids);
594 clear_bit(task->pid, pid_list->pids);
598 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
599 * @pid_list: The pid list to show
600 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
601 * @pos: The position of the file
603 * This is used by the seq_file "next" operation to iterate the pids
604 * listed in a trace_pid_list structure.
606 * Returns the pid+1 as we want to display pid of zero, but NULL would
607 * stop the iteration.
609 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
611 unsigned long pid = (unsigned long)v;
615 /* pid already is +1 of the actual previous bit */
616 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
618 /* Return pid + 1 to allow zero to be represented */
619 if (pid < pid_list->pid_max)
620 return (void *)(pid + 1);
/**
 * trace_pid_start - Used for seq_file to start reading pid lists
 * @pid_list: The pid list to show
 * @pos: The position of the file
 *
 * This is used by the seq_file "start" operation to start the iteration
 * of listing pids.
 *
 * Returns the pid+1 as we want to display pid of zero, but NULL would
 * stop the iteration.
 */
636 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
641 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
642 if (pid >= pid_list->pid_max)
645 /* Return pid + 1 so that zero can be the exit value */
646 for (pid++; pid && l < *pos;
647 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
/**
 * trace_pid_show - show the current pid in seq_file processing
 * @m: The seq_file structure to write into
 * @v: A void pointer of the pid (+1) value to display
 *
 * Can be directly used by seq_file operations to display the current
 * pid value.
 */
int trace_pid_show(struct seq_file *m, void *v)
{
	unsigned long pid = (unsigned long)v - 1;

	seq_printf(m, "%lu\n", pid);
	return 0;
}
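
/*
 * Illustrative sketch (not from the original file): the three pid-list
 * helpers above are meant to back a seq_file interface. "example_pid_list"
 * and the example_* names are hypothetical; only trace_pid_start(),
 * trace_pid_next() and trace_pid_show() are real. Kept under #if 0.
 */
#if 0
static struct trace_pid_list *example_pid_list;

static void *example_pids_start(struct seq_file *m, loff_t *pos)
{
	return trace_pid_start(example_pid_list, pos);
}

static void *example_pids_next(struct seq_file *m, void *v, loff_t *pos)
{
	return trace_pid_next(example_pid_list, v, pos);
}

static void example_pids_stop(struct seq_file *m, void *v)
{
}

static const struct seq_operations example_pids_seq_ops = {
	.start	= example_pids_start,
	.next	= example_pids_next,
	.stop	= example_pids_stop,
	.show	= trace_pid_show,
};
#endif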
668 /* 128 should be much more than enough */
669 #define PID_BUF_SIZE 127
671 int trace_pid_write(struct trace_pid_list *filtered_pids,
672 struct trace_pid_list **new_pid_list,
673 const char __user *ubuf, size_t cnt)
675 struct trace_pid_list *pid_list;
676 struct trace_parser parser;
684 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
688 * Always recreate a new array. The write is an all or nothing
689 * operation. Always create a new array when adding new pids by
690 * the user. If the operation fails, then the current list is
693 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
695 trace_parser_put(&parser);
699 pid_list->pid_max = READ_ONCE(pid_max);
701 /* Only truncating will shrink pid_max */
702 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
703 pid_list->pid_max = filtered_pids->pid_max;
705 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
706 if (!pid_list->pids) {
707 trace_parser_put(&parser);
713 /* copy the current bits to the new max */
714 for_each_set_bit(pid, filtered_pids->pids,
715 filtered_pids->pid_max) {
716 set_bit(pid, pid_list->pids);
725 ret = trace_get_user(&parser, ubuf, cnt, &pos);
726 if (ret < 0 || !trace_parser_loaded(&parser))
734 if (kstrtoul(parser.buffer, 0, &val))
736 if (val >= pid_list->pid_max)
741 set_bit(pid, pid_list->pids);
744 trace_parser_clear(&parser);
747 trace_parser_put(&parser);
750 trace_free_pid_list(pid_list);
755 /* Cleared the list of pids */
756 trace_free_pid_list(pid_list);
761 *new_pid_list = pid_list;
766 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
770 /* Early boot up does not have a buffer yet */
772 return trace_clock_local();
774 ts = ring_buffer_time_stamp(buf->buffer);
775 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
780 u64 ftrace_now(int cpu)
782 return buffer_ftrace_now(&global_trace.array_buffer, cpu);
786 * tracing_is_enabled - Show if global_trace has been enabled
788 * Shows if the global trace has been enabled or not. It uses the
789 * mirror flag "buffer_disabled" to be used in fast paths such as for
790 * the irqsoff tracer. But it may be inaccurate due to races. If you
791 * need to know the accurate state, use tracing_is_on() which is a little
792 * slower, but accurate.
794 int tracing_is_enabled(void)
797 * For quick access (irqsoff uses this in fast path), just
798 * return the mirror variable of the state of the ring buffer.
799 * It's a little racy, but we don't really care.
802 return !global_trace.buffer_disabled;
/*
 * trace_buf_size is the size in bytes that is allocated
 * for a buffer. Note, the number of bytes is always rounded
 * to page size.
 *
 * This number is purposely set to a low number of 16384.
 * If the dump on oops happens, it will be much appreciated
 * to not have to wait for all that output. Anyway this can be
 * boot time and run time configurable.
 */
#define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */

static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
/* trace_types holds a linked list of available tracers. */
static struct tracer		*trace_types __read_mostly;

/*
 * trace_types_lock is used to protect the trace_types list.
 */
DEFINE_MUTEX(trace_types_lock);
/*
 * serialize the access of the ring buffer
 *
 * ring buffer serializes readers, but it is low level protection.
 * The validity of the events (which are returned by ring_buffer_peek() etc.)
 * is not protected by the ring buffer.
 *
 * The content of events may become garbage if we allow another process to
 * consume these events concurrently:
 *   A) the page of the consumed events may become a normal page
 *      (not reader page) in ring buffer, and this page will be rewritten
 *      by events producer.
 *   B) The page of the consumed events may become a page for splice_read,
 *      and this page will be returned to system.
 *
 * These primitives allow multiple processes to access different cpu ring
 * buffers concurrently.
 *
 * These primitives don't distinguish read-only and read-consume access.
 * Multiple read-only accesses are also serialized.
 */
850 static DECLARE_RWSEM(all_cpu_access_lock);
851 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
853 static inline void trace_access_lock(int cpu)
855 if (cpu == RING_BUFFER_ALL_CPUS) {
856 /* gain it for accessing the whole ring buffer. */
857 down_write(&all_cpu_access_lock);
859 /* gain it for accessing a cpu ring buffer. */
861 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
862 down_read(&all_cpu_access_lock);
864 /* Secondly block other access to this @cpu ring buffer. */
865 mutex_lock(&per_cpu(cpu_access_lock, cpu));
869 static inline void trace_access_unlock(int cpu)
871 if (cpu == RING_BUFFER_ALL_CPUS) {
872 up_write(&all_cpu_access_lock);
874 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
875 up_read(&all_cpu_access_lock);
879 static inline void trace_access_lock_init(void)
883 for_each_possible_cpu(cpu)
884 mutex_init(&per_cpu(cpu_access_lock, cpu));
889 static DEFINE_MUTEX(access_lock);
891 static inline void trace_access_lock(int cpu)
894 mutex_lock(&access_lock);
897 static inline void trace_access_unlock(int cpu)
900 mutex_unlock(&access_lock);
903 static inline void trace_access_lock_init(void)
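
/*
 * Illustrative sketch (not from the original file): a consuming reader is
 * expected to bracket its ring buffer accesses with the helpers above.
 * "example_read_one" is a hypothetical name; ring_buffer_consume() and
 * ring_buffer_event_data() are the normal consumer-side ring buffer calls.
 * Kept under #if 0 so it is not built.
 */
#if 0
static void example_read_one(struct trace_buffer *buffer, int cpu)
{
	struct ring_buffer_event *event;
	unsigned long lost_events;
	u64 ts;

	trace_access_lock(cpu);
	event = ring_buffer_consume(buffer, cpu, &ts, &lost_events);
	if (event) {
		/* the payload starts at ring_buffer_event_data(event) */
		void *data = ring_buffer_event_data(event);

		(void)data;
	}
	trace_access_unlock(cpu);
}
#endif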
909 #ifdef CONFIG_STACKTRACE
910 static void __ftrace_trace_stack(struct trace_buffer *buffer,
911 unsigned int trace_ctx,
912 int skip, struct pt_regs *regs);
913 static inline void ftrace_trace_stack(struct trace_array *tr,
914 struct trace_buffer *buffer,
915 unsigned int trace_ctx,
916 int skip, struct pt_regs *regs);
919 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
920 unsigned int trace_ctx,
921 int skip, struct pt_regs *regs)
924 static inline void ftrace_trace_stack(struct trace_array *tr,
925 struct trace_buffer *buffer,
926 unsigned long trace_ctx,
927 int skip, struct pt_regs *regs)
933 static __always_inline void
934 trace_event_setup(struct ring_buffer_event *event,
935 int type, unsigned int trace_ctx)
937 struct trace_entry *ent = ring_buffer_event_data(event);
939 tracing_generic_entry_update(ent, type, trace_ctx);
942 static __always_inline struct ring_buffer_event *
943 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
946 unsigned int trace_ctx)
948 struct ring_buffer_event *event;
950 event = ring_buffer_lock_reserve(buffer, len);
952 trace_event_setup(event, type, trace_ctx);
957 void tracer_tracing_on(struct trace_array *tr)
959 if (tr->array_buffer.buffer)
960 ring_buffer_record_on(tr->array_buffer.buffer);
962 * This flag is looked at when buffers haven't been allocated
963 * yet, or by some tracers (like irqsoff), that just want to
964 * know if the ring buffer has been disabled, but it can handle
965 * races of where it gets disabled but we still do a record.
966 * As the check is in the fast path of the tracers, it is more
967 * important to be fast than accurate.
969 tr->buffer_disabled = 0;
970 /* Make the flag seen by readers */
975 * tracing_on - enable tracing buffers
977 * This function enables tracing buffers that may have been
978 * disabled with tracing_off.
980 void tracing_on(void)
982 tracer_tracing_on(&global_trace);
984 EXPORT_SYMBOL_GPL(tracing_on);
987 static __always_inline void
988 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
990 __this_cpu_write(trace_taskinfo_save, true);
992 /* If this is the temp buffer, we need to commit fully */
993 if (this_cpu_read(trace_buffered_event) == event) {
994 /* Length is in event->array[0] */
995 ring_buffer_write(buffer, event->array[0], &event->array[1]);
996 /* Release the temp buffer */
997 this_cpu_dec(trace_buffered_event_cnt);
999 ring_buffer_unlock_commit(buffer, event);
1003 * __trace_puts - write a constant string into the trace buffer.
1004 * @ip: The address of the caller
1005 * @str: The constant string to write
1006 * @size: The size of the string.
1008 int __trace_puts(unsigned long ip, const char *str, int size)
1010 struct ring_buffer_event *event;
1011 struct trace_buffer *buffer;
1012 struct print_entry *entry;
1013 unsigned int trace_ctx;
1016 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1019 if (unlikely(tracing_selftest_running || tracing_disabled))
1022 alloc = sizeof(*entry) + size + 2; /* possible \n added */
1024 trace_ctx = tracing_gen_ctx();
1025 buffer = global_trace.array_buffer.buffer;
1026 ring_buffer_nest_start(buffer);
1027 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1034 entry = ring_buffer_event_data(event);
1037 memcpy(&entry->buf, str, size);
1039 /* Add a newline if necessary */
1040 if (entry->buf[size - 1] != '\n') {
1041 entry->buf[size] = '\n';
1042 entry->buf[size + 1] = '\0';
1044 entry->buf[size] = '\0';
1046 __buffer_unlock_commit(buffer, event);
1047 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1049 ring_buffer_nest_end(buffer);
1052 EXPORT_SYMBOL_GPL(__trace_puts);
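
/*
 * Note: callers normally reach __trace_puts() through the trace_puts()
 * macro rather than calling it directly, e.g.
 *
 *	trace_puts("hit the slow path\n");
 *
 * which supplies the caller address and string length on their behalf.
 */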
1055 * __trace_bputs - write the pointer to a constant string into trace buffer
1056 * @ip: The address of the caller
1057 * @str: The constant string to write to the buffer to
1059 int __trace_bputs(unsigned long ip, const char *str)
1061 struct ring_buffer_event *event;
1062 struct trace_buffer *buffer;
1063 struct bputs_entry *entry;
1064 unsigned int trace_ctx;
1065 int size = sizeof(struct bputs_entry);
1068 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1071 if (unlikely(tracing_selftest_running || tracing_disabled))
1074 trace_ctx = tracing_gen_ctx();
1075 buffer = global_trace.array_buffer.buffer;
1077 ring_buffer_nest_start(buffer);
1078 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1083 entry = ring_buffer_event_data(event);
1087 __buffer_unlock_commit(buffer, event);
1088 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1092 ring_buffer_nest_end(buffer);
1095 EXPORT_SYMBOL_GPL(__trace_bputs);
1097 #ifdef CONFIG_TRACER_SNAPSHOT
1098 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1101 struct tracer *tracer = tr->current_trace;
1102 unsigned long flags;
1105 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1106 internal_trace_puts("*** snapshot is being ignored ***\n");
1110 if (!tr->allocated_snapshot) {
1111 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
1112 internal_trace_puts("*** stopping trace here! ***\n");
1117 /* Note, snapshot can not be used when the tracer uses it */
1118 if (tracer->use_max_tr) {
1119 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
1120 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
1124 local_irq_save(flags);
1125 update_max_tr(tr, current, smp_processor_id(), cond_data);
1126 local_irq_restore(flags);
1129 void tracing_snapshot_instance(struct trace_array *tr)
1131 tracing_snapshot_instance_cond(tr, NULL);
1135 * tracing_snapshot - take a snapshot of the current buffer.
1137 * This causes a swap between the snapshot buffer and the current live
1138 * tracing buffer. You can use this to take snapshots of the live
1139 * trace when some condition is triggered, but continue to trace.
1141 * Note, make sure to allocate the snapshot with either
1142 * a tracing_snapshot_alloc(), or by doing it manually
1143 * with: echo 1 > /sys/kernel/debug/tracing/snapshot
1145 * If the snapshot buffer is not allocated, it will stop tracing.
1146 * Basically making a permanent snapshot.
1148 void tracing_snapshot(void)
1150 struct trace_array *tr = &global_trace;
1152 tracing_snapshot_instance(tr);
1154 EXPORT_SYMBOL_GPL(tracing_snapshot);
1157 * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1158 * @tr: The tracing instance to snapshot
1159 * @cond_data: The data to be tested conditionally, and possibly saved
1161 * This is the same as tracing_snapshot() except that the snapshot is
1162 * conditional - the snapshot will only happen if the
1163 * cond_snapshot.update() implementation receiving the cond_data
1164 * returns true, which means that the trace array's cond_snapshot
1165 * update() operation used the cond_data to determine whether the
1166 * snapshot should be taken, and if it was, presumably saved it along
1167 * with the snapshot.
1169 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1171 tracing_snapshot_instance_cond(tr, cond_data);
1173 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1176 * tracing_snapshot_cond_data - get the user data associated with a snapshot
1177 * @tr: The tracing instance
1179 * When the user enables a conditional snapshot using
1180 * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1181 * with the snapshot. This accessor is used to retrieve it.
1183 * Should not be called from cond_snapshot.update(), since it takes
1184 * the tr->max_lock lock, which the code calling
1185 * cond_snapshot.update() has already done.
1187 * Returns the cond_data associated with the trace array's snapshot.
1189 void *tracing_cond_snapshot_data(struct trace_array *tr)
1191 void *cond_data = NULL;
1193 arch_spin_lock(&tr->max_lock);
1195 if (tr->cond_snapshot)
1196 cond_data = tr->cond_snapshot->cond_data;
1198 arch_spin_unlock(&tr->max_lock);
1202 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1204 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1205 struct array_buffer *size_buf, int cpu_id);
1206 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1208 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1212 if (!tr->allocated_snapshot) {
1214 /* allocate spare buffer */
1215 ret = resize_buffer_duplicate_size(&tr->max_buffer,
1216 &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1220 tr->allocated_snapshot = true;
static void free_snapshot(struct trace_array *tr)
{
	/*
	 * We don't free the ring buffer. Instead, resize it because
	 * the max_tr ring buffer has some state (e.g. ring->clock) and
	 * we want to preserve it.
	 */
	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
	set_buffer_entries(&tr->max_buffer, 1);
	tracing_reset_online_cpus(&tr->max_buffer);
	tr->allocated_snapshot = false;
}
1240 * tracing_alloc_snapshot - allocate snapshot buffer.
1242 * This only allocates the snapshot buffer if it isn't already
1243 * allocated - it doesn't also take a snapshot.
1245 * This is meant to be used in cases where the snapshot buffer needs
1246 * to be set up for events that can't sleep but need to be able to
1247 * trigger a snapshot.
1249 int tracing_alloc_snapshot(void)
1251 struct trace_array *tr = &global_trace;
1254 ret = tracing_alloc_snapshot_instance(tr);
1259 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1262 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1264 * This is similar to tracing_snapshot(), but it will allocate the
1265 * snapshot buffer if it isn't already allocated. Use this only
1266 * where it is safe to sleep, as the allocation may sleep.
1268 * This causes a swap between the snapshot buffer and the current live
1269 * tracing buffer. You can use this to take snapshots of the live
1270 * trace when some condition is triggered, but continue to trace.
1272 void tracing_snapshot_alloc(void)
1276 ret = tracing_alloc_snapshot();
1282 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
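
/*
 * Illustrative sketch (not from the original file): a caller that wants the
 * trace around a rare error could allocate the snapshot buffer once from a
 * sleepable context and then trigger snapshots from the (possibly atomic)
 * error path. The example_* names are hypothetical. Kept under #if 0.
 */
#if 0
static int example_init(void)
{
	/* May sleep: do the allocation up front. */
	return tracing_alloc_snapshot();
}

static void example_error_path(void)
{
	/* Safe once the buffer exists; swaps live and snapshot buffers. */
	tracing_snapshot();
}
#endif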
1285 * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1286 * @tr: The tracing instance
1287 * @cond_data: User data to associate with the snapshot
1288 * @update: Implementation of the cond_snapshot update function
1290 * Check whether the conditional snapshot for the given instance has
1291 * already been enabled, or if the current tracer is already using a
1292 * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1293 * save the cond_data and update function inside.
1295 * Returns 0 if successful, error otherwise.
1297 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1298 cond_update_fn_t update)
1300 struct cond_snapshot *cond_snapshot;
1303 cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1307 cond_snapshot->cond_data = cond_data;
1308 cond_snapshot->update = update;
1310 mutex_lock(&trace_types_lock);
1312 ret = tracing_alloc_snapshot_instance(tr);
1316 if (tr->current_trace->use_max_tr) {
1322 * The cond_snapshot can only change to NULL without the
1323 * trace_types_lock. We don't care if we race with it going
1324 * to NULL, but we want to make sure that it's not set to
1325 * something other than NULL when we get here, which we can
1326 * do safely with only holding the trace_types_lock and not
1327 * having to take the max_lock.
1329 if (tr->cond_snapshot) {
1334 arch_spin_lock(&tr->max_lock);
1335 tr->cond_snapshot = cond_snapshot;
1336 arch_spin_unlock(&tr->max_lock);
1338 mutex_unlock(&trace_types_lock);
1343 mutex_unlock(&trace_types_lock);
1344 kfree(cond_snapshot);
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1350 * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1351 * @tr: The tracing instance
1353 * Check whether the conditional snapshot for the given instance is
1354 * enabled; if so, free the cond_snapshot associated with it,
1355 * otherwise return -EINVAL.
1357 * Returns 0 if successful, error otherwise.
1359 int tracing_snapshot_cond_disable(struct trace_array *tr)
1363 arch_spin_lock(&tr->max_lock);
1365 if (!tr->cond_snapshot)
1368 kfree(tr->cond_snapshot);
1369 tr->cond_snapshot = NULL;
1372 arch_spin_unlock(&tr->max_lock);
1376 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
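
/*
 * Illustrative sketch (not from the original file): wiring up a conditional
 * snapshot. The example_* names and the threshold are made up; the update
 * callback is assumed to follow the cond_update_fn_t prototype
 * bool (*)(struct trace_array *tr, void *cond_data). Kept under #if 0.
 */
#if 0
/* Return true only when the observed value crosses a threshold. */
static bool example_snapshot_update(struct trace_array *tr, void *cond_data)
{
	unsigned long *worst = cond_data;

	return *worst > 100;
}

static int example_arm(struct trace_array *tr, unsigned long *worst)
{
	/* Fails with -EBUSY if a conditional snapshot is already enabled. */
	return tracing_snapshot_cond_enable(tr, worst, example_snapshot_update);
}

static void example_maybe_snapshot(struct trace_array *tr, unsigned long *worst)
{
	/* Only takes the snapshot if example_snapshot_update() returns true. */
	tracing_snapshot_cond(tr, worst);
}
#endif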
1378 void tracing_snapshot(void)
1380 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1382 EXPORT_SYMBOL_GPL(tracing_snapshot);
1383 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1385 WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1387 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1388 int tracing_alloc_snapshot(void)
1390 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1393 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1394 void tracing_snapshot_alloc(void)
1399 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1400 void *tracing_cond_snapshot_data(struct trace_array *tr)
1404 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1405 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1409 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1410 int tracing_snapshot_cond_disable(struct trace_array *tr)
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1415 #endif /* CONFIG_TRACER_SNAPSHOT */
1417 void tracer_tracing_off(struct trace_array *tr)
1419 if (tr->array_buffer.buffer)
1420 ring_buffer_record_off(tr->array_buffer.buffer);
1422 * This flag is looked at when buffers haven't been allocated
1423 * yet, or by some tracers (like irqsoff), that just want to
1424 * know if the ring buffer has been disabled, but it can handle
1425 * races of where it gets disabled but we still do a record.
1426 * As the check is in the fast path of the tracers, it is more
1427 * important to be fast than accurate.
1429 tr->buffer_disabled = 1;
1430 /* Make the flag seen by readers */
1435 * tracing_off - turn off tracing buffers
1437 * This function stops the tracing buffers from recording data.
1438 * It does not disable any overhead the tracers themselves may
1439 * be causing. This function simply causes all recording to
1440 * the ring buffers to fail.
1442 void tracing_off(void)
1444 tracer_tracing_off(&global_trace);
1446 EXPORT_SYMBOL_GPL(tracing_off);
1448 void disable_trace_on_warning(void)
1450 if (__disable_trace_on_warning) {
1451 trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1452 "Disabling tracing due to warning\n");
1458 * tracer_tracing_is_on - show real state of ring buffer enabled
1459 * @tr : the trace array to know if ring buffer is enabled
1461 * Shows real state of the ring buffer if it is enabled or not.
1463 bool tracer_tracing_is_on(struct trace_array *tr)
1465 if (tr->array_buffer.buffer)
1466 return ring_buffer_record_is_on(tr->array_buffer.buffer);
1467 return !tr->buffer_disabled;
1471 * tracing_is_on - show state of ring buffers enabled
1473 int tracing_is_on(void)
1475 return tracer_tracing_is_on(&global_trace);
1477 EXPORT_SYMBOL_GPL(tracing_is_on);
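
/*
 * Note: a common debugging pattern is to freeze the buffers as soon as a
 * suspicious condition is seen, so the trace leading up to it survives:
 *
 *	if (saw_bad_state)
 *		tracing_off();
 *
 * tracing_on() (or "echo 1 > tracing_on" in tracefs) re-enables recording.
 */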
1479 static int __init set_buf_size(char *str)
1481 unsigned long buf_size;
1485 buf_size = memparse(str, &str);
1486 /* nr_entries can not be zero */
1489 trace_buf_size = buf_size;
1492 __setup("trace_buf_size=", set_buf_size);
1494 static int __init set_tracing_thresh(char *str)
1496 unsigned long threshold;
1501 ret = kstrtoul(str, 0, &threshold);
1504 tracing_thresh = threshold * 1000;
1507 __setup("tracing_thresh=", set_tracing_thresh);
1509 unsigned long nsecs_to_usecs(unsigned long nsecs)
1511 return nsecs / 1000;
1515 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1516 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1517 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1518 * of strings in the order that the evals (enum) were defined.
1523 /* These must match the bit positions in trace_iterator_flags */
1524 static const char *trace_options[] = {
1532 int in_ns; /* is this clock in nanoseconds? */
1533 } trace_clocks[] = {
1534 { trace_clock_local, "local", 1 },
1535 { trace_clock_global, "global", 1 },
1536 { trace_clock_counter, "counter", 0 },
1537 { trace_clock_jiffies, "uptime", 0 },
1538 { trace_clock, "perf", 1 },
1539 { ktime_get_mono_fast_ns, "mono", 1 },
1540 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1541 { ktime_get_boot_fast_ns, "boot", 1 },
1545 bool trace_clock_in_ns(struct trace_array *tr)
1547 if (trace_clocks[tr->clock_id].in_ns)
1554 * trace_parser_get_init - gets the buffer for trace parser
1556 int trace_parser_get_init(struct trace_parser *parser, int size)
1558 memset(parser, 0, sizeof(*parser));
1560 parser->buffer = kmalloc(size, GFP_KERNEL);
1561 if (!parser->buffer)
1564 parser->size = size;
1569 * trace_parser_put - frees the buffer for trace parser
1571 void trace_parser_put(struct trace_parser *parser)
1573 kfree(parser->buffer);
1574 parser->buffer = NULL;
1578 * trace_get_user - reads the user input string separated by space
1579 * (matched by isspace(ch))
1581 * For each string found the 'struct trace_parser' is updated,
1582 * and the function returns.
1584 * Returns number of bytes read.
1586 * See kernel/trace/trace.h for 'struct trace_parser' details.
1588 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1589 size_t cnt, loff_t *ppos)
1596 trace_parser_clear(parser);
1598 ret = get_user(ch, ubuf++);
1606 * The parser is not finished with the last write,
1607 * continue reading the user input without skipping spaces.
1609 if (!parser->cont) {
1610 /* skip white space */
1611 while (cnt && isspace(ch)) {
1612 ret = get_user(ch, ubuf++);
1621 /* only spaces were written */
1622 if (isspace(ch) || !ch) {
1629 /* read the non-space input */
1630 while (cnt && !isspace(ch) && ch) {
1631 if (parser->idx < parser->size - 1)
1632 parser->buffer[parser->idx++] = ch;
1637 ret = get_user(ch, ubuf++);
	/* We either got finished input or we have to wait for another call. */
	if (isspace(ch) || !ch) {
		parser->buffer[parser->idx] = 0;
		parser->cont = false;
	} else if (parser->idx < parser->size - 1) {
		parser->cont = true;
		parser->buffer[parser->idx++] = ch;
		/* Make sure the parsed string always terminates with '\0'. */
		parser->buffer[parser->idx] = 0;
	} else {
		ret = -EINVAL;
		goto out;
	}

	*ppos += read;
	ret = read;

out:
	return ret;
}
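
/*
 * Illustrative sketch (not from the original file): trace_get_user() is the
 * usual building block for a tracefs write handler that accepts a space
 * separated list of words. The handler name and buffer size are made up.
 * Kept under #if 0 so it is not built.
 */
#if 0
static ssize_t example_write(struct file *filp, const char __user *ubuf,
			     size_t cnt, loff_t *ppos)
{
	struct trace_parser parser;
	ssize_t read = 0;
	ssize_t ret = 0;

	if (trace_parser_get_init(&parser, 64))
		return -ENOMEM;

	while (cnt > 0) {
		ret = trace_get_user(&parser, ubuf, cnt, ppos);
		if (ret <= 0)
			break;

		if (trace_parser_loaded(&parser)) {
			/* parser.buffer now holds one NUL-terminated word */
			pr_info("parsed: %s\n", parser.buffer);
			trace_parser_clear(&parser);
		}

		read += ret;
		ubuf += ret;
		cnt -= ret;
	}

	trace_parser_put(&parser);
	return ret < 0 ? ret : read;
}
#endif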
1665 /* TODO add a seq_buf_to_buffer() */
1666 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1670 if (trace_seq_used(s) <= s->seq.readpos)
1673 len = trace_seq_used(s) - s->seq.readpos;
1676 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1678 s->seq.readpos += cnt;
1682 unsigned long __read_mostly tracing_thresh;
1683 static const struct file_operations tracing_max_lat_fops;
1685 #if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1686 defined(CONFIG_FSNOTIFY)
1688 static struct workqueue_struct *fsnotify_wq;
1690 static void latency_fsnotify_workfn(struct work_struct *work)
1692 struct trace_array *tr = container_of(work, struct trace_array,
1694 fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1697 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1699 struct trace_array *tr = container_of(iwork, struct trace_array,
1701 queue_work(fsnotify_wq, &tr->fsnotify_work);
1704 static void trace_create_maxlat_file(struct trace_array *tr,
1705 struct dentry *d_tracer)
1707 INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1708 init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1709 tr->d_max_latency = trace_create_file("tracing_max_latency", 0644,
1710 d_tracer, &tr->max_latency,
1711 &tracing_max_lat_fops);
1714 __init static int latency_fsnotify_init(void)
1716 fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1717 WQ_UNBOUND | WQ_HIGHPRI, 0);
1719 pr_err("Unable to allocate tr_max_lat_wq\n");
1725 late_initcall_sync(latency_fsnotify_init);
1727 void latency_fsnotify(struct trace_array *tr)
1732 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1733 * possible that we are called from __schedule() or do_idle(), which
1734 * could cause a deadlock.
1736 irq_work_queue(&tr->fsnotify_irqwork);
1740 * (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)) && \
1741 * defined(CONFIG_FSNOTIFY)
1745 #define trace_create_maxlat_file(tr, d_tracer) \
1746 trace_create_file("tracing_max_latency", 0644, d_tracer, \
1747 &tr->max_latency, &tracing_max_lat_fops)
1751 #ifdef CONFIG_TRACER_MAX_TRACE
1753 * Copy the new maximum trace into the separate maximum-trace
1754 * structure. (this way the maximum trace is permanently saved,
1755 * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1758 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1760 struct array_buffer *trace_buf = &tr->array_buffer;
1761 struct array_buffer *max_buf = &tr->max_buffer;
1762 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1763 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1766 max_buf->time_start = data->preempt_timestamp;
1768 max_data->saved_latency = tr->max_latency;
1769 max_data->critical_start = data->critical_start;
1770 max_data->critical_end = data->critical_end;
1772 strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1773 max_data->pid = tsk->pid;
1775 * If tsk == current, then use current_uid(), as that does not use
1776 * RCU. The irq tracer can be called out of RCU scope.
1779 max_data->uid = current_uid();
1781 max_data->uid = task_uid(tsk);
1783 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1784 max_data->policy = tsk->policy;
1785 max_data->rt_priority = tsk->rt_priority;
1787 /* record this tasks comm */
1788 tracing_record_cmdline(tsk);
1789 latency_fsnotify(tr);
1793 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1795 * @tsk: the task with the latency
1796 * @cpu: The cpu that initiated the trace.
1797 * @cond_data: User data associated with a conditional snapshot
1799 * Flip the buffers between the @tr and the max_tr and record information
1800 * about which task was the cause of this latency.
1803 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1809 WARN_ON_ONCE(!irqs_disabled());
1811 if (!tr->allocated_snapshot) {
1812 /* Only the nop tracer should hit this when disabling */
1813 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1817 arch_spin_lock(&tr->max_lock);
1819 /* Inherit the recordable setting from array_buffer */
1820 if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1821 ring_buffer_record_on(tr->max_buffer.buffer);
1823 ring_buffer_record_off(tr->max_buffer.buffer);
1825 #ifdef CONFIG_TRACER_SNAPSHOT
1826 if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
1829 swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1831 __update_max_tr(tr, tsk, cpu);
1834 arch_spin_unlock(&tr->max_lock);
1838 * update_max_tr_single - only copy one trace over, and reset the rest
1840 * @tsk: task with the latency
1841 * @cpu: the cpu of the buffer to copy.
1843 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1846 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1853 WARN_ON_ONCE(!irqs_disabled());
1854 if (!tr->allocated_snapshot) {
1855 /* Only the nop tracer should hit this when disabling */
1856 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1860 arch_spin_lock(&tr->max_lock);
1862 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1864 if (ret == -EBUSY) {
1866 * We failed to swap the buffer due to a commit taking
1867 * place on this CPU. We fail to record, but we reset
1868 * the max trace buffer (no one writes directly to it)
1869 * and flag that it failed.
1871 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1872 "Failed to swap buffers due to commit in progress\n");
1875 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1877 __update_max_tr(tr, tsk, cpu);
1878 arch_spin_unlock(&tr->max_lock);
1880 #endif /* CONFIG_TRACER_MAX_TRACE */
1882 static int wait_on_pipe(struct trace_iterator *iter, int full)
1884 /* Iterators are static, they should be filled or empty */
1885 if (trace_buffer_iter(iter, iter->cpu_file))
1888 return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
1892 #ifdef CONFIG_FTRACE_STARTUP_TEST
1893 static bool selftests_can_run;
1895 struct trace_selftests {
1896 struct list_head list;
1897 struct tracer *type;
1900 static LIST_HEAD(postponed_selftests);
1902 static int save_selftest(struct tracer *type)
1904 struct trace_selftests *selftest;
1906 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1910 selftest->type = type;
1911 list_add(&selftest->list, &postponed_selftests);
1915 static int run_tracer_selftest(struct tracer *type)
1917 struct trace_array *tr = &global_trace;
1918 struct tracer *saved_tracer = tr->current_trace;
1921 if (!type->selftest || tracing_selftest_disabled)
1925 * If a tracer registers early in boot up (before scheduling is
1926 * initialized and such), then do not run its selftests yet.
1927 * Instead, run it a little later in the boot process.
1929 if (!selftests_can_run)
1930 return save_selftest(type);
1932 if (!tracing_is_on()) {
1933 pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
1939 * Run a selftest on this tracer.
1940 * Here we reset the trace buffer, and set the current
1941 * tracer to be this tracer. The tracer can then run some
1942 * internal tracing to verify that everything is in order.
1943 * If we fail, we do not register this tracer.
1945 tracing_reset_online_cpus(&tr->array_buffer);
1947 tr->current_trace = type;
1949 #ifdef CONFIG_TRACER_MAX_TRACE
1950 if (type->use_max_tr) {
1951 /* If we expanded the buffers, make sure the max is expanded too */
1952 if (ring_buffer_expanded)
1953 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1954 RING_BUFFER_ALL_CPUS);
1955 tr->allocated_snapshot = true;
1959 /* the test is responsible for initializing and enabling */
1960 pr_info("Testing tracer %s: ", type->name);
1961 ret = type->selftest(type, tr);
1962 /* the test is responsible for resetting too */
1963 tr->current_trace = saved_tracer;
1965 printk(KERN_CONT "FAILED!\n");
1966 /* Add the warning after printing 'FAILED' */
1970 /* Only reset on passing, to avoid touching corrupted buffers */
1971 tracing_reset_online_cpus(&tr->array_buffer);
1973 #ifdef CONFIG_TRACER_MAX_TRACE
1974 if (type->use_max_tr) {
1975 tr->allocated_snapshot = false;
1977 /* Shrink the max buffer again */
1978 if (ring_buffer_expanded)
1979 ring_buffer_resize(tr->max_buffer.buffer, 1,
1980 RING_BUFFER_ALL_CPUS);
1984 printk(KERN_CONT "PASSED\n");
1988 static __init int init_trace_selftests(void)
1990 struct trace_selftests *p, *n;
1991 struct tracer *t, **last;
1994 selftests_can_run = true;
1996 mutex_lock(&trace_types_lock);
1998 if (list_empty(&postponed_selftests))
2001 pr_info("Running postponed tracer tests:\n");
2003 tracing_selftest_running = true;
2004 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
		/* This loop can take minutes when sanitizers are enabled, so
		 * let's make sure we allow RCU processing.
		 */
		cond_resched();
2009 ret = run_tracer_selftest(p->type);
2010 /* If the test fails, then warn and remove from available_tracers */
2012 WARN(1, "tracer: %s failed selftest, disabling\n",
2014 last = &trace_types;
2015 for (t = trace_types; t; t = t->next) {
2026 tracing_selftest_running = false;
2029 mutex_unlock(&trace_types_lock);
2033 core_initcall(init_trace_selftests);
2035 static inline int run_tracer_selftest(struct tracer *type)
2039 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2041 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2043 static void __init apply_trace_boot_options(void);
2046 * register_tracer - register a tracer with the ftrace system.
2047 * @type: the plugin for the tracer
2049 * Register a new plugin tracer.
2051 int __init register_tracer(struct tracer *type)
2057 pr_info("Tracer must have a name\n");
2061 if (strlen(type->name) >= MAX_TRACER_SIZE) {
2062 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2066 if (security_locked_down(LOCKDOWN_TRACEFS)) {
2067 pr_warn("Can not register tracer %s due to lockdown\n",
2072 mutex_lock(&trace_types_lock);
2074 tracing_selftest_running = true;
2076 for (t = trace_types; t; t = t->next) {
2077 if (strcmp(type->name, t->name) == 0) {
2079 pr_info("Tracer %s already registered\n",
2086 if (!type->set_flag)
2087 type->set_flag = &dummy_set_flag;
2089 /*allocate a dummy tracer_flags*/
2090 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2095 type->flags->val = 0;
2096 type->flags->opts = dummy_tracer_opt;
2098 if (!type->flags->opts)
2099 type->flags->opts = dummy_tracer_opt;
2101 /* store the tracer for __set_tracer_option */
2102 type->flags->trace = type;
2104 ret = run_tracer_selftest(type);
2108 type->next = trace_types;
2110 add_tracer_options(&global_trace, type);
2113 tracing_selftest_running = false;
2114 mutex_unlock(&trace_types_lock);
2116 if (ret || !default_bootup_tracer)
2119 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2122 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2123 /* Do we want this tracer to start on bootup? */
2124 tracing_set_tracer(&global_trace, type->name);
2125 default_bootup_tracer = NULL;
2127 apply_trace_boot_options();
2129 /* disable other selftests, since this will break it. */
2130 disable_tracing_selftest("running a tracer");
2136 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2138 struct trace_buffer *buffer = buf->buffer;
2143 ring_buffer_record_disable(buffer);
2145 /* Make sure all commits have finished */
2147 ring_buffer_reset_cpu(buffer, cpu);
2149 ring_buffer_record_enable(buffer);
2152 void tracing_reset_online_cpus(struct array_buffer *buf)
2154 struct trace_buffer *buffer = buf->buffer;
2159 ring_buffer_record_disable(buffer);
2161 /* Make sure all commits have finished */
2164 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2166 ring_buffer_reset_online_cpus(buffer);
2168 ring_buffer_record_enable(buffer);
2171 /* Must have trace_types_lock held */
2172 void tracing_reset_all_online_cpus(void)
2174 struct trace_array *tr;
2176 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2177 if (!tr->clear_trace)
2179 tr->clear_trace = false;
2180 tracing_reset_online_cpus(&tr->array_buffer);
2181 #ifdef CONFIG_TRACER_MAX_TRACE
2182 tracing_reset_online_cpus(&tr->max_buffer);
2187 static int *tgid_map;
2189 #define SAVED_CMDLINES_DEFAULT 128
2190 #define NO_CMDLINE_MAP UINT_MAX
2191 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2192 struct saved_cmdlines_buffer {
2193 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2194 unsigned *map_cmdline_to_pid;
2195 unsigned cmdline_num;
2197 char *saved_cmdlines;
2199 static struct saved_cmdlines_buffer *savedcmd;
2201 static inline char *get_saved_cmdlines(int idx)
2203 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2206 static inline void set_cmdline(int idx, const char *cmdline)
2208 strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2211 static int allocate_cmdlines_buffer(unsigned int val,
2212 struct saved_cmdlines_buffer *s)
2214 s->map_cmdline_to_pid = kmalloc_array(val,
2215 sizeof(*s->map_cmdline_to_pid),
2217 if (!s->map_cmdline_to_pid)
2220 s->saved_cmdlines = kmalloc_array(TASK_COMM_LEN, val, GFP_KERNEL);
2221 if (!s->saved_cmdlines) {
2222 kfree(s->map_cmdline_to_pid);
2227 s->cmdline_num = val;
2228 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2229 sizeof(s->map_pid_to_cmdline));
2230 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2231 val * sizeof(*s->map_cmdline_to_pid));
2236 static int trace_create_savedcmd(void)
2240 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
2244 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
2254 int is_tracing_stopped(void)
2256 return global_trace.stop_count;
2260 * tracing_start - quick start of the tracer
2262 * If tracing is enabled but was stopped by tracing_stop,
2263 * this will start the tracer back up.
2265 void tracing_start(void)
2267 struct trace_buffer *buffer;
2268 unsigned long flags;
2270 if (tracing_disabled)
2273 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2274 if (--global_trace.stop_count) {
2275 if (global_trace.stop_count < 0) {
2276 /* Someone screwed up their debugging */
2278 global_trace.stop_count = 0;
2283 /* Prevent the buffers from switching */
2284 arch_spin_lock(&global_trace.max_lock);
2286 buffer = global_trace.array_buffer.buffer;
2288 ring_buffer_record_enable(buffer);
2290 #ifdef CONFIG_TRACER_MAX_TRACE
2291 buffer = global_trace.max_buffer.buffer;
2293 ring_buffer_record_enable(buffer);
2296 arch_spin_unlock(&global_trace.max_lock);
2299 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2302 static void tracing_start_tr(struct trace_array *tr)
2304 struct trace_buffer *buffer;
2305 unsigned long flags;
2307 if (tracing_disabled)
2310 /* If global, we need to also start the max tracer */
2311 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2312 return tracing_start();
2314 raw_spin_lock_irqsave(&tr->start_lock, flags);
2316 if (--tr->stop_count) {
2317 if (tr->stop_count < 0) {
2318 /* Someone screwed up their debugging */
2325 buffer = tr->array_buffer.buffer;
2327 ring_buffer_record_enable(buffer);
2330 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2334 * tracing_stop - quick stop of the tracer
2336 * Light weight way to stop tracing. Use in conjunction with
2339 void tracing_stop(void)
2341 struct trace_buffer *buffer;
2342 unsigned long flags;
2344 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
2345 if (global_trace.stop_count++)
2348 /* Prevent the buffers from switching */
2349 arch_spin_lock(&global_trace.max_lock);
2351 buffer = global_trace.array_buffer.buffer;
2353 ring_buffer_record_disable(buffer);
2355 #ifdef CONFIG_TRACER_MAX_TRACE
2356 buffer = global_trace.max_buffer.buffer;
2358 ring_buffer_record_disable(buffer);
2361 arch_spin_unlock(&global_trace.max_lock);
2364 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
2367 static void tracing_stop_tr(struct trace_array *tr)
2369 struct trace_buffer *buffer;
2370 unsigned long flags;
2372 /* If global, we need to also stop the max tracer */
2373 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
2374 return tracing_stop();
2376 raw_spin_lock_irqsave(&tr->start_lock, flags);
2377 if (tr->stop_count++)
2380 buffer = tr->array_buffer.buffer;
2382 ring_buffer_record_disable(buffer);
2385 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2388 static int trace_save_cmdline(struct task_struct *tsk)
2392 /* treat recording of idle task as a success */
2396 tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2399 * It's not the end of the world if we don't get
2400 * the lock, but we also don't want to spin
2401 * nor do we want to disable interrupts,
2402 * so if we miss here, then better luck next time.
2404 if (!arch_spin_trylock(&trace_cmdline_lock))
2407 idx = savedcmd->map_pid_to_cmdline[tpid];
2408 if (idx == NO_CMDLINE_MAP) {
2409 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2411 savedcmd->map_pid_to_cmdline[tpid] = idx;
2412 savedcmd->cmdline_idx = idx;
2415 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2416 set_cmdline(idx, tsk->comm);
2418 arch_spin_unlock(&trace_cmdline_lock);
2423 static void __trace_find_cmdline(int pid, char comm[])
2429 strcpy(comm, "<idle>");
2433 if (WARN_ON_ONCE(pid < 0)) {
2434 strcpy(comm, "<XXX>");
2438 tpid = pid & (PID_MAX_DEFAULT - 1);
2439 map = savedcmd->map_pid_to_cmdline[tpid];
2440 if (map != NO_CMDLINE_MAP) {
2441 tpid = savedcmd->map_cmdline_to_pid[map];
2443 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2447 strcpy(comm, "<...>");
2450 void trace_find_cmdline(int pid, char comm[])
2453 arch_spin_lock(&trace_cmdline_lock);
2455 __trace_find_cmdline(pid, comm);
2457 arch_spin_unlock(&trace_cmdline_lock);
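/*
 * Illustrative sketch (not part of the original source) of how the lookup
 * above resolves a pid, assuming PID_MAX_DEFAULT is 32768 and the slot
 * numbers are made up:
 *
 *	pid  = 1234
 *	tpid = 1234 & (PID_MAX_DEFAULT - 1)	-> 1234
 *	map  = map_pid_to_cmdline[1234]		-> e.g. 7
 *	map_cmdline_to_pid[7] == 1234		-> comm comes from saved_cmdlines slot 7
 *	map_cmdline_to_pid[7] != 1234		-> the slot was reused by a colliding
 *						   pid, so "<...>" is printed instead
 */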
2461 int trace_find_tgid(int pid)
2463 if (unlikely(!tgid_map || !pid || pid > PID_MAX_DEFAULT))
2466 return tgid_map[pid];
2469 static int trace_save_tgid(struct task_struct *tsk)
2471 /* treat recording of idle task as a success */
2475 if (unlikely(!tgid_map || tsk->pid > PID_MAX_DEFAULT))
2478 tgid_map[tsk->pid] = tsk->tgid;
2482 static bool tracing_record_taskinfo_skip(int flags)
2484 if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2486 if (!__this_cpu_read(trace_taskinfo_save))
2492 * tracing_record_taskinfo - record the task info of a task
2494 * @task: task to record
2495 * @flags: TRACE_RECORD_CMDLINE for recording comm
2496 * TRACE_RECORD_TGID for recording tgid
2498 void tracing_record_taskinfo(struct task_struct *task, int flags)
2502 if (tracing_record_taskinfo_skip(flags))
2506 * Record as much task information as possible. If some fail, continue
2507 * to try to record the others.
2509 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2510 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2512 /* If recording any information failed, retry again soon. */
2516 __this_cpu_write(trace_taskinfo_save, false);
2520 * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2522 * @prev: previous task during sched_switch
2523 * @next: next task during sched_switch
2524 * @flags: TRACE_RECORD_CMDLINE for recording comm
2525 * TRACE_RECORD_TGID for recording tgid
2527 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2528 struct task_struct *next, int flags)
2532 if (tracing_record_taskinfo_skip(flags))
2536 * Record as much task information as possible. If some fail, continue
2537 * to try to record the others.
2539 done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2540 done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2541 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2542 done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2544 /* If recording any information failed, retry again soon. */
2548 __this_cpu_write(trace_taskinfo_save, false);
2551 /* Helpers to record a specific task information */
2552 void tracing_record_cmdline(struct task_struct *task)
2554 tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2557 void tracing_record_tgid(struct task_struct *task)
2559 tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2563 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2564 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2565 * simplifies those functions and keeps them in sync.
2567 enum print_line_t trace_handle_return(struct trace_seq *s)
2569 return trace_seq_has_overflowed(s) ?
2570 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2572 EXPORT_SYMBOL_GPL(trace_handle_return);
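/*
 * Example usage (a hypothetical sketch, not taken from this file): an
 * event's output callback can end with trace_handle_return() so that a
 * trace_seq overflow is reported consistently:
 *
 *	static enum print_line_t my_event_trace(struct trace_iterator *iter,
 *						int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "my event\n");
 *		return trace_handle_return(&iter->seq);
 *	}
 */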
2574 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2576 unsigned int trace_flags = irqs_status;
2579 pc = preempt_count();
2582 trace_flags |= TRACE_FLAG_NMI;
2583 if (pc & HARDIRQ_MASK)
2584 trace_flags |= TRACE_FLAG_HARDIRQ;
2585 if (in_serving_softirq())
2586 trace_flags |= TRACE_FLAG_SOFTIRQ;
2588 if (tif_need_resched())
2589 trace_flags |= TRACE_FLAG_NEED_RESCHED;
2590 if (test_preempt_need_resched())
2591 trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2592 return (trace_flags << 16) | (pc & 0xff);
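/*
 * Layout of the value built above (worked example): with a preempt_count
 * of 2 and TRACE_FLAG_HARDIRQ set, the packed context is
 * (TRACE_FLAG_HARDIRQ << 16) | 2 -- the preemption depth sits in the low
 * byte and the irq/NMI/resched flags start at bit 16.
 */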
2595 struct ring_buffer_event *
2596 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2599 unsigned int trace_ctx)
2601 return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2604 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2605 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2606 static int trace_buffered_event_ref;
2609 * trace_buffered_event_enable - enable buffering events
2611 * When events are being filtered, it is quicker to use a temporary
2612 * buffer to write the event data into if there's a likely chance
2613 * that it will not be committed. The discard of the ring buffer
2614 * is not as fast as committing, and is much slower than copying to the temporary buffer.
2617 * When an event is to be filtered, allocate per cpu buffers to
2618 * write the event data into, and if the event is filtered and discarded
2619 * it is simply dropped; otherwise, the entire data is committed in one shot.
2622 void trace_buffered_event_enable(void)
2624 struct ring_buffer_event *event;
2628 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2630 if (trace_buffered_event_ref++)
2633 for_each_tracing_cpu(cpu) {
2634 page = alloc_pages_node(cpu_to_node(cpu),
2635 GFP_KERNEL | __GFP_NORETRY, 0);
2639 event = page_address(page);
2640 memset(event, 0, sizeof(*event));
2642 per_cpu(trace_buffered_event, cpu) = event;
2645 if (cpu == smp_processor_id() &&
2646 __this_cpu_read(trace_buffered_event) !=
2647 per_cpu(trace_buffered_event, cpu))
2654 trace_buffered_event_disable();
2657 static void enable_trace_buffered_event(void *data)
2659 /* Probably not needed, but do it anyway */
2661 this_cpu_dec(trace_buffered_event_cnt);
2664 static void disable_trace_buffered_event(void *data)
2666 this_cpu_inc(trace_buffered_event_cnt);
2670 * trace_buffered_event_disable - disable buffering events
2672 * When a filter is removed, it is faster to not use the buffered
2673 * events, and to commit directly into the ring buffer. Free up
2674 * the temp buffers when there are no more users. This requires
2675 * special synchronization with current events.
2677 void trace_buffered_event_disable(void)
2681 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2683 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2686 if (--trace_buffered_event_ref)
2690 /* For each CPU, set the buffer as used. */
2691 smp_call_function_many(tracing_buffer_mask,
2692 disable_trace_buffered_event, NULL, 1);
2695 /* Wait for all current users to finish */
2698 for_each_tracing_cpu(cpu) {
2699 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2700 per_cpu(trace_buffered_event, cpu) = NULL;
2703 * Make sure trace_buffered_event is NULL before clearing
2704 * trace_buffered_event_cnt.
2709 /* Do the work on each cpu */
2710 smp_call_function_many(tracing_buffer_mask,
2711 enable_trace_buffered_event, NULL, 1);
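/*
 * Sketch of the intended pairing (illustrative, not from this file):
 * callers that install an event filter keep the buffered events enabled
 * for as long as the filter is in place, with both calls made under
 * event_mutex, as the WARN_ON_ONCE() checks above require:
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	... filter is active ...
 *	trace_buffered_event_disable();
 *	mutex_unlock(&event_mutex);
 */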
2715 static struct trace_buffer *temp_buffer;
2717 struct ring_buffer_event *
2718 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2719 struct trace_event_file *trace_file,
2720 int type, unsigned long len,
2721 unsigned int trace_ctx)
2723 struct ring_buffer_event *entry;
2724 struct trace_array *tr = trace_file->tr;
2727 *current_rb = tr->array_buffer.buffer;
2729 if (!tr->no_filter_buffering_ref &&
2730 (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2731 (entry = this_cpu_read(trace_buffered_event))) {
2732 /* Try to use the per cpu buffer first */
2733 val = this_cpu_inc_return(trace_buffered_event_cnt);
2734 if ((len < (PAGE_SIZE - sizeof(*entry) - sizeof(entry->array[0]))) && val == 1) {
2735 trace_event_setup(entry, type, trace_ctx);
2736 entry->array[0] = len;
2739 this_cpu_dec(trace_buffered_event_cnt);
2742 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2745 * If tracing is off, but we have triggers enabled
2746 * we still need to look at the event data. Use the temp_buffer
2747 * to store the trace event for the trigger to use. It's recursion
2748 * safe and will not be recorded anywhere.
2750 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2751 *current_rb = temp_buffer;
2752 entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2757 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2759 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2760 static DEFINE_MUTEX(tracepoint_printk_mutex);
2762 static void output_printk(struct trace_event_buffer *fbuffer)
2764 struct trace_event_call *event_call;
2765 struct trace_event_file *file;
2766 struct trace_event *event;
2767 unsigned long flags;
2768 struct trace_iterator *iter = tracepoint_print_iter;
2770 /* We should never get here if iter is NULL */
2771 if (WARN_ON_ONCE(!iter))
2774 event_call = fbuffer->trace_file->event_call;
2775 if (!event_call || !event_call->event.funcs ||
2776 !event_call->event.funcs->trace)
2779 file = fbuffer->trace_file;
2780 if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2781 (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2782 !filter_match_preds(file->filter, fbuffer->entry)))
2785 event = &fbuffer->trace_file->event_call->event;
2787 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2788 trace_seq_init(&iter->seq);
2789 iter->ent = fbuffer->entry;
2790 event_call->event.funcs->trace(iter, 0, event);
2791 trace_seq_putc(&iter->seq, 0);
2792 printk("%s", iter->seq.buffer);
2794 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2797 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2798 void *buffer, size_t *lenp,
2801 int save_tracepoint_printk;
2804 mutex_lock(&tracepoint_printk_mutex);
2805 save_tracepoint_printk = tracepoint_printk;
2807 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2810 * This will force exiting early, as tracepoint_printk
2811 * is always zero when tracepoint_print_iter is not allocated.
2813 if (!tracepoint_print_iter)
2814 tracepoint_printk = 0;
2816 if (save_tracepoint_printk == tracepoint_printk)
2819 if (tracepoint_printk)
2820 static_key_enable(&tracepoint_printk_key.key);
2822 static_key_disable(&tracepoint_printk_key.key);
2825 mutex_unlock(&tracepoint_printk_mutex);
2830 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2832 if (static_key_false(&tracepoint_printk_key.key))
2833 output_printk(fbuffer);
2835 if (static_branch_unlikely(&trace_event_exports_enabled))
2836 ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2837 event_trigger_unlock_commit_regs(fbuffer->trace_file, fbuffer->buffer,
2838 fbuffer->event, fbuffer->entry,
2839 fbuffer->trace_ctx, fbuffer->regs);
2841 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2846 * trace_buffer_unlock_commit_regs()
2847 * trace_event_buffer_commit()
2848 * trace_event_raw_event_xxx()
2850 # define STACK_SKIP 3
2852 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2853 struct trace_buffer *buffer,
2854 struct ring_buffer_event *event,
2855 unsigned int trace_ctx,
2856 struct pt_regs *regs)
2858 __buffer_unlock_commit(buffer, event);
2861 * If regs is not set, then skip the necessary functions.
2862 * Note, we can still get here via blktrace, wakeup tracer
2863 * and mmiotrace, but that's ok if they lose a function or
2864 * two. They are not that meaningful.
2866 ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2867 ftrace_trace_userstack(tr, buffer, trace_ctx);
2871 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2874 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2875 struct ring_buffer_event *event)
2877 __buffer_unlock_commit(buffer, event);
2881 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2882 parent_ip, unsigned int trace_ctx)
2884 struct trace_event_call *call = &event_function;
2885 struct trace_buffer *buffer = tr->array_buffer.buffer;
2886 struct ring_buffer_event *event;
2887 struct ftrace_entry *entry;
2889 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2893 entry = ring_buffer_event_data(event);
2895 entry->parent_ip = parent_ip;
2897 if (!call_filter_check_discard(call, entry, buffer, event)) {
2898 if (static_branch_unlikely(&trace_function_exports_enabled))
2899 ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2900 __buffer_unlock_commit(buffer, event);
2904 #ifdef CONFIG_STACKTRACE
2906 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2907 #define FTRACE_KSTACK_NESTING 4
2909 #define FTRACE_KSTACK_ENTRIES (PAGE_SIZE / FTRACE_KSTACK_NESTING)
2911 struct ftrace_stack {
2912 unsigned long calls[FTRACE_KSTACK_ENTRIES];
2916 struct ftrace_stacks {
2917 struct ftrace_stack stacks[FTRACE_KSTACK_NESTING];
2920 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2921 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2923 static void __ftrace_trace_stack(struct trace_buffer *buffer,
2924 unsigned int trace_ctx,
2925 int skip, struct pt_regs *regs)
2927 struct trace_event_call *call = &event_kernel_stack;
2928 struct ring_buffer_event *event;
2929 unsigned int size, nr_entries;
2930 struct ftrace_stack *fstack;
2931 struct stack_entry *entry;
2935 * Add one, for this function and the call to save_stack_trace()
2936 * If regs is set, then these functions will not be in the way.
2938 #ifndef CONFIG_UNWINDER_ORC
2943 preempt_disable_notrace();
2945 stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2947 /* This should never happen. If it does, yell once and skip */
2948 if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2952 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2953 * interrupt will either see the value pre increment or post
2954 * increment. If the interrupt happens pre increment it will have
2955 * restored the counter when it returns. We just need a barrier to
2956 * keep gcc from moving things around.
2960 fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2961 size = ARRAY_SIZE(fstack->calls);
2964 nr_entries = stack_trace_save_regs(regs, fstack->calls,
2967 nr_entries = stack_trace_save(fstack->calls, size, skip);
2970 size = nr_entries * sizeof(unsigned long);
2971 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2972 (sizeof(*entry) - sizeof(entry->caller)) + size,
2976 entry = ring_buffer_event_data(event);
2978 memcpy(&entry->caller, fstack->calls, size);
2979 entry->size = nr_entries;
2981 if (!call_filter_check_discard(call, entry, buffer, event))
2982 __buffer_unlock_commit(buffer, event);
2985 /* Again, don't let gcc optimize things here */
2987 __this_cpu_dec(ftrace_stack_reserve);
2988 preempt_enable_notrace();
2992 static inline void ftrace_trace_stack(struct trace_array *tr,
2993 struct trace_buffer *buffer,
2994 unsigned int trace_ctx,
2995 int skip, struct pt_regs *regs)
2997 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3000 __ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3003 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3006 struct trace_buffer *buffer = tr->array_buffer.buffer;
3008 if (rcu_is_watching()) {
3009 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3014 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
3015 * but if the above rcu_is_watching() failed, then the NMI
3016 * triggered someplace critical, and rcu_irq_enter() should
3017 * not be called from NMI.
3019 if (unlikely(in_nmi()))
3022 rcu_irq_enter_irqson();
3023 __ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3024 rcu_irq_exit_irqson();
3028 * trace_dump_stack - record a stack back trace in the trace buffer
3029 * @skip: Number of functions to skip (helper handlers)
3031 void trace_dump_stack(int skip)
3033 if (tracing_disabled || tracing_selftest_running)
3036 #ifndef CONFIG_UNWINDER_ORC
3037 /* Skip 1 to skip this function. */
3040 __ftrace_trace_stack(global_trace.array_buffer.buffer,
3041 tracing_gen_ctx(), skip, NULL);
3043 EXPORT_SYMBOL_GPL(trace_dump_stack);
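/*
 * Example (illustrative): a driver that wants to know how it reached a
 * particular code path can drop a backtrace into the trace buffer with
 *
 *	trace_dump_stack(0);
 *
 * and read it back later from the "trace" file in tracefs.
 */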
3045 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3046 static DEFINE_PER_CPU(int, user_stack_count);
3049 ftrace_trace_userstack(struct trace_array *tr,
3050 struct trace_buffer *buffer, unsigned int trace_ctx)
3052 struct trace_event_call *call = &event_user_stack;
3053 struct ring_buffer_event *event;
3054 struct userstack_entry *entry;
3056 if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3060 * NMIs can not handle page faults, even with fix ups.
3061 * Saving the user stack can (and often does) fault.
3063 if (unlikely(in_nmi()))
3067 * prevent recursion, since the user stack tracing may
3068 * trigger other kernel events.
3071 if (__this_cpu_read(user_stack_count))
3074 __this_cpu_inc(user_stack_count);
3076 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3077 sizeof(*entry), trace_ctx);
3079 goto out_drop_count;
3080 entry = ring_buffer_event_data(event);
3082 entry->tgid = current->tgid;
3083 memset(&entry->caller, 0, sizeof(entry->caller));
3085 stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3086 if (!call_filter_check_discard(call, entry, buffer, event))
3087 __buffer_unlock_commit(buffer, event);
3090 __this_cpu_dec(user_stack_count);
3094 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3095 static void ftrace_trace_userstack(struct trace_array *tr,
3096 struct trace_buffer *buffer,
3097 unsigned int trace_ctx)
3100 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3102 #endif /* CONFIG_STACKTRACE */
3105 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3106 unsigned long long delta)
3108 entry->bottom_delta_ts = delta & U32_MAX;
3109 entry->top_delta_ts = (delta >> 32);
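/*
 * Worked example of the split above: delta = 0x0000000123456789 is stored
 * as bottom_delta_ts = 0x23456789 and top_delta_ts = 0x1; a reader is
 * expected to rebuild it as ((u64)top_delta_ts << 32) | bottom_delta_ts.
 */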
3112 void trace_last_func_repeats(struct trace_array *tr,
3113 struct trace_func_repeats *last_info,
3114 unsigned int trace_ctx)
3116 struct trace_buffer *buffer = tr->array_buffer.buffer;
3117 struct func_repeats_entry *entry;
3118 struct ring_buffer_event *event;
3121 event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3122 sizeof(*entry), trace_ctx);
3126 delta = ring_buffer_event_time_stamp(buffer, event) -
3127 last_info->ts_last_call;
3129 entry = ring_buffer_event_data(event);
3130 entry->ip = last_info->ip;
3131 entry->parent_ip = last_info->parent_ip;
3132 entry->count = last_info->count;
3133 func_repeats_set_delta_ts(entry, delta);
3135 __buffer_unlock_commit(buffer, event);
3138 /* created for use with alloc_percpu */
3139 struct trace_buffer_struct {
3141 char buffer[4][TRACE_BUF_SIZE];
3144 static struct trace_buffer_struct *trace_percpu_buffer;
3147 * This allows for lockless recording. If we're nested too deeply, then
3148 * this returns NULL.
3150 static char *get_trace_buf(void)
3152 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3154 if (!buffer || buffer->nesting >= 4)
3159 /* Interrupts must see nesting incremented before we use the buffer */
3161 return &buffer->buffer[buffer->nesting - 1][0];
3164 static void put_trace_buf(void)
3166 /* Don't let the decrement of nesting leak before this */
3168 this_cpu_dec(trace_percpu_buffer->nesting);
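/*
 * The two helpers above are meant to be used as a pair (a sketch; it
 * assumes the caller has already disabled preemption, as trace_vbprintk()
 * below does):
 *
 *	tbuffer = get_trace_buf();
 *	if (!tbuffer)
 *		goto out;		// nested more than 4 levels deep
 *	... format the message into tbuffer ...
 *	put_trace_buf();
 */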
3171 static int alloc_percpu_trace_buffer(void)
3173 struct trace_buffer_struct *buffers;
3175 if (trace_percpu_buffer)
3178 buffers = alloc_percpu(struct trace_buffer_struct);
3179 if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3182 trace_percpu_buffer = buffers;
3186 static int buffers_allocated;
3188 void trace_printk_init_buffers(void)
3190 if (buffers_allocated)
3193 if (alloc_percpu_trace_buffer())
3196 /* trace_printk() is for debug use only. Don't use it in production. */
3199 pr_warn("**********************************************************\n");
3200 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3202 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
3204 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
3205 pr_warn("** unsafe for production use. **\n");
3207 pr_warn("** If you see this message and you are not debugging **\n");
3208 pr_warn("** the kernel, report this immediately to your vendor! **\n");
3210 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
3211 pr_warn("**********************************************************\n");
3213 /* Expand the buffers to set size */
3214 tracing_update_buffers();
3216 buffers_allocated = 1;
3219 * trace_printk_init_buffers() can be called by modules.
3220 * If that happens, then we need to start cmdline recording
3221 * directly here. If the global_trace.buffer is already
3222 * allocated here, then this was called by module code.
3224 if (global_trace.array_buffer.buffer)
3225 tracing_start_cmdline_record();
3227 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3229 void trace_printk_start_comm(void)
3231 /* Start tracing comms if trace printk is set */
3232 if (!buffers_allocated)
3234 tracing_start_cmdline_record();
3237 static void trace_printk_start_stop_comm(int enabled)
3239 if (!buffers_allocated)
3243 tracing_start_cmdline_record();
3245 tracing_stop_cmdline_record();
3249 * trace_vbprintk - write binary msg to tracing buffer
3250 * @ip: The address of the caller
3251 * @fmt: The string format to write to the buffer
3252 * @args: Arguments for @fmt
3254 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3256 struct trace_event_call *call = &event_bprint;
3257 struct ring_buffer_event *event;
3258 struct trace_buffer *buffer;
3259 struct trace_array *tr = &global_trace;
3260 struct bprint_entry *entry;
3261 unsigned int trace_ctx;
3265 if (unlikely(tracing_selftest_running || tracing_disabled))
3268 /* Don't pollute graph traces with trace_vprintk internals */
3269 pause_graph_tracing();
3271 trace_ctx = tracing_gen_ctx();
3272 preempt_disable_notrace();
3274 tbuffer = get_trace_buf();
3280 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3282 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3285 size = sizeof(*entry) + sizeof(u32) * len;
3286 buffer = tr->array_buffer.buffer;
3287 ring_buffer_nest_start(buffer);
3288 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3292 entry = ring_buffer_event_data(event);
3296 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3297 if (!call_filter_check_discard(call, entry, buffer, event)) {
3298 __buffer_unlock_commit(buffer, event);
3299 ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3303 ring_buffer_nest_end(buffer);
3308 preempt_enable_notrace();
3309 unpause_graph_tracing();
3313 EXPORT_SYMBOL_GPL(trace_vbprintk);
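/*
 * Illustrative note on the binary path above: a call such as
 *
 *	trace_printk("read returned %d\n", ret);
 *
 * can reach trace_vbprintk() with the arguments packed by vbin_printf(), so
 * only the raw argument values plus a pointer to the format string go into
 * the ring buffer; the text is rendered later, when the trace is read.
 */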
3317 __trace_array_vprintk(struct trace_buffer *buffer,
3318 unsigned long ip, const char *fmt, va_list args)
3320 struct trace_event_call *call = &event_print;
3321 struct ring_buffer_event *event;
3323 struct print_entry *entry;
3324 unsigned int trace_ctx;
3327 if (tracing_disabled || tracing_selftest_running)
3330 /* Don't pollute graph traces with trace_vprintk internals */
3331 pause_graph_tracing();
3333 trace_ctx = tracing_gen_ctx();
3334 preempt_disable_notrace();
3337 tbuffer = get_trace_buf();
3343 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3345 size = sizeof(*entry) + len + 1;
3346 ring_buffer_nest_start(buffer);
3347 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3351 entry = ring_buffer_event_data(event);
3354 memcpy(&entry->buf, tbuffer, len + 1);
3355 if (!call_filter_check_discard(call, entry, buffer, event)) {
3356 __buffer_unlock_commit(buffer, event);
3357 ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3361 ring_buffer_nest_end(buffer);
3365 preempt_enable_notrace();
3366 unpause_graph_tracing();
3372 int trace_array_vprintk(struct trace_array *tr,
3373 unsigned long ip, const char *fmt, va_list args)
3375 return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3379 * trace_array_printk - Print a message to a specific instance
3380 * @tr: The instance trace_array descriptor
3381 * @ip: The instruction pointer that this is called from.
3382 * @fmt: The format to print (printf format)
3384 * If a subsystem sets up its own instance, it has the right to
3385 * printk strings into its tracing instance buffer using this
3386 * function. Note, this function will not write into the top level
3387 * buffer (use trace_printk() for that), as writing into the top level
3388 * buffer should only have events that can be individually disabled.
3389 * trace_printk() is only used for debugging a kernel, and should
3390 * never be incorporated into normal use.
3392 * trace_array_printk() can be used, as it will not add noise to the
3393 * top level tracing buffer.
3395 * Note, trace_array_init_printk() must be called on @tr before this function can be used.
3399 int trace_array_printk(struct trace_array *tr,
3400 unsigned long ip, const char *fmt, ...)
3408 /* This is only allowed for created instances */
3409 if (tr == &global_trace)
3412 if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3416 ret = trace_array_vprintk(tr, ip, fmt, ap);
3420 EXPORT_SYMBOL_GPL(trace_array_printk);
3423 * trace_array_init_printk - Initialize buffers for trace_array_printk()
3424 * @tr: The trace array to initialize the buffers for
3426 * As trace_array_printk() only writes into instances, they are OK to
3427 * have in the kernel (unlike trace_printk()). This needs to be called
3428 * before trace_array_printk() can be used on a trace_array.
3430 int trace_array_init_printk(struct trace_array *tr)
3435 /* This is only allowed for created instances */
3436 if (tr == &global_trace)
3439 return alloc_percpu_trace_buffer();
3441 EXPORT_SYMBOL_GPL(trace_array_init_printk);
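/*
 * Example usage of the two functions above (a sketch; the instance name is
 * illustrative only):
 *
 *	struct trace_array *tr = trace_array_get_by_name("my_instance");
 *
 *	if (tr && !trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "probe fired: %d\n", val);
 */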
3444 int trace_array_printk_buf(struct trace_buffer *buffer,
3445 unsigned long ip, const char *fmt, ...)
3450 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3454 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3460 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3462 return trace_array_vprintk(&global_trace, ip, fmt, args);
3464 EXPORT_SYMBOL_GPL(trace_vprintk);
3466 static void trace_iterator_increment(struct trace_iterator *iter)
3468 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3472 ring_buffer_iter_advance(buf_iter);
3475 static struct trace_entry *
3476 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3477 unsigned long *lost_events)
3479 struct ring_buffer_event *event;
3480 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3483 event = ring_buffer_iter_peek(buf_iter, ts);
3485 *lost_events = ring_buffer_iter_dropped(buf_iter) ?
3486 (unsigned long)-1 : 0;
3488 event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3493 iter->ent_size = ring_buffer_event_length(event);
3494 return ring_buffer_event_data(event);
3500 static struct trace_entry *
3501 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3502 unsigned long *missing_events, u64 *ent_ts)
3504 struct trace_buffer *buffer = iter->array_buffer->buffer;
3505 struct trace_entry *ent, *next = NULL;
3506 unsigned long lost_events = 0, next_lost = 0;
3507 int cpu_file = iter->cpu_file;
3508 u64 next_ts = 0, ts;
3514 * If we are in a per_cpu trace file, don't bother iterating over
3515 * all CPUs; just peek at that one CPU directly.
3517 if (cpu_file > RING_BUFFER_ALL_CPUS) {
3518 if (ring_buffer_empty_cpu(buffer, cpu_file))
3520 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3522 *ent_cpu = cpu_file;
3527 for_each_tracing_cpu(cpu) {
3529 if (ring_buffer_empty_cpu(buffer, cpu))
3532 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3535 * Pick the entry with the smallest timestamp:
3537 if (ent && (!next || ts < next_ts)) {
3541 next_lost = lost_events;
3542 next_size = iter->ent_size;
3546 iter->ent_size = next_size;
3549 *ent_cpu = next_cpu;
3555 *missing_events = next_lost;
3560 #define STATIC_FMT_BUF_SIZE 128
3561 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3563 static char *trace_iter_expand_format(struct trace_iterator *iter)
3568 * iter->tr is NULL when used with tp_printk, which makes
3569 * this get called where it is not safe to call krealloc().
3571 if (!iter->tr || iter->fmt == static_fmt_buf)
3574 tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3577 iter->fmt_size += STATIC_FMT_BUF_SIZE;
3584 /* Returns true if the string is safe to dereference from an event */
3585 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3587 unsigned long addr = (unsigned long)str;
3588 struct trace_event *trace_event;
3589 struct trace_event_call *event;
3591 /* OK if part of the event data */
3592 if ((addr >= (unsigned long)iter->ent) &&
3593 (addr < (unsigned long)iter->ent + iter->ent_size))
3596 /* OK if part of the temp seq buffer */
3597 if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3598 (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
3601 /* Core rodata can not be freed */
3602 if (is_kernel_rodata(addr))
3605 if (trace_is_tracepoint_string(str))
3609 * Now this could be a module event, referencing core module
3610 * data, which is OK.
3615 trace_event = ftrace_find_event(iter->ent->type);
3619 event = container_of(trace_event, struct trace_event_call, event);
3623 /* Would rather have rodata, but this will suffice */
3624 if (within_module_core(addr, event->mod))
3630 static const char *show_buffer(struct trace_seq *s)
3632 struct seq_buf *seq = &s->seq;
3634 seq_buf_terminate(seq);
3639 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3641 static int test_can_verify_check(const char *fmt, ...)
3648 * The verifier depends on vsnprintf() modifying the va_list
3649 * passed to it, where it is sent as a reference. Some architectures
3650 * (like x86_32) pass it by value, which means that vsnprintf()
3651 * does not modify the va_list passed to it, and the verifier
3652 * would then need to be able to understand all the values that
3653 * vsnprintf can use. If it is passed by value, then the verifier is disabled.
3657 vsnprintf(buf, 16, "%d", ap);
3658 ret = va_arg(ap, int);
3664 static void test_can_verify(void)
3666 if (!test_can_verify_check("%d %d", 0, 1)) {
3667 pr_info("trace event string verifier disabled\n");
3668 static_branch_inc(&trace_no_verify);
3673 * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3674 * @iter: The iterator that holds the seq buffer and the event being printed
3675 * @fmt: The format used to print the event
3676 * @ap: The va_list holding the data to print from @fmt.
3678 * This writes the data into the @iter->seq buffer using the data from
3679 * @fmt and @ap. If the format has a %s, then the source of the string
3680 * is examined to make sure it is safe to print, otherwise it will
3681 * warn and print "[UNSAFE-MEMORY]" in place of the dereferenced string
3684 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3687 const char *p = fmt;
3691 if (WARN_ON_ONCE(!fmt))
3694 if (static_branch_unlikely(&trace_no_verify))
3697 /* Don't bother checking when doing a ftrace_dump() */
3698 if (iter->fmt == static_fmt_buf)
3707 /* We only care about %s and variants */
3708 for (i = 0; p[i]; i++) {
3709 if (i + 1 >= iter->fmt_size) {
3711 * If we can't expand the copy buffer,
3714 if (!trace_iter_expand_format(iter))
3718 if (p[i] == '\\' && p[i+1]) {
3723 /* Need to test cases like %08.*s */
3724 for (j = 1; p[i+j]; j++) {
3725 if (isdigit(p[i+j]) ||
3728 if (p[i+j] == '*') {
3740 /* If no %s found then just print normally */
3744 /* Copy up to the %s, and print that */
3745 strncpy(iter->fmt, p, i);
3746 iter->fmt[i] = '\0';
3747 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3750 len = va_arg(ap, int);
3752 /* The ap now points to the string data of the %s */
3753 str = va_arg(ap, const char *);
3756 * If you hit this warning, it is likely that the
3757 * trace event in question used %s on a string that
3758 * was saved at the time of the event, but may not be
3759 * around when the trace is read. Use __string(),
3760 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3761 * instead. See samples/trace_events/trace-events-sample.h
3764 if (WARN_ONCE(!trace_safe_str(iter, str),
3765 "fmt: '%s' current_buffer: '%s'",
3766 fmt, show_buffer(&iter->seq))) {
3769 /* Try to safely read the string */
3771 if (len + 1 > iter->fmt_size)
3772 len = iter->fmt_size - 1;
3775 ret = copy_from_kernel_nofault(iter->fmt, str, len);
3779 ret = strncpy_from_kernel_nofault(iter->fmt, str,
3783 trace_seq_printf(&iter->seq, "(0x%px)", str);
3785 trace_seq_printf(&iter->seq, "(0x%px:%s)",
3787 str = "[UNSAFE-MEMORY]";
3788 strcpy(iter->fmt, "%s");
3790 strncpy(iter->fmt, p + i, j + 1);
3791 iter->fmt[j+1] = '\0';
3794 trace_seq_printf(&iter->seq, iter->fmt, len, str);
3796 trace_seq_printf(&iter->seq, iter->fmt, str);
3802 trace_seq_vprintf(&iter->seq, p, ap);
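/*
 * Illustrative example of what the check above guards against (not from
 * this file): a TRACE_EVENT() whose TP_printk() does
 *
 *	TP_printk("name=%s", __entry->name_ptr)
 *
 * with a pointer that was only valid when the event fired may dereference
 * freed memory at read time. Copying the string into the event itself with
 * __string()/__assign_str() and printing __get_str(name) keeps the data
 * inside the event record, which trace_safe_str() accepts.
 */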
3805 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3807 const char *p, *new_fmt;
3810 if (WARN_ON_ONCE(!fmt))
3813 if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3817 new_fmt = q = iter->fmt;
3819 if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3820 if (!trace_iter_expand_format(iter))
3823 q += iter->fmt - new_fmt;
3824 new_fmt = iter->fmt;
3829 /* Replace %p with %px */
3833 } else if (p[0] == 'p' && !isalnum(p[1])) {
3844 #define STATIC_TEMP_BUF_SIZE 128
3845 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3847 /* Find the next real entry, without updating the iterator itself */
3848 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3849 int *ent_cpu, u64 *ent_ts)
3851 /* __find_next_entry will reset ent_size */
3852 int ent_size = iter->ent_size;
3853 struct trace_entry *entry;
3856 * If called from ftrace_dump(), then the iter->temp buffer
3857 * will be the static_temp_buf and not created from kmalloc.
3858 * If the entry size is greater than the buffer, we can
3859 * not save it. Just return NULL in that case. This is only
3860 * used to add markers when two consecutive events' time
3861 * stamps have a large delta. See trace_print_lat_context()
3863 if (iter->temp == static_temp_buf &&
3864 STATIC_TEMP_BUF_SIZE < ent_size)
3868 * The __find_next_entry() may call peek_next_entry(), which may
3869 * call ring_buffer_peek() that may make the contents of iter->ent
3870 * undefined. Need to copy iter->ent now.
3872 if (iter->ent && iter->ent != iter->temp) {
3873 if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3874 !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3876 temp = kmalloc(iter->ent_size, GFP_KERNEL);
3881 iter->temp_size = iter->ent_size;
3883 memcpy(iter->temp, iter->ent, iter->ent_size);
3884 iter->ent = iter->temp;
3886 entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3887 /* Put back the original ent_size */
3888 iter->ent_size = ent_size;
3893 /* Find the next real entry, and increment the iterator to the next entry */
3894 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3896 iter->ent = __find_next_entry(iter, &iter->cpu,
3897 &iter->lost_events, &iter->ts);
3900 trace_iterator_increment(iter);
3902 return iter->ent ? iter : NULL;
3905 static void trace_consume(struct trace_iterator *iter)
3907 ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3908 &iter->lost_events);
3911 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3913 struct trace_iterator *iter = m->private;
3917 WARN_ON_ONCE(iter->leftover);
3921 /* can't go backwards */
3926 ent = trace_find_next_entry_inc(iter);
3930 while (ent && iter->idx < i)
3931 ent = trace_find_next_entry_inc(iter);
3938 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3940 struct ring_buffer_iter *buf_iter;
3941 unsigned long entries = 0;
3944 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3946 buf_iter = trace_buffer_iter(iter, cpu);
3950 ring_buffer_iter_reset(buf_iter);
3953 * We could have the case with the max latency tracers
3954 * that a reset never took place on a cpu. This is evident
3955 * by the timestamp being before the start of the buffer.
3957 while (ring_buffer_iter_peek(buf_iter, &ts)) {
3958 if (ts >= iter->array_buffer->time_start)
3961 ring_buffer_iter_advance(buf_iter);
3964 per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3968 * The current tracer is copied to avoid global locking all around.
3971 static void *s_start(struct seq_file *m, loff_t *pos)
3973 struct trace_iterator *iter = m->private;
3974 struct trace_array *tr = iter->tr;
3975 int cpu_file = iter->cpu_file;
3981 * copy the tracer to avoid using a global lock all around.
3982 * iter->trace is a copy of current_trace, the pointer to the
3983 * name may be used instead of a strcmp(), as iter->trace->name
3984 * will point to the same string as current_trace->name.
3986 mutex_lock(&trace_types_lock);
3987 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3988 *iter->trace = *tr->current_trace;
3989 mutex_unlock(&trace_types_lock);
3991 #ifdef CONFIG_TRACER_MAX_TRACE
3992 if (iter->snapshot && iter->trace->use_max_tr)
3993 return ERR_PTR(-EBUSY);
3996 if (*pos != iter->pos) {
4001 if (cpu_file == RING_BUFFER_ALL_CPUS) {
4002 for_each_tracing_cpu(cpu)
4003 tracing_iter_reset(iter, cpu);
4005 tracing_iter_reset(iter, cpu_file);
4008 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4013 * If we overflowed the seq_file before, then we want
4014 * to just reuse the trace_seq buffer again.
4020 p = s_next(m, p, &l);
4024 trace_event_read_lock();
4025 trace_access_lock(cpu_file);
4029 static void s_stop(struct seq_file *m, void *p)
4031 struct trace_iterator *iter = m->private;
4033 #ifdef CONFIG_TRACER_MAX_TRACE
4034 if (iter->snapshot && iter->trace->use_max_tr)
4038 trace_access_unlock(iter->cpu_file);
4039 trace_event_read_unlock();
4043 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4044 unsigned long *entries, int cpu)
4046 unsigned long count;
4048 count = ring_buffer_entries_cpu(buf->buffer, cpu);
4050 * If this buffer has skipped entries, then we hold all
4051 * entries for the trace and we need to ignore the
4052 * ones before the time stamp.
4054 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4055 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4056 /* total is the same as the entries */
4060 ring_buffer_overrun_cpu(buf->buffer, cpu);
4065 get_total_entries(struct array_buffer *buf,
4066 unsigned long *total, unsigned long *entries)
4074 for_each_tracing_cpu(cpu) {
4075 get_total_entries_cpu(buf, &t, &e, cpu);
4081 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4083 unsigned long total, entries;
4088 get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4093 unsigned long trace_total_entries(struct trace_array *tr)
4095 unsigned long total, entries;
4100 get_total_entries(&tr->array_buffer, &total, &entries);
4105 static void print_lat_help_header(struct seq_file *m)
4107 seq_puts(m, "# _------=> CPU# \n"
4108 "# / _-----=> irqs-off \n"
4109 "# | / _----=> need-resched \n"
4110 "# || / _---=> hardirq/softirq \n"
4111 "# ||| / _--=> preempt-depth \n"
4113 "# cmd pid ||||| time | caller \n"
4114 "# \\ / ||||| \\ | / \n");
4117 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4119 unsigned long total;
4120 unsigned long entries;
4122 get_total_entries(buf, &total, &entries);
4123 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
4124 entries, total, num_online_cpus());
4128 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4131 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4133 print_event_info(buf, m);
4135 seq_printf(m, "# TASK-PID %s CPU# TIMESTAMP FUNCTION\n", tgid ? " TGID " : "");
4136 seq_printf(m, "# | | %s | | |\n", tgid ? " | " : "");
4139 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4142 bool tgid = flags & TRACE_ITER_RECORD_TGID;
4143 const char *space = " ";
4144 int prec = tgid ? 12 : 2;
4146 print_event_info(buf, m);
4148 seq_printf(m, "# %.*s _-----=> irqs-off\n", prec, space);
4149 seq_printf(m, "# %.*s / _----=> need-resched\n", prec, space);
4150 seq_printf(m, "# %.*s| / _---=> hardirq/softirq\n", prec, space);
4151 seq_printf(m, "# %.*s|| / _--=> preempt-depth\n", prec, space);
4152 seq_printf(m, "# %.*s||| / delay\n", prec, space);
4153 seq_printf(m, "# TASK-PID %.*s CPU# |||| TIMESTAMP FUNCTION\n", prec, " TGID ");
4154 seq_printf(m, "# | | %.*s | |||| | |\n", prec, " | ");
4158 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4160 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4161 struct array_buffer *buf = iter->array_buffer;
4162 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4163 struct tracer *type = iter->trace;
4164 unsigned long entries;
4165 unsigned long total;
4166 const char *name = "preemption";
4170 get_total_entries(buf, &total, &entries);
4172 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4174 seq_puts(m, "# -----------------------------------"
4175 "---------------------------------\n");
4176 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4177 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4178 nsecs_to_usecs(data->saved_latency),
4182 #if defined(CONFIG_PREEMPT_NONE)
4184 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
4186 #elif defined(CONFIG_PREEMPT)
4188 #elif defined(CONFIG_PREEMPT_RT)
4193 /* These are reserved for later use */
4196 seq_printf(m, " #P:%d)\n", num_online_cpus());
4200 seq_puts(m, "# -----------------\n");
4201 seq_printf(m, "# | task: %.16s-%d "
4202 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4203 data->comm, data->pid,
4204 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4205 data->policy, data->rt_priority);
4206 seq_puts(m, "# -----------------\n");
4208 if (data->critical_start) {
4209 seq_puts(m, "# => started at: ");
4210 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4211 trace_print_seq(m, &iter->seq);
4212 seq_puts(m, "\n# => ended at: ");
4213 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4214 trace_print_seq(m, &iter->seq);
4215 seq_puts(m, "\n#\n");
4221 static void test_cpu_buff_start(struct trace_iterator *iter)
4223 struct trace_seq *s = &iter->seq;
4224 struct trace_array *tr = iter->tr;
4226 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4229 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4232 if (cpumask_available(iter->started) &&
4233 cpumask_test_cpu(iter->cpu, iter->started))
4236 if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4239 if (cpumask_available(iter->started))
4240 cpumask_set_cpu(iter->cpu, iter->started);
4242 /* Don't print started cpu buffer for the first entry of the trace */
4244 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4248 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4250 struct trace_array *tr = iter->tr;
4251 struct trace_seq *s = &iter->seq;
4252 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4253 struct trace_entry *entry;
4254 struct trace_event *event;
4258 test_cpu_buff_start(iter);
4260 event = ftrace_find_event(entry->type);
4262 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4263 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4264 trace_print_lat_context(iter);
4266 trace_print_context(iter);
4269 if (trace_seq_has_overflowed(s))
4270 return TRACE_TYPE_PARTIAL_LINE;
4273 return event->funcs->trace(iter, sym_flags, event);
4275 trace_seq_printf(s, "Unknown type %d\n", entry->type);
4277 return trace_handle_return(s);
4280 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4282 struct trace_array *tr = iter->tr;
4283 struct trace_seq *s = &iter->seq;
4284 struct trace_entry *entry;
4285 struct trace_event *event;
4289 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4290 trace_seq_printf(s, "%d %d %llu ",
4291 entry->pid, iter->cpu, iter->ts);
4293 if (trace_seq_has_overflowed(s))
4294 return TRACE_TYPE_PARTIAL_LINE;
4296 event = ftrace_find_event(entry->type);
4298 return event->funcs->raw(iter, 0, event);
4300 trace_seq_printf(s, "%d ?\n", entry->type);
4302 return trace_handle_return(s);
4305 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4307 struct trace_array *tr = iter->tr;
4308 struct trace_seq *s = &iter->seq;
4309 unsigned char newline = '\n';
4310 struct trace_entry *entry;
4311 struct trace_event *event;
4315 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4316 SEQ_PUT_HEX_FIELD(s, entry->pid);
4317 SEQ_PUT_HEX_FIELD(s, iter->cpu);
4318 SEQ_PUT_HEX_FIELD(s, iter->ts);
4319 if (trace_seq_has_overflowed(s))
4320 return TRACE_TYPE_PARTIAL_LINE;
4323 event = ftrace_find_event(entry->type);
4325 enum print_line_t ret = event->funcs->hex(iter, 0, event);
4326 if (ret != TRACE_TYPE_HANDLED)
4330 SEQ_PUT_FIELD(s, newline);
4332 return trace_handle_return(s);
4335 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4337 struct trace_array *tr = iter->tr;
4338 struct trace_seq *s = &iter->seq;
4339 struct trace_entry *entry;
4340 struct trace_event *event;
4344 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4345 SEQ_PUT_FIELD(s, entry->pid);
4346 SEQ_PUT_FIELD(s, iter->cpu);
4347 SEQ_PUT_FIELD(s, iter->ts);
4348 if (trace_seq_has_overflowed(s))
4349 return TRACE_TYPE_PARTIAL_LINE;
4352 event = ftrace_find_event(entry->type);
4353 return event ? event->funcs->binary(iter, 0, event) :
4357 int trace_empty(struct trace_iterator *iter)
4359 struct ring_buffer_iter *buf_iter;
4362 /* If we are looking at one CPU buffer, only check that one */
4363 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4364 cpu = iter->cpu_file;
4365 buf_iter = trace_buffer_iter(iter, cpu);
4367 if (!ring_buffer_iter_empty(buf_iter))
4370 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4376 for_each_tracing_cpu(cpu) {
4377 buf_iter = trace_buffer_iter(iter, cpu);
4379 if (!ring_buffer_iter_empty(buf_iter))
4382 if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4390 /* Called with trace_event_read_lock() held. */
4391 enum print_line_t print_trace_line(struct trace_iterator *iter)
4393 struct trace_array *tr = iter->tr;
4394 unsigned long trace_flags = tr->trace_flags;
4395 enum print_line_t ret;
4397 if (iter->lost_events) {
4398 if (iter->lost_events == (unsigned long)-1)
4399 trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4402 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4403 iter->cpu, iter->lost_events);
4404 if (trace_seq_has_overflowed(&iter->seq))
4405 return TRACE_TYPE_PARTIAL_LINE;
4408 if (iter->trace && iter->trace->print_line) {
4409 ret = iter->trace->print_line(iter);
4410 if (ret != TRACE_TYPE_UNHANDLED)
4414 if (iter->ent->type == TRACE_BPUTS &&
4415 trace_flags & TRACE_ITER_PRINTK &&
4416 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4417 return trace_print_bputs_msg_only(iter);
4419 if (iter->ent->type == TRACE_BPRINT &&
4420 trace_flags & TRACE_ITER_PRINTK &&
4421 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4422 return trace_print_bprintk_msg_only(iter);
4424 if (iter->ent->type == TRACE_PRINT &&
4425 trace_flags & TRACE_ITER_PRINTK &&
4426 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4427 return trace_print_printk_msg_only(iter);
4429 if (trace_flags & TRACE_ITER_BIN)
4430 return print_bin_fmt(iter);
4432 if (trace_flags & TRACE_ITER_HEX)
4433 return print_hex_fmt(iter);
4435 if (trace_flags & TRACE_ITER_RAW)
4436 return print_raw_fmt(iter);
4438 return print_trace_fmt(iter);
4441 void trace_latency_header(struct seq_file *m)
4443 struct trace_iterator *iter = m->private;
4444 struct trace_array *tr = iter->tr;
4446 /* print nothing if the buffers are empty */
4447 if (trace_empty(iter))
4450 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4451 print_trace_header(m, iter);
4453 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4454 print_lat_help_header(m);
4457 void trace_default_header(struct seq_file *m)
4459 struct trace_iterator *iter = m->private;
4460 struct trace_array *tr = iter->tr;
4461 unsigned long trace_flags = tr->trace_flags;
4463 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4466 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4467 /* print nothing if the buffers are empty */
4468 if (trace_empty(iter))
4470 print_trace_header(m, iter);
4471 if (!(trace_flags & TRACE_ITER_VERBOSE))
4472 print_lat_help_header(m);
4474 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4475 if (trace_flags & TRACE_ITER_IRQ_INFO)
4476 print_func_help_header_irq(iter->array_buffer,
4479 print_func_help_header(iter->array_buffer, m,
4485 static void test_ftrace_alive(struct seq_file *m)
4487 if (!ftrace_is_dead())
4489 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4490 "# MAY BE MISSING FUNCTION EVENTS\n");
4493 #ifdef CONFIG_TRACER_MAX_TRACE
4494 static void show_snapshot_main_help(struct seq_file *m)
4496 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4497 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4498 "# Takes a snapshot of the main buffer.\n"
4499 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4500 "# (Doesn't have to be '2' works with any number that\n"
4501 "# is not a '0' or '1')\n");
4504 static void show_snapshot_percpu_help(struct seq_file *m)
4506 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4507 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4508 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4509 "# Takes a snapshot of the main buffer for this cpu.\n");
4511 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4512 "# Must use main snapshot file to allocate.\n");
4514 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4515 "# (Doesn't have to be '2' works with any number that\n"
4516 "# is not a '0' or '1')\n");
4519 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4521 if (iter->tr->allocated_snapshot)
4522 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4524 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4526 seq_puts(m, "# Snapshot commands:\n");
4527 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4528 show_snapshot_main_help(m);
4530 show_snapshot_percpu_help(m);
4533 /* Should never be called */
4534 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4537 static int s_show(struct seq_file *m, void *v)
4539 struct trace_iterator *iter = v;
4542 if (iter->ent == NULL) {
4544 seq_printf(m, "# tracer: %s\n", iter->trace->name);
4546 test_ftrace_alive(m);
4548 if (iter->snapshot && trace_empty(iter))
4549 print_snapshot_help(m, iter);
4550 else if (iter->trace && iter->trace->print_header)
4551 iter->trace->print_header(m);
4553 trace_default_header(m);
4555 } else if (iter->leftover) {
4557 * If we filled the seq_file buffer earlier, we
4558 * want to just show it now.
4560 ret = trace_print_seq(m, &iter->seq);
4562 /* ret should this time be zero, but you never know */
4563 iter->leftover = ret;
4566 print_trace_line(iter);
4567 ret = trace_print_seq(m, &iter->seq);
4569 * If we overflow the seq_file buffer, then it will
4570 * ask us for this data again at start up.
4572 * ret is 0 if seq_file write succeeded.
4575 iter->leftover = ret;
4582 * Should be used after trace_array_get(), trace_types_lock
4583 * ensures that i_cdev was already initialized.
4585 static inline int tracing_get_cpu(struct inode *inode)
4587 if (inode->i_cdev) /* See trace_create_cpu_file() */
4588 return (long)inode->i_cdev - 1;
4589 return RING_BUFFER_ALL_CPUS;
4592 static const struct seq_operations tracer_seq_ops = {
4599 static struct trace_iterator *
4600 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4602 struct trace_array *tr = inode->i_private;
4603 struct trace_iterator *iter;
4606 if (tracing_disabled)
4607 return ERR_PTR(-ENODEV);
4609 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4611 return ERR_PTR(-ENOMEM);
4613 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4615 if (!iter->buffer_iter)
4619 * trace_find_next_entry() may need to save off iter->ent.
4620 * It will place it into the iter->temp buffer. As most
4621 * events are less than 128 bytes, allocate a buffer of that size.
4622 * If one is greater, then trace_find_next_entry() will
4623 * allocate a new buffer to adjust for the bigger iter->ent.
4624 * It's not critical if it fails to get allocated here.
4626 iter->temp = kmalloc(128, GFP_KERNEL);
4628 iter->temp_size = 128;
4631 * trace_event_printf() may need to modify the given format
4632 * string to replace %p with %px so that it shows the real address
4633 * instead of a hashed value. However, that is only needed for event
4634 * tracing; other tracers may not need it. Defer the allocation
4635 * until it is needed.
4641 * We make a copy of the current tracer to avoid concurrent
4642 * changes on it while we are reading.
4644 mutex_lock(&trace_types_lock);
4645 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
4649 *iter->trace = *tr->current_trace;
4651 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4656 #ifdef CONFIG_TRACER_MAX_TRACE
4657 /* Currently only the top directory has a snapshot */
4658 if (tr->current_trace->print_max || snapshot)
4659 iter->array_buffer = &tr->max_buffer;
4662 iter->array_buffer = &tr->array_buffer;
4663 iter->snapshot = snapshot;
4665 iter->cpu_file = tracing_get_cpu(inode);
4666 mutex_init(&iter->mutex);
4668 /* Notify the tracer early; before we stop tracing. */
4669 if (iter->trace->open)
4670 iter->trace->open(iter);
4672 /* Annotate start of buffers if we had overruns */
4673 if (ring_buffer_overruns(iter->array_buffer->buffer))
4674 iter->iter_flags |= TRACE_FILE_ANNOTATE;
4676 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
4677 if (trace_clocks[tr->clock_id].in_ns)
4678 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4681 * If pause-on-trace is enabled, then stop the trace while
4682 * dumping, unless this is the "snapshot" file
4684 if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4685 tracing_stop_tr(tr);
4687 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4688 for_each_tracing_cpu(cpu) {
4689 iter->buffer_iter[cpu] =
4690 ring_buffer_read_prepare(iter->array_buffer->buffer,
4693 ring_buffer_read_prepare_sync();
4694 for_each_tracing_cpu(cpu) {
4695 ring_buffer_read_start(iter->buffer_iter[cpu]);
4696 tracing_iter_reset(iter, cpu);
4699 cpu = iter->cpu_file;
4700 iter->buffer_iter[cpu] =
4701 ring_buffer_read_prepare(iter->array_buffer->buffer,
4703 ring_buffer_read_prepare_sync();
4704 ring_buffer_read_start(iter->buffer_iter[cpu]);
4705 tracing_iter_reset(iter, cpu);
4708 mutex_unlock(&trace_types_lock);
4713 mutex_unlock(&trace_types_lock);
4716 kfree(iter->buffer_iter);
4718 seq_release_private(inode, file);
4719 return ERR_PTR(-ENOMEM);
4722 int tracing_open_generic(struct inode *inode, struct file *filp)
4726 ret = tracing_check_open_get_tr(NULL);
4730 filp->private_data = inode->i_private;
4734 bool tracing_is_disabled(void)
4736 return (tracing_disabled) ? true : false;
4740 * Open and update trace_array ref count.
4741 * Must have the current trace_array passed to it.
4743 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4745 struct trace_array *tr = inode->i_private;
4748 ret = tracing_check_open_get_tr(tr);
4752 filp->private_data = inode->i_private;
4757 static int tracing_release(struct inode *inode, struct file *file)
4759 struct trace_array *tr = inode->i_private;
4760 struct seq_file *m = file->private_data;
4761 struct trace_iterator *iter;
4764 if (!(file->f_mode & FMODE_READ)) {
4765 trace_array_put(tr);
4769 /* Writes do not use seq_file */
4771 mutex_lock(&trace_types_lock);
4773 for_each_tracing_cpu(cpu) {
4774 if (iter->buffer_iter[cpu])
4775 ring_buffer_read_finish(iter->buffer_iter[cpu]);
4778 if (iter->trace && iter->trace->close)
4779 iter->trace->close(iter);
4781 if (!iter->snapshot && tr->stop_count)
4782 /* reenable tracing if it was previously enabled */
4783 tracing_start_tr(tr);
4785 __trace_array_put(tr);
4787 mutex_unlock(&trace_types_lock);
4789 mutex_destroy(&iter->mutex);
4790 free_cpumask_var(iter->started);
4794 kfree(iter->buffer_iter);
4795 seq_release_private(inode, file);
4800 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
4802 struct trace_array *tr = inode->i_private;
4804 trace_array_put(tr);
4808 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4810 struct trace_array *tr = inode->i_private;
4812 trace_array_put(tr);
4814 return single_release(inode, file);
4817 static int tracing_open(struct inode *inode, struct file *file)
4819 struct trace_array *tr = inode->i_private;
4820 struct trace_iterator *iter;
4823 ret = tracing_check_open_get_tr(tr);
4827 /* If this file was open for write, then erase contents */
4828 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4829 int cpu = tracing_get_cpu(inode);
4830 struct array_buffer *trace_buf = &tr->array_buffer;
4832 #ifdef CONFIG_TRACER_MAX_TRACE
4833 if (tr->current_trace->print_max)
4834 trace_buf = &tr->max_buffer;
4837 if (cpu == RING_BUFFER_ALL_CPUS)
4838 tracing_reset_online_cpus(trace_buf);
4840 tracing_reset_cpu(trace_buf, cpu);
4843 if (file->f_mode & FMODE_READ) {
4844 iter = __tracing_open(inode, file, false);
4846 ret = PTR_ERR(iter);
4847 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4848 iter->iter_flags |= TRACE_FILE_LAT_FMT;
4852 trace_array_put(tr);
4858 * Some tracers are not suitable for instance buffers.
4859 * A tracer is always available for the global array (toplevel)
4860 * or if it explicitly states that it is.
4863 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4865 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4868 /* Find the next tracer that this trace array may use */
4869 static struct tracer *
4870 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4872 while (t && !trace_ok_for_array(t, tr))
4879 t_next(struct seq_file *m, void *v, loff_t *pos)
4881 struct trace_array *tr = m->private;
4882 struct tracer *t = v;
4887 t = get_tracer_for_array(tr, t->next);
4892 static void *t_start(struct seq_file *m, loff_t *pos)
4894 struct trace_array *tr = m->private;
4898 mutex_lock(&trace_types_lock);
4900 t = get_tracer_for_array(tr, trace_types);
4901 for (; t && l < *pos; t = t_next(m, t, &l))
4907 static void t_stop(struct seq_file *m, void *p)
4909 mutex_unlock(&trace_types_lock);
4912 static int t_show(struct seq_file *m, void *v)
4914 struct tracer *t = v;
4919 seq_puts(m, t->name);
4928 static const struct seq_operations show_traces_seq_ops = {
4935 static int show_traces_open(struct inode *inode, struct file *file)
4937 struct trace_array *tr = inode->i_private;
4941 ret = tracing_check_open_get_tr(tr);
4945 ret = seq_open(file, &show_traces_seq_ops);
4947 trace_array_put(tr);
4951 m = file->private_data;
4957 static int show_traces_release(struct inode *inode, struct file *file)
4959 struct trace_array *tr = inode->i_private;
4961 trace_array_put(tr);
4962 return seq_release(inode, file);
4966 tracing_write_stub(struct file *filp, const char __user *ubuf,
4967 size_t count, loff_t *ppos)
4972 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4976 if (file->f_mode & FMODE_READ)
4977 ret = seq_lseek(file, offset, whence);
4979 file->f_pos = ret = 0;
4984 static const struct file_operations tracing_fops = {
4985 .open = tracing_open,
4987 .write = tracing_write_stub,
4988 .llseek = tracing_lseek,
4989 .release = tracing_release,
4992 static const struct file_operations show_traces_fops = {
4993 .open = show_traces_open,
4995 .llseek = seq_lseek,
4996 .release = show_traces_release,
5000 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5001 size_t count, loff_t *ppos)
5003 struct trace_array *tr = file_inode(filp)->i_private;
5007 len = snprintf(NULL, 0, "%*pb\n",
5008 cpumask_pr_args(tr->tracing_cpumask)) + 1;
5009 mask_str = kmalloc(len, GFP_KERNEL);
5013 len = snprintf(mask_str, len, "%*pb\n",
5014 cpumask_pr_args(tr->tracing_cpumask));
5019 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5027 int tracing_set_cpumask(struct trace_array *tr,
5028 cpumask_var_t tracing_cpumask_new)
5035 local_irq_disable();
5036 arch_spin_lock(&tr->max_lock);
5037 for_each_tracing_cpu(cpu) {
5039 * Increase/decrease the disabled counter if we are
5040 * about to flip a bit in the cpumask:
5042 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5043 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5044 atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5045 ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5047 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5048 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5049 atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5050 ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5053 arch_spin_unlock(&tr->max_lock);
5056 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
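/*
 * A minimal usage sketch (assuming tracefs is mounted at /sys/kernel/tracing):
 * the mask is written as a hex cpumask, so restricting tracing to CPUs 0-1
 * could look like:
 *
 *   # echo 3 > /sys/kernel/tracing/tracing_cpumask
 *   # cat /sys/kernel/tracing/tracing_cpumask
 *
 * CPUs cleared from the mask have their ring buffers disabled by the loop
 * above; setting them again re-enables recording on those CPUs.
 */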
5062 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5063 size_t count, loff_t *ppos)
5065 struct trace_array *tr = file_inode(filp)->i_private;
5066 cpumask_var_t tracing_cpumask_new;
5069 if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5072 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5076 err = tracing_set_cpumask(tr, tracing_cpumask_new);
5080 free_cpumask_var(tracing_cpumask_new);
5085 free_cpumask_var(tracing_cpumask_new);
5090 static const struct file_operations tracing_cpumask_fops = {
5091 .open = tracing_open_generic_tr,
5092 .read = tracing_cpumask_read,
5093 .write = tracing_cpumask_write,
5094 .release = tracing_release_generic_tr,
5095 .llseek = generic_file_llseek,
5098 static int tracing_trace_options_show(struct seq_file *m, void *v)
5100 struct tracer_opt *trace_opts;
5101 struct trace_array *tr = m->private;
5105 mutex_lock(&trace_types_lock);
5106 tracer_flags = tr->current_trace->flags->val;
5107 trace_opts = tr->current_trace->flags->opts;
5109 for (i = 0; trace_options[i]; i++) {
5110 if (tr->trace_flags & (1 << i))
5111 seq_printf(m, "%s\n", trace_options[i]);
5113 seq_printf(m, "no%s\n", trace_options[i]);
5116 for (i = 0; trace_opts[i].name; i++) {
5117 if (tracer_flags & trace_opts[i].bit)
5118 seq_printf(m, "%s\n", trace_opts[i].name);
5120 seq_printf(m, "no%s\n", trace_opts[i].name);
5122 mutex_unlock(&trace_types_lock);
5127 static int __set_tracer_option(struct trace_array *tr,
5128 struct tracer_flags *tracer_flags,
5129 struct tracer_opt *opts, int neg)
5131 struct tracer *trace = tracer_flags->trace;
5134 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5139 tracer_flags->val &= ~opts->bit;
5141 tracer_flags->val |= opts->bit;
5145 /* Try to assign a tracer specific option */
5146 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5148 struct tracer *trace = tr->current_trace;
5149 struct tracer_flags *tracer_flags = trace->flags;
5150 struct tracer_opt *opts = NULL;
5153 for (i = 0; tracer_flags->opts[i].name; i++) {
5154 opts = &tracer_flags->opts[i];
5156 if (strcmp(cmp, opts->name) == 0)
5157 return __set_tracer_option(tr, trace->flags, opts, neg);
5163 /* Some tracers require overwrite to stay enabled */
5164 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5166 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5172 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5174 if ((mask == TRACE_ITER_RECORD_TGID) ||
5175 (mask == TRACE_ITER_RECORD_CMD))
5176 lockdep_assert_held(&event_mutex);
5178 /* do nothing if flag is already set */
5179 if (!!(tr->trace_flags & mask) == !!enabled)
5182 /* Give the tracer a chance to approve the change */
5183 if (tr->current_trace->flag_changed)
5184 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5188 tr->trace_flags |= mask;
5190 tr->trace_flags &= ~mask;
5192 if (mask == TRACE_ITER_RECORD_CMD)
5193 trace_event_enable_cmd_record(enabled);
5195 if (mask == TRACE_ITER_RECORD_TGID) {
5197 tgid_map = kvcalloc(PID_MAX_DEFAULT + 1,
5201 tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5205 trace_event_enable_tgid_record(enabled);
5208 if (mask == TRACE_ITER_EVENT_FORK)
5209 trace_event_follow_fork(tr, enabled);
5211 if (mask == TRACE_ITER_FUNC_FORK)
5212 ftrace_pid_follow_fork(tr, enabled);
5214 if (mask == TRACE_ITER_OVERWRITE) {
5215 ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5216 #ifdef CONFIG_TRACER_MAX_TRACE
5217 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5221 if (mask == TRACE_ITER_PRINTK) {
5222 trace_printk_start_stop_comm(enabled);
5223 trace_printk_control(enabled);
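/*
 * For illustration, the flags toggled here are normally driven from user
 * space through the trace_options file (or the per-option files under
 * options/):
 *
 *   # echo overwrite > /sys/kernel/tracing/trace_options
 *   # echo nooverwrite > /sys/kernel/tracing/trace_options
 *
 * The "no" prefix clears the flag; trace_set_options() below parses it and
 * ends up calling set_tracer_flag() with the matching mask bit.
 */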
5229 int trace_set_options(struct trace_array *tr, char *option)
5234 size_t orig_len = strlen(option);
5237 cmp = strstrip(option);
5239 len = str_has_prefix(cmp, "no");
5245 mutex_lock(&event_mutex);
5246 mutex_lock(&trace_types_lock);
5248 ret = match_string(trace_options, -1, cmp);
5249 /* If no option could be set, test the specific tracer options */
5251 ret = set_tracer_option(tr, cmp, neg);
5253 ret = set_tracer_flag(tr, 1 << ret, !neg);
5255 mutex_unlock(&trace_types_lock);
5256 mutex_unlock(&event_mutex);
5259 * If the first trailing whitespace is replaced with '\0' by strstrip,
5260 * turn it back into a space.
5262 if (orig_len > strlen(option))
5263 option[strlen(option)] = ' ';
5268 static void __init apply_trace_boot_options(void)
5270 char *buf = trace_boot_options_buf;
5274 option = strsep(&buf, ",");
5280 trace_set_options(&global_trace, option);
5282 /* Put back the comma to allow this to be called again */
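/*
 * As a sketch, the same option strings can be passed at boot via the
 * trace_options= kernel parameter, for example:
 *
 *   trace_options=sym-offset,nooverwrite
 *
 * Each comma-separated token is handed to trace_set_options() for the
 * global trace array, exactly as if it had been echoed at run time.
 */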
5289 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5290 size_t cnt, loff_t *ppos)
5292 struct seq_file *m = filp->private_data;
5293 struct trace_array *tr = m->private;
5297 if (cnt >= sizeof(buf))
5300 if (copy_from_user(buf, ubuf, cnt))
5305 ret = trace_set_options(tr, buf);
5314 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5316 struct trace_array *tr = inode->i_private;
5319 ret = tracing_check_open_get_tr(tr);
5323 ret = single_open(file, tracing_trace_options_show, inode->i_private);
5325 trace_array_put(tr);
5330 static const struct file_operations tracing_iter_fops = {
5331 .open = tracing_trace_options_open,
5333 .llseek = seq_lseek,
5334 .release = tracing_single_release_tr,
5335 .write = tracing_trace_options_write,
5338 static const char readme_msg[] =
5339 "tracing mini-HOWTO:\n\n"
5340 "# echo 0 > tracing_on : quick way to disable tracing\n"
5341 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5342 " Important files:\n"
5343 " trace\t\t\t- The static contents of the buffer\n"
5344 "\t\t\t To clear the buffer write into this file: echo > trace\n"
5345 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5346 " current_tracer\t- function and latency tracers\n"
5347 " available_tracers\t- list of configured tracers for current_tracer\n"
5348 " error_log\t- error log for failed commands (that support it)\n"
5349 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
5350 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
5351 " trace_clock\t\t-change the clock used to order events\n"
5352 " local: Per cpu clock but may not be synced across CPUs\n"
5353 " global: Synced across CPUs but slows tracing down.\n"
5354 " counter: Not a clock, but just an increment\n"
5355 " uptime: Jiffy counter from time of boot\n"
5356 " perf: Same clock that perf events use\n"
5357 #ifdef CONFIG_X86_64
5358 " x86-tsc: TSC cycle counter\n"
5360 "\n timestamp_mode\t-view the mode used to timestamp events\n"
5361 " delta: Delta difference against a buffer-wide timestamp\n"
5362 " absolute: Absolute (standalone) timestamp\n"
5363 "\n trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5364 "\n trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5365 " tracing_cpumask\t- Limit which CPUs to trace\n"
5366 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5367 "\t\t\t Remove sub-buffer with rmdir\n"
5368 " trace_options\t\t- Set format or modify how tracing happens\n"
5369 "\t\t\t Disable an option by prefixing 'no' to the\n"
5370 "\t\t\t option name\n"
5371 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5372 #ifdef CONFIG_DYNAMIC_FTRACE
5373 "\n available_filter_functions - list of functions that can be filtered on\n"
5374 " set_ftrace_filter\t- echo function name in here to only trace these\n"
5375 "\t\t\t functions\n"
5376 "\t accepts: func_full_name or glob-matching-pattern\n"
5377 "\t modules: Can select a group via module\n"
5378 "\t Format: :mod:<module-name>\n"
5379 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
5380 "\t triggers: a command to perform when function is hit\n"
5381 "\t Format: <function>:<trigger>[:count]\n"
5382 "\t trigger: traceon, traceoff\n"
5383 "\t\t enable_event:<system>:<event>\n"
5384 "\t\t disable_event:<system>:<event>\n"
5385 #ifdef CONFIG_STACKTRACE
5388 #ifdef CONFIG_TRACER_SNAPSHOT
5393 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
5394 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
5395 "\t The first one will disable tracing every time do_fault is hit\n"
5396 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
5397 "\t The first time do trap is hit and it disables tracing, the\n"
5398 "\t counter will decrement to 2. If tracing is already disabled,\n"
5399 "\t the counter will not decrement. It only decrements when the\n"
5400 "\t trigger did work\n"
5401 "\t To remove trigger without count:\n"
5402 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
5403 "\t To remove trigger with a count:\n"
5404 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5405 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
5406 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5407 "\t modules: Can select a group via module command :mod:\n"
5408 "\t Does not accept triggers\n"
5409 #endif /* CONFIG_DYNAMIC_FTRACE */
5410 #ifdef CONFIG_FUNCTION_TRACER
5411 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5413 " set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5416 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5417 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5418 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5419 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5421 #ifdef CONFIG_TRACER_SNAPSHOT
5422 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
5423 "\t\t\t snapshot buffer. Read the contents for more\n"
5424 "\t\t\t information\n"
5426 #ifdef CONFIG_STACK_TRACER
5427 " stack_trace\t\t- Shows the max stack trace when active\n"
5428 " stack_max_size\t- Shows current max stack size that was traced\n"
5429 "\t\t\t Write into this file to reset the max size (trigger a\n"
5430 "\t\t\t new trace)\n"
5431 #ifdef CONFIG_DYNAMIC_FTRACE
5432 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5435 #endif /* CONFIG_STACK_TRACER */
5436 #ifdef CONFIG_DYNAMIC_EVENTS
5437 " dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5438 "\t\t\t Write into this file to define/undefine new trace events.\n"
5440 #ifdef CONFIG_KPROBE_EVENTS
5441 " kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5442 "\t\t\t Write into this file to define/undefine new trace events.\n"
5444 #ifdef CONFIG_UPROBE_EVENTS
5445 " uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5446 "\t\t\t Write into this file to define/undefine new trace events.\n"
5448 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5449 "\t accepts: event-definitions (one definition per line)\n"
5450 "\t Format: p[:[<group>/]<event>] <place> [<args>]\n"
5451 "\t r[maxactive][:[<group>/]<event>] <place> [<args>]\n"
5452 #ifdef CONFIG_HIST_TRIGGERS
5453 "\t s:[synthetic/]<event> <field> [<field>]\n"
5455 "\t -:[<group>/]<event>\n"
5456 #ifdef CONFIG_KPROBE_EVENTS
5457 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5458 "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5460 #ifdef CONFIG_UPROBE_EVENTS
5461 " place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5463 "\t args: <name>=fetcharg[:type]\n"
5464 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
5465 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5466 "\t $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5468 "\t $stack<index>, $stack, $retval, $comm,\n"
5470 "\t +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5471 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
5472 "\t b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5473 "\t <type>\\[<array-size>\\]\n"
5474 #ifdef CONFIG_HIST_TRIGGERS
5475 "\t field: <stype> <name>;\n"
5476 "\t stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5477 "\t [unsigned] char/int/long\n"
5480 " events/\t\t- Directory containing all trace event subsystems:\n"
5481 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5482 " events/<system>/\t- Directory containing all trace events for <system>:\n"
5483 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5485 " filter\t\t- If set, only events passing filter are traced\n"
5486 " events/<system>/<event>/\t- Directory containing control files for\n"
5488 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5489 " filter\t\t- If set, only events passing filter are traced\n"
5490 " trigger\t\t- If set, a command to perform when event is hit\n"
5491 "\t Format: <trigger>[:count][if <filter>]\n"
5492 "\t trigger: traceon, traceoff\n"
5493 "\t enable_event:<system>:<event>\n"
5494 "\t disable_event:<system>:<event>\n"
5495 #ifdef CONFIG_HIST_TRIGGERS
5496 "\t enable_hist:<system>:<event>\n"
5497 "\t disable_hist:<system>:<event>\n"
5499 #ifdef CONFIG_STACKTRACE
5502 #ifdef CONFIG_TRACER_SNAPSHOT
5505 #ifdef CONFIG_HIST_TRIGGERS
5506 "\t\t hist (see below)\n"
5508 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
5509 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
5510 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5511 "\t events/block/block_unplug/trigger\n"
5512 "\t The first disables tracing every time block_unplug is hit.\n"
5513 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
5514 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
5515 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5516 "\t Like function triggers, the counter is only decremented if it\n"
5517 "\t enabled or disabled tracing.\n"
5518 "\t To remove a trigger without a count:\n"
5519 "\t echo '!<trigger> > <system>/<event>/trigger\n"
5520 "\t To remove a trigger with a count:\n"
5521 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
5522 "\t Filters can be ignored when removing a trigger.\n"
5523 #ifdef CONFIG_HIST_TRIGGERS
5524 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
5525 "\t Format: hist:keys=<field1[,field2,...]>\n"
5526 "\t [:values=<field1[,field2,...]>]\n"
5527 "\t [:sort=<field1[,field2,...]>]\n"
5528 "\t [:size=#entries]\n"
5529 "\t [:pause][:continue][:clear]\n"
5530 "\t [:name=histname1]\n"
5531 "\t [:<handler>.<action>]\n"
5532 "\t [if <filter>]\n\n"
5533 "\t When a matching event is hit, an entry is added to a hash\n"
5534 "\t table using the key(s) and value(s) named, and the value of a\n"
5535 "\t sum called 'hitcount' is incremented. Keys and values\n"
5536 "\t correspond to fields in the event's format description. Keys\n"
5537 "\t can be any field, or the special string 'stacktrace'.\n"
5538 "\t Compound keys consisting of up to two fields can be specified\n"
5539 "\t by the 'keys' keyword. Values must correspond to numeric\n"
5540 "\t fields. Sort keys consisting of up to two fields can be\n"
5541 "\t specified using the 'sort' keyword. The sort direction can\n"
5542 "\t be modified by appending '.descending' or '.ascending' to a\n"
5543 "\t sort field. The 'size' parameter can be used to specify more\n"
5544 "\t or fewer than the default 2048 entries for the hashtable size.\n"
5545 "\t If a hist trigger is given a name using the 'name' parameter,\n"
5546 "\t its histogram data will be shared with other triggers of the\n"
5547 "\t same name, and trigger hits will update this common data.\n\n"
5548 "\t Reading the 'hist' file for the event will dump the hash\n"
5549 "\t table in its entirety to stdout. If there are multiple hist\n"
5550 "\t triggers attached to an event, there will be a table for each\n"
5551 "\t trigger in the output. The table displayed for a named\n"
5552 "\t trigger will be the same as any other instance having the\n"
5553 "\t same name. The default format used to display a given field\n"
5554 "\t can be modified by appending any of the following modifiers\n"
5555 "\t to the field name, as applicable:\n\n"
5556 "\t .hex display a number as a hex value\n"
5557 "\t .sym display an address as a symbol\n"
5558 "\t .sym-offset display an address as a symbol and offset\n"
5559 "\t .execname display a common_pid as a program name\n"
5560 "\t .syscall display a syscall id as a syscall name\n"
5561 "\t .log2 display log2 value rather than raw number\n"
5562 "\t .usecs display a common_timestamp in microseconds\n\n"
5563 "\t The 'pause' parameter can be used to pause an existing hist\n"
5564 "\t trigger or to start a hist trigger but not log any events\n"
5565 "\t until told to do so. 'continue' can be used to start or\n"
5566 "\t restart a paused hist trigger.\n\n"
5567 "\t The 'clear' parameter will clear the contents of a running\n"
5568 "\t hist trigger and leave its current paused/active state\n"
5570 "\t The enable_hist and disable_hist triggers can be used to\n"
5571 "\t have one event conditionally start and stop another event's\n"
5572 "\t already-attached hist trigger. The syntax is analogous to\n"
5573 "\t the enable_event and disable_event triggers.\n\n"
5574 "\t Hist trigger handlers and actions are executed whenever a\n"
5575 "\t a histogram entry is added or updated. They take the form:\n\n"
5576 "\t <handler>.<action>\n\n"
5577 "\t The available handlers are:\n\n"
5578 "\t onmatch(matching.event) - invoke on addition or update\n"
5579 "\t onmax(var) - invoke if var exceeds current max\n"
5580 "\t onchange(var) - invoke action if var changes\n\n"
5581 "\t The available actions are:\n\n"
5582 "\t trace(<synthetic_event>,param list) - generate synthetic event\n"
5583 "\t save(field,...) - save current event fields\n"
5584 #ifdef CONFIG_TRACER_SNAPSHOT
5585 "\t snapshot() - snapshot the trace buffer\n\n"
5587 #ifdef CONFIG_SYNTH_EVENTS
5588 " events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5589 "\t Write into this file to define/undefine new synthetic events.\n"
5590 "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n"
5596 tracing_readme_read(struct file *filp, char __user *ubuf,
5597 size_t cnt, loff_t *ppos)
5599 return simple_read_from_buffer(ubuf, cnt, ppos,
5600 readme_msg, strlen(readme_msg));
5603 static const struct file_operations tracing_readme_fops = {
5604 .open = tracing_open_generic,
5605 .read = tracing_readme_read,
5606 .llseek = generic_file_llseek,
5609 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5613 if (*pos || m->count)
5618 for (; ptr <= &tgid_map[PID_MAX_DEFAULT]; ptr++) {
5619 if (trace_find_tgid(*ptr))
5626 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5636 v = saved_tgids_next(m, v, &l);
5644 static void saved_tgids_stop(struct seq_file *m, void *v)
5648 static int saved_tgids_show(struct seq_file *m, void *v)
5650 int pid = (int *)v - tgid_map;
5652 seq_printf(m, "%d %d\n", pid, trace_find_tgid(pid));
5656 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5657 .start = saved_tgids_start,
5658 .stop = saved_tgids_stop,
5659 .next = saved_tgids_next,
5660 .show = saved_tgids_show,
5663 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5667 ret = tracing_check_open_get_tr(NULL);
5671 return seq_open(filp, &tracing_saved_tgids_seq_ops);
5675 static const struct file_operations tracing_saved_tgids_fops = {
5676 .open = tracing_saved_tgids_open,
5678 .llseek = seq_lseek,
5679 .release = seq_release,
5682 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5684 unsigned int *ptr = v;
5686 if (*pos || m->count)
5691 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
5693 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
5702 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
5708 arch_spin_lock(&trace_cmdline_lock);
5710 v = &savedcmd->map_cmdline_to_pid[0];
5712 v = saved_cmdlines_next(m, v, &l);
5720 static void saved_cmdlines_stop(struct seq_file *m, void *v)
5722 arch_spin_unlock(&trace_cmdline_lock);
5726 static int saved_cmdlines_show(struct seq_file *m, void *v)
5728 char buf[TASK_COMM_LEN];
5729 unsigned int *pid = v;
5731 __trace_find_cmdline(*pid, buf);
5732 seq_printf(m, "%d %s\n", *pid, buf);
5736 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
5737 .start = saved_cmdlines_start,
5738 .next = saved_cmdlines_next,
5739 .stop = saved_cmdlines_stop,
5740 .show = saved_cmdlines_show,
5743 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
5747 ret = tracing_check_open_get_tr(NULL);
5751 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
5754 static const struct file_operations tracing_saved_cmdlines_fops = {
5755 .open = tracing_saved_cmdlines_open,
5757 .llseek = seq_lseek,
5758 .release = seq_release,
5762 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
5763 size_t cnt, loff_t *ppos)
5768 arch_spin_lock(&trace_cmdline_lock);
5769 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
5770 arch_spin_unlock(&trace_cmdline_lock);
5772 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5775 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
5777 kfree(s->saved_cmdlines);
5778 kfree(s->map_cmdline_to_pid);
5782 static int tracing_resize_saved_cmdlines(unsigned int val)
5784 struct saved_cmdlines_buffer *s, *savedcmd_temp;
5786 s = kmalloc(sizeof(*s), GFP_KERNEL);
5790 if (allocate_cmdlines_buffer(val, s) < 0) {
5795 arch_spin_lock(&trace_cmdline_lock);
5796 savedcmd_temp = savedcmd;
5798 arch_spin_unlock(&trace_cmdline_lock);
5799 free_saved_cmdlines_buffer(savedcmd_temp);
5805 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
5806 size_t cnt, loff_t *ppos)
5811 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5815 /* must have at least 1 entry and at most PID_MAX_DEFAULT */
5816 if (!val || val > PID_MAX_DEFAULT)
5819 ret = tracing_resize_saved_cmdlines((unsigned int)val);
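/*
 * For example, enlarging the saved comm cache and inspecting it could be
 * done with:
 *
 *   # echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *   # cat /sys/kernel/tracing/saved_cmdlines
 *
 * Values outside 1..PID_MAX_DEFAULT are rejected with -EINVAL above.
 */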
5828 static const struct file_operations tracing_saved_cmdlines_size_fops = {
5829 .open = tracing_open_generic,
5830 .read = tracing_saved_cmdlines_size_read,
5831 .write = tracing_saved_cmdlines_size_write,
5834 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5835 static union trace_eval_map_item *
5836 update_eval_map(union trace_eval_map_item *ptr)
5838 if (!ptr->map.eval_string) {
5839 if (ptr->tail.next) {
5840 ptr = ptr->tail.next;
5841 /* Set ptr to the next real item (skip head) */
5849 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5851 union trace_eval_map_item *ptr = v;
5854 * Paranoid! If ptr points to end, we don't want to increment past it.
5855 * This really should never happen.
5858 ptr = update_eval_map(ptr);
5859 if (WARN_ON_ONCE(!ptr))
5863 ptr = update_eval_map(ptr);
5868 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5870 union trace_eval_map_item *v;
5873 mutex_lock(&trace_eval_mutex);
5875 v = trace_eval_maps;
5879 while (v && l < *pos) {
5880 v = eval_map_next(m, v, &l);
5886 static void eval_map_stop(struct seq_file *m, void *v)
5888 mutex_unlock(&trace_eval_mutex);
5891 static int eval_map_show(struct seq_file *m, void *v)
5893 union trace_eval_map_item *ptr = v;
5895 seq_printf(m, "%s %ld (%s)\n",
5896 ptr->map.eval_string, ptr->map.eval_value,
5902 static const struct seq_operations tracing_eval_map_seq_ops = {
5903 .start = eval_map_start,
5904 .next = eval_map_next,
5905 .stop = eval_map_stop,
5906 .show = eval_map_show,
5909 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5913 ret = tracing_check_open_get_tr(NULL);
5917 return seq_open(filp, &tracing_eval_map_seq_ops);
5920 static const struct file_operations tracing_eval_map_fops = {
5921 .open = tracing_eval_map_open,
5923 .llseek = seq_lseek,
5924 .release = seq_release,
5927 static inline union trace_eval_map_item *
5928 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5930 /* Return tail of array given the head */
5931 return ptr + ptr->head.length + 1;
5935 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5938 struct trace_eval_map **stop;
5939 struct trace_eval_map **map;
5940 union trace_eval_map_item *map_array;
5941 union trace_eval_map_item *ptr;
5946 * The trace_eval_maps contains the map plus a head and tail item,
5947 * where the head holds the module and length of array, and the
5948 * tail holds a pointer to the next list.
5950 map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5952 pr_warn("Unable to allocate trace eval mapping\n");
5956 mutex_lock(&trace_eval_mutex);
5958 if (!trace_eval_maps)
5959 trace_eval_maps = map_array;
5961 ptr = trace_eval_maps;
5963 ptr = trace_eval_jmp_to_tail(ptr);
5964 if (!ptr->tail.next)
5966 ptr = ptr->tail.next;
5969 ptr->tail.next = map_array;
5971 map_array->head.mod = mod;
5972 map_array->head.length = len;
5975 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5976 map_array->map = **map;
5979 memset(map_array, 0, sizeof(*map_array));
5981 mutex_unlock(&trace_eval_mutex);
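/*
 * The map_array laid out by the loop above is, roughly:
 *
 *   [ head: mod, length ][ map 0 ] ... [ map len-1 ][ tail: zeroed ]
 *
 * i.e. len + 2 items, where the zeroed tail item doubles as the link to the
 * next module's array once another module registers its maps.
 */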
5984 static void trace_create_eval_file(struct dentry *d_tracer)
5986 trace_create_file("eval_map", 0444, d_tracer,
5987 NULL, &tracing_eval_map_fops);
5990 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5991 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5992 static inline void trace_insert_eval_map_file(struct module *mod,
5993 struct trace_eval_map **start, int len) { }
5994 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5996 static void trace_insert_eval_map(struct module *mod,
5997 struct trace_eval_map **start, int len)
5999 struct trace_eval_map **map;
6006 trace_event_eval_update(map, len);
6008 trace_insert_eval_map_file(mod, start, len);
6012 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6013 size_t cnt, loff_t *ppos)
6015 struct trace_array *tr = filp->private_data;
6016 char buf[MAX_TRACER_SIZE+2];
6019 mutex_lock(&trace_types_lock);
6020 r = sprintf(buf, "%s\n", tr->current_trace->name);
6021 mutex_unlock(&trace_types_lock);
6023 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6026 int tracer_init(struct tracer *t, struct trace_array *tr)
6028 tracing_reset_online_cpus(&tr->array_buffer);
6032 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6036 for_each_tracing_cpu(cpu)
6037 per_cpu_ptr(buf->data, cpu)->entries = val;
6040 #ifdef CONFIG_TRACER_MAX_TRACE
6041 /* resize @tr's buffer to the size of @size_tr's entries */
6042 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6043 struct array_buffer *size_buf, int cpu_id)
6047 if (cpu_id == RING_BUFFER_ALL_CPUS) {
6048 for_each_tracing_cpu(cpu) {
6049 ret = ring_buffer_resize(trace_buf->buffer,
6050 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6053 per_cpu_ptr(trace_buf->data, cpu)->entries =
6054 per_cpu_ptr(size_buf->data, cpu)->entries;
6057 ret = ring_buffer_resize(trace_buf->buffer,
6058 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6060 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6061 per_cpu_ptr(size_buf->data, cpu_id)->entries;
6066 #endif /* CONFIG_TRACER_MAX_TRACE */
6068 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6069 unsigned long size, int cpu)
6074 * If kernel or user changes the size of the ring buffer
6075 * we use the size that was given, and we can forget about
6076 * expanding it later.
6078 ring_buffer_expanded = true;
6080 /* May be called before buffers are initialized */
6081 if (!tr->array_buffer.buffer)
6084 ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6088 #ifdef CONFIG_TRACER_MAX_TRACE
6089 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
6090 !tr->current_trace->use_max_tr)
6093 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6095 int r = resize_buffer_duplicate_size(&tr->array_buffer,
6096 &tr->array_buffer, cpu);
6099 * AARGH! We are left with different
6100 * size max buffer!!!!
6101 * The max buffer is our "snapshot" buffer.
6102 * When a tracer needs a snapshot (one of the
6103 * latency tracers), it swaps the max buffer
6104 * with the saved snap shot. We succeeded to
6105 * update the size of the main buffer, but failed to
6106 * update the size of the max buffer. But when we tried
6107 * to reset the main buffer to the original size, we
6108 * failed there too. This is very unlikely to
6109 * happen, but if it does, warn and kill all
6113 tracing_disabled = 1;
6118 if (cpu == RING_BUFFER_ALL_CPUS)
6119 set_buffer_entries(&tr->max_buffer, size);
6121 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
6124 #endif /* CONFIG_TRACER_MAX_TRACE */
6126 if (cpu == RING_BUFFER_ALL_CPUS)
6127 set_buffer_entries(&tr->array_buffer, size);
6129 per_cpu_ptr(tr->array_buffer.data, cpu)->entries = size;
6134 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6135 unsigned long size, int cpu_id)
6139 mutex_lock(&trace_types_lock);
6141 if (cpu_id != RING_BUFFER_ALL_CPUS) {
6142 /* make sure, this cpu is enabled in the mask */
6143 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6149 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6154 mutex_unlock(&trace_types_lock);
6161 * tracing_update_buffers - used by tracing facility to expand ring buffers
6163 * To save memory when tracing is configured in but never used, the ring
6164 * buffers are set to a minimum size at boot. Once a user starts to use
6165 * the tracing facility, they need to grow to their default size.
6168 * This function is to be called when a tracer is about to be used.
6170 int tracing_update_buffers(void)
6174 mutex_lock(&trace_types_lock);
6175 if (!ring_buffer_expanded)
6176 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
6177 RING_BUFFER_ALL_CPUS);
6178 mutex_unlock(&trace_types_lock);
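/*
 * The effect is visible from user space: before any tracer or event is
 * enabled, buffer_size_kb shows both the boot-time minimum and the size the
 * buffers will grow to, for example:
 *
 *   # cat /sys/kernel/tracing/buffer_size_kb
 *   7 (expanded: 1408)
 *   # echo function > /sys/kernel/tracing/current_tracer
 *   # cat /sys/kernel/tracing/buffer_size_kb
 *   1408
 *
 * (exact numbers depend on trace_buf_size and the kernel configuration)
 */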
6183 struct trace_option_dentry;
6186 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6189 * Used to clear out the tracer before deletion of an instance.
6190 * Must have trace_types_lock held.
6192 static void tracing_set_nop(struct trace_array *tr)
6194 if (tr->current_trace == &nop_trace)
6197 tr->current_trace->enabled--;
6199 if (tr->current_trace->reset)
6200 tr->current_trace->reset(tr);
6202 tr->current_trace = &nop_trace;
6205 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6207 /* Only enable if the directory has been created already. */
6211 create_trace_option_files(tr, t);
6214 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6217 #ifdef CONFIG_TRACER_MAX_TRACE
6222 mutex_lock(&trace_types_lock);
6224 if (!ring_buffer_expanded) {
6225 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6226 RING_BUFFER_ALL_CPUS);
6232 for (t = trace_types; t; t = t->next) {
6233 if (strcmp(t->name, buf) == 0)
6240 if (t == tr->current_trace)
6243 #ifdef CONFIG_TRACER_SNAPSHOT
6244 if (t->use_max_tr) {
6245 arch_spin_lock(&tr->max_lock);
6246 if (tr->cond_snapshot)
6248 arch_spin_unlock(&tr->max_lock);
6253 /* Some tracers won't work on kernel command line */
6254 if (system_state < SYSTEM_RUNNING && t->noboot) {
6255 pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6260 /* Some tracers are only allowed for the top level buffer */
6261 if (!trace_ok_for_array(t, tr)) {
6266 /* If trace pipe files are being read, we can't change the tracer */
6267 if (tr->trace_ref) {
6272 trace_branch_disable();
6274 tr->current_trace->enabled--;
6276 if (tr->current_trace->reset)
6277 tr->current_trace->reset(tr);
6279 /* Current trace needs to be nop_trace before synchronize_rcu */
6280 tr->current_trace = &nop_trace;
6282 #ifdef CONFIG_TRACER_MAX_TRACE
6283 had_max_tr = tr->allocated_snapshot;
6285 if (had_max_tr && !t->use_max_tr) {
6287 * We need to make sure that the update_max_tr sees that
6288 * current_trace changed to nop_trace to keep it from
6289 * swapping the buffers after we resize it.
6290 * update_max_tr() is called with interrupts disabled,
6291 * so a synchronize_rcu() is sufficient.
6298 #ifdef CONFIG_TRACER_MAX_TRACE
6299 if (t->use_max_tr && !had_max_tr) {
6300 ret = tracing_alloc_snapshot_instance(tr);
6307 ret = tracer_init(t, tr);
6312 tr->current_trace = t;
6313 tr->current_trace->enabled++;
6314 trace_branch_enable(tr);
6316 mutex_unlock(&trace_types_lock);
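/*
 * Typical usage from user space, for reference:
 *
 *   # cat /sys/kernel/tracing/available_tracers
 *   # echo function_graph > /sys/kernel/tracing/current_tracer
 *
 * Writing a name not listed in available_tracers fails with -EINVAL, and the
 * change is refused with -EBUSY while trace_pipe readers hold a reference.
 */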
6322 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6323 size_t cnt, loff_t *ppos)
6325 struct trace_array *tr = filp->private_data;
6326 char buf[MAX_TRACER_SIZE+1];
6333 if (cnt > MAX_TRACER_SIZE)
6334 cnt = MAX_TRACER_SIZE;
6336 if (copy_from_user(buf, ubuf, cnt))
6341 /* strip ending whitespace. */
6342 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
6345 err = tracing_set_tracer(tr, buf);
6355 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6356 size_t cnt, loff_t *ppos)
6361 r = snprintf(buf, sizeof(buf), "%ld\n",
6362 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6363 if (r > sizeof(buf))
6365 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6369 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6370 size_t cnt, loff_t *ppos)
6375 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6385 tracing_thresh_read(struct file *filp, char __user *ubuf,
6386 size_t cnt, loff_t *ppos)
6388 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6392 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6393 size_t cnt, loff_t *ppos)
6395 struct trace_array *tr = filp->private_data;
6398 mutex_lock(&trace_types_lock);
6399 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6403 if (tr->current_trace->update_thresh) {
6404 ret = tr->current_trace->update_thresh(tr);
6411 mutex_unlock(&trace_types_lock);
6416 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6419 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6420 size_t cnt, loff_t *ppos)
6422 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
6426 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6427 size_t cnt, loff_t *ppos)
6429 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
6434 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6436 struct trace_array *tr = inode->i_private;
6437 struct trace_iterator *iter;
6440 ret = tracing_check_open_get_tr(tr);
6444 mutex_lock(&trace_types_lock);
6446 /* create a buffer to store the information to pass to userspace */
6447 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6450 __trace_array_put(tr);
6454 trace_seq_init(&iter->seq);
6455 iter->trace = tr->current_trace;
6457 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6462 /* trace pipe does not show start of buffer */
6463 cpumask_setall(iter->started);
6465 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6466 iter->iter_flags |= TRACE_FILE_LAT_FMT;
6468 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
6469 if (trace_clocks[tr->clock_id].in_ns)
6470 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6473 iter->array_buffer = &tr->array_buffer;
6474 iter->cpu_file = tracing_get_cpu(inode);
6475 mutex_init(&iter->mutex);
6476 filp->private_data = iter;
6478 if (iter->trace->pipe_open)
6479 iter->trace->pipe_open(iter);
6481 nonseekable_open(inode, filp);
6485 mutex_unlock(&trace_types_lock);
6490 __trace_array_put(tr);
6491 mutex_unlock(&trace_types_lock);
6495 static int tracing_release_pipe(struct inode *inode, struct file *file)
6497 struct trace_iterator *iter = file->private_data;
6498 struct trace_array *tr = inode->i_private;
6500 mutex_lock(&trace_types_lock);
6504 if (iter->trace->pipe_close)
6505 iter->trace->pipe_close(iter);
6507 mutex_unlock(&trace_types_lock);
6509 free_cpumask_var(iter->started);
6510 mutex_destroy(&iter->mutex);
6513 trace_array_put(tr);
6519 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6521 struct trace_array *tr = iter->tr;
6523 /* Iterators are static; they should either be filled or empty */
6524 if (trace_buffer_iter(iter, iter->cpu_file))
6525 return EPOLLIN | EPOLLRDNORM;
6527 if (tr->trace_flags & TRACE_ITER_BLOCK)
6529 * Always select as readable when in blocking mode
6531 return EPOLLIN | EPOLLRDNORM;
6533 return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6538 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6540 struct trace_iterator *iter = filp->private_data;
6542 return trace_poll(iter, filp, poll_table);
6545 /* Must be called with iter->mutex held. */
6546 static int tracing_wait_pipe(struct file *filp)
6548 struct trace_iterator *iter = filp->private_data;
6551 while (trace_empty(iter)) {
6553 if ((filp->f_flags & O_NONBLOCK)) {
6558 * We block until we read something and tracing is disabled.
6559 * We still block if tracing is disabled, but we have never
6560 * read anything. This allows a user to cat this file, and
6561 * then enable tracing. But after we have read something,
6562 * we give an EOF when tracing is again disabled.
6564 * iter->pos will be 0 if we haven't read anything.
6566 if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6569 mutex_unlock(&iter->mutex);
6571 ret = wait_on_pipe(iter, 0);
6573 mutex_lock(&iter->mutex);
6586 tracing_read_pipe(struct file *filp, char __user *ubuf,
6587 size_t cnt, loff_t *ppos)
6589 struct trace_iterator *iter = filp->private_data;
6593 * Avoid more than one consumer on a single file descriptor
6594 * This is just a matter of trace coherency; the ring buffer itself
6597 mutex_lock(&iter->mutex);
6599 /* return any leftover data */
6600 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6604 trace_seq_init(&iter->seq);
6606 if (iter->trace->read) {
6607 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6613 sret = tracing_wait_pipe(filp);
6617 /* stop when tracing is finished */
6618 if (trace_empty(iter)) {
6623 if (cnt >= PAGE_SIZE)
6624 cnt = PAGE_SIZE - 1;
6626 /* reset all but tr, trace, and overruns */
6627 memset(&iter->seq, 0,
6628 sizeof(struct trace_iterator) -
6629 offsetof(struct trace_iterator, seq));
6630 cpumask_clear(iter->started);
6631 trace_seq_init(&iter->seq);
6634 trace_event_read_lock();
6635 trace_access_lock(iter->cpu_file);
6636 while (trace_find_next_entry_inc(iter) != NULL) {
6637 enum print_line_t ret;
6638 int save_len = iter->seq.seq.len;
6640 ret = print_trace_line(iter);
6641 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6642 /* don't print partial lines */
6643 iter->seq.seq.len = save_len;
6646 if (ret != TRACE_TYPE_NO_CONSUME)
6647 trace_consume(iter);
6649 if (trace_seq_used(&iter->seq) >= cnt)
6653 * Setting the full flag means we reached the trace_seq buffer
6654 * size and we should have left via the partial output condition above.
6655 * One of the trace_seq_* functions is not used properly.
6657 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6660 trace_access_unlock(iter->cpu_file);
6661 trace_event_read_unlock();
6663 /* Now copy what we have to the user */
6664 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6665 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
6666 trace_seq_init(&iter->seq);
6669 * If there was nothing to send to the user, despite consuming trace
6670 * entries, go back to wait for more entries.
6676 mutex_unlock(&iter->mutex);
6681 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6684 __free_page(spd->pages[idx]);
6688 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6694 /* Seq buffer is page-sized, exactly what we need. */
6696 save_len = iter->seq.seq.len;
6697 ret = print_trace_line(iter);
6699 if (trace_seq_has_overflowed(&iter->seq)) {
6700 iter->seq.seq.len = save_len;
6705 * This should not be hit, because it should only
6706 * be set if the iter->seq overflowed. But check it
6707 * anyway to be safe.
6709 if (ret == TRACE_TYPE_PARTIAL_LINE) {
6710 iter->seq.seq.len = save_len;
6714 count = trace_seq_used(&iter->seq) - save_len;
6717 iter->seq.seq.len = save_len;
6721 if (ret != TRACE_TYPE_NO_CONSUME)
6722 trace_consume(iter);
6724 if (!trace_find_next_entry_inc(iter)) {
6734 static ssize_t tracing_splice_read_pipe(struct file *filp,
6736 struct pipe_inode_info *pipe,
6740 struct page *pages_def[PIPE_DEF_BUFFERS];
6741 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6742 struct trace_iterator *iter = filp->private_data;
6743 struct splice_pipe_desc spd = {
6745 .partial = partial_def,
6746 .nr_pages = 0, /* This gets updated below. */
6747 .nr_pages_max = PIPE_DEF_BUFFERS,
6748 .ops = &default_pipe_buf_ops,
6749 .spd_release = tracing_spd_release_pipe,
6755 if (splice_grow_spd(pipe, &spd))
6758 mutex_lock(&iter->mutex);
6760 if (iter->trace->splice_read) {
6761 ret = iter->trace->splice_read(iter, filp,
6762 ppos, pipe, len, flags);
6767 ret = tracing_wait_pipe(filp);
6771 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6776 trace_event_read_lock();
6777 trace_access_lock(iter->cpu_file);
6779 /* Fill as many pages as possible. */
6780 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6781 spd.pages[i] = alloc_page(GFP_KERNEL);
6785 rem = tracing_fill_pipe_page(rem, iter);
6787 /* Copy the data into the page, so we can start over. */
6788 ret = trace_seq_to_buffer(&iter->seq,
6789 page_address(spd.pages[i]),
6790 trace_seq_used(&iter->seq));
6792 __free_page(spd.pages[i]);
6795 spd.partial[i].offset = 0;
6796 spd.partial[i].len = trace_seq_used(&iter->seq);
6798 trace_seq_init(&iter->seq);
6801 trace_access_unlock(iter->cpu_file);
6802 trace_event_read_unlock();
6803 mutex_unlock(&iter->mutex);
6808 ret = splice_to_pipe(pipe, &spd);
6812 splice_shrink_spd(&spd);
6816 mutex_unlock(&iter->mutex);
6821 tracing_entries_read(struct file *filp, char __user *ubuf,
6822 size_t cnt, loff_t *ppos)
6824 struct inode *inode = file_inode(filp);
6825 struct trace_array *tr = inode->i_private;
6826 int cpu = tracing_get_cpu(inode);
6831 mutex_lock(&trace_types_lock);
6833 if (cpu == RING_BUFFER_ALL_CPUS) {
6834 int cpu, buf_size_same;
6839 /* check if all cpu sizes are same */
6840 for_each_tracing_cpu(cpu) {
6841 /* fill in the size from first enabled cpu */
6843 size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6844 if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6850 if (buf_size_same) {
6851 if (!ring_buffer_expanded)
6852 r = sprintf(buf, "%lu (expanded: %lu)\n",
6854 trace_buf_size >> 10);
6856 r = sprintf(buf, "%lu\n", size >> 10);
6858 r = sprintf(buf, "X\n");
6860 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6862 mutex_unlock(&trace_types_lock);
6864 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6869 tracing_entries_write(struct file *filp, const char __user *ubuf,
6870 size_t cnt, loff_t *ppos)
6872 struct inode *inode = file_inode(filp);
6873 struct trace_array *tr = inode->i_private;
6877 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6881 /* must have at least 1 entry */
6885 /* value is in KB */
6887 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
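/*
 * Sketch of the user-space side: the value is taken in KiB and applied
 * either to every CPU or to a single one via the per_cpu directory:
 *
 *   # echo 4096 > /sys/kernel/tracing/buffer_size_kb
 *   # echo 64 > /sys/kernel/tracing/per_cpu/cpu1/buffer_size_kb
 */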
6897 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6898 size_t cnt, loff_t *ppos)
6900 struct trace_array *tr = filp->private_data;
6903 unsigned long size = 0, expanded_size = 0;
6905 mutex_lock(&trace_types_lock);
6906 for_each_tracing_cpu(cpu) {
6907 size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6908 if (!ring_buffer_expanded)
6909 expanded_size += trace_buf_size >> 10;
6911 if (ring_buffer_expanded)
6912 r = sprintf(buf, "%lu\n", size);
6914 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6915 mutex_unlock(&trace_types_lock);
6917 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6921 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6922 size_t cnt, loff_t *ppos)
6925 * There is no need to read what the user has written; this function
6926 * just makes sure that there is no error when "echo" is used
6935 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6937 struct trace_array *tr = inode->i_private;
6939 /* disable tracing? */
6940 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6941 tracer_tracing_off(tr);
6942 /* resize the ring buffer to 0 */
6943 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6945 trace_array_put(tr);
6951 tracing_mark_write(struct file *filp, const char __user *ubuf,
6952 size_t cnt, loff_t *fpos)
6954 struct trace_array *tr = filp->private_data;
6955 struct ring_buffer_event *event;
6956 enum event_trigger_type tt = ETT_NONE;
6957 struct trace_buffer *buffer;
6958 struct print_entry *entry;
6963 /* Used in tracing_mark_raw_write() as well */
6964 #define FAULTED_STR "<faulted>"
6965 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6967 if (tracing_disabled)
6970 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6973 if (cnt > TRACE_BUF_SIZE)
6974 cnt = TRACE_BUF_SIZE;
6976 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
6978 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
6980 /* If less than "<faulted>", then make sure we can still add that */
6981 if (cnt < FAULTED_SIZE)
6982 size += FAULTED_SIZE - cnt;
6984 buffer = tr->array_buffer.buffer;
6985 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6987 if (unlikely(!event))
6988 /* Ring buffer disabled, return as if not open for write */
6991 entry = ring_buffer_event_data(event);
6992 entry->ip = _THIS_IP_;
6994 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6996 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7002 if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7003 /* do not add \n before testing triggers, but add \0 */
7004 entry->buf[cnt] = '\0';
7005 tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7008 if (entry->buf[cnt - 1] != '\n') {
7009 entry->buf[cnt] = '\n';
7010 entry->buf[cnt + 1] = '\0';
7012 entry->buf[cnt] = '\0';
7014 if (static_branch_unlikely(&trace_marker_exports_enabled))
7015 ftrace_exports(event, TRACE_EXPORT_MARKER);
7016 __buffer_unlock_commit(buffer, event);
7019 event_triggers_post_call(tr->trace_marker_file, tt);
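/*
 * For example, annotating the trace from a shell script:
 *
 *   # echo "hello world" > /sys/kernel/tracing/trace_marker
 *
 * shows up in the trace output as a tracing_mark_write: line interleaved
 * with the other events; trace_printk() is the in-kernel counterpart.
 */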
7027 /* Limit it for now to 3K (including tag) */
7028 #define RAW_DATA_MAX_SIZE (1024*3)
7031 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7032 size_t cnt, loff_t *fpos)
7034 struct trace_array *tr = filp->private_data;
7035 struct ring_buffer_event *event;
7036 struct trace_buffer *buffer;
7037 struct raw_data_entry *entry;
7042 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7044 if (tracing_disabled)
7047 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7050 /* The marker must at least have a tag id */
7051 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
7054 if (cnt > TRACE_BUF_SIZE)
7055 cnt = TRACE_BUF_SIZE;
7057 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
7059 size = sizeof(*entry) + cnt;
7060 if (cnt < FAULT_SIZE_ID)
7061 size += FAULT_SIZE_ID - cnt;
7063 buffer = tr->array_buffer.buffer;
7064 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7067 /* Ring buffer disabled, return as if not open for write */
7070 entry = ring_buffer_event_data(event);
7072 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7075 memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7080 __buffer_unlock_commit(buffer, event);
7088 static int tracing_clock_show(struct seq_file *m, void *v)
7090 struct trace_array *tr = m->private;
7093 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7095 "%s%s%s%s", i ? " " : "",
7096 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7097 i == tr->clock_id ? "]" : "");
7103 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7107 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7108 if (strcmp(trace_clocks[i].name, clockstr) == 0)
7111 if (i == ARRAY_SIZE(trace_clocks))
7114 mutex_lock(&trace_types_lock);
7118 ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7121 * New clock may not be consistent with the previous clock.
7122 * Reset the buffer so that it doesn't have incomparable timestamps.
7124 tracing_reset_online_cpus(&tr->array_buffer);
7126 #ifdef CONFIG_TRACER_MAX_TRACE
7127 if (tr->max_buffer.buffer)
7128 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7129 tracing_reset_online_cpus(&tr->max_buffer);
7132 mutex_unlock(&trace_types_lock);
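/*
 * Example: the current clock is shown in brackets when reading the file,
 * and switching it clears the buffers as noted above:
 *
 *   # cat /sys/kernel/tracing/trace_clock
 *   [local] global counter uptime perf ...
 *   # echo global > /sys/kernel/tracing/trace_clock
 */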
7137 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7138 size_t cnt, loff_t *fpos)
7140 struct seq_file *m = filp->private_data;
7141 struct trace_array *tr = m->private;
7143 const char *clockstr;
7146 if (cnt >= sizeof(buf))
7149 if (copy_from_user(buf, ubuf, cnt))
7154 clockstr = strstrip(buf);
7156 ret = tracing_set_clock(tr, clockstr);
7165 static int tracing_clock_open(struct inode *inode, struct file *file)
7167 struct trace_array *tr = inode->i_private;
7170 ret = tracing_check_open_get_tr(tr);
7174 ret = single_open(file, tracing_clock_show, inode->i_private);
7176 trace_array_put(tr);
7181 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7183 struct trace_array *tr = m->private;
7185 mutex_lock(&trace_types_lock);
7187 if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7188 seq_puts(m, "delta [absolute]\n");
7190 seq_puts(m, "[delta] absolute\n");
7192 mutex_unlock(&trace_types_lock);
7197 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7199 struct trace_array *tr = inode->i_private;
7202 ret = tracing_check_open_get_tr(tr);
7206 ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7208 trace_array_put(tr);
7213 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7215 if (rbe == this_cpu_read(trace_buffered_event))
7216 return ring_buffer_time_stamp(buffer);
7218 return ring_buffer_event_time_stamp(buffer, rbe);
7222 * Set or disable using the per CPU trace_buffer_event when possible.
7224 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7228 mutex_lock(&trace_types_lock);
7230 if (set && tr->no_filter_buffering_ref++)
7234 if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7239 --tr->no_filter_buffering_ref;
7242 mutex_unlock(&trace_types_lock);
7247 struct ftrace_buffer_info {
7248 struct trace_iterator iter;
7250 unsigned int spare_cpu;
7254 #ifdef CONFIG_TRACER_SNAPSHOT
7255 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7257 struct trace_array *tr = inode->i_private;
7258 struct trace_iterator *iter;
7262 ret = tracing_check_open_get_tr(tr);
7266 if (file->f_mode & FMODE_READ) {
7267 iter = __tracing_open(inode, file, true);
7269 ret = PTR_ERR(iter);
7271 /* Writes still need the seq_file to hold the private data */
7273 m = kzalloc(sizeof(*m), GFP_KERNEL);
7276 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7284 iter->array_buffer = &tr->max_buffer;
7285 iter->cpu_file = tracing_get_cpu(inode);
7287 file->private_data = m;
7291 trace_array_put(tr);
7297 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7300 struct seq_file *m = filp->private_data;
7301 struct trace_iterator *iter = m->private;
7302 struct trace_array *tr = iter->tr;
7306 ret = tracing_update_buffers();
7310 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7314 mutex_lock(&trace_types_lock);
7316 if (tr->current_trace->use_max_tr) {
7321 arch_spin_lock(&tr->max_lock);
7322 if (tr->cond_snapshot)
7324 arch_spin_unlock(&tr->max_lock);
7330 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7334 if (tr->allocated_snapshot)
7338 /* Only allow per-cpu swap if the ring buffer supports it */
7339 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7340 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7345 if (tr->allocated_snapshot)
7346 ret = resize_buffer_duplicate_size(&tr->max_buffer,
7347 &tr->array_buffer, iter->cpu_file);
7349 ret = tracing_alloc_snapshot_instance(tr);
7352 local_irq_disable();
7353 /* Now, we're going to swap */
7354 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7355 update_max_tr(tr, current, smp_processor_id(), NULL);
7357 update_max_tr_single(tr, current, iter->cpu_file);
7361 if (tr->allocated_snapshot) {
7362 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7363 tracing_reset_online_cpus(&tr->max_buffer);
7365 tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7375 mutex_unlock(&trace_types_lock);
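/*
 * A quick sketch of the snapshot file semantics implemented above:
 *
 *   # echo 1 > /sys/kernel/tracing/snapshot   (allocate if needed and swap)
 *   # echo 0 > /sys/kernel/tracing/snapshot   (free the snapshot buffer)
 *   # echo 2 > /sys/kernel/tracing/snapshot   (clear it without freeing)
 *   # cat /sys/kernel/tracing/snapshot
 */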
7379 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7381 struct seq_file *m = file->private_data;
7384 ret = tracing_release(inode, file);
7386 if (file->f_mode & FMODE_READ)
7389 /* If write only, the seq_file is just a stub */
7397 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7398 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7399 size_t count, loff_t *ppos);
7400 static int tracing_buffers_release(struct inode *inode, struct file *file);
7401 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7402 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7404 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7406 struct ftrace_buffer_info *info;
7409 /* The following checks for tracefs lockdown */
7410 ret = tracing_buffers_open(inode, filp);
7414 info = filp->private_data;
7416 if (info->iter.trace->use_max_tr) {
7417 tracing_buffers_release(inode, filp);
7421 info->iter.snapshot = true;
7422 info->iter.array_buffer = &info->iter.tr->max_buffer;
7427 #endif /* CONFIG_TRACER_SNAPSHOT */
7430 static const struct file_operations tracing_thresh_fops = {
7431 .open = tracing_open_generic,
7432 .read = tracing_thresh_read,
7433 .write = tracing_thresh_write,
7434 .llseek = generic_file_llseek,
7437 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
7438 static const struct file_operations tracing_max_lat_fops = {
7439 .open = tracing_open_generic,
7440 .read = tracing_max_lat_read,
7441 .write = tracing_max_lat_write,
7442 .llseek = generic_file_llseek,
7446 static const struct file_operations set_tracer_fops = {
7447 .open = tracing_open_generic,
7448 .read = tracing_set_trace_read,
7449 .write = tracing_set_trace_write,
7450 .llseek = generic_file_llseek,
7453 static const struct file_operations tracing_pipe_fops = {
7454 .open = tracing_open_pipe,
7455 .poll = tracing_poll_pipe,
7456 .read = tracing_read_pipe,
7457 .splice_read = tracing_splice_read_pipe,
7458 .release = tracing_release_pipe,
7459 .llseek = no_llseek,
7462 static const struct file_operations tracing_entries_fops = {
7463 .open = tracing_open_generic_tr,
7464 .read = tracing_entries_read,
7465 .write = tracing_entries_write,
7466 .llseek = generic_file_llseek,
7467 .release = tracing_release_generic_tr,
7470 static const struct file_operations tracing_total_entries_fops = {
7471 .open = tracing_open_generic_tr,
7472 .read = tracing_total_entries_read,
7473 .llseek = generic_file_llseek,
7474 .release = tracing_release_generic_tr,
7477 static const struct file_operations tracing_free_buffer_fops = {
7478 .open = tracing_open_generic_tr,
7479 .write = tracing_free_buffer_write,
7480 .release = tracing_free_buffer_release,
7483 static const struct file_operations tracing_mark_fops = {
7484 .open = tracing_open_generic_tr,
7485 .write = tracing_mark_write,
7486 .llseek = generic_file_llseek,
7487 .release = tracing_release_generic_tr,
7490 static const struct file_operations tracing_mark_raw_fops = {
7491 .open = tracing_open_generic_tr,
7492 .write = tracing_mark_raw_write,
7493 .llseek = generic_file_llseek,
7494 .release = tracing_release_generic_tr,
7497 static const struct file_operations trace_clock_fops = {
7498 .open = tracing_clock_open,
7500 .llseek = seq_lseek,
7501 .release = tracing_single_release_tr,
7502 .write = tracing_clock_write,
7505 static const struct file_operations trace_time_stamp_mode_fops = {
7506 .open = tracing_time_stamp_mode_open,
7508 .llseek = seq_lseek,
7509 .release = tracing_single_release_tr,
7512 #ifdef CONFIG_TRACER_SNAPSHOT
7513 static const struct file_operations snapshot_fops = {
7514 .open = tracing_snapshot_open,
7516 .write = tracing_snapshot_write,
7517 .llseek = tracing_lseek,
7518 .release = tracing_snapshot_release,
7521 static const struct file_operations snapshot_raw_fops = {
7522 .open = snapshot_raw_open,
7523 .read = tracing_buffers_read,
7524 .release = tracing_buffers_release,
7525 .splice_read = tracing_buffers_splice_read,
7526 .llseek = no_llseek,
7529 #endif /* CONFIG_TRACER_SNAPSHOT */
7531 #define TRACING_LOG_ERRS_MAX 8
7532 #define TRACING_LOG_LOC_MAX 128
7534 #define CMD_PREFIX " Command: "
7537 const char **errs; /* ptr to loc-specific array of err strings */
7538 u8 type; /* index into errs -> specific err string */
7539 u8 pos; /* MAX_FILTER_STR_VAL = 256 */
7543 struct tracing_log_err {
7544 struct list_head list;
7545 struct err_info info;
7546 char loc[TRACING_LOG_LOC_MAX]; /* err location */
7547 char cmd[MAX_FILTER_STR_VAL]; /* what caused err */
7550 static DEFINE_MUTEX(tracing_err_log_lock);
7552 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr)
7554 struct tracing_log_err *err;
7556 if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7557 err = kzalloc(sizeof(*err), GFP_KERNEL);
7559 err = ERR_PTR(-ENOMEM);
7560 tr->n_err_log_entries++;
7565 err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7566 list_del(&err->list);
7572 * err_pos - find the position of a string within a command for error caret placement
7573 * @cmd: The tracing command that caused the error
7574 * @str: The string to position the caret at within @cmd
7576 * Finds the position of the first occurrence of @str within @cmd. The
7577 * return value can be passed to tracing_log_err() for caret placement
7580 * Returns the index within @cmd of the first occurrence of @str or 0
7581 * if @str was not found.
7583 unsigned int err_pos(char *cmd, const char *str)
7587 if (WARN_ON(!strlen(cmd)))
7590 found = strstr(cmd, str);
7598 * tracing_log_err - write an error to the tracing error log
7599 * @tr: The associated trace array for the error (NULL for top level array)
7600 * @loc: A string describing where the error occurred
7601 * @cmd: The tracing command that caused the error
7602 * @errs: The array of loc-specific static error strings
7603 * @type: The index into errs[], which produces the specific static err string
7604 * @pos: The position the caret should be placed in the cmd
7606 * Writes an error into tracing/error_log of the form:
7608 * <loc>: error: <text>
7612 * tracing/error_log is a small log file containing the last
7613 * TRACING_LOG_ERRS_MAX errors (8). Memory for errors isn't allocated
7614 * unless there has been a tracing error, and the error log can be
7615 * cleared and have its memory freed by writing the empty string in
7616 * truncation mode to it i.e. echo > tracing/error_log.
7618 * NOTE: the @errs array along with the @type param are used to
7619 * produce a static error string - this string is not copied and saved
7620 * when the error is logged - only a pointer to it is saved. See
7621 * existing callers for examples of how static strings are typically
7622 * defined for use with tracing_log_err().
7624 void tracing_log_err(struct trace_array *tr,
7625 const char *loc, const char *cmd,
7626 const char **errs, u8 type, u8 pos)
7628 struct tracing_log_err *err;
7633 mutex_lock(&tracing_err_log_lock);
7634 err = get_tracing_log_err(tr);
7635 if (PTR_ERR(err) == -ENOMEM) {
7636 mutex_unlock(&tracing_err_log_lock);
7640 snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7641 snprintf(err->cmd, MAX_FILTER_STR_VAL, "\n" CMD_PREFIX "%s\n", cmd);
7643 err->info.errs = errs;
7644 err->info.type = type;
7645 err->info.pos = pos;
7646 err->info.ts = local_clock();
7648 list_add_tail(&err->list, &tr->err_log);
7649 mutex_unlock(&tracing_err_log_lock);
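/*
 * Illustrative sketch (not a real caller; the array, enum and command
 * parser below are invented for the example): a tracing command parser
 * typically keeps a static array of error strings and logs a parse
 * failure like this, using err_pos() to place the caret under the
 * offending token.
 *
 *	static const char *example_errs[] = {
 *		"Duplicate field name",
 *		"Invalid argument",
 *	};
 *	enum { EXAMPLE_ERR_DUP, EXAMPLE_ERR_BADARG };
 *
 *	static void example_parse_error(struct trace_array *tr, char *cmd,
 *					const char *bad_token)
 *	{
 *		tracing_log_err(tr, "example: parse", cmd, example_errs,
 *				EXAMPLE_ERR_BADARG, err_pos(cmd, bad_token));
 *	}
 *
 * The resulting tracing/error_log entry reads
 * "example: parse: error: Invalid argument", followed by the command
 * line and a '^' caret under @bad_token.
 */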
7652 static void clear_tracing_err_log(struct trace_array *tr)
7654 struct tracing_log_err *err, *next;
7656 mutex_lock(&tracing_err_log_lock);
7657 list_for_each_entry_safe(err, next, &tr->err_log, list) {
7658 list_del(&err->list);
7662 tr->n_err_log_entries = 0;
7663 mutex_unlock(&tracing_err_log_lock);
7666 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7668 struct trace_array *tr = m->private;
7670 mutex_lock(&tracing_err_log_lock);
7672 return seq_list_start(&tr->err_log, *pos);
7675 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7677 struct trace_array *tr = m->private;
7679 return seq_list_next(v, &tr->err_log, pos);
7682 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7684 mutex_unlock(&tracing_err_log_lock);
7687 static void tracing_err_log_show_pos(struct seq_file *m, u8 pos)
7691 for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7693 for (i = 0; i < pos; i++)
7698 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7700 struct tracing_log_err *err = v;
7703 const char *err_text = err->info.errs[err->info.type];
7704 u64 sec = err->info.ts;
7707 nsec = do_div(sec, NSEC_PER_SEC);
7708 seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7709 err->loc, err_text);
7710 seq_printf(m, "%s", err->cmd);
7711 tracing_err_log_show_pos(m, err->info.pos);
7717 static const struct seq_operations tracing_err_log_seq_ops = {
7718 .start = tracing_err_log_seq_start,
7719 .next = tracing_err_log_seq_next,
7720 .stop = tracing_err_log_seq_stop,
7721 .show = tracing_err_log_seq_show
7724 static int tracing_err_log_open(struct inode *inode, struct file *file)
7726 struct trace_array *tr = inode->i_private;
7729 ret = tracing_check_open_get_tr(tr);
7733 /* If this file was opened for write, then erase contents */
7734 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7735 clear_tracing_err_log(tr);
7737 if (file->f_mode & FMODE_READ) {
7738 ret = seq_open(file, &tracing_err_log_seq_ops);
7740 struct seq_file *m = file->private_data;
7743 trace_array_put(tr);
7749 static ssize_t tracing_err_log_write(struct file *file,
7750 const char __user *buffer,
7751 size_t count, loff_t *ppos)
7756 static int tracing_err_log_release(struct inode *inode, struct file *file)
7758 struct trace_array *tr = inode->i_private;
7760 trace_array_put(tr);
7762 if (file->f_mode & FMODE_READ)
7763 seq_release(inode, file);
7768 static const struct file_operations tracing_err_log_fops = {
7769 .open = tracing_err_log_open,
7770 .write = tracing_err_log_write,
7772 .llseek = seq_lseek,
7773 .release = tracing_err_log_release,
7776 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7778 struct trace_array *tr = inode->i_private;
7779 struct ftrace_buffer_info *info;
7782 ret = tracing_check_open_get_tr(tr);
7786 info = kvzalloc(sizeof(*info), GFP_KERNEL);
7788 trace_array_put(tr);
7792 mutex_lock(&trace_types_lock);
7795 info->iter.cpu_file = tracing_get_cpu(inode);
7796 info->iter.trace = tr->current_trace;
7797 info->iter.array_buffer = &tr->array_buffer;
7799 /* Force reading ring buffer for first read */
7800 info->read = (unsigned int)-1;
7802 filp->private_data = info;
7806 mutex_unlock(&trace_types_lock);
7808 ret = nonseekable_open(inode, filp);
7810 trace_array_put(tr);
7816 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7818 struct ftrace_buffer_info *info = filp->private_data;
7819 struct trace_iterator *iter = &info->iter;
7821 return trace_poll(iter, filp, poll_table);
7825 tracing_buffers_read(struct file *filp, char __user *ubuf,
7826 size_t count, loff_t *ppos)
7828 struct ftrace_buffer_info *info = filp->private_data;
7829 struct trace_iterator *iter = &info->iter;
7836 #ifdef CONFIG_TRACER_MAX_TRACE
7837 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7842 info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7844 if (IS_ERR(info->spare)) {
7845 ret = PTR_ERR(info->spare);
7848 info->spare_cpu = iter->cpu_file;
7854 /* Do we have previous read data to read? */
7855 if (info->read < PAGE_SIZE)
7859 trace_access_lock(iter->cpu_file);
7860 ret = ring_buffer_read_page(iter->array_buffer->buffer,
7864 trace_access_unlock(iter->cpu_file);
7867 if (trace_empty(iter)) {
7868 if ((filp->f_flags & O_NONBLOCK))
7871 ret = wait_on_pipe(iter, 0);
7882 size = PAGE_SIZE - info->read;
7886 ret = copy_to_user(ubuf, info->spare + info->read, size);
7898 static int tracing_buffers_release(struct inode *inode, struct file *file)
7900 struct ftrace_buffer_info *info = file->private_data;
7901 struct trace_iterator *iter = &info->iter;
7903 mutex_lock(&trace_types_lock);
7905 iter->tr->trace_ref--;
7907 __trace_array_put(iter->tr);
7910 ring_buffer_free_read_page(iter->array_buffer->buffer,
7911 info->spare_cpu, info->spare);
7914 mutex_unlock(&trace_types_lock);
7920 struct trace_buffer *buffer;
7923 refcount_t refcount;
7926 static void buffer_ref_release(struct buffer_ref *ref)
7928 if (!refcount_dec_and_test(&ref->refcount))
7930 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
7934 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
7935 struct pipe_buffer *buf)
7937 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7939 buffer_ref_release(ref);
7943 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
7944 struct pipe_buffer *buf)
7946 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
7948 if (refcount_read(&ref->refcount) > INT_MAX/2)
7951 refcount_inc(&ref->refcount);
7955 /* Pipe buffer operations for a buffer. */
7956 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
7957 .release = buffer_pipe_buf_release,
7958 .get = buffer_pipe_buf_get,
7962 * Callback from splice_to_pipe(), if we need to release some pages
7963 * at the end of the spd in case we errored out while filling the pipe.
7965 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
7967 struct buffer_ref *ref =
7968 (struct buffer_ref *)spd->partial[i].private;
7970 buffer_ref_release(ref);
7971 spd->partial[i].private = 0;
7975 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7976 struct pipe_inode_info *pipe, size_t len,
7979 struct ftrace_buffer_info *info = file->private_data;
7980 struct trace_iterator *iter = &info->iter;
7981 struct partial_page partial_def[PIPE_DEF_BUFFERS];
7982 struct page *pages_def[PIPE_DEF_BUFFERS];
7983 struct splice_pipe_desc spd = {
7985 .partial = partial_def,
7986 .nr_pages_max = PIPE_DEF_BUFFERS,
7987 .ops = &buffer_pipe_buf_ops,
7988 .spd_release = buffer_spd_release,
7990 struct buffer_ref *ref;
7994 #ifdef CONFIG_TRACER_MAX_TRACE
7995 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7999 if (*ppos & (PAGE_SIZE - 1))
8002 if (len & (PAGE_SIZE - 1)) {
8003 if (len < PAGE_SIZE)
8008 if (splice_grow_spd(pipe, &spd))
8012 trace_access_lock(iter->cpu_file);
8013 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8015 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
8019 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8025 refcount_set(&ref->refcount, 1);
8026 ref->buffer = iter->array_buffer->buffer;
8027 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8028 if (IS_ERR(ref->page)) {
8029 ret = PTR_ERR(ref->page);
8034 ref->cpu = iter->cpu_file;
8036 r = ring_buffer_read_page(ref->buffer, &ref->page,
8037 len, iter->cpu_file, 1);
8039 ring_buffer_free_read_page(ref->buffer, ref->cpu,
8045 page = virt_to_page(ref->page);
8047 spd.pages[i] = page;
8048 spd.partial[i].len = PAGE_SIZE;
8049 spd.partial[i].offset = 0;
8050 spd.partial[i].private = (unsigned long)ref;
8054 entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8057 trace_access_unlock(iter->cpu_file);
8060 /* did we read anything? */
8061 if (!spd.nr_pages) {
8066 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8069 ret = wait_on_pipe(iter, iter->tr->buffer_percent);
8076 ret = splice_to_pipe(pipe, &spd);
8078 splice_shrink_spd(&spd);
8083 static const struct file_operations tracing_buffers_fops = {
8084 .open = tracing_buffers_open,
8085 .read = tracing_buffers_read,
8086 .poll = tracing_buffers_poll,
8087 .release = tracing_buffers_release,
8088 .splice_read = tracing_buffers_splice_read,
8089 .llseek = no_llseek,
8093 tracing_stats_read(struct file *filp, char __user *ubuf,
8094 size_t count, loff_t *ppos)
8096 struct inode *inode = file_inode(filp);
8097 struct trace_array *tr = inode->i_private;
8098 struct array_buffer *trace_buf = &tr->array_buffer;
8099 int cpu = tracing_get_cpu(inode);
8100 struct trace_seq *s;
8102 unsigned long long t;
8103 unsigned long usec_rem;
8105 s = kmalloc(sizeof(*s), GFP_KERNEL);
8111 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8112 trace_seq_printf(s, "entries: %ld\n", cnt);
8114 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8115 trace_seq_printf(s, "overrun: %ld\n", cnt);
8117 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8118 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8120 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8121 trace_seq_printf(s, "bytes: %ld\n", cnt);
8123 if (trace_clocks[tr->clock_id].in_ns) {
8124 /* local or global for trace_clock */
8125 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8126 usec_rem = do_div(t, USEC_PER_SEC);
8127 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8130 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8131 usec_rem = do_div(t, USEC_PER_SEC);
8132 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8134 /* counter or tsc mode for trace_clock */
8135 trace_seq_printf(s, "oldest event ts: %llu\n",
8136 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8138 trace_seq_printf(s, "now ts: %llu\n",
8139 ring_buffer_time_stamp(trace_buf->buffer));
8142 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8143 trace_seq_printf(s, "dropped events: %ld\n", cnt);
8145 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8146 trace_seq_printf(s, "read events: %ld\n", cnt);
8148 count = simple_read_from_buffer(ubuf, count, ppos,
8149 s->buffer, trace_seq_used(s));
8156 static const struct file_operations tracing_stats_fops = {
8157 .open = tracing_open_generic_tr,
8158 .read = tracing_stats_read,
8159 .llseek = generic_file_llseek,
8160 .release = tracing_release_generic_tr,
8163 #ifdef CONFIG_DYNAMIC_FTRACE
8166 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8167 size_t cnt, loff_t *ppos)
8173 /* 256 should be plenty to hold the amount needed */
8174 buf = kmalloc(256, GFP_KERNEL);
8178 r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8179 ftrace_update_tot_cnt,
8180 ftrace_number_of_pages,
8181 ftrace_number_of_groups);
8183 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8188 static const struct file_operations tracing_dyn_info_fops = {
8189 .open = tracing_open_generic,
8190 .read = tracing_read_dyn_info,
8191 .llseek = generic_file_llseek,
8193 #endif /* CONFIG_DYNAMIC_FTRACE */
8195 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8197 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8198 struct trace_array *tr, struct ftrace_probe_ops *ops,
8201 tracing_snapshot_instance(tr);
8205 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8206 struct trace_array *tr, struct ftrace_probe_ops *ops,
8209 struct ftrace_func_mapper *mapper = data;
8213 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8223 tracing_snapshot_instance(tr);
8227 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8228 struct ftrace_probe_ops *ops, void *data)
8230 struct ftrace_func_mapper *mapper = data;
8233 seq_printf(m, "%ps:", (void *)ip);
8235 seq_puts(m, "snapshot");
8238 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8241 seq_printf(m, ":count=%ld\n", *count);
8243 seq_puts(m, ":unlimited\n");
8249 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8250 unsigned long ip, void *init_data, void **data)
8252 struct ftrace_func_mapper *mapper = *data;
8255 mapper = allocate_ftrace_func_mapper();
8261 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8265 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8266 unsigned long ip, void *data)
8268 struct ftrace_func_mapper *mapper = data;
8273 free_ftrace_func_mapper(mapper, NULL);
8277 ftrace_func_mapper_remove_ip(mapper, ip);
8280 static struct ftrace_probe_ops snapshot_probe_ops = {
8281 .func = ftrace_snapshot,
8282 .print = ftrace_snapshot_print,
8285 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8286 .func = ftrace_count_snapshot,
8287 .print = ftrace_snapshot_print,
8288 .init = ftrace_snapshot_init,
8289 .free = ftrace_snapshot_free,
8293 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8294 char *glob, char *cmd, char *param, int enable)
8296 struct ftrace_probe_ops *ops;
8297 void *count = (void *)-1;
8304 /* hash funcs only work with set_ftrace_filter */
8308 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8311 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8316 number = strsep(&param, ":");
8318 if (!strlen(number))
8322 * We use the callback data field (which is a pointer)
8325 ret = kstrtoul(number, 0, (unsigned long *)&count);
8330 ret = tracing_alloc_snapshot_instance(tr);
8334 ret = register_ftrace_function_probe(glob, tr, ops, count);
8337 return ret < 0 ? ret : 0;
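/*
 * Usage sketch for the "snapshot" command registered below; it is
 * written to set_ftrace_filter (the function names are only examples):
 *
 *	echo 'do_sys_open:snapshot' > set_ftrace_filter
 *	echo 'do_sys_open:snapshot:5' > set_ftrace_filter
 *
 * The first form triggers a snapshot every time the function is hit,
 * the second stops triggering after five hits.  Prefixing the spec
 * with '!' removes the probe again.
 */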
8340 static struct ftrace_func_command ftrace_snapshot_cmd = {
8342 .func = ftrace_trace_snapshot_callback,
8345 static __init int register_snapshot_cmd(void)
8347 return register_ftrace_command(&ftrace_snapshot_cmd);
8350 static inline __init int register_snapshot_cmd(void) { return 0; }
8351 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8353 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8355 if (WARN_ON(!tr->dir))
8356 return ERR_PTR(-ENODEV);
8358 /* Top directory uses NULL as the parent */
8359 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8362 /* All sub buffers have a descriptor */
8366 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8368 struct dentry *d_tracer;
8371 return tr->percpu_dir;
8373 d_tracer = tracing_get_dentry(tr);
8374 if (IS_ERR(d_tracer))
8377 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8379 MEM_FAIL(!tr->percpu_dir,
8380 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8382 return tr->percpu_dir;
8385 static struct dentry *
8386 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8387 void *data, long cpu, const struct file_operations *fops)
8389 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8391 if (ret) /* See tracing_get_cpu() */
8392 d_inode(ret)->i_cdev = (void *)(cpu + 1);
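/*
 * The CPU number is stored biased by one so that a valid CPU 0 is not
 * mistaken for an inode whose i_cdev was never set; the reader side
 * (tracing_get_cpu(), not shown here) presumably undoes the bias.
 */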
8397 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8399 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8400 struct dentry *d_cpu;
8401 char cpu_dir[30]; /* 30 characters should be more than enough */
8406 snprintf(cpu_dir, 30, "cpu%ld", cpu);
8407 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8409 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8413 /* per cpu trace_pipe */
8414 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
8415 tr, cpu, &tracing_pipe_fops);
8418 trace_create_cpu_file("trace", 0644, d_cpu,
8419 tr, cpu, &tracing_fops);
8421 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
8422 tr, cpu, &tracing_buffers_fops);
8424 trace_create_cpu_file("stats", 0444, d_cpu,
8425 tr, cpu, &tracing_stats_fops);
8427 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
8428 tr, cpu, &tracing_entries_fops);
8430 #ifdef CONFIG_TRACER_SNAPSHOT
8431 trace_create_cpu_file("snapshot", 0644, d_cpu,
8432 tr, cpu, &snapshot_fops);
8434 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
8435 tr, cpu, &snapshot_raw_fops);
8439 #ifdef CONFIG_FTRACE_SELFTEST
8440 /* Let selftest have access to static functions in this file */
8441 #include "trace_selftest.c"
8445 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8448 struct trace_option_dentry *topt = filp->private_data;
8451 if (topt->flags->val & topt->opt->bit)
8456 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8460 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8463 struct trace_option_dentry *topt = filp->private_data;
8467 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8471 if (val != 0 && val != 1)
8474 if (!!(topt->flags->val & topt->opt->bit) != val) {
8475 mutex_lock(&trace_types_lock);
8476 ret = __set_tracer_option(topt->tr, topt->flags,
8478 mutex_unlock(&trace_types_lock);
8489 static const struct file_operations trace_options_fops = {
8490 .open = tracing_open_generic,
8491 .read = trace_options_read,
8492 .write = trace_options_write,
8493 .llseek = generic_file_llseek,
8497 * In order to pass in both the trace_array descriptor as well as the index
8498 * to the flag that the trace option file represents, the trace_array
8499 * has a character array of trace_flags_index[], which holds the index
8500 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8501 * The address of this character array is passed to the flag option file
8502 * read/write callbacks.
8504 * In order to extract both the index and the trace_array descriptor,
8505 * get_tr_index() uses the following algorithm.
8509 * As the pointer itself contains the address of the index (remember index[x] == x).
8512 * Then to get the trace_array descriptor, by subtracting that index
8513 * from the ptr, we get to the start of the index itself.
8515 * ptr - idx == &index[0]
8517 * Then a simple container_of() from that pointer gets us to the
8518 * trace_array descriptor.
8520 static void get_tr_index(void *data, struct trace_array **ptr,
8521 unsigned int *pindex)
8523 *pindex = *(unsigned char *)data;
8525 *ptr = container_of(data - *pindex, struct trace_array,
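/*
 * Worked example (values made up): if the option file was created with
 * data == &tr->trace_flags_index[3], then *pindex = trace_flags_index[3] = 3,
 * so data - *pindex == &tr->trace_flags_index[0], and the container_of()
 * above recovers the enclosing trace_array.
 */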
8530 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8533 void *tr_index = filp->private_data;
8534 struct trace_array *tr;
8538 get_tr_index(tr_index, &tr, &index);
8540 if (tr->trace_flags & (1 << index))
8545 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8549 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8552 void *tr_index = filp->private_data;
8553 struct trace_array *tr;
8558 get_tr_index(tr_index, &tr, &index);
8560 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8564 if (val != 0 && val != 1)
8567 mutex_lock(&event_mutex);
8568 mutex_lock(&trace_types_lock);
8569 ret = set_tracer_flag(tr, 1 << index, val);
8570 mutex_unlock(&trace_types_lock);
8571 mutex_unlock(&event_mutex);
8581 static const struct file_operations trace_options_core_fops = {
8582 .open = tracing_open_generic,
8583 .read = trace_options_core_read,
8584 .write = trace_options_core_write,
8585 .llseek = generic_file_llseek,
8588 struct dentry *trace_create_file(const char *name,
8590 struct dentry *parent,
8592 const struct file_operations *fops)
8596 ret = tracefs_create_file(name, mode, parent, data, fops);
8598 pr_warn("Could not create tracefs '%s' entry\n", name);
8604 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8606 struct dentry *d_tracer;
8611 d_tracer = tracing_get_dentry(tr);
8612 if (IS_ERR(d_tracer))
8615 tr->options = tracefs_create_dir("options", d_tracer);
8617 pr_warn("Could not create tracefs directory 'options'\n");
8625 create_trace_option_file(struct trace_array *tr,
8626 struct trace_option_dentry *topt,
8627 struct tracer_flags *flags,
8628 struct tracer_opt *opt)
8630 struct dentry *t_options;
8632 t_options = trace_options_init_dentry(tr);
8636 topt->flags = flags;
8640 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
8641 &trace_options_fops);
8646 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8648 struct trace_option_dentry *topts;
8649 struct trace_options *tr_topts;
8650 struct tracer_flags *flags;
8651 struct tracer_opt *opts;
8658 flags = tracer->flags;
8660 if (!flags || !flags->opts)
8664 * If this is an instance, only create flags for tracers
8665 * the instance may have.
8667 if (!trace_ok_for_array(tracer, tr))
8670 for (i = 0; i < tr->nr_topts; i++) {
8671 /* Make sure there are no duplicate flags. */
8672 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8678 for (cnt = 0; opts[cnt].name; cnt++)
8681 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8685 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8692 tr->topts = tr_topts;
8693 tr->topts[tr->nr_topts].tracer = tracer;
8694 tr->topts[tr->nr_topts].topts = topts;
8697 for (cnt = 0; opts[cnt].name; cnt++) {
8698 create_trace_option_file(tr, &topts[cnt], flags,
8700 MEM_FAIL(topts[cnt].entry == NULL,
8701 "Failed to create trace option: %s",
8706 static struct dentry *
8707 create_trace_option_core_file(struct trace_array *tr,
8708 const char *option, long index)
8710 struct dentry *t_options;
8712 t_options = trace_options_init_dentry(tr);
8716 return trace_create_file(option, 0644, t_options,
8717 (void *)&tr->trace_flags_index[index],
8718 &trace_options_core_fops);
8721 static void create_trace_options_dir(struct trace_array *tr)
8723 struct dentry *t_options;
8724 bool top_level = tr == &global_trace;
8727 t_options = trace_options_init_dentry(tr);
8731 for (i = 0; trace_options[i]; i++) {
8733 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
8734 create_trace_option_core_file(tr, trace_options[i], i);
8739 rb_simple_read(struct file *filp, char __user *ubuf,
8740 size_t cnt, loff_t *ppos)
8742 struct trace_array *tr = filp->private_data;
8746 r = tracer_tracing_is_on(tr);
8747 r = sprintf(buf, "%d\n", r);
8749 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8753 rb_simple_write(struct file *filp, const char __user *ubuf,
8754 size_t cnt, loff_t *ppos)
8756 struct trace_array *tr = filp->private_data;
8757 struct trace_buffer *buffer = tr->array_buffer.buffer;
8761 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8766 mutex_lock(&trace_types_lock);
8767 if (!!val == tracer_tracing_is_on(tr)) {
8768 val = 0; /* do nothing */
8770 tracer_tracing_on(tr);
8771 if (tr->current_trace->start)
8772 tr->current_trace->start(tr);
8774 tracer_tracing_off(tr);
8775 if (tr->current_trace->stop)
8776 tr->current_trace->stop(tr);
8778 mutex_unlock(&trace_types_lock);
8786 static const struct file_operations rb_simple_fops = {
8787 .open = tracing_open_generic_tr,
8788 .read = rb_simple_read,
8789 .write = rb_simple_write,
8790 .release = tracing_release_generic_tr,
8791 .llseek = default_llseek,
8795 buffer_percent_read(struct file *filp, char __user *ubuf,
8796 size_t cnt, loff_t *ppos)
8798 struct trace_array *tr = filp->private_data;
8802 r = tr->buffer_percent;
8803 r = sprintf(buf, "%d\n", r);
8805 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8809 buffer_percent_write(struct file *filp, const char __user *ubuf,
8810 size_t cnt, loff_t *ppos)
8812 struct trace_array *tr = filp->private_data;
8816 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8826 tr->buffer_percent = val;
8833 static const struct file_operations buffer_percent_fops = {
8834 .open = tracing_open_generic_tr,
8835 .read = buffer_percent_read,
8836 .write = buffer_percent_write,
8837 .release = tracing_release_generic_tr,
8838 .llseek = default_llseek,
8841 static struct dentry *trace_instance_dir;
8844 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
8847 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
8849 enum ring_buffer_flags rb_flags;
8851 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
8855 buf->buffer = ring_buffer_alloc(size, rb_flags);
8859 buf->data = alloc_percpu(struct trace_array_cpu);
8861 ring_buffer_free(buf->buffer);
8866 /* Allocate the first page for all buffers */
8867 set_buffer_entries(&tr->array_buffer,
8868 ring_buffer_size(tr->array_buffer.buffer, 0));
8873 static int allocate_trace_buffers(struct trace_array *tr, int size)
8877 ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
8881 #ifdef CONFIG_TRACER_MAX_TRACE
8882 ret = allocate_trace_buffer(tr, &tr->max_buffer,
8883 allocate_snapshot ? size : 1);
8884 if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
8885 ring_buffer_free(tr->array_buffer.buffer);
8886 tr->array_buffer.buffer = NULL;
8887 free_percpu(tr->array_buffer.data);
8888 tr->array_buffer.data = NULL;
8891 tr->allocated_snapshot = allocate_snapshot;
8894 * Only the top level trace array gets its snapshot allocated
8895 * from the kernel command line.
8897 allocate_snapshot = false;
8903 static void free_trace_buffer(struct array_buffer *buf)
8906 ring_buffer_free(buf->buffer);
8908 free_percpu(buf->data);
8913 static void free_trace_buffers(struct trace_array *tr)
8918 free_trace_buffer(&tr->array_buffer);
8920 #ifdef CONFIG_TRACER_MAX_TRACE
8921 free_trace_buffer(&tr->max_buffer);
8925 static void init_trace_flags_index(struct trace_array *tr)
8929 /* Used by the trace options files */
8930 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
8931 tr->trace_flags_index[i] = i;
8934 static void __update_tracer_options(struct trace_array *tr)
8938 for (t = trace_types; t; t = t->next)
8939 add_tracer_options(tr, t);
8942 static void update_tracer_options(struct trace_array *tr)
8944 mutex_lock(&trace_types_lock);
8945 __update_tracer_options(tr);
8946 mutex_unlock(&trace_types_lock);
8949 /* Must have trace_types_lock held */
8950 struct trace_array *trace_array_find(const char *instance)
8952 struct trace_array *tr, *found = NULL;
8954 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
8955 if (tr->name && strcmp(tr->name, instance) == 0) {
8964 struct trace_array *trace_array_find_get(const char *instance)
8966 struct trace_array *tr;
8968 mutex_lock(&trace_types_lock);
8969 tr = trace_array_find(instance);
8972 mutex_unlock(&trace_types_lock);
8977 static int trace_array_create_dir(struct trace_array *tr)
8981 tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
8985 ret = event_trace_add_tracer(tr->dir, tr);
8987 tracefs_remove(tr->dir);
8989 init_tracer_tracefs(tr, tr->dir);
8990 __update_tracer_options(tr);
8995 static struct trace_array *trace_array_create(const char *name)
8997 struct trace_array *tr;
9001 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9003 return ERR_PTR(ret);
9005 tr->name = kstrdup(name, GFP_KERNEL);
9009 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9012 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9014 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9016 raw_spin_lock_init(&tr->start_lock);
9018 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9020 tr->current_trace = &nop_trace;
9022 INIT_LIST_HEAD(&tr->systems);
9023 INIT_LIST_HEAD(&tr->events);
9024 INIT_LIST_HEAD(&tr->hist_vars);
9025 INIT_LIST_HEAD(&tr->err_log);
9027 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9030 if (ftrace_allocate_ftrace_ops(tr) < 0)
9033 ftrace_init_trace_array(tr);
9035 init_trace_flags_index(tr);
9037 if (trace_instance_dir) {
9038 ret = trace_array_create_dir(tr);
9042 __trace_early_add_events(tr);
9044 list_add(&tr->list, &ftrace_trace_arrays);
9051 ftrace_free_ftrace_ops(tr);
9052 free_trace_buffers(tr);
9053 free_cpumask_var(tr->tracing_cpumask);
9057 return ERR_PTR(ret);
9060 static int instance_mkdir(const char *name)
9062 struct trace_array *tr;
9065 mutex_lock(&event_mutex);
9066 mutex_lock(&trace_types_lock);
9069 if (trace_array_find(name))
9072 tr = trace_array_create(name);
9074 ret = PTR_ERR_OR_ZERO(tr);
9077 mutex_unlock(&trace_types_lock);
9078 mutex_unlock(&event_mutex);
9083 * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9084 * @name: The name of the trace array to be looked up/created.
9086 * Returns a pointer to the trace array with the given name, or
9087 * NULL if it cannot be created.
9089 * NOTE: This function increments the reference counter associated with the
9090 * trace array returned. This makes sure it cannot be freed while in use.
9091 * Use trace_array_put() once the trace array is no longer needed.
9092 * If the trace_array is to be freed, trace_array_destroy() needs to
9093 * be called after the trace_array_put(), or simply let user space delete
9094 * it from the tracefs instances directory. But until the
9095 * trace_array_put() is called, user space can not delete it.
9098 struct trace_array *trace_array_get_by_name(const char *name)
9100 struct trace_array *tr;
9102 mutex_lock(&event_mutex);
9103 mutex_lock(&trace_types_lock);
9105 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9106 if (tr->name && strcmp(tr->name, name) == 0)
9110 tr = trace_array_create(name);
9118 mutex_unlock(&trace_types_lock);
9119 mutex_unlock(&event_mutex);
9122 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
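/*
 * Usage sketch for in-kernel users (the instance name is only an
 * example): a module can create or look up its own tracing instance,
 * use it, and drop the reference when it is done with it.
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("example_instance");
 *	if (!tr)
 *		return -ENOMEM;
 *
 *	... write into the instance, enable events on it, etc. ...
 *
 *	trace_array_put(tr);
 *
 * Only if the instance itself should go away as well:
 *
 *	trace_array_destroy(tr);
 */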
9124 static int __remove_instance(struct trace_array *tr)
9128 /* Reference counter for a newly created trace array = 1. */
9129 if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9132 list_del(&tr->list);
9134 /* Disable all the flags that were enabled coming in */
9135 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9136 if ((1 << i) & ZEROED_TRACE_FLAGS)
9137 set_tracer_flag(tr, 1 << i, 0);
9140 tracing_set_nop(tr);
9141 clear_ftrace_function_probes(tr);
9142 event_trace_del_tracer(tr);
9143 ftrace_clear_pids(tr);
9144 ftrace_destroy_function_files(tr);
9145 tracefs_remove(tr->dir);
9146 free_percpu(tr->last_func_repeats);
9147 free_trace_buffers(tr);
9149 for (i = 0; i < tr->nr_topts; i++) {
9150 kfree(tr->topts[i].topts);
9154 free_cpumask_var(tr->tracing_cpumask);
9161 int trace_array_destroy(struct trace_array *this_tr)
9163 struct trace_array *tr;
9169 mutex_lock(&event_mutex);
9170 mutex_lock(&trace_types_lock);
9174 /* Making sure trace array exists before destroying it. */
9175 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9176 if (tr == this_tr) {
9177 ret = __remove_instance(tr);
9182 mutex_unlock(&trace_types_lock);
9183 mutex_unlock(&event_mutex);
9187 EXPORT_SYMBOL_GPL(trace_array_destroy);
9189 static int instance_rmdir(const char *name)
9191 struct trace_array *tr;
9194 mutex_lock(&event_mutex);
9195 mutex_lock(&trace_types_lock);
9198 tr = trace_array_find(name);
9200 ret = __remove_instance(tr);
9202 mutex_unlock(&trace_types_lock);
9203 mutex_unlock(&event_mutex);
9208 static __init void create_trace_instances(struct dentry *d_tracer)
9210 struct trace_array *tr;
9212 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9215 if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9218 mutex_lock(&event_mutex);
9219 mutex_lock(&trace_types_lock);
9221 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9224 if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9225 "Failed to create instance directory\n"))
9229 mutex_unlock(&trace_types_lock);
9230 mutex_unlock(&event_mutex);
9234 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9236 struct trace_event_file *file;
9239 trace_create_file("available_tracers", 0444, d_tracer,
9240 tr, &show_traces_fops);
9242 trace_create_file("current_tracer", 0644, d_tracer,
9243 tr, &set_tracer_fops);
9245 trace_create_file("tracing_cpumask", 0644, d_tracer,
9246 tr, &tracing_cpumask_fops);
9248 trace_create_file("trace_options", 0644, d_tracer,
9249 tr, &tracing_iter_fops);
9251 trace_create_file("trace", 0644, d_tracer,
9254 trace_create_file("trace_pipe", 0444, d_tracer,
9255 tr, &tracing_pipe_fops);
9257 trace_create_file("buffer_size_kb", 0644, d_tracer,
9258 tr, &tracing_entries_fops);
9260 trace_create_file("buffer_total_size_kb", 0444, d_tracer,
9261 tr, &tracing_total_entries_fops);
9263 trace_create_file("free_buffer", 0200, d_tracer,
9264 tr, &tracing_free_buffer_fops);
9266 trace_create_file("trace_marker", 0220, d_tracer,
9267 tr, &tracing_mark_fops);
9269 file = __find_event_file(tr, "ftrace", "print");
9270 if (file && file->dir)
9271 trace_create_file("trigger", 0644, file->dir, file,
9272 &event_trigger_fops);
9273 tr->trace_marker_file = file;
9275 trace_create_file("trace_marker_raw", 0220, d_tracer,
9276 tr, &tracing_mark_raw_fops);
9278 trace_create_file("trace_clock", 0644, d_tracer, tr,
9281 trace_create_file("tracing_on", 0644, d_tracer,
9282 tr, &rb_simple_fops);
9284 trace_create_file("timestamp_mode", 0444, d_tracer, tr,
9285 &trace_time_stamp_mode_fops);
9287 tr->buffer_percent = 50;
9289 trace_create_file("buffer_percent", 0444, d_tracer,
9290 tr, &buffer_percent_fops);
9292 create_trace_options_dir(tr);
9294 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
9295 trace_create_maxlat_file(tr, d_tracer);
9298 if (ftrace_create_function_files(tr, d_tracer))
9299 MEM_FAIL(1, "Could not allocate function filter files");
9301 #ifdef CONFIG_TRACER_SNAPSHOT
9302 trace_create_file("snapshot", 0644, d_tracer,
9303 tr, &snapshot_fops);
9306 trace_create_file("error_log", 0644, d_tracer,
9307 tr, &tracing_err_log_fops);
9309 for_each_tracing_cpu(cpu)
9310 tracing_init_tracefs_percpu(tr, cpu);
9312 ftrace_init_tracefs(tr, d_tracer);
9315 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9317 struct vfsmount *mnt;
9318 struct file_system_type *type;
9321 * To maintain backward compatibility for tools that mount
9322 * debugfs to get to the tracing facility, tracefs is automatically
9323 * mounted to the debugfs/tracing directory.
9325 type = get_fs_type("tracefs");
9328 mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9329 put_filesystem(type);
9338 * tracing_init_dentry - initialize top level trace array
9340 * This is called when creating files or directories in the tracing
9341 * directory. It is called via fs_initcall() by any of the boot up code
9342 * and returns 0 on success, or a negative value if the top level tracing directory is not available.
9344 int tracing_init_dentry(void)
9346 struct trace_array *tr = &global_trace;
9348 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9349 pr_warn("Tracing disabled due to lockdown\n");
9353 /* The top level trace array uses NULL as parent */
9357 if (WARN_ON(!tracefs_initialized()))
9361 * As there may still be users that expect the tracing
9362 * files to exist in debugfs/tracing, we must automount
9363 * the tracefs file system there, so older tools still
9364 * work with the newer kernel.
9366 tr->dir = debugfs_create_automount("tracing", NULL,
9367 trace_automount, NULL);
9372 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9373 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9375 static struct workqueue_struct *eval_map_wq __initdata;
9376 static struct work_struct eval_map_work __initdata;
9378 static void __init eval_map_work_func(struct work_struct *work)
9382 len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9383 trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9386 static int __init trace_eval_init(void)
9388 INIT_WORK(&eval_map_work, eval_map_work_func);
9390 eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9392 pr_err("Unable to allocate eval_map_wq\n");
9394 eval_map_work_func(&eval_map_work);
9398 queue_work(eval_map_wq, &eval_map_work);
9402 static int __init trace_eval_sync(void)
9404 /* Make sure the eval map updates are finished */
9406 destroy_workqueue(eval_map_wq);
9410 late_initcall_sync(trace_eval_sync);
9413 #ifdef CONFIG_MODULES
9414 static void trace_module_add_evals(struct module *mod)
9416 if (!mod->num_trace_evals)
9420 * Modules with bad taint do not have events created, do
9421 * not bother with enums either.
9423 if (trace_module_has_bad_taint(mod))
9426 trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9429 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
9430 static void trace_module_remove_evals(struct module *mod)
9432 union trace_eval_map_item *map;
9433 union trace_eval_map_item **last = &trace_eval_maps;
9435 if (!mod->num_trace_evals)
9438 mutex_lock(&trace_eval_mutex);
9440 map = trace_eval_maps;
9443 if (map->head.mod == mod)
9445 map = trace_eval_jmp_to_tail(map);
9446 last = &map->tail.next;
9447 map = map->tail.next;
9452 *last = trace_eval_jmp_to_tail(map)->tail.next;
9455 mutex_unlock(&trace_eval_mutex);
9458 static inline void trace_module_remove_evals(struct module *mod) { }
9459 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9461 static int trace_module_notify(struct notifier_block *self,
9462 unsigned long val, void *data)
9464 struct module *mod = data;
9467 case MODULE_STATE_COMING:
9468 trace_module_add_evals(mod);
9470 case MODULE_STATE_GOING:
9471 trace_module_remove_evals(mod);
9478 static struct notifier_block trace_module_nb = {
9479 .notifier_call = trace_module_notify,
9482 #endif /* CONFIG_MODULES */
9484 static __init int tracer_init_tracefs(void)
9488 trace_access_lock_init();
9490 ret = tracing_init_dentry();
9496 init_tracer_tracefs(&global_trace, NULL);
9497 ftrace_init_tracefs_toplevel(&global_trace, NULL);
9499 trace_create_file("tracing_thresh", 0644, NULL,
9500 &global_trace, &tracing_thresh_fops);
9502 trace_create_file("README", 0444, NULL,
9503 NULL, &tracing_readme_fops);
9505 trace_create_file("saved_cmdlines", 0444, NULL,
9506 NULL, &tracing_saved_cmdlines_fops);
9508 trace_create_file("saved_cmdlines_size", 0644, NULL,
9509 NULL, &tracing_saved_cmdlines_size_fops);
9511 trace_create_file("saved_tgids", 0444, NULL,
9512 NULL, &tracing_saved_tgids_fops);
9516 trace_create_eval_file(NULL);
9518 #ifdef CONFIG_MODULES
9519 register_module_notifier(&trace_module_nb);
9522 #ifdef CONFIG_DYNAMIC_FTRACE
9523 trace_create_file("dyn_ftrace_total_info", 0444, NULL,
9524 NULL, &tracing_dyn_info_fops);
9527 create_trace_instances(NULL);
9529 update_tracer_options(&global_trace);
9534 static int trace_panic_handler(struct notifier_block *this,
9535 unsigned long event, void *unused)
9537 if (ftrace_dump_on_oops)
9538 ftrace_dump(ftrace_dump_on_oops);
9542 static struct notifier_block trace_panic_notifier = {
9543 .notifier_call = trace_panic_handler,
9545 .priority = 150 /* priority: INT_MAX >= x >= 0 */
9548 static int trace_die_handler(struct notifier_block *self,
9554 if (ftrace_dump_on_oops)
9555 ftrace_dump(ftrace_dump_on_oops);
9563 static struct notifier_block trace_die_notifier = {
9564 .notifier_call = trace_die_handler,
9569 * printk is set to a max of 1024; we really don't need it that big.
9570 * Nothing should be printing 1000 characters anyway.
9572 #define TRACE_MAX_PRINT 1000
9575 * Define here KERN_TRACE so that we have one place to modify
9576 * it if we decide to change what log level the ftrace dump should be at.
9579 #define KERN_TRACE KERN_EMERG
9582 trace_printk_seq(struct trace_seq *s)
9584 /* Probably should print a warning here. */
9585 if (s->seq.len >= TRACE_MAX_PRINT)
9586 s->seq.len = TRACE_MAX_PRINT;
9589 * More paranoid code. Although the buffer size is set to
9590 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
9591 * an extra layer of protection.
9593 if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
9594 s->seq.len = s->seq.size - 1;
9596 /* should be zero ended, but we are paranoid. */
9597 s->buffer[s->seq.len] = 0;
9599 printk(KERN_TRACE "%s", s->buffer);
9604 void trace_init_global_iter(struct trace_iterator *iter)
9606 iter->tr = &global_trace;
9607 iter->trace = iter->tr->current_trace;
9608 iter->cpu_file = RING_BUFFER_ALL_CPUS;
9609 iter->array_buffer = &global_trace.array_buffer;
9611 if (iter->trace && iter->trace->open)
9612 iter->trace->open(iter);
9614 /* Annotate start of buffers if we had overruns */
9615 if (ring_buffer_overruns(iter->array_buffer->buffer))
9616 iter->iter_flags |= TRACE_FILE_ANNOTATE;
9618 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
9619 if (trace_clocks[iter->tr->clock_id].in_ns)
9620 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
9623 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
9625 /* use static because iter can be a bit big for the stack */
9626 static struct trace_iterator iter;
9627 static atomic_t dump_running;
9628 struct trace_array *tr = &global_trace;
9629 unsigned int old_userobj;
9630 unsigned long flags;
9633 /* Only allow one dump user at a time. */
9634 if (atomic_inc_return(&dump_running) != 1) {
9635 atomic_dec(&dump_running);
9640 * Always turn off tracing when we dump.
9641 * We don't need to show trace output of what happens
9642 * between multiple crashes.
9644 * If the user does a sysrq-z, then they can re-enable
9645 * tracing with echo 1 > tracing_on.
9649 local_irq_save(flags);
9650 printk_nmi_direct_enter();
9652 /* Simulate the iterator */
9653 trace_init_global_iter(&iter);
9654 /* Can not use kmalloc for iter.temp and iter.fmt */
9655 iter.temp = static_temp_buf;
9656 iter.temp_size = STATIC_TEMP_BUF_SIZE;
9657 iter.fmt = static_fmt_buf;
9658 iter.fmt_size = STATIC_FMT_BUF_SIZE;
9660 for_each_tracing_cpu(cpu) {
9661 atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9664 old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
9666 /* don't look at user memory in panic mode */
9667 tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
9669 switch (oops_dump_mode) {
9671 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9674 iter.cpu_file = raw_smp_processor_id();
9679 printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
9680 iter.cpu_file = RING_BUFFER_ALL_CPUS;
9683 printk(KERN_TRACE "Dumping ftrace buffer:\n");
9685 /* Did function tracer already get disabled? */
9686 if (ftrace_is_dead()) {
9687 printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
9688 printk("# MAY BE MISSING FUNCTION EVENTS\n");
9692 * We need to stop all tracing on all CPUs to read
9693 * the next buffer. This is a bit expensive, but is
9694 * not done often. We fill in all that we can read,
9695 * and then release the locks again.
9698 while (!trace_empty(&iter)) {
9701 printk(KERN_TRACE "---------------------------------\n");
9705 trace_iterator_reset(&iter);
9706 iter.iter_flags |= TRACE_FILE_LAT_FMT;
9708 if (trace_find_next_entry_inc(&iter) != NULL) {
9711 ret = print_trace_line(&iter);
9712 if (ret != TRACE_TYPE_NO_CONSUME)
9713 trace_consume(&iter);
9715 touch_nmi_watchdog();
9717 trace_printk_seq(&iter.seq);
9721 printk(KERN_TRACE " (ftrace buffer empty)\n");
9723 printk(KERN_TRACE "---------------------------------\n");
9726 tr->trace_flags |= old_userobj;
9728 for_each_tracing_cpu(cpu) {
9729 atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
9731 atomic_dec(&dump_running);
9732 printk_nmi_direct_exit();
9733 local_irq_restore(flags);
9735 EXPORT_SYMBOL_GPL(ftrace_dump);
9737 #define WRITE_BUFSIZE 4096
9739 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
9740 size_t count, loff_t *ppos,
9741 int (*createfn)(const char *))
9743 char *kbuf, *buf, *tmp;
9748 kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
9752 while (done < count) {
9753 size = count - done;
9755 if (size >= WRITE_BUFSIZE)
9756 size = WRITE_BUFSIZE - 1;
9758 if (copy_from_user(kbuf, buffer + done, size)) {
9765 tmp = strchr(buf, '\n');
9768 size = tmp - buf + 1;
9771 if (done + size < count) {
9774 /* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
9775 pr_warn("Line length is too long: Should be less than %d\n",
9783 /* Remove comments */
9784 tmp = strchr(buf, '#');
9789 ret = createfn(buf);
9794 } while (done < count);
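/*
 * Usage sketch: command-style tracefs files feed their ->write() data
 * through this helper and only supply a per-line @createfn.  The names
 * below are illustrative, not taken from this file:
 *
 *	static int example_create_cmd(const char *raw_command)
 *	{
 *		... parse one '\n'-terminated, '#'-stripped line ...
 *		return 0;
 *	}
 *
 *	static ssize_t example_write(struct file *file,
 *				     const char __user *buffer,
 *				     size_t count, loff_t *ppos)
 *	{
 *		return trace_parse_run_command(file, buffer, count, ppos,
 *					       example_create_cmd);
 *	}
 */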
9804 __init static int tracer_alloc_buffers(void)
9810 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9811 pr_warn("Tracing disabled due to lockdown\n");
9816 * Make sure we don't accidentally add more trace options
9817 * than we have bits for.
9819 BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
9821 if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
9824 if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
9825 goto out_free_buffer_mask;
9827 /* Only allocate trace_printk buffers if a trace_printk exists */
9828 if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
9829 /* Must be called before global_trace.buffer is allocated */
9830 trace_printk_init_buffers();
9832 /* To save memory, keep the ring buffer size to its minimum */
9833 if (ring_buffer_expanded)
9834 ring_buf_size = trace_buf_size;
9838 cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
9839 cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
9841 raw_spin_lock_init(&global_trace.start_lock);
9844 * The prepare callback allocates some memory for the ring buffer. We
9845 * don't free the buffer if the CPU goes down. If we were to free
9846 * the buffer, then the user would lose any trace that was in the
9847 * buffer. The memory will be removed once the "instance" is removed.
9849 ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
9850 "trace/RB:preapre", trace_rb_cpu_prepare,
9853 goto out_free_cpumask;
9854 /* Used for event triggers */
9856 temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
9858 goto out_rm_hp_state;
9860 if (trace_create_savedcmd() < 0)
9861 goto out_free_temp_buffer;
9863 /* TODO: make the number of buffers hot pluggable with CPUs */
9864 if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
9865 MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
9866 goto out_free_savedcmd;
9869 if (global_trace.buffer_disabled)
9872 if (trace_boot_clock) {
9873 ret = tracing_set_clock(&global_trace, trace_boot_clock);
9875 pr_warn("Trace clock %s not defined, going back to default\n",
9880 * register_tracer() might reference current_trace, so it
9881 * needs to be set before we register anything. This is
9882 * just a bootstrap of current_trace anyway.
9884 global_trace.current_trace = &nop_trace;
9886 global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9888 ftrace_init_global_array_ops(&global_trace);
9890 init_trace_flags_index(&global_trace);
9892 register_tracer(&nop_trace);
9894 /* Function tracing may start here (via kernel command line) */
9895 init_function_trace();
9897 /* All seems OK, enable tracing */
9898 tracing_disabled = 0;
9900 atomic_notifier_chain_register(&panic_notifier_list,
9901 &trace_panic_notifier);
9903 register_die_notifier(&trace_die_notifier);
9905 global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
9907 INIT_LIST_HEAD(&global_trace.systems);
9908 INIT_LIST_HEAD(&global_trace.events);
9909 INIT_LIST_HEAD(&global_trace.hist_vars);
9910 INIT_LIST_HEAD(&global_trace.err_log);
9911 list_add(&global_trace.list, &ftrace_trace_arrays);
9913 apply_trace_boot_options();
9915 register_snapshot_cmd();
9922 free_saved_cmdlines_buffer(savedcmd);
9923 out_free_temp_buffer:
9924 ring_buffer_free(temp_buffer);
9926 cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
9928 free_cpumask_var(global_trace.tracing_cpumask);
9929 out_free_buffer_mask:
9930 free_cpumask_var(tracing_buffer_mask);
9935 void __init early_trace_init(void)
9937 if (tracepoint_printk) {
9938 tracepoint_print_iter =
9939 kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
9940 if (MEM_FAIL(!tracepoint_print_iter,
9941 "Failed to allocate trace iterator\n"))
9942 tracepoint_printk = 0;
9944 static_key_enable(&tracepoint_printk_key.key);
9946 tracer_alloc_buffers();
9949 void __init trace_init(void)
9954 __init static int clear_boot_tracer(void)
9957 * The default bootup tracer name points into an init section.
9958 * This function is called at late_initcall time. If we did not
9959 * find the boot tracer, then clear it out, to prevent
9960 * later registration from accessing the buffer that is
9961 * about to be freed.
9963 if (!default_bootup_tracer)
9966 printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
9967 default_bootup_tracer);
9968 default_bootup_tracer = NULL;
9973 fs_initcall(tracer_init_tracefs);
9974 late_initcall_sync(clear_boot_tracer);
9976 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
9977 __init static int tracing_set_default_clock(void)
9979 /* sched_clock_stable() is determined in late_initcall */
9980 if (!trace_boot_clock && !sched_clock_stable()) {
9981 if (security_locked_down(LOCKDOWN_TRACEFS)) {
9982 pr_warn("Can not set tracing clock due to lockdown\n");
9987 "Unstable clock detected, switching default tracing clock to \"global\"\n"
9988 "If you want to keep using the local clock, then add:\n"
9989 " \"trace_clock=local\"\n"
9990 "on the kernel command line\n");
9991 tracing_set_clock(&global_trace, "global");
9996 late_initcall_sync(tracing_set_default_clock);