2 * ring buffer based function tracer
4 * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
5 * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7 * Originally taken from the RT patch by:
8 * Arnaldo Carvalho de Melo <acme@redhat.com>
10 * Based on code from the latency_tracer, that is:
11 * Copyright (C) 2004-2006 Ingo Molnar
12 * Copyright (C) 2004 Nadia Yvette Chambers
14 #include <linux/ring_buffer.h>
15 #include <generated/utsrelease.h>
16 #include <linux/stacktrace.h>
17 #include <linux/writeback.h>
18 #include <linux/kallsyms.h>
19 #include <linux/seq_file.h>
20 #include <linux/notifier.h>
21 #include <linux/irqflags.h>
22 #include <linux/debugfs.h>
23 #include <linux/tracefs.h>
24 #include <linux/pagemap.h>
25 #include <linux/hardirq.h>
26 #include <linux/linkage.h>
27 #include <linux/uaccess.h>
28 #include <linux/vmalloc.h>
29 #include <linux/ftrace.h>
30 #include <linux/module.h>
31 #include <linux/percpu.h>
32 #include <linux/splice.h>
33 #include <linux/kdebug.h>
34 #include <linux/string.h>
35 #include <linux/mount.h>
36 #include <linux/rwsem.h>
37 #include <linux/slab.h>
38 #include <linux/ctype.h>
39 #include <linux/init.h>
40 #include <linux/poll.h>
41 #include <linux/nmi.h>
43 #include <linux/trace.h>
44 #include <linux/sched/rt.h>
47 #include "trace_output.h"
50 * On boot up, the ring buffer is set to the minimum size, so that
51 * we do not waste memory on systems that are not using tracing.
53 bool ring_buffer_expanded;
56 * We need to change this state when a selftest is running.
57 * A selftest will look into the ring-buffer to count the
58 * entries inserted during the selftest, although concurrent
59 * insertions into the ring-buffer, such as trace_printk, could occur
60 * at the same time, giving false positive or negative results.
62 static bool __read_mostly tracing_selftest_running;
65 * If a tracer is running, we do not want to run SELFTEST.
67 bool __read_mostly tracing_selftest_disabled;
69 /* Pipe tracepoints to printk */
70 struct trace_iterator *tracepoint_print_iter;
71 int tracepoint_printk;
72 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
74 /* For tracers that don't implement custom flags */
75 static struct tracer_opt dummy_tracer_opt[] = {
80 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
86 * To prevent the comm cache from being overwritten when no
87 * tracing is active, only save the comm when a trace event
90 static DEFINE_PER_CPU(bool, trace_cmdline_save);
93 * Kill all tracing for good (never come back).
94 * It is initialized to 1 but will turn to zero if the initialization
95 * of the tracer is successful. But that is the only place that sets
98 static int tracing_disabled = 1;
100 cpumask_var_t __read_mostly tracing_buffer_mask;
103 * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
105 * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
106 * is set, then ftrace_dump is called. This will output the contents
107 * of the ftrace buffers to the console. This is very useful for
108 * capturing traces that lead to crashes and outputting them to a
111 * It is off by default, but you can enable it either by specifying
112 * "ftrace_dump_on_oops" on the kernel command line, or by setting
113 * /proc/sys/kernel/ftrace_dump_on_oops
114 * Set 1 if you want to dump buffers of all CPUs
115 * Set 2 if you want to dump the buffer of the CPU that triggered oops
118 enum ftrace_dump_mode ftrace_dump_on_oops;
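/*
 * Example (illustrative): boot with "ftrace_dump_on_oops" to dump all
 * CPU buffers on an oops, or "ftrace_dump_on_oops=orig_cpu" to dump
 * only the CPU that triggered it. At run time the same can be done
 * with:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 */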
120 /* When set, tracing will stop when a WARN*() is hit */
121 int __disable_trace_on_warning;
123 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
124 /* Map of enums to their values, for "eval_map" file */
125 struct trace_eval_map_head {
127 unsigned long length;
130 union trace_eval_map_item;
132 struct trace_eval_map_tail {
134 * "end" is first and points to NULL as it must be different
135 * than "mod" or "eval_string"
137 union trace_eval_map_item *next;
138 const char *end; /* points to NULL */
141 static DEFINE_MUTEX(trace_eval_mutex);
144 * The trace_eval_maps are saved in an array with two extra elements,
145 * one at the beginning, and one at the end. The beginning item contains
146 * the count of the saved maps (head.length), and the module they
147 * belong to if not built in (head.mod). The ending item contains a
148 * pointer to the next array of saved eval_map items.
150 union trace_eval_map_item {
151 struct trace_eval_map map;
152 struct trace_eval_map_head head;
153 struct trace_eval_map_tail tail;
156 static union trace_eval_map_item *trace_eval_maps;
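/*
 * Layout sketch (illustrative) of one saved array:
 *
 *   trace_eval_maps -> [ head (length, mod) ][ map 0 ] ... [ map N-1 ][ tail (next) ]
 *
 * where tail.next chains to the next saved array, if any.
 */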
157 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
159 static int tracing_set_tracer(struct trace_array *tr, const char *buf);
161 #define MAX_TRACER_SIZE 100
162 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
163 static char *default_bootup_tracer;
165 static bool allocate_snapshot;
167 static int __init set_cmdline_ftrace(char *str)
169 strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
170 default_bootup_tracer = bootup_tracer_buf;
171 /* We are using ftrace early, expand it */
172 ring_buffer_expanded = true;
175 __setup("ftrace=", set_cmdline_ftrace);
177 static int __init set_ftrace_dump_on_oops(char *str)
179 if (*str++ != '=' || !*str) {
180 ftrace_dump_on_oops = DUMP_ALL;
184 if (!strcmp("orig_cpu", str)) {
185 ftrace_dump_on_oops = DUMP_ORIG;
191 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
193 static int __init stop_trace_on_warning(char *str)
195 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
196 __disable_trace_on_warning = 1;
199 __setup("traceoff_on_warning", stop_trace_on_warning);
201 static int __init boot_alloc_snapshot(char *str)
203 allocate_snapshot = true;
204 /* We also need the main ring buffer expanded */
205 ring_buffer_expanded = true;
208 __setup("alloc_snapshot", boot_alloc_snapshot);
211 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
213 static int __init set_trace_boot_options(char *str)
215 strlcpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
218 __setup("trace_options=", set_trace_boot_options);
220 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
221 static char *trace_boot_clock __initdata;
223 static int __init set_trace_boot_clock(char *str)
225 strlcpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
226 trace_boot_clock = trace_boot_clock_buf;
229 __setup("trace_clock=", set_trace_boot_clock);
231 static int __init set_tracepoint_printk(char *str)
233 if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
234 tracepoint_printk = 1;
237 __setup("tp_printk", set_tracepoint_printk);
239 unsigned long long ns2usecs(u64 nsec)
246 /* trace_flags holds trace_options default values */
247 #define TRACE_DEFAULT_FLAGS \
248 (FUNCTION_DEFAULT_FLAGS | \
249 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | \
250 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | \
251 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE | \
252 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS)
254 /* trace_options that are only supported by global_trace */
255 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK | \
256 TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
258 /* trace_flags that are default zero for instances */
259 #define ZEROED_TRACE_FLAGS \
260 (TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
263 * The global_trace is the descriptor that holds the top-level tracing
264 * buffers for the live tracing.
266 static struct trace_array global_trace = {
267 .trace_flags = TRACE_DEFAULT_FLAGS,
270 LIST_HEAD(ftrace_trace_arrays);
272 int trace_array_get(struct trace_array *this_tr)
274 struct trace_array *tr;
277 mutex_lock(&trace_types_lock);
278 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
285 mutex_unlock(&trace_types_lock);
290 static void __trace_array_put(struct trace_array *this_tr)
292 WARN_ON(!this_tr->ref);
296 void trace_array_put(struct trace_array *this_tr)
298 mutex_lock(&trace_types_lock);
299 __trace_array_put(this_tr);
300 mutex_unlock(&trace_types_lock);
303 int call_filter_check_discard(struct trace_event_call *call, void *rec,
304 struct ring_buffer *buffer,
305 struct ring_buffer_event *event)
307 if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
308 !filter_match_preds(call->filter, rec)) {
309 __trace_event_discard_commit(buffer, event);
316 void trace_free_pid_list(struct trace_pid_list *pid_list)
318 vfree(pid_list->pids);
323 * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
324 * @filtered_pids: The list of pids to check
325 * @search_pid: The PID to find in @filtered_pids
327 * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
330 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
333 * If pid_max changed after filtered_pids was created, we
334 * by default ignore all pids greater than the previous pid_max.
336 if (search_pid >= filtered_pids->pid_max)
339 return test_bit(search_pid, filtered_pids->pids);
343 * trace_ignore_this_task - should a task be ignored for tracing
344 * @filtered_pids: The list of pids to check
345 * @task: The task that should be ignored if not filtered
347 * Checks if @task should be traced or not from @filtered_pids.
348 * Returns true if @task should *NOT* be traced.
349 * Returns false if @task should be traced.
352 trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
355 * Return false, because if filtered_pids does not exist,
356 * all pids are good to trace.
361 return !trace_find_filtered_pid(filtered_pids, task->pid);
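/*
 * Illustrative sketch of how a tracing hook uses the check above to
 * drop events from tasks outside the filtered pid list (the real
 * callers live in the event and function tracing code):
 *
 *	if (trace_ignore_this_task(pid_list, current))
 *		return;
 */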
365 * trace_filter_add_remove_task - Add or remove a task from a pid_list
366 * @pid_list: The list to modify
367 * @self: The current task for fork or NULL for exit
368 * @task: The task to add or remove
370 * If adding a task and @self is defined, the task is only added if @self
371 * is also included in @pid_list. This happens on fork and tasks should
372 * only be added when the parent is listed. If @self is NULL, then the
373 * @task pid will be removed from the list, which would happen on exit
376 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
377 struct task_struct *self,
378 struct task_struct *task)
383 /* For forks, we only add if the forking task is listed */
385 if (!trace_find_filtered_pid(pid_list, self->pid))
389 /* Sorry, but we don't support pid_max changing after setting */
390 if (task->pid >= pid_list->pid_max)
393 /* "self" is set for forks, and NULL for exits */
395 set_bit(task->pid, pid_list->pids);
397 clear_bit(task->pid, pid_list->pids);
401 * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
402 * @pid_list: The pid list to show
403 * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
404 * @pos: The position of the file
406 * This is used by the seq_file "next" operation to iterate the pids
407 * listed in a trace_pid_list structure.
409 * Returns the pid+1 as we want to display pid of zero, but NULL would
410 * stop the iteration.
412 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
414 unsigned long pid = (unsigned long)v;
418 /* pid already is +1 of the actual previous bit */
419 pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
421 /* Return pid + 1 to allow zero to be represented */
422 if (pid < pid_list->pid_max)
423 return (void *)(pid + 1);
429 * trace_pid_start - Used for seq_file to start reading pid lists
430 * @pid_list: The pid list to show
431 * @pos: The position of the file
433 * This is used by seq_file "start" operation to start the iteration
436 * Returns the pid+1 as we want to display pid of zero, but NULL would
437 * stop the iteration.
439 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
444 pid = find_first_bit(pid_list->pids, pid_list->pid_max);
445 if (pid >= pid_list->pid_max)
448 /* Return pid + 1 so that zero can be the exit value */
449 for (pid++; pid && l < *pos;
450 pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
456 * trace_pid_show - show the current pid in seq_file processing
457 * @m: The seq_file structure to write into
458 * @v: A void pointer of the pid (+1) value to display
460 * Can be directly used by seq_file operations to display the current
463 int trace_pid_show(struct seq_file *m, void *v)
465 unsigned long pid = (unsigned long)v - 1;
467 seq_printf(m, "%lu\n", pid);
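/*
 * Illustrative sketch (wrapper names are hypothetical): the pid filter
 * files wire these helpers into a seq_file, fetching the pid_list in
 * their own ->start()/->next() callbacks and then delegating here:
 *
 *	static const struct seq_operations show_pid_seq_ops = {
 *		.start = p_start,	(calls trace_pid_start())
 *		.next  = p_next,	(calls trace_pid_next())
 *		.stop  = p_stop,
 *		.show  = trace_pid_show,
 *	};
 */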
471 /* 128 should be much more than enough */
472 #define PID_BUF_SIZE 127
474 int trace_pid_write(struct trace_pid_list *filtered_pids,
475 struct trace_pid_list **new_pid_list,
476 const char __user *ubuf, size_t cnt)
478 struct trace_pid_list *pid_list;
479 struct trace_parser parser;
487 if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
491 * Always recreate a new array. The write is an all or nothing
492 * operation. Always create a new array when adding new pids by
493 * the user. If the operation fails, then the current list is
496 pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
500 pid_list->pid_max = READ_ONCE(pid_max);
502 /* Only truncating will shrink pid_max */
503 if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
504 pid_list->pid_max = filtered_pids->pid_max;
506 pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
507 if (!pid_list->pids) {
513 /* copy the current bits to the new max */
514 for_each_set_bit(pid, filtered_pids->pids,
515 filtered_pids->pid_max) {
516 set_bit(pid, pid_list->pids);
525 ret = trace_get_user(&parser, ubuf, cnt, &pos);
526 if (ret < 0 || !trace_parser_loaded(&parser))
533 parser.buffer[parser.idx] = 0;
536 if (kstrtoul(parser.buffer, 0, &val))
538 if (val >= pid_list->pid_max)
543 set_bit(pid, pid_list->pids);
546 trace_parser_clear(&parser);
549 trace_parser_put(&parser);
552 trace_free_pid_list(pid_list);
557 /* Cleared the list of pids */
558 trace_free_pid_list(pid_list);
563 *new_pid_list = pid_list;
568 static u64 buffer_ftrace_now(struct trace_buffer *buf, int cpu)
572 /* Early boot up does not have a buffer yet */
574 return trace_clock_local();
576 ts = ring_buffer_time_stamp(buf->buffer, cpu);
577 ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
582 u64 ftrace_now(int cpu)
584 return buffer_ftrace_now(&global_trace.trace_buffer, cpu);
588 * tracing_is_enabled - Show if global_trace has been enabled
590 * Shows if the global trace has been enabled or not. It uses the
591 * mirror flag "buffer_disabled" to be used in fast paths such as for
592 * the irqsoff tracer. But it may be inaccurate due to races. If you
593 * need to know the accurate state, use tracing_is_on() which is a little
594 * slower, but accurate.
596 int tracing_is_enabled(void)
599 * For quick access (irqsoff uses this in fast path), just
600 * return the mirror variable of the state of the ring buffer.
601 * It's a little racy, but we don't really care.
604 return !global_trace.buffer_disabled;
608 * trace_buf_size is the size in bytes that is allocated
609 * for a buffer. Note, the number of bytes is always rounded
612 * This number is purposely set to a low number of 16384.
613 * If the dump on oops happens, it will be much appreciated
614 * to not have to wait for all that output. Anyway, this is
615 * configurable at both boot time and run time.
617 #define TRACE_BUF_SIZE_DEFAULT 1441792UL /* 16384 * 88 (sizeof(entry)) */
619 static unsigned long trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
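/*
 * Example (illustrative): the size can be set at boot with
 * "trace_buf_size=1441792" (memparse suffixes such as "10M" also work,
 * see set_buf_size() below) or at run time through the buffer_size_kb
 * file in tracefs.
 */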
621 /* trace_types holds a linked list of available tracers. */
622 static struct tracer *trace_types __read_mostly;
625 * trace_types_lock is used to protect the trace_types list.
627 DEFINE_MUTEX(trace_types_lock);
630 * serialize the access of the ring buffer
632 * ring buffer serializes readers, but it is low level protection.
633 * The validity of the events (which are returned by ring_buffer_peek() etc.)
634 * is not protected by the ring buffer.
636 * The content of events may become garbage if we allow another process to
637 * consume these events concurrently:
638 * A) the page of the consumed events may become a normal page
639 * (not a reader page) in the ring buffer, and this page will be rewritten
640 * by events producer.
641 * B) The page of the consumed events may become a page for splice_read,
642 * and this page will be returned to system.
644 * These primitives allow multi-process access to different cpu ring buffers
647 * These primitives don't distinguish read-only and read-consume access.
648 * Multiple read-only accesses are also serialized.
652 static DECLARE_RWSEM(all_cpu_access_lock);
653 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
655 static inline void trace_access_lock(int cpu)
657 if (cpu == RING_BUFFER_ALL_CPUS) {
658 /* gain it for accessing the whole ring buffer. */
659 down_write(&all_cpu_access_lock);
661 /* gain it for accessing a cpu ring buffer. */
663 /* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
664 down_read(&all_cpu_access_lock);
666 /* Secondly block other access to this @cpu ring buffer. */
667 mutex_lock(&per_cpu(cpu_access_lock, cpu));
671 static inline void trace_access_unlock(int cpu)
673 if (cpu == RING_BUFFER_ALL_CPUS) {
674 up_write(&all_cpu_access_lock);
676 mutex_unlock(&per_cpu(cpu_access_lock, cpu));
677 up_read(&all_cpu_access_lock);
681 static inline void trace_access_lock_init(void)
685 for_each_possible_cpu(cpu)
686 mutex_init(&per_cpu(cpu_access_lock, cpu));
691 static DEFINE_MUTEX(access_lock);
693 static inline void trace_access_lock(int cpu)
696 mutex_lock(&access_lock);
699 static inline void trace_access_unlock(int cpu)
702 mutex_unlock(&access_lock);
705 static inline void trace_access_lock_init(void)
711 #ifdef CONFIG_STACKTRACE
712 static void __ftrace_trace_stack(struct ring_buffer *buffer,
714 int skip, int pc, struct pt_regs *regs);
715 static inline void ftrace_trace_stack(struct trace_array *tr,
716 struct ring_buffer *buffer,
718 int skip, int pc, struct pt_regs *regs);
721 static inline void __ftrace_trace_stack(struct ring_buffer *buffer,
723 int skip, int pc, struct pt_regs *regs)
726 static inline void ftrace_trace_stack(struct trace_array *tr,
727 struct ring_buffer *buffer,
729 int skip, int pc, struct pt_regs *regs)
735 static __always_inline void
736 trace_event_setup(struct ring_buffer_event *event,
737 int type, unsigned long flags, int pc)
739 struct trace_entry *ent = ring_buffer_event_data(event);
741 tracing_generic_entry_update(ent, flags, pc);
745 static __always_inline struct ring_buffer_event *
746 __trace_buffer_lock_reserve(struct ring_buffer *buffer,
749 unsigned long flags, int pc)
751 struct ring_buffer_event *event;
753 event = ring_buffer_lock_reserve(buffer, len);
755 trace_event_setup(event, type, flags, pc);
760 void tracer_tracing_on(struct trace_array *tr)
762 if (tr->trace_buffer.buffer)
763 ring_buffer_record_on(tr->trace_buffer.buffer);
765 * This flag is looked at when buffers haven't been allocated
766 * yet, or by some tracers (like irqsoff), that just want to
767 * know if the ring buffer has been disabled, but it can handle
768 * races where it gets disabled but we still do a record.
769 * As the check is in the fast path of the tracers, it is more
770 * important to be fast than accurate.
772 tr->buffer_disabled = 0;
773 /* Make the flag seen by readers */
778 * tracing_on - enable tracing buffers
780 * This function enables tracing buffers that may have been
781 * disabled with tracing_off.
783 void tracing_on(void)
785 tracer_tracing_on(&global_trace);
787 EXPORT_SYMBOL_GPL(tracing_on);
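/*
 * Illustrative use: bracket a suspect code path so the ring buffer
 * stops filling right after the interesting window, then read the
 * "trace" file at leisure (do_something_suspect() is hypothetical):
 *
 *	tracing_on();
 *	do_something_suspect();
 *	tracing_off();
 */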
790 static __always_inline void
791 __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
793 __this_cpu_write(trace_cmdline_save, true);
795 /* If this is the temp buffer, we need to commit fully */
796 if (this_cpu_read(trace_buffered_event) == event) {
797 /* Length is in event->array[0] */
798 ring_buffer_write(buffer, event->array[0], &event->array[1]);
799 /* Release the temp buffer */
800 this_cpu_dec(trace_buffered_event_cnt);
802 ring_buffer_unlock_commit(buffer, event);
806 * __trace_puts - write a constant string into the trace buffer.
807 * @ip: The address of the caller
808 * @str: The constant string to write
809 * @size: The size of the string.
811 int __trace_puts(unsigned long ip, const char *str, int size)
813 struct ring_buffer_event *event;
814 struct ring_buffer *buffer;
815 struct print_entry *entry;
816 unsigned long irq_flags;
820 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
823 pc = preempt_count();
825 if (unlikely(tracing_selftest_running || tracing_disabled))
828 alloc = sizeof(*entry) + size + 2; /* possible \n added */
830 local_save_flags(irq_flags);
831 buffer = global_trace.trace_buffer.buffer;
832 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
837 entry = ring_buffer_event_data(event);
840 memcpy(&entry->buf, str, size);
842 /* Add a newline if necessary */
843 if (entry->buf[size - 1] != '\n') {
844 entry->buf[size] = '\n';
845 entry->buf[size + 1] = '\0';
847 entry->buf[size] = '\0';
849 __buffer_unlock_commit(buffer, event);
850 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
854 EXPORT_SYMBOL_GPL(__trace_puts);
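/*
 * Illustrative use: callers normally go through the trace_puts()
 * macro (see linux/kernel.h), which selects __trace_bputs() for
 * built-in constant strings and __trace_puts() otherwise:
 *
 *	trace_puts("hit the slow path\n");
 */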
857 * __trace_bputs - write the pointer to a constant string into trace buffer
858 * @ip: The address of the caller
859 * @str: The constant string to write to the buffer to
861 int __trace_bputs(unsigned long ip, const char *str)
863 struct ring_buffer_event *event;
864 struct ring_buffer *buffer;
865 struct bputs_entry *entry;
866 unsigned long irq_flags;
867 int size = sizeof(struct bputs_entry);
870 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
873 pc = preempt_count();
875 if (unlikely(tracing_selftest_running || tracing_disabled))
878 local_save_flags(irq_flags);
879 buffer = global_trace.trace_buffer.buffer;
880 event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
885 entry = ring_buffer_event_data(event);
889 __buffer_unlock_commit(buffer, event);
890 ftrace_trace_stack(&global_trace, buffer, irq_flags, 4, pc, NULL);
894 EXPORT_SYMBOL_GPL(__trace_bputs);
896 #ifdef CONFIG_TRACER_SNAPSHOT
897 static void tracing_snapshot_instance(struct trace_array *tr)
899 struct tracer *tracer = tr->current_trace;
903 internal_trace_puts("*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
904 internal_trace_puts("*** snapshot is being ignored ***\n");
908 if (!tr->allocated_snapshot) {
909 internal_trace_puts("*** SNAPSHOT NOT ALLOCATED ***\n");
910 internal_trace_puts("*** stopping trace here! ***\n");
915 /* Note, snapshot can not be used when the tracer uses it */
916 if (tracer->use_max_tr) {
917 internal_trace_puts("*** LATENCY TRACER ACTIVE ***\n");
918 internal_trace_puts("*** Can not use snapshot (sorry) ***\n");
922 local_irq_save(flags);
923 update_max_tr(tr, current, smp_processor_id());
924 local_irq_restore(flags);
928 * tracing_snapshot - take a snapshot of the current buffer.
930 * This causes a swap between the snapshot buffer and the current live
931 * tracing buffer. You can use this to take snapshots of the live
932 * trace when some condition is triggered, but continue to trace.
934 * Note, make sure to allocate the snapshot either with
935 * tracing_snapshot_alloc(), or manually with:
936 * echo 1 > /sys/kernel/debug/tracing/snapshot
938 * If the snapshot buffer is not allocated, it will stop tracing.
939 * Basically making a permanent snapshot.
941 void tracing_snapshot(void)
943 struct trace_array *tr = &global_trace;
945 tracing_snapshot_instance(tr);
947 EXPORT_SYMBOL_GPL(tracing_snapshot);
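/*
 * Illustrative use (a sketch): allocate the snapshot buffer once from
 * a context that may sleep, then trigger snapshots from hot paths
 * (rare_condition() is hypothetical):
 *
 *	tracing_snapshot_alloc();
 *	...
 *	if (rare_condition())
 *		tracing_snapshot();
 */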
949 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
950 struct trace_buffer *size_buf, int cpu_id);
951 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val);
953 static int alloc_snapshot(struct trace_array *tr)
957 if (!tr->allocated_snapshot) {
959 /* allocate spare buffer */
960 ret = resize_buffer_duplicate_size(&tr->max_buffer,
961 &tr->trace_buffer, RING_BUFFER_ALL_CPUS);
965 tr->allocated_snapshot = true;
971 static void free_snapshot(struct trace_array *tr)
974 * We don't free the ring buffer; instead, we resize it because
975 * the max_tr ring buffer has some state (e.g. ring->clock) and
976 * we want to preserve it.
978 ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
979 set_buffer_entries(&tr->max_buffer, 1);
980 tracing_reset_online_cpus(&tr->max_buffer);
981 tr->allocated_snapshot = false;
985 * tracing_alloc_snapshot - allocate snapshot buffer.
987 * This only allocates the snapshot buffer if it isn't already
988 * allocated - it doesn't also take a snapshot.
990 * This is meant to be used in cases where the snapshot buffer needs
991 * to be set up for events that can't sleep but need to be able to
992 * trigger a snapshot.
994 int tracing_alloc_snapshot(void)
996 struct trace_array *tr = &global_trace;
999 ret = alloc_snapshot(tr);
1004 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1007 * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1009 * This is similar to tracing_snapshot(), but it will allocate the
1010 * snapshot buffer if it isn't already allocated. Use this only
1011 * where it is safe to sleep, as the allocation may sleep.
1013 * This causes a swap between the snapshot buffer and the current live
1014 * tracing buffer. You can use this to take snapshots of the live
1015 * trace when some condition is triggered, but continue to trace.
1017 void tracing_snapshot_alloc(void)
1021 ret = tracing_alloc_snapshot();
1027 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1029 void tracing_snapshot(void)
1031 WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1033 EXPORT_SYMBOL_GPL(tracing_snapshot);
1034 int tracing_alloc_snapshot(void)
1036 WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1039 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1040 void tracing_snapshot_alloc(void)
1045 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1046 #endif /* CONFIG_TRACER_SNAPSHOT */
1048 void tracer_tracing_off(struct trace_array *tr)
1050 if (tr->trace_buffer.buffer)
1051 ring_buffer_record_off(tr->trace_buffer.buffer);
1053 * This flag is looked at when buffers haven't been allocated
1054 * yet, or by some tracers (like irqsoff), that just want to
1055 * know if the ring buffer has been disabled, but it can handle
1056 * races where it gets disabled but we still do a record.
1057 * As the check is in the fast path of the tracers, it is more
1058 * important to be fast than accurate.
1060 tr->buffer_disabled = 1;
1061 /* Make the flag seen by readers */
1066 * tracing_off - turn off tracing buffers
1068 * This function stops the tracing buffers from recording data.
1069 * It does not disable any overhead the tracers themselves may
1070 * be causing. This function simply causes all recording to
1071 * the ring buffers to fail.
1073 void tracing_off(void)
1075 tracer_tracing_off(&global_trace);
1077 EXPORT_SYMBOL_GPL(tracing_off);
1079 void disable_trace_on_warning(void)
1081 if (__disable_trace_on_warning)
1086 * tracer_tracing_is_on - show real state of ring buffer enabled
1087 * @tr: the trace array to know if ring buffer is enabled
1089 * Shows real state of the ring buffer if it is enabled or not.
1091 int tracer_tracing_is_on(struct trace_array *tr)
1093 if (tr->trace_buffer.buffer)
1094 return ring_buffer_record_is_on(tr->trace_buffer.buffer);
1095 return !tr->buffer_disabled;
1099 * tracing_is_on - show state of ring buffers enabled
1101 int tracing_is_on(void)
1103 return tracer_tracing_is_on(&global_trace);
1105 EXPORT_SYMBOL_GPL(tracing_is_on);
1107 static int __init set_buf_size(char *str)
1109 unsigned long buf_size;
1113 buf_size = memparse(str, &str);
1114 /* nr_entries can not be zero */
1117 trace_buf_size = buf_size;
1120 __setup("trace_buf_size=", set_buf_size);
1122 static int __init set_tracing_thresh(char *str)
1124 unsigned long threshold;
1129 ret = kstrtoul(str, 0, &threshold);
1132 tracing_thresh = threshold * 1000;
1135 __setup("tracing_thresh=", set_tracing_thresh);
1137 unsigned long nsecs_to_usecs(unsigned long nsecs)
1139 return nsecs / 1000;
1143 * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1144 * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1145 * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1146 * of strings in the order that the evals (enum) were defined.
1151 /* These must match the bit positions in trace_iterator_flags */
1152 static const char *trace_options[] = {
1160 int in_ns; /* is this clock in nanoseconds? */
1161 } trace_clocks[] = {
1162 { trace_clock_local, "local", 1 },
1163 { trace_clock_global, "global", 1 },
1164 { trace_clock_counter, "counter", 0 },
1165 { trace_clock_jiffies, "uptime", 0 },
1166 { trace_clock, "perf", 1 },
1167 { ktime_get_mono_fast_ns, "mono", 1 },
1168 { ktime_get_raw_fast_ns, "mono_raw", 1 },
1169 { ktime_get_boot_fast_ns, "boot", 1 },
1174 * trace_parser_get_init - gets the buffer for trace parser
1176 int trace_parser_get_init(struct trace_parser *parser, int size)
1178 memset(parser, 0, sizeof(*parser));
1180 parser->buffer = kmalloc(size, GFP_KERNEL);
1181 if (!parser->buffer)
1184 parser->size = size;
1189 * trace_parser_put - frees the buffer for trace parser
1191 void trace_parser_put(struct trace_parser *parser)
1193 kfree(parser->buffer);
1194 parser->buffer = NULL;
1198 * trace_get_user - reads the user input string separated by space
1199 * (matched by isspace(ch))
1201 * For each string found the 'struct trace_parser' is updated,
1202 * and the function returns.
1204 * Returns number of bytes read.
1206 * See kernel/trace/trace.h for 'struct trace_parser' details.
1208 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1209 size_t cnt, loff_t *ppos)
1216 trace_parser_clear(parser);
1218 ret = get_user(ch, ubuf++);
1226 * If the parser is not finished with the last write,
1227 * continue reading the user input without skipping spaces.
1229 if (!parser->cont) {
1230 /* skip white space */
1231 while (cnt && isspace(ch)) {
1232 ret = get_user(ch, ubuf++);
1239 /* only spaces were written */
1249 /* read the non-space input */
1250 while (cnt && !isspace(ch)) {
1251 if (parser->idx < parser->size - 1)
1252 parser->buffer[parser->idx++] = ch;
1257 ret = get_user(ch, ubuf++);
1264 /* We either got finished input or we have to wait for another call. */
1266 parser->buffer[parser->idx] = 0;
1267 parser->cont = false;
1268 } else if (parser->idx < parser->size - 1) {
1269 parser->cont = true;
1270 parser->buffer[parser->idx++] = ch;
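/*
 * Illustrative sketch of the usual calling pattern (trace_pid_write()
 * above is a real in-file example; MAX_LEN is hypothetical):
 *
 *	trace_parser_get_init(&parser, MAX_LEN);
 *	while (cnt > 0) {
 *		ret = trace_get_user(&parser, ubuf, cnt, &pos);
 *		if (ret < 0 || !trace_parser_loaded(&parser))
 *			break;
 *		... use parser.buffer ...
 *		trace_parser_clear(&parser);
 *		ubuf += ret;
 *		cnt -= ret;
 *	}
 *	trace_parser_put(&parser);
 */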
1283 /* TODO add a seq_buf_to_buffer() */
1284 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1288 if (trace_seq_used(s) <= s->seq.readpos)
1291 len = trace_seq_used(s) - s->seq.readpos;
1294 memcpy(buf, s->buffer + s->seq.readpos, cnt);
1296 s->seq.readpos += cnt;
1300 unsigned long __read_mostly tracing_thresh;
1302 #ifdef CONFIG_TRACER_MAX_TRACE
1304 * Copy the new maximum trace into the separate maximum-trace
1305 * structure. (this way the maximum trace is permanently saved,
1306 * for later retrieval via /sys/kernel/debug/tracing/latency_trace)
1309 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1311 struct trace_buffer *trace_buf = &tr->trace_buffer;
1312 struct trace_buffer *max_buf = &tr->max_buffer;
1313 struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1314 struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1317 max_buf->time_start = data->preempt_timestamp;
1319 max_data->saved_latency = tr->max_latency;
1320 max_data->critical_start = data->critical_start;
1321 max_data->critical_end = data->critical_end;
1323 memcpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1324 max_data->pid = tsk->pid;
1326 * If tsk == current, then use current_uid(), as that does not use
1327 * RCU. The irq tracer can be called out of RCU scope.
1330 max_data->uid = current_uid();
1332 max_data->uid = task_uid(tsk);
1334 max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1335 max_data->policy = tsk->policy;
1336 max_data->rt_priority = tsk->rt_priority;
1338 /* record this task's comm */
1339 tracing_record_cmdline(tsk);
1343 * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1345 * @tsk: the task with the latency
1346 * @cpu: The cpu that initiated the trace.
1348 * Flip the buffers between the @tr and the max_tr and record information
1349 * about which task was the cause of this latency.
1352 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1354 struct ring_buffer *buf;
1359 WARN_ON_ONCE(!irqs_disabled());
1361 if (!tr->allocated_snapshot) {
1362 /* Only the nop tracer should hit this when disabling */
1363 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1367 arch_spin_lock(&tr->max_lock);
1369 buf = tr->trace_buffer.buffer;
1370 tr->trace_buffer.buffer = tr->max_buffer.buffer;
1371 tr->max_buffer.buffer = buf;
1373 __update_max_tr(tr, tsk, cpu);
1374 arch_spin_unlock(&tr->max_lock);
1378 * update_max_tr_single - only copy one trace over, and reset the rest
1380 * @tsk: task with the latency
1381 * @cpu: the cpu of the buffer to copy.
1383 * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1386 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1393 WARN_ON_ONCE(!irqs_disabled());
1394 if (!tr->allocated_snapshot) {
1395 /* Only the nop tracer should hit this when disabling */
1396 WARN_ON_ONCE(tr->current_trace != &nop_trace);
1400 arch_spin_lock(&tr->max_lock);
1402 ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->trace_buffer.buffer, cpu);
1404 if (ret == -EBUSY) {
1406 * We failed to swap the buffer due to a commit taking
1407 * place on this CPU. We fail to record, but we reset
1408 * the max trace buffer (no one writes directly to it)
1409 * and flag that it failed.
1411 trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1412 "Failed to swap buffers due to commit in progress\n");
1415 WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1417 __update_max_tr(tr, tsk, cpu);
1418 arch_spin_unlock(&tr->max_lock);
1420 #endif /* CONFIG_TRACER_MAX_TRACE */
1422 static int wait_on_pipe(struct trace_iterator *iter, bool full)
1424 /* Iterators are static, they should be filled or empty */
1425 if (trace_buffer_iter(iter, iter->cpu_file))
1428 return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file,
1432 #ifdef CONFIG_FTRACE_STARTUP_TEST
1433 static bool selftests_can_run;
1435 struct trace_selftests {
1436 struct list_head list;
1437 struct tracer *type;
1440 static LIST_HEAD(postponed_selftests);
1442 static int save_selftest(struct tracer *type)
1444 struct trace_selftests *selftest;
1446 selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
1450 selftest->type = type;
1451 list_add(&selftest->list, &postponed_selftests);
1455 static int run_tracer_selftest(struct tracer *type)
1457 struct trace_array *tr = &global_trace;
1458 struct tracer *saved_tracer = tr->current_trace;
1461 if (!type->selftest || tracing_selftest_disabled)
1465 * If a tracer registers early in boot up (before scheduling is
1466 * initialized and such), then do not run its selftests yet.
1467 * Instead, run it a little later in the boot process.
1469 if (!selftests_can_run)
1470 return save_selftest(type);
1473 * Run a selftest on this tracer.
1474 * Here we reset the trace buffer, and set the current
1475 * tracer to be this tracer. The tracer can then run some
1476 * internal tracing to verify that everything is in order.
1477 * If we fail, we do not register this tracer.
1479 tracing_reset_online_cpus(&tr->trace_buffer);
1481 tr->current_trace = type;
1483 #ifdef CONFIG_TRACER_MAX_TRACE
1484 if (type->use_max_tr) {
1485 /* If we expanded the buffers, make sure the max is expanded too */
1486 if (ring_buffer_expanded)
1487 ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
1488 RING_BUFFER_ALL_CPUS);
1489 tr->allocated_snapshot = true;
1493 /* the test is responsible for initializing and enabling */
1494 pr_info("Testing tracer %s: ", type->name);
1495 ret = type->selftest(type, tr);
1496 /* the test is responsible for resetting too */
1497 tr->current_trace = saved_tracer;
1499 printk(KERN_CONT "FAILED!\n");
1500 /* Add the warning after printing 'FAILED' */
1504 /* Only reset on passing, to avoid touching corrupted buffers */
1505 tracing_reset_online_cpus(&tr->trace_buffer);
1507 #ifdef CONFIG_TRACER_MAX_TRACE
1508 if (type->use_max_tr) {
1509 tr->allocated_snapshot = false;
1511 /* Shrink the max buffer again */
1512 if (ring_buffer_expanded)
1513 ring_buffer_resize(tr->max_buffer.buffer, 1,
1514 RING_BUFFER_ALL_CPUS);
1518 printk(KERN_CONT "PASSED\n");
1522 static __init int init_trace_selftests(void)
1524 struct trace_selftests *p, *n;
1525 struct tracer *t, **last;
1528 selftests_can_run = true;
1530 mutex_lock(&trace_types_lock);
1532 if (list_empty(&postponed_selftests))
1535 pr_info("Running postponed tracer tests:\n");
1537 list_for_each_entry_safe(p, n, &postponed_selftests, list) {
1538 ret = run_tracer_selftest(p->type);
1539 /* If the test fails, then warn and remove from available_tracers */
1541 WARN(1, "tracer: %s failed selftest, disabling\n",
1543 last = &trace_types;
1544 for (t = trace_types; t; t = t->next) {
1557 mutex_unlock(&trace_types_lock);
1561 core_initcall(init_trace_selftests);
1563 static inline int run_tracer_selftest(struct tracer *type)
1567 #endif /* CONFIG_FTRACE_STARTUP_TEST */
1569 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
1571 static void __init apply_trace_boot_options(void);
1574 * register_tracer - register a tracer with the ftrace system.
1575 * @type: the plugin for the tracer
1577 * Register a new plugin tracer.
1579 int __init register_tracer(struct tracer *type)
1585 pr_info("Tracer must have a name\n");
1589 if (strlen(type->name) >= MAX_TRACER_SIZE) {
1590 pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
1594 mutex_lock(&trace_types_lock);
1596 tracing_selftest_running = true;
1598 for (t = trace_types; t; t = t->next) {
1599 if (strcmp(type->name, t->name) == 0) {
1601 pr_info("Tracer %s already registered\n",
1608 if (!type->set_flag)
1609 type->set_flag = &dummy_set_flag;
1611 /* allocate a dummy tracer_flags */
1612 type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
1617 type->flags->val = 0;
1618 type->flags->opts = dummy_tracer_opt;
1620 if (!type->flags->opts)
1621 type->flags->opts = dummy_tracer_opt;
1623 /* store the tracer for __set_tracer_option */
1624 type->flags->trace = type;
1626 ret = run_tracer_selftest(type);
1630 type->next = trace_types;
1632 add_tracer_options(&global_trace, type);
1635 tracing_selftest_running = false;
1636 mutex_unlock(&trace_types_lock);
1638 if (ret || !default_bootup_tracer)
1641 if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
1644 printk(KERN_INFO "Starting tracer '%s'\n", type->name);
1645 /* Do we want this tracer to start on bootup? */
1646 tracing_set_tracer(&global_trace, type->name);
1647 default_bootup_tracer = NULL;
1649 apply_trace_boot_options();
1651 /* disable other selftests, since this will break them. */
1652 tracing_selftest_disabled = true;
1653 #ifdef CONFIG_FTRACE_STARTUP_TEST
1654 printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n",
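/*
 * Illustrative sketch of a minimal in-kernel tracer registration
 * (the .init/.reset callbacks are assumed from other tracers, they are
 * not defined in this file):
 *
 *	static struct tracer mytracer __read_mostly = {
 *		.name	= "mytracer",
 *		.init	= mytracer_init,
 *		.reset	= mytracer_reset,
 *	};
 *
 *	register_tracer(&mytracer);
 */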
1662 void tracing_reset(struct trace_buffer *buf, int cpu)
1664 struct ring_buffer *buffer = buf->buffer;
1669 ring_buffer_record_disable(buffer);
1671 /* Make sure all commits have finished */
1672 synchronize_sched();
1673 ring_buffer_reset_cpu(buffer, cpu);
1675 ring_buffer_record_enable(buffer);
1678 void tracing_reset_online_cpus(struct trace_buffer *buf)
1680 struct ring_buffer *buffer = buf->buffer;
1686 ring_buffer_record_disable(buffer);
1688 /* Make sure all commits have finished */
1689 synchronize_sched();
1691 buf->time_start = buffer_ftrace_now(buf, buf->cpu);
1693 for_each_online_cpu(cpu)
1694 ring_buffer_reset_cpu(buffer, cpu);
1696 ring_buffer_record_enable(buffer);
1699 /* Must have trace_types_lock held */
1700 void tracing_reset_all_online_cpus(void)
1702 struct trace_array *tr;
1704 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
1705 tracing_reset_online_cpus(&tr->trace_buffer);
1706 #ifdef CONFIG_TRACER_MAX_TRACE
1707 tracing_reset_online_cpus(&tr->max_buffer);
1712 #define SAVED_CMDLINES_DEFAULT 128
1713 #define NO_CMDLINE_MAP UINT_MAX
1714 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
1715 struct saved_cmdlines_buffer {
1716 unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
1717 unsigned *map_cmdline_to_pid;
1718 unsigned cmdline_num;
1720 char *saved_cmdlines;
1722 static struct saved_cmdlines_buffer *savedcmd;
1724 /* temporarily disable recording */
1725 static atomic_t trace_record_cmdline_disabled __read_mostly;
1727 static inline char *get_saved_cmdlines(int idx)
1729 return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
1732 static inline void set_cmdline(int idx, const char *cmdline)
1734 memcpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
1737 static int allocate_cmdlines_buffer(unsigned int val,
1738 struct saved_cmdlines_buffer *s)
1740 s->map_cmdline_to_pid = kmalloc(val * sizeof(*s->map_cmdline_to_pid),
1742 if (!s->map_cmdline_to_pid)
1745 s->saved_cmdlines = kmalloc(val * TASK_COMM_LEN, GFP_KERNEL);
1746 if (!s->saved_cmdlines) {
1747 kfree(s->map_cmdline_to_pid);
1752 s->cmdline_num = val;
1753 memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
1754 sizeof(s->map_pid_to_cmdline));
1755 memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
1756 val * sizeof(*s->map_cmdline_to_pid));
1761 static int trace_create_savedcmd(void)
1765 savedcmd = kmalloc(sizeof(*savedcmd), GFP_KERNEL);
1769 ret = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT, savedcmd);
1779 int is_tracing_stopped(void)
1781 return global_trace.stop_count;
1785 * tracing_start - quick start of the tracer
1787 * If tracing is enabled but was stopped by tracing_stop,
1788 * this will start the tracer back up.
1790 void tracing_start(void)
1792 struct ring_buffer *buffer;
1793 unsigned long flags;
1795 if (tracing_disabled)
1798 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1799 if (--global_trace.stop_count) {
1800 if (global_trace.stop_count < 0) {
1801 /* Someone screwed up their debugging */
1803 global_trace.stop_count = 0;
1808 /* Prevent the buffers from switching */
1809 arch_spin_lock(&global_trace.max_lock);
1811 buffer = global_trace.trace_buffer.buffer;
1813 ring_buffer_record_enable(buffer);
1815 #ifdef CONFIG_TRACER_MAX_TRACE
1816 buffer = global_trace.max_buffer.buffer;
1818 ring_buffer_record_enable(buffer);
1821 arch_spin_unlock(&global_trace.max_lock);
1824 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1827 static void tracing_start_tr(struct trace_array *tr)
1829 struct ring_buffer *buffer;
1830 unsigned long flags;
1832 if (tracing_disabled)
1835 /* If global, we need to also start the max tracer */
1836 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1837 return tracing_start();
1839 raw_spin_lock_irqsave(&tr->start_lock, flags);
1841 if (--tr->stop_count) {
1842 if (tr->stop_count < 0) {
1843 /* Someone screwed up their debugging */
1850 buffer = tr->trace_buffer.buffer;
1852 ring_buffer_record_enable(buffer);
1855 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1859 * tracing_stop - quick stop of the tracer
1861 * Lightweight way to stop tracing. Use in conjunction with
1864 void tracing_stop(void)
1866 struct ring_buffer *buffer;
1867 unsigned long flags;
1869 raw_spin_lock_irqsave(&global_trace.start_lock, flags);
1870 if (global_trace.stop_count++)
1873 /* Prevent the buffers from switching */
1874 arch_spin_lock(&global_trace.max_lock);
1876 buffer = global_trace.trace_buffer.buffer;
1878 ring_buffer_record_disable(buffer);
1880 #ifdef CONFIG_TRACER_MAX_TRACE
1881 buffer = global_trace.max_buffer.buffer;
1883 ring_buffer_record_disable(buffer);
1886 arch_spin_unlock(&global_trace.max_lock);
1889 raw_spin_unlock_irqrestore(&global_trace.start_lock, flags);
1892 static void tracing_stop_tr(struct trace_array *tr)
1894 struct ring_buffer *buffer;
1895 unsigned long flags;
1897 /* If global, we need to also stop the max tracer */
1898 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
1899 return tracing_stop();
1901 raw_spin_lock_irqsave(&tr->start_lock, flags);
1902 if (tr->stop_count++)
1905 buffer = tr->trace_buffer.buffer;
1907 ring_buffer_record_disable(buffer);
1910 raw_spin_unlock_irqrestore(&tr->start_lock, flags);
1913 static int trace_save_cmdline(struct task_struct *tsk)
1917 if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT))
1921 * It's not the end of the world if we don't get
1922 * the lock, but we also don't want to spin
1923 * nor do we want to disable interrupts,
1924 * so if we miss here, then better luck next time.
1926 if (!arch_spin_trylock(&trace_cmdline_lock))
1929 idx = savedcmd->map_pid_to_cmdline[tsk->pid];
1930 if (idx == NO_CMDLINE_MAP) {
1931 idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
1934 * Check whether the cmdline buffer at idx has a pid
1935 * mapped. We are going to overwrite that entry so we
1936 * need to clear the map_pid_to_cmdline. Otherwise we
1937 * would read the new comm for the old pid.
1939 pid = savedcmd->map_cmdline_to_pid[idx];
1940 if (pid != NO_CMDLINE_MAP)
1941 savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
1943 savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
1944 savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
1946 savedcmd->cmdline_idx = idx;
1949 set_cmdline(idx, tsk->comm);
1951 arch_spin_unlock(&trace_cmdline_lock);
1956 static void __trace_find_cmdline(int pid, char comm[])
1961 strcpy(comm, "<idle>");
1965 if (WARN_ON_ONCE(pid < 0)) {
1966 strcpy(comm, "<XXX>");
1970 if (pid > PID_MAX_DEFAULT) {
1971 strcpy(comm, "<...>");
1975 map = savedcmd->map_pid_to_cmdline[pid];
1976 if (map != NO_CMDLINE_MAP)
1977 strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
1979 strcpy(comm, "<...>");
1982 void trace_find_cmdline(int pid, char comm[])
1985 arch_spin_lock(&trace_cmdline_lock);
1987 __trace_find_cmdline(pid, comm);
1989 arch_spin_unlock(&trace_cmdline_lock);
1993 void tracing_record_cmdline(struct task_struct *tsk)
1995 if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
1998 if (!__this_cpu_read(trace_cmdline_save))
2001 if (trace_save_cmdline(tsk))
2002 __this_cpu_write(trace_cmdline_save, false);
2006 * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2007 * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2008 * simplifies those functions and keeps them in sync.
2010 enum print_line_t trace_handle_return(struct trace_seq *s)
2012 return trace_seq_has_overflowed(s) ?
2013 TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2015 EXPORT_SYMBOL_GPL(trace_handle_return);
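/*
 * Illustrative use in an event's output callback (a sketch; the
 * callback signature follows struct trace_event_functions):
 *
 *	static enum print_line_t foo_output(struct trace_iterator *iter,
 *					    int flags, struct trace_event *event)
 *	{
 *		trace_seq_printf(&iter->seq, "...");
 *		return trace_handle_return(&iter->seq);
 *	}
 */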
2018 tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
2021 struct task_struct *tsk = current;
2023 entry->preempt_count = pc & 0xff;
2024 entry->pid = (tsk) ? tsk->pid : 0;
2026 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
2027 (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
2029 TRACE_FLAG_IRQS_NOSUPPORT |
2031 ((pc & NMI_MASK ) ? TRACE_FLAG_NMI : 0) |
2032 ((pc & HARDIRQ_MASK) ? TRACE_FLAG_HARDIRQ : 0) |
2033 ((pc & SOFTIRQ_OFFSET) ? TRACE_FLAG_SOFTIRQ : 0) |
2034 (tif_need_resched() ? TRACE_FLAG_NEED_RESCHED : 0) |
2035 (test_preempt_need_resched() ? TRACE_FLAG_PREEMPT_RESCHED : 0);
2037 EXPORT_SYMBOL_GPL(tracing_generic_entry_update);
2039 struct ring_buffer_event *
2040 trace_buffer_lock_reserve(struct ring_buffer *buffer,
2043 unsigned long flags, int pc)
2045 return __trace_buffer_lock_reserve(buffer, type, len, flags, pc);
2048 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2049 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2050 static int trace_buffered_event_ref;
2053 * trace_buffered_event_enable - enable buffering events
2055 * When events are being filtered, it is quicker to use a temporary
2056 * buffer to write the event data into if there's a likely chance
2057 * that it will not be committed. The discard of the ring buffer
2058 * is not as fast as committing, and is much slower than copying
2061 * When an event is to be filtered, allocate per cpu buffers to
2062 * write the event data into, and if the event is filtered and discarded
2063 * it is simply dropped, otherwise, the entire data is to be committed
2066 void trace_buffered_event_enable(void)
2068 struct ring_buffer_event *event;
2072 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2074 if (trace_buffered_event_ref++)
2077 for_each_tracing_cpu(cpu) {
2078 page = alloc_pages_node(cpu_to_node(cpu),
2079 GFP_KERNEL | __GFP_NORETRY, 0);
2083 event = page_address(page);
2084 memset(event, 0, sizeof(*event));
2086 per_cpu(trace_buffered_event, cpu) = event;
2089 if (cpu == smp_processor_id() &&
2090 this_cpu_read(trace_buffered_event) !=
2091 per_cpu(trace_buffered_event, cpu))
2098 trace_buffered_event_disable();
2101 static void enable_trace_buffered_event(void *data)
2103 /* Probably not needed, but do it anyway */
2105 this_cpu_dec(trace_buffered_event_cnt);
2108 static void disable_trace_buffered_event(void *data)
2110 this_cpu_inc(trace_buffered_event_cnt);
2114 * trace_buffered_event_disable - disable buffering events
2116 * When a filter is removed, it is faster to not use the buffered
2117 * events, and to commit directly into the ring buffer. Free up
2118 * the temp buffers when there are no more users. This requires
2119 * special synchronization with current events.
2121 void trace_buffered_event_disable(void)
2125 WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2127 if (WARN_ON_ONCE(!trace_buffered_event_ref))
2130 if (--trace_buffered_event_ref)
2134 /* For each CPU, set the buffer as used. */
2135 smp_call_function_many(tracing_buffer_mask,
2136 disable_trace_buffered_event, NULL, 1);
2139 /* Wait for all current users to finish */
2140 synchronize_sched();
2142 for_each_tracing_cpu(cpu) {
2143 free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2144 per_cpu(trace_buffered_event, cpu) = NULL;
2147 * Make sure trace_buffered_event is NULL before clearing
2148 * trace_buffered_event_cnt.
2153 /* Do the work on each cpu */
2154 smp_call_function_many(tracing_buffer_mask,
2155 enable_trace_buffered_event, NULL, 1);
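/*
 * Illustrative pairing (a sketch): the event filter code calls
 * trace_buffered_event_enable() when a filter is attached and
 * trace_buffered_event_disable() when it is removed, in both cases
 * with event_mutex held (see the WARN_ON_ONCE() checks above):
 *
 *	mutex_lock(&event_mutex);
 *	trace_buffered_event_enable();
 *	mutex_unlock(&event_mutex);
 */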
2159 static struct ring_buffer *temp_buffer;
2161 struct ring_buffer_event *
2162 trace_event_buffer_lock_reserve(struct ring_buffer **current_rb,
2163 struct trace_event_file *trace_file,
2164 int type, unsigned long len,
2165 unsigned long flags, int pc)
2167 struct ring_buffer_event *entry;
2170 *current_rb = trace_file->tr->trace_buffer.buffer;
2172 if ((trace_file->flags &
2173 (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
2174 (entry = this_cpu_read(trace_buffered_event))) {
2175 /* Try to use the per cpu buffer first */
2176 val = this_cpu_inc_return(trace_buffered_event_cnt);
2178 trace_event_setup(entry, type, flags, pc);
2179 entry->array[0] = len;
2182 this_cpu_dec(trace_buffered_event_cnt);
2185 entry = __trace_buffer_lock_reserve(*current_rb,
2186 type, len, flags, pc);
2188 * If tracing is off, but we have triggers enabled,
2189 * we still need to look at the event data. Use the temp_buffer
2190 * to store the trace event for the trigger to use. It's recursion
2191 * safe and will not be recorded anywhere.
2193 if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2194 *current_rb = temp_buffer;
2195 entry = __trace_buffer_lock_reserve(*current_rb,
2196 type, len, flags, pc);
2200 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2202 static DEFINE_SPINLOCK(tracepoint_iter_lock);
2203 static DEFINE_MUTEX(tracepoint_printk_mutex);
2205 static void output_printk(struct trace_event_buffer *fbuffer)
2207 struct trace_event_call *event_call;
2208 struct trace_event *event;
2209 unsigned long flags;
2210 struct trace_iterator *iter = tracepoint_print_iter;
2212 /* We should never get here if iter is NULL */
2213 if (WARN_ON_ONCE(!iter))
2216 event_call = fbuffer->trace_file->event_call;
2217 if (!event_call || !event_call->event.funcs ||
2218 !event_call->event.funcs->trace)
2221 event = &fbuffer->trace_file->event_call->event;
2223 spin_lock_irqsave(&tracepoint_iter_lock, flags);
2224 trace_seq_init(&iter->seq);
2225 iter->ent = fbuffer->entry;
2226 event_call->event.funcs->trace(iter, 0, event);
2227 trace_seq_putc(&iter->seq, 0);
2228 printk("%s", iter->seq.buffer);
2230 spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2233 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2234 void __user *buffer, size_t *lenp,
2237 int save_tracepoint_printk;
2240 mutex_lock(&tracepoint_printk_mutex);
2241 save_tracepoint_printk = tracepoint_printk;
2243 ret = proc_dointvec(table, write, buffer, lenp, ppos);
2246 * This will force exiting early, as tracepoint_printk
2247 * is always zero when tracepoint_print_iter is not allocated
2249 if (!tracepoint_print_iter)
2250 tracepoint_printk = 0;
2252 if (save_tracepoint_printk == tracepoint_printk)
2255 if (tracepoint_printk)
2256 static_key_enable(&tracepoint_printk_key.key);
2258 static_key_disable(&tracepoint_printk_key.key);
2261 mutex_unlock(&tracepoint_printk_mutex);
2266 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2268 if (static_key_false(&tracepoint_printk_key.key))
2269 output_printk(fbuffer);
2271 event_trigger_unlock_commit(fbuffer->trace_file, fbuffer->buffer,
2272 fbuffer->event, fbuffer->entry,
2273 fbuffer->flags, fbuffer->pc);
2275 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2277 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2278 struct ring_buffer *buffer,
2279 struct ring_buffer_event *event,
2280 unsigned long flags, int pc,
2281 struct pt_regs *regs)
2283 __buffer_unlock_commit(buffer, event);
2286 * If regs is not set, then skip the following callers:
2287 * trace_buffer_unlock_commit_regs
2288 * event_trigger_unlock_commit
2289 * trace_event_buffer_commit
2290 * trace_event_raw_event_sched_switch
2291 * Note, we can still get here via blktrace, wakeup tracer
2292 * and mmiotrace, but that's ok if they lose a function or
2293 * two. They are not that meaningful.
2295 ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
2296 ftrace_trace_userstack(buffer, flags, pc);
2300 * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2303 trace_buffer_unlock_commit_nostack(struct ring_buffer *buffer,
2304 struct ring_buffer_event *event)
2306 __buffer_unlock_commit(buffer, event);
2310 trace_process_export(struct trace_export *export,
2311 struct ring_buffer_event *event)
2313 struct trace_entry *entry;
2314 unsigned int size = 0;
2316 entry = ring_buffer_event_data(event);
2317 size = ring_buffer_event_length(event);
2318 export->write(entry, size);
2321 static DEFINE_MUTEX(ftrace_export_lock);
2323 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
2325 static DEFINE_STATIC_KEY_FALSE(ftrace_exports_enabled);
2327 static inline void ftrace_exports_enable(void)
2329 static_branch_enable(&ftrace_exports_enabled);
2332 static inline void ftrace_exports_disable(void)
2334 static_branch_disable(&ftrace_exports_enabled);
2337 void ftrace_exports(struct ring_buffer_event *event)
2339 struct trace_export *export;
2341 preempt_disable_notrace();
2343 export = rcu_dereference_raw_notrace(ftrace_exports_list);
2345 trace_process_export(export, event);
2346 export = rcu_dereference_raw_notrace(export->next);
2349 preempt_enable_notrace();
2353 add_trace_export(struct trace_export **list, struct trace_export *export)
2355 rcu_assign_pointer(export->next, *list);
2357 * We are entering export into the list but another
2358 * CPU might be walking that list. We need to make sure
2359 * the export->next pointer is valid before another CPU sees
2360 * the export pointer included into the list.
2362 rcu_assign_pointer(*list, export);
2366 rm_trace_export(struct trace_export **list, struct trace_export *export)
2368 struct trace_export **p;
2370 for (p = list; *p != NULL; p = &(*p)->next)
2377 rcu_assign_pointer(*p, (*p)->next);
2383 add_ftrace_export(struct trace_export **list, struct trace_export *export)
2386 ftrace_exports_enable();
2388 add_trace_export(list, export);
2392 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
2396 ret = rm_trace_export(list, export);
2398 ftrace_exports_disable();
2403 int register_ftrace_export(struct trace_export *export)
2405 if (WARN_ON_ONCE(!export->write))
2408 mutex_lock(&ftrace_export_lock);
2410 add_ftrace_export(&ftrace_exports_list, export);
2412 mutex_unlock(&ftrace_export_lock);
2416 EXPORT_SYMBOL_GPL(register_ftrace_export);
2418 int unregister_ftrace_export(struct trace_export *export)
2422 mutex_lock(&ftrace_export_lock);
2424 ret = rm_ftrace_export(&ftrace_exports_list, export);
2426 mutex_unlock(&ftrace_export_lock);
2430 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
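/*
 * Illustrative sketch: a module can mirror every function trace entry
 * to another transport by registering a trace_export. The ->write()
 * signature below is inferred from the export->write(entry, size) call
 * in trace_process_export() above:
 *
 *	static void my_export_write(const void *entry, unsigned int size)
 *	{
 *		... push the raw entry somewhere else ...
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write = my_export_write,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */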
2433 trace_function(struct trace_array *tr,
2434 unsigned long ip, unsigned long parent_ip, unsigned long flags,
2437 struct trace_event_call *call = &event_function;
2438 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2439 struct ring_buffer_event *event;
2440 struct ftrace_entry *entry;
2442 event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2446 entry = ring_buffer_event_data(event);
2448 entry->parent_ip = parent_ip;
2450 if (!call_filter_check_discard(call, entry, buffer, event)) {
2451 if (static_branch_unlikely(&ftrace_exports_enabled))
2452 ftrace_exports(event);
2453 __buffer_unlock_commit(buffer, event);
2457 #ifdef CONFIG_STACKTRACE
2459 #define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
2460 struct ftrace_stack {
2461 unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
2464 static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
2465 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2467 static void __ftrace_trace_stack(struct ring_buffer *buffer,
2468 unsigned long flags,
2469 int skip, int pc, struct pt_regs *regs)
2471 struct trace_event_call *call = &event_kernel_stack;
2472 struct ring_buffer_event *event;
2473 struct stack_entry *entry;
2474 struct stack_trace trace;
2476 int size = FTRACE_STACK_ENTRIES;
2478 trace.nr_entries = 0;
2482 * Add two, for this function and the call to save_stack_trace().
2483 * If regs is set, then these functions will not be in the way.
2489 * Since events can happen in NMIs there's no safe way to
2490 * use the per cpu ftrace_stacks. We reserve it and if an interrupt
2491 * or NMI comes in, it will just have to use the default
2492 * FTRACE_STACK_SIZE.
2494 preempt_disable_notrace();
2496 use_stack = __this_cpu_inc_return(ftrace_stack_reserve);
2498 * We don't need any atomic variables, just a barrier.
2499 * If an interrupt comes in, we don't care, because it would
2500 * have exited and put the counter back to what we want.
2501 * We just need a barrier to keep gcc from moving things around.
2505 if (use_stack == 1) {
2506 trace.entries = this_cpu_ptr(ftrace_stack.calls);
2507 trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
2510 save_stack_trace_regs(regs, &trace);
2512 save_stack_trace(&trace);
2514 if (trace.nr_entries > size)
2515 size = trace.nr_entries;
2517 /* From now on, use_stack is a boolean */
2520 size *= sizeof(unsigned long);
2522 event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2523 sizeof(*entry) + size, flags, pc);
2526 entry = ring_buffer_event_data(event);
2528 memset(&entry->caller, 0, size);
2531 memcpy(&entry->caller, trace.entries,
2532 trace.nr_entries * sizeof(unsigned long));
2534 trace.max_entries = FTRACE_STACK_ENTRIES;
2535 trace.entries = entry->caller;
2537 save_stack_trace_regs(regs, &trace);
2539 save_stack_trace(&trace);
2542 entry->size = trace.nr_entries;
2544 if (!call_filter_check_discard(call, entry, buffer, event))
2545 __buffer_unlock_commit(buffer, event);
2548 /* Again, don't let gcc optimize things here */
2550 __this_cpu_dec(ftrace_stack_reserve);
2551 preempt_enable_notrace();
2555 static inline void ftrace_trace_stack(struct trace_array *tr,
2556 struct ring_buffer *buffer,
2557 unsigned long flags,
2558 int skip, int pc, struct pt_regs *regs)
2560 if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
2563 __ftrace_trace_stack(buffer, flags, skip, pc, regs);
2566 void __trace_stack(struct trace_array *tr, unsigned long flags, int skip,
2569 struct ring_buffer *buffer = tr->trace_buffer.buffer;
2571 if (rcu_is_watching()) {
2572 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2577 * When an NMI triggers, RCU is enabled via rcu_nmi_enter(),
2578 * but if the above rcu_is_watching() failed, then the NMI
2579 * triggered someplace critical, and rcu_irq_enter() should
2580 * not be called from NMI.
2582 if (unlikely(in_nmi()))
2586 * It is possible that a function is being traced in a
2587 * location that RCU is not watching. A call to
2588 * rcu_irq_enter() will make sure that it is, but there are
2589 * a few internal rcu functions that could be traced
2590 * where that won't work either. In those cases, we just do nothing.
2593 if (unlikely(rcu_irq_enter_disabled()))
2596 rcu_irq_enter_irqson();
2597 __ftrace_trace_stack(buffer, flags, skip, pc, NULL);
2598 rcu_irq_exit_irqson();
2602 * trace_dump_stack - record a stack back trace in the trace buffer
2603 * @skip: Number of functions to skip (helper handlers)
2605 void trace_dump_stack(int skip)
2607 unsigned long flags;
2609 if (tracing_disabled || tracing_selftest_running)
2612 local_save_flags(flags);
2615 * Skip 3 more, seems to get us at the caller of this function.
2619 __ftrace_trace_stack(global_trace.trace_buffer.buffer,
2620 flags, skip, preempt_count(), NULL);
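/*
 * Usage sketch (illustrative, not taken from a caller in this file): a
 * developer chasing an unexpected code path can record a backtrace into
 * the trace buffer instead of the console:
 *
 *	if (WARN_ON_ONCE(bad_state))	- "bad_state" is a made-up condition
 *		trace_dump_stack(0);
 *
 * A non-zero skip value hides that many additional callers from the
 * recorded stack.
 */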
2623 static DEFINE_PER_CPU(int, user_stack_count);
2626 ftrace_trace_userstack(struct ring_buffer *buffer, unsigned long flags, int pc)
2628 struct trace_event_call *call = &event_user_stack;
2629 struct ring_buffer_event *event;
2630 struct userstack_entry *entry;
2631 struct stack_trace trace;
2633 if (!(global_trace.trace_flags & TRACE_ITER_USERSTACKTRACE))
2637 * NMIs cannot handle page faults, even with fixups.
2638 * Saving the user stack can (and often does) fault.
2640 if (unlikely(in_nmi()))
2644 * prevent recursion, since the user stack tracing may
2645 * trigger other kernel events.
2648 if (__this_cpu_read(user_stack_count))
2651 __this_cpu_inc(user_stack_count);
2653 event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
2654 sizeof(*entry), flags, pc);
2656 goto out_drop_count;
2657 entry = ring_buffer_event_data(event);
2659 entry->tgid = current->tgid;
2660 memset(&entry->caller, 0, sizeof(entry->caller));
2662 trace.nr_entries = 0;
2663 trace.max_entries = FTRACE_STACK_ENTRIES;
2665 trace.entries = entry->caller;
2667 save_stack_trace_user(&trace);
2668 if (!call_filter_check_discard(call, entry, buffer, event))
2669 __buffer_unlock_commit(buffer, event);
2672 __this_cpu_dec(user_stack_count);
2678 static void __trace_userstack(struct trace_array *tr, unsigned long flags)
2680 ftrace_trace_userstack(tr, flags, preempt_count());
2684 #endif /* CONFIG_STACKTRACE */
2686 /* created for use with alloc_percpu */
2687 struct trace_buffer_struct {
2689 char buffer[4][TRACE_BUF_SIZE];
2692 static struct trace_buffer_struct *trace_percpu_buffer;
2695 * This allows for lockless recording. If we're nested too deeply, then
2696 * this returns NULL.
2698 static char *get_trace_buf(void)
2700 struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
2702 if (!buffer || buffer->nesting >= 4)
2705 return &buffer->buffer[buffer->nesting++][0];
2708 static void put_trace_buf(void)
2710 this_cpu_dec(trace_percpu_buffer->nesting);
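/*
 * The pair above gives each nesting level (task, softirq, hardirq, NMI) on
 * a CPU its own TRACE_BUF_SIZE scratch line, up to four levels deep. A
 * sketch of the calling pattern used by trace_vbprintk()/trace_vprintk()
 * further down (illustrative only):
 *
 *	preempt_disable_notrace();
 *	tbuffer = get_trace_buf();
 *	if (tbuffer) {
 *		len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
 *		- copy tbuffer into a reserved ring buffer event -
 *		put_trace_buf();
 *	}
 *	preempt_enable_notrace();
 */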
2713 static int alloc_percpu_trace_buffer(void)
2715 struct trace_buffer_struct *buffers;
2717 buffers = alloc_percpu(struct trace_buffer_struct);
2718 if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
2721 trace_percpu_buffer = buffers;
2725 static int buffers_allocated;
2727 void trace_printk_init_buffers(void)
2729 if (buffers_allocated)
2732 if (alloc_percpu_trace_buffer())
2735 /* trace_printk() is for debug use only. Don't use it in production. */
2738 pr_warn("**********************************************************\n");
2739 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2741 pr_warn("** trace_printk() being used. Allocating extra memory. **\n");
2743 pr_warn("** This means that this is a DEBUG kernel and it is **\n");
2744 pr_warn("** unsafe for production use. **\n");
2746 pr_warn("** If you see this message and you are not debugging **\n");
2747 pr_warn("** the kernel, report this immediately to your vendor! **\n");
2749 pr_warn("** NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE **\n");
2750 pr_warn("**********************************************************\n");
2752 /* Expand the buffers to set size */
2753 tracing_update_buffers();
2755 buffers_allocated = 1;
2758 * trace_printk_init_buffers() can be called by modules.
2759 * If that happens, then we need to start cmdline recording
2760 * directly here. If the global_trace.buffer is already
2761 * allocated here, then this was called by module code.
2763 if (global_trace.trace_buffer.buffer)
2764 tracing_start_cmdline_record();
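/*
 * trace_printk() is the debug-only user of these buffers. A typical
 * (illustrative) call from a development patch looks like:
 *
 *	trace_printk("submitting req %p, depth now %d\n", req, depth);
 *
 * "req" and "depth" are hypothetical locals; the output lands in the ring
 * buffer as a bprint/print event rather than in the printk log.
 */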
2767 void trace_printk_start_comm(void)
2769 /* Start tracing comms if trace printk is set */
2770 if (!buffers_allocated)
2772 tracing_start_cmdline_record();
2775 static void trace_printk_start_stop_comm(int enabled)
2777 if (!buffers_allocated)
2781 tracing_start_cmdline_record();
2783 tracing_stop_cmdline_record();
2787 * trace_vbprintk - write binary msg to tracing buffer
2790 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
2792 struct trace_event_call *call = &event_bprint;
2793 struct ring_buffer_event *event;
2794 struct ring_buffer *buffer;
2795 struct trace_array *tr = &global_trace;
2796 struct bprint_entry *entry;
2797 unsigned long flags;
2799 int len = 0, size, pc;
2801 if (unlikely(tracing_selftest_running || tracing_disabled))
2804 /* Don't pollute graph traces with trace_vprintk internals */
2805 pause_graph_tracing();
2807 pc = preempt_count();
2808 preempt_disable_notrace();
2810 tbuffer = get_trace_buf();
2816 len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
2818 if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
2821 local_save_flags(flags);
2822 size = sizeof(*entry) + sizeof(u32) * len;
2823 buffer = tr->trace_buffer.buffer;
2824 event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
2828 entry = ring_buffer_event_data(event);
2832 memcpy(entry->buf, tbuffer, sizeof(u32) * len);
2833 if (!call_filter_check_discard(call, entry, buffer, event)) {
2834 __buffer_unlock_commit(buffer, event);
2835 ftrace_trace_stack(tr, buffer, flags, 6, pc, NULL);
2842 preempt_enable_notrace();
2843 unpause_graph_tracing();
2847 EXPORT_SYMBOL_GPL(trace_vbprintk);
2850 __trace_array_vprintk(struct ring_buffer *buffer,
2851 unsigned long ip, const char *fmt, va_list args)
2853 struct trace_event_call *call = &event_print;
2854 struct ring_buffer_event *event;
2855 int len = 0, size, pc;
2856 struct print_entry *entry;
2857 unsigned long flags;
2860 if (tracing_disabled || tracing_selftest_running)
2863 /* Don't pollute graph traces with trace_vprintk internals */
2864 pause_graph_tracing();
2866 pc = preempt_count();
2867 preempt_disable_notrace();
2870 tbuffer = get_trace_buf();
2876 len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
2878 local_save_flags(flags);
2879 size = sizeof(*entry) + len + 1;
2880 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
2884 entry = ring_buffer_event_data(event);
2887 memcpy(&entry->buf, tbuffer, len + 1);
2888 if (!call_filter_check_discard(call, entry, buffer, event)) {
2889 __buffer_unlock_commit(buffer, event);
2890 ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
2897 preempt_enable_notrace();
2898 unpause_graph_tracing();
2903 int trace_array_vprintk(struct trace_array *tr,
2904 unsigned long ip, const char *fmt, va_list args)
2906 return __trace_array_vprintk(tr->trace_buffer.buffer, ip, fmt, args);
2909 int trace_array_printk(struct trace_array *tr,
2910 unsigned long ip, const char *fmt, ...)
2915 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2919 ret = trace_array_vprintk(tr, ip, fmt, ap);
2924 int trace_array_printk_buf(struct ring_buffer *buffer,
2925 unsigned long ip, const char *fmt, ...)
2930 if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
2934 ret = __trace_array_vprintk(buffer, ip, fmt, ap);
2939 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
2941 return trace_array_vprintk(&global_trace, ip, fmt, args);
2943 EXPORT_SYMBOL_GPL(trace_vprintk);
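/*
 * Sketch of writing into a specific instance buffer instead of the global
 * one (illustrative; "my_tr" stands for a struct trace_array pointer the
 * caller already holds for that instance):
 *
 *	trace_array_printk(my_tr, _THIS_IP_, "state=%d\n", state);
 *
 * Note that the TRACE_ITER_PRINTK test above is made against the global
 * trace flags, so these printk-style writes are gated globally.
 */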
2945 static void trace_iterator_increment(struct trace_iterator *iter)
2947 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
2951 ring_buffer_read(buf_iter, NULL);
2954 static struct trace_entry *
2955 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
2956 unsigned long *lost_events)
2958 struct ring_buffer_event *event;
2959 struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
2962 event = ring_buffer_iter_peek(buf_iter, ts);
2964 event = ring_buffer_peek(iter->trace_buffer->buffer, cpu, ts,
2968 iter->ent_size = ring_buffer_event_length(event);
2969 return ring_buffer_event_data(event);
2975 static struct trace_entry *
2976 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
2977 unsigned long *missing_events, u64 *ent_ts)
2979 struct ring_buffer *buffer = iter->trace_buffer->buffer;
2980 struct trace_entry *ent, *next = NULL;
2981 unsigned long lost_events = 0, next_lost = 0;
2982 int cpu_file = iter->cpu_file;
2983 u64 next_ts = 0, ts;
2989 * If we are in a per_cpu trace file, don't bother iterating over
2990 * all CPUs; peek at that CPU directly.
2992 if (cpu_file > RING_BUFFER_ALL_CPUS) {
2993 if (ring_buffer_empty_cpu(buffer, cpu_file))
2995 ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
2997 *ent_cpu = cpu_file;
3002 for_each_tracing_cpu(cpu) {
3004 if (ring_buffer_empty_cpu(buffer, cpu))
3007 ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3010 * Pick the entry with the smallest timestamp:
3012 if (ent && (!next || ts < next_ts)) {
3016 next_lost = lost_events;
3017 next_size = iter->ent_size;
3021 iter->ent_size = next_size;
3024 *ent_cpu = next_cpu;
3030 *missing_events = next_lost;
3035 /* Find the next real entry, without updating the iterator itself */
3036 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3037 int *ent_cpu, u64 *ent_ts)
3039 return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3042 /* Find the next real entry, and increment the iterator to the next entry */
3043 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3045 iter->ent = __find_next_entry(iter, &iter->cpu,
3046 &iter->lost_events, &iter->ts);
3049 trace_iterator_increment(iter);
3051 return iter->ent ? iter : NULL;
3054 static void trace_consume(struct trace_iterator *iter)
3056 ring_buffer_consume(iter->trace_buffer->buffer, iter->cpu, &iter->ts,
3057 &iter->lost_events);
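/*
 * Rough shape of a consuming read, as tracing_read_pipe() uses these
 * helpers later in this file (simplified sketch, locking and error
 * handling omitted):
 *
 *	while (trace_find_next_entry_inc(iter)) {
 *		print_trace_line(iter);
 *		trace_consume(iter);
 *	}
 *
 * The _inc variant advances the iterator state; trace_consume() also
 * removes the event from the ring buffer so it is not reported twice.
 */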
3060 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3062 struct trace_iterator *iter = m->private;
3066 WARN_ON_ONCE(iter->leftover);
3070 /* can't go backwards */
3075 ent = trace_find_next_entry_inc(iter);
3079 while (ent && iter->idx < i)
3080 ent = trace_find_next_entry_inc(iter);
3087 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3089 struct ring_buffer_event *event;
3090 struct ring_buffer_iter *buf_iter;
3091 unsigned long entries = 0;
3094 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = 0;
3096 buf_iter = trace_buffer_iter(iter, cpu);
3100 ring_buffer_iter_reset(buf_iter);
3103 * We could have the case with the max latency tracers
3104 * that a reset never took place on a cpu. This is evident
3105 * by the timestamp being before the start of the buffer.
3107 while ((event = ring_buffer_iter_peek(buf_iter, &ts))) {
3108 if (ts >= iter->trace_buffer->time_start)
3111 ring_buffer_read(buf_iter, NULL);
3114 per_cpu_ptr(iter->trace_buffer->data, cpu)->skipped_entries = entries;
3118 * The current tracer is copied to avoid taking a global lock all around.
3121 static void *s_start(struct seq_file *m, loff_t *pos)
3123 struct trace_iterator *iter = m->private;
3124 struct trace_array *tr = iter->tr;
3125 int cpu_file = iter->cpu_file;
3131 * copy the tracer to avoid using a global lock all around.
3132 * iter->trace is a copy of current_trace, the pointer to the
3133 * name may be used instead of a strcmp(), as iter->trace->name
3134 * will point to the same string as current_trace->name.
3136 mutex_lock(&trace_types_lock);
3137 if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
3138 *iter->trace = *tr->current_trace;
3139 mutex_unlock(&trace_types_lock);
3141 #ifdef CONFIG_TRACER_MAX_TRACE
3142 if (iter->snapshot && iter->trace->use_max_tr)
3143 return ERR_PTR(-EBUSY);
3146 if (!iter->snapshot)
3147 atomic_inc(&trace_record_cmdline_disabled);
3149 if (*pos != iter->pos) {
3154 if (cpu_file == RING_BUFFER_ALL_CPUS) {
3155 for_each_tracing_cpu(cpu)
3156 tracing_iter_reset(iter, cpu);
3158 tracing_iter_reset(iter, cpu_file);
3161 for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3166 * If we overflowed the seq_file before, then we want
3167 * to just reuse the trace_seq buffer again.
3173 p = s_next(m, p, &l);
3177 trace_event_read_lock();
3178 trace_access_lock(cpu_file);
3182 static void s_stop(struct seq_file *m, void *p)
3184 struct trace_iterator *iter = m->private;
3186 #ifdef CONFIG_TRACER_MAX_TRACE
3187 if (iter->snapshot && iter->trace->use_max_tr)
3191 if (!iter->snapshot)
3192 atomic_dec(&trace_record_cmdline_disabled);
3194 trace_access_unlock(iter->cpu_file);
3195 trace_event_read_unlock();
3199 get_total_entries(struct trace_buffer *buf,
3200 unsigned long *total, unsigned long *entries)
3202 unsigned long count;
3208 for_each_tracing_cpu(cpu) {
3209 count = ring_buffer_entries_cpu(buf->buffer, cpu);
3211 * If this buffer has skipped entries, then we hold all
3212 * entries for the trace and we need to ignore the
3213 * ones before the time stamp.
3215 if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3216 count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3217 /* total is the same as the entries */
3221 ring_buffer_overrun_cpu(buf->buffer, cpu);
3226 static void print_lat_help_header(struct seq_file *m)
3228 seq_puts(m, "# _------=> CPU# \n"
3229 "# / _-----=> irqs-off \n"
3230 "# | / _----=> need-resched \n"
3231 "# || / _---=> hardirq/softirq \n"
3232 "# ||| / _--=> preempt-depth \n"
3234 "# cmd pid ||||| time | caller \n"
3235 "# \\ / ||||| \\ | / \n");
3238 static void print_event_info(struct trace_buffer *buf, struct seq_file *m)
3240 unsigned long total;
3241 unsigned long entries;
3243 get_total_entries(buf, &total, &entries);
3244 seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu #P:%d\n",
3245 entries, total, num_online_cpus());
3249 static void print_func_help_header(struct trace_buffer *buf, struct seq_file *m)
3251 print_event_info(buf, m);
3252 seq_puts(m, "# TASK-PID CPU# TIMESTAMP FUNCTION\n"
3256 static void print_func_help_header_irq(struct trace_buffer *buf, struct seq_file *m)
3258 print_event_info(buf, m);
3259 seq_puts(m, "# _-----=> irqs-off\n"
3260 "# / _----=> need-resched\n"
3261 "# | / _---=> hardirq/softirq\n"
3262 "# || / _--=> preempt-depth\n"
3264 "# TASK-PID CPU# |||| TIMESTAMP FUNCTION\n"
3265 "# | | | |||| | |\n");
3269 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
3271 unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
3272 struct trace_buffer *buf = iter->trace_buffer;
3273 struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
3274 struct tracer *type = iter->trace;
3275 unsigned long entries;
3276 unsigned long total;
3277 const char *name = "preemption";
3281 get_total_entries(buf, &total, &entries);
3283 seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
3285 seq_puts(m, "# -----------------------------------"
3286 "---------------------------------\n");
3287 seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
3288 " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
3289 nsecs_to_usecs(data->saved_latency),
3293 #if defined(CONFIG_PREEMPT_NONE)
3295 #elif defined(CONFIG_PREEMPT_VOLUNTARY)
3297 #elif defined(CONFIG_PREEMPT)
3302 /* These are reserved for later use */
3305 seq_printf(m, " #P:%d)\n", num_online_cpus());
3309 seq_puts(m, "# -----------------\n");
3310 seq_printf(m, "# | task: %.16s-%d "
3311 "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
3312 data->comm, data->pid,
3313 from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
3314 data->policy, data->rt_priority);
3315 seq_puts(m, "# -----------------\n");
3317 if (data->critical_start) {
3318 seq_puts(m, "# => started at: ");
3319 seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
3320 trace_print_seq(m, &iter->seq);
3321 seq_puts(m, "\n# => ended at: ");
3322 seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
3323 trace_print_seq(m, &iter->seq);
3324 seq_puts(m, "\n#\n");
3330 static void test_cpu_buff_start(struct trace_iterator *iter)
3332 struct trace_seq *s = &iter->seq;
3333 struct trace_array *tr = iter->tr;
3335 if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
3338 if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
3341 if (cpumask_available(iter->started) &&
3342 cpumask_test_cpu(iter->cpu, iter->started))
3345 if (per_cpu_ptr(iter->trace_buffer->data, iter->cpu)->skipped_entries)
3348 if (cpumask_available(iter->started))
3349 cpumask_set_cpu(iter->cpu, iter->started);
3351 /* Don't print started cpu buffer for the first entry of the trace */
3353 trace_seq_printf(s, "##### CPU %u buffer started ####\n",
3357 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
3359 struct trace_array *tr = iter->tr;
3360 struct trace_seq *s = &iter->seq;
3361 unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
3362 struct trace_entry *entry;
3363 struct trace_event *event;
3367 test_cpu_buff_start(iter);
3369 event = ftrace_find_event(entry->type);
3371 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3372 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3373 trace_print_lat_context(iter);
3375 trace_print_context(iter);
3378 if (trace_seq_has_overflowed(s))
3379 return TRACE_TYPE_PARTIAL_LINE;
3382 return event->funcs->trace(iter, sym_flags, event);
3384 trace_seq_printf(s, "Unknown type %d\n", entry->type);
3386 return trace_handle_return(s);
3389 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
3391 struct trace_array *tr = iter->tr;
3392 struct trace_seq *s = &iter->seq;
3393 struct trace_entry *entry;
3394 struct trace_event *event;
3398 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
3399 trace_seq_printf(s, "%d %d %llu ",
3400 entry->pid, iter->cpu, iter->ts);
3402 if (trace_seq_has_overflowed(s))
3403 return TRACE_TYPE_PARTIAL_LINE;
3405 event = ftrace_find_event(entry->type);
3407 return event->funcs->raw(iter, 0, event);
3409 trace_seq_printf(s, "%d ?\n", entry->type);
3411 return trace_handle_return(s);
3414 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
3416 struct trace_array *tr = iter->tr;
3417 struct trace_seq *s = &iter->seq;
3418 unsigned char newline = '\n';
3419 struct trace_entry *entry;
3420 struct trace_event *event;
3424 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3425 SEQ_PUT_HEX_FIELD(s, entry->pid);
3426 SEQ_PUT_HEX_FIELD(s, iter->cpu);
3427 SEQ_PUT_HEX_FIELD(s, iter->ts);
3428 if (trace_seq_has_overflowed(s))
3429 return TRACE_TYPE_PARTIAL_LINE;
3432 event = ftrace_find_event(entry->type);
3434 enum print_line_t ret = event->funcs->hex(iter, 0, event);
3435 if (ret != TRACE_TYPE_HANDLED)
3439 SEQ_PUT_FIELD(s, newline);
3441 return trace_handle_return(s);
3444 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
3446 struct trace_array *tr = iter->tr;
3447 struct trace_seq *s = &iter->seq;
3448 struct trace_entry *entry;
3449 struct trace_event *event;
3453 if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
3454 SEQ_PUT_FIELD(s, entry->pid);
3455 SEQ_PUT_FIELD(s, iter->cpu);
3456 SEQ_PUT_FIELD(s, iter->ts);
3457 if (trace_seq_has_overflowed(s))
3458 return TRACE_TYPE_PARTIAL_LINE;
3461 event = ftrace_find_event(entry->type);
3462 return event ? event->funcs->binary(iter, 0, event) :
3466 int trace_empty(struct trace_iterator *iter)
3468 struct ring_buffer_iter *buf_iter;
3471 /* If we are looking at one CPU buffer, only check that one */
3472 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
3473 cpu = iter->cpu_file;
3474 buf_iter = trace_buffer_iter(iter, cpu);
3476 if (!ring_buffer_iter_empty(buf_iter))
3479 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3485 for_each_tracing_cpu(cpu) {
3486 buf_iter = trace_buffer_iter(iter, cpu);
3488 if (!ring_buffer_iter_empty(buf_iter))
3491 if (!ring_buffer_empty_cpu(iter->trace_buffer->buffer, cpu))
3499 /* Called with trace_event_read_lock() held. */
3500 enum print_line_t print_trace_line(struct trace_iterator *iter)
3502 struct trace_array *tr = iter->tr;
3503 unsigned long trace_flags = tr->trace_flags;
3504 enum print_line_t ret;
3506 if (iter->lost_events) {
3507 trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
3508 iter->cpu, iter->lost_events);
3509 if (trace_seq_has_overflowed(&iter->seq))
3510 return TRACE_TYPE_PARTIAL_LINE;
3513 if (iter->trace && iter->trace->print_line) {
3514 ret = iter->trace->print_line(iter);
3515 if (ret != TRACE_TYPE_UNHANDLED)
3519 if (iter->ent->type == TRACE_BPUTS &&
3520 trace_flags & TRACE_ITER_PRINTK &&
3521 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3522 return trace_print_bputs_msg_only(iter);
3524 if (iter->ent->type == TRACE_BPRINT &&
3525 trace_flags & TRACE_ITER_PRINTK &&
3526 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3527 return trace_print_bprintk_msg_only(iter);
3529 if (iter->ent->type == TRACE_PRINT &&
3530 trace_flags & TRACE_ITER_PRINTK &&
3531 trace_flags & TRACE_ITER_PRINTK_MSGONLY)
3532 return trace_print_printk_msg_only(iter);
3534 if (trace_flags & TRACE_ITER_BIN)
3535 return print_bin_fmt(iter);
3537 if (trace_flags & TRACE_ITER_HEX)
3538 return print_hex_fmt(iter);
3540 if (trace_flags & TRACE_ITER_RAW)
3541 return print_raw_fmt(iter);
3543 return print_trace_fmt(iter);
3546 void trace_latency_header(struct seq_file *m)
3548 struct trace_iterator *iter = m->private;
3549 struct trace_array *tr = iter->tr;
3551 /* print nothing if the buffers are empty */
3552 if (trace_empty(iter))
3555 if (iter->iter_flags & TRACE_FILE_LAT_FMT)
3556 print_trace_header(m, iter);
3558 if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
3559 print_lat_help_header(m);
3562 void trace_default_header(struct seq_file *m)
3564 struct trace_iterator *iter = m->private;
3565 struct trace_array *tr = iter->tr;
3566 unsigned long trace_flags = tr->trace_flags;
3568 if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
3571 if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
3572 /* print nothing if the buffers are empty */
3573 if (trace_empty(iter))
3575 print_trace_header(m, iter);
3576 if (!(trace_flags & TRACE_ITER_VERBOSE))
3577 print_lat_help_header(m);
3579 if (!(trace_flags & TRACE_ITER_VERBOSE)) {
3580 if (trace_flags & TRACE_ITER_IRQ_INFO)
3581 print_func_help_header_irq(iter->trace_buffer, m);
3583 print_func_help_header(iter->trace_buffer, m);
3588 static void test_ftrace_alive(struct seq_file *m)
3590 if (!ftrace_is_dead())
3592 seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
3593 "# MAY BE MISSING FUNCTION EVENTS\n");
3596 #ifdef CONFIG_TRACER_MAX_TRACE
3597 static void show_snapshot_main_help(struct seq_file *m)
3599 seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
3600 "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3601 "# Takes a snapshot of the main buffer.\n"
3602 "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
3603 "# (Doesn't have to be '2'; works with any number that\n"
3604 "# is not a '0' or '1')\n");
3607 static void show_snapshot_percpu_help(struct seq_file *m)
3609 seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
3610 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
3611 seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
3612 "# Takes a snapshot of the main buffer for this cpu.\n");
3614 seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
3615 "# Must use main snapshot file to allocate.\n");
3617 seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
3618 "# (Doesn't have to be '2'; works with any number that\n"
3619 "# is not a '0' or '1')\n");
3622 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
3624 if (iter->tr->allocated_snapshot)
3625 seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
3627 seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
3629 seq_puts(m, "# Snapshot commands:\n");
3630 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
3631 show_snapshot_main_help(m);
3633 show_snapshot_percpu_help(m);
3636 /* Should never be called */
3637 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
3640 static int s_show(struct seq_file *m, void *v)
3642 struct trace_iterator *iter = v;
3645 if (iter->ent == NULL) {
3647 seq_printf(m, "# tracer: %s\n", iter->trace->name);
3649 test_ftrace_alive(m);
3651 if (iter->snapshot && trace_empty(iter))
3652 print_snapshot_help(m, iter);
3653 else if (iter->trace && iter->trace->print_header)
3654 iter->trace->print_header(m);
3656 trace_default_header(m);
3658 } else if (iter->leftover) {
3660 * If we filled the seq_file buffer earlier, we
3661 * want to just show it now.
3663 ret = trace_print_seq(m, &iter->seq);
3665 /* ret should this time be zero, but you never know */
3666 iter->leftover = ret;
3669 print_trace_line(iter);
3670 ret = trace_print_seq(m, &iter->seq);
3672 * If we overflow the seq_file buffer, then it will
3673 * ask us for this data again at start up.
3675 * ret is 0 if seq_file write succeeded.
3678 iter->leftover = ret;
3685 * Should be used after trace_array_get(), trace_types_lock
3686 * ensures that i_cdev was already initialized.
3688 static inline int tracing_get_cpu(struct inode *inode)
3690 if (inode->i_cdev) /* See trace_create_cpu_file() */
3691 return (long)inode->i_cdev - 1;
3692 return RING_BUFFER_ALL_CPUS;
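/*
 * The "- 1" above undoes the encoding done by trace_create_cpu_file()
 * later in this file, which stores (cpu + 1) in i_cdev so that a NULL
 * i_cdev keeps meaning "all CPUs". For example, the inode behind
 * per_cpu/cpu2/trace decodes as (long)inode->i_cdev - 1 == 2.
 */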
3695 static const struct seq_operations tracer_seq_ops = {
3702 static struct trace_iterator *
3703 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
3705 struct trace_array *tr = inode->i_private;
3706 struct trace_iterator *iter;
3709 if (tracing_disabled)
3710 return ERR_PTR(-ENODEV);
3712 iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
3714 return ERR_PTR(-ENOMEM);
3716 iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
3718 if (!iter->buffer_iter)
3722 * We make a copy of the current tracer to avoid concurrent
3723 * changes on it while we are reading.
3725 mutex_lock(&trace_types_lock);
3726 iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL);
3730 *iter->trace = *tr->current_trace;
3732 if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
3737 #ifdef CONFIG_TRACER_MAX_TRACE
3738 /* Currently only the top directory has a snapshot */
3739 if (tr->current_trace->print_max || snapshot)
3740 iter->trace_buffer = &tr->max_buffer;
3743 iter->trace_buffer = &tr->trace_buffer;
3744 iter->snapshot = snapshot;
3746 iter->cpu_file = tracing_get_cpu(inode);
3747 mutex_init(&iter->mutex);
3749 /* Notify the tracer early; before we stop tracing. */
3750 if (iter->trace && iter->trace->open)
3751 iter->trace->open(iter);
3753 /* Annotate start of buffers if we had overruns */
3754 if (ring_buffer_overruns(iter->trace_buffer->buffer))
3755 iter->iter_flags |= TRACE_FILE_ANNOTATE;
3757 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
3758 if (trace_clocks[tr->clock_id].in_ns)
3759 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
3761 /* stop the trace while dumping if we are not opening "snapshot" */
3762 if (!iter->snapshot)
3763 tracing_stop_tr(tr);
3765 if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
3766 for_each_tracing_cpu(cpu) {
3767 iter->buffer_iter[cpu] =
3768 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3770 ring_buffer_read_prepare_sync();
3771 for_each_tracing_cpu(cpu) {
3772 ring_buffer_read_start(iter->buffer_iter[cpu]);
3773 tracing_iter_reset(iter, cpu);
3776 cpu = iter->cpu_file;
3777 iter->buffer_iter[cpu] =
3778 ring_buffer_read_prepare(iter->trace_buffer->buffer, cpu);
3779 ring_buffer_read_prepare_sync();
3780 ring_buffer_read_start(iter->buffer_iter[cpu]);
3781 tracing_iter_reset(iter, cpu);
3784 mutex_unlock(&trace_types_lock);
3789 mutex_unlock(&trace_types_lock);
3791 kfree(iter->buffer_iter);
3793 seq_release_private(inode, file);
3794 return ERR_PTR(-ENOMEM);
3797 int tracing_open_generic(struct inode *inode, struct file *filp)
3799 if (tracing_disabled)
3802 filp->private_data = inode->i_private;
3806 bool tracing_is_disabled(void)
3808 return (tracing_disabled) ? true: false;
3812 * Open and update trace_array ref count.
3813 * Must have the current trace_array passed to it.
3815 static int tracing_open_generic_tr(struct inode *inode, struct file *filp)
3817 struct trace_array *tr = inode->i_private;
3819 if (tracing_disabled)
3822 if (trace_array_get(tr) < 0)
3825 filp->private_data = inode->i_private;
3830 static int tracing_release(struct inode *inode, struct file *file)
3832 struct trace_array *tr = inode->i_private;
3833 struct seq_file *m = file->private_data;
3834 struct trace_iterator *iter;
3837 if (!(file->f_mode & FMODE_READ)) {
3838 trace_array_put(tr);
3842 /* Writes do not use seq_file */
3844 mutex_lock(&trace_types_lock);
3846 for_each_tracing_cpu(cpu) {
3847 if (iter->buffer_iter[cpu])
3848 ring_buffer_read_finish(iter->buffer_iter[cpu]);
3851 if (iter->trace && iter->trace->close)
3852 iter->trace->close(iter);
3854 if (!iter->snapshot)
3855 /* reenable tracing if it was previously enabled */
3856 tracing_start_tr(tr);
3858 __trace_array_put(tr);
3860 mutex_unlock(&trace_types_lock);
3862 mutex_destroy(&iter->mutex);
3863 free_cpumask_var(iter->started);
3865 kfree(iter->buffer_iter);
3866 seq_release_private(inode, file);
3871 static int tracing_release_generic_tr(struct inode *inode, struct file *file)
3873 struct trace_array *tr = inode->i_private;
3875 trace_array_put(tr);
3879 static int tracing_single_release_tr(struct inode *inode, struct file *file)
3881 struct trace_array *tr = inode->i_private;
3883 trace_array_put(tr);
3885 return single_release(inode, file);
3888 static int tracing_open(struct inode *inode, struct file *file)
3890 struct trace_array *tr = inode->i_private;
3891 struct trace_iterator *iter;
3894 if (trace_array_get(tr) < 0)
3897 /* If this file was open for write, then erase contents */
3898 if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
3899 int cpu = tracing_get_cpu(inode);
3901 if (cpu == RING_BUFFER_ALL_CPUS)
3902 tracing_reset_online_cpus(&tr->trace_buffer);
3904 tracing_reset(&tr->trace_buffer, cpu);
3907 if (file->f_mode & FMODE_READ) {
3908 iter = __tracing_open(inode, file, false);
3910 ret = PTR_ERR(iter);
3911 else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
3912 iter->iter_flags |= TRACE_FILE_LAT_FMT;
3916 trace_array_put(tr);
3922 * Some tracers are not suitable for instance buffers.
3923 * A tracer is always available for the global array (toplevel)
3924 * or if it explicitly states that it is.
3927 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
3929 return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
3932 /* Find the next tracer that this trace array may use */
3933 static struct tracer *
3934 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
3936 while (t && !trace_ok_for_array(t, tr))
3943 t_next(struct seq_file *m, void *v, loff_t *pos)
3945 struct trace_array *tr = m->private;
3946 struct tracer *t = v;
3951 t = get_tracer_for_array(tr, t->next);
3956 static void *t_start(struct seq_file *m, loff_t *pos)
3958 struct trace_array *tr = m->private;
3962 mutex_lock(&trace_types_lock);
3964 t = get_tracer_for_array(tr, trace_types);
3965 for (; t && l < *pos; t = t_next(m, t, &l))
3971 static void t_stop(struct seq_file *m, void *p)
3973 mutex_unlock(&trace_types_lock);
3976 static int t_show(struct seq_file *m, void *v)
3978 struct tracer *t = v;
3983 seq_puts(m, t->name);
3992 static const struct seq_operations show_traces_seq_ops = {
3999 static int show_traces_open(struct inode *inode, struct file *file)
4001 struct trace_array *tr = inode->i_private;
4005 if (tracing_disabled)
4008 ret = seq_open(file, &show_traces_seq_ops);
4012 m = file->private_data;
4019 tracing_write_stub(struct file *filp, const char __user *ubuf,
4020 size_t count, loff_t *ppos)
4025 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4029 if (file->f_mode & FMODE_READ)
4030 ret = seq_lseek(file, offset, whence);
4032 file->f_pos = ret = 0;
4037 static const struct file_operations tracing_fops = {
4038 .open = tracing_open,
4040 .write = tracing_write_stub,
4041 .llseek = tracing_lseek,
4042 .release = tracing_release,
4045 static const struct file_operations show_traces_fops = {
4046 .open = show_traces_open,
4048 .release = seq_release,
4049 .llseek = seq_lseek,
4053 * The tracer itself will not take this lock, but still we want
4054 * to provide a consistent cpumask to user-space:
4056 static DEFINE_MUTEX(tracing_cpumask_update_lock);
4059 * Temporary storage for the character representation of the
4060 * CPU bitmask (and one more byte for the newline):
4062 static char mask_str[NR_CPUS + 1];
4065 tracing_cpumask_read(struct file *filp, char __user *ubuf,
4066 size_t count, loff_t *ppos)
4068 struct trace_array *tr = file_inode(filp)->i_private;
4071 mutex_lock(&tracing_cpumask_update_lock);
4073 len = snprintf(mask_str, count, "%*pb\n",
4074 cpumask_pr_args(tr->tracing_cpumask));
4079 count = simple_read_from_buffer(ubuf, count, ppos, mask_str, NR_CPUS+1);
4082 mutex_unlock(&tracing_cpumask_update_lock);
4088 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
4089 size_t count, loff_t *ppos)
4091 struct trace_array *tr = file_inode(filp)->i_private;
4092 cpumask_var_t tracing_cpumask_new;
4095 if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
4098 err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
4102 mutex_lock(&tracing_cpumask_update_lock);
4104 local_irq_disable();
4105 arch_spin_lock(&tr->max_lock);
4106 for_each_tracing_cpu(cpu) {
4108 * Increase/decrease the disabled counter if we are
4109 * about to flip a bit in the cpumask:
4111 if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4112 !cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4113 atomic_inc(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4114 ring_buffer_record_disable_cpu(tr->trace_buffer.buffer, cpu);
4116 if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
4117 cpumask_test_cpu(cpu, tracing_cpumask_new)) {
4118 atomic_dec(&per_cpu_ptr(tr->trace_buffer.data, cpu)->disabled);
4119 ring_buffer_record_enable_cpu(tr->trace_buffer.buffer, cpu);
4122 arch_spin_unlock(&tr->max_lock);
4125 cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
4127 mutex_unlock(&tracing_cpumask_update_lock);
4128 free_cpumask_var(tracing_cpumask_new);
4133 free_cpumask_var(tracing_cpumask_new);
4138 static const struct file_operations tracing_cpumask_fops = {
4139 .open = tracing_open_generic_tr,
4140 .read = tracing_cpumask_read,
4141 .write = tracing_cpumask_write,
4142 .release = tracing_release_generic_tr,
4143 .llseek = generic_file_llseek,
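/*
 * Illustrative shell usage of the tracing_cpumask file these handlers
 * back (path assumes tracefs mounted at /sys/kernel/tracing):
 *
 *	echo 3 > /sys/kernel/tracing/tracing_cpumask	- trace CPUs 0 and 1
 *	cat /sys/kernel/tracing/tracing_cpumask
 *
 * The value is parsed by cpumask_parse_user(), i.e. as a hexadecimal CPU
 * bitmask.
 */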
4146 static int tracing_trace_options_show(struct seq_file *m, void *v)
4148 struct tracer_opt *trace_opts;
4149 struct trace_array *tr = m->private;
4153 mutex_lock(&trace_types_lock);
4154 tracer_flags = tr->current_trace->flags->val;
4155 trace_opts = tr->current_trace->flags->opts;
4157 for (i = 0; trace_options[i]; i++) {
4158 if (tr->trace_flags & (1 << i))
4159 seq_printf(m, "%s\n", trace_options[i]);
4161 seq_printf(m, "no%s\n", trace_options[i]);
4164 for (i = 0; trace_opts[i].name; i++) {
4165 if (tracer_flags & trace_opts[i].bit)
4166 seq_printf(m, "%s\n", trace_opts[i].name);
4168 seq_printf(m, "no%s\n", trace_opts[i].name);
4170 mutex_unlock(&trace_types_lock);
4175 static int __set_tracer_option(struct trace_array *tr,
4176 struct tracer_flags *tracer_flags,
4177 struct tracer_opt *opts, int neg)
4179 struct tracer *trace = tracer_flags->trace;
4182 ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
4187 tracer_flags->val &= ~opts->bit;
4189 tracer_flags->val |= opts->bit;
4193 /* Try to assign a tracer specific option */
4194 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
4196 struct tracer *trace = tr->current_trace;
4197 struct tracer_flags *tracer_flags = trace->flags;
4198 struct tracer_opt *opts = NULL;
4201 for (i = 0; tracer_flags->opts[i].name; i++) {
4202 opts = &tracer_flags->opts[i];
4204 if (strcmp(cmp, opts->name) == 0)
4205 return __set_tracer_option(tr, trace->flags, opts, neg);
4211 /* Some tracers require overwrite to stay enabled */
4212 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
4214 if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
4220 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
4222 /* do nothing if flag is already set */
4223 if (!!(tr->trace_flags & mask) == !!enabled)
4226 /* Give the tracer a chance to approve the change */
4227 if (tr->current_trace->flag_changed)
4228 if (tr->current_trace->flag_changed(tr, mask, !!enabled))
4232 tr->trace_flags |= mask;
4234 tr->trace_flags &= ~mask;
4236 if (mask == TRACE_ITER_RECORD_CMD)
4237 trace_event_enable_cmd_record(enabled);
4239 if (mask == TRACE_ITER_EVENT_FORK)
4240 trace_event_follow_fork(tr, enabled);
4242 if (mask == TRACE_ITER_FUNC_FORK)
4243 ftrace_pid_follow_fork(tr, enabled);
4245 if (mask == TRACE_ITER_OVERWRITE) {
4246 ring_buffer_change_overwrite(tr->trace_buffer.buffer, enabled);
4247 #ifdef CONFIG_TRACER_MAX_TRACE
4248 ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
4252 if (mask == TRACE_ITER_PRINTK) {
4253 trace_printk_start_stop_comm(enabled);
4254 trace_printk_control(enabled);
4260 static int trace_set_options(struct trace_array *tr, char *option)
4266 size_t orig_len = strlen(option);
4268 cmp = strstrip(option);
4270 if (strncmp(cmp, "no", 2) == 0) {
4275 mutex_lock(&trace_types_lock);
4277 for (i = 0; trace_options[i]; i++) {
4278 if (strcmp(cmp, trace_options[i]) == 0) {
4279 ret = set_tracer_flag(tr, 1 << i, !neg);
4284 /* If no option could be set, test the specific tracer options */
4285 if (!trace_options[i])
4286 ret = set_tracer_option(tr, cmp, neg);
4288 mutex_unlock(&trace_types_lock);
4291 * If the first trailing whitespace is replaced with '\0' by strstrip,
4292 * turn it back into a space.
4294 if (orig_len > strlen(option))
4295 option[strlen(option)] = ' ';
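/*
 * Illustrative examples of the option strings parsed here; the same names
 * work on the kernel command line via trace_options= (handled by
 * apply_trace_boot_options() below) and at run time through the
 * trace_options file:
 *
 *	trace_options=stacktrace,nosym-offset		- boot parameter
 *	echo nostacktrace > /sys/kernel/tracing/trace_options
 */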
4300 static void __init apply_trace_boot_options(void)
4302 char *buf = trace_boot_options_buf;
4306 option = strsep(&buf, ",");
4312 trace_set_options(&global_trace, option);
4314 /* Put back the comma to allow this to be called again */
4321 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
4322 size_t cnt, loff_t *ppos)
4324 struct seq_file *m = filp->private_data;
4325 struct trace_array *tr = m->private;
4329 if (cnt >= sizeof(buf))
4332 if (copy_from_user(buf, ubuf, cnt))
4337 ret = trace_set_options(tr, buf);
4346 static int tracing_trace_options_open(struct inode *inode, struct file *file)
4348 struct trace_array *tr = inode->i_private;
4351 if (tracing_disabled)
4354 if (trace_array_get(tr) < 0)
4357 ret = single_open(file, tracing_trace_options_show, inode->i_private);
4359 trace_array_put(tr);
4364 static const struct file_operations tracing_iter_fops = {
4365 .open = tracing_trace_options_open,
4367 .llseek = seq_lseek,
4368 .release = tracing_single_release_tr,
4369 .write = tracing_trace_options_write,
4372 static const char readme_msg[] =
4373 "tracing mini-HOWTO:\n\n"
4374 "# echo 0 > tracing_on : quick way to disable tracing\n"
4375 "# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
4376 " Important files:\n"
4377 " trace\t\t\t- The static contents of the buffer\n"
4378 "\t\t\t To clear the buffer write into this file: echo > trace\n"
4379 " trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
4380 " current_tracer\t- function and latency tracers\n"
4381 " available_tracers\t- list of configured tracers for current_tracer\n"
4382 " buffer_size_kb\t- view and modify size of per cpu buffer\n"
4383 " buffer_total_size_kb - view total size of all cpu buffers\n\n"
4384 " trace_clock\t\t- change the clock used to order events\n"
4385 " local: Per cpu clock but may not be synced across CPUs\n"
4386 " global: Synced across CPUs but slows tracing down.\n"
4387 " counter: Not a clock, but just an increment\n"
4388 " uptime: Jiffy counter from time of boot\n"
4389 " perf: Same clock that perf events use\n"
4390 #ifdef CONFIG_X86_64
4391 " x86-tsc: TSC cycle counter\n"
4393 "\n trace_marker\t\t- Writing into this file writes into the kernel buffer\n"
4394 "\n trace_marker_raw\t\t- Writing into this file writes binary data into the kernel buffer\n"
4395 " tracing_cpumask\t- Limit which CPUs to trace\n"
4396 " instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
4397 "\t\t\t Remove sub-buffer with rmdir\n"
4398 " trace_options\t\t- Set format or modify how tracing happens\n"
4399 "\t\t\t Disable an option by adding a suffix 'no' to the\n"
4400 "\t\t\t option name\n"
4401 " saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
4402 #ifdef CONFIG_DYNAMIC_FTRACE
4403 "\n available_filter_functions - list of functions that can be filtered on\n"
4404 " set_ftrace_filter\t- echo function name in here to only trace these\n"
4405 "\t\t\t functions\n"
4406 "\t accepts: func_full_name or glob-matching-pattern\n"
4407 "\t modules: Can select a group via module\n"
4408 "\t Format: :mod:<module-name>\n"
4409 "\t example: echo :mod:ext3 > set_ftrace_filter\n"
4410 "\t triggers: a command to perform when function is hit\n"
4411 "\t Format: <function>:<trigger>[:count]\n"
4412 "\t trigger: traceon, traceoff\n"
4413 "\t\t enable_event:<system>:<event>\n"
4414 "\t\t disable_event:<system>:<event>\n"
4415 #ifdef CONFIG_STACKTRACE
4418 #ifdef CONFIG_TRACER_SNAPSHOT
4423 "\t example: echo do_fault:traceoff > set_ftrace_filter\n"
4424 "\t echo do_trap:traceoff:3 > set_ftrace_filter\n"
4425 "\t The first one will disable tracing every time do_fault is hit\n"
4426 "\t The second will disable tracing at most 3 times when do_trap is hit\n"
4427 "\t The first time do_trap is hit and it disables tracing, the\n"
4428 "\t counter will decrement to 2. If tracing is already disabled,\n"
4429 "\t the counter will not decrement. It only decrements when the\n"
4430 "\t trigger did work\n"
4431 "\t To remove trigger without count:\n"
4432 "\t echo '!<function>:<trigger> > set_ftrace_filter\n"
4433 "\t To remove trigger with a count:\n"
4434 "\t echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
4435 " set_ftrace_notrace\t- echo function name in here to never trace.\n"
4436 "\t accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
4437 "\t modules: Can select a group via module command :mod:\n"
4438 "\t Does not accept triggers\n"
4439 #endif /* CONFIG_DYNAMIC_FTRACE */
4440 #ifdef CONFIG_FUNCTION_TRACER
4441 " set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
4444 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
4445 " set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
4446 " set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
4447 " max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
4449 #ifdef CONFIG_TRACER_SNAPSHOT
4450 "\n snapshot\t\t- Like 'trace' but shows the content of the static\n"
4451 "\t\t\t snapshot buffer. Read the contents for more\n"
4452 "\t\t\t information\n"
4454 #ifdef CONFIG_STACK_TRACER
4455 " stack_trace\t\t- Shows the max stack trace when active\n"
4456 " stack_max_size\t- Shows current max stack size that was traced\n"
4457 "\t\t\t Write into this file to reset the max size (trigger a\n"
4458 "\t\t\t new trace)\n"
4459 #ifdef CONFIG_DYNAMIC_FTRACE
4460 " stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
4463 #endif /* CONFIG_STACK_TRACER */
4464 #ifdef CONFIG_KPROBE_EVENTS
4465 " kprobe_events\t\t- Add/remove/show the kernel dynamic events\n"
4466 "\t\t\t Write into this file to define/undefine new trace events.\n"
4468 #ifdef CONFIG_UPROBE_EVENTS
4469 " uprobe_events\t\t- Add/remove/show the userspace dynamic events\n"
4470 "\t\t\t Write into this file to define/undefine new trace events.\n"
4472 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
4473 "\t accepts: event-definitions (one definition per line)\n"
4474 "\t Format: p|r[:[<group>/]<event>] <place> [<args>]\n"
4475 "\t -:[<group>/]<event>\n"
4476 #ifdef CONFIG_KPROBE_EVENTS
4477 "\t place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4478 "place (kretprobe): [<module>:]<symbol>[+<offset>]|<memaddr>\n"
4480 #ifdef CONFIG_UPROBE_EVENTS
4481 "\t place: <path>:<offset>\n"
4483 "\t args: <name>=fetcharg[:type]\n"
4484 "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
4485 "\t $stack<index>, $stack, $retval, $comm\n"
4486 "\t type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string,\n"
4487 "\t b<bit-width>@<bit-offset>/<container-size>\n"
4489 " events/\t\t- Directory containing all trace event subsystems:\n"
4490 " enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
4491 " events/<system>/\t- Directory containing all trace events for <system>:\n"
4492 " enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
4494 " filter\t\t- If set, only events passing filter are traced\n"
4495 " events/<system>/<event>/\t- Directory containing control files for\n"
4497 " enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
4498 " filter\t\t- If set, only events passing filter are traced\n"
4499 " trigger\t\t- If set, a command to perform when event is hit\n"
4500 "\t Format: <trigger>[:count][if <filter>]\n"
4501 "\t trigger: traceon, traceoff\n"
4502 "\t enable_event:<system>:<event>\n"
4503 "\t disable_event:<system>:<event>\n"
4504 #ifdef CONFIG_HIST_TRIGGERS
4505 "\t enable_hist:<system>:<event>\n"
4506 "\t disable_hist:<system>:<event>\n"
4508 #ifdef CONFIG_STACKTRACE
4511 #ifdef CONFIG_TRACER_SNAPSHOT
4514 #ifdef CONFIG_HIST_TRIGGERS
4515 "\t\t hist (see below)\n"
4517 "\t example: echo traceoff > events/block/block_unplug/trigger\n"
4518 "\t echo traceoff:3 > events/block/block_unplug/trigger\n"
4519 "\t echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
4520 "\t events/block/block_unplug/trigger\n"
4521 "\t The first disables tracing every time block_unplug is hit.\n"
4522 "\t The second disables tracing the first 3 times block_unplug is hit.\n"
4523 "\t The third enables the kmalloc event the first 3 times block_unplug\n"
4524 "\t is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
4525 "\t Like function triggers, the counter is only decremented if it\n"
4526 "\t enabled or disabled tracing.\n"
4527 "\t To remove a trigger without a count:\n"
4528 "\t echo '!<trigger> > <system>/<event>/trigger\n"
4529 "\t To remove a trigger with a count:\n"
4530 "\t echo '!<trigger>:0 > <system>/<event>/trigger\n"
4531 "\t Filters can be ignored when removing a trigger.\n"
4532 #ifdef CONFIG_HIST_TRIGGERS
4533 " hist trigger\t- If set, event hits are aggregated into a hash table\n"
4534 "\t Format: hist:keys=<field1[,field2,...]>\n"
4535 "\t [:values=<field1[,field2,...]>]\n"
4536 "\t [:sort=<field1[,field2,...]>]\n"
4537 "\t [:size=#entries]\n"
4538 "\t [:pause][:continue][:clear]\n"
4539 "\t [:name=histname1]\n"
4540 "\t [if <filter>]\n\n"
4541 "\t When a matching event is hit, an entry is added to a hash\n"
4542 "\t table using the key(s) and value(s) named, and the value of a\n"
4543 "\t sum called 'hitcount' is incremented. Keys and values\n"
4544 "\t correspond to fields in the event's format description. Keys\n"
4545 "\t can be any field, or the special string 'stacktrace'.\n"
4546 "\t Compound keys consisting of up to two fields can be specified\n"
4547 "\t by the 'keys' keyword. Values must correspond to numeric\n"
4548 "\t fields. Sort keys consisting of up to two fields can be\n"
4549 "\t specified using the 'sort' keyword. The sort direction can\n"
4550 "\t be modified by appending '.descending' or '.ascending' to a\n"
4551 "\t sort field. The 'size' parameter can be used to specify more\n"
4552 "\t or fewer than the default 2048 entries for the hashtable size.\n"
4553 "\t If a hist trigger is given a name using the 'name' parameter,\n"
4554 "\t its histogram data will be shared with other triggers of the\n"
4555 "\t same name, and trigger hits will update this common data.\n\n"
4556 "\t Reading the 'hist' file for the event will dump the hash\n"
4557 "\t table in its entirety to stdout. If there are multiple hist\n"
4558 "\t triggers attached to an event, there will be a table for each\n"
4559 "\t trigger in the output. The table displayed for a named\n"
4560 "\t trigger will be the same as any other instance having the\n"
4561 "\t same name. The default format used to display a given field\n"
4562 "\t can be modified by appending any of the following modifiers\n"
4563 "\t to the field name, as applicable:\n\n"
4564 "\t .hex display a number as a hex value\n"
4565 "\t .sym display an address as a symbol\n"
4566 "\t .sym-offset display an address as a symbol and offset\n"
4567 "\t .execname display a common_pid as a program name\n"
4568 "\t .syscall display a syscall id as a syscall name\n\n"
4569 "\t .log2 display log2 value rather than raw number\n\n"
4570 "\t The 'pause' parameter can be used to pause an existing hist\n"
4571 "\t trigger or to start a hist trigger but not log any events\n"
4572 "\t until told to do so. 'continue' can be used to start or\n"
4573 "\t restart a paused hist trigger.\n\n"
4574 "\t The 'clear' parameter will clear the contents of a running\n"
4575 "\t hist trigger and leave its current paused/active state\n"
4577 "\t The enable_hist and disable_hist triggers can be used to\n"
4578 "\t have one event conditionally start and stop another event's\n"
4579 "\t already-attached hist trigger. The syntax is analogous to\n"
4580 "\t the enable_event and disable_event triggers.\n"
4585 tracing_readme_read(struct file *filp, char __user *ubuf,
4586 size_t cnt, loff_t *ppos)
4588 return simple_read_from_buffer(ubuf, cnt, ppos,
4589 readme_msg, strlen(readme_msg));
4592 static const struct file_operations tracing_readme_fops = {
4593 .open = tracing_open_generic,
4594 .read = tracing_readme_read,
4595 .llseek = generic_file_llseek,
4598 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
4600 unsigned int *ptr = v;
4602 if (*pos || m->count)
4607 for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
4609 if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
4618 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
4624 arch_spin_lock(&trace_cmdline_lock);
4626 v = &savedcmd->map_cmdline_to_pid[0];
4628 v = saved_cmdlines_next(m, v, &l);
4636 static void saved_cmdlines_stop(struct seq_file *m, void *v)
4638 arch_spin_unlock(&trace_cmdline_lock);
4642 static int saved_cmdlines_show(struct seq_file *m, void *v)
4644 char buf[TASK_COMM_LEN];
4645 unsigned int *pid = v;
4647 __trace_find_cmdline(*pid, buf);
4648 seq_printf(m, "%d %s\n", *pid, buf);
4652 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
4653 .start = saved_cmdlines_start,
4654 .next = saved_cmdlines_next,
4655 .stop = saved_cmdlines_stop,
4656 .show = saved_cmdlines_show,
4659 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
4661 if (tracing_disabled)
4664 return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
4667 static const struct file_operations tracing_saved_cmdlines_fops = {
4668 .open = tracing_saved_cmdlines_open,
4670 .llseek = seq_lseek,
4671 .release = seq_release,
4675 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
4676 size_t cnt, loff_t *ppos)
4681 arch_spin_lock(&trace_cmdline_lock);
4682 r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
4683 arch_spin_unlock(&trace_cmdline_lock);
4685 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4688 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
4690 kfree(s->saved_cmdlines);
4691 kfree(s->map_cmdline_to_pid);
4695 static int tracing_resize_saved_cmdlines(unsigned int val)
4697 struct saved_cmdlines_buffer *s, *savedcmd_temp;
4699 s = kmalloc(sizeof(*s), GFP_KERNEL);
4703 if (allocate_cmdlines_buffer(val, s) < 0) {
4708 arch_spin_lock(&trace_cmdline_lock);
4709 savedcmd_temp = savedcmd;
4711 arch_spin_unlock(&trace_cmdline_lock);
4712 free_saved_cmdlines_buffer(savedcmd_temp);
4718 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
4719 size_t cnt, loff_t *ppos)
4724 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
4728 /* must have at least 1 entry and at most PID_MAX_DEFAULT */
4729 if (!val || val > PID_MAX_DEFAULT)
4732 ret = tracing_resize_saved_cmdlines((unsigned int)val);
4741 static const struct file_operations tracing_saved_cmdlines_size_fops = {
4742 .open = tracing_open_generic,
4743 .read = tracing_saved_cmdlines_size_read,
4744 .write = tracing_saved_cmdlines_size_write,
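/*
 * Illustrative shell usage of the saved_cmdlines_size file defined above:
 *
 *	echo 1024 > /sys/kernel/tracing/saved_cmdlines_size
 *	cat /sys/kernel/tracing/saved_cmdlines_size
 *
 * Accepted values range from 1 to PID_MAX_DEFAULT; resizing reallocates
 * the comm-to-pid map under trace_cmdline_lock.
 */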
4747 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
4748 static union trace_eval_map_item *
4749 update_eval_map(union trace_eval_map_item *ptr)
4751 if (!ptr->map.eval_string) {
4752 if (ptr->tail.next) {
4753 ptr = ptr->tail.next;
4754 /* Set ptr to the next real item (skip head) */
4762 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
4764 union trace_eval_map_item *ptr = v;
4767 * Paranoid! If ptr points to end, we don't want to increment past it.
4768 * This really should never happen.
4770 ptr = update_eval_map(ptr);
4771 if (WARN_ON_ONCE(!ptr))
4778 ptr = update_eval_map(ptr);
4783 static void *eval_map_start(struct seq_file *m, loff_t *pos)
4785 union trace_eval_map_item *v;
4788 mutex_lock(&trace_eval_mutex);
4790 v = trace_eval_maps;
4794 while (v && l < *pos) {
4795 v = eval_map_next(m, v, &l);
4801 static void eval_map_stop(struct seq_file *m, void *v)
4803 mutex_unlock(&trace_eval_mutex);
4806 static int eval_map_show(struct seq_file *m, void *v)
4808 union trace_eval_map_item *ptr = v;
4810 seq_printf(m, "%s %ld (%s)\n",
4811 ptr->map.eval_string, ptr->map.eval_value,
4817 static const struct seq_operations tracing_eval_map_seq_ops = {
4818 .start = eval_map_start,
4819 .next = eval_map_next,
4820 .stop = eval_map_stop,
4821 .show = eval_map_show,
4824 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
4826 if (tracing_disabled)
4829 return seq_open(filp, &tracing_eval_map_seq_ops);
4832 static const struct file_operations tracing_eval_map_fops = {
4833 .open = tracing_eval_map_open,
4835 .llseek = seq_lseek,
4836 .release = seq_release,
4839 static inline union trace_eval_map_item *
4840 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
4842 /* Return tail of array given the head */
4843 return ptr + ptr->head.length + 1;
4847 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
4850 struct trace_eval_map **stop;
4851 struct trace_eval_map **map;
4852 union trace_eval_map_item *map_array;
4853 union trace_eval_map_item *ptr;
4858 * The trace_eval_maps contains the map plus a head and tail item,
4859 * where the head holds the module and length of array, and the
4860 * tail holds a pointer to the next list.
4862 map_array = kmalloc(sizeof(*map_array) * (len + 2), GFP_KERNEL);
4864 pr_warn("Unable to allocate trace eval mapping\n");
4868 mutex_lock(&trace_eval_mutex);
4870 if (!trace_eval_maps)
4871 trace_eval_maps = map_array;
4873 ptr = trace_eval_maps;
4875 ptr = trace_eval_jmp_to_tail(ptr);
4876 if (!ptr->tail.next)
4878 ptr = ptr->tail.next;
4881 ptr->tail.next = map_array;
4883 map_array->head.mod = mod;
4884 map_array->head.length = len;
4887 for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
4888 map_array->map = **map;
4891 memset(map_array, 0, sizeof(*map_array));
4893 mutex_unlock(&trace_eval_mutex);
4896 static void trace_create_eval_file(struct dentry *d_tracer)
4898 trace_create_file("eval_map", 0444, d_tracer,
4899 NULL, &tracing_eval_map_fops);
4902 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
4903 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
4904 static inline void trace_insert_eval_map_file(struct module *mod,
4905 struct trace_eval_map **start, int len) { }
4906 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
4908 static void trace_insert_eval_map(struct module *mod,
4909 struct trace_eval_map **start, int len)
4911 struct trace_eval_map **map;
4918 trace_event_eval_update(map, len);
4920 trace_insert_eval_map_file(mod, start, len);
4924 tracing_set_trace_read(struct file *filp, char __user *ubuf,
4925 size_t cnt, loff_t *ppos)
4927 struct trace_array *tr = filp->private_data;
4928 char buf[MAX_TRACER_SIZE+2];
4931 mutex_lock(&trace_types_lock);
4932 r = sprintf(buf, "%s\n", tr->current_trace->name);
4933 mutex_unlock(&trace_types_lock);
4935 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
4938 int tracer_init(struct tracer *t, struct trace_array *tr)
4940 tracing_reset_online_cpus(&tr->trace_buffer);
4944 static void set_buffer_entries(struct trace_buffer *buf, unsigned long val)
4948 for_each_tracing_cpu(cpu)
4949 per_cpu_ptr(buf->data, cpu)->entries = val;
4952 #ifdef CONFIG_TRACER_MAX_TRACE
4953 /* resize @trace_buf's buffer to the size of @size_buf's entries */
4954 static int resize_buffer_duplicate_size(struct trace_buffer *trace_buf,
4955 struct trace_buffer *size_buf, int cpu_id)
4959 if (cpu_id == RING_BUFFER_ALL_CPUS) {
4960 for_each_tracing_cpu(cpu) {
4961 ret = ring_buffer_resize(trace_buf->buffer,
4962 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
4965 per_cpu_ptr(trace_buf->data, cpu)->entries =
4966 per_cpu_ptr(size_buf->data, cpu)->entries;
4969 ret = ring_buffer_resize(trace_buf->buffer,
4970 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
4972 per_cpu_ptr(trace_buf->data, cpu_id)->entries =
4973 per_cpu_ptr(size_buf->data, cpu_id)->entries;
4978 #endif /* CONFIG_TRACER_MAX_TRACE */
4980 static int __tracing_resize_ring_buffer(struct trace_array *tr,
4981 unsigned long size, int cpu)
4986 * If kernel or user changes the size of the ring buffer
4987 * we use the size that was given, and we can forget about
4988 * expanding it later.
4990 ring_buffer_expanded = true;
4992 /* May be called before buffers are initialized */
4993 if (!tr->trace_buffer.buffer)
4996 ret = ring_buffer_resize(tr->trace_buffer.buffer, size, cpu);
5000 #ifdef CONFIG_TRACER_MAX_TRACE
5001 if (!(tr->flags & TRACE_ARRAY_FL_GLOBAL) ||
5002 !tr->current_trace->use_max_tr)
5005 ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5007 int r = resize_buffer_duplicate_size(&tr->trace_buffer,
5008 &tr->trace_buffer, cpu);
5011 * AARGH! We are left with different
5012 * size max buffer!!!!
5013 * The max buffer is our "snapshot" buffer.
5014 * When a tracer needs a snapshot (one of the
5015 * latency tracers), it swaps the max buffer
5016 * with the saved snapshot. We succeeded in
5017 * updating the size of the main buffer, but failed to
5018 * update the size of the max buffer. But when we tried
5019 * to reset the main buffer to the original size, we
5020 * failed there too. This is very unlikely to
5021 * happen, but if it does, warn and kill all
5025 tracing_disabled = 1;
5030 if (cpu == RING_BUFFER_ALL_CPUS)
5031 set_buffer_entries(&tr->max_buffer, size);
5033 per_cpu_ptr(tr->max_buffer.data, cpu)->entries = size;
5036 #endif /* CONFIG_TRACER_MAX_TRACE */
5038 if (cpu == RING_BUFFER_ALL_CPUS)
5039 set_buffer_entries(&tr->trace_buffer, size);
5041 per_cpu_ptr(tr->trace_buffer.data, cpu)->entries = size;
5046 static ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5047 unsigned long size, int cpu_id)
5051 mutex_lock(&trace_types_lock);
5053 if (cpu_id != RING_BUFFER_ALL_CPUS) {
5054 /* make sure this cpu is enabled in the mask */
5055 if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
5061 ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
5066 mutex_unlock(&trace_types_lock);
5073 * tracing_update_buffers - used by tracing facility to expand ring buffers
5075 * To save memory when tracing is never used on a system that has it
5076 * configured in, the ring buffers are set to a minimum size. Once a
5077 * user starts to use the tracing facility, they need to grow to
5078 * their default size.
5080 * This function is to be called when a tracer is about to be used.
5082 int tracing_update_buffers(void)
5086 mutex_lock(&trace_types_lock);
5087 if (!ring_buffer_expanded)
5088 ret = __tracing_resize_ring_buffer(&global_trace, trace_buf_size,
5089 RING_BUFFER_ALL_CPUS);
5090 mutex_unlock(&trace_types_lock);
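/*
 * Minimal caller sketch (illustrative; see tracing_snapshot_write() below
 * for a real caller): code that is about to start recording expands the
 * ring buffers first and only continues if the expansion succeeded.
 *
 *	int ret;
 *
 *	ret = tracing_update_buffers();
 *	if (ret < 0)
 *		return ret;
 *
 *	// buffers are now at full size; safe to enable the tracer/event
 */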
5095 struct trace_option_dentry;
5098 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
5101 * Used to clear out the tracer before deletion of an instance.
5102 * Must have trace_types_lock held.
5104 static void tracing_set_nop(struct trace_array *tr)
5106 if (tr->current_trace == &nop_trace)
5109 tr->current_trace->enabled--;
5111 if (tr->current_trace->reset)
5112 tr->current_trace->reset(tr);
5114 tr->current_trace = &nop_trace;
5117 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
5119 /* Only enable if the directory has been created already. */
5123 create_trace_option_files(tr, t);
5126 static int tracing_set_tracer(struct trace_array *tr, const char *buf)
5129 #ifdef CONFIG_TRACER_MAX_TRACE
5134 mutex_lock(&trace_types_lock);
5136 if (!ring_buffer_expanded) {
5137 ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
5138 RING_BUFFER_ALL_CPUS);
5144 for (t = trace_types; t; t = t->next) {
5145 if (strcmp(t->name, buf) == 0)
5152 if (t == tr->current_trace)
5155 /* Some tracers are only allowed for the top level buffer */
5156 if (!trace_ok_for_array(t, tr)) {
5161 /* If trace pipe files are being read, we can't change the tracer */
5162 if (tr->current_trace->ref) {
5167 trace_branch_disable();
5169 tr->current_trace->enabled--;
5171 if (tr->current_trace->reset)
5172 tr->current_trace->reset(tr);
5174 /* Current trace needs to be nop_trace before synchronize_sched */
5175 tr->current_trace = &nop_trace;
5177 #ifdef CONFIG_TRACER_MAX_TRACE
5178 had_max_tr = tr->allocated_snapshot;
5180 if (had_max_tr && !t->use_max_tr) {
5182 * We need to make sure that the update_max_tr sees that
5183 * current_trace changed to nop_trace to keep it from
5184 * swapping the buffers after we resize it.
5185 * update_max_tr() is called with interrupts disabled,
5186 * so a synchronize_sched() is sufficient.
5188 synchronize_sched();
5193 #ifdef CONFIG_TRACER_MAX_TRACE
5194 if (t->use_max_tr && !had_max_tr) {
5195 ret = alloc_snapshot(tr);
5202 ret = tracer_init(t, tr);
5207 tr->current_trace = t;
5208 tr->current_trace->enabled++;
5209 trace_branch_enable(tr);
5211 mutex_unlock(&trace_types_lock);
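/*
 * Usage note (illustrative): tracing_set_tracer() is reached from the
 * "current_tracer" file created in init_tracer_tracefs(), via
 * tracing_set_trace_write() just below.  Assuming the built-in "nop"
 * tracer, switching tracers is simply:
 *
 *	# echo nop > current_tracer
 *
 * The write handler strips trailing whitespace (so echo's newline does
 * not matter) before the name is looked up in the trace_types list.
 */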
5217 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
5218 size_t cnt, loff_t *ppos)
5220 struct trace_array *tr = filp->private_data;
5221 char buf[MAX_TRACER_SIZE+1];
5228 if (cnt > MAX_TRACER_SIZE)
5229 cnt = MAX_TRACER_SIZE;
5231 if (copy_from_user(buf, ubuf, cnt))
5236 /* strip trailing whitespace. */
5237 for (i = cnt - 1; i > 0 && isspace(buf[i]); i--)
5240 err = tracing_set_tracer(tr, buf);
5250 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
5251 size_t cnt, loff_t *ppos)
5256 r = snprintf(buf, sizeof(buf), "%ld\n",
5257 *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
5258 if (r > sizeof(buf))
5260 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5264 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
5265 size_t cnt, loff_t *ppos)
5270 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5280 tracing_thresh_read(struct file *filp, char __user *ubuf,
5281 size_t cnt, loff_t *ppos)
5283 return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
5287 tracing_thresh_write(struct file *filp, const char __user *ubuf,
5288 size_t cnt, loff_t *ppos)
5290 struct trace_array *tr = filp->private_data;
5293 mutex_lock(&trace_types_lock);
5294 ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
5298 if (tr->current_trace->update_thresh) {
5299 ret = tr->current_trace->update_thresh(tr);
5306 mutex_unlock(&trace_types_lock);
5311 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
5314 tracing_max_lat_read(struct file *filp, char __user *ubuf,
5315 size_t cnt, loff_t *ppos)
5317 return tracing_nsecs_read(filp->private_data, ubuf, cnt, ppos);
5321 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
5322 size_t cnt, loff_t *ppos)
5324 return tracing_nsecs_write(filp->private_data, ubuf, cnt, ppos);
5329 static int tracing_open_pipe(struct inode *inode, struct file *filp)
5331 struct trace_array *tr = inode->i_private;
5332 struct trace_iterator *iter;
5335 if (tracing_disabled)
5338 if (trace_array_get(tr) < 0)
5341 mutex_lock(&trace_types_lock);
5343 /* create a buffer to store the information to pass to userspace */
5344 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
5347 __trace_array_put(tr);
5351 trace_seq_init(&iter->seq);
5352 iter->trace = tr->current_trace;
5354 if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
5359 /* trace pipe does not show start of buffer */
5360 cpumask_setall(iter->started);
5362 if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5363 iter->iter_flags |= TRACE_FILE_LAT_FMT;
5365 /* Output in nanoseconds only if we are using a clock in nanoseconds. */
5366 if (trace_clocks[tr->clock_id].in_ns)
5367 iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
5370 iter->trace_buffer = &tr->trace_buffer;
5371 iter->cpu_file = tracing_get_cpu(inode);
5372 mutex_init(&iter->mutex);
5373 filp->private_data = iter;
5375 if (iter->trace->pipe_open)
5376 iter->trace->pipe_open(iter);
5378 nonseekable_open(inode, filp);
5380 tr->current_trace->ref++;
5382 mutex_unlock(&trace_types_lock);
5388 __trace_array_put(tr);
5389 mutex_unlock(&trace_types_lock);
5393 static int tracing_release_pipe(struct inode *inode, struct file *file)
5395 struct trace_iterator *iter = file->private_data;
5396 struct trace_array *tr = inode->i_private;
5398 mutex_lock(&trace_types_lock);
5400 tr->current_trace->ref--;
5402 if (iter->trace->pipe_close)
5403 iter->trace->pipe_close(iter);
5405 mutex_unlock(&trace_types_lock);
5407 free_cpumask_var(iter->started);
5408 mutex_destroy(&iter->mutex);
5411 trace_array_put(tr);
5417 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
5419 struct trace_array *tr = iter->tr;
5421 /* Iterators are static, they should be filled or empty */
5422 if (trace_buffer_iter(iter, iter->cpu_file))
5423 return POLLIN | POLLRDNORM;
5425 if (tr->trace_flags & TRACE_ITER_BLOCK)
5427 * Always select as readable when in blocking mode
5429 return POLLIN | POLLRDNORM;
5431 return ring_buffer_poll_wait(iter->trace_buffer->buffer, iter->cpu_file,
5436 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
5438 struct trace_iterator *iter = filp->private_data;
5440 return trace_poll(iter, filp, poll_table);
5443 /* Must be called with iter->mutex held. */
5444 static int tracing_wait_pipe(struct file *filp)
5446 struct trace_iterator *iter = filp->private_data;
5449 while (trace_empty(iter)) {
5451 if ((filp->f_flags & O_NONBLOCK)) {
5456 * We return EOF only once we have read something and tracing has
5457 * been disabled. If tracing is disabled but we have never read
5458 * anything, we keep blocking; this allows a user to cat this file
5459 * and then enable tracing. But after we have read something, we
5460 * give an EOF when tracing is disabled again.
5462 * iter->pos will be 0 if we haven't read anything.
5464 if (!tracing_is_on() && iter->pos)
5467 mutex_unlock(&iter->mutex);
5469 ret = wait_on_pipe(iter, false);
5471 mutex_lock(&iter->mutex);
5484 tracing_read_pipe(struct file *filp, char __user *ubuf,
5485 size_t cnt, loff_t *ppos)
5487 struct trace_iterator *iter = filp->private_data;
5491 * Avoid more than one consumer on a single file descriptor.
5492 * This is just a matter of trace coherency; the ring buffer itself
5495 mutex_lock(&iter->mutex);
5497 /* return any leftover data */
5498 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5502 trace_seq_init(&iter->seq);
5504 if (iter->trace->read) {
5505 sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
5511 sret = tracing_wait_pipe(filp);
5515 /* stop when tracing is finished */
5516 if (trace_empty(iter)) {
5521 if (cnt >= PAGE_SIZE)
5522 cnt = PAGE_SIZE - 1;
5524 /* reset all but tr, trace, and overruns */
5525 memset(&iter->seq, 0,
5526 sizeof(struct trace_iterator) -
5527 offsetof(struct trace_iterator, seq));
5528 cpumask_clear(iter->started);
5531 trace_event_read_lock();
5532 trace_access_lock(iter->cpu_file);
5533 while (trace_find_next_entry_inc(iter) != NULL) {
5534 enum print_line_t ret;
5535 int save_len = iter->seq.seq.len;
5537 ret = print_trace_line(iter);
5538 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5539 /* don't print partial lines */
5540 iter->seq.seq.len = save_len;
5543 if (ret != TRACE_TYPE_NO_CONSUME)
5544 trace_consume(iter);
5546 if (trace_seq_used(&iter->seq) >= cnt)
5550 * Setting the full flag means we reached the trace_seq buffer
5551 * size and we should have left via the partial-line condition above.
5552 * One of the trace_seq_* functions is not being used properly.
5554 WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
5557 trace_access_unlock(iter->cpu_file);
5558 trace_event_read_unlock();
5560 /* Now copy what we have to the user */
5561 sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
5562 if (iter->seq.seq.readpos >= trace_seq_used(&iter->seq))
5563 trace_seq_init(&iter->seq);
5566 * If there was nothing to send to the user, in spite of consuming trace
5567 * entries, go back and wait for more entries.
5573 mutex_unlock(&iter->mutex);
5578 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
5581 __free_page(spd->pages[idx]);
5584 static const struct pipe_buf_operations tracing_pipe_buf_ops = {
5586 .confirm = generic_pipe_buf_confirm,
5587 .release = generic_pipe_buf_release,
5588 .steal = generic_pipe_buf_steal,
5589 .get = generic_pipe_buf_get,
5593 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
5599 /* Seq buffer is page-sized, exactly what we need. */
5601 save_len = iter->seq.seq.len;
5602 ret = print_trace_line(iter);
5604 if (trace_seq_has_overflowed(&iter->seq)) {
5605 iter->seq.seq.len = save_len;
5610 * This should not be hit, because TRACE_TYPE_PARTIAL_LINE should
5611 * only be returned if the iter->seq overflowed. But check it
5612 * anyway to be safe.
5614 if (ret == TRACE_TYPE_PARTIAL_LINE) {
5615 iter->seq.seq.len = save_len;
5619 count = trace_seq_used(&iter->seq) - save_len;
5622 iter->seq.seq.len = save_len;
5626 if (ret != TRACE_TYPE_NO_CONSUME)
5627 trace_consume(iter);
5629 if (!trace_find_next_entry_inc(iter)) {
5639 static ssize_t tracing_splice_read_pipe(struct file *filp,
5641 struct pipe_inode_info *pipe,
5645 struct page *pages_def[PIPE_DEF_BUFFERS];
5646 struct partial_page partial_def[PIPE_DEF_BUFFERS];
5647 struct trace_iterator *iter = filp->private_data;
5648 struct splice_pipe_desc spd = {
5650 .partial = partial_def,
5651 .nr_pages = 0, /* This gets updated below. */
5652 .nr_pages_max = PIPE_DEF_BUFFERS,
5653 .ops = &tracing_pipe_buf_ops,
5654 .spd_release = tracing_spd_release_pipe,
5660 if (splice_grow_spd(pipe, &spd))
5663 mutex_lock(&iter->mutex);
5665 if (iter->trace->splice_read) {
5666 ret = iter->trace->splice_read(iter, filp,
5667 ppos, pipe, len, flags);
5672 ret = tracing_wait_pipe(filp);
5676 if (!iter->ent && !trace_find_next_entry_inc(iter)) {
5681 trace_event_read_lock();
5682 trace_access_lock(iter->cpu_file);
5684 /* Fill as many pages as possible. */
5685 for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
5686 spd.pages[i] = alloc_page(GFP_KERNEL);
5690 rem = tracing_fill_pipe_page(rem, iter);
5692 /* Copy the data into the page, so we can start over. */
5693 ret = trace_seq_to_buffer(&iter->seq,
5694 page_address(spd.pages[i]),
5695 trace_seq_used(&iter->seq));
5697 __free_page(spd.pages[i]);
5700 spd.partial[i].offset = 0;
5701 spd.partial[i].len = trace_seq_used(&iter->seq);
5703 trace_seq_init(&iter->seq);
5706 trace_access_unlock(iter->cpu_file);
5707 trace_event_read_unlock();
5708 mutex_unlock(&iter->mutex);
5713 ret = splice_to_pipe(pipe, &spd);
5717 splice_shrink_spd(&spd);
5721 mutex_unlock(&iter->mutex);
5726 tracing_entries_read(struct file *filp, char __user *ubuf,
5727 size_t cnt, loff_t *ppos)
5729 struct inode *inode = file_inode(filp);
5730 struct trace_array *tr = inode->i_private;
5731 int cpu = tracing_get_cpu(inode);
5736 mutex_lock(&trace_types_lock);
5738 if (cpu == RING_BUFFER_ALL_CPUS) {
5739 int cpu, buf_size_same;
5744 /* check if all per-cpu buffer sizes are the same */
5745 for_each_tracing_cpu(cpu) {
5746 /* fill in the size from first enabled cpu */
5748 size = per_cpu_ptr(tr->trace_buffer.data, cpu)->entries;
5749 if (size != per_cpu_ptr(tr->trace_buffer.data, cpu)->entries) {
5755 if (buf_size_same) {
5756 if (!ring_buffer_expanded)
5757 r = sprintf(buf, "%lu (expanded: %lu)\n",
5759 trace_buf_size >> 10);
5761 r = sprintf(buf, "%lu\n", size >> 10);
5763 r = sprintf(buf, "X\n");
5765 r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10);
5767 mutex_unlock(&trace_types_lock);
5769 ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5774 tracing_entries_write(struct file *filp, const char __user *ubuf,
5775 size_t cnt, loff_t *ppos)
5777 struct inode *inode = file_inode(filp);
5778 struct trace_array *tr = inode->i_private;
5782 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
5786 /* must have at least 1 entry */
5790 /* value is in KB */
5792 ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
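/*
 * Usage note (illustrative): this write handler backs the
 * "buffer_size_kb" file created in init_tracer_tracefs() below.  The
 * value written is interpreted as KB per CPU, so from the instance's
 * tracefs directory:
 *
 *	# echo 4096 > buffer_size_kb
 *
 * resizes every per-CPU ring buffer to 4096 KB, while the per-cpu copy
 * of the file (under per_cpu/cpuN/) resizes only that CPU's buffer,
 * which is the distinction tracing_get_cpu() makes here.
 */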
5802 tracing_total_entries_read(struct file *filp, char __user *ubuf,
5803 size_t cnt, loff_t *ppos)
5805 struct trace_array *tr = filp->private_data;
5808 unsigned long size = 0, expanded_size = 0;
5810 mutex_lock(&trace_types_lock);
5811 for_each_tracing_cpu(cpu) {
5812 size += per_cpu_ptr(tr->trace_buffer.data, cpu)->entries >> 10;
5813 if (!ring_buffer_expanded)
5814 expanded_size += trace_buf_size >> 10;
5816 if (ring_buffer_expanded)
5817 r = sprintf(buf, "%lu\n", size);
5819 r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
5820 mutex_unlock(&trace_types_lock);
5822 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5826 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
5827 size_t cnt, loff_t *ppos)
5830 * There is no need to read what the user has written; this function
5831 * is just to make sure that there is no error when "echo" is used
5840 tracing_free_buffer_release(struct inode *inode, struct file *filp)
5842 struct trace_array *tr = inode->i_private;
5844 /* disable tracing? */
5845 if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
5846 tracer_tracing_off(tr);
5847 /* resize the ring buffer to 0 */
5848 tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
5850 trace_array_put(tr);
5856 tracing_mark_write(struct file *filp, const char __user *ubuf,
5857 size_t cnt, loff_t *fpos)
5859 struct trace_array *tr = filp->private_data;
5860 struct ring_buffer_event *event;
5861 struct ring_buffer *buffer;
5862 struct print_entry *entry;
5863 unsigned long irq_flags;
5864 const char faulted[] = "<faulted>";
5869 /* Used in tracing_mark_raw_write() as well */
5870 #define FAULTED_SIZE (sizeof(faulted) - 1) /* '\0' is already accounted for */
5872 if (tracing_disabled)
5875 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5878 if (cnt > TRACE_BUF_SIZE)
5879 cnt = TRACE_BUF_SIZE;
5881 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5883 local_save_flags(irq_flags);
5884 size = sizeof(*entry) + cnt + 2; /* add '\0' and possible '\n' */
5886 /* If less than "<faulted>", then make sure we can still add that */
5887 if (cnt < FAULTED_SIZE)
5888 size += FAULTED_SIZE - cnt;
5890 buffer = tr->trace_buffer.buffer;
5891 event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
5892 irq_flags, preempt_count());
5893 if (unlikely(!event))
5894 /* Ring buffer disabled, return as if not open for write */
5897 entry = ring_buffer_event_data(event);
5898 entry->ip = _THIS_IP_;
5900 len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
5902 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5909 if (entry->buf[cnt - 1] != '\n') {
5910 entry->buf[cnt] = '\n';
5911 entry->buf[cnt + 1] = '\0';
5913 entry->buf[cnt] = '\0';
5915 __buffer_unlock_commit(buffer, event);
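/*
 * Worked example (illustrative) of the size accounting above, assuming
 * a user writes the 3 bytes "hi\n" to trace_marker:
 *
 *	cnt          = 3
 *	FAULTED_SIZE = sizeof("<faulted>") - 1 = 9
 *	size         = sizeof(*entry) + cnt + 2;	// '\0' + possible '\n'
 *	size        += FAULTED_SIZE - cnt;		// since cnt < FAULTED_SIZE
 *
 * so even if __copy_from_user_inatomic() faults, the reserved event is
 * large enough to hold the "<faulted>" marker text in place of the
 * user's data.
 */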
5923 /* Limit it for now to 3K (including tag) */
5924 #define RAW_DATA_MAX_SIZE (1024*3)
5927 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
5928 size_t cnt, loff_t *fpos)
5930 struct trace_array *tr = filp->private_data;
5931 struct ring_buffer_event *event;
5932 struct ring_buffer *buffer;
5933 struct raw_data_entry *entry;
5934 const char faulted[] = "<faulted>";
5935 unsigned long irq_flags;
5940 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
5942 if (tracing_disabled)
5945 if (!(tr->trace_flags & TRACE_ITER_MARKERS))
5948 /* The marker must at least have a tag id */
5949 if (cnt < sizeof(unsigned int) || cnt > RAW_DATA_MAX_SIZE)
5952 if (cnt > TRACE_BUF_SIZE)
5953 cnt = TRACE_BUF_SIZE;
5955 BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
5957 local_save_flags(irq_flags);
5958 size = sizeof(*entry) + cnt;
5959 if (cnt < FAULT_SIZE_ID)
5960 size += FAULT_SIZE_ID - cnt;
5962 buffer = tr->trace_buffer.buffer;
5963 event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
5964 irq_flags, preempt_count());
5966 /* Ring buffer disabled, return as if not open for write */
5969 entry = ring_buffer_event_data(event);
5971 len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
5974 memcpy(&entry->buf, faulted, FAULTED_SIZE);
5979 __buffer_unlock_commit(buffer, event);
5987 static int tracing_clock_show(struct seq_file *m, void *v)
5989 struct trace_array *tr = m->private;
5992 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
5994 "%s%s%s%s", i ? " " : "",
5995 i == tr->clock_id ? "[" : "", trace_clocks[i].name,
5996 i == tr->clock_id ? "]" : "");
6002 static int tracing_set_clock(struct trace_array *tr, const char *clockstr)
6006 for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
6007 if (strcmp(trace_clocks[i].name, clockstr) == 0)
6010 if (i == ARRAY_SIZE(trace_clocks))
6013 mutex_lock(&trace_types_lock);
6017 ring_buffer_set_clock(tr->trace_buffer.buffer, trace_clocks[i].func);
6020 * New clock may not be consistent with the previous clock.
6021 * Reset the buffer so that it doesn't have incomparable timestamps.
6023 tracing_reset_online_cpus(&tr->trace_buffer);
6025 #ifdef CONFIG_TRACER_MAX_TRACE
6026 if (tr->flags & TRACE_ARRAY_FL_GLOBAL && tr->max_buffer.buffer)
6027 ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
6028 tracing_reset_online_cpus(&tr->max_buffer);
6031 mutex_unlock(&trace_types_lock);
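/*
 * Usage note (illustrative): tracing_set_clock() is reached from the
 * "trace_clock" file created in init_tracer_tracefs().  Reading the file
 * lists every entry of trace_clocks[] with the current clock in square
 * brackets (see tracing_clock_show() above); writing one of the listed
 * names switches to it, e.g. assuming a "global" clock is listed:
 *
 *	# echo global > trace_clock
 *
 * Note that the buffers are reset on a clock change (see the comment in
 * tracing_set_clock()), so previously recorded data is discarded.
 */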
6036 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
6037 size_t cnt, loff_t *fpos)
6039 struct seq_file *m = filp->private_data;
6040 struct trace_array *tr = m->private;
6042 const char *clockstr;
6045 if (cnt >= sizeof(buf))
6048 if (copy_from_user(buf, ubuf, cnt))
6053 clockstr = strstrip(buf);
6055 ret = tracing_set_clock(tr, clockstr);
6064 static int tracing_clock_open(struct inode *inode, struct file *file)
6066 struct trace_array *tr = inode->i_private;
6069 if (tracing_disabled)
6072 if (trace_array_get(tr))
6075 ret = single_open(file, tracing_clock_show, inode->i_private);
6077 trace_array_put(tr);
6082 struct ftrace_buffer_info {
6083 struct trace_iterator iter;
6085 unsigned int spare_cpu;
6089 #ifdef CONFIG_TRACER_SNAPSHOT
6090 static int tracing_snapshot_open(struct inode *inode, struct file *file)
6092 struct trace_array *tr = inode->i_private;
6093 struct trace_iterator *iter;
6097 if (trace_array_get(tr) < 0)
6100 if (file->f_mode & FMODE_READ) {
6101 iter = __tracing_open(inode, file, true);
6103 ret = PTR_ERR(iter);
6105 /* Writes still need the seq_file to hold the private data */
6107 m = kzalloc(sizeof(*m), GFP_KERNEL);
6110 iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6118 iter->trace_buffer = &tr->max_buffer;
6119 iter->cpu_file = tracing_get_cpu(inode);
6121 file->private_data = m;
6125 trace_array_put(tr);
6131 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
6134 struct seq_file *m = filp->private_data;
6135 struct trace_iterator *iter = m->private;
6136 struct trace_array *tr = iter->tr;
6140 ret = tracing_update_buffers();
6144 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6148 mutex_lock(&trace_types_lock);
6150 if (tr->current_trace->use_max_tr) {
6157 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6161 if (tr->allocated_snapshot)
6165 /* Only allow per-cpu swap if the ring buffer supports it */
6166 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
6167 if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
6172 if (!tr->allocated_snapshot) {
6173 ret = alloc_snapshot(tr);
6177 local_irq_disable();
6178 /* Now, we're going to swap */
6179 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6180 update_max_tr(tr, current, smp_processor_id());
6182 update_max_tr_single(tr, current, iter->cpu_file);
6186 if (tr->allocated_snapshot) {
6187 if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
6188 tracing_reset_online_cpus(&tr->max_buffer);
6190 tracing_reset(&tr->max_buffer, iter->cpu_file);
6200 mutex_unlock(&trace_types_lock);
6204 static int tracing_snapshot_release(struct inode *inode, struct file *file)
6206 struct seq_file *m = file->private_data;
6209 ret = tracing_release(inode, file);
6211 if (file->f_mode & FMODE_READ)
6214 /* If write only, the seq_file is just a stub */
6222 static int tracing_buffers_open(struct inode *inode, struct file *filp);
6223 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
6224 size_t count, loff_t *ppos);
6225 static int tracing_buffers_release(struct inode *inode, struct file *file);
6226 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6227 struct pipe_inode_info *pipe, size_t len, unsigned int flags);
6229 static int snapshot_raw_open(struct inode *inode, struct file *filp)
6231 struct ftrace_buffer_info *info;
6234 ret = tracing_buffers_open(inode, filp);
6238 info = filp->private_data;
6240 if (info->iter.trace->use_max_tr) {
6241 tracing_buffers_release(inode, filp);
6245 info->iter.snapshot = true;
6246 info->iter.trace_buffer = &info->iter.tr->max_buffer;
6251 #endif /* CONFIG_TRACER_SNAPSHOT */
6254 static const struct file_operations tracing_thresh_fops = {
6255 .open = tracing_open_generic,
6256 .read = tracing_thresh_read,
6257 .write = tracing_thresh_write,
6258 .llseek = generic_file_llseek,
6261 #if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
6262 static const struct file_operations tracing_max_lat_fops = {
6263 .open = tracing_open_generic,
6264 .read = tracing_max_lat_read,
6265 .write = tracing_max_lat_write,
6266 .llseek = generic_file_llseek,
6270 static const struct file_operations set_tracer_fops = {
6271 .open = tracing_open_generic,
6272 .read = tracing_set_trace_read,
6273 .write = tracing_set_trace_write,
6274 .llseek = generic_file_llseek,
6277 static const struct file_operations tracing_pipe_fops = {
6278 .open = tracing_open_pipe,
6279 .poll = tracing_poll_pipe,
6280 .read = tracing_read_pipe,
6281 .splice_read = tracing_splice_read_pipe,
6282 .release = tracing_release_pipe,
6283 .llseek = no_llseek,
6286 static const struct file_operations tracing_entries_fops = {
6287 .open = tracing_open_generic_tr,
6288 .read = tracing_entries_read,
6289 .write = tracing_entries_write,
6290 .llseek = generic_file_llseek,
6291 .release = tracing_release_generic_tr,
6294 static const struct file_operations tracing_total_entries_fops = {
6295 .open = tracing_open_generic_tr,
6296 .read = tracing_total_entries_read,
6297 .llseek = generic_file_llseek,
6298 .release = tracing_release_generic_tr,
6301 static const struct file_operations tracing_free_buffer_fops = {
6302 .open = tracing_open_generic_tr,
6303 .write = tracing_free_buffer_write,
6304 .release = tracing_free_buffer_release,
6307 static const struct file_operations tracing_mark_fops = {
6308 .open = tracing_open_generic_tr,
6309 .write = tracing_mark_write,
6310 .llseek = generic_file_llseek,
6311 .release = tracing_release_generic_tr,
6314 static const struct file_operations tracing_mark_raw_fops = {
6315 .open = tracing_open_generic_tr,
6316 .write = tracing_mark_raw_write,
6317 .llseek = generic_file_llseek,
6318 .release = tracing_release_generic_tr,
6321 static const struct file_operations trace_clock_fops = {
6322 .open = tracing_clock_open,
6324 .llseek = seq_lseek,
6325 .release = tracing_single_release_tr,
6326 .write = tracing_clock_write,
6329 #ifdef CONFIG_TRACER_SNAPSHOT
6330 static const struct file_operations snapshot_fops = {
6331 .open = tracing_snapshot_open,
6333 .write = tracing_snapshot_write,
6334 .llseek = tracing_lseek,
6335 .release = tracing_snapshot_release,
6338 static const struct file_operations snapshot_raw_fops = {
6339 .open = snapshot_raw_open,
6340 .read = tracing_buffers_read,
6341 .release = tracing_buffers_release,
6342 .splice_read = tracing_buffers_splice_read,
6343 .llseek = no_llseek,
6346 #endif /* CONFIG_TRACER_SNAPSHOT */
6348 static int tracing_buffers_open(struct inode *inode, struct file *filp)
6350 struct trace_array *tr = inode->i_private;
6351 struct ftrace_buffer_info *info;
6354 if (tracing_disabled)
6357 if (trace_array_get(tr) < 0)
6360 info = kzalloc(sizeof(*info), GFP_KERNEL);
6362 trace_array_put(tr);
6366 mutex_lock(&trace_types_lock);
6369 info->iter.cpu_file = tracing_get_cpu(inode);
6370 info->iter.trace = tr->current_trace;
6371 info->iter.trace_buffer = &tr->trace_buffer;
6373 /* Force reading ring buffer for first read */
6374 info->read = (unsigned int)-1;
6376 filp->private_data = info;
6378 tr->current_trace->ref++;
6380 mutex_unlock(&trace_types_lock);
6382 ret = nonseekable_open(inode, filp);
6384 trace_array_put(tr);
6390 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
6392 struct ftrace_buffer_info *info = filp->private_data;
6393 struct trace_iterator *iter = &info->iter;
6395 return trace_poll(iter, filp, poll_table);
6399 tracing_buffers_read(struct file *filp, char __user *ubuf,
6400 size_t count, loff_t *ppos)
6402 struct ftrace_buffer_info *info = filp->private_data;
6403 struct trace_iterator *iter = &info->iter;
6410 #ifdef CONFIG_TRACER_MAX_TRACE
6411 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6416 info->spare = ring_buffer_alloc_read_page(iter->trace_buffer->buffer,
6418 info->spare_cpu = iter->cpu_file;
6423 /* Do we have previous read data to read? */
6424 if (info->read < PAGE_SIZE)
6428 trace_access_lock(iter->cpu_file);
6429 ret = ring_buffer_read_page(iter->trace_buffer->buffer,
6433 trace_access_unlock(iter->cpu_file);
6436 if (trace_empty(iter)) {
6437 if ((filp->f_flags & O_NONBLOCK))
6440 ret = wait_on_pipe(iter, false);
6451 size = PAGE_SIZE - info->read;
6455 ret = copy_to_user(ubuf, info->spare + info->read, size);
6467 static int tracing_buffers_release(struct inode *inode, struct file *file)
6469 struct ftrace_buffer_info *info = file->private_data;
6470 struct trace_iterator *iter = &info->iter;
6472 mutex_lock(&trace_types_lock);
6474 iter->tr->current_trace->ref--;
6476 __trace_array_put(iter->tr);
6479 ring_buffer_free_read_page(iter->trace_buffer->buffer,
6480 info->spare_cpu, info->spare);
6483 mutex_unlock(&trace_types_lock);
6489 struct ring_buffer *buffer;
6495 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
6496 struct pipe_buffer *buf)
6498 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6503 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6508 static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
6509 struct pipe_buffer *buf)
6511 struct buffer_ref *ref = (struct buffer_ref *)buf->private;
6516 /* Pipe buffer operations for a buffer. */
6517 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
6519 .confirm = generic_pipe_buf_confirm,
6520 .release = buffer_pipe_buf_release,
6521 .steal = generic_pipe_buf_steal,
6522 .get = buffer_pipe_buf_get,
6526 * Callback from splice_to_pipe(), if we need to release some pages
6527 * at the end of the spd in case we errored out while filling the pipe.
6529 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
6531 struct buffer_ref *ref =
6532 (struct buffer_ref *)spd->partial[i].private;
6537 ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
6539 spd->partial[i].private = 0;
6543 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
6544 struct pipe_inode_info *pipe, size_t len,
6547 struct ftrace_buffer_info *info = file->private_data;
6548 struct trace_iterator *iter = &info->iter;
6549 struct partial_page partial_def[PIPE_DEF_BUFFERS];
6550 struct page *pages_def[PIPE_DEF_BUFFERS];
6551 struct splice_pipe_desc spd = {
6553 .partial = partial_def,
6554 .nr_pages_max = PIPE_DEF_BUFFERS,
6555 .ops = &buffer_pipe_buf_ops,
6556 .spd_release = buffer_spd_release,
6558 struct buffer_ref *ref;
6559 int entries, size, i;
6562 #ifdef CONFIG_TRACER_MAX_TRACE
6563 if (iter->snapshot && iter->tr->current_trace->use_max_tr)
6567 if (*ppos & (PAGE_SIZE - 1))
6570 if (len & (PAGE_SIZE - 1)) {
6571 if (len < PAGE_SIZE)
6576 if (splice_grow_spd(pipe, &spd))
6580 trace_access_lock(iter->cpu_file);
6581 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6583 for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= PAGE_SIZE) {
6587 ref = kzalloc(sizeof(*ref), GFP_KERNEL);
6594 ref->buffer = iter->trace_buffer->buffer;
6595 ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
6601 ref->cpu = iter->cpu_file;
6603 r = ring_buffer_read_page(ref->buffer, &ref->page,
6604 len, iter->cpu_file, 1);
6606 ring_buffer_free_read_page(ref->buffer, ref->cpu,
6613 * zero out any left over data, this is going to
6616 size = ring_buffer_page_len(ref->page);
6617 if (size < PAGE_SIZE)
6618 memset(ref->page + size, 0, PAGE_SIZE - size);
6620 page = virt_to_page(ref->page);
6622 spd.pages[i] = page;
6623 spd.partial[i].len = PAGE_SIZE;
6624 spd.partial[i].offset = 0;
6625 spd.partial[i].private = (unsigned long)ref;
6629 entries = ring_buffer_entries_cpu(iter->trace_buffer->buffer, iter->cpu_file);
6632 trace_access_unlock(iter->cpu_file);
6635 /* did we read anything? */
6636 if (!spd.nr_pages) {
6641 if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
6644 ret = wait_on_pipe(iter, true);
6651 ret = splice_to_pipe(pipe, &spd);
6653 splice_shrink_spd(&spd);
6658 static const struct file_operations tracing_buffers_fops = {
6659 .open = tracing_buffers_open,
6660 .read = tracing_buffers_read,
6661 .poll = tracing_buffers_poll,
6662 .release = tracing_buffers_release,
6663 .splice_read = tracing_buffers_splice_read,
6664 .llseek = no_llseek,
6668 tracing_stats_read(struct file *filp, char __user *ubuf,
6669 size_t count, loff_t *ppos)
6671 struct inode *inode = file_inode(filp);
6672 struct trace_array *tr = inode->i_private;
6673 struct trace_buffer *trace_buf = &tr->trace_buffer;
6674 int cpu = tracing_get_cpu(inode);
6675 struct trace_seq *s;
6677 unsigned long long t;
6678 unsigned long usec_rem;
6680 s = kmalloc(sizeof(*s), GFP_KERNEL);
6686 cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
6687 trace_seq_printf(s, "entries: %ld\n", cnt);
6689 cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
6690 trace_seq_printf(s, "overrun: %ld\n", cnt);
6692 cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
6693 trace_seq_printf(s, "commit overrun: %ld\n", cnt);
6695 cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
6696 trace_seq_printf(s, "bytes: %ld\n", cnt);
6698 if (trace_clocks[tr->clock_id].in_ns) {
6699 /* local or global for trace_clock */
6700 t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6701 usec_rem = do_div(t, USEC_PER_SEC);
6702 trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
6705 t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
6706 usec_rem = do_div(t, USEC_PER_SEC);
6707 trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
6709 /* counter or tsc mode for trace_clock */
6710 trace_seq_printf(s, "oldest event ts: %llu\n",
6711 ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
6713 trace_seq_printf(s, "now ts: %llu\n",
6714 ring_buffer_time_stamp(trace_buf->buffer, cpu));
6717 cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
6718 trace_seq_printf(s, "dropped events: %ld\n", cnt);
6720 cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
6721 trace_seq_printf(s, "read events: %ld\n", cnt);
6723 count = simple_read_from_buffer(ubuf, count, ppos,
6724 s->buffer, trace_seq_used(s));
6731 static const struct file_operations tracing_stats_fops = {
6732 .open = tracing_open_generic_tr,
6733 .read = tracing_stats_read,
6734 .llseek = generic_file_llseek,
6735 .release = tracing_release_generic_tr,
6738 #ifdef CONFIG_DYNAMIC_FTRACE
6741 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
6742 size_t cnt, loff_t *ppos)
6744 unsigned long *p = filp->private_data;
6745 char buf[64]; /* Not too big for a shallow stack */
6748 r = scnprintf(buf, 63, "%ld", *p);
6751 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6754 static const struct file_operations tracing_dyn_info_fops = {
6755 .open = tracing_open_generic,
6756 .read = tracing_read_dyn_info,
6757 .llseek = generic_file_llseek,
6759 #endif /* CONFIG_DYNAMIC_FTRACE */
6761 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
6763 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
6764 struct trace_array *tr, struct ftrace_probe_ops *ops,
6767 tracing_snapshot_instance(tr);
6771 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
6772 struct trace_array *tr, struct ftrace_probe_ops *ops,
6775 struct ftrace_func_mapper *mapper = data;
6779 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6789 tracing_snapshot_instance(tr);
6793 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
6794 struct ftrace_probe_ops *ops, void *data)
6796 struct ftrace_func_mapper *mapper = data;
6799 seq_printf(m, "%ps:", (void *)ip);
6801 seq_puts(m, "snapshot");
6804 count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
6807 seq_printf(m, ":count=%ld\n", *count);
6809 seq_puts(m, ":unlimited\n");
6815 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
6816 unsigned long ip, void *init_data, void **data)
6818 struct ftrace_func_mapper *mapper = *data;
6821 mapper = allocate_ftrace_func_mapper();
6827 return ftrace_func_mapper_add_ip(mapper, ip, init_data);
6831 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
6832 unsigned long ip, void *data)
6834 struct ftrace_func_mapper *mapper = data;
6839 free_ftrace_func_mapper(mapper, NULL);
6843 ftrace_func_mapper_remove_ip(mapper, ip);
6846 static struct ftrace_probe_ops snapshot_probe_ops = {
6847 .func = ftrace_snapshot,
6848 .print = ftrace_snapshot_print,
6851 static struct ftrace_probe_ops snapshot_count_probe_ops = {
6852 .func = ftrace_count_snapshot,
6853 .print = ftrace_snapshot_print,
6854 .init = ftrace_snapshot_init,
6855 .free = ftrace_snapshot_free,
6859 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
6860 char *glob, char *cmd, char *param, int enable)
6862 struct ftrace_probe_ops *ops;
6863 void *count = (void *)-1;
6867 /* hash funcs only work with set_ftrace_filter */
6871 ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
6874 return unregister_ftrace_function_probe_func(glob+1, tr, ops);
6879 number = strsep(¶m, ":");
6881 if (!strlen(number))
6885 * We use the callback data field (which is a pointer)
6888 ret = kstrtoul(number, 0, (unsigned long *)&count);
6893 ret = alloc_snapshot(tr);
6897 ret = register_ftrace_function_probe(glob, tr, ops, count);
6900 return ret < 0 ? ret : 0;
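/*
 * Usage note (illustrative): this callback implements the ftrace
 * "snapshot" command registered just below via ftrace_snapshot_cmd and
 * is driven from set_ftrace_filter.  A hedged example, assuming
 * "schedule" appears in available_filter_functions:
 *
 *	# echo 'schedule:snapshot' > set_ftrace_filter      (every hit)
 *	# echo 'schedule:snapshot:1' > set_ftrace_filter    (at most once)
 *	# echo '!schedule:snapshot' > set_ftrace_filter     (remove probe)
 *
 * With a count, ftrace_count_snapshot() consults the per-ip count kept
 * in the ftrace_func_mapper before triggering another snapshot.
 */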
6903 static struct ftrace_func_command ftrace_snapshot_cmd = {
6905 .func = ftrace_trace_snapshot_callback,
6908 static __init int register_snapshot_cmd(void)
6910 return register_ftrace_command(&ftrace_snapshot_cmd);
6913 static inline __init int register_snapshot_cmd(void) { return 0; }
6914 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
6916 static struct dentry *tracing_get_dentry(struct trace_array *tr)
6918 if (WARN_ON(!tr->dir))
6919 return ERR_PTR(-ENODEV);
6921 /* Top directory uses NULL as the parent */
6922 if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
6925 /* All sub buffers have a descriptor */
6929 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
6931 struct dentry *d_tracer;
6934 return tr->percpu_dir;
6936 d_tracer = tracing_get_dentry(tr);
6937 if (IS_ERR(d_tracer))
6940 tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
6942 WARN_ONCE(!tr->percpu_dir,
6943 "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
6945 return tr->percpu_dir;
6948 static struct dentry *
6949 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
6950 void *data, long cpu, const struct file_operations *fops)
6952 struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
6954 if (ret) /* See tracing_get_cpu() */
6955 d_inode(ret)->i_cdev = (void *)(cpu + 1);
6960 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
6962 struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
6963 struct dentry *d_cpu;
6964 char cpu_dir[30]; /* 30 characters should be more than enough */
6969 snprintf(cpu_dir, 30, "cpu%ld", cpu);
6970 d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
6972 pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
6976 /* per cpu trace_pipe */
6977 trace_create_cpu_file("trace_pipe", 0444, d_cpu,
6978 tr, cpu, &tracing_pipe_fops);
6981 trace_create_cpu_file("trace", 0644, d_cpu,
6982 tr, cpu, &tracing_fops);
6984 trace_create_cpu_file("trace_pipe_raw", 0444, d_cpu,
6985 tr, cpu, &tracing_buffers_fops);
6987 trace_create_cpu_file("stats", 0444, d_cpu,
6988 tr, cpu, &tracing_stats_fops);
6990 trace_create_cpu_file("buffer_size_kb", 0444, d_cpu,
6991 tr, cpu, &tracing_entries_fops);
6993 #ifdef CONFIG_TRACER_SNAPSHOT
6994 trace_create_cpu_file("snapshot", 0644, d_cpu,
6995 tr, cpu, &snapshot_fops);
6997 trace_create_cpu_file("snapshot_raw", 0444, d_cpu,
6998 tr, cpu, &snapshot_raw_fops);
7002 #ifdef CONFIG_FTRACE_SELFTEST
7003 /* Let selftest have access to static functions in this file */
7004 #include "trace_selftest.c"
7008 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
7011 struct trace_option_dentry *topt = filp->private_data;
7014 if (topt->flags->val & topt->opt->bit)
7019 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7023 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
7026 struct trace_option_dentry *topt = filp->private_data;
7030 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7034 if (val != 0 && val != 1)
7037 if (!!(topt->flags->val & topt->opt->bit) != val) {
7038 mutex_lock(&trace_types_lock);
7039 ret = __set_tracer_option(topt->tr, topt->flags,
7041 mutex_unlock(&trace_types_lock);
7052 static const struct file_operations trace_options_fops = {
7053 .open = tracing_open_generic,
7054 .read = trace_options_read,
7055 .write = trace_options_write,
7056 .llseek = generic_file_llseek,
7060 * In order to pass in both the trace_array descriptor as well as the index
7061 * to the flag that the trace option file represents, the trace_array
7062 * has a character array of trace_flags_index[], which holds the index
7063 * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
7064 * The address of this character array is passed to the flag option file
7065 * read/write callbacks.
7067 * In order to extract both the index and the trace_array descriptor,
7068 * get_tr_index() uses the following algorithm.
7072 * As the pointer itself contains the address of the index (remember
7075 * Then to get the trace_array descriptor, by subtracting that index
7076 * from the ptr, we get to the start of the index itself.
7078 * ptr - idx == &index[0]
7080 * Then a simple container_of() from that pointer gets us to the
7081 * trace_array descriptor.
7083 static void get_tr_index(void *data, struct trace_array **ptr,
7084 unsigned int *pindex)
7086 *pindex = *(unsigned char *)data;
7088 *ptr = container_of(data - *pindex, struct trace_array,
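/*
 * Worked example (illustrative) of the pointer arithmetic described
 * above.  init_trace_flags_index() below fills tr->trace_flags_index[i]
 * with i, and create_trace_option_core_file() hands the option file the
 * address of one slot:
 *
 *	data = &tr->trace_flags_index[3];	// option file for flag bit 3
 *
 *	*pindex = *(unsigned char *)data;	// reads back 3
 *	data - *pindex;				// == &tr->trace_flags_index[0]
 *	container_of(...);			// recovers tr itself
 *
 * The stored byte is thus both the flag index and the offset back to the
 * start of the array, which is what lets a single void pointer carry
 * both the trace_array and the bit number.
 */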
7093 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
7096 void *tr_index = filp->private_data;
7097 struct trace_array *tr;
7101 get_tr_index(tr_index, &tr, &index);
7103 if (tr->trace_flags & (1 << index))
7108 return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
7112 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
7115 void *tr_index = filp->private_data;
7116 struct trace_array *tr;
7121 get_tr_index(tr_index, &tr, &index);
7123 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7127 if (val != 0 && val != 1)
7130 mutex_lock(&trace_types_lock);
7131 ret = set_tracer_flag(tr, 1 << index, val);
7132 mutex_unlock(&trace_types_lock);
7142 static const struct file_operations trace_options_core_fops = {
7143 .open = tracing_open_generic,
7144 .read = trace_options_core_read,
7145 .write = trace_options_core_write,
7146 .llseek = generic_file_llseek,
7149 struct dentry *trace_create_file(const char *name,
7151 struct dentry *parent,
7153 const struct file_operations *fops)
7157 ret = tracefs_create_file(name, mode, parent, data, fops);
7159 pr_warn("Could not create tracefs '%s' entry\n", name);
7165 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
7167 struct dentry *d_tracer;
7172 d_tracer = tracing_get_dentry(tr);
7173 if (IS_ERR(d_tracer))
7176 tr->options = tracefs_create_dir("options", d_tracer);
7178 pr_warn("Could not create tracefs directory 'options'\n");
7186 create_trace_option_file(struct trace_array *tr,
7187 struct trace_option_dentry *topt,
7188 struct tracer_flags *flags,
7189 struct tracer_opt *opt)
7191 struct dentry *t_options;
7193 t_options = trace_options_init_dentry(tr);
7197 topt->flags = flags;
7201 topt->entry = trace_create_file(opt->name, 0644, t_options, topt,
7202 &trace_options_fops);
7207 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
7209 struct trace_option_dentry *topts;
7210 struct trace_options *tr_topts;
7211 struct tracer_flags *flags;
7212 struct tracer_opt *opts;
7219 flags = tracer->flags;
7221 if (!flags || !flags->opts)
7225 * If this is an instance, only create flags for tracers
7226 * the instance may have.
7228 if (!trace_ok_for_array(tracer, tr))
7231 for (i = 0; i < tr->nr_topts; i++) {
7232 /* Make sure there are no duplicate flags. */
7233 if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
7239 for (cnt = 0; opts[cnt].name; cnt++)
7242 topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
7246 tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
7253 tr->topts = tr_topts;
7254 tr->topts[tr->nr_topts].tracer = tracer;
7255 tr->topts[tr->nr_topts].topts = topts;
7258 for (cnt = 0; opts[cnt].name; cnt++) {
7259 create_trace_option_file(tr, &topts[cnt], flags,
7261 WARN_ONCE(topts[cnt].entry == NULL,
7262 "Failed to create trace option: %s",
7267 static struct dentry *
7268 create_trace_option_core_file(struct trace_array *tr,
7269 const char *option, long index)
7271 struct dentry *t_options;
7273 t_options = trace_options_init_dentry(tr);
7277 return trace_create_file(option, 0644, t_options,
7278 (void *)&tr->trace_flags_index[index],
7279 &trace_options_core_fops);
7282 static void create_trace_options_dir(struct trace_array *tr)
7284 struct dentry *t_options;
7285 bool top_level = tr == &global_trace;
7288 t_options = trace_options_init_dentry(tr);
7292 for (i = 0; trace_options[i]; i++) {
7294 !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
7295 create_trace_option_core_file(tr, trace_options[i], i);
7300 rb_simple_read(struct file *filp, char __user *ubuf,
7301 size_t cnt, loff_t *ppos)
7303 struct trace_array *tr = filp->private_data;
7307 r = tracer_tracing_is_on(tr);
7308 r = sprintf(buf, "%d\n", r);
7310 return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7314 rb_simple_write(struct file *filp, const char __user *ubuf,
7315 size_t cnt, loff_t *ppos)
7317 struct trace_array *tr = filp->private_data;
7318 struct ring_buffer *buffer = tr->trace_buffer.buffer;
7322 ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7327 mutex_lock(&trace_types_lock);
7329 tracer_tracing_on(tr);
7330 if (tr->current_trace->start)
7331 tr->current_trace->start(tr);
7333 tracer_tracing_off(tr);
7334 if (tr->current_trace->stop)
7335 tr->current_trace->stop(tr);
7337 mutex_unlock(&trace_types_lock);
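/*
 * Usage note (illustrative): rb_simple_write() backs the "tracing_on"
 * file created in init_tracer_tracefs() below.  Writing 0 stops
 * recording into the ring buffer (and calls the tracer's ->stop() hook
 * if it has one) without tearing the tracer down; writing a non-zero
 * value turns recording back on via ->start():
 *
 *	# echo 0 > tracing_on
 *	# echo 1 > tracing_on
 */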
7345 static const struct file_operations rb_simple_fops = {
7346 .open = tracing_open_generic_tr,
7347 .read = rb_simple_read,
7348 .write = rb_simple_write,
7349 .release = tracing_release_generic_tr,
7350 .llseek = default_llseek,
7353 struct dentry *trace_instance_dir;
7356 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
7359 allocate_trace_buffer(struct trace_array *tr, struct trace_buffer *buf, int size)
7361 enum ring_buffer_flags rb_flags;
7363 rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
7367 buf->buffer = ring_buffer_alloc(size, rb_flags);
7371 buf->data = alloc_percpu(struct trace_array_cpu);
7373 ring_buffer_free(buf->buffer);
7377 /* Allocate the first page for all buffers */
7378 set_buffer_entries(&tr->trace_buffer,
7379 ring_buffer_size(tr->trace_buffer.buffer, 0));
7384 static int allocate_trace_buffers(struct trace_array *tr, int size)
7388 ret = allocate_trace_buffer(tr, &tr->trace_buffer, size);
7392 #ifdef CONFIG_TRACER_MAX_TRACE
7393 ret = allocate_trace_buffer(tr, &tr->max_buffer,
7394 allocate_snapshot ? size : 1);
7396 ring_buffer_free(tr->trace_buffer.buffer);
7397 free_percpu(tr->trace_buffer.data);
7400 tr->allocated_snapshot = allocate_snapshot;
7403 * Only the top level trace array gets its snapshot allocated
7404 * from the kernel command line.
7406 allocate_snapshot = false;
7411 static void free_trace_buffer(struct trace_buffer *buf)
7414 ring_buffer_free(buf->buffer);
7416 free_percpu(buf->data);
7421 static void free_trace_buffers(struct trace_array *tr)
7426 free_trace_buffer(&tr->trace_buffer);
7428 #ifdef CONFIG_TRACER_MAX_TRACE
7429 free_trace_buffer(&tr->max_buffer);
7433 static void init_trace_flags_index(struct trace_array *tr)
7437 /* Used by the trace options files */
7438 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
7439 tr->trace_flags_index[i] = i;
7442 static void __update_tracer_options(struct trace_array *tr)
7446 for (t = trace_types; t; t = t->next)
7447 add_tracer_options(tr, t);
7450 static void update_tracer_options(struct trace_array *tr)
7452 mutex_lock(&trace_types_lock);
7453 __update_tracer_options(tr);
7454 mutex_unlock(&trace_types_lock);
7457 static int instance_mkdir(const char *name)
7459 struct trace_array *tr;
7462 mutex_lock(&trace_types_lock);
7465 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7466 if (tr->name && strcmp(tr->name, name) == 0)
7471 tr = kzalloc(sizeof(*tr), GFP_KERNEL);
7475 tr->name = kstrdup(name, GFP_KERNEL);
7479 if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
7482 tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
7484 cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
7486 raw_spin_lock_init(&tr->start_lock);
7488 tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
7490 tr->current_trace = &nop_trace;
7492 INIT_LIST_HEAD(&tr->systems);
7493 INIT_LIST_HEAD(&tr->events);
7495 if (allocate_trace_buffers(tr, trace_buf_size) < 0)
7498 tr->dir = tracefs_create_dir(name, trace_instance_dir);
7502 ret = event_trace_add_tracer(tr->dir, tr);
7504 tracefs_remove_recursive(tr->dir);
7508 ftrace_init_trace_array(tr);
7510 init_tracer_tracefs(tr, tr->dir);
7511 init_trace_flags_index(tr);
7512 __update_tracer_options(tr);
7514 list_add(&tr->list, &ftrace_trace_arrays);
7516 mutex_unlock(&trace_types_lock);
7521 free_trace_buffers(tr);
7522 free_cpumask_var(tr->tracing_cpumask);
7527 mutex_unlock(&trace_types_lock);
7533 static int instance_rmdir(const char *name)
7535 struct trace_array *tr;
7540 mutex_lock(&trace_types_lock);
7543 list_for_each_entry(tr, &ftrace_trace_arrays, list) {
7544 if (tr->name && strcmp(tr->name, name) == 0) {
7553 if (tr->ref || (tr->current_trace && tr->current_trace->ref))
7556 list_del(&tr->list);
7558 /* Disable all the flags that were enabled coming in */
7559 for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
7560 if ((1 << i) & ZEROED_TRACE_FLAGS)
7561 set_tracer_flag(tr, 1 << i, 0);
7564 tracing_set_nop(tr);
7565 clear_ftrace_function_probes(tr);
7566 event_trace_del_tracer(tr);
7567 ftrace_clear_pids(tr);
7568 ftrace_destroy_function_files(tr);
7569 tracefs_remove_recursive(tr->dir);
7570 free_trace_buffers(tr);
7572 for (i = 0; i < tr->nr_topts; i++) {
7573 kfree(tr->topts[i].topts);
7583 mutex_unlock(&trace_types_lock);
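/*
 * Usage note (illustrative): instance_mkdir()/instance_rmdir() are wired
 * up through tracefs_create_instance_dir() in create_trace_instances()
 * just below, so a new trace_array with its own buffers and event
 * directory is created simply by making a directory (hypothetical name
 * "foo"):
 *
 *	# mkdir instances/foo
 *	# rmdir instances/foo
 *
 * The rmdir is refused while the instance is still referenced (see the
 * tr->ref / current_trace->ref check in instance_rmdir()).
 */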
7588 static __init void create_trace_instances(struct dentry *d_tracer)
7590 trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
7593 if (WARN_ON(!trace_instance_dir))
static void
init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
	int cpu;

	trace_create_file("available_tracers", 0444, d_tracer,
			  tr, &show_traces_fops);

	trace_create_file("current_tracer", 0644, d_tracer,
			  tr, &set_tracer_fops);

	trace_create_file("tracing_cpumask", 0644, d_tracer,
			  tr, &tracing_cpumask_fops);

	trace_create_file("trace_options", 0644, d_tracer,
			  tr, &tracing_iter_fops);

	trace_create_file("trace", 0644, d_tracer,
			  tr, &tracing_fops);

	trace_create_file("trace_pipe", 0444, d_tracer,
			  tr, &tracing_pipe_fops);

	trace_create_file("buffer_size_kb", 0644, d_tracer,
			  tr, &tracing_entries_fops);

	trace_create_file("buffer_total_size_kb", 0444, d_tracer,
			  tr, &tracing_total_entries_fops);

	trace_create_file("free_buffer", 0200, d_tracer,
			  tr, &tracing_free_buffer_fops);

	trace_create_file("trace_marker", 0220, d_tracer,
			  tr, &tracing_mark_fops);

	trace_create_file("trace_marker_raw", 0220, d_tracer,
			  tr, &tracing_mark_raw_fops);

	trace_create_file("trace_clock", 0644, d_tracer, tr,
			  &trace_clock_fops);

	trace_create_file("tracing_on", 0644, d_tracer,
			  tr, &rb_simple_fops);

	create_trace_options_dir(tr);

#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
	trace_create_file("tracing_max_latency", 0644, d_tracer,
			  &tr->max_latency, &tracing_max_lat_fops);
#endif

	if (ftrace_create_function_files(tr, d_tracer))
		WARN(1, "Could not allocate function filter files");

#ifdef CONFIG_TRACER_SNAPSHOT
	trace_create_file("snapshot", 0644, d_tracer,
			  tr, &snapshot_fops);
#endif

	for_each_tracing_cpu(cpu)
		tracing_init_tracefs_percpu(tr, cpu);

	ftrace_init_tracefs(tr, d_tracer);
}

static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
{
	struct vfsmount *mnt;
	struct file_system_type *type;

	/*
	 * To maintain backward compatibility for tools that mount
	 * debugfs to get to the tracing facility, tracefs is automatically
	 * mounted to the debugfs/tracing directory.
	 */
	type = get_fs_type("tracefs");
	if (!type)
		return NULL;
	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
	put_filesystem(type);
	if (IS_ERR(mnt))
		return NULL;
	mntget(mnt);

	return mnt;
}

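/*
 * With the automount in place both views reach the same files; a sketch
 * assuming the conventional mount points:
 *
 *	ls /sys/kernel/tracing/		(native tracefs mount)
 *	ls /sys/kernel/debug/tracing/	(triggers trace_automount())
 */
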
/**
 * tracing_init_dentry - initialize top level trace array
 *
 * This is called when creating files or directories in the tracing
 * directory. It is called via fs_initcall() by any of the boot up code
 * and expects to return the dentry of the top level tracing directory.
 */
struct dentry *tracing_init_dentry(void)
{
	struct trace_array *tr = &global_trace;

	/* The top level trace array uses NULL as parent */
	if (tr->dir)
		return NULL;

	if (WARN_ON(!tracefs_initialized()) ||
		(IS_ENABLED(CONFIG_DEBUG_FS) &&
		 WARN_ON(!debugfs_initialized())))
		return ERR_PTR(-ENODEV);

	/*
	 * As there may still be users that expect the tracing
	 * files to exist in debugfs/tracing, we must automount
	 * the tracefs file system there, so older tools still
	 * work with the newer kernel.
	 */
	tr->dir = debugfs_create_automount("tracing", NULL,
					   trace_automount, NULL);
	if (!tr->dir) {
		pr_warn_once("Could not create debugfs directory 'tracing'\n");
		return ERR_PTR(-ENOMEM);
	}

	return NULL;
}

extern struct trace_eval_map *__start_ftrace_eval_maps[];
extern struct trace_eval_map *__stop_ftrace_eval_maps[];

static void __init trace_eval_init(void)
{
	int len;

	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
}

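/*
 * The eval maps walked above are emitted by trace event headers; a
 * minimal sketch of the producing side (MY_STATE_RUNNING is a
 * hypothetical enum value, not defined in this file):
 *
 *	TRACE_DEFINE_ENUM(MY_STATE_RUNNING);
 *
 * which lets the symbol be resolved to its numeric value in the event
 * format files that user space parses.
 */
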
#ifdef CONFIG_MODULES
static void trace_module_add_evals(struct module *mod)
{
	if (!mod->num_trace_evals)
		return;

	/*
	 * Modules with bad taint do not have events created, do
	 * not bother with enums either.
	 */
	if (trace_module_has_bad_taint(mod))
		return;

	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
}

#ifdef CONFIG_TRACE_EVAL_MAP_FILE
static void trace_module_remove_evals(struct module *mod)
{
	union trace_eval_map_item *map;
	union trace_eval_map_item **last = &trace_eval_maps;

	if (!mod->num_trace_evals)
		return;

	mutex_lock(&trace_eval_mutex);

	map = trace_eval_maps;

	while (map) {
		if (map->head.mod == mod)
			break;
		map = trace_eval_jmp_to_tail(map);
		last = &map->tail.next;
		map = map->tail.next;
	}
	if (!map)
		goto out;

	*last = trace_eval_jmp_to_tail(map)->tail.next;
	kfree(map);
 out:
	mutex_unlock(&trace_eval_mutex);
}
#else
static inline void trace_module_remove_evals(struct module *mod) { }
#endif /* CONFIG_TRACE_EVAL_MAP_FILE */

static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	struct module *mod = data;

	switch (val) {
	case MODULE_STATE_COMING:
		trace_module_add_evals(mod);
		break;
	case MODULE_STATE_GOING:
		trace_module_remove_evals(mod);
		break;
	}

	return 0;
}

static struct notifier_block trace_module_nb = {
	.notifier_call = trace_module_notify,
	.priority = 0,
};
#endif /* CONFIG_MODULES */

static __init int tracer_init_tracefs(void)
{
	struct dentry *d_tracer;

	trace_access_lock_init();

	d_tracer = tracing_init_dentry();
	if (IS_ERR(d_tracer))
		return 0;

	init_tracer_tracefs(&global_trace, d_tracer);
	ftrace_init_tracefs_toplevel(&global_trace, d_tracer);

	trace_create_file("tracing_thresh", 0644, d_tracer,
			  &global_trace, &tracing_thresh_fops);

	trace_create_file("README", 0444, d_tracer,
			  NULL, &tracing_readme_fops);

	trace_create_file("saved_cmdlines", 0444, d_tracer,
			  NULL, &tracing_saved_cmdlines_fops);

	trace_create_file("saved_cmdlines_size", 0644, d_tracer,
			  NULL, &tracing_saved_cmdlines_size_fops);

	trace_create_eval_file(d_tracer);

#ifdef CONFIG_MODULES
	register_module_notifier(&trace_module_nb);
#endif

#ifdef CONFIG_DYNAMIC_FTRACE
	trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
			  &ftrace_update_tot_cnt, &tracing_dyn_info_fops);
#endif

	create_trace_instances(d_tracer);

	update_tracer_options(&global_trace);

	return 0;
}

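/*
 * After this initcall the top level of tracefs holds the global files
 * created above in addition to those from init_tracer_tracefs(); for
 * example (conventional mount point assumed):
 *
 *	cat /sys/kernel/tracing/README
 *	cat /sys/kernel/tracing/saved_cmdlines_size
 */
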
static int trace_panic_handler(struct notifier_block *this,
			       unsigned long event, void *unused)
{
	if (ftrace_dump_on_oops)
		ftrace_dump(ftrace_dump_on_oops);
	return NOTIFY_OK;
}

static struct notifier_block trace_panic_notifier = {
	.notifier_call = trace_panic_handler,
	.next = NULL,
	.priority = 150 /* priority: INT_MAX >= x >= 0 */
};

static int trace_die_handler(struct notifier_block *self,
			     unsigned long val,
			     void *data)
{
	switch (val) {
	case DIE_OOPS:
		if (ftrace_dump_on_oops)
			ftrace_dump(ftrace_dump_on_oops);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block trace_die_notifier = {
	.notifier_call = trace_die_handler,
	.next = NULL,
	.priority = 200
};

/*
 * printk is set to max of 1024, we really don't need it that big.
 * Nothing should be printing 1000 characters anyway.
 */
#define TRACE_MAX_PRINT		1000

/*
 * Define here KERN_TRACE so that we have one place to modify
 * it if we decide to change what log level the ftrace dump
 * should be at.
 */
#define KERN_TRACE		KERN_EMERG

void
trace_printk_seq(struct trace_seq *s)
{
	/* Probably should print a warning here. */
	if (s->seq.len >= TRACE_MAX_PRINT)
		s->seq.len = TRACE_MAX_PRINT;

	/*
	 * More paranoid code. Although the buffer size is set to
	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
	 * an extra layer of protection.
	 */
	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
		s->seq.len = s->seq.size - 1;

	/* should be zero terminated, but we are paranoid. */
	s->buffer[s->seq.len] = 0;

	printk(KERN_TRACE "%s", s->buffer);

	trace_seq_init(s);
}

void trace_init_global_iter(struct trace_iterator *iter)
{
	iter->tr = &global_trace;
	iter->trace = iter->tr->current_trace;
	iter->cpu_file = RING_BUFFER_ALL_CPUS;
	iter->trace_buffer = &global_trace.trace_buffer;

	if (iter->trace && iter->trace->open)
		iter->trace->open(iter);

	/* Annotate start of buffers if we had overruns */
	if (ring_buffer_overruns(iter->trace_buffer->buffer))
		iter->iter_flags |= TRACE_FILE_ANNOTATE;

	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
	if (trace_clocks[iter->tr->clock_id].in_ns)
		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
}

void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
{
	/* use static because iter can be a bit big for the stack */
	static struct trace_iterator iter;
	static atomic_t dump_running;
	struct trace_array *tr = &global_trace;
	unsigned int old_userobj;
	unsigned long flags;
	int cnt = 0, cpu;

	/* Only allow one dump user at a time. */
	if (atomic_inc_return(&dump_running) != 1) {
		atomic_dec(&dump_running);
		return;
	}

	/*
	 * Always turn off tracing when we dump.
	 * We don't need to show trace output of what happens
	 * between multiple crashes.
	 *
	 * If the user does a sysrq-z, then they can re-enable
	 * tracing with echo 1 > tracing_on.
	 */
	tracing_off();

	local_irq_save(flags);

	/* Simulate the iterator */
	trace_init_global_iter(&iter);

	for_each_tracing_cpu(cpu) {
		atomic_inc(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}

	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;

	/* don't look at user memory in panic mode */
	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;

	switch (oops_dump_mode) {
	case DUMP_ALL:
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
		break;
	case DUMP_ORIG:
		iter.cpu_file = raw_smp_processor_id();
		break;
	case DUMP_NONE:
		goto out_enable;
	default:
		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
		iter.cpu_file = RING_BUFFER_ALL_CPUS;
	}

	printk(KERN_TRACE "Dumping ftrace buffer:\n");

	/* Did function tracer already get disabled? */
	if (ftrace_is_dead()) {
		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
	}

	/*
	 * We need to stop all tracing on all CPUS to read
	 * the next buffer. This is a bit expensive, but is
	 * not done often. We fill what we can read, and then
	 * release the locks again.
	 */

	while (!trace_empty(&iter)) {

		if (!cnt)
			printk(KERN_TRACE "---------------------------------\n");

		cnt++;

		/* reset all but tr, trace, and overruns */
		memset(&iter.seq, 0,
		       sizeof(struct trace_iterator) -
		       offsetof(struct trace_iterator, seq));
		iter.iter_flags |= TRACE_FILE_LAT_FMT;
		iter.pos = -1;

		if (trace_find_next_entry_inc(&iter) != NULL) {
			int ret;

			ret = print_trace_line(&iter);
			if (ret != TRACE_TYPE_NO_CONSUME)
				trace_consume(&iter);
		}
		touch_nmi_watchdog();

		trace_printk_seq(&iter.seq);
	}

	if (!cnt)
		printk(KERN_TRACE "   (ftrace buffer empty)\n");
	else
		printk(KERN_TRACE "---------------------------------\n");

 out_enable:
	tr->trace_flags |= old_userobj;

	for_each_tracing_cpu(cpu) {
		atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled);
	}
	atomic_dec(&dump_running);
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ftrace_dump);

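/*
 * ftrace_dump() is exported for use by other kernel code; a minimal
 * sketch of a caller that dumps every CPU's buffer to the console
 * (my_fatal_error() is hypothetical, not part of this file):
 *
 *	static void my_fatal_error(void)
 *	{
 *		ftrace_dump(DUMP_ALL);
 *	}
 *
 * The panic and die handlers above take the same path when
 * ftrace_dump_on_oops is set.
 */
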
__init static int tracer_alloc_buffers(void)
{
	int ring_buf_size;
	int ret = -ENOMEM;

	/*
	 * Make sure we don't accidentally add more trace options
	 * than we have bits for.
	 */
	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);

	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
		goto out;

	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
		goto out_free_buffer_mask;

	/* Only allocate trace_printk buffers if a trace_printk exists */
	if (__stop___trace_bprintk_fmt != __start___trace_bprintk_fmt)
		/* Must be called before global_trace.buffer is allocated */
		trace_printk_init_buffers();

	/* To save memory, keep the ring buffer size to its minimum */
	if (ring_buffer_expanded)
		ring_buf_size = trace_buf_size;
	else
		ring_buf_size = 1;

	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);

	raw_spin_lock_init(&global_trace.start_lock);

	/*
	 * The prepare callbacks allocate some memory for the ring buffer. We
	 * don't free the buffer if the CPU goes down. If we were to free
	 * the buffer, then the user would lose any trace that was in the
	 * buffer. The memory will be removed once the "instance" is removed.
	 */
	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
				      "trace/RB:preapre", trace_rb_cpu_prepare,
				      NULL);
	if (ret < 0)
		goto out_free_cpumask;
	/* Used for event triggers */
	ret = -ENOMEM;
	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
	if (!temp_buffer)
		goto out_rm_hp_state;

	if (trace_create_savedcmd() < 0)
		goto out_free_temp_buffer;

	/* TODO: make the number of buffers hot pluggable with CPUS */
	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
		printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
		WARN_ON(1);
		goto out_free_savedcmd;
	}

	if (global_trace.buffer_disabled)
		tracing_off();

	if (trace_boot_clock) {
		ret = tracing_set_clock(&global_trace, trace_boot_clock);
		if (ret < 0)
			pr_warn("Trace clock %s not defined, going back to default\n",
				trace_boot_clock);
	}

	/*
	 * register_tracer() might reference current_trace, so it
	 * needs to be set before we register anything. This is
	 * just a bootstrap of current_trace anyway.
	 */
	global_trace.current_trace = &nop_trace;

	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;

	ftrace_init_global_array_ops(&global_trace);

	init_trace_flags_index(&global_trace);

	register_tracer(&nop_trace);

	/* Function tracing may start here (via kernel command line) */
	init_function_trace();

	/* All seems OK, enable tracing */
	tracing_disabled = 0;

	atomic_notifier_chain_register(&panic_notifier_list,
				       &trace_panic_notifier);

	register_die_notifier(&trace_die_notifier);

	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;

	INIT_LIST_HEAD(&global_trace.systems);
	INIT_LIST_HEAD(&global_trace.events);
	list_add(&global_trace.list, &ftrace_trace_arrays);

	apply_trace_boot_options();

	register_snapshot_cmd();

	return 0;

out_free_savedcmd:
	free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
	ring_buffer_free(temp_buffer);
out_rm_hp_state:
	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
out_free_cpumask:
	free_cpumask_var(global_trace.tracing_cpumask);
out_free_buffer_mask:
	free_cpumask_var(tracing_buffer_mask);
out:
	return ret;
}

void __init early_trace_init(void)
{
	if (tracepoint_printk) {
		tracepoint_print_iter =
			kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
		if (WARN_ON(!tracepoint_print_iter))
			tracepoint_printk = 0;
		else
			static_key_enable(&tracepoint_printk_key.key);
	}
	tracer_alloc_buffers();
}

void __init trace_init(void)
{
	trace_event_init();
}

__init static int clear_boot_tracer(void)
{
	/*
	 * The default bootup tracer name is kept in an init section.
	 * This function is called in lateinit. If we did not
	 * find the boot tracer, then clear it out, to prevent
	 * later registration from accessing the buffer that is
	 * about to be freed.
	 */
	if (!default_bootup_tracer)
		return 0;

	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
	       default_bootup_tracer);
	default_bootup_tracer = NULL;

	return 0;
}

fs_initcall(tracer_init_tracefs);
late_initcall(clear_boot_tracer);