tracing: Create set_event_notrace_pid to not trace tasks
authorSteven Rostedt (VMware) <rostedt@goodmis.org>
Wed, 25 Mar 2020 23:51:19 +0000 (19:51 -0400)
committerSteven Rostedt (VMware) <rostedt@goodmis.org>
Fri, 27 Mar 2020 20:39:02 +0000 (16:39 -0400)
There's currently a way to select a task that should only have its events
traced, but there's no way to select a task not to have its events traced.
Add a set_event_notrace_pid file that acts the same as set_event_pid (and is
also affected by event-fork), but the task pids in this file will not be
traced even if they are listed in the set_event_pid file. This makes it easy
for tools like trace-cmd to "hide" themselves from being traced by events
when they are recording other tasks.

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
kernel/trace/ftrace.c
kernel/trace/trace.h
kernel/trace/trace_events.c

index 7239d9acd09fe4b0e523f5e896e1175b6e927879..0bb62e64280cfceff3abd8a5136057bef44afbe7 100644 (file)
@@ -6987,11 +6987,6 @@ void ftrace_pid_follow_fork(struct trace_array *tr, bool enable)
        }
 }
 
-enum {
-       TRACE_PIDS              = BIT(0),
-       TRACE_NO_PIDS           = BIT(1),
-};
-
 static void clear_ftrace_pids(struct trace_array *tr, int type)
 {
        struct trace_pid_list *pid_list;
@@ -7004,13 +6999,11 @@ static void clear_ftrace_pids(struct trace_array *tr, int type)
                                                lockdep_is_held(&ftrace_lock));
 
        /* Make sure there's something to do */
-       if (!(((type & TRACE_PIDS) && pid_list) ||
-             ((type & TRACE_NO_PIDS) && no_pid_list)))
+       if (!pid_type_enabled(type, pid_list, no_pid_list))
                return;
 
        /* See if the pids still need to be checked after this */
-       if (!((!(type & TRACE_PIDS) && pid_list) ||
-             (!(type & TRACE_NO_PIDS) && no_pid_list))) {
+       if (!still_need_pid_events(type, pid_list, no_pid_list)) {
                unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
                for_each_possible_cpu(cpu)
                        per_cpu_ptr(tr->array_buffer.data, cpu)->ftrace_ignore_pid = FTRACE_PID_TRACE;
index 6b5ff5adb4ad97882b99b41d481aa6dec6e289e3..4eb1d004d5f2b2c441d63ec4658a937ae0d6ec0c 100644 (file)
@@ -207,6 +207,30 @@ struct trace_pid_list {
        unsigned long                   *pids;
 };
 
+enum {
+       TRACE_PIDS              = BIT(0),
+       TRACE_NO_PIDS           = BIT(1),
+};
+
+static inline bool pid_type_enabled(int type, struct trace_pid_list *pid_list,
+                                   struct trace_pid_list *no_pid_list)
+{
+       /* Return true if a pid list selected by @type is set (non-NULL) */
+       return ((type & TRACE_PIDS) && pid_list) ||
+               ((type & TRACE_NO_PIDS) && no_pid_list);
+}
+
+static inline bool still_need_pid_events(int type, struct trace_pid_list *pid_list,
+                                        struct trace_pid_list *no_pid_list)
+{
+       /*
+        * When turning off the list(s) selected by @type, return true if
+        * a pid list that is not selected by @type is still set (non-NULL).
+        */
+       return (!(type & TRACE_PIDS) && pid_list) ||
+               (!(type & TRACE_NO_PIDS) && no_pid_list);
+}
+
 typedef bool (*cond_update_fn_t)(struct trace_array *tr, void *cond_data);
 
 /**
@@ -285,6 +309,7 @@ struct trace_array {
 #endif
 #endif
        struct trace_pid_list   __rcu *filtered_pids;
+       struct trace_pid_list   __rcu *filtered_no_pids;
        /*
         * max_lock is used to protect the swapping of buffers
         * when taking a max snapshot. The buffers themselves are
index c196d0dc587179a75d55b47d72621c365c06e35e..242f59e7f17d5db18c19b2de44b4349817c0fe22 100644 (file)
@@ -232,10 +232,13 @@ bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
 {
        struct trace_array *tr = trace_file->tr;
        struct trace_array_cpu *data;
+       struct trace_pid_list *no_pid_list;
        struct trace_pid_list *pid_list;
 
        pid_list = rcu_dereference_raw(tr->filtered_pids);
-       if (!pid_list)
+       no_pid_list = rcu_dereference_raw(tr->filtered_no_pids);
+
+       if (!pid_list && !no_pid_list)
                return false;
 
        data = this_cpu_ptr(tr->array_buffer.data);
@@ -510,6 +513,9 @@ event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
 
        pid_list = rcu_dereference_raw(tr->filtered_pids);
        trace_filter_add_remove_task(pid_list, NULL, task);
+
+       pid_list = rcu_dereference_raw(tr->filtered_no_pids);
+       trace_filter_add_remove_task(pid_list, NULL, task);
 }
 
 static void
@@ -522,6 +528,9 @@ event_filter_pid_sched_process_fork(void *data,
 
        pid_list = rcu_dereference_sched(tr->filtered_pids);
        trace_filter_add_remove_task(pid_list, self, task);
+
+       pid_list = rcu_dereference_sched(tr->filtered_no_pids);
+       trace_filter_add_remove_task(pid_list, self, task);
 }
 
 void trace_event_follow_fork(struct trace_array *tr, bool enable)
@@ -544,13 +553,23 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
                    struct task_struct *prev, struct task_struct *next)
 {
        struct trace_array *tr = data;
+       struct trace_pid_list *no_pid_list;
        struct trace_pid_list *pid_list;
+       bool ret;
 
        pid_list = rcu_dereference_sched(tr->filtered_pids);
+       no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
-       this_cpu_write(tr->array_buffer.data->ignore_pid,
-                      trace_ignore_this_task(pid_list, NULL, prev) &&
-                      trace_ignore_this_task(pid_list, NULL, next));
+       /*
+        * Sched switch is funny, as we only want to ignore it
+        * in the notrace case if both prev and next should be ignored.
+        */
+       ret = trace_ignore_this_task(NULL, no_pid_list, prev) &&
+               trace_ignore_this_task(NULL, no_pid_list, next);
+
+       this_cpu_write(tr->array_buffer.data->ignore_pid, ret ||
+                      (trace_ignore_this_task(pid_list, NULL, prev) &&
+                       trace_ignore_this_task(pid_list, NULL, next)));
 }
 
 static void
@@ -558,18 +577,21 @@ event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
                    struct task_struct *prev, struct task_struct *next)
 {
        struct trace_array *tr = data;
+       struct trace_pid_list *no_pid_list;
        struct trace_pid_list *pid_list;
 
        pid_list = rcu_dereference_sched(tr->filtered_pids);
+       no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
        this_cpu_write(tr->array_buffer.data->ignore_pid,
-                      trace_ignore_this_task(pid_list, NULL, next));
+                      trace_ignore_this_task(pid_list, no_pid_list, next));
 }
 
 static void
 event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
 {
        struct trace_array *tr = data;
+       struct trace_pid_list *no_pid_list;
        struct trace_pid_list *pid_list;
 
        /* Nothing to do if we are already tracing */
@@ -577,15 +599,17 @@ event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
                return;
 
        pid_list = rcu_dereference_sched(tr->filtered_pids);
+       no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
        this_cpu_write(tr->array_buffer.data->ignore_pid,
-                      trace_ignore_this_task(pid_list, NULL, task));
+                      trace_ignore_this_task(pid_list, no_pid_list, task));
 }
 
 static void
 event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
 {
        struct trace_array *tr = data;
+       struct trace_pid_list *no_pid_list;
        struct trace_pid_list *pid_list;
 
        /* Nothing to do if we are not tracing */
@@ -593,23 +617,15 @@ event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
                return;
 
        pid_list = rcu_dereference_sched(tr->filtered_pids);
+       no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
        /* Set tracing if current is enabled */
        this_cpu_write(tr->array_buffer.data->ignore_pid,
-                      trace_ignore_this_task(pid_list, NULL, current));
+                      trace_ignore_this_task(pid_list, no_pid_list, current));
 }
 
-static void __ftrace_clear_event_pids(struct trace_array *tr)
+static void unregister_pid_events(struct trace_array *tr)
 {
-       struct trace_pid_list *pid_list;
-       struct trace_event_file *file;
-       int cpu;
-
-       pid_list = rcu_dereference_protected(tr->filtered_pids,
-                                            lockdep_is_held(&event_mutex));
-       if (!pid_list)
-               return;
-
        unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
        unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
 
@@ -621,26 +637,55 @@ static void __ftrace_clear_event_pids(struct trace_array *tr)
 
        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
        unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
+}
 
-       list_for_each_entry(file, &tr->events, list) {
-               clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
+{
+       struct trace_pid_list *pid_list;
+       struct trace_pid_list *no_pid_list;
+       struct trace_event_file *file;
+       int cpu;
+
+       pid_list = rcu_dereference_protected(tr->filtered_pids,
+                                            lockdep_is_held(&event_mutex));
+       no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+                                            lockdep_is_held(&event_mutex));
+
+       /* Make sure there's something to do */
+       if (!pid_type_enabled(type, pid_list, no_pid_list))
+               return;
+
+       if (!still_need_pid_events(type, pid_list, no_pid_list)) {
+               unregister_pid_events(tr);
+
+               list_for_each_entry(file, &tr->events, list) {
+                       clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
+               }
+
+               for_each_possible_cpu(cpu)
+                       per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
        }
 
-       for_each_possible_cpu(cpu)
-               per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
+       if (type & TRACE_PIDS)
+               rcu_assign_pointer(tr->filtered_pids, NULL);
 
-       rcu_assign_pointer(tr->filtered_pids, NULL);
+       if (type & TRACE_NO_PIDS)
+               rcu_assign_pointer(tr->filtered_no_pids, NULL);
 
        /* Wait till all users are no longer using pid filtering */
        tracepoint_synchronize_unregister();
 
-       trace_free_pid_list(pid_list);
+       if ((type & TRACE_PIDS) && pid_list)
+               trace_free_pid_list(pid_list);
+
+       if ((type & TRACE_NO_PIDS) && no_pid_list)
+               trace_free_pid_list(no_pid_list);
 }
 
-static void ftrace_clear_event_pids(struct trace_array *tr)
+static void ftrace_clear_event_pids(struct trace_array *tr, int type)
 {
        mutex_lock(&event_mutex);
-       __ftrace_clear_event_pids(tr);
+       __ftrace_clear_event_pids(tr, type);
        mutex_unlock(&event_mutex);
 }
 
@@ -1013,15 +1058,32 @@ static void t_stop(struct seq_file *m, void *p)
 }
 
 static void *
-p_next(struct seq_file *m, void *v, loff_t *pos)
+__next(struct seq_file *m, void *v, loff_t *pos, int type)
 {
        struct trace_array *tr = m->private;
-       struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
+       struct trace_pid_list *pid_list;
+
+       if (type == TRACE_PIDS)
+               pid_list = rcu_dereference_sched(tr->filtered_pids);
+       else
+               pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
        return trace_pid_next(pid_list, v, pos);
 }
 
-static void *p_start(struct seq_file *m, loff_t *pos)
+static void *
+p_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       return __next(m, v, pos, TRACE_PIDS);
+}
+
+static void *
+np_next(struct seq_file *m, void *v, loff_t *pos)
+{
+       return __next(m, v, pos, TRACE_NO_PIDS);
+}
+
+static void *__start(struct seq_file *m, loff_t *pos, int type)
        __acquires(RCU)
 {
        struct trace_pid_list *pid_list;
@@ -1036,7 +1098,10 @@ static void *p_start(struct seq_file *m, loff_t *pos)
        mutex_lock(&event_mutex);
        rcu_read_lock_sched();
 
-       pid_list = rcu_dereference_sched(tr->filtered_pids);
+       if (type == TRACE_PIDS)
+               pid_list = rcu_dereference_sched(tr->filtered_pids);
+       else
+               pid_list = rcu_dereference_sched(tr->filtered_no_pids);
 
        if (!pid_list)
                return NULL;
@@ -1044,6 +1109,18 @@ static void *p_start(struct seq_file *m, loff_t *pos)
        return trace_pid_start(pid_list, pos);
 }
 
+static void *p_start(struct seq_file *m, loff_t *pos)
+       __acquires(RCU)
+{
+       return __start(m, pos, TRACE_PIDS);
+}
+
+static void *np_start(struct seq_file *m, loff_t *pos)
+       __acquires(RCU)
+{
+       return __start(m, pos, TRACE_NO_PIDS);
+}
+
 static void p_stop(struct seq_file *m, void *p)
        __releases(RCU)
 {
@@ -1588,6 +1665,7 @@ static void ignore_task_cpu(void *data)
 {
        struct trace_array *tr = data;
        struct trace_pid_list *pid_list;
+       struct trace_pid_list *no_pid_list;
 
        /*
         * This function is called by on_each_cpu() while the
@@ -1595,18 +1673,50 @@ static void ignore_task_cpu(void *data)
         */
        pid_list = rcu_dereference_protected(tr->filtered_pids,
                                             mutex_is_locked(&event_mutex));
+       no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
+                                            mutex_is_locked(&event_mutex));
 
        this_cpu_write(tr->array_buffer.data->ignore_pid,
-                      trace_ignore_this_task(pid_list, NULL, current));
+                      trace_ignore_this_task(pid_list, no_pid_list, current));
+}
+
+static void register_pid_events(struct trace_array *tr)
+{
+       /*
+        * Register a probe that is called before all other probes
+        * to set ignore_pid if next or prev do not match.
+        * Register a probe that is called after all other probes
+        * to only keep ignore_pid set if next pid matches.
+        */
+       register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
+                                        tr, INT_MAX);
+       register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
+                                        tr, 0);
+
+       register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
+                                        tr, INT_MAX);
+       register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
+                                        tr, 0);
+
+       register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
+                                            tr, INT_MAX);
+       register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
+                                            tr, 0);
+
+       register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
+                                        tr, INT_MAX);
+       register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
+                                        tr, 0);
 }
 
 static ssize_t
-ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
-                      size_t cnt, loff_t *ppos)
+event_pid_write(struct file *filp, const char __user *ubuf,
+               size_t cnt, loff_t *ppos, int type)
 {
        struct seq_file *m = filp->private_data;
        struct trace_array *tr = m->private;
        struct trace_pid_list *filtered_pids = NULL;
+       struct trace_pid_list *other_pids = NULL;
        struct trace_pid_list *pid_list;
        struct trace_event_file *file;
        ssize_t ret;
@@ -1620,14 +1730,26 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
 
        mutex_lock(&event_mutex);
 
-       filtered_pids = rcu_dereference_protected(tr->filtered_pids,
-                                            lockdep_is_held(&event_mutex));
+       if (type == TRACE_PIDS) {
+               filtered_pids = rcu_dereference_protected(tr->filtered_pids,
+                                                         lockdep_is_held(&event_mutex));
+               other_pids = rcu_dereference_protected(tr->filtered_no_pids,
+                                                         lockdep_is_held(&event_mutex));
+       } else {
+               filtered_pids = rcu_dereference_protected(tr->filtered_no_pids,
+                                                         lockdep_is_held(&event_mutex));
+               other_pids = rcu_dereference_protected(tr->filtered_pids,
+                                                         lockdep_is_held(&event_mutex));
+       }
 
        ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
        if (ret < 0)
                goto out;
 
-       rcu_assign_pointer(tr->filtered_pids, pid_list);
+       if (type == TRACE_PIDS)
+               rcu_assign_pointer(tr->filtered_pids, pid_list);
+       else
+               rcu_assign_pointer(tr->filtered_no_pids, pid_list);
 
        list_for_each_entry(file, &tr->events, list) {
                set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
@@ -1636,32 +1758,8 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
        if (filtered_pids) {
                tracepoint_synchronize_unregister();
                trace_free_pid_list(filtered_pids);
-       } else if (pid_list) {
-               /*
-                * Register a probe that is called before all other probes
-                * to set ignore_pid if next or prev do not match.
-                * Register a probe this is called after all other probes
-                * to only keep ignore_pid set if next pid matches.
-                */
-               register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
-                                                tr, INT_MAX);
-               register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
-                                                tr, 0);
-
-               register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
-                                                tr, INT_MAX);
-               register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
-                                                tr, 0);
-
-               register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
-                                                    tr, INT_MAX);
-               register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
-                                                    tr, 0);
-
-               register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
-                                                tr, INT_MAX);
-               register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
-                                                tr, 0);
+       } else if (pid_list && !other_pids) {
+               register_pid_events(tr);
        }
 
        /*
@@ -1680,9 +1778,24 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
        return ret;
 }
 
+static ssize_t
+ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
+                      size_t cnt, loff_t *ppos)
+{
+       return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
+}
+
+static ssize_t
+ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
+                       size_t cnt, loff_t *ppos)
+{
+       return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
+}
+
 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
 static int ftrace_event_set_open(struct inode *inode, struct file *file);
 static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
+static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
 static int ftrace_event_release(struct inode *inode, struct file *file);
 
 static const struct seq_operations show_event_seq_ops = {
@@ -1706,6 +1819,13 @@ static const struct seq_operations show_set_pid_seq_ops = {
        .stop = p_stop,
 };
 
+static const struct seq_operations show_set_no_pid_seq_ops = {
+       .start = np_start,
+       .next = np_next,
+       .show = trace_pid_show,
+       .stop = p_stop,
+};
+
 static const struct file_operations ftrace_avail_fops = {
        .open = ftrace_event_avail_open,
        .read = seq_read,
@@ -1729,6 +1849,14 @@ static const struct file_operations ftrace_set_event_pid_fops = {
        .release = ftrace_event_release,
 };
 
+static const struct file_operations ftrace_set_event_notrace_pid_fops = {
+       .open = ftrace_event_set_npid_open,
+       .read = seq_read,
+       .write = ftrace_event_npid_write,
+       .llseek = seq_lseek,
+       .release = ftrace_event_release,
+};
+
 static const struct file_operations ftrace_enable_fops = {
        .open = tracing_open_generic,
        .read = event_enable_read,
@@ -1858,7 +1986,28 @@ ftrace_event_set_pid_open(struct inode *inode, struct file *file)
 
        if ((file->f_mode & FMODE_WRITE) &&
            (file->f_flags & O_TRUNC))
-               ftrace_clear_event_pids(tr);
+               ftrace_clear_event_pids(tr, TRACE_PIDS);
+
+       ret = ftrace_event_open(inode, file, seq_ops);
+       if (ret < 0)
+               trace_array_put(tr);
+       return ret;
+}
+
+static int
+ftrace_event_set_npid_open(struct inode *inode, struct file *file)
+{
+       const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops;
+       struct trace_array *tr = inode->i_private;
+       int ret;
+
+       ret = tracing_check_open_get_tr(tr);
+       if (ret)
+               return ret;
+
+       if ((file->f_mode & FMODE_WRITE) &&
+           (file->f_flags & O_TRUNC))
+               ftrace_clear_event_pids(tr, TRACE_NO_PIDS);
 
        ret = ftrace_event_open(inode, file, seq_ops);
        if (ret < 0)
@@ -3075,6 +3224,11 @@ create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
        if (!entry)
                pr_warn("Could not create tracefs 'set_event_pid' entry\n");
 
+       entry = tracefs_create_file("set_event_notrace_pid", 0644, parent,
+                                   tr, &ftrace_set_event_notrace_pid_fops);
+       if (!entry)
+               pr_warn("Could not create tracefs 'set_event_notrace_pid' entry\n");
+
        /* ring buffer internal formats */
        entry = trace_create_file("header_page", 0444, d_events,
                                  ring_buffer_print_page_header,
@@ -3158,7 +3312,7 @@ int event_trace_del_tracer(struct trace_array *tr)
        clear_event_triggers(tr);
 
        /* Clear the pid list */
-       __ftrace_clear_event_pids(tr);
+       __ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
 
        /* Disable any running events */
        __ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);