Merge tag 'trace-v5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt...
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 3 May 2021 18:19:54 +0000 (11:19 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 3 May 2021 18:19:54 +0000 (11:19 -0700)
Pull tracing updates from Steven Rostedt:
 "New feature:

   - A new "func-no-repeats" option in the tracefs options directory.

     When set, the function tracer detects whether the current function
     being traced is the same as the previous one and, instead of
     recording it again, keeps track of the number of times the
     function repeats in a row. When another function is then recorded,
     it writes a new event that shows the function that repeated, the
     number of times it repeated, and the time stamp of when the last
     repeated function occurred (see the sketch below).
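
     A minimal sketch of the idea (illustration only, not the exact
     kernel code; the helper name is made up, while the field names
     follow struct trace_func_repeats used in the kernel/trace/trace.c
     changes below):

       /* Called for every traced function; "now" is the event time stamp */
       static void process_func_repeats(struct trace_func_repeats *last_info,
                                        unsigned long ip, unsigned long parent_ip,
                                        u64 now, struct trace_array *tr,
                                        unsigned int trace_ctx)
       {
               if (last_info->ip == ip && last_info->parent_ip == parent_ip) {
                       /* Same function again: just count it, record nothing */
                       last_info->count++;
                       last_info->ts_last_call = now;
                       return;
               }

               /* A different function: flush any pending repeats first */
               if (last_info->count)
                       trace_last_func_repeats(tr, last_info, trace_ctx);

               last_info->ip = ip;
               last_info->parent_ip = parent_ip;
               last_info->count = 0;
               last_info->ts_last_call = now;
               /* ... the new function is then recorded as usual ... */
       }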

  Enhancements:

   - In order to implement the above "func-no-repeats" option, the ring
     buffer can now report the accurate time stamp of an event as it is
     being recorded, instead of having to switch to absolute time
     stamps for all events. This helps the histogram code, which no
     longer needs to waste ring buffer space (see the first sketch
     after this list).

   - New validation logic to make sure all trace events that print
     dereferenced string pointers do so in a safe way, and to warn
     otherwise (see the second sketch after this list).
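
     The first sketch, condensed from the kernel/trace/trace.c changes
     below: rather than switching the whole buffer over to absolute
     time stamps, callers now ask for the time stamp of the event that
     is currently being recorded:

       u64 tracing_event_time_stamp(struct trace_buffer *buffer,
                                    struct ring_buffer_event *rbe)
       {
               /* The per-CPU buffered event is not in the ring buffer yet */
               if (rbe == this_cpu_read(trace_buffered_event))
                       return ring_buffer_time_stamp(buffer);

               return ring_buffer_event_time_stamp(buffer, rbe);
       }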
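
     The second sketch: the verifier warns when a "%s" in TP_printk()
     points at memory that may already be gone when the trace is read.
     The safe pattern it steers events toward (the event name here is
     made up; see samples/trace_events/trace-events-sample.h) copies
     the string into the event instead of saving a bare pointer:

       TRACE_EVENT(sample_event,

               TP_PROTO(const char *name),

               TP_ARGS(name),

               TP_STRUCT__entry(
                       __string(name, name)    /* reserves space in the event */
               ),

               TP_fast_assign(
                       __assign_str(name, name);       /* copies the string */
               ),

               TP_printk("name=%s", __get_str(name))   /* prints the copy */
       );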

  Fixes:

   - No longer limit the PIDs of tasks that are recorded for
     "saved_cmdlines" to PID_MAX_DEFAULT (32768), as systemd now allows
     for a much larger range. The old limit caused the mapping of PIDs
     to task names to be dropped for all tasks with a PID greater than
     32768 (see the condensed lookup after this list).

   - Change trace_clock_global() to never block, as the blocking could
     cause a deadlock.
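
     Condensed from the __trace_find_cmdline() change below: a PID is
     folded into the fixed-size map by masking, and the reverse map is
     checked so that a collision shows "<...>" instead of a wrong task
     name (for example, PIDs 40000 and 7232 both land in slot
     40000 & 32767 == 7232):

       tpid = pid & (PID_MAX_DEFAULT - 1);
       map = savedcmd->map_pid_to_cmdline[tpid];
       if (map != NO_CMDLINE_MAP && savedcmd->map_cmdline_to_pid[map] == pid)
               strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
       else
               strcpy(comm, "<...>");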

  Clean ups:

   - Typos, prototype fixes, and removal of duplicate or unused code.

   - Better management of ftrace_page allocations"

* tag 'trace-v5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (32 commits)
  tracing: Restructure trace_clock_global() to never block
  tracing: Map all PIDs to command lines
  ftrace: Reuse the output of the function tracer for func_repeats
  tracing: Add "func_no_repeats" option for function tracing
  tracing: Unify the logic for function tracing options
  tracing: Add method for recording "func_repeats" events
  tracing: Add "last_func_repeats" to struct trace_array
  tracing: Define new ftrace event "func_repeats"
  tracing: Define static void trace_print_time()
  ftrace: Simplify the calculation of page number for ftrace_page->records some more
  ftrace: Store the order of pages allocated in ftrace_page
  tracing: Remove unused argument from "ring_buffer_time_stamp()
  tracing: Remove duplicate struct declaration in trace_events.h
  tracing: Update create_system_filter() kernel-doc comment
  tracing: A minor cleanup for create_system_filter()
  kernel: trace: Mundane typo fixes in the file trace_events_filter.c
  tracing: Fix various typos in comments
  scripts/recordmcount.pl: Make vim and emacs indent the same
  scripts/recordmcount.pl: Make indent spacing consistent
  tracing: Add a verifier to check string pointers for trace events
  ...

include/trace/events/io_uring.h
include/trace/events/rcu.h
init/main.c
kernel/trace/ftrace.c
kernel/trace/trace.c

diff --combined include/trace/events/io_uring.h
index bd528176a3d5d99587b826afc58863efe6503946,ba78a5602cd19b0258ad04c17dbd3366dea00eee..abb8b24744fdb726da819afc2768f6615c6d959e
@@@ -49,7 -49,7 +49,7 @@@ TRACE_EVENT(io_uring_create
  );
  
  /**
-  * io_uring_register - called after a buffer/file/eventfd was succesfully
+  * io_uring_register - called after a buffer/file/eventfd was successfully
   *                                       registered for a ring
   *
   * @ctx:                      pointer to a ring context structure
@@@ -290,32 -290,29 +290,32 @@@ TRACE_EVENT(io_uring_fail_link
   * @ctx:              pointer to a ring context structure
   * @user_data:                user data associated with the request
   * @res:              result of the request
 + * @cflags:           completion flags
   *
   */
  TRACE_EVENT(io_uring_complete,
  
 -      TP_PROTO(void *ctx, u64 user_data, long res),
 +      TP_PROTO(void *ctx, u64 user_data, long res, unsigned cflags),
  
 -      TP_ARGS(ctx, user_data, res),
 +      TP_ARGS(ctx, user_data, res, cflags),
  
        TP_STRUCT__entry (
                __field(  void *,       ctx             )
                __field(  u64,          user_data       )
                __field(  long,         res             )
 +              __field(  unsigned,     cflags          )
        ),
  
        TP_fast_assign(
                __entry->ctx            = ctx;
                __entry->user_data      = user_data;
                __entry->res            = res;
 +              __entry->cflags         = cflags;
        ),
  
 -      TP_printk("ring %p, user_data 0x%llx, result %ld",
 +      TP_printk("ring %p, user_data 0x%llx, result %ld, cflags %x",
                          __entry->ctx, (unsigned long long)__entry->user_data,
 -                        __entry->res)
 +                        __entry->res, __entry->cflags)
  );
  
  
diff --combined include/trace/events/rcu.h
index c7711e9b6900545e9b389caef647dbf32f55cdc7,97177c10bf64713a2e5b36c5773f77b8dcfaefc1..6768b64bc738b03ffa8918587e2a0b4b8e0d2c6f
@@@ -48,7 -48,7 +48,7 @@@ TRACE_EVENT(rcu_utilization
   * RCU flavor, the grace-period number, and a string identifying the
   * grace-period-related event as follows:
   *
-  *    "AccReadyCB": CPU acclerates new callbacks to RCU_NEXT_READY_TAIL.
+  *    "AccReadyCB": CPU accelerates new callbacks to RCU_NEXT_READY_TAIL.
   *    "AccWaitCB": CPU accelerates new callbacks to RCU_WAIT_TAIL.
   *    "newreq": Request a new grace period.
   *    "start": Start a grace period.
@@@ -432,34 -432,6 +432,34 @@@ TRACE_EVENT_RCU(rcu_fqs
                  __entry->cpu, __entry->qsevent)
  );
  
 +/*
 + * Tracepoint for RCU stall events. Takes a string identifying the RCU flavor
 + * and a string identifying which function detected the RCU stall as follows:
 + *
 + *    "StallDetected": Scheduler-tick detects other CPU's stalls.
 + *    "SelfDetected": Scheduler-tick detects a current CPU's stall.
 + *    "ExpeditedStall": Expedited grace period detects stalls.
 + */
 +TRACE_EVENT(rcu_stall_warning,
 +
 +      TP_PROTO(const char *rcuname, const char *msg),
 +
 +      TP_ARGS(rcuname, msg),
 +
 +      TP_STRUCT__entry(
 +              __field(const char *, rcuname)
 +              __field(const char *, msg)
 +      ),
 +
 +      TP_fast_assign(
 +              __entry->rcuname = rcuname;
 +              __entry->msg = msg;
 +      ),
 +
 +      TP_printk("%s %s",
 +                __entry->rcuname, __entry->msg)
 +);
 +
  #endif /* #if defined(CONFIG_TREE_RCU) */
  
  /*
diff --combined init/main.c
index dd11bfd10eadeaf9e607d23c6af85e490e62b233,407976d8669e5ea9d8d2b70bf1e25e02aefd53a7..543fbe3060be3b23c021134f394e06c81e143469
@@@ -405,7 -405,7 +405,7 @@@ static int __init bootconfig_params(cha
        return 0;
  }
  
- static void __init setup_boot_config(const char *cmdline)
+ static void __init setup_boot_config(void)
  {
        static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
        const char *msg;
  
  #else
  
- static void __init setup_boot_config(const char *cmdline)
+ static void __init setup_boot_config(void)
  {
        /* Remove bootconfig data from initrd */
        get_boot_config_from_initrd(NULL, NULL);
@@@ -830,7 -830,6 +830,7 @@@ static void __init mm_init(void
        report_meminit();
        stack_depot_init();
        mem_init();
 +      mem_init_print_info();
        /* page_owner must be initialized after buddy is ready */
        page_ext_init_flatmem_late();
        kmem_cache_init();
        pgtable_init();
        debug_objects_mem_init();
        vmalloc_init();
 -      ioremap_huge_init();
        /* Should be run before the first non-init thread is created */
        init_espfix_bsp();
        /* Should be run after espfix64 is set up. */
        pti_init();
  }
  
 +#ifdef CONFIG_HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
 +DEFINE_STATIC_KEY_MAYBE_RO(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,
 +                         randomize_kstack_offset);
 +DEFINE_PER_CPU(u32, kstack_offset);
 +
 +static int __init early_randomize_kstack_offset(char *buf)
 +{
 +      int ret;
 +      bool bool_result;
 +
 +      ret = kstrtobool(buf, &bool_result);
 +      if (ret)
 +              return ret;
 +
 +      if (bool_result)
 +              static_branch_enable(&randomize_kstack_offset);
 +      else
 +              static_branch_disable(&randomize_kstack_offset);
 +      return 0;
 +}
 +early_param("randomize_kstack_offset", early_randomize_kstack_offset);
 +#endif
 +
  void __init __weak arch_call_rest_init(void)
  {
        rest_init();
@@@ -895,7 -872,7 +895,7 @@@ asmlinkage __visible void __init __no_s
        pr_notice("%s", linux_banner);
        early_security_init();
        setup_arch(&command_line);
-       setup_boot_config(command_line);
+       setup_boot_config();
        setup_command_line(command_line);
        setup_nr_cpu_ids();
        setup_per_cpu_areas();
diff --combined kernel/trace/ftrace.c
index 3ba52d4e1314228147112047aa497709dbe6ba59,057e962ca5ce412c8594a7f7269fbbdd7373a28e..792b5589ba645bafb707c2ce9fe4d75a108c90d4
@@@ -1045,7 -1045,7 +1045,7 @@@ struct ftrace_ops global_ops = 
  };
  
  /*
-  * Used by the stack undwinder to know about dynamic ftrace trampolines.
+  * Used by the stack unwinder to know about dynamic ftrace trampolines.
   */
  struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr)
  {
@@@ -1090,7 -1090,7 +1090,7 @@@ struct ftrace_page 
        struct ftrace_page      *next;
        struct dyn_ftrace       *records;
        int                     index;
-       int                     size;
+       int                     order;
  };
  
  #define ENTRY_SIZE sizeof(struct dyn_ftrace)
@@@ -3000,7 -3000,7 +3000,7 @@@ int ftrace_shutdown(struct ftrace_ops *
                 * When the kernel is preemptive, tasks can be preempted
                 * while on a ftrace trampoline. Just scheduling a task on
                 * a CPU is not good enough to flush them. Calling
-                * synchornize_rcu_tasks() will wait for those tasks to
+                * synchronize_rcu_tasks() will wait for those tasks to
                 * execute and either schedule voluntarily or enter user space.
                 */
                if (IS_ENABLED(CONFIG_PREEMPTION))
@@@ -3156,15 -3156,9 +3156,9 @@@ static int ftrace_allocate_records(stru
        if (WARN_ON(!count))
                return -EINVAL;
  
+       /* We want to fill as much as possible, with no empty pages */
        pages = DIV_ROUND_UP(count, ENTRIES_PER_PAGE);
-       order = get_count_order(pages);
-       /*
-        * We want to fill as much as possible. No more than a page
-        * may be empty.
-        */
-       if (!is_power_of_2(pages))
-               order--;
+       order = fls(pages) - 1;
  
   again:
        pg->records = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, order);
        ftrace_number_of_groups++;
  
        cnt = (PAGE_SIZE << order) / ENTRY_SIZE;
-       pg->size = cnt;
+       pg->order = order;
  
        if (cnt > count)
                cnt = count;
@@@ -3194,7 -3188,6 +3188,6 @@@ ftrace_allocate_pages(unsigned long num
  {
        struct ftrace_page *start_pg;
        struct ftrace_page *pg;
-       int order;
        int cnt;
  
        if (!num_to_init)
   free_pages:
        pg = start_pg;
        while (pg) {
-               order = get_count_order(pg->size / ENTRIES_PER_PAGE);
-               if (order >= 0)
-                       free_pages((unsigned long)pg->records, order);
+               if (pg->records) {
+                       free_pages((unsigned long)pg->records, pg->order);
+                       ftrace_number_of_pages -= 1 << pg->order;
+               }
                start_pg = pg->next;
                kfree(pg);
                pg = start_pg;
-               ftrace_number_of_pages -= 1 << order;
                ftrace_number_of_groups--;
        }
        pr_info("ftrace: FAILED to allocate memory for functions\n");
@@@ -5046,20 -5039,6 +5039,20 @@@ struct ftrace_direct_func *ftrace_find_
        return NULL;
  }
  
 +static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr)
 +{
 +      struct ftrace_direct_func *direct;
 +
 +      direct = kmalloc(sizeof(*direct), GFP_KERNEL);
 +      if (!direct)
 +              return NULL;
 +      direct->addr = addr;
 +      direct->count = 0;
 +      list_add_rcu(&direct->next, &ftrace_direct_funcs);
 +      ftrace_direct_func_count++;
 +      return direct;
 +}
 +
  /**
   * register_ftrace_direct - Call a custom trampoline directly
   * @ip: The address of the nop at the beginning of a function
@@@ -5135,11 -5114,15 +5128,11 @@@ int register_ftrace_direct(unsigned lon
  
        direct = ftrace_find_direct_func(addr);
        if (!direct) {
 -              direct = kmalloc(sizeof(*direct), GFP_KERNEL);
 +              direct = ftrace_alloc_direct_func(addr);
                if (!direct) {
                        kfree(entry);
                        goto out_unlock;
                }
 -              direct->addr = addr;
 -              direct->count = 0;
 -              list_add_rcu(&direct->next, &ftrace_direct_funcs);
 -              ftrace_direct_func_count++;
        }
  
        entry->ip = ip;
@@@ -5340,7 -5323,6 +5333,7 @@@ int __weak ftrace_modify_direct_caller(
  int modify_ftrace_direct(unsigned long ip,
                         unsigned long old_addr, unsigned long new_addr)
  {
 +      struct ftrace_direct_func *direct, *new_direct = NULL;
        struct ftrace_func_entry *entry;
        struct dyn_ftrace *rec;
        int ret = -ENODEV;
        if (entry->direct != old_addr)
                goto out_unlock;
  
 +      direct = ftrace_find_direct_func(old_addr);
 +      if (WARN_ON(!direct))
 +              goto out_unlock;
 +      if (direct->count > 1) {
 +              ret = -ENOMEM;
 +              new_direct = ftrace_alloc_direct_func(new_addr);
 +              if (!new_direct)
 +                      goto out_unlock;
 +              direct->count--;
 +              new_direct->count++;
 +      } else {
 +              direct->addr = new_addr;
 +      }
 +
        /*
         * If there's no other ftrace callback on the rec->ip location,
         * then it can be changed directly by the architecture.
                ret = 0;
        }
  
 +      if (unlikely(ret && new_direct)) {
 +              direct->count++;
 +              list_del_rcu(&new_direct->next);
 +              synchronize_rcu_tasks();
 +              kfree(new_direct);
 +              ftrace_direct_func_count--;
 +      }
 +
   out_unlock:
        mutex_unlock(&ftrace_lock);
        mutex_unlock(&direct_mutex);
@@@ -5407,7 -5367,7 +5400,7 @@@ EXPORT_SYMBOL_GPL(modify_ftrace_direct)
   * @reset - non zero to reset all filters before applying this filter.
   *
   * Filters denote which functions should be enabled when tracing is enabled
-  * If @ip is NULL, it failes to update filter.
+  * If @ip is NULL, it fails to update filter.
   */
  int ftrace_set_filter_ip(struct ftrace_ops *ops, unsigned long ip,
                         int remove, int reset)
@@@ -6221,6 -6181,7 +6214,7 @@@ static int ftrace_process_locs(struct m
        p = start;
        pg = start_pg;
        while (p < end) {
+               unsigned long end_offset;
                addr = ftrace_call_adjust(*p++);
                /*
                 * Some architecture linkers will pad between
                if (!addr)
                        continue;
  
-               if (pg->index == pg->size) {
+               end_offset = (pg->index+1) * sizeof(pg->records[0]);
+               if (end_offset > PAGE_SIZE << pg->order) {
                        /* We should have allocated enough */
                        if (WARN_ON(!pg->next))
                                break;
@@@ -6359,7 -6321,7 +6354,7 @@@ clear_mod_from_hash(struct ftrace_page 
        }
  }
  
- /* Clear any records from hashs */
+ /* Clear any records from hashes */
  static void clear_mod_from_hashes(struct ftrace_page *pg)
  {
        struct trace_array *tr;
@@@ -6400,7 -6362,6 +6395,6 @@@ void ftrace_release_mod(struct module *
        struct ftrace_page **last_pg;
        struct ftrace_page *tmp_page = NULL;
        struct ftrace_page *pg;
-       int order;
  
        mutex_lock(&ftrace_lock);
  
                /* Needs to be called outside of ftrace_lock */
                clear_mod_from_hashes(pg);
  
-               order = get_count_order(pg->size / ENTRIES_PER_PAGE);
-               if (order >= 0)
-                       free_pages((unsigned long)pg->records, order);
+               if (pg->records) {
+                       free_pages((unsigned long)pg->records, pg->order);
+                       ftrace_number_of_pages -= 1 << pg->order;
+               }
                tmp_page = pg->next;
                kfree(pg);
-               ftrace_number_of_pages -= 1 << order;
                ftrace_number_of_groups--;
        }
  }
@@@ -6774,7 -6735,6 +6768,6 @@@ void ftrace_free_mem(struct module *mod
        struct ftrace_mod_map *mod_map = NULL;
        struct ftrace_init_func *func, *func_next;
        struct list_head clear_hash;
-       int order;
  
        INIT_LIST_HEAD(&clear_hash);
  
                ftrace_update_tot_cnt--;
                if (!pg->index) {
                        *last_pg = pg->next;
-                       order = get_count_order(pg->size / ENTRIES_PER_PAGE);
-                       if (order >= 0)
-                               free_pages((unsigned long)pg->records, order);
-                       ftrace_number_of_pages -= 1 << order;
+                       if (pg->records) {
+                               free_pages((unsigned long)pg->records, pg->order);
+                               ftrace_number_of_pages -= 1 << pg->order;
+                       }
                        ftrace_number_of_groups--;
                        kfree(pg);
                        pg = container_of(last_pg, struct ftrace_page, next);
diff --combined kernel/trace/trace.c
index 915fe8790f045a247df2514c75277b5bfb58ef59,e28d08905124a1468b7d0acb875727231e8ca6c2..560e4c8d3825bd361db8a2dbf9ae939d3ef0a727
@@@ -514,7 -514,7 +514,7 @@@ void trace_free_pid_list(struct trace_p
   * @filtered_pids: The list of pids to check
   * @search_pid: The PID to find in @filtered_pids
   *
-  * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
+  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
   */
  bool
  trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
@@@ -545,7 -545,7 +545,7 @@@ trace_ignore_this_task(struct trace_pid
                       struct task_struct *task)
  {
        /*
-        * If filterd_no_pids is not empty, and the task's pid is listed
+        * If filtered_no_pids is not empty, and the task's pid is listed
         * in filtered_no_pids, then return true.
         * Otherwise, if filtered_pids is empty, that means we can
         * trace all tasks. If it has content, then only trace pids
@@@ -612,7 -612,7 +612,7 @@@ void *trace_pid_next(struct trace_pid_l
  
        (*pos)++;
  
-       /* pid already is +1 of the actual prevous bit */
+       /* pid already is +1 of the actual previous bit */
        pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
  
        /* Return pid + 1 to allow zero to be represented */
@@@ -771,7 -771,7 +771,7 @@@ static u64 buffer_ftrace_now(struct arr
        if (!buf->buffer)
                return trace_clock_local();
  
-       ts = ring_buffer_time_stamp(buf->buffer, cpu);
+       ts = ring_buffer_time_stamp(buf->buffer);
        ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
  
        return ts;
@@@ -834,7 -834,7 +834,7 @@@ DEFINE_MUTEX(trace_types_lock)
   * The content of events may become garbage if we allow other process consumes
   * these events concurrently:
   *   A) the page of the consumed events may become a normal page
-  *      (not reader page) in ring buffer, and this page will be rewrited
+  *      (not reader page) in ring buffer, and this page will be rewritten
   *      by events producer.
   *   B) The page of the consumed events may become a page for splice_read,
   *      and this page will be returned to system.
@@@ -1520,7 -1520,7 +1520,7 @@@ unsigned long nsecs_to_usecs(unsigned l
  #undef C
  #define C(a, b) b
  
- /* These must match the bit postions in trace_iterator_flags */
+ /* These must match the bit positions in trace_iterator_flags */
  static const char *trace_options[] = {
        TRACE_FLAGS
        NULL
@@@ -2390,14 -2390,13 +2390,13 @@@ static void tracing_stop_tr(struct trac
  
  static int trace_save_cmdline(struct task_struct *tsk)
  {
-       unsigned pid, idx;
+       unsigned tpid, idx;
  
        /* treat recording of idle task as a success */
        if (!tsk->pid)
                return 1;
  
-       if (unlikely(tsk->pid > PID_MAX_DEFAULT))
-               return 0;
+       tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
  
        /*
         * It's not the end of the world if we don't get
        if (!arch_spin_trylock(&trace_cmdline_lock))
                return 0;
  
-       idx = savedcmd->map_pid_to_cmdline[tsk->pid];
+       idx = savedcmd->map_pid_to_cmdline[tpid];
        if (idx == NO_CMDLINE_MAP) {
                idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
  
-               /*
-                * Check whether the cmdline buffer at idx has a pid
-                * mapped. We are going to overwrite that entry so we
-                * need to clear the map_pid_to_cmdline. Otherwise we
-                * would read the new comm for the old pid.
-                */
-               pid = savedcmd->map_cmdline_to_pid[idx];
-               if (pid != NO_CMDLINE_MAP)
-                       savedcmd->map_pid_to_cmdline[pid] = NO_CMDLINE_MAP;
-               savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
-               savedcmd->map_pid_to_cmdline[tsk->pid] = idx;
+               savedcmd->map_pid_to_cmdline[tpid] = idx;
                savedcmd->cmdline_idx = idx;
        }
  
+       savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
        set_cmdline(idx, tsk->comm);
  
        arch_spin_unlock(&trace_cmdline_lock);
  static void __trace_find_cmdline(int pid, char comm[])
  {
        unsigned map;
+       int tpid;
  
        if (!pid) {
                strcpy(comm, "<idle>");
                return;
        }
  
-       if (pid > PID_MAX_DEFAULT) {
-               strcpy(comm, "<...>");
-               return;
+       tpid = pid & (PID_MAX_DEFAULT - 1);
+       map = savedcmd->map_pid_to_cmdline[tpid];
+       if (map != NO_CMDLINE_MAP) {
+               tpid = savedcmd->map_cmdline_to_pid[map];
+               if (tpid == pid) {
+                       strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
+                       return;
+               }
        }
-       map = savedcmd->map_pid_to_cmdline[pid];
-       if (map != NO_CMDLINE_MAP)
-               strlcpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
-       else
-               strcpy(comm, "<...>");
+       strcpy(comm, "<...>");
  }
  
  void trace_find_cmdline(int pid, char comm[])
@@@ -2737,12 -2726,13 +2726,13 @@@ trace_event_buffer_lock_reserve(struct 
                          unsigned int trace_ctx)
  {
        struct ring_buffer_event *entry;
+       struct trace_array *tr = trace_file->tr;
        int val;
  
-       *current_rb = trace_file->tr->array_buffer.buffer;
+       *current_rb = tr->array_buffer.buffer;
  
-       if (!ring_buffer_time_stamp_abs(*current_rb) && (trace_file->flags &
-            (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
+       if (!tr->no_filter_buffering_ref &&
+           (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED)) &&
            (entry = this_cpu_read(trace_buffered_event))) {
                /* Try to use the per cpu buffer first */
                val = this_cpu_inc_return(trace_buffered_event_cnt);
@@@ -3116,6 -3106,40 +3106,40 @@@ static void ftrace_trace_userstack(stru
  
  #endif /* CONFIG_STACKTRACE */
  
+ static inline void
+ func_repeats_set_delta_ts(struct func_repeats_entry *entry,
+                         unsigned long long delta)
+ {
+       entry->bottom_delta_ts = delta & U32_MAX;
+       entry->top_delta_ts = (delta >> 32);
+ }
+
+ void trace_last_func_repeats(struct trace_array *tr,
+                            struct trace_func_repeats *last_info,
+                            unsigned int trace_ctx)
+ {
+       struct trace_buffer *buffer = tr->array_buffer.buffer;
+       struct func_repeats_entry *entry;
+       struct ring_buffer_event *event;
+       u64 delta;
+       event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
+                                           sizeof(*entry), trace_ctx);
+       if (!event)
+               return;
+       delta = ring_buffer_event_time_stamp(buffer, event) -
+               last_info->ts_last_call;
+       entry = ring_buffer_event_data(event);
+       entry->ip = last_info->ip;
+       entry->parent_ip = last_info->parent_ip;
+       entry->count = last_info->count;
+       func_repeats_set_delta_ts(entry, delta);
+       __buffer_unlock_commit(buffer, event);
+ }
+
  /* created for use with alloc_percpu */
  struct trace_buffer_struct {
        int nesting;
@@@ -3368,7 -3392,7 +3392,7 @@@ int trace_array_vprintk(struct trace_ar
   * buffer (use trace_printk() for that), as writing into the top level
   * buffer should only have events that can be individually disabled.
   * trace_printk() is only used for debugging a kernel, and should not
-  * be ever encorporated in normal use.
+  * be ever incorporated in normal use.
   *
   * trace_array_printk() can be used, as it will not add noise to the
   * top level tracing buffer.
@@@ -3545,11 -3569,7 +3569,11 @@@ static char *trace_iter_expand_format(s
  {
        char *tmp;
  
 -      if (iter->fmt == static_fmt_buf)
 +      /*
 +       * iter->tr is NULL when used with tp_printk, which makes
 +       * this get called where it is not safe to call krealloc().
 +       */
 +      if (!iter->tr || iter->fmt == static_fmt_buf)
                return NULL;
  
        tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
        return tmp;
  }
  
+ /* Returns true if the string is safe to dereference from an event */
+ static bool trace_safe_str(struct trace_iterator *iter, const char *str)
+ {
+       unsigned long addr = (unsigned long)str;
+       struct trace_event *trace_event;
+       struct trace_event_call *event;
+       /* OK if part of the event data */
+       if ((addr >= (unsigned long)iter->ent) &&
+           (addr < (unsigned long)iter->ent + iter->ent_size))
+               return true;
+       /* OK if part of the temp seq buffer */
+       if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
+           (addr < (unsigned long)iter->tmp_seq.buffer + PAGE_SIZE))
+               return true;
+       /* Core rodata can not be freed */
+       if (is_kernel_rodata(addr))
+               return true;
+       if (trace_is_tracepoint_string(str))
+               return true;
+       /*
+        * Now this could be a module event, referencing core module
+        * data, which is OK.
+        */
+       if (!iter->ent)
+               return false;
+       trace_event = ftrace_find_event(iter->ent->type);
+       if (!trace_event)
+               return false;
+       event = container_of(trace_event, struct trace_event_call, event);
+       if (!event->mod)
+               return false;
+       /* Would rather have rodata, but this will suffice */
+       if (within_module_core(addr, event->mod))
+               return true;
+       return false;
+ }
+
+ static const char *show_buffer(struct trace_seq *s)
+ {
+       struct seq_buf *seq = &s->seq;
+       seq_buf_terminate(seq);
+       return seq->buffer;
+ }
+
+ static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
+
+ static int test_can_verify_check(const char *fmt, ...)
+ {
+       char buf[16];
+       va_list ap;
+       int ret;
+       /*
+        * The verifier is dependent on vsnprintf() modifies the va_list
+        * passed to it, where it is sent as a reference. Some architectures
+        * (like x86_32) passes it by value, which means that vsnprintf()
+        * does not modify the va_list passed to it, and the verifier
+        * would then need to be able to understand all the values that
+        * vsnprintf can use. If it is passed by value, then the verifier
+        * is disabled.
+        */
+       va_start(ap, fmt);
+       vsnprintf(buf, 16, "%d", ap);
+       ret = va_arg(ap, int);
+       va_end(ap);
+       return ret;
+ }
+
+ static void test_can_verify(void)
+ {
+       if (!test_can_verify_check("%d %d", 0, 1)) {
+               pr_info("trace event string verifier disabled\n");
+               static_branch_inc(&trace_no_verify);
+       }
+ }
+
+ /**
+  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
+  * @iter: The iterator that holds the seq buffer and the event being printed
+  * @fmt: The format used to print the event
+  * @ap: The va_list holding the data to print from @fmt.
+  *
+  * This writes the data into the @iter->seq buffer using the data from
+  * @fmt and @ap. If the format has a %s, then the source of the string
+  * is examined to make sure it is safe to print, otherwise it will
+  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
+  * pointer.
+  */
+ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
+                        va_list ap)
+ {
+       const char *p = fmt;
+       const char *str;
+       int i, j;
+       if (WARN_ON_ONCE(!fmt))
+               return;
+       if (static_branch_unlikely(&trace_no_verify))
+               goto print;
+       /* Don't bother checking when doing a ftrace_dump() */
+       if (iter->fmt == static_fmt_buf)
+               goto print;
+       while (*p) {
+               j = 0;
+               /* We only care about %s and variants */
+               for (i = 0; p[i]; i++) {
+                       if (i + 1 >= iter->fmt_size) {
+                               /*
+                                * If we can't expand the copy buffer,
+                                * just print it.
+                                */
+                               if (!trace_iter_expand_format(iter))
+                                       goto print;
+                       }
+                       if (p[i] == '\\' && p[i+1]) {
+                               i++;
+                               continue;
+                       }
+                       if (p[i] == '%') {
+                               /* Need to test cases like %08.*s */
+                               for (j = 1; p[i+j]; j++) {
+                                       if (isdigit(p[i+j]) ||
+                                           p[i+j] == '*' ||
+                                           p[i+j] == '.')
+                                               continue;
+                                       break;
+                               }
+                               if (p[i+j] == 's')
+                                       break;
+                       }
+                       j = 0;
+               }
+               /* If no %s found then just print normally */
+               if (!p[i])
+                       break;
+               /* Copy up to the %s, and print that */
+               strncpy(iter->fmt, p, i);
+               iter->fmt[i] = '\0';
+               trace_seq_vprintf(&iter->seq, iter->fmt, ap);
+               /* The ap now points to the string data of the %s */
+               str = va_arg(ap, const char *);
+               /*
+                * If you hit this warning, it is likely that the
+                * trace event in question used %s on a string that
+                * was saved at the time of the event, but may not be
+                * around when the trace is read. Use __string(),
+                * __assign_str() and __get_str() helpers in the TRACE_EVENT()
+                * instead. See samples/trace_events/trace-events-sample.h
+                * for reference.
+                */
+               if (WARN_ONCE(!trace_safe_str(iter, str),
+                             "fmt: '%s' current_buffer: '%s'",
+                             fmt, show_buffer(&iter->seq))) {
+                       int ret;
+                       /* Try to safely read the string */
+                       ret = strncpy_from_kernel_nofault(iter->fmt, str,
+                                                         iter->fmt_size);
+                       if (ret < 0)
+                               trace_seq_printf(&iter->seq, "(0x%px)", str);
+                       else
+                               trace_seq_printf(&iter->seq, "(0x%px:%s)",
+                                                str, iter->fmt);
+                       str = "[UNSAFE-MEMORY]";
+                       strcpy(iter->fmt, "%s");
+               } else {
+                       strncpy(iter->fmt, p + i, j + 1);
+                       iter->fmt[j+1] = '\0';
+               }
+               trace_seq_printf(&iter->seq, iter->fmt, str);
+               p += i + j + 1;
+       }
+  print:
+       if (*p)
+               trace_seq_vprintf(&iter->seq, p, ap);
+ }
+
  const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
  {
        const char *p, *new_fmt;
        if (WARN_ON_ONCE(!fmt))
                return fmt;
  
 -      if (iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
 +      if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
                return fmt;
  
        p = fmt;
@@@ -4832,7 -5050,7 +5054,7 @@@ tracing_cpumask_write(struct file *filp
        cpumask_var_t tracing_cpumask_new;
        int err;
  
 -      if (!alloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
 +      if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
                return -ENOMEM;
  
        err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
@@@ -6768,7 -6986,7 +6990,7 @@@ tracing_mark_write(struct file *filp, c
        if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
                /* do not add \n before testing triggers, but add \0 */
                entry->buf[cnt] = '\0';
-               tt = event_triggers_call(tr->trace_marker_file, entry, event);
+               tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
        }
  
        if (entry->buf[cnt - 1] != '\n') {
@@@ -6976,31 -7194,34 +7198,34 @@@ static int tracing_time_stamp_mode_open
        return ret;
  }
  
- int tracing_set_time_stamp_abs(struct trace_array *tr, bool abs)
+ u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
+ {
+       if (rbe == this_cpu_read(trace_buffered_event))
+               return ring_buffer_time_stamp(buffer);
+       return ring_buffer_event_time_stamp(buffer, rbe);
+ }
+
+ /*
+  * Set or disable using the per CPU trace_buffer_event when possible.
+  */
+ int tracing_set_filter_buffering(struct trace_array *tr, bool set)
  {
        int ret = 0;
  
        mutex_lock(&trace_types_lock);
  
-       if (abs && tr->time_stamp_abs_ref++)
+       if (set && tr->no_filter_buffering_ref++)
                goto out;
  
-       if (!abs) {
-               if (WARN_ON_ONCE(!tr->time_stamp_abs_ref)) {
+       if (!set) {
+               if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
                        ret = -EINVAL;
                        goto out;
                }
  
-               if (--tr->time_stamp_abs_ref)
-                       goto out;
+               --tr->no_filter_buffering_ref;
        }
-       ring_buffer_set_time_stamp_abs(tr->array_buffer.buffer, abs);
- #ifdef CONFIG_TRACER_MAX_TRACE
-       if (tr->max_buffer.buffer)
-               ring_buffer_set_time_stamp_abs(tr->max_buffer.buffer, abs);
- #endif
   out:
        mutex_unlock(&trace_types_lock);
  
@@@ -7336,11 -7557,11 +7561,11 @@@ static struct tracing_log_err *get_trac
   * @cmd: The tracing command that caused the error
   * @str: The string to position the caret at within @cmd
   *
-  * Finds the position of the first occurence of @str within @cmd.  The
+  * Finds the position of the first occurrence of @str within @cmd.  The
   * return value can be passed to tracing_log_err() for caret placement
   * within @cmd.
   *
-  * Returns the index within @cmd of the first occurence of @str or 0
+  * Returns the index within @cmd of the first occurrence of @str or 0
   * if @str was not found.
   */
  unsigned int err_pos(char *cmd, const char *str)
@@@ -7890,7 -8111,7 +8115,7 @@@ tracing_stats_read(struct file *filp, c
                trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
                                                                t, usec_rem);
  
-               t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer, cpu));
+               t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
                usec_rem = do_div(t, USEC_PER_SEC);
                trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
        } else {
                                ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
  
                trace_seq_printf(s, "now ts: %llu\n",
-                               ring_buffer_time_stamp(trace_buf->buffer, cpu));
+                               ring_buffer_time_stamp(trace_buf->buffer));
        }
  
        cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
@@@ -8906,6 -9127,7 +9131,7 @@@ static int __remove_instance(struct tra
        ftrace_clear_pids(tr);
        ftrace_destroy_function_files(tr);
        tracefs_remove(tr->dir);
+       free_percpu(tr->last_func_repeats);
        free_trace_buffers(tr);
  
        for (i = 0; i < tr->nr_topts; i++) {
@@@ -9123,7 -9345,7 +9349,7 @@@ int tracing_init_dentry(void
         * As there may still be users that expect the tracing
         * files to exist in debugfs/tracing, we must automount
         * the tracefs file system there, so older tools still
-        * work with the newer kerenl.
+        * work with the newer kernel.
         */
        tr->dir = debugfs_create_automount("tracing", NULL,
                                           trace_automount, NULL);
@@@ -9676,6 -9898,8 +9902,8 @@@ __init static int tracer_alloc_buffers(
  
        register_snapshot_cmd();
  
+       test_can_verify();
+
        return 0;
  
  out_free_savedcmd:
@@@ -9696,7 -9920,7 +9924,7 @@@ void __init early_trace_init(void
  {
        if (tracepoint_printk) {
                tracepoint_print_iter =
 -                      kmalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
 +                      kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
                if (MEM_FAIL(!tracepoint_print_iter,
                             "Failed to allocate trace iterator\n"))
                        tracepoint_printk = 0;