Merge tag 'trace-v5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux...
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 18 Jul 2019 18:51:00 +0000 (11:51 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 18 Jul 2019 18:51:00 +0000 (11:51 -0700)
Pull tracing updates from Steven Rostedt:
 "The main changes in this release include:

   - Add user space specific memory reading for kprobes

   - Allow kprobes to be executed earlier in boot

  The rest are mostly just various clean ups and small fixes"

* tag 'trace-v5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace: (33 commits)
  tracing: Make trace_get_fields() global
  tracing: Let filter_assign_type() detect FILTER_PTR_STRING
  tracing: Pass type into tracing_generic_entry_update()
  ftrace/selftest: Test if set_event/ftrace_pid exists before writing
  ftrace/selftests: Return the skip code when tracing directory not configured in kernel
  tracing/kprobe: Check registered state using kprobe
  tracing/probe: Add trace_event_call accesses APIs
  tracing/probe: Add probe event name and group name accesses APIs
  tracing/probe: Add trace flag access APIs for trace_probe
  tracing/probe: Add trace_event_file access APIs for trace_probe
  tracing/probe: Add trace_event_call register API for trace_probe
  tracing/probe: Add trace_probe init and free functions
  tracing/uprobe: Set print format when parsing command
  tracing/kprobe: Set print format right after parsed command
  kprobes: Fix to init kprobes in subsys_initcall
  tracepoint: Use struct_size() in kmalloc()
  ring-buffer: Remove HAVE_64BIT_ALIGNED_ACCESS
  ftrace: Enable trampoline when rec count returns back to one
  tracing/kprobe: Do not run kprobe boot tests if kprobe_event is on cmdline
  tracing: Make a separate config for trace event self tests
  ...

33 files changed:
Documentation/admin-guide/kernel-parameters.txt
Documentation/trace/kprobetrace.rst
Documentation/trace/uprobetracer.rst
arch/Kconfig
arch/x86/include/asm/uaccess.h
arch/x86/kernel/ftrace.c
include/linux/ftrace.h
include/linux/trace_events.h
include/linux/uaccess.h
kernel/kprobes.c
kernel/trace/Kconfig
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace_event_perf.c
kernel/trace/trace_events.c
kernel/trace/trace_events_filter.c
kernel/trace/trace_kprobe.c
kernel/trace/trace_probe.c
kernel/trace/trace_probe.h
kernel/trace/trace_probe_tmpl.h
kernel/trace/trace_uprobe.c
kernel/tracepoint.c
mm/maccess.c
tools/perf/Documentation/perf-probe.txt
tools/perf/util/probe-event.c
tools/perf/util/probe-event.h
tools/perf/util/probe-file.c
tools/perf/util/probe-file.h
tools/perf/util/probe-finder.c
tools/testing/selftests/ftrace/ftracetest
tools/testing/selftests/ftrace/test.d/functions
tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc [new file with mode: 0644]

index a5f4004e8705ec5286ca61474b9d29cf37470180..f0461456d91033528ea0e7137b0937144a530ec7 100644 (file)
                        Built with CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y,
                        the default is off.
 
+       kprobe_event=[probe-list]
+                       [FTRACE] Add kprobe events and enable at boot time.
+                       The probe-list is a semicolon delimited list of probe
+                       definitions. Each definition is the same as in the
+                       kprobe_events interface, but the parameters are comma
+                       delimited. For example, to add a kprobe event on
+                       vfs_read with arg1 and arg2, add to the command line:
+
+                             kprobe_event=p,vfs_read,$arg1,$arg2
+
+                       See also Documentation/trace/kprobetrace.rst "Kernel
+                       Boot Parameter" section.
+
        kpti=           [ARM64] Control page table isolation of user
                        and kernel address spaces.
                        Default: enabled on cores which need mitigation.
index 7d2b0178d3f31dd60f4450cb831ad0b5cfe4c56d..fbb314bfa11270af97e877930af14c0262958815 100644 (file)
@@ -51,15 +51,17 @@ Synopsis of kprobe_events
   $argN                : Fetch the Nth function argument. (N >= 1) (\*1)
   $retval      : Fetch return value.(\*2)
   $comm                : Fetch current task comm.
-  +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(\*3)
+  +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*3)(\*4)
   NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
   FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
                  (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
-                 (x8/x16/x32/x64), "string" and bitfield are supported.
+                 (x8/x16/x32/x64), "string", "ustring" and bitfield
+                 are supported.
 
   (\*1) only for the probe on function entry (offs == 0).
   (\*2) only for return probe.
   (\*3) this is useful for fetching a field of data structures.
+  (\*4) "u" means user-space dereference. See :ref:`user_mem_access`.
 
 Types
 -----
@@ -77,7 +79,8 @@ apply it to registers/stack-entries etc. (for example, '$stack1:x8[8]' is
 wrong, but '+8($stack):x8[8]' is OK.)
 String type is a special type, which fetches a "null-terminated" string from
 kernel space. This means it will fail and store NULL if the string container
-has been paged out.
+has been paged out. The "ustring" type is an alternative to string for user-space.
+See :ref:`user_mem_access` for more info.
 The string array type is a bit different from other types. For other base
 types, <base-type>[1] is equal to <base-type> (e.g. +0(%di):x32[1] is same
 as +0(%di):x32.) But string[1] is not equal to string. The string type itself
@@ -92,6 +95,25 @@ Symbol type('symbol') is an alias of u32 or u64 type (depends on BITS_PER_LONG)
 which shows given pointer in "symbol+offset" style.
 For $comm, the default type is "string"; any other type is invalid.
 
+.. _user_mem_access:
+User Memory Access
+------------------
+Kprobe events support user-space memory access. For that purpose, you can use
+either user-space dereference syntax or 'ustring' type.
+
+The user-space dereference syntax allows you to access a field of a data
+structure in user-space. This is done by adding the "u" prefix to the
+dereference syntax. For example, +u4(%si) means it will read memory from the
+address in the register %si offset by 4, and the memory is expected to be in
+user-space. You can use this for strings too, e.g. +u0(%si):string will read
+a string from the address in the register %si that is expected to be in user-
+space. 'ustring' is a shortcut way of performing the same task. That is,
++0(%si):ustring is equivalent to +u0(%si):string.
+
+Note that kprobe-event provides the user-memory access syntax but it doesn't
+use it transparently. This means if you use normal dereference or string type
+for user memory, it might fail, and may always fail on some archs. The user
+has to carefully check if the target data is in kernel or user space.
 
 Per-Probe Event Filtering
 -------------------------
@@ -124,6 +146,20 @@ You can check the total number of probe hits and probe miss-hits via
 The first column is event name, the second is the number of probe hits,
 the third is the number of probe miss-hits.
 
+Kernel Boot Parameter
+---------------------
+You can add and enable new kprobe events when booting up the kernel with the
+"kprobe_event=" parameter. The parameter accepts a semicolon-delimited list of
+kprobe events, whose format is similar to that of kprobe_events.
+The difference is that the probe definition parameters are comma-delimited
+instead of space-delimited. For example, adding a myprobe event on do_sys_open
+like below
+
+  p:myprobe do_sys_open dfd=%ax filename=%dx flags=%cx mode=+4($stack)
+
+should be written as below for the kernel boot parameter (just replace spaces with commas)
+
+  p:myprobe,do_sys_open,dfd=%ax,filename=%dx,flags=%cx,mode=+4($stack)
+
 
 Usage examples
 --------------
index 0b21305fabdc79d3f44db3cbcc0c942a5b83235a..6e75a6c5a2c86f29f44d10496abd3b9d344642c6 100644 (file)
@@ -42,16 +42,18 @@ Synopsis of uprobe_tracer
    @+OFFSET    : Fetch memory at OFFSET (OFFSET from same file as PATH)
    $stackN     : Fetch Nth entry of stack (N >= 0)
    $stack      : Fetch stack address.
-   $retval     : Fetch return value.(*)
+   $retval     : Fetch return value.(\*1)
    $comm       : Fetch current task comm.
-   +|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
+   +|-[u]OFFS(FETCHARG) : Fetch memory at FETCHARG +|- OFFS address.(\*2)(\*3)
    NAME=FETCHARG     : Set NAME as the argument name of FETCHARG.
    FETCHARG:TYPE     : Set TYPE as the type of FETCHARG. Currently, basic types
                       (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal types
                       (x8/x16/x32/x64), "string" and bitfield are supported.
 
-  (*) only for return probe.
-  (**) this is useful for fetching a field of data structures.
+  (\*1) only for return probe.
+  (\*2) this is useful for fetching a field of data structures.
+  (\*3) Unlike kprobe event, "u" prefix will just be ignored, because uprobe
+        events can access only user-space memory.
 
 Types
 -----
index e8d19c3cb91f226adf2a6444e61eca4c75b406c7..6dd1faab6ccb048f94f785a487fe3f31fcf9b417 100644 (file)
@@ -128,22 +128,6 @@ config UPROBES
            managed by the kernel and kept transparent to the probed
            application. )
 
-config HAVE_64BIT_ALIGNED_ACCESS
-       def_bool 64BIT && !HAVE_EFFICIENT_UNALIGNED_ACCESS
-       help
-         Some architectures require 64 bit accesses to be 64 bit
-         aligned, which also requires structs containing 64 bit values
-         to be 64 bit aligned too. This includes some 32 bit
-         architectures which can do 64 bit accesses, as well as 64 bit
-         architectures without unaligned access.
-
-         This symbol should be selected by an architecture if 64 bit
-         accesses are required to be 64 bit aligned in this way even
-         though it is not a 64 bit architecture.
-
-         See Documentation/unaligned-memory-access.txt for more
-         information on the topic of unaligned memory accesses.
-
 config HAVE_EFFICIENT_UNALIGNED_ACCESS
        bool
        help
index c82abd6e4ca39ad7e5d8c3ae454fc5d7a8671da3..9c4435307ff89b92dde662b5d0d4adb31cb9f220 100644 (file)
@@ -66,7 +66,9 @@ static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, un
 })
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
-# define WARN_ON_IN_IRQ()      WARN_ON_ONCE(!in_task())
+static inline bool pagefault_disabled(void);
+# define WARN_ON_IN_IRQ()      \
+       WARN_ON_ONCE(!in_task() && !pagefault_disabled())
 #else
 # define WARN_ON_IN_IRQ()
 #endif
index 4b73f5937f41dc2f021f29af9eb58ce694fc28cd..024c3053dbbab673d4d23f7e78726022b03620b5 100644 (file)
@@ -373,7 +373,7 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
        return add_break(rec->ip, old);
 }
 
-static int add_breakpoints(struct dyn_ftrace *rec, int enable)
+static int add_breakpoints(struct dyn_ftrace *rec, bool enable)
 {
        unsigned long ftrace_addr;
        int ret;
@@ -481,7 +481,7 @@ static int add_update_nop(struct dyn_ftrace *rec)
        return add_update_code(ip, new);
 }
 
-static int add_update(struct dyn_ftrace *rec, int enable)
+static int add_update(struct dyn_ftrace *rec, bool enable)
 {
        unsigned long ftrace_addr;
        int ret;
@@ -527,7 +527,7 @@ static int finish_update_nop(struct dyn_ftrace *rec)
        return ftrace_write(ip, new, 1);
 }
 
-static int finish_update(struct dyn_ftrace *rec, int enable)
+static int finish_update(struct dyn_ftrace *rec, bool enable)
 {
        unsigned long ftrace_addr;
        int ret;
index 25e2995d4a4c1f3f89bbb1e8f5fe9595126a4514..8a8cb3c401b269600489e453ce8c9cbc65bcd86b 100644 (file)
@@ -427,8 +427,8 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter);
             iter = ftrace_rec_iter_next(iter))
 
 
-int ftrace_update_record(struct dyn_ftrace *rec, int enable);
-int ftrace_test_record(struct dyn_ftrace *rec, int enable);
+int ftrace_update_record(struct dyn_ftrace *rec, bool enable);
+int ftrace_test_record(struct dyn_ftrace *rec, bool enable);
 void ftrace_run_stop_machine(int command);
 unsigned long ftrace_location(unsigned long ip);
 unsigned long ftrace_location_range(unsigned long start, unsigned long end);
index 8a62731673f768514a73c6d7442dd190efcace4a..5150436783e8e44e920a62a045e181e70fb98887 100644 (file)
@@ -142,6 +142,7 @@ enum print_line_t {
 enum print_line_t trace_handle_return(struct trace_seq *s);
 
 void tracing_generic_entry_update(struct trace_entry *entry,
+                                 unsigned short type,
                                  unsigned long flags,
                                  int pc);
 struct trace_event_file;
@@ -317,6 +318,14 @@ trace_event_name(struct trace_event_call *call)
                return call->name;
 }
 
+static inline struct list_head *
+trace_get_fields(struct trace_event_call *event_call)
+{
+       if (!event_call->class->get_fields)
+               return &event_call->class->fields;
+       return event_call->class->get_fields(event_call);
+}
+
 struct trace_array;
 struct trace_subsystem_dir;
 
index 2b70130af58578da68627927201efd1c5160900a..34a038563d979ed28855df8f2a130a5729434080 100644 (file)
@@ -203,7 +203,10 @@ static inline void pagefault_enable(void)
 /*
  * Is the pagefault handler disabled? If so, user access methods will not sleep.
  */
-#define pagefault_disabled() (current->pagefault_disabled != 0)
+static inline bool pagefault_disabled(void)
+{
+       return current->pagefault_disabled != 0;
+}
 
 /*
  * The pagefault handler is in general disabled by pagefault_disable() or
@@ -239,6 +242,18 @@ static inline unsigned long __copy_from_user_inatomic_nocache(void *to,
 extern long probe_kernel_read(void *dst, const void *src, size_t size);
 extern long __probe_kernel_read(void *dst, const void *src, size_t size);
 
+/*
+ * probe_user_read(): safely attempt to read from a location in user space
+ * @dst: pointer to the buffer that shall take the data
+ * @src: address to read from
+ * @size: size of the data chunk
+ *
+ * Safely read from address @src to the buffer at @dst.  If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+extern long probe_user_read(void *dst, const void __user *src, size_t size);
+extern long __probe_user_read(void *dst, const void __user *src, size_t size);
+
 /*
  * probe_kernel_write(): safely attempt to write to a location
  * @dst: address to write to
@@ -252,6 +267,9 @@ extern long notrace probe_kernel_write(void *dst, const void *src, size_t size);
 extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size);
 
 extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
+extern long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
+                                    long count);
+extern long strnlen_unsafe_user(const void __user *unsafe_addr, long count);
 
 /**
  * probe_kernel_address(): safely attempt to read from a location
index 9f5433a52488c51f9bf86f49f3a0c17d6f973c61..9873fc627d61f14295545817547aa4b96f7a00eb 100644 (file)
@@ -2276,6 +2276,7 @@ static int __init init_kprobes(void)
                init_test_probes();
        return err;
 }
+subsys_initcall(init_kprobes);
 
 #ifdef CONFIG_DEBUG_FS
 static void report_probe(struct seq_file *pi, struct kprobe *p,
@@ -2588,5 +2589,3 @@ static int __init debugfs_kprobe_init(void)
 
 late_initcall(debugfs_kprobe_init);
 #endif /* CONFIG_DEBUG_FS */
-
-module_init(init_kprobes);
index 564e5fdb025ffbaa7c4b468ee08fb966ac13b1bd..98da8998c25ce406a2b0c9620f7295231120ae27 100644 (file)
@@ -597,9 +597,19 @@ config FTRACE_STARTUP_TEST
          functioning properly. It will do tests on all the configured
          tracers of ftrace.
 
+config EVENT_TRACE_STARTUP_TEST
+       bool "Run selftest on trace events"
+       depends on FTRACE_STARTUP_TEST
+       default y
+       help
+         This option performs a test on all trace events in the system.
+         It basically just enables each event and runs some code that
+         will trigger events (not necessarily the event it enables).
+         This may take some time to run as there are a lot of events.
+
 config EVENT_TRACE_TEST_SYSCALLS
        bool "Run selftest on syscall events"
-       depends on FTRACE_STARTUP_TEST
+       depends on EVENT_TRACE_STARTUP_TEST
        help
         This option will also enable testing every syscall event.
         It only enables the event and disables it and runs various loads
index 576c41644e77cec3f8c18f8b04580320deec3f49..eca34503f178ece3f25a469ed3d73f186fd98c9e 100644 (file)
@@ -1622,6 +1622,11 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec)
        return  keep_regs;
 }
 
+static struct ftrace_ops *
+ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
+static struct ftrace_ops *
+ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
+
 static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
                                     int filter_hash,
                                     bool inc)
@@ -1750,15 +1755,17 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
                        }
 
                        /*
-                        * If the rec had TRAMP enabled, then it needs to
-                        * be cleared. As TRAMP can only be enabled iff
-                        * there is only a single ops attached to it.
-                        * In otherwords, always disable it on decrementing.
-                        * In the future, we may set it if rec count is
-                        * decremented to one, and the ops that is left
-                        * has a trampoline.
+                        * The TRAMP needs to be set only if rec count
+                        * is decremented to one, and the ops that is
+                        * left has a trampoline. As TRAMP can only be
+                        * enabled if there is only a single ops attached
+                        * to it.
                         */
-                       rec->flags &= ~FTRACE_FL_TRAMP;
+                       if (ftrace_rec_count(rec) == 1 &&
+                           ftrace_find_tramp_ops_any(rec))
+                               rec->flags |= FTRACE_FL_TRAMP;
+                       else
+                               rec->flags &= ~FTRACE_FL_TRAMP;
 
                        /*
                         * flags will be cleared in ftrace_check_record()
@@ -1768,7 +1775,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops,
                count++;
 
                /* Must match FTRACE_UPDATE_CALLS in ftrace_modify_all_code() */
-               update |= ftrace_test_record(rec, 1) != FTRACE_UPDATE_IGNORE;
+               update |= ftrace_test_record(rec, true) != FTRACE_UPDATE_IGNORE;
 
                /* Shortcut, if we handled all records, we are done. */
                if (!all && count == hash->count)
@@ -1951,11 +1958,6 @@ static void print_ip_ins(const char *fmt, const unsigned char *p)
                printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]);
 }
 
-static struct ftrace_ops *
-ftrace_find_tramp_ops_any(struct dyn_ftrace *rec);
-static struct ftrace_ops *
-ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops);
-
 enum ftrace_bug_type ftrace_bug_type;
 const void *ftrace_expected;
 
@@ -2047,7 +2049,7 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec)
        }
 }
 
-static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
+static int ftrace_check_record(struct dyn_ftrace *rec, bool enable, bool update)
 {
        unsigned long flag = 0UL;
 
@@ -2146,28 +2148,28 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
 /**
  * ftrace_update_record, set a record that now is tracing or not
  * @rec: the record to update
- * @enable: set to 1 if the record is tracing, zero to force disable
+ * @enable: set to true if the record is tracing, false to force disable
  *
  * The records that represent all functions that can be traced need
  * to be updated when tracing has been enabled.
  */
-int ftrace_update_record(struct dyn_ftrace *rec, int enable)
+int ftrace_update_record(struct dyn_ftrace *rec, bool enable)
 {
-       return ftrace_check_record(rec, enable, 1);
+       return ftrace_check_record(rec, enable, true);
 }
 
 /**
  * ftrace_test_record, check if the record has been enabled or not
  * @rec: the record to test
- * @enable: set to 1 to check if enabled, 0 if it is disabled
+ * @enable: set to true to check if enabled, false if it is disabled
  *
  * The arch code may need to test if a record is already set to
  * tracing to determine how to modify the function code that it
  * represents.
  */
-int ftrace_test_record(struct dyn_ftrace *rec, int enable)
+int ftrace_test_record(struct dyn_ftrace *rec, bool enable)
 {
-       return ftrace_check_record(rec, enable, 0);
+       return ftrace_check_record(rec, enable, false);
 }
 
 static struct ftrace_ops *
@@ -2356,7 +2358,7 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec)
 }
 
 static int
-__ftrace_replace_code(struct dyn_ftrace *rec, int enable)
+__ftrace_replace_code(struct dyn_ftrace *rec, bool enable)
 {
        unsigned long ftrace_old_addr;
        unsigned long ftrace_addr;
@@ -2395,7 +2397,7 @@ void __weak ftrace_replace_code(int mod_flags)
 {
        struct dyn_ftrace *rec;
        struct ftrace_page *pg;
-       int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
+       bool enable = mod_flags & FTRACE_MODIFY_ENABLE_FL;
        int schedulable = mod_flags & FTRACE_MODIFY_MAY_SLEEP_FL;
        int failed;
 
index 05b0b3139ebcc1a9fd3bc659b52a8dfb9ca42844..66358d66c9336ca9ec87de22056f56cc0da9f61f 100644 (file)
@@ -128,16 +128,7 @@ int ring_buffer_print_entry_header(struct trace_seq *s)
 #define RB_ALIGNMENT           4U
 #define RB_MAX_SMALL_DATA      (RB_ALIGNMENT * RINGBUF_TYPE_DATA_TYPE_LEN_MAX)
 #define RB_EVNT_MIN_SIZE       8U      /* two 32bit words */
-
-#ifndef CONFIG_HAVE_64BIT_ALIGNED_ACCESS
-# define RB_FORCE_8BYTE_ALIGNMENT      0
-# define RB_ARCH_ALIGNMENT             RB_ALIGNMENT
-#else
-# define RB_FORCE_8BYTE_ALIGNMENT      1
-# define RB_ARCH_ALIGNMENT             8U
-#endif
-
-#define RB_ALIGN_DATA          __aligned(RB_ARCH_ALIGNMENT)
+#define RB_ALIGN_DATA          __aligned(RB_ALIGNMENT)
 
 /* define RINGBUF_TYPE_DATA for 'case RINGBUF_TYPE_DATA:' */
 #define RINGBUF_TYPE_DATA 0 ... RINGBUF_TYPE_DATA_TYPE_LEN_MAX
@@ -2373,7 +2364,7 @@ rb_update_event(struct ring_buffer_per_cpu *cpu_buffer,
 
        event->time_delta = delta;
        length -= RB_EVNT_HDR_SIZE;
-       if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT) {
+       if (length > RB_MAX_SMALL_DATA) {
                event->type_len = 0;
                event->array[0] = length;
        } else
@@ -2388,11 +2379,11 @@ static unsigned rb_calculate_event_length(unsigned length)
        if (!length)
                length++;
 
-       if (length > RB_MAX_SMALL_DATA || RB_FORCE_8BYTE_ALIGNMENT)
+       if (length > RB_MAX_SMALL_DATA)
                length += sizeof(event.array[0]);
 
        length += RB_EVNT_HDR_SIZE;
-       length = ALIGN(length, RB_ARCH_ALIGNMENT);
+       length = ALIGN(length, RB_ALIGNMENT);
 
        /*
         * In case the time delta is larger than the 27 bits for it
index c90c687cf950a4f53baa1aea1dc37f58a4645244..525a97fbbc603fa476a27b23d49dff7b7932a112 100644 (file)
@@ -366,7 +366,7 @@ trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct
 }
 
 /**
- * trace_pid_filter_add_remove_task - Add or remove a task from a pid_list
+ * trace_filter_add_remove_task - Add or remove a task from a pid_list
  * @pid_list: The list to modify
  * @self: The current task for fork or NULL for exit
  * @task: The task to add or remove
@@ -743,8 +743,7 @@ trace_event_setup(struct ring_buffer_event *event,
 {
        struct trace_entry *ent = ring_buffer_event_data(event);
 
-       tracing_generic_entry_update(ent, flags, pc);
-       ent->type = type;
+       tracing_generic_entry_update(ent, type, flags, pc);
 }
 
 static __always_inline struct ring_buffer_event *
@@ -2312,13 +2311,14 @@ enum print_line_t trace_handle_return(struct trace_seq *s)
 EXPORT_SYMBOL_GPL(trace_handle_return);
 
 void
-tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
-                            int pc)
+tracing_generic_entry_update(struct trace_entry *entry, unsigned short type,
+                            unsigned long flags, int pc)
 {
        struct task_struct *tsk = current;
 
        entry->preempt_count            = pc & 0xff;
        entry->pid                      = (tsk) ? tsk->pid : 0;
+       entry->type                     = type;
        entry->flags =
 #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
                (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -4842,12 +4842,13 @@ static const char readme_msg[] =
        "\t     args: <name>=fetcharg[:type]\n"
        "\t fetcharg: %<register>, @<address>, @<symbol>[+|-<offset>],\n"
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
-       "\t           $stack<index>, $stack, $retval, $comm, $arg<N>\n"
+       "\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
 #else
-       "\t           $stack<index>, $stack, $retval, $comm\n"
+       "\t           $stack<index>, $stack, $retval, $comm,\n"
 #endif
+       "\t           +|-[u]<offset>(<fetcharg>)\n"
        "\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, string, symbol,\n"
-       "\t           b<bit-width>@<bit-offset>/<container-size>,\n"
+       "\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
        "\t           <type>\\[<array-size>\\]\n"
 #ifdef CONFIG_HIST_TRIGGERS
        "\t    field: <stype> <name>;\n"
index 4629a610447455c41a579fff011549cb3b4b1a2a..0892e38ed6fbeca73258ce357d678c922e48468d 100644 (file)
@@ -416,8 +416,7 @@ void perf_trace_buf_update(void *record, u16 type)
        unsigned long flags;
 
        local_save_flags(flags);
-       tracing_generic_entry_update(entry, flags, pc);
-       entry->type = type;
+       tracing_generic_entry_update(entry, type, flags, pc);
 }
 NOKPROBE_SYMBOL(perf_trace_buf_update);
 
index 0ce3db67f5569dda5550ce06f98a9cb0e62a96e5..c7506bc81b757f776ff690be99749ea1b8338e5f 100644 (file)
@@ -70,14 +70,6 @@ static int system_refcount_dec(struct event_subsystem *system)
 #define while_for_each_event_file()            \
        }
 
-static struct list_head *
-trace_get_fields(struct trace_event_call *event_call)
-{
-       if (!event_call->class->get_fields)
-               return &event_call->class->fields;
-       return event_call->class->get_fields(event_call);
-}
-
 static struct ftrace_event_field *
 __find_event_field(struct list_head *head, char *name)
 {
@@ -3190,7 +3182,7 @@ void __init trace_event_init(void)
        event_trace_enable();
 }
 
-#ifdef CONFIG_FTRACE_STARTUP_TEST
+#ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
 
 static DEFINE_SPINLOCK(test_spinlock);
 static DEFINE_SPINLOCK(test_spinlock_irq);
index 5079d1db3754a0bc872f39c12905aa0192d7839c..c773b8fb270c51a99f259edb51fd1f55b7202517 100644 (file)
@@ -1084,6 +1084,9 @@ int filter_assign_type(const char *type)
        if (strchr(type, '[') && strstr(type, "char"))
                return FILTER_STATIC_STRING;
 
+       if (strcmp(type, "char *") == 0 || strcmp(type, "const char *") == 0)
+               return FILTER_PTR_STRING;
+
        return FILTER_OTHER;
 }
 
index 7d736248a070b2f633349856345e2013d4a3cd34..9d483ad9bb6c40b48963d7a1f5b11e28318f49ae 100644 (file)
@@ -12,6 +12,8 @@
 #include <linux/rculist.h>
 #include <linux/error-injection.h>
 
+#include <asm/setup.h>  /* for COMMAND_LINE_SIZE */
+
 #include "trace_dynevent.h"
 #include "trace_kprobe_selftest.h"
 #include "trace_probe.h"
 
 #define KPROBE_EVENT_SYSTEM "kprobes"
 #define KRETPROBE_MAXACTIVE_MAX 4096
+#define MAX_KPROBE_CMDLINE_SIZE 1024
+
+/* Kprobe early definition from command line */
+static char kprobe_boot_events_buf[COMMAND_LINE_SIZE] __initdata;
+static bool kprobe_boot_events_enabled __initdata;
+
+static int __init set_kprobe_boot_events(char *str)
+{
+       strlcpy(kprobe_boot_events_buf, str, COMMAND_LINE_SIZE);
+       return 0;
+}
+__setup("kprobe_event=", set_kprobe_boot_events);
 
 static int trace_kprobe_create(int argc, const char **argv);
 static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev);
@@ -128,8 +142,8 @@ static bool trace_kprobe_match(const char *system, const char *event,
 {
        struct trace_kprobe *tk = to_trace_kprobe(ev);
 
-       return strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
-           (!system || strcmp(tk->tp.call.class->system, system) == 0);
+       return strcmp(trace_probe_name(&tk->tp), event) == 0 &&
+           (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0);
 }
 
 static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
@@ -143,6 +157,12 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
        return nhit;
 }
 
+static nokprobe_inline bool trace_kprobe_is_registered(struct trace_kprobe *tk)
+{
+       return !(list_empty(&tk->rp.kp.list) &&
+                hlist_unhashed(&tk->rp.kp.hlist));
+}
+
 /* Return 0 if it fails to find the symbol address */
 static nokprobe_inline
 unsigned long trace_kprobe_address(struct trace_kprobe *tk)
@@ -183,6 +203,16 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs);
 static int kretprobe_dispatcher(struct kretprobe_instance *ri,
                                struct pt_regs *regs);
 
+static void free_trace_kprobe(struct trace_kprobe *tk)
+{
+       if (tk) {
+               trace_probe_cleanup(&tk->tp);
+               kfree(tk->symbol);
+               free_percpu(tk->nhit);
+               kfree(tk);
+       }
+}
+
 /*
  * Allocate new trace_probe and initialize it (including kprobes).
  */
@@ -220,49 +250,20 @@ static struct trace_kprobe *alloc_trace_kprobe(const char *group,
                tk->rp.kp.pre_handler = kprobe_dispatcher;
 
        tk->rp.maxactive = maxactive;
+       INIT_HLIST_NODE(&tk->rp.kp.hlist);
+       INIT_LIST_HEAD(&tk->rp.kp.list);
 
-       if (!event || !group) {
-               ret = -EINVAL;
-               goto error;
-       }
-
-       tk->tp.call.class = &tk->tp.class;
-       tk->tp.call.name = kstrdup(event, GFP_KERNEL);
-       if (!tk->tp.call.name)
-               goto error;
-
-       tk->tp.class.system = kstrdup(group, GFP_KERNEL);
-       if (!tk->tp.class.system)
+       ret = trace_probe_init(&tk->tp, event, group);
+       if (ret < 0)
                goto error;
 
        dyn_event_init(&tk->devent, &trace_kprobe_ops);
-       INIT_LIST_HEAD(&tk->tp.files);
        return tk;
 error:
-       kfree(tk->tp.call.name);
-       kfree(tk->symbol);
-       free_percpu(tk->nhit);
-       kfree(tk);
+       free_trace_kprobe(tk);
        return ERR_PTR(ret);
 }
 
-static void free_trace_kprobe(struct trace_kprobe *tk)
-{
-       int i;
-
-       if (!tk)
-               return;
-
-       for (i = 0; i < tk->tp.nr_args; i++)
-               traceprobe_free_probe_arg(&tk->tp.args[i]);
-
-       kfree(tk->tp.call.class->system);
-       kfree(tk->tp.call.name);
-       kfree(tk->symbol);
-       free_percpu(tk->nhit);
-       kfree(tk);
-}
-
 static struct trace_kprobe *find_trace_kprobe(const char *event,
                                              const char *group)
 {
@@ -270,8 +271,8 @@ static struct trace_kprobe *find_trace_kprobe(const char *event,
        struct trace_kprobe *tk;
 
        for_each_trace_kprobe(tk, pos)
-               if (strcmp(trace_event_name(&tk->tp.call), event) == 0 &&
-                   strcmp(tk->tp.call.class->system, group) == 0)
+               if (strcmp(trace_probe_name(&tk->tp), event) == 0 &&
+                   strcmp(trace_probe_group_name(&tk->tp), group) == 0)
                        return tk;
        return NULL;
 }
@@ -280,7 +281,7 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
 {
        int ret = 0;
 
-       if (trace_probe_is_registered(&tk->tp) && !trace_kprobe_has_gone(tk)) {
+       if (trace_kprobe_is_registered(tk) && !trace_kprobe_has_gone(tk)) {
                if (trace_kprobe_is_return(tk))
                        ret = enable_kretprobe(&tk->rp);
                else
@@ -297,34 +298,27 @@ static inline int __enable_trace_kprobe(struct trace_kprobe *tk)
 static int
 enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 {
-       struct event_file_link *link;
+       bool enabled = trace_probe_is_enabled(&tk->tp);
        int ret = 0;
 
        if (file) {
-               link = kmalloc(sizeof(*link), GFP_KERNEL);
-               if (!link) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
-
-               link->file = file;
-               list_add_tail_rcu(&link->list, &tk->tp.files);
+               ret = trace_probe_add_file(&tk->tp, file);
+               if (ret)
+                       return ret;
+       } else
+               trace_probe_set_flag(&tk->tp, TP_FLAG_PROFILE);
 
-               tk->tp.flags |= TP_FLAG_TRACE;
-               ret = __enable_trace_kprobe(tk);
-               if (ret) {
-                       list_del_rcu(&link->list);
-                       kfree(link);
-                       tk->tp.flags &= ~TP_FLAG_TRACE;
-               }
+       if (enabled)
+               return 0;
 
-       } else {
-               tk->tp.flags |= TP_FLAG_PROFILE;
-               ret = __enable_trace_kprobe(tk);
-               if (ret)
-                       tk->tp.flags &= ~TP_FLAG_PROFILE;
+       ret = __enable_trace_kprobe(tk);
+       if (ret) {
+               if (file)
+                       trace_probe_remove_file(&tk->tp, file);
+               else
+                       trace_probe_clear_flag(&tk->tp, TP_FLAG_PROFILE);
        }
- out:
+
        return ret;
 }
 
@@ -335,54 +329,34 @@ enable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 static int
 disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
 {
-       struct event_file_link *link = NULL;
-       int wait = 0;
+       struct trace_probe *tp = &tk->tp;
        int ret = 0;
 
        if (file) {
-               link = find_event_file_link(&tk->tp, file);
-               if (!link) {
-                       ret = -EINVAL;
-                       goto out;
-               }
-
-               list_del_rcu(&link->list);
-               wait = 1;
-               if (!list_empty(&tk->tp.files))
+               if (!trace_probe_get_file_link(tp, file))
+                       return -ENOENT;
+               if (!trace_probe_has_single_file(tp))
                        goto out;
-
-               tk->tp.flags &= ~TP_FLAG_TRACE;
+               trace_probe_clear_flag(tp, TP_FLAG_TRACE);
        } else
-               tk->tp.flags &= ~TP_FLAG_PROFILE;
+               trace_probe_clear_flag(tp, TP_FLAG_PROFILE);
 
-       if (!trace_probe_is_enabled(&tk->tp) && trace_probe_is_registered(&tk->tp)) {
+       if (!trace_probe_is_enabled(tp) && trace_kprobe_is_registered(tk)) {
                if (trace_kprobe_is_return(tk))
                        disable_kretprobe(&tk->rp);
                else
                        disable_kprobe(&tk->rp.kp);
-               wait = 1;
        }
 
-       /*
-        * if tk is not added to any list, it must be a local trace_kprobe
-        * created with perf_event_open. We don't need to wait for these
-        * trace_kprobes
-        */
-       if (list_empty(&tk->devent.list))
-               wait = 0;
  out:
-       if (wait) {
+       if (file)
                /*
-                * Synchronize with kprobe_trace_func/kretprobe_trace_func
-                * to ensure disabled (all running handlers are finished).
-                * This is not only for kfree(), but also the caller,
-                * trace_remove_event_call() supposes it for releasing
-                * event_call related objects, which will be accessed in
-                * the kprobe_trace_func/kretprobe_trace_func.
+                * Synchronization is done in the function below. For a perf
+                * event, file == NULL and perf_trace_event_unreg() calls
+                * tracepoint_synchronize_unregister() to synchronize the
+                * event, so we don't need to care about it here.
                 */
-               synchronize_rcu();
-               kfree(link);    /* Ignored if link == NULL */
-       }
+               trace_probe_remove_file(tp, file);
 
        return ret;
 }
@@ -415,7 +389,7 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
 {
        int i, ret;
 
-       if (trace_probe_is_registered(&tk->tp))
+       if (trace_kprobe_is_registered(tk))
                return -EINVAL;
 
        if (within_notrace_func(tk)) {
@@ -441,21 +415,20 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)
        else
                ret = register_kprobe(&tk->rp.kp);
 
-       if (ret == 0)
-               tk->tp.flags |= TP_FLAG_REGISTERED;
        return ret;
 }
 
 /* Internal unregister function - just handle k*probes and flags */
 static void __unregister_trace_kprobe(struct trace_kprobe *tk)
 {
-       if (trace_probe_is_registered(&tk->tp)) {
+       if (trace_kprobe_is_registered(tk)) {
                if (trace_kprobe_is_return(tk))
                        unregister_kretprobe(&tk->rp);
                else
                        unregister_kprobe(&tk->rp.kp);
-               tk->tp.flags &= ~TP_FLAG_REGISTERED;
-               /* Cleanup kprobe for reuse */
+               /* Cleanup kprobe for reuse and mark it unregistered */
+               INIT_HLIST_NODE(&tk->rp.kp.hlist);
+               INIT_LIST_HEAD(&tk->rp.kp.list);
                if (tk->rp.kp.symbol_name)
                        tk->rp.kp.addr = NULL;
        }
@@ -487,8 +460,8 @@ static int register_trace_kprobe(struct trace_kprobe *tk)
        mutex_lock(&event_mutex);
 
        /* Delete old (same name) event if exist */
-       old_tk = find_trace_kprobe(trace_event_name(&tk->tp.call),
-                       tk->tp.call.class->system);
+       old_tk = find_trace_kprobe(trace_probe_name(&tk->tp),
+                                  trace_probe_group_name(&tk->tp));
        if (old_tk) {
                ret = unregister_trace_kprobe(old_tk);
                if (ret < 0)
@@ -541,7 +514,7 @@ static int trace_kprobe_module_callback(struct notifier_block *nb,
                        ret = __register_trace_kprobe(tk);
                        if (ret)
                                pr_warn("Failed to re-register probe %s on %s: %d\n",
-                                       trace_event_name(&tk->tp.call),
+                                       trace_probe_name(&tk->tp),
                                        mod->name, ret);
                }
        }
@@ -716,6 +689,10 @@ static int trace_kprobe_create(int argc, const char *argv[])
                        goto error;     /* This can be -ENOMEM */
        }
 
+       ret = traceprobe_set_print_fmt(&tk->tp, is_return);
+       if (ret < 0)
+               goto error;
+
        ret = register_trace_kprobe(tk);
        if (ret) {
                trace_probe_log_set_index(1);
@@ -767,8 +744,8 @@ static int trace_kprobe_show(struct seq_file *m, struct dyn_event *ev)
        int i;
 
        seq_putc(m, trace_kprobe_is_return(tk) ? 'r' : 'p');
-       seq_printf(m, ":%s/%s", tk->tp.call.class->system,
-                       trace_event_name(&tk->tp.call));
+       seq_printf(m, ":%s/%s", trace_probe_group_name(&tk->tp),
+                               trace_probe_name(&tk->tp));
 
        if (!tk->symbol)
                seq_printf(m, " 0x%p", tk->rp.kp.addr);
@@ -842,7 +819,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
 
        tk = to_trace_kprobe(ev);
        seq_printf(m, "  %-44s %15lu %15lu\n",
-                  trace_event_name(&tk->tp.call),
+                  trace_probe_name(&tk->tp),
                   trace_kprobe_nhit(tk),
                   tk->rp.kp.nmissed);
 
@@ -886,6 +863,15 @@ fetch_store_strlen(unsigned long addr)
        return (ret < 0) ? ret : len;
 }
 
+/* Return the length of the string, including the terminating null byte */
+static nokprobe_inline int
+fetch_store_strlen_user(unsigned long addr)
+{
+       const void __user *uaddr =  (__force const void __user *)addr;
+
+       return strnlen_unsafe_user(uaddr, MAX_STRING_SIZE);
+}
+
 /*
  * Fetch a null-terminated string. Caller MUST set *(u32 *)buf with max
  * length and relative data location.
@@ -894,19 +880,46 @@ static nokprobe_inline int
 fetch_store_string(unsigned long addr, void *dest, void *base)
 {
        int maxlen = get_loc_len(*(u32 *)dest);
-       u8 *dst = get_loc_data(dest, base);
+       void *__dest;
        long ret;
 
        if (unlikely(!maxlen))
                return -ENOMEM;
+
+       __dest = get_loc_data(dest, base);
+
        /*
         * Try to get string again, since the string can be changed while
         * probing.
         */
-       ret = strncpy_from_unsafe(dst, (void *)addr, maxlen);
+       ret = strncpy_from_unsafe(__dest, (void *)addr, maxlen);
+       if (ret >= 0)
+               *(u32 *)dest = make_data_loc(ret, __dest - base);
+
+       return ret;
+}
 
+/*
+ * Fetch a null-terminated string from user space. Caller MUST set
+ * *(u32 *)dest with max length and relative data location.
+ */
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base)
+{
+       const void __user *uaddr =  (__force const void __user *)addr;
+       int maxlen = get_loc_len(*(u32 *)dest);
+       void *__dest;
+       long ret;
+
+       if (unlikely(!maxlen))
+               return -ENOMEM;
+
+       __dest = get_loc_data(dest, base);
+
+       ret = strncpy_from_unsafe_user(__dest, uaddr, maxlen);
        if (ret >= 0)
-               *(u32 *)dest = make_data_loc(ret, (void *)dst - base);
+               *(u32 *)dest = make_data_loc(ret, __dest - base);
+
        return ret;
 }
 
@@ -916,6 +929,14 @@ probe_mem_read(void *dest, void *src, size_t size)
        return probe_kernel_read(dest, src, size);
 }
 
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size)
+{
+       const void __user *uaddr =  (__force const void __user *)src;
+
+       return probe_user_read(dest, uaddr, size);
+}
+
 /* Note that we don't verify it, since the code does not come from user space */
 static int
 process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs, void *dest,
@@ -971,7 +992,7 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
        struct ring_buffer *buffer;
        int size, dsize, pc;
        unsigned long irq_flags;
-       struct trace_event_call *call = &tk->tp.call;
+       struct trace_event_call *call = trace_probe_event_call(&tk->tp);
 
        WARN_ON(call != trace_file->event_call);
 
@@ -1003,7 +1024,7 @@ kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
        struct event_file_link *link;
 
-       list_for_each_entry_rcu(link, &tk->tp.files, list)
+       trace_probe_for_each_link_rcu(link, &tk->tp)
                __kprobe_trace_func(tk, regs, link->file);
 }
 NOKPROBE_SYMBOL(kprobe_trace_func);
@@ -1019,7 +1040,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
        struct ring_buffer *buffer;
        int size, pc, dsize;
        unsigned long irq_flags;
-       struct trace_event_call *call = &tk->tp.call;
+       struct trace_event_call *call = trace_probe_event_call(&tk->tp);
 
        WARN_ON(call != trace_file->event_call);
 
@@ -1053,7 +1074,7 @@ kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 {
        struct event_file_link *link;
 
-       list_for_each_entry_rcu(link, &tk->tp.files, list)
+       trace_probe_for_each_link_rcu(link, &tk->tp)
                __kretprobe_trace_func(tk, ri, regs, link->file);
 }
 NOKPROBE_SYMBOL(kretprobe_trace_func);
@@ -1070,7 +1091,7 @@ print_kprobe_event(struct trace_iterator *iter, int flags,
        field = (struct kprobe_trace_entry_head *)iter->ent;
        tp = container_of(event, struct trace_probe, call.event);
 
-       trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
+       trace_seq_printf(s, "%s: (", trace_probe_name(tp));
 
        if (!seq_print_ip_sym(s, field->ip, flags | TRACE_ITER_SYM_OFFSET))
                goto out;
@@ -1097,7 +1118,7 @@ print_kretprobe_event(struct trace_iterator *iter, int flags,
        field = (struct kretprobe_trace_entry_head *)iter->ent;
        tp = container_of(event, struct trace_probe, call.event);
 
-       trace_seq_printf(s, "%s: (", trace_event_name(&tp->call));
+       trace_seq_printf(s, "%s: (", trace_probe_name(tp));
 
        if (!seq_print_ip_sym(s, field->ret_ip, flags | TRACE_ITER_SYM_OFFSET))
                goto out;
@@ -1149,7 +1170,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call)
 static int
 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
-       struct trace_event_call *call = &tk->tp.call;
+       struct trace_event_call *call = trace_probe_event_call(&tk->tp);
        struct kprobe_trace_entry_head *entry;
        struct hlist_head *head;
        int size, __size, dsize;
@@ -1199,7 +1220,7 @@ static void
 kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
                    struct pt_regs *regs)
 {
-       struct trace_event_call *call = &tk->tp.call;
+       struct trace_event_call *call = trace_probe_event_call(&tk->tp);
        struct kretprobe_trace_entry_head *entry;
        struct hlist_head *head;
        int size, __size, dsize;
@@ -1299,10 +1320,10 @@ static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 
        raw_cpu_inc(*tk->nhit);
 
-       if (tk->tp.flags & TP_FLAG_TRACE)
+       if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
                kprobe_trace_func(tk, regs);
 #ifdef CONFIG_PERF_EVENTS
-       if (tk->tp.flags & TP_FLAG_PROFILE)
+       if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
                ret = kprobe_perf_func(tk, regs);
 #endif
        return ret;
@@ -1316,10 +1337,10 @@ kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
 
        raw_cpu_inc(*tk->nhit);
 
-       if (tk->tp.flags & TP_FLAG_TRACE)
+       if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
                kretprobe_trace_func(tk, ri, regs);
 #ifdef CONFIG_PERF_EVENTS
-       if (tk->tp.flags & TP_FLAG_PROFILE)
+       if (trace_probe_test_flag(&tk->tp, TP_FLAG_PROFILE))
                kretprobe_perf_func(tk, ri, regs);
 #endif
        return 0;       /* We don't tweek kernel, so just return 0 */
@@ -1334,10 +1355,10 @@ static struct trace_event_functions kprobe_funcs = {
        .trace          = print_kprobe_event
 };
 
-static inline void init_trace_event_call(struct trace_kprobe *tk,
-                                        struct trace_event_call *call)
+static inline void init_trace_event_call(struct trace_kprobe *tk)
 {
-       INIT_LIST_HEAD(&call->class->fields);
+       struct trace_event_call *call = trace_probe_event_call(&tk->tp);
+
        if (trace_kprobe_is_return(tk)) {
                call->event.funcs = &kretprobe_funcs;
                call->class->define_fields = kretprobe_event_define_fields;
@@ -1353,37 +1374,14 @@ static inline void init_trace_event_call(struct trace_kprobe *tk,
 
 static int register_kprobe_event(struct trace_kprobe *tk)
 {
-       struct trace_event_call *call = &tk->tp.call;
-       int ret = 0;
-
-       init_trace_event_call(tk, call);
+       init_trace_event_call(tk);
 
-       if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
-               return -ENOMEM;
-       ret = register_trace_event(&call->event);
-       if (!ret) {
-               kfree(call->print_fmt);
-               return -ENODEV;
-       }
-       ret = trace_add_event_call(call);
-       if (ret) {
-               pr_info("Failed to register kprobe event: %s\n",
-                       trace_event_name(call));
-               kfree(call->print_fmt);
-               unregister_trace_event(&call->event);
-       }
-       return ret;
+       return trace_probe_register_event_call(&tk->tp);
 }
 
 static int unregister_kprobe_event(struct trace_kprobe *tk)
 {
-       int ret;
-
-       /* tp->event is unregistered in trace_remove_event_call() */
-       ret = trace_remove_event_call(&tk->tp.call);
-       if (!ret)
-               kfree(tk->tp.call.print_fmt);
-       return ret;
+       return trace_probe_unregister_event_call(&tk->tp);
 }
 
 #ifdef CONFIG_PERF_EVENTS
@@ -1413,7 +1411,7 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
                return ERR_CAST(tk);
        }
 
-       init_trace_event_call(tk, &tk->tp.call);
+       init_trace_event_call(tk);
 
        if (traceprobe_set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
                ret = -ENOMEM;
@@ -1421,12 +1419,10 @@ create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
        }
 
        ret = __register_trace_kprobe(tk);
-       if (ret < 0) {
-               kfree(tk->tp.call.print_fmt);
+       if (ret < 0)
                goto error;
-       }
 
-       return &tk->tp.call;
+       return trace_probe_event_call(&tk->tp);
 error:
        free_trace_kprobe(tk);
        return ERR_PTR(ret);
@@ -1445,11 +1441,50 @@ void destroy_local_trace_kprobe(struct trace_event_call *event_call)
 
        __unregister_trace_kprobe(tk);
 
-       kfree(tk->tp.call.print_fmt);
        free_trace_kprobe(tk);
 }
 #endif /* CONFIG_PERF_EVENTS */
 
+static __init void enable_boot_kprobe_events(void)
+{
+       struct trace_array *tr = top_trace_array();
+       struct trace_event_file *file;
+       struct trace_kprobe *tk;
+       struct dyn_event *pos;
+
+       mutex_lock(&event_mutex);
+       for_each_trace_kprobe(tk, pos) {
+               list_for_each_entry(file, &tr->events, list)
+                       if (file->event_call == trace_probe_event_call(&tk->tp))
+                               trace_event_enable_disable(file, 1, 0);
+       }
+       mutex_unlock(&event_mutex);
+}
+
+static __init void setup_boot_kprobe_events(void)
+{
+       char *p, *cmd = kprobe_boot_events_buf;
+       int ret;
+
+       strreplace(kprobe_boot_events_buf, ',', ' ');
+
+       while (cmd && *cmd != '\0') {
+               p = strchr(cmd, ';');
+               if (p)
+                       *p++ = '\0';
+
+               ret = trace_run_command(cmd, create_or_delete_trace_kprobe);
+               if (ret)
+                       pr_warn("Failed to add event(%d): %s\n", ret, cmd);
+               else
+                       kprobe_boot_events_enabled = true;
+
+               cmd = p;
+       }
+
+       enable_boot_kprobe_events();
+}
+
 /* Make a tracefs interface for controlling probe points */
 static __init int init_kprobe_trace(void)
 {
@@ -1481,6 +1516,9 @@ static __init int init_kprobe_trace(void)
 
        if (!entry)
                pr_warn("Could not create tracefs 'kprobe_profile' entry\n");
+
+       setup_boot_kprobe_events();
+
        return 0;
 }
 fs_initcall(init_kprobe_trace);
@@ -1493,7 +1531,7 @@ find_trace_probe_file(struct trace_kprobe *tk, struct trace_array *tr)
        struct trace_event_file *file;
 
        list_for_each_entry(file, &tr->events, list)
-               if (file->event_call == &tk->tp.call)
+               if (file->event_call == trace_probe_event_call(&tk->tp))
                        return file;
 
        return NULL;
@@ -1513,6 +1551,11 @@ static __init int kprobe_trace_self_tests_init(void)
        if (tracing_is_disabled())
                return -ENODEV;
 
+       if (kprobe_boot_events_enabled) {
+               pr_info("Skipping kprobe tests due to kprobe_event on cmdline\n");
+               return 0;
+       }
+
        target = kprobe_trace_selftest_target;
 
        pr_info("Testing kprobe tracing: ");
index a347faced9595092464744b9729c80145ea8e0c8..dbef0d1350754c5404995811f31358bc39087400 100644 (file)
@@ -78,6 +78,8 @@ static const struct fetch_type probe_fetch_types[] = {
        /* Special types */
        __ASSIGN_FETCH_TYPE("string", string, string, sizeof(u32), 1,
                            "__data_loc char[]"),
+       __ASSIGN_FETCH_TYPE("ustring", string, string, sizeof(u32), 1,
+                           "__data_loc char[]"),
        /* Basic types */
        ASSIGN_FETCH_TYPE(u8,  u8,  0),
        ASSIGN_FETCH_TYPE(u16, u16, 0),
@@ -322,6 +324,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
 {
        struct fetch_insn *code = *pcode;
        unsigned long param;
+       int deref = FETCH_OP_DEREF;
        long offset = 0;
        char *tmp;
        int ret = 0;
@@ -394,9 +397,14 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
                break;
 
        case '+':       /* deref memory */
-               arg++;  /* Skip '+', because kstrtol() rejects it. */
-               /* fall through */
        case '-':
+               if (arg[1] == 'u') {
+                       deref = FETCH_OP_UDEREF;
+                       arg[1] = arg[0];
+                       arg++;
+               }
+               if (arg[0] == '+')
+                       arg++;  /* Skip '+', because kstrtol() rejects it. */
                tmp = strchr(arg, '(');
                if (!tmp) {
                        trace_probe_log_err(offs, DEREF_NEED_BRACE);
@@ -432,7 +440,7 @@ parse_probe_arg(char *arg, const struct fetch_type *type,
                        }
                        *pcode = code;
 
-                       code->op = FETCH_OP_DEREF;
+                       code->op = deref;
                        code->offset = offset;
                }
                break;
@@ -569,15 +577,17 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
                goto fail;
 
        /* Store operation */
-       if (!strcmp(parg->type->name, "string")) {
-               if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_IMM &&
-                   code->op != FETCH_OP_COMM) {
+       if (!strcmp(parg->type->name, "string") ||
+           !strcmp(parg->type->name, "ustring")) {
+               if (code->op != FETCH_OP_DEREF && code->op != FETCH_OP_UDEREF &&
+                   code->op != FETCH_OP_IMM && code->op != FETCH_OP_COMM) {
                        trace_probe_log_err(offset + (t ? (t - arg) : 0),
                                            BAD_STRING);
                        ret = -EINVAL;
                        goto fail;
                }
-               if (code->op != FETCH_OP_DEREF || parg->count) {
+               if ((code->op == FETCH_OP_IMM || code->op == FETCH_OP_COMM) ||
+                    parg->count) {
                        /*
                         * IMM and COMM is pointing actual address, those must
                         * be kept, and if parg->count != 0, this is an array
@@ -590,12 +600,20 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
                                goto fail;
                        }
                }
-               code->op = FETCH_OP_ST_STRING;  /* In DEREF case, replace it */
+               /* If op is DEREF or UDEREF, replace it with a string store */
+               if (!strcmp(parg->type->name, "ustring") ||
+                   code->op == FETCH_OP_UDEREF)
+                       code->op = FETCH_OP_ST_USTRING;
+               else
+                       code->op = FETCH_OP_ST_STRING;
                code->size = parg->type->size;
                parg->dynamic = true;
        } else if (code->op == FETCH_OP_DEREF) {
                code->op = FETCH_OP_ST_MEM;
                code->size = parg->type->size;
+       } else if (code->op == FETCH_OP_UDEREF) {
+               code->op = FETCH_OP_ST_UMEM;
+               code->size = parg->type->size;
        } else {
                code++;
                if (code->op != FETCH_OP_NOP) {
@@ -618,7 +636,8 @@ static int traceprobe_parse_probe_arg_body(char *arg, ssize_t *size,
        /* Loop(Array) operation */
        if (parg->count) {
                if (scode->op != FETCH_OP_ST_MEM &&
-                   scode->op != FETCH_OP_ST_STRING) {
+                   scode->op != FETCH_OP_ST_STRING &&
+                   scode->op != FETCH_OP_ST_USTRING) {
                        trace_probe_log_err(offset + (t ? (t - arg) : 0),
                                            BAD_STRING);
                        ret = -EINVAL;
@@ -825,6 +844,7 @@ static int __set_print_fmt(struct trace_probe *tp, char *buf, int len,
 
 int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return)
 {
+       struct trace_event_call *call = trace_probe_event_call(tp);
        int len;
        char *print_fmt;
 
@@ -836,7 +856,7 @@ int traceprobe_set_print_fmt(struct trace_probe *tp, bool is_return)
 
        /* Second: actually write the @print_fmt */
        __set_print_fmt(tp, print_fmt, len + 1, is_return);
-       tp->call.print_fmt = print_fmt;
+       call->print_fmt = print_fmt;
 
        return 0;
 }
@@ -865,3 +885,105 @@ int traceprobe_define_arg_fields(struct trace_event_call *event_call,
        }
        return 0;
 }
+
+
+void trace_probe_cleanup(struct trace_probe *tp)
+{
+       struct trace_event_call *call = trace_probe_event_call(tp);
+       int i;
+
+       for (i = 0; i < tp->nr_args; i++)
+               traceprobe_free_probe_arg(&tp->args[i]);
+
+       kfree(call->class->system);
+       kfree(call->name);
+       kfree(call->print_fmt);
+}
+
+int trace_probe_init(struct trace_probe *tp, const char *event,
+                    const char *group)
+{
+       struct trace_event_call *call = trace_probe_event_call(tp);
+
+       if (!event || !group)
+               return -EINVAL;
+
+       call->class = &tp->class;
+       call->name = kstrdup(event, GFP_KERNEL);
+       if (!call->name)
+               return -ENOMEM;
+
+       tp->class.system = kstrdup(group, GFP_KERNEL);
+       if (!tp->class.system) {
+               kfree(call->name);
+               call->name = NULL;
+               return -ENOMEM;
+       }
+       INIT_LIST_HEAD(&tp->files);
+       INIT_LIST_HEAD(&tp->class.fields);
+
+       return 0;
+}
+
+int trace_probe_register_event_call(struct trace_probe *tp)
+{
+       struct trace_event_call *call = trace_probe_event_call(tp);
+       int ret;
+
+       ret = register_trace_event(&call->event);
+       if (!ret)
+               return -ENODEV;
+
+       ret = trace_add_event_call(call);
+       if (ret)
+               unregister_trace_event(&call->event);
+
+       return ret;
+}
+
+int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file)
+{
+       struct event_file_link *link;
+
+       link = kmalloc(sizeof(*link), GFP_KERNEL);
+       if (!link)
+               return -ENOMEM;
+
+       link->file = file;
+       INIT_LIST_HEAD(&link->list);
+       list_add_tail_rcu(&link->list, &tp->files);
+       trace_probe_set_flag(tp, TP_FLAG_TRACE);
+       return 0;
+}
+
+struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp,
+                                                 struct trace_event_file *file)
+{
+       struct event_file_link *link;
+
+       trace_probe_for_each_link(link, tp) {
+               if (link->file == file)
+                       return link;
+       }
+
+       return NULL;
+}
+
+int trace_probe_remove_file(struct trace_probe *tp,
+                           struct trace_event_file *file)
+{
+       struct event_file_link *link;
+
+       link = trace_probe_get_file_link(tp, file);
+       if (!link)
+               return -ENOENT;
+
+       list_del_rcu(&link->list);
+       synchronize_rcu();
+       kfree(link);
+
+       if (list_empty(&tp->files))
+               trace_probe_clear_flag(tp, TP_FLAG_TRACE);
+
+       return 0;
+}
index f9a8c632188bcf8a3d849cdf94f2d9a11a081eb6..d1714820efe1949d2ec7c9343bb623bd42d17ed5 100644 (file)
@@ -55,7 +55,6 @@
 /* Flags for trace_probe */
 #define TP_FLAG_TRACE          1
 #define TP_FLAG_PROFILE                2
-#define TP_FLAG_REGISTERED     4
 
 /* data_loc: data location, compatible with u32 */
 #define make_data_loc(len, offs)       \
@@ -92,10 +91,13 @@ enum fetch_op {
        FETCH_OP_FOFFS,         /* File offset: .immediate */
        // Stage 2 (dereference) op
        FETCH_OP_DEREF,         /* Dereference: .offset */
+       FETCH_OP_UDEREF,        /* User-space Dereference: .offset */
        // Stage 3 (store) ops
        FETCH_OP_ST_RAW,        /* Raw: .size */
        FETCH_OP_ST_MEM,        /* Mem: .offset, .size */
+       FETCH_OP_ST_UMEM,       /* User Mem: .offset, .size */
        FETCH_OP_ST_STRING,     /* String: .offset, .size */
+       FETCH_OP_ST_USTRING,    /* User String: .offset, .size */
        // Stage 4 (modify) op
        FETCH_OP_MOD_BF,        /* Bitfield: .basesize, .lshift, .rshift */
        // Stage 5 (loop) op
@@ -235,16 +237,71 @@ struct event_file_link {
        struct list_head                list;
 };
 
+static inline bool trace_probe_test_flag(struct trace_probe *tp,
+                                        unsigned int flag)
+{
+       return !!(tp->flags & flag);
+}
+
+static inline void trace_probe_set_flag(struct trace_probe *tp,
+                                       unsigned int flag)
+{
+       tp->flags |= flag;
+}
+
+static inline void trace_probe_clear_flag(struct trace_probe *tp,
+                                         unsigned int flag)
+{
+       tp->flags &= ~flag;
+}
+
 static inline bool trace_probe_is_enabled(struct trace_probe *tp)
 {
-       return !!(tp->flags & (TP_FLAG_TRACE | TP_FLAG_PROFILE));
+       return trace_probe_test_flag(tp, TP_FLAG_TRACE | TP_FLAG_PROFILE);
 }
 
-static inline bool trace_probe_is_registered(struct trace_probe *tp)
+static inline const char *trace_probe_name(struct trace_probe *tp)
 {
-       return !!(tp->flags & TP_FLAG_REGISTERED);
+       return trace_event_name(&tp->call);
 }
 
+static inline const char *trace_probe_group_name(struct trace_probe *tp)
+{
+       return tp->call.class->system;
+}
+
+static inline struct trace_event_call *
+       trace_probe_event_call(struct trace_probe *tp)
+{
+       return &tp->call;
+}
+
+static inline int trace_probe_unregister_event_call(struct trace_probe *tp)
+{
+       /* tp->event is unregistered in trace_remove_event_call() */
+       return trace_remove_event_call(&tp->call);
+}
+
+static inline bool trace_probe_has_single_file(struct trace_probe *tp)
+{
+       return !!list_is_singular(&tp->files);
+}
+
+int trace_probe_init(struct trace_probe *tp, const char *event,
+                    const char *group);
+void trace_probe_cleanup(struct trace_probe *tp);
+int trace_probe_register_event_call(struct trace_probe *tp);
+int trace_probe_add_file(struct trace_probe *tp, struct trace_event_file *file);
+int trace_probe_remove_file(struct trace_probe *tp,
+                           struct trace_event_file *file);
+struct event_file_link *trace_probe_get_file_link(struct trace_probe *tp,
+                                               struct trace_event_file *file);
+
+#define trace_probe_for_each_link(pos, tp)     \
+       list_for_each_entry(pos, &(tp)->files, list)
+#define trace_probe_for_each_link_rcu(pos, tp) \
+       list_for_each_entry_rcu(pos, &(tp)->files, list)
+
 /* Check the name is good for event/group/fields */
 static inline bool is_good_name(const char *name)
 {
@@ -257,18 +314,6 @@ static inline bool is_good_name(const char *name)
        return true;
 }
 
-static inline struct event_file_link *
-find_event_file_link(struct trace_probe *tp, struct trace_event_file *file)
-{
-       struct event_file_link *link;
-
-       list_for_each_entry(link, &tp->files, list)
-               if (link->file == file)
-                       return link;
-
-       return NULL;
-}
-
 #define TPARG_FL_RETURN BIT(0)
 #define TPARG_FL_KERNEL BIT(1)
 #define TPARG_FL_FENTRY BIT(2)
index c30c61f12dddff4b1e791751c2029a34bd182b93..e5282828f4a60583e2c471f0c71efdf867969b2c 100644 (file)
@@ -59,8 +59,13 @@ process_fetch_insn(struct fetch_insn *code, struct pt_regs *regs,
 static nokprobe_inline int fetch_store_strlen(unsigned long addr);
 static nokprobe_inline int
 fetch_store_string(unsigned long addr, void *dest, void *base);
+static nokprobe_inline int fetch_store_strlen_user(unsigned long addr);
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base);
 static nokprobe_inline int
 probe_mem_read(void *dest, void *src, size_t size);
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size);
 
 /* From the 2nd stage, routine is same */
 static nokprobe_inline int
@@ -74,14 +79,21 @@ process_fetch_insn_bottom(struct fetch_insn *code, unsigned long val,
 
 stage2:
        /* 2nd stage: dereference memory if needed */
-       while (code->op == FETCH_OP_DEREF) {
-               lval = val;
-               ret = probe_mem_read(&val, (void *)val + code->offset,
-                                       sizeof(val));
+       do {
+               if (code->op == FETCH_OP_DEREF) {
+                       lval = val;
+                       ret = probe_mem_read(&val, (void *)val + code->offset,
+                                            sizeof(val));
+               } else if (code->op == FETCH_OP_UDEREF) {
+                       lval = val;
+                       ret = probe_mem_read_user(&val,
+                                (void *)val + code->offset, sizeof(val));
+               } else
+                       break;
                if (ret)
                        return ret;
                code++;
-       }
+       } while (1);
 
        s3 = code;
 stage3:
@@ -91,6 +103,10 @@ stage3:
                        ret = fetch_store_strlen(val + code->offset);
                        code++;
                        goto array;
+               } else if (code->op == FETCH_OP_ST_USTRING) {
+                       /*
+                        * Assign rather than accumulate, exactly like the
+                        * FETCH_OP_ST_STRING arm: the measured length must
+                        * not depend on whatever 'ret' held before.
+                        */
+                       ret = fetch_store_strlen_user(val + code->offset);
+                       code++;
+                       goto array;
                } else
                        return -EILSEQ;
        }
@@ -102,10 +118,17 @@ stage3:
        case FETCH_OP_ST_MEM:
                probe_mem_read(dest, (void *)val + code->offset, code->size);
                break;
+       case FETCH_OP_ST_UMEM:
+               probe_mem_read_user(dest, (void *)val + code->offset, code->size);
+               break;
        case FETCH_OP_ST_STRING:
                loc = *(u32 *)dest;
                ret = fetch_store_string(val + code->offset, dest, base);
                break;
+       case FETCH_OP_ST_USTRING:
+               loc = *(u32 *)dest;
+               ret = fetch_store_string_user(val + code->offset, dest, base);
+               break;
        default:
                return -EILSEQ;
        }
@@ -123,7 +146,8 @@ array:
                total += ret;
                if (++i < code->param) {
                        code = s3;
-                       if (s3->op != FETCH_OP_ST_STRING) {
+                       if (s3->op != FETCH_OP_ST_STRING &&
+                           s3->op != FETCH_OP_ST_USTRING) {
                                dest += s3->size;
                                val += s3->size;
                                goto stage3;
index 7860e3f59fad6e257c9a8381f2a2d9b5ce9d70dd..1ceedb9146b114e6225a7662044c6b00c4cba136 100644 (file)
@@ -140,6 +140,13 @@ probe_mem_read(void *dest, void *src, size_t size)
 
        return copy_from_user(dest, vaddr, size) ? -EFAULT : 0;
 }
+
+/*
+ * User-memory flavour of probe_mem_read(). In this file it forwards the
+ * same arguments to probe_mem_read() and returns its result.
+ */
+static nokprobe_inline int
+probe_mem_read_user(void *dest, void *src, size_t size)
+{
+       int ret = probe_mem_read(dest, src, size);
+
+       return ret;
+}
+
 /*
  * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max
  * length and relative data location.
@@ -176,6 +183,12 @@ fetch_store_string(unsigned long addr, void *dest, void *base)
        return ret;
 }
 
+/*
+ * User-memory flavour of fetch_store_string(). In this file it forwards the
+ * same arguments to fetch_store_string() and returns its result.
+ */
+static nokprobe_inline int
+fetch_store_string_user(unsigned long addr, void *dest, void *base)
+{
+       int ret = fetch_store_string(addr, dest, base);
+
+       return ret;
+}
+
 /* Return the length of string -- including null terminal byte */
 static nokprobe_inline int
 fetch_store_strlen(unsigned long addr)
@@ -191,6 +204,12 @@ fetch_store_strlen(unsigned long addr)
        return (len > MAX_STRING_SIZE) ? 0 : len;
 }
 
+/*
+ * User-memory flavour of fetch_store_strlen(). In this file it forwards
+ * @addr to fetch_store_strlen() and returns its result.
+ */
+static nokprobe_inline int
+fetch_store_strlen_user(unsigned long addr)
+{
+       int len = fetch_store_strlen(addr);
+
+       return len;
+}
+
 static unsigned long translate_user_vaddr(unsigned long file_offset)
 {
        unsigned long base_addr;
@@ -270,8 +289,8 @@ static bool trace_uprobe_match(const char *system, const char *event,
 {
        struct trace_uprobe *tu = to_trace_uprobe(ev);
 
-       return strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
-               (!system || strcmp(tu->tp.call.class->system, system) == 0);
+       return strcmp(trace_probe_name(&tu->tp), event) == 0 &&
+           (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0);
 }
 
 /*
@@ -281,25 +300,17 @@ static struct trace_uprobe *
 alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
 {
        struct trace_uprobe *tu;
-
-       if (!event || !group)
-               return ERR_PTR(-EINVAL);
+       int ret;
 
        tu = kzalloc(SIZEOF_TRACE_UPROBE(nargs), GFP_KERNEL);
        if (!tu)
                return ERR_PTR(-ENOMEM);
 
-       tu->tp.call.class = &tu->tp.class;
-       tu->tp.call.name = kstrdup(event, GFP_KERNEL);
-       if (!tu->tp.call.name)
-               goto error;
-
-       tu->tp.class.system = kstrdup(group, GFP_KERNEL);
-       if (!tu->tp.class.system)
+       ret = trace_probe_init(&tu->tp, event, group);
+       if (ret < 0)
                goto error;
 
        dyn_event_init(&tu->devent, &trace_uprobe_ops);
-       INIT_LIST_HEAD(&tu->tp.files);
        tu->consumer.handler = uprobe_dispatcher;
        if (is_ret)
                tu->consumer.ret_handler = uretprobe_dispatcher;
@@ -307,25 +318,18 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
        return tu;
 
 error:
-       kfree(tu->tp.call.name);
        kfree(tu);
 
-       return ERR_PTR(-ENOMEM);
+       return ERR_PTR(ret);
 }
 
 static void free_trace_uprobe(struct trace_uprobe *tu)
 {
-       int i;
-
        if (!tu)
                return;
 
-       for (i = 0; i < tu->tp.nr_args; i++)
-               traceprobe_free_probe_arg(&tu->tp.args[i]);
-
        path_put(&tu->path);
-       kfree(tu->tp.call.class->system);
-       kfree(tu->tp.call.name);
+       trace_probe_cleanup(&tu->tp);
        kfree(tu->filename);
        kfree(tu);
 }
@@ -336,8 +340,8 @@ static struct trace_uprobe *find_probe_event(const char *event, const char *grou
        struct trace_uprobe *tu;
 
        for_each_trace_uprobe(tu, pos)
-               if (strcmp(trace_event_name(&tu->tp.call), event) == 0 &&
-                   strcmp(tu->tp.call.class->system, group) == 0)
+               if (strcmp(trace_probe_name(&tu->tp), event) == 0 &&
+                   strcmp(trace_probe_group_name(&tu->tp), group) == 0)
                        return tu;
 
        return NULL;
@@ -372,8 +376,8 @@ static struct trace_uprobe *find_old_trace_uprobe(struct trace_uprobe *new)
        struct trace_uprobe *tmp, *old = NULL;
        struct inode *new_inode = d_real_inode(new->path.dentry);
 
-       old = find_probe_event(trace_event_name(&new->tp.call),
-                               new->tp.call.class->system);
+       old = find_probe_event(trace_probe_name(&new->tp),
+                               trace_probe_group_name(&new->tp));
 
        for_each_trace_uprobe(tmp, pos) {
                if ((old ? old != tmp : true) &&
@@ -578,6 +582,10 @@ static int trace_uprobe_create(int argc, const char **argv)
                        goto error;
        }
 
+       ret = traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu));
+       if (ret < 0)
+               goto error;
+
        ret = register_trace_uprobe(tu);
        if (!ret)
                goto out;
@@ -621,8 +629,8 @@ static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev)
        char c = is_ret_probe(tu) ? 'r' : 'p';
        int i;
 
-       seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, tu->tp.call.class->system,
-                       trace_event_name(&tu->tp.call), tu->filename,
+       seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, trace_probe_group_name(&tu->tp),
+                       trace_probe_name(&tu->tp), tu->filename,
                        (int)(sizeof(void *) * 2), tu->offset);
 
        if (tu->ref_ctr_offset)
@@ -692,7 +700,7 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
 
        tu = to_trace_uprobe(ev);
        seq_printf(m, "  %s %-44s %15lu\n", tu->filename,
-                       trace_event_name(&tu->tp.call), tu->nhit);
+                       trace_probe_name(&tu->tp), tu->nhit);
        return 0;
 }
 
@@ -818,7 +826,7 @@ static void __uprobe_trace_func(struct trace_uprobe *tu,
        struct ring_buffer *buffer;
        void *data;
        int size, esize;
-       struct trace_event_call *call = &tu->tp.call;
+       struct trace_event_call *call = trace_probe_event_call(&tu->tp);
 
        WARN_ON(call != trace_file->event_call);
 
@@ -860,7 +868,7 @@ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs,
                return 0;
 
        rcu_read_lock();
-       list_for_each_entry_rcu(link, &tu->tp.files, list)
+       trace_probe_for_each_link_rcu(link, &tu->tp)
                __uprobe_trace_func(tu, 0, regs, ucb, dsize, link->file);
        rcu_read_unlock();
 
@@ -874,7 +882,7 @@ static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func,
        struct event_file_link *link;
 
        rcu_read_lock();
-       list_for_each_entry_rcu(link, &tu->tp.files, list)
+       trace_probe_for_each_link_rcu(link, &tu->tp)
                __uprobe_trace_func(tu, func, regs, ucb, dsize, link->file);
        rcu_read_unlock();
 }
@@ -893,12 +901,12 @@ print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *e
 
        if (is_ret_probe(tu)) {
                trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)",
-                                trace_event_name(&tu->tp.call),
+                                trace_probe_name(&tu->tp),
                                 entry->vaddr[1], entry->vaddr[0]);
                data = DATAOF_TRACE_ENTRY(entry, true);
        } else {
                trace_seq_printf(s, "%s: (0x%lx)",
-                                trace_event_name(&tu->tp.call),
+                                trace_probe_name(&tu->tp),
                                 entry->vaddr[0]);
                data = DATAOF_TRACE_ENTRY(entry, false);
        }
@@ -921,26 +929,20 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
                   filter_func_t filter)
 {
        bool enabled = trace_probe_is_enabled(&tu->tp);
-       struct event_file_link *link = NULL;
        int ret;
 
        if (file) {
-               if (tu->tp.flags & TP_FLAG_PROFILE)
+               if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
                        return -EINTR;
 
-               link = kmalloc(sizeof(*link), GFP_KERNEL);
-               if (!link)
-                       return -ENOMEM;
-
-               link->file = file;
-               list_add_tail_rcu(&link->list, &tu->tp.files);
-
-               tu->tp.flags |= TP_FLAG_TRACE;
+               ret = trace_probe_add_file(&tu->tp, file);
+               if (ret < 0)
+                       return ret;
        } else {
-               if (tu->tp.flags & TP_FLAG_TRACE)
+               if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
                        return -EINTR;
 
-               tu->tp.flags |= TP_FLAG_PROFILE;
+               trace_probe_set_flag(&tu->tp, TP_FLAG_PROFILE);
        }
 
        WARN_ON(!uprobe_filter_is_empty(&tu->filter));
@@ -970,13 +972,11 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,
        uprobe_buffer_disable();
 
  err_flags:
-       if (file) {
-               list_del(&link->list);
-               kfree(link);
-               tu->tp.flags &= ~TP_FLAG_TRACE;
-       } else {
-               tu->tp.flags &= ~TP_FLAG_PROFILE;
-       }
+       if (file)
+               trace_probe_remove_file(&tu->tp, file);
+       else
+               trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE);
+
        return ret;
 }
 
@@ -987,26 +987,18 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)
                return;
 
        if (file) {
-               struct event_file_link *link;
-
-               link = find_event_file_link(&tu->tp, file);
-               if (!link)
+               if (trace_probe_remove_file(&tu->tp, file) < 0)
                        return;
 
-               list_del_rcu(&link->list);
-               /* synchronize with u{,ret}probe_trace_func */
-               synchronize_rcu();
-               kfree(link);
-
-               if (!list_empty(&tu->tp.files))
+               if (trace_probe_is_enabled(&tu->tp))
                        return;
-       }
+       } else
+               trace_probe_clear_flag(&tu->tp, TP_FLAG_PROFILE);
 
        WARN_ON(!uprobe_filter_is_empty(&tu->filter));
 
        uprobe_unregister(tu->inode, tu->offset, &tu->consumer);
        tu->inode = NULL;
-       tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;
 
        uprobe_buffer_disable();
 }
@@ -1126,7 +1118,7 @@ static void __uprobe_perf_func(struct trace_uprobe *tu,
                               unsigned long func, struct pt_regs *regs,
                               struct uprobe_cpu_buffer *ucb, int dsize)
 {
-       struct trace_event_call *call = &tu->tp.call;
+       struct trace_event_call *call = trace_probe_event_call(&tu->tp);
        struct uprobe_trace_entry_head *entry;
        struct hlist_head *head;
        void *data;
@@ -1279,11 +1271,11 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
        ucb = uprobe_buffer_get();
        store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
 
-       if (tu->tp.flags & TP_FLAG_TRACE)
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
                ret |= uprobe_trace_func(tu, regs, ucb, dsize);
 
 #ifdef CONFIG_PERF_EVENTS
-       if (tu->tp.flags & TP_FLAG_PROFILE)
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
                ret |= uprobe_perf_func(tu, regs, ucb, dsize);
 #endif
        uprobe_buffer_put(ucb);
@@ -1314,11 +1306,11 @@ static int uretprobe_dispatcher(struct uprobe_consumer *con,
        ucb = uprobe_buffer_get();
        store_trace_args(ucb->buf, &tu->tp, regs, esize, dsize);
 
-       if (tu->tp.flags & TP_FLAG_TRACE)
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE))
                uretprobe_trace_func(tu, func, regs, ucb, dsize);
 
 #ifdef CONFIG_PERF_EVENTS
-       if (tu->tp.flags & TP_FLAG_PROFILE)
+       if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE))
                uretprobe_perf_func(tu, func, regs, ucb, dsize);
 #endif
        uprobe_buffer_put(ucb);
@@ -1329,10 +1321,10 @@ static struct trace_event_functions uprobe_funcs = {
        .trace          = print_uprobe_event
 };
 
-static inline void init_trace_event_call(struct trace_uprobe *tu,
-                                        struct trace_event_call *call)
+static inline void init_trace_event_call(struct trace_uprobe *tu)
 {
-       INIT_LIST_HEAD(&call->class->fields);
+       struct trace_event_call *call = trace_probe_event_call(&tu->tp);
+
        call->event.funcs = &uprobe_funcs;
        call->class->define_fields = uprobe_event_define_fields;
 
@@ -1343,43 +1335,14 @@ static inline void init_trace_event_call(struct trace_uprobe *tu,
 
 static int register_uprobe_event(struct trace_uprobe *tu)
 {
-       struct trace_event_call *call = &tu->tp.call;
-       int ret = 0;
-
-       init_trace_event_call(tu, call);
-
-       if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)
-               return -ENOMEM;
+       init_trace_event_call(tu);
 
-       ret = register_trace_event(&call->event);
-       if (!ret) {
-               kfree(call->print_fmt);
-               return -ENODEV;
-       }
-
-       ret = trace_add_event_call(call);
-
-       if (ret) {
-               pr_info("Failed to register uprobe event: %s\n",
-                       trace_event_name(call));
-               kfree(call->print_fmt);
-               unregister_trace_event(&call->event);
-       }
-
-       return ret;
+       return trace_probe_register_event_call(&tu->tp);
 }
 
 static int unregister_uprobe_event(struct trace_uprobe *tu)
 {
-       int ret;
-
-       /* tu->event is unregistered in trace_remove_event_call() */
-       ret = trace_remove_event_call(&tu->tp.call);
-       if (ret)
-               return ret;
-       kfree(tu->tp.call.print_fmt);
-       tu->tp.call.print_fmt = NULL;
-       return 0;
+       return trace_probe_unregister_event_call(&tu->tp);
 }
 
 #ifdef CONFIG_PERF_EVENTS
@@ -1419,14 +1382,14 @@ create_local_trace_uprobe(char *name, unsigned long offs,
        tu->path = path;
        tu->ref_ctr_offset = ref_ctr_offset;
        tu->filename = kstrdup(name, GFP_KERNEL);
-       init_trace_event_call(tu, &tu->tp.call);
+       init_trace_event_call(tu);
 
        if (traceprobe_set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
                ret = -ENOMEM;
                goto error;
        }
 
-       return &tu->tp.call;
+       return trace_probe_event_call(&tu->tp);
 error:
        free_trace_uprobe(tu);
        return ERR_PTR(ret);
@@ -1438,9 +1401,6 @@ void destroy_local_trace_uprobe(struct trace_event_call *event_call)
 
        tu = container_of(event_call, struct trace_uprobe, tp.call);
 
-       kfree(tu->tp.call.print_fmt);
-       tu->tp.call.print_fmt = NULL;
-
        free_trace_uprobe(tu);
 }
 #endif /* CONFIG_PERF_EVENTS */
index df3ade14ccbde53181623c38cad90f3d69bb534b..73956eaff8a9c412177b9a7eeac91d76c3947d5d 100644 (file)
@@ -55,8 +55,8 @@ struct tp_probes {
 
 static inline void *allocate_probes(int count)
 {
-       struct tp_probes *p  = kmalloc(count * sizeof(struct tracepoint_func)
-                       + sizeof(struct tp_probes), GFP_KERNEL);
+       struct tp_probes *p  = kmalloc(struct_size(p, probes, count),
+                                      GFP_KERNEL);
        return p == NULL ? NULL : p->probes;
 }
 
index 482d4d670f19eedbb0a9b10719b121bcadebe89f..d065736f6b876b5d72a9a962466187144def90c7 100644 (file)
@@ -6,8 +6,20 @@
 #include <linux/mm.h>
 #include <linux/uaccess.h>
 
+/*
+ * Copy @size bytes from @src to @dst with page faults disabled.
+ * Returns 0 when __copy_from_user_inatomic() returns zero, -EFAULT otherwise.
+ */
+static __always_inline long
+probe_read_common(void *dst, const void __user *src, size_t size)
+{
+       long rc;
+
+       pagefault_disable();
+       rc = __copy_from_user_inatomic(dst, src, size);
+       pagefault_enable();
+
+       if (rc)
+               return -EFAULT;
+
+       return 0;
+}
+
 /**
- * probe_kernel_read(): safely attempt to read from a location
+ * probe_kernel_read(): safely attempt to read from a kernel-space location
  * @dst: pointer to the buffer that shall take the data
  * @src: address to read from
  * @size: size of the data chunk
@@ -30,16 +42,40 @@ long __probe_kernel_read(void *dst, const void *src, size_t size)
        mm_segment_t old_fs = get_fs();
 
        set_fs(KERNEL_DS);
-       pagefault_disable();
-       ret = __copy_from_user_inatomic(dst,
-                       (__force const void __user *)src, size);
-       pagefault_enable();
+       ret = probe_read_common(dst, (__force const void __user *)src, size);
        set_fs(old_fs);
 
-       return ret ? -EFAULT : 0;
+       return ret;
 }
 EXPORT_SYMBOL_GPL(probe_kernel_read);
 
+/**
+ * probe_user_read(): safely attempt to read from a user-space location
+ * @dst: pointer to the buffer that shall take the data
+ * @src: address to read from. This must be a user address.
+ * @size: size of the data chunk
+ *
+ * Safely read from user address @src to the buffer at @dst. If a kernel fault
+ * happens, handle that and return -EFAULT.
+ */
+
+long __weak probe_user_read(void *dst, const void __user *src, size_t size)
+    __attribute__((alias("__probe_user_read")));
+
+long __probe_user_read(void *dst, const void __user *src, size_t size)
+{
+       mm_segment_t saved_fs = get_fs();
+       long err;
+
+       /* Range-check and copy with the address limit set to USER_DS */
+       set_fs(USER_DS);
+       if (!access_ok(src, size))
+               err = -EFAULT;
+       else
+               err = probe_read_common(dst, src, size);
+       /* Put back the address limit that was in force on entry */
+       set_fs(saved_fs);
+
+       return err;
+}
+EXPORT_SYMBOL_GPL(probe_user_read);
+
 /**
  * probe_kernel_write(): safely attempt to write to a location
  * @dst: address to write to
@@ -67,6 +103,7 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
 }
 EXPORT_SYMBOL_GPL(probe_kernel_write);
 
+
 /**
  * strncpy_from_unsafe: - Copy a NUL terminated string from unsafe address.
  * @dst:   Destination address, in kernel space.  This buffer must be at
@@ -106,3 +143,76 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
 
        return ret ? -EFAULT : src - unsafe_addr;
 }
+
+/**
+ * strncpy_from_unsafe_user: - Copy a NUL terminated string from unsafe user
+ *                             address.
+ * @dst:   Destination address, in kernel space.  This buffer must be at
+ *         least @count bytes long.
+ * @unsafe_addr: Unsafe user address.
+ * @count: Maximum number of bytes to copy, including the trailing NUL.
+ *
+ * Copies a NUL-terminated string from unsafe user address to kernel buffer.
+ *
+ * On success, returns the length of the string INCLUDING the trailing NUL.
+ *
+ * If access fails, returns -EFAULT (some data may have been copied
+ * and the trailing NUL added).
+ *
+ * If @count is smaller than the length of the string, copies @count-1 bytes,
+ * sets the last byte of @dst buffer to NUL and returns @count.
+ */
+long strncpy_from_unsafe_user(char *dst, const void __user *unsafe_addr,
+                             long count)
+{
+       mm_segment_t old_fs = get_fs();
+       long ret;
+
+       /* Nothing can be stored, not even the NUL */
+       if (unlikely(count <= 0))
+               return 0;
+
+       set_fs(USER_DS);
+       pagefault_disable();
+       ret = strncpy_from_user(dst, unsafe_addr, count);
+       pagefault_enable();
+       set_fs(old_fs);
+
+       if (ret >= count) {
+               /* Truncated: terminate the buffer and report @count */
+               ret = count;
+               dst[ret - 1] = '\0';
+       } else if (ret >= 0) {
+               /*
+                * strncpy_from_user() does not count the NUL. Add it for
+                * every successful copy, including the empty string
+                * (ret == 0), so the result is the length INCLUDING the
+                * trailing NUL as this function's kernel-doc promises.
+                * Negative error codes pass through unchanged.
+                */
+               ret++;
+       }
+
+       return ret;
+}
+
+/**
+ * strnlen_unsafe_user: - Get the size of a user string INCLUDING final NUL.
+ * @unsafe_addr: The string to measure.
+ * @count: Maximum count (including NUL)
+ *
+ * Get the size of a NUL-terminated string in user space without pagefault.
+ *
+ * Returns the size of the string INCLUDING the terminating NUL.
+ *
+ * If the string is too long, returns a number larger than @count. User
+ * has to check the return value against "> count".
+ * On exception (or invalid count), returns 0.
+ *
+ * Unlike strnlen_user, this can be used from IRQ handler etc. because
+ * it disables pagefaults.
+ */
+long strnlen_unsafe_user(const void __user *unsafe_addr, long count)
+{
+       mm_segment_t old_fs = get_fs();
+       /*
+        * Keep the result as long: both @count and this function's return
+        * type are long, so an int temporary would narrow the value.
+        */
+       long ret;
+
+       /* Measure with the USER_DS address limit and page faults disabled */
+       set_fs(USER_DS);
+       pagefault_disable();
+       ret = strnlen_user(unsafe_addr, count);
+       pagefault_enable();
+       set_fs(old_fs);
+
+       return ret;
+}
index b6866a05edd25f5895867df960c7700052b627e8..ed3ecfa422e10629d077b9cd539316055532fbf2 100644 (file)
@@ -194,12 +194,13 @@ PROBE ARGUMENT
 --------------
 Each probe argument follows below syntax.
 
- [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
+ [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE][@user]
 
 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
 '$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo (*). Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x/x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail)
 On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
+"@user" is a special attribute which means LOCALVAR will be treated as user-space memory. This is only valid for kprobe events.
 
 TYPES
 -----
index 0c3b55d0617dc32c03229af3d1ada79307c64e8c..cd1eb73cfe8316c3dd9c7462f787ce1c0f540d7a 100644 (file)
@@ -1562,6 +1562,17 @@ static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
                str = tmp + 1;
        }
 
+       tmp = strchr(str, '@');
+       /*
+        * strcmp() returns 0 on a match, so the test must be negated:
+        * only an exact "@user" suffix selects user-memory access.
+        * tmp != str keeps a leading '@' out of this branch.
+        */
+       if (tmp && tmp != str && !strcmp(tmp + 1, "user")) { /* user attr */
+               if (!user_access_is_supported()) {
+                       semantic_error("ftrace does not support user access\n");
+                       return -EINVAL;
+               }
+               /* Cut the attribute off before the ':' type is parsed */
+               *tmp = '\0';
+               arg->user_access = true;
+               pr_debug("user_access ");
+       }
+
        tmp = strchr(str, ':');
        if (tmp) {      /* Type setting */
                *tmp = '\0';
index 05c8d571a901016148243539381aedbede13ddc3..96a319cd23783d6745148e1f6b4ce23566704773 100644 (file)
@@ -37,6 +37,7 @@ struct probe_trace_point {
 struct probe_trace_arg_ref {
        struct probe_trace_arg_ref      *next;  /* Next reference */
        long                            offset; /* Offset value */
+       bool                            user_access;    /* User-memory access */
 };
 
 /* kprobe-tracer and uprobe-tracer tracing argument */
@@ -82,6 +83,7 @@ struct perf_probe_arg {
        char                            *var;   /* Variable name */
        char                            *type;  /* Type name */
        struct perf_probe_arg_field     *field; /* Structure fields */
+       bool                            user_access;    /* User-memory access */
 };
 
 /* Perf probe probing event (point + arg) */
index c2998f90b23c82e46bef98b422f68af0582190d3..5b4d49382932238dcf7bf15df82f4b5b14935031 100644 (file)
@@ -1005,6 +1005,7 @@ enum ftrace_readme {
        FTRACE_README_PROBE_TYPE_X = 0,
        FTRACE_README_KRETPROBE_OFFSET,
        FTRACE_README_UPROBE_REF_CTR,
+       FTRACE_README_USER_ACCESS,
        FTRACE_README_END,
 };
 
@@ -1017,6 +1018,7 @@ static struct {
        DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"),
        DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"),
        DEFINE_TYPE(FTRACE_README_UPROBE_REF_CTR, "*ref_ctr_offset*"),
+       DEFINE_TYPE(FTRACE_README_USER_ACCESS, "*[u]<offset>*"),
 };
 
 static bool scan_ftrace_readme(enum ftrace_readme type)
@@ -1077,3 +1079,8 @@ bool uprobe_ref_ctr_is_supported(void)
 {
        return scan_ftrace_readme(FTRACE_README_UPROBE_REF_CTR);
 }
+
+/*
+ * Return the result of scanning the ftrace README for the
+ * FTRACE_README_USER_ACCESS pattern.
+ */
+bool user_access_is_supported(void)
+{
+       bool supported = scan_ftrace_readme(FTRACE_README_USER_ACCESS);
+
+       return supported;
+}
index 2a249182f2a62f7a862c9c158da6592c8d08c59e..986c1c94f64fecadc2bae7c42afe9255b9de8013 100644 (file)
@@ -70,6 +70,7 @@ int probe_cache__show_all_caches(struct strfilter *filter);
 bool probe_type_is_available(enum probe_type type);
 bool kretprobe_offset_is_supported(void);
 bool uprobe_ref_ctr_is_supported(void);
+bool user_access_is_supported(void);
 #else  /* ! HAVE_LIBELF_SUPPORT */
 static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused)
 {
index 7d8c9973492850a3a66efbcf0a4a55f8c58037ba..025fc4491993caff17ae68812a6311e550375d0e 100644 (file)
@@ -280,7 +280,7 @@ static_var:
 
 static int convert_variable_type(Dwarf_Die *vr_die,
                                 struct probe_trace_arg *tvar,
-                                const char *cast)
+                                const char *cast, bool user_access)
 {
        struct probe_trace_arg_ref **ref_ptr = &tvar->ref;
        Dwarf_Die type;
@@ -320,7 +320,8 @@ static int convert_variable_type(Dwarf_Die *vr_die,
        pr_debug("%s type is %s.\n",
                 dwarf_diename(vr_die), dwarf_diename(&type));
 
-       if (cast && strcmp(cast, "string") == 0) {      /* String type */
+       if (cast && (!strcmp(cast, "string") || !strcmp(cast, "ustring"))) {
+               /* String type */
                ret = dwarf_tag(&type);
                if (ret != DW_TAG_pointer_type &&
                    ret != DW_TAG_array_type) {
@@ -343,6 +344,7 @@ static int convert_variable_type(Dwarf_Die *vr_die,
                                pr_warning("Out of memory error\n");
                                return -ENOMEM;
                        }
+                       (*ref_ptr)->user_access = user_access;
                }
                if (!die_compare_name(&type, "char") &&
                    !die_compare_name(&type, "unsigned char")) {
@@ -397,7 +399,7 @@ formatted:
 static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
                                    struct perf_probe_arg_field *field,
                                    struct probe_trace_arg_ref **ref_ptr,
-                                   Dwarf_Die *die_mem)
+                                   Dwarf_Die *die_mem, bool user_access)
 {
        struct probe_trace_arg_ref *ref = *ref_ptr;
        Dwarf_Die type;
@@ -434,6 +436,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
                                *ref_ptr = ref;
                }
                ref->offset += dwarf_bytesize(&type) * field->index;
+               ref->user_access = user_access;
                goto next;
        } else if (tag == DW_TAG_pointer_type) {
                /* Check the pointer and dereference */
@@ -505,17 +508,18 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
                }
        }
        ref->offset += (long)offs;
+       ref->user_access = user_access;
 
        /* If this member is unnamed, we need to reuse this field */
        if (!dwarf_diename(die_mem))
                return convert_variable_fields(die_mem, varname, field,
-                                               &ref, die_mem);
+                                               &ref, die_mem, user_access);
 
 next:
        /* Converting next field */
        if (field->next)
                return convert_variable_fields(die_mem, field->name,
-                                       field->next, &ref, die_mem);
+                               field->next, &ref, die_mem, user_access);
        else
                return 0;
 }
@@ -541,11 +545,12 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
        else if (ret == 0 && pf->pvar->field) {
                ret = convert_variable_fields(vr_die, pf->pvar->var,
                                              pf->pvar->field, &pf->tvar->ref,
-                                             &die_mem);
+                                             &die_mem, pf->pvar->user_access);
                vr_die = &die_mem;
        }
        if (ret == 0)
-               ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type);
+               ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type,
+                                           pf->pvar->user_access);
        /* *expr will be cached in libdw. Don't free it. */
        return ret;
 }
index 6d5e9e87c4b7f51f5904d17fa016a134563ba0d0..063ecb290a5a3442bb116639dbc7ac866c0f0639 100755 (executable)
@@ -23,9 +23,15 @@ echo "                           If <dir> is -, all logs output in console only"
 exit $1
 }
 
+# default error
+err_ret=1
+
+# kselftest skip code is 4
+err_skip=4
+
 errexit() { # message
   echo "Error: $1" 1>&2
-  exit 1
+  exit $err_ret
 }
 
 # Ensuring user privilege
@@ -116,11 +122,31 @@ parse_opts() { # opts
 }
 
 # Parameters
-DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1`
-if [ -z "$DEBUGFS_DIR" ]; then
-    TRACING_DIR=`grep tracefs /proc/mounts | cut -f2 -d' ' | head -1`
-else
-    TRACING_DIR=$DEBUGFS_DIR/tracing
+TRACING_DIR=`grep tracefs /proc/mounts | cut -f2 -d' ' | head -1`
+if [ -z "$TRACING_DIR" ]; then
+    DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1`
+    if [ -z "$DEBUGFS_DIR" ]; then
+       # If tracefs exists, then so does /sys/kernel/tracing
+       if [ -d "/sys/kernel/tracing" ]; then
+           mount -t tracefs nodev /sys/kernel/tracing ||
+             errexit "Failed to mount /sys/kernel/tracing"
+           TRACING_DIR="/sys/kernel/tracing"
+       # If debugfs exists, then so does /sys/kernel/debug
+       elif [ -d "/sys/kernel/debug" ]; then
+           mount -t debugfs nodev /sys/kernel/debug ||
+             errexit "Failed to mount /sys/kernel/debug"
+           TRACING_DIR="/sys/kernel/debug/tracing"
+       else
+           err_ret=$err_skip
+           errexit "debugfs and tracefs are not configured in this kernel"
+       fi
+    else
+       TRACING_DIR="$DEBUGFS_DIR/tracing"
+    fi
+fi
+if [ ! -d "$TRACING_DIR" ]; then
+    err_ret=$err_skip
+    errexit "ftrace is not configured in this kernel"
 fi
 
 TOP_DIR=`absdir $0`
index 779ec11f61bda8f88502ea04430d4fd89e1d575b..1d96c5f7e402b74d0bb83b5466c79fcba96b6545 100644 (file)
@@ -91,8 +91,8 @@ initialize_ftrace() { # Reset ftrace to initial-state
     reset_events_filter
     reset_ftrace_filter
     disable_events
-    echo > set_event_pid       # event tracer is always on
-    echo > set_ftrace_pid
+    [ -f set_event_pid ] && echo > set_event_pid
+    [ -f set_ftrace_pid ] && echo > set_ftrace_pid
     [ -f set_ftrace_filter ] && echo | tee set_ftrace_*
     [ -f set_graph_function ] && echo | tee set_graph_*
     [ -f stack_trace_filter ] && echo > stack_trace_filter
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
new file mode 100644 (file)
index 0000000..0f60087
--- /dev/null
@@ -0,0 +1,32 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event user-memory access
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+grep -q '\$arg<N>' README || exit_unresolved # depends on arch
+grep -A10 "fetcharg:" README | grep -q 'ustring' || exit_unsupported
+grep -A10 "fetcharg:" README | grep -q '\[u\]<offset>' || exit_unsupported
+
+:;: "user-memory access syntax and ustring working on user memory";:
+echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \
+       > kprobe_events
+
+grep myevent kprobe_events | \
+       grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string'
+echo 1 > events/kprobes/myevent/enable
+echo > /dev/null
+echo 0 > events/kprobes/myevent/enable
+
+grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"'
+
+:;: "user-memory access syntax and ustring not working with kernel memory";:
+echo 'p:myevent vfs_symlink path=+0($arg3):ustring path2=+u0($arg3):string' \
+       > kprobe_events
+echo 1 > events/kprobes/myevent/enable
+ln -s foo $TMPDIR/bar
+echo 0 > events/kprobes/myevent/enable
+
+grep myevent trace | grep -q 'path=(fault) path2=(fault)'
+
+exit 0